full_lengther_next 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -125,7 +125,7 @@ def filter_and_makeDB(formatted_db_path, dbtype, db_group, isoform_hash, prefix,
125
125
  puts 'Building ' + file_name
126
126
  fasta = File.join(formatted_db_path,"#{file_name}","#{file_name}.fasta")
127
127
  blastdb_input = fasta.gsub('.fasta', '')
128
- current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat")
128
+ current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat.gz")
129
129
  if File.exists?(current_db_source)
130
130
  seqs = filter_incomplete_seqs(current_db_source, isoform_hash, formatted_db_path, file_name, options)
131
131
  if !options[:only_index]
@@ -267,7 +267,8 @@ def filter_incomplete_seqs(file_name, isoform_hash, formatted_db_path, db_name,
267
267
  main_name = File.join(db_folder, db_name)
268
268
  index = File.open(main_name + '.index', 'w') if !options[:all]
269
269
  seqs = ''
270
- Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
270
+ #Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
271
+ Bio::FlatFile.auto(IO.popen("gzip -dc #{file_name}")).each_entry {|uniprot_record|
271
272
  if !options[:all] && !complete?(uniprot_record)
272
273
  next
273
274
  else #Get attributes of full length records
@@ -292,7 +293,7 @@ end
292
293
  options = {}
293
294
 
294
295
  divs = %w{fungi invertebrates mammals plants rodents vertebrates}
295
- all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
296
+ all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates archaea viruses unclassified}
296
297
 
297
298
  optparse = OptionParser.new do |opts|
298
299
  options[:uniprot_div] = divs
@@ -380,12 +381,12 @@ download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
380
381
  if !options[:no_download]
381
382
  if !options[:no_uniprot]
382
383
  conecta_uniprot(options[:uniprot_div], formatted_db_path)
383
- system('gunzip '+formatted_db_path+'*.gz')
384
+ #system('gunzip '+formatted_db_path+'*.gz')
384
385
  end
385
386
  end
386
387
 
387
388
  if !options[:no_uniprot]
388
- isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta")) #archivo de variantes de splicing. POR QUE?
389
+ isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta.gz")) #archivo de variantes de splicing. POR QUE?
389
390
  options[:uniprot_div].each do |db_group|
390
391
  filter_and_makeDB(formatted_db_path, 'sprot', db_group, isoform_hash, 'sp', options)
391
392
  filter_and_makeDB(formatted_db_path, 'trembl', db_group, nil, 'tr', options) if !options[:no_trembl]
@@ -64,7 +64,7 @@ optparse = OptionParser.new do |opts|
64
64
  end
65
65
 
66
66
  options[:user_fasta] = nil
67
- opts.on( '-f', '--user_fasta FILE', 'Use a cutom fasta file to build the user database') do |file|
67
+ opts.on( '-f', '--user_fasta FILE', 'Use a custom fasta file to build the user database') do |file|
68
68
  options[:user_fasta] = file
69
69
  end
70
70
 
@@ -119,7 +119,7 @@ output_file_path.gsub!(' ', '_')
119
119
 
120
120
  seqs = ''
121
121
  if options[:user_fasta].nil?
122
- isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta'))
122
+ isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta.gz'))
123
123
  seqs = get_seqs(File.join(formatted_db_path, 'sp_' + options[:uniprot_div],"sp_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
124
124
  isoform_hash = nil
125
125
  seqs << get_seqs(File.join(formatted_db_path, 'tr_' + options[:uniprot_div],"tr_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
@@ -7,7 +7,7 @@ $: << File.expand_path(File.join(File.dirname(__FILE__), 'full_lengther_next', '
7
7
 
8
8
 
9
9
  module FullLengtherNext
10
- VERSION = '0.6.1'
10
+ VERSION = '0.6.2'
11
11
 
12
12
  FULL_LENGHTER_VERSION = VERSION
13
13
  end
@@ -1,23 +1,37 @@
1
- require 'scbi_fasta'
2
-
1
+ require 'scbi_zcat'
3
2
 
4
3
  def load_isoform_hash(file)
5
4
  isoform_hash = {}
6
5
  if File.exists?(file)
7
- fasta = FastaQualFile.new(file)
8
- fasta.each do |name, seq, desc|
9
- name =~ /(\w+\|(\w+)\-\d+\|)/
10
- if isoform_hash[$2].nil?
11
- isoform_hash[$2] = ">#{$1}#{desc}\n#{seq}"
12
- else
13
- isoform_hash[$2] += "\n>#{$1}#{desc}\n#{seq}"
14
- end
6
+ fasta = ScbiZcatFile.new(file)
7
+ filtered_fasta = ''
8
+ seq_name = nil
9
+ seq = ''
10
+ while !fasta.eof
11
+ line = fasta.readline.chomp
12
+ if line[0] == '>'
13
+ load_seq_in_hash(seq_name, seq, isoform_hash) if !seq_name.nil?
14
+ seq_name = line
15
+ seq = ''
16
+ else
17
+ seq << line
18
+ end
15
19
  end
16
- fasta.close
20
+ load_seq_in_hash(seq_name, seq, isoform_hash)
17
21
  end
18
22
  return isoform_hash
19
23
  end
20
24
 
25
+ def load_seq_in_hash(seq_name, seq, isoform_hash)
26
+ name, desc = seq_name.split(' ', 2)
27
+ name =~ /(\w+\|(\w+)\-\d+\|)/
28
+ if isoform_hash[$2].nil?
29
+ isoform_hash[$2] = ">#{$1}#{desc}\n#{seq}"
30
+ else
31
+ isoform_hash[$2] += "\n>#{$1}#{desc}\n#{seq}"
32
+ end
33
+ end
34
+
21
35
  def do_makeblastdb(seqs, output, dbtype)
22
36
  cmd="makeblastdb -in - -out #{output} -title #{File.basename(output)} -dbtype #{dbtype} -parse_seqids"
23
37
  IO.popen(cmd,'w+') {|makedb|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: full_lengther_next
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-05-03 00:00:00.000000000 Z
12
+ date: 2016-05-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: xml-simple