full_lengther_next 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -125,7 +125,7 @@ def filter_and_makeDB(formatted_db_path, dbtype, db_group, isoform_hash, prefix,
125
125
  puts 'Building ' + file_name
126
126
  fasta = File.join(formatted_db_path,"#{file_name}","#{file_name}.fasta")
127
127
  blastdb_input = fasta.gsub('.fasta', '')
128
- current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat")
128
+ current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat.gz")
129
129
  if File.exists?(current_db_source)
130
130
  seqs = filter_incomplete_seqs(current_db_source, isoform_hash, formatted_db_path, file_name, options)
131
131
  if !options[:only_index]
@@ -267,7 +267,8 @@ def filter_incomplete_seqs(file_name, isoform_hash, formatted_db_path, db_name,
267
267
  main_name = File.join(db_folder, db_name)
268
268
  index = File.open(main_name + '.index', 'w') if !options[:all]
269
269
  seqs = ''
270
- Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
270
+ #Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
271
+ Bio::FlatFile.auto(IO.popen("gzip -dc #{file_name}")).each_entry {|uniprot_record|
271
272
  if !options[:all] && !complete?(uniprot_record)
272
273
  next
273
274
  else #Get attributes of full length records
@@ -292,7 +293,7 @@ end
292
293
  options = {}
293
294
 
294
295
  divs = %w{fungi invertebrates mammals plants rodents vertebrates}
295
- all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
296
+ all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates archaea viruses unclassified}
296
297
 
297
298
  optparse = OptionParser.new do |opts|
298
299
  options[:uniprot_div] = divs
@@ -380,12 +381,12 @@ download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
380
381
  if !options[:no_download]
381
382
  if !options[:no_uniprot]
382
383
  conecta_uniprot(options[:uniprot_div], formatted_db_path)
383
- system('gunzip '+formatted_db_path+'*.gz')
384
+ #system('gunzip '+formatted_db_path+'*.gz')
384
385
  end
385
386
  end
386
387
 
387
388
  if !options[:no_uniprot]
388
- isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta")) #archivo de variantes de splicing. POR QUE?
389
+ isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta.gz")) #archivo de variantes de splicing. POR QUE?
389
390
  options[:uniprot_div].each do |db_group|
390
391
  filter_and_makeDB(formatted_db_path, 'sprot', db_group, isoform_hash, 'sp', options)
391
392
  filter_and_makeDB(formatted_db_path, 'trembl', db_group, nil, 'tr', options) if !options[:no_trembl]
@@ -64,7 +64,7 @@ optparse = OptionParser.new do |opts|
64
64
  end
65
65
 
66
66
  options[:user_fasta] = nil
67
- opts.on( '-f', '--user_fasta FILE', 'Use a cutom fasta file to build the user database') do |file|
67
+ opts.on( '-f', '--user_fasta FILE', 'Use a custom fasta file to build the user database') do |file|
68
68
  options[:user_fasta] = file
69
69
  end
70
70
 
@@ -119,7 +119,7 @@ output_file_path.gsub!(' ', '_')
119
119
 
120
120
  seqs = ''
121
121
  if options[:user_fasta].nil?
122
- isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta'))
122
+ isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta.gz'))
123
123
  seqs = get_seqs(File.join(formatted_db_path, 'sp_' + options[:uniprot_div],"sp_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
124
124
  isoform_hash = nil
125
125
  seqs << get_seqs(File.join(formatted_db_path, 'tr_' + options[:uniprot_div],"tr_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
@@ -7,7 +7,7 @@ $: << File.expand_path(File.join(File.dirname(__FILE__), 'full_lengther_next', '
7
7
 
8
8
 
9
9
  module FullLengtherNext
10
- VERSION = '0.6.1'
10
+ VERSION = '0.6.2'
11
11
 
12
12
  FULL_LENGHTER_VERSION = VERSION
13
13
  end
@@ -1,23 +1,37 @@
1
- require 'scbi_fasta'
2
-
1
+ require 'scbi_zcat'
3
2
 
4
3
  def load_isoform_hash(file)
5
4
  isoform_hash = {}
6
5
  if File.exists?(file)
7
- fasta = FastaQualFile.new(file)
8
- fasta.each do |name, seq, desc|
9
- name =~ /(\w+\|(\w+)\-\d+\|)/
10
- if isoform_hash[$2].nil?
11
- isoform_hash[$2] = ">#{$1}#{desc}\n#{seq}"
12
- else
13
- isoform_hash[$2] += "\n>#{$1}#{desc}\n#{seq}"
14
- end
6
+ fasta = ScbiZcatFile.new(file)
7
+ filtered_fasta = ''
8
+ seq_name = nil
9
+ seq = ''
10
+ while !fasta.eof
11
+ line = fasta.readline.chomp
12
+ if line[0] == '>'
13
+ load_seq_in_hash(seq_name, seq, isoform_hash) if !seq_name.nil?
14
+ seq_name = line
15
+ seq = ''
16
+ else
17
+ seq << line
18
+ end
15
19
  end
16
- fasta.close
20
+ load_seq_in_hash(seq_name, seq, isoform_hash)
17
21
  end
18
22
  return isoform_hash
19
23
  end
20
24
 
25
+ def load_seq_in_hash(seq_name, seq, isoform_hash)
26
+ name, desc = seq_name.split(' ', 2)
27
+ name =~ /(\w+\|(\w+)\-\d+\|)/
28
+ if isoform_hash[$2].nil?
29
+ isoform_hash[$2] = ">#{$1}#{desc}\n#{seq}"
30
+ else
31
+ isoform_hash[$2] += "\n>#{$1}#{desc}\n#{seq}"
32
+ end
33
+ end
34
+
21
35
  def do_makeblastdb(seqs, output, dbtype)
22
36
  cmd="makeblastdb -in - -out #{output} -title #{File.basename(output)} -dbtype #{dbtype} -parse_seqids"
23
37
  IO.popen(cmd,'w+') {|makedb|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: full_lengther_next
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-05-03 00:00:00.000000000 Z
12
+ date: 2016-05-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: xml-simple