full_lengther_next 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,7 @@ def download_ncrna(formatted_db_path, no_download)
23
23
  ncrna_zip = File.join(formatted_db_path, 'ncrna.gz')
24
24
  db_path = File.join(formatted_db_path, 'nc_rna_db')
25
25
  db_files = File.join(db_path, 'ncrna')
26
- fasta = File.join(db_path , 'filtered.fasta')
26
+ fasta = File.join(db_path, 'filtered.fasta')
27
27
  if !no_download
28
28
  puts "Downloading ncRNA database"
29
29
  open(ncrna_zip, 'wb') do |my_file|
@@ -34,20 +34,23 @@ def download_ncrna(formatted_db_path, no_download)
34
34
  end
35
35
 
36
36
  if File.exists?(ncrna_zip)
37
+ puts "\nFiltering ncRNA database"
37
38
  Dir.mkdir(db_path) if !File.exists?(db_path)
38
39
  black_list = [' 16S ', 'rRNA', 'ribosomal', 'tRNA', 'rrn'] #rrn = ribosomal rna
39
- filtered_fasta = filtering_ncbi_seqs(ncrna_zip, 40, black_list)
40
- puts "\nncRNA database decompressed and cleaned"
40
+ filtered_fasta = filtering_seqs(ncrna_zip, 40, black_list)
41
41
  #do_makeblastdb(filtered_fasta, db_files, 'nucl')
42
42
  output_file = File.open(fasta, 'w')
43
43
  output_file.puts filtered_fasta
44
44
  output_file.close
45
- system("cd-hit -i #{fasta} -o /dev/stderr -c 0.95 -n 11 -M 0 2>&1 >/dev/null | makeblastdb -in - -out #{db_files} -title #{File.basename(db_files)} -dbtype 'nucl' -parse_seqids")
45
+ puts "\nncRNA database filtered"
46
+ puts "\nncRNA database removing redundance with cdhit and creating BlastDb"
47
+ cmd = "cd-hit-est -i #{fasta} -o /dev/stderr -c 0.95 -n 11 -M 0 2>&1 >/dev/null | makeblastdb -in - -out #{db_files} -title #{File.basename(db_files)} -dbtype 'nucl' -parse_seqids"
48
+ system(cmd)
46
49
  puts "\nncRNA database completed"
47
50
  end
48
51
  end
49
52
 
50
- def filtering_ncbi_seqs(fasta_file, max_length, black_list)
53
+ def filtering_seqs(fasta_file, max_length, black_list)
51
54
  fasta = ScbiZcatFile.new(fasta_file)
52
55
  filtered_fasta = ''
53
56
  seq_name = nil
@@ -288,13 +291,14 @@ end
288
291
 
289
292
  options = {}
290
293
 
291
- divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
294
+ divs = %w{fungi invertebrates mammals plants rodents vertebrates}
295
+ all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
292
296
 
293
297
  optparse = OptionParser.new do |opts|
294
298
  options[:uniprot_div] = divs
295
299
  opts.on( '-u', '--file String', 'Uniprot DBs to be downloaded. String structure: \'div_name1,div_name2..\'. Posible options: human, fungi, invertebrates, mammals, plants, rodents, vertebrates. Default: download all') do |uniprot_div|
296
300
  temp_divs = uniprot_div.split(',')
297
- check_valid_ids = temp_divs - divs
301
+ check_valid_ids = temp_divs - all_divs
298
302
  if !check_valid_ids.empty?
299
303
  puts 'This uniprot division not exists', check_valid_ids
300
304
  process.exit
@@ -314,7 +318,7 @@ optparse = OptionParser.new do |opts|
314
318
  end
315
319
 
316
320
  options[:only_index] = FALSE
317
- opts.on( '-i', '--only_index', 'Build annotation index only without blast DB') do
321
+ opts.on( '-i', '--only_index', 'Build annotation index only without do blast DB') do
318
322
  options[:only_index] = TRUE
319
323
  end
320
324
 
@@ -333,7 +337,7 @@ optparse = OptionParser.new do |opts|
333
337
  options[:cdhit] = cdhit.to_f
334
338
  end
335
339
 
336
- options[:no_uniprot] = FALSE
340
+ options[:no_uniprot] = FALSE
337
341
  opts.on( '-p', '--no_uniprot', 'No use uniprot sequences') do
338
342
  options[:no_uniprot] = TRUE
339
343
  end
@@ -374,8 +378,10 @@ puts "\nTo set the path for storing databases, execute next line in your termina
374
378
  download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
375
379
 
376
380
  if !options[:no_download]
377
- conecta_uniprot(options[:uniprot_div], formatted_db_path)
378
- system('gunzip '+formatted_db_path+'*.gz')
381
+ if !options[:no_uniprot]
382
+ conecta_uniprot(options[:uniprot_div], formatted_db_path)
383
+ system('gunzip '+formatted_db_path+'*.gz')
384
+ end
379
385
  end
380
386
 
381
387
  if !options[:no_uniprot]
@@ -7,7 +7,7 @@ $: << File.expand_path(File.join(File.dirname(__FILE__), 'full_lengther_next', '
7
7
 
8
8
 
9
9
  module FullLengtherNext
10
- VERSION = '0.6.0'
10
+ VERSION = '0.6.1'
11
11
 
12
12
  FULL_LENGHTER_VERSION = VERSION
13
13
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: full_lengther_next
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-04-14 00:00:00.000000000 Z
12
+ date: 2016-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: xml-simple