full_lengther_next 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -23,7 +23,7 @@ def download_ncrna(formatted_db_path, no_download)
23
23
  ncrna_zip = File.join(formatted_db_path, 'ncrna.gz')
24
24
  db_path = File.join(formatted_db_path, 'nc_rna_db')
25
25
  db_files = File.join(db_path, 'ncrna')
26
- fasta = File.join(db_path , 'filtered.fasta')
26
+ fasta = File.join(db_path, 'filtered.fasta')
27
27
  if !no_download
28
28
  puts "Downloading ncRNA database"
29
29
  open(ncrna_zip, 'wb') do |my_file|
@@ -34,20 +34,23 @@ def download_ncrna(formatted_db_path, no_download)
34
34
  end
35
35
 
36
36
  if File.exists?(ncrna_zip)
37
+ puts "\nFiltering ncRNA database"
37
38
  Dir.mkdir(db_path) if !File.exists?(db_path)
38
39
  black_list = [' 16S ', 'rRNA', 'ribosomal', 'tRNA', 'rrn'] #rrn = ribosomal rna
39
- filtered_fasta = filtering_ncbi_seqs(ncrna_zip, 40, black_list)
40
- puts "\nncRNA database decompressed and cleaned"
40
+ filtered_fasta = filtering_seqs(ncrna_zip, 40, black_list)
41
41
  #do_makeblastdb(filtered_fasta, db_files, 'nucl')
42
42
  output_file = File.open(fasta, 'w')
43
43
  output_file.puts filtered_fasta
44
44
  output_file.close
45
- system("cd-hit -i #{fasta} -o /dev/stderr -c 0.95 -n 11 -M 0 2>&1 >/dev/null | makeblastdb -in - -out #{db_files} -title #{File.basename(db_files)} -dbtype 'nucl' -parse_seqids")
45
+ puts "\nncRNA database filtered"
46
+ puts "\nncRNA database removing redundance with cdhit and creating BlastDb"
47
+ cmd = "cd-hit-est -i #{fasta} -o /dev/stderr -c 0.95 -n 11 -M 0 2>&1 >/dev/null | makeblastdb -in - -out #{db_files} -title #{File.basename(db_files)} -dbtype 'nucl' -parse_seqids"
48
+ system(cmd)
46
49
  puts "\nncRNA database completed"
47
50
  end
48
51
  end
49
52
 
50
- def filtering_ncbi_seqs(fasta_file, max_length, black_list)
53
+ def filtering_seqs(fasta_file, max_length, black_list)
51
54
  fasta = ScbiZcatFile.new(fasta_file)
52
55
  filtered_fasta = ''
53
56
  seq_name = nil
@@ -288,13 +291,14 @@ end
288
291
 
289
292
  options = {}
290
293
 
291
- divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
294
+ divs = %w{fungi invertebrates mammals plants rodents vertebrates}
295
+ all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
292
296
 
293
297
  optparse = OptionParser.new do |opts|
294
298
  options[:uniprot_div] = divs
295
299
  opts.on( '-u', '--file String', 'Uniprot DBs to be downloaded. String structure: \'div_name1,div_name2..\'. Posible options: human, fungi, invertebrates, mammals, plants, rodents, vertebrates. Default: download all') do |uniprot_div|
296
300
  temp_divs = uniprot_div.split(',')
297
- check_valid_ids = temp_divs - divs
301
+ check_valid_ids = temp_divs - all_divs
298
302
  if !check_valid_ids.empty?
299
303
  puts 'This uniprot division not exists', check_valid_ids
300
304
  process.exit
@@ -314,7 +318,7 @@ optparse = OptionParser.new do |opts|
314
318
  end
315
319
 
316
320
  options[:only_index] = FALSE
317
- opts.on( '-i', '--only_index', 'Build annotation index only without blast DB') do
321
+ opts.on( '-i', '--only_index', 'Build annotation index only without do blast DB') do
318
322
  options[:only_index] = TRUE
319
323
  end
320
324
 
@@ -333,7 +337,7 @@ optparse = OptionParser.new do |opts|
333
337
  options[:cdhit] = cdhit.to_f
334
338
  end
335
339
 
336
- options[:no_uniprot] = FALSE
340
+ options[:no_uniprot] = FALSE
337
341
  opts.on( '-p', '--no_uniprot', 'No use uniprot sequences') do
338
342
  options[:no_uniprot] = TRUE
339
343
  end
@@ -374,8 +378,10 @@ puts "\nTo set the path for storing databases, execute next line in your termina
374
378
  download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
375
379
 
376
380
  if !options[:no_download]
377
- conecta_uniprot(options[:uniprot_div], formatted_db_path)
378
- system('gunzip '+formatted_db_path+'*.gz')
381
+ if !options[:no_uniprot]
382
+ conecta_uniprot(options[:uniprot_div], formatted_db_path)
383
+ system('gunzip '+formatted_db_path+'*.gz')
384
+ end
379
385
  end
380
386
 
381
387
  if !options[:no_uniprot]
@@ -7,7 +7,7 @@ $: << File.expand_path(File.join(File.dirname(__FILE__), 'full_lengther_next', '
7
7
 
8
8
 
9
9
  module FullLengtherNext
10
- VERSION = '0.6.0'
10
+ VERSION = '0.6.1'
11
11
 
12
12
  FULL_LENGHTER_VERSION = VERSION
13
13
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: full_lengther_next
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-04-14 00:00:00.000000000 Z
12
+ date: 2016-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: xml-simple