full_lengther_next 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/download_fln_dbs.rb +17 -11
- data/lib/full_lengther_next.rb +1 -1
- metadata +2 -2
data/bin/download_fln_dbs.rb
CHANGED
@@ -23,7 +23,7 @@ def download_ncrna(formatted_db_path, no_download)
|
|
23
23
|
ncrna_zip = File.join(formatted_db_path, 'ncrna.gz')
|
24
24
|
db_path = File.join(formatted_db_path, 'nc_rna_db')
|
25
25
|
db_files = File.join(db_path, 'ncrna')
|
26
|
-
fasta = File.join(db_path
|
26
|
+
fasta = File.join(db_path, 'filtered.fasta')
|
27
27
|
if !no_download
|
28
28
|
puts "Downloading ncRNA database"
|
29
29
|
open(ncrna_zip, 'wb') do |my_file|
|
@@ -34,20 +34,23 @@ def download_ncrna(formatted_db_path, no_download)
|
|
34
34
|
end
|
35
35
|
|
36
36
|
if File.exists?(ncrna_zip)
|
37
|
+
puts "\nFiltering ncRNA database"
|
37
38
|
Dir.mkdir(db_path) if !File.exists?(db_path)
|
38
39
|
black_list = [' 16S ', 'rRNA', 'ribosomal', 'tRNA', 'rrn'] #rrn = ribosomal rna
|
39
|
-
filtered_fasta =
|
40
|
-
puts "\nncRNA database decompressed and cleaned"
|
40
|
+
filtered_fasta = filtering_seqs(ncrna_zip, 40, black_list)
|
41
41
|
#do_makeblastdb(filtered_fasta, db_files, 'nucl')
|
42
42
|
output_file = File.open(fasta, 'w')
|
43
43
|
output_file.puts filtered_fasta
|
44
44
|
output_file.close
|
45
|
-
|
45
|
+
puts "\nncRNA database filtered"
|
46
|
+
puts "\nncRNA database removing redundance with cdhit and creating BlastDb"
|
47
|
+
cmd = "cd-hit-est -i #{fasta} -o /dev/stderr -c 0.95 -n 11 -M 0 2>&1 >/dev/null | makeblastdb -in - -out #{db_files} -title #{File.basename(db_files)} -dbtype 'nucl' -parse_seqids"
|
48
|
+
system(cmd)
|
46
49
|
puts "\nncRNA database completed"
|
47
50
|
end
|
48
51
|
end
|
49
52
|
|
50
|
-
def
|
53
|
+
def filtering_seqs(fasta_file, max_length, black_list)
|
51
54
|
fasta = ScbiZcatFile.new(fasta_file)
|
52
55
|
filtered_fasta = ''
|
53
56
|
seq_name = nil
|
@@ -288,13 +291,14 @@ end
|
|
288
291
|
|
289
292
|
options = {}
|
290
293
|
|
291
|
-
divs = %w{
|
294
|
+
divs = %w{fungi invertebrates mammals plants rodents vertebrates}
|
295
|
+
all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
|
292
296
|
|
293
297
|
optparse = OptionParser.new do |opts|
|
294
298
|
options[:uniprot_div] = divs
|
295
299
|
opts.on( '-u', '--file String', 'Uniprot DBs to be downloaded. String structure: \'div_name1,div_name2..\'. Posible options: human, fungi, invertebrates, mammals, plants, rodents, vertebrates. Default: download all') do |uniprot_div|
|
296
300
|
temp_divs = uniprot_div.split(',')
|
297
|
-
check_valid_ids = temp_divs -
|
301
|
+
check_valid_ids = temp_divs - all_divs
|
298
302
|
if !check_valid_ids.empty?
|
299
303
|
puts 'This uniprot division not exists', check_valid_ids
|
300
304
|
process.exit
|
@@ -314,7 +318,7 @@ optparse = OptionParser.new do |opts|
|
|
314
318
|
end
|
315
319
|
|
316
320
|
options[:only_index] = FALSE
|
317
|
-
opts.on( '-i', '--only_index', 'Build annotation index only without blast DB') do
|
321
|
+
opts.on( '-i', '--only_index', 'Build annotation index only without do blast DB') do
|
318
322
|
options[:only_index] = TRUE
|
319
323
|
end
|
320
324
|
|
@@ -333,7 +337,7 @@ optparse = OptionParser.new do |opts|
|
|
333
337
|
options[:cdhit] = cdhit.to_f
|
334
338
|
end
|
335
339
|
|
336
|
-
|
340
|
+
options[:no_uniprot] = FALSE
|
337
341
|
opts.on( '-p', '--no_uniprot', 'No use uniprot sequences') do
|
338
342
|
options[:no_uniprot] = TRUE
|
339
343
|
end
|
@@ -374,8 +378,10 @@ puts "\nTo set the path for storing databases, execute next line in your termina
|
|
374
378
|
download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
|
375
379
|
|
376
380
|
if !options[:no_download]
|
377
|
-
|
378
|
-
|
381
|
+
if !options[:no_uniprot]
|
382
|
+
conecta_uniprot(options[:uniprot_div], formatted_db_path)
|
383
|
+
system('gunzip '+formatted_db_path+'*.gz')
|
384
|
+
end
|
379
385
|
end
|
380
386
|
|
381
387
|
if !options[:no_uniprot]
|
data/lib/full_lengther_next.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: full_lengther_next
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: xml-simple
|