full_lengther_next 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/download_fln_dbs.rb +17 -11
- data/lib/full_lengther_next.rb +1 -1
- metadata +2 -2
data/bin/download_fln_dbs.rb
CHANGED
@@ -23,7 +23,7 @@ def download_ncrna(formatted_db_path, no_download)
|
|
23
23
|
ncrna_zip = File.join(formatted_db_path, 'ncrna.gz')
|
24
24
|
db_path = File.join(formatted_db_path, 'nc_rna_db')
|
25
25
|
db_files = File.join(db_path, 'ncrna')
|
26
|
-
fasta = File.join(db_path
|
26
|
+
fasta = File.join(db_path, 'filtered.fasta')
|
27
27
|
if !no_download
|
28
28
|
puts "Downloading ncRNA database"
|
29
29
|
open(ncrna_zip, 'wb') do |my_file|
|
@@ -34,20 +34,23 @@ def download_ncrna(formatted_db_path, no_download)
|
|
34
34
|
end
|
35
35
|
|
36
36
|
if File.exists?(ncrna_zip)
|
37
|
+
puts "\nFiltering ncRNA database"
|
37
38
|
Dir.mkdir(db_path) if !File.exists?(db_path)
|
38
39
|
black_list = [' 16S ', 'rRNA', 'ribosomal', 'tRNA', 'rrn'] #rrn = ribosonal rna
|
39
|
-
filtered_fasta =
|
40
|
-
puts "\nncRNA database decompressed and cleaned"
|
40
|
+
filtered_fasta = filtering_seqs(ncrna_zip, 40, black_list)
|
41
41
|
#do_makeblastdb(filtered_fasta, db_files, 'nucl')
|
42
42
|
output_file = File.open(fasta, 'w')
|
43
43
|
output_file.puts filtered_fasta
|
44
44
|
output_file.close
|
45
|
-
|
45
|
+
puts "\nncRNA database filtered"
|
46
|
+
puts "\nncRNA database removing redundance with cdhit and creating BlastDb"
|
47
|
+
cmd = "cd-hit-est -i #{fasta} -o /dev/stderr -c 0.95 -n 11 -M 0 2>&1 >/dev/null | makeblastdb -in - -out #{db_files} -title #{File.basename(db_files)} -dbtype 'nucl' -parse_seqids"
|
48
|
+
system(cmd)
|
46
49
|
puts "\nncRNA database completed"
|
47
50
|
end
|
48
51
|
end
|
49
52
|
|
50
|
-
def
|
53
|
+
def filtering_seqs(fasta_file, max_length, black_list)
|
51
54
|
fasta = ScbiZcatFile.new(fasta_file)
|
52
55
|
filtered_fasta = ''
|
53
56
|
seq_name = nil
|
@@ -288,13 +291,14 @@ end
|
|
288
291
|
|
289
292
|
options = {}
|
290
293
|
|
291
|
-
divs = %w{
|
294
|
+
divs = %w{fungi invertebrates mammals plants rodents vertebrates}
|
295
|
+
all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
|
292
296
|
|
293
297
|
optparse = OptionParser.new do |opts|
|
294
298
|
options[:uniprot_div] = divs
|
295
299
|
opts.on( '-u', '--file String', 'Uniprot DBs to be downloaded. String structure: \'div_name1,div_name2..\'. Posible options: human, fungi, invertebrates, mammals, plants, rodents, vertebrates. Default: download all') do |uniprot_div|
|
296
300
|
temp_divs = uniprot_div.split(',')
|
297
|
-
check_valid_ids = temp_divs -
|
301
|
+
check_valid_ids = temp_divs - all_divs
|
298
302
|
if !check_valid_ids.empty?
|
299
303
|
puts 'This uniprot division not exists', check_valid_ids
|
300
304
|
process.exit
|
@@ -314,7 +318,7 @@ optparse = OptionParser.new do |opts|
|
|
314
318
|
end
|
315
319
|
|
316
320
|
options[:only_index] = FALSE
|
317
|
-
opts.on( '-i', '--only_index', 'Build annotation index only without blast DB') do
|
321
|
+
opts.on( '-i', '--only_index', 'Build annotation index only without do blast DB') do
|
318
322
|
options[:only_index] = TRUE
|
319
323
|
end
|
320
324
|
|
@@ -333,7 +337,7 @@ optparse = OptionParser.new do |opts|
|
|
333
337
|
options[:cdhit] = cdhit.to_f
|
334
338
|
end
|
335
339
|
|
336
|
-
|
340
|
+
options[:no_uniprot] = FALSE
|
337
341
|
opts.on( '-p', '--no_uniprot', 'No use uniprot sequences') do
|
338
342
|
options[:no_uniprot] = TRUE
|
339
343
|
end
|
@@ -374,8 +378,10 @@ puts "\nTo set the path for storing databases, execute next line in your termina
|
|
374
378
|
download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
|
375
379
|
|
376
380
|
if !options[:no_download]
|
377
|
-
|
378
|
-
|
381
|
+
if !options[:no_uniprot]
|
382
|
+
conecta_uniprot(options[:uniprot_div], formatted_db_path)
|
383
|
+
system('gunzip '+formatted_db_path+'*.gz')
|
384
|
+
end
|
379
385
|
end
|
380
386
|
|
381
387
|
if !options[:no_uniprot]
|
data/lib/full_lengther_next.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: full_lengther_next
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: xml-simple
|