full_lengther_next 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/download_fln_dbs.rb +6 -5
- data/bin/make_user_db.rb +2 -2
- data/lib/full_lengther_next.rb +1 -1
- data/lib/full_lengther_next/classes/handle_db.rb +25 -11
- metadata +2 -2
data/bin/download_fln_dbs.rb
CHANGED
@@ -125,7 +125,7 @@ def filter_and_makeDB(formatted_db_path, dbtype, db_group, isoform_hash, prefix,
|
|
125
125
|
puts 'Building ' + file_name
|
126
126
|
fasta = File.join(formatted_db_path,"#{file_name}","#{file_name}.fasta")
|
127
127
|
blastdb_input = fasta.gsub('.fasta', '')
|
128
|
-
current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat")
|
128
|
+
current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat.gz")
|
129
129
|
if File.exists?(current_db_source)
|
130
130
|
seqs = filter_incomplete_seqs(current_db_source, isoform_hash, formatted_db_path, file_name, options)
|
131
131
|
if !options[:only_index]
|
@@ -267,7 +267,8 @@ def filter_incomplete_seqs(file_name, isoform_hash, formatted_db_path, db_name,
|
|
267
267
|
main_name = File.join(db_folder, db_name)
|
268
268
|
index = File.open(main_name + '.index', 'w') if !options[:all]
|
269
269
|
seqs = ''
|
270
|
-
Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
|
270
|
+
#Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
|
271
|
+
Bio::FlatFile.auto(IO.popen("gzip -dc #{file_name}")).each_entry {|uniprot_record|
|
271
272
|
if !options[:all] && !complete?(uniprot_record)
|
272
273
|
next
|
273
274
|
else #Get attributes of full length records
|
@@ -292,7 +293,7 @@ end
|
|
292
293
|
options = {}
|
293
294
|
|
294
295
|
divs = %w{fungi invertebrates mammals plants rodents vertebrates}
|
295
|
-
all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
|
296
|
+
all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates archaea viruses unclassified}
|
296
297
|
|
297
298
|
optparse = OptionParser.new do |opts|
|
298
299
|
options[:uniprot_div] = divs
|
@@ -380,12 +381,12 @@ download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
|
|
380
381
|
if !options[:no_download]
|
381
382
|
if !options[:no_uniprot]
|
382
383
|
conecta_uniprot(options[:uniprot_div], formatted_db_path)
|
383
|
-
system('gunzip '+formatted_db_path+'*.gz')
|
384
|
+
#system('gunzip '+formatted_db_path+'*.gz')
|
384
385
|
end
|
385
386
|
end
|
386
387
|
|
387
388
|
if !options[:no_uniprot]
|
388
|
-
isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta")) #archivo de variantes de splicing. POR QUE?
|
389
|
+
isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta.gz")) #archivo de variantes de splicing. POR QUE?
|
389
390
|
options[:uniprot_div].each do |db_group|
|
390
391
|
filter_and_makeDB(formatted_db_path, 'sprot', db_group, isoform_hash, 'sp', options)
|
391
392
|
filter_and_makeDB(formatted_db_path, 'trembl', db_group, nil, 'tr', options) if !options[:no_trembl]
|
data/bin/make_user_db.rb
CHANGED
@@ -64,7 +64,7 @@ optparse = OptionParser.new do |opts|
|
|
64
64
|
end
|
65
65
|
|
66
66
|
options[:user_fasta] = nil
|
67
|
-
opts.on( '-f', '--user_fasta FILE', 'Use a
|
67
|
+
opts.on( '-f', '--user_fasta FILE', 'Use a custom fasta file to build the user database') do |file|
|
68
68
|
options[:user_fasta] = file
|
69
69
|
end
|
70
70
|
|
@@ -119,7 +119,7 @@ output_file_path.gsub!(' ', '_')
|
|
119
119
|
|
120
120
|
seqs = ''
|
121
121
|
if options[:user_fasta].nil?
|
122
|
-
isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta'))
|
122
|
+
isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta.gz'))
|
123
123
|
seqs = get_seqs(File.join(formatted_db_path, 'sp_' + options[:uniprot_div],"sp_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
|
124
124
|
isoform_hash = nil
|
125
125
|
seqs << get_seqs(File.join(formatted_db_path, 'tr_' + options[:uniprot_div],"tr_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
|
data/lib/full_lengther_next.rb
CHANGED
@@ -1,23 +1,37 @@
|
|
1
|
-
require '
|
2
|
-
|
1
|
+
require 'scbi_zcat'
|
3
2
|
|
4
3
|
def load_isoform_hash(file)
|
5
4
|
isoform_hash = {}
|
6
5
|
if File.exists?(file)
|
7
|
-
fasta =
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
6
|
+
fasta = ScbiZcatFile.new(file)
|
7
|
+
filtered_fasta = ''
|
8
|
+
seq_name = nil
|
9
|
+
seq = ''
|
10
|
+
while !fasta.eof
|
11
|
+
line = fasta.readline.chomp
|
12
|
+
if line[0] == '>'
|
13
|
+
load_seq_in_hash(seq_name, seq, isoform_hash) if !seq_name.nil?
|
14
|
+
seq_name = line
|
15
|
+
seq = ''
|
16
|
+
else
|
17
|
+
seq << line
|
18
|
+
end
|
15
19
|
end
|
16
|
-
|
20
|
+
load_seq_in_hash(seq_name, seq, isoform_hash)
|
17
21
|
end
|
18
22
|
return isoform_hash
|
19
23
|
end
|
20
24
|
|
25
|
+
def load_seq_in_hash(seq_name, seq, isoform_hash)
|
26
|
+
name, desc = seq_name.split(' ', 2)
|
27
|
+
name =~ /(\w+\|(\w+)\-\d+\|)/
|
28
|
+
if isoform_hash[$2].nil?
|
29
|
+
isoform_hash[$2] = ">#{$1}#{desc}\n#{seq}"
|
30
|
+
else
|
31
|
+
isoform_hash[$2] += "\n>#{$1}#{desc}\n#{seq}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
21
35
|
def do_makeblastdb(seqs, output, dbtype)
|
22
36
|
cmd="makeblastdb -in - -out #{output} -title #{File.basename(output)} -dbtype #{dbtype} -parse_seqids"
|
23
37
|
IO.popen(cmd,'w+') {|makedb|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: full_lengther_next
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-05-
|
12
|
+
date: 2016-05-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: xml-simple
|