full_lengther_next 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/download_fln_dbs.rb +6 -5
- data/bin/make_user_db.rb +2 -2
- data/lib/full_lengther_next.rb +1 -1
- data/lib/full_lengther_next/classes/handle_db.rb +25 -11
- metadata +2 -2
data/bin/download_fln_dbs.rb
CHANGED
@@ -125,7 +125,7 @@ def filter_and_makeDB(formatted_db_path, dbtype, db_group, isoform_hash, prefix,
|
|
125
125
|
puts 'Building ' + file_name
|
126
126
|
fasta = File.join(formatted_db_path,"#{file_name}","#{file_name}.fasta")
|
127
127
|
blastdb_input = fasta.gsub('.fasta', '')
|
128
|
-
current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat")
|
128
|
+
current_db_source = File.join(formatted_db_path, "uniprot_#{dbtype}_#{db_group}.dat.gz")
|
129
129
|
if File.exists?(current_db_source)
|
130
130
|
seqs = filter_incomplete_seqs(current_db_source, isoform_hash, formatted_db_path, file_name, options)
|
131
131
|
if !options[:only_index]
|
@@ -267,7 +267,8 @@ def filter_incomplete_seqs(file_name, isoform_hash, formatted_db_path, db_name,
|
|
267
267
|
main_name = File.join(db_folder, db_name)
|
268
268
|
index = File.open(main_name + '.index', 'w') if !options[:all]
|
269
269
|
seqs = ''
|
270
|
-
Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
|
270
|
+
#Bio::FlatFile.auto(file_name).each_entry {|uniprot_record|
|
271
|
+
Bio::FlatFile.auto(IO.popen("gzip -dc #{file_name}")).each_entry {|uniprot_record|
|
271
272
|
if !options[:all] && !complete?(uniprot_record)
|
272
273
|
next
|
273
274
|
else #Get attributes of full length records
|
@@ -292,7 +293,7 @@ end
|
|
292
293
|
options = {}
|
293
294
|
|
294
295
|
divs = %w{fungi invertebrates mammals plants rodents vertebrates}
|
295
|
-
all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates}
|
296
|
+
all_divs = %w{human fungi invertebrates mammals plants rodents vertebrates archaea viruses unclassified}
|
296
297
|
|
297
298
|
optparse = OptionParser.new do |opts|
|
298
299
|
options[:uniprot_div] = divs
|
@@ -380,12 +381,12 @@ download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
|
|
380
381
|
if !options[:no_download]
|
381
382
|
if !options[:no_uniprot]
|
382
383
|
conecta_uniprot(options[:uniprot_div], formatted_db_path)
|
383
|
-
system('gunzip '+formatted_db_path+'*.gz')
|
384
|
+
#system('gunzip '+formatted_db_path+'*.gz')
|
384
385
|
end
|
385
386
|
end
|
386
387
|
|
387
388
|
if !options[:no_uniprot]
|
388
|
-
isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta")) #archivo de variantes de splicing. POR QUE?
|
389
|
+
isoform_hash = load_isoform_hash(File.join(formatted_db_path, "uniprot_sprot_varsplic.fasta.gz")) #archivo de variantes de splicing. POR QUE?
|
389
390
|
options[:uniprot_div].each do |db_group|
|
390
391
|
filter_and_makeDB(formatted_db_path, 'sprot', db_group, isoform_hash, 'sp', options)
|
391
392
|
filter_and_makeDB(formatted_db_path, 'trembl', db_group, nil, 'tr', options) if !options[:no_trembl]
|
data/bin/make_user_db.rb
CHANGED
@@ -64,7 +64,7 @@ optparse = OptionParser.new do |opts|
|
|
64
64
|
end
|
65
65
|
|
66
66
|
options[:user_fasta] = nil
|
67
|
-
opts.on( '-f', '--user_fasta FILE', 'Use a
|
67
|
+
opts.on( '-f', '--user_fasta FILE', 'Use a custom fasta file to build the user database') do |file|
|
68
68
|
options[:user_fasta] = file
|
69
69
|
end
|
70
70
|
|
@@ -119,7 +119,7 @@ output_file_path.gsub!(' ', '_')
|
|
119
119
|
|
120
120
|
seqs = ''
|
121
121
|
if options[:user_fasta].nil?
|
122
|
-
isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta'))
|
122
|
+
isoform_hash = load_isoform_hash(File.join(formatted_db_path, 'uniprot_sprot_varsplic.fasta.gz'))
|
123
123
|
seqs = get_seqs(File.join(formatted_db_path, 'sp_' + options[:uniprot_div],"sp_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
|
124
124
|
isoform_hash = nil
|
125
125
|
seqs << get_seqs(File.join(formatted_db_path, 'tr_' + options[:uniprot_div],"tr_#{options[:uniprot_div]}.index"), options[:taxon], isoform_hash)
|
data/lib/full_lengther_next.rb
CHANGED
@@ -1,23 +1,37 @@
|
|
1
|
-
require '
|
2
|
-
|
1
|
+
require 'scbi_zcat'
|
3
2
|
|
4
3
|
def load_isoform_hash(file)
|
5
4
|
isoform_hash = {}
|
6
5
|
if File.exists?(file)
|
7
|
-
fasta =
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
6
|
+
fasta = ScbiZcatFile.new(file)
|
7
|
+
filtered_fasta = ''
|
8
|
+
seq_name = nil
|
9
|
+
seq = ''
|
10
|
+
while !fasta.eof
|
11
|
+
line = fasta.readline.chomp
|
12
|
+
if line[0] == '>'
|
13
|
+
load_seq_in_hash(seq_name, seq, isoform_hash) if !seq_name.nil?
|
14
|
+
seq_name = line
|
15
|
+
seq = ''
|
16
|
+
else
|
17
|
+
seq << line
|
18
|
+
end
|
15
19
|
end
|
16
|
-
|
20
|
+
load_seq_in_hash(seq_name, seq, isoform_hash)
|
17
21
|
end
|
18
22
|
return isoform_hash
|
19
23
|
end
|
20
24
|
|
25
|
+
def load_seq_in_hash(seq_name, seq, isoform_hash)
|
26
|
+
name, desc = seq_name.split(' ', 2)
|
27
|
+
name =~ /(\w+\|(\w+)\-\d+\|)/
|
28
|
+
if isoform_hash[$2].nil?
|
29
|
+
isoform_hash[$2] = ">#{$1}#{desc}\n#{seq}"
|
30
|
+
else
|
31
|
+
isoform_hash[$2] += "\n>#{$1}#{desc}\n#{seq}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
21
35
|
def do_makeblastdb(seqs, output, dbtype)
|
22
36
|
cmd="makeblastdb -in - -out #{output} -title #{File.basename(output)} -dbtype #{dbtype} -parse_seqids"
|
23
37
|
IO.popen(cmd,'w+') {|makedb|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: full_lengther_next
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-05-
|
12
|
+
date: 2016-05-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: xml-simple
|