bio-rocker 0.1.04 → 0.2.0alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ROCker +11 -11
- data/lib/rocker.rb +59 -105
- data/lib/rocker/alignment.rb +8 -8
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83c3e3b92659ce86b8bc1c2119b3d37db119b432
|
4
|
+
data.tar.gz: 0f27ba6ed5086baf64e99732dc86a4c7e31432d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc2045b7943455232a5d9e6a3d3f26f45c953bf31a6b480f6ce3a2c8ab37c1468d0d6ce511521c5afc52ee65dfecc66f9b6195ad17dce4db211c17cdc591ead7
|
7
|
+
data.tar.gz: c8bda317dda0da8eed23cd976eb6c848f866382e37f5c8315bbd365fe65f85f7319a60eb2c2d511461fca4e329af403c31ccd77a16c0a880ec720edfd9ac269f
|
data/bin/ROCker
CHANGED
@@ -3,9 +3,10 @@
|
|
3
3
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
4
|
# @author Luis (Coto) Orellana
|
5
5
|
# @license artistic license 2.0
|
6
|
-
# @update
|
6
|
+
# @update May-07-2015
|
7
7
|
#
|
8
8
|
|
9
|
+
$:.push File.expand_path(File.dirname(__FILE__) + '/../lib')
|
9
10
|
require 'rocker'
|
10
11
|
require 'optparse'
|
11
12
|
|
@@ -35,25 +36,24 @@ opts = OptionParser.new do |opt|
|
|
35
36
|
opt.separator "+ UNSATISFIED REQUIREMENTS"
|
36
37
|
opt.separator " The building task requires uninstalled gems, please install them executing:"
|
37
38
|
opt.separator " gem install rest_client"
|
38
|
-
opt.separator " gem install nokogiri"
|
39
39
|
opt.separator ""
|
40
40
|
end
|
41
41
|
opt.separator "+ BUILDING ARGUMENTS"
|
42
|
-
opt.on("-p", "--positive
|
43
|
-
opt.on("-n", "--negative
|
42
|
+
opt.on("-p", "--positive ID1,ID2,ID3", Array, "Comma-separated list of UniProtKB IDs corresponding to the 'positive' training set. Required unless -P or -a are used."){ |v| o[:posori]=v }
|
43
|
+
opt.on("-n", "--negative ID1,ID2,ID3", Array, "Comma-separated list of UniProtKB IDs corresponding to the 'negative' training set. See also -N."){ |v| o[:negative]=v }
|
44
44
|
opt.on("-o", "--baseout PATH", "Prefix for the output files to be generated. Required."){ |v| o[:baseout]=v }
|
45
|
-
opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides
|
45
|
+
#opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides (currently not implemented)."){ raise "--nucleotides: This option is currently not implemented." }
|
46
46
|
opt.on("-t", "--threads INT", "Number of threads to use. By default: #{ROCker.default :thr}."){ |v| o[:thr]=v.to_i }
|
47
47
|
opt.separator ""
|
48
48
|
opt.separator "+ ADVANCED BUILDING ARGUMENTS"
|
49
|
-
opt.on("-P", "--positive-file PATH", "File containing the positive set (see -p), one
|
50
|
-
opt.on("-N", "--negative-file PATH", "File containing the negative set (see -n), one
|
51
|
-
opt.on("-a", "--alignment PATH", "Protein alignment of the reference sequences. The defline must contain
|
49
|
+
opt.on("-P", "--positive-file PATH", "File containing the positive set (see -p), one UniProtKB ID per line. If used, -p is not required."){ |v| o[:posfile]=v }
|
50
|
+
opt.on("-N", "--negative-file PATH", "File containing the negative set (see -n), one UniProtKB ID per line."){ |v| o[:negfile]=v }
|
51
|
+
opt.on("-a", "--alignment PATH", "Protein alignment of the reference sequences. The defline must contain UniProtKB ID. If used, -p is not required."){ |v| o[:aln]=v }
|
52
52
|
opt.on("-s", "--seqdepth NUMBER", "Sequencing depth to be used in building the in silico metagenome. By default: '#{ROCker.default :seqdepth}'."){ |v| o[:seqdepth]=v.to_f }
|
53
53
|
opt.on("-v", "--overlap NUMBER", "Minimum overlap with reference gene to tag a read as positive. By default: '#{ROCker.default :minovl}'."){ |v| o[:minovl]=v.to_f }
|
54
54
|
opt.on( "--genome-frx NUMBER", "Fraction to subsample the positive set genomes to generate the metagenome. By default: #{ROCker.default :genomefrx}"){ |v| o[:genomefrx]=v.to_f }
|
55
|
-
opt.on( "--per-
|
56
|
-
|
55
|
+
opt.on( "--per-taxon RANK", "If selected, only one genome per taxon is used to build the metagenome. Valid ranks include: species, genus, family, order, class, phylum.",
|
56
|
+
"This option replaces --per-genus and --per-species, but is temporarily out of service."){ |v| o[:pertaxon]=v.downcase }
|
57
57
|
opt.on( "--nometagenome", "Do not create metagenome. Implies --noblast. By default, metagenome is created."){ |v| o[:nomg]=v }
|
58
58
|
opt.on( "--noblast", "Do not execute BLAST. By default, BLAST is executed."){ |v| o[:noblast]=v }
|
59
59
|
opt.on( "--noalignment", "Do not align reference set. By default, references are aligned."){ |v| o[:noaln]=v }
|
@@ -78,7 +78,7 @@ opts = OptionParser.new do |opt|
|
|
78
78
|
opt.on("-b", "--ref-blast PATH",
|
79
79
|
"Tabular BLAST (blastx) of the test reads vs. the reference dataset. Required unless -t exists."){ |v| o[:blast]=v }
|
80
80
|
opt.on("-k", "--rocker PATH", "ROCker file to be created. Required."){ |v| o[:rocker]=v }
|
81
|
-
opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides. By default, proteins are assumed."){
|
81
|
+
opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides. By default, proteins are assumed."){ raise "--nucleotides: This option is currently not implemented." }
|
82
82
|
opt.separator ""
|
83
83
|
opt.separator "+ ADVANCED COMPILATION ARGUMENTS"
|
84
84
|
opt.on("-t", "--table PATH", "Formated tabular file to be created (or reused). Required unless -b is provided."){ |v| o[:table]=v }
|
data/lib/rocker.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update May-07-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
require 'rocker/blasthit'
|
@@ -10,12 +10,12 @@ require 'rocker/rocdata'
|
|
10
10
|
|
11
11
|
class ROCker
|
12
12
|
#================================[ Class ]
|
13
|
-
@@
|
13
|
+
@@EBIREST = 'http://www.ebi.ac.uk/Tools'
|
14
14
|
@@DEFAULTS = {
|
15
15
|
# General
|
16
16
|
:q=>false, :r=>'R', :nucl=>false, :debug=>false,
|
17
17
|
# Build
|
18
|
-
:positive=>[], :negative=>[], :thr=>2,:genomefrx=>1.0,
|
18
|
+
:positive=>[], :negative=>[], :thr=>2,:genomefrx=>1.0,
|
19
19
|
# ext. software
|
20
20
|
:grinder=>'grinder', :muscle=>'muscle', :blastbins=>'', :seqdepth=>3, :minovl=>0.75,
|
21
21
|
:grindercmd=>'%1$s -reference_file "%2$s" -cf "%3$f" -base_name "%4$s" -dc \'-~*Nn\' -md "poly4 3e-3 3.3e-8" -mr "95 5" -rd "100 uniform 5"',
|
@@ -30,16 +30,15 @@ class ROCker
|
|
30
30
|
:color=>false, :gformat=>'pdf', :width=>9, :height=>9, :impact=>false, :transparency=>true,
|
31
31
|
}
|
32
32
|
@@HAS_BUILD_GEMS = nil
|
33
|
-
def self.
|
34
|
-
def self.defaults() @@DEFAULTS end
|
35
|
-
def self.default(k) @@DEFAULTS[k] end
|
33
|
+
def self.ebirest() @@EBIREST ; end
|
34
|
+
def self.defaults() @@DEFAULTS ; end
|
35
|
+
def self.default(k) @@DEFAULTS[k] ; end
|
36
36
|
def self.has_build_gems?
|
37
37
|
return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
|
38
38
|
@@HAS_BUILD_GEMS = TRUE
|
39
39
|
begin
|
40
40
|
require 'rubygems'
|
41
41
|
require 'restclient'
|
42
|
-
require 'nokogiri'
|
43
42
|
rescue LoadError
|
44
43
|
@@HAS_BUILD_GEMS = FALSE
|
45
44
|
end
|
@@ -66,7 +65,7 @@ class ROCker
|
|
66
65
|
unless @o[:aln].nil?
|
67
66
|
aln = Alignment.new
|
68
67
|
aln.read_fasta @o[:aln]
|
69
|
-
@o[:positive] += aln.
|
68
|
+
@o[:positive] += aln.get_ids
|
70
69
|
end
|
71
70
|
raise "-p or -P are mandatory." if @o[:positive].size==0
|
72
71
|
raise "-o/--baseout is mandatory." if @o[:baseout].nil?
|
@@ -89,124 +88,76 @@ class ROCker
|
|
89
88
|
$stderr.puts " # #{@o[:positive]}" if @o[:debug]
|
90
89
|
ids = Array.new(@o[:positive])
|
91
90
|
while ids.size>0
|
92
|
-
f.print
|
91
|
+
f.print ebiFetch(:uniprotkb, ids.shift(200), :fasta)
|
93
92
|
end
|
94
93
|
end
|
95
94
|
f.close
|
96
|
-
|
95
|
+
genome_ids = {:positive=>[], :negative=>[]}
|
97
96
|
[:positive, :negative].each do |set|
|
98
97
|
unless @o[set].size==0
|
99
98
|
puts " * gathering genomes from #{@o[set].size} #{set.to_s} sequence(s)." unless @o[:q]
|
100
99
|
$stderr.puts " # #{@o[set]}" if @o[:debug]
|
101
|
-
|
100
|
+
genome_ids[set] = genes2genomes(@o[set])
|
102
101
|
end
|
103
102
|
end
|
104
|
-
raise "No genomes associated with the positive set." if
|
105
|
-
|
106
|
-
raise "No positive genomes selected for metagenome construction, is --genome-frx too small?" if
|
107
|
-
|
103
|
+
raise "No genomes associated with the positive set." if genome_ids[:positive].size==0
|
104
|
+
genome_ids[:positive] = genome_ids[:positive].sample( (genome_ids[:positive].size*@o[:genomefrx]).round ) if @o[:genomefrx]
|
105
|
+
raise "No positive genomes selected for metagenome construction, is --genome-frx too small?" if genome_ids[:positive].empty?
|
106
|
+
all_genome_ids = genome_ids.values.reduce(:+).uniq
|
108
107
|
|
109
108
|
# Locate genes
|
110
109
|
puts "Analyzing genome data." unless @o[:q]
|
111
|
-
puts " * downloading and parsing #{
|
112
|
-
$stderr.puts " # #{
|
110
|
+
puts " * downloading and parsing #{genome_ids[:positive].size} GFF3 document(s)." unless @o[:q]
|
111
|
+
$stderr.puts " # #{genome_ids[:positive]}" if @o[:debug]
|
113
112
|
positive_coords = {}
|
114
113
|
genome_org = {}
|
115
114
|
i = 0
|
116
|
-
|
117
|
-
print " * scanning #{(i+=1).ordinalize} genome out of #{
|
115
|
+
genome_ids[:positive].each do |genome_id|
|
116
|
+
print " * scanning #{(i+=1).ordinalize} genome out of #{genome_ids[:positive].size}. \r" unless @o[:q]
|
117
|
+
# ToDo check organism name using genome_org unless @o[:pertaxon].nil?
|
118
118
|
$stderr.puts " # Looking for any of #{@o[:positive]}" if @o[:debug]
|
119
|
-
genome_file = @o[:baseout] + '.src.' + i.to_s + '.
|
119
|
+
genome_file = @o[:baseout] + '.src.' + i.to_s + '.gff3'
|
120
120
|
if @o[:reuse] and File.exist? genome_file
|
121
121
|
puts " * reusing existing file: #{genome_file}." unless @o[:q]
|
122
122
|
ifh = File.open(genome_file, 'r')
|
123
|
-
doc =
|
123
|
+
doc = ifh.readlines.grep(/^[^#]/)
|
124
124
|
ifh.close
|
125
125
|
else
|
126
126
|
genome_file=nil unless @o[:noclean]
|
127
|
-
res =
|
128
|
-
doc =
|
127
|
+
res = ebiFetch(:embl, [genome_id], :gff3, genome_file)
|
128
|
+
doc = res.split("\n").grep(/^[^#]/)
|
129
129
|
end
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
if name_g.nil? or (name_s.nil? and @o[:perspecies])
|
143
|
-
name = nil
|
144
|
-
else
|
145
|
-
name = @o[:perspecies] ? name_g.content + " " + name_s.content : name_g.content
|
146
|
-
end
|
147
|
-
end
|
148
|
-
if name.nil?
|
149
|
-
warn "WARNING: Cannot find binomial name of #{gi}, using genome regardless of taxonomy."
|
150
|
-
name = rand(36**100).to_s(36)
|
151
|
-
end
|
152
|
-
break unless genome_org[ name ].nil?
|
153
|
-
genome_org[ name ] = gi
|
154
|
-
end
|
155
|
-
$stderr.puts " # traversing #{gi}" if @o[:debug]
|
156
|
-
genome.xpath('./Seq-entry_set/Bioseq-set/Bioseq-set_annot/Seq-annot/Seq-annot_data/Seq-annot_data_ftable/Seq-feat').each do |pr|
|
157
|
-
pr_gi = pr.at_xpath('./Seq-feat_product/Seq-loc/Seq-loc_whole/Seq-id/Seq-id_gi')
|
158
|
-
next if pr_gi.nil?
|
159
|
-
if @o[:positive].include? pr_gi.content
|
160
|
-
$stderr.puts " # found #{pr_gi.content}" if @o[:debug]
|
161
|
-
pr_loc = pr.at_xpath('./Seq-feat_location/Seq-loc/Seq-loc_int/Seq-interval')
|
162
|
-
if pr_loc.nil?
|
163
|
-
pr_loc = pr.xpath('./Seq-feat_location/Seq-loc/Seq-loc_mix//Seq-loc/Seq-loc_int/Seq-interval')
|
164
|
-
if pr_loc.nil?
|
165
|
-
warn "WARNING: Impossible to find location of '#{pr_gi.content}' in '#{gi}'."
|
166
|
-
incomplete = true
|
167
|
-
else
|
168
|
-
pr_loc.each do |loc_int|
|
169
|
-
positive_coords[gi] << {
|
170
|
-
:gi => pr_gi.content,
|
171
|
-
:from => loc_int.at_xpath('./Seq-interval_from').content.to_i,
|
172
|
-
:to => loc_int.at_xpath('./Seq-interval_to').content.to_i
|
173
|
-
#, :strand => loc_int.at_xpath('./Seq-interval_strand/Na-strand/@value').content
|
174
|
-
}
|
175
|
-
end
|
176
|
-
end
|
177
|
-
else
|
178
|
-
positive_coords[gi] << {
|
179
|
-
:gi => pr_gi.content,
|
180
|
-
:from => pr_loc.at_xpath('./Seq-interval_from').content.to_i,
|
181
|
-
:to => pr_loc.at_xpath('./Seq-interval_to').content.to_i
|
182
|
-
#, :strand => pr_loc.at_xpath('./Seq-interval_strand/Na-strand/@value').content
|
183
|
-
}
|
184
|
-
end
|
185
|
-
end
|
186
|
-
end
|
187
|
-
break
|
188
|
-
end
|
130
|
+
doc.each do |ln|
|
131
|
+
r = ln.chomp.split /\t/
|
132
|
+
prots = r[8].split(/;/).grep(/^db_xref=UniProtKB\/TrEMBL:/){ |xref| xref.split(/:/)[1] }
|
133
|
+
p = prots.select{ |p| @o[:positive].include? p }.first
|
134
|
+
next if p.nil?
|
135
|
+
positive_coords[ r[0] ] ||= []
|
136
|
+
positive_coords[ r[0] ] << {
|
137
|
+
#:strand => r[6],
|
138
|
+
:prot_id => p,
|
139
|
+
:from => r[3].to_i,
|
140
|
+
:to => r[4].to_i
|
141
|
+
}
|
189
142
|
end
|
190
|
-
doc = nil
|
191
|
-
warn "WARNING: Cannot find GI '#{gi}'." if incomplete
|
192
143
|
end
|
193
|
-
genome_gis[:positive] = genome_org.values if @o[:pergenus] or @o[:perspecies]
|
194
|
-
all_gis = genome_gis.values.reduce(:+).uniq
|
195
144
|
print "\n" unless @o[:q]
|
196
|
-
|
197
|
-
|
145
|
+
genome_ids[:positive] = genome_org.values unless @o[:pertaxon].nil?
|
146
|
+
all_genome_ids = genome_ids.values.reduce(:+).uniq
|
147
|
+
missing = @o[:positive] - positive_coords.values.map{ |a| a.map{ |b| b[:prot_id] } }.reduce(:+)
|
148
|
+
warn "\nWARNING: Cannot find genomic location of sequence(s) #{missing.join(',')}.\n\n" unless missing.size==0 or @o[:genomefrx]<1.0 or not @o[:pertaxon].nil?
|
198
149
|
|
199
150
|
# Download genomes
|
200
151
|
genomes_file = @o[:baseout] + '.src.fasta'
|
201
152
|
if @o[:reuse] and File.exist? genomes_file
|
202
153
|
puts " * reusing existing file: #{genomes_file}." unless @o[:q]
|
203
154
|
else
|
204
|
-
puts " * downloading #{
|
205
|
-
$stderr.puts " # #{
|
206
|
-
ids = Array.new(
|
155
|
+
puts " * downloading #{all_genome_ids.size} genome(s) in FastA." unless @o[:q]
|
156
|
+
$stderr.puts " # #{all_genome_ids}" if @o[:debug]
|
157
|
+
ids = Array.new(all_genome_ids)
|
207
158
|
ofh = File.open(genomes_file, 'w')
|
208
159
|
while ids.size>0
|
209
|
-
ofh.print
|
160
|
+
ofh.print ebiFetch('embl', ids.shift(200), 'fasta')
|
210
161
|
end
|
211
162
|
ofh.close
|
212
163
|
end
|
@@ -244,11 +195,11 @@ class ROCker
|
|
244
195
|
Thread.current[:ifh] = File.open(@o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa", 'r')
|
245
196
|
Thread.current[:ofh] = File.open(@o[:baseout] + ".mg.fasta.#{thr_i.to_s}", 'w')
|
246
197
|
while Thread.current[:l]=Thread.current[:ifh].gets
|
247
|
-
Thread.current[:rd] = /^>(?<id>\d+) reference=
|
198
|
+
Thread.current[:rd] = /^>(?<id>\d+) reference=[A-Za-z]+\|(?<genome_id>[A-Za-z0-9_]+)\|.* position=(?<comp>complement\()?(?<from>\d+)\.\.(?<to>\d+)\)? /.match(Thread.current[:l])
|
248
199
|
unless Thread.current[:rd].nil?
|
249
200
|
Thread.current[:positive] = false
|
250
|
-
positive_coords[Thread.current[:rd][:
|
251
|
-
positive_coords[Thread.current[:rd][:
|
201
|
+
positive_coords[Thread.current[:rd][:genome_id]] ||= []
|
202
|
+
positive_coords[Thread.current[:rd][:genome_id]].each do |gn|
|
252
203
|
Thread.current[:left] = Thread.current[:rd][:to].to_i - gn[:from]
|
253
204
|
Thread.current[:right] = gn[:to] - Thread.current[:rd][:from].to_i
|
254
205
|
if (Thread.current[:left]*Thread.current[:right] >= 0) and ([Thread.current[:left], Thread.current[:right]].min/(Thread.current[:rd][:to].to_i-Thread.current[:rd][:from].to_i) >= @o[:minovl])
|
@@ -256,7 +207,7 @@ class ROCker
|
|
256
207
|
break
|
257
208
|
end
|
258
209
|
end
|
259
|
-
Thread.current[:l] = ">#{Thread.current[:rd][:id]}#{Thread.current[:positive] ? "@%" : ""} ref=#{Thread.current[:rd][:
|
210
|
+
Thread.current[:l] = ">#{Thread.current[:rd][:id]}#{Thread.current[:positive] ? "@%" : ""} ref=#{Thread.current[:rd][:genome_id]}:#{Thread.current[:rd][:from]}..#{Thread.current[:rd][:to]}#{(Thread.current[:rd][:comp]=='complement(')?'-':'+'}\n"
|
260
211
|
end
|
261
212
|
Thread.current[:ofh].print Thread.current[:l]
|
262
213
|
end
|
@@ -470,18 +421,18 @@ class ROCker
|
|
470
421
|
ifh.close
|
471
422
|
ofh.close
|
472
423
|
end
|
473
|
-
def genes2genomes(
|
424
|
+
def genes2genomes(gene_ids)
|
474
425
|
genomes = []
|
475
|
-
ids = Array.new(
|
426
|
+
ids = Array.new(gene_ids)
|
476
427
|
while ids.size>0
|
477
|
-
doc =
|
478
|
-
genomes += doc.
|
428
|
+
doc = ebiFetch(:uniprotkb, ids.shift(200), :annot).split("\n")
|
429
|
+
genomes += doc.grep( /^DR\s+EMBL;/ ).map{ |ln| ln.split('; ')[1] }
|
479
430
|
end
|
480
431
|
genomes.uniq
|
481
432
|
end
|
482
|
-
def
|
483
|
-
response = RestClient.get
|
484
|
-
raise "Unable to reach
|
433
|
+
def restcall(url, outfile=nil)
|
434
|
+
response = RestClient.get url
|
435
|
+
raise "Unable to reach EBI REST client, error code #{response.code}." unless response.code == 200
|
485
436
|
unless outfile.nil?
|
486
437
|
ohf = File.open(outfile, 'w')
|
487
438
|
ohf.print response.to_s
|
@@ -489,8 +440,11 @@ class ROCker
|
|
489
440
|
end
|
490
441
|
response.to_s
|
491
442
|
end
|
492
|
-
def
|
493
|
-
|
443
|
+
def ebiFetch(db, ids, format, outfile=nil)
|
444
|
+
url = "#{ROCker.ebirest}/dbfetch/dbfetch/#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
|
445
|
+
$stderr.puts " # Calling: #{url}" if @o[:debug]
|
446
|
+
self.restcall url
|
447
|
+
end
|
494
448
|
def bash(cmd, err_msg=nil)
|
495
449
|
o = `#{cmd} 2>&1 && echo '{'`
|
496
450
|
raise (err_msg.nil? ? "Error executing: #{cmd}\n\n#{o}" : err_msg) unless o[-2]=='{'
|
data/lib/rocker/alignment.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update May-07-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
require 'rocker/sequence'
|
@@ -40,20 +40,20 @@ class Alignment
|
|
40
40
|
@cols = seq.cols if self.cols.nil?
|
41
41
|
raise "Aligned sequence #{seq.id} has a different length (#{seq.cols} vs #{self.cols})" unless seq.cols == self.cols
|
42
42
|
end
|
43
|
-
def
|
44
|
-
regexps = [/^
|
45
|
-
|
43
|
+
def get_ids
|
44
|
+
regexps = [/^[A-Za-z]+\|([A-Za-z0-9_]+)\|/, /^([A-Za-z0-9_]+)$/, /^([A-Za-z0-9_]+) /]
|
45
|
+
prot_ids = []
|
46
46
|
self.seqs.keys.each do |id|
|
47
|
-
|
47
|
+
prot_id = nil
|
48
48
|
regexps.each do |regexp|
|
49
49
|
unless regexp.match(id).nil?
|
50
|
-
|
50
|
+
prot_id = $1
|
51
51
|
break
|
52
52
|
end
|
53
53
|
end
|
54
|
-
|
54
|
+
prot_ids << prot_id unless prot_id.nil?
|
55
55
|
end
|
56
|
-
|
56
|
+
prot_ids
|
57
57
|
end
|
58
58
|
def seq(id) @seqs[id] end
|
59
59
|
def size() self.seqs.size end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-rocker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0alpha
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis (Coto) Orellana
|
@@ -9,8 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
13
|
-
dependencies:
|
12
|
+
date: 2015-05-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rest-client
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ~>
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: 1.7.3
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ~>
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 1.7.3
|
14
28
|
description: Detecting and quantifying functional genes in short-read metagenomic
|
15
29
|
datasets
|
16
30
|
email: lhorellana@gatech.edu
|
@@ -42,9 +56,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
42
56
|
version: '0'
|
43
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
|
-
- - '
|
59
|
+
- - '>'
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
61
|
+
version: 1.3.1
|
48
62
|
requirements: []
|
49
63
|
rubyforge_project:
|
50
64
|
rubygems_version: 2.0.14
|