bio-rocker 0.1.04 → 0.2.0alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ROCker +11 -11
- data/lib/rocker.rb +59 -105
- data/lib/rocker/alignment.rb +8 -8
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83c3e3b92659ce86b8bc1c2119b3d37db119b432
|
4
|
+
data.tar.gz: 0f27ba6ed5086baf64e99732dc86a4c7e31432d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc2045b7943455232a5d9e6a3d3f26f45c953bf31a6b480f6ce3a2c8ab37c1468d0d6ce511521c5afc52ee65dfecc66f9b6195ad17dce4db211c17cdc591ead7
|
7
|
+
data.tar.gz: c8bda317dda0da8eed23cd976eb6c848f866382e37f5c8315bbd365fe65f85f7319a60eb2c2d511461fca4e329af403c31ccd77a16c0a880ec720edfd9ac269f
|
data/bin/ROCker
CHANGED
@@ -3,9 +3,10 @@
|
|
3
3
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
4
|
# @author Luis (Coto) Orellana
|
5
5
|
# @license artistic license 2.0
|
6
|
-
# @update
|
6
|
+
# @update May-07-2015
|
7
7
|
#
|
8
8
|
|
9
|
+
$:.push File.expand_path(File.dirname(__FILE__) + '/../lib')
|
9
10
|
require 'rocker'
|
10
11
|
require 'optparse'
|
11
12
|
|
@@ -35,25 +36,24 @@ opts = OptionParser.new do |opt|
|
|
35
36
|
opt.separator "+ UNSATISFIED REQUIREMENTS"
|
36
37
|
opt.separator " The building task requires uninstalled gems, please install them executing:"
|
37
38
|
opt.separator " gem install rest_client"
|
38
|
-
opt.separator " gem install nokogiri"
|
39
39
|
opt.separator ""
|
40
40
|
end
|
41
41
|
opt.separator "+ BUILDING ARGUMENTS"
|
42
|
-
opt.on("-p", "--positive
|
43
|
-
opt.on("-n", "--negative
|
42
|
+
opt.on("-p", "--positive ID1,ID2,ID3", Array, "Comma-separated list of UniProtKB IDs corresponding to the 'positive' training set. Required unless -P or -a are used."){ |v| o[:posori]=v }
|
43
|
+
opt.on("-n", "--negative ID1,ID2,ID3", Array, "Comma-separated list of UniProtKB IDs corresponding to the 'negative' training set. See also -N."){ |v| o[:negative]=v }
|
44
44
|
opt.on("-o", "--baseout PATH", "Prefix for the output files to be generated. Required."){ |v| o[:baseout]=v }
|
45
|
-
opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides
|
45
|
+
#opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides (currently not implemented)."){ raise "--nucleotides: This option is currently not implemented." }
|
46
46
|
opt.on("-t", "--threads INT", "Number of threads to use. By default: #{ROCker.default :thr}."){ |v| o[:thr]=v.to_i }
|
47
47
|
opt.separator ""
|
48
48
|
opt.separator "+ ADVANCED BUILDING ARGUMENTS"
|
49
|
-
opt.on("-P", "--positive-file PATH", "File containing the positive set (see -p), one
|
50
|
-
opt.on("-N", "--negative-file PATH", "File containing the negative set (see -n), one
|
51
|
-
opt.on("-a", "--alignment PATH", "Protein alignment of the reference sequences. The defline must contain
|
49
|
+
opt.on("-P", "--positive-file PATH", "File containing the positive set (see -p), one UniProtKB ID per line. If used, -p is not required."){ |v| o[:posfile]=v }
|
50
|
+
opt.on("-N", "--negative-file PATH", "File containing the negative set (see -n), one UniProtKB ID per line."){ |v| o[:negfile]=v }
|
51
|
+
opt.on("-a", "--alignment PATH", "Protein alignment of the reference sequences. The defline must contain UniProtKB ID. If used, -p is not required."){ |v| o[:aln]=v }
|
52
52
|
opt.on("-s", "--seqdepth NUMBER", "Sequencing depth to be used in building the in silico metagenome. By default: '#{ROCker.default :seqdepth}'."){ |v| o[:seqdepth]=v.to_f }
|
53
53
|
opt.on("-v", "--overlap NUMBER", "Minimum overlap with reference gene to tag a read as positive. By default: '#{ROCker.default :minovl}'."){ |v| o[:minovl]=v.to_f }
|
54
54
|
opt.on( "--genome-frx NUMBER", "Fraction to subsample the positive set genomes to generate the metagenome. By default: #{ROCker.default :genomefrx}"){ |v| o[:genomefrx]=v.to_f }
|
55
|
-
opt.on( "--per-
|
56
|
-
|
55
|
+
opt.on( "--per-taxon RANK", "If selected, only one genome per taxon is used to build the metagenome. Valid ranks include: species, genus, family, order, class, phylum.",
|
56
|
+
"This option replaces --per-genus and --per-species, but is temporarily out of service."){ |v| o[:pertaxon]=v.downcase }
|
57
57
|
opt.on( "--nometagenome", "Do not create metagenome. Implies --noblast. By default, metagenome is created."){ |v| o[:nomg]=v }
|
58
58
|
opt.on( "--noblast", "Do not execute BLAST. By default, BLAST is executed."){ |v| o[:noblast]=v }
|
59
59
|
opt.on( "--noalignment", "Do not align reference set. By default, references are aligned."){ |v| o[:noaln]=v }
|
@@ -78,7 +78,7 @@ opts = OptionParser.new do |opt|
|
|
78
78
|
opt.on("-b", "--ref-blast PATH",
|
79
79
|
"Tabular BLAST (blastx) of the test reads vs. the reference dataset. Required unless -t exists."){ |v| o[:blast]=v }
|
80
80
|
opt.on("-k", "--rocker PATH", "ROCker file to be created. Required."){ |v| o[:rocker]=v }
|
81
|
-
opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides. By default, proteins are assumed."){
|
81
|
+
opt.on( "--nucleotides", "If set, it assumes that the input sequences are in nucleotides. By default, proteins are assumed."){ raise "--nucleotides: This option is currently not implemented." }
|
82
82
|
opt.separator ""
|
83
83
|
opt.separator "+ ADVANCED COMPILATION ARGUMENTS"
|
84
84
|
opt.on("-t", "--table PATH", "Formated tabular file to be created (or reused). Required unless -b is provided."){ |v| o[:table]=v }
|
data/lib/rocker.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update May-07-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
require 'rocker/blasthit'
|
@@ -10,12 +10,12 @@ require 'rocker/rocdata'
|
|
10
10
|
|
11
11
|
class ROCker
|
12
12
|
#================================[ Class ]
|
13
|
-
@@
|
13
|
+
@@EBIREST = 'http://www.ebi.ac.uk/Tools'
|
14
14
|
@@DEFAULTS = {
|
15
15
|
# General
|
16
16
|
:q=>false, :r=>'R', :nucl=>false, :debug=>false,
|
17
17
|
# Build
|
18
|
-
:positive=>[], :negative=>[], :thr=>2,:genomefrx=>1.0,
|
18
|
+
:positive=>[], :negative=>[], :thr=>2,:genomefrx=>1.0,
|
19
19
|
# ext. software
|
20
20
|
:grinder=>'grinder', :muscle=>'muscle', :blastbins=>'', :seqdepth=>3, :minovl=>0.75,
|
21
21
|
:grindercmd=>'%1$s -reference_file "%2$s" -cf "%3$f" -base_name "%4$s" -dc \'-~*Nn\' -md "poly4 3e-3 3.3e-8" -mr "95 5" -rd "100 uniform 5"',
|
@@ -30,16 +30,15 @@ class ROCker
|
|
30
30
|
:color=>false, :gformat=>'pdf', :width=>9, :height=>9, :impact=>false, :transparency=>true,
|
31
31
|
}
|
32
32
|
@@HAS_BUILD_GEMS = nil
|
33
|
-
def self.
|
34
|
-
def self.defaults() @@DEFAULTS end
|
35
|
-
def self.default(k) @@DEFAULTS[k] end
|
33
|
+
def self.ebirest() @@EBIREST ; end
|
34
|
+
def self.defaults() @@DEFAULTS ; end
|
35
|
+
def self.default(k) @@DEFAULTS[k] ; end
|
36
36
|
def self.has_build_gems?
|
37
37
|
return @@HAS_BUILD_GEMS unless @@HAS_BUILD_GEMS.nil?
|
38
38
|
@@HAS_BUILD_GEMS = TRUE
|
39
39
|
begin
|
40
40
|
require 'rubygems'
|
41
41
|
require 'restclient'
|
42
|
-
require 'nokogiri'
|
43
42
|
rescue LoadError
|
44
43
|
@@HAS_BUILD_GEMS = FALSE
|
45
44
|
end
|
@@ -66,7 +65,7 @@ class ROCker
|
|
66
65
|
unless @o[:aln].nil?
|
67
66
|
aln = Alignment.new
|
68
67
|
aln.read_fasta @o[:aln]
|
69
|
-
@o[:positive] += aln.
|
68
|
+
@o[:positive] += aln.get_ids
|
70
69
|
end
|
71
70
|
raise "-p or -P are mandatory." if @o[:positive].size==0
|
72
71
|
raise "-o/--baseout is mandatory." if @o[:baseout].nil?
|
@@ -89,124 +88,76 @@ class ROCker
|
|
89
88
|
$stderr.puts " # #{@o[:positive]}" if @o[:debug]
|
90
89
|
ids = Array.new(@o[:positive])
|
91
90
|
while ids.size>0
|
92
|
-
f.print
|
91
|
+
f.print ebiFetch(:uniprotkb, ids.shift(200), :fasta)
|
93
92
|
end
|
94
93
|
end
|
95
94
|
f.close
|
96
|
-
|
95
|
+
genome_ids = {:positive=>[], :negative=>[]}
|
97
96
|
[:positive, :negative].each do |set|
|
98
97
|
unless @o[set].size==0
|
99
98
|
puts " * gathering genomes from #{@o[set].size} #{set.to_s} sequence(s)." unless @o[:q]
|
100
99
|
$stderr.puts " # #{@o[set]}" if @o[:debug]
|
101
|
-
|
100
|
+
genome_ids[set] = genes2genomes(@o[set])
|
102
101
|
end
|
103
102
|
end
|
104
|
-
raise "No genomes associated with the positive set." if
|
105
|
-
|
106
|
-
raise "No positive genomes selected for metagenome construction, is --genome-frx too small?" if
|
107
|
-
|
103
|
+
raise "No genomes associated with the positive set." if genome_ids[:positive].size==0
|
104
|
+
genome_ids[:positive] = genome_ids[:positive].sample( (genome_ids[:positive].size*@o[:genomefrx]).round ) if @o[:genomefrx]
|
105
|
+
raise "No positive genomes selected for metagenome construction, is --genome-frx too small?" if genome_ids[:positive].empty?
|
106
|
+
all_genome_ids = genome_ids.values.reduce(:+).uniq
|
108
107
|
|
109
108
|
# Locate genes
|
110
109
|
puts "Analyzing genome data." unless @o[:q]
|
111
|
-
puts " * downloading and parsing #{
|
112
|
-
$stderr.puts " # #{
|
110
|
+
puts " * downloading and parsing #{genome_ids[:positive].size} GFF3 document(s)." unless @o[:q]
|
111
|
+
$stderr.puts " # #{genome_ids[:positive]}" if @o[:debug]
|
113
112
|
positive_coords = {}
|
114
113
|
genome_org = {}
|
115
114
|
i = 0
|
116
|
-
|
117
|
-
print " * scanning #{(i+=1).ordinalize} genome out of #{
|
115
|
+
genome_ids[:positive].each do |genome_id|
|
116
|
+
print " * scanning #{(i+=1).ordinalize} genome out of #{genome_ids[:positive].size}. \r" unless @o[:q]
|
117
|
+
# ToDo check organism name using genome_org unless @o[:pertaxon].nil?
|
118
118
|
$stderr.puts " # Looking for any of #{@o[:positive]}" if @o[:debug]
|
119
|
-
genome_file = @o[:baseout] + '.src.' + i.to_s + '.
|
119
|
+
genome_file = @o[:baseout] + '.src.' + i.to_s + '.gff3'
|
120
120
|
if @o[:reuse] and File.exist? genome_file
|
121
121
|
puts " * reusing existing file: #{genome_file}." unless @o[:q]
|
122
122
|
ifh = File.open(genome_file, 'r')
|
123
|
-
doc =
|
123
|
+
doc = ifh.readlines.grep(/^[^#]/)
|
124
124
|
ifh.close
|
125
125
|
else
|
126
126
|
genome_file=nil unless @o[:noclean]
|
127
|
-
res =
|
128
|
-
doc =
|
127
|
+
res = ebiFetch(:embl, [genome_id], :gff3, genome_file)
|
128
|
+
doc = res.split("\n").grep(/^[^#]/)
|
129
129
|
end
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
if name_g.nil? or (name_s.nil? and @o[:perspecies])
|
143
|
-
name = nil
|
144
|
-
else
|
145
|
-
name = @o[:perspecies] ? name_g.content + " " + name_s.content : name_g.content
|
146
|
-
end
|
147
|
-
end
|
148
|
-
if name.nil?
|
149
|
-
warn "WARNING: Cannot find binomial name of #{gi}, using genome regardless of taxonomy."
|
150
|
-
name = rand(36**100).to_s(36)
|
151
|
-
end
|
152
|
-
break unless genome_org[ name ].nil?
|
153
|
-
genome_org[ name ] = gi
|
154
|
-
end
|
155
|
-
$stderr.puts " # traversing #{gi}" if @o[:debug]
|
156
|
-
genome.xpath('./Seq-entry_set/Bioseq-set/Bioseq-set_annot/Seq-annot/Seq-annot_data/Seq-annot_data_ftable/Seq-feat').each do |pr|
|
157
|
-
pr_gi = pr.at_xpath('./Seq-feat_product/Seq-loc/Seq-loc_whole/Seq-id/Seq-id_gi')
|
158
|
-
next if pr_gi.nil?
|
159
|
-
if @o[:positive].include? pr_gi.content
|
160
|
-
$stderr.puts " # found #{pr_gi.content}" if @o[:debug]
|
161
|
-
pr_loc = pr.at_xpath('./Seq-feat_location/Seq-loc/Seq-loc_int/Seq-interval')
|
162
|
-
if pr_loc.nil?
|
163
|
-
pr_loc = pr.xpath('./Seq-feat_location/Seq-loc/Seq-loc_mix//Seq-loc/Seq-loc_int/Seq-interval')
|
164
|
-
if pr_loc.nil?
|
165
|
-
warn "WARNING: Impossible to find location of '#{pr_gi.content}' in '#{gi}'."
|
166
|
-
incomplete = true
|
167
|
-
else
|
168
|
-
pr_loc.each do |loc_int|
|
169
|
-
positive_coords[gi] << {
|
170
|
-
:gi => pr_gi.content,
|
171
|
-
:from => loc_int.at_xpath('./Seq-interval_from').content.to_i,
|
172
|
-
:to => loc_int.at_xpath('./Seq-interval_to').content.to_i
|
173
|
-
#, :strand => loc_int.at_xpath('./Seq-interval_strand/Na-strand/@value').content
|
174
|
-
}
|
175
|
-
end
|
176
|
-
end
|
177
|
-
else
|
178
|
-
positive_coords[gi] << {
|
179
|
-
:gi => pr_gi.content,
|
180
|
-
:from => pr_loc.at_xpath('./Seq-interval_from').content.to_i,
|
181
|
-
:to => pr_loc.at_xpath('./Seq-interval_to').content.to_i
|
182
|
-
#, :strand => pr_loc.at_xpath('./Seq-interval_strand/Na-strand/@value').content
|
183
|
-
}
|
184
|
-
end
|
185
|
-
end
|
186
|
-
end
|
187
|
-
break
|
188
|
-
end
|
130
|
+
doc.each do |ln|
|
131
|
+
r = ln.chomp.split /\t/
|
132
|
+
prots = r[8].split(/;/).grep(/^db_xref=UniProtKB\/TrEMBL:/){ |xref| xref.split(/:/)[1] }
|
133
|
+
p = prots.select{ |p| @o[:positive].include? p }.first
|
134
|
+
next if p.nil?
|
135
|
+
positive_coords[ r[0] ] ||= []
|
136
|
+
positive_coords[ r[0] ] << {
|
137
|
+
#:strand => r[6],
|
138
|
+
:prot_id => p,
|
139
|
+
:from => r[3].to_i,
|
140
|
+
:to => r[4].to_i
|
141
|
+
}
|
189
142
|
end
|
190
|
-
doc = nil
|
191
|
-
warn "WARNING: Cannot find GI '#{gi}'." if incomplete
|
192
143
|
end
|
193
|
-
genome_gis[:positive] = genome_org.values if @o[:pergenus] or @o[:perspecies]
|
194
|
-
all_gis = genome_gis.values.reduce(:+).uniq
|
195
144
|
print "\n" unless @o[:q]
|
196
|
-
|
197
|
-
|
145
|
+
genome_ids[:positive] = genome_org.values unless @o[:pertaxon].nil?
|
146
|
+
all_genome_ids = genome_ids.values.reduce(:+).uniq
|
147
|
+
missing = @o[:positive] - positive_coords.values.map{ |a| a.map{ |b| b[:prot_id] } }.reduce(:+)
|
148
|
+
warn "\nWARNING: Cannot find genomic location of sequence(s) #{missing.join(',')}.\n\n" unless missing.size==0 or @o[:genomefrx]<1.0 or not @o[:pertaxon].nil?
|
198
149
|
|
199
150
|
# Download genomes
|
200
151
|
genomes_file = @o[:baseout] + '.src.fasta'
|
201
152
|
if @o[:reuse] and File.exist? genomes_file
|
202
153
|
puts " * reusing existing file: #{genomes_file}." unless @o[:q]
|
203
154
|
else
|
204
|
-
puts " * downloading #{
|
205
|
-
$stderr.puts " # #{
|
206
|
-
ids = Array.new(
|
155
|
+
puts " * downloading #{all_genome_ids.size} genome(s) in FastA." unless @o[:q]
|
156
|
+
$stderr.puts " # #{all_genome_ids}" if @o[:debug]
|
157
|
+
ids = Array.new(all_genome_ids)
|
207
158
|
ofh = File.open(genomes_file, 'w')
|
208
159
|
while ids.size>0
|
209
|
-
ofh.print
|
160
|
+
ofh.print ebiFetch('embl', ids.shift(200), 'fasta')
|
210
161
|
end
|
211
162
|
ofh.close
|
212
163
|
end
|
@@ -244,11 +195,11 @@ class ROCker
|
|
244
195
|
Thread.current[:ifh] = File.open(@o[:baseout] + ".mg.tmp.#{thr_i.to_s}-reads.fa", 'r')
|
245
196
|
Thread.current[:ofh] = File.open(@o[:baseout] + ".mg.fasta.#{thr_i.to_s}", 'w')
|
246
197
|
while Thread.current[:l]=Thread.current[:ifh].gets
|
247
|
-
Thread.current[:rd] = /^>(?<id>\d+) reference=
|
198
|
+
Thread.current[:rd] = /^>(?<id>\d+) reference=[A-Za-z]+\|(?<genome_id>[A-Za-z0-9_]+)\|.* position=(?<comp>complement\()?(?<from>\d+)\.\.(?<to>\d+)\)? /.match(Thread.current[:l])
|
248
199
|
unless Thread.current[:rd].nil?
|
249
200
|
Thread.current[:positive] = false
|
250
|
-
positive_coords[Thread.current[:rd][:
|
251
|
-
positive_coords[Thread.current[:rd][:
|
201
|
+
positive_coords[Thread.current[:rd][:genome_id]] ||= []
|
202
|
+
positive_coords[Thread.current[:rd][:genome_id]].each do |gn|
|
252
203
|
Thread.current[:left] = Thread.current[:rd][:to].to_i - gn[:from]
|
253
204
|
Thread.current[:right] = gn[:to] - Thread.current[:rd][:from].to_i
|
254
205
|
if (Thread.current[:left]*Thread.current[:right] >= 0) and ([Thread.current[:left], Thread.current[:right]].min/(Thread.current[:rd][:to].to_i-Thread.current[:rd][:from].to_i) >= @o[:minovl])
|
@@ -256,7 +207,7 @@ class ROCker
|
|
256
207
|
break
|
257
208
|
end
|
258
209
|
end
|
259
|
-
Thread.current[:l] = ">#{Thread.current[:rd][:id]}#{Thread.current[:positive] ? "@%" : ""} ref=#{Thread.current[:rd][:
|
210
|
+
Thread.current[:l] = ">#{Thread.current[:rd][:id]}#{Thread.current[:positive] ? "@%" : ""} ref=#{Thread.current[:rd][:genome_id]}:#{Thread.current[:rd][:from]}..#{Thread.current[:rd][:to]}#{(Thread.current[:rd][:comp]=='complement(')?'-':'+'}\n"
|
260
211
|
end
|
261
212
|
Thread.current[:ofh].print Thread.current[:l]
|
262
213
|
end
|
@@ -470,18 +421,18 @@ class ROCker
|
|
470
421
|
ifh.close
|
471
422
|
ofh.close
|
472
423
|
end
|
473
|
-
def genes2genomes(
|
424
|
+
def genes2genomes(gene_ids)
|
474
425
|
genomes = []
|
475
|
-
ids = Array.new(
|
426
|
+
ids = Array.new(gene_ids)
|
476
427
|
while ids.size>0
|
477
|
-
doc =
|
478
|
-
genomes += doc.
|
428
|
+
doc = ebiFetch(:uniprotkb, ids.shift(200), :annot).split("\n")
|
429
|
+
genomes += doc.grep( /^DR\s+EMBL;/ ).map{ |ln| ln.split('; ')[1] }
|
479
430
|
end
|
480
431
|
genomes.uniq
|
481
432
|
end
|
482
|
-
def
|
483
|
-
response = RestClient.get
|
484
|
-
raise "Unable to reach
|
433
|
+
def restcall(url, outfile=nil)
|
434
|
+
response = RestClient.get url
|
435
|
+
raise "Unable to reach EBI REST client, error code #{response.code}." unless response.code == 200
|
485
436
|
unless outfile.nil?
|
486
437
|
ohf = File.open(outfile, 'w')
|
487
438
|
ohf.print response.to_s
|
@@ -489,8 +440,11 @@ class ROCker
|
|
489
440
|
end
|
490
441
|
response.to_s
|
491
442
|
end
|
492
|
-
def
|
493
|
-
|
443
|
+
def ebiFetch(db, ids, format, outfile=nil)
|
444
|
+
url = "#{ROCker.ebirest}/dbfetch/dbfetch/#{db.to_s}/#{ids.join(",")}/#{format.to_s}"
|
445
|
+
$stderr.puts " # Calling: #{url}" if @o[:debug]
|
446
|
+
self.restcall url
|
447
|
+
end
|
494
448
|
def bash(cmd, err_msg=nil)
|
495
449
|
o = `#{cmd} 2>&1 && echo '{'`
|
496
450
|
raise (err_msg.nil? ? "Error executing: #{cmd}\n\n#{o}" : err_msg) unless o[-2]=='{'
|
data/lib/rocker/alignment.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
3
3
|
# @author Luis (Coto) Orellana
|
4
4
|
# @license artistic license 2.0
|
5
|
-
# @update
|
5
|
+
# @update May-07-2015
|
6
6
|
#
|
7
7
|
|
8
8
|
require 'rocker/sequence'
|
@@ -40,20 +40,20 @@ class Alignment
|
|
40
40
|
@cols = seq.cols if self.cols.nil?
|
41
41
|
raise "Aligned sequence #{seq.id} has a different length (#{seq.cols} vs #{self.cols})" unless seq.cols == self.cols
|
42
42
|
end
|
43
|
-
def
|
44
|
-
regexps = [/^
|
45
|
-
|
43
|
+
def get_ids
|
44
|
+
regexps = [/^[A-Za-z]+\|([A-Za-z0-9_]+)\|/, /^([A-Za-z0-9_]+)$/, /^([A-Za-z0-9_]+) /]
|
45
|
+
prot_ids = []
|
46
46
|
self.seqs.keys.each do |id|
|
47
|
-
|
47
|
+
prot_id = nil
|
48
48
|
regexps.each do |regexp|
|
49
49
|
unless regexp.match(id).nil?
|
50
|
-
|
50
|
+
prot_id = $1
|
51
51
|
break
|
52
52
|
end
|
53
53
|
end
|
54
|
-
|
54
|
+
prot_ids << prot_id unless prot_id.nil?
|
55
55
|
end
|
56
|
-
|
56
|
+
prot_ids
|
57
57
|
end
|
58
58
|
def seq(id) @seqs[id] end
|
59
59
|
def size() self.seqs.size end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-rocker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0alpha
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis (Coto) Orellana
|
@@ -9,8 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
13
|
-
dependencies:
|
12
|
+
date: 2015-05-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rest-client
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ~>
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: 1.7.3
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ~>
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 1.7.3
|
14
28
|
description: Detecting and quantifying functional genes in short-read metagenomic
|
15
29
|
datasets
|
16
30
|
email: lhorellana@gatech.edu
|
@@ -42,9 +56,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
42
56
|
version: '0'
|
43
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
|
-
- - '
|
59
|
+
- - '>'
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
61
|
+
version: 1.3.1
|
48
62
|
requirements: []
|
49
63
|
rubyforge_project:
|
50
64
|
rubygems_version: 2.0.14
|