bio-rocker 0.2.0alpha → 0.2.1beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ROCker +2 -1
- data/lib/rocker.rb +18 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80a163d27ab76ae02864ec1f3b1e288371e8bf50
|
4
|
+
data.tar.gz: c444438d8b9372b6949c900a46dc1dd913efe56b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc9303fd05913c720ca440ad358d3313cd2b601193e4e3bc46d945e4bfef14713f5d2ba1483fa097472e0be44d08f3d426b73d593fd4a4e410f663c2db284c21
|
7
|
+
data.tar.gz: d78afc4fe61106c453a52a87eec977495ac52016af356dc3d60f792da35de16f385c26b9af3df190857529f95d03acbca7edf766e6c439bf4315952506dd9577
|
data/bin/ROCker
CHANGED
data/lib/rocker.rb
CHANGED
@@ -114,7 +114,11 @@ class ROCker
|
|
114
114
|
i = 0
|
115
115
|
genome_ids[:positive].each do |genome_id|
|
116
116
|
print " * scanning #{(i+=1).ordinalize} genome out of #{genome_ids[:positive].size}. \r" unless @o[:q]
|
117
|
-
|
117
|
+
unless @o[:pertaxon].nil?
|
118
|
+
genome_taxon = genome2taxon(genome_id, @o[:pertaxon])
|
119
|
+
next unless genome_org[ genome_taxon ].nil?
|
120
|
+
genome_org[ genome_taxon ] = genome_id
|
121
|
+
end
|
118
122
|
$stderr.puts " # Looking for any of #{@o[:positive]}" if @o[:debug]
|
119
123
|
genome_file = @o[:baseout] + '.src.' + i.to_s + '.gff3'
|
120
124
|
if @o[:reuse] and File.exist? genome_file
|
@@ -142,7 +146,10 @@ class ROCker
|
|
142
146
|
end
|
143
147
|
end
|
144
148
|
print "\n" unless @o[:q]
|
145
|
-
|
149
|
+
unless @o[:pertaxon].nil?
|
150
|
+
genome_ids[:positive] = genome_org.values
|
151
|
+
puts " Using #{genome_org.size} genomes after filtering by #{@o[:pertaxon]}." unless @o[:q]
|
152
|
+
end
|
146
153
|
all_genome_ids = genome_ids.values.reduce(:+).uniq
|
147
154
|
missing = @o[:positive] - positive_coords.values.map{ |a| a.map{ |b| b[:prot_id] } }.reduce(:+)
|
148
155
|
warn "\nWARNING: Cannot find genomic location of sequence(s) #{missing.join(',')}.\n\n" unless missing.size==0 or @o[:genomefrx]<1.0 or not @o[:pertaxon].nil?
|
@@ -430,6 +437,15 @@ class ROCker
|
|
430
437
|
end
|
431
438
|
genomes.uniq
|
432
439
|
end
|
440
|
+
# Looks up the taxon ID recorded in the EMBL annotation of a genome.
#
# Fetches the 'annot' view of +genome_id+ from the 'embl' database through
# ebiFetch and reads the first feature-table line of the form
#   FT   /db_xref="taxon:<digits>"
#
# genome_id:: Identifier handed to ebiFetch (as a single-element list).
#
# Returns the taxon ID as a String of digits, or nil when the annotation has
# no such line or the line does not carry a quoted numeric ID.
def genome2taxid(genome_id)
  annotation = ebiFetch('embl', [genome_id], 'annot')
  xref_line = annotation.split(/[\n\r]/).grep(/^FT\s+\/db_xref="taxon:/).first
  return nil if xref_line.nil?

  # Extract the capture directly: String#sub would hand back the whole raw
  # line when the pattern does not match, which callers would then treat as
  # a taxon ID.
  xref_line[/"taxon:(\d+)"/, 1]
end
|
445
|
+
# Resolves the taxon of a genome at a given taxonomic rank.
#
# Obtains the genome's taxon ID with genome2taxid, fetches the matching
# 'enataxonomyxml' record from the 'taxonomy' database through ebiFetch, and
# reads the taxId attribute of the first <taxon ...> tag whose rank attribute
# equals +rank+.
#
# genome_id:: Identifier of the genome to classify.
# rank::      Rank name to look for in the rank="..." attribute
#             (default: 'species').
#
# Returns the taxId as a String of digits. When the genome has no taxon ID,
# no tag carries the requested rank, or the tag has no numeric taxId, returns
# the placeholder "no-taxon-<genome_id>" instead of raising NoMethodError.
def genome2taxon(genome_id, rank='species')
  taxid = genome2taxid(genome_id)
  taxon = nil
  unless taxid.nil?
    xml = ebiFetch('taxonomy', [taxid], 'enataxonomyxml').gsub(/\s*\n\s*/,'')
    # Escape the rank so it is matched literally rather than as a pattern.
    rank_re = /rank="#{Regexp.escape(rank.to_s)}"/
    tag = xml.scan(/<taxon [^>]+>/).grep(rank_re).first
    taxon = tag[/ taxId="(\d+)"/, 1] unless tag.nil?
  end
  # The placeholder is unique per genome, so code that keys a hash by this
  # value does not merge unrelated unresolved genomes under a single key.
  taxon.nil? ? "no-taxon-#{genome_id}" : taxon
end
|
433
449
|
def restcall(url, outfile=nil)
|
434
450
|
response = RestClient.get url
|
435
451
|
raise "Unable to reach EBI REST client, error code #{response.code}." unless response.code == 200
|