bio-blastxmlparser 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +34 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +211 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/bin/blastxmlparser +165 -0
- data/bio-blastxmlparser.gemspec +88 -0
- data/lib/bio-blastxmlparser.rb +17 -0
- data/lib/bio/db/blast/parser/nokogiri.rb +203 -0
- data/lib/bio/db/blast/xmliterator.rb +19 -0
- data/lib/bio/db/blast/xmlsplitter.rb +43 -0
- data/sample/bioruby.rb +14 -0
- data/sample/blastxmlparserdemo.rb +17 -0
- data/sample/libxml_sax.rb +25 -0
- data/sample/nokogiri_dom.rb +17 -0
- data/sample/nokogiri_sax.rb +26 -0
- data/sample/nokogiri_split_dom.rb +34 -0
- data/spec/bio-blastxmlparser_spec.rb +104 -0
- data/spec/spec_helper.rb +12 -0
- data/test/data/aa_example.fasta +42 -0
- data/test/data/aa_example_blastp.m7 +5021 -0
- data/test/data/nt_example.fasta +88 -0
- data/test/data/nt_example_blastn.m7 +85538 -0
- data/timings.sh +28 -0
- metadata +180 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
|
6
|
+
# Runtime dependencies
|
7
|
+
# gem "bio", ">= 1.3.1"
|
8
|
+
gem "bio-logger", "> 0.8.0"
|
9
|
+
gem "nokogiri", ">= 1.4.4"
|
10
|
+
|
11
|
+
# Add dependencies to develop your gem here.
|
12
|
+
# Include everything needed to run rake, tests, features, etc.
|
13
|
+
group :development do
|
14
|
+
gem "rspec", "~> 2.3.0"
|
15
|
+
gem "bundler", "~> 1.0.0"
|
16
|
+
gem "jeweler", "~> 1.5.2"
|
17
|
+
gem "rcov", ">= 0"
|
18
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
bio-logger (0.9.0)
|
5
|
+
log4r (>= 1.1.9)
|
6
|
+
diff-lcs (1.1.2)
|
7
|
+
git (1.2.5)
|
8
|
+
jeweler (1.5.2)
|
9
|
+
bundler (~> 1.0.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rake
|
12
|
+
log4r (1.1.9)
|
13
|
+
nokogiri (1.4.4)
|
14
|
+
rake (0.8.7)
|
15
|
+
rcov (0.9.9)
|
16
|
+
rspec (2.3.0)
|
17
|
+
rspec-core (~> 2.3.0)
|
18
|
+
rspec-expectations (~> 2.3.0)
|
19
|
+
rspec-mocks (~> 2.3.0)
|
20
|
+
rspec-core (2.3.1)
|
21
|
+
rspec-expectations (2.3.0)
|
22
|
+
diff-lcs (~> 1.1.2)
|
23
|
+
rspec-mocks (2.3.0)
|
24
|
+
|
25
|
+
PLATFORMS
|
26
|
+
ruby
|
27
|
+
|
28
|
+
DEPENDENCIES
|
29
|
+
bio-logger (> 0.8.0)
|
30
|
+
bundler (~> 1.0.0)
|
31
|
+
jeweler (~> 1.5.2)
|
32
|
+
nokogiri (>= 1.4.4)
|
33
|
+
rcov
|
34
|
+
rspec (~> 2.3.0)
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Pjotr Prins
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,211 @@
|
|
1
|
+
= bio-blastxmlparser
|
2
|
+
|
3
|
+
blastxmlparser is a fast big-data BLAST XML file parser. Rather than
|
4
|
+
loading everything in memory, XML is parsed by BLAST query
|
5
|
+
(Iteration). Not only does this have the advantage of low memory use, it may
|
6
|
+
also be faster when IO continues in parallel (disks read ahead).
|
7
|
+
|
8
|
+
Next to the API, blastxmlparser comes as a command line utility, which
|
9
|
+
can be used to filter results and requires no understanding of Ruby.
|
10
|
+
|
11
|
+
== Performance
|
12
|
+
|
13
|
+
XML parsing is expensive. blastxmlparser uses the Nokogiri C, or Java, XML
|
14
|
+
parser, based on libxml2. Basically a DOM parser is used for subsections of a
|
15
|
+
document; tests show this is faster than a SAX parser with Ruby callbacks. To
|
16
|
+
see why libxml2 based Nokogiri is fast, see
|
17
|
+
http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html and
|
18
|
+
http://www.xml.com/lpt/a/1703.
|
19
|
+
|
20
|
+
The parser is also designed with other optimizations, such as lazy evaluation,
|
21
|
+
only creating objects when required, and (future) parallelization. When parsing
|
22
|
+
a full BLAST result usually only a few fields are used. By using XPath queries
|
23
|
+
only the relevant fields are queried.
|
24
|
+
|
25
|
+
Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
|
26
|
+
|
27
|
+
Nokogiri DOM (default)
|
28
|
+
|
29
|
+
real 0m1.259s
|
30
|
+
user 0m1.052s
|
31
|
+
sys 0m0.144s
|
32
|
+
|
33
|
+
Nokogiri split DOM
|
34
|
+
|
35
|
+
real 0m1.713s
|
36
|
+
user 0m1.444s
|
37
|
+
sys 0m0.160s
|
38
|
+
|
39
|
+
BioRuby REXML DOM parser
|
40
|
+
|
41
|
+
real 1m14.548s
|
42
|
+
user 1m13.065s
|
43
|
+
sys 0m0.472s
|
44
|
+
|
45
|
+
== Install
|
46
|
+
|
47
|
+
gem install bio-blastxmlparser
|
48
|
+
|
49
|
+
Nokogiri XML parser is required. To install it,
|
50
|
+
the libxml2 libraries and headers need to be installed first, for
|
51
|
+
example on Debian:
|
52
|
+
|
53
|
+
apt-get install libxslt-dev libxml2-dev
|
54
|
+
gem install bio-blastxmlparser
|
55
|
+
|
56
|
+
For installation instructions on other platforms see
|
57
|
+
http://nokogiri.org/tutorials/installing_nokogiri.html.
|
58
|
+
|
59
|
+
== API
|
60
|
+
|
61
|
+
To loop through a BLAST result:
|
62
|
+
|
63
|
+
>> require 'bio-blastxmlparser'
|
64
|
+
>> fn = 'test/data/nt_example_blastn.m7'
|
65
|
+
>> n = Bio::Blast::XmlIterator.new(fn).to_enum
|
66
|
+
>> n.each do | iter |
|
67
|
+
>> puts "Hits for " + iter.query_id
|
68
|
+
>> iter.each do | hit |
|
69
|
+
>> hit.each do | hsp |
|
70
|
+
>> print hit.hit_id, "\t", hsp.evalue, "\n" if hsp.evalue < 0.001
|
71
|
+
>> end
|
72
|
+
>> end
|
73
|
+
>> end
|
74
|
+
|
75
|
+
The next example parses XML using less memory
|
76
|
+
|
77
|
+
>> blast = Bio::Blast::XmlSplitterIterator.new(fn).to_enum
|
78
|
+
>> iter = blast.next
|
79
|
+
>> iter.iter_num
|
80
|
+
=> 1
|
81
|
+
>> iter.query_id
|
82
|
+
=> "lcl|1_0"
|
83
|
+
|
84
|
+
Get the first hit
|
85
|
+
|
86
|
+
>> hit = iter.hits.first
|
87
|
+
>> hit.hit_num
|
88
|
+
=> 1
|
89
|
+
>> hit.hit_id
|
90
|
+
=> "lcl|I_74685"
|
91
|
+
>> hit.hit_def
|
92
|
+
=> "[57809 - 57666] (REVERSE SENSE) "
|
93
|
+
>> hit.accession
|
94
|
+
=> "I_74685"
|
95
|
+
>> hit.len
|
96
|
+
=> 144
|
97
|
+
|
98
|
+
Get the parent info
|
99
|
+
|
100
|
+
>> hit.parent.query_id
|
101
|
+
=> "lcl|1_0"
|
102
|
+
|
103
|
+
Get the first Hsp
|
104
|
+
|
105
|
+
>> hsp = hit.hsps.first
|
106
|
+
>> hsp.hsp_num
|
107
|
+
=> 1
|
108
|
+
>> hsp.bit_score
|
109
|
+
=> 145.205
|
110
|
+
>> hsp.score
|
111
|
+
=> 73
|
112
|
+
>> hsp.evalue
|
113
|
+
=> 5.82208e-34
|
114
|
+
>> hsp.query_from
|
115
|
+
=> 28
|
116
|
+
>> hsp.query_to
|
117
|
+
=> 100
|
118
|
+
>> hsp.query_frame
|
119
|
+
=> 1
|
120
|
+
>> hsp.hit_frame
|
121
|
+
=> 1
|
122
|
+
>> hsp.identity
|
123
|
+
=> 73
|
124
|
+
>> hsp.positive
|
125
|
+
=> 73
|
126
|
+
>> hsp.align_len
|
127
|
+
=> 73
|
128
|
+
>> hsp.qseq
|
129
|
+
=> "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
|
130
|
+
>> hsp.hseq
|
131
|
+
=> "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
|
132
|
+
>> hsp.midline
|
133
|
+
=> "|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"
|
134
|
+
|
135
|
+
It is also possible to use the XML element names instead of the methods, e.g.
|
136
|
+
|
137
|
+
>> hsp.field("Hsp_bit-score")
|
138
|
+
=> "145.205"
|
139
|
+
>> hsp["Hsp_bit-score"]
|
140
|
+
=> "145.205"
|
141
|
+
|
142
|
+
Note that these are always String values.
|
143
|
+
|
144
|
+
Fetch the next result (Iteration)
|
145
|
+
|
146
|
+
>> iter2 = blast.next
|
147
|
+
>> iter2.iter_num
|
148
|
+
=> 2
|
149
|
+
>> iter2.query_id
|
150
|
+
=> "lcl|2_0"
|
151
|
+
|
152
|
+
etc. etc.
|
153
|
+
|
154
|
+
For more examples see the files in ./spec
|
155
|
+
|
156
|
+
== Usage
|
157
|
+
|
158
|
+
blastxmlparser [options] file(s)
|
159
|
+
|
160
|
+
-p, --parser name Use full|split parser (default full)
|
161
|
+
-n, --named fields Set named fields
|
162
|
+
-e, --exec filter Execute filter
|
163
|
+
|
164
|
+
--logger filename Log to file (default stderr)
|
165
|
+
--trace options Set log level (default INFO, see bio-logger)
|
166
|
+
-q, --quiet Run quietly
|
167
|
+
-v, --verbose Run verbosely
|
168
|
+
--debug Show debug messages
|
169
|
+
-h, --help Show help and examples
|
170
|
+
|
171
|
+
bioblastxmlparser filename(s)
|
172
|
+
|
173
|
+
Use --help switch for more information
|
174
|
+
|
175
|
+
== Examples
|
176
|
+
|
177
|
+
Print result fields of iterations containing 'lcl', using a regex
|
178
|
+
|
179
|
+
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
180
|
+
|
181
|
+
Print fields where bit_score > 145
|
182
|
+
|
183
|
+
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
184
|
+
|
185
|
+
It is also possible to use the XML element names directly
|
186
|
+
|
187
|
+
blastxmlparser -e 'hsp["Hsp_bit-score"].to_f>145' test/data/nt_example_blastn.m7
|
188
|
+
|
189
|
+
Print named fields where E-value < 0.01 and hit length > 100
|
190
|
+
|
191
|
+
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
192
|
+
|
193
|
+
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
194
|
+
2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
195
|
+
3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
|
196
|
+
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
197
|
+
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
198
|
+
etc. etc.
|
199
|
+
|
200
|
+
To use the low-memory (split) parser, run
|
201
|
+
|
202
|
+
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
203
|
+
|
204
|
+
== URL
|
205
|
+
|
206
|
+
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
207
|
+
|
208
|
+
== Copyright
|
209
|
+
|
210
|
+
Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
211
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'rake'
|
11
|
+
|
12
|
+
require 'jeweler'
|
13
|
+
Jeweler::Tasks.new do |gem|
|
14
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
15
|
+
gem.name = "bio-blastxmlparser"
|
16
|
+
gem.homepage = "http://github.com/pjotrp/bioruby-blastxmlparser"
|
17
|
+
gem.license = "MIT"
|
18
|
+
gem.summary = %Q{BLAST XML parser}
|
19
|
+
gem.description = %Q{Fast big data XML parser and library, written in Ruby}
|
20
|
+
gem.email = "pjotr.public01@thebird.nl"
|
21
|
+
gem.authors = ["Pjotr Prins"]
|
22
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
23
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
24
|
+
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
25
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
26
|
+
end
|
27
|
+
Jeweler::RubygemsDotOrgTasks.new
|
28
|
+
|
29
|
+
require 'rspec/core'
|
30
|
+
require 'rspec/core/rake_task'
|
31
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
32
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
33
|
+
end
|
34
|
+
|
35
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
36
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
37
|
+
spec.rcov = true
|
38
|
+
end
|
39
|
+
|
40
|
+
task :default => :spec
|
41
|
+
|
42
|
+
require 'rake/rdoctask'
|
43
|
+
Rake::RDocTask.new do |rdoc|
|
44
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
45
|
+
|
46
|
+
rdoc.rdoc_dir = 'rdoc'
|
47
|
+
rdoc.title = "bio-blastxmlparser #{version}"
|
48
|
+
rdoc.rdoc_files.include('README*')
|
49
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
50
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.6.0
|
data/bin/blastxmlparser
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# BioRuby bio-blastxmlparser Plugin
|
4
|
+
# Author:: Pjotr Prins
|
5
|
+
# Copyright:: 2011
|
6
|
+
# License:: MIT License
|
7
|
+
#
|
8
|
+
# Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
|
9
|
+
|
10
|
+
rootpath = File.dirname(File.dirname(__FILE__))
|
11
|
+
$: << File.join(rootpath,'lib')
|
12
|
+
|
13
|
+
BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
14
|
+
|
15
|
+
$stderr.print "BioRuby BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2011 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
|
16
|
+
|
17
|
+
USAGE = <<EOM
|
18
|
+
|
19
|
+
bioblastxmlparser filename(s)
|
20
|
+
|
21
|
+
Use --help switch for more information
|
22
|
+
|
23
|
+
== Examples
|
24
|
+
|
25
|
+
Print result fields of iterations containing 'lcl', using a regex
|
26
|
+
|
27
|
+
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
28
|
+
|
29
|
+
Print fields where bit_score > 145
|
30
|
+
|
31
|
+
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
32
|
+
|
33
|
+
It is also possible to use the XML element names directly
|
34
|
+
|
35
|
+
blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
|
36
|
+
|
37
|
+
Print named fields where E-value < 0.001 and hit length > 100
|
38
|
+
|
39
|
+
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
40
|
+
|
41
|
+
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
42
|
+
2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
43
|
+
3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
|
44
|
+
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
45
|
+
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
46
|
+
etc. etc.
|
47
|
+
|
48
|
+
To use the low-mem version use
|
49
|
+
|
50
|
+
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
51
|
+
|
52
|
+
== URL
|
53
|
+
|
54
|
+
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
55
|
+
|
56
|
+
== Copyright
|
57
|
+
|
58
|
+
Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
59
|
+
|
60
|
+
EOM
|
61
|
+
|
62
|
+
if ARGV.size == 0
|
63
|
+
print USAGE
|
64
|
+
exit 1
|
65
|
+
end
|
66
|
+
|
67
|
+
require 'bio-blastxmlparser'
|
68
|
+
require 'optparse'
|
69
|
+
require 'ostruct'
|
70
|
+
|
71
|
+
require 'bio-logger'
|
72
|
+
Bio::Log::CLI.logger('stderr')
|
73
|
+
Bio::Log::CLI.trace('info')
|
74
|
+
|
75
|
+
options = OpenStruct.new()
|
76
|
+
|
77
|
+
opts = OptionParser.new do |o|
|
78
|
+
|
79
|
+
o.on_tail("-h", "--help", "Show help and examples") {
|
80
|
+
print(opts)
|
81
|
+
print USAGE
|
82
|
+
exit()
|
83
|
+
}
|
84
|
+
|
85
|
+
o.banner = "== Usage\n #{File.basename($0)} [options] file(s)"
|
86
|
+
|
87
|
+
o.separator ""
|
88
|
+
|
89
|
+
o.on("-p name", "--parser name", "Use full|split parser (default full)") do |p|
|
90
|
+
options.parser = p.to_sym
|
91
|
+
end
|
92
|
+
|
93
|
+
o.on("-n fields","--named fields",String, "Set named fields") do |s|
|
94
|
+
options.fields = s.split(/,/)
|
95
|
+
end
|
96
|
+
|
97
|
+
o.on("-e filter","--exec filter",String, "Execute filter") do |s|
|
98
|
+
options.exec = s
|
99
|
+
end
|
100
|
+
|
101
|
+
o.separator ""
|
102
|
+
|
103
|
+
o.on("--logger filename",String,"Log to file (default stderr)") do | name |
|
104
|
+
Bio::Log::CLI.logger(name)
|
105
|
+
end
|
106
|
+
|
107
|
+
o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
|
108
|
+
Bio::Log::CLI.trace(s)
|
109
|
+
end
|
110
|
+
|
111
|
+
o.on("-q", "--quiet", "Run quietly") do |q|
|
112
|
+
Bio::Log::CLI.trace('error')
|
113
|
+
end
|
114
|
+
|
115
|
+
o.on("-v", "--verbose", "Run verbosely") do |v|
|
116
|
+
Bio::Log::CLI.trace('info')
|
117
|
+
end
|
118
|
+
|
119
|
+
o.on("--debug", "Show debug messages") do |v|
|
120
|
+
Bio::Log::CLI.trace('debug')
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
124
|
+
|
125
|
+
begin
|
126
|
+
opts.parse!(ARGV)
|
127
|
+
|
128
|
+
Bio::Log::CLI.configure('bio-blastxmlparser')
|
129
|
+
logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
|
130
|
+
|
131
|
+
ARGV.each do | fn |
|
132
|
+
logger.info("XML parsing #{fn}")
|
133
|
+
n = if options.parser == :split
|
134
|
+
Bio::Blast::XmlSplitterIterator.new(fn).to_enum
|
135
|
+
else
|
136
|
+
Bio::Blast::XmlIterator.new(fn).to_enum
|
137
|
+
end
|
138
|
+
i = 1
|
139
|
+
n.each do | iter |
|
140
|
+
iter.each do | hit |
|
141
|
+
hit.each do | hsp |
|
142
|
+
do_print = if options.exec
|
143
|
+
eval(options.exec)
|
144
|
+
else
|
145
|
+
true
|
146
|
+
end
|
147
|
+
if do_print
|
148
|
+
if options.fields
|
149
|
+
print i,"\t"
|
150
|
+
options.fields.each do | f |
|
151
|
+
print eval(f),"\t"
|
152
|
+
end
|
153
|
+
print "\n"
|
154
|
+
else
|
155
|
+
print [i,iter.iter_num,iter.query_id,hit.hit_id,hsp.hsp_num,hsp.evalue].join("\t"),"\n"
|
156
|
+
end
|
157
|
+
i += 1
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
rescue OptionParser::InvalidOption => e
|
164
|
+
opts[:invalid_argument] = e.message
|
165
|
+
end
|