bio-blastxmlparser 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +34 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +211 -0
- data/Rakefile +50 -0
- data/VERSION +1 -0
- data/bin/blastxmlparser +165 -0
- data/bio-blastxmlparser.gemspec +88 -0
- data/lib/bio-blastxmlparser.rb +17 -0
- data/lib/bio/db/blast/parser/nokogiri.rb +203 -0
- data/lib/bio/db/blast/xmliterator.rb +19 -0
- data/lib/bio/db/blast/xmlsplitter.rb +43 -0
- data/sample/bioruby.rb +14 -0
- data/sample/blastxmlparserdemo.rb +17 -0
- data/sample/libxml_sax.rb +25 -0
- data/sample/nokogiri_dom.rb +17 -0
- data/sample/nokogiri_sax.rb +26 -0
- data/sample/nokogiri_split_dom.rb +34 -0
- data/spec/bio-blastxmlparser_spec.rb +104 -0
- data/spec/spec_helper.rb +12 -0
- data/test/data/aa_example.fasta +42 -0
- data/test/data/aa_example_blastp.m7 +5021 -0
- data/test/data/nt_example.fasta +88 -0
- data/test/data/nt_example_blastn.m7 +85538 -0
- data/timings.sh +28 -0
- metadata +180 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
|
6
|
+
# Runtime dependencies
|
7
|
+
# gem "bio", ">= 1.3.1"
|
8
|
+
gem "bio-logger", "> 0.8.0"
|
9
|
+
gem "nokogiri", ">= 1.4.4"
|
10
|
+
|
11
|
+
# Add dependencies to develop your gem here.
|
12
|
+
# Include everything needed to run rake, tests, features, etc.
|
13
|
+
group :development do
|
14
|
+
gem "rspec", "~> 2.3.0"
|
15
|
+
gem "bundler", "~> 1.0.0"
|
16
|
+
gem "jeweler", "~> 1.5.2"
|
17
|
+
gem "rcov", ">= 0"
|
18
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
bio-logger (0.9.0)
|
5
|
+
log4r (>= 1.1.9)
|
6
|
+
diff-lcs (1.1.2)
|
7
|
+
git (1.2.5)
|
8
|
+
jeweler (1.5.2)
|
9
|
+
bundler (~> 1.0.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rake
|
12
|
+
log4r (1.1.9)
|
13
|
+
nokogiri (1.4.4)
|
14
|
+
rake (0.8.7)
|
15
|
+
rcov (0.9.9)
|
16
|
+
rspec (2.3.0)
|
17
|
+
rspec-core (~> 2.3.0)
|
18
|
+
rspec-expectations (~> 2.3.0)
|
19
|
+
rspec-mocks (~> 2.3.0)
|
20
|
+
rspec-core (2.3.1)
|
21
|
+
rspec-expectations (2.3.0)
|
22
|
+
diff-lcs (~> 1.1.2)
|
23
|
+
rspec-mocks (2.3.0)
|
24
|
+
|
25
|
+
PLATFORMS
|
26
|
+
ruby
|
27
|
+
|
28
|
+
DEPENDENCIES
|
29
|
+
bio-logger (> 0.8.0)
|
30
|
+
bundler (~> 1.0.0)
|
31
|
+
jeweler (~> 1.5.2)
|
32
|
+
nokogiri (>= 1.4.4)
|
33
|
+
rcov
|
34
|
+
rspec (~> 2.3.0)
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Pjotr Prins
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,211 @@
|
|
1
|
+
= bio-blastxmlparser
|
2
|
+
|
3
|
+
blastxmlparser is a fast big-data BLAST XML file parser. Rather than
|
4
|
+
loading everything in memory, XML is parsed by BLAST query
|
5
|
+
(Iteration). Not only does this have the advantage of low memory use, it may
|
6
|
+
also be faster when IO continues in parallel (disks read ahead).
|
7
|
+
|
8
|
+
Next to the API, blastxmlparser comes as a command line utility, which
|
9
|
+
can be used to filter results and requires no understanding of Ruby.
|
10
|
+
|
11
|
+
== Performance
|
12
|
+
|
13
|
+
XML parsing is expensive. blastxmlparser uses the Nokogiri C, or Java, XML
|
14
|
+
parser, based on libxml2. Basically a DOM parser is used for subsections of a
|
15
|
+
document; tests show this is faster than a SAX parser with Ruby callbacks. To
|
16
|
+
see why libxml2 based Nokogiri is fast, see
|
17
|
+
http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html and
|
18
|
+
http://www.xml.com/lpt/a/1703.
|
19
|
+
|
20
|
+
The parser is also designed with other optimizations, such as lazy evaluation,
|
21
|
+
only creating objects when required, and (future) parallelization. When parsing
|
22
|
+
a full BLAST result usually only a few fields are used. By using XPath queries
|
23
|
+
only the relevant fields are queried.
|
24
|
+
|
25
|
+
Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
|
26
|
+
|
27
|
+
Nokogiri DOM (default)
|
28
|
+
|
29
|
+
real 0m1.259s
|
30
|
+
user 0m1.052s
|
31
|
+
sys 0m0.144s
|
32
|
+
|
33
|
+
Nokogiri split DOM
|
34
|
+
|
35
|
+
real 0m1.713s
|
36
|
+
user 0m1.444s
|
37
|
+
sys 0m0.160s
|
38
|
+
|
39
|
+
BioRuby ReXML DOM parser
|
40
|
+
|
41
|
+
real 1m14.548s
|
42
|
+
user 1m13.065s
|
43
|
+
sys 0m0.472s
|
44
|
+
|
45
|
+
== Install
|
46
|
+
|
47
|
+
gem install bio-blastxmlparser
|
48
|
+
|
49
|
+
Nokogiri XML parser is required. To install it,
|
50
|
+
the libxml2 libraries and headers need to be installed first, for
|
51
|
+
example on Debian:
|
52
|
+
|
53
|
+
apt-get install libxslt-dev libxml2-dev
|
54
|
+
gem install bio-blastxmlparser
|
55
|
+
|
56
|
+
For installation instructions on other platforms, see
|
57
|
+
http://nokogiri.org/tutorials/installing_nokogiri.html.
|
58
|
+
|
59
|
+
== API
|
60
|
+
|
61
|
+
To loop through a BLAST result:
|
62
|
+
|
63
|
+
>> require 'bio-blastxmlparser'
|
64
|
+
>> fn = 'test/data/nt_example_blastn.m7'
|
65
|
+
>> n = Bio::Blast::XmlIterator.new(fn).to_enum
|
66
|
+
>> n.each do | iter |
|
67
|
+
>> puts "Hits for " + iter.query_id
|
68
|
+
>> iter.each do | hit |
|
69
|
+
>> hit.each do | hsp |
|
70
|
+
>> print hit.hit_id, "\t", hsp.evalue, "\n" if hsp.evalue < 0.001
|
71
|
+
>> end
|
72
|
+
>> end
|
73
|
+
>> end
|
74
|
+
|
75
|
+
The next example parses XML using less memory
|
76
|
+
|
77
|
+
>> blast = Bio::Blast::XmlSplitterIterator.new(fn).to_enum
|
78
|
+
>> iter = blast.next
|
79
|
+
>> iter.iter_num
|
80
|
+
=> 1
|
81
|
+
>> iter.query_id
|
82
|
+
=> "lcl|1_0"
|
83
|
+
|
84
|
+
Get the first hit
|
85
|
+
|
86
|
+
>> hit = iter.hits.first
|
87
|
+
>> hit.hit_num
|
88
|
+
=> 1
|
89
|
+
>> hit.hit_id
|
90
|
+
=> "lcl|I_74685"
|
91
|
+
>> hit.hit_def
|
92
|
+
=> "[57809 - 57666] (REVERSE SENSE) "
|
93
|
+
>> hit.accession
|
94
|
+
=> "I_74685"
|
95
|
+
>> hit.len
|
96
|
+
=> 144
|
97
|
+
|
98
|
+
Get the parent info
|
99
|
+
|
100
|
+
>> hit.parent.query_id
|
101
|
+
=> "lcl|1_0"
|
102
|
+
|
103
|
+
Get the first Hsp
|
104
|
+
|
105
|
+
>> hsp = hit.hsps.first
|
106
|
+
>> hsp.hsp_num
|
107
|
+
=> 1
|
108
|
+
>> hsp.bit_score
|
109
|
+
=> 145.205
|
110
|
+
>> hsp.score
|
111
|
+
=> 73
|
112
|
+
>> hsp.evalue
|
113
|
+
=> 5.82208e-34
|
114
|
+
>> hsp.query_from
|
115
|
+
=> 28
|
116
|
+
>> hsp.query_to
|
117
|
+
=> 100
|
118
|
+
>> hsp.query_frame
|
119
|
+
=> 1
|
120
|
+
>> hsp.hit_frame
|
121
|
+
=> 1
|
122
|
+
>> hsp.identity
|
123
|
+
=> 73
|
124
|
+
>> hsp.positive
|
125
|
+
=> 73
|
126
|
+
>> hsp.align_len
|
127
|
+
=> 73
|
128
|
+
>> hsp.qseq
|
129
|
+
=> "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
|
130
|
+
>> hsp.hseq
|
131
|
+
=> "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
|
132
|
+
>> hsp.midline
|
133
|
+
=> "|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"
|
134
|
+
|
135
|
+
It is also possible to use the XML element names instead of the accessor methods, e.g.
|
136
|
+
|
137
|
+
>> hsp.field("Hsp_bit-score")
|
138
|
+
=> "145.205"
|
139
|
+
>> hsp["Hsp_bit-score"]
|
140
|
+
=> "145.205"
|
141
|
+
|
142
|
+
Note that these are always String values.
|
143
|
+
|
144
|
+
Fetch the next result (Iteration)
|
145
|
+
|
146
|
+
>> iter2 = blast.next
|
147
|
+
>> iter2.iter_num
|
148
|
+
=> 2
|
149
|
+
>> iter2.query_id
|
150
|
+
=> "lcl|2_0"
|
151
|
+
|
152
|
+
etc. etc.
|
153
|
+
|
154
|
+
For more examples see the files in ./spec
|
155
|
+
|
156
|
+
== Usage
|
157
|
+
|
158
|
+
blastxmlparser [options] file(s)
|
159
|
+
|
160
|
+
-p, --parser name Use full|split parser (default full)
|
161
|
+
-n, --named fields Set named fields
|
162
|
+
-e, --exec filter Execute filter
|
163
|
+
|
164
|
+
--logger filename Log to file (default stderr)
|
165
|
+
--trace options Set log level (default INFO, see bio-logger)
|
166
|
+
-q, --quiet Run quietly
|
167
|
+
-v, --verbose Run verbosely
|
168
|
+
--debug Show debug messages
|
169
|
+
-h, --help Show help and examples
|
170
|
+
|
171
|
+
bioblastxmlparser filename(s)
|
172
|
+
|
173
|
+
Use --help switch for more information
|
174
|
+
|
175
|
+
== Examples
|
176
|
+
|
177
|
+
Print result fields of iterations containing 'lcl', using a regex
|
178
|
+
|
179
|
+
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
180
|
+
|
181
|
+
Print fields where bit_score > 145
|
182
|
+
|
183
|
+
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
184
|
+
|
185
|
+
It is also possible to use the XML element names directly
|
186
|
+
|
187
|
+
blastxmlparser -e 'hsp["Hsp_bit-score"].to_f>145' test/data/nt_example_blastn.m7
|
188
|
+
|
189
|
+
Print named fields where E-value < 0.01 and hit length > 100
|
190
|
+
|
191
|
+
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
192
|
+
|
193
|
+
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
194
|
+
2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
195
|
+
3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
|
196
|
+
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
197
|
+
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
198
|
+
etc. etc.
|
199
|
+
|
200
|
+
To use the low-mem version use
|
201
|
+
|
202
|
+
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
203
|
+
|
204
|
+
== URL
|
205
|
+
|
206
|
+
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
207
|
+
|
208
|
+
== Copyright
|
209
|
+
|
210
|
+
Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
211
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'rake'
|
11
|
+
|
12
|
+
require 'jeweler'
|
13
|
+
Jeweler::Tasks.new do |gem|
|
14
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
15
|
+
gem.name = "bio-blastxmlparser"
|
16
|
+
gem.homepage = "http://github.com/pjotrp/bioruby-blastxmlparser"
|
17
|
+
gem.license = "MIT"
|
18
|
+
gem.summary = %Q{BLAST XML parser}
|
19
|
+
gem.description = %Q{Fast big data XML parser and library, written in Ruby}
|
20
|
+
gem.email = "pjotr.public01@thebird.nl"
|
21
|
+
gem.authors = ["Pjotr Prins"]
|
22
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
23
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
24
|
+
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
25
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
26
|
+
end
|
27
|
+
Jeweler::RubygemsDotOrgTasks.new
|
28
|
+
|
29
|
+
require 'rspec/core'
|
30
|
+
require 'rspec/core/rake_task'
|
31
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
32
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
33
|
+
end
|
34
|
+
|
35
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
36
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
37
|
+
spec.rcov = true
|
38
|
+
end
|
39
|
+
|
40
|
+
task :default => :spec
|
41
|
+
|
42
|
+
require 'rake/rdoctask'
|
43
|
+
Rake::RDocTask.new do |rdoc|
|
44
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
45
|
+
|
46
|
+
rdoc.rdoc_dir = 'rdoc'
|
47
|
+
rdoc.title = "bio-blastxmlparser #{version}"
|
48
|
+
rdoc.rdoc_files.include('README*')
|
49
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
50
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.6.0
|
data/bin/blastxmlparser
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# BioRuby bio-blastxmlparser Plugin
|
4
|
+
# Author:: Pjotr Prins
|
5
|
+
# Copyright:: 2011
|
6
|
+
# License:: MIT License
|
7
|
+
#
|
8
|
+
# Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
|
9
|
+
|
10
|
+
rootpath = File.dirname(File.dirname(__FILE__))
|
11
|
+
$: << File.join(rootpath,'lib')
|
12
|
+
|
13
|
+
BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
14
|
+
|
15
|
+
$stderr.print "BioRuby BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2011 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
|
16
|
+
|
17
|
+
USAGE = <<EOM
|
18
|
+
|
19
|
+
bioblastxmlparser filename(s)
|
20
|
+
|
21
|
+
Use --help switch for more information
|
22
|
+
|
23
|
+
== Examples
|
24
|
+
|
25
|
+
Print result fields of iterations containing 'lcl', using a regex
|
26
|
+
|
27
|
+
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
28
|
+
|
29
|
+
Print fields where bit_score > 145
|
30
|
+
|
31
|
+
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
32
|
+
|
33
|
+
It is also possible to use the XML element names directly
|
34
|
+
|
35
|
+
blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
|
36
|
+
|
37
|
+
Print named fields where E-value < 0.001 and hit length > 100
|
38
|
+
|
39
|
+
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
40
|
+
|
41
|
+
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
42
|
+
2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
43
|
+
3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
|
44
|
+
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
45
|
+
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
46
|
+
etc. etc.
|
47
|
+
|
48
|
+
To use the low-mem version use
|
49
|
+
|
50
|
+
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
51
|
+
|
52
|
+
== URL
|
53
|
+
|
54
|
+
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
55
|
+
|
56
|
+
== Copyright
|
57
|
+
|
58
|
+
Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
59
|
+
|
60
|
+
EOM
|
61
|
+
|
62
|
+
if ARGV.size == 0
|
63
|
+
print USAGE
|
64
|
+
exit 1
|
65
|
+
end
|
66
|
+
|
67
|
+
require 'bio-blastxmlparser'
|
68
|
+
require 'optparse'
|
69
|
+
require 'ostruct'
|
70
|
+
|
71
|
+
require 'bio-logger'
|
72
|
+
Bio::Log::CLI.logger('stderr')
|
73
|
+
Bio::Log::CLI.trace('info')
|
74
|
+
|
75
|
+
options = OpenStruct.new()
|
76
|
+
|
77
|
+
opts = OptionParser.new do |o|
|
78
|
+
|
79
|
+
o.on_tail("-h", "--help", "Show help and examples") {
|
80
|
+
print(opts)
|
81
|
+
print USAGE
|
82
|
+
exit()
|
83
|
+
}
|
84
|
+
|
85
|
+
o.banner = "== Usage\n #{File.basename($0)} [options] file(s)"
|
86
|
+
|
87
|
+
o.separator ""
|
88
|
+
|
89
|
+
o.on("-p name", "--parser name", "Use full|split parser (default full)") do |p|
|
90
|
+
options.parser = p.to_sym
|
91
|
+
end
|
92
|
+
|
93
|
+
o.on("-n fields","--named fields",String, "Set named fields") do |s|
|
94
|
+
options.fields = s.split(/,/)
|
95
|
+
end
|
96
|
+
|
97
|
+
o.on("-e filter","--exec filter",String, "Execute filter") do |s|
|
98
|
+
options.exec = s
|
99
|
+
end
|
100
|
+
|
101
|
+
o.separator ""
|
102
|
+
|
103
|
+
o.on("--logger filename",String,"Log to file (default stderr)") do | name |
|
104
|
+
Bio::Log::CLI.logger(name)
|
105
|
+
end
|
106
|
+
|
107
|
+
o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
|
108
|
+
Bio::Log::CLI.trace(s)
|
109
|
+
end
|
110
|
+
|
111
|
+
o.on("-q", "--quiet", "Run quietly") do |q|
|
112
|
+
Bio::Log::CLI.trace('error')
|
113
|
+
end
|
114
|
+
|
115
|
+
o.on("-v", "--verbose", "Run verbosely") do |v|
|
116
|
+
Bio::Log::CLI.trace('info')
|
117
|
+
end
|
118
|
+
|
119
|
+
o.on("--debug", "Show debug messages") do |v|
|
120
|
+
Bio::Log::CLI.trace('debug')
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
124
|
+
|
125
|
+
begin
|
126
|
+
opts.parse!(ARGV)
|
127
|
+
|
128
|
+
Bio::Log::CLI.configure('bio-blastxmlparser')
|
129
|
+
logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
|
130
|
+
|
131
|
+
ARGV.each do | fn |
|
132
|
+
logger.info("XML parsing #{fn}")
|
133
|
+
n = if options.parser == :split
|
134
|
+
Bio::Blast::XmlSplitterIterator.new(fn).to_enum
|
135
|
+
else
|
136
|
+
Bio::Blast::XmlIterator.new(fn).to_enum
|
137
|
+
end
|
138
|
+
i = 1
|
139
|
+
n.each do | iter |
|
140
|
+
iter.each do | hit |
|
141
|
+
hit.each do | hsp |
|
142
|
+
do_print = if options.exec
|
143
|
+
eval(options.exec)
|
144
|
+
else
|
145
|
+
true
|
146
|
+
end
|
147
|
+
if do_print
|
148
|
+
if options.fields
|
149
|
+
print i,"\t"
|
150
|
+
options.fields.each do | f |
|
151
|
+
print eval(f),"\t"
|
152
|
+
end
|
153
|
+
print "\n"
|
154
|
+
else
|
155
|
+
print [i,iter.iter_num,iter.query_id,hit.hit_id,hsp.hsp_num,hsp.evalue].join("\t"),"\n"
|
156
|
+
end
|
157
|
+
i += 1
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
rescue OptionParser::InvalidOption => e
|
164
|
+
opts[:invalid_argument] = e.message
|
165
|
+
end
|