bio-blastxmlparser 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +1 -1
- data/Gemfile +6 -13
- data/Gemfile.lock +44 -10
- data/LICENSE.txt +1 -1
- data/{README.rdoc → README.md} +158 -37
- data/Rakefile +2 -12
- data/VERSION +1 -1
- data/bin/blastxmlparser +26 -72
- data/bio-blastxmlparser.gemspec +29 -26
- data/lib/bio-blastxmlparser.rb +1 -0
- data/lib/bio/writers/rdf.rb +79 -0
- data/template/json.erb +15 -0
- data/template/rdf.erb +21 -0
- metadata +72 -53
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 31b42217bb809cde8d5ef3c06d11c6c9123c6413
|
4
|
+
data.tar.gz: 5d23e19fb8c774f7edaffd03bbcb156800679f7f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: de99019d564d5ea759f6e3ef330b8e9e68f6a7bbdb0578c34699ad7f716da16562d702da935bcfc5e3baa9a9e673b2ad99a62ae07210c4feea144134ad822e94
|
7
|
+
data.tar.gz: 24bb61197ff82129b404dcaf928b38ac9f7b9d5c10b90a630032253e468adc558c1675639b11c4bc60f7da8082626b3b288c2f0b9e6328e0ef2525b3a79453a8
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,21 +1,14 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
|
-
# Add dependencies required to use your gem here.
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
5
|
-
|
6
2
|
# Runtime dependencies
|
7
|
-
|
8
|
-
gem "
|
9
|
-
gem "nokogiri", ">= 1.5.0"
|
3
|
+
gem "bio-logger"
|
4
|
+
gem "nokogiri", "~>1.6.0"
|
10
5
|
|
11
6
|
# Add dependencies to develop your gem here.
|
12
7
|
# Include everything needed to run rake, tests, features, etc.
|
13
8
|
group :development do
|
14
|
-
|
15
|
-
gem "rake", ">= 0.9.2.2"
|
9
|
+
gem "rake"
|
16
10
|
gem "bundler"
|
17
|
-
gem "jeweler", "~>
|
18
|
-
gem "rspec"
|
19
|
-
gem "rdoc"
|
20
|
-
# gem "rcov", ">= 0"
|
11
|
+
gem "jeweler", "~> 2.0.1"
|
12
|
+
gem "rspec"
|
13
|
+
gem "rdoc"
|
21
14
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,18 +1,51 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
addressable (2.3.6)
|
4
5
|
bio-logger (1.0.0)
|
5
6
|
log4r (>= 1.1.9)
|
7
|
+
builder (3.2.2)
|
8
|
+
descendants_tracker (0.0.4)
|
9
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
6
10
|
diff-lcs (1.1.3)
|
7
|
-
|
8
|
-
|
9
|
-
|
11
|
+
faraday (0.9.0)
|
12
|
+
multipart-post (>= 1.2, < 3)
|
13
|
+
git (1.2.8)
|
14
|
+
github_api (0.12.1)
|
15
|
+
addressable (~> 2.3)
|
16
|
+
descendants_tracker (~> 0.0.4)
|
17
|
+
faraday (~> 0.8, < 0.10)
|
18
|
+
hashie (>= 3.2)
|
19
|
+
multi_json (>= 1.7.5, < 2.0)
|
20
|
+
nokogiri (~> 1.6.3)
|
21
|
+
oauth2
|
22
|
+
hashie (3.3.1)
|
23
|
+
highline (1.6.21)
|
24
|
+
jeweler (2.0.1)
|
25
|
+
builder
|
26
|
+
bundler (>= 1.0)
|
10
27
|
git (>= 1.2.5)
|
28
|
+
github_api
|
29
|
+
highline (>= 1.6.15)
|
30
|
+
nokogiri (>= 1.5.10)
|
11
31
|
rake
|
12
32
|
rdoc
|
13
33
|
json (1.6.5)
|
34
|
+
jwt (1.0.0)
|
14
35
|
log4r (1.1.9)
|
15
|
-
|
36
|
+
mini_portile (0.6.0)
|
37
|
+
multi_json (1.10.1)
|
38
|
+
multi_xml (0.5.5)
|
39
|
+
multipart-post (2.0.0)
|
40
|
+
nokogiri (1.6.3.1)
|
41
|
+
mini_portile (= 0.6.0)
|
42
|
+
oauth2 (1.0.0)
|
43
|
+
faraday (>= 0.8, < 0.10)
|
44
|
+
jwt (~> 1.0)
|
45
|
+
multi_json (~> 1.3)
|
46
|
+
multi_xml (~> 0.5)
|
47
|
+
rack (~> 1.2)
|
48
|
+
rack (1.5.2)
|
16
49
|
rake (0.9.2.2)
|
17
50
|
rdoc (3.12)
|
18
51
|
json (~> 1.4)
|
@@ -24,15 +57,16 @@ GEM
|
|
24
57
|
rspec-expectations (2.8.0)
|
25
58
|
diff-lcs (~> 1.1.2)
|
26
59
|
rspec-mocks (2.8.0)
|
60
|
+
thread_safe (0.3.4)
|
27
61
|
|
28
62
|
PLATFORMS
|
29
63
|
ruby
|
30
64
|
|
31
65
|
DEPENDENCIES
|
32
|
-
bio-logger
|
66
|
+
bio-logger
|
33
67
|
bundler
|
34
|
-
jeweler (~>
|
35
|
-
nokogiri (
|
36
|
-
rake
|
37
|
-
rdoc
|
38
|
-
rspec
|
68
|
+
jeweler (~> 2.0.1)
|
69
|
+
nokogiri (~> 1.6.0)
|
70
|
+
rake
|
71
|
+
rdoc
|
72
|
+
rspec
|
data/LICENSE.txt
CHANGED
data/{README.rdoc → README.md}
RENAMED
@@ -1,41 +1,51 @@
|
|
1
|
-
|
1
|
+
[![Build Status](https://travis-ci.org/pjotrp/blastxmlparser.svg?branch=master)](https://travis-ci.org/pjotrp/blastxmlparser)
|
2
|
+
|
3
|
+
# bio-blastxmlparser
|
2
4
|
|
3
|
-
|
5
|
+
blastxmlparser is a very fast big-data BLAST XML file parser, which can be used
|
6
|
+
as a command line utility. Use blastxmlparser to:
|
4
7
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
* Parse BLAST XML
|
9
|
+
* Filter output
|
10
|
+
* Generate FASTA, JSON, YAML, RDF, HTML, tabular output etc.
|
11
|
+
|
12
|
+
Rather than loading everything in memory, XML is parsed by BLAST query
|
13
|
+
(Iteration). Not only does this have the advantage of low memory use, it also shows
|
14
|
+
results early, and it may be faster when IO continues in parallel (disk
|
15
|
+
read-ahead).
|
11
16
|
|
12
17
|
Next to the API, blastxmlparser comes as a command line utility, which
|
13
18
|
can be used to filter results and requires no understanding of Ruby.
|
14
19
|
|
15
|
-
|
20
|
+
# Quick start
|
16
21
|
|
22
|
+
```sh
|
17
23
|
gem install bio-blastxmlparser
|
18
24
|
blastxmlparser --help
|
25
|
+
```
|
19
26
|
|
20
27
|
(see Installation, below, if it does not work)
|
21
28
|
|
22
|
-
|
29
|
+
## Performance
|
23
30
|
|
24
|
-
XML parsing is expensive. blastxmlparser
|
25
|
-
parsers, based on libxml2. Basically, a DOM parser is used
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
31
|
+
XML parsing is expensive. blastxmlparser can use the fast Nokogiri C, or
|
32
|
+
Java XML parsers, based on libxml2. Basically, a DOM parser is used
|
33
|
+
after splitting the BLAST XML document into subsections.
|
34
|
+
Tests show this is faster than a SAX
|
35
|
+
parser with Ruby callbacks. To see why libxml2 based Nokogiri is
|
36
|
+
fast, see this
|
37
|
+
[benchmark](http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html)
|
38
|
+
and [xml.com](http://www.xml.com/lpt/a/1703).
|
30
39
|
|
31
|
-
|
32
|
-
evaluation, i.e
|
33
|
-
version) parallelization. When parsing a full BLAST result
|
34
|
-
only a few fields are used. By using XPath queries
|
35
|
-
fields are queried.
|
40
|
+
Blastxmlparser is designed with other optimizations, such as lazy
|
41
|
+
evaluation, i.e., only creating objects when required, and (in a
|
42
|
+
future version) parallelization. When parsing a full BLAST result
|
43
|
+
usually only a few fields are used. By using XPath queries the parser
|
44
|
+
makes sure only the relevant fields are queried.
|
36
45
|
|
37
46
|
Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
|
38
47
|
|
48
|
+
```
|
39
49
|
bio-blastxmlparser + Nokogiri DOM (default)
|
40
50
|
|
41
51
|
real 0m1.259s
|
@@ -53,29 +63,39 @@ Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
|
|
53
63
|
real 1m14.548s
|
54
64
|
user 1m13.065s
|
55
65
|
sys 0m0.472s
|
66
|
+
```
|
56
67
|
|
57
|
-
|
68
|
+
## Install
|
58
69
|
|
70
|
+
```sh
|
59
71
|
gem install bio-blastxmlparser
|
72
|
+
```
|
60
73
|
|
61
|
-
Important: the parser is written for Ruby >= 1.9.
|
74
|
+
Important: the parser is written for Ruby >= 1.9. Check with
|
62
75
|
|
76
|
+
```sh
|
63
77
|
ruby -v
|
64
78
|
gem env
|
79
|
+
```
|
65
80
|
|
66
81
|
Nokogiri XML parser is required. To install it,
|
67
82
|
the libxml2 libraries and headers need to be installed first, for
|
68
83
|
example on Debian:
|
69
84
|
|
85
|
+
```sh
|
70
86
|
apt-get install libxslt-dev libxml2-dev
|
71
87
|
gem install bio-blastxmlparser
|
88
|
+
```
|
89
|
+
|
90
|
+
Nokogiri balks when libxml2 or libxslt is missing on your system (or
|
91
|
+
may install something automatically). In the worst case you'll have to
|
92
|
+
provide build paths, as described [here](http://nokogiri.org/tutorials/installing_nokogiri.html).
|
72
93
|
|
73
|
-
|
74
|
-
http://nokogiri.org/tutorials/installing_nokogiri.html.
|
94
|
+
## Command line usage
|
75
95
|
|
76
|
-
|
96
|
+
### Usage
|
77
97
|
|
78
|
-
|
98
|
+
```
|
79
99
|
blastxmlparser [options] file(s)
|
80
100
|
|
81
101
|
-p, --parser name Use full|split parser (default full)
|
@@ -93,23 +113,24 @@ http://nokogiri.org/tutorials/installing_nokogiri.html.
|
|
93
113
|
bioblastxmlparser filename(s)
|
94
114
|
|
95
115
|
Use --help switch for more information
|
116
|
+
```
|
96
117
|
|
97
|
-
|
118
|
+
### Examples
|
98
119
|
|
99
120
|
Print result fields of iterations containing 'lcl', using a regex
|
100
121
|
|
122
|
+
```sh
|
101
123
|
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
102
|
-
|
103
|
-
Print fields where bit_score > 145
|
104
|
-
|
105
|
-
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
124
|
+
```
|
106
125
|
|
107
126
|
prints a tab-delimited table
|
108
127
|
|
128
|
+
```sh
|
109
129
|
1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
|
110
130
|
2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
|
111
131
|
3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
|
112
132
|
4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
|
133
|
+
```
|
113
134
|
|
114
135
|
The second and third columns show the BLAST iteration, and the others
|
115
136
|
relate to the hits.
|
@@ -117,11 +138,20 @@ relate to the hits.
|
|
117
138
|
As this is evaluated Ruby, it is also possible to use the XML element
|
118
139
|
names directly
|
119
140
|
|
141
|
+
```sh
|
120
142
|
blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
|
143
|
+
```
|
144
|
+
|
145
|
+
Or the shorter
|
146
|
+
|
147
|
+
```sh
|
148
|
+
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
149
|
+
```
|
121
150
|
|
122
151
|
And it is possible to print (non default) named fields where E-value < 0.01
|
123
152
|
and hit length > 100. E.g.
|
124
153
|
|
154
|
+
```sh
|
125
155
|
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
126
156
|
|
127
157
|
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
@@ -130,28 +160,104 @@ and hit length > 100. E.g.
|
|
130
160
|
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
131
161
|
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
132
162
|
etc. etc.
|
163
|
+
```
|
133
164
|
|
134
165
|
prints the evalue and qseq columns. To output FASTA use --output-fasta
|
135
166
|
|
167
|
+
```sh
|
136
168
|
blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
169
|
+
```
|
137
170
|
|
138
171
|
which prints matching sequences, where the first field is the accession, followed
|
139
172
|
by query iteration id, and hit_id. E.g.
|
140
173
|
|
174
|
+
```sh
|
141
175
|
>I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
|
142
176
|
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
143
177
|
>I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
|
144
178
|
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
145
179
|
etc. etc.
|
180
|
+
```
|
181
|
+
|
182
|
+
## Modify output
|
183
|
+
|
184
|
+
To have more output options blastxmlparser can use an [ERB
|
185
|
+
template](http://www.stuartellis.eu/articles/erb/) for every match. This is a
|
186
|
+
very flexible option that can output textual formats such as JSON, YAML, HTML
|
187
|
+
and RDF. Examples are provided in
|
188
|
+
[./template](https://github.com/pjotrp/blastxmlparser/tree/master/template). A JSON
|
189
|
+
template could be
|
190
|
+
|
191
|
+
```Javascript
|
192
|
+
{ "<%= hit.parent.query_def %>": {
|
193
|
+
"num": <%= hit.hit_num %>,
|
194
|
+
"id": "<%= hit.hit_id %>",
|
195
|
+
"len": <%= hit.len %>,
|
196
|
+
"E-value": <%= hsp.evalue %>,
|
197
|
+
"bitscore": <%= hsp.bit_score %>,
|
198
|
+
"qseq": "<%= hsp.qseq %>",
|
199
|
+
"midline": "<%= hsp.midline %>",
|
200
|
+
"hseq": "<%= hsp.hseq %>",
|
201
|
+
};
|
202
|
+
```
|
203
|
+
|
204
|
+
To get JSON, run it with
|
205
|
+
|
206
|
+
```sh
|
207
|
+
blastxmlparser --template template/json.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
208
|
+
```
|
209
|
+
|
210
|
+
```Javascript
|
211
|
+
{ "B0511.9d gene=WBGene00015235": {
|
212
|
+
"num": 5,
|
213
|
+
"id": "gi|268566471|ref|XP_002639731.1|",
|
214
|
+
"len": 199,
|
215
|
+
"E-value": 1.72502e-22,
|
216
|
+
"bitscore": 96.6709,
|
217
|
+
"qseq": "MSMLRRPLTQLELSVI------------------VPKCXXXXXXXXXXXXQSEPPRGITRRNLRSADRKNRDVPGPSTGECTRTSIAPNRCEMSFTEVQ-TLTSARTPVAAPTLTLSTPVNPVSSAEMLX----XXXXXXXXXXXASRSGDNDSPLLFNAYDTPQQ--GINXXXXXXXXXXXXXNAHLYAXXXXXXXXXXXXXXXXRSHRH",
|
218
|
+
"midline": "MSMLRRPLTQLEL K QSEP GI++RNLRSADR+ +DVPG ++GE + FT+ +++SARTPV+ ++ LSTPVNP SS EM+ SR + D PL+FNAYDTPQQ G + NAHLY+ RS RH",
|
219
|
+
"hseq": "MSMLRRPLTQLELCEDDIQWLSEQLAKKETGFEDEVKYEVMDVDEDEPMDQSEPTGGISKRNLRSADRRKKDVPG-TSGEGAQ-----------FTDQGLSISSARTPVSGASVNLSTPVNPSSSNEMMALPPPVRLARAGRRQRDSRVVNGDVPLMFNAYDTPQQPAGGSNGSPTPSDSPESPNAHLYSTPINPTSSSGGPSSNTRSQRH",
|
220
|
+
};
|
221
|
+
```
|
222
|
+
|
223
|
+
Likewise, using the RDF template
|
224
|
+
|
225
|
+
```sh
|
226
|
+
blastxmlparser --template template/rdf.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
227
|
+
```
|
228
|
+
|
229
|
+
```ruby
|
230
|
+
:Minc_Contig50_77_42056___42484_1_64492 :query :Minc_Contig50_77_42056___42484_1_64492_23
|
231
|
+
:Minc_Contig50_77_42056___42484_1_64492_23
|
232
|
+
:query_id "lcl|30_0",
|
233
|
+
:query_def "Minc_Contig50_77 [42056 - 42484] 1 64492",
|
234
|
+
:num 23,
|
235
|
+
:accession "Minc02032",
|
236
|
+
:id "lcl|Minc02032",
|
237
|
+
:len 147,
|
238
|
+
:E-value 8.1089e-12,
|
239
|
+
:identity 60,
|
240
|
+
:align_len 69,
|
241
|
+
:bitscore 69.8753,
|
242
|
+
:qseq "ATGGGAGATGGAATTGAACCGTCATGGAAAGGGCCCAAACCGAAGCACAACCGACTGTGCCACCATCCA",
|
243
|
+
:midline "|||||||||||||||||||| |||||||| | |||||||||||||||||||||||||||||||",
|
244
|
+
:hseq "ATGGGAGATGGAATTGAACCATCATGGAATG-------ACCGAAGCACAACCGACTGTGCCACCATCCA",
|
245
|
+
:evalue 8.1089e-12 .
|
246
|
+
```
|
247
|
+
|
248
|
+
## Additional options
|
146
249
|
|
147
250
|
To use the low-mem (iterated slower) version of the parser use
|
148
251
|
|
252
|
+
```sh
|
149
253
|
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
254
|
+
```
|
150
255
|
|
151
|
-
|
256
|
+
## API (Ruby library)
|
152
257
|
|
153
258
|
To loop through a BLAST result:
|
154
259
|
|
260
|
+
```ruby
|
155
261
|
>> require 'bio-blastxmlparser'
|
156
262
|
>> fn = 'test/data/nt_example_blastn.m7'
|
157
263
|
>> n = Bio::BlastXMLParser::XmlIterator.new(fn).to_enum
|
@@ -163,19 +269,23 @@ To loop through a BLAST result:
|
|
163
269
|
>> end
|
164
270
|
>> end
|
165
271
|
>> end
|
272
|
+
```
|
166
273
|
|
167
274
|
The next example parses XML using less memory by using a Ruby
|
168
275
|
Iterator
|
169
276
|
|
277
|
+
```ruby
|
170
278
|
>> blast = Bio::BlastXMLParser::XmlSplitterIterator.new(fn).to_enum
|
171
279
|
>> iter = blast.next
|
172
280
|
>> iter.iter_num
|
173
281
|
=> 1
|
174
282
|
>> iter.query_id
|
175
283
|
=> "lcl|1_0"
|
284
|
+
```
|
176
285
|
|
177
286
|
Get the first hit
|
178
287
|
|
288
|
+
```ruby
|
179
289
|
>> hit = iter.hits.first
|
180
290
|
>> hit.hit_num
|
181
291
|
=> 1
|
@@ -187,14 +297,18 @@ Get the first hit
|
|
187
297
|
=> "I_74685"
|
188
298
|
>> hit.len
|
189
299
|
=> 144
|
300
|
+
```
|
190
301
|
|
191
302
|
Get the parent info
|
192
303
|
|
304
|
+
```ruby
|
193
305
|
>> hit.parent.query_id
|
194
306
|
=> "lcl|1_0"
|
195
|
-
|
307
|
+
```
|
308
|
+
|
196
309
|
Get the first Hsp
|
197
310
|
|
311
|
+
```ruby
|
198
312
|
>> hsp = hit.hsps.first
|
199
313
|
>> hsp.hsp_num
|
200
314
|
=> 1
|
@@ -224,6 +338,7 @@ Get the first Hsp
|
|
224
338
|
=> "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
|
225
339
|
>> hsp.midline
|
226
340
|
=> "|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"
|
341
|
+
```
|
227
342
|
|
228
343
|
Unlike BioRuby, this module uses the actual element names in the XML
|
229
344
|
definition, to avoid confusion (if anyone wants a translation,
|
@@ -232,30 +347,36 @@ feel free to contribute an adaptor).
|
|
232
347
|
It is also possible to use the XML element names as Strings, rather
|
233
348
|
than methods. E.g.
|
234
349
|
|
350
|
+
```ruby
|
235
351
|
>> hsp.field("Hsp_bit-score")
|
236
352
|
=> "145.205"
|
237
353
|
>> hsp["Hsp_bit-score"]
|
238
354
|
=> "145.205"
|
355
|
+
```
|
239
356
|
|
240
357
|
Note that, when using the element names, the results are always String values.
|
241
358
|
|
242
359
|
Fetch the next result (Iteration)
|
243
360
|
|
361
|
+
```ruby
|
244
362
|
>> iter2 = blast.next
|
245
363
|
>> iter2.iter_num
|
246
364
|
=> 2
|
247
365
|
>> iter2.query_id
|
248
366
|
=> "lcl|2_0"
|
367
|
+
```
|
249
368
|
|
250
369
|
etc. etc.
|
251
370
|
|
252
371
|
For more examples see the files in ./spec
|
253
372
|
|
254
|
-
|
373
|
+
## URL
|
255
374
|
|
256
375
|
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
257
376
|
|
258
|
-
|
377
|
+
blastxmlparser is listed at http://biogems.info
|
378
|
+
|
379
|
+
## Copyright
|
259
380
|
|
260
|
-
Copyright (c) 2011
|
381
|
+
Copyright (c) 2011-2014 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
261
382
|
|
data/Rakefile
CHANGED
@@ -15,14 +15,10 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
gem.name = "bio-blastxmlparser"
|
16
16
|
gem.homepage = "http://github.com/pjotrp/blastxmlparser"
|
17
17
|
gem.license = "MIT"
|
18
|
-
gem.summary = %Q{Very fast BLAST XML
|
19
|
-
gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby}
|
18
|
+
gem.summary = %Q{Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer}
|
19
|
+
gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
|
20
20
|
gem.email = "pjotr.public01@thebird.nl"
|
21
21
|
gem.authors = ["Pjotr Prins"]
|
22
|
-
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
23
|
-
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
24
|
-
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
25
|
-
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
26
22
|
end
|
27
23
|
Jeweler::RubygemsDotOrgTasks.new
|
28
24
|
|
@@ -32,17 +28,11 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
32
28
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
33
29
|
end
|
34
30
|
|
35
|
-
# RSpec::Core::RakeTask.new(:rcov) do |spec|
|
36
|
-
# spec.pattern = 'spec/**/*_spec.rb'
|
37
|
-
# spec.rcov = true
|
38
|
-
# end
|
39
|
-
|
40
31
|
task :default => [ :test, :spec ]
|
41
32
|
|
42
33
|
require 'rake/testtask'
|
43
34
|
Rake::TestTask.new(:test) do |test|
|
44
35
|
test.libs << 'lib' << 'test'
|
45
|
-
# test.pattern = 'test/**/test_*.rb' # breaks in 1.9.3
|
46
36
|
test.test_files = Dir.glob("test/**/test_*.rb")
|
47
37
|
test.verbose = true
|
48
38
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.1
|
1
|
+
1.1.2
|
data/bin/blastxmlparser
CHANGED
@@ -4,83 +4,21 @@
|
|
4
4
|
# Author:: Pjotr Prins
|
5
5
|
# License:: MIT License
|
6
6
|
#
|
7
|
-
# Copyright (C) 2010-
|
7
|
+
# Copyright (C) 2010-2014 Pjotr Prins <pjotr.prins@thebird.nl>
|
8
8
|
|
9
9
|
rootpath = File.dirname(File.dirname(__FILE__))
|
10
10
|
$: << File.join(rootpath,'lib')
|
11
11
|
|
12
12
|
BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
13
13
|
|
14
|
-
$stderr.print "
|
14
|
+
$stderr.print "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
|
15
15
|
|
16
16
|
USAGE = <<EOM
|
17
17
|
|
18
|
-
|
18
|
+
blastxmlparser filename(s)
|
19
19
|
|
20
20
|
Use --help switch for more information
|
21
21
|
|
22
|
-
== Examples
|
23
|
-
|
24
|
-
Print result fields of iterations containing 'lcl', using a regex
|
25
|
-
|
26
|
-
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
27
|
-
|
28
|
-
Print fields where bit_score > 145
|
29
|
-
|
30
|
-
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
31
|
-
|
32
|
-
prints a tab delimited
|
33
|
-
|
34
|
-
1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
|
35
|
-
2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
|
36
|
-
3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
|
37
|
-
4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
|
38
|
-
|
39
|
-
The second and third column show the BLAST iteration, and the others
|
40
|
-
relate to the hits.
|
41
|
-
|
42
|
-
As this is evaluated Ruby, it is also possible to use the XML element
|
43
|
-
names directly
|
44
|
-
|
45
|
-
blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
|
46
|
-
|
47
|
-
And it is possible to print (non default) named fields where E-value < 0.001
|
48
|
-
and hit length > 100. E.g.
|
49
|
-
|
50
|
-
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
51
|
-
|
52
|
-
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
53
|
-
2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
54
|
-
3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
|
55
|
-
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
56
|
-
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
57
|
-
etc. etc.
|
58
|
-
|
59
|
-
prints the evalue and qseq columns. To output FASTA use --output-fasta
|
60
|
-
|
61
|
-
blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
62
|
-
|
63
|
-
which prints matching sequences, where the first field is the accession, followed
|
64
|
-
by query iteration id, and hit_id. E.g.
|
65
|
-
|
66
|
-
>I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
|
67
|
-
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
68
|
-
>I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
|
69
|
-
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
70
|
-
etc. etc.
|
71
|
-
|
72
|
-
To use the low-mem (iterated slower) version of the parser use
|
73
|
-
|
74
|
-
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
75
|
-
|
76
|
-
== URL
|
77
|
-
|
78
|
-
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
79
|
-
|
80
|
-
== Copyright
|
81
|
-
|
82
|
-
Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
83
|
-
|
84
22
|
EOM
|
85
23
|
|
86
24
|
if ARGV.size == 0
|
@@ -114,16 +52,22 @@ opts = OptionParser.new do |o|
|
|
114
52
|
options.parser = p.to_sym
|
115
53
|
end
|
116
54
|
|
117
|
-
o.on("
|
118
|
-
options.
|
55
|
+
o.on("-e filter","--exec filter",String, "Evaluate filter") do |s|
|
56
|
+
options.exec = s
|
119
57
|
end
|
120
|
-
|
121
|
-
o.
|
58
|
+
|
59
|
+
o.separator ""
|
60
|
+
|
61
|
+
o.on("-n fields","--named fields",String, "Print named fields") do |s|
|
122
62
|
options.fields = s.split(/,/)
|
123
63
|
end
|
64
|
+
o.on("--output-fasta","Output FASTA") do |b|
|
65
|
+
options.output_fasta = true
|
66
|
+
end
|
124
67
|
|
125
|
-
o.on("-
|
126
|
-
|
68
|
+
o.on("-t erb","--template erb",String, "Use ERB template for output") do |s|
|
69
|
+
require 'erb'
|
70
|
+
options.template = s
|
127
71
|
end
|
128
72
|
|
129
73
|
o.separator ""
|
@@ -156,6 +100,13 @@ begin
|
|
156
100
|
Bio::Log::CLI.configure('bio-blastxmlparser')
|
157
101
|
logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
|
158
102
|
|
103
|
+
if options[:template]
|
104
|
+
include BioRdf
|
105
|
+
fn = options.template
|
106
|
+
raise "No template #{fn}!" if not File.exist?(fn)
|
107
|
+
template = ERB.new(File.read(fn))
|
108
|
+
end
|
109
|
+
|
159
110
|
ARGV.each do | fn |
|
160
111
|
logger.info("XML parsing #{fn}")
|
161
112
|
n = if options.parser == :split
|
@@ -173,10 +124,13 @@ begin
|
|
173
124
|
true
|
174
125
|
end
|
175
126
|
if do_print
|
176
|
-
if
|
127
|
+
if template
|
128
|
+
print template.result(binding)
|
129
|
+
elsif options.output_fasta
|
177
130
|
print ">"+hit.accession+' '+iter.iter_num.to_s+'|'+iter.query_id+' '+hit.hit_id+' '+hit.hit_def+"\n"
|
178
131
|
print hsp.qseq+"\n"
|
179
132
|
else
|
133
|
+
# Default output
|
180
134
|
if options.fields
|
181
135
|
print i,"\t"
|
182
136
|
options.fields.each do | f |
|
data/bio-blastxmlparser.gemspec
CHANGED
@@ -5,17 +5,17 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-blastxmlparser"
|
8
|
-
s.version = "1.1.1"
|
8
|
+
s.version = "1.1.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "
|
13
|
-
s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby"
|
12
|
+
s.date = "2014-09-02"
|
13
|
+
s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["blastxmlparser"]
|
16
16
|
s.extra_rdoc_files = [
|
17
17
|
"LICENSE.txt",
|
18
|
-
"README.
|
18
|
+
"README.md"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
21
|
".document",
|
@@ -24,7 +24,7 @@ Gem::Specification.new do |s|
|
|
24
24
|
"Gemfile",
|
25
25
|
"Gemfile.lock",
|
26
26
|
"LICENSE.txt",
|
27
|
-
"README.
|
27
|
+
"README.md",
|
28
28
|
"Rakefile",
|
29
29
|
"VERSION",
|
30
30
|
"bin/blastxmlparser",
|
@@ -33,6 +33,7 @@ Gem::Specification.new do |s|
|
|
33
33
|
"lib/bio/db/blast/parser/nokogiri.rb",
|
34
34
|
"lib/bio/db/blast/xmliterator.rb",
|
35
35
|
"lib/bio/db/blast/xmlsplitter.rb",
|
36
|
+
"lib/bio/writers/rdf.rb",
|
36
37
|
"sample/bioruby.rb",
|
37
38
|
"sample/blastxmlparserdemo.rb",
|
38
39
|
"sample/libxml_sax.rb",
|
@@ -41,6 +42,8 @@ Gem::Specification.new do |s|
|
|
41
42
|
"sample/nokogiri_split_dom.rb",
|
42
43
|
"spec/bio-blastxmlparser_spec.rb",
|
43
44
|
"spec/spec_helper.rb",
|
45
|
+
"template/json.erb",
|
46
|
+
"template/rdf.erb",
|
44
47
|
"test/data/aa_example.fasta",
|
45
48
|
"test/data/aa_example_blastp.m7",
|
46
49
|
"test/data/nt_example.fasta",
|
@@ -50,37 +53,37 @@ Gem::Specification.new do |s|
|
|
50
53
|
s.homepage = "http://github.com/pjotrp/blastxmlparser"
|
51
54
|
s.licenses = ["MIT"]
|
52
55
|
s.require_paths = ["lib"]
|
53
|
-
s.rubygems_version = "
|
54
|
-
s.summary = "Very fast BLAST XML
|
56
|
+
s.rubygems_version = "2.0.3"
|
57
|
+
s.summary = "Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
|
55
58
|
|
56
59
|
if s.respond_to? :specification_version then
|
57
|
-
s.specification_version =
|
60
|
+
s.specification_version = 4
|
58
61
|
|
59
62
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
60
|
-
s.add_runtime_dependency(%q<bio-logger>, [">=
|
61
|
-
s.add_runtime_dependency(%q<nokogiri>, ["
|
62
|
-
s.add_development_dependency(%q<rake>, [">= 0
|
63
|
+
s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
|
64
|
+
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.6.0"])
|
65
|
+
s.add_development_dependency(%q<rake>, [">= 0"])
|
63
66
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
64
|
-
s.add_development_dependency(%q<jeweler>, ["~>
|
65
|
-
s.add_development_dependency(%q<rspec>, [">=
|
66
|
-
s.add_development_dependency(%q<rdoc>, [">=
|
67
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
|
68
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
69
|
+
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
67
70
|
else
|
68
|
-
s.add_dependency(%q<bio-logger>, [">=
|
69
|
-
s.add_dependency(%q<nokogiri>, ["
|
70
|
-
s.add_dependency(%q<rake>, [">= 0
|
71
|
+
s.add_dependency(%q<bio-logger>, [">= 0"])
|
72
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
|
73
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
71
74
|
s.add_dependency(%q<bundler>, [">= 0"])
|
72
|
-
s.add_dependency(%q<jeweler>, ["~>
|
73
|
-
s.add_dependency(%q<rspec>, [">=
|
74
|
-
s.add_dependency(%q<rdoc>, [">=
|
75
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
76
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
77
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
75
78
|
end
|
76
79
|
else
|
77
|
-
s.add_dependency(%q<bio-logger>, [">=
|
78
|
-
s.add_dependency(%q<nokogiri>, ["
|
79
|
-
s.add_dependency(%q<rake>, [">= 0
|
80
|
+
s.add_dependency(%q<bio-logger>, [">= 0"])
|
81
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
|
82
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
80
83
|
s.add_dependency(%q<bundler>, [">= 0"])
|
81
|
-
s.add_dependency(%q<jeweler>, ["~>
|
82
|
-
s.add_dependency(%q<rspec>, [">=
|
83
|
-
s.add_dependency(%q<rdoc>, [">=
|
84
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
85
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
86
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
84
87
|
end
|
85
88
|
end
|
86
89
|
|
data/lib/bio-blastxmlparser.rb
CHANGED
@@ -0,0 +1,79 @@
|
|
1
|
+
# RDF support module. Original is part of bioruby-rdf by Pjotr Prins
|
2
|
+
#
|
3
|
+
module BioRdf
|
4
|
+
|
5
|
+
module RDF
|
6
|
+
|
7
|
+
def RDF::valid_uri? uri
|
8
|
+
uri =~ /^([!#$&-;=?_a-z~]|%[0-9a-f]{2})+$/i
|
9
|
+
end
|
10
|
+
|
11
|
+
def RDF::escape_string_literal(literal)
|
12
|
+
s = literal.to_s
|
13
|
+
# Put a backslash before every double quote unless one is already there
|
14
|
+
s = s.gsub(/(?<!\\)"/,'\"')
|
15
|
+
# Put a backslash before a lone backslash unless it starts an escape (\u, \U, \t, \n, \r, \\ or \")
|
16
|
+
if s =~ /[^\\]\\[^\\]/
|
17
|
+
s2 = []
|
18
|
+
s.each_char.with_index { |c,i|
|
19
|
+
res = c
|
20
|
+
if i>0 and c == '\\' and s[i-1] != '\\' and s[i+1] !~ /^[uUtnr\\"]/
|
21
|
+
res = '\\' + c
|
22
|
+
end
|
23
|
+
# p [i,c,s[i+1],res]
|
24
|
+
s2 << res
|
25
|
+
}
|
26
|
+
s = s2.join('')
|
27
|
+
end
|
28
|
+
s
|
29
|
+
end
|
30
|
+
|
31
|
+
def RDF::stringify_literal(literal)
|
32
|
+
RDF::escape_string_literal(literal.to_s)
|
33
|
+
end
|
34
|
+
|
35
|
+
def RDF::quoted_stringify_literal(literal)
|
36
|
+
'"' + stringify_literal(literal) + '"'
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
module Turtle
|
41
|
+
|
42
|
+
def Turtle::stringify_literal(literal)
|
43
|
+
RDF::stringify_literal(literal)
|
44
|
+
end
|
45
|
+
|
46
|
+
def Turtle::identifier(id)
|
47
|
+
raise "Illegal identifier #{id}" if id != Turtle::mangle_identifier(id)
|
48
|
+
end
|
49
|
+
|
50
|
+
# Replace letters/symbols that are not allowed in a Turtle identifier
|
51
|
+
# (short hand URI). This should be the definitive mangler and replace the
|
52
|
+
# ones in bioruby-table and bio-exominer. Manglers are useful when using
|
53
|
+
# data from other sources and trying to transform them into simple RDF
|
54
|
+
# identifiers.
|
55
|
+
|
56
|
+
def Turtle::mangle_identifier(s)
|
57
|
+
id = s.strip.gsub(/[^[:print:]]/, '').gsub(/[#)(,]/,"").gsub(/[%]/,"perc").gsub(/(\s|\.|\$|\/|\\|\>)+/,"_")
|
58
|
+
id = id.gsub(/\[|\]/,'')
|
59
|
+
# id = URI::escape(id)
|
60
|
+
id = id.gsub(/\|/,'_')
|
61
|
+
id = id.gsub(/\-|:/,'_')
|
62
|
+
if id != s
|
63
|
+
# Don't want Bio dependency in templates!
|
64
|
+
# logger = Bio::Log::LoggerPlus.new 'bio-rdf'
|
65
|
+
# logger.warn "\nWARNING: Changed identifier <#{s}> to <#{id}>"
|
66
|
+
$stderr.print "\nWARNING: Changed identifier <#{s}> to <#{id}>"
|
67
|
+
end
|
68
|
+
if not RDF::valid_uri?(id)
|
69
|
+
raise "Invalid URI after mangling <#{s}> to <#{id}>!"
|
70
|
+
end
|
71
|
+
valid_id = if id =~ /^\d/
|
72
|
+
'r' + id
|
73
|
+
else
|
74
|
+
id
|
75
|
+
end
|
76
|
+
valid_id # we certainly hope so!
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
data/template/json.erb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
{ "<%= hit.parent.query_def %>": {
|
2
|
+
"query_id": "<%= hit.parent.query_id %>",
|
3
|
+
"num": <%= hit.hit_num %>,
|
4
|
+
"accession": "<%= hit.accession %>",
|
5
|
+
"id": "<%= hit.hit_id %>",
|
6
|
+
"len": <%= hit.len %>,
|
7
|
+
"E-value": <%= hsp.evalue %>,
|
8
|
+
"identity": <%= hsp.identity %>,
|
9
|
+
"align_len": <%= hsp.align_len %>,
|
10
|
+
"bitscore": <%= hsp.bit_score %>,
|
11
|
+
"qseq": "<%= hsp.qseq %>",
|
12
|
+
"midline": "<%= hsp.midline %>",
|
13
|
+
"hseq": "<%= hsp.hseq %>",
|
14
|
+
};
|
15
|
+
|
data/template/rdf.erb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
<%
|
2
|
+
blastid = Turtle::mangle_identifier(hit.parent.query_def)
|
3
|
+
id = blastid+'_'+hit.hit_num.to_s
|
4
|
+
%>
|
5
|
+
:<%= blastid %> :query :<%= id %>
|
6
|
+
:<%= id %>
|
7
|
+
:query_id "<%= hit.parent.query_id %>",
|
8
|
+
:query_def "<%= hit.parent.query_def %>",
|
9
|
+
:num <%= hit.hit_num %>,
|
10
|
+
:accession "<%= hit.accession %>",
|
11
|
+
:id "<%= hit.hit_id %>",
|
12
|
+
:len <%= hit.len %>,
|
13
|
+
:E-value <%= hsp.evalue %>,
|
14
|
+
:identity <%= hsp.identity %>,
|
15
|
+
:align_len <%= hsp.align_len %>,
|
16
|
+
:bitscore <%= hsp.bit_score %>,
|
17
|
+
:qseq "<%= hsp.qseq %>",
|
18
|
+
:midline "<%= hsp.midline %>",
|
19
|
+
:hseq "<%= hsp.hseq %>",
|
20
|
+
:evalue <%= hsp.evalue %> .
|
21
|
+
|
metadata
CHANGED
@@ -1,110 +1,130 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-blastxmlparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.2
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Pjotr Prins
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-02 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bio-logger
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
19
|
+
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: nokogiri
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - "~>"
|
31
32
|
- !ruby/object:Gem::Version
|
32
|
-
version: 1.
|
33
|
+
version: 1.6.0
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.6.0
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rake
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - ">="
|
42
46
|
- !ruby/object:Gem::Version
|
43
|
-
version: 0
|
47
|
+
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: bundler
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - ">="
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
62
|
type: :development
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: jeweler
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
|
-
- - ~>
|
73
|
+
- - "~>"
|
64
74
|
- !ruby/object:Gem::Version
|
65
|
-
version:
|
75
|
+
version: 2.0.1
|
66
76
|
type: :development
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 2.0.1
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rspec
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
|
-
- -
|
87
|
+
- - ">="
|
75
88
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
89
|
+
version: '0'
|
77
90
|
type: :development
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
80
97
|
- !ruby/object:Gem::Dependency
|
81
98
|
name: rdoc
|
82
|
-
requirement:
|
83
|
-
none: false
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
84
100
|
requirements:
|
85
|
-
- -
|
101
|
+
- - ">="
|
86
102
|
- !ruby/object:Gem::Version
|
87
|
-
version:
|
103
|
+
version: '0'
|
88
104
|
type: :development
|
89
105
|
prerelease: false
|
90
|
-
version_requirements:
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
91
111
|
description: Fast big data BLAST XML parser and library; this libxml2 based version
|
92
|
-
is 50x faster than BioRuby
|
112
|
+
is 50x faster than BioRuby and comes with a nice CLI
|
93
113
|
email: pjotr.public01@thebird.nl
|
94
114
|
executables:
|
95
115
|
- blastxmlparser
|
96
116
|
extensions: []
|
97
117
|
extra_rdoc_files:
|
98
118
|
- LICENSE.txt
|
99
|
-
- README.rdoc
|
119
|
+
- README.md
|
100
120
|
files:
|
101
|
-
- .document
|
102
|
-
- .rspec
|
103
|
-
- .travis.yml
|
121
|
+
- ".document"
|
122
|
+
- ".rspec"
|
123
|
+
- ".travis.yml"
|
104
124
|
- Gemfile
|
105
125
|
- Gemfile.lock
|
106
126
|
- LICENSE.txt
|
107
|
-
- README.rdoc
|
127
|
+
- README.md
|
108
128
|
- Rakefile
|
109
129
|
- VERSION
|
110
130
|
- bin/blastxmlparser
|
@@ -113,6 +133,7 @@ files:
|
|
113
133
|
- lib/bio/db/blast/parser/nokogiri.rb
|
114
134
|
- lib/bio/db/blast/xmliterator.rb
|
115
135
|
- lib/bio/db/blast/xmlsplitter.rb
|
136
|
+
- lib/bio/writers/rdf.rb
|
116
137
|
- sample/bioruby.rb
|
117
138
|
- sample/blastxmlparserdemo.rb
|
118
139
|
- sample/libxml_sax.rb
|
@@ -121,6 +142,8 @@ files:
|
|
121
142
|
- sample/nokogiri_split_dom.rb
|
122
143
|
- spec/bio-blastxmlparser_spec.rb
|
123
144
|
- spec/spec_helper.rb
|
145
|
+
- template/json.erb
|
146
|
+
- template/rdf.erb
|
124
147
|
- test/data/aa_example.fasta
|
125
148
|
- test/data/aa_example_blastp.m7
|
126
149
|
- test/data/nt_example.fasta
|
@@ -129,29 +152,25 @@ files:
|
|
129
152
|
homepage: http://github.com/pjotrp/blastxmlparser
|
130
153
|
licenses:
|
131
154
|
- MIT
|
155
|
+
metadata: {}
|
132
156
|
post_install_message:
|
133
157
|
rdoc_options: []
|
134
158
|
require_paths:
|
135
159
|
- lib
|
136
160
|
required_ruby_version: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
161
|
requirements:
|
139
|
-
- -
|
162
|
+
- - ">="
|
140
163
|
- !ruby/object:Gem::Version
|
141
164
|
version: '0'
|
142
|
-
segments:
|
143
|
-
- 0
|
144
|
-
hash: -3287387609254152406
|
145
165
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
|
-
none: false
|
147
166
|
requirements:
|
148
|
-
- -
|
167
|
+
- - ">="
|
149
168
|
- !ruby/object:Gem::Version
|
150
169
|
version: '0'
|
151
170
|
requirements: []
|
152
171
|
rubyforge_project:
|
153
|
-
rubygems_version:
|
172
|
+
rubygems_version: 2.0.3
|
154
173
|
signing_key:
|
155
|
-
specification_version:
|
156
|
-
summary: Very fast BLAST XML
|
174
|
+
specification_version: 4
|
175
|
+
summary: Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer
|
157
176
|
test_files: []
|