bio-blastxmlparser 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +1 -1
- data/Gemfile +6 -13
- data/Gemfile.lock +44 -10
- data/LICENSE.txt +1 -1
- data/{README.rdoc → README.md} +158 -37
- data/Rakefile +2 -12
- data/VERSION +1 -1
- data/bin/blastxmlparser +26 -72
- data/bio-blastxmlparser.gemspec +29 -26
- data/lib/bio-blastxmlparser.rb +1 -0
- data/lib/bio/writers/rdf.rb +79 -0
- data/template/json.erb +15 -0
- data/template/rdf.erb +21 -0
- metadata +72 -53
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 31b42217bb809cde8d5ef3c06d11c6c9123c6413
|
4
|
+
data.tar.gz: 5d23e19fb8c774f7edaffd03bbcb156800679f7f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: de99019d564d5ea759f6e3ef330b8e9e68f6a7bbdb0578c34699ad7f716da16562d702da935bcfc5e3baa9a9e673b2ad99a62ae07210c4feea144134ad822e94
|
7
|
+
data.tar.gz: 24bb61197ff82129b404dcaf928b38ac9f7b9d5c10b90a630032253e468adc558c1675639b11c4bc60f7da8082626b3b288c2f0b9e6328e0ef2525b3a79453a8
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,21 +1,14 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
|
-
# Add dependencies required to use your gem here.
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
5
|
-
|
6
2
|
# Runtime dependencies
|
7
|
-
|
8
|
-
gem "
|
9
|
-
gem "nokogiri", ">= 1.5.0"
|
3
|
+
gem "bio-logger"
|
4
|
+
gem "nokogiri", "~>1.6.0"
|
10
5
|
|
11
6
|
# Add dependencies to develop your gem here.
|
12
7
|
# Include everything needed to run rake, tests, features, etc.
|
13
8
|
group :development do
|
14
|
-
|
15
|
-
gem "rake", ">= 0.9.2.2"
|
9
|
+
gem "rake"
|
16
10
|
gem "bundler"
|
17
|
-
gem "jeweler", "~>
|
18
|
-
gem "rspec"
|
19
|
-
gem "rdoc"
|
20
|
-
# gem "rcov", ">= 0"
|
11
|
+
gem "jeweler", "~> 2.0.1"
|
12
|
+
gem "rspec"
|
13
|
+
gem "rdoc"
|
21
14
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,18 +1,51 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
addressable (2.3.6)
|
4
5
|
bio-logger (1.0.0)
|
5
6
|
log4r (>= 1.1.9)
|
7
|
+
builder (3.2.2)
|
8
|
+
descendants_tracker (0.0.4)
|
9
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
6
10
|
diff-lcs (1.1.3)
|
7
|
-
|
8
|
-
|
9
|
-
|
11
|
+
faraday (0.9.0)
|
12
|
+
multipart-post (>= 1.2, < 3)
|
13
|
+
git (1.2.8)
|
14
|
+
github_api (0.12.1)
|
15
|
+
addressable (~> 2.3)
|
16
|
+
descendants_tracker (~> 0.0.4)
|
17
|
+
faraday (~> 0.8, < 0.10)
|
18
|
+
hashie (>= 3.2)
|
19
|
+
multi_json (>= 1.7.5, < 2.0)
|
20
|
+
nokogiri (~> 1.6.3)
|
21
|
+
oauth2
|
22
|
+
hashie (3.3.1)
|
23
|
+
highline (1.6.21)
|
24
|
+
jeweler (2.0.1)
|
25
|
+
builder
|
26
|
+
bundler (>= 1.0)
|
10
27
|
git (>= 1.2.5)
|
28
|
+
github_api
|
29
|
+
highline (>= 1.6.15)
|
30
|
+
nokogiri (>= 1.5.10)
|
11
31
|
rake
|
12
32
|
rdoc
|
13
33
|
json (1.6.5)
|
34
|
+
jwt (1.0.0)
|
14
35
|
log4r (1.1.9)
|
15
|
-
|
36
|
+
mini_portile (0.6.0)
|
37
|
+
multi_json (1.10.1)
|
38
|
+
multi_xml (0.5.5)
|
39
|
+
multipart-post (2.0.0)
|
40
|
+
nokogiri (1.6.3.1)
|
41
|
+
mini_portile (= 0.6.0)
|
42
|
+
oauth2 (1.0.0)
|
43
|
+
faraday (>= 0.8, < 0.10)
|
44
|
+
jwt (~> 1.0)
|
45
|
+
multi_json (~> 1.3)
|
46
|
+
multi_xml (~> 0.5)
|
47
|
+
rack (~> 1.2)
|
48
|
+
rack (1.5.2)
|
16
49
|
rake (0.9.2.2)
|
17
50
|
rdoc (3.12)
|
18
51
|
json (~> 1.4)
|
@@ -24,15 +57,16 @@ GEM
|
|
24
57
|
rspec-expectations (2.8.0)
|
25
58
|
diff-lcs (~> 1.1.2)
|
26
59
|
rspec-mocks (2.8.0)
|
60
|
+
thread_safe (0.3.4)
|
27
61
|
|
28
62
|
PLATFORMS
|
29
63
|
ruby
|
30
64
|
|
31
65
|
DEPENDENCIES
|
32
|
-
bio-logger
|
66
|
+
bio-logger
|
33
67
|
bundler
|
34
|
-
jeweler (~>
|
35
|
-
nokogiri (
|
36
|
-
rake
|
37
|
-
rdoc
|
38
|
-
rspec
|
68
|
+
jeweler (~> 2.0.1)
|
69
|
+
nokogiri (~> 1.6.0)
|
70
|
+
rake
|
71
|
+
rdoc
|
72
|
+
rspec
|
data/LICENSE.txt
CHANGED
data/{README.rdoc → README.md}
RENAMED
@@ -1,41 +1,51 @@
|
|
1
|
-
|
1
|
+
[Build Status](https://travis-ci.org/pjotrp/blastxmlparser)
|
2
|
+
|
3
|
+
# bio-blastxmlparser
|
2
4
|
|
3
|
-
|
5
|
+
blastxmlparser is a very fast big-data BLAST XML file parser, which can be used
|
6
|
+
as a command line utility. Use blastxmlparser to:
|
4
7
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
* Parse BLAST XML
|
9
|
+
* Filter output
|
10
|
+
* Generate FASTA, JSON, YAML, RDF, HTML, tabular output etc.
|
11
|
+
|
12
|
+
Rather than loading everything in memory, XML is parsed by BLAST query
|
13
|
+
(Iteration). Not only does this have the advantage of low memory use, it also shows
|
14
|
+
results early, and it may be faster when IO continues in parallel (disk
|
15
|
+
read-ahead).
|
11
16
|
|
12
17
|
Next to the API, blastxmlparser comes as a command line utility, which
|
13
18
|
can be used to filter results and requires no understanding of Ruby.
|
14
19
|
|
15
|
-
|
20
|
+
# Quick start
|
16
21
|
|
22
|
+
```sh
|
17
23
|
gem install bio-blastxmlparser
|
18
24
|
blastxmlparser --help
|
25
|
+
```
|
19
26
|
|
20
27
|
(see Installation, below, if it does not work)
|
21
28
|
|
22
|
-
|
29
|
+
## Performance
|
23
30
|
|
24
|
-
XML parsing is expensive. blastxmlparser
|
25
|
-
parsers, based on libxml2. Basically, a DOM parser is used
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
31
|
+
XML parsing is expensive. blastxmlparser can use the fast Nokogiri C, or
|
32
|
+
Java XML parsers, based on libxml2. Basically, a DOM parser is used
|
33
|
+
after splitting the BLAST XML document into subsections.
|
34
|
+
Tests show this is faster than a SAX
|
35
|
+
parser with Ruby callbacks. To see why libxml2 based Nokogiri is
|
36
|
+
fast, see this
|
37
|
+
[benchmark](http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html)
|
38
|
+
and [xml.com](http://www.xml.com/lpt/a/1703).
|
30
39
|
|
31
|
-
|
32
|
-
evaluation, i.e
|
33
|
-
version) parallelization. When parsing a full BLAST result
|
34
|
-
only a few fields are used. By using XPath queries
|
35
|
-
fields are queried.
|
40
|
+
Blastxmlparser is designed with other optimizations, such as lazy
|
41
|
+
evaluation, i.e., only creating objects when required, and (in a
|
42
|
+
future version) parallelization. When parsing a full BLAST result
|
43
|
+
usually only a few fields are used. By using XPath queries the parser
|
44
|
+
makes sure only the relevant fields are queried.
|
36
45
|
|
37
46
|
Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
|
38
47
|
|
48
|
+
```
|
39
49
|
bio-blastxmlparser + Nokogiri DOM (default)
|
40
50
|
|
41
51
|
real 0m1.259s
|
@@ -53,29 +63,39 @@ Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
|
|
53
63
|
real 1m14.548s
|
54
64
|
user 1m13.065s
|
55
65
|
sys 0m0.472s
|
66
|
+
```
|
56
67
|
|
57
|
-
|
68
|
+
## Install
|
58
69
|
|
70
|
+
```sh
|
59
71
|
gem install bio-blastxmlparser
|
72
|
+
```
|
60
73
|
|
61
|
-
Important: the parser is written for Ruby >= 1.9.
|
74
|
+
Important: the parser is written for Ruby >= 1.9. Check with
|
62
75
|
|
76
|
+
```sh
|
63
77
|
ruby -v
|
64
78
|
gem env
|
79
|
+
```
|
65
80
|
|
66
81
|
Nokogiri XML parser is required. To install it,
|
67
82
|
the libxml2 libraries and headers need to be installed first, for
|
68
83
|
example on Debian:
|
69
84
|
|
85
|
+
```sh
|
70
86
|
apt-get install libxslt-dev libxml2-dev
|
71
87
|
gem install bio-blastxmlparser
|
88
|
+
```
|
89
|
+
|
90
|
+
Nokogiri balks when libxml2 or libxslt is missing on your system (or
|
91
|
+
may install something automatically). In the worst case you'll have to
|
92
|
+
provide build paths, as described [here](http://nokogiri.org/tutorials/installing_nokogiri.html).
|
72
93
|
|
73
|
-
|
74
|
-
http://nokogiri.org/tutorials/installing_nokogiri.html.
|
94
|
+
## Command line usage
|
75
95
|
|
76
|
-
|
96
|
+
### Usage
|
77
97
|
|
78
|
-
|
98
|
+
```
|
79
99
|
blastxmlparser [options] file(s)
|
80
100
|
|
81
101
|
-p, --parser name Use full|split parser (default full)
|
@@ -93,23 +113,24 @@ http://nokogiri.org/tutorials/installing_nokogiri.html.
|
|
93
113
|
bioblastxmlparser filename(s)
|
94
114
|
|
95
115
|
Use --help switch for more information
|
116
|
+
```
|
96
117
|
|
97
|
-
|
118
|
+
### Examples
|
98
119
|
|
99
120
|
Print result fields of iterations containing 'lcl', using a regex
|
100
121
|
|
122
|
+
```sh
|
101
123
|
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
102
|
-
|
103
|
-
Print fields where bit_score > 145
|
104
|
-
|
105
|
-
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
124
|
+
```
|
106
125
|
|
107
126
|
prints a tab-delimited table:
|
108
127
|
|
128
|
+
```sh
|
109
129
|
1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
|
110
130
|
2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
|
111
131
|
3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
|
112
132
|
4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
|
133
|
+
```
|
113
134
|
|
114
135
|
The second and third column show the BLAST iteration, and the others
|
115
136
|
relate to the hits.
|
@@ -117,11 +138,20 @@ relate to the hits.
|
|
117
138
|
As this is evaluated Ruby, it is also possible to use the XML element
|
118
139
|
names directly
|
119
140
|
|
141
|
+
```sh
|
120
142
|
blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
|
143
|
+
```
|
144
|
+
|
145
|
+
Or the shorter
|
146
|
+
|
147
|
+
```sh
|
148
|
+
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
149
|
+
```
|
121
150
|
|
122
151
|
And it is possible to print (non default) named fields where E-value < 0.01
|
123
152
|
and hit length > 100. E.g.
|
124
153
|
|
154
|
+
```sh
|
125
155
|
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
126
156
|
|
127
157
|
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
@@ -130,28 +160,104 @@ and hit length > 100. E.g.
|
|
130
160
|
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
131
161
|
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
132
162
|
etc. etc.
|
163
|
+
```
|
133
164
|
|
134
165
|
prints the evalue and qseq columns. To output FASTA use --output-fasta
|
135
166
|
|
167
|
+
```sh
|
136
168
|
blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
169
|
+
```
|
137
170
|
|
138
171
|
which prints matching sequences, where the first field is the accession, followed
|
139
172
|
by query iteration id, and hit_id. E.g.
|
140
173
|
|
174
|
+
```sh
|
141
175
|
>I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
|
142
176
|
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
143
177
|
>I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
|
144
178
|
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
145
179
|
etc. etc.
|
180
|
+
```
|
181
|
+
|
182
|
+
## Modify output
|
183
|
+
|
184
|
+
To have more output options blastxmlparser can use an [ERB
|
185
|
+
template](http://www.stuartellis.eu/articles/erb/) for every match. This is a
|
186
|
+
very flexible option that can output textual formats such as JSON, YAML, HTML
|
187
|
+
and RDF. Examples are provided in
|
188
|
+
[./template](https://github.com/pjotrp/blastxmlparser/tree/master/template). A JSON
|
189
|
+
template could be
|
190
|
+
|
191
|
+
```Javascript
|
192
|
+
{ "<%= hit.parent.query_def %>": {
|
193
|
+
"num": <%= hit.hit_num %>,
|
194
|
+
"id": "<%= hit.hit_id %>",
|
195
|
+
"len": <%= hit.len %>,
|
196
|
+
"E-value": <%= hsp.evalue %>,
|
197
|
+
"bitscore": <%= hsp.bit_score %>,
|
198
|
+
"qseq": "<%= hsp.qseq %>",
|
199
|
+
"midline": "<%= hsp.midline %>",
|
200
|
+
"hseq": "<%= hsp.hseq %>",
|
201
|
+
};
|
202
|
+
```
|
203
|
+
|
204
|
+
To get JSON, run it with
|
205
|
+
|
206
|
+
```sh
|
207
|
+
blastxmlparser --template template/json.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
208
|
+
```
|
209
|
+
|
210
|
+
```Javascript
|
211
|
+
{ "B0511.9d gene=WBGene00015235": {
|
212
|
+
"num": 5,
|
213
|
+
"id": "gi|268566471|ref|XP_002639731.1|",
|
214
|
+
"len": 199,
|
215
|
+
"E-value": 1.72502e-22,
|
216
|
+
"bitscore": 96.6709,
|
217
|
+
"qseq": "MSMLRRPLTQLELSVI------------------VPKCXXXXXXXXXXXXQSEPPRGITRRNLRSADRKNRDVPGPSTGECTRTSIAPNRCEMSFTEVQ-TLTSARTPVAAPTLTLSTPVNPVSSAEMLX----XXXXXXXXXXXASRSGDNDSPLLFNAYDTPQQ--GINXXXXXXXXXXXXXNAHLYAXXXXXXXXXXXXXXXXRSHRH",
|
218
|
+
"midline": "MSMLRRPLTQLEL K QSEP GI++RNLRSADR+ +DVPG ++GE + FT+ +++SARTPV+ ++ LSTPVNP SS EM+ SR + D PL+FNAYDTPQQ G + NAHLY+ RS RH",
|
219
|
+
"hseq": "MSMLRRPLTQLELCEDDIQWLSEQLAKKETGFEDEVKYEVMDVDEDEPMDQSEPTGGISKRNLRSADRRKKDVPG-TSGEGAQ-----------FTDQGLSISSARTPVSGASVNLSTPVNPSSSNEMMALPPPVRLARAGRRQRDSRVVNGDVPLMFNAYDTPQQPAGGSNGSPTPSDSPESPNAHLYSTPINPTSSSGGPSSNTRSQRH",
|
220
|
+
};
|
221
|
+
```
|
222
|
+
|
223
|
+
Likewise, using the RDF template
|
224
|
+
|
225
|
+
```sh
|
226
|
+
blastxmlparser --template template/rdf.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
227
|
+
```
|
228
|
+
|
229
|
+
```ruby
|
230
|
+
:Minc_Contig50_77_42056___42484_1_64492 :query :Minc_Contig50_77_42056___42484_1_64492_23
|
231
|
+
:Minc_Contig50_77_42056___42484_1_64492_23
|
232
|
+
:query_id "lcl|30_0",
|
233
|
+
:query_def "Minc_Contig50_77 [42056 - 42484] 1 64492",
|
234
|
+
:num 23,
|
235
|
+
:accession "Minc02032",
|
236
|
+
:id "lcl|Minc02032",
|
237
|
+
:len 147,
|
238
|
+
:E-value 8.1089e-12,
|
239
|
+
:identity 60,
|
240
|
+
:align_len 69,
|
241
|
+
:bitscore 69.8753,
|
242
|
+
:qseq "ATGGGAGATGGAATTGAACCGTCATGGAAAGGGCCCAAACCGAAGCACAACCGACTGTGCCACCATCCA",
|
243
|
+
:midline "|||||||||||||||||||| |||||||| | |||||||||||||||||||||||||||||||",
|
244
|
+
:hseq "ATGGGAGATGGAATTGAACCATCATGGAATG-------ACCGAAGCACAACCGACTGTGCCACCATCCA",
|
245
|
+
:evalue 8.1089e-12 .
|
246
|
+
```
|
247
|
+
|
248
|
+
## Additional options
|
146
249
|
|
147
250
|
To use the low-mem (iterated slower) version of the parser use
|
148
251
|
|
252
|
+
```sh
|
149
253
|
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
254
|
+
```
|
150
255
|
|
151
|
-
|
256
|
+
## API (Ruby library)
|
152
257
|
|
153
258
|
To loop through a BLAST result:
|
154
259
|
|
260
|
+
```ruby
|
155
261
|
>> require 'bio-blastxmlparser'
|
156
262
|
>> fn = 'test/data/nt_example_blastn.m7'
|
157
263
|
>> n = Bio::BlastXMLParser::XmlIterator.new(fn).to_enum
|
@@ -163,19 +269,23 @@ To loop through a BLAST result:
|
|
163
269
|
>> end
|
164
270
|
>> end
|
165
271
|
>> end
|
272
|
+
```
|
166
273
|
|
167
274
|
The next example parses XML using less memory by using a Ruby
|
168
275
|
Iterator
|
169
276
|
|
277
|
+
```ruby
|
170
278
|
>> blast = Bio::BlastXMLParser::XmlSplitterIterator.new(fn).to_enum
|
171
279
|
>> iter = blast.next
|
172
280
|
>> iter.iter_num
|
173
281
|
=> 1
|
174
282
|
>> iter.query_id
|
175
283
|
=> "lcl|1_0"
|
284
|
+
```
|
176
285
|
|
177
286
|
Get the first hit
|
178
287
|
|
288
|
+
```ruby
|
179
289
|
>> hit = iter.hits.first
|
180
290
|
>> hit.hit_num
|
181
291
|
=> 1
|
@@ -187,14 +297,18 @@ Get the first hit
|
|
187
297
|
=> "I_74685"
|
188
298
|
>> hit.len
|
189
299
|
=> 144
|
300
|
+
```
|
190
301
|
|
191
302
|
Get the parent info
|
192
303
|
|
304
|
+
```ruby
|
193
305
|
>> hit.parent.query_id
|
194
306
|
=> "lcl|1_0"
|
195
|
-
|
307
|
+
```
|
308
|
+
|
196
309
|
Get the first Hsp
|
197
310
|
|
311
|
+
```ruby
|
198
312
|
>> hsp = hit.hsps.first
|
199
313
|
>> hsp.hsp_num
|
200
314
|
=> 1
|
@@ -224,6 +338,7 @@ Get the first Hsp
|
|
224
338
|
=> "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
|
225
339
|
>> hsp.midline
|
226
340
|
=> "|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"
|
341
|
+
```
|
227
342
|
|
228
343
|
Unlike BioRuby, this module uses the actual element names in the XML
|
229
344
|
definition, to avoid confusion (if anyone wants a translation,
|
@@ -232,30 +347,36 @@ feel free to contribute an adaptor).
|
|
232
347
|
It is also possible to use the XML element names as Strings, rather
|
233
348
|
than methods. E.g.
|
234
349
|
|
350
|
+
```ruby
|
235
351
|
>> hsp.field("Hsp_bit-score")
|
236
352
|
=> "145.205"
|
237
353
|
>> hsp["Hsp_bit-score"]
|
238
354
|
=> "145.205"
|
355
|
+
```
|
239
356
|
|
240
357
|
Note that, when using the element names, the results are always String values.
|
241
358
|
|
242
359
|
Fetch the next result (Iteration)
|
243
360
|
|
361
|
+
```ruby
|
244
362
|
>> iter2 = blast.next
|
245
363
|
>> iter2.iter_num
|
246
364
|
=> 2
|
247
365
|
>> iter2.query_id
|
248
366
|
=> "lcl|2_0"
|
367
|
+
```
|
249
368
|
|
250
369
|
etc. etc.
|
251
370
|
|
252
371
|
For more examples see the files in ./spec
|
253
372
|
|
254
|
-
|
373
|
+
## URL
|
255
374
|
|
256
375
|
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
257
376
|
|
258
|
-
|
377
|
+
blastxmlparser is listed at http://biogems.info
|
378
|
+
|
379
|
+
## Copyright
|
259
380
|
|
260
|
-
Copyright (c) 2011
|
381
|
+
Copyright (c) 2011-2014 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
261
382
|
|
data/Rakefile
CHANGED
@@ -15,14 +15,10 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
gem.name = "bio-blastxmlparser"
|
16
16
|
gem.homepage = "http://github.com/pjotrp/blastxmlparser"
|
17
17
|
gem.license = "MIT"
|
18
|
-
gem.summary = %Q{Very fast BLAST XML
|
19
|
-
gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby}
|
18
|
+
gem.summary = %Q{Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer}
|
19
|
+
gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
|
20
20
|
gem.email = "pjotr.public01@thebird.nl"
|
21
21
|
gem.authors = ["Pjotr Prins"]
|
22
|
-
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
23
|
-
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
24
|
-
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
25
|
-
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
26
22
|
end
|
27
23
|
Jeweler::RubygemsDotOrgTasks.new
|
28
24
|
|
@@ -32,17 +28,11 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
32
28
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
33
29
|
end
|
34
30
|
|
35
|
-
# RSpec::Core::RakeTask.new(:rcov) do |spec|
|
36
|
-
# spec.pattern = 'spec/**/*_spec.rb'
|
37
|
-
# spec.rcov = true
|
38
|
-
# end
|
39
|
-
|
40
31
|
task :default => [ :test, :spec ]
|
41
32
|
|
42
33
|
require 'rake/testtask'
|
43
34
|
Rake::TestTask.new(:test) do |test|
|
44
35
|
test.libs << 'lib' << 'test'
|
45
|
-
# test.pattern = 'test/**/test_*.rb' # breaks in 1.9.3
|
46
36
|
test.test_files = Dir.glob("test/**/test_*.rb")
|
47
37
|
test.verbose = true
|
48
38
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.
|
1
|
+
1.1.2
|
data/bin/blastxmlparser
CHANGED
@@ -4,83 +4,21 @@
|
|
4
4
|
# Author:: Pjotr Prins
|
5
5
|
# License:: MIT License
|
6
6
|
#
|
7
|
-
# Copyright (C) 2010-
|
7
|
+
# Copyright (C) 2010-2014 Pjotr Prins <pjotr.prins@thebird.nl>
|
8
8
|
|
9
9
|
rootpath = File.dirname(File.dirname(__FILE__))
|
10
10
|
$: << File.join(rootpath,'lib')
|
11
11
|
|
12
12
|
BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
13
13
|
|
14
|
-
$stderr.print "
|
14
|
+
$stderr.print "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
|
15
15
|
|
16
16
|
USAGE = <<EOM
|
17
17
|
|
18
|
-
|
18
|
+
blastxmlparser filename(s)
|
19
19
|
|
20
20
|
Use --help switch for more information
|
21
21
|
|
22
|
-
== Examples
|
23
|
-
|
24
|
-
Print result fields of iterations containing 'lcl', using a regex
|
25
|
-
|
26
|
-
blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
|
27
|
-
|
28
|
-
Print fields where bit_score > 145
|
29
|
-
|
30
|
-
blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
|
31
|
-
|
32
|
-
prints a tab delimited
|
33
|
-
|
34
|
-
1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
|
35
|
-
2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
|
36
|
-
3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
|
37
|
-
4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
|
38
|
-
|
39
|
-
The second and third column show the BLAST iteration, and the others
|
40
|
-
relate to the hits.
|
41
|
-
|
42
|
-
As this is evaluated Ruby, it is also possible to use the XML element
|
43
|
-
names directly
|
44
|
-
|
45
|
-
blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
|
46
|
-
|
47
|
-
And it is possible to print (non default) named fields where E-value < 0.001
|
48
|
-
and hit length > 100. E.g.
|
49
|
-
|
50
|
-
blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
51
|
-
|
52
|
-
1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
53
|
-
2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
|
54
|
-
3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
|
55
|
-
4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
|
56
|
-
5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
|
57
|
-
etc. etc.
|
58
|
-
|
59
|
-
prints the evalue and qseq columns. To output FASTA use --output-fasta
|
60
|
-
|
61
|
-
blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
62
|
-
|
63
|
-
which prints matching sequences, where the first field is the accession, followed
|
64
|
-
by query iteration id, and hit_id. E.g.
|
65
|
-
|
66
|
-
>I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
|
67
|
-
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
68
|
-
>I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
|
69
|
-
AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
|
70
|
-
etc. etc.
|
71
|
-
|
72
|
-
To use the low-mem (iterated slower) version of the parser use
|
73
|
-
|
74
|
-
blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
|
75
|
-
|
76
|
-
== URL
|
77
|
-
|
78
|
-
The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
|
79
|
-
|
80
|
-
== Copyright
|
81
|
-
|
82
|
-
Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
|
83
|
-
|
84
22
|
EOM
|
85
23
|
|
86
24
|
if ARGV.size == 0
|
@@ -114,16 +52,22 @@ opts = OptionParser.new do |o|
|
|
114
52
|
options.parser = p.to_sym
|
115
53
|
end
|
116
54
|
|
117
|
-
o.on("
|
118
|
-
options.
|
55
|
+
o.on("-e filter","--exec filter",String, "Evaluate filter") do |s|
|
56
|
+
options.exec = s
|
119
57
|
end
|
120
|
-
|
121
|
-
o.
|
58
|
+
|
59
|
+
o.separator ""
|
60
|
+
|
61
|
+
o.on("-n fields","--named fields",String, "Print named fields") do |s|
|
122
62
|
options.fields = s.split(/,/)
|
123
63
|
end
|
64
|
+
o.on("--output-fasta","Output FASTA") do |b|
|
65
|
+
options.output_fasta = true
|
66
|
+
end
|
124
67
|
|
125
|
-
o.on("-
|
126
|
-
|
68
|
+
o.on("-t erb","--template erb",String, "Use ERB template for output") do |s|
|
69
|
+
require 'erb'
|
70
|
+
options.template = s
|
127
71
|
end
|
128
72
|
|
129
73
|
o.separator ""
|
@@ -156,6 +100,13 @@ begin
|
|
156
100
|
Bio::Log::CLI.configure('bio-blastxmlparser')
|
157
101
|
logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
|
158
102
|
|
103
|
+
if options[:template]
|
104
|
+
include BioRdf
|
105
|
+
fn = options.template
|
106
|
+
raise "No template #{fn}!" if not File.exist?(fn)
|
107
|
+
template = ERB.new(File.read(fn))
|
108
|
+
end
|
109
|
+
|
159
110
|
ARGV.each do | fn |
|
160
111
|
logger.info("XML parsing #{fn}")
|
161
112
|
n = if options.parser == :split
|
@@ -173,10 +124,13 @@ begin
|
|
173
124
|
true
|
174
125
|
end
|
175
126
|
if do_print
|
176
|
-
if
|
127
|
+
if template
|
128
|
+
print template.result(binding)
|
129
|
+
elsif options.output_fasta
|
177
130
|
print ">"+hit.accession+' '+iter.iter_num.to_s+'|'+iter.query_id+' '+hit.hit_id+' '+hit.hit_def+"\n"
|
178
131
|
print hsp.qseq+"\n"
|
179
132
|
else
|
133
|
+
# Default output
|
180
134
|
if options.fields
|
181
135
|
print i,"\t"
|
182
136
|
options.fields.each do | f |
|
data/bio-blastxmlparser.gemspec
CHANGED
@@ -5,17 +5,17 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-blastxmlparser"
|
8
|
-
s.version = "1.1.
|
8
|
+
s.version = "1.1.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "
|
13
|
-
s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby"
|
12
|
+
s.date = "2014-09-02"
|
13
|
+
s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["blastxmlparser"]
|
16
16
|
s.extra_rdoc_files = [
|
17
17
|
"LICENSE.txt",
|
18
|
-
"README.
|
18
|
+
"README.md"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
21
|
".document",
|
@@ -24,7 +24,7 @@ Gem::Specification.new do |s|
|
|
24
24
|
"Gemfile",
|
25
25
|
"Gemfile.lock",
|
26
26
|
"LICENSE.txt",
|
27
|
-
"README.
|
27
|
+
"README.md",
|
28
28
|
"Rakefile",
|
29
29
|
"VERSION",
|
30
30
|
"bin/blastxmlparser",
|
@@ -33,6 +33,7 @@ Gem::Specification.new do |s|
|
|
33
33
|
"lib/bio/db/blast/parser/nokogiri.rb",
|
34
34
|
"lib/bio/db/blast/xmliterator.rb",
|
35
35
|
"lib/bio/db/blast/xmlsplitter.rb",
|
36
|
+
"lib/bio/writers/rdf.rb",
|
36
37
|
"sample/bioruby.rb",
|
37
38
|
"sample/blastxmlparserdemo.rb",
|
38
39
|
"sample/libxml_sax.rb",
|
@@ -41,6 +42,8 @@ Gem::Specification.new do |s|
|
|
41
42
|
"sample/nokogiri_split_dom.rb",
|
42
43
|
"spec/bio-blastxmlparser_spec.rb",
|
43
44
|
"spec/spec_helper.rb",
|
45
|
+
"template/json.erb",
|
46
|
+
"template/rdf.erb",
|
44
47
|
"test/data/aa_example.fasta",
|
45
48
|
"test/data/aa_example_blastp.m7",
|
46
49
|
"test/data/nt_example.fasta",
|
@@ -50,37 +53,37 @@ Gem::Specification.new do |s|
|
|
50
53
|
s.homepage = "http://github.com/pjotrp/blastxmlparser"
|
51
54
|
s.licenses = ["MIT"]
|
52
55
|
s.require_paths = ["lib"]
|
53
|
-
s.rubygems_version = "
|
54
|
-
s.summary = "Very fast BLAST XML
|
56
|
+
s.rubygems_version = "2.0.3"
|
57
|
+
s.summary = "Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
|
55
58
|
|
56
59
|
if s.respond_to? :specification_version then
|
57
|
-
s.specification_version =
|
60
|
+
s.specification_version = 4
|
58
61
|
|
59
62
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
60
|
-
s.add_runtime_dependency(%q<bio-logger>, [">=
|
61
|
-
s.add_runtime_dependency(%q<nokogiri>, ["
|
62
|
-
s.add_development_dependency(%q<rake>, [">= 0
|
63
|
+
s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
|
64
|
+
s.add_runtime_dependency(%q<nokogiri>, ["~> 1.6.0"])
|
65
|
+
s.add_development_dependency(%q<rake>, [">= 0"])
|
63
66
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
64
|
-
s.add_development_dependency(%q<jeweler>, ["~>
|
65
|
-
s.add_development_dependency(%q<rspec>, [">=
|
66
|
-
s.add_development_dependency(%q<rdoc>, [">=
|
67
|
+
s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
|
68
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
69
|
+
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
67
70
|
else
|
68
|
-
s.add_dependency(%q<bio-logger>, [">=
|
69
|
-
s.add_dependency(%q<nokogiri>, ["
|
70
|
-
s.add_dependency(%q<rake>, [">= 0
|
71
|
+
s.add_dependency(%q<bio-logger>, [">= 0"])
|
72
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
|
73
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
71
74
|
s.add_dependency(%q<bundler>, [">= 0"])
|
72
|
-
s.add_dependency(%q<jeweler>, ["~>
|
73
|
-
s.add_dependency(%q<rspec>, [">=
|
74
|
-
s.add_dependency(%q<rdoc>, [">=
|
75
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
76
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
77
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
75
78
|
end
|
76
79
|
else
|
77
|
-
s.add_dependency(%q<bio-logger>, [">=
|
78
|
-
s.add_dependency(%q<nokogiri>, ["
|
79
|
-
s.add_dependency(%q<rake>, [">= 0
|
80
|
+
s.add_dependency(%q<bio-logger>, [">= 0"])
|
81
|
+
s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
|
82
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
80
83
|
s.add_dependency(%q<bundler>, [">= 0"])
|
81
|
-
s.add_dependency(%q<jeweler>, ["~>
|
82
|
-
s.add_dependency(%q<rspec>, [">=
|
83
|
-
s.add_dependency(%q<rdoc>, [">=
|
84
|
+
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
85
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
86
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
84
87
|
end
|
85
88
|
end
|
86
89
|
|
data/lib/bio-blastxmlparser.rb
CHANGED
@@ -0,0 +1,79 @@
|
|
1
|
+
# RDF support module. Original is part of bioruby-rdf by Pjotr Prins
|
2
|
+
#
|
3
|
+
module BioRdf
|
4
|
+
|
5
|
+
module RDF
|
6
|
+
|
7
|
+
def RDF::valid_uri? uri
|
8
|
+
uri =~ /^([!#$&-;=?_a-z~]|%[0-9a-f]{2})+$/i
|
9
|
+
end
|
10
|
+
|
11
|
+
def RDF::escape_string_literal(literal)
|
12
|
+
s = literal.to_s
|
13
|
+
# Put a backslash before every double quote that is not already preceded by one
|
14
|
+
s = s.gsub(/(?<!\\)"/,'\"')
|
15
|
+
# Put a backslash before a single backslash unless it starts an escape sequence (\u, \U, \t, \n, \r, \\ or \")
|
16
|
+
if s =~ /[^\\]\\[^\\]/
|
17
|
+
s2 = []
|
18
|
+
s.each_char.with_index { |c,i|
|
19
|
+
res = c
|
20
|
+
if i>0 and c == '\\' and s[i-1] != '\\' and s[i+1] !~ /^[uUtnr\\"]/
|
21
|
+
res = '\\' + c
|
22
|
+
end
|
23
|
+
# p [i,c,s[i+1],res]
|
24
|
+
s2 << res
|
25
|
+
}
|
26
|
+
s = s2.join('')
|
27
|
+
end
|
28
|
+
s
|
29
|
+
end
|
30
|
+
|
31
|
+
def RDF::stringify_literal(literal)
|
32
|
+
RDF::escape_string_literal(literal.to_s)
|
33
|
+
end
|
34
|
+
|
35
|
+
def RDF::quoted_stringify_literal(literal)
|
36
|
+
'"' + stringify_literal(literal) + '"'
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
module Turtle
|
41
|
+
|
42
|
+
def Turtle::stringify_literal(literal)
|
43
|
+
RDF::stringify_literal(literal)
|
44
|
+
end
|
45
|
+
|
46
|
+
def Turtle::identifier(id)
|
47
|
+
raise "Illegal identifier #{id}" if id != Turtle::mangle_identifier(id)
|
48
|
+
end
|
49
|
+
|
50
|
+
# Replace letters/symbols that are not allowed in a Turtle identifier
|
51
|
+
# (short hand URI). This should be the definitive mangler and replace the
|
52
|
+
# ones in bioruby-table and bio-exominer. Manglers are useful when using
|
53
|
+
# data from other sources and trying to transform them into simple RDF
|
54
|
+
# identifiers.
|
55
|
+
|
56
|
+
def Turtle::mangle_identifier(s)
|
57
|
+
id = s.strip.gsub(/[^[:print:]]/, '').gsub(/[#)(,]/,"").gsub(/[%]/,"perc").gsub(/(\s|\.|\$|\/|\\|\>)+/,"_")
|
58
|
+
id = id.gsub(/\[|\]/,'')
|
59
|
+
# id = URI::escape(id)
|
60
|
+
id = id.gsub(/\|/,'_')
|
61
|
+
id = id.gsub(/\-|:/,'_')
|
62
|
+
if id != s
|
63
|
+
# Don't want Bio dependency in templates!
|
64
|
+
# logger = Bio::Log::LoggerPlus.new 'bio-rdf'
|
65
|
+
# logger.warn "\nWARNING: Changed identifier <#{s}> to <#{id}>"
|
66
|
+
$stderr.print "\nWARNING: Changed identifier <#{s}> to <#{id}>"
|
67
|
+
end
|
68
|
+
if not RDF::valid_uri?(id)
|
69
|
+
raise "Invalid URI after mangling <#{s}> to <#{id}>!"
|
70
|
+
end
|
71
|
+
valid_id = if id =~ /^\d/
|
72
|
+
'r' + id
|
73
|
+
else
|
74
|
+
id
|
75
|
+
end
|
76
|
+
valid_id # we certainly hope so!
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
data/template/json.erb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
{ "<%= hit.parent.query_def %>": {
|
2
|
+
"query_id": "<%= hit.parent.query_id %>",
|
3
|
+
"num": <%= hit.hit_num %>,
|
4
|
+
"accession": "<%= hit.accession %>",
|
5
|
+
"id": "<%= hit.hit_id %>",
|
6
|
+
"len": <%= hit.len %>,
|
7
|
+
"E-value": <%= hsp.evalue %>,
|
8
|
+
"identity": <%= hsp.identity %>,
|
9
|
+
"align_len": <%= hsp.align_len %>,
|
10
|
+
"bitscore": <%= hsp.bit_score %>,
|
11
|
+
"qseq": "<%= hsp.qseq %>",
|
12
|
+
"midline": "<%= hsp.midline %>",
|
13
|
+
"hseq": "<%= hsp.hseq %>",
|
14
|
+
};
|
15
|
+
|
data/template/rdf.erb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
<%
  # Expects `hit` and `hsp` in the binding.
  # blastid: the query definition mangled into a Turtle identifier.
  # id:      blastid plus the hit number; used as the subject for this record.
  blastid = Turtle::mangle_identifier(hit.parent.query_def)
  id = blastid+'_'+hit.hit_num.to_s
  # In Turtle, ';' separates predicate/object pairs that share a subject and
  # '.' terminates a statement (',' would only separate objects of a single
  # predicate), so the pairs below are joined with ';' and both statements
  # end with '.'.
%>
:<%= blastid %> :query :<%= id %> .
:<%= id %>
  :query_id "<%= hit.parent.query_id %>" ;
  :query_def "<%= hit.parent.query_def %>" ;
  :num <%= hit.hit_num %> ;
  :accession "<%= hit.accession %>" ;
  :id "<%= hit.hit_id %>" ;
  :len <%= hit.len %> ;
  :E-value <%= hsp.evalue %> ;
  :identity <%= hsp.identity %> ;
  :align_len <%= hsp.align_len %> ;
  :bitscore <%= hsp.bit_score %> ;
  :qseq "<%= hsp.qseq %>" ;
  :midline "<%= hsp.midline %>" ;
  :hseq "<%= hsp.hseq %>" ;
  :evalue <%= hsp.evalue %> .
|
21
|
+
|
metadata
CHANGED
@@ -1,110 +1,130 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-blastxmlparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.2
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Pjotr Prins
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-02 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bio-logger
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
19
|
+
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: nokogiri
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - "~>"
|
31
32
|
- !ruby/object:Gem::Version
|
32
|
-
version: 1.
|
33
|
+
version: 1.6.0
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.6.0
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rake
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - ">="
|
42
46
|
- !ruby/object:Gem::Version
|
43
|
-
version: 0
|
47
|
+
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: bundler
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - ">="
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
62
|
type: :development
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: jeweler
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
|
-
- - ~>
|
73
|
+
- - "~>"
|
64
74
|
- !ruby/object:Gem::Version
|
65
|
-
version:
|
75
|
+
version: 2.0.1
|
66
76
|
type: :development
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 2.0.1
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rspec
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
|
-
- -
|
87
|
+
- - ">="
|
75
88
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
89
|
+
version: '0'
|
77
90
|
type: :development
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
80
97
|
- !ruby/object:Gem::Dependency
|
81
98
|
name: rdoc
|
82
|
-
requirement:
|
83
|
-
none: false
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
84
100
|
requirements:
|
85
|
-
- -
|
101
|
+
- - ">="
|
86
102
|
- !ruby/object:Gem::Version
|
87
|
-
version:
|
103
|
+
version: '0'
|
88
104
|
type: :development
|
89
105
|
prerelease: false
|
90
|
-
version_requirements:
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
91
111
|
description: Fast big data BLAST XML parser and library; this libxml2 based version
|
92
|
-
is 50x faster than BioRuby
|
112
|
+
is 50x faster than BioRuby and comes with a nice CLI
|
93
113
|
email: pjotr.public01@thebird.nl
|
94
114
|
executables:
|
95
115
|
- blastxmlparser
|
96
116
|
extensions: []
|
97
117
|
extra_rdoc_files:
|
98
118
|
- LICENSE.txt
|
99
|
-
- README.
|
119
|
+
- README.md
|
100
120
|
files:
|
101
|
-
- .document
|
102
|
-
- .rspec
|
103
|
-
- .travis.yml
|
121
|
+
- ".document"
|
122
|
+
- ".rspec"
|
123
|
+
- ".travis.yml"
|
104
124
|
- Gemfile
|
105
125
|
- Gemfile.lock
|
106
126
|
- LICENSE.txt
|
107
|
-
- README.
|
127
|
+
- README.md
|
108
128
|
- Rakefile
|
109
129
|
- VERSION
|
110
130
|
- bin/blastxmlparser
|
@@ -113,6 +133,7 @@ files:
|
|
113
133
|
- lib/bio/db/blast/parser/nokogiri.rb
|
114
134
|
- lib/bio/db/blast/xmliterator.rb
|
115
135
|
- lib/bio/db/blast/xmlsplitter.rb
|
136
|
+
- lib/bio/writers/rdf.rb
|
116
137
|
- sample/bioruby.rb
|
117
138
|
- sample/blastxmlparserdemo.rb
|
118
139
|
- sample/libxml_sax.rb
|
@@ -121,6 +142,8 @@ files:
|
|
121
142
|
- sample/nokogiri_split_dom.rb
|
122
143
|
- spec/bio-blastxmlparser_spec.rb
|
123
144
|
- spec/spec_helper.rb
|
145
|
+
- template/json.erb
|
146
|
+
- template/rdf.erb
|
124
147
|
- test/data/aa_example.fasta
|
125
148
|
- test/data/aa_example_blastp.m7
|
126
149
|
- test/data/nt_example.fasta
|
@@ -129,29 +152,25 @@ files:
|
|
129
152
|
homepage: http://github.com/pjotrp/blastxmlparser
|
130
153
|
licenses:
|
131
154
|
- MIT
|
155
|
+
metadata: {}
|
132
156
|
post_install_message:
|
133
157
|
rdoc_options: []
|
134
158
|
require_paths:
|
135
159
|
- lib
|
136
160
|
required_ruby_version: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
161
|
requirements:
|
139
|
-
- -
|
162
|
+
- - ">="
|
140
163
|
- !ruby/object:Gem::Version
|
141
164
|
version: '0'
|
142
|
-
segments:
|
143
|
-
- 0
|
144
|
-
hash: -3287387609254152406
|
145
165
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
|
-
none: false
|
147
166
|
requirements:
|
148
|
-
- -
|
167
|
+
- - ">="
|
149
168
|
- !ruby/object:Gem::Version
|
150
169
|
version: '0'
|
151
170
|
requirements: []
|
152
171
|
rubyforge_project:
|
153
|
-
rubygems_version:
|
172
|
+
rubygems_version: 2.0.3
|
154
173
|
signing_key:
|
155
|
-
specification_version:
|
156
|
-
summary: Very fast BLAST XML
|
174
|
+
specification_version: 4
|
175
|
+
summary: Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer
|
157
176
|
test_files: []
|