bio-blastxmlparser 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 31b42217bb809cde8d5ef3c06d11c6c9123c6413
4
+ data.tar.gz: 5d23e19fb8c774f7edaffd03bbcb156800679f7f
5
+ SHA512:
6
+ metadata.gz: de99019d564d5ea759f6e3ef330b8e9e68f6a7bbdb0578c34699ad7f716da16562d702da935bcfc5e3baa9a9e673b2ad99a62ae07210c4feea144134ad822e94
7
+ data.tar.gz: 24bb61197ff82129b404dcaf928b38ac9f7b9d5c10b90a630032253e468adc558c1675639b11c4bc60f7da8082626b3b288c2f0b9e6328e0ef2525b3a79453a8
data/.travis.yml CHANGED
@@ -1,9 +1,9 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 1.9.2
4
3
  - 1.9.3
5
4
  - jruby-19mode # JRuby in 1.9 mode
6
5
  - rbx-19mode
6
+ - 2.1.0
7
7
  # - 1.8.7
8
8
  # - jruby-18mode # JRuby in 1.8 mode
9
9
  # - rbx-18mode
data/Gemfile CHANGED
@@ -1,21 +1,14 @@
1
1
  source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
-
6
2
  # Runtime dependencies
7
- # gem "bio", ">= 1.3.1"
8
- gem "bio-logger", ">= 1.0.0"
9
- gem "nokogiri", ">= 1.5.0"
3
+ gem "bio-logger"
4
+ gem "nokogiri", "~>1.6.0"
10
5
 
11
6
  # Add dependencies to develop your gem here.
12
7
  # Include everything needed to run rake, tests, features, etc.
13
8
  group :development do
14
- # gem "rspec", "~> 2.3.0"
15
- gem "rake", ">= 0.9.2.2"
9
+ gem "rake"
16
10
  gem "bundler"
17
- gem "jeweler", "~> 1.8.4"
18
- gem "rspec", ">= 2.3.0"
19
- gem "rdoc", ">= 2.4.2"
20
- # gem "rcov", ">= 0"
11
+ gem "jeweler", "~> 2.0.1"
12
+ gem "rspec"
13
+ gem "rdoc"
21
14
  end
data/Gemfile.lock CHANGED
@@ -1,18 +1,51 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
+ addressable (2.3.6)
4
5
  bio-logger (1.0.0)
5
6
  log4r (>= 1.1.9)
7
+ builder (3.2.2)
8
+ descendants_tracker (0.0.4)
9
+ thread_safe (~> 0.3, >= 0.3.1)
6
10
  diff-lcs (1.1.3)
7
- git (1.2.5)
8
- jeweler (1.8.4)
9
- bundler (~> 1.0)
11
+ faraday (0.9.0)
12
+ multipart-post (>= 1.2, < 3)
13
+ git (1.2.8)
14
+ github_api (0.12.1)
15
+ addressable (~> 2.3)
16
+ descendants_tracker (~> 0.0.4)
17
+ faraday (~> 0.8, < 0.10)
18
+ hashie (>= 3.2)
19
+ multi_json (>= 1.7.5, < 2.0)
20
+ nokogiri (~> 1.6.3)
21
+ oauth2
22
+ hashie (3.3.1)
23
+ highline (1.6.21)
24
+ jeweler (2.0.1)
25
+ builder
26
+ bundler (>= 1.0)
10
27
  git (>= 1.2.5)
28
+ github_api
29
+ highline (>= 1.6.15)
30
+ nokogiri (>= 1.5.10)
11
31
  rake
12
32
  rdoc
13
33
  json (1.6.5)
34
+ jwt (1.0.0)
14
35
  log4r (1.1.9)
15
- nokogiri (1.5.0)
36
+ mini_portile (0.6.0)
37
+ multi_json (1.10.1)
38
+ multi_xml (0.5.5)
39
+ multipart-post (2.0.0)
40
+ nokogiri (1.6.3.1)
41
+ mini_portile (= 0.6.0)
42
+ oauth2 (1.0.0)
43
+ faraday (>= 0.8, < 0.10)
44
+ jwt (~> 1.0)
45
+ multi_json (~> 1.3)
46
+ multi_xml (~> 0.5)
47
+ rack (~> 1.2)
48
+ rack (1.5.2)
16
49
  rake (0.9.2.2)
17
50
  rdoc (3.12)
18
51
  json (~> 1.4)
@@ -24,15 +57,16 @@ GEM
24
57
  rspec-expectations (2.8.0)
25
58
  diff-lcs (~> 1.1.2)
26
59
  rspec-mocks (2.8.0)
60
+ thread_safe (0.3.4)
27
61
 
28
62
  PLATFORMS
29
63
  ruby
30
64
 
31
65
  DEPENDENCIES
32
- bio-logger (>= 1.0.0)
66
+ bio-logger
33
67
  bundler
34
- jeweler (~> 1.8.4)
35
- nokogiri (>= 1.5.0)
36
- rake (>= 0.9.2.2)
37
- rdoc (>= 2.4.2)
38
- rspec (>= 2.3.0)
68
+ jeweler (~> 2.0.1)
69
+ nokogiri (~> 1.6.0)
70
+ rake
71
+ rdoc
72
+ rspec
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011 Pjotr Prins
1
+ Copyright (c) 2011-2014 Pjotr Prins
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -1,41 +1,51 @@
1
- blastxmlparser is listed at http://biogems.info
1
+ [![Build Status](https://travis-ci.org/pjotrp/blastxmlparser.svg?branch=master)](https://travis-ci.org/pjotrp/blastxmlparser)
2
+
3
+ # bio-blastxmlparser
2
4
 
3
- = bio-blastxmlparser
5
+ blastxmlparser is a very fast big-data BLAST XML file parser, which can be used
6
+ as a command line utility. Use blastxmlparser to:
4
7
 
5
- blastxmlparser is a very fast big-data BLAST XML file parser, which can be
6
- used as command line utility, or as a Ruby library. Rather than
7
- loading everything in memory, XML is parsed by BLAST query
8
- (Iteration). Not only has this the advantage of low memory use, it
9
- also shows results early, and it may be faster when IO continues in
10
- parallel (disk read-ahead).
8
+ * Parse BLAST XML
9
+ * Filter output
10
+ * Generate FASTA, JSON, YAML, RDF, HTML, tabular output etc.
11
+
12
+ Rather than loading everything in memory, XML is parsed by BLAST query
13
+ (Iteration). Not only has this the advantage of low memory use, it also shows
14
+ results early, and it may be faster when IO continues in parallel (disk
15
+ read-ahead).
11
16
 
12
17
  Next to the API, blastxmlparser comes as a command line utility, which
13
18
  can be used to filter results and requires no understanding of Ruby.
14
19
 
15
- = Quick start
20
+ # Quick start
16
21
 
22
+ ```sh
17
23
  gem install bio-blastxmlparser
18
24
  blastxmlparser --help
25
+ ```
19
26
 
20
27
  (see Installation, below, if it does not work)
21
28
 
22
- == Performance
29
+ ## Performance
23
30
 
24
- XML parsing is expensive. blastxmlparser uses the fast Nokogiri C, or Java, XML
25
- parsers, based on libxml2. Basically, a DOM parser is used for subsections of a
26
- document. Tests show this is faster than a SAX parser with Ruby callbacks. To
27
- see why libxml2 based Nokogiri is fast, see
28
- http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html and
29
- http://www.xml.com/lpt/a/1703.
31
+ XML parsing is expensive. blastxmlparser can use the fast Nokogiri C, or
32
+ Java XML parsers, based on libxml2. Basically, a DOM parser is used
33
+ after splitting the BLAST XML document into subsections.
34
+ Tests show this is faster than a SAX
35
+ parser with Ruby callbacks. To see why libxml2 based Nokogiri is
36
+ fast, see this
37
+ [benchmark](http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html)
38
+ and [xml.com](http://www.xml.com/lpt/a/1703).
30
39
 
31
- The parser is also designed with other optimizations, such as lazy
32
- evaluation, i.e. only creating objects when required, and (in a future
33
- version) parallelization. When parsing a full BLAST result usually
34
- only a few fields are used. By using XPath queries only the relevant
35
- fields are queried.
40
+ Blastxmlparser is designed with other optimizations, such as lazy
41
+ evaluation, i.e., only creating objects when required, and (in a
42
+ future version) parallelization. When parsing a full BLAST result
43
+ usually only a few fields are used. By using XPath queries the parser
44
+ makes sure only the relevant fields are queried.
36
45
 
37
46
  Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
38
47
 
48
+ ```
39
49
  bio-blastxmlparser + Nokogiri DOM (default)
40
50
 
41
51
  real 0m1.259s
@@ -53,29 +63,39 @@ Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
53
63
  real 1m14.548s
54
64
  user 1m13.065s
55
65
  sys 0m0.472s
66
+ ```
56
67
 
57
- == Install
68
+ ## Install
58
69
 
70
+ ```sh
59
71
  gem install bio-blastxmlparser
72
+ ```
60
73
 
61
- Important: the parser is written for Ruby >= 1.9. You can check with
74
+ Important: the parser is written for Ruby >= 1.9. Check with
62
75
 
76
+ ```sh
63
77
  ruby -v
64
78
  gem env
79
+ ```
65
80
 
66
81
  Nokogiri XML parser is required. To install it,
67
82
  the libxml2 libraries and headers need to be installed first, for
68
83
  example on Debian:
69
84
 
85
+ ```sh
70
86
  apt-get install libxslt-dev libxml2-dev
71
87
  gem install bio-blastxmlparser
88
+ ```
89
+
90
+ Nokogiri balks when libxml2 or libxslt is missing on your system (or
91
+ may install something automatically). In the worst case you'll have to
92
+ provide build paths, as described [here](http://nokogiri.org/tutorials/installing_nokogiri.html).
72
93
 
73
- for more installation on other platforms see
74
- http://nokogiri.org/tutorials/installing_nokogiri.html.
94
+ ## Command line usage
75
95
 
76
- == Command line usage
96
+ ### Usage
77
97
 
78
- === Usage
98
+ ```
79
99
  blastxmlparser [options] file(s)
80
100
 
81
101
  -p, --parser name Use full|split parser (default full)
@@ -93,23 +113,24 @@ http://nokogiri.org/tutorials/installing_nokogiri.html.
93
113
  bioblastxmlparser filename(s)
94
114
 
95
115
  Use --help switch for more information
116
+ ```
96
117
 
97
- === Examples
118
+ ### Examples
98
119
 
99
120
  Print result fields of iterations containing 'lcl', using a regex
100
121
 
122
+ ```sh
101
123
  blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
102
-
103
- Print fields where bit_score > 145
104
-
105
- blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
124
+ ```
106
125
 
107
126
  prints a tab-delimited table
108
127
 
128
+ ```sh
109
129
  1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
110
130
  2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
111
131
  3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
112
132
  4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
133
+ ```
113
134
 
114
135
  The second and third columns show the BLAST iteration, and the others
115
136
  relate to the hits.
@@ -117,11 +138,20 @@ relate to the hits.
117
138
  As this is evaluated Ruby, it is also possible to use the XML element
118
139
  names directly
119
140
 
141
+ ```sh
120
142
  blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
143
+ ```
144
+
145
+ Or the shorter
146
+
147
+ ```sh
148
+ blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
149
+ ```
121
150
 
122
151
  And it is possible to print (non default) named fields where E-value < 0.01
123
152
  and hit length > 100. E.g.
124
153
 
154
+ ```sh
125
155
  blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
126
156
 
127
157
  1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
@@ -130,28 +160,104 @@ and hit length > 100. E.g.
130
160
  4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
131
161
  5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
132
162
  etc. etc.
163
+ ```
133
164
 
134
165
  prints the evalue and qseq columns. To output FASTA use --output-fasta
135
166
 
167
+ ```sh
136
168
  blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
169
+ ```
137
170
 
138
171
  which prints matching sequences, where the first field is the accession, followed
139
172
  by query iteration id, and hit_id. E.g.
140
173
 
174
+ ```sh
141
175
  >I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
142
176
  AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
143
177
  >I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
144
178
  AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
145
179
  etc. etc.
180
+ ```
181
+
182
+ ## Modify output
183
+
184
+ To have more output options blastxmlparser can use an [ERB
185
+ template](http://www.stuartellis.eu/articles/erb/) for every match. This is a
186
+ very flexible option that can output textual formats such as JSON, YAML, HTML
187
+ and RDF. Examples are provided in
188
+ [./template](https://github.com/pjotrp/blastxmlparser/tree/master/template). A JSON
189
+ template could be
190
+
191
+ ```Javascript
192
+ { "<%= hit.parent.query_def %>": {
193
+ "num": <%= hit.hit_num %>,
194
+ "id": "<%= hit.hit_id %>",
195
+ "len": <%= hit.len %>,
196
+ "E-value": <%= hsp.evalue %>,
197
+ "bitscore": <%= hsp.bit_score %>,
198
+ "qseq": "<%= hsp.qseq %>",
199
+ "midline": "<%= hsp.midline %>",
200
+ "hseq": "<%= hsp.hseq %>",
201
+ };
202
+ ```
203
+
204
+ To get JSON, run it with
205
+
206
+ ```sh
207
+ blastxmlparser --template template/json.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
208
+ ```
209
+
210
+ ```Javascript
211
+ { "B0511.9d gene=WBGene00015235": {
212
+ "num": 5,
213
+ "id": "gi|268566471|ref|XP_002639731.1|",
214
+ "len": 199,
215
+ "E-value": 1.72502e-22,
216
+ "bitscore": 96.6709,
217
+ "qseq": "MSMLRRPLTQLELSVI------------------VPKCXXXXXXXXXXXXQSEPPRGITRRNLRSADRKNRDVPGPSTGECTRTSIAPNRCEMSFTEVQ-TLTSARTPVAAPTLTLSTPVNPVSSAEMLX----XXXXXXXXXXXASRSGDNDSPLLFNAYDTPQQ--GINXXXXXXXXXXXXXNAHLYAXXXXXXXXXXXXXXXXRSHRH",
218
+ "midline": "MSMLRRPLTQLEL K QSEP GI++RNLRSADR+ +DVPG ++GE + FT+ +++SARTPV+ ++ LSTPVNP SS EM+ SR + D PL+FNAYDTPQQ G + NAHLY+ RS RH",
219
+ "hseq": "MSMLRRPLTQLELCEDDIQWLSEQLAKKETGFEDEVKYEVMDVDEDEPMDQSEPTGGISKRNLRSADRRKKDVPG-TSGEGAQ-----------FTDQGLSISSARTPVSGASVNLSTPVNPSSSNEMMALPPPVRLARAGRRQRDSRVVNGDVPLMFNAYDTPQQPAGGSNGSPTPSDSPESPNAHLYSTPINPTSSSGGPSSNTRSQRH",
220
+ };
221
+ ```
222
+
223
+ Likewise, using the RDF template
224
+
225
+ ```sh
226
+ blastxmlparser --template template/rdf.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
227
+ ```
228
+
229
+ ```ruby
230
+ :Minc_Contig50_77_42056___42484_1_64492 :query :Minc_Contig50_77_42056___42484_1_64492_23
231
+ :Minc_Contig50_77_42056___42484_1_64492_23
232
+ :query_id "lcl|30_0",
233
+ :query_def "Minc_Contig50_77 [42056 - 42484] 1 64492",
234
+ :num 23,
235
+ :accession "Minc02032",
236
+ :id "lcl|Minc02032",
237
+ :len 147,
238
+ :E-value 8.1089e-12,
239
+ :identity 60,
240
+ :align_len 69,
241
+ :bitscore 69.8753,
242
+ :qseq "ATGGGAGATGGAATTGAACCGTCATGGAAAGGGCCCAAACCGAAGCACAACCGACTGTGCCACCATCCA",
243
+ :midline "|||||||||||||||||||| |||||||| | |||||||||||||||||||||||||||||||",
244
+ :hseq "ATGGGAGATGGAATTGAACCATCATGGAATG-------ACCGAAGCACAACCGACTGTGCCACCATCCA",
245
+ :evalue 8.1089e-12 .
246
+ ```
247
+
248
+ ## Additional options
146
249
 
147
250
  To use the low-mem (iterated slower) version of the parser use
148
251
 
252
+ ```sh
149
253
  blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
254
+ ```
150
255
 
151
- == API (Ruby library)
256
+ ## API (Ruby library)
152
257
 
153
258
  To loop through a BLAST result:
154
259
 
260
+ ```ruby
155
261
  >> require 'bio-blastxmlparser'
156
262
  >> fn = 'test/data/nt_example_blastn.m7'
157
263
  >> n = Bio::BlastXMLParser::XmlIterator.new(fn).to_enum
@@ -163,19 +269,23 @@ To loop through a BLAST result:
163
269
  >> end
164
270
  >> end
165
271
  >> end
272
+ ```
166
273
 
167
274
  The next example parses XML using less memory by using a Ruby
168
275
  Iterator
169
276
 
277
+ ```ruby
170
278
  >> blast = Bio::BlastXMLParser::XmlSplitterIterator.new(fn).to_enum
171
279
  >> iter = blast.next
172
280
  >> iter.iter_num
173
281
  => 1
174
282
  >> iter.query_id
175
283
  => "lcl|1_0"
284
+ ```
176
285
 
177
286
  Get the first hit
178
287
 
288
+ ```ruby
179
289
  >> hit = iter.hits.first
180
290
  >> hit.hit_num
181
291
  => 1
@@ -187,14 +297,18 @@ Get the first hit
187
297
  => "I_74685"
188
298
  >> hit.len
189
299
  => 144
300
+ ```
190
301
 
191
302
  Get the parent info
192
303
 
304
+ ```ruby
193
305
  >> hit.parent.query_id
194
306
  => "lcl|1_0"
195
-
307
+ ```
308
+
196
309
  Get the first Hsp
197
310
 
311
+ ```ruby
198
312
  >> hsp = hit.hsps.first
199
313
  >> hsp.hsp_num
200
314
  => 1
@@ -224,6 +338,7 @@ Get the first Hsp
224
338
  => "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
225
339
  >> hsp.midline
226
340
  => "|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"
341
+ ```
227
342
 
228
343
  Unlike BioRuby, this module uses the actual element names in the XML
229
344
  definition, to avoid confusion (if anyone wants a translation,
@@ -232,30 +347,36 @@ feel free to contribute an adaptor).
232
347
  It is also possible to use the XML element names as Strings, rather
233
348
  than methods. E.g.
234
349
 
350
+ ```ruby
235
351
  >> hsp.field("Hsp_bit-score")
236
352
  => "145.205"
237
353
  >> hsp["Hsp_bit-score"]
238
354
  => "145.205"
355
+ ```
239
356
 
240
357
  Note that, when using the element names, the results are always String values.
241
358
 
242
359
  Fetch the next result (Iteration)
243
360
 
361
+ ```ruby
244
362
  >> iter2 = blast.next
245
363
  >> iter2.iter_num
246
364
  >> 2
247
365
  >> iter2.query_id
248
366
  => "lcl|2_0"
367
+ ```
249
368
 
250
369
  etc. etc.
251
370
 
252
371
  For more examples see the files in ./spec
253
372
 
254
- == URL
373
+ ## URL
255
374
 
256
375
  The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
257
376
 
258
- == Copyright
377
+ blastxmlparser is listed at http://biogems.info
378
+
379
+ ## Copyright
259
380
 
260
- Copyright (c) 2011,2012 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
381
+ Copyright (c) 2011-2014 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
261
382
 
data/Rakefile CHANGED
@@ -15,14 +15,10 @@ Jeweler::Tasks.new do |gem|
15
15
  gem.name = "bio-blastxmlparser"
16
16
  gem.homepage = "http://github.com/pjotrp/blastxmlparser"
17
17
  gem.license = "MIT"
18
- gem.summary = %Q{Very fast BLAST XML parser and library for big data}
19
- gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby}
18
+ gem.summary = %Q{Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer}
19
+ gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
20
20
  gem.email = "pjotr.public01@thebird.nl"
21
21
  gem.authors = ["Pjotr Prins"]
22
- # Include your dependencies below. Runtime dependencies are required when using your gem,
23
- # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
- # gem.add_runtime_dependency 'jabber4r', '> 0.1'
25
- # gem.add_development_dependency 'rspec', '> 1.2.3'
26
22
  end
27
23
  Jeweler::RubygemsDotOrgTasks.new
28
24
 
@@ -32,17 +28,11 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
32
28
  spec.pattern = FileList['spec/**/*_spec.rb']
33
29
  end
34
30
 
35
- # RSpec::Core::RakeTask.new(:rcov) do |spec|
36
- # spec.pattern = 'spec/**/*_spec.rb'
37
- # spec.rcov = true
38
- # end
39
-
40
31
  task :default => [ :test, :spec ]
41
32
 
42
33
  require 'rake/testtask'
43
34
  Rake::TestTask.new(:test) do |test|
44
35
  test.libs << 'lib' << 'test'
45
- # test.pattern = 'test/**/test_*.rb' # breaks in 1.9.3
46
36
  test.test_files = Dir.glob("test/**/test_*.rb")
47
37
  test.verbose = true
48
38
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.1
1
+ 1.1.2
data/bin/blastxmlparser CHANGED
@@ -4,83 +4,21 @@
4
4
  # Author:: Pjotr Prins
5
5
  # License:: MIT License
6
6
  #
7
- # Copyright (C) 2010-2013 Pjotr Prins <pjotr.prins@thebird.nl>
7
+ # Copyright (C) 2010-2014 Pjotr Prins <pjotr.prins@thebird.nl>
8
8
 
9
9
  rootpath = File.dirname(File.dirname(__FILE__))
10
10
  $: << File.join(rootpath,'lib')
11
11
 
12
12
  BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
13
13
 
14
- $stderr.print "BioRuby BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2011 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
14
+ $stderr.print "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
15
15
 
16
16
  USAGE = <<EOM
17
17
 
18
- bioblastxmlparser filename(s)
18
+ blastxmlparser filename(s)
19
19
 
20
20
  Use --help switch for more information
21
21
 
22
- == Examples
23
-
24
- Print result fields of iterations containing 'lcl', using a regex
25
-
26
- blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
27
-
28
- Print fields where bit_score > 145
29
-
30
- blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
31
-
32
- prints a tab delimited
33
-
34
- 1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
35
- 2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
36
- 3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
37
- 4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
38
-
39
- The second and third column show the BLAST iteration, and the others
40
- relate to the hits.
41
-
42
- As this is evaluated Ruby, it is also possible to use the XML element
43
- names directly
44
-
45
- blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
46
-
47
- And it is possible to print (non default) named fields where E-value < 0.001
48
- and hit length > 100. E.g.
49
-
50
- blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
51
-
52
- 1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
53
- 2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
54
- 3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
55
- 4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
56
- 5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
57
- etc. etc.
58
-
59
- prints the evalue and qseq columns. To output FASTA use --output-fasta
60
-
61
- blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
62
-
63
- which prints matching sequences, where the first field is the accession, followed
64
- by query iteration id, and hit_id. E.g.
65
-
66
- >I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
67
- AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
68
- >I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
69
- AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
70
- etc. etc.
71
-
72
- To use the low-mem (iterated slower) version of the parser use
73
-
74
- blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
75
-
76
- == URL
77
-
78
- The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
79
-
80
- == Copyright
81
-
82
- Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
83
-
84
22
  EOM
85
23
 
86
24
  if ARGV.size == 0
@@ -114,16 +52,22 @@ opts = OptionParser.new do |o|
114
52
  options.parser = p.to_sym
115
53
  end
116
54
 
117
- o.on("--output-fasta","Output FASTA") do |b|
118
- options.output_fasta = true
55
+ o.on("-e filter","--exec filter",String, "Evaluate filter") do |s|
56
+ options.exec = s
119
57
  end
120
-
121
- o.on("-n fields","--named fields",String, "Set named fields") do |s|
58
+
59
+ o.separator ""
60
+
61
+ o.on("-n fields","--named fields",String, "Print named fields") do |s|
122
62
  options.fields = s.split(/,/)
123
63
  end
64
+ o.on("--output-fasta","Output FASTA") do |b|
65
+ options.output_fasta = true
66
+ end
124
67
 
125
- o.on("-e filter","--exec filter",String, "Execute filter") do |s|
126
- options.exec = s
68
+ o.on("-t erb","--template erb",String, "Use ERB template for output") do |s|
69
+ require 'erb'
70
+ options.template = s
127
71
  end
128
72
 
129
73
  o.separator ""
@@ -156,6 +100,13 @@ begin
156
100
  Bio::Log::CLI.configure('bio-blastxmlparser')
157
101
  logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
158
102
 
103
+ if options[:template]
104
+ include BioRdf
105
+ fn = options.template
106
+ raise "No template #{fn}!" if not File.exist?(fn)
107
+ template = ERB.new(File.read(fn))
108
+ end
109
+
159
110
  ARGV.each do | fn |
160
111
  logger.info("XML parsing #{fn}")
161
112
  n = if options.parser == :split
@@ -173,10 +124,13 @@ begin
173
124
  true
174
125
  end
175
126
  if do_print
176
- if options.output_fasta
127
+ if template
128
+ print template.result(binding)
129
+ elsif options.output_fasta
177
130
  print ">"+hit.accession+' '+iter.iter_num.to_s+'|'+iter.query_id+' '+hit.hit_id+' '+hit.hit_def+"\n"
178
131
  print hsp.qseq+"\n"
179
132
  else
133
+ # Default output
180
134
  if options.fields
181
135
  print i,"\t"
182
136
  options.fields.each do | f |
@@ -5,17 +5,17 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-blastxmlparser"
8
- s.version = "1.1.1"
8
+ s.version = "1.1.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2013-02-07"
13
- s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby"
12
+ s.date = "2014-09-02"
13
+ s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["blastxmlparser"]
16
16
  s.extra_rdoc_files = [
17
17
  "LICENSE.txt",
18
- "README.rdoc"
18
+ "README.md"
19
19
  ]
20
20
  s.files = [
21
21
  ".document",
@@ -24,7 +24,7 @@ Gem::Specification.new do |s|
24
24
  "Gemfile",
25
25
  "Gemfile.lock",
26
26
  "LICENSE.txt",
27
- "README.rdoc",
27
+ "README.md",
28
28
  "Rakefile",
29
29
  "VERSION",
30
30
  "bin/blastxmlparser",
@@ -33,6 +33,7 @@ Gem::Specification.new do |s|
33
33
  "lib/bio/db/blast/parser/nokogiri.rb",
34
34
  "lib/bio/db/blast/xmliterator.rb",
35
35
  "lib/bio/db/blast/xmlsplitter.rb",
36
+ "lib/bio/writers/rdf.rb",
36
37
  "sample/bioruby.rb",
37
38
  "sample/blastxmlparserdemo.rb",
38
39
  "sample/libxml_sax.rb",
@@ -41,6 +42,8 @@ Gem::Specification.new do |s|
41
42
  "sample/nokogiri_split_dom.rb",
42
43
  "spec/bio-blastxmlparser_spec.rb",
43
44
  "spec/spec_helper.rb",
45
+ "template/json.erb",
46
+ "template/rdf.erb",
44
47
  "test/data/aa_example.fasta",
45
48
  "test/data/aa_example_blastp.m7",
46
49
  "test/data/nt_example.fasta",
@@ -50,37 +53,37 @@ Gem::Specification.new do |s|
50
53
  s.homepage = "http://github.com/pjotrp/blastxmlparser"
51
54
  s.licenses = ["MIT"]
52
55
  s.require_paths = ["lib"]
53
- s.rubygems_version = "1.8.10"
54
- s.summary = "Very fast BLAST XML parser and library for big data"
56
+ s.rubygems_version = "2.0.3"
57
+ s.summary = "Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
55
58
 
56
59
  if s.respond_to? :specification_version then
57
- s.specification_version = 3
60
+ s.specification_version = 4
58
61
 
59
62
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
60
- s.add_runtime_dependency(%q<bio-logger>, [">= 1.0.0"])
61
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.0"])
62
- s.add_development_dependency(%q<rake>, [">= 0.9.2.2"])
63
+ s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
64
+ s.add_runtime_dependency(%q<nokogiri>, ["~> 1.6.0"])
65
+ s.add_development_dependency(%q<rake>, [">= 0"])
63
66
  s.add_development_dependency(%q<bundler>, [">= 0"])
64
- s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
65
- s.add_development_dependency(%q<rspec>, [">= 2.3.0"])
66
- s.add_development_dependency(%q<rdoc>, [">= 2.4.2"])
67
+ s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
68
+ s.add_development_dependency(%q<rspec>, [">= 0"])
69
+ s.add_development_dependency(%q<rdoc>, [">= 0"])
67
70
  else
68
- s.add_dependency(%q<bio-logger>, [">= 1.0.0"])
69
- s.add_dependency(%q<nokogiri>, [">= 1.5.0"])
70
- s.add_dependency(%q<rake>, [">= 0.9.2.2"])
71
+ s.add_dependency(%q<bio-logger>, [">= 0"])
72
+ s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
73
+ s.add_dependency(%q<rake>, [">= 0"])
71
74
  s.add_dependency(%q<bundler>, [">= 0"])
72
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
73
- s.add_dependency(%q<rspec>, [">= 2.3.0"])
74
- s.add_dependency(%q<rdoc>, [">= 2.4.2"])
75
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
76
+ s.add_dependency(%q<rspec>, [">= 0"])
77
+ s.add_dependency(%q<rdoc>, [">= 0"])
75
78
  end
76
79
  else
77
- s.add_dependency(%q<bio-logger>, [">= 1.0.0"])
78
- s.add_dependency(%q<nokogiri>, [">= 1.5.0"])
79
- s.add_dependency(%q<rake>, [">= 0.9.2.2"])
80
+ s.add_dependency(%q<bio-logger>, [">= 0"])
81
+ s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
82
+ s.add_dependency(%q<rake>, [">= 0"])
80
83
  s.add_dependency(%q<bundler>, [">= 0"])
81
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
82
- s.add_dependency(%q<rspec>, [">= 2.3.0"])
83
- s.add_dependency(%q<rdoc>, [">= 2.4.2"])
84
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
85
+ s.add_dependency(%q<rspec>, [">= 0"])
86
+ s.add_dependency(%q<rdoc>, [">= 0"])
84
87
  end
85
88
  end
86
89
 
@@ -17,3 +17,4 @@ Bio::Log::LoggerPlus.new('bio-blastxmlparser')
17
17
  require 'bio/db/blast/parser/nokogiri'
18
18
  require 'bio/db/blast/xmlsplitter'
19
19
  require 'bio/db/blast/xmliterator'
20
+ require 'bio/writers/rdf'
@@ -0,0 +1,79 @@
1
+ # RDF support module. Original is part of bioruby-rdf by Pjotr Prins
2
+ #
3
+ module BioRdf
4
+
5
+ module RDF
6
+
7
+ def RDF::valid_uri? uri
8
+ uri =~ /^([!#$&-;=?_a-z~]|%[0-9a-f]{2})+$/i
9
+ end
10
+
11
+ def RDF::escape_string_literal(literal)
12
+ s = literal.to_s
13
+ # Put a slash before every double quote if there is no such slash already
14
+ s = s.gsub(/(?<!\\)"/,'\"')
15
+ # Put a slash before a single slash if it is not \["utnr>\]
16
+ if s =~ /[^\\]\\[^\\]/
17
+ s2 = []
18
+ s.each_char.with_index { |c,i|
19
+ res = c
20
+ if i>0 and c == '\\' and s[i-1] != '\\' and s[i+1] !~ /^[uUtnr\\"]/
21
+ res = '\\' + c
22
+ end
23
+ # p [i,c,s[i+1],res]
24
+ s2 << res
25
+ }
26
+ s = s2.join('')
27
+ end
28
+ s
29
+ end
30
+
31
+ def RDF::stringify_literal(literal)
32
+ RDF::escape_string_literal(literal.to_s)
33
+ end
34
+
35
+ def RDF::quoted_stringify_literal(literal)
36
+ '"' + stringify_literal(literal) + '"'
37
+ end
38
+ end
39
+
40
+ module Turtle
41
+
42
+ def Turtle::stringify_literal(literal)
43
+ RDF::stringify_literal(literal)
44
+ end
45
+
46
+ def Turtle::identifier(id)
47
+ raise "Illegal identifier #{id}" if id != Turtle::mangle_identifier(id)
48
+ end
49
+
50
+ # Replace letters/symbols that are not allowed in a Turtle identifier
51
+ # (short hand URI). This should be the definite mangler and replace the
52
+ # ones in bioruby-table and bio-exominer. Manglers are useful when using
53
+ # data from other sources and trying to transform them into simple RDF
54
+ # identifiers.
55
+
56
+ def Turtle::mangle_identifier(s)
57
+ id = s.strip.gsub(/[^[:print:]]/, '').gsub(/[#)(,]/,"").gsub(/[%]/,"perc").gsub(/(\s|\.|\$|\/|\\|\>)+/,"_")
58
+ id = id.gsub(/\[|\]/,'')
59
+ # id = URI::escape(id)
60
+ id = id.gsub(/\|/,'_')
61
+ id = id.gsub(/\-|:/,'_')
62
+ if id != s
63
+ # Don't want Bio depency in templates!
64
+ # logger = Bio::Log::LoggerPlus.new 'bio-rdf'
65
+ # logger.warn "\nWARNING: Changed identifier <#{s}> to <#{id}>"
66
+ $stderr.print "\nWARNING: Changed identifier <#{s}> to <#{id}>"
67
+ end
68
+ if not RDF::valid_uri?(id)
69
+ raise "Invalid URI after mangling <#{s}> to <#{id}>!"
70
+ end
71
+ valid_id = if id =~ /^\d/
72
+ 'r' + id
73
+ else
74
+ id
75
+ end
76
+ valid_id # we certainly hope so!
77
+ end
78
+ end
79
+ end
data/template/json.erb ADDED
@@ -0,0 +1,15 @@
1
+ { "<%= hit.parent.query_def %>": {
2
+ "query_id": "<%= hit.parent.query_id %>",
3
+ "num": <%= hit.hit_num %>,
4
+ "accession": "<%= hit.accession %>",
5
+ "id": "<%= hit.hit_id %>",
6
+ "len": <%= hit.len %>,
7
+ "E-value": <%= hsp.evalue %>,
8
+ "identity": <%= hsp.identity %>,
9
+ "align_len": <%= hsp.align_len %>,
10
+ "bitscore": <%= hsp.bit_score %>,
11
+ "qseq": "<%= hsp.qseq %>",
12
+ "midline": "<%= hsp.midline %>",
13
+ "hseq": "<%= hsp.hseq %>",
14
+ };
15
+
data/template/rdf.erb ADDED
@@ -0,0 +1,21 @@
1
<%
  # Renders Turtle for one hit/HSP pair; reads `hit` and `hsp` from the
  # template binding.
  # blastid: the query definition mangled into a Turtle-safe identifier.
  # id:      blastid plus the hit number, used as the subject of the hit.
  # Every statement ends with ' .'; predicate/object pairs that share a
  # subject are separated by ';' (',' would start another object for the
  # same predicate).
  blastid = Turtle::mangle_identifier(hit.parent.query_def)
  id = blastid+'_'+hit.hit_num.to_s
%>
:<%= blastid %> :query :<%= id %> .
:<%= id %>
  :query_id "<%= hit.parent.query_id %>" ;
  :query_def "<%= hit.parent.query_def %>" ;
  :num <%= hit.hit_num %> ;
  :accession "<%= hit.accession %>" ;
  :id "<%= hit.hit_id %>" ;
  :len <%= hit.len %> ;
  :E-value <%= hsp.evalue %> ;
  :identity <%= hsp.identity %> ;
  :align_len <%= hsp.align_len %> ;
  :bitscore <%= hsp.bit_score %> ;
  :qseq "<%= hsp.qseq %>" ;
  :midline "<%= hsp.midline %>" ;
  :hseq "<%= hsp.hseq %>" ;
  :evalue <%= hsp.evalue %> .
21
+
metadata CHANGED
@@ -1,110 +1,130 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-blastxmlparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
5
- prerelease:
4
+ version: 1.1.2
6
5
  platform: ruby
7
6
  authors:
8
7
  - Pjotr Prins
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-02-07 00:00:00.000000000Z
11
+ date: 2014-09-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bio-logger
16
- requirement: &24214160 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
- version: 1.0.0
19
+ version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
- version_requirements: *24214160
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: nokogiri
27
- requirement: &24213120 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - ! '>='
31
+ - - "~>"
31
32
  - !ruby/object:Gem::Version
32
- version: 1.5.0
33
+ version: 1.6.0
33
34
  type: :runtime
34
35
  prerelease: false
35
- version_requirements: *24213120
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.6.0
36
41
  - !ruby/object:Gem::Dependency
37
42
  name: rake
38
- requirement: &24212220 !ruby/object:Gem::Requirement
39
- none: false
43
+ requirement: !ruby/object:Gem::Requirement
40
44
  requirements:
41
- - - ! '>='
45
+ - - ">="
42
46
  - !ruby/object:Gem::Version
43
- version: 0.9.2.2
47
+ version: '0'
44
48
  type: :development
45
49
  prerelease: false
46
- version_requirements: *24212220
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
47
55
  - !ruby/object:Gem::Dependency
48
56
  name: bundler
49
- requirement: &24211440 !ruby/object:Gem::Requirement
50
- none: false
57
+ requirement: !ruby/object:Gem::Requirement
51
58
  requirements:
52
- - - ! '>='
59
+ - - ">="
53
60
  - !ruby/object:Gem::Version
54
61
  version: '0'
55
62
  type: :development
56
63
  prerelease: false
57
- version_requirements: *24211440
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: jeweler
60
- requirement: &24174660 !ruby/object:Gem::Requirement
61
- none: false
71
+ requirement: !ruby/object:Gem::Requirement
62
72
  requirements:
63
- - - ~>
73
+ - - "~>"
64
74
  - !ruby/object:Gem::Version
65
- version: 1.8.4
75
+ version: 2.0.1
66
76
  type: :development
67
77
  prerelease: false
68
- version_requirements: *24174660
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 2.0.1
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
- requirement: &24173840 !ruby/object:Gem::Requirement
72
- none: false
85
+ requirement: !ruby/object:Gem::Requirement
73
86
  requirements:
74
- - - ! '>='
87
+ - - ">="
75
88
  - !ruby/object:Gem::Version
76
- version: 2.3.0
89
+ version: '0'
77
90
  type: :development
78
91
  prerelease: false
79
- version_requirements: *24173840
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
80
97
  - !ruby/object:Gem::Dependency
81
98
  name: rdoc
82
- requirement: &24173100 !ruby/object:Gem::Requirement
83
- none: false
99
+ requirement: !ruby/object:Gem::Requirement
84
100
  requirements:
85
- - - ! '>='
101
+ - - ">="
86
102
  - !ruby/object:Gem::Version
87
- version: 2.4.2
103
+ version: '0'
88
104
  type: :development
89
105
  prerelease: false
90
- version_requirements: *24173100
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
91
111
  description: Fast big data BLAST XML parser and library; this libxml2 based version
92
- is 50x faster than BioRuby
112
+ is 50x faster than BioRuby and comes with a nice CLI
93
113
  email: pjotr.public01@thebird.nl
94
114
  executables:
95
115
  - blastxmlparser
96
116
  extensions: []
97
117
  extra_rdoc_files:
98
118
  - LICENSE.txt
99
- - README.rdoc
119
+ - README.md
100
120
  files:
101
- - .document
102
- - .rspec
103
- - .travis.yml
121
+ - ".document"
122
+ - ".rspec"
123
+ - ".travis.yml"
104
124
  - Gemfile
105
125
  - Gemfile.lock
106
126
  - LICENSE.txt
107
- - README.rdoc
127
+ - README.md
108
128
  - Rakefile
109
129
  - VERSION
110
130
  - bin/blastxmlparser
@@ -113,6 +133,7 @@ files:
113
133
  - lib/bio/db/blast/parser/nokogiri.rb
114
134
  - lib/bio/db/blast/xmliterator.rb
115
135
  - lib/bio/db/blast/xmlsplitter.rb
136
+ - lib/bio/writers/rdf.rb
116
137
  - sample/bioruby.rb
117
138
  - sample/blastxmlparserdemo.rb
118
139
  - sample/libxml_sax.rb
@@ -121,6 +142,8 @@ files:
121
142
  - sample/nokogiri_split_dom.rb
122
143
  - spec/bio-blastxmlparser_spec.rb
123
144
  - spec/spec_helper.rb
145
+ - template/json.erb
146
+ - template/rdf.erb
124
147
  - test/data/aa_example.fasta
125
148
  - test/data/aa_example_blastp.m7
126
149
  - test/data/nt_example.fasta
@@ -129,29 +152,25 @@ files:
129
152
  homepage: http://github.com/pjotrp/blastxmlparser
130
153
  licenses:
131
154
  - MIT
155
+ metadata: {}
132
156
  post_install_message:
133
157
  rdoc_options: []
134
158
  require_paths:
135
159
  - lib
136
160
  required_ruby_version: !ruby/object:Gem::Requirement
137
- none: false
138
161
  requirements:
139
- - - ! '>='
162
+ - - ">="
140
163
  - !ruby/object:Gem::Version
141
164
  version: '0'
142
- segments:
143
- - 0
144
- hash: -3287387609254152406
145
165
  required_rubygems_version: !ruby/object:Gem::Requirement
146
- none: false
147
166
  requirements:
148
- - - ! '>='
167
+ - - ">="
149
168
  - !ruby/object:Gem::Version
150
169
  version: '0'
151
170
  requirements: []
152
171
  rubyforge_project:
153
- rubygems_version: 1.8.10
172
+ rubygems_version: 2.0.3
154
173
  signing_key:
155
- specification_version: 3
156
- summary: Very fast BLAST XML parser and library for big data
174
+ specification_version: 4
175
+ summary: Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer
157
176
  test_files: []