bio-blastxmlparser 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 31b42217bb809cde8d5ef3c06d11c6c9123c6413
4
+ data.tar.gz: 5d23e19fb8c774f7edaffd03bbcb156800679f7f
5
+ SHA512:
6
+ metadata.gz: de99019d564d5ea759f6e3ef330b8e9e68f6a7bbdb0578c34699ad7f716da16562d702da935bcfc5e3baa9a9e673b2ad99a62ae07210c4feea144134ad822e94
7
+ data.tar.gz: 24bb61197ff82129b404dcaf928b38ac9f7b9d5c10b90a630032253e468adc558c1675639b11c4bc60f7da8082626b3b288c2f0b9e6328e0ef2525b3a79453a8
data/.travis.yml CHANGED
@@ -1,9 +1,9 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 1.9.2
4
3
  - 1.9.3
5
4
  - jruby-19mode # JRuby in 1.9 mode
6
5
  - rbx-19mode
6
+ - 2.1.0
7
7
  # - 1.8.7
8
8
  # - jruby-18mode # JRuby in 1.8 mode
9
9
  # - rbx-18mode
data/Gemfile CHANGED
@@ -1,21 +1,14 @@
1
1
  source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
-
6
2
  # Runtime dependencies
7
- # gem "bio", ">= 1.3.1"
8
- gem "bio-logger", ">= 1.0.0"
9
- gem "nokogiri", ">= 1.5.0"
3
+ gem "bio-logger"
4
+ gem "nokogiri", "~>1.6.0"
10
5
 
11
6
  # Add dependencies to develop your gem here.
12
7
  # Include everything needed to run rake, tests, features, etc.
13
8
  group :development do
14
- # gem "rspec", "~> 2.3.0"
15
- gem "rake", ">= 0.9.2.2"
9
+ gem "rake"
16
10
  gem "bundler"
17
- gem "jeweler", "~> 1.8.4"
18
- gem "rspec", ">= 2.3.0"
19
- gem "rdoc", ">= 2.4.2"
20
- # gem "rcov", ">= 0"
11
+ gem "jeweler", "~> 2.0.1"
12
+ gem "rspec"
13
+ gem "rdoc"
21
14
  end
data/Gemfile.lock CHANGED
@@ -1,18 +1,51 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
+ addressable (2.3.6)
4
5
  bio-logger (1.0.0)
5
6
  log4r (>= 1.1.9)
7
+ builder (3.2.2)
8
+ descendants_tracker (0.0.4)
9
+ thread_safe (~> 0.3, >= 0.3.1)
6
10
  diff-lcs (1.1.3)
7
- git (1.2.5)
8
- jeweler (1.8.4)
9
- bundler (~> 1.0)
11
+ faraday (0.9.0)
12
+ multipart-post (>= 1.2, < 3)
13
+ git (1.2.8)
14
+ github_api (0.12.1)
15
+ addressable (~> 2.3)
16
+ descendants_tracker (~> 0.0.4)
17
+ faraday (~> 0.8, < 0.10)
18
+ hashie (>= 3.2)
19
+ multi_json (>= 1.7.5, < 2.0)
20
+ nokogiri (~> 1.6.3)
21
+ oauth2
22
+ hashie (3.3.1)
23
+ highline (1.6.21)
24
+ jeweler (2.0.1)
25
+ builder
26
+ bundler (>= 1.0)
10
27
  git (>= 1.2.5)
28
+ github_api
29
+ highline (>= 1.6.15)
30
+ nokogiri (>= 1.5.10)
11
31
  rake
12
32
  rdoc
13
33
  json (1.6.5)
34
+ jwt (1.0.0)
14
35
  log4r (1.1.9)
15
- nokogiri (1.5.0)
36
+ mini_portile (0.6.0)
37
+ multi_json (1.10.1)
38
+ multi_xml (0.5.5)
39
+ multipart-post (2.0.0)
40
+ nokogiri (1.6.3.1)
41
+ mini_portile (= 0.6.0)
42
+ oauth2 (1.0.0)
43
+ faraday (>= 0.8, < 0.10)
44
+ jwt (~> 1.0)
45
+ multi_json (~> 1.3)
46
+ multi_xml (~> 0.5)
47
+ rack (~> 1.2)
48
+ rack (1.5.2)
16
49
  rake (0.9.2.2)
17
50
  rdoc (3.12)
18
51
  json (~> 1.4)
@@ -24,15 +57,16 @@ GEM
24
57
  rspec-expectations (2.8.0)
25
58
  diff-lcs (~> 1.1.2)
26
59
  rspec-mocks (2.8.0)
60
+ thread_safe (0.3.4)
27
61
 
28
62
  PLATFORMS
29
63
  ruby
30
64
 
31
65
  DEPENDENCIES
32
- bio-logger (>= 1.0.0)
66
+ bio-logger
33
67
  bundler
34
- jeweler (~> 1.8.4)
35
- nokogiri (>= 1.5.0)
36
- rake (>= 0.9.2.2)
37
- rdoc (>= 2.4.2)
38
- rspec (>= 2.3.0)
68
+ jeweler (~> 2.0.1)
69
+ nokogiri (~> 1.6.0)
70
+ rake
71
+ rdoc
72
+ rspec
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011 Pjotr Prins
1
+ Copyright (c) 2011-2014 Pjotr Prins
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -1,41 +1,51 @@
1
- blastxmlparser is listed at http://biogems.info
1
+ [![Build Status](https://travis-ci.org/pjotrp/blastxmlparser.svg?branch=master)](https://travis-ci.org/pjotrp/blastxmlparser)
2
+
3
+ # bio-blastxmlparser
2
4
 
3
- = bio-blastxmlparser
5
+ blastxmlparser is a very fast big-data BLAST XML file parser, which can be used
6
+ as a command line utility. Use blastxmlparser to:
4
7
 
5
- blastxmlparser is a very fast big-data BLAST XML file parser, which can be
6
- used as command line utility, or as a Ruby library. Rather than
7
- loading everything in memory, XML is parsed by BLAST query
8
- (Iteration). Not only has this the advantage of low memory use, it
9
- also shows results early, and it may be faster when IO continues in
10
- parallel (disk read-ahead).
8
+ * Parse BLAST XML
9
+ * Filter output
10
+ * Generate FASTA, JSON, YAML, RDF, HTML, tabular output etc.
11
+
12
+ Rather than loading everything in memory, XML is parsed by BLAST query
13
+ (Iteration). Not only has this the advantage of low memory use, it also shows
14
+ results early, and it may be faster when IO continues in parallel (disk
15
+ read-ahead).
11
16
 
12
17
  Next to the API, blastxmlparser comes as a command line utility, which
13
18
  can be used to filter results and requires no understanding of Ruby.
14
19
 
15
- = Quick start
20
+ # Quick start
16
21
 
22
+ ```sh
17
23
  gem install bio-blastxmlparser
18
24
  blastxmlparser --help
25
+ ```
19
26
 
20
27
  (see Installation, below, if it does not work)
21
28
 
22
- == Performance
29
+ ## Performance
23
30
 
24
- XML parsing is expensive. blastxmlparser uses the fast Nokogiri C, or Java, XML
25
- parsers, based on libxml2. Basically, a DOM parser is used for subsections of a
26
- document. Tests show this is faster than a SAX parser with Ruby callbacks. To
27
- see why libxml2 based Nokogiri is fast, see
28
- http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html and
29
- http://www.xml.com/lpt/a/1703.
31
+ XML parsing is expensive. blastxmlparser can use the fast Nokogiri C, or
32
+ Java XML parsers, based on libxml2. Basically, a DOM parser is used
33
+ after splitting the BLAST XML document into subsections.
34
+ Tests show this is faster than a SAX
35
+ parser with Ruby callbacks. To see why libxml2 based Nokogiri is
36
+ fast, see this
37
+ [benchmark](http://www.rubyinside.com/ruby-xml-performance-benchmarks-1641.html)
38
+ and [xml.com](http://www.xml.com/lpt/a/1703).
30
39
 
31
- The parser is also designed with other optimizations, such as lazy
32
- evaluation, i.e. only creating objects when required, and (in a future
33
- version) parallelization. When parsing a full BLAST result usually
34
- only a few fields are used. By using XPath queries only the relevant
35
- fields are queried.
40
+ Blastxmlparser is designed with other optimizations, such as lazy
41
+ evaluation, i.e., only creating objects when required, and (in a
42
+ future version) parallelization. When parsing a full BLAST result
43
+ usually only a few fields are used. By using XPath queries the parser
44
+ makes sure only the relevant fields are queried.
36
45
 
37
46
  Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
38
47
 
48
+ ```
39
49
  bio-blastxmlparser + Nokogiri DOM (default)
40
50
 
41
51
  real 0m1.259s
@@ -53,29 +63,39 @@ Timings for parsing test/data/nt_example_blastn.m7 (file size 3.4Mb)
53
63
  real 1m14.548s
54
64
  user 1m13.065s
55
65
  sys 0m0.472s
66
+ ```
56
67
 
57
- == Install
68
+ ## Install
58
69
 
70
+ ```sh
59
71
  gem install bio-blastxmlparser
72
+ ```
60
73
 
61
- Important: the parser is written for Ruby >= 1.9. You can check with
74
+ Important: the parser is written for Ruby >= 1.9. Check with
62
75
 
76
+ ```sh
63
77
  ruby -v
64
78
  gem env
79
+ ```
65
80
 
66
81
  Nokogiri XML parser is required. To install it,
67
82
  the libxml2 libraries and headers need to be installed first, for
68
83
  example on Debian:
69
84
 
85
+ ```sh
70
86
  apt-get install libxslt-dev libxml2-dev
71
87
  gem install bio-blastxmlparser
88
+ ```
89
+
90
+ Nokogiri balks when libxml2 or libxslt is missing on your system (or
91
+ may install something automatically). In the worst case you'll have to
92
+ provide build paths, as described [here](http://nokogiri.org/tutorials/installing_nokogiri.html).
72
93
 
73
- for more installation on other platforms see
74
- http://nokogiri.org/tutorials/installing_nokogiri.html.
94
+ ## Command line usage
75
95
 
76
- == Command line usage
96
+ ### Usage
77
97
 
78
- === Usage
98
+ ```
79
99
  blastxmlparser [options] file(s)
80
100
 
81
101
  -p, --parser name Use full|split parser (default full)
@@ -93,23 +113,24 @@ http://nokogiri.org/tutorials/installing_nokogiri.html.
93
113
  bioblastxmlparser filename(s)
94
114
 
95
115
  Use --help switch for more information
116
+ ```
96
117
 
97
- === Examples
118
+ ### Examples
98
119
 
99
120
  Print result fields of iterations containing 'lcl', using a regex
100
121
 
122
+ ```sh
101
123
  blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
102
-
103
- Print fields where bit_score > 145
104
-
105
- blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
124
+ ```
106
125
 
107
126
  prints a tab-delimited table
108
127
 
128
+ ```sh
109
129
  1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
110
130
  2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
111
131
  3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
112
132
  4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
133
+ ```
113
134
 
114
135
  The second and third columns show the BLAST iteration, and the others
115
136
  relate to the hits.
@@ -117,11 +138,20 @@ relate to the hits.
117
138
  As this is evaluated Ruby, it is also possible to use the XML element
118
139
  names directly
119
140
 
141
+ ```sh
120
142
  blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
143
+ ```
144
+
145
+ Or the shorter
146
+
147
+ ```sh
148
+ blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
149
+ ```
121
150
 
122
151
  And it is possible to print (non default) named fields where E-value < 0.01
123
152
  and hit length > 100. E.g.
124
153
 
154
+ ```sh
125
155
  blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
126
156
 
127
157
  1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
@@ -130,28 +160,104 @@ and hit length > 100. E.g.
130
160
  4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
131
161
  5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
132
162
  etc. etc.
163
+ ```
133
164
 
134
165
  prints the evalue and qseq columns. To output FASTA use --output-fasta
135
166
 
167
+ ```sh
136
168
  blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
169
+ ```
137
170
 
138
171
  which prints matching sequences, where the first field is the accession, followed
139
172
  by query iteration id, and hit_id. E.g.
140
173
 
174
+ ```sh
141
175
  >I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
142
176
  AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
143
177
  >I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
144
178
  AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
145
179
  etc. etc.
180
+ ```
181
+
182
+ ## Modify output
183
+
184
+ To have more output options blastxmlparser can use an [ERB
185
+ template](http://www.stuartellis.eu/articles/erb/) for every match. This is a
186
+ very flexible option that can output textual formats such as JSON, YAML, HTML
187
+ and RDF. Examples are provided in
188
+ [./template](https://github.com/pjotrp/blastxmlparser/tree/master/template). A JSON
189
+ template could be
190
+
191
+ ```Javascript
192
+ { "<%= hit.parent.query_def %>": {
193
+ "num": <%= hit.hit_num %>,
194
+ "id": "<%= hit.hit_id %>",
195
+ "len": <%= hit.len %>,
196
+ "E-value": <%= hsp.evalue %>,
197
+ "bitscore": <%= hsp.bit_score %>,
198
+ "qseq": "<%= hsp.qseq %>",
199
+ "midline": "<%= hsp.midline %>",
200
+ "hseq": "<%= hsp.hseq %>",
201
+ };
202
+ ```
203
+
204
+ To get JSON, run it with
205
+
206
+ ```sh
207
+ blastxmlparser --template template/json.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
208
+ ```
209
+
210
+ ```Javascript
211
+ { "B0511.9d gene=WBGene00015235": {
212
+ "num": 5,
213
+ "id": "gi|268566471|ref|XP_002639731.1|",
214
+ "len": 199,
215
+ "E-value": 1.72502e-22,
216
+ "bitscore": 96.6709,
217
+ "qseq": "MSMLRRPLTQLELSVI------------------VPKCXXXXXXXXXXXXQSEPPRGITRRNLRSADRKNRDVPGPSTGECTRTSIAPNRCEMSFTEVQ-TLTSARTPVAAPTLTLSTPVNPVSSAEMLX----XXXXXXXXXXXASRSGDNDSPLLFNAYDTPQQ--GINXXXXXXXXXXXXXNAHLYAXXXXXXXXXXXXXXXXRSHRH",
218
+ "midline": "MSMLRRPLTQLEL K QSEP GI++RNLRSADR+ +DVPG ++GE + FT+ +++SARTPV+ ++ LSTPVNP SS EM+ SR + D PL+FNAYDTPQQ G + NAHLY+ RS RH",
219
+ "hseq": "MSMLRRPLTQLELCEDDIQWLSEQLAKKETGFEDEVKYEVMDVDEDEPMDQSEPTGGISKRNLRSADRRKKDVPG-TSGEGAQ-----------FTDQGLSISSARTPVSGASVNLSTPVNPSSSNEMMALPPPVRLARAGRRQRDSRVVNGDVPLMFNAYDTPQQPAGGSNGSPTPSDSPESPNAHLYSTPINPTSSSGGPSSNTRSQRH",
220
+ };
221
+ ```
222
+
223
+ Likewise, using the RDF template
224
+
225
+ ```sh
226
+ blastxmlparser --template template/rdf.erb -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
227
+ ```
228
+
229
+ ```ruby
230
+ :Minc_Contig50_77_42056___42484_1_64492 :query :Minc_Contig50_77_42056___42484_1_64492_23
231
+ :Minc_Contig50_77_42056___42484_1_64492_23
232
+ :query_id "lcl|30_0",
233
+ :query_def "Minc_Contig50_77 [42056 - 42484] 1 64492",
234
+ :num 23,
235
+ :accession "Minc02032",
236
+ :id "lcl|Minc02032",
237
+ :len 147,
238
+ :E-value 8.1089e-12,
239
+ :identity 60,
240
+ :align_len 69,
241
+ :bitscore 69.8753,
242
+ :qseq "ATGGGAGATGGAATTGAACCGTCATGGAAAGGGCCCAAACCGAAGCACAACCGACTGTGCCACCATCCA",
243
+ :midline "|||||||||||||||||||| |||||||| | |||||||||||||||||||||||||||||||",
244
+ :hseq "ATGGGAGATGGAATTGAACCATCATGGAATG-------ACCGAAGCACAACCGACTGTGCCACCATCCA",
245
+ :evalue 8.1089e-12 .
246
+ ```
247
+
248
+ ## Additional options
146
249
 
147
250
  To use the low-mem (iterated slower) version of the parser use
148
251
 
252
+ ```sh
149
253
  blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
254
+ ```
150
255
 
151
- == API (Ruby library)
256
+ ## API (Ruby library)
152
257
 
153
258
  To loop through a BLAST result:
154
259
 
260
+ ```ruby
155
261
  >> require 'bio-blastxmlparser'
156
262
  >> fn = 'test/data/nt_example_blastn.m7'
157
263
  >> n = Bio::BlastXMLParser::XmlIterator.new(fn).to_enum
@@ -163,19 +269,23 @@ To loop through a BLAST result:
163
269
  >> end
164
270
  >> end
165
271
  >> end
272
+ ```
166
273
 
167
274
  The next example parses XML using less memory by using a Ruby
168
275
  Iterator
169
276
 
277
+ ```ruby
170
278
  >> blast = Bio::BlastXMLParser::XmlSplitterIterator.new(fn).to_enum
171
279
  >> iter = blast.next
172
280
  >> iter.iter_num
173
281
  => 1
174
282
  >> iter.query_id
175
283
  => "lcl|1_0"
284
+ ```
176
285
 
177
286
  Get the first hit
178
287
 
288
+ ```ruby
179
289
  >> hit = iter.hits.first
180
290
  >> hit.hit_num
181
291
  => 1
@@ -187,14 +297,18 @@ Get the first hit
187
297
  => "I_74685"
188
298
  >> hit.len
189
299
  => 144
300
+ ```
190
301
 
191
302
  Get the parent info
192
303
 
304
+ ```ruby
193
305
  >> hit.parent.query_id
194
306
  => "lcl|1_0"
195
-
307
+ ```
308
+
196
309
  Get the first Hsp
197
310
 
311
+ ```ruby
198
312
  >> hsp = hit.hsps.first
199
313
  >> hsp.hsp_num
200
314
  => 1
@@ -224,6 +338,7 @@ Get the first Hsp
224
338
  => "AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG"
225
339
  >> hsp.midline
226
340
  => "|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"
341
+ ```
227
342
 
228
343
  Unlike BioRuby, this module uses the actual element names in the XML
229
344
  definition, to avoid confusion (if anyone wants a translation,
@@ -232,30 +347,36 @@ feel free to contribute an adaptor).
232
347
  It is also possible to use the XML element names as Strings, rather
233
348
  than methods. E.g.
234
349
 
350
+ ```ruby
235
351
  >> hsp.field("Hsp_bit-score")
236
352
  => "145.205"
237
353
  >> hsp["Hsp_bit-score"]
238
354
  => "145.205"
355
+ ```
239
356
 
240
357
  Note that, when using the element names, the results are always String values.
241
358
 
242
359
  Fetch the next result (Iteration)
243
360
 
361
+ ```ruby
244
362
  >> iter2 = blast.next
245
363
  >> iter2.iter_num
246
364
  => 2
247
365
  >> iter2.query_id
248
366
  => "lcl|2_0"
367
+ ```
249
368
 
250
369
  etc. etc.
251
370
 
252
371
  For more examples see the files in ./spec
253
372
 
254
- == URL
373
+ ## URL
255
374
 
256
375
  The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
257
376
 
258
- == Copyright
377
+ blastxmlparser is listed at http://biogems.info
378
+
379
+ ## Copyright
259
380
 
260
- Copyright (c) 2011,2012 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
381
+ Copyright (c) 2011-2014 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
261
382
 
data/Rakefile CHANGED
@@ -15,14 +15,10 @@ Jeweler::Tasks.new do |gem|
15
15
  gem.name = "bio-blastxmlparser"
16
16
  gem.homepage = "http://github.com/pjotrp/blastxmlparser"
17
17
  gem.license = "MIT"
18
- gem.summary = %Q{Very fast BLAST XML parser and library for big data}
19
- gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby}
18
+ gem.summary = %Q{Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer}
19
+ gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
20
20
  gem.email = "pjotr.public01@thebird.nl"
21
21
  gem.authors = ["Pjotr Prins"]
22
- # Include your dependencies below. Runtime dependencies are required when using your gem,
23
- # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
24
- # gem.add_runtime_dependency 'jabber4r', '> 0.1'
25
- # gem.add_development_dependency 'rspec', '> 1.2.3'
26
22
  end
27
23
  Jeweler::RubygemsDotOrgTasks.new
28
24
 
@@ -32,17 +28,11 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
32
28
  spec.pattern = FileList['spec/**/*_spec.rb']
33
29
  end
34
30
 
35
- # RSpec::Core::RakeTask.new(:rcov) do |spec|
36
- # spec.pattern = 'spec/**/*_spec.rb'
37
- # spec.rcov = true
38
- # end
39
-
40
31
  task :default => [ :test, :spec ]
41
32
 
42
33
  require 'rake/testtask'
43
34
  Rake::TestTask.new(:test) do |test|
44
35
  test.libs << 'lib' << 'test'
45
- # test.pattern = 'test/**/test_*.rb' # breaks in 1.9.3
46
36
  test.test_files = Dir.glob("test/**/test_*.rb")
47
37
  test.verbose = true
48
38
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.1
1
+ 1.1.2
data/bin/blastxmlparser CHANGED
@@ -4,83 +4,21 @@
4
4
  # Author:: Pjotr Prins
5
5
  # License:: MIT License
6
6
  #
7
- # Copyright (C) 2010-2013 Pjotr Prins <pjotr.prins@thebird.nl>
7
+ # Copyright (C) 2010-2014 Pjotr Prins <pjotr.prins@thebird.nl>
8
8
 
9
9
  rootpath = File.dirname(File.dirname(__FILE__))
10
10
  $: << File.join(rootpath,'lib')
11
11
 
12
12
  BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
13
13
 
14
- $stderr.print "BioRuby BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2011 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
14
+ $stderr.print "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
15
15
 
16
16
  USAGE = <<EOM
17
17
 
18
- bioblastxmlparser filename(s)
18
+ blastxmlparser filename(s)
19
19
 
20
20
  Use --help switch for more information
21
21
 
22
- == Examples
23
-
24
- Print result fields of iterations containing 'lcl', using a regex
25
-
26
- blastxmlparser -e 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
27
-
28
- Print fields where bit_score > 145
29
-
30
- blastxmlparser -e 'hsp.bit_score>145' test/data/nt_example_blastn.m7
31
-
32
- prints a tab delimited
33
-
34
- 1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
35
- 2 1 lcl|1_0 lcl|I_1 1 5.82208e-34
36
- 3 2 lcl|2_0 lcl|I_2 1 6.05436e-59
37
- 4 3 lcl|3_0 lcl|I_3 1 2.03876e-56
38
-
39
- The second and third column show the BLAST iteration, and the others
40
- relate to the hits.
41
-
42
- As this is evaluated Ruby, it is also possible to use the XML element
43
- names directly
44
-
45
- blastxmlparser -e 'hsp["Hsp_bit-score"].to_i>145' test/data/nt_example_blastn.m7
46
-
47
- And it is possible to print (non default) named fields where E-value < 0.001
48
- and hit length > 100. E.g.
49
-
50
- blastxmlparser -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
51
-
52
- 1 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
53
- 2 5.82208e-34 AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCT...
54
- 3 2.76378e-11 AATATGGTAGCTACAGAAACGGTAGTACACTCTTC
55
- 4 1.13373e-13 CTAAACACAGGAGCATATAGGTTGGCAGGCAGGCAAAAT
56
- 5 2.76378e-11 GAAGAGTGTACTACCGTTTCTGTAGCTACCATATT
57
- etc. etc.
58
-
59
- prints the evalue and qseq columns. To output FASTA use --output-fasta
60
-
61
- blastxmlparser --output-fasta -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
62
-
63
- which prints matching sequences, where the first field is the accession, followed
64
- by query iteration id, and hit_id. E.g.
65
-
66
- >I_74685 1|lcl|1_0 lcl|I_74685 [57809 - 57666] (REVERSE SENSE)
67
- AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
68
- >I_1 1|lcl|1_0 lcl|I_1 [477 - 884]
69
- AGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCTGCCAACCTATATGCTCCTGTGTTTAG
70
- etc. etc.
71
-
72
- To use the low-mem (iterated slower) version of the parser use
73
-
74
- blastxmlparser --parser split -n 'hsp.evalue,hsp.qseq' -e 'hsp.evalue<0.01 and hit.len>100' test/data/nt_example_blastn.m7
75
-
76
- == URL
77
-
78
- The project lives at http://github.com/pjotrp/blastxmlparser. If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475
79
-
80
- == Copyright
81
-
82
- Copyright (c) 2011 Pjotr Prins under the MIT licence. See LICENSE.txt and http://www.opensource.org/licenses/mit-license.html for further details.
83
-
84
22
  EOM
85
23
 
86
24
  if ARGV.size == 0
@@ -114,16 +52,22 @@ opts = OptionParser.new do |o|
114
52
  options.parser = p.to_sym
115
53
  end
116
54
 
117
- o.on("--output-fasta","Output FASTA") do |b|
118
- options.output_fasta = true
55
+ o.on("-e filter","--exec filter",String, "Evaluate filter") do |s|
56
+ options.exec = s
119
57
  end
120
-
121
- o.on("-n fields","--named fields",String, "Set named fields") do |s|
58
+
59
+ o.separator ""
60
+
61
+ o.on("-n fields","--named fields",String, "Print named fields") do |s|
122
62
  options.fields = s.split(/,/)
123
63
  end
64
+ o.on("--output-fasta","Output FASTA") do |b|
65
+ options.output_fasta = true
66
+ end
124
67
 
125
- o.on("-e filter","--exec filter",String, "Execute filter") do |s|
126
- options.exec = s
68
+ o.on("-t erb","--template erb",String, "Use ERB template for output") do |s|
69
+ require 'erb'
70
+ options.template = s
127
71
  end
128
72
 
129
73
  o.separator ""
@@ -156,6 +100,13 @@ begin
156
100
  Bio::Log::CLI.configure('bio-blastxmlparser')
157
101
  logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
158
102
 
103
+ if options[:template]
104
+ include BioRdf
105
+ fn = options.template
106
+ raise "No template #{fn}!" if not File.exist?(fn)
107
+ template = ERB.new(File.read(fn))
108
+ end
109
+
159
110
  ARGV.each do | fn |
160
111
  logger.info("XML parsing #{fn}")
161
112
  n = if options.parser == :split
@@ -173,10 +124,13 @@ begin
173
124
  true
174
125
  end
175
126
  if do_print
176
- if options.output_fasta
127
+ if template
128
+ print template.result(binding)
129
+ elsif options.output_fasta
177
130
  print ">"+hit.accession+' '+iter.iter_num.to_s+'|'+iter.query_id+' '+hit.hit_id+' '+hit.hit_def+"\n"
178
131
  print hsp.qseq+"\n"
179
132
  else
133
+ # Default output
180
134
  if options.fields
181
135
  print i,"\t"
182
136
  options.fields.each do | f |
@@ -5,17 +5,17 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-blastxmlparser"
8
- s.version = "1.1.1"
8
+ s.version = "1.1.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2013-02-07"
13
- s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby"
12
+ s.date = "2014-09-02"
13
+ s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["blastxmlparser"]
16
16
  s.extra_rdoc_files = [
17
17
  "LICENSE.txt",
18
- "README.rdoc"
18
+ "README.md"
19
19
  ]
20
20
  s.files = [
21
21
  ".document",
@@ -24,7 +24,7 @@ Gem::Specification.new do |s|
24
24
  "Gemfile",
25
25
  "Gemfile.lock",
26
26
  "LICENSE.txt",
27
- "README.rdoc",
27
+ "README.md",
28
28
  "Rakefile",
29
29
  "VERSION",
30
30
  "bin/blastxmlparser",
@@ -33,6 +33,7 @@ Gem::Specification.new do |s|
33
33
  "lib/bio/db/blast/parser/nokogiri.rb",
34
34
  "lib/bio/db/blast/xmliterator.rb",
35
35
  "lib/bio/db/blast/xmlsplitter.rb",
36
+ "lib/bio/writers/rdf.rb",
36
37
  "sample/bioruby.rb",
37
38
  "sample/blastxmlparserdemo.rb",
38
39
  "sample/libxml_sax.rb",
@@ -41,6 +42,8 @@ Gem::Specification.new do |s|
41
42
  "sample/nokogiri_split_dom.rb",
42
43
  "spec/bio-blastxmlparser_spec.rb",
43
44
  "spec/spec_helper.rb",
45
+ "template/json.erb",
46
+ "template/rdf.erb",
44
47
  "test/data/aa_example.fasta",
45
48
  "test/data/aa_example_blastp.m7",
46
49
  "test/data/nt_example.fasta",
@@ -50,37 +53,37 @@ Gem::Specification.new do |s|
50
53
  s.homepage = "http://github.com/pjotrp/blastxmlparser"
51
54
  s.licenses = ["MIT"]
52
55
  s.require_paths = ["lib"]
53
- s.rubygems_version = "1.8.10"
54
- s.summary = "Very fast BLAST XML parser and library for big data"
56
+ s.rubygems_version = "2.0.3"
57
+ s.summary = "Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
55
58
 
56
59
  if s.respond_to? :specification_version then
57
- s.specification_version = 3
60
+ s.specification_version = 4
58
61
 
59
62
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
60
- s.add_runtime_dependency(%q<bio-logger>, [">= 1.0.0"])
61
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.0"])
62
- s.add_development_dependency(%q<rake>, [">= 0.9.2.2"])
63
+ s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
64
+ s.add_runtime_dependency(%q<nokogiri>, ["~> 1.6.0"])
65
+ s.add_development_dependency(%q<rake>, [">= 0"])
63
66
  s.add_development_dependency(%q<bundler>, [">= 0"])
64
- s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
65
- s.add_development_dependency(%q<rspec>, [">= 2.3.0"])
66
- s.add_development_dependency(%q<rdoc>, [">= 2.4.2"])
67
+ s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
68
+ s.add_development_dependency(%q<rspec>, [">= 0"])
69
+ s.add_development_dependency(%q<rdoc>, [">= 0"])
67
70
  else
68
- s.add_dependency(%q<bio-logger>, [">= 1.0.0"])
69
- s.add_dependency(%q<nokogiri>, [">= 1.5.0"])
70
- s.add_dependency(%q<rake>, [">= 0.9.2.2"])
71
+ s.add_dependency(%q<bio-logger>, [">= 0"])
72
+ s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
73
+ s.add_dependency(%q<rake>, [">= 0"])
71
74
  s.add_dependency(%q<bundler>, [">= 0"])
72
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
73
- s.add_dependency(%q<rspec>, [">= 2.3.0"])
74
- s.add_dependency(%q<rdoc>, [">= 2.4.2"])
75
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
76
+ s.add_dependency(%q<rspec>, [">= 0"])
77
+ s.add_dependency(%q<rdoc>, [">= 0"])
75
78
  end
76
79
  else
77
- s.add_dependency(%q<bio-logger>, [">= 1.0.0"])
78
- s.add_dependency(%q<nokogiri>, [">= 1.5.0"])
79
- s.add_dependency(%q<rake>, [">= 0.9.2.2"])
80
+ s.add_dependency(%q<bio-logger>, [">= 0"])
81
+ s.add_dependency(%q<nokogiri>, ["~> 1.6.0"])
82
+ s.add_dependency(%q<rake>, [">= 0"])
80
83
  s.add_dependency(%q<bundler>, [">= 0"])
81
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
82
- s.add_dependency(%q<rspec>, [">= 2.3.0"])
83
- s.add_dependency(%q<rdoc>, [">= 2.4.2"])
84
+ s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
85
+ s.add_dependency(%q<rspec>, [">= 0"])
86
+ s.add_dependency(%q<rdoc>, [">= 0"])
84
87
  end
85
88
  end
86
89
 
@@ -17,3 +17,4 @@ Bio::Log::LoggerPlus.new('bio-blastxmlparser')
17
17
  require 'bio/db/blast/parser/nokogiri'
18
18
  require 'bio/db/blast/xmlsplitter'
19
19
  require 'bio/db/blast/xmliterator'
20
+ require 'bio/writers/rdf'
@@ -0,0 +1,79 @@
1
+ # RDF support module. Original is part of bioruby-rdf by Pjotr Prins
2
+ #
3
+ module BioRdf
4
+
5
+ module RDF
6
+
7
+ def RDF::valid_uri? uri
8
+ uri =~ /^([!#$&-;=?_a-z~]|%[0-9a-f]{2})+$/i
9
+ end
10
+
11
+ def RDF::escape_string_literal(literal)
12
+ s = literal.to_s
13
+ # Put a backslash before every double quote that is not already preceded by one
14
+ s = s.gsub(/(?<!\\)"/,'\"')
15
+ # Put a backslash before a lone backslash unless the next character is one of u U t n r \ "
16
+ if s =~ /[^\\]\\[^\\]/
17
+ s2 = []
18
+ s.each_char.with_index { |c,i|
19
+ res = c
20
+ if i>0 and c == '\\' and s[i-1] != '\\' and s[i+1] !~ /^[uUtnr\\"]/
21
+ res = '\\' + c
22
+ end
23
+ # p [i,c,s[i+1],res]
24
+ s2 << res
25
+ }
26
+ s = s2.join('')
27
+ end
28
+ s
29
+ end
30
+
31
+ def RDF::stringify_literal(literal)
32
+ RDF::escape_string_literal(literal.to_s)
33
+ end
34
+
35
+ def RDF::quoted_stringify_literal(literal)
36
+ '"' + stringify_literal(literal) + '"'
37
+ end
38
+ end
39
+
40
+ module Turtle
41
+
42
+ def Turtle::stringify_literal(literal)
43
+ RDF::stringify_literal(literal)
44
+ end
45
+
46
+ def Turtle::identifier(id)
47
+ raise "Illegal identifier #{id}" if id != Turtle::mangle_identifier(id)
48
+ end
49
+
50
+ # Replace letters/symbols that are not allowed in a Turtle identifier
51
+ # (short hand URI). This should be the definite mangler and replace the
52
+ # ones in bioruby-table and bio-exominer. Manglers are useful when using
53
+ # data from other sources and trying to transform them into simple RDF
54
+ # identifiers.
55
+
56
+ def Turtle::mangle_identifier(s)
57
+ id = s.strip.gsub(/[^[:print:]]/, '').gsub(/[#)(,]/,"").gsub(/[%]/,"perc").gsub(/(\s|\.|\$|\/|\\|\>)+/,"_")
58
+ id = id.gsub(/\[|\]/,'')
59
+ # id = URI::escape(id)
60
+ id = id.gsub(/\|/,'_')
61
+ id = id.gsub(/\-|:/,'_')
62
+ if id != s
63
+ # Don't want Bio dependency in templates!
64
+ # logger = Bio::Log::LoggerPlus.new 'bio-rdf'
65
+ # logger.warn "\nWARNING: Changed identifier <#{s}> to <#{id}>"
66
+ $stderr.print "\nWARNING: Changed identifier <#{s}> to <#{id}>"
67
+ end
68
+ if not RDF::valid_uri?(id)
69
+ raise "Invalid URI after mangling <#{s}> to <#{id}>!"
70
+ end
71
+ valid_id = if id =~ /^\d/
72
+ 'r' + id
73
+ else
74
+ id
75
+ end
76
+ valid_id # we certainly hope so!
77
+ end
78
+ end
79
+ end
data/template/json.erb ADDED
@@ -0,0 +1,15 @@
1
+ { "<%= hit.parent.query_def %>": {
2
+ "query_id": "<%= hit.parent.query_id %>",
3
+ "num": <%= hit.hit_num %>,
4
+ "accession": "<%= hit.accession %>",
5
+ "id": "<%= hit.hit_id %>",
6
+ "len": <%= hit.len %>,
7
+ "E-value": <%= hsp.evalue %>,
8
+ "identity": <%= hsp.identity %>,
9
+ "align_len": <%= hsp.align_len %>,
10
+ "bitscore": <%= hsp.bit_score %>,
11
+ "qseq": "<%= hsp.qseq %>",
12
+ "midline": "<%= hsp.midline %>",
13
+ "hseq": "<%= hsp.hseq %>",
14
+ };
15
+
data/template/rdf.erb ADDED
@@ -0,0 +1,21 @@
1
+ <%
2
+ blastid = Turtle::mangle_identifier(hit.parent.query_def)
3
+ id = blastid+'_'+hit.hit_num.to_s
4
+ %>
5
+ :<%= blastid %> :query :<%= id %>
6
+ :<%= id %>
7
+ :query_id "<%= hit.parent.query_id %>",
8
+ :query_def "<%= hit.parent.query_def %>",
9
+ :num <%= hit.hit_num %>,
10
+ :accession "<%= hit.accession %>",
11
+ :id "<%= hit.hit_id %>",
12
+ :len <%= hit.len %>,
13
+ :E-value <%= hsp.evalue %>,
14
+ :identity <%= hsp.identity %>,
15
+ :align_len <%= hsp.align_len %>,
16
+ :bitscore <%= hsp.bit_score %>,
17
+ :qseq "<%= hsp.qseq %>",
18
+ :midline "<%= hsp.midline %>",
19
+ :hseq "<%= hsp.hseq %>",
20
+ :evalue <%= hsp.evalue %> .
21
+
metadata CHANGED
@@ -1,110 +1,130 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-blastxmlparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
5
- prerelease:
4
+ version: 1.1.2
6
5
  platform: ruby
7
6
  authors:
8
7
  - Pjotr Prins
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-02-07 00:00:00.000000000Z
11
+ date: 2014-09-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bio-logger
16
- requirement: &24214160 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
- version: 1.0.0
19
+ version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
- version_requirements: *24214160
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: nokogiri
27
- requirement: &24213120 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - ! '>='
31
+ - - "~>"
31
32
  - !ruby/object:Gem::Version
32
- version: 1.5.0
33
+ version: 1.6.0
33
34
  type: :runtime
34
35
  prerelease: false
35
- version_requirements: *24213120
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.6.0
36
41
  - !ruby/object:Gem::Dependency
37
42
  name: rake
38
- requirement: &24212220 !ruby/object:Gem::Requirement
39
- none: false
43
+ requirement: !ruby/object:Gem::Requirement
40
44
  requirements:
41
- - - ! '>='
45
+ - - ">="
42
46
  - !ruby/object:Gem::Version
43
- version: 0.9.2.2
47
+ version: '0'
44
48
  type: :development
45
49
  prerelease: false
46
- version_requirements: *24212220
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
47
55
  - !ruby/object:Gem::Dependency
48
56
  name: bundler
49
- requirement: &24211440 !ruby/object:Gem::Requirement
50
- none: false
57
+ requirement: !ruby/object:Gem::Requirement
51
58
  requirements:
52
- - - ! '>='
59
+ - - ">="
53
60
  - !ruby/object:Gem::Version
54
61
  version: '0'
55
62
  type: :development
56
63
  prerelease: false
57
- version_requirements: *24211440
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: jeweler
60
- requirement: &24174660 !ruby/object:Gem::Requirement
61
- none: false
71
+ requirement: !ruby/object:Gem::Requirement
62
72
  requirements:
63
- - - ~>
73
+ - - "~>"
64
74
  - !ruby/object:Gem::Version
65
- version: 1.8.4
75
+ version: 2.0.1
66
76
  type: :development
67
77
  prerelease: false
68
- version_requirements: *24174660
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 2.0.1
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
- requirement: &24173840 !ruby/object:Gem::Requirement
72
- none: false
85
+ requirement: !ruby/object:Gem::Requirement
73
86
  requirements:
74
- - - ! '>='
87
+ - - ">="
75
88
  - !ruby/object:Gem::Version
76
- version: 2.3.0
89
+ version: '0'
77
90
  type: :development
78
91
  prerelease: false
79
- version_requirements: *24173840
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
80
97
  - !ruby/object:Gem::Dependency
81
98
  name: rdoc
82
- requirement: &24173100 !ruby/object:Gem::Requirement
83
- none: false
99
+ requirement: !ruby/object:Gem::Requirement
84
100
  requirements:
85
- - - ! '>='
101
+ - - ">="
86
102
  - !ruby/object:Gem::Version
87
- version: 2.4.2
103
+ version: '0'
88
104
  type: :development
89
105
  prerelease: false
90
- version_requirements: *24173100
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
91
111
  description: Fast big data BLAST XML parser and library; this libxml2 based version
92
- is 50x faster than BioRuby
112
+ is 50x faster than BioRuby and comes with a nice CLI
93
113
  email: pjotr.public01@thebird.nl
94
114
  executables:
95
115
  - blastxmlparser
96
116
  extensions: []
97
117
  extra_rdoc_files:
98
118
  - LICENSE.txt
99
- - README.rdoc
119
+ - README.md
100
120
  files:
101
- - .document
102
- - .rspec
103
- - .travis.yml
121
+ - ".document"
122
+ - ".rspec"
123
+ - ".travis.yml"
104
124
  - Gemfile
105
125
  - Gemfile.lock
106
126
  - LICENSE.txt
107
- - README.rdoc
127
+ - README.md
108
128
  - Rakefile
109
129
  - VERSION
110
130
  - bin/blastxmlparser
@@ -113,6 +133,7 @@ files:
113
133
  - lib/bio/db/blast/parser/nokogiri.rb
114
134
  - lib/bio/db/blast/xmliterator.rb
115
135
  - lib/bio/db/blast/xmlsplitter.rb
136
+ - lib/bio/writers/rdf.rb
116
137
  - sample/bioruby.rb
117
138
  - sample/blastxmlparserdemo.rb
118
139
  - sample/libxml_sax.rb
@@ -121,6 +142,8 @@ files:
121
142
  - sample/nokogiri_split_dom.rb
122
143
  - spec/bio-blastxmlparser_spec.rb
123
144
  - spec/spec_helper.rb
145
+ - template/json.erb
146
+ - template/rdf.erb
124
147
  - test/data/aa_example.fasta
125
148
  - test/data/aa_example_blastp.m7
126
149
  - test/data/nt_example.fasta
@@ -129,29 +152,25 @@ files:
129
152
  homepage: http://github.com/pjotrp/blastxmlparser
130
153
  licenses:
131
154
  - MIT
155
+ metadata: {}
132
156
  post_install_message:
133
157
  rdoc_options: []
134
158
  require_paths:
135
159
  - lib
136
160
  required_ruby_version: !ruby/object:Gem::Requirement
137
- none: false
138
161
  requirements:
139
- - - ! '>='
162
+ - - ">="
140
163
  - !ruby/object:Gem::Version
141
164
  version: '0'
142
- segments:
143
- - 0
144
- hash: -3287387609254152406
145
165
  required_rubygems_version: !ruby/object:Gem::Requirement
146
- none: false
147
166
  requirements:
148
- - - ! '>='
167
+ - - ">="
149
168
  - !ruby/object:Gem::Version
150
169
  version: '0'
151
170
  requirements: []
152
171
  rubyforge_project:
153
- rubygems_version: 1.8.10
172
+ rubygems_version: 2.0.3
154
173
  signing_key:
155
- specification_version: 3
156
- summary: Very fast BLAST XML parser and library for big data
174
+ specification_version: 4
175
+ summary: Very fast BLAST XML to RDF/HTML/JSON/YAML/csv transformer
157
176
  test_files: []