bio-blastxmlparser 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7222df89b2f60ef4b027ea7ca766a30c04de567b
4
- data.tar.gz: b8d7c84c85dd58e7794a62b83a73b17a04b60ce1
3
+ metadata.gz: 76df7cd1f6e1bc4f2b04fdf3f0fc830110f2e376
4
+ data.tar.gz: 68f44a797aa5357690e6c4a10a1fe241a7b8fe37
5
5
  SHA512:
6
- metadata.gz: e9feee95e3063b0c6c9e9ac28c0f7389e4036130e51c107314216fe8e30f98342d2fbc5f1af0ef16f9c5a11be95aa97d86d16f5d9e2169eda2a54d2594c0dc84
7
- data.tar.gz: 63971bd220b178e7ff0dbd7c50a4df6277b9dc8035f610f173603e2e304655610e65894fb3bde7e67c17963d13557b22c3c1d4a3e6dbadbf65c7f170ddbd12f5
6
+ metadata.gz: f424b8cfedf921840dbf2fee412c191f1c9951d289b80db047744a56883b171b50d4b1161e86415f0e873be4bd43935031f966d32d394efea9208e65b2529903
7
+ data.tar.gz: 6214606ac08afa7306503c78e969046ab2a54d9c45e9dc42c597d8c033db2095fc5a25f15f56090a0337bd1049081699a81ceefa90d2acaf17cb782340074fc6
data/README.md CHANGED
@@ -107,7 +107,7 @@ Print result fields of iterations containing 'lcl', using a regex
107
107
  blastxmlparser --filter 'iter.query_id=~/lcl/' test/data/nt_example_blastn.m7
108
108
  ```
109
109
 
110
- prints a tab delimited
110
+ prints a tab-delimited table (the default output format) to stdout
111
111
 
112
112
  ```sh
113
113
  1 1 lcl|1_0 lcl|I_74685 1 5.82208e-34
@@ -228,6 +228,59 @@ Likewise, using the RDF template
228
228
  :evalue 8.1089e-12 .
229
229
  ```
230
230
 
231
+ ### Metadata
232
+
233
+ Templates can also print data as a header of the JSON/YAML/RDF output. To do
234
+ this, use the '=' prefix with the HEADER, BODY and FOOTER keywords in the template. A small
235
+ example is
236
+
237
+ ```Javascript
238
+ =HEADER
239
+ <% require 'json' %>
240
+ [
241
+ { "HEADER": {
242
+ "options": <%= options.to_h.to_json %>,
243
+ "files": <%= ARGV %>,
244
+ "version": "<%= BLASTXML_VERSION %>"
245
+ },
246
+ =BODY
247
+ { "<%= hit.parent.query_def %>": {
248
+ "num": <%= hit.hit_num %>,
249
+ "id": "<%= hit.hit_id %>",
250
+ "len": <%= hit.len %>,
251
+ "E-value": <%= hsp.evalue %>,
252
+ },
253
+ =FOOTER
254
+ ]
255
+ ```
256
+
257
+ may generate something like
258
+
259
+ ```Javascript
260
+ [
261
+ { "HEADER": {
262
+ "options": {"template":"template/blast2json2.erb","filter":"hsp.evalue>0.01"},
263
+ "files": ["test/data/nt_example_blastn.m7"],
264
+ "version": "2.0.2-pre1"
265
+ },
266
+ { "I_1 [477 - 884] ": {
267
+ "num": 41,
268
+ "id": "lcl|X_42251",
269
+ "len": 153,
270
+ "E-value": 0.0247015,
271
+ },
272
+ { "I_1 [477 - 884] ": {
273
+ "num": 43,
274
+ "id": "lcl|V_105720",
275
+ "len": 180,
276
+ "E-value": 0.0247015,
277
+ },
278
+ ]
279
+ ```
280
+
281
+ Note that the template is not smart enough to remove the final comma
282
+ from the last BODY element. To produce valid JSON, that comma needs to be removed from the output.
283
+
231
284
  ## Additional options
232
285
 
233
286
  To use the high-mem version of the parser (slightly faster on single core) use
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.1
1
+ 2.0.2
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
- # BioRuby bio-blastxmlparser Plugin
3
+ # blastxmlparser
4
4
  # Author:: Pjotr Prins
5
5
  # License:: MIT License
6
6
  #
@@ -75,7 +75,6 @@ opts = OptionParser.new do |o|
75
75
  end
76
76
 
77
77
  o.on("-t erb","--template erb",String, "Use ERB template for output") do |s|
78
- require 'erb'
79
78
  options.template = s
80
79
  end
81
80
 
@@ -91,6 +90,7 @@ opts = OptionParser.new do |o|
91
90
 
92
91
  o.on("-q", "--quiet", "Run quietly") do |q|
93
92
  Bio::Log::CLI.trace('error')
93
+ options.quiet = true
94
94
  end
95
95
 
96
96
  o.on("-v", "--verbose", "Run verbosely") do |v|
@@ -118,13 +118,18 @@ begin
118
118
  end
119
119
  end
120
120
 
121
+ # Prepare the ERB template
121
122
  if options.template
122
123
  include BioRdf
123
- fn = options.template
124
- raise "No template #{fn}!" if not File.exist?(fn)
125
- template = ERB.new(File.read(fn))
124
+ require 'bio/writers/template'
125
+ template = Bio::Template.new(options.template)
126
126
  end
127
127
 
128
+ $stderr.print options,"\n" if not options.quiet
129
+
130
+ raise "No input file(s) defined" if ARGV.size == 0
131
+
132
+
128
133
  ARGV.each do | fn |
129
134
  logger.info("XML parsing #{fn}")
130
135
  parser_type = options.parser
@@ -145,6 +150,7 @@ begin
145
150
  chunks = []
146
151
  chunks_count = 0
147
152
  NUM_CHUNKS=10_000
153
+ print template.header(binding) if template
148
154
 
149
155
  process = lambda { |iter2,i| # Process one BLAST iter block
150
156
  if parser_type == :nosplit
@@ -167,7 +173,7 @@ begin
167
173
  if do_print
168
174
  line_count += 1
169
175
  if template
170
- res << template.result(binding)
176
+ res << template.body(binding)
171
177
  elsif options.output_fasta
172
178
  res << ">"+hit.accession+' '+iter.iter_num.to_s+'|'+iter.query_id+' '+hit.hit_id+' '+hit.hit_def+"\n"
173
179
  res << hsp.qseq+"\n"
@@ -221,6 +227,7 @@ begin
221
227
  process.call(iter,i)
222
228
  }
223
229
  end
230
+ print template.footer(binding) if template
224
231
  end
225
232
  rescue OptionParser::InvalidOption => e
226
233
  $stderr.print e.message
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-blastxmlparser"
8
- s.version = "2.0.1"
8
+ s.version = "2.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2014-09-07"
12
+ s.date = "2014-11-07"
13
13
  s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["blastxmlparser"]
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
34
34
  "lib/bio/db/blast/xmliterator.rb",
35
35
  "lib/bio/db/blast/xmlsplitter.rb",
36
36
  "lib/bio/writers/rdf.rb",
37
+ "lib/bio/writers/template.rb",
37
38
  "sample/bioruby.rb",
38
39
  "sample/blastxmlparserdemo.rb",
39
40
  "sample/libxml_sax.rb",
@@ -43,6 +44,7 @@ Gem::Specification.new do |s|
43
44
  "spec/bio-blastxmlparser_spec.rb",
44
45
  "spec/spec_helper.rb",
45
46
  "template/blast2json.erb",
47
+ "template/blast2json2.erb",
46
48
  "template/blast2rdf-minimal.erb",
47
49
  "template/blast2rdf.erb",
48
50
  "test/data/aa_example.fasta",
@@ -0,0 +1,75 @@
1
+ require 'erb'
2
+
3
+ module Bio
4
+
5
+ class Template
6
+
7
+ def initialize fn
8
+ raise "Can not find template #{fn}!" if not File.exist?(fn)
9
+ parse(File.read(fn))
10
+ end
11
+
12
+ def parse buf
13
+ header = []
14
+ body = []
15
+ footer = []
16
+ where = :header
17
+ buf.split("\n").each do | line |
18
+ case where
19
+ when :header
20
+ next if line =~ /=HEADER/
21
+ if line =~ /=BODY/
22
+ body = []
23
+ where = :body
24
+ next
25
+ end
26
+ header << line
27
+ when :body
28
+ if line =~ /=FOOTER/
29
+ footer = []
30
+ where = :footer
31
+ next
32
+ end
33
+ body << line
34
+ else
35
+ footer << line
36
+ end
37
+ end
38
+ if body == []
39
+ body = header
40
+ header = []
41
+ end
42
+ @erb_header = ERB.new(header.join("\n")) if header.size
43
+ @erb_body = ERB.new(body.join("\n")) if body.size
44
+ @erb_footer = ERB.new(footer.join("\n")) if footer.size
45
+ end
46
+
47
+ def result env
48
+ @erb.result(env)
49
+ end
50
+
51
+ def header env
52
+ if @erb_header
53
+ @erb_header.result(env)
54
+ else
55
+ ""
56
+ end
57
+ end
58
+
59
+ def body env
60
+ if @erb_body
61
+ @erb_body.result(env)
62
+ else
63
+ ""
64
+ end
65
+ end
66
+
67
+ def footer env
68
+ if @erb_footer
69
+ @erb_footer.result(env)
70
+ else
71
+ ""
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,28 @@
1
+ =HEADER
2
+ <% require 'json' %>
3
+ [
4
+ { "HEADER": {
5
+ "options": <%= options.to_h.to_json %>,
6
+ "files": <%= ARGV %>,
7
+ "version": "<%= BLASTXML_VERSION %>"
8
+ },
9
+
10
+ =BODY
11
+ { "<%= hit.parent.query_def %>": {
12
+ "query_id": "<%= hit.parent.query_id %>",
13
+ "num": <%= hit.hit_num %>,
14
+ "accession": "<%= hit.accession %>",
15
+ "id": "<%= hit.hit_id %>",
16
+ "len": <%= hit.len %>,
17
+ "E-value": <%= hsp.evalue %>,
18
+ "identity": <%= hsp.identity %>,
19
+ "align_len": <%= hsp.align_len %>,
20
+ "bitscore": <%= hsp.bit_score %>,
21
+ "qseq": "<%= hsp.qseq %>",
22
+ "midline": "<%= hsp.midline %>",
23
+ "hseq": "<%= hsp.hseq %>",
24
+ },
25
+
26
+ =FOOTER
27
+ ]
28
+
@@ -4,11 +4,11 @@
4
4
  %>
5
5
  :<%= blastid %> :query :<%= id %>
6
6
  :<%= id %>
7
- :query_def "<%= hit.parent.query_def %>",
8
- :num <%= hit.hit_num %>,
9
- :accession "<%= hit.accession %>",
10
- :len <%= hit.len %>,
11
- :identity <%= hsp.identity %>,
12
- :align_len <%= hsp.align_len %>,
13
- :bitscore <%= hsp.bit_score %>,
14
- :evalue <%= hsp.evalue %> .
7
+ :query_def "<%= hit.parent.query_def %>";
8
+ :num <%= hit.hit_num %>;
9
+ :accession "<%= hit.accession %>";
10
+ :len <%= hit.len %>;
11
+ :identity <%= hsp.identity %>;
12
+ :align_len <%= hsp.align_len %>;
13
+ :bitscore <%= hsp.bit_score %>;
14
+ :evalue <%= hsp.evalue %> .
@@ -4,17 +4,17 @@
4
4
  %>
5
5
  :<%= blastid %> :query :<%= id %>
6
6
  :<%= id %>
7
- :query_id "<%= hit.parent.query_id %>",
8
- :query_def "<%= hit.parent.query_def %>",
9
- :num <%= hit.hit_num %>,
10
- :accession "<%= hit.accession %>",
11
- :id "<%= hit.hit_id %>",
12
- :len <%= hit.len %>,
13
- :identity <%= hsp.identity %>,
14
- :align_len <%= hsp.align_len %>,
15
- :bitscore <%= hsp.bit_score %>,
16
- :qseq "<%= hsp.qseq %>",
7
+ :query_id "<%= hit.parent.query_id %>";
8
+ :query_def "<%= hit.parent.query_def %>";
9
+ :num <%= hit.hit_num %>;
10
+ :accession "<%= hit.accession %>";
11
+ :id "<%= hit.hit_id %>";
12
+ :len <%= hit.len %>;
13
+ :identity <%= hsp.identity %>;
14
+ :align_len <%= hsp.align_len %>;
15
+ :bitscore <%= hsp.bit_score %>;
16
+ :qseq "<%= hsp.qseq %>";
17
17
  :midline "<%= hsp.midline %>",
18
- :hseq "<%= hsp.hseq %>",
18
+ :hseq "<%= hsp.hseq %>";
19
19
  :evalue <%= hsp.evalue %> .
20
20
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-blastxmlparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pjotr Prins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-07 00:00:00.000000000 Z
11
+ date: 2014-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio-logger
@@ -134,6 +134,7 @@ files:
134
134
  - lib/bio/db/blast/xmliterator.rb
135
135
  - lib/bio/db/blast/xmlsplitter.rb
136
136
  - lib/bio/writers/rdf.rb
137
+ - lib/bio/writers/template.rb
137
138
  - sample/bioruby.rb
138
139
  - sample/blastxmlparserdemo.rb
139
140
  - sample/libxml_sax.rb
@@ -143,6 +144,7 @@ files:
143
144
  - spec/bio-blastxmlparser_spec.rb
144
145
  - spec/spec_helper.rb
145
146
  - template/blast2json.erb
147
+ - template/blast2json2.erb
146
148
  - template/blast2rdf-minimal.erb
147
149
  - template/blast2rdf.erb
148
150
  - test/data/aa_example.fasta