bio-blastxmlparser 2.0.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -1
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +20 -5
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/blastxmlparser +14 -5
- data/bio-blastxmlparser.gemspec +8 -7
- data/template/blast2rdf-minimal.erb +17 -1
- metadata +43 -27
- checksums.yaml +0 -7
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# bio-blastxmlparser
|
4
4
|
|
5
|
-
blastxmlparser is a very fast
|
5
|
+
blastxmlparser is a very fast parallel big-data BLAST XML file
|
6
6
|
parser, which can be used as a command line utility. Use blastxmlparser
|
7
7
|
to:
|
8
8
|
|
@@ -163,6 +163,12 @@ by query iteration id, and hit_id. E.g.
|
|
163
163
|
etc. etc.
|
164
164
|
```
|
165
165
|
|
166
|
+
Another example outputs all hit definitions containing a given string:
|
167
|
+
|
168
|
+
```sh
|
169
|
+
/blastxmlparser -n hit.hit_def --filter 'hit.hit_def=~/G. Ratti/i'
|
170
|
+
```
|
171
|
+
|
166
172
|
## Modify output
|
167
173
|
|
168
174
|
To have more output options blastxmlparser can use an [ERB
|
@@ -244,7 +250,7 @@ can be
|
|
244
250
|
"version": "<%= BLASTXML_VERSION %>"
|
245
251
|
},
|
246
252
|
=BODY
|
247
|
-
{ "<%= hit.parent.query_def %>": {
|
253
|
+
{ "<%= hit.parent.query_def.strip %>": {
|
248
254
|
"num": <%= hit.hit_num %>,
|
249
255
|
"id": "<%= hit.hit_id %>",
|
250
256
|
"len": <%= hit.len %>,
|
@@ -263,13 +269,13 @@ may generate something like
|
|
263
269
|
"files": ["test/data/nt_example_blastn.m7"],
|
264
270
|
"version": "2.0.2-pre1"
|
265
271
|
},
|
266
|
-
{ "I_1 [477 - 884]
|
272
|
+
{ "I_1 [477 - 884]": {
|
267
273
|
"num": 41,
|
268
274
|
"id": "lcl|X_42251",
|
269
275
|
"len": 153,
|
270
276
|
"E-value": 0.0247015,
|
271
277
|
},
|
272
|
-
{ "I_1 [477 - 884]
|
278
|
+
{ "I_1 [477 - 884]": {
|
273
279
|
"num": 43,
|
274
280
|
"id": "lcl|V_105720",
|
275
281
|
"len": 180,
|
@@ -279,7 +285,16 @@ may generate something like
|
|
279
285
|
```
|
280
286
|
|
281
287
|
Note that the template is not smart enough to remove the final comma
|
282
|
-
from the last BODY element. To make it valid JSON that needs to be
|
288
|
+
from the last BODY element. To make it valid JSON that comma needs to be
|
289
|
+
removed. A future version may add a parameter to the BODY element or a
|
290
|
+
global rewrite function for this purpose. A simple
|
291
|
+
|
292
|
+
```ruby
|
293
|
+
<%= ( body.last? ? "" : "," ) %>
|
294
|
+
```
|
295
|
+
|
296
|
+
does not work here because the parallel parser does not
|
297
|
+
know which line is the last.
|
283
298
|
|
284
299
|
## Additional options
|
285
300
|
|
data/Rakefile
CHANGED
@@ -19,6 +19,7 @@ Jeweler::Tasks.new do |gem|
|
|
19
19
|
gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
|
20
20
|
gem.email = "pjotr.public01@thebird.nl"
|
21
21
|
gem.authors = ["Pjotr Prins"]
|
22
|
+
gem.required_ruby_version = '>=1.9.2'
|
22
23
|
end
|
23
24
|
Jeweler::RubygemsDotOrgTasks.new
|
24
25
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.0.
|
1
|
+
2.0.3
|
data/bin/blastxmlparser
CHANGED
@@ -10,8 +10,8 @@ rootpath = File.dirname(File.dirname(__FILE__))
|
|
10
10
|
$: << File.join(rootpath,'lib')
|
11
11
|
|
12
12
|
BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
13
|
-
|
14
|
-
$stderr.print "
|
13
|
+
BLASTXML_BANNER = "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>"
|
14
|
+
$stderr.print BLASTXML_BANNER,"\n\n"
|
15
15
|
|
16
16
|
USAGE = <<EOM
|
17
17
|
|
@@ -129,6 +129,7 @@ begin
|
|
129
129
|
|
130
130
|
raise "No input file(s) defined" if ARGV.size == 0
|
131
131
|
|
132
|
+
output_running = false # a lock for tracking the output fork
|
132
133
|
|
133
134
|
ARGV.each do | fn |
|
134
135
|
logger.info("XML parsing #{fn}")
|
@@ -214,15 +215,23 @@ begin
|
|
214
215
|
process.call(iter,i)
|
215
216
|
}
|
216
217
|
# Output is forked to a separate process too
|
217
|
-
|
218
|
+
while output_running do
|
219
|
+
sleep 0.01
|
220
|
+
end
|
221
|
+
output_running = true
|
222
|
+
|
223
|
+
Parallel.map(1..1, :finish => lambda { |item,i,result| output_running=false }) { |num|
|
218
224
|
output.call out
|
219
225
|
STDOUT.flush
|
220
226
|
STDOUT.close
|
221
|
-
|
222
|
-
|
227
|
+
}
|
228
|
+
|
223
229
|
chunks = []
|
224
230
|
end
|
225
231
|
end
|
232
|
+
while output_running do
|
233
|
+
sleep 0.01
|
234
|
+
end
|
226
235
|
output.call Parallel.map_with_index(chunks, :in_processes => options.threads) { | iter,i |
|
227
236
|
process.call(iter,i)
|
228
237
|
}
|
data/bio-blastxmlparser.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-blastxmlparser"
|
8
|
-
s.version = "2.0.
|
8
|
+
s.version = "2.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2015-05-07"
|
13
13
|
s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["blastxmlparser"]
|
@@ -56,15 +56,16 @@ Gem::Specification.new do |s|
|
|
56
56
|
s.homepage = "http://github.com/pjotrp/blastxmlparser"
|
57
57
|
s.licenses = ["MIT"]
|
58
58
|
s.require_paths = ["lib"]
|
59
|
-
s.
|
59
|
+
s.required_ruby_version = Gem::Requirement.new(">= 1.9.2")
|
60
|
+
s.rubygems_version = "1.8.23"
|
60
61
|
s.summary = "Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
|
61
62
|
|
62
63
|
if s.respond_to? :specification_version then
|
63
|
-
s.specification_version =
|
64
|
+
s.specification_version = 3
|
64
65
|
|
65
66
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
66
67
|
s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
|
67
|
-
s.add_runtime_dependency(%q<nokogiri>, ["
|
68
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
68
69
|
s.add_development_dependency(%q<rake>, [">= 0"])
|
69
70
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
70
71
|
s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
|
@@ -72,7 +73,7 @@ Gem::Specification.new do |s|
|
|
72
73
|
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
73
74
|
else
|
74
75
|
s.add_dependency(%q<bio-logger>, [">= 0"])
|
75
|
-
s.add_dependency(%q<nokogiri>, ["
|
76
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
76
77
|
s.add_dependency(%q<rake>, [">= 0"])
|
77
78
|
s.add_dependency(%q<bundler>, [">= 0"])
|
78
79
|
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
@@ -81,7 +82,7 @@ Gem::Specification.new do |s|
|
|
81
82
|
end
|
82
83
|
else
|
83
84
|
s.add_dependency(%q<bio-logger>, [">= 0"])
|
84
|
-
s.add_dependency(%q<nokogiri>, ["
|
85
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
85
86
|
s.add_dependency(%q<rake>, [">= 0"])
|
86
87
|
s.add_dependency(%q<bundler>, [">= 0"])
|
87
88
|
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
@@ -1,10 +1,24 @@
|
|
1
|
+
=HEADER
|
2
|
+
# BLAST RDF template example
|
3
|
+
#
|
4
|
+
# options: <%= options.to_h %>
|
5
|
+
# files: <%= ARGV %>
|
6
|
+
# date: <%= Time.now %>
|
7
|
+
# version: <%= BLASTXML_BANNER %>
|
8
|
+
|
9
|
+
@prefix gene: <http://biobeat.org/rdf/gwp/gene-names#> .
|
10
|
+
@prefix : <http://biobeat.org/rdf/megablast#> .
|
11
|
+
|
12
|
+
=BODY
|
1
13
|
<%
|
2
14
|
blastid = Turtle::mangle_identifier(hit.parent.query_def)
|
3
15
|
id = blastid+'_'+hit.hit_num.to_s
|
16
|
+
gene_name = hit.parent.query_def.split(/ /).first
|
4
17
|
%>
|
5
18
|
:<%= blastid %> :query :<%= id %>
|
6
19
|
:<%= id %>
|
7
|
-
:query_def "<%= hit.parent.query_def %>";
|
20
|
+
:query_def "<%= hit.parent.query_def.strip %>";
|
21
|
+
gene:gene_name "<%= gene_name %>";
|
8
22
|
:num <%= hit.hit_num %>;
|
9
23
|
:accession "<%= hit.accession %>";
|
10
24
|
:len <%= hit.len %>;
|
@@ -12,3 +26,5 @@
|
|
12
26
|
:align_len <%= hsp.align_len %>;
|
13
27
|
:bitscore <%= hsp.bit_score %>;
|
14
28
|
:evalue <%= hsp.evalue %> .
|
29
|
+
|
30
|
+
=FOOTER
|
metadata
CHANGED
@@ -1,111 +1,126 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-blastxmlparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.3
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Pjotr Prins
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2015-05-07 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: bio-logger
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
16
18
|
requirements:
|
17
|
-
- -
|
19
|
+
- - ! '>='
|
18
20
|
- !ruby/object:Gem::Version
|
19
21
|
version: '0'
|
20
22
|
type: :runtime
|
21
23
|
prerelease: false
|
22
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
23
26
|
requirements:
|
24
|
-
- -
|
27
|
+
- - ! '>='
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0'
|
27
30
|
- !ruby/object:Gem::Dependency
|
28
31
|
name: nokogiri
|
29
32
|
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
30
34
|
requirements:
|
31
|
-
- -
|
35
|
+
- - ! '>='
|
32
36
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
37
|
+
version: '0'
|
34
38
|
type: :runtime
|
35
39
|
prerelease: false
|
36
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
37
42
|
requirements:
|
38
|
-
- -
|
43
|
+
- - ! '>='
|
39
44
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
45
|
+
version: '0'
|
41
46
|
- !ruby/object:Gem::Dependency
|
42
47
|
name: rake
|
43
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
44
50
|
requirements:
|
45
|
-
- -
|
51
|
+
- - ! '>='
|
46
52
|
- !ruby/object:Gem::Version
|
47
53
|
version: '0'
|
48
54
|
type: :development
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - ! '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
62
|
- !ruby/object:Gem::Dependency
|
56
63
|
name: bundler
|
57
64
|
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
58
66
|
requirements:
|
59
|
-
- -
|
67
|
+
- - ! '>='
|
60
68
|
- !ruby/object:Gem::Version
|
61
69
|
version: '0'
|
62
70
|
type: :development
|
63
71
|
prerelease: false
|
64
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
65
74
|
requirements:
|
66
|
-
- -
|
75
|
+
- - ! '>='
|
67
76
|
- !ruby/object:Gem::Version
|
68
77
|
version: '0'
|
69
78
|
- !ruby/object:Gem::Dependency
|
70
79
|
name: jeweler
|
71
80
|
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
72
82
|
requirements:
|
73
|
-
- -
|
83
|
+
- - ~>
|
74
84
|
- !ruby/object:Gem::Version
|
75
85
|
version: 2.0.1
|
76
86
|
type: :development
|
77
87
|
prerelease: false
|
78
88
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
79
90
|
requirements:
|
80
|
-
- -
|
91
|
+
- - ~>
|
81
92
|
- !ruby/object:Gem::Version
|
82
93
|
version: 2.0.1
|
83
94
|
- !ruby/object:Gem::Dependency
|
84
95
|
name: rspec
|
85
96
|
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
86
98
|
requirements:
|
87
|
-
- -
|
99
|
+
- - ! '>='
|
88
100
|
- !ruby/object:Gem::Version
|
89
101
|
version: '0'
|
90
102
|
type: :development
|
91
103
|
prerelease: false
|
92
104
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
93
106
|
requirements:
|
94
|
-
- -
|
107
|
+
- - ! '>='
|
95
108
|
- !ruby/object:Gem::Version
|
96
109
|
version: '0'
|
97
110
|
- !ruby/object:Gem::Dependency
|
98
111
|
name: rdoc
|
99
112
|
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
100
114
|
requirements:
|
101
|
-
- -
|
115
|
+
- - ! '>='
|
102
116
|
- !ruby/object:Gem::Version
|
103
117
|
version: '0'
|
104
118
|
type: :development
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
107
122
|
requirements:
|
108
|
-
- -
|
123
|
+
- - ! '>='
|
109
124
|
- !ruby/object:Gem::Version
|
110
125
|
version: '0'
|
111
126
|
description: Fast big data BLAST XML parser and library; this libxml2 based version
|
@@ -118,9 +133,9 @@ extra_rdoc_files:
|
|
118
133
|
- LICENSE.txt
|
119
134
|
- README.md
|
120
135
|
files:
|
121
|
-
-
|
122
|
-
-
|
123
|
-
-
|
136
|
+
- .document
|
137
|
+
- .rspec
|
138
|
+
- .travis.yml
|
124
139
|
- Gemfile
|
125
140
|
- Gemfile.lock
|
126
141
|
- LICENSE.txt
|
@@ -155,25 +170,26 @@ files:
|
|
155
170
|
homepage: http://github.com/pjotrp/blastxmlparser
|
156
171
|
licenses:
|
157
172
|
- MIT
|
158
|
-
metadata: {}
|
159
173
|
post_install_message:
|
160
174
|
rdoc_options: []
|
161
175
|
require_paths:
|
162
176
|
- lib
|
163
177
|
required_ruby_version: !ruby/object:Gem::Requirement
|
178
|
+
none: false
|
164
179
|
requirements:
|
165
|
-
- -
|
180
|
+
- - ! '>='
|
166
181
|
- !ruby/object:Gem::Version
|
167
|
-
version:
|
182
|
+
version: 1.9.2
|
168
183
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
184
|
+
none: false
|
169
185
|
requirements:
|
170
|
-
- -
|
186
|
+
- - ! '>='
|
171
187
|
- !ruby/object:Gem::Version
|
172
188
|
version: '0'
|
173
189
|
requirements: []
|
174
190
|
rubyforge_project:
|
175
|
-
rubygems_version:
|
191
|
+
rubygems_version: 1.8.23
|
176
192
|
signing_key:
|
177
|
-
specification_version:
|
193
|
+
specification_version: 3
|
178
194
|
summary: Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer
|
179
195
|
test_files: []
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 76df7cd1f6e1bc4f2b04fdf3f0fc830110f2e376
|
4
|
-
data.tar.gz: 68f44a797aa5357690e6c4a10a1fe241a7b8fe37
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: f424b8cfedf921840dbf2fee412c191f1c9951d289b80db047744a56883b171b50d4b1161e86415f0e873be4bd43935031f966d32d394efea9208e65b2529903
|
7
|
-
data.tar.gz: 6214606ac08afa7306503c78e969046ab2a54d9c45e9dc42c597d8c033db2095fc5a25f15f56090a0337bd1049081699a81ceefa90d2acaf17cb782340074fc6
|