bio-blastxmlparser 2.0.2 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -1
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +20 -5
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/blastxmlparser +14 -5
- data/bio-blastxmlparser.gemspec +8 -7
- data/template/blast2rdf-minimal.erb +17 -1
- metadata +43 -27
- checksums.yaml +0 -7
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# bio-blastxmlparser
|
4
4
|
|
5
|
-
blastxmlparser is a very fast
|
5
|
+
blastxmlparser is a very fast parallel big-data BLAST XML file
|
6
6
|
parser, which can be used as command line utility. Use blastxmlparser
|
7
7
|
to:
|
8
8
|
|
@@ -163,6 +163,12 @@ by query iteration id, and hit_id. E.g.
|
|
163
163
|
etc. etc.
|
164
164
|
```
|
165
165
|
|
166
|
+
Another example outputs all definitions containing a string
|
167
|
+
|
168
|
+
```sh
|
169
|
+
/blastxmlparser -n hit.hit_def --filter 'hit.hit_def=~/G. Ratti/i'
|
170
|
+
```
|
171
|
+
|
166
172
|
## Modify output
|
167
173
|
|
168
174
|
To have more output options blastxmlparser can use an [ERB
|
@@ -244,7 +250,7 @@ can be
|
|
244
250
|
"version": "<%= BLASTXML_VERSION %>"
|
245
251
|
},
|
246
252
|
=BODY
|
247
|
-
{ "<%= hit.parent.query_def %>": {
|
253
|
+
{ "<%= hit.parent.query_def.strip %>": {
|
248
254
|
"num": <%= hit.hit_num %>,
|
249
255
|
"id": "<%= hit.hit_id %>",
|
250
256
|
"len": <%= hit.len %>,
|
@@ -263,13 +269,13 @@ may generate something like
|
|
263
269
|
"files": ["test/data/nt_example_blastn.m7"],
|
264
270
|
"version": "2.0.2-pre1"
|
265
271
|
},
|
266
|
-
{ "I_1 [477 - 884]
|
272
|
+
{ "I_1 [477 - 884]": {
|
267
273
|
"num": 41,
|
268
274
|
"id": "lcl|X_42251",
|
269
275
|
"len": 153,
|
270
276
|
"E-value": 0.0247015,
|
271
277
|
},
|
272
|
-
{ "I_1 [477 - 884]
|
278
|
+
{ "I_1 [477 - 884]": {
|
273
279
|
"num": 43,
|
274
280
|
"id": "lcl|V_105720",
|
275
281
|
"len": 180,
|
@@ -279,7 +285,16 @@ may generate something like
|
|
279
285
|
```
|
280
286
|
|
281
287
|
Note that the template is not smart enough to remove the final comma
|
282
|
-
from the last BODY element. To make it valid JSON that needs to be
|
288
|
+
from the last BODY element. To make it valid JSON that needs to be
|
289
|
+
removed. A future version may add a parameter to the BODY element or a
|
290
|
+
global rewrite function for this purpose. A simple
|
291
|
+
|
292
|
+
```ruby
|
293
|
+
<%= ( body.last? ? "" : "," ) %>
|
294
|
+
```
|
295
|
+
|
296
|
+
does not work here because the parallel parser does not
|
297
|
+
know which line is the last.
|
283
298
|
|
284
299
|
## Additional options
|
285
300
|
|
data/Rakefile
CHANGED
@@ -19,6 +19,7 @@ Jeweler::Tasks.new do |gem|
|
|
19
19
|
gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
|
20
20
|
gem.email = "pjotr.public01@thebird.nl"
|
21
21
|
gem.authors = ["Pjotr Prins"]
|
22
|
+
gem.required_ruby_version = '>=1.9.2'
|
22
23
|
end
|
23
24
|
Jeweler::RubygemsDotOrgTasks.new
|
24
25
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.0.2
|
1
|
+
2.0.3
|
data/bin/blastxmlparser
CHANGED
@@ -10,8 +10,8 @@ rootpath = File.dirname(File.dirname(__FILE__))
|
|
10
10
|
$: << File.join(rootpath,'lib')
|
11
11
|
|
12
12
|
BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
13
|
-
|
14
|
-
$stderr.print "
|
13
|
+
BLASTXML_BANNER = "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>"
|
14
|
+
$stderr.print BLASTXML_BANNER,"\n\n"
|
15
15
|
|
16
16
|
USAGE = <<EOM
|
17
17
|
|
@@ -129,6 +129,7 @@ begin
|
|
129
129
|
|
130
130
|
raise "No input file(s) defined" if ARGV.size == 0
|
131
131
|
|
132
|
+
output_running = false # a lock for tracking the output fork
|
132
133
|
|
133
134
|
ARGV.each do | fn |
|
134
135
|
logger.info("XML parsing #{fn}")
|
@@ -214,15 +215,23 @@ begin
|
|
214
215
|
process.call(iter,i)
|
215
216
|
}
|
216
217
|
# Output is forked to a separate process too
|
217
|
-
|
218
|
+
while output_running do
|
219
|
+
sleep 0.01
|
220
|
+
end
|
221
|
+
output_running = true
|
222
|
+
|
223
|
+
Parallel.map(1..1, :finish => lambda { |item,i,result| output_running=false }) { |num|
|
218
224
|
output.call out
|
219
225
|
STDOUT.flush
|
220
226
|
STDOUT.close
|
221
|
-
|
222
|
-
|
227
|
+
}
|
228
|
+
|
223
229
|
chunks = []
|
224
230
|
end
|
225
231
|
end
|
232
|
+
while output_running do
|
233
|
+
sleep 0.01
|
234
|
+
end
|
226
235
|
output.call Parallel.map_with_index(chunks, :in_processes => options.threads) { | iter,i |
|
227
236
|
process.call(iter,i)
|
228
237
|
}
|
data/bio-blastxmlparser.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bio-blastxmlparser"
|
8
|
-
s.version = "2.0.2"
|
8
|
+
s.version = "2.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Pjotr Prins"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2015-05-07"
|
13
13
|
s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
|
14
14
|
s.email = "pjotr.public01@thebird.nl"
|
15
15
|
s.executables = ["blastxmlparser"]
|
@@ -56,15 +56,16 @@ Gem::Specification.new do |s|
|
|
56
56
|
s.homepage = "http://github.com/pjotrp/blastxmlparser"
|
57
57
|
s.licenses = ["MIT"]
|
58
58
|
s.require_paths = ["lib"]
|
59
|
-
s.
|
59
|
+
s.required_ruby_version = Gem::Requirement.new(">= 1.9.2")
|
60
|
+
s.rubygems_version = "1.8.23"
|
60
61
|
s.summary = "Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
|
61
62
|
|
62
63
|
if s.respond_to? :specification_version then
|
63
|
-
s.specification_version =
|
64
|
+
s.specification_version = 3
|
64
65
|
|
65
66
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
66
67
|
s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
|
67
|
-
s.add_runtime_dependency(%q<nokogiri>, ["
|
68
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
68
69
|
s.add_development_dependency(%q<rake>, [">= 0"])
|
69
70
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
70
71
|
s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
|
@@ -72,7 +73,7 @@ Gem::Specification.new do |s|
|
|
72
73
|
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
73
74
|
else
|
74
75
|
s.add_dependency(%q<bio-logger>, [">= 0"])
|
75
|
-
s.add_dependency(%q<nokogiri>, ["
|
76
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
76
77
|
s.add_dependency(%q<rake>, [">= 0"])
|
77
78
|
s.add_dependency(%q<bundler>, [">= 0"])
|
78
79
|
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
@@ -81,7 +82,7 @@ Gem::Specification.new do |s|
|
|
81
82
|
end
|
82
83
|
else
|
83
84
|
s.add_dependency(%q<bio-logger>, [">= 0"])
|
84
|
-
s.add_dependency(%q<nokogiri>, ["
|
85
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
85
86
|
s.add_dependency(%q<rake>, [">= 0"])
|
86
87
|
s.add_dependency(%q<bundler>, [">= 0"])
|
87
88
|
s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
|
@@ -1,10 +1,24 @@
|
|
1
|
+
=HEADER
|
2
|
+
# BLAST RDF template example
|
3
|
+
#
|
4
|
+
# options: <%= options.to_h %>
|
5
|
+
# files: <%= ARGV %>
|
6
|
+
# date: <%= Time.now %>
|
7
|
+
# version: <%= BLASTXML_BANNER %>
|
8
|
+
|
9
|
+
@prefix gene: <http://biobeat.org/rdf/gwp/gene-names#> .
|
10
|
+
@prefix : <http://biobeat.org/rdf/megablast#> .
|
11
|
+
|
12
|
+
=BODY
|
1
13
|
<%
|
2
14
|
blastid = Turtle::mangle_identifier(hit.parent.query_def)
|
3
15
|
id = blastid+'_'+hit.hit_num.to_s
|
16
|
+
gene_name = hit.parent.query_def.split(/ /).first
|
4
17
|
%>
|
5
18
|
:<%= blastid %> :query :<%= id %>
|
6
19
|
:<%= id %>
|
7
|
-
:query_def "<%= hit.parent.query_def %>";
|
20
|
+
:query_def "<%= hit.parent.query_def.strip %>";
|
21
|
+
gene:gene_name "<%= gene_name %>";
|
8
22
|
:num <%= hit.hit_num %>;
|
9
23
|
:accession "<%= hit.accession %>";
|
10
24
|
:len <%= hit.len %>;
|
@@ -12,3 +26,5 @@
|
|
12
26
|
:align_len <%= hsp.align_len %>;
|
13
27
|
:bitscore <%= hsp.bit_score %>;
|
14
28
|
:evalue <%= hsp.evalue %> .
|
29
|
+
|
30
|
+
=FOOTER
|
metadata
CHANGED
@@ -1,111 +1,126 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-blastxmlparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.2
|
4
|
+
version: 2.0.3
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Pjotr Prins
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2015-05-07 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: bio-logger
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
16
18
|
requirements:
|
17
|
-
- -
|
19
|
+
- - ! '>='
|
18
20
|
- !ruby/object:Gem::Version
|
19
21
|
version: '0'
|
20
22
|
type: :runtime
|
21
23
|
prerelease: false
|
22
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
23
26
|
requirements:
|
24
|
-
- -
|
27
|
+
- - ! '>='
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0'
|
27
30
|
- !ruby/object:Gem::Dependency
|
28
31
|
name: nokogiri
|
29
32
|
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
30
34
|
requirements:
|
31
|
-
- -
|
35
|
+
- - ! '>='
|
32
36
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
37
|
+
version: '0'
|
34
38
|
type: :runtime
|
35
39
|
prerelease: false
|
36
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
37
42
|
requirements:
|
38
|
-
- -
|
43
|
+
- - ! '>='
|
39
44
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
45
|
+
version: '0'
|
41
46
|
- !ruby/object:Gem::Dependency
|
42
47
|
name: rake
|
43
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
44
50
|
requirements:
|
45
|
-
- -
|
51
|
+
- - ! '>='
|
46
52
|
- !ruby/object:Gem::Version
|
47
53
|
version: '0'
|
48
54
|
type: :development
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - ! '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
62
|
- !ruby/object:Gem::Dependency
|
56
63
|
name: bundler
|
57
64
|
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
58
66
|
requirements:
|
59
|
-
- -
|
67
|
+
- - ! '>='
|
60
68
|
- !ruby/object:Gem::Version
|
61
69
|
version: '0'
|
62
70
|
type: :development
|
63
71
|
prerelease: false
|
64
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
65
74
|
requirements:
|
66
|
-
- -
|
75
|
+
- - ! '>='
|
67
76
|
- !ruby/object:Gem::Version
|
68
77
|
version: '0'
|
69
78
|
- !ruby/object:Gem::Dependency
|
70
79
|
name: jeweler
|
71
80
|
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
72
82
|
requirements:
|
73
|
-
- -
|
83
|
+
- - ~>
|
74
84
|
- !ruby/object:Gem::Version
|
75
85
|
version: 2.0.1
|
76
86
|
type: :development
|
77
87
|
prerelease: false
|
78
88
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
79
90
|
requirements:
|
80
|
-
- -
|
91
|
+
- - ~>
|
81
92
|
- !ruby/object:Gem::Version
|
82
93
|
version: 2.0.1
|
83
94
|
- !ruby/object:Gem::Dependency
|
84
95
|
name: rspec
|
85
96
|
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
86
98
|
requirements:
|
87
|
-
- -
|
99
|
+
- - ! '>='
|
88
100
|
- !ruby/object:Gem::Version
|
89
101
|
version: '0'
|
90
102
|
type: :development
|
91
103
|
prerelease: false
|
92
104
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
93
106
|
requirements:
|
94
|
-
- -
|
107
|
+
- - ! '>='
|
95
108
|
- !ruby/object:Gem::Version
|
96
109
|
version: '0'
|
97
110
|
- !ruby/object:Gem::Dependency
|
98
111
|
name: rdoc
|
99
112
|
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
100
114
|
requirements:
|
101
|
-
- -
|
115
|
+
- - ! '>='
|
102
116
|
- !ruby/object:Gem::Version
|
103
117
|
version: '0'
|
104
118
|
type: :development
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
107
122
|
requirements:
|
108
|
-
- -
|
123
|
+
- - ! '>='
|
109
124
|
- !ruby/object:Gem::Version
|
110
125
|
version: '0'
|
111
126
|
description: Fast big data BLAST XML parser and library; this libxml2 based version
|
@@ -118,9 +133,9 @@ extra_rdoc_files:
|
|
118
133
|
- LICENSE.txt
|
119
134
|
- README.md
|
120
135
|
files:
|
121
|
-
-
|
122
|
-
-
|
123
|
-
-
|
136
|
+
- .document
|
137
|
+
- .rspec
|
138
|
+
- .travis.yml
|
124
139
|
- Gemfile
|
125
140
|
- Gemfile.lock
|
126
141
|
- LICENSE.txt
|
@@ -155,25 +170,26 @@ files:
|
|
155
170
|
homepage: http://github.com/pjotrp/blastxmlparser
|
156
171
|
licenses:
|
157
172
|
- MIT
|
158
|
-
metadata: {}
|
159
173
|
post_install_message:
|
160
174
|
rdoc_options: []
|
161
175
|
require_paths:
|
162
176
|
- lib
|
163
177
|
required_ruby_version: !ruby/object:Gem::Requirement
|
178
|
+
none: false
|
164
179
|
requirements:
|
165
|
-
- -
|
180
|
+
- - ! '>='
|
166
181
|
- !ruby/object:Gem::Version
|
167
|
-
version:
|
182
|
+
version: 1.9.2
|
168
183
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
184
|
+
none: false
|
169
185
|
requirements:
|
170
|
-
- -
|
186
|
+
- - ! '>='
|
171
187
|
- !ruby/object:Gem::Version
|
172
188
|
version: '0'
|
173
189
|
requirements: []
|
174
190
|
rubyforge_project:
|
175
|
-
rubygems_version:
|
191
|
+
rubygems_version: 1.8.23
|
176
192
|
signing_key:
|
177
|
-
specification_version:
|
193
|
+
specification_version: 3
|
178
194
|
summary: Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer
|
179
195
|
test_files: []
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 76df7cd1f6e1bc4f2b04fdf3f0fc830110f2e376
|
4
|
-
data.tar.gz: 68f44a797aa5357690e6c4a10a1fe241a7b8fe37
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: f424b8cfedf921840dbf2fee412c191f1c9951d289b80db047744a56883b171b50d4b1161e86415f0e873be4bd43935031f966d32d394efea9208e65b2529903
|
7
|
-
data.tar.gz: 6214606ac08afa7306503c78e969046ab2a54d9c45e9dc42c597d8c033db2095fc5a25f15f56090a0337bd1049081699a81ceefa90d2acaf17cb782340074fc6
|