bio-blastxmlparser 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@ language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
4
  - jruby-19mode # JRuby in 1.9 mode
5
- - rbx-19mode
6
5
  - 2.1.0
6
+ # - rbx-19mode
7
7
  # - 1.8.7
8
8
  # - jruby-18mode # JRuby in 1.8 mode
9
9
  # - rbx-18mode
data/Gemfile CHANGED
@@ -1,7 +1,7 @@
1
1
  source "http://rubygems.org"
2
2
  # Runtime dependencies
3
3
  gem "bio-logger"
4
- gem "nokogiri", "~>1.6.3"
4
+ gem "nokogiri"
5
5
 
6
6
  # Add dependencies to develop your gem here.
7
7
  # Include everything needed to run rake, tests, features, etc.
@@ -66,7 +66,7 @@ DEPENDENCIES
66
66
  bio-logger
67
67
  bundler
68
68
  jeweler (~> 2.0.1)
69
- nokogiri (~> 1.6.3)
69
+ nokogiri
70
70
  rake
71
71
  rdoc
72
72
  rspec
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # bio-blastxmlparser
4
4
 
5
- blastxmlparser is a very fast parallelised big-data BLAST XML file
5
+ blastxmlparser is a very fast parallel big-data BLAST XML file
6
6
  parser, which can be used as command line utility. Use blastxmlparser
7
7
  to:
8
8
 
@@ -163,6 +163,12 @@ by query iteration id, and hit_id. E.g.
163
163
  etc. etc.
164
164
  ```
165
165
 
166
+ Another example outputs all hit definitions that contain a given string:
167
+
168
+ ```sh
169
+ blastxmlparser -n hit.hit_def --filter 'hit.hit_def=~/G. Ratti/i'
170
+ ```
171
+
166
172
  ## Modify output
167
173
 
168
174
  To have more output options blastxmlparser can use an [ERB
@@ -244,7 +250,7 @@ can be
244
250
  "version": "<%= BLASTXML_VERSION %>"
245
251
  },
246
252
  =BODY
247
- { "<%= hit.parent.query_def %>": {
253
+ { "<%= hit.parent.query_def.strip %>": {
248
254
  "num": <%= hit.hit_num %>,
249
255
  "id": "<%= hit.hit_id %>",
250
256
  "len": <%= hit.len %>,
@@ -263,13 +269,13 @@ may generate something like
263
269
  "files": ["test/data/nt_example_blastn.m7"],
264
270
  "version": "2.0.2-pre1"
265
271
  },
266
- { "I_1 [477 - 884] ": {
272
+ { "I_1 [477 - 884]": {
267
273
  "num": 41,
268
274
  "id": "lcl|X_42251",
269
275
  "len": 153,
270
276
  "E-value": 0.0247015,
271
277
  },
272
- { "I_1 [477 - 884] ": {
278
+ { "I_1 [477 - 884]": {
273
279
  "num": 43,
274
280
  "id": "lcl|V_105720",
275
281
  "len": 180,
@@ -279,7 +285,16 @@ may generate something like
279
285
  ```
280
286
 
281
287
  Note that the template is not smart enough to remove the final comma
282
- from the last BODY element. To make it valid JSON that needs to be removed.
288
+ from the last BODY element. To make it valid JSON, that comma needs to be
289
+ removed. A future version may add a parameter to the BODY element or a
290
+ global rewrite function for this purpose. A simple check such as
291
+
292
+ ```ruby
293
+ <%= ( body.last? ? "" : "," ) %>
294
+ ```
295
+
296
+ does not work here because the parallel parser does not
297
+ know which line is the last.
283
298
 
284
299
  ## Additional options
285
300
 
data/Rakefile CHANGED
@@ -19,6 +19,7 @@ Jeweler::Tasks.new do |gem|
19
19
  gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
20
20
  gem.email = "pjotr.public01@thebird.nl"
21
21
  gem.authors = ["Pjotr Prins"]
22
+ gem.required_ruby_version = '>=1.9.2'
22
23
  end
23
24
  Jeweler::RubygemsDotOrgTasks.new
24
25
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.2
1
+ 2.0.3
@@ -10,8 +10,8 @@ rootpath = File.dirname(File.dirname(__FILE__))
10
10
  $: << File.join(rootpath,'lib')
11
11
 
12
12
  BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
13
-
14
- $stderr.print "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
13
+ BLASTXML_BANNER = "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>"
14
+ $stderr.print BLASTXML_BANNER,"\n\n"
15
15
 
16
16
  USAGE = <<EOM
17
17
 
@@ -129,6 +129,7 @@ begin
129
129
 
130
130
  raise "No input file(s) defined" if ARGV.size == 0
131
131
 
132
+ output_running = false # a lock for tracking the output fork
132
133
 
133
134
  ARGV.each do | fn |
134
135
  logger.info("XML parsing #{fn}")
@@ -214,15 +215,23 @@ begin
214
215
  process.call(iter,i)
215
216
  }
216
217
  # Output is forked to a separate process too
217
- fork do
218
+ while output_running do
219
+ sleep 0.01
220
+ end
221
+ output_running = true
222
+
223
+ Parallel.map(1..1, :finish => lambda { |item,i,result| output_running=false }) { |num|
218
224
  output.call out
219
225
  STDOUT.flush
220
226
  STDOUT.close
221
- exit 0
222
- end
227
+ }
228
+
223
229
  chunks = []
224
230
  end
225
231
  end
232
+ while output_running do
233
+ sleep 0.01
234
+ end
226
235
  output.call Parallel.map_with_index(chunks, :in_processes => options.threads) { | iter,i |
227
236
  process.call(iter,i)
228
237
  }
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-blastxmlparser"
8
- s.version = "2.0.2"
8
+ s.version = "2.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2014-11-07"
12
+ s.date = "2015-05-07"
13
13
  s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["blastxmlparser"]
@@ -56,15 +56,16 @@ Gem::Specification.new do |s|
56
56
  s.homepage = "http://github.com/pjotrp/blastxmlparser"
57
57
  s.licenses = ["MIT"]
58
58
  s.require_paths = ["lib"]
59
- s.rubygems_version = "2.0.3"
59
+ s.required_ruby_version = Gem::Requirement.new(">= 1.9.2")
60
+ s.rubygems_version = "1.8.23"
60
61
  s.summary = "Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
61
62
 
62
63
  if s.respond_to? :specification_version then
63
- s.specification_version = 4
64
+ s.specification_version = 3
64
65
 
65
66
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
66
67
  s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
67
- s.add_runtime_dependency(%q<nokogiri>, ["~> 1.6.3"])
68
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
68
69
  s.add_development_dependency(%q<rake>, [">= 0"])
69
70
  s.add_development_dependency(%q<bundler>, [">= 0"])
70
71
  s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
@@ -72,7 +73,7 @@ Gem::Specification.new do |s|
72
73
  s.add_development_dependency(%q<rdoc>, [">= 0"])
73
74
  else
74
75
  s.add_dependency(%q<bio-logger>, [">= 0"])
75
- s.add_dependency(%q<nokogiri>, ["~> 1.6.3"])
76
+ s.add_dependency(%q<nokogiri>, [">= 0"])
76
77
  s.add_dependency(%q<rake>, [">= 0"])
77
78
  s.add_dependency(%q<bundler>, [">= 0"])
78
79
  s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
@@ -81,7 +82,7 @@ Gem::Specification.new do |s|
81
82
  end
82
83
  else
83
84
  s.add_dependency(%q<bio-logger>, [">= 0"])
84
- s.add_dependency(%q<nokogiri>, ["~> 1.6.3"])
85
+ s.add_dependency(%q<nokogiri>, [">= 0"])
85
86
  s.add_dependency(%q<rake>, [">= 0"])
86
87
  s.add_dependency(%q<bundler>, [">= 0"])
87
88
  s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
@@ -1,10 +1,24 @@
1
+ =HEADER
2
+ # BLAST RDF template example
3
+ #
4
+ # options: <%= options.to_h %>
5
+ # files: <%= ARGV %>
6
+ # date: <%= Time.now %>
7
+ # version: <%= BLASTXML_BANNER %>
8
+
9
+ @prefix gene: <http://biobeat.org/rdf/gwp/gene-names#> .
10
+ @prefix : <http://biobeat.org/rdf/megablast#> .
11
+
12
+ =BODY
1
13
  <%
2
14
  blastid = Turtle::mangle_identifier(hit.parent.query_def)
3
15
  id = blastid+'_'+hit.hit_num.to_s
16
+ gene_name = hit.parent.query_def.split(/ /).first
4
17
  %>
5
18
  :<%= blastid %> :query :<%= id %>
6
19
  :<%= id %>
7
- :query_def "<%= hit.parent.query_def %>";
20
+ :query_def "<%= hit.parent.query_def.strip %>";
21
+ gene:gene_name "<%= gene_name %>";
8
22
  :num <%= hit.hit_num %>;
9
23
  :accession "<%= hit.accession %>";
10
24
  :len <%= hit.len %>;
@@ -12,3 +26,5 @@
12
26
  :align_len <%= hsp.align_len %>;
13
27
  :bitscore <%= hsp.bit_score %>;
14
28
  :evalue <%= hsp.evalue %> .
29
+
30
+ =FOOTER
metadata CHANGED
@@ -1,111 +1,126 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-blastxmlparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.0.3
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Pjotr Prins
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2014-11-07 00:00:00.000000000 Z
12
+ date: 2015-05-07 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: bio-logger
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
- - - ">="
19
+ - - ! '>='
18
20
  - !ruby/object:Gem::Version
19
21
  version: '0'
20
22
  type: :runtime
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
- - - ">="
27
+ - - ! '>='
25
28
  - !ruby/object:Gem::Version
26
29
  version: '0'
27
30
  - !ruby/object:Gem::Dependency
28
31
  name: nokogiri
29
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
30
34
  requirements:
31
- - - "~>"
35
+ - - ! '>='
32
36
  - !ruby/object:Gem::Version
33
- version: 1.6.3
37
+ version: '0'
34
38
  type: :runtime
35
39
  prerelease: false
36
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
37
42
  requirements:
38
- - - "~>"
43
+ - - ! '>='
39
44
  - !ruby/object:Gem::Version
40
- version: 1.6.3
45
+ version: '0'
41
46
  - !ruby/object:Gem::Dependency
42
47
  name: rake
43
48
  requirement: !ruby/object:Gem::Requirement
49
+ none: false
44
50
  requirements:
45
- - - ">="
51
+ - - ! '>='
46
52
  - !ruby/object:Gem::Version
47
53
  version: '0'
48
54
  type: :development
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
51
58
  requirements:
52
- - - ">="
59
+ - - ! '>='
53
60
  - !ruby/object:Gem::Version
54
61
  version: '0'
55
62
  - !ruby/object:Gem::Dependency
56
63
  name: bundler
57
64
  requirement: !ruby/object:Gem::Requirement
65
+ none: false
58
66
  requirements:
59
- - - ">="
67
+ - - ! '>='
60
68
  - !ruby/object:Gem::Version
61
69
  version: '0'
62
70
  type: :development
63
71
  prerelease: false
64
72
  version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
65
74
  requirements:
66
- - - ">="
75
+ - - ! '>='
67
76
  - !ruby/object:Gem::Version
68
77
  version: '0'
69
78
  - !ruby/object:Gem::Dependency
70
79
  name: jeweler
71
80
  requirement: !ruby/object:Gem::Requirement
81
+ none: false
72
82
  requirements:
73
- - - "~>"
83
+ - - ~>
74
84
  - !ruby/object:Gem::Version
75
85
  version: 2.0.1
76
86
  type: :development
77
87
  prerelease: false
78
88
  version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
79
90
  requirements:
80
- - - "~>"
91
+ - - ~>
81
92
  - !ruby/object:Gem::Version
82
93
  version: 2.0.1
83
94
  - !ruby/object:Gem::Dependency
84
95
  name: rspec
85
96
  requirement: !ruby/object:Gem::Requirement
97
+ none: false
86
98
  requirements:
87
- - - ">="
99
+ - - ! '>='
88
100
  - !ruby/object:Gem::Version
89
101
  version: '0'
90
102
  type: :development
91
103
  prerelease: false
92
104
  version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
93
106
  requirements:
94
- - - ">="
107
+ - - ! '>='
95
108
  - !ruby/object:Gem::Version
96
109
  version: '0'
97
110
  - !ruby/object:Gem::Dependency
98
111
  name: rdoc
99
112
  requirement: !ruby/object:Gem::Requirement
113
+ none: false
100
114
  requirements:
101
- - - ">="
115
+ - - ! '>='
102
116
  - !ruby/object:Gem::Version
103
117
  version: '0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
107
122
  requirements:
108
- - - ">="
123
+ - - ! '>='
109
124
  - !ruby/object:Gem::Version
110
125
  version: '0'
111
126
  description: Fast big data BLAST XML parser and library; this libxml2 based version
@@ -118,9 +133,9 @@ extra_rdoc_files:
118
133
  - LICENSE.txt
119
134
  - README.md
120
135
  files:
121
- - ".document"
122
- - ".rspec"
123
- - ".travis.yml"
136
+ - .document
137
+ - .rspec
138
+ - .travis.yml
124
139
  - Gemfile
125
140
  - Gemfile.lock
126
141
  - LICENSE.txt
@@ -155,25 +170,26 @@ files:
155
170
  homepage: http://github.com/pjotrp/blastxmlparser
156
171
  licenses:
157
172
  - MIT
158
- metadata: {}
159
173
  post_install_message:
160
174
  rdoc_options: []
161
175
  require_paths:
162
176
  - lib
163
177
  required_ruby_version: !ruby/object:Gem::Requirement
178
+ none: false
164
179
  requirements:
165
- - - ">="
180
+ - - ! '>='
166
181
  - !ruby/object:Gem::Version
167
- version: '0'
182
+ version: 1.9.2
168
183
  required_rubygems_version: !ruby/object:Gem::Requirement
184
+ none: false
169
185
  requirements:
170
- - - ">="
186
+ - - ! '>='
171
187
  - !ruby/object:Gem::Version
172
188
  version: '0'
173
189
  requirements: []
174
190
  rubyforge_project:
175
- rubygems_version: 2.0.3
191
+ rubygems_version: 1.8.23
176
192
  signing_key:
177
- specification_version: 4
193
+ specification_version: 3
178
194
  summary: Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer
179
195
  test_files: []
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 76df7cd1f6e1bc4f2b04fdf3f0fc830110f2e376
4
- data.tar.gz: 68f44a797aa5357690e6c4a10a1fe241a7b8fe37
5
- SHA512:
6
- metadata.gz: f424b8cfedf921840dbf2fee412c191f1c9951d289b80db047744a56883b171b50d4b1161e86415f0e873be4bd43935031f966d32d394efea9208e65b2529903
7
- data.tar.gz: 6214606ac08afa7306503c78e969046ab2a54d9c45e9dc42c597d8c033db2095fc5a25f15f56090a0337bd1049081699a81ceefa90d2acaf17cb782340074fc6