bio-blastxmlparser 2.0.2 → 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,8 +2,8 @@ language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
4
  - jruby-19mode # JRuby in 1.9 mode
5
- - rbx-19mode
6
5
  - 2.1.0
6
+ # - rbx-19mode
7
7
  # - 1.8.7
8
8
  # - jruby-18mode # JRuby in 1.8 mode
9
9
  # - rbx-18mode
data/Gemfile CHANGED
@@ -1,7 +1,7 @@
1
1
  source "http://rubygems.org"
2
2
  # Runtime dependencies
3
3
  gem "bio-logger"
4
- gem "nokogiri", "~>1.6.3"
4
+ gem "nokogiri"
5
5
 
6
6
  # Add dependencies to develop your gem here.
7
7
  # Include everything needed to run rake, tests, features, etc.
@@ -66,7 +66,7 @@ DEPENDENCIES
66
66
  bio-logger
67
67
  bundler
68
68
  jeweler (~> 2.0.1)
69
- nokogiri (~> 1.6.3)
69
+ nokogiri
70
70
  rake
71
71
  rdoc
72
72
  rspec
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # bio-blastxmlparser
4
4
 
5
- blastxmlparser is a very fast parallelised big-data BLAST XML file
5
+ blastxmlparser is a very fast parallel big-data BLAST XML file
6
6
  parser, which can be used as a command line utility. Use blastxmlparser
7
7
  to:
8
8
 
@@ -163,6 +163,12 @@ by query iteration id, and hit_id. E.g.
163
163
  etc. etc.
164
164
  ```
165
165
 
166
+ Another example outputs all definitions containing a string:
167
+
168
+ ```sh
169
+ blastxmlparser -n hit.hit_def --filter 'hit.hit_def=~/G. Ratti/i'
170
+ ```
171
+
166
172
  ## Modify output
167
173
 
168
174
  To have more output options blastxmlparser can use an [ERB
@@ -244,7 +250,7 @@ can be
244
250
  "version": "<%= BLASTXML_VERSION %>"
245
251
  },
246
252
  =BODY
247
- { "<%= hit.parent.query_def %>": {
253
+ { "<%= hit.parent.query_def.strip %>": {
248
254
  "num": <%= hit.hit_num %>,
249
255
  "id": "<%= hit.hit_id %>",
250
256
  "len": <%= hit.len %>,
@@ -263,13 +269,13 @@ may generate something like
263
269
  "files": ["test/data/nt_example_blastn.m7"],
264
270
  "version": "2.0.2-pre1"
265
271
  },
266
- { "I_1 [477 - 884] ": {
272
+ { "I_1 [477 - 884]": {
267
273
  "num": 41,
268
274
  "id": "lcl|X_42251",
269
275
  "len": 153,
270
276
  "E-value": 0.0247015,
271
277
  },
272
- { "I_1 [477 - 884] ": {
278
+ { "I_1 [477 - 884]": {
273
279
  "num": 43,
274
280
  "id": "lcl|V_105720",
275
281
  "len": 180,
@@ -279,7 +285,16 @@ may generate something like
279
285
  ```
280
286
 
281
287
  Note that the template is not smart enough to remove the final comma
282
- from the last BODY element. To make it valid JSON that needs to be removed.
288
+ from the last BODY element. To make it valid JSON that needs to be
289
+ removed. A future version may add a parameter to the BODY element or a
290
+ global rewrite function for this purpose. A simple expression such as
291
+
292
+ ```ruby
293
+ <%= ( body.last? ? "" : "," ) %>
294
+ ```
295
+
296
+ does not work here because the parallel parser does not
297
+ know which line is the last.
283
298
 
284
299
  ## Additional options
285
300
 
data/Rakefile CHANGED
@@ -19,6 +19,7 @@ Jeweler::Tasks.new do |gem|
19
19
  gem.description = %Q{Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI}
20
20
  gem.email = "pjotr.public01@thebird.nl"
21
21
  gem.authors = ["Pjotr Prins"]
22
+ gem.required_ruby_version = '>=1.9.2'
22
23
  end
23
24
  Jeweler::RubygemsDotOrgTasks.new
24
25
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.2
1
+ 2.0.3
@@ -10,8 +10,8 @@ rootpath = File.dirname(File.dirname(__FILE__))
10
10
  $: << File.join(rootpath,'lib')
11
11
 
12
12
  BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
13
-
14
- $stderr.print "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
13
+ BLASTXML_BANNER = "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>"
14
+ $stderr.print BLASTXML_BANNER,"\n\n"
15
15
 
16
16
  USAGE = <<EOM
17
17
 
@@ -129,6 +129,7 @@ begin
129
129
 
130
130
  raise "No input file(s) defined" if ARGV.size == 0
131
131
 
132
+ output_running = false # a lock for tracking the output fork
132
133
 
133
134
  ARGV.each do | fn |
134
135
  logger.info("XML parsing #{fn}")
@@ -214,15 +215,23 @@ begin
214
215
  process.call(iter,i)
215
216
  }
216
217
  # Output is forked to a separate process too
217
- fork do
218
+ while output_running do
219
+ sleep 0.01
220
+ end
221
+ output_running = true
222
+
223
+ Parallel.map(1..1, :finish => lambda { |item,i,result| output_running=false }) { |num|
218
224
  output.call out
219
225
  STDOUT.flush
220
226
  STDOUT.close
221
- exit 0
222
- end
227
+ }
228
+
223
229
  chunks = []
224
230
  end
225
231
  end
232
+ while output_running do
233
+ sleep 0.01
234
+ end
226
235
  output.call Parallel.map_with_index(chunks, :in_processes => options.threads) { | iter,i |
227
236
  process.call(iter,i)
228
237
  }
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bio-blastxmlparser"
8
- s.version = "2.0.2"
8
+ s.version = "2.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Pjotr Prins"]
12
- s.date = "2014-11-07"
12
+ s.date = "2015-05-07"
13
13
  s.description = "Fast big data BLAST XML parser and library; this libxml2 based version is 50x faster than BioRuby and comes with a nice CLI"
14
14
  s.email = "pjotr.public01@thebird.nl"
15
15
  s.executables = ["blastxmlparser"]
@@ -56,15 +56,16 @@ Gem::Specification.new do |s|
56
56
  s.homepage = "http://github.com/pjotrp/blastxmlparser"
57
57
  s.licenses = ["MIT"]
58
58
  s.require_paths = ["lib"]
59
- s.rubygems_version = "2.0.3"
59
+ s.required_ruby_version = Gem::Requirement.new(">= 1.9.2")
60
+ s.rubygems_version = "1.8.23"
60
61
  s.summary = "Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer"
61
62
 
62
63
  if s.respond_to? :specification_version then
63
- s.specification_version = 4
64
+ s.specification_version = 3
64
65
 
65
66
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
66
67
  s.add_runtime_dependency(%q<bio-logger>, [">= 0"])
67
- s.add_runtime_dependency(%q<nokogiri>, ["~> 1.6.3"])
68
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
68
69
  s.add_development_dependency(%q<rake>, [">= 0"])
69
70
  s.add_development_dependency(%q<bundler>, [">= 0"])
70
71
  s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
@@ -72,7 +73,7 @@ Gem::Specification.new do |s|
72
73
  s.add_development_dependency(%q<rdoc>, [">= 0"])
73
74
  else
74
75
  s.add_dependency(%q<bio-logger>, [">= 0"])
75
- s.add_dependency(%q<nokogiri>, ["~> 1.6.3"])
76
+ s.add_dependency(%q<nokogiri>, [">= 0"])
76
77
  s.add_dependency(%q<rake>, [">= 0"])
77
78
  s.add_dependency(%q<bundler>, [">= 0"])
78
79
  s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
@@ -81,7 +82,7 @@ Gem::Specification.new do |s|
81
82
  end
82
83
  else
83
84
  s.add_dependency(%q<bio-logger>, [">= 0"])
84
- s.add_dependency(%q<nokogiri>, ["~> 1.6.3"])
85
+ s.add_dependency(%q<nokogiri>, [">= 0"])
85
86
  s.add_dependency(%q<rake>, [">= 0"])
86
87
  s.add_dependency(%q<bundler>, [">= 0"])
87
88
  s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
@@ -1,10 +1,24 @@
1
+ =HEADER
2
+ # BLAST RDF template example
3
+ #
4
+ # options: <%= options.to_h %>
5
+ # files: <%= ARGV %>
6
+ # date: <%= Time.now %>
7
+ # version: <%= BLASTXML_BANNER %>
8
+
9
+ @prefix gene: <http://biobeat.org/rdf/gwp/gene-names#> .
10
+ @prefix : <http://biobeat.org/rdf/megablast#> .
11
+
12
+ =BODY
1
13
  <%
2
14
  blastid = Turtle::mangle_identifier(hit.parent.query_def)
3
15
  id = blastid+'_'+hit.hit_num.to_s
16
+ gene_name = hit.parent.query_def.split(/ /).first
4
17
  %>
5
18
  :<%= blastid %> :query :<%= id %>
6
19
  :<%= id %>
7
- :query_def "<%= hit.parent.query_def %>";
20
+ :query_def "<%= hit.parent.query_def.strip %>";
21
+ gene:gene_name "<%= gene_name %>";
8
22
  :num <%= hit.hit_num %>;
9
23
  :accession "<%= hit.accession %>";
10
24
  :len <%= hit.len %>;
@@ -12,3 +26,5 @@
12
26
  :align_len <%= hsp.align_len %>;
13
27
  :bitscore <%= hsp.bit_score %>;
14
28
  :evalue <%= hsp.evalue %> .
29
+
30
+ =FOOTER
metadata CHANGED
@@ -1,111 +1,126 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-blastxmlparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.0.3
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Pjotr Prins
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2014-11-07 00:00:00.000000000 Z
12
+ date: 2015-05-07 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: bio-logger
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
- - - ">="
19
+ - - ! '>='
18
20
  - !ruby/object:Gem::Version
19
21
  version: '0'
20
22
  type: :runtime
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
- - - ">="
27
+ - - ! '>='
25
28
  - !ruby/object:Gem::Version
26
29
  version: '0'
27
30
  - !ruby/object:Gem::Dependency
28
31
  name: nokogiri
29
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
30
34
  requirements:
31
- - - "~>"
35
+ - - ! '>='
32
36
  - !ruby/object:Gem::Version
33
- version: 1.6.3
37
+ version: '0'
34
38
  type: :runtime
35
39
  prerelease: false
36
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
37
42
  requirements:
38
- - - "~>"
43
+ - - ! '>='
39
44
  - !ruby/object:Gem::Version
40
- version: 1.6.3
45
+ version: '0'
41
46
  - !ruby/object:Gem::Dependency
42
47
  name: rake
43
48
  requirement: !ruby/object:Gem::Requirement
49
+ none: false
44
50
  requirements:
45
- - - ">="
51
+ - - ! '>='
46
52
  - !ruby/object:Gem::Version
47
53
  version: '0'
48
54
  type: :development
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
51
58
  requirements:
52
- - - ">="
59
+ - - ! '>='
53
60
  - !ruby/object:Gem::Version
54
61
  version: '0'
55
62
  - !ruby/object:Gem::Dependency
56
63
  name: bundler
57
64
  requirement: !ruby/object:Gem::Requirement
65
+ none: false
58
66
  requirements:
59
- - - ">="
67
+ - - ! '>='
60
68
  - !ruby/object:Gem::Version
61
69
  version: '0'
62
70
  type: :development
63
71
  prerelease: false
64
72
  version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
65
74
  requirements:
66
- - - ">="
75
+ - - ! '>='
67
76
  - !ruby/object:Gem::Version
68
77
  version: '0'
69
78
  - !ruby/object:Gem::Dependency
70
79
  name: jeweler
71
80
  requirement: !ruby/object:Gem::Requirement
81
+ none: false
72
82
  requirements:
73
- - - "~>"
83
+ - - ~>
74
84
  - !ruby/object:Gem::Version
75
85
  version: 2.0.1
76
86
  type: :development
77
87
  prerelease: false
78
88
  version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
79
90
  requirements:
80
- - - "~>"
91
+ - - ~>
81
92
  - !ruby/object:Gem::Version
82
93
  version: 2.0.1
83
94
  - !ruby/object:Gem::Dependency
84
95
  name: rspec
85
96
  requirement: !ruby/object:Gem::Requirement
97
+ none: false
86
98
  requirements:
87
- - - ">="
99
+ - - ! '>='
88
100
  - !ruby/object:Gem::Version
89
101
  version: '0'
90
102
  type: :development
91
103
  prerelease: false
92
104
  version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
93
106
  requirements:
94
- - - ">="
107
+ - - ! '>='
95
108
  - !ruby/object:Gem::Version
96
109
  version: '0'
97
110
  - !ruby/object:Gem::Dependency
98
111
  name: rdoc
99
112
  requirement: !ruby/object:Gem::Requirement
113
+ none: false
100
114
  requirements:
101
- - - ">="
115
+ - - ! '>='
102
116
  - !ruby/object:Gem::Version
103
117
  version: '0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
107
122
  requirements:
108
- - - ">="
123
+ - - ! '>='
109
124
  - !ruby/object:Gem::Version
110
125
  version: '0'
111
126
  description: Fast big data BLAST XML parser and library; this libxml2 based version
@@ -118,9 +133,9 @@ extra_rdoc_files:
118
133
  - LICENSE.txt
119
134
  - README.md
120
135
  files:
121
- - ".document"
122
- - ".rspec"
123
- - ".travis.yml"
136
+ - .document
137
+ - .rspec
138
+ - .travis.yml
124
139
  - Gemfile
125
140
  - Gemfile.lock
126
141
  - LICENSE.txt
@@ -155,25 +170,26 @@ files:
155
170
  homepage: http://github.com/pjotrp/blastxmlparser
156
171
  licenses:
157
172
  - MIT
158
- metadata: {}
159
173
  post_install_message:
160
174
  rdoc_options: []
161
175
  require_paths:
162
176
  - lib
163
177
  required_ruby_version: !ruby/object:Gem::Requirement
178
+ none: false
164
179
  requirements:
165
- - - ">="
180
+ - - ! '>='
166
181
  - !ruby/object:Gem::Version
167
- version: '0'
182
+ version: 1.9.2
168
183
  required_rubygems_version: !ruby/object:Gem::Requirement
184
+ none: false
169
185
  requirements:
170
- - - ">="
186
+ - - ! '>='
171
187
  - !ruby/object:Gem::Version
172
188
  version: '0'
173
189
  requirements: []
174
190
  rubyforge_project:
175
- rubygems_version: 2.0.3
191
+ rubygems_version: 1.8.23
176
192
  signing_key:
177
- specification_version: 4
193
+ specification_version: 3
178
194
  summary: Very fast parallel BLAST XML to RDF/HTML/JSON/YAML/csv transformer
179
195
  test_files: []
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 76df7cd1f6e1bc4f2b04fdf3f0fc830110f2e376
4
- data.tar.gz: 68f44a797aa5357690e6c4a10a1fe241a7b8fe37
5
- SHA512:
6
- metadata.gz: f424b8cfedf921840dbf2fee412c191f1c9951d289b80db047744a56883b171b50d4b1161e86415f0e873be4bd43935031f966d32d394efea9208e65b2529903
7
- data.tar.gz: 6214606ac08afa7306503c78e969046ab2a54d9c45e9dc42c597d8c033db2095fc5a25f15f56090a0337bd1049081699a81ceefa90d2acaf17cb782340074fc6