simple_bioc 0.0.23 → 0.0.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +31 -11
- data/lib/simple_bioc/pub_ann_writer.rb +2 -2
- data/lib/simple_bioc/version.rb +1 -1
- data/samples/convert_pubann.rb +4 -5
- data/simple_bioc.gemspec +2 -3
- data/spec/pubann_spec.rb +0 -1
- metadata +11 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6adbc57804c5758eb35f41ed1f4094feb7a3ea2f
|
4
|
+
data.tar.gz: 4fed4c229116b2955d80741efb08754db6620c78
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85fc7d5ad789c553afcc86d97200b4c78424fbfeca3c96880618d3928a5034cfdab571242b0732d2ffbdec7eb13147c988aabe10e1fa7cac3ade1e94dfc3af34
|
7
|
+
data.tar.gz: 00733eac042e12a046ce8e4c4d5813c1f42ee33898c8ea64e38bbad7484c3129c10f1417bc9387b962a9006c566c92e99dd88bcf74b3183dcdb3acfbdfe5b26f
|
data/README.md
CHANGED
@@ -7,6 +7,7 @@ SimpleBioC is a simple parser / builder for BioC data format. BioC is a simple X
|
|
7
7
|
* Parse & convert a BioC document to an object instance compatible to BioC DTD
|
8
8
|
* Use plain ruby objects for simplicity
|
9
9
|
* Build a BioC document from an object instance
|
10
|
+
* Convert BioC to PubAnnotation JSON
|
10
11
|
|
11
12
|
|
12
13
|
## Installation
|
@@ -36,40 +37,41 @@ Parse with a file name (path)
|
|
36
37
|
collection = SimpleBioC::from_xml(filename)
|
37
38
|
|
38
39
|
Traverse & Manipulate Data. Data structure are almost the same as the DTD. Please refer [library documents](http://rubydoc.info/gems/simple_bioc/0.0.2/frames) and [the BioC DTD](http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html).
|
39
|
-
|
40
|
+
```ruby
|
40
41
|
puts collection.documents[2].passages[0].text
|
41
|
-
|
42
|
+
```
|
42
43
|
Build XML text from data
|
43
|
-
|
44
|
+
```ruby
|
44
45
|
puts SimpleBioC::to_xml(collection)
|
45
|
-
|
46
|
+
```
|
46
47
|
Convert PubAnnotation JSON from data
|
47
48
|
|
49
|
+
```ruby
|
48
50
|
puts SimpleBioC::to_pubann(collection, {
|
49
51
|
sourcedb: 'PubMed',
|
50
52
|
target: 'http://pubannotation.org/docs/sourcedb/PubMed/sourceid/18034444',
|
51
53
|
project: 'Ab3P-abbreviations'
|
52
54
|
}))
|
53
|
-
|
55
|
+
```
|
54
56
|
## Options
|
55
57
|
|
56
58
|
### Specify set of <document>s to parse
|
57
59
|
|
58
60
|
You can parse only a set of document elements in a large xml document instead of parsing all the document elements. It may decrease the processing time. For example, the following code will return a collection with two documents ("1234", "4567").
|
59
|
-
|
61
|
+
```ruby
|
60
62
|
collection = SimpleBioc::from_xml(filename, {documents: ["1234", "4567"]})
|
61
|
-
|
63
|
+
```
|
62
64
|
### No whitespace in output
|
63
65
|
|
64
66
|
By default, outputs of SimpleBioC::to_xml() will be formatted with whitespace. If you do not want this whitespace, you should pass 'save_with' option with 0 to the to_xml() function.
|
65
|
-
|
67
|
+
```ruby
|
66
68
|
puts SimpleBioC::to_xml(collection, {save_with:0})
|
67
|
-
|
69
|
+
```
|
68
70
|
|
69
71
|
## Sample
|
70
72
|
|
71
73
|
More samples can be found in Samples directory
|
72
|
-
|
74
|
+
```ruby
|
73
75
|
require 'simple_bioc'
|
74
76
|
|
75
77
|
# Sample1: parse, traverse, manipulate, and build BioC data
|
@@ -103,7 +105,25 @@ More samples can be found in Samples directory
|
|
103
105
|
# build BioC document from data
|
104
106
|
xml = SimpleBioC.to_xml(collection)
|
105
107
|
puts xml
|
106
|
-
|
108
|
+
```
|
109
|
+
|
110
|
+
## Sample2: PubAnnotation Converter (convert_pubann.rb)
|
111
|
+
```ruby
|
112
|
+
# convert document to PubAnnotation JSON
|
113
|
+
require 'simple_bioc'
|
114
|
+
|
115
|
+
if ARGF.argv.size < 1
|
116
|
+
puts "usage: ruby convert_pubann.rb {filepath}"
|
117
|
+
exit
|
118
|
+
end
|
119
|
+
|
120
|
+
collection = SimpleBioC::from_xml(ARGF.argv[0])
|
121
|
+
puts SimpleBioC::to_pubann(collection, {
|
122
|
+
sourcedb: 'PubMed',
|
123
|
+
target: 'http://pubannotation.org/docs/sourcedb/PubMed/sourceid/18034444',
|
124
|
+
project: 'Ab3P-abbreviations'
|
125
|
+
})
|
126
|
+
```
|
107
127
|
|
108
128
|
## Contributing
|
109
129
|
|
@@ -25,8 +25,8 @@ module PubAnnWriter
|
|
25
25
|
json.denotations document.all_annotations do |a|
|
26
26
|
a.locations.each do |l|
|
27
27
|
json.span do
|
28
|
-
json.begin l.offset
|
29
|
-
json.end l.offset + l.length
|
28
|
+
json.begin l.offset.to_i
|
29
|
+
json.end l.offset.to_i + l.length.to_i
|
30
30
|
end
|
31
31
|
json.obj a.infons.map{|k,v| v}.join(",")
|
32
32
|
json.id a.id unless a.id.nil?
|
data/lib/simple_bioc/version.rb
CHANGED
data/samples/convert_pubann.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# convert document to PubAnnotation JSON
|
3
2
|
require 'simple_bioc'
|
4
3
|
|
5
4
|
if ARGF.argv.size < 1
|
6
|
-
puts "usage: ruby
|
5
|
+
puts "usage: ruby convert_pubann.rb {filepath}"
|
7
6
|
exit
|
8
7
|
end
|
9
8
|
|
10
9
|
collection = SimpleBioC::from_xml(ARGF.argv[0])
|
11
|
-
puts SimpleBioC::
|
10
|
+
puts SimpleBioC::to_pubann(collection, {
|
12
11
|
sourcedb: 'PubMed',
|
13
12
|
target: 'http://pubannotation.org/docs/sourcedb/PubMed/sourceid/18034444',
|
14
13
|
project: 'Ab3P-abbreviations'
|
15
|
-
})
|
14
|
+
})
|
data/simple_bioc.gemspec
CHANGED
@@ -22,10 +22,9 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_dependency "jbuilder", "~> 2.3"
|
23
23
|
|
24
24
|
spec.add_development_dependency "bundler", "~> 1.3"
|
25
|
-
spec.add_development_dependency "rake", "
|
26
|
-
spec.add_development_dependency "yard", "
|
25
|
+
spec.add_development_dependency "rake", ">= 12.3.3"
|
26
|
+
spec.add_development_dependency "yard", ">= 0.9.20"
|
27
27
|
spec.add_development_dependency "rspec", "~> 3.2"
|
28
28
|
spec.add_development_dependency("test_xml", ["~> 0.1"])
|
29
29
|
spec.add_development_dependency "json-compare", "~> 0.1"
|
30
|
-
spec.add_development_dependency "yajl-ruby", "~> 1.2"
|
31
30
|
end
|
data/spec/pubann_spec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_bioc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dongseop Kwon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -56,30 +56,30 @@ dependencies:
|
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 12.3.3
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 12.3.3
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: yard
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 0.9.20
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: 0.9.20
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,20 +122,6 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0.1'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: yajl-ruby
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '1.2'
|
132
|
-
type: :development
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - "~>"
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '1.2'
|
139
125
|
description: SimpleBioC is a simple parser / builder for BioC data format. BioC is
|
140
126
|
a simple XML format to share text documents and annotations. You can find more information
|
141
127
|
about BioC from the official BioC web site (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/)
|
@@ -295,7 +281,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
295
281
|
version: '0'
|
296
282
|
requirements: []
|
297
283
|
rubyforge_project:
|
298
|
-
rubygems_version: 2.
|
284
|
+
rubygems_version: 2.6.13
|
299
285
|
signing_key:
|
300
286
|
specification_version: 4
|
301
287
|
summary: Simple BioC parser/builder for ruby
|
@@ -303,4 +289,3 @@ test_files:
|
|
303
289
|
- spec/file_check_spec.rb
|
304
290
|
- spec/pubann_spec.rb
|
305
291
|
- spec/simple_bioc_spec.rb
|
306
|
-
has_rdoc:
|