simple_bioc 0.0.23 → 0.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -11
- data/lib/simple_bioc/pub_ann_writer.rb +2 -2
- data/lib/simple_bioc/version.rb +1 -1
- data/samples/convert_pubann.rb +4 -5
- data/simple_bioc.gemspec +2 -3
- data/spec/pubann_spec.rb +0 -1
- metadata +11 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6adbc57804c5758eb35f41ed1f4094feb7a3ea2f
|
4
|
+
data.tar.gz: 4fed4c229116b2955d80741efb08754db6620c78
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85fc7d5ad789c553afcc86d97200b4c78424fbfeca3c96880618d3928a5034cfdab571242b0732d2ffbdec7eb13147c988aabe10e1fa7cac3ade1e94dfc3af34
|
7
|
+
data.tar.gz: 00733eac042e12a046ce8e4c4d5813c1f42ee33898c8ea64e38bbad7484c3129c10f1417bc9387b962a9006c566c92e99dd88bcf74b3183dcdb3acfbdfe5b26f
|
data/README.md
CHANGED
@@ -7,6 +7,7 @@ SimpleBioC is a simple parser / builder for BioC data format. BioC is a simple X
|
|
7
7
|
* Parse & convert a BioC document to an object instance compatible to BioC DTD
|
8
8
|
* Use plain ruby objects for simplicity
|
9
9
|
* Build a BioC document from an object instance
|
10
|
+
* Convert BioC to PubAnnotation JSON
|
10
11
|
|
11
12
|
|
12
13
|
## Installation
|
@@ -36,40 +37,41 @@ Parse with a file name (path)
|
|
36
37
|
collection = SimpleBioC::from_xml(filename)
|
37
38
|
|
38
39
|
Traverse & manipulate data. The data structures are almost the same as those in the DTD. Please refer to the [library documents](http://rubydoc.info/gems/simple_bioc/0.0.2/frames) and [the BioC DTD](http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html).
|
39
|
-
|
40
|
+
```ruby
|
40
41
|
puts collection.documents[2].passages[0].text
|
41
|
-
|
42
|
+
```
|
42
43
|
Build XML text from data
|
43
|
-
|
44
|
+
```ruby
|
44
45
|
puts SimpleBioC::to_xml(collection)
|
45
|
-
|
46
|
+
```
|
46
47
|
Convert data to PubAnnotation JSON
|
47
48
|
|
49
|
+
```ruby
|
48
50
|
puts SimpleBioC::to_pubann(collection, {
|
49
51
|
sourcedb: 'PubMed',
|
50
52
|
target: 'http://pubannotation.org/docs/sourcedb/PubMed/sourceid/18034444',
|
51
53
|
project: 'Ab3P-abbreviations'
|
52
54
|
}))
|
53
|
-
|
55
|
+
```
|
54
56
|
## Options
|
55
57
|
|
56
58
|
### Specify set of <document>s to parse
|
57
59
|
|
58
60
|
You can parse only a set of document elements in a large xml document instead of parsing all the document elements. It may decrease the processing time. For example, the following code will return a collection with two documents ("1234", "4567").
|
59
|
-
|
61
|
+
```ruby
|
60
62
|
collection = SimpleBioC::from_xml(filename, {documents: ["1234", "4567"]})
|
61
|
-
|
63
|
+
```
|
62
64
|
### No whitespace in output
|
63
65
|
|
64
66
|
By default, the output of SimpleBioC::to_xml() is formatted with whitespace. If you do not want this whitespace, pass the 'save_with' option with a value of 0 to the to_xml() function.
|
65
|
-
|
67
|
+
```ruby
|
66
68
|
puts SimpleBioC::to_xml(collection, {save_with:0})
|
67
|
-
|
69
|
+
```
|
68
70
|
|
69
71
|
## Sample
|
70
72
|
|
71
73
|
More samples can be found in the samples directory
|
72
|
-
|
74
|
+
```ruby
|
73
75
|
require 'simple_bioc'
|
74
76
|
|
75
77
|
# Sample1: parse, traverse, manipulate, and build BioC data
|
@@ -103,7 +105,25 @@ More samples can be found in Samples directory
|
|
103
105
|
# build BioC document from data
|
104
106
|
xml = SimpleBioC.to_xml(collection)
|
105
107
|
puts xml
|
106
|
-
|
108
|
+
```
|
109
|
+
|
110
|
+
## Sample2: PubAnnotation Converter (convert_pubann.rb)
|
111
|
+
```ruby
|
112
|
+
# convert document to PubAnnotation JSON
|
113
|
+
require 'simple_bioc'
|
114
|
+
|
115
|
+
if ARGF.argv.size < 1
|
116
|
+
puts "usage: ruby convert_pubann.rb {filepath}"
|
117
|
+
exit
|
118
|
+
end
|
119
|
+
|
120
|
+
collection = SimpleBioC::from_xml(ARGF.argv[0])
|
121
|
+
puts SimpleBioC::to_pubann(collection, {
|
122
|
+
sourcedb: 'PubMed',
|
123
|
+
target: 'http://pubannotation.org/docs/sourcedb/PubMed/sourceid/18034444',
|
124
|
+
project: 'Ab3P-abbreviations'
|
125
|
+
})
|
126
|
+
```
|
107
127
|
|
108
128
|
## Contributing
|
109
129
|
|
@@ -25,8 +25,8 @@ module PubAnnWriter
|
|
25
25
|
json.denotations document.all_annotations do |a|
|
26
26
|
a.locations.each do |l|
|
27
27
|
json.span do
|
28
|
-
json.begin l.offset
|
29
|
-
json.end l.offset + l.length
|
28
|
+
json.begin l.offset.to_i
|
29
|
+
json.end l.offset.to_i + l.length.to_i
|
30
30
|
end
|
31
31
|
json.obj a.infons.map{|k,v| v}.join(",")
|
32
32
|
json.id a.id unless a.id.nil?
|
data/lib/simple_bioc/version.rb
CHANGED
data/samples/convert_pubann.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# convert document to PubAnnotation JSON
|
3
2
|
require 'simple_bioc'
|
4
3
|
|
5
4
|
if ARGF.argv.size < 1
|
6
|
-
puts "usage: ruby
|
5
|
+
puts "usage: ruby convert_pubann.rb {filepath}"
|
7
6
|
exit
|
8
7
|
end
|
9
8
|
|
10
9
|
collection = SimpleBioC::from_xml(ARGF.argv[0])
|
11
|
-
puts SimpleBioC::
|
10
|
+
puts SimpleBioC::to_pubann(collection, {
|
12
11
|
sourcedb: 'PubMed',
|
13
12
|
target: 'http://pubannotation.org/docs/sourcedb/PubMed/sourceid/18034444',
|
14
13
|
project: 'Ab3P-abbreviations'
|
15
|
-
})
|
14
|
+
})
|
data/simple_bioc.gemspec
CHANGED
@@ -22,10 +22,9 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_dependency "jbuilder", "~> 2.3"
|
23
23
|
|
24
24
|
spec.add_development_dependency "bundler", "~> 1.3"
|
25
|
-
spec.add_development_dependency "rake", "
|
26
|
-
spec.add_development_dependency "yard", "
|
25
|
+
spec.add_development_dependency "rake", ">= 12.3.3"
|
26
|
+
spec.add_development_dependency "yard", ">= 0.9.20"
|
27
27
|
spec.add_development_dependency "rspec", "~> 3.2"
|
28
28
|
spec.add_development_dependency("test_xml", ["~> 0.1"])
|
29
29
|
spec.add_development_dependency "json-compare", "~> 0.1"
|
30
|
-
spec.add_development_dependency "yajl-ruby", "~> 1.2"
|
31
30
|
end
|
data/spec/pubann_spec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_bioc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dongseop Kwon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -56,30 +56,30 @@ dependencies:
|
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 12.3.3
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 12.3.3
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: yard
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 0.9.20
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: 0.9.20
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,20 +122,6 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0.1'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: yajl-ruby
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '1.2'
|
132
|
-
type: :development
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - "~>"
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '1.2'
|
139
125
|
description: SimpleBioC is a simple parser / builder for BioC data format. BioC is
|
140
126
|
a simple XML format to share text documents and annotations. You can find more information
|
141
127
|
about BioC from the official BioC web site (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/)
|
@@ -295,7 +281,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
295
281
|
version: '0'
|
296
282
|
requirements: []
|
297
283
|
rubyforge_project:
|
298
|
-
rubygems_version: 2.
|
284
|
+
rubygems_version: 2.6.13
|
299
285
|
signing_key:
|
300
286
|
specification_version: 4
|
301
287
|
summary: Simple BioC parser/builder for ruby
|
@@ -303,4 +289,3 @@ test_files:
|
|
303
289
|
- spec/file_check_spec.rb
|
304
290
|
- spec/pubann_spec.rb
|
305
291
|
- spec/simple_bioc_spec.rb
|
306
|
-
has_rdoc:
|