simple_bioc 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -3
- data/Rakefile +2 -1
- data/html/BioCReader.html +40 -30
- data/html/BioCWriter.html +14 -12
- data/html/README_md.html +31 -5
- data/html/SimpleBioC/NodeBase.html +3 -3
- data/html/SimpleBioC.html +48 -9
- data/html/created.rid +7 -7
- data/html/index.html +26 -3
- data/html/js/search_index.js +1 -1
- data/html/table_of_contents.html +5 -2
- data/lib/simple_bioc/bioc_reader.rb +24 -14
- data/lib/simple_bioc/bioc_writer.rb +4 -2
- data/lib/simple_bioc/version.rb +1 -1
- data/lib/simple_bioc.rb +29 -5
- data/simple_bioc.gemspec +1 -1
- data/spec/file_check_spec.rb +13 -0
- data/spec/simple_bioc_spec.rb +21 -10
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac0961af71c5744bc71ea16221f5325af35e4583
|
4
|
+
data.tar.gz: 9f407b566a8dc6687d53f8145fc600f3d73039eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab77aa143472f5959a0ea97702ae68dba76bb6b75542086a065b625216234d8912a52d83a087f6acf3c553f0ee8adf5050ebafd94833171d9dd84d7a34124e0a
|
7
|
+
data.tar.gz: b02aa1b1d73ff1d0c6ed2650c0170ff88e22fd01a6e6df7d9508cf365e548bd6bf0413616a80a765b2fc35f9d3e1da2d20edf41cece737ceeb26577d39068251
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# SimpleBioC
|
2
2
|
|
3
3
|
SimpleBioC is a simple parser / builder for BioC data format. BioC is a simple XML format to share text documents and annotations. You can find more information about BioC from the official BioC web site ([http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/](http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/))
|
4
4
|
|
@@ -24,7 +24,7 @@ Or install it yourself as:
|
|
24
24
|
$ gem install simple_bioc
|
25
25
|
|
26
26
|
|
27
|
-
## Usages
|
27
|
+
## Simple Usages
|
28
28
|
|
29
29
|
Include library
|
30
30
|
|
@@ -35,7 +35,7 @@ Parse with a file name (path)
|
|
35
35
|
|
36
36
|
collection = SimpleBioC::from_xml(filename)
|
37
37
|
|
38
|
-
Traverse & Manipulate Data. Data structure are almost the same as the DTD. Please refer [library documents](
|
38
|
+
Traverse & Manipulate Data. The data structures are almost the same as those in the DTD. Please refer to the [library documents](http://rubydoc.info/gems/simple_bioc/0.0.2/frames) and [the BioC DTD](http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html).
|
39
39
|
|
40
40
|
puts collection.documents[2].passages[0].text
|
41
41
|
|
@@ -43,6 +43,20 @@ Build XML text from data
|
|
43
43
|
|
44
44
|
puts SimpleBioC::to_xml(collection)
|
45
45
|
|
46
|
+
## Options
|
47
|
+
|
48
|
+
### Specify set of <document>s to parse
|
49
|
+
|
50
|
+
You can parse only a set of document elements in a large XML document instead of parsing all the document elements. This may reduce the processing time. For example, the following code will return a collection with two documents ("1234", "4567").
|
51
|
+
|
52
|
+
collection = SimpleBioC::from_xml(filename, {documents: ["1234", "4567"]})
|
53
|
+
|
54
|
+
### No whitespace in output
|
55
|
+
|
56
|
+
By default, the output of SimpleBioC::to_xml() is formatted with whitespace. If you do not want this whitespace, pass the 'save_with' option with a value of 0 to the to_xml() function.
|
57
|
+
|
58
|
+
puts SimpleBioC::to_xml(collection, {save_with:0})
|
59
|
+
|
46
60
|
|
47
61
|
## Sample
|
48
62
|
|
data/Rakefile
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
require 'rspec/core/rake_task'
|
3
3
|
require 'rdoc/task'
|
4
|
+
require 'yard'
|
4
5
|
|
5
6
|
task :default => [:spec]
|
6
7
|
RSpec::Core::RakeTask.new do |t|
|
@@ -10,4 +11,4 @@ end
|
|
10
11
|
Rake::RDocTask.new do |rd|
|
11
12
|
rd.main = "README.md"
|
12
13
|
rd.rdoc_files.include("README.md", "lib/**/*.rb")
|
13
|
-
end
|
14
|
+
end
|
data/html/BioCReader.html
CHANGED
@@ -173,7 +173,7 @@
|
|
173
173
|
|
174
174
|
<div class="method-heading">
|
175
175
|
<span class="method-name">read</span><span
|
176
|
-
class="method-args">(path)</span>
|
176
|
+
class="method-args">(path, options)</span>
|
177
177
|
|
178
178
|
<span class="method-click-advice">click to toggle source</span>
|
179
179
|
|
@@ -189,7 +189,7 @@
|
|
189
189
|
|
190
190
|
<div class="method-source-code" id="read-source">
|
191
191
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 7</span>
|
192
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read</span>(<span class="ruby-identifier">path</span>)
|
192
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read</span>(<span class="ruby-identifier">path</span>, <span class="ruby-identifier">options</span>)
|
193
193
|
<span class="ruby-identifier">collection</span> = <span class="ruby-keyword">nil</span>
|
194
194
|
<span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-identifier">path</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
195
195
|
<span class="ruby-identifier">xml_doc</span> = <span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-identifier">file</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">config</span><span class="ruby-operator">|</span>
|
@@ -200,7 +200,7 @@
|
|
200
200
|
<span class="ruby-identifier">fail</span> <span class="ruby-string">'Wrong format'</span>
|
201
201
|
<span class="ruby-keyword">end</span>
|
202
202
|
<span class="ruby-identifier">collection</span> = <span class="ruby-constant">SimpleBioC</span><span class="ruby-operator">::</span><span class="ruby-constant">Collection</span>.<span class="ruby-identifier">new</span>
|
203
|
-
<span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
203
|
+
<span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span>)
|
204
204
|
<span class="ruby-keyword">end</span>
|
205
205
|
|
206
206
|
<span class="ruby-identifier">collection</span>
|
@@ -219,7 +219,7 @@
|
|
219
219
|
|
220
220
|
<div class="method-heading">
|
221
221
|
<span class="method-name">read_annotation</span><span
|
222
|
-
class="method-args">(xml, annotation)</span>
|
222
|
+
class="method-args">(xml, annotation, options = {})</span>
|
223
223
|
|
224
224
|
<span class="method-click-advice">click to toggle source</span>
|
225
225
|
|
@@ -234,12 +234,13 @@
|
|
234
234
|
|
235
235
|
|
236
236
|
<div class="method-source-code" id="read_annotation-source">
|
237
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
238
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_annotation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>)
|
237
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 86</span>
|
238
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_annotation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>, <span class="ruby-identifier">options</span> = {})
|
239
239
|
<span class="ruby-identifier">annotation</span>.<span class="ruby-identifier">id</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"id"</span>]
|
240
240
|
<span class="ruby-identifier">annotation</span>.<span class="ruby-identifier">text</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"text"</span>)
|
241
241
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>)
|
242
242
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>, <span class="ruby-string">"location"</span>)
|
243
|
+
<span class="ruby-keyword">true</span>
|
243
244
|
<span class="ruby-keyword">end</span></pre>
|
244
245
|
</div><!-- read_annotation-source -->
|
245
246
|
|
@@ -255,7 +256,7 @@
|
|
255
256
|
|
256
257
|
<div class="method-heading">
|
257
258
|
<span class="method-name">read_collection</span><span
|
258
|
-
class="method-args">(xml, collection)</span>
|
259
|
+
class="method-args">(xml, collection, options = {})</span>
|
259
260
|
|
260
261
|
<span class="method-click-advice">click to toggle source</span>
|
261
262
|
|
@@ -271,12 +272,12 @@
|
|
271
272
|
|
272
273
|
<div class="method-source-code" id="read_collection-source">
|
273
274
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 47</span>
|
274
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
275
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span> = {})
|
275
276
|
<span class="ruby-identifier">collection</span>.<span class="ruby-identifier">source</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"source"</span>)
|
276
277
|
<span class="ruby-identifier">collection</span>.<span class="ruby-identifier">date</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"date"</span>)
|
277
278
|
<span class="ruby-identifier">collection</span>.<span class="ruby-identifier">key</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"key"</span>)
|
278
279
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
279
|
-
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-string">"document"</span>)
|
280
|
+
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-string">"document"</span>, <span class="ruby-identifier">options</span>)
|
280
281
|
<span class="ruby-keyword">end</span></pre>
|
281
282
|
</div><!-- read_collection-source -->
|
282
283
|
|
@@ -292,7 +293,7 @@
|
|
292
293
|
|
293
294
|
<div class="method-heading">
|
294
295
|
<span class="method-name">read_document</span><span
|
295
|
-
class="method-args">(xml, document)</span>
|
296
|
+
class="method-args">(xml, document, options = {})</span>
|
296
297
|
|
297
298
|
<span class="method-click-advice">click to toggle source</span>
|
298
299
|
|
@@ -308,12 +309,16 @@
|
|
308
309
|
|
309
310
|
<div class="method-source-code" id="read_document-source">
|
310
311
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 55</span>
|
311
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_document</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>)
|
312
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_document</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>, <span class="ruby-identifier">options</span> = {})
|
312
313
|
<span class="ruby-identifier">document</span>.<span class="ruby-identifier">id</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"id"</span>)
|
314
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">kind_of?</span>(<span class="ruby-constant">Array</span>) <span class="ruby-operator">&&</span> <span class="ruby-operator">!</span><span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">document</span>.<span class="ruby-identifier">id</span>)
|
315
|
+
<span class="ruby-keyword">return</span> <span class="ruby-keyword">false</span>
|
316
|
+
<span class="ruby-keyword">end</span>
|
313
317
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>)
|
314
318
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>, <span class="ruby-string">"passage"</span>)
|
315
319
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>, <span class="ruby-string">"relation"</span>)
|
316
320
|
<span class="ruby-identifier">document</span>.<span class="ruby-identifier">adjust_ref</span>
|
321
|
+
<span class="ruby-keyword">true</span>
|
317
322
|
<span class="ruby-keyword">end</span></pre>
|
318
323
|
</div><!-- read_document-source -->
|
319
324
|
|
@@ -396,7 +401,7 @@
|
|
396
401
|
|
397
402
|
<div class="method-heading">
|
398
403
|
<span class="method-name">read_location</span><span
|
399
|
-
class="method-args">(xml, location)</span>
|
404
|
+
class="method-args">(xml, location, options = {})</span>
|
400
405
|
|
401
406
|
<span class="method-click-advice">click to toggle source</span>
|
402
407
|
|
@@ -411,10 +416,11 @@
|
|
411
416
|
|
412
417
|
|
413
418
|
<div class="method-source-code" id="read_location-source">
|
414
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
415
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_location</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">location</span>)
|
419
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 101</span>
|
420
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_location</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">location</span>, <span class="ruby-identifier">options</span> = {})
|
416
421
|
<span class="ruby-identifier">location</span>.<span class="ruby-identifier">offset</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"offset"</span>]
|
417
422
|
<span class="ruby-identifier">location</span>.<span class="ruby-identifier">length</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"length"</span>]
|
423
|
+
<span class="ruby-keyword">true</span>
|
418
424
|
<span class="ruby-keyword">end</span></pre>
|
419
425
|
</div><!-- read_location-source -->
|
420
426
|
|
@@ -430,7 +436,7 @@
|
|
430
436
|
|
431
437
|
<div class="method-heading">
|
432
438
|
<span class="method-name">read_node</span><span
|
433
|
-
class="method-args">(xml, node)</span>
|
439
|
+
class="method-args">(xml, node, options = {})</span>
|
434
440
|
|
435
441
|
<span class="method-click-advice">click to toggle source</span>
|
436
442
|
|
@@ -445,10 +451,11 @@
|
|
445
451
|
|
446
452
|
|
447
453
|
<div class="method-source-code" id="read_node-source">
|
448
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
449
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_node</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">node</span>)
|
454
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 107</span>
|
455
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_node</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">node</span>, <span class="ruby-identifier">options</span> = {})
|
450
456
|
<span class="ruby-identifier">node</span>.<span class="ruby-identifier">refid</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"refid"</span>]
|
451
457
|
<span class="ruby-identifier">node</span>.<span class="ruby-identifier">role</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"role"</span>]
|
458
|
+
<span class="ruby-keyword">true</span>
|
452
459
|
<span class="ruby-keyword">end</span></pre>
|
453
460
|
</div><!-- read_node-source -->
|
454
461
|
|
@@ -464,7 +471,7 @@
|
|
464
471
|
|
465
472
|
<div class="method-heading">
|
466
473
|
<span class="method-name">read_passage</span><span
|
467
|
-
class="method-args">(xml, passage)</span>
|
474
|
+
class="method-args">(xml, passage, options = {})</span>
|
468
475
|
|
469
476
|
<span class="method-click-advice">click to toggle source</span>
|
470
477
|
|
@@ -479,14 +486,15 @@
|
|
479
486
|
|
480
487
|
|
481
488
|
<div class="method-source-code" id="read_passage-source">
|
482
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
483
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_passage</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
489
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 67</span>
|
490
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_passage</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-identifier">options</span> = {})
|
484
491
|
<span class="ruby-identifier">passage</span>.<span class="ruby-identifier">text</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"text"</span>)
|
485
492
|
<span class="ruby-identifier">passage</span>.<span class="ruby-identifier">offset</span> = <span class="ruby-identifier">read_int</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"offset"</span>)
|
486
493
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
487
494
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-string">"sentence"</span>)
|
488
495
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-string">"annotation"</span>)
|
489
496
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-string">"relation"</span>)
|
497
|
+
<span class="ruby-keyword">true</span>
|
490
498
|
<span class="ruby-keyword">end</span></pre>
|
491
499
|
</div><!-- read_passage-source -->
|
492
500
|
|
@@ -502,7 +510,7 @@
|
|
502
510
|
|
503
511
|
<div class="method-heading">
|
504
512
|
<span class="method-name">read_recursive</span><span
|
505
|
-
class="method-args">(xml, obj, name)</span>
|
513
|
+
class="method-args">(xml, obj, name, options = {})</span>
|
506
514
|
|
507
515
|
<span class="method-click-advice">click to toggle source</span>
|
508
516
|
|
@@ -518,12 +526,12 @@
|
|
518
526
|
|
519
527
|
<div class="method-source-code" id="read_recursive-source">
|
520
528
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 38</span>
|
521
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">obj</span>, <span class="ruby-identifier">name</span>)
|
529
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">obj</span>, <span class="ruby-identifier">name</span>, <span class="ruby-identifier">options</span> = {})
|
522
530
|
<span class="ruby-identifier">target_class</span> = <span class="ruby-constant">SimpleBioC</span>.<span class="ruby-identifier">const_get</span>(<span class="ruby-identifier">name</span>.<span class="ruby-identifier">capitalize</span>)
|
523
531
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">xpath</span>(<span class="ruby-identifier">name</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">node</span><span class="ruby-operator">|</span>
|
524
532
|
<span class="ruby-identifier">instance</span> = <span class="ruby-identifier">target_class</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">obj</span>)
|
525
|
-
<span class="ruby-identifier">send</span>(<span class="ruby-value">:"read_#{name}"</span>, <span class="ruby-identifier">node</span>, <span class="ruby-identifier">instance</span>)
|
526
|
-
<span class="ruby-identifier">obj</span>.<span class="ruby-identifier">instance_variable_get</span>(<span class="ruby-value">:"@#{name}s"</span>) <span class="ruby-operator"><<</span> <span class="ruby-identifier">instance</span>
|
533
|
+
<span class="ruby-identifier">ret</span> = <span class="ruby-identifier">send</span>(<span class="ruby-value">:"read_#{name}"</span>, <span class="ruby-identifier">node</span>, <span class="ruby-identifier">instance</span>, <span class="ruby-identifier">options</span>)
|
534
|
+
<span class="ruby-identifier">obj</span>.<span class="ruby-identifier">instance_variable_get</span>(<span class="ruby-value">:"@#{name}s"</span>) <span class="ruby-operator"><<</span> <span class="ruby-identifier">instance</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">ret</span>
|
527
535
|
<span class="ruby-keyword">end</span>
|
528
536
|
<span class="ruby-keyword">end</span></pre>
|
529
537
|
</div><!-- read_recursive-source -->
|
@@ -540,7 +548,7 @@
|
|
540
548
|
|
541
549
|
<div class="method-heading">
|
542
550
|
<span class="method-name">read_relation</span><span
|
543
|
-
class="method-args">(xml, relation)</span>
|
551
|
+
class="method-args">(xml, relation, options = {})</span>
|
544
552
|
|
545
553
|
<span class="method-click-advice">click to toggle source</span>
|
546
554
|
|
@@ -555,11 +563,12 @@
|
|
555
563
|
|
556
564
|
|
557
565
|
<div class="method-source-code" id="read_relation-source">
|
558
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
559
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_relation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>)
|
566
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 94</span>
|
567
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_relation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>, <span class="ruby-identifier">options</span> = {})
|
560
568
|
<span class="ruby-identifier">relation</span>.<span class="ruby-identifier">id</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"id"</span>]
|
561
569
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>)
|
562
570
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>, <span class="ruby-string">"node"</span>)
|
571
|
+
<span class="ruby-keyword">true</span>
|
563
572
|
<span class="ruby-keyword">end</span></pre>
|
564
573
|
</div><!-- read_relation-source -->
|
565
574
|
|
@@ -575,7 +584,7 @@
|
|
575
584
|
|
576
585
|
<div class="method-heading">
|
577
586
|
<span class="method-name">read_sentence</span><span
|
578
|
-
class="method-args">(xml, sentence)</span>
|
587
|
+
class="method-args">(xml, sentence, options = {})</span>
|
579
588
|
|
580
589
|
<span class="method-click-advice">click to toggle source</span>
|
581
590
|
|
@@ -590,13 +599,14 @@
|
|
590
599
|
|
591
600
|
|
592
601
|
<div class="method-source-code" id="read_sentence-source">
|
593
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
594
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_sentence</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
602
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 77</span>
|
603
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_sentence</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>, <span class="ruby-identifier">options</span> = {})
|
595
604
|
<span class="ruby-identifier">sentence</span>.<span class="ruby-identifier">text</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"text"</span>)
|
596
605
|
<span class="ruby-identifier">sentence</span>.<span class="ruby-identifier">offset</span> = <span class="ruby-identifier">read_int</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"offset"</span>)
|
597
606
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
598
607
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>, <span class="ruby-string">"annotation"</span>)
|
599
608
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>, <span class="ruby-string">"relation"</span>)
|
609
|
+
<span class="ruby-keyword">true</span>
|
600
610
|
<span class="ruby-keyword">end</span></pre>
|
601
611
|
</div><!-- read_sentence-source -->
|
602
612
|
|
data/html/BioCWriter.html
CHANGED
@@ -167,7 +167,7 @@
|
|
167
167
|
|
168
168
|
<div class="method-heading">
|
169
169
|
<span class="method-name">write</span><span
|
170
|
-
class="method-args">(collection)</span>
|
170
|
+
class="method-args">(collection, options = {})</span>
|
171
171
|
|
172
172
|
<span class="method-click-advice">click to toggle source</span>
|
173
173
|
|
@@ -183,11 +183,13 @@
|
|
183
183
|
|
184
184
|
<div class="method-source-code" id="write-source">
|
185
185
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 6</span>
|
186
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>)
|
186
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span> = {})
|
187
|
+
<span class="ruby-identifier">options</span>[<span class="ruby-value">:save_with</span>] = <span class="ruby-value">1</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:save_with</span>].<span class="ruby-identifier">nil?</span>
|
187
188
|
<span class="ruby-identifier">builder</span> = <span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span><span class="ruby-operator">::</span><span class="ruby-constant">Builder</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">:encoding</span> =<span class="ruby-operator">></span> <span class="ruby-string">'UTF-8'</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">xml</span><span class="ruby-operator">|</span>
|
189
|
+
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">doc</span>.<span class="ruby-identifier">create_internal_subset</span>( <span class="ruby-string">'collection'</span>, <span class="ruby-keyword">nil</span>, <span class="ruby-string">'BioC.dtd'</span> )
|
188
190
|
<span class="ruby-identifier">write_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
189
191
|
<span class="ruby-keyword">end</span>
|
190
|
-
<span class="ruby-identifier">builder</span>.<span class="ruby-identifier">to_xml</span>
|
192
|
+
<span class="ruby-identifier">builder</span>.<span class="ruby-identifier">to_xml</span>(<span class="ruby-identifier">options</span>)
|
191
193
|
<span class="ruby-keyword">end</span></pre>
|
192
194
|
</div><!-- write-source -->
|
193
195
|
|
@@ -218,7 +220,7 @@
|
|
218
220
|
|
219
221
|
|
220
222
|
<div class="method-source-code" id="write_annotation-source">
|
221
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
223
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 63</span>
|
222
224
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_annotation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>)
|
223
225
|
<span class="ruby-keyword">if</span> <span class="ruby-identifier">annotation</span>.<span class="ruby-identifier">id</span>.<span class="ruby-identifier">nil?</span>
|
224
226
|
<span class="ruby-identifier">attribute</span> = <span class="ruby-keyword">nil</span>
|
@@ -260,7 +262,7 @@
|
|
260
262
|
|
261
263
|
|
262
264
|
<div class="method-source-code" id="write_collection-source">
|
263
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
265
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 23</span>
|
264
266
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
265
267
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">collection</span> {
|
266
268
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">source</span> <span class="ruby-identifier">collection</span>.<span class="ruby-identifier">source</span>
|
@@ -299,7 +301,7 @@
|
|
299
301
|
|
300
302
|
|
301
303
|
<div class="method-source-code" id="write_document-source">
|
302
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
304
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 33</span>
|
303
305
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_document</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>)
|
304
306
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">document</span> {
|
305
307
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">id_</span> <span class="ruby-identifier">document</span>.<span class="ruby-identifier">id</span>
|
@@ -337,7 +339,7 @@
|
|
337
339
|
|
338
340
|
|
339
341
|
<div class="method-source-code" id="write_infon-source">
|
340
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
342
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 15</span>
|
341
343
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">obj</span>)
|
342
344
|
<span class="ruby-identifier">obj</span>.<span class="ruby-identifier">infons</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>, <span class="ruby-identifier">v</span><span class="ruby-operator">|</span>
|
343
345
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">infon</span>(<span class="ruby-value">:key</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">k</span>) {
|
@@ -374,7 +376,7 @@
|
|
374
376
|
|
375
377
|
|
376
378
|
<div class="method-source-code" id="write_location-source">
|
377
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
379
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 88</span>
|
378
380
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_location</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">location</span>)
|
379
381
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">location</span>(<span class="ruby-value">:offset</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">location</span>.<span class="ruby-identifier">offset</span>, <span class="ruby-value">:length</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">location</span>.<span class="ruby-identifier">length</span>)
|
380
382
|
<span class="ruby-keyword">end</span></pre>
|
@@ -407,7 +409,7 @@
|
|
407
409
|
|
408
410
|
|
409
411
|
<div class="method-source-code" id="write_node-source">
|
410
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
412
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 92</span>
|
411
413
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_node</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">node</span>)
|
412
414
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">node_</span>(<span class="ruby-value">:refid</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">node</span>.<span class="ruby-identifier">refid</span>, <span class="ruby-value">:role</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">node</span>.<span class="ruby-identifier">role</span>)
|
413
415
|
<span class="ruby-keyword">end</span></pre>
|
@@ -440,7 +442,7 @@
|
|
440
442
|
|
441
443
|
|
442
444
|
<div class="method-source-code" id="write_passage-source">
|
443
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
445
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 42</span>
|
444
446
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_passage</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
445
447
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">passage</span> {
|
446
448
|
<span class="ruby-identifier">write_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
@@ -480,7 +482,7 @@
|
|
480
482
|
|
481
483
|
|
482
484
|
<div class="method-source-code" id="write_relation-source">
|
483
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
485
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 76</span>
|
484
486
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_relation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>)
|
485
487
|
<span class="ruby-keyword">if</span> <span class="ruby-identifier">relation</span>.<span class="ruby-identifier">id</span>.<span class="ruby-identifier">nil?</span>
|
486
488
|
<span class="ruby-identifier">attribute</span> = <span class="ruby-keyword">nil</span>
|
@@ -521,7 +523,7 @@
|
|
521
523
|
|
522
524
|
|
523
525
|
<div class="method-source-code" id="write_sentence-source">
|
524
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
526
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 53</span>
|
525
527
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_sentence</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
526
528
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">sentence</span> {
|
527
529
|
<span class="ruby-identifier">write_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
data/html/README_md.html
CHANGED
@@ -48,10 +48,13 @@
|
|
48
48
|
<nav class="section">
|
49
49
|
<h3 class="section-header">Table of Contents</h3>
|
50
50
|
<ul>
|
51
|
-
<li><a href="#label-
|
51
|
+
<li><a href="#label-SimpleBioC">SimpleBioC</a>
|
52
52
|
<li><a href="#label-Feature%3A">Feature:</a>
|
53
53
|
<li><a href="#label-Installation">Installation</a>
|
54
|
-
<li><a href="#label-Usages">Usages</a>
|
54
|
+
<li><a href="#label-Simple+Usages">Simple Usages</a>
|
55
|
+
<li><a href="#label-Options">Options</a>
|
56
|
+
<li><a href="#label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse</a>
|
57
|
+
<li><a href="#label-No+whitespace+in+output">No whitespace in output</a>
|
55
58
|
<li><a href="#label-Sample">Sample</a>
|
56
59
|
<li><a href="#label-Contributing">Contributing</a>
|
57
60
|
<li><a href="#label-LICENSE">LICENSE</a>
|
@@ -108,7 +111,7 @@
|
|
108
111
|
|
109
112
|
<div id="documentation" class="description">
|
110
113
|
|
111
|
-
<h1 id="label-
|
114
|
+
<h1 id="label-SimpleBioC"><a href="SimpleBioC.html">SimpleBioC</a><span><a href="#label-SimpleBioC">¶</a> <a href="#documentation">↑</a></span></h1>
|
112
115
|
|
113
116
|
<p><a href="SimpleBioC.html">SimpleBioC</a> is a simple parser / builder for
|
114
117
|
BioC data format. BioC is a simple XML format to share text documents and
|
@@ -140,7 +143,7 @@ BioC DTD</p>
|
|
140
143
|
|
141
144
|
<pre>$ gem install simple_bioc</pre>
|
142
145
|
|
143
|
-
<h2 id="label-Usages">Usages<span><a href="#label-Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
146
|
+
<h2 id="label-Simple+Usages">Simple Usages<span><a href="#label-Simple+Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
144
147
|
|
145
148
|
<p>Include library</p>
|
146
149
|
|
@@ -152,7 +155,11 @@ BioC DTD</p>
|
|
152
155
|
<pre>collection = SimpleBioC::from_xml(filename)</pre>
|
153
156
|
|
154
157
|
<p>Traverse & Manipulate Data. Data structures are almost the same as the
|
155
|
-
DTD. Please refer <a
|
158
|
+
DTD. Please refer to <a
|
159
|
+
href="http://rubydoc.info/gems/simple_bioc/0.0.2/frames">library
|
160
|
+
documents</a> and <a
|
161
|
+
href="http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html">the
|
162
|
+
BioC DTD</a>.</p>
|
156
163
|
|
157
164
|
<pre>puts collection.documents[2].passages[0].text</pre>
|
158
165
|
|
@@ -160,6 +167,25 @@ DTD. Please refer <a href="http://">library documents</a>.</p>
|
|
160
167
|
|
161
168
|
<pre>puts SimpleBioC::to_xml(collection)</pre>
|
162
169
|
|
170
|
+
<h2 id="label-Options">Options<span><a href="#label-Options">¶</a> <a href="#documentation">↑</a></span></h2>
|
171
|
+
|
172
|
+
<h3 id="label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse<span><a href="#label-Specify+set+of+%3Cdocument%3Es+to+parse">¶</a> <a href="#documentation">↑</a></span></h3>
|
173
|
+
|
174
|
+
<p>You can parse only a set of document elements in a large xml document
|
175
|
+
instead of parsing all the document elements. It may decrease the
|
176
|
+
processing time. For example, the following code will return a collection
|
177
|
+
with two documents (“1234”, “4567”).</p>
|
178
|
+
|
179
|
+
<pre>collection = SimpleBioC::from_xml(filename, {documents: ["1234", "4567"]})</pre>
|
180
|
+
|
181
|
+
<h3 id="label-No+whitespace+in+output">No whitespace in output<span><a href="#label-No+whitespace+in+output">¶</a> <a href="#documentation">↑</a></span></h3>
|
182
|
+
|
183
|
+
<p>By default, outputs of SimpleBioC::to_xml() will be formatted with
|
184
|
+
whitespace. If you do not want this whitespace, you should pass
|
185
|
+
'save_with' option with 0 to the to_xml() function.</p>
|
186
|
+
|
187
|
+
<pre>puts SimpleBioC::to_xml(collection, {save_with:0})</pre>
|
188
|
+
|
163
189
|
<h2 id="label-Sample">Sample<span><a href="#label-Sample">¶</a> <a href="#documentation">↑</a></span></h2>
|
164
190
|
|
165
191
|
<p>More samples can be found in Samples directory</p>
|
@@ -131,9 +131,9 @@
|
|
131
131
|
|
132
132
|
<div id="description" class="description">
|
133
133
|
|
134
|
-
<p><a href="NodeBase.html">NodeBase</a> is not
|
135
|
-
class of <a href="Annotation.html">Annotation</a> & <a
|
136
|
-
href="Relation.html">Relation</a
|
134
|
+
<p><a href="NodeBase.html">NodeBase</a> is not a BioC DTD entity. This is a
|
135
|
+
super class of <a href="Annotation.html">Annotation</a> & <a
|
136
|
+
href="Relation.html">Relation</a>.</p>
|
137
137
|
|
138
138
|
</div><!-- description -->
|
139
139
|
|
data/html/SimpleBioC.html
CHANGED
@@ -177,7 +177,7 @@
|
|
177
177
|
|
178
178
|
<div class="method-heading">
|
179
179
|
<span class="method-name">from_xml</span><span
|
180
|
-
class="method-args">(file_path)</span>
|
180
|
+
class="method-args">(file_path, options = {})</span>
|
181
181
|
|
182
182
|
<span class="method-click-advice">click to toggle source</span>
|
183
183
|
|
@@ -188,14 +188,33 @@
|
|
188
188
|
|
189
189
|
<p>parse a BioC XML file in the given path and convert it into a collection
|
190
190
|
instance</p>
|
191
|
+
|
192
|
+
<h4 id="method-i-from_xml-label-Arguments">Arguments<span><a href="#method-i-from_xml-label-Arguments">¶</a> <a href="#documentation">↑</a></span></h4>
|
193
|
+
<ul><li>
|
194
|
+
<p><code>file_path</code> - file path for parse</p>
|
195
|
+
</li><li>
|
196
|
+
<p><code>options</code> - (optional) additional options</p>
|
197
|
+
</li></ul>
|
198
|
+
|
199
|
+
<h4 id="method-i-from_xml-label-Options">Options<span><a href="#method-i-from_xml-label-Options">¶</a> <a href="#documentation">↑</a></span></h4>
|
200
|
+
<ul><li>
|
201
|
+
<p><code>documents</code> - specify IDs of documents to parse. The result will
|
202
|
+
include only the specified documents</p>
|
203
|
+
</li></ul>
|
204
|
+
|
205
|
+
<h4 id="method-i-from_xml-label-Examples">Examples<span><a href="#method-i-from_xml-label-Examples">¶</a> <a href="#documentation">↑</a></span></h4>
|
206
|
+
|
207
|
+
<pre>collection = SimpleBioC.from_xml("./xml/everything.xml")
|
208
|
+
collection = SimpleBioC.from_xml("./xml/lemma.xml", {documents:[21785578, 21488974]})</pre>
|
191
209
|
|
192
210
|
|
193
211
|
|
194
212
|
|
195
213
|
<div class="method-source-code" id="from_xml-source">
|
196
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line
|
197
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">from_xml</span>(<span class="ruby-identifier">file_path</span>)
|
198
|
-
<span class="ruby-
|
214
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line 22</span>
|
215
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">from_xml</span>(<span class="ruby-identifier">file_path</span>, <span class="ruby-identifier">options</span> = {})
|
216
|
+
<span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>] = <span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">map</span>{<span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">to_s</span>} <span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">kind_of?</span>(<span class="ruby-constant">Array</span>)
|
217
|
+
<span class="ruby-constant">BioCReader</span>.<span class="ruby-identifier">read</span>(<span class="ruby-identifier">file_path</span>, <span class="ruby-identifier">options</span>)
|
199
218
|
<span class="ruby-keyword">end</span></pre>
|
200
219
|
</div><!-- from_xml-source -->
|
201
220
|
|
@@ -211,7 +230,7 @@ instance</p>
|
|
211
230
|
|
212
231
|
<div class="method-heading">
|
213
232
|
<span class="method-name">to_xml</span><span
|
214
|
-
class="method-args">(collection)</span>
|
233
|
+
class="method-args">(collection, options = {})</span>
|
215
234
|
|
216
235
|
<span class="method-click-advice">click to toggle source</span>
|
217
236
|
|
@@ -220,15 +239,35 @@ instance</p>
|
|
220
239
|
|
221
240
|
<div class="method-description">
|
222
241
|
|
223
|
-
<p>convert a collection instance to a BioC XML text
|
242
|
+
<p>convert a collection instance to a BioC XML text. Output will return as
|
243
|
+
string</p>
|
244
|
+
|
245
|
+
<h4 id="method-i-to_xml-label-Arguments">Arguments<span><a href="#method-i-to_xml-label-Arguments">¶</a> <a href="#documentation">↑</a></span></h4>
|
246
|
+
<ul><li>
|
247
|
+
<p><code>collection</code> - <a
|
248
|
+
href="SimpleBioC/Collection.html">Collection</a> instance to process</p>
|
249
|
+
</li><li>
|
250
|
+
<p><code>options</code> - (optional) additional options</p>
|
251
|
+
</li></ul>
|
252
|
+
|
253
|
+
<h4 id="method-i-to_xml-label-Options">Options<span><a href="#method-i-to_xml-label-Options">¶</a> <a href="#documentation">↑</a></span></h4>
|
254
|
+
<ul><li>
|
255
|
+
<p><code>save_with</code> - SaveOption for Nokogiri. If you set this to 0, output
|
256
|
+
has no format (no indentation, no whitespace)</p>
|
257
|
+
</li></ul>
|
258
|
+
|
259
|
+
<h4 id="method-i-to_xml-label-Examples">Examples<span><a href="#method-i-to_xml-label-Examples">¶</a> <a href="#documentation">↑</a></span></h4>
|
260
|
+
|
261
|
+
<pre>output = SimpleBioC.to_xml(collection)
|
262
|
+
output = SimpleBioC.to_xml(collection, {save_with: 0})</pre>
|
224
263
|
|
225
264
|
|
226
265
|
|
227
266
|
|
228
267
|
<div class="method-source-code" id="to_xml-source">
|
229
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line
|
230
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">to_xml</span>(<span class="ruby-identifier">collection</span>)
|
231
|
-
<span class="ruby-constant">BioCWriter</span>.<span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>)
|
268
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line 39</span>
|
269
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">to_xml</span>(<span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span> = {})
|
270
|
+
<span class="ruby-constant">BioCWriter</span>.<span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span>)
|
232
271
|
<span class="ruby-keyword">end</span></pre>
|
233
272
|
</div><!-- to_xml-source -->
|
234
273
|
|
data/html/created.rid
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
|
2
|
-
README.md
|
3
|
-
lib/simple_bioc.rb
|
1
|
+
Sat, 21 Dec 2013 09:10:44 -0800
|
2
|
+
README.md Sat, 21 Dec 2013 09:10:35 -0800
|
3
|
+
lib/simple_bioc.rb Sat, 21 Dec 2013 08:58:11 -0800
|
4
4
|
lib/simple_bioc/annotation.rb Fri, 06 Dec 2013 20:52:11 -0800
|
5
|
-
lib/simple_bioc/bioc_reader.rb
|
6
|
-
lib/simple_bioc/bioc_writer.rb
|
5
|
+
lib/simple_bioc/bioc_reader.rb Sat, 21 Dec 2013 08:39:36 -0800
|
6
|
+
lib/simple_bioc/bioc_writer.rb Sat, 21 Dec 2013 08:09:06 -0800
|
7
7
|
lib/simple_bioc/collection.rb Fri, 06 Dec 2013 20:52:11 -0800
|
8
8
|
lib/simple_bioc/document.rb Fri, 06 Dec 2013 21:18:25 -0800
|
9
9
|
lib/simple_bioc/location.rb Fri, 06 Dec 2013 20:52:11 -0800
|
10
10
|
lib/simple_bioc/node.rb Fri, 06 Dec 2013 20:52:11 -0800
|
11
|
-
lib/simple_bioc/node_base.rb Fri, 06 Dec 2013 22:
|
11
|
+
lib/simple_bioc/node_base.rb Fri, 06 Dec 2013 22:06:00 -0800
|
12
12
|
lib/simple_bioc/passage.rb Fri, 06 Dec 2013 20:52:11 -0800
|
13
13
|
lib/simple_bioc/relation.rb Fri, 06 Dec 2013 20:52:11 -0800
|
14
14
|
lib/simple_bioc/sentence.rb Fri, 06 Dec 2013 20:52:11 -0800
|
15
|
-
lib/simple_bioc/version.rb Fri, 06 Dec 2013
|
15
|
+
lib/simple_bioc/version.rb Fri, 06 Dec 2013 22:06:07 -0800
|
data/html/index.html
CHANGED
@@ -91,7 +91,7 @@
|
|
91
91
|
|
92
92
|
<div id="documentation" class="description">
|
93
93
|
|
94
|
-
<h1 id="label-
|
94
|
+
<h1 id="label-SimpleBioC"><a href="SimpleBioC.html">SimpleBioC</a><span><a href="#label-SimpleBioC">¶</a> <a href="#documentation">↑</a></span></h1>
|
95
95
|
|
96
96
|
<p><a href="SimpleBioC.html">SimpleBioC</a> is a simple parser / builder for
|
97
97
|
BioC data format. BioC is a simple XML format to share text documents and
|
@@ -123,7 +123,7 @@ BioC DTD</p>
|
|
123
123
|
|
124
124
|
<pre>$ gem install simple_bioc</pre>
|
125
125
|
|
126
|
-
<h2 id="label-Usages">Usages<span><a href="#label-Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
126
|
+
<h2 id="label-Simple+Usages">Simple Usages<span><a href="#label-Simple+Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
127
127
|
|
128
128
|
<p>Include library</p>
|
129
129
|
|
@@ -135,7 +135,11 @@ BioC DTD</p>
|
|
135
135
|
<pre>collection = SimpleBioC::from_xml(filename)</pre>
|
136
136
|
|
137
137
|
<p>Traverse & Manipulate Data. Data structures are almost the same as the
|
138
|
-
DTD. Please refer <a
|
138
|
+
DTD. Please refer to <a
|
139
|
+
href="http://rubydoc.info/gems/simple_bioc/0.0.2/frames">library
|
140
|
+
documents</a> and <a
|
141
|
+
href="http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html">the
|
142
|
+
BioC DTD</a>.</p>
|
139
143
|
|
140
144
|
<pre>puts collection.documents[2].passages[0].text</pre>
|
141
145
|
|
@@ -143,6 +147,25 @@ DTD. Please refer <a href="http://">library documents</a>.</p>
|
|
143
147
|
|
144
148
|
<pre>puts SimpleBioC::to_xml(collection)</pre>
|
145
149
|
|
150
|
+
<h2 id="label-Options">Options<span><a href="#label-Options">¶</a> <a href="#documentation">↑</a></span></h2>
|
151
|
+
|
152
|
+
<h3 id="label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse<span><a href="#label-Specify+set+of+%3Cdocument%3Es+to+parse">¶</a> <a href="#documentation">↑</a></span></h3>
|
153
|
+
|
154
|
+
<p>You can parse only a set of document elements in a large xml document
|
155
|
+
instead of parsing all the document elements. It may decrease the
|
156
|
+
processing time. For example, the following code will return a collection
|
157
|
+
with two documents (“1234”, “4567”).</p>
|
158
|
+
|
159
|
+
<pre>collection = SimpleBioC::from_xml(filename, {documents: ["1234", "4567"]})</pre>
|
160
|
+
|
161
|
+
<h3 id="label-No+whitespace+in+output">No whitespace in output<span><a href="#label-No+whitespace+in+output">¶</a> <a href="#documentation">↑</a></span></h3>
|
162
|
+
|
163
|
+
<p>By default, outputs of SimpleBioC::to_xml() will be formatted with
|
164
|
+
whitespace. If you do not want this whitespace, you should pass
|
165
|
+
'save_with' option with 0 to the to_xml() function.</p>
|
166
|
+
|
167
|
+
<pre>puts SimpleBioC::to_xml(collection, {save_with:0})</pre>
|
168
|
+
|
146
169
|
<h2 id="label-Sample">Sample<span><a href="#label-Sample">¶</a> <a href="#documentation">↑</a></span></h2>
|
147
170
|
|
148
171
|
<p>More samples can be found in Samples directory</p>
|
data/html/js/search_index.js
CHANGED
@@ -1 +1 @@
|
|
1
|
-
var search_data = {"index":{"searchIndex":["biocreader","biocwriter","simplebioc","annotation","collection","document","location","node","nodebase","passage","relation","sentence","adjust_ref()","adjust_ref()","adjust_ref()","each_relation()","each_relation()","each_relation()","find_node()","find_node()","find_node()","from_xml()","new()","new()","new()","new()","new()","new()","new()","new()","new()","read()","read_annotation()","read_collection()","read_document()","read_infon()","read_int()","read_location()","read_node()","read_passage()","read_recursive()","read_relation()","read_sentence()","read_text()","to_c()","to_c()","to_c()","to_c()","to_s()","to_s()","to_s()","to_s()","to_xml()","write()","write_annotation()","write_collection()","write_document()","write_infon()","write_location()","write_node()","write_passage()","write_relation()","write_sentence()","readme"],"longSearchIndex":["biocreader","biocwriter","simplebioc","simplebioc::annotation","simplebioc::collection","simplebioc::document","simplebioc::location","simplebioc::node","simplebioc::nodebase","simplebioc::passage","simplebioc::relation","simplebioc::sentence","simplebioc::document#adjust_ref()","simplebioc::node#adjust_ref()","simplebioc::relation#adjust_ref()","simplebioc::document#each_relation()","simplebioc::passage#each_relation()","simplebioc::sentence#each_relation()","simplebioc::document#find_node()","simplebioc::passage#find_node()","simplebioc::sentence#find_node()","simplebioc#from_xml()","simplebioc::annotation::new()","simplebioc::collection::new()","simplebioc::document::new()","simplebioc::location::new()","simplebioc::node::new()","simplebioc::nodebase::new()","simplebioc::passage::new()","simplebioc::relation::new()","simplebioc::sentence::new()","biocreader#read()","biocreader#read_annotation()","biocreader#read_collection()","biocreader#read_document()","biocreader#read_infon()","biocreader#read_int()","biocreader#read_location()","biocreader#read_node()","biocreader#rea
d_passage()","biocreader#read_recursive()","biocreader#read_relation()","biocreader#read_sentence()","biocreader#read_text()","simplebioc::collection#to_c()","simplebioc::node#to_c()","simplebioc::relation#to_c()","simplebioc::sentence#to_c()","simplebioc::annotation#to_s()","simplebioc::document#to_s()","simplebioc::location#to_s()","simplebioc::passage#to_s()","simplebioc#to_xml()","biocwriter#write()","biocwriter#write_annotation()","biocwriter#write_collection()","biocwriter#write_document()","biocwriter#write_infon()","biocwriter#write_location()","biocwriter#write_node()","biocwriter#write_passage()","biocwriter#write_relation()","biocwriter#write_sentence()",""],"info":[["BioCReader","","BioCReader.html","",""],["BioCWriter","","BioCWriter.html","",""],["SimpleBioC","","SimpleBioC.html","","<p>SimpleBioC main library\n"],["SimpleBioC::Annotation","","SimpleBioC/Annotation.html","",""],["SimpleBioC::Collection","","SimpleBioC/Collection.html","",""],["SimpleBioC::Document","","SimpleBioC/Document.html","",""],["SimpleBioC::Location","","SimpleBioC/Location.html","",""],["SimpleBioC::Node","","SimpleBioC/Node.html","",""],["SimpleBioC::NodeBase","","SimpleBioC/NodeBase.html","","<p>NodeBase is not
|
1
|
+
var search_data = {"index":{"searchIndex":["biocreader","biocwriter","simplebioc","annotation","collection","document","location","node","nodebase","passage","relation","sentence","adjust_ref()","adjust_ref()","adjust_ref()","each_relation()","each_relation()","each_relation()","find_node()","find_node()","find_node()","from_xml()","new()","new()","new()","new()","new()","new()","new()","new()","new()","read()","read_annotation()","read_collection()","read_document()","read_infon()","read_int()","read_location()","read_node()","read_passage()","read_recursive()","read_relation()","read_sentence()","read_text()","to_c()","to_c()","to_c()","to_c()","to_s()","to_s()","to_s()","to_s()","to_xml()","write()","write_annotation()","write_collection()","write_document()","write_infon()","write_location()","write_node()","write_passage()","write_relation()","write_sentence()","readme"],"longSearchIndex":["biocreader","biocwriter","simplebioc","simplebioc::annotation","simplebioc::collection","simplebioc::document","simplebioc::location","simplebioc::node","simplebioc::nodebase","simplebioc::passage","simplebioc::relation","simplebioc::sentence","simplebioc::document#adjust_ref()","simplebioc::node#adjust_ref()","simplebioc::relation#adjust_ref()","simplebioc::document#each_relation()","simplebioc::passage#each_relation()","simplebioc::sentence#each_relation()","simplebioc::document#find_node()","simplebioc::passage#find_node()","simplebioc::sentence#find_node()","simplebioc#from_xml()","simplebioc::annotation::new()","simplebioc::collection::new()","simplebioc::document::new()","simplebioc::location::new()","simplebioc::node::new()","simplebioc::nodebase::new()","simplebioc::passage::new()","simplebioc::relation::new()","simplebioc::sentence::new()","biocreader#read()","biocreader#read_annotation()","biocreader#read_collection()","biocreader#read_document()","biocreader#read_infon()","biocreader#read_int()","biocreader#read_location()","biocreader#read_node()","biocreader#rea
d_passage()","biocreader#read_recursive()","biocreader#read_relation()","biocreader#read_sentence()","biocreader#read_text()","simplebioc::collection#to_c()","simplebioc::node#to_c()","simplebioc::relation#to_c()","simplebioc::sentence#to_c()","simplebioc::annotation#to_s()","simplebioc::document#to_s()","simplebioc::location#to_s()","simplebioc::passage#to_s()","simplebioc#to_xml()","biocwriter#write()","biocwriter#write_annotation()","biocwriter#write_collection()","biocwriter#write_document()","biocwriter#write_infon()","biocwriter#write_location()","biocwriter#write_node()","biocwriter#write_passage()","biocwriter#write_relation()","biocwriter#write_sentence()",""],"info":[["BioCReader","","BioCReader.html","",""],["BioCWriter","","BioCWriter.html","",""],["SimpleBioC","","SimpleBioC.html","","<p>SimpleBioC main library\n"],["SimpleBioC::Annotation","","SimpleBioC/Annotation.html","",""],["SimpleBioC::Collection","","SimpleBioC/Collection.html","",""],["SimpleBioC::Document","","SimpleBioC/Document.html","",""],["SimpleBioC::Location","","SimpleBioC/Location.html","",""],["SimpleBioC::Node","","SimpleBioC/Node.html","",""],["SimpleBioC::NodeBase","","SimpleBioC/NodeBase.html","","<p>NodeBase is not a BioC DTD entity. 
This is a super class of Annotation\n& Relation.\n"],["SimpleBioC::Passage","","SimpleBioC/Passage.html","",""],["SimpleBioC::Relation","","SimpleBioC/Relation.html","",""],["SimpleBioC::Sentence","","SimpleBioC/Sentence.html","",""],["adjust_ref","SimpleBioC::Document","SimpleBioC/Document.html#method-i-adjust_ref","()",""],["adjust_ref","SimpleBioC::Node","SimpleBioC/Node.html#method-i-adjust_ref","()",""],["adjust_ref","SimpleBioC::Relation","SimpleBioC/Relation.html#method-i-adjust_ref","()",""],["each_relation","SimpleBioC::Document","SimpleBioC/Document.html#method-i-each_relation","()",""],["each_relation","SimpleBioC::Passage","SimpleBioC/Passage.html#method-i-each_relation","()",""],["each_relation","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-i-each_relation","()",""],["find_node","SimpleBioC::Document","SimpleBioC/Document.html#method-i-find_node","(id)",""],["find_node","SimpleBioC::Passage","SimpleBioC/Passage.html#method-i-find_node","(id)",""],["find_node","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-i-find_node","(id)",""],["from_xml","SimpleBioC","SimpleBioC.html#method-i-from_xml","(file_path, options = {})","<p>parse a BioC XML file in the given path and convert it into a collection\ninstance\n<p>Arguments\n<p><code>file_path</code> - 
…\n"],["new","SimpleBioC::Annotation","SimpleBioC/Annotation.html#method-c-new","(parent)",""],["new","SimpleBioC::Collection","SimpleBioC/Collection.html#method-c-new","()",""],["new","SimpleBioC::Document","SimpleBioC/Document.html#method-c-new","(parent)",""],["new","SimpleBioC::Location","SimpleBioC/Location.html#method-c-new","(parent)",""],["new","SimpleBioC::Node","SimpleBioC/Node.html#method-c-new","(parent)",""],["new","SimpleBioC::NodeBase","SimpleBioC/NodeBase.html#method-c-new","(parent)",""],["new","SimpleBioC::Passage","SimpleBioC/Passage.html#method-c-new","(parent)",""],["new","SimpleBioC::Relation","SimpleBioC/Relation.html#method-c-new","(parent)",""],["new","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-c-new","(parent)",""],["read","BioCReader","BioCReader.html#method-i-read","(path, options)",""],["read_annotation","BioCReader","BioCReader.html#method-i-read_annotation","(xml, annotation, options = {})",""],["read_collection","BioCReader","BioCReader.html#method-i-read_collection","(xml, collection, options = {})",""],["read_document","BioCReader","BioCReader.html#method-i-read_document","(xml, document, options = {})",""],["read_infon","BioCReader","BioCReader.html#method-i-read_infon","(xml, obj)",""],["read_int","BioCReader","BioCReader.html#method-i-read_int","(xml, name)",""],["read_location","BioCReader","BioCReader.html#method-i-read_location","(xml, location, options = {})",""],["read_node","BioCReader","BioCReader.html#method-i-read_node","(xml, node, options = {})",""],["read_passage","BioCReader","BioCReader.html#method-i-read_passage","(xml, passage, options = {})",""],["read_recursive","BioCReader","BioCReader.html#method-i-read_recursive","(xml, obj, name, options = {})",""],["read_relation","BioCReader","BioCReader.html#method-i-read_relation","(xml, relation, options = {})",""],["read_sentence","BioCReader","BioCReader.html#method-i-read_sentence","(xml, sentence, options = 
{})",""],["read_text","BioCReader","BioCReader.html#method-i-read_text","(xml, name)",""],["to_c","SimpleBioC::Collection","SimpleBioC/Collection.html#method-i-to_c","()",""],["to_c","SimpleBioC::Node","SimpleBioC/Node.html#method-i-to_c","()",""],["to_c","SimpleBioC::Relation","SimpleBioC/Relation.html#method-i-to_c","()",""],["to_c","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-i-to_c","()",""],["to_s","SimpleBioC::Annotation","SimpleBioC/Annotation.html#method-i-to_s","()",""],["to_s","SimpleBioC::Document","SimpleBioC/Document.html#method-i-to_s","()",""],["to_s","SimpleBioC::Location","SimpleBioC/Location.html#method-i-to_s","()",""],["to_s","SimpleBioC::Passage","SimpleBioC/Passage.html#method-i-to_s","()",""],["to_xml","SimpleBioC","SimpleBioC.html#method-i-to_xml","(collection, options = {})","<p>convert a collection instance to a BioC XML text. Output will return as\nstring\n<p>Arguments\n<p><code>collection</code> - …\n"],["write","BioCWriter","BioCWriter.html#method-i-write","(collection, options = {})",""],["write_annotation","BioCWriter","BioCWriter.html#method-i-write_annotation","(xml, annotation)",""],["write_collection","BioCWriter","BioCWriter.html#method-i-write_collection","(xml, collection)",""],["write_document","BioCWriter","BioCWriter.html#method-i-write_document","(xml, document)",""],["write_infon","BioCWriter","BioCWriter.html#method-i-write_infon","(xml, obj)",""],["write_location","BioCWriter","BioCWriter.html#method-i-write_location","(xml, location)",""],["write_node","BioCWriter","BioCWriter.html#method-i-write_node","(xml, node)",""],["write_passage","BioCWriter","BioCWriter.html#method-i-write_passage","(xml, passage)",""],["write_relation","BioCWriter","BioCWriter.html#method-i-write_relation","(xml, relation)",""],["write_sentence","BioCWriter","BioCWriter.html#method-i-write_sentence","(xml, sentence)",""],["README","","README_md.html","","<p>SimpleBioC\n<p>SimpleBioC is a simple parser / builder for BioC data format. 
BioC is a\nsimple XML format to …\n"]]}}
|
data/html/table_of_contents.html
CHANGED
@@ -30,10 +30,13 @@
|
|
30
30
|
|
31
31
|
<img class="toc-toggle" src="images/transparent.png" alt="" title="toggle headings">
|
32
32
|
<ul class="initially-hidden">
|
33
|
-
<li><a href="README_md.html#label-
|
33
|
+
<li><a href="README_md.html#label-SimpleBioC">SimpleBioC</a>
|
34
34
|
<li><a href="README_md.html#label-Feature%3A">Feature:</a>
|
35
35
|
<li><a href="README_md.html#label-Installation">Installation</a>
|
36
|
-
<li><a href="README_md.html#label-Usages">Usages</a>
|
36
|
+
<li><a href="README_md.html#label-Simple+Usages">Simple Usages</a>
|
37
|
+
<li><a href="README_md.html#label-Options">Options</a>
|
38
|
+
<li><a href="README_md.html#label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse</a>
|
39
|
+
<li><a href="README_md.html#label-No+whitespace+in+output">No whitespace in output</a>
|
37
40
|
<li><a href="README_md.html#label-Sample">Sample</a>
|
38
41
|
<li><a href="README_md.html#label-Contributing">Contributing</a>
|
39
42
|
<li><a href="README_md.html#label-LICENSE">LICENSE</a>
|
@@ -4,7 +4,7 @@ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
|
|
4
4
|
module BioCReader
|
5
5
|
module_function
|
6
6
|
|
7
|
-
def read(path)
|
7
|
+
def read(path, options)
|
8
8
|
collection = nil
|
9
9
|
File.open(path) do |file|
|
10
10
|
xml_doc = Nokogiri::XML(file) do |config|
|
@@ -15,7 +15,7 @@ module BioCReader
|
|
15
15
|
fail 'Wrong format'
|
16
16
|
end
|
17
17
|
collection = SimpleBioC::Collection.new
|
18
|
-
read_collection(xml, collection)
|
18
|
+
read_collection(xml, collection, options)
|
19
19
|
end
|
20
20
|
|
21
21
|
collection
|
@@ -35,68 +35,78 @@ module BioCReader
|
|
35
35
|
xml.xpath("infon").each{ |i| obj.infons[i["key"]] = i.content}
|
36
36
|
end
|
37
37
|
|
38
|
-
def read_recursive(xml, obj, name)
|
38
|
+
def read_recursive(xml, obj, name, options = {})
|
39
39
|
target_class = SimpleBioC.const_get(name.capitalize)
|
40
40
|
xml.xpath(name).each do |node|
|
41
41
|
instance = target_class.new(obj)
|
42
|
-
send(:"read_#{name}", node, instance)
|
43
|
-
obj.instance_variable_get(:"@#{name}s") << instance
|
42
|
+
ret = send(:"read_#{name}", node, instance, options)
|
43
|
+
obj.instance_variable_get(:"@#{name}s") << instance if ret
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
-
def read_collection(xml, collection)
|
47
|
+
def read_collection(xml, collection, options = {})
|
48
48
|
collection.source = read_text(xml, "source")
|
49
49
|
collection.date = read_text(xml, "date")
|
50
50
|
collection.key = read_text(xml, "key")
|
51
51
|
read_infon(xml, collection)
|
52
|
-
read_recursive(xml, collection, "document")
|
52
|
+
read_recursive(xml, collection, "document", options)
|
53
53
|
end
|
54
54
|
|
55
|
-
def read_document(xml, document)
|
55
|
+
def read_document(xml, document, options = {})
|
56
56
|
document.id = read_text(xml, "id")
|
57
|
+
if options[:documents].kind_of?(Array) && !options[:documents].include?(document.id)
|
58
|
+
return false
|
59
|
+
end
|
57
60
|
read_infon(xml, document)
|
58
61
|
read_recursive(xml, document, "passage")
|
59
62
|
read_recursive(xml, document, "relation")
|
60
63
|
document.adjust_ref
|
64
|
+
true
|
61
65
|
end
|
62
66
|
|
63
|
-
def read_passage(xml, passage)
|
67
|
+
def read_passage(xml, passage, options = {})
|
64
68
|
passage.text = read_text(xml, "text")
|
65
69
|
passage.offset = read_int(xml, "offset")
|
66
70
|
read_infon(xml, passage)
|
67
71
|
read_recursive(xml, passage, "sentence")
|
68
72
|
read_recursive(xml, passage, "annotation")
|
69
73
|
read_recursive(xml, passage, "relation")
|
74
|
+
true
|
70
75
|
end
|
71
76
|
|
72
|
-
def read_sentence(xml, sentence)
|
77
|
+
def read_sentence(xml, sentence, options = {})
|
73
78
|
sentence.text = read_text(xml, "text")
|
74
79
|
sentence.offset = read_int(xml, "offset")
|
75
80
|
read_infon(xml, sentence)
|
76
81
|
read_recursive(xml, sentence, "annotation")
|
77
82
|
read_recursive(xml, sentence, "relation")
|
83
|
+
true
|
78
84
|
end
|
79
85
|
|
80
|
-
def read_annotation(xml, annotation)
|
86
|
+
def read_annotation(xml, annotation, options = {})
|
81
87
|
annotation.id = xml["id"]
|
82
88
|
annotation.text = read_text(xml, "text")
|
83
89
|
read_infon(xml, annotation)
|
84
90
|
read_recursive(xml, annotation, "location")
|
91
|
+
true
|
85
92
|
end
|
86
93
|
|
87
|
-
def read_relation(xml, relation)
|
94
|
+
def read_relation(xml, relation, options = {})
|
88
95
|
relation.id = xml["id"]
|
89
96
|
read_infon(xml, relation)
|
90
97
|
read_recursive(xml, relation, "node")
|
98
|
+
true
|
91
99
|
end
|
92
100
|
|
93
|
-
def read_location(xml, location)
|
101
|
+
def read_location(xml, location, options = {})
|
94
102
|
location.offset = xml["offset"]
|
95
103
|
location.length = xml["length"]
|
104
|
+
true
|
96
105
|
end
|
97
106
|
|
98
|
-
def read_node(xml, node)
|
107
|
+
def read_node(xml, node, options = {})
|
99
108
|
node.refid = xml["refid"]
|
100
109
|
node.role = xml["role"]
|
110
|
+
true
|
101
111
|
end
|
102
112
|
end
|
@@ -3,11 +3,13 @@ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
|
|
3
3
|
|
4
4
|
module BioCWriter
|
5
5
|
module_function
|
6
|
-
def write(collection)
|
6
|
+
def write(collection, options = {})
|
7
|
+
options[:save_with] = 1 if options[:save_with].nil?
|
7
8
|
builder = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
|
9
|
+
xml.doc.create_internal_subset( 'collection', nil, 'BioC.dtd' )
|
8
10
|
write_collection(xml, collection)
|
9
11
|
end
|
10
|
-
builder.to_xml
|
12
|
+
builder.to_xml(options)
|
11
13
|
end
|
12
14
|
|
13
15
|
def write_infon(xml, obj)
|
data/lib/simple_bioc/version.rb
CHANGED
data/lib/simple_bioc.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# @author Dongseop Kwon
|
1
2
|
require "simple_bioc/version"
|
2
3
|
require "simple_bioc/bioc_reader"
|
3
4
|
require "simple_bioc/bioc_writer"
|
@@ -7,12 +8,35 @@ module SimpleBioC
|
|
7
8
|
module_function
|
8
9
|
|
9
10
|
# parse a BioC XML file in the given path and convert it into a collection instance
|
10
|
-
|
11
|
-
|
11
|
+
#
|
12
|
+
# ==== Arguments
|
13
|
+
# * +file_path+ - path of the BioC XML file to parse
|
14
|
+
# * +options+ - (optional) additional options
|
15
|
+
#
|
16
|
+
# ==== Options
|
17
|
+
# * +documents+ - specify IDs of documents to parse. The result will include only the specified documents
|
18
|
+
#
|
19
|
+
# ==== Examples
|
20
|
+
# collection = SimpleBioC.from_xml("./xml/everything.xml")
|
21
|
+
# collection = SimpleBioC.from_xml("./xml/lemma.xml", {documents:[21785578, 21488974]})
|
22
|
+
def from_xml(file_path, options = {})
|
23
|
+
options[:documents] = options[:documents].map{|e| e.to_s} if options[:documents].kind_of?(Array)
|
24
|
+
BioCReader.read(file_path, options)
|
12
25
|
end
|
13
26
|
|
14
|
-
# convert a collection instance to a BioC XML text
|
15
|
-
|
16
|
-
|
27
|
+
# convert a collection instance to BioC XML text. The output is returned as a string
|
28
|
+
#
|
29
|
+
# ==== Arguments
|
30
|
+
# * +collection+ - Collection instance to process
|
31
|
+
# * +options+ - (optional) additional options
|
32
|
+
#
|
33
|
+
# ==== Options
|
34
|
+
# * +save_with+ - SaveOptions value for Nokogiri. If you set this to 0, the output has no formatting (no indentation, no whitespace)
|
35
|
+
#
|
36
|
+
# ==== Examples
|
37
|
+
# output = SimpleBioC.to_xml(collection)
|
38
|
+
# output = SimpleBioC.to_xml(collection, {save_with: 0})
|
39
|
+
def to_xml(collection, options = {})
|
40
|
+
BioCWriter.write(collection, options)
|
17
41
|
end
|
18
42
|
end
|
data/simple_bioc.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = SimpleBioC::VERSION
|
9
9
|
spec.authors = ["Dongseop Kwon"]
|
10
10
|
spec.email = ["dongseop@gmail.com"]
|
11
|
-
spec.description = "
|
11
|
+
spec.description = "SimpleBioC is a simple parser / builder for BioC data format. BioC is a simple XML format to share text documents and annotations. You can find more information about BioC from the official BioC web site (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/)"
|
12
12
|
spec.summary = "Simple BioC parser/builder for ruby"
|
13
13
|
spec.homepage = "https://github.com/dongseop/simple_bioc"
|
14
14
|
spec.license = "MIT"
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'simple_bioc'
|
2
|
+
require 'test_xml/spec'
|
3
|
+
describe "File Check" do
|
4
|
+
it "should be load successfully" do
|
5
|
+
Dir["./xml/*.xml"].each do |file_path|
|
6
|
+
puts file_path
|
7
|
+
collection = SimpleBioC.from_xml(file_path)
|
8
|
+
output = SimpleBioC.to_xml(collection)
|
9
|
+
expected = File.read(file_path)
|
10
|
+
expect(output).to equal_xml(expected)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/spec/simple_bioc_spec.rb
CHANGED
@@ -1,14 +1,25 @@
|
|
1
|
-
# bowling_spec.rb
|
2
1
|
require 'simple_bioc'
|
3
2
|
require 'test_xml/spec'
|
4
|
-
describe
|
5
|
-
it "should
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
3
|
+
describe "Simple function" do
|
4
|
+
it "should have a DTD declaration" do
|
5
|
+
collection = SimpleBioC.from_xml("./xml/everything.xml")
|
6
|
+
output = SimpleBioC.to_xml(collection)
|
7
|
+
output.should include('<!DOCTYPE collection SYSTEM "BioC.dtd">')
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should no space when save_with = 0" do
|
11
|
+
collection = SimpleBioC.from_xml("./xml/everything.xml")
|
12
|
+
output = SimpleBioC.to_xml(collection, {save_with: 0})
|
13
|
+
output.should include('<collection><source>Made up file to test that everything is allowed and processed. Has text in the passage.</source><date>20130426</date><key>everything.key</key><infon key="collection-infon-key">collection-infon-value</infon><document><id>1</id><infon key="document-infon-key">document-infon-value</infon><passage><infon key="passage-infon-key">passage-infon-value</infon><offset>0</offset><text>text of passage</text><annotation id="P1"><infon key="annotation-infon-key">annotation-infon-value</infon><text>annotation text</text><location offset="1" length="2"/></annotation><relation id="RP1"><infon key="passage-relation-infon-key">passage-relation-infon-value</infon><node refid="RP1" role="passage-relation"/></relation></passage><relation id="D1"><infon key="document-relation-infon-key">document-relation-infon-value</infon><node refid="RD1" role="document-relation"/></relation></document></collection>')
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should process only one document by options" do
|
17
|
+
collection = SimpleBioC.from_xml("./xml/lemma.xml", {documents:[21785578, 21488974]})
|
18
|
+
output = SimpleBioC.to_xml(collection, {save_with: 0})
|
19
|
+
output.scan("<document>").size.should equal 2
|
20
|
+
output.should include("<document><id>21785578</id>")
|
21
|
+
output.should include("<document><id>21488974</id>")
|
22
|
+
output.should_not include("<document><id>21660417</id>")
|
23
|
+
output.should_not include("<document><id>21951408</id>")
|
13
24
|
end
|
14
25
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_bioc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dongseop Kwon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -80,8 +80,9 @@ dependencies:
|
|
80
80
|
- - ~>
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.1.6
|
83
|
-
description:
|
84
|
-
to
|
83
|
+
description: SimpleBioC is a simple parser / builder for BioC data format. BioC is
|
84
|
+
a simple XML format to share text documents and annotations. You can find more information
|
85
|
+
about BioC from the official BioC web site (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/)
|
85
86
|
email:
|
86
87
|
- dongseop@gmail.com
|
87
88
|
executables: []
|
@@ -158,6 +159,7 @@ files:
|
|
158
159
|
- samples/print_annotation.rb
|
159
160
|
- samples/sample1.rb
|
160
161
|
- simple_bioc.gemspec
|
162
|
+
- spec/file_check_spec.rb
|
161
163
|
- spec/simple_bioc_spec.rb
|
162
164
|
- xml/BioC.dtd
|
163
165
|
- xml/PMID-8557975-simplified-sentences-tokens.xml
|
@@ -200,4 +202,6 @@ signing_key:
|
|
200
202
|
specification_version: 4
|
201
203
|
summary: Simple BioC parser/builder for ruby
|
202
204
|
test_files:
|
205
|
+
- spec/file_check_spec.rb
|
203
206
|
- spec/simple_bioc_spec.rb
|
207
|
+
has_rdoc:
|