simple_bioc 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -3
- data/Rakefile +2 -1
- data/html/BioCReader.html +40 -30
- data/html/BioCWriter.html +14 -12
- data/html/README_md.html +31 -5
- data/html/SimpleBioC/NodeBase.html +3 -3
- data/html/SimpleBioC.html +48 -9
- data/html/created.rid +7 -7
- data/html/index.html +26 -3
- data/html/js/search_index.js +1 -1
- data/html/table_of_contents.html +5 -2
- data/lib/simple_bioc/bioc_reader.rb +24 -14
- data/lib/simple_bioc/bioc_writer.rb +4 -2
- data/lib/simple_bioc/version.rb +1 -1
- data/lib/simple_bioc.rb +29 -5
- data/simple_bioc.gemspec +1 -1
- data/spec/file_check_spec.rb +13 -0
- data/spec/simple_bioc_spec.rb +21 -10
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac0961af71c5744bc71ea16221f5325af35e4583
|
4
|
+
data.tar.gz: 9f407b566a8dc6687d53f8145fc600f3d73039eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab77aa143472f5959a0ea97702ae68dba76bb6b75542086a065b625216234d8912a52d83a087f6acf3c553f0ee8adf5050ebafd94833171d9dd84d7a34124e0a
|
7
|
+
data.tar.gz: b02aa1b1d73ff1d0c6ed2650c0170ff88e22fd01a6e6df7d9508cf365e548bd6bf0413616a80a765b2fc35f9d3e1da2d20edf41cece737ceeb26577d39068251
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# SimpleBioC
|
2
2
|
|
3
3
|
SimpleBioC is a simple parser / builder for the BioC data format. BioC is a simple XML format for sharing text documents and annotations. You can find more information about BioC on the official BioC web site ([http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/](http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/)).
|
4
4
|
|
@@ -24,7 +24,7 @@ Or install it yourself as:
|
|
24
24
|
$ gem install simple_bioc
|
25
25
|
|
26
26
|
|
27
|
-
## Usages
|
27
|
+
## Simple Usages
|
28
28
|
|
29
29
|
Include library
|
30
30
|
|
@@ -35,7 +35,7 @@ Parse with a file name (path)
|
|
35
35
|
|
36
36
|
collection = SimpleBioC::from_xml(filename)
|
37
37
|
|
38
|
-
Traverse & Manipulate Data. Data structure are almost the same as the DTD. Please refer [library documents](
|
38
|
+
Traverse & Manipulate Data. The data structures are almost the same as in the DTD. Please refer to the [library documentation](http://rubydoc.info/gems/simple_bioc/0.0.2/frames) and [the BioC DTD](http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html).
|
39
39
|
|
40
40
|
puts collection.documents[2].passages[0].text
|
41
41
|
|
@@ -43,6 +43,20 @@ Build XML text from data
|
|
43
43
|
|
44
44
|
puts SimpleBioC::to_xml(collection)
|
45
45
|
|
46
|
+
## Options
|
47
|
+
|
48
|
+
### Specify set of <document>s to parse
|
49
|
+
|
50
|
+
You can parse only a set of document elements in a large XML document instead of parsing all the document elements. This may decrease the processing time. For example, the following code will return a collection with two documents ("1234", "4567").
|
51
|
+
|
52
|
+
collection = SimpleBioC::from_xml(filename, {documents: ["1234", "4567"]})
|
53
|
+
|
54
|
+
### No whitespace in output
|
55
|
+
|
56
|
+
By default, the output of SimpleBioC::to_xml() is formatted with whitespace. If you do not want this whitespace, pass the 'save_with' option with a value of 0 to to_xml().
|
57
|
+
|
58
|
+
puts SimpleBioC::to_xml(collection, {save_with:0})
|
59
|
+
|
46
60
|
|
47
61
|
## Sample
|
48
62
|
|
data/Rakefile
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
require 'rspec/core/rake_task'
|
3
3
|
require 'rdoc/task'
|
4
|
+
require 'yard'
|
4
5
|
|
5
6
|
task :default => [:spec]
|
6
7
|
RSpec::Core::RakeTask.new do |t|
|
@@ -10,4 +11,4 @@ end
|
|
10
11
|
Rake::RDocTask.new do |rd|
|
11
12
|
rd.main = "README.md"
|
12
13
|
rd.rdoc_files.include("README.md", "lib/**/*.rb")
|
13
|
-
end
|
14
|
+
end
|
data/html/BioCReader.html
CHANGED
@@ -173,7 +173,7 @@
|
|
173
173
|
|
174
174
|
<div class="method-heading">
|
175
175
|
<span class="method-name">read</span><span
|
176
|
-
class="method-args">(path)</span>
|
176
|
+
class="method-args">(path, options)</span>
|
177
177
|
|
178
178
|
<span class="method-click-advice">click to toggle source</span>
|
179
179
|
|
@@ -189,7 +189,7 @@
|
|
189
189
|
|
190
190
|
<div class="method-source-code" id="read-source">
|
191
191
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 7</span>
|
192
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read</span>(<span class="ruby-identifier">path</span>)
|
192
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read</span>(<span class="ruby-identifier">path</span>, <span class="ruby-identifier">options</span>)
|
193
193
|
<span class="ruby-identifier">collection</span> = <span class="ruby-keyword">nil</span>
|
194
194
|
<span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-identifier">path</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">file</span><span class="ruby-operator">|</span>
|
195
195
|
<span class="ruby-identifier">xml_doc</span> = <span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span>(<span class="ruby-identifier">file</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">config</span><span class="ruby-operator">|</span>
|
@@ -200,7 +200,7 @@
|
|
200
200
|
<span class="ruby-identifier">fail</span> <span class="ruby-string">'Wrong format'</span>
|
201
201
|
<span class="ruby-keyword">end</span>
|
202
202
|
<span class="ruby-identifier">collection</span> = <span class="ruby-constant">SimpleBioC</span><span class="ruby-operator">::</span><span class="ruby-constant">Collection</span>.<span class="ruby-identifier">new</span>
|
203
|
-
<span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
203
|
+
<span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span>)
|
204
204
|
<span class="ruby-keyword">end</span>
|
205
205
|
|
206
206
|
<span class="ruby-identifier">collection</span>
|
@@ -219,7 +219,7 @@
|
|
219
219
|
|
220
220
|
<div class="method-heading">
|
221
221
|
<span class="method-name">read_annotation</span><span
|
222
|
-
class="method-args">(xml, annotation)</span>
|
222
|
+
class="method-args">(xml, annotation, options = {})</span>
|
223
223
|
|
224
224
|
<span class="method-click-advice">click to toggle source</span>
|
225
225
|
|
@@ -234,12 +234,13 @@
|
|
234
234
|
|
235
235
|
|
236
236
|
<div class="method-source-code" id="read_annotation-source">
|
237
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
238
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_annotation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>)
|
237
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 86</span>
|
238
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_annotation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>, <span class="ruby-identifier">options</span> = {})
|
239
239
|
<span class="ruby-identifier">annotation</span>.<span class="ruby-identifier">id</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"id"</span>]
|
240
240
|
<span class="ruby-identifier">annotation</span>.<span class="ruby-identifier">text</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"text"</span>)
|
241
241
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>)
|
242
242
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>, <span class="ruby-string">"location"</span>)
|
243
|
+
<span class="ruby-keyword">true</span>
|
243
244
|
<span class="ruby-keyword">end</span></pre>
|
244
245
|
</div><!-- read_annotation-source -->
|
245
246
|
|
@@ -255,7 +256,7 @@
|
|
255
256
|
|
256
257
|
<div class="method-heading">
|
257
258
|
<span class="method-name">read_collection</span><span
|
258
|
-
class="method-args">(xml, collection)</span>
|
259
|
+
class="method-args">(xml, collection, options = {})</span>
|
259
260
|
|
260
261
|
<span class="method-click-advice">click to toggle source</span>
|
261
262
|
|
@@ -271,12 +272,12 @@
|
|
271
272
|
|
272
273
|
<div class="method-source-code" id="read_collection-source">
|
273
274
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 47</span>
|
274
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
275
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span> = {})
|
275
276
|
<span class="ruby-identifier">collection</span>.<span class="ruby-identifier">source</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"source"</span>)
|
276
277
|
<span class="ruby-identifier">collection</span>.<span class="ruby-identifier">date</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"date"</span>)
|
277
278
|
<span class="ruby-identifier">collection</span>.<span class="ruby-identifier">key</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"key"</span>)
|
278
279
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
279
|
-
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-string">"document"</span>)
|
280
|
+
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>, <span class="ruby-string">"document"</span>, <span class="ruby-identifier">options</span>)
|
280
281
|
<span class="ruby-keyword">end</span></pre>
|
281
282
|
</div><!-- read_collection-source -->
|
282
283
|
|
@@ -292,7 +293,7 @@
|
|
292
293
|
|
293
294
|
<div class="method-heading">
|
294
295
|
<span class="method-name">read_document</span><span
|
295
|
-
class="method-args">(xml, document)</span>
|
296
|
+
class="method-args">(xml, document, options = {})</span>
|
296
297
|
|
297
298
|
<span class="method-click-advice">click to toggle source</span>
|
298
299
|
|
@@ -308,12 +309,16 @@
|
|
308
309
|
|
309
310
|
<div class="method-source-code" id="read_document-source">
|
310
311
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 55</span>
|
311
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_document</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>)
|
312
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_document</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>, <span class="ruby-identifier">options</span> = {})
|
312
313
|
<span class="ruby-identifier">document</span>.<span class="ruby-identifier">id</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"id"</span>)
|
314
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">kind_of?</span>(<span class="ruby-constant">Array</span>) <span class="ruby-operator">&&</span> <span class="ruby-operator">!</span><span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">document</span>.<span class="ruby-identifier">id</span>)
|
315
|
+
<span class="ruby-keyword">return</span> <span class="ruby-keyword">false</span>
|
316
|
+
<span class="ruby-keyword">end</span>
|
313
317
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>)
|
314
318
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>, <span class="ruby-string">"passage"</span>)
|
315
319
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>, <span class="ruby-string">"relation"</span>)
|
316
320
|
<span class="ruby-identifier">document</span>.<span class="ruby-identifier">adjust_ref</span>
|
321
|
+
<span class="ruby-keyword">true</span>
|
317
322
|
<span class="ruby-keyword">end</span></pre>
|
318
323
|
</div><!-- read_document-source -->
|
319
324
|
|
@@ -396,7 +401,7 @@
|
|
396
401
|
|
397
402
|
<div class="method-heading">
|
398
403
|
<span class="method-name">read_location</span><span
|
399
|
-
class="method-args">(xml, location)</span>
|
404
|
+
class="method-args">(xml, location, options = {})</span>
|
400
405
|
|
401
406
|
<span class="method-click-advice">click to toggle source</span>
|
402
407
|
|
@@ -411,10 +416,11 @@
|
|
411
416
|
|
412
417
|
|
413
418
|
<div class="method-source-code" id="read_location-source">
|
414
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
415
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_location</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">location</span>)
|
419
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 101</span>
|
420
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_location</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">location</span>, <span class="ruby-identifier">options</span> = {})
|
416
421
|
<span class="ruby-identifier">location</span>.<span class="ruby-identifier">offset</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"offset"</span>]
|
417
422
|
<span class="ruby-identifier">location</span>.<span class="ruby-identifier">length</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"length"</span>]
|
423
|
+
<span class="ruby-keyword">true</span>
|
418
424
|
<span class="ruby-keyword">end</span></pre>
|
419
425
|
</div><!-- read_location-source -->
|
420
426
|
|
@@ -430,7 +436,7 @@
|
|
430
436
|
|
431
437
|
<div class="method-heading">
|
432
438
|
<span class="method-name">read_node</span><span
|
433
|
-
class="method-args">(xml, node)</span>
|
439
|
+
class="method-args">(xml, node, options = {})</span>
|
434
440
|
|
435
441
|
<span class="method-click-advice">click to toggle source</span>
|
436
442
|
|
@@ -445,10 +451,11 @@
|
|
445
451
|
|
446
452
|
|
447
453
|
<div class="method-source-code" id="read_node-source">
|
448
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
449
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_node</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">node</span>)
|
454
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 107</span>
|
455
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_node</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">node</span>, <span class="ruby-identifier">options</span> = {})
|
450
456
|
<span class="ruby-identifier">node</span>.<span class="ruby-identifier">refid</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"refid"</span>]
|
451
457
|
<span class="ruby-identifier">node</span>.<span class="ruby-identifier">role</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"role"</span>]
|
458
|
+
<span class="ruby-keyword">true</span>
|
452
459
|
<span class="ruby-keyword">end</span></pre>
|
453
460
|
</div><!-- read_node-source -->
|
454
461
|
|
@@ -464,7 +471,7 @@
|
|
464
471
|
|
465
472
|
<div class="method-heading">
|
466
473
|
<span class="method-name">read_passage</span><span
|
467
|
-
class="method-args">(xml, passage)</span>
|
474
|
+
class="method-args">(xml, passage, options = {})</span>
|
468
475
|
|
469
476
|
<span class="method-click-advice">click to toggle source</span>
|
470
477
|
|
@@ -479,14 +486,15 @@
|
|
479
486
|
|
480
487
|
|
481
488
|
<div class="method-source-code" id="read_passage-source">
|
482
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
483
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_passage</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
489
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 67</span>
|
490
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_passage</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-identifier">options</span> = {})
|
484
491
|
<span class="ruby-identifier">passage</span>.<span class="ruby-identifier">text</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"text"</span>)
|
485
492
|
<span class="ruby-identifier">passage</span>.<span class="ruby-identifier">offset</span> = <span class="ruby-identifier">read_int</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"offset"</span>)
|
486
493
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
487
494
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-string">"sentence"</span>)
|
488
495
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-string">"annotation"</span>)
|
489
496
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>, <span class="ruby-string">"relation"</span>)
|
497
|
+
<span class="ruby-keyword">true</span>
|
490
498
|
<span class="ruby-keyword">end</span></pre>
|
491
499
|
</div><!-- read_passage-source -->
|
492
500
|
|
@@ -502,7 +510,7 @@
|
|
502
510
|
|
503
511
|
<div class="method-heading">
|
504
512
|
<span class="method-name">read_recursive</span><span
|
505
|
-
class="method-args">(xml, obj, name)</span>
|
513
|
+
class="method-args">(xml, obj, name, options = {})</span>
|
506
514
|
|
507
515
|
<span class="method-click-advice">click to toggle source</span>
|
508
516
|
|
@@ -518,12 +526,12 @@
|
|
518
526
|
|
519
527
|
<div class="method-source-code" id="read_recursive-source">
|
520
528
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 38</span>
|
521
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">obj</span>, <span class="ruby-identifier">name</span>)
|
529
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">obj</span>, <span class="ruby-identifier">name</span>, <span class="ruby-identifier">options</span> = {})
|
522
530
|
<span class="ruby-identifier">target_class</span> = <span class="ruby-constant">SimpleBioC</span>.<span class="ruby-identifier">const_get</span>(<span class="ruby-identifier">name</span>.<span class="ruby-identifier">capitalize</span>)
|
523
531
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">xpath</span>(<span class="ruby-identifier">name</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">node</span><span class="ruby-operator">|</span>
|
524
532
|
<span class="ruby-identifier">instance</span> = <span class="ruby-identifier">target_class</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">obj</span>)
|
525
|
-
<span class="ruby-identifier">send</span>(<span class="ruby-value">:"read_#{name}"</span>, <span class="ruby-identifier">node</span>, <span class="ruby-identifier">instance</span>)
|
526
|
-
<span class="ruby-identifier">obj</span>.<span class="ruby-identifier">instance_variable_get</span>(<span class="ruby-value">:"@#{name}s"</span>) <span class="ruby-operator"><<</span> <span class="ruby-identifier">instance</span>
|
533
|
+
<span class="ruby-identifier">ret</span> = <span class="ruby-identifier">send</span>(<span class="ruby-value">:"read_#{name}"</span>, <span class="ruby-identifier">node</span>, <span class="ruby-identifier">instance</span>, <span class="ruby-identifier">options</span>)
|
534
|
+
<span class="ruby-identifier">obj</span>.<span class="ruby-identifier">instance_variable_get</span>(<span class="ruby-value">:"@#{name}s"</span>) <span class="ruby-operator"><<</span> <span class="ruby-identifier">instance</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">ret</span>
|
527
535
|
<span class="ruby-keyword">end</span>
|
528
536
|
<span class="ruby-keyword">end</span></pre>
|
529
537
|
</div><!-- read_recursive-source -->
|
@@ -540,7 +548,7 @@
|
|
540
548
|
|
541
549
|
<div class="method-heading">
|
542
550
|
<span class="method-name">read_relation</span><span
|
543
|
-
class="method-args">(xml, relation)</span>
|
551
|
+
class="method-args">(xml, relation, options = {})</span>
|
544
552
|
|
545
553
|
<span class="method-click-advice">click to toggle source</span>
|
546
554
|
|
@@ -555,11 +563,12 @@
|
|
555
563
|
|
556
564
|
|
557
565
|
<div class="method-source-code" id="read_relation-source">
|
558
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
559
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_relation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>)
|
566
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 94</span>
|
567
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_relation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>, <span class="ruby-identifier">options</span> = {})
|
560
568
|
<span class="ruby-identifier">relation</span>.<span class="ruby-identifier">id</span> = <span class="ruby-identifier">xml</span>[<span class="ruby-string">"id"</span>]
|
561
569
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>)
|
562
570
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>, <span class="ruby-string">"node"</span>)
|
571
|
+
<span class="ruby-keyword">true</span>
|
563
572
|
<span class="ruby-keyword">end</span></pre>
|
564
573
|
</div><!-- read_relation-source -->
|
565
574
|
|
@@ -575,7 +584,7 @@
|
|
575
584
|
|
576
585
|
<div class="method-heading">
|
577
586
|
<span class="method-name">read_sentence</span><span
|
578
|
-
class="method-args">(xml, sentence)</span>
|
587
|
+
class="method-args">(xml, sentence, options = {})</span>
|
579
588
|
|
580
589
|
<span class="method-click-advice">click to toggle source</span>
|
581
590
|
|
@@ -590,13 +599,14 @@
|
|
590
599
|
|
591
600
|
|
592
601
|
<div class="method-source-code" id="read_sentence-source">
|
593
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line
|
594
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_sentence</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
602
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_reader.rb, line 77</span>
|
603
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">read_sentence</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>, <span class="ruby-identifier">options</span> = {})
|
595
604
|
<span class="ruby-identifier">sentence</span>.<span class="ruby-identifier">text</span> = <span class="ruby-identifier">read_text</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"text"</span>)
|
596
605
|
<span class="ruby-identifier">sentence</span>.<span class="ruby-identifier">offset</span> = <span class="ruby-identifier">read_int</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-string">"offset"</span>)
|
597
606
|
<span class="ruby-identifier">read_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
598
607
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>, <span class="ruby-string">"annotation"</span>)
|
599
608
|
<span class="ruby-identifier">read_recursive</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>, <span class="ruby-string">"relation"</span>)
|
609
|
+
<span class="ruby-keyword">true</span>
|
600
610
|
<span class="ruby-keyword">end</span></pre>
|
601
611
|
</div><!-- read_sentence-source -->
|
602
612
|
|
data/html/BioCWriter.html
CHANGED
@@ -167,7 +167,7 @@
|
|
167
167
|
|
168
168
|
<div class="method-heading">
|
169
169
|
<span class="method-name">write</span><span
|
170
|
-
class="method-args">(collection)</span>
|
170
|
+
class="method-args">(collection, options = {})</span>
|
171
171
|
|
172
172
|
<span class="method-click-advice">click to toggle source</span>
|
173
173
|
|
@@ -183,11 +183,13 @@
|
|
183
183
|
|
184
184
|
<div class="method-source-code" id="write-source">
|
185
185
|
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 6</span>
|
186
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>)
|
186
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span> = {})
|
187
|
+
<span class="ruby-identifier">options</span>[<span class="ruby-value">:save_with</span>] = <span class="ruby-value">1</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:save_with</span>].<span class="ruby-identifier">nil?</span>
|
187
188
|
<span class="ruby-identifier">builder</span> = <span class="ruby-constant">Nokogiri</span><span class="ruby-operator">::</span><span class="ruby-constant">XML</span><span class="ruby-operator">::</span><span class="ruby-constant">Builder</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">:encoding</span> =<span class="ruby-operator">></span> <span class="ruby-string">'UTF-8'</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">xml</span><span class="ruby-operator">|</span>
|
189
|
+
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">doc</span>.<span class="ruby-identifier">create_internal_subset</span>( <span class="ruby-string">'collection'</span>, <span class="ruby-keyword">nil</span>, <span class="ruby-string">'BioC.dtd'</span> )
|
188
190
|
<span class="ruby-identifier">write_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
189
191
|
<span class="ruby-keyword">end</span>
|
190
|
-
<span class="ruby-identifier">builder</span>.<span class="ruby-identifier">to_xml</span>
|
192
|
+
<span class="ruby-identifier">builder</span>.<span class="ruby-identifier">to_xml</span>(<span class="ruby-identifier">options</span>)
|
191
193
|
<span class="ruby-keyword">end</span></pre>
|
192
194
|
</div><!-- write-source -->
|
193
195
|
|
@@ -218,7 +220,7 @@
|
|
218
220
|
|
219
221
|
|
220
222
|
<div class="method-source-code" id="write_annotation-source">
|
221
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
223
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 63</span>
|
222
224
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_annotation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">annotation</span>)
|
223
225
|
<span class="ruby-keyword">if</span> <span class="ruby-identifier">annotation</span>.<span class="ruby-identifier">id</span>.<span class="ruby-identifier">nil?</span>
|
224
226
|
<span class="ruby-identifier">attribute</span> = <span class="ruby-keyword">nil</span>
|
@@ -260,7 +262,7 @@
|
|
260
262
|
|
261
263
|
|
262
264
|
<div class="method-source-code" id="write_collection-source">
|
263
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
265
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 23</span>
|
264
266
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_collection</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">collection</span>)
|
265
267
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">collection</span> {
|
266
268
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">source</span> <span class="ruby-identifier">collection</span>.<span class="ruby-identifier">source</span>
|
@@ -299,7 +301,7 @@
|
|
299
301
|
|
300
302
|
|
301
303
|
<div class="method-source-code" id="write_document-source">
|
302
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
304
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 33</span>
|
303
305
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_document</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">document</span>)
|
304
306
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">document</span> {
|
305
307
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">id_</span> <span class="ruby-identifier">document</span>.<span class="ruby-identifier">id</span>
|
@@ -337,7 +339,7 @@
|
|
337
339
|
|
338
340
|
|
339
341
|
<div class="method-source-code" id="write_infon-source">
|
340
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
342
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 15</span>
|
341
343
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">obj</span>)
|
342
344
|
<span class="ruby-identifier">obj</span>.<span class="ruby-identifier">infons</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>, <span class="ruby-identifier">v</span><span class="ruby-operator">|</span>
|
343
345
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">infon</span>(<span class="ruby-value">:key</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">k</span>) {
|
@@ -374,7 +376,7 @@
|
|
374
376
|
|
375
377
|
|
376
378
|
<div class="method-source-code" id="write_location-source">
|
377
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
379
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 88</span>
|
378
380
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_location</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">location</span>)
|
379
381
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">location</span>(<span class="ruby-value">:offset</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">location</span>.<span class="ruby-identifier">offset</span>, <span class="ruby-value">:length</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">location</span>.<span class="ruby-identifier">length</span>)
|
380
382
|
<span class="ruby-keyword">end</span></pre>
|
@@ -407,7 +409,7 @@
|
|
407
409
|
|
408
410
|
|
409
411
|
<div class="method-source-code" id="write_node-source">
|
410
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
412
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 92</span>
|
411
413
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_node</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">node</span>)
|
412
414
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">node_</span>(<span class="ruby-value">:refid</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">node</span>.<span class="ruby-identifier">refid</span>, <span class="ruby-value">:role</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">node</span>.<span class="ruby-identifier">role</span>)
|
413
415
|
<span class="ruby-keyword">end</span></pre>
|
@@ -440,7 +442,7 @@
|
|
440
442
|
|
441
443
|
|
442
444
|
<div class="method-source-code" id="write_passage-source">
|
443
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
445
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 42</span>
|
444
446
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_passage</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
445
447
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">passage</span> {
|
446
448
|
<span class="ruby-identifier">write_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">passage</span>)
|
@@ -480,7 +482,7 @@
|
|
480
482
|
|
481
483
|
|
482
484
|
<div class="method-source-code" id="write_relation-source">
|
483
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
485
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 76</span>
|
484
486
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_relation</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">relation</span>)
|
485
487
|
<span class="ruby-keyword">if</span> <span class="ruby-identifier">relation</span>.<span class="ruby-identifier">id</span>.<span class="ruby-identifier">nil?</span>
|
486
488
|
<span class="ruby-identifier">attribute</span> = <span class="ruby-keyword">nil</span>
|
@@ -521,7 +523,7 @@
|
|
521
523
|
|
522
524
|
|
523
525
|
<div class="method-source-code" id="write_sentence-source">
|
524
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line
|
526
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc/bioc_writer.rb, line 53</span>
|
525
527
|
<span class="ruby-keyword">def</span> <span class="ruby-identifier">write_sentence</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
526
528
|
<span class="ruby-identifier">xml</span>.<span class="ruby-identifier">sentence</span> {
|
527
529
|
<span class="ruby-identifier">write_infon</span>(<span class="ruby-identifier">xml</span>, <span class="ruby-identifier">sentence</span>)
|
data/html/README_md.html
CHANGED
@@ -48,10 +48,13 @@
|
|
48
48
|
<nav class="section">
|
49
49
|
<h3 class="section-header">Table of Contents</h3>
|
50
50
|
<ul>
|
51
|
-
<li><a href="#label-
|
51
|
+
<li><a href="#label-SimpleBioC">SimpleBioC</a>
|
52
52
|
<li><a href="#label-Feature%3A">Feature:</a>
|
53
53
|
<li><a href="#label-Installation">Installation</a>
|
54
|
-
<li><a href="#label-Usages">Usages</a>
|
54
|
+
<li><a href="#label-Simple+Usages">Simple Usages</a>
|
55
|
+
<li><a href="#label-Options">Options</a>
|
56
|
+
<li><a href="#label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse</a>
|
57
|
+
<li><a href="#label-No+whitespace+in+output">No whitespace in output</a>
|
55
58
|
<li><a href="#label-Sample">Sample</a>
|
56
59
|
<li><a href="#label-Contributing">Contributing</a>
|
57
60
|
<li><a href="#label-LICENSE">LICENSE</a>
|
@@ -108,7 +111,7 @@
|
|
108
111
|
|
109
112
|
<div id="documentation" class="description">
|
110
113
|
|
111
|
-
<h1 id="label-
|
114
|
+
<h1 id="label-SimpleBioC"><a href="SimpleBioC.html">SimpleBioC</a><span><a href="#label-SimpleBioC">¶</a> <a href="#documentation">↑</a></span></h1>
|
112
115
|
|
113
116
|
<p><a href="SimpleBioC.html">SimpleBioC</a> is a simple parser / builder for
|
114
117
|
BioC data format. BioC is a simple XML format to share text documents and
|
@@ -140,7 +143,7 @@ BioC DTD</p>
|
|
140
143
|
|
141
144
|
<pre>$ gem install simple_bioc</pre>
|
142
145
|
|
143
|
-
<h2 id="label-Usages">Usages<span><a href="#label-Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
146
|
+
<h2 id="label-Simple+Usages">Simple Usages<span><a href="#label-Simple+Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
144
147
|
|
145
148
|
<p>Include library</p>
|
146
149
|
|
@@ -152,7 +155,11 @@ BioC DTD</p>
|
|
152
155
|
<pre>collection = SimpleBioC::from_xml(filename)</pre>
|
153
156
|
|
154
157
|
<p>Traverse & Manipulate Data. The data structures are almost the same as the
|
155
|
-
DTD. Please refer <a
|
158
|
+
DTD. Please refer to <a
|
159
|
+
href="http://rubydoc.info/gems/simple_bioc/0.0.2/frames">library
|
160
|
+
documents</a> and <a
|
161
|
+
href="http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html">the
|
162
|
+
BioC DTD</a>.</p>
|
156
163
|
|
157
164
|
<pre>puts collection.documents[2].passages[0].text</pre>
|
158
165
|
|
@@ -160,6 +167,25 @@ DTD. Please refer <a href="http://">library documents</a>.</p>
|
|
160
167
|
|
161
168
|
<pre>puts SimpleBioC::to_xml(collection)</pre>
|
162
169
|
|
170
|
+
<h2 id="label-Options">Options<span><a href="#label-Options">¶</a> <a href="#documentation">↑</a></span></h2>
|
171
|
+
|
172
|
+
<h3 id="label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse<span><a href="#label-Specify+set+of+%3Cdocument%3Es+to+parse">¶</a> <a href="#documentation">↑</a></span></h3>
|
173
|
+
|
174
|
+
<p>You can parse only a set of document elements in a large xml document
|
175
|
+
instead of parsing all the document elements. It may decrease the
|
176
|
+
processing time. For example, the following code will return a collection
|
177
|
+
with two documents (“1234”, “4567”).</p>
|
178
|
+
|
179
|
+
<pre>collection = SimpleBioC::from_xml(filename, {documents: ["1234", "4567"]})</pre>
|
180
|
+
|
181
|
+
<h3 id="label-No+whitespace+in+output">No whitespace in output<span><a href="#label-No+whitespace+in+output">¶</a> <a href="#documentation">↑</a></span></h3>
|
182
|
+
|
183
|
+
<p>By default, outputs of SimpleBioC::to_xml() will be formatted with
|
184
|
+
whitespace. If you do not want this whitespace, you should pass
|
185
|
+
the 'save_with' option set to 0 to the to_xml() function.</p>
|
186
|
+
|
187
|
+
<pre>puts SimpleBioC::to_xml(collection, {save_with:0})</pre>
|
188
|
+
|
163
189
|
<h2 id="label-Sample">Sample<span><a href="#label-Sample">¶</a> <a href="#documentation">↑</a></span></h2>
|
164
190
|
|
165
191
|
<p>More samples can be found in Samples directory</p>
|
@@ -131,9 +131,9 @@
|
|
131
131
|
|
132
132
|
<div id="description" class="description">
|
133
133
|
|
134
|
-
<p><a href="NodeBase.html">NodeBase</a> is not
|
135
|
-
class of <a href="Annotation.html">Annotation</a> & <a
|
136
|
-
href="Relation.html">Relation</a
|
134
|
+
<p><a href="NodeBase.html">NodeBase</a> is not a BioC DTD entity. This is a
|
135
|
+
super class of <a href="Annotation.html">Annotation</a> & <a
|
136
|
+
href="Relation.html">Relation</a>.</p>
|
137
137
|
|
138
138
|
</div><!-- description -->
|
139
139
|
|
data/html/SimpleBioC.html
CHANGED
@@ -177,7 +177,7 @@
|
|
177
177
|
|
178
178
|
<div class="method-heading">
|
179
179
|
<span class="method-name">from_xml</span><span
|
180
|
-
class="method-args">(file_path)</span>
|
180
|
+
class="method-args">(file_path, options = {})</span>
|
181
181
|
|
182
182
|
<span class="method-click-advice">click to toggle source</span>
|
183
183
|
|
@@ -188,14 +188,33 @@
|
|
188
188
|
|
189
189
|
<p>parse a BioC XML file in the given path and convert it into a collection
|
190
190
|
instance</p>
|
191
|
+
|
192
|
+
<h4 id="method-i-from_xml-label-Arguments">Arguments<span><a href="#method-i-from_xml-label-Arguments">¶</a> <a href="#documentation">↑</a></span></h4>
|
193
|
+
<ul><li>
|
194
|
+
<p><code>file_path</code> - path of the file to parse</p>
|
195
|
+
</li><li>
|
196
|
+
<p><code>options</code> - (optional) additional options</p>
|
197
|
+
</li></ul>
|
198
|
+
|
199
|
+
<h4 id="method-i-from_xml-label-Options">Options<span><a href="#method-i-from_xml-label-Options">¶</a> <a href="#documentation">↑</a></span></h4>
|
200
|
+
<ul><li>
|
201
|
+
<p><code>documents</code> - specify IDs of documents to parse. The result will
|
202
|
+
include only the specified documents</p>
|
203
|
+
</li></ul>
|
204
|
+
|
205
|
+
<h4 id="method-i-from_xml-label-Examples">Examples<span><a href="#method-i-from_xml-label-Examples">¶</a> <a href="#documentation">↑</a></span></h4>
|
206
|
+
|
207
|
+
<pre>collection = SimpleBioC.from_xml("./xml/everything.xml")
|
208
|
+
collection = SimpleBioC.from_xml("./xml/lemma.xml", {documents:[21785578, 21488974]})</pre>
|
191
209
|
|
192
210
|
|
193
211
|
|
194
212
|
|
195
213
|
<div class="method-source-code" id="from_xml-source">
|
196
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line
|
197
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">from_xml</span>(<span class="ruby-identifier">file_path</span>)
|
198
|
-
<span class="ruby-
|
214
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line 22</span>
|
215
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">from_xml</span>(<span class="ruby-identifier">file_path</span>, <span class="ruby-identifier">options</span> = {})
|
216
|
+
<span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>] = <span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">map</span>{<span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">to_s</span>} <span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:documents</span>].<span class="ruby-identifier">kind_of?</span>(<span class="ruby-constant">Array</span>)
|
217
|
+
<span class="ruby-constant">BioCReader</span>.<span class="ruby-identifier">read</span>(<span class="ruby-identifier">file_path</span>, <span class="ruby-identifier">options</span>)
|
199
218
|
<span class="ruby-keyword">end</span></pre>
|
200
219
|
</div><!-- from_xml-source -->
|
201
220
|
|
@@ -211,7 +230,7 @@ instance</p>
|
|
211
230
|
|
212
231
|
<div class="method-heading">
|
213
232
|
<span class="method-name">to_xml</span><span
|
214
|
-
class="method-args">(collection)</span>
|
233
|
+
class="method-args">(collection, options = {})</span>
|
215
234
|
|
216
235
|
<span class="method-click-advice">click to toggle source</span>
|
217
236
|
|
@@ -220,15 +239,35 @@ instance</p>
|
|
220
239
|
|
221
240
|
<div class="method-description">
|
222
241
|
|
223
|
-
<p>convert a collection instance to a BioC XML text
|
242
|
+
<p>convert a collection instance to BioC XML text. The output is returned as a
|
243
|
+
string</p>
|
244
|
+
|
245
|
+
<h4 id="method-i-to_xml-label-Arguments">Arguments<span><a href="#method-i-to_xml-label-Arguments">¶</a> <a href="#documentation">↑</a></span></h4>
|
246
|
+
<ul><li>
|
247
|
+
<p><code>collection</code> - <a
|
248
|
+
href="SimpleBioC/Collection.html">Collection</a> instance to process</p>
|
249
|
+
</li><li>
|
250
|
+
<p><code>options</code> - (optional) additional options</p>
|
251
|
+
</li></ul>
|
252
|
+
|
253
|
+
<h4 id="method-i-to_xml-label-Options">Options<span><a href="#method-i-to_xml-label-Options">¶</a> <a href="#documentation">↑</a></span></h4>
|
254
|
+
<ul><li>
|
255
|
+
<p><code>save_with</code> - SaveOption for Nokogiri. If you set this to 0, output
|
256
|
+
has no format (no indentation, no whitespace)</p>
|
257
|
+
</li></ul>
|
258
|
+
|
259
|
+
<h4 id="method-i-to_xml-label-Examples">Examples<span><a href="#method-i-to_xml-label-Examples">¶</a> <a href="#documentation">↑</a></span></h4>
|
260
|
+
|
261
|
+
<pre>output = SimpleBioC.to_xml(collection)
|
262
|
+
output = SimpleBioC.to_xml(collection, {save_with: 0})</pre>
|
224
263
|
|
225
264
|
|
226
265
|
|
227
266
|
|
228
267
|
<div class="method-source-code" id="to_xml-source">
|
229
|
-
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line
|
230
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">to_xml</span>(<span class="ruby-identifier">collection</span>)
|
231
|
-
<span class="ruby-constant">BioCWriter</span>.<span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>)
|
268
|
+
<pre><span class="ruby-comment"># File lib/simple_bioc.rb, line 39</span>
|
269
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">to_xml</span>(<span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span> = {})
|
270
|
+
<span class="ruby-constant">BioCWriter</span>.<span class="ruby-identifier">write</span>(<span class="ruby-identifier">collection</span>, <span class="ruby-identifier">options</span>)
|
232
271
|
<span class="ruby-keyword">end</span></pre>
|
233
272
|
</div><!-- to_xml-source -->
|
234
273
|
|
data/html/created.rid
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
|
2
|
-
README.md
|
3
|
-
lib/simple_bioc.rb
|
1
|
+
Sat, 21 Dec 2013 09:10:44 -0800
|
2
|
+
README.md Sat, 21 Dec 2013 09:10:35 -0800
|
3
|
+
lib/simple_bioc.rb Sat, 21 Dec 2013 08:58:11 -0800
|
4
4
|
lib/simple_bioc/annotation.rb Fri, 06 Dec 2013 20:52:11 -0800
|
5
|
-
lib/simple_bioc/bioc_reader.rb
|
6
|
-
lib/simple_bioc/bioc_writer.rb
|
5
|
+
lib/simple_bioc/bioc_reader.rb Sat, 21 Dec 2013 08:39:36 -0800
|
6
|
+
lib/simple_bioc/bioc_writer.rb Sat, 21 Dec 2013 08:09:06 -0800
|
7
7
|
lib/simple_bioc/collection.rb Fri, 06 Dec 2013 20:52:11 -0800
|
8
8
|
lib/simple_bioc/document.rb Fri, 06 Dec 2013 21:18:25 -0800
|
9
9
|
lib/simple_bioc/location.rb Fri, 06 Dec 2013 20:52:11 -0800
|
10
10
|
lib/simple_bioc/node.rb Fri, 06 Dec 2013 20:52:11 -0800
|
11
|
-
lib/simple_bioc/node_base.rb Fri, 06 Dec 2013 22:
|
11
|
+
lib/simple_bioc/node_base.rb Fri, 06 Dec 2013 22:06:00 -0800
|
12
12
|
lib/simple_bioc/passage.rb Fri, 06 Dec 2013 20:52:11 -0800
|
13
13
|
lib/simple_bioc/relation.rb Fri, 06 Dec 2013 20:52:11 -0800
|
14
14
|
lib/simple_bioc/sentence.rb Fri, 06 Dec 2013 20:52:11 -0800
|
15
|
-
lib/simple_bioc/version.rb Fri, 06 Dec 2013
|
15
|
+
lib/simple_bioc/version.rb Fri, 06 Dec 2013 22:06:07 -0800
|
data/html/index.html
CHANGED
@@ -91,7 +91,7 @@
|
|
91
91
|
|
92
92
|
<div id="documentation" class="description">
|
93
93
|
|
94
|
-
<h1 id="label-
|
94
|
+
<h1 id="label-SimpleBioC"><a href="SimpleBioC.html">SimpleBioC</a><span><a href="#label-SimpleBioC">¶</a> <a href="#documentation">↑</a></span></h1>
|
95
95
|
|
96
96
|
<p><a href="SimpleBioC.html">SimpleBioC</a> is a simple parser / builder for
|
97
97
|
BioC data format. BioC is a simple XML format to share text documents and
|
@@ -123,7 +123,7 @@ BioC DTD</p>
|
|
123
123
|
|
124
124
|
<pre>$ gem install simple_bioc</pre>
|
125
125
|
|
126
|
-
<h2 id="label-Usages">Usages<span><a href="#label-Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
126
|
+
<h2 id="label-Simple+Usages">Simple Usages<span><a href="#label-Simple+Usages">¶</a> <a href="#documentation">↑</a></span></h2>
|
127
127
|
|
128
128
|
<p>Include library</p>
|
129
129
|
|
@@ -135,7 +135,11 @@ BioC DTD</p>
|
|
135
135
|
<pre>collection = SimpleBioC::from_xml(filename)</pre>
|
136
136
|
|
137
137
|
<p>Traverse & Manipulate Data. The data structures are almost the same as the
|
138
|
-
DTD. Please refer <a
|
138
|
+
DTD. Please refer to <a
|
139
|
+
href="http://rubydoc.info/gems/simple_bioc/0.0.2/frames">library
|
140
|
+
documents</a> and <a
|
141
|
+
href="http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCDTD.html">the
|
142
|
+
BioC DTD</a>.</p>
|
139
143
|
|
140
144
|
<pre>puts collection.documents[2].passages[0].text</pre>
|
141
145
|
|
@@ -143,6 +147,25 @@ DTD. Please refer <a href="http://">library documents</a>.</p>
|
|
143
147
|
|
144
148
|
<pre>puts SimpleBioC::to_xml(collection)</pre>
|
145
149
|
|
150
|
+
<h2 id="label-Options">Options<span><a href="#label-Options">¶</a> <a href="#documentation">↑</a></span></h2>
|
151
|
+
|
152
|
+
<h3 id="label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse<span><a href="#label-Specify+set+of+%3Cdocument%3Es+to+parse">¶</a> <a href="#documentation">↑</a></span></h3>
|
153
|
+
|
154
|
+
<p>You can parse only a set of document elements in a large xml document
|
155
|
+
instead of parsing all the document elements. It may decrease the
|
156
|
+
processing time. For example, the following code will return a collection
|
157
|
+
with two documents (“1234”, “4567”).</p>
|
158
|
+
|
159
|
+
<pre>collection = SimpleBioC::from_xml(filename, {documents: ["1234", "4567"]})</pre>
|
160
|
+
|
161
|
+
<h3 id="label-No+whitespace+in+output">No whitespace in output<span><a href="#label-No+whitespace+in+output">¶</a> <a href="#documentation">↑</a></span></h3>
|
162
|
+
|
163
|
+
<p>By default, outputs of SimpleBioC::to_xml() will be formatted with
|
164
|
+
whitespace. If you do not want this whitespace, you should pass
|
165
|
+
the 'save_with' option set to 0 to the to_xml() function.</p>
|
166
|
+
|
167
|
+
<pre>puts SimpleBioC::to_xml(collection, {save_with:0})</pre>
|
168
|
+
|
146
169
|
<h2 id="label-Sample">Sample<span><a href="#label-Sample">¶</a> <a href="#documentation">↑</a></span></h2>
|
147
170
|
|
148
171
|
<p>More samples can be found in Samples directory</p>
|
data/html/js/search_index.js
CHANGED
@@ -1 +1 @@
|
|
1
|
-
var search_data = {"index":{"searchIndex":["biocreader","biocwriter","simplebioc","annotation","collection","document","location","node","nodebase","passage","relation","sentence","adjust_ref()","adjust_ref()","adjust_ref()","each_relation()","each_relation()","each_relation()","find_node()","find_node()","find_node()","from_xml()","new()","new()","new()","new()","new()","new()","new()","new()","new()","read()","read_annotation()","read_collection()","read_document()","read_infon()","read_int()","read_location()","read_node()","read_passage()","read_recursive()","read_relation()","read_sentence()","read_text()","to_c()","to_c()","to_c()","to_c()","to_s()","to_s()","to_s()","to_s()","to_xml()","write()","write_annotation()","write_collection()","write_document()","write_infon()","write_location()","write_node()","write_passage()","write_relation()","write_sentence()","readme"],"longSearchIndex":["biocreader","biocwriter","simplebioc","simplebioc::annotation","simplebioc::collection","simplebioc::document","simplebioc::location","simplebioc::node","simplebioc::nodebase","simplebioc::passage","simplebioc::relation","simplebioc::sentence","simplebioc::document#adjust_ref()","simplebioc::node#adjust_ref()","simplebioc::relation#adjust_ref()","simplebioc::document#each_relation()","simplebioc::passage#each_relation()","simplebioc::sentence#each_relation()","simplebioc::document#find_node()","simplebioc::passage#find_node()","simplebioc::sentence#find_node()","simplebioc#from_xml()","simplebioc::annotation::new()","simplebioc::collection::new()","simplebioc::document::new()","simplebioc::location::new()","simplebioc::node::new()","simplebioc::nodebase::new()","simplebioc::passage::new()","simplebioc::relation::new()","simplebioc::sentence::new()","biocreader#read()","biocreader#read_annotation()","biocreader#read_collection()","biocreader#read_document()","biocreader#read_infon()","biocreader#read_int()","biocreader#read_location()","biocreader#read_node()","biocreader#rea
d_passage()","biocreader#read_recursive()","biocreader#read_relation()","biocreader#read_sentence()","biocreader#read_text()","simplebioc::collection#to_c()","simplebioc::node#to_c()","simplebioc::relation#to_c()","simplebioc::sentence#to_c()","simplebioc::annotation#to_s()","simplebioc::document#to_s()","simplebioc::location#to_s()","simplebioc::passage#to_s()","simplebioc#to_xml()","biocwriter#write()","biocwriter#write_annotation()","biocwriter#write_collection()","biocwriter#write_document()","biocwriter#write_infon()","biocwriter#write_location()","biocwriter#write_node()","biocwriter#write_passage()","biocwriter#write_relation()","biocwriter#write_sentence()",""],"info":[["BioCReader","","BioCReader.html","",""],["BioCWriter","","BioCWriter.html","",""],["SimpleBioC","","SimpleBioC.html","","<p>SimpleBioC main library\n"],["SimpleBioC::Annotation","","SimpleBioC/Annotation.html","",""],["SimpleBioC::Collection","","SimpleBioC/Collection.html","",""],["SimpleBioC::Document","","SimpleBioC/Document.html","",""],["SimpleBioC::Location","","SimpleBioC/Location.html","",""],["SimpleBioC::Node","","SimpleBioC/Node.html","",""],["SimpleBioC::NodeBase","","SimpleBioC/NodeBase.html","","<p>NodeBase is not
|
1
|
+
var search_data = {"index":{"searchIndex":["biocreader","biocwriter","simplebioc","annotation","collection","document","location","node","nodebase","passage","relation","sentence","adjust_ref()","adjust_ref()","adjust_ref()","each_relation()","each_relation()","each_relation()","find_node()","find_node()","find_node()","from_xml()","new()","new()","new()","new()","new()","new()","new()","new()","new()","read()","read_annotation()","read_collection()","read_document()","read_infon()","read_int()","read_location()","read_node()","read_passage()","read_recursive()","read_relation()","read_sentence()","read_text()","to_c()","to_c()","to_c()","to_c()","to_s()","to_s()","to_s()","to_s()","to_xml()","write()","write_annotation()","write_collection()","write_document()","write_infon()","write_location()","write_node()","write_passage()","write_relation()","write_sentence()","readme"],"longSearchIndex":["biocreader","biocwriter","simplebioc","simplebioc::annotation","simplebioc::collection","simplebioc::document","simplebioc::location","simplebioc::node","simplebioc::nodebase","simplebioc::passage","simplebioc::relation","simplebioc::sentence","simplebioc::document#adjust_ref()","simplebioc::node#adjust_ref()","simplebioc::relation#adjust_ref()","simplebioc::document#each_relation()","simplebioc::passage#each_relation()","simplebioc::sentence#each_relation()","simplebioc::document#find_node()","simplebioc::passage#find_node()","simplebioc::sentence#find_node()","simplebioc#from_xml()","simplebioc::annotation::new()","simplebioc::collection::new()","simplebioc::document::new()","simplebioc::location::new()","simplebioc::node::new()","simplebioc::nodebase::new()","simplebioc::passage::new()","simplebioc::relation::new()","simplebioc::sentence::new()","biocreader#read()","biocreader#read_annotation()","biocreader#read_collection()","biocreader#read_document()","biocreader#read_infon()","biocreader#read_int()","biocreader#read_location()","biocreader#read_node()","biocreader#rea
d_passage()","biocreader#read_recursive()","biocreader#read_relation()","biocreader#read_sentence()","biocreader#read_text()","simplebioc::collection#to_c()","simplebioc::node#to_c()","simplebioc::relation#to_c()","simplebioc::sentence#to_c()","simplebioc::annotation#to_s()","simplebioc::document#to_s()","simplebioc::location#to_s()","simplebioc::passage#to_s()","simplebioc#to_xml()","biocwriter#write()","biocwriter#write_annotation()","biocwriter#write_collection()","biocwriter#write_document()","biocwriter#write_infon()","biocwriter#write_location()","biocwriter#write_node()","biocwriter#write_passage()","biocwriter#write_relation()","biocwriter#write_sentence()",""],"info":[["BioCReader","","BioCReader.html","",""],["BioCWriter","","BioCWriter.html","",""],["SimpleBioC","","SimpleBioC.html","","<p>SimpleBioC main library\n"],["SimpleBioC::Annotation","","SimpleBioC/Annotation.html","",""],["SimpleBioC::Collection","","SimpleBioC/Collection.html","",""],["SimpleBioC::Document","","SimpleBioC/Document.html","",""],["SimpleBioC::Location","","SimpleBioC/Location.html","",""],["SimpleBioC::Node","","SimpleBioC/Node.html","",""],["SimpleBioC::NodeBase","","SimpleBioC/NodeBase.html","","<p>NodeBase is not a BioC DTD entity. 
This is a super class of Annotation\n& Relation.\n"],["SimpleBioC::Passage","","SimpleBioC/Passage.html","",""],["SimpleBioC::Relation","","SimpleBioC/Relation.html","",""],["SimpleBioC::Sentence","","SimpleBioC/Sentence.html","",""],["adjust_ref","SimpleBioC::Document","SimpleBioC/Document.html#method-i-adjust_ref","()",""],["adjust_ref","SimpleBioC::Node","SimpleBioC/Node.html#method-i-adjust_ref","()",""],["adjust_ref","SimpleBioC::Relation","SimpleBioC/Relation.html#method-i-adjust_ref","()",""],["each_relation","SimpleBioC::Document","SimpleBioC/Document.html#method-i-each_relation","()",""],["each_relation","SimpleBioC::Passage","SimpleBioC/Passage.html#method-i-each_relation","()",""],["each_relation","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-i-each_relation","()",""],["find_node","SimpleBioC::Document","SimpleBioC/Document.html#method-i-find_node","(id)",""],["find_node","SimpleBioC::Passage","SimpleBioC/Passage.html#method-i-find_node","(id)",""],["find_node","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-i-find_node","(id)",""],["from_xml","SimpleBioC","SimpleBioC.html#method-i-from_xml","(file_path, options = {})","<p>parse a BioC XML file in the given path and convert it into a collection\ninstance\n<p>Arguments\n<p><code>file_path</code> - 
…\n"],["new","SimpleBioC::Annotation","SimpleBioC/Annotation.html#method-c-new","(parent)",""],["new","SimpleBioC::Collection","SimpleBioC/Collection.html#method-c-new","()",""],["new","SimpleBioC::Document","SimpleBioC/Document.html#method-c-new","(parent)",""],["new","SimpleBioC::Location","SimpleBioC/Location.html#method-c-new","(parent)",""],["new","SimpleBioC::Node","SimpleBioC/Node.html#method-c-new","(parent)",""],["new","SimpleBioC::NodeBase","SimpleBioC/NodeBase.html#method-c-new","(parent)",""],["new","SimpleBioC::Passage","SimpleBioC/Passage.html#method-c-new","(parent)",""],["new","SimpleBioC::Relation","SimpleBioC/Relation.html#method-c-new","(parent)",""],["new","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-c-new","(parent)",""],["read","BioCReader","BioCReader.html#method-i-read","(path, options)",""],["read_annotation","BioCReader","BioCReader.html#method-i-read_annotation","(xml, annotation, options = {})",""],["read_collection","BioCReader","BioCReader.html#method-i-read_collection","(xml, collection, options = {})",""],["read_document","BioCReader","BioCReader.html#method-i-read_document","(xml, document, options = {})",""],["read_infon","BioCReader","BioCReader.html#method-i-read_infon","(xml, obj)",""],["read_int","BioCReader","BioCReader.html#method-i-read_int","(xml, name)",""],["read_location","BioCReader","BioCReader.html#method-i-read_location","(xml, location, options = {})",""],["read_node","BioCReader","BioCReader.html#method-i-read_node","(xml, node, options = {})",""],["read_passage","BioCReader","BioCReader.html#method-i-read_passage","(xml, passage, options = {})",""],["read_recursive","BioCReader","BioCReader.html#method-i-read_recursive","(xml, obj, name, options = {})",""],["read_relation","BioCReader","BioCReader.html#method-i-read_relation","(xml, relation, options = {})",""],["read_sentence","BioCReader","BioCReader.html#method-i-read_sentence","(xml, sentence, options = 
{})",""],["read_text","BioCReader","BioCReader.html#method-i-read_text","(xml, name)",""],["to_c","SimpleBioC::Collection","SimpleBioC/Collection.html#method-i-to_c","()",""],["to_c","SimpleBioC::Node","SimpleBioC/Node.html#method-i-to_c","()",""],["to_c","SimpleBioC::Relation","SimpleBioC/Relation.html#method-i-to_c","()",""],["to_c","SimpleBioC::Sentence","SimpleBioC/Sentence.html#method-i-to_c","()",""],["to_s","SimpleBioC::Annotation","SimpleBioC/Annotation.html#method-i-to_s","()",""],["to_s","SimpleBioC::Document","SimpleBioC/Document.html#method-i-to_s","()",""],["to_s","SimpleBioC::Location","SimpleBioC/Location.html#method-i-to_s","()",""],["to_s","SimpleBioC::Passage","SimpleBioC/Passage.html#method-i-to_s","()",""],["to_xml","SimpleBioC","SimpleBioC.html#method-i-to_xml","(collection, options = {})","<p>convert a collection instance to a BioC XML text. Output will return as\nstring\n<p>Arguments\n<p><code>collection</code> - …\n"],["write","BioCWriter","BioCWriter.html#method-i-write","(collection, options = {})",""],["write_annotation","BioCWriter","BioCWriter.html#method-i-write_annotation","(xml, annotation)",""],["write_collection","BioCWriter","BioCWriter.html#method-i-write_collection","(xml, collection)",""],["write_document","BioCWriter","BioCWriter.html#method-i-write_document","(xml, document)",""],["write_infon","BioCWriter","BioCWriter.html#method-i-write_infon","(xml, obj)",""],["write_location","BioCWriter","BioCWriter.html#method-i-write_location","(xml, location)",""],["write_node","BioCWriter","BioCWriter.html#method-i-write_node","(xml, node)",""],["write_passage","BioCWriter","BioCWriter.html#method-i-write_passage","(xml, passage)",""],["write_relation","BioCWriter","BioCWriter.html#method-i-write_relation","(xml, relation)",""],["write_sentence","BioCWriter","BioCWriter.html#method-i-write_sentence","(xml, sentence)",""],["README","","README_md.html","","<p>SimpleBioC\n<p>SimpleBioC is a simple parser / builder for BioC data format. 
BioC is a\nsimple XML format to …\n"]]}}
|
data/html/table_of_contents.html
CHANGED
@@ -30,10 +30,13 @@
|
|
30
30
|
|
31
31
|
<img class="toc-toggle" src="images/transparent.png" alt="" title="toggle headings">
|
32
32
|
<ul class="initially-hidden">
|
33
|
-
<li><a href="README_md.html#label-
|
33
|
+
<li><a href="README_md.html#label-SimpleBioC">SimpleBioC</a>
|
34
34
|
<li><a href="README_md.html#label-Feature%3A">Feature:</a>
|
35
35
|
<li><a href="README_md.html#label-Installation">Installation</a>
|
36
|
-
<li><a href="README_md.html#label-Usages">Usages</a>
|
36
|
+
<li><a href="README_md.html#label-Simple+Usages">Simple Usages</a>
|
37
|
+
<li><a href="README_md.html#label-Options">Options</a>
|
38
|
+
<li><a href="README_md.html#label-Specify+set+of+%3Cdocument%3Es+to+parse">Specify set of <document>s to parse</a>
|
39
|
+
<li><a href="README_md.html#label-No+whitespace+in+output">No whitespace in output</a>
|
37
40
|
<li><a href="README_md.html#label-Sample">Sample</a>
|
38
41
|
<li><a href="README_md.html#label-Contributing">Contributing</a>
|
39
42
|
<li><a href="README_md.html#label-LICENSE">LICENSE</a>
|
@@ -4,7 +4,7 @@ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
|
|
4
4
|
module BioCReader
|
5
5
|
module_function
|
6
6
|
|
7
|
-
def read(path)
|
7
|
+
def read(path, options)
|
8
8
|
collection = nil
|
9
9
|
File.open(path) do |file|
|
10
10
|
xml_doc = Nokogiri::XML(file) do |config|
|
@@ -15,7 +15,7 @@ module BioCReader
|
|
15
15
|
fail 'Wrong format'
|
16
16
|
end
|
17
17
|
collection = SimpleBioC::Collection.new
|
18
|
-
read_collection(xml, collection)
|
18
|
+
read_collection(xml, collection, options)
|
19
19
|
end
|
20
20
|
|
21
21
|
collection
|
@@ -35,68 +35,78 @@ module BioCReader
|
|
35
35
|
xml.xpath("infon").each{ |i| obj.infons[i["key"]] = i.content}
|
36
36
|
end
|
37
37
|
|
38
|
-
def read_recursive(xml, obj, name)
|
38
|
+
def read_recursive(xml, obj, name, options = {})
|
39
39
|
target_class = SimpleBioC.const_get(name.capitalize)
|
40
40
|
xml.xpath(name).each do |node|
|
41
41
|
instance = target_class.new(obj)
|
42
|
-
send(:"read_#{name}", node, instance)
|
43
|
-
obj.instance_variable_get(:"@#{name}s") << instance
|
42
|
+
ret = send(:"read_#{name}", node, instance, options)
|
43
|
+
obj.instance_variable_get(:"@#{name}s") << instance if ret
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
-
def read_collection(xml, collection)
|
47
|
+
def read_collection(xml, collection, options = {})
|
48
48
|
collection.source = read_text(xml, "source")
|
49
49
|
collection.date = read_text(xml, "date")
|
50
50
|
collection.key = read_text(xml, "key")
|
51
51
|
read_infon(xml, collection)
|
52
|
-
read_recursive(xml, collection, "document")
|
52
|
+
read_recursive(xml, collection, "document", options)
|
53
53
|
end
|
54
54
|
|
55
|
-
def read_document(xml, document)
|
55
|
+
def read_document(xml, document, options = {})
|
56
56
|
document.id = read_text(xml, "id")
|
57
|
+
if options[:documents].kind_of?(Array) && !options[:documents].include?(document.id)
|
58
|
+
return false
|
59
|
+
end
|
57
60
|
read_infon(xml, document)
|
58
61
|
read_recursive(xml, document, "passage")
|
59
62
|
read_recursive(xml, document, "relation")
|
60
63
|
document.adjust_ref
|
64
|
+
true
|
61
65
|
end
|
62
66
|
|
63
|
-
def read_passage(xml, passage)
|
67
|
+
def read_passage(xml, passage, options = {})
|
64
68
|
passage.text = read_text(xml, "text")
|
65
69
|
passage.offset = read_int(xml, "offset")
|
66
70
|
read_infon(xml, passage)
|
67
71
|
read_recursive(xml, passage, "sentence")
|
68
72
|
read_recursive(xml, passage, "annotation")
|
69
73
|
read_recursive(xml, passage, "relation")
|
74
|
+
true
|
70
75
|
end
|
71
76
|
|
72
|
-
def read_sentence(xml, sentence)
|
77
|
+
def read_sentence(xml, sentence, options = {})
|
73
78
|
sentence.text = read_text(xml, "text")
|
74
79
|
sentence.offset = read_int(xml, "offset")
|
75
80
|
read_infon(xml, sentence)
|
76
81
|
read_recursive(xml, sentence, "annotation")
|
77
82
|
read_recursive(xml, sentence, "relation")
|
83
|
+
true
|
78
84
|
end
|
79
85
|
|
80
|
-
def read_annotation(xml, annotation)
|
86
|
+
def read_annotation(xml, annotation, options = {})
|
81
87
|
annotation.id = xml["id"]
|
82
88
|
annotation.text = read_text(xml, "text")
|
83
89
|
read_infon(xml, annotation)
|
84
90
|
read_recursive(xml, annotation, "location")
|
91
|
+
true
|
85
92
|
end
|
86
93
|
|
87
|
-
def read_relation(xml, relation)
|
94
|
+
def read_relation(xml, relation, options = {})
|
88
95
|
relation.id = xml["id"]
|
89
96
|
read_infon(xml, relation)
|
90
97
|
read_recursive(xml, relation, "node")
|
98
|
+
true
|
91
99
|
end
|
92
100
|
|
93
|
-
def read_location(xml, location)
|
101
|
+
def read_location(xml, location, options = {})
|
94
102
|
location.offset = xml["offset"]
|
95
103
|
location.length = xml["length"]
|
104
|
+
true
|
96
105
|
end
|
97
106
|
|
98
|
-
def read_node(xml, node)
|
107
|
+
def read_node(xml, node, options = {})
|
99
108
|
node.refid = xml["refid"]
|
100
109
|
node.role = xml["role"]
|
110
|
+
true
|
101
111
|
end
|
102
112
|
end
|
@@ -3,11 +3,13 @@ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
|
|
3
3
|
|
4
4
|
module BioCWriter
|
5
5
|
module_function
|
6
|
-
def write(collection)
|
6
|
+
def write(collection, options = {})
|
7
|
+
options[:save_with] = 1 if options[:save_with].nil?
|
7
8
|
builder = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
|
9
|
+
xml.doc.create_internal_subset( 'collection', nil, 'BioC.dtd' )
|
8
10
|
write_collection(xml, collection)
|
9
11
|
end
|
10
|
-
builder.to_xml
|
12
|
+
builder.to_xml(options)
|
11
13
|
end
|
12
14
|
|
13
15
|
def write_infon(xml, obj)
|
data/lib/simple_bioc/version.rb
CHANGED
data/lib/simple_bioc.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# @author Dongseop Kwon
|
1
2
|
require "simple_bioc/version"
|
2
3
|
require "simple_bioc/bioc_reader"
|
3
4
|
require "simple_bioc/bioc_writer"
|
@@ -7,12 +8,35 @@ module SimpleBioC
|
|
7
8
|
module_function
|
8
9
|
|
9
10
|
# parse a BioC XML file in the given path and convert it into a collection instance
|
10
|
-
|
11
|
-
|
11
|
+
#
|
12
|
+
# ==== Arguments
|
13
|
+
# * +file_path+ - path of the BioC XML file to parse
|
14
|
+
# * +options+ - (optional) additional options
|
15
|
+
#
|
16
|
+
# ==== Options
|
17
|
+
# * +documents+ - specify IDs of documents to parse. The result will include only the specified documents
|
18
|
+
#
|
19
|
+
# ==== Examples
|
20
|
+
# collection = SimpleBioC.from_xml("./xml/everything.xml")
|
21
|
+
# collection = SimpleBioC.from_xml("./xml/lemma.xml", {documents:[21785578, 21488974]})
|
22
|
+
def from_xml(file_path, options = {})
|
23
|
+
options[:documents] = options[:documents].map{|e| e.to_s} if options[:documents].kind_of?(Array)
|
24
|
+
BioCReader.read(file_path, options)
|
12
25
|
end
|
13
26
|
|
14
|
-
# convert a collection instance to a BioC XML text
|
15
|
-
|
16
|
-
|
27
|
+
# convert a collection instance to BioC XML text. The output is returned as a string
|
28
|
+
#
|
29
|
+
# ==== Arguments
|
30
|
+
# * +collection+ - Collection instance to process
|
31
|
+
# * +options+ - (optional) additional options
|
32
|
+
#
|
33
|
+
# ==== Options
|
34
|
+
# * +save_with+ - SaveOptions value passed to Nokogiri. If you set this to 0, the output is not formatted (no indentation, no whitespace)
|
35
|
+
#
|
36
|
+
# ==== Examples
|
37
|
+
# output = SimpleBioC.to_xml(collection)
|
38
|
+
# output = SimpleBioC.to_xml(collection, {save_with: 0})
|
39
|
+
def to_xml(collection, options = {})
|
40
|
+
BioCWriter.write(collection, options)
|
17
41
|
end
|
18
42
|
end
|
data/simple_bioc.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = SimpleBioC::VERSION
|
9
9
|
spec.authors = ["Dongseop Kwon"]
|
10
10
|
spec.email = ["dongseop@gmail.com"]
|
11
|
-
spec.description = "
|
11
|
+
spec.description = "SimpleBioC is a simple parser / builder for BioC data format. BioC is a simple XML format to share text documents and annotations. You can find more information about BioC from the official BioC web site (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/)"
|
12
12
|
spec.summary = "Simple BioC parser/builder for ruby"
|
13
13
|
spec.homepage = "https://github.com/dongseop/simple_bioc"
|
14
14
|
spec.license = "MIT"
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'simple_bioc'
|
2
|
+
require 'test_xml/spec'
|
3
|
+
describe "File Check" do
|
4
|
+
it "should be load successfully" do
|
5
|
+
Dir["./xml/*.xml"].each do |file_path|
|
6
|
+
puts file_path
|
7
|
+
collection = SimpleBioC.from_xml(file_path)
|
8
|
+
output = SimpleBioC.to_xml(collection)
|
9
|
+
expected = File.read(file_path)
|
10
|
+
expect(output).to equal_xml(expected)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/spec/simple_bioc_spec.rb
CHANGED
@@ -1,14 +1,25 @@
|
|
1
|
-
# bowling_spec.rb
|
2
1
|
require 'simple_bioc'
|
3
2
|
require 'test_xml/spec'
|
4
|
-
describe
|
5
|
-
it "should
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
3
|
+
describe "Simple function" do
|
4
|
+
it "should have a DTD declaration" do
|
5
|
+
collection = SimpleBioC.from_xml("./xml/everything.xml")
|
6
|
+
output = SimpleBioC.to_xml(collection)
|
7
|
+
output.should include('<!DOCTYPE collection SYSTEM "BioC.dtd">')
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should no space when save_with = 0" do
|
11
|
+
collection = SimpleBioC.from_xml("./xml/everything.xml")
|
12
|
+
output = SimpleBioC.to_xml(collection, {save_with: 0})
|
13
|
+
output.should include('<collection><source>Made up file to test that everything is allowed and processed. Has text in the passage.</source><date>20130426</date><key>everything.key</key><infon key="collection-infon-key">collection-infon-value</infon><document><id>1</id><infon key="document-infon-key">document-infon-value</infon><passage><infon key="passage-infon-key">passage-infon-value</infon><offset>0</offset><text>text of passage</text><annotation id="P1"><infon key="annotation-infon-key">annotation-infon-value</infon><text>annotation text</text><location offset="1" length="2"/></annotation><relation id="RP1"><infon key="passage-relation-infon-key">passage-relation-infon-value</infon><node refid="RP1" role="passage-relation"/></relation></passage><relation id="D1"><infon key="document-relation-infon-key">document-relation-infon-value</infon><node refid="RD1" role="document-relation"/></relation></document></collection>')
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should process only one document by options" do
|
17
|
+
collection = SimpleBioC.from_xml("./xml/lemma.xml", {documents:[21785578, 21488974]})
|
18
|
+
output = SimpleBioC.to_xml(collection, {save_with: 0})
|
19
|
+
output.scan("<document>").size.should equal 2
|
20
|
+
output.should include("<document><id>21785578</id>")
|
21
|
+
output.should include("<document><id>21488974</id>")
|
22
|
+
output.should_not include("<document><id>21660417</id>")
|
23
|
+
output.should_not include("<document><id>21951408</id>")
|
13
24
|
end
|
14
25
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_bioc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dongseop Kwon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -80,8 +80,9 @@ dependencies:
|
|
80
80
|
- - ~>
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.1.6
|
83
|
-
description:
|
84
|
-
to
|
83
|
+
description: SimpleBioC is a simple parser / builder for BioC data format. BioC is
|
84
|
+
a simple XML format to share text documents and annotations. You can find more information
|
85
|
+
about BioC from the official BioC web site (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/)
|
85
86
|
email:
|
86
87
|
- dongseop@gmail.com
|
87
88
|
executables: []
|
@@ -158,6 +159,7 @@ files:
|
|
158
159
|
- samples/print_annotation.rb
|
159
160
|
- samples/sample1.rb
|
160
161
|
- simple_bioc.gemspec
|
162
|
+
- spec/file_check_spec.rb
|
161
163
|
- spec/simple_bioc_spec.rb
|
162
164
|
- xml/BioC.dtd
|
163
165
|
- xml/PMID-8557975-simplified-sentences-tokens.xml
|
@@ -200,4 +202,6 @@ signing_key:
|
|
200
202
|
specification_version: 4
|
201
203
|
summary: Simple BioC parser/builder for ruby
|
202
204
|
test_files:
|
205
|
+
- spec/file_check_spec.rb
|
203
206
|
- spec/simple_bioc_spec.rb
|
207
|
+
has_rdoc:
|