rbbt-text 1.2.0 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/bow/bow.rb +5 -2
  3. data/lib/rbbt/bow/dictionary.rb +27 -23
  4. data/lib/rbbt/document.rb +55 -0
  5. data/lib/rbbt/document/annotation.rb +45 -0
  6. data/lib/rbbt/document/corpus.rb +63 -0
  7. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  8. data/lib/rbbt/ner/NER.rb +3 -3
  9. data/lib/rbbt/ner/abner.rb +1 -1
  10. data/lib/rbbt/ner/banner.rb +1 -1
  11. data/lib/rbbt/ner/brat.rb +1 -1
  12. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  13. data/lib/rbbt/ner/g_norm_plus.rb +26 -3
  14. data/lib/rbbt/ner/linnaeus.rb +3 -3
  15. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  16. data/lib/rbbt/ner/oscar3.rb +1 -2
  17. data/lib/rbbt/ner/oscar4.rb +3 -3
  18. data/lib/rbbt/ner/patterns.rb +5 -5
  19. data/lib/rbbt/ner/regexpNER.rb +1 -2
  20. data/lib/rbbt/ner/token_trieNER.rb +35 -22
  21. data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
  22. data/lib/rbbt/nlp/nlp.rb +5 -5
  23. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  24. data/lib/rbbt/nlp/spaCy.rb +52 -0
  25. data/lib/rbbt/segment.rb +179 -0
  26. data/lib/rbbt/segment/annotation.rb +58 -0
  27. data/lib/rbbt/segment/encoding.rb +18 -0
  28. data/lib/rbbt/{text/segment → segment}/named_entity.rb +14 -11
  29. data/lib/rbbt/segment/overlaps.rb +63 -0
  30. data/lib/rbbt/segment/range_index.rb +35 -0
  31. data/lib/rbbt/segment/relationship.rb +7 -0
  32. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  33. data/lib/rbbt/segment/token.rb +23 -0
  34. data/lib/rbbt/{text/segment → segment}/transformed.rb +12 -10
  35. data/lib/rbbt/segment/tsv.rb +41 -0
  36. data/share/install/software/Linnaeus +1 -1
  37. data/share/install/software/OpenNLP +1 -1
  38. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  39. data/test/rbbt/document/test_annotation.rb +140 -0
  40. data/test/rbbt/document/test_corpus.rb +33 -0
  41. data/test/rbbt/ner/test_finder.rb +3 -3
  42. data/test/rbbt/ner/test_g_norm_plus.rb +20 -2
  43. data/test/rbbt/ner/test_patterns.rb +9 -9
  44. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  45. data/test/rbbt/ner/test_rnorm.rb +3 -4
  46. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  47. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
  48. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  49. data/test/rbbt/segment/test_annotation.rb +39 -0
  50. data/test/rbbt/segment/test_corpus.rb +36 -0
  51. data/test/rbbt/segment/test_encoding.rb +24 -0
  52. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
  53. data/test/rbbt/segment/test_overlaps.rb +69 -0
  54. data/test/rbbt/segment/test_range_index.rb +42 -0
  55. data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
  56. data/test/rbbt/test_document.rb +14 -0
  57. data/test/rbbt/test_segment.rb +182 -0
  58. data/test/test_helper.rb +5 -3
  59. data/test/test_spaCy.rb +32 -0
  60. metadata +44 -32
  61. data/lib/rbbt/text/corpus.rb +0 -106
  62. data/lib/rbbt/text/corpus/document.rb +0 -383
  63. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  64. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  65. data/lib/rbbt/text/document.rb +0 -39
  66. data/lib/rbbt/text/segment.rb +0 -363
  67. data/lib/rbbt/text/segment/docid.rb +0 -46
  68. data/lib/rbbt/text/segment/relationship.rb +0 -24
  69. data/lib/rbbt/text/segment/token.rb +0 -49
  70. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  71. data/test/rbbt/text/corpus/test_document.rb +0 -82
  72. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  73. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  74. data/test/rbbt/text/test_corpus.rb +0 -34
  75. data/test/rbbt/text/test_document.rb +0 -58
  76. data/test/rbbt/text/test_segment.rb +0 -100
@@ -0,0 +1,58 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/segment'
3
+ require 'rbbt/entity'
4
+
5
# Identifier string for a segment annotation.
#
# The identifier is colon-separated: the first five fields form the segment id
# and the sixth field is the annotation type. The +corpus+ annotation carries
# the corpus handed to SegID when the segment is resolved.
module AnnotID
  extend Entity
  self.annotation :corpus

  # Colon-separated fields of this identifier (memoized).
  def _parts
    @parts ||= split(":")
  end

  # Segment id: the first five fields joined back with ":" (memoized).
  def segid
    @segid ||= _parts.first(5).join(":")
  end

  # Annotation type: the sixth field of the identifier (memoized when present).
  def type
    @type ||= _parts.at(5)
  end

  # Resolves the segment through SegID and tags it as a SegmentAnnotation
  # carrying this identifier's type.
  property :annotation do
    resolved = SegID.setup(segid, :corpus => corpus).segment

    SegmentAnnotation.setup(resolved, :type => type)
  end

  # An AnnotID is its own annotation id.
  property :annotid do
    self
  end

end
32
+
33
# A segment (or segment id) tagged with an annotation +type+.
module SegmentAnnotation
  extend Entity
  include Object::Segment
  self.annotation :type

  # Segment id for this annotation: the object itself when it already is a
  # SegID, otherwise whatever the Segment implementation of +segid+ returns.
  # Raises for any other kind of object.
  property :segid do
    if SegID === self
      self
    elsif Segment === self
      # Explicit empty argument list is required here; do not drop the parens.
      super()
    else
      raise "Unknown object: #{self}"
    end
  end

  # Builds the AnnotID "segid:type:digest", where digest is
  # Misc.obj2digest of this object's info; +corpus+ is attached to the result.
  property :annotid do |corpus=nil|
    fields = [segid, type, Misc.obj2digest(self.info)]
    AnnotID.setup(fields.join(":"), :corpus => corpus)
  end

  alias id annotid

  # A SegmentAnnotation is its own annotation.
  property :annotation do
    self
  end
end
@@ -0,0 +1,18 @@
1
+ require 'rbbt/segment'
2
module Segment
  # Collects every non-ASCII character of +text+ as a Segment whose offset is
  # the character's index in +text+.
  def self.bad_chars(text)
    text.chars.each_with_index.each_with_object([]) do |(char, position), found|
      next if char.ascii_only?

      found << Segment.setup(char, :offset => position)
    end
  end

  # Runs Transformed.with_transform over +text+ with all non-ASCII characters
  # as the segments to replace. +replace+ defaults to "?" when nil; the block
  # is forwarded untouched.
  def self.ascii(text, replace = nil, &block)
    non_ascii = bad_chars(text)
    substitute = replace.nil? ? "?" : replace
    Transformed.with_transform(text, non_ascii, substitute, &block)
  end
end
@@ -1,42 +1,46 @@
1
- require 'rbbt/text/segment'
2
- require 'rbbt/entity'
1
+ require 'rbbt/segment'
2
+ require 'rbbt/segment/annotation'
3
3
 
4
- module NamedEntity
4
+ module NamedEntity
5
5
  extend Entity
6
6
  include Segment
7
+ include SegmentAnnotation
7
8
 
8
- self.annotation :type, :code, :score
9
+ self.annotation :entity_type, :code, :score
9
10
 
10
11
  def report
11
12
  <<-EOF
12
13
  String: #{ self }
13
14
  Offset: #{ offset.inspect }
14
- Type: #{type.inspect}
15
+ Type: #{entity_type.inspect}
15
16
  Code: #{code.inspect}
16
17
  Score: #{score.inspect}
17
18
  EOF
18
19
  end
19
20
 
20
21
  def html
22
+ title = code.nil? ? entity_type : [entity_type, code].compact * ":"
23
+
21
24
  text = <<-EOF
22
25
  <span class='Entity'\
23
- #{type.nil? ? "" : " attr-entity-type='#{Array === type ? type * " " : type}'"}\
26
+ #{entity_type.nil? ? "" : " attr-entity-type='#{Array === entity_type ? entity_type * " " : entity_type}'"}\
24
27
  #{code.nil? ? "" : " attr-entity-code='#{Array === code ? code * " " : code}'"}\
25
28
  #{score.nil? ? "" : " attr-entity-score='#{Array === score ? score * " " : score}'"}\
29
+ #{title.nil? ? "" : " title='#{Array === title ? title * " " : title}'"}\
26
30
  >#{ self }</span>
27
31
  EOF
28
32
  text.chomp
29
33
  end
30
34
 
31
35
  def entity(params = nil)
32
- code = self.dup
36
+ code = self.code || self.dup
33
37
  format, entity = code.split(":")
34
38
  entity, format = format, nil if entity.nil?
35
-
36
- if defined?(Entity) && Entity.formats.include?(type) or Entity.formats.include?(format)
39
+
40
+ if defined?(Entity) && Entity.formats.include?(entity_type) or Entity.formats.include?(format)
37
41
  params ||= {}
38
42
  params[:format] = format if format and params[:format].nil?
39
- mod = (Entity.formats[type] || Entity.format[entity])
43
+ mod = (Entity.formats[entity_type] || Entity.format[entity])
40
44
  mod.setup(entity, params)
41
45
  end
42
46
 
@@ -44,4 +48,3 @@ Score: #{score.inspect}
44
48
  end
45
49
 
46
50
  end
47
-
@@ -0,0 +1,63 @@
1
# Offset arithmetic and overlap queries for segments.
#
# Methods here rely on the including object providing +offset+/+offset=+,
# +eend+, +segment_length+ and +range+ (defined elsewhere).
module Segment
  # Adds +offset+ to this segment's offset, in place.
  # If either this segment's offset or the argument is nil the offset becomes nil.
  #
  # @return [self]
  def pull(offset)
    if self.offset.nil? || offset.nil?
      self.offset = nil
    else
      self.offset += offset
    end

    self
  end

  # Subtracts +offset+ from this segment's offset, in place.
  # If either this segment's offset or the argument is nil the offset becomes nil.
  #
  # @return [self]
  def push(offset)
    if self.offset.nil? || offset.nil?
      self.offset = nil
    else
      self.offset -= offset
    end

    self
  end

  # Makes +segments+ relative to this segment by subtracting this segment's
  # offset from each of them (see #push).
  #
  # Without a block the shift is permanent. With a block the shifted segments
  # are yielded and their offsets are shifted back afterwards, even when the
  # block raises or exits early, so callers never observe half-shifted segments.
  #
  # NOTE: when this segment's offset is nil, #push sets every segment's offset
  # to nil and #pull cannot recover it.
  #
  # @param segments [Array] segments to shift
  # @return [Array] the same +segments+ collection
  def make_relative(segments, &block)
    segments.each { |s| s.push offset }
    return segments unless block_given?

    begin
      yield(segments)
    ensure
      # Always undo the shift, otherwise an exception in the block would leave
      # the caller's segments with relative offsets.
      segments.each { |s| s.pull offset }
    end

    segments
  end

  # Range covered by this segment relative to +container+.
  #
  # @param container [Segment, Integer, nil] a segment with a non-nil offset,
  #   or an integer offset; anything else yields the plain +range+
  # @return [Range]
  # @raise [RuntimeError] when this segment has no offset
  def range_in(container = nil)
    raise "No offset specified" if offset.nil?
    case
    when (Segment === container and not container.offset.nil?)
      ((offset - container.offset)..(self.eend - container.offset))
    when Integer === container
      ((offset - container)..(self.eend - container))
    else
      range
    end
  end

  # True when +segment+ starts at or after this segment's start and finishes
  # at or before this segment's finish (nil offsets/lengths count as 0).
  def includes?(segment)
    (segment.offset.to_i >= self.offset.to_i) and
      (segment.offset.to_i + segment.segment_length.to_i <= self.offset.to_i + self.segment_length.to_i)
  end

  # True when either segment's start falls between the other's offset and eend.
  def overlaps?(segment)
    segment.offset.to_i >= self.offset.to_i && segment.offset.to_i <= self.eend ||
      self.offset.to_i >= segment.offset.to_i && self.offset.to_i <= segment.eend
  end

  # Subset of +segments+ that overlap this segment.
  def overlaps(segments)
    segments.select { |s| self.overlaps?(s) }
  end

  # Segments of +secondary+ that overlap at least one segment of +main+.
  # Stops scanning +main+ at the first overlap instead of collecting them all.
  def self.collisions(main, secondary)
    secondary.select do |ss|
      main.any? { |ms| ms.overlaps? ss }
    end
  end
end
@@ -0,0 +1,35 @@
1
# Mixin for a range index over segments: lookups come back as SegID values
# bound to the index's corpus.
module Segment::RangeIndex
  attr_accessor :corpus

  # Delegates the lookup to the extended object and wraps the result with
  # SegID, attaching the stored corpus.
  def [](*args)
    SegID.setup(super(*args), :corpus => corpus)
  end

  # Builds a range index mapping each segment's range to its segid.
  #
  # segments     - list of segments, or a Hash whose values are flattened into one
  # corpus       - stored on the resulting index and passed to SegID on lookup
  # persist_file - nil or :memory disables persistence; any other value is
  #                passed to Persist as the :file option
  #
  # Segments without an offset are left out of the index.
  def self.index(segments, corpus, persist_file = :memory)
    segments = segments.values.flatten if Hash === segments

    in_memory = persist_file.nil? || persist_file == :memory

    annotation_index =
      Persist.persist("Segment_index", :fwt, :persist => !in_memory, :file => persist_file) do

        value_size = 0
        index_data = []
        segments.each do |segment|
          next if segment.offset.nil?

          span = segment.range
          # The table is fixed-width, so track the longest segid seen.
          value_size = [segment.segid.length, value_size].max
          index_data << [segment.segid, [span.begin, span.end]]
        end

        table = FixWidthTable.get :memory, value_size, true
        table.add_range index_data

        table
      end

    annotation_index.extend Segment::RangeIndex
    annotation_index.corpus = corpus
    annotation_index
  end

end
35
+
@@ -0,0 +1,7 @@
1
# Entity describing a relationship: it carries the +segments+ taking part in
# it and the relationship +type+.
module Relationship
  extend Entity

  # Declared in two separate calls, in this order.
  annotation :segments
  annotation :type

end
@@ -1,5 +1,5 @@
1
1
  require 'rbbt/annotations'
2
- require 'rbbt/text/segment'
2
+ require 'rbbt/segment'
3
3
 
4
4
  module Segmented
5
5
  extend Annotation
@@ -0,0 +1,23 @@
1
+ require 'rbbt/segment'
2
+
3
# A Segment produced by tokenization; +original+ is an extra annotation slot.
module Token
  extend Entity
  include Segment

  self.annotation :original

  # Splits +text+ into Token segments.
  #
  # text     - the string to tokenize
  # split_at - separator pattern; text matched by its first capture group is
  #            kept as a token of its own, the rest of the match is dropped
  # start    - offset assigned to the first character of +text+
  #
  # Returns the tokens in order of appearance, each with its :offset set.
  def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)

    tokens = []
    remaining = text
    position = start

    until (found = remaining.match(split_at)).nil?
      leading = found.pre_match
      tokens << Token.setup(leading, :offset => position) unless leading.empty?

      groups = found.captures
      if groups.any? && !groups.first.empty?
        tokens << Token.setup(groups.first, :offset => position + found.begin(1))
      end

      # Continue after the separator, keeping offsets relative to the input.
      position += found.end(0)
      remaining = found.post_match
    end

    tokens << Token.setup(remaining, :offset => position) unless remaining.empty?

    tokens
  end
end
@@ -1,6 +1,3 @@
1
- require 'rbbt/util/misc'
2
- require 'rbbt/text/segment'
3
-
4
1
  module Transformed
5
2
 
6
3
  def self.transform(text, segments, replacement = nil, &block)
@@ -71,6 +68,10 @@ module Transformed
71
68
 
72
69
  segments = [segments] unless Array === segments
73
70
  orig_length = self.length
71
+
72
+ offset = self.respond_to?(:offset) ? self.offset.to_i : 0
73
+ segments = segments.select{|s| s.offset.to_i >= offset && s.offset.to_i <= offset + self.length - 1 }
74
+
74
75
  Segment.clean_sort(segments).each do |segment|
75
76
  next if segment.offset.nil?
76
77
 
@@ -89,7 +90,7 @@ module Transformed
89
90
 
90
91
  updated_text = self[updated_begin..updated_end]
91
92
  if updated_text.nil?
92
- Log.warn "Range outside of segment: #{self.length} #{segment.locus} (#{updated_range})"
93
+ Log.warn "Range outside of segment: #{self.length} #{segment.range} (#{updated_range})"
93
94
  next
94
95
  end
95
96
 
@@ -111,10 +112,10 @@ module Transformed
111
112
 
112
113
  self[updated_begin..updated_end] = new
113
114
 
114
- @transformed_segments[segment.segment_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
115
+ @transformed_segments[segment.object_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
115
116
 
116
117
  segment.replace original_text
117
- stack << segment.segment_id
118
+ stack << segment.object_id
118
119
  end
119
120
  @transformation_stack << stack
120
121
  end
@@ -122,13 +123,13 @@ module Transformed
122
123
  def fix_segment(segment, range, diff)
123
124
  case
124
125
  # Before
125
- when segment.end < range.begin
126
+ when segment.eend < range.begin
126
127
  # After
127
128
  when segment.offset.to_i > range.end + diff
128
129
  segment.offset = segment.offset.to_i - diff
129
130
  # Includes
130
- when (segment.offset.to_i <= range.begin and segment.end >= range.end + diff)
131
- segment.replace self[segment.offset.to_i..segment.end - diff]
131
+ when (segment.offset.to_i <= range.begin and segment.eend >= range.end + diff)
132
+ segment.replace self[segment.offset.to_i..segment.eend - diff]
132
133
  else
133
134
  raise "Segment Overlaps"
134
135
  end
@@ -141,7 +142,8 @@ module Transformed
141
142
 
142
143
  if first_only
143
144
  @transformation_stack.pop.reverse.each do |id|
144
- orig_range, diff, text, range = @transformed_segments.delete id
145
+ segment_info = @transformed_segments.delete id
146
+ orig_range, diff, text, range = segment_info
145
147
 
146
148
  new_range = (range.begin..range.last + diff)
147
149
  self[new_range] = text
@@ -0,0 +1,41 @@
1
+ #module Segment
2
+ #
3
+ # def self.set_tsv_fields(fields, segments)
4
+ # tsv_fields = []
5
+ # add_types = ! (fields.delete(:no_types) || fields.delete("no_types") || fields.include?(:JSON) || fields.include?("JSON"))
6
+ # literal = (fields.delete(:literal) || fields.delete("literal"))
7
+ # tsv_fields << "Start" << "End"
8
+ # tsv_fields << :annotation_types if add_types
9
+ # tsv_fields << :literal if literal
10
+ #
11
+ # if fields.any? and not (fields == [:all] or fields == ["all"])
12
+ # tsv_fields.concat fields
13
+ # else
14
+ # tsv_fields.concat segments.first.annotations if segments.any?
15
+ # end
16
+ # tsv_fields
17
+ # tsv_fields.collect!{|f| f.to_s}
18
+ # tsv_fields.delete "offset"
19
+ # tsv_fields
20
+ # end
21
+ #
22
+ # def self.tsv(segments, *fields)
23
+ # fields = set_tsv_fields fields, segments
24
+ # tsv = TSV.setup({}, :key_field => "ID", :fields => fields, :type => :double)
25
+ #
26
+ # segments.each do |segment|
27
+ # tsv[segment.segment_id] = self.tsv_values_for_segment(segment, fields)
28
+ # end
29
+ #
30
+ # tsv
31
+ # end
32
+ #
33
+ # def self.load_tsv(tsv)
34
+ # fields = tsv.fields
35
+ # tsv.with_unnamed do
36
+ # tsv.collect do |id, values|
37
+ # Annotated.load_tsv_values(id, values, fields)
38
+ # end
39
+ # end
40
+ # end
41
+ #end
@@ -12,7 +12,7 @@ pkg_dir="`opt_dir \"$name\"`"
12
12
  build_dir=`build_dir`
13
13
  mv "$build_dir" "$pkg_dir"
14
14
  tmp_file="~/.rbbt/tmp/species-proxy-properties.tmp"
15
- mkdir -p $(basename "$tmp_file")
15
+ mkdir -p $(dirname "$tmp_file")
16
16
  cat "$pkg_dir/species-proxy/properties.conf" |grep -v "^.dir =" >> $tmp_file
17
17
  echo "\$dir = $pkg_dir/species-proxy/" > "$pkg_dir/species-proxy/properties.conf"
18
18
  cat $tmp_file | grep -v "^#" >> "$pkg_dir/species-proxy/properties.conf"
@@ -1,7 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
3
  name='OpenNLP'
4
- url="http://apache.rediris.es/opennlp/opennlp-1.9.1/apache-opennlp-1.9.1-bin.tar.gz"
4
+ url="http://apache.rediris.es/opennlp/opennlp-1.9.2/apache-opennlp-1.9.2-bin.tar.gz"
5
5
 
6
6
  get_src "$name" "$url"
7
7
  move_opt "$name"
@@ -0,0 +1,15 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/document/corpus/pubmed'
5
+
6
# Checks that a PubMed abstract can be added to a corpus by PMID and that the
# document can be converted to its title.
class TestCorpusPubmed < Test::Unit::TestCase
  def test_add_pmid
    corpus = Document::Corpus.setup({})

    abstracts = corpus.add_pmid("32299157", :abstract)
    title = abstracts.first.to(:title)

    assert title.include?("COVID-19")
  end
end
15
+
@@ -0,0 +1,140 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/document/annotation'
6
+ require 'rbbt/segment/named_entity'
7
+
8
# Tests for annotations defined on Document: plain and multiple definitions,
# and persistence of their results (file, annotation repo, default).
class TestAnnotation < Test::Unit::TestCase
  # Raised by :persisted_words when its body runs a second time.
  class CalledOnce < Exception; end

  # Registers the annotations used by the tests below.
  def setup
    Document.define :words do
      self.split(" ")
    end

    # Body may only run once until $called_once is reset.
    $called_once = false
    Document.define :persisted_words do
      raise CalledOnce if $called_once
      $called_once = true
      self.split(" ")
    end

    Document.define_multiple :multiple_words do |list|
      list.collect { |doc| doc.words }
    end

    Document.define :ner do
      $called_once = true
      self.split(" ").collect { |e| NamedEntity.setup(e, :code => Misc.digest(e)) }
    end

    Document.persist :ner
  end

  def test_define
    sentence = as_test_document(gene_sentence, "test_doc1")
    corpus_holding(sentence)

    assert_equal sentence[sentence.words[1].range], sentence.words[1]
  end

  def test_define_multiple
    first = as_test_document(gene_sentence, "test_doc1")
    second = as_test_document("This is another sentence", "test_doc2")
    corpus_holding(first, second)

    assert_equal 2, Document.setup([first, second]).multiple_words.length
    assert_equal first.split(" "), first.multiple_words

    #Document.persist :multiple_words, :annotations, :annotation_repo => Rbbt.tmp.test.multiple_words
    #assert_equal 2, Document.setup([text1, text2]).multiple_words.length
    #assert_equal text1.split(" "), text1.multiple_words
  end

  def test_persist
    sentence = as_test_document(gene_sentence, "test_doc1")
    corpus_holding(sentence)

    assert_equal "persisted_words", sentence.persisted_words.first.type

    # Not persisted yet: a second evaluation re-runs the body and raises.
    assert_raise CalledOnce do
      assert_equal sentence[sentence.persisted_words[1].range], sentence.persisted_words[1]
    end

    Log.severity = 0
    Document.persist :persisted_words, :annotations, :file => Rbbt.tmp.test.persisted_words.find(:user)

    $called_once = false
    sentence.persisted_words
    assert $called_once

    assert_nothing_raised do
      assert_equal sentence[sentence.persisted_words[1].range], sentence.persisted_words[1]
    end
  end

  def test_persist_annotation_repo
    sentence = as_test_document(gene_sentence, "test_doc1")
    corpus_holding(sentence)

    assert_equal "persisted_words", sentence.persisted_words.first.type

    # Not persisted yet: a second evaluation re-runs the body and raises.
    assert_raise CalledOnce do
      assert_equal sentence[sentence.persisted_words[1].range], sentence.persisted_words[1]
    end

    Log.severity = 0
    Document.persist :persisted_words, :annotations, :annotation_repo => Rbbt.tmp.test.persisted_words_repo.find(:user)

    $called_once = false
    sentence.persisted_words
    assert $called_once

    assert_nothing_raised do
      assert_equal sentence[sentence.persisted_words[1].range], sentence.persisted_words[1]
    end
  end

  def test_persist_ner
    sentence = as_test_document(gene_sentence, "test_doc1")
    corpus_holding(sentence)


    sentence.ner

    # The second call must not run the :ner body again.
    $called_once = false
    sentence.ner

    assert !$called_once

    assert sentence.ner.first.segid.include?("TEST:")
  end

  private

  # Fresh copy of the sample sentence; a new String on every call.
  def gene_sentence
    "This sentence mentions the TP53 gene and the CDK5R1 protein"
  end

  # Sets +text+ up as a Document in the "TEST" namespace under +code+.
  def as_test_document(text, code)
    Document.setup(text, "TEST", code, nil)
    text
  end

  # New in-memory corpus with +documents+ added in the given order.
  def corpus_holding(*documents)
    corpus = {}
    Document::Corpus.setup corpus

    documents.each { |document| corpus.add_document(document) }
    corpus
  end
end
140
+