rbbt-text 1.2.0 → 1.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/bow/bow.rb +5 -2
  3. data/lib/rbbt/bow/dictionary.rb +27 -23
  4. data/lib/rbbt/document.rb +55 -0
  5. data/lib/rbbt/document/annotation.rb +45 -0
  6. data/lib/rbbt/document/corpus.rb +63 -0
  7. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  8. data/lib/rbbt/ner/NER.rb +3 -3
  9. data/lib/rbbt/ner/abner.rb +1 -1
  10. data/lib/rbbt/ner/banner.rb +1 -1
  11. data/lib/rbbt/ner/brat.rb +1 -1
  12. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  13. data/lib/rbbt/ner/g_norm_plus.rb +26 -3
  14. data/lib/rbbt/ner/linnaeus.rb +3 -3
  15. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  16. data/lib/rbbt/ner/oscar3.rb +1 -2
  17. data/lib/rbbt/ner/oscar4.rb +3 -3
  18. data/lib/rbbt/ner/patterns.rb +5 -5
  19. data/lib/rbbt/ner/regexpNER.rb +1 -2
  20. data/lib/rbbt/ner/token_trieNER.rb +35 -22
  21. data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
  22. data/lib/rbbt/nlp/nlp.rb +5 -5
  23. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  24. data/lib/rbbt/nlp/spaCy.rb +52 -0
  25. data/lib/rbbt/segment.rb +179 -0
  26. data/lib/rbbt/segment/annotation.rb +58 -0
  27. data/lib/rbbt/segment/encoding.rb +18 -0
  28. data/lib/rbbt/{text/segment → segment}/named_entity.rb +14 -11
  29. data/lib/rbbt/segment/overlaps.rb +63 -0
  30. data/lib/rbbt/segment/range_index.rb +35 -0
  31. data/lib/rbbt/segment/relationship.rb +7 -0
  32. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  33. data/lib/rbbt/segment/token.rb +23 -0
  34. data/lib/rbbt/{text/segment → segment}/transformed.rb +12 -10
  35. data/lib/rbbt/segment/tsv.rb +41 -0
  36. data/share/install/software/Linnaeus +1 -1
  37. data/share/install/software/OpenNLP +1 -1
  38. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  39. data/test/rbbt/document/test_annotation.rb +140 -0
  40. data/test/rbbt/document/test_corpus.rb +33 -0
  41. data/test/rbbt/ner/test_finder.rb +3 -3
  42. data/test/rbbt/ner/test_g_norm_plus.rb +20 -2
  43. data/test/rbbt/ner/test_patterns.rb +9 -9
  44. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  45. data/test/rbbt/ner/test_rnorm.rb +3 -4
  46. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  47. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
  48. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  49. data/test/rbbt/segment/test_annotation.rb +39 -0
  50. data/test/rbbt/segment/test_corpus.rb +36 -0
  51. data/test/rbbt/segment/test_encoding.rb +24 -0
  52. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
  53. data/test/rbbt/segment/test_overlaps.rb +69 -0
  54. data/test/rbbt/segment/test_range_index.rb +42 -0
  55. data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
  56. data/test/rbbt/test_document.rb +14 -0
  57. data/test/rbbt/test_segment.rb +182 -0
  58. data/test/test_helper.rb +5 -3
  59. data/test/test_spaCy.rb +32 -0
  60. metadata +44 -32
  61. data/lib/rbbt/text/corpus.rb +0 -106
  62. data/lib/rbbt/text/corpus/document.rb +0 -383
  63. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  64. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  65. data/lib/rbbt/text/document.rb +0 -39
  66. data/lib/rbbt/text/segment.rb +0 -363
  67. data/lib/rbbt/text/segment/docid.rb +0 -46
  68. data/lib/rbbt/text/segment/relationship.rb +0 -24
  69. data/lib/rbbt/text/segment/token.rb +0 -49
  70. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  71. data/test/rbbt/text/corpus/test_document.rb +0 -82
  72. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  73. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  74. data/test/rbbt/text/test_corpus.rb +0 -34
  75. data/test/rbbt/text/test_document.rb +0 -58
  76. data/test/rbbt/text/test_segment.rb +0 -100
@@ -0,0 +1,58 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/segment'
3
+ require 'rbbt/entity'
4
+
5
# String identifier of a segment annotation, made of colon-separated fields.
#
# The first five fields are treated as the segment identifier and the sixth
# as the annotation type. The identifier carries a +corpus+ annotation that
# is handed to SegID when the annotation is resolved.
module AnnotID
  extend Entity
  self.annotation :corpus

  # Colon-separated fields of the identifier (memoized).
  def _parts
    @parts ||= split(":")
  end

  # Segment identifier: the first five fields joined back with ":".
  def segid
    @segid ||= _parts[0..4].join(":")
  end

  # Annotation type: the sixth field (nil when the identifier is shorter).
  def type
    @type ||= _parts[5]
  end

  # Resolves the identifier into the segment it points to, set up as a
  # SegmentAnnotation of the encoded type.
  property :annotation do
    located = SegID.setup(segid, :corpus => corpus)
    SegmentAnnotation.setup(located.segment, :type => type)
  end

  # An AnnotID is its own annotation identifier.
  property :annotid do
    self
  end
end
32
+
33
# A Segment that also carries an annotation +type+ and can be converted to
# and from its AnnotID.
module SegmentAnnotation
  extend Entity
  include Object::Segment
  self.annotation :type

  # Segment identifier: the object itself when it already is a SegID,
  # otherwise whatever the Segment implementation of +segid+ returns.
  property :segid do
    if SegID === self
      self
    elsif Segment === self
      super()
    else
      raise "Unknown object: #{self}"
    end
  end

  # Builds the AnnotID "<segid>:<type>:<digest of info>" and attaches the
  # given corpus to it.
  property :annotid do |corpus=nil|
    fields = [segid, type, Misc.obj2digest(self.info)]
    AnnotID.setup(fields.join(":"), :corpus => corpus)
  end

  alias id annotid

  # A SegmentAnnotation is its own annotation.
  property :annotation do
    self
  end
end
@@ -0,0 +1,18 @@
1
+ require 'rbbt/segment'
2
module Segment
  # Collects every non-ASCII character of +text+ as a Segment whose offset is
  # the character's index in +text+.
  #
  # Returns an Array of segments in order of appearance (empty when +text+
  # is pure ASCII).
  def self.bad_chars(text)
    text.each_char.with_index
        .reject { |char, _position| char.ascii_only? }
        .map { |char, position| Segment.setup(char, :offset => position) }
  end

  # Runs Transformed.with_transform over +text+ with every non-ASCII
  # character replaced by +replace+ ("?" when +replace+ is nil), forwarding
  # the given block.
  def self.ascii(text, replace = nil, &block)
    replacement = replace.nil? ? "?" : replace
    Transformed.with_transform(text, bad_chars(text), replacement, &block)
  end
end
@@ -1,42 +1,46 @@
1
- require 'rbbt/text/segment'
2
- require 'rbbt/entity'
1
+ require 'rbbt/segment'
2
+ require 'rbbt/segment/annotation'
3
3
 
4
- module NamedEntity
4
+ module NamedEntity
5
5
  extend Entity
6
6
  include Segment
7
+ include SegmentAnnotation
7
8
 
8
- self.annotation :type, :code, :score
9
+ self.annotation :entity_type, :code, :score
9
10
 
10
11
  def report
11
12
  <<-EOF
12
13
  String: #{ self }
13
14
  Offset: #{ offset.inspect }
14
- Type: #{type.inspect}
15
+ Type: #{entity_type.inspect}
15
16
  Code: #{code.inspect}
16
17
  Score: #{score.inspect}
17
18
  EOF
18
19
  end
19
20
 
20
21
  def html
22
+ title = code.nil? ? entity_type : [entity_type, code].compact * ":"
23
+
21
24
  text = <<-EOF
22
25
  <span class='Entity'\
23
- #{type.nil? ? "" : " attr-entity-type='#{Array === type ? type * " " : type}'"}\
26
+ #{entity_type.nil? ? "" : " attr-entity-type='#{Array === entity_type ? entity_type * " " : entity_type}'"}\
24
27
  #{code.nil? ? "" : " attr-entity-code='#{Array === code ? code * " " : code}'"}\
25
28
  #{score.nil? ? "" : " attr-entity-score='#{Array === score ? score * " " : score}'"}\
29
+ #{title.nil? ? "" : " title='#{Array === title ? title * " " : title}'"}\
26
30
  >#{ self }</span>
27
31
  EOF
28
32
  text.chomp
29
33
  end
30
34
 
31
35
  def entity(params = nil)
32
- code = self.dup
36
+ code = self.code || self.dup
33
37
  format, entity = code.split(":")
34
38
  entity, format = format, nil if entity.nil?
35
-
36
- if defined?(Entity) && Entity.formats.include?(type) or Entity.formats.include?(format)
39
+
40
+ if defined?(Entity) && Entity.formats.include?(entity_type) or Entity.formats.include?(format)
37
41
  params ||= {}
38
42
  params[:format] = format if format and params[:format].nil?
39
- mod = (Entity.formats[type] || Entity.format[entity])
43
+ mod = (Entity.formats[entity_type] || Entity.format[entity])
40
44
  mod.setup(entity, params)
41
45
  end
42
46
 
@@ -44,4 +48,3 @@ Score: #{score.inspect}
44
48
  end
45
49
 
46
50
  end
47
-
@@ -0,0 +1,63 @@
1
# Offset arithmetic and overlap predicates for segments.
#
# The methods rely on the including object providing +offset+, +offset=+,
# +eend+, +range+ and +segment_length+.
module Segment
  # Shifts the segment forward by adding +offset+ to its own offset.
  # If either offset is nil the segment's offset becomes nil.
  # Returns self.
  def pull(offset)
    if self.offset.nil? || offset.nil?
      self.offset = nil
    else
      self.offset += offset
    end

    self
  end

  # Shifts the segment backwards by subtracting +offset+ from its own offset.
  # If either offset is nil the segment's offset becomes nil.
  # Returns self.
  def push(offset)
    if self.offset.nil? || offset.nil?
      self.offset = nil
    else
      self.offset -= offset
    end

    self
  end

  # Makes the offsets of +segments+ relative to this segment's offset.
  #
  # Without a block the segments are left shifted. With a block the shifted
  # segments are yielded and their offsets are shifted back afterwards, even
  # when the block raises, so a failure does not leave the caller's segments
  # with corrupted offsets.
  #
  # Returns +segments+.
  def make_relative(segments, &block)
    segments.each { |s| s.push offset }
    return segments unless block_given?

    begin
      yield(segments)
    ensure
      segments.each { |s| s.pull offset }
    end

    segments
  end

  # Range covered by this segment relative to +container+, which may be a
  # Segment with an offset or an Integer offset; otherwise the segment's
  # own +range+ is returned.
  #
  # Raises RuntimeError when the segment has no offset.
  def range_in(container = nil)
    raise "No offset specified" if offset.nil?

    if Segment === container && !container.offset.nil?
      ((offset - container.offset)..(self.eend - container.offset))
    elsif Integer === container
      ((offset - container)..(self.eend - container))
    else
      range
    end
  end

  # True when +segment+ lies completely inside this segment.
  # Nil offsets and lengths are treated as 0.
  def includes?(segment)
    segment.offset.to_i >= self.offset.to_i &&
      segment.offset.to_i + segment.segment_length.to_i <= self.offset.to_i + self.segment_length.to_i
  end

  # True when the start of either segment falls within the other one.
  def overlaps?(segment)
    segment.offset.to_i >= self.offset.to_i && segment.offset.to_i <= self.eend ||
      self.offset.to_i >= segment.offset.to_i && self.offset.to_i <= segment.eend
  end

  # Subset of +segments+ overlapping this segment.
  def overlaps(segments)
    segments.select { |s| overlaps?(s) }
  end

  # Segments of +secondary+ that overlap at least one segment of +main+.
  def self.collisions(main, secondary)
    secondary.select do |ss|
      # any? stops at the first overlapping segment instead of building the
      # full list of matches.
      main.any? { |ms| ms.overlaps? ss }
    end
  end
end
@@ -0,0 +1,35 @@
1
# Mixin for a range index of segment ids: lookups are wrapped as SegID
# values bound to the index's corpus.
module Segment::RangeIndex
  attr_accessor :corpus

  # Delegates the lookup to the underlying index and sets the result up as
  # SegID with this index's corpus.
  def [](*args)
    SegID.setup(super(*args), :corpus => corpus)
  end

  # Builds a range index over +segments+ (an Array, or a Hash whose values
  # are flattened), skipping segments without an offset.
  #
  # The index is persisted to +persist_file+ unless that is nil or :memory.
  # Returns the index extended with Segment::RangeIndex and bound to
  # +corpus+.
  def self.index(segments, corpus, persist_file = :memory)
    segments = segments.values.flatten if Hash === segments
    on_disk = !persist_file.nil? && persist_file != :memory

    annotation_index = Persist.persist("Segment_index", :fwt, :persist => on_disk, :file => persist_file) do
      located = segments.reject { |segment| segment.offset.nil? }

      index_data = located.map do |segment|
        span = segment.range
        [segment.segid, [span.begin, span.end]]
      end

      # The table's value width is the length of the longest segment id.
      value_size = index_data.map { |segid, _span| segid.length }.max || 0

      fwt = FixWidthTable.get :memory, value_size, true
      fwt.add_range index_data
      fwt
    end

    annotation_index.extend Segment::RangeIndex
    annotation_index.corpus = corpus
    annotation_index
  end
end
35
+
@@ -0,0 +1,7 @@
1
# Entity describing a relationship; it is annotated with the +segments+ it
# involves and a +type+.
module Relationship
  extend Entity

  [:segments, :type].each { |field| self.annotation field }
end
@@ -1,5 +1,5 @@
1
1
  require 'rbbt/annotations'
2
- require 'rbbt/text/segment'
2
+ require 'rbbt/segment'
3
3
 
4
4
  module Segmented
5
5
  extend Annotation
@@ -0,0 +1,23 @@
1
+ require 'rbbt/segment'
2
+
3
# A Segment representing a single token of a text.
module Token
  extend Entity
  include Segment

  self.annotation :original

  # Splits +text+ into Token segments.
  #
  # text     - String to tokenize.
  # split_at - separator pattern. Text matched outside its first capture
  #            group (whitespace by default) is dropped; text matched by the
  #            first capture group (punctuation by default) becomes a token
  #            of its own.
  # start    - offset assigned to the first character of +text+.
  #
  # Returns an Array of tokens, each set up with its :offset.
  # Raises ArgumentError when +split_at+ matches the empty string at the
  # start of the remaining text, since scanning could never advance.
  def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
    tokens = []

    while matchdata = text.match(split_at)
      # A zero-width match at position 0 consumes nothing; without this
      # guard the loop would never terminate.
      if matchdata.end(0).zero?
        raise ArgumentError, "split_at must not match the empty string: #{split_at.inspect}"
      end

      leading = matchdata.pre_match
      tokens << Token.setup(leading, :offset => start) unless leading.empty?

      # matchdata[1] is nil when the pattern has no groups or when the first
      # group did not take part in the match (even if a later group did).
      separator = matchdata[1]
      unless separator.nil? || separator.empty?
        tokens << Token.setup(separator, :offset => start + matchdata.begin(1))
      end

      start += matchdata.end(0)
      text = matchdata.post_match
    end

    tokens << Token.setup(text, :offset => start) unless text.empty?

    tokens
  end
end
@@ -1,6 +1,3 @@
1
- require 'rbbt/util/misc'
2
- require 'rbbt/text/segment'
3
-
4
1
  module Transformed
5
2
 
6
3
  def self.transform(text, segments, replacement = nil, &block)
@@ -71,6 +68,10 @@ module Transformed
71
68
 
72
69
  segments = [segments] unless Array === segments
73
70
  orig_length = self.length
71
+
72
+ offset = self.respond_to?(:offset) ? self.offset.to_i : 0
73
+ segments = segments.select{|s| s.offset.to_i >= offset && s.offset.to_i <= offset + self.length - 1 }
74
+
74
75
  Segment.clean_sort(segments).each do |segment|
75
76
  next if segment.offset.nil?
76
77
 
@@ -89,7 +90,7 @@ module Transformed
89
90
 
90
91
  updated_text = self[updated_begin..updated_end]
91
92
  if updated_text.nil?
92
- Log.warn "Range outside of segment: #{self.length} #{segment.locus} (#{updated_range})"
93
+ Log.warn "Range outside of segment: #{self.length} #{segment.range} (#{updated_range})"
93
94
  next
94
95
  end
95
96
 
@@ -111,10 +112,10 @@ module Transformed
111
112
 
112
113
  self[updated_begin..updated_end] = new
113
114
 
114
- @transformed_segments[segment.segment_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
115
+ @transformed_segments[segment.object_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
115
116
 
116
117
  segment.replace original_text
117
- stack << segment.segment_id
118
+ stack << segment.object_id
118
119
  end
119
120
  @transformation_stack << stack
120
121
  end
@@ -122,13 +123,13 @@ module Transformed
122
123
  def fix_segment(segment, range, diff)
123
124
  case
124
125
  # Before
125
- when segment.end < range.begin
126
+ when segment.eend < range.begin
126
127
  # After
127
128
  when segment.offset.to_i > range.end + diff
128
129
  segment.offset = segment.offset.to_i - diff
129
130
  # Includes
130
- when (segment.offset.to_i <= range.begin and segment.end >= range.end + diff)
131
- segment.replace self[segment.offset.to_i..segment.end - diff]
131
+ when (segment.offset.to_i <= range.begin and segment.eend >= range.end + diff)
132
+ segment.replace self[segment.offset.to_i..segment.eend - diff]
132
133
  else
133
134
  raise "Segment Overlaps"
134
135
  end
@@ -141,7 +142,8 @@ module Transformed
141
142
 
142
143
  if first_only
143
144
  @transformation_stack.pop.reverse.each do |id|
144
- orig_range, diff, text, range = @transformed_segments.delete id
145
+ segment_info = @transformed_segments.delete id
146
+ orig_range, diff, text, range = segment_info
145
147
 
146
148
  new_range = (range.begin..range.last + diff)
147
149
  self[new_range] = text
@@ -0,0 +1,41 @@
1
+ #module Segment
2
+ #
3
+ # def self.set_tsv_fields(fields, segments)
4
+ # tsv_fields = []
5
+ # add_types = ! (fields.delete(:no_types) || fields.delete("no_types") || fields.include?(:JSON) || fields.include?("JSON"))
6
+ # literal = (fields.delete(:literal) || fields.delete("literal"))
7
+ # tsv_fields << "Start" << "End"
8
+ # tsv_fields << :annotation_types if add_types
9
+ # tsv_fields << :literal if literal
10
+ #
11
+ # if fields.any? and not (fields == [:all] or fields == ["all"])
12
+ # tsv_fields.concat fields
13
+ # else
14
+ # tsv_fields.concat segments.first.annotations if segments.any?
15
+ # end
16
+ # tsv_fields
17
+ # tsv_fields.collect!{|f| f.to_s}
18
+ # tsv_fields.delete "offset"
19
+ # tsv_fields
20
+ # end
21
+ #
22
+ # def self.tsv(segments, *fields)
23
+ # fields = set_tsv_fields fields, segments
24
+ # tsv = TSV.setup({}, :key_field => "ID", :fields => fields, :type => :double)
25
+ #
26
+ # segments.each do |segment|
27
+ # tsv[segment.segment_id] = self.tsv_values_for_segment(segment, fields)
28
+ # end
29
+ #
30
+ # tsv
31
+ # end
32
+ #
33
+ # def self.load_tsv(tsv)
34
+ # fields = tsv.fields
35
+ # tsv.with_unnamed do
36
+ # tsv.collect do |id, values|
37
+ # Annotated.load_tsv_values(id, values, fields)
38
+ # end
39
+ # end
40
+ # end
41
+ #end
@@ -12,7 +12,7 @@ pkg_dir="`opt_dir \"$name\"`"
12
12
  build_dir=`build_dir`
13
13
  mv "$build_dir" "$pkg_dir"
14
14
  tmp_file="~/.rbbt/tmp/species-proxy-properties.tmp"
15
- mkdir -p $(basename "$tmp_file")
15
+ mkdir -p $(dirname "$tmp_file")
16
16
  cat "$pkg_dir/species-proxy/properties.conf" |grep -v "^.dir =" >> $tmp_file
17
17
  echo "\$dir = $pkg_dir/species-proxy/" > "$pkg_dir/species-proxy/properties.conf"
18
18
  cat $tmp_file | grep -v "^#" >> "$pkg_dir/species-proxy/properties.conf"
@@ -1,7 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
3
  name='OpenNLP'
4
- url="http://apache.rediris.es/opennlp/opennlp-1.9.1/apache-opennlp-1.9.1-bin.tar.gz"
4
+ url="http://apache.rediris.es/opennlp/opennlp-1.9.2/apache-opennlp-1.9.2-bin.tar.gz"
5
5
 
6
6
  get_src "$name" "$url"
7
7
  move_opt "$name"
@@ -0,0 +1,15 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/document/corpus/pubmed'
5
+
6
# Checks that a PubMed article can be added to a corpus by PMID.
class TestCorpusPubmed < Test::Unit::TestCase
  # Adds the abstract of PMID 32299157 to an empty corpus and checks the
  # title of the resulting document.
  def test_add_pmid
    abstracts = Document::Corpus.setup({}).add_pmid("32299157", :abstract)

    assert abstracts.first.to(:title).include?("COVID-19")
  end
end
15
+
@@ -0,0 +1,140 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/document/annotation'
6
+ require 'rbbt/segment/named_entity'
7
+
8
# Exercises Document property definition (define, define_multiple) and
# property persistence (persist).
class TestAnnotation < Test::Unit::TestCase
  # Raised by the :persisted_words property when it is computed a second
  # time; used to detect recomputation.
  class CalledOnce < Exception; end

  # (Re)defines the Document properties used by the tests and resets the
  # $called_once flag.
  def setup
    Document.define(:words) { split(" ") }

    $called_once = false
    Document.define :persisted_words do
      raise CalledOnce if $called_once
      $called_once = true
      split(" ")
    end

    Document.define_multiple(:multiple_words) do |docs|
      docs.map { |doc| doc.words }
    end

    Document.define :ner do
      $called_once = true
      split(" ").map { |word| NamedEntity.setup(word, :code => Misc.digest(word)) }
    end

    Document.persist :ner
  end

  def test_define
    doc = sample_document

    assert_equal doc[doc.words[1].range], doc.words[1]
  end

  def test_define_multiple
    first = "This sentence mentions the TP53 gene and the CDK5R1 protein"
    second = "This is another sentence"
    Document.setup(first, "TEST", "test_doc1", nil)
    Document.setup(second, "TEST", "test_doc2", nil)

    corpus = Document::Corpus.setup({})
    [first, second].each { |doc| corpus.add_document(doc) }

    assert_equal 2, Document.setup([first, second]).multiple_words.length
    assert_equal first.split(" "), first.multiple_words

    #Document.persist :multiple_words, :annotations, :annotation_repo => Rbbt.tmp.test.multiple_words
    #assert_equal 2, Document.setup([first, second]).multiple_words.length
    #assert_equal first.split(" "), first.multiple_words
  end

  def test_persist
    doc = sample_document

    assert_equal "persisted_words", doc.persisted_words.first.type

    # Not persisted yet: the second evaluation recomputes and raises.
    assert_raise CalledOnce do
      assert_equal doc[doc.persisted_words[1].range], doc.persisted_words[1]
    end

    Log.severity = 0
    Document.persist :persisted_words, :annotations, :file => Rbbt.tmp.test.persisted_words.find(:user)

    $called_once = false
    doc.persisted_words
    assert $called_once

    assert_nothing_raised do
      assert_equal doc[doc.persisted_words[1].range], doc.persisted_words[1]
    end
  end

  def test_persist_annotation_repo
    doc = sample_document

    assert_equal "persisted_words", doc.persisted_words.first.type

    # Not persisted yet: the second evaluation recomputes and raises.
    assert_raise CalledOnce do
      assert_equal doc[doc.persisted_words[1].range], doc.persisted_words[1]
    end

    Log.severity = 0
    Document.persist :persisted_words, :annotations, :annotation_repo => Rbbt.tmp.test.persisted_words_repo.find(:user)

    $called_once = false
    doc.persisted_words
    assert $called_once

    assert_nothing_raised do
      assert_equal doc[doc.persisted_words[1].range], doc.persisted_words[1]
    end
  end

  def test_persist_ner
    doc = sample_document

    doc.ner

    # The :ner block sets the flag whenever it runs; it must stay false on
    # the second call.
    $called_once = false
    doc.ner

    assert ! $called_once

    assert doc.ner.first.segid.include?("TEST:")
  end

  private

  # Builds a fresh sample sentence set up as Document "TEST"/"test_doc1" and
  # adds it to a new Hash-backed corpus. Returns the document.
  def sample_document
    text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
    Document.setup(text, "TEST", "test_doc1", nil)

    corpus = Document::Corpus.setup({})
    corpus.add_document(text)

    text
  end
end
140
+