rbbt-text 1.1.9 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/bow/bow.rb +5 -2
  3. data/lib/rbbt/bow/dictionary.rb +27 -23
  4. data/lib/rbbt/document.rb +56 -0
  5. data/lib/rbbt/document/annotation.rb +45 -0
  6. data/lib/rbbt/document/corpus.rb +61 -0
  7. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  8. data/lib/rbbt/ner/NER.rb +3 -3
  9. data/lib/rbbt/ner/abner.rb +1 -1
  10. data/lib/rbbt/ner/banner.rb +1 -1
  11. data/lib/rbbt/ner/brat.rb +1 -1
  12. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  13. data/lib/rbbt/ner/g_norm_plus.rb +42 -12
  14. data/lib/rbbt/ner/linnaeus.rb +3 -3
  15. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  16. data/lib/rbbt/ner/oscar3.rb +1 -2
  17. data/lib/rbbt/ner/oscar4.rb +3 -3
  18. data/lib/rbbt/ner/patterns.rb +5 -5
  19. data/lib/rbbt/ner/regexpNER.rb +1 -2
  20. data/lib/rbbt/ner/token_trieNER.rb +35 -22
  21. data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
  22. data/lib/rbbt/nlp/nlp.rb +5 -5
  23. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  24. data/lib/rbbt/nlp/spaCy.rb +52 -0
  25. data/lib/rbbt/segment.rb +179 -0
  26. data/lib/rbbt/segment/annotation.rb +58 -0
  27. data/lib/rbbt/segment/encoding.rb +18 -0
  28. data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -10
  29. data/lib/rbbt/segment/overlaps.rb +63 -0
  30. data/lib/rbbt/segment/range_index.rb +35 -0
  31. data/lib/rbbt/segment/relationship.rb +7 -0
  32. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  33. data/lib/rbbt/segment/token.rb +23 -0
  34. data/lib/rbbt/{text/segment → segment}/transformed.rb +10 -8
  35. data/lib/rbbt/segment/tsv.rb +41 -0
  36. data/share/install/software/Linnaeus +1 -1
  37. data/share/install/software/OpenNLP +1 -1
  38. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  39. data/test/rbbt/document/test_annotation.rb +140 -0
  40. data/test/rbbt/document/test_corpus.rb +33 -0
  41. data/test/rbbt/ner/test_finder.rb +3 -3
  42. data/test/rbbt/ner/test_g_norm_plus.rb +20 -3
  43. data/test/rbbt/ner/test_patterns.rb +9 -9
  44. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  45. data/test/rbbt/ner/test_rnorm.rb +3 -4
  46. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  47. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
  48. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  49. data/test/rbbt/segment/test_annotation.rb +39 -0
  50. data/test/rbbt/segment/test_corpus.rb +36 -0
  51. data/test/rbbt/segment/test_encoding.rb +24 -0
  52. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
  53. data/test/rbbt/segment/test_overlaps.rb +69 -0
  54. data/test/rbbt/segment/test_range_index.rb +42 -0
  55. data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
  56. data/test/rbbt/test_document.rb +14 -0
  57. data/test/rbbt/test_segment.rb +182 -0
  58. data/test/test_helper.rb +5 -3
  59. data/test/test_spaCy.rb +32 -0
  60. metadata +44 -32
  61. data/lib/rbbt/text/corpus.rb +0 -106
  62. data/lib/rbbt/text/corpus/document.rb +0 -361
  63. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  64. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  65. data/lib/rbbt/text/document.rb +0 -39
  66. data/lib/rbbt/text/segment.rb +0 -355
  67. data/lib/rbbt/text/segment/docid.rb +0 -46
  68. data/lib/rbbt/text/segment/relationship.rb +0 -24
  69. data/lib/rbbt/text/segment/token.rb +0 -49
  70. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  71. data/test/rbbt/text/corpus/test_document.rb +0 -52
  72. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  73. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  74. data/test/rbbt/text/test_corpus.rb +0 -34
  75. data/test/rbbt/text/test_document.rb +0 -58
  76. data/test/rbbt/text/test_segment.rb +0 -100
@@ -1,6 +1,3 @@
1
- require 'rbbt/util/misc'
2
- require 'rbbt/text/segment'
3
-
4
1
  module Transformed
5
2
 
6
3
  def self.transform(text, segments, replacement = nil, &block)
@@ -71,6 +68,10 @@ module Transformed
71
68
 
72
69
  segments = [segments] unless Array === segments
73
70
  orig_length = self.length
71
+
72
+ offset = self.respond_to?(:offset) ? self.offset.to_i : 0
73
+ segments = segments.select{|s| s.offset.to_i >= offset && s.offset.to_i <= offset + self.length - 1 }
74
+
74
75
  Segment.clean_sort(segments).each do |segment|
75
76
  next if segment.offset.nil?
76
77
 
@@ -89,7 +90,7 @@ module Transformed
89
90
 
90
91
  updated_text = self[updated_begin..updated_end]
91
92
  if updated_text.nil?
92
- Log.warn "Range outside of segment: #{self.length} #{segment.locus} (#{updated_range})"
93
+ Log.warn "Range outside of segment: #{self.length} #{segment.range} (#{updated_range})"
93
94
  next
94
95
  end
95
96
 
@@ -122,13 +123,13 @@ module Transformed
122
123
  def fix_segment(segment, range, diff)
123
124
  case
124
125
  # Before
125
- when segment.end < range.begin
126
+ when segment.eend < range.begin
126
127
  # After
127
128
  when segment.offset.to_i > range.end + diff
128
129
  segment.offset = segment.offset.to_i - diff
129
130
  # Includes
130
- when (segment.offset.to_i <= range.begin and segment.end >= range.end + diff)
131
- segment.replace self[segment.offset.to_i..segment.end - diff]
131
+ when (segment.offset.to_i <= range.begin and segment.eend >= range.end + diff)
132
+ segment.replace self[segment.offset.to_i..segment.eend - diff]
132
133
  else
133
134
  raise "Segment Overlaps"
134
135
  end
@@ -141,7 +142,8 @@ module Transformed
141
142
 
142
143
  if first_only
143
144
  @transformation_stack.pop.reverse.each do |id|
144
- orig_range, diff, text, range = @transformed_segments.delete id
145
+ segment_info = @transformed_segments.delete id
146
+ orig_range, diff, text, range = segment_info
145
147
 
146
148
  new_range = (range.begin..range.last + diff)
147
149
  self[new_range] = text
@@ -0,0 +1,41 @@
1
+ #module Segment
2
+ #
3
+ # def self.set_tsv_fields(fields, segments)
4
+ # tsv_fields = []
5
+ # add_types = ! (fields.delete(:no_types) || fields.delete("no_types") || fields.include?(:JSON) || fields.include?("JSON"))
6
+ # literal = (fields.delete(:literal) || fields.delete("literal"))
7
+ # tsv_fields << "Start" << "End"
8
+ # tsv_fields << :annotation_types if add_types
9
+ # tsv_fields << :literal if literal
10
+ #
11
+ # if fields.any? and not (fields == [:all] or fields == ["all"])
12
+ # tsv_fields.concat fields
13
+ # else
14
+ # tsv_fields.concat segments.first.annotations if segments.any?
15
+ # end
16
+ # tsv_fields
17
+ # tsv_fields.collect!{|f| f.to_s}
18
+ # tsv_fields.delete "offset"
19
+ # tsv_fields
20
+ # end
21
+ #
22
+ # def self.tsv(segments, *fields)
23
+ # fields = set_tsv_fields fields, segments
24
+ # tsv = TSV.setup({}, :key_field => "ID", :fields => fields, :type => :double)
25
+ #
26
+ # segments.each do |segment|
27
+ # tsv[segment.segment_id] = self.tsv_values_for_segment(segment, fields)
28
+ # end
29
+ #
30
+ # tsv
31
+ # end
32
+ #
33
+ # def self.load_tsv(tsv)
34
+ # fields = tsv.fields
35
+ # tsv.with_unnamed do
36
+ # tsv.collect do |id, values|
37
+ # Annotated.load_tsv_values(id, values, fields)
38
+ # end
39
+ # end
40
+ # end
41
+ #end
@@ -12,7 +12,7 @@ pkg_dir="`opt_dir \"$name\"`"
12
12
  build_dir=`build_dir`
13
13
  mv "$build_dir" "$pkg_dir"
14
14
  tmp_file="~/.rbbt/tmp/species-proxy-properties.tmp"
15
- mkdir -p $(basename "$tmp_file")
15
+ mkdir -p $(dirname "$tmp_file")
16
16
  cat "$pkg_dir/species-proxy/properties.conf" |grep -v "^.dir =" >> $tmp_file
17
17
  echo "\$dir = $pkg_dir/species-proxy/" > "$pkg_dir/species-proxy/properties.conf"
18
18
  cat $tmp_file | grep -v "^#" >> "$pkg_dir/species-proxy/properties.conf"
@@ -1,7 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
3
  name='OpenNLP'
4
- url="http://apache.rediris.es/opennlp/opennlp-1.9.1/apache-opennlp-1.9.1-bin.tar.gz"
4
+ url="http://apache.rediris.es/opennlp/opennlp-1.9.2/apache-opennlp-1.9.2-bin.tar.gz"
5
5
 
6
6
  get_src "$name" "$url"
7
7
  move_opt "$name"
@@ -0,0 +1,15 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/document/corpus/pubmed'
5
+
6
+ class TestCorpusPubmed < Test::Unit::TestCase
7
+ def test_add_pmid
8
+ corpus = Document::Corpus.setup({})
9
+
10
+ document = corpus.add_pmid("32299157", :abstract).first
11
+ title = document.to(:title)
12
+ assert title.include?("COVID-19")
13
+ end
14
+ end
15
+
@@ -0,0 +1,140 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/document/annotation'
6
+ require 'rbbt/segment/named_entity'
7
+
8
+ class TestAnnotation < Test::Unit::TestCase
9
+ class CalledOnce < Exception; end
10
+ def setup
11
+ Document.define :words do
12
+ self.split(" ")
13
+ end
14
+
15
+ $called_once = false
16
+ Document.define :persisted_words do
17
+ raise CalledOnce if $called_once
18
+ $called_once = true
19
+ self.split(" ")
20
+ end
21
+
22
+ Document.define_multiple :multiple_words do |list|
23
+ list.collect{|doc| doc.words}
24
+ end
25
+
26
+ Document.define :ner do
27
+ $called_once = true
28
+ self.split(" ").collect{|e| NamedEntity.setup(e, :code => Misc.digest(e)) }
29
+ end
30
+
31
+ Document.persist :ner
32
+ end
33
+
34
+ def test_define
35
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
36
+ Document.setup(text, "TEST", "test_doc1", nil)
37
+
38
+ corpus = {}
39
+ Document::Corpus.setup corpus
40
+
41
+ corpus.add_document(text)
42
+
43
+ assert_equal text[text.words[1].range], text.words[1]
44
+ end
45
+
46
+ def test_define_multiple
47
+ text1 = "This sentence mentions the TP53 gene and the CDK5R1 protein"
48
+ text2 = "This is another sentence"
49
+ Document.setup(text1, "TEST", "test_doc1", nil)
50
+ Document.setup(text2, "TEST", "test_doc2", nil)
51
+
52
+ corpus = {}
53
+ Document::Corpus.setup corpus
54
+
55
+ corpus.add_document(text1)
56
+ corpus.add_document(text2)
57
+
58
+ assert_equal 2, Document.setup([text1, text2]).multiple_words.length
59
+ assert_equal text1.split(" "), text1.multiple_words
60
+
61
+ #Document.persist :multiple_words, :annotations, :annotation_repo => Rbbt.tmp.test.multiple_words
62
+ #assert_equal 2, Document.setup([text1, text2]).multiple_words.length
63
+ #assert_equal text1.split(" "), text1.multiple_words
64
+ end
65
+
66
+ def test_persist
67
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
68
+ Document.setup(text, "TEST", "test_doc1", nil)
69
+
70
+ corpus = {}
71
+ Document::Corpus.setup corpus
72
+
73
+ corpus.add_document(text)
74
+
75
+ assert_equal "persisted_words", text.persisted_words.first.type
76
+
77
+ assert_raise CalledOnce do
78
+ assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
79
+ end
80
+
81
+ Log.severity = 0
82
+ Document.persist :persisted_words, :annotations, :file => Rbbt.tmp.test.persisted_words.find(:user)
83
+
84
+ $called_once = false
85
+ text.persisted_words
86
+ assert $called_once
87
+
88
+ assert_nothing_raised do
89
+ assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
90
+ end
91
+ end
92
+
93
+ def test_persist_annotation_repo
94
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
95
+ Document.setup(text, "TEST", "test_doc1", nil)
96
+
97
+ corpus = {}
98
+ Document::Corpus.setup corpus
99
+
100
+ corpus.add_document(text)
101
+
102
+ assert_equal "persisted_words", text.persisted_words.first.type
103
+
104
+ assert_raise CalledOnce do
105
+ assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
106
+ end
107
+
108
+ Log.severity = 0
109
+ Document.persist :persisted_words, :annotations, :annotation_repo => Rbbt.tmp.test.persisted_words_repo.find(:user)
110
+
111
+ $called_once = false
112
+ text.persisted_words
113
+ assert $called_once
114
+
115
+ assert_nothing_raised do
116
+ assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
117
+ end
118
+ end
119
+
120
+ def test_persist_ner
121
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
122
+ Document.setup(text, "TEST", "test_doc1", nil)
123
+
124
+ corpus = {}
125
+ Document::Corpus.setup corpus
126
+
127
+ corpus.add_document(text)
128
+
129
+
130
+ text.ner
131
+
132
+ $called_once = false
133
+ text.ner
134
+
135
+ assert ! $called_once
136
+
137
+ assert text.ner.first.segid.include?("TEST:")
138
+ end
139
+ end
140
+
@@ -0,0 +1,33 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+
5
+ class TestDocumentCorpus < Test::Unit::TestCase
6
+ def test_corpus
7
+ text = "This is a document"
8
+ Document.setup(text, "TEST", "test_doc1", nil)
9
+
10
+ corpus = Document::Corpus.setup({})
11
+
12
+ corpus.add_document(text)
13
+
14
+ docid = text.docid(corpus)
15
+
16
+ assert_equal docid.document, text
17
+ end
18
+
19
+ def test_find
20
+ text = "This is a document"
21
+ Document.setup(text, "TEST", "test_doc1", nil)
22
+
23
+ TmpFile.with_file do |path|
24
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
25
+ corpus.extend Document::Corpus
26
+
27
+ corpus.add_document(text)
28
+
29
+ assert corpus.docids("TEST:").include?(text.docid)
30
+ end
31
+ end
32
+ end
33
+
@@ -8,13 +8,13 @@ require 'rbbt/sources/NCI'
8
8
 
9
9
  class TestFinder < Test::Unit::TestCase
10
10
 
11
- def test_namespace_and_format
11
+ def _test_namespace_and_format
12
12
  f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
13
13
  assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
14
14
  assert_equal "Ensembl Gene ID", f.instances.first.format
15
15
  end
16
16
 
17
- def test_find
17
+ def _test_find
18
18
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["SF3B1"])
19
19
 
20
20
  assert_equal "ENSG00000115524", f.find("SF3B1").first
@@ -23,7 +23,7 @@ class TestFinder < Test::Unit::TestCase
23
23
  end
24
24
  end
25
25
 
26
- def test_find2
26
+ def _test_find2
27
27
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
28
28
 
29
29
  m = f.find("RAS").first
@@ -5,12 +5,29 @@ Log.severity = 0
5
5
  class TestGNormPlus < Test::Unit::TestCase
6
6
  def test_match
7
7
  text =<<-EOF
8
- We found that TP53 is regulated by MDM2 in Homo sapiens
9
- EOF
10
8
 
9
+ Introduction
10
+
11
+ We found that TP53 is regulated by MDM2 in Homo
12
+ sapiens
13
+ EOF
11
14
 
12
15
  mentions = GNormPlus.process({:file => text})
13
- Log.tsv mentions
16
+
17
+ assert_equal 1, mentions.length
18
+ assert_equal 3, mentions["file"].length
19
+ end
20
+
21
+ def test_entities
22
+ text =<<-EOF
23
+ We found that TP53 is regulated by MDM2 in Homo sapiens
24
+ EOF
25
+
26
+ mentions = GNormPlus.entities({:file => text})
27
+ assert mentions["file"].include?("TP53")
28
+ mentions["file"].each do |mention|
29
+ assert_equal mention, text[mention.range].sub("\n", ' ')
30
+ end
14
31
  end
15
32
  end
16
33
 
@@ -2,17 +2,17 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
2
2
  require 'rbbt/ner/patterns'
3
3
 
4
4
  class TestPatternRelExt < Test::Unit::TestCase
5
- def test_simple_pattern
5
+ def _test_simple_pattern
6
6
  text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
7
7
 
8
8
  gene1 = "TP53"
9
- NamedEntity.setup(gene1, text.index(gene1), "Gene")
9
+ NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
10
10
 
11
11
  gene2 = "CDK5"
12
- NamedEntity.setup(gene2, text.index(gene2), "Gene")
12
+ NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
13
13
 
14
14
  interaction = "interacts"
15
- NamedEntity.setup(interaction, text.index(interaction), "Interaction")
15
+ NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
16
16
 
17
17
  Segmented.setup(text, [gene1, gene2, interaction])
18
18
 
@@ -23,13 +23,13 @@ class TestPatternRelExt < Test::Unit::TestCase
23
23
  text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
24
24
 
25
25
  gene1 = "TP53"
26
- NamedEntity.setup(gene1, text.index(gene1), "Gene")
26
+ NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
27
27
 
28
28
  gene2 = "CDK5"
29
- NamedEntity.setup(gene2, text.index(gene2), "Gene")
29
+ NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
30
30
 
31
31
  interaction = "interacts"
32
- NamedEntity.setup(interaction, text.index(interaction), "Interaction")
32
+ NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
33
33
 
34
34
  Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
35
35
 
@@ -40,7 +40,7 @@ class TestPatternRelExt < Test::Unit::TestCase
40
40
  PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
41
41
  end
42
42
 
43
- def test_chunk_pattern
43
+ def _test_chunk_pattern
44
44
  text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
45
45
 
46
46
  drug = "thiazolidinediones"
@@ -57,7 +57,7 @@ class TestPatternRelExt < Test::Unit::TestCase
57
57
  end
58
58
 
59
59
 
60
- def test_entities_with_spaces
60
+ def _test_entities_with_spaces
61
61
  PatternRelExt.new("NP[entity:Gene Name]").token_trie
62
62
  end
63
63
 
@@ -23,9 +23,9 @@ class TestRegExpNER < Test::Unit::TestCase
23
23
  matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)
24
24
 
25
25
  assert_equal ["this", "this", "that"].sort, matches.sort
26
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
27
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
28
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
26
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
27
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
28
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
29
29
  end
30
30
 
31
31
  def test_define_regexps
@@ -39,9 +39,9 @@ class TestRegExpNER < Test::Unit::TestCase
39
39
 
40
40
  matches = ner.entities(sentence)
41
41
  assert_equal ["this", "this", "that"].sort, matches.sort
42
- assert_equal "In ".length, matches.select{|m| m.type == :this }[0].offset
43
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this }[1].offset
44
- assert_equal :this, matches.select{|m| m.type == :this }[0].type
42
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this }[0].offset
43
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this }[1].offset
44
+ assert_equal :this, matches.select{|m| m.entity_type == :this }[0].entity_type
45
45
  end
46
46
 
47
47
 
@@ -51,9 +51,9 @@ class TestRegExpNER < Test::Unit::TestCase
51
51
  ner = RegExpNER.new({:this => /this/, :that => /that/})
52
52
  matches = ner.entities(sentence)
53
53
  assert_equal ["this", "this", "that"].sort, matches.sort
54
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
55
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
56
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
54
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
55
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
56
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
57
57
 
58
58
  Segmented.setup(sentence)
59
59
  ner_this = RegExpNER.new({:this => /this/})
@@ -64,9 +64,9 @@ class TestRegExpNER < Test::Unit::TestCase
64
64
  matches = sentence.segments
65
65
 
66
66
  assert_equal ["this", "this", "that"].sort, matches.sort
67
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
68
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
69
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
67
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
68
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
69
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
70
70
  end
71
71
 
72
72
  def test_entities_captures
@@ -75,8 +75,8 @@ class TestRegExpNER < Test::Unit::TestCase
75
75
  ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
76
76
  matches = ner.entities(sentence)
77
77
  assert_equal ["this", "this", "that", "should"].sort, matches.sort
78
- assert_equal "In this sentence I ".length, matches.select{|m| m.type == :should}[0].offset
79
- assert_equal :should, matches.select{|m| m.type == :should}[0].type
78
+ assert_equal "In this sentence I ".length, matches.select{|m| m.entity_type == :should}[0].offset
79
+ assert_equal :should, matches.select{|m| m.entity_type == :should}[0].entity_type
80
80
  end
81
81
 
82
82