rbbt-text 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. data/lib/rbbt/corpus/corpus.rb +15 -6
  2. data/lib/rbbt/corpus/document.rb +100 -127
  3. data/lib/rbbt/corpus/document_repo.rb +72 -51
  4. data/lib/rbbt/ner/NER.rb +4 -4
  5. data/lib/rbbt/ner/abner.rb +5 -4
  6. data/lib/rbbt/ner/banner.rb +3 -3
  7. data/lib/rbbt/ner/chemical_tagger.rb +3 -3
  8. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +45 -10
  9. data/lib/rbbt/ner/oscar3.rb +3 -3
  10. data/lib/rbbt/ner/oscar4.rb +3 -3
  11. data/lib/rbbt/ner/patterns.rb +15 -13
  12. data/lib/rbbt/ner/regexpNER.rb +3 -2
  13. data/lib/rbbt/ner/rnorm.rb +2 -2
  14. data/lib/rbbt/ner/rnorm/cue_index.rb +2 -2
  15. data/lib/rbbt/ner/{annotations.rb → segment.rb} +161 -109
  16. data/lib/rbbt/ner/{annotations → segment}/named_entity.rb +3 -11
  17. data/lib/rbbt/ner/segment/relationship.rb +20 -0
  18. data/lib/rbbt/ner/segment/segmented.rb +13 -0
  19. data/lib/rbbt/ner/segment/token.rb +24 -0
  20. data/lib/rbbt/ner/{annotations → segment}/transformed.rb +10 -10
  21. data/lib/rbbt/ner/token_trieNER.rb +30 -22
  22. data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -1
  23. data/lib/rbbt/nlp/nlp.rb +23 -37
  24. data/test/rbbt/corpus/test_document.rb +39 -37
  25. data/test/rbbt/ner/segment/test_named_entity.rb +29 -0
  26. data/test/rbbt/ner/segment/test_segmented.rb +23 -0
  27. data/test/rbbt/ner/{annotations → segment}/test_transformed.rb +6 -6
  28. data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +15 -1
  29. data/test/rbbt/ner/test_patterns.rb +11 -12
  30. data/test/rbbt/ner/test_regexpNER.rb +5 -4
  31. data/test/rbbt/ner/test_segment.rb +101 -0
  32. data/test/rbbt/ner/test_token_trieNER.rb +8 -9
  33. data/test/test_helper.rb +6 -6
  34. metadata +40 -22
  35. data/lib/rbbt/ner/annotations/annotated.rb +0 -15
  36. data/lib/rbbt/ner/annotations/relations.rb +0 -25
  37. data/lib/rbbt/ner/annotations/token.rb +0 -28
  38. data/test/rbbt/ner/annotations/test_named_entity.rb +0 -14
  39. data/test/rbbt/ner/test_annotations.rb +0 -70
@@ -0,0 +1,29 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/ner/segment'
3
+ require 'rbbt/ner/segment/named_entity'
4
+
5
+ class TestClass < Test::Unit::TestCase
6
+ def test_info
7
+ a = "test"
8
+ NamedEntity.setup a
9
+ assert(! a.info.keys.include?(:code))
10
+ a.code = 10
11
+ a.offset = 100
12
+ assert a.info.include? :code
13
+ assert a.info.include? :offset
14
+ end
15
+
16
+ def test_all_args
17
+ a = "test"
18
+ NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
19
+ assert_equal 10, a.offset
20
+ end
21
+
22
+ def test_tsv
23
+ a = "test"
24
+ NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
25
+ assert Segment.tsv([a]).fields.include? "code"
26
+ assert Segment.tsv([a], nil).fields.include? "code"
27
+ assert Segment.tsv([a], "literal").fields.include? "code"
28
+ end
29
+ end
@@ -0,0 +1,23 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/ner/segment/segmented'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_split
6
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
7
+
8
+ gene1 = "TP53"
9
+ gene1.extend Segment
10
+ gene1.offset = a.index gene1
11
+
12
+ gene2 = "CDK5R1"
13
+ gene2.extend Segment
14
+ gene2.offset = a.index gene2
15
+
16
+ gene3 = "TP53 gene"
17
+ gene3.extend Segment
18
+ gene3.offset = a.index gene3
19
+
20
+ Segmented.setup(a, [gene2, gene1, gene3])
21
+ assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], a.split_segments
22
+ end
23
+ end
@@ -1,6 +1,6 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/ner/annotations/transformed'
3
- require 'rbbt/ner/annotations/named_entity'
2
+ require 'rbbt/ner/segment/transformed'
3
+ require 'rbbt/ner/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_transform
@@ -8,11 +8,11 @@ class TestClass < Test::Unit::TestCase
8
8
  original = a.dup
9
9
 
10
10
  gene1 = "TP53"
11
- gene1.extend NamedEntity
11
+ gene1.extend Segment
12
12
  gene1.offset = a.index gene1
13
13
 
14
14
  gene2 = "CDK5"
15
- gene2.extend NamedEntity
15
+ gene2.extend Segment
16
16
  gene2.offset = a.index gene2
17
17
 
18
18
  assert_equal gene1, a[gene1.range]
@@ -30,7 +30,7 @@ class TestClass < Test::Unit::TestCase
30
30
 
31
31
 
32
32
  gene3 = "GN gene"
33
- gene3.extend NamedEntity
33
+ gene3.extend Segment
34
34
  gene3.offset = a.index gene3
35
35
 
36
36
  assert_equal gene3, a[gene3.range]
@@ -108,7 +108,7 @@ class TestClass < Test::Unit::TestCase
108
108
 
109
109
  def test_html_with_offset
110
110
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
111
- Segment.annotate(a, 10)
111
+ Segment.setup(a, 10)
112
112
 
113
113
  gene1 = "TP53"
114
114
  gene1.extend NamedEntity
@@ -10,7 +10,21 @@ C2;11;22;3 3;bb
10
10
  EOF
11
11
 
12
12
  TmpFile.with_file(lexicon) do |file|
13
- index = NGramPrefixDictionary.new(TSV.new(file, :flat, :sep => ';'), "test")
13
+ index = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test")
14
+
15
+ matches = index.match(' asdfa dsf asdf aa asdfasdf ')
16
+ assert matches.select{|m| m.code.include? 'C1'}.any?
17
+ end
18
+ end
19
+
20
+ def test_stream
21
+ lexicon =<<-EOF
22
+ C1;aa;AA;bb b
23
+ C2;11;22;3 3;bb
24
+ EOF
25
+
26
+ TmpFile.with_file(lexicon.gsub(/;/,"\t")) do |file|
27
+ index = NGramPrefixDictionary.new(file, "test")
14
28
 
15
29
  matches = index.match(' asdfa dsf asdf aa asdfasdf ')
16
30
  assert matches.select{|m| m.code.include? 'C1'}.any?
@@ -6,33 +6,32 @@ class TestPatternRelExt < Test::Unit::TestCase
6
6
  text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
7
7
 
8
8
  gene1 = "TP53"
9
- NamedEntity.annotate(gene1, text.index(gene1), "Gene")
9
+ NamedEntity.setup(gene1, text.index(gene1), "Gene")
10
10
 
11
11
  gene2 = "CDK5"
12
- NamedEntity.annotate(gene2, text.index(gene2), "Gene")
12
+ NamedEntity.setup(gene2, text.index(gene2), "Gene")
13
13
 
14
14
  interaction = "interacts"
15
- NamedEntity.annotate(interaction, text.index(interaction), "Interaction")
15
+ NamedEntity.setup(interaction, text.index(interaction), "Interaction")
16
16
 
17
- Annotated.annotate(text, [gene1, gene2, interaction])
17
+ Segmented.setup(text, [gene1, gene2, interaction])
18
18
 
19
19
  assert_equal "TP53 interacts with CDK5", PatternRelExt.simple_pattern(text, "GENE INTERACTION with GENE").first
20
-
21
20
  end
22
21
 
23
22
  def test_chunk_pattern
24
23
  text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
25
24
 
26
25
  gene1 = "TP53"
27
- NamedEntity.annotate(gene1, text.index(gene1), "Gene")
26
+ NamedEntity.setup(gene1, text.index(gene1), "Gene")
28
27
 
29
28
  gene2 = "CDK5"
30
- NamedEntity.annotate(gene2, text.index(gene2), "Gene")
29
+ NamedEntity.setup(gene2, text.index(gene2), "Gene")
31
30
 
32
31
  interaction = "interacts"
33
- NamedEntity.annotate(interaction, text.index(interaction), "Interaction")
32
+ NamedEntity.setup(interaction, text.index(interaction), "Interaction")
34
33
 
35
- Annotated.annotate(text, {:entities => [gene1, gene2, interaction]})
34
+ Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
36
35
 
37
36
  assert_equal "TP53 found in cultivated cells interacts with CDK5",
38
37
  PatternRelExt.new("NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]").match_sentences([text]).first.first
@@ -45,12 +44,12 @@ class TestPatternRelExt < Test::Unit::TestCase
45
44
  text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
46
45
 
47
46
  drug = "thiazolidinediones"
48
- NamedEntity.annotate(drug, text.index(drug), "Chemical Mention")
47
+ NamedEntity.setup(drug, text.index(drug), "Chemical Mention")
49
48
 
50
49
  disease = "colon cancer"
51
- NamedEntity.annotate(disease, text.index(disease), "disease")
50
+ NamedEntity.setup(disease, text.index(disease), "disease")
52
51
 
53
- Annotated.annotate(text, {:entitites => [drug, disease]})
52
+ Segmented.setup(text, {:entitites => [drug, disease]})
54
53
 
55
54
  assert_equal "thiazolidinediones in patients with an increased risk of colon cancer",
56
55
  PatternRelExt.new("NP[entity:Chemical Mention] NP[stem:risk] NP[entity:disease]").match_sentences([text]).first.first
@@ -55,12 +55,13 @@ class TestRegExpNER < Test::Unit::TestCase
55
55
  assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
56
56
  assert_equal :this, matches.select{|m| m.type == :this}[0].type
57
57
 
58
- Annotated.annotate(sentence)
58
+ Segmented.setup(sentence)
59
59
  ner_this = RegExpNER.new({:this => /this/})
60
60
  ner_that = RegExpNER.new({:that => /that/})
61
- sentence.annotations += ner_this.entities(sentence)
62
- sentence.annotations += ner_that.entities(sentence)
63
- matches = sentence.annotations
61
+ sentence.segments ||= []
62
+ sentence.segments += ner_this.entities(sentence)
63
+ sentence.segments += ner_that.entities(sentence)
64
+ matches = sentence.segments
64
65
 
65
66
  assert_equal ["this", "this", "that"].sort, matches.sort
66
67
  assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
@@ -0,0 +1,101 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/ner/segment'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_info
6
+ a = "test"
7
+ a.extend Segment
8
+ a.offset = 10
9
+ assert a.info.include? :offset
10
+ end
11
+
12
+ def test_sort
13
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
14
+
15
+ gene1 = "TP53"
16
+ gene1.extend Segment
17
+ gene1.offset = a.index gene1
18
+
19
+ gene2 = "CDK5R1"
20
+ gene2.extend Segment
21
+ gene2.offset = a.index gene2
22
+
23
+ assert_equal [gene1,gene2], Segment.sort([gene2,gene1])
24
+ end
25
+
26
+ def test_clean_sort
27
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
28
+
29
+ gene1 = "TP53"
30
+ gene1.extend Segment
31
+ gene1.offset = a.index gene1
32
+
33
+ gene2 = "CDK5R1"
34
+ gene2.extend Segment
35
+ gene2.offset = a.index gene2
36
+
37
+ gene3 = "TP53 gene"
38
+ gene3.extend Segment
39
+ gene3.offset = a.index gene3
40
+
41
+ assert_equal [gene3,gene2], Segment.clean_sort([gene2,gene1,gene3])
42
+ end
43
+
44
+ def test_split
45
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
46
+
47
+ gene1 = "TP53"
48
+ gene1.extend Segment
49
+ gene1.offset = a.index gene1
50
+
51
+ gene2 = "CDK5R1"
52
+ gene2.extend Segment
53
+ gene2.offset = a.index gene2
54
+
55
+ gene3 = "TP53 gene"
56
+ gene3.extend Segment
57
+ gene3.offset = a.index gene3
58
+
59
+ assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], Segment.split(a, [gene2,gene1,gene3])
60
+ end
61
+
62
+
63
+ def test_align
64
+ text =<<-EOF
65
+ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of early childhood poorly responding to therapy.
66
+ EOF
67
+
68
+ parts = text.split(/\W/)
69
+ Segment.align(text, parts)
70
+
71
+ assert_equal "Atypical teratoid/".length, parts.select{|s| s == "rhabdoid"}.first.offset
72
+ end
73
+
74
+ def test_segment_index
75
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
76
+
77
+ gene1 = "TP53"
78
+ gene1.extend Segment
79
+ gene1.offset = a.index gene1
80
+
81
+ gene2 = "CDK5R1"
82
+ gene2.extend Segment
83
+ gene2.offset = a.index gene2
84
+
85
+ gene3 = "TP53 gene"
86
+ gene3.extend Segment
87
+ gene3.offset = a.index gene3
88
+
89
+ index = Segment.index([gene1, gene2, gene3])
90
+ assert_equal %w(CDK5R1), index[gene2.offset + 1]
91
+
92
+ TmpFile.with_file do |fwt|
93
+ index = Segment.index([gene1, gene2, gene3], fwt)
94
+ assert_equal %w(CDK5R1), index[gene2.offset + 1]
95
+ index = Segment.index([gene1, gene2, gene3], fwt)
96
+ assert_equal %w(CDK5R1), index[gene2.offset + 1]
97
+ end
98
+ end
99
+
100
+ end
101
+
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
30
30
 
31
31
  TmpFile.with_file(lexicon) do |file|
32
32
 
33
- index = TokenTrieNER.process({}, TSV.new(file, :flat, :sep => ';'))
33
+ index = TokenTrieNER.process({}, TSV.open(file, :flat, :sep => ';'))
34
34
 
35
35
  assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
36
36
  assert_equal [:END], index['aa'].keys
@@ -47,7 +47,7 @@ C2;11;22;3 3;bb
47
47
 
48
48
 
49
49
  TmpFile.with_file(lexicon) do |file|
50
- index = TokenTrieNER.process({}, TSV.new(file, :sep => ';', :type => :flat ))
50
+ index = TokenTrieNER.process({}, TSV.open(file, :sep => ';', :type => :flat ))
51
51
 
52
52
  assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
53
53
  assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
@@ -71,9 +71,8 @@ C2;11;22;3 3;bb
71
71
  EOF
72
72
 
73
73
  TmpFile.with_file(lexicon) do |file|
74
- index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'))
74
+ index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
75
75
 
76
- index.match(' asdfa dsf asdf aa asdfasdf ')
77
76
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
78
77
  end
79
78
  end
@@ -81,18 +80,19 @@ C2;11;22;3 3;bb
81
80
  def test_slack
82
81
  lexicon =<<-EOF
83
82
  C1;aa;AA;bb cc cc b
84
- C2;11;22;3 3;bb
83
+ C2;11;22;3 3;bb;bbbb
85
84
  EOF
86
85
 
87
86
  TmpFile.with_file(lexicon) do |file|
88
87
  index = TokenTrieNER.new({})
89
88
  index.slack = Proc.new{|t| t =~ /^c*$/}
90
89
 
91
- index.merge TSV.new(file, :flat, :sep => ';')
90
+ index.merge TSV.open(file, :flat, :sep => ';')
92
91
 
93
92
  assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
94
93
  assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
95
94
  assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
95
+ assert index.match(' BBBB b').select{|m| m.code.include? 'C2'}.any?
96
96
  end
97
97
  end
98
98
 
@@ -106,7 +106,7 @@ C2;11;22;3 3;bb
106
106
  index = TokenTrieNER.new({})
107
107
  index.slack = Proc.new{|t| t =~ /^c*$/}
108
108
 
109
- index.merge TSV.new(file, :flat, :sep => ';')
109
+ index.merge TSV.open(file, :flat, :sep => ';')
110
110
 
111
111
  assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
112
112
  end
@@ -126,9 +126,8 @@ C2;11;22;3 3;bb
126
126
  EOF
127
127
 
128
128
  TmpFile.with_file(lexicon) do |file|
129
- index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'), :persistence => true)
129
+ index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'), :persistence => true)
130
130
 
131
- index.match(' asdfa dsf asdf aa asdfasdf ')
132
131
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
133
132
  end
134
133
  end
data/test/test_helper.rb CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
4
 
5
5
  require 'rbbt'
6
- require 'rbbt/util/persistence'
6
+ require 'rbbt/persist'
7
7
  require 'rbbt/util/tmpfile'
8
8
  require 'rbbt/util/log'
9
9
  require 'rbbt/corpus/document_repo'
@@ -15,15 +15,15 @@ class Test::Unit::TestCase
15
15
 
16
16
  def setup
17
17
  FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
18
- Persistence.cachedir = Rbbt.tmp.test.persistence.find :user
18
+ Persist.cachedir = Rbbt.tmp.test.persistence.find :user
19
19
  end
20
20
 
21
21
  def teardown
22
22
  FileUtils.rm_rf Rbbt.tmp.test.find :user
23
- TCHash::CONNECTIONS.values.each do |c| c.close end
24
- TCHash::CONNECTIONS.clear
25
- DocumentRepo::CONNECTIONS.values.each do |c| c.close end
26
- DocumentRepo::CONNECTIONS.clear
23
+ Persist::TC_CONNECTIONS.values.each do |c| c.close end
24
+ Persist::TC_CONNECTIONS.clear
25
+ DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
26
+ DocumentRepo::TC_CONNECTIONS.clear
27
27
  end
28
28
 
29
29
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
5
- prerelease:
4
+ hash: 7
5
+ prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 5
8
+ - 6
9
9
  - 0
10
- version: 0.5.0
10
+ version: 0.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-05 00:00:00 +02:00
18
+ date: 2011-09-07 00:00:00 +02:00
19
19
  default_executable: get_ppis.rb
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -26,10 +26,12 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- hash: 3
29
+ hash: 63
30
30
  segments:
31
+ - 4
31
32
  - 0
32
- version: "0"
33
+ - 0
34
+ version: 4.0.0
33
35
  type: :runtime
34
36
  version_requirements: *id001
35
37
  - !ruby/object:Gem::Dependency
@@ -74,6 +76,20 @@ dependencies:
74
76
  version: "0"
75
77
  type: :runtime
76
78
  version_requirements: *id004
79
+ - !ruby/object:Gem::Dependency
80
+ name: rjb
81
+ prerelease: false
82
+ requirement: &id005 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ hash: 3
88
+ segments:
89
+ - 0
90
+ version: "0"
91
+ type: :runtime
92
+ version_requirements: *id005
77
93
  description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
78
94
  email: miguel.vazquez@fdi.ucm.es
79
95
  executables:
@@ -92,12 +108,6 @@ files:
92
108
  - lib/rbbt/corpus/sources/pubmed.rb
93
109
  - lib/rbbt/ner/NER.rb
94
110
  - lib/rbbt/ner/abner.rb
95
- - lib/rbbt/ner/annotations.rb
96
- - lib/rbbt/ner/annotations/annotated.rb
97
- - lib/rbbt/ner/annotations/named_entity.rb
98
- - lib/rbbt/ner/annotations/relations.rb
99
- - lib/rbbt/ner/annotations/token.rb
100
- - lib/rbbt/ner/annotations/transformed.rb
101
111
  - lib/rbbt/ner/banner.rb
102
112
  - lib/rbbt/ner/chemical_tagger.rb
103
113
  - lib/rbbt/ner/ngram_prefix_dictionary.rb
@@ -108,6 +118,12 @@ files:
108
118
  - lib/rbbt/ner/rnorm.rb
109
119
  - lib/rbbt/ner/rnorm/cue_index.rb
110
120
  - lib/rbbt/ner/rnorm/tokens.rb
121
+ - lib/rbbt/ner/segment.rb
122
+ - lib/rbbt/ner/segment/named_entity.rb
123
+ - lib/rbbt/ner/segment/relationship.rb
124
+ - lib/rbbt/ner/segment/segmented.rb
125
+ - lib/rbbt/ner/segment/token.rb
126
+ - lib/rbbt/ner/segment/transformed.rb
111
127
  - lib/rbbt/ner/token_trieNER.rb
112
128
  - lib/rbbt/nlp/genia/sentence_splitter.rb
113
129
  - lib/rbbt/nlp/nlp.rb
@@ -130,12 +146,13 @@ files:
130
146
  - test/rbbt/ner/test_regexpNER.rb
131
147
  - test/rbbt/ner/test_abner.rb
132
148
  - test/rbbt/ner/test_banner.rb
133
- - test/rbbt/ner/annotations/test_transformed.rb
134
- - test/rbbt/ner/annotations/test_named_entity.rb
149
+ - test/rbbt/ner/test_NER.rb
135
150
  - test/rbbt/ner/test_token_trieNER.rb
136
- - test/rbbt/ner/test_annotations.rb
137
151
  - test/rbbt/ner/test_patterns.rb
138
- - test/rbbt/ner/test_NER.rb
152
+ - test/rbbt/ner/segment/test_named_entity.rb
153
+ - test/rbbt/ner/segment/test_segmented.rb
154
+ - test/rbbt/ner/segment/test_transformed.rb
155
+ - test/rbbt/ner/test_segment.rb
139
156
  - test/rbbt/ner/test_rnorm.rb
140
157
  - test/rbbt/ner/test_oscar4.rb
141
158
  - test/rbbt/ner/test_chemical_tagger.rb
@@ -174,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
191
  requirements: []
175
192
 
176
193
  rubyforge_project:
177
- rubygems_version: 1.6.2
194
+ rubygems_version: 1.3.7
178
195
  signing_key:
179
196
  specification_version: 3
180
197
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
@@ -186,12 +203,13 @@ test_files:
186
203
  - test/rbbt/ner/test_regexpNER.rb
187
204
  - test/rbbt/ner/test_abner.rb
188
205
  - test/rbbt/ner/test_banner.rb
189
- - test/rbbt/ner/annotations/test_transformed.rb
190
- - test/rbbt/ner/annotations/test_named_entity.rb
206
+ - test/rbbt/ner/test_NER.rb
191
207
  - test/rbbt/ner/test_token_trieNER.rb
192
- - test/rbbt/ner/test_annotations.rb
193
208
  - test/rbbt/ner/test_patterns.rb
194
- - test/rbbt/ner/test_NER.rb
209
+ - test/rbbt/ner/segment/test_named_entity.rb
210
+ - test/rbbt/ner/segment/test_segmented.rb
211
+ - test/rbbt/ner/segment/test_transformed.rb
212
+ - test/rbbt/ner/test_segment.rb
195
213
  - test/rbbt/ner/test_rnorm.rb
196
214
  - test/rbbt/ner/test_oscar4.rb
197
215
  - test/rbbt/ner/test_chemical_tagger.rb