rbbt-text 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/lib/rbbt/corpus/corpus.rb +15 -6
  2. data/lib/rbbt/corpus/document.rb +100 -127
  3. data/lib/rbbt/corpus/document_repo.rb +72 -51
  4. data/lib/rbbt/ner/NER.rb +4 -4
  5. data/lib/rbbt/ner/abner.rb +5 -4
  6. data/lib/rbbt/ner/banner.rb +3 -3
  7. data/lib/rbbt/ner/chemical_tagger.rb +3 -3
  8. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +45 -10
  9. data/lib/rbbt/ner/oscar3.rb +3 -3
  10. data/lib/rbbt/ner/oscar4.rb +3 -3
  11. data/lib/rbbt/ner/patterns.rb +15 -13
  12. data/lib/rbbt/ner/regexpNER.rb +3 -2
  13. data/lib/rbbt/ner/rnorm.rb +2 -2
  14. data/lib/rbbt/ner/rnorm/cue_index.rb +2 -2
  15. data/lib/rbbt/ner/{annotations.rb → segment.rb} +161 -109
  16. data/lib/rbbt/ner/{annotations → segment}/named_entity.rb +3 -11
  17. data/lib/rbbt/ner/segment/relationship.rb +20 -0
  18. data/lib/rbbt/ner/segment/segmented.rb +13 -0
  19. data/lib/rbbt/ner/segment/token.rb +24 -0
  20. data/lib/rbbt/ner/{annotations → segment}/transformed.rb +10 -10
  21. data/lib/rbbt/ner/token_trieNER.rb +30 -22
  22. data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -1
  23. data/lib/rbbt/nlp/nlp.rb +23 -37
  24. data/test/rbbt/corpus/test_document.rb +39 -37
  25. data/test/rbbt/ner/segment/test_named_entity.rb +29 -0
  26. data/test/rbbt/ner/segment/test_segmented.rb +23 -0
  27. data/test/rbbt/ner/{annotations → segment}/test_transformed.rb +6 -6
  28. data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +15 -1
  29. data/test/rbbt/ner/test_patterns.rb +11 -12
  30. data/test/rbbt/ner/test_regexpNER.rb +5 -4
  31. data/test/rbbt/ner/test_segment.rb +101 -0
  32. data/test/rbbt/ner/test_token_trieNER.rb +8 -9
  33. data/test/test_helper.rb +6 -6
  34. metadata +40 -22
  35. data/lib/rbbt/ner/annotations/annotated.rb +0 -15
  36. data/lib/rbbt/ner/annotations/relations.rb +0 -25
  37. data/lib/rbbt/ner/annotations/token.rb +0 -28
  38. data/test/rbbt/ner/annotations/test_named_entity.rb +0 -14
  39. data/test/rbbt/ner/test_annotations.rb +0 -70
@@ -0,0 +1,29 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/ner/segment'
3
+ require 'rbbt/ner/segment/named_entity'
4
+
5
+ class TestClass < Test::Unit::TestCase
6
+ def test_info
7
+ a = "test"
8
+ NamedEntity.setup a
9
+ assert(! a.info.keys.include?(:code))
10
+ a.code = 10
11
+ a.offset = 100
12
+ assert a.info.include? :code
13
+ assert a.info.include? :offset
14
+ end
15
+
16
+ def test_all_args
17
+ a = "test"
18
+ NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
19
+ assert_equal 10, a.offset
20
+ end
21
+
22
+ def test_tsv
23
+ a = "test"
24
+ NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
25
+ assert Segment.tsv([a]).fields.include? "code"
26
+ assert Segment.tsv([a], nil).fields.include? "code"
27
+ assert Segment.tsv([a], "literal").fields.include? "code"
28
+ end
29
+ end
@@ -0,0 +1,23 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/ner/segment/segmented'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_split
6
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
7
+
8
+ gene1 = "TP53"
9
+ gene1.extend Segment
10
+ gene1.offset = a.index gene1
11
+
12
+ gene2 = "CDK5R1"
13
+ gene2.extend Segment
14
+ gene2.offset = a.index gene2
15
+
16
+ gene3 = "TP53 gene"
17
+ gene3.extend Segment
18
+ gene3.offset = a.index gene3
19
+
20
+ Segmented.setup(a, [gene2, gene1, gene3])
21
+ assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], a.split_segments
22
+ end
23
+ end
@@ -1,6 +1,6 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/ner/annotations/transformed'
3
- require 'rbbt/ner/annotations/named_entity'
2
+ require 'rbbt/ner/segment/transformed'
3
+ require 'rbbt/ner/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_transform
@@ -8,11 +8,11 @@ class TestClass < Test::Unit::TestCase
8
8
  original = a.dup
9
9
 
10
10
  gene1 = "TP53"
11
- gene1.extend NamedEntity
11
+ gene1.extend Segment
12
12
  gene1.offset = a.index gene1
13
13
 
14
14
  gene2 = "CDK5"
15
- gene2.extend NamedEntity
15
+ gene2.extend Segment
16
16
  gene2.offset = a.index gene2
17
17
 
18
18
  assert_equal gene1, a[gene1.range]
@@ -30,7 +30,7 @@ class TestClass < Test::Unit::TestCase
30
30
 
31
31
 
32
32
  gene3 = "GN gene"
33
- gene3.extend NamedEntity
33
+ gene3.extend Segment
34
34
  gene3.offset = a.index gene3
35
35
 
36
36
  assert_equal gene3, a[gene3.range]
@@ -108,7 +108,7 @@ class TestClass < Test::Unit::TestCase
108
108
 
109
109
  def test_html_with_offset
110
110
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
111
- Segment.annotate(a, 10)
111
+ Segment.setup(a, 10)
112
112
 
113
113
  gene1 = "TP53"
114
114
  gene1.extend NamedEntity
@@ -10,7 +10,21 @@ C2;11;22;3 3;bb
10
10
  EOF
11
11
 
12
12
  TmpFile.with_file(lexicon) do |file|
13
- index = NGramPrefixDictionary.new(TSV.new(file, :flat, :sep => ';'), "test")
13
+ index = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test")
14
+
15
+ matches = index.match(' asdfa dsf asdf aa asdfasdf ')
16
+ assert matches.select{|m| m.code.include? 'C1'}.any?
17
+ end
18
+ end
19
+
20
+ def test_stream
21
+ lexicon =<<-EOF
22
+ C1;aa;AA;bb b
23
+ C2;11;22;3 3;bb
24
+ EOF
25
+
26
+ TmpFile.with_file(lexicon.gsub(/;/,"\t")) do |file|
27
+ index = NGramPrefixDictionary.new(file, "test")
14
28
 
15
29
  matches = index.match(' asdfa dsf asdf aa asdfasdf ')
16
30
  assert matches.select{|m| m.code.include? 'C1'}.any?
@@ -6,33 +6,32 @@ class TestPatternRelExt < Test::Unit::TestCase
6
6
  text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
7
7
 
8
8
  gene1 = "TP53"
9
- NamedEntity.annotate(gene1, text.index(gene1), "Gene")
9
+ NamedEntity.setup(gene1, text.index(gene1), "Gene")
10
10
 
11
11
  gene2 = "CDK5"
12
- NamedEntity.annotate(gene2, text.index(gene2), "Gene")
12
+ NamedEntity.setup(gene2, text.index(gene2), "Gene")
13
13
 
14
14
  interaction = "interacts"
15
- NamedEntity.annotate(interaction, text.index(interaction), "Interaction")
15
+ NamedEntity.setup(interaction, text.index(interaction), "Interaction")
16
16
 
17
- Annotated.annotate(text, [gene1, gene2, interaction])
17
+ Segmented.setup(text, [gene1, gene2, interaction])
18
18
 
19
19
  assert_equal "TP53 interacts with CDK5", PatternRelExt.simple_pattern(text, "GENE INTERACTION with GENE").first
20
-
21
20
  end
22
21
 
23
22
  def test_chunk_pattern
24
23
  text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
25
24
 
26
25
  gene1 = "TP53"
27
- NamedEntity.annotate(gene1, text.index(gene1), "Gene")
26
+ NamedEntity.setup(gene1, text.index(gene1), "Gene")
28
27
 
29
28
  gene2 = "CDK5"
30
- NamedEntity.annotate(gene2, text.index(gene2), "Gene")
29
+ NamedEntity.setup(gene2, text.index(gene2), "Gene")
31
30
 
32
31
  interaction = "interacts"
33
- NamedEntity.annotate(interaction, text.index(interaction), "Interaction")
32
+ NamedEntity.setup(interaction, text.index(interaction), "Interaction")
34
33
 
35
- Annotated.annotate(text, {:entities => [gene1, gene2, interaction]})
34
+ Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
36
35
 
37
36
  assert_equal "TP53 found in cultivated cells interacts with CDK5",
38
37
  PatternRelExt.new("NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]").match_sentences([text]).first.first
@@ -45,12 +44,12 @@ class TestPatternRelExt < Test::Unit::TestCase
45
44
  text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
46
45
 
47
46
  drug = "thiazolidinediones"
48
- NamedEntity.annotate(drug, text.index(drug), "Chemical Mention")
47
+ NamedEntity.setup(drug, text.index(drug), "Chemical Mention")
49
48
 
50
49
  disease = "colon cancer"
51
- NamedEntity.annotate(disease, text.index(disease), "disease")
50
+ NamedEntity.setup(disease, text.index(disease), "disease")
52
51
 
53
- Annotated.annotate(text, {:entitites => [drug, disease]})
52
+ Segmented.setup(text, {:entitites => [drug, disease]})
54
53
 
55
54
  assert_equal "thiazolidinediones in patients with an increased risk of colon cancer",
56
55
  PatternRelExt.new("NP[entity:Chemical Mention] NP[stem:risk] NP[entity:disease]").match_sentences([text]).first.first
@@ -55,12 +55,13 @@ class TestRegExpNER < Test::Unit::TestCase
55
55
  assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
56
56
  assert_equal :this, matches.select{|m| m.type == :this}[0].type
57
57
 
58
- Annotated.annotate(sentence)
58
+ Segmented.setup(sentence)
59
59
  ner_this = RegExpNER.new({:this => /this/})
60
60
  ner_that = RegExpNER.new({:that => /that/})
61
- sentence.annotations += ner_this.entities(sentence)
62
- sentence.annotations += ner_that.entities(sentence)
63
- matches = sentence.annotations
61
+ sentence.segments ||= []
62
+ sentence.segments += ner_this.entities(sentence)
63
+ sentence.segments += ner_that.entities(sentence)
64
+ matches = sentence.segments
64
65
 
65
66
  assert_equal ["this", "this", "that"].sort, matches.sort
66
67
  assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
@@ -0,0 +1,101 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/ner/segment'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_info
6
+ a = "test"
7
+ a.extend Segment
8
+ a.offset = 10
9
+ assert a.info.include? :offset
10
+ end
11
+
12
+ def test_sort
13
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
14
+
15
+ gene1 = "TP53"
16
+ gene1.extend Segment
17
+ gene1.offset = a.index gene1
18
+
19
+ gene2 = "CDK5R1"
20
+ gene2.extend Segment
21
+ gene2.offset = a.index gene2
22
+
23
+ assert_equal [gene1,gene2], Segment.sort([gene2,gene1])
24
+ end
25
+
26
+ def test_clean_sort
27
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
28
+
29
+ gene1 = "TP53"
30
+ gene1.extend Segment
31
+ gene1.offset = a.index gene1
32
+
33
+ gene2 = "CDK5R1"
34
+ gene2.extend Segment
35
+ gene2.offset = a.index gene2
36
+
37
+ gene3 = "TP53 gene"
38
+ gene3.extend Segment
39
+ gene3.offset = a.index gene3
40
+
41
+ assert_equal [gene3,gene2], Segment.clean_sort([gene2,gene1,gene3])
42
+ end
43
+
44
+ def test_split
45
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
46
+
47
+ gene1 = "TP53"
48
+ gene1.extend Segment
49
+ gene1.offset = a.index gene1
50
+
51
+ gene2 = "CDK5R1"
52
+ gene2.extend Segment
53
+ gene2.offset = a.index gene2
54
+
55
+ gene3 = "TP53 gene"
56
+ gene3.extend Segment
57
+ gene3.offset = a.index gene3
58
+
59
+ assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], Segment.split(a, [gene2,gene1,gene3])
60
+ end
61
+
62
+
63
+ def test_align
64
+ text =<<-EOF
65
+ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of early childhood poorly responding to therapy.
66
+ EOF
67
+
68
+ parts = text.split(/\W/)
69
+ Segment.align(text, parts)
70
+
71
+ assert_equal "Atypical teratoid/".length, parts.select{|s| s == "rhabdoid"}.first.offset
72
+ end
73
+
74
+ def test_segment_index
75
+ a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
76
+
77
+ gene1 = "TP53"
78
+ gene1.extend Segment
79
+ gene1.offset = a.index gene1
80
+
81
+ gene2 = "CDK5R1"
82
+ gene2.extend Segment
83
+ gene2.offset = a.index gene2
84
+
85
+ gene3 = "TP53 gene"
86
+ gene3.extend Segment
87
+ gene3.offset = a.index gene3
88
+
89
+ index = Segment.index([gene1, gene2, gene3])
90
+ assert_equal %w(CDK5R1), index[gene2.offset + 1]
91
+
92
+ TmpFile.with_file do |fwt|
93
+ index = Segment.index([gene1, gene2, gene3], fwt)
94
+ assert_equal %w(CDK5R1), index[gene2.offset + 1]
95
+ index = Segment.index([gene1, gene2, gene3], fwt)
96
+ assert_equal %w(CDK5R1), index[gene2.offset + 1]
97
+ end
98
+ end
99
+
100
+ end
101
+
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
30
30
 
31
31
  TmpFile.with_file(lexicon) do |file|
32
32
 
33
- index = TokenTrieNER.process({}, TSV.new(file, :flat, :sep => ';'))
33
+ index = TokenTrieNER.process({}, TSV.open(file, :flat, :sep => ';'))
34
34
 
35
35
  assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
36
36
  assert_equal [:END], index['aa'].keys
@@ -47,7 +47,7 @@ C2;11;22;3 3;bb
47
47
 
48
48
 
49
49
  TmpFile.with_file(lexicon) do |file|
50
- index = TokenTrieNER.process({}, TSV.new(file, :sep => ';', :type => :flat ))
50
+ index = TokenTrieNER.process({}, TSV.open(file, :sep => ';', :type => :flat ))
51
51
 
52
52
  assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
53
53
  assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
@@ -71,9 +71,8 @@ C2;11;22;3 3;bb
71
71
  EOF
72
72
 
73
73
  TmpFile.with_file(lexicon) do |file|
74
- index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'))
74
+ index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
75
75
 
76
- index.match(' asdfa dsf asdf aa asdfasdf ')
77
76
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
78
77
  end
79
78
  end
@@ -81,18 +80,19 @@ C2;11;22;3 3;bb
81
80
  def test_slack
82
81
  lexicon =<<-EOF
83
82
  C1;aa;AA;bb cc cc b
84
- C2;11;22;3 3;bb
83
+ C2;11;22;3 3;bb;bbbb
85
84
  EOF
86
85
 
87
86
  TmpFile.with_file(lexicon) do |file|
88
87
  index = TokenTrieNER.new({})
89
88
  index.slack = Proc.new{|t| t =~ /^c*$/}
90
89
 
91
- index.merge TSV.new(file, :flat, :sep => ';')
90
+ index.merge TSV.open(file, :flat, :sep => ';')
92
91
 
93
92
  assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
94
93
  assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
95
94
  assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
95
+ assert index.match(' BBBB b').select{|m| m.code.include? 'C2'}.any?
96
96
  end
97
97
  end
98
98
 
@@ -106,7 +106,7 @@ C2;11;22;3 3;bb
106
106
  index = TokenTrieNER.new({})
107
107
  index.slack = Proc.new{|t| t =~ /^c*$/}
108
108
 
109
- index.merge TSV.new(file, :flat, :sep => ';')
109
+ index.merge TSV.open(file, :flat, :sep => ';')
110
110
 
111
111
  assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
112
112
  end
@@ -126,9 +126,8 @@ C2;11;22;3 3;bb
126
126
  EOF
127
127
 
128
128
  TmpFile.with_file(lexicon) do |file|
129
- index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'), :persistence => true)
129
+ index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'), :persistence => true)
130
130
 
131
- index.match(' asdfa dsf asdf aa asdfasdf ')
132
131
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
133
132
  end
134
133
  end
data/test/test_helper.rb CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
4
 
5
5
  require 'rbbt'
6
- require 'rbbt/util/persistence'
6
+ require 'rbbt/persist'
7
7
  require 'rbbt/util/tmpfile'
8
8
  require 'rbbt/util/log'
9
9
  require 'rbbt/corpus/document_repo'
@@ -15,15 +15,15 @@ class Test::Unit::TestCase
15
15
 
16
16
  def setup
17
17
  FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
18
- Persistence.cachedir = Rbbt.tmp.test.persistence.find :user
18
+ Persist.cachedir = Rbbt.tmp.test.persistence.find :user
19
19
  end
20
20
 
21
21
  def teardown
22
22
  FileUtils.rm_rf Rbbt.tmp.test.find :user
23
- TCHash::CONNECTIONS.values.each do |c| c.close end
24
- TCHash::CONNECTIONS.clear
25
- DocumentRepo::CONNECTIONS.values.each do |c| c.close end
26
- DocumentRepo::CONNECTIONS.clear
23
+ Persist::TC_CONNECTIONS.values.each do |c| c.close end
24
+ Persist::TC_CONNECTIONS.clear
25
+ DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
26
+ DocumentRepo::TC_CONNECTIONS.clear
27
27
  end
28
28
 
29
29
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
5
- prerelease:
4
+ hash: 7
5
+ prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 5
8
+ - 6
9
9
  - 0
10
- version: 0.5.0
10
+ version: 0.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-05 00:00:00 +02:00
18
+ date: 2011-09-07 00:00:00 +02:00
19
19
  default_executable: get_ppis.rb
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -26,10 +26,12 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- hash: 3
29
+ hash: 63
30
30
  segments:
31
+ - 4
31
32
  - 0
32
- version: "0"
33
+ - 0
34
+ version: 4.0.0
33
35
  type: :runtime
34
36
  version_requirements: *id001
35
37
  - !ruby/object:Gem::Dependency
@@ -74,6 +76,20 @@ dependencies:
74
76
  version: "0"
75
77
  type: :runtime
76
78
  version_requirements: *id004
79
+ - !ruby/object:Gem::Dependency
80
+ name: rjb
81
+ prerelease: false
82
+ requirement: &id005 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ hash: 3
88
+ segments:
89
+ - 0
90
+ version: "0"
91
+ type: :runtime
92
+ version_requirements: *id005
77
93
  description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
78
94
  email: miguel.vazquez@fdi.ucm.es
79
95
  executables:
@@ -92,12 +108,6 @@ files:
92
108
  - lib/rbbt/corpus/sources/pubmed.rb
93
109
  - lib/rbbt/ner/NER.rb
94
110
  - lib/rbbt/ner/abner.rb
95
- - lib/rbbt/ner/annotations.rb
96
- - lib/rbbt/ner/annotations/annotated.rb
97
- - lib/rbbt/ner/annotations/named_entity.rb
98
- - lib/rbbt/ner/annotations/relations.rb
99
- - lib/rbbt/ner/annotations/token.rb
100
- - lib/rbbt/ner/annotations/transformed.rb
101
111
  - lib/rbbt/ner/banner.rb
102
112
  - lib/rbbt/ner/chemical_tagger.rb
103
113
  - lib/rbbt/ner/ngram_prefix_dictionary.rb
@@ -108,6 +118,12 @@ files:
108
118
  - lib/rbbt/ner/rnorm.rb
109
119
  - lib/rbbt/ner/rnorm/cue_index.rb
110
120
  - lib/rbbt/ner/rnorm/tokens.rb
121
+ - lib/rbbt/ner/segment.rb
122
+ - lib/rbbt/ner/segment/named_entity.rb
123
+ - lib/rbbt/ner/segment/relationship.rb
124
+ - lib/rbbt/ner/segment/segmented.rb
125
+ - lib/rbbt/ner/segment/token.rb
126
+ - lib/rbbt/ner/segment/transformed.rb
111
127
  - lib/rbbt/ner/token_trieNER.rb
112
128
  - lib/rbbt/nlp/genia/sentence_splitter.rb
113
129
  - lib/rbbt/nlp/nlp.rb
@@ -130,12 +146,13 @@ files:
130
146
  - test/rbbt/ner/test_regexpNER.rb
131
147
  - test/rbbt/ner/test_abner.rb
132
148
  - test/rbbt/ner/test_banner.rb
133
- - test/rbbt/ner/annotations/test_transformed.rb
134
- - test/rbbt/ner/annotations/test_named_entity.rb
149
+ - test/rbbt/ner/test_NER.rb
135
150
  - test/rbbt/ner/test_token_trieNER.rb
136
- - test/rbbt/ner/test_annotations.rb
137
151
  - test/rbbt/ner/test_patterns.rb
138
- - test/rbbt/ner/test_NER.rb
152
+ - test/rbbt/ner/segment/test_named_entity.rb
153
+ - test/rbbt/ner/segment/test_segmented.rb
154
+ - test/rbbt/ner/segment/test_transformed.rb
155
+ - test/rbbt/ner/test_segment.rb
139
156
  - test/rbbt/ner/test_rnorm.rb
140
157
  - test/rbbt/ner/test_oscar4.rb
141
158
  - test/rbbt/ner/test_chemical_tagger.rb
@@ -174,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
174
191
  requirements: []
175
192
 
176
193
  rubyforge_project:
177
- rubygems_version: 1.6.2
194
+ rubygems_version: 1.3.7
178
195
  signing_key:
179
196
  specification_version: 3
180
197
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
@@ -186,12 +203,13 @@ test_files:
186
203
  - test/rbbt/ner/test_regexpNER.rb
187
204
  - test/rbbt/ner/test_abner.rb
188
205
  - test/rbbt/ner/test_banner.rb
189
- - test/rbbt/ner/annotations/test_transformed.rb
190
- - test/rbbt/ner/annotations/test_named_entity.rb
206
+ - test/rbbt/ner/test_NER.rb
191
207
  - test/rbbt/ner/test_token_trieNER.rb
192
- - test/rbbt/ner/test_annotations.rb
193
208
  - test/rbbt/ner/test_patterns.rb
194
- - test/rbbt/ner/test_NER.rb
209
+ - test/rbbt/ner/segment/test_named_entity.rb
210
+ - test/rbbt/ner/segment/test_segmented.rb
211
+ - test/rbbt/ner/segment/test_transformed.rb
212
+ - test/rbbt/ner/test_segment.rb
195
213
  - test/rbbt/ner/test_rnorm.rb
196
214
  - test/rbbt/ner/test_oscar4.rb
197
215
  - test/rbbt/ner/test_chemical_tagger.rb