rbbt-text 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/corpus/corpus.rb +15 -6
- data/lib/rbbt/corpus/document.rb +100 -127
- data/lib/rbbt/corpus/document_repo.rb +72 -51
- data/lib/rbbt/ner/NER.rb +4 -4
- data/lib/rbbt/ner/abner.rb +5 -4
- data/lib/rbbt/ner/banner.rb +3 -3
- data/lib/rbbt/ner/chemical_tagger.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +45 -10
- data/lib/rbbt/ner/oscar3.rb +3 -3
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +15 -13
- data/lib/rbbt/ner/regexpNER.rb +3 -2
- data/lib/rbbt/ner/rnorm.rb +2 -2
- data/lib/rbbt/ner/rnorm/cue_index.rb +2 -2
- data/lib/rbbt/ner/{annotations.rb → segment.rb} +161 -109
- data/lib/rbbt/ner/{annotations → segment}/named_entity.rb +3 -11
- data/lib/rbbt/ner/segment/relationship.rb +20 -0
- data/lib/rbbt/ner/segment/segmented.rb +13 -0
- data/lib/rbbt/ner/segment/token.rb +24 -0
- data/lib/rbbt/ner/{annotations → segment}/transformed.rb +10 -10
- data/lib/rbbt/ner/token_trieNER.rb +30 -22
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -1
- data/lib/rbbt/nlp/nlp.rb +23 -37
- data/test/rbbt/corpus/test_document.rb +39 -37
- data/test/rbbt/ner/segment/test_named_entity.rb +29 -0
- data/test/rbbt/ner/segment/test_segmented.rb +23 -0
- data/test/rbbt/ner/{annotations → segment}/test_transformed.rb +6 -6
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +15 -1
- data/test/rbbt/ner/test_patterns.rb +11 -12
- data/test/rbbt/ner/test_regexpNER.rb +5 -4
- data/test/rbbt/ner/test_segment.rb +101 -0
- data/test/rbbt/ner/test_token_trieNER.rb +8 -9
- data/test/test_helper.rb +6 -6
- metadata +40 -22
- data/lib/rbbt/ner/annotations/annotated.rb +0 -15
- data/lib/rbbt/ner/annotations/relations.rb +0 -25
- data/lib/rbbt/ner/annotations/token.rb +0 -28
- data/test/rbbt/ner/annotations/test_named_entity.rb +0 -14
- data/test/rbbt/ner/test_annotations.rb +0 -70
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/segment'
|
3
|
+
require 'rbbt/ner/segment/named_entity'
|
4
|
+
|
5
|
+
class TestClass < Test::Unit::TestCase
|
6
|
+
def test_info
|
7
|
+
a = "test"
|
8
|
+
NamedEntity.setup a
|
9
|
+
assert(! a.info.keys.include?(:code))
|
10
|
+
a.code = 10
|
11
|
+
a.offset = 100
|
12
|
+
assert a.info.include? :code
|
13
|
+
assert a.info.include? :offset
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_all_args
|
17
|
+
a = "test"
|
18
|
+
NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
|
19
|
+
assert_equal 10, a.offset
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_tsv
|
23
|
+
a = "test"
|
24
|
+
NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
|
25
|
+
assert Segment.tsv([a]).fields.include? "code"
|
26
|
+
assert Segment.tsv([a], nil).fields.include? "code"
|
27
|
+
assert Segment.tsv([a], "literal").fields.include? "code"
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/segment/segmented'
|
3
|
+
|
4
|
+
class TestClass < Test::Unit::TestCase
|
5
|
+
def test_split
|
6
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
7
|
+
|
8
|
+
gene1 = "TP53"
|
9
|
+
gene1.extend Segment
|
10
|
+
gene1.offset = a.index gene1
|
11
|
+
|
12
|
+
gene2 = "CDK5R1"
|
13
|
+
gene2.extend Segment
|
14
|
+
gene2.offset = a.index gene2
|
15
|
+
|
16
|
+
gene3 = "TP53 gene"
|
17
|
+
gene3.extend Segment
|
18
|
+
gene3.offset = a.index gene3
|
19
|
+
|
20
|
+
Segmented.setup(a, [gene2, gene1, gene3])
|
21
|
+
assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], a.split_segments
|
22
|
+
end
|
23
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
-
require 'rbbt/ner/annotations/transformed'
|
3
|
-
require 'rbbt/ner/annotations/named_entity'
|
2
|
+
require 'rbbt/ner/segment/transformed'
|
3
|
+
require 'rbbt/ner/segment/named_entity'
|
4
4
|
|
5
5
|
class TestClass < Test::Unit::TestCase
|
6
6
|
def test_transform
|
@@ -8,11 +8,11 @@ class TestClass < Test::Unit::TestCase
|
|
8
8
|
original = a.dup
|
9
9
|
|
10
10
|
gene1 = "TP53"
|
11
|
-
gene1.extend
|
11
|
+
gene1.extend Segment
|
12
12
|
gene1.offset = a.index gene1
|
13
13
|
|
14
14
|
gene2 = "CDK5"
|
15
|
-
gene2.extend
|
15
|
+
gene2.extend Segment
|
16
16
|
gene2.offset = a.index gene2
|
17
17
|
|
18
18
|
assert_equal gene1, a[gene1.range]
|
@@ -30,7 +30,7 @@ class TestClass < Test::Unit::TestCase
|
|
30
30
|
|
31
31
|
|
32
32
|
gene3 = "GN gene"
|
33
|
-
gene3.extend
|
33
|
+
gene3.extend Segment
|
34
34
|
gene3.offset = a.index gene3
|
35
35
|
|
36
36
|
assert_equal gene3, a[gene3.range]
|
@@ -108,7 +108,7 @@ class TestClass < Test::Unit::TestCase
|
|
108
108
|
|
109
109
|
def test_html_with_offset
|
110
110
|
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
111
|
-
Segment.
|
111
|
+
Segment.setup(a, 10)
|
112
112
|
|
113
113
|
gene1 = "TP53"
|
114
114
|
gene1.extend NamedEntity
|
@@ -10,7 +10,21 @@ C2;11;22;3 3;bb
|
|
10
10
|
EOF
|
11
11
|
|
12
12
|
TmpFile.with_file(lexicon) do |file|
|
13
|
-
index = NGramPrefixDictionary.new(TSV.
|
13
|
+
index = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test")
|
14
|
+
|
15
|
+
matches = index.match(' asdfa dsf asdf aa asdfasdf ')
|
16
|
+
assert matches.select{|m| m.code.include? 'C1'}.any?
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_stream
|
21
|
+
lexicon =<<-EOF
|
22
|
+
C1;aa;AA;bb b
|
23
|
+
C2;11;22;3 3;bb
|
24
|
+
EOF
|
25
|
+
|
26
|
+
TmpFile.with_file(lexicon.gsub(/;/,"\t")) do |file|
|
27
|
+
index = NGramPrefixDictionary.new(file, "test")
|
14
28
|
|
15
29
|
matches = index.match(' asdfa dsf asdf aa asdfasdf ')
|
16
30
|
assert matches.select{|m| m.code.include? 'C1'}.any?
|
@@ -6,33 +6,32 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
6
6
|
text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
|
7
7
|
|
8
8
|
gene1 = "TP53"
|
9
|
-
NamedEntity.
|
9
|
+
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
10
10
|
|
11
11
|
gene2 = "CDK5"
|
12
|
-
NamedEntity.
|
12
|
+
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
13
13
|
|
14
14
|
interaction = "interacts"
|
15
|
-
NamedEntity.
|
15
|
+
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
16
16
|
|
17
|
-
|
17
|
+
Segmented.setup(text, [gene1, gene2, interaction])
|
18
18
|
|
19
19
|
assert_equal "TP53 interacts with CDK5", PatternRelExt.simple_pattern(text, "GENE INTERACTION with GENE").first
|
20
|
-
|
21
20
|
end
|
22
21
|
|
23
22
|
def test_chunk_pattern
|
24
23
|
text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
|
25
24
|
|
26
25
|
gene1 = "TP53"
|
27
|
-
NamedEntity.
|
26
|
+
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
28
27
|
|
29
28
|
gene2 = "CDK5"
|
30
|
-
NamedEntity.
|
29
|
+
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
31
30
|
|
32
31
|
interaction = "interacts"
|
33
|
-
NamedEntity.
|
32
|
+
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
34
33
|
|
35
|
-
|
34
|
+
Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
|
36
35
|
|
37
36
|
assert_equal "TP53 found in cultivated cells interacts with CDK5",
|
38
37
|
PatternRelExt.new("NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]").match_sentences([text]).first.first
|
@@ -45,12 +44,12 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
45
44
|
text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
|
46
45
|
|
47
46
|
drug = "thiazolidinediones"
|
48
|
-
NamedEntity.
|
47
|
+
NamedEntity.setup(drug, text.index(drug), "Chemical Mention")
|
49
48
|
|
50
49
|
disease = "colon cancer"
|
51
|
-
NamedEntity.
|
50
|
+
NamedEntity.setup(disease, text.index(disease), "disease")
|
52
51
|
|
53
|
-
|
52
|
+
Segmented.setup(text, {:entitites => [drug, disease]})
|
54
53
|
|
55
54
|
assert_equal "thiazolidinediones in patients with an increased risk of colon cancer",
|
56
55
|
PatternRelExt.new("NP[entity:Chemical Mention] NP[stem:risk] NP[entity:disease]").match_sentences([text]).first.first
|
@@ -55,12 +55,13 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
55
55
|
assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
|
56
56
|
assert_equal :this, matches.select{|m| m.type == :this}[0].type
|
57
57
|
|
58
|
-
|
58
|
+
Segmented.setup(sentence)
|
59
59
|
ner_this = RegExpNER.new({:this => /this/})
|
60
60
|
ner_that = RegExpNER.new({:that => /that/})
|
61
|
-
sentence.
|
62
|
-
sentence.
|
63
|
-
|
61
|
+
sentence.segments ||= []
|
62
|
+
sentence.segments += ner_this.entities(sentence)
|
63
|
+
sentence.segments += ner_that.entities(sentence)
|
64
|
+
matches = sentence.segments
|
64
65
|
|
65
66
|
assert_equal ["this", "this", "that"].sort, matches.sort
|
66
67
|
assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/segment'
|
3
|
+
|
4
|
+
class TestClass < Test::Unit::TestCase
|
5
|
+
def test_info
|
6
|
+
a = "test"
|
7
|
+
a.extend Segment
|
8
|
+
a.offset = 10
|
9
|
+
assert a.info.include? :offset
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_sort
|
13
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
14
|
+
|
15
|
+
gene1 = "TP53"
|
16
|
+
gene1.extend Segment
|
17
|
+
gene1.offset = a.index gene1
|
18
|
+
|
19
|
+
gene2 = "CDK5R1"
|
20
|
+
gene2.extend Segment
|
21
|
+
gene2.offset = a.index gene2
|
22
|
+
|
23
|
+
assert_equal [gene1,gene2], Segment.sort([gene2,gene1])
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_clean_sort
|
27
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
28
|
+
|
29
|
+
gene1 = "TP53"
|
30
|
+
gene1.extend Segment
|
31
|
+
gene1.offset = a.index gene1
|
32
|
+
|
33
|
+
gene2 = "CDK5R1"
|
34
|
+
gene2.extend Segment
|
35
|
+
gene2.offset = a.index gene2
|
36
|
+
|
37
|
+
gene3 = "TP53 gene"
|
38
|
+
gene3.extend Segment
|
39
|
+
gene3.offset = a.index gene3
|
40
|
+
|
41
|
+
assert_equal [gene3,gene2], Segment.clean_sort([gene2,gene1,gene3])
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_split
|
45
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
46
|
+
|
47
|
+
gene1 = "TP53"
|
48
|
+
gene1.extend Segment
|
49
|
+
gene1.offset = a.index gene1
|
50
|
+
|
51
|
+
gene2 = "CDK5R1"
|
52
|
+
gene2.extend Segment
|
53
|
+
gene2.offset = a.index gene2
|
54
|
+
|
55
|
+
gene3 = "TP53 gene"
|
56
|
+
gene3.extend Segment
|
57
|
+
gene3.offset = a.index gene3
|
58
|
+
|
59
|
+
assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], Segment.split(a, [gene2,gene1,gene3])
|
60
|
+
end
|
61
|
+
|
62
|
+
|
63
|
+
def test_align
|
64
|
+
text =<<-EOF
|
65
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of early childhood poorly responding to therapy.
|
66
|
+
EOF
|
67
|
+
|
68
|
+
parts = text.split(/\W/)
|
69
|
+
Segment.align(text, parts)
|
70
|
+
|
71
|
+
assert_equal "Atypical teratoid/".length, parts.select{|s| s == "rhabdoid"}.first.offset
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_segment_index
|
75
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
76
|
+
|
77
|
+
gene1 = "TP53"
|
78
|
+
gene1.extend Segment
|
79
|
+
gene1.offset = a.index gene1
|
80
|
+
|
81
|
+
gene2 = "CDK5R1"
|
82
|
+
gene2.extend Segment
|
83
|
+
gene2.offset = a.index gene2
|
84
|
+
|
85
|
+
gene3 = "TP53 gene"
|
86
|
+
gene3.extend Segment
|
87
|
+
gene3.offset = a.index gene3
|
88
|
+
|
89
|
+
index = Segment.index([gene1, gene2, gene3])
|
90
|
+
assert_equal %w(CDK5R1), index[gene2.offset + 1]
|
91
|
+
|
92
|
+
TmpFile.with_file do |fwt|
|
93
|
+
index = Segment.index([gene1, gene2, gene3], fwt)
|
94
|
+
assert_equal %w(CDK5R1), index[gene2.offset + 1]
|
95
|
+
index = Segment.index([gene1, gene2, gene3], fwt)
|
96
|
+
assert_equal %w(CDK5R1), index[gene2.offset + 1]
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
101
|
+
|
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
|
|
30
30
|
|
31
31
|
TmpFile.with_file(lexicon) do |file|
|
32
32
|
|
33
|
-
index = TokenTrieNER.process({}, TSV.
|
33
|
+
index = TokenTrieNER.process({}, TSV.open(file, :flat, :sep => ';'))
|
34
34
|
|
35
35
|
assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
|
36
36
|
assert_equal [:END], index['aa'].keys
|
@@ -47,7 +47,7 @@ C2;11;22;3 3;bb
|
|
47
47
|
|
48
48
|
|
49
49
|
TmpFile.with_file(lexicon) do |file|
|
50
|
-
index = TokenTrieNER.process({}, TSV.
|
50
|
+
index = TokenTrieNER.process({}, TSV.open(file, :sep => ';', :type => :flat ))
|
51
51
|
|
52
52
|
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
|
53
53
|
assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
|
@@ -71,9 +71,8 @@ C2;11;22;3 3;bb
|
|
71
71
|
EOF
|
72
72
|
|
73
73
|
TmpFile.with_file(lexicon) do |file|
|
74
|
-
index = TokenTrieNER.new("test", TSV.
|
74
|
+
index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
|
75
75
|
|
76
|
-
index.match(' asdfa dsf asdf aa asdfasdf ')
|
77
76
|
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
78
77
|
end
|
79
78
|
end
|
@@ -81,18 +80,19 @@ C2;11;22;3 3;bb
|
|
81
80
|
def test_slack
|
82
81
|
lexicon =<<-EOF
|
83
82
|
C1;aa;AA;bb cc cc b
|
84
|
-
C2;11;22;3 3;bb
|
83
|
+
C2;11;22;3 3;bb;bbbb
|
85
84
|
EOF
|
86
85
|
|
87
86
|
TmpFile.with_file(lexicon) do |file|
|
88
87
|
index = TokenTrieNER.new({})
|
89
88
|
index.slack = Proc.new{|t| t =~ /^c*$/}
|
90
89
|
|
91
|
-
index.merge TSV.
|
90
|
+
index.merge TSV.open(file, :flat, :sep => ';')
|
92
91
|
|
93
92
|
assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
|
94
93
|
assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
|
95
94
|
assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
|
95
|
+
assert index.match(' BBBB b').select{|m| m.code.include? 'C2'}.any?
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
@@ -106,7 +106,7 @@ C2;11;22;3 3;bb
|
|
106
106
|
index = TokenTrieNER.new({})
|
107
107
|
index.slack = Proc.new{|t| t =~ /^c*$/}
|
108
108
|
|
109
|
-
index.merge TSV.
|
109
|
+
index.merge TSV.open(file, :flat, :sep => ';')
|
110
110
|
|
111
111
|
assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
|
112
112
|
end
|
@@ -126,9 +126,8 @@ C2;11;22;3 3;bb
|
|
126
126
|
EOF
|
127
127
|
|
128
128
|
TmpFile.with_file(lexicon) do |file|
|
129
|
-
index = TokenTrieNER.new("test", TSV.
|
129
|
+
index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'), :persistence => true)
|
130
130
|
|
131
|
-
index.match(' asdfa dsf asdf aa asdfasdf ')
|
132
131
|
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
133
132
|
end
|
134
133
|
end
|
data/test/test_helper.rb
CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
3
3
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
4
|
|
5
5
|
require 'rbbt'
|
6
|
-
require 'rbbt/
|
6
|
+
require 'rbbt/persist'
|
7
7
|
require 'rbbt/util/tmpfile'
|
8
8
|
require 'rbbt/util/log'
|
9
9
|
require 'rbbt/corpus/document_repo'
|
@@ -15,15 +15,15 @@ class Test::Unit::TestCase
|
|
15
15
|
|
16
16
|
def setup
|
17
17
|
FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
|
18
|
-
|
18
|
+
Persist.cachedir = Rbbt.tmp.test.persistence.find :user
|
19
19
|
end
|
20
20
|
|
21
21
|
def teardown
|
22
22
|
FileUtils.rm_rf Rbbt.tmp.test.find :user
|
23
|
-
|
24
|
-
|
25
|
-
DocumentRepo::
|
26
|
-
DocumentRepo::
|
23
|
+
Persist::TC_CONNECTIONS.values.each do |c| c.close end
|
24
|
+
Persist::TC_CONNECTIONS.clear
|
25
|
+
DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
|
26
|
+
DocumentRepo::TC_CONNECTIONS.clear
|
27
27
|
end
|
28
28
|
|
29
29
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 7
|
5
|
+
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 5
|
8
|
+
- 6
|
9
9
|
- 0
|
10
|
-
version: 0.5.0
|
10
|
+
version: 0.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07
|
18
|
+
date: 2011-09-07 00:00:00 +02:00
|
19
19
|
default_executable: get_ppis.rb
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -26,10 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 63
|
30
30
|
segments:
|
31
|
+
- 4
|
31
32
|
- 0
|
32
|
-
|
33
|
+
- 0
|
34
|
+
version: 4.0.0
|
33
35
|
type: :runtime
|
34
36
|
version_requirements: *id001
|
35
37
|
- !ruby/object:Gem::Dependency
|
@@ -74,6 +76,20 @@ dependencies:
|
|
74
76
|
version: "0"
|
75
77
|
type: :runtime
|
76
78
|
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
name: rjb
|
81
|
+
prerelease: false
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :runtime
|
92
|
+
version_requirements: *id005
|
77
93
|
description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
|
78
94
|
email: miguel.vazquez@fdi.ucm.es
|
79
95
|
executables:
|
@@ -92,12 +108,6 @@ files:
|
|
92
108
|
- lib/rbbt/corpus/sources/pubmed.rb
|
93
109
|
- lib/rbbt/ner/NER.rb
|
94
110
|
- lib/rbbt/ner/abner.rb
|
95
|
-
- lib/rbbt/ner/annotations.rb
|
96
|
-
- lib/rbbt/ner/annotations/annotated.rb
|
97
|
-
- lib/rbbt/ner/annotations/named_entity.rb
|
98
|
-
- lib/rbbt/ner/annotations/relations.rb
|
99
|
-
- lib/rbbt/ner/annotations/token.rb
|
100
|
-
- lib/rbbt/ner/annotations/transformed.rb
|
101
111
|
- lib/rbbt/ner/banner.rb
|
102
112
|
- lib/rbbt/ner/chemical_tagger.rb
|
103
113
|
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
@@ -108,6 +118,12 @@ files:
|
|
108
118
|
- lib/rbbt/ner/rnorm.rb
|
109
119
|
- lib/rbbt/ner/rnorm/cue_index.rb
|
110
120
|
- lib/rbbt/ner/rnorm/tokens.rb
|
121
|
+
- lib/rbbt/ner/segment.rb
|
122
|
+
- lib/rbbt/ner/segment/named_entity.rb
|
123
|
+
- lib/rbbt/ner/segment/relationship.rb
|
124
|
+
- lib/rbbt/ner/segment/segmented.rb
|
125
|
+
- lib/rbbt/ner/segment/token.rb
|
126
|
+
- lib/rbbt/ner/segment/transformed.rb
|
111
127
|
- lib/rbbt/ner/token_trieNER.rb
|
112
128
|
- lib/rbbt/nlp/genia/sentence_splitter.rb
|
113
129
|
- lib/rbbt/nlp/nlp.rb
|
@@ -130,12 +146,13 @@ files:
|
|
130
146
|
- test/rbbt/ner/test_regexpNER.rb
|
131
147
|
- test/rbbt/ner/test_abner.rb
|
132
148
|
- test/rbbt/ner/test_banner.rb
|
133
|
-
- test/rbbt/ner/
|
134
|
-
- test/rbbt/ner/annotations/test_named_entity.rb
|
149
|
+
- test/rbbt/ner/test_NER.rb
|
135
150
|
- test/rbbt/ner/test_token_trieNER.rb
|
136
|
-
- test/rbbt/ner/test_annotations.rb
|
137
151
|
- test/rbbt/ner/test_patterns.rb
|
138
|
-
- test/rbbt/ner/
|
152
|
+
- test/rbbt/ner/segment/test_named_entity.rb
|
153
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
154
|
+
- test/rbbt/ner/segment/test_transformed.rb
|
155
|
+
- test/rbbt/ner/test_segment.rb
|
139
156
|
- test/rbbt/ner/test_rnorm.rb
|
140
157
|
- test/rbbt/ner/test_oscar4.rb
|
141
158
|
- test/rbbt/ner/test_chemical_tagger.rb
|
@@ -174,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
174
191
|
requirements: []
|
175
192
|
|
176
193
|
rubyforge_project:
|
177
|
-
rubygems_version: 1.
|
194
|
+
rubygems_version: 1.3.7
|
178
195
|
signing_key:
|
179
196
|
specification_version: 3
|
180
197
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
@@ -186,12 +203,13 @@ test_files:
|
|
186
203
|
- test/rbbt/ner/test_regexpNER.rb
|
187
204
|
- test/rbbt/ner/test_abner.rb
|
188
205
|
- test/rbbt/ner/test_banner.rb
|
189
|
-
- test/rbbt/ner/
|
190
|
-
- test/rbbt/ner/annotations/test_named_entity.rb
|
206
|
+
- test/rbbt/ner/test_NER.rb
|
191
207
|
- test/rbbt/ner/test_token_trieNER.rb
|
192
|
-
- test/rbbt/ner/test_annotations.rb
|
193
208
|
- test/rbbt/ner/test_patterns.rb
|
194
|
-
- test/rbbt/ner/
|
209
|
+
- test/rbbt/ner/segment/test_named_entity.rb
|
210
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
211
|
+
- test/rbbt/ner/segment/test_transformed.rb
|
212
|
+
- test/rbbt/ner/test_segment.rb
|
195
213
|
- test/rbbt/ner/test_rnorm.rb
|
196
214
|
- test/rbbt/ner/test_oscar4.rb
|
197
215
|
- test/rbbt/ner/test_chemical_tagger.rb
|