rbbt-text 0.5.0 → 0.6.0
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- data/lib/rbbt/corpus/corpus.rb +15 -6
- data/lib/rbbt/corpus/document.rb +100 -127
- data/lib/rbbt/corpus/document_repo.rb +72 -51
- data/lib/rbbt/ner/NER.rb +4 -4
- data/lib/rbbt/ner/abner.rb +5 -4
- data/lib/rbbt/ner/banner.rb +3 -3
- data/lib/rbbt/ner/chemical_tagger.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +45 -10
- data/lib/rbbt/ner/oscar3.rb +3 -3
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +15 -13
- data/lib/rbbt/ner/regexpNER.rb +3 -2
- data/lib/rbbt/ner/rnorm.rb +2 -2
- data/lib/rbbt/ner/rnorm/cue_index.rb +2 -2
- data/lib/rbbt/ner/{annotations.rb → segment.rb} +161 -109
- data/lib/rbbt/ner/{annotations → segment}/named_entity.rb +3 -11
- data/lib/rbbt/ner/segment/relationship.rb +20 -0
- data/lib/rbbt/ner/segment/segmented.rb +13 -0
- data/lib/rbbt/ner/segment/token.rb +24 -0
- data/lib/rbbt/ner/{annotations → segment}/transformed.rb +10 -10
- data/lib/rbbt/ner/token_trieNER.rb +30 -22
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -1
- data/lib/rbbt/nlp/nlp.rb +23 -37
- data/test/rbbt/corpus/test_document.rb +39 -37
- data/test/rbbt/ner/segment/test_named_entity.rb +29 -0
- data/test/rbbt/ner/segment/test_segmented.rb +23 -0
- data/test/rbbt/ner/{annotations → segment}/test_transformed.rb +6 -6
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +15 -1
- data/test/rbbt/ner/test_patterns.rb +11 -12
- data/test/rbbt/ner/test_regexpNER.rb +5 -4
- data/test/rbbt/ner/test_segment.rb +101 -0
- data/test/rbbt/ner/test_token_trieNER.rb +8 -9
- data/test/test_helper.rb +6 -6
- metadata +40 -22
- data/lib/rbbt/ner/annotations/annotated.rb +0 -15
- data/lib/rbbt/ner/annotations/relations.rb +0 -25
- data/lib/rbbt/ner/annotations/token.rb +0 -28
- data/test/rbbt/ner/annotations/test_named_entity.rb +0 -14
- data/test/rbbt/ner/test_annotations.rb +0 -70
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/segment'
|
3
|
+
require 'rbbt/ner/segment/named_entity'
|
4
|
+
|
5
|
+
class TestClass < Test::Unit::TestCase
|
6
|
+
def test_info
|
7
|
+
a = "test"
|
8
|
+
NamedEntity.setup a
|
9
|
+
assert(! a.info.keys.include?(:code))
|
10
|
+
a.code = 10
|
11
|
+
a.offset = 100
|
12
|
+
assert a.info.include? :code
|
13
|
+
assert a.info.include? :offset
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_all_args
|
17
|
+
a = "test"
|
18
|
+
NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
|
19
|
+
assert_equal 10, a.offset
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_tsv
|
23
|
+
a = "test"
|
24
|
+
NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
|
25
|
+
assert Segment.tsv([a]).fields.include? "code"
|
26
|
+
assert Segment.tsv([a], nil).fields.include? "code"
|
27
|
+
assert Segment.tsv([a], "literal").fields.include? "code"
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/segment/segmented'
|
3
|
+
|
4
|
+
class TestClass < Test::Unit::TestCase
|
5
|
+
def test_split
|
6
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
7
|
+
|
8
|
+
gene1 = "TP53"
|
9
|
+
gene1.extend Segment
|
10
|
+
gene1.offset = a.index gene1
|
11
|
+
|
12
|
+
gene2 = "CDK5R1"
|
13
|
+
gene2.extend Segment
|
14
|
+
gene2.offset = a.index gene2
|
15
|
+
|
16
|
+
gene3 = "TP53 gene"
|
17
|
+
gene3.extend Segment
|
18
|
+
gene3.offset = a.index gene3
|
19
|
+
|
20
|
+
Segmented.setup(a, [gene2, gene1, gene3])
|
21
|
+
assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], a.split_segments
|
22
|
+
end
|
23
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
-
require 'rbbt/ner/
|
3
|
-
require 'rbbt/ner/
|
2
|
+
require 'rbbt/ner/segment/transformed'
|
3
|
+
require 'rbbt/ner/segment/named_entity'
|
4
4
|
|
5
5
|
class TestClass < Test::Unit::TestCase
|
6
6
|
def test_transform
|
@@ -8,11 +8,11 @@ class TestClass < Test::Unit::TestCase
|
|
8
8
|
original = a.dup
|
9
9
|
|
10
10
|
gene1 = "TP53"
|
11
|
-
gene1.extend
|
11
|
+
gene1.extend Segment
|
12
12
|
gene1.offset = a.index gene1
|
13
13
|
|
14
14
|
gene2 = "CDK5"
|
15
|
-
gene2.extend
|
15
|
+
gene2.extend Segment
|
16
16
|
gene2.offset = a.index gene2
|
17
17
|
|
18
18
|
assert_equal gene1, a[gene1.range]
|
@@ -30,7 +30,7 @@ class TestClass < Test::Unit::TestCase
|
|
30
30
|
|
31
31
|
|
32
32
|
gene3 = "GN gene"
|
33
|
-
gene3.extend
|
33
|
+
gene3.extend Segment
|
34
34
|
gene3.offset = a.index gene3
|
35
35
|
|
36
36
|
assert_equal gene3, a[gene3.range]
|
@@ -108,7 +108,7 @@ class TestClass < Test::Unit::TestCase
|
|
108
108
|
|
109
109
|
def test_html_with_offset
|
110
110
|
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
111
|
-
Segment.
|
111
|
+
Segment.setup(a, 10)
|
112
112
|
|
113
113
|
gene1 = "TP53"
|
114
114
|
gene1.extend NamedEntity
|
@@ -10,7 +10,21 @@ C2;11;22;3 3;bb
|
|
10
10
|
EOF
|
11
11
|
|
12
12
|
TmpFile.with_file(lexicon) do |file|
|
13
|
-
index = NGramPrefixDictionary.new(TSV.
|
13
|
+
index = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test")
|
14
|
+
|
15
|
+
matches = index.match(' asdfa dsf asdf aa asdfasdf ')
|
16
|
+
assert matches.select{|m| m.code.include? 'C1'}.any?
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_stream
|
21
|
+
lexicon =<<-EOF
|
22
|
+
C1;aa;AA;bb b
|
23
|
+
C2;11;22;3 3;bb
|
24
|
+
EOF
|
25
|
+
|
26
|
+
TmpFile.with_file(lexicon.gsub(/;/,"\t")) do |file|
|
27
|
+
index = NGramPrefixDictionary.new(file, "test")
|
14
28
|
|
15
29
|
matches = index.match(' asdfa dsf asdf aa asdfasdf ')
|
16
30
|
assert matches.select{|m| m.code.include? 'C1'}.any?
|
@@ -6,33 +6,32 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
6
6
|
text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
|
7
7
|
|
8
8
|
gene1 = "TP53"
|
9
|
-
NamedEntity.
|
9
|
+
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
10
10
|
|
11
11
|
gene2 = "CDK5"
|
12
|
-
NamedEntity.
|
12
|
+
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
13
13
|
|
14
14
|
interaction = "interacts"
|
15
|
-
NamedEntity.
|
15
|
+
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
16
16
|
|
17
|
-
|
17
|
+
Segmented.setup(text, [gene1, gene2, interaction])
|
18
18
|
|
19
19
|
assert_equal "TP53 interacts with CDK5", PatternRelExt.simple_pattern(text, "GENE INTERACTION with GENE").first
|
20
|
-
|
21
20
|
end
|
22
21
|
|
23
22
|
def test_chunk_pattern
|
24
23
|
text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
|
25
24
|
|
26
25
|
gene1 = "TP53"
|
27
|
-
NamedEntity.
|
26
|
+
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
28
27
|
|
29
28
|
gene2 = "CDK5"
|
30
|
-
NamedEntity.
|
29
|
+
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
31
30
|
|
32
31
|
interaction = "interacts"
|
33
|
-
NamedEntity.
|
32
|
+
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
34
33
|
|
35
|
-
|
34
|
+
Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
|
36
35
|
|
37
36
|
assert_equal "TP53 found in cultivated cells interacts with CDK5",
|
38
37
|
PatternRelExt.new("NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]").match_sentences([text]).first.first
|
@@ -45,12 +44,12 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
45
44
|
text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
|
46
45
|
|
47
46
|
drug = "thiazolidinediones"
|
48
|
-
NamedEntity.
|
47
|
+
NamedEntity.setup(drug, text.index(drug), "Chemical Mention")
|
49
48
|
|
50
49
|
disease = "colon cancer"
|
51
|
-
NamedEntity.
|
50
|
+
NamedEntity.setup(disease, text.index(disease), "disease")
|
52
51
|
|
53
|
-
|
52
|
+
Segmented.setup(text, {:entitites => [drug, disease]})
|
54
53
|
|
55
54
|
assert_equal "thiazolidinediones in patients with an increased risk of colon cancer",
|
56
55
|
PatternRelExt.new("NP[entity:Chemical Mention] NP[stem:risk] NP[entity:disease]").match_sentences([text]).first.first
|
@@ -55,12 +55,13 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
55
55
|
assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
|
56
56
|
assert_equal :this, matches.select{|m| m.type == :this}[0].type
|
57
57
|
|
58
|
-
|
58
|
+
Segmented.setup(sentence)
|
59
59
|
ner_this = RegExpNER.new({:this => /this/})
|
60
60
|
ner_that = RegExpNER.new({:that => /that/})
|
61
|
-
sentence.
|
62
|
-
sentence.
|
63
|
-
|
61
|
+
sentence.segments ||= []
|
62
|
+
sentence.segments += ner_this.entities(sentence)
|
63
|
+
sentence.segments += ner_that.entities(sentence)
|
64
|
+
matches = sentence.segments
|
64
65
|
|
65
66
|
assert_equal ["this", "this", "that"].sort, matches.sort
|
66
67
|
assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/segment'
|
3
|
+
|
4
|
+
class TestClass < Test::Unit::TestCase
|
5
|
+
def test_info
|
6
|
+
a = "test"
|
7
|
+
a.extend Segment
|
8
|
+
a.offset = 10
|
9
|
+
assert a.info.include? :offset
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_sort
|
13
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
14
|
+
|
15
|
+
gene1 = "TP53"
|
16
|
+
gene1.extend Segment
|
17
|
+
gene1.offset = a.index gene1
|
18
|
+
|
19
|
+
gene2 = "CDK5R1"
|
20
|
+
gene2.extend Segment
|
21
|
+
gene2.offset = a.index gene2
|
22
|
+
|
23
|
+
assert_equal [gene1,gene2], Segment.sort([gene2,gene1])
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_clean_sort
|
27
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
28
|
+
|
29
|
+
gene1 = "TP53"
|
30
|
+
gene1.extend Segment
|
31
|
+
gene1.offset = a.index gene1
|
32
|
+
|
33
|
+
gene2 = "CDK5R1"
|
34
|
+
gene2.extend Segment
|
35
|
+
gene2.offset = a.index gene2
|
36
|
+
|
37
|
+
gene3 = "TP53 gene"
|
38
|
+
gene3.extend Segment
|
39
|
+
gene3.offset = a.index gene3
|
40
|
+
|
41
|
+
assert_equal [gene3,gene2], Segment.clean_sort([gene2,gene1,gene3])
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_split
|
45
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
46
|
+
|
47
|
+
gene1 = "TP53"
|
48
|
+
gene1.extend Segment
|
49
|
+
gene1.offset = a.index gene1
|
50
|
+
|
51
|
+
gene2 = "CDK5R1"
|
52
|
+
gene2.extend Segment
|
53
|
+
gene2.offset = a.index gene2
|
54
|
+
|
55
|
+
gene3 = "TP53 gene"
|
56
|
+
gene3.extend Segment
|
57
|
+
gene3.offset = a.index gene3
|
58
|
+
|
59
|
+
assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], Segment.split(a, [gene2,gene1,gene3])
|
60
|
+
end
|
61
|
+
|
62
|
+
|
63
|
+
def test_align
|
64
|
+
text =<<-EOF
|
65
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of early childhood poorly responding to therapy.
|
66
|
+
EOF
|
67
|
+
|
68
|
+
parts = text.split(/\W/)
|
69
|
+
Segment.align(text, parts)
|
70
|
+
|
71
|
+
assert_equal "Atypical teratoid/".length, parts.select{|s| s == "rhabdoid"}.first.offset
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_segment_index
|
75
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
76
|
+
|
77
|
+
gene1 = "TP53"
|
78
|
+
gene1.extend Segment
|
79
|
+
gene1.offset = a.index gene1
|
80
|
+
|
81
|
+
gene2 = "CDK5R1"
|
82
|
+
gene2.extend Segment
|
83
|
+
gene2.offset = a.index gene2
|
84
|
+
|
85
|
+
gene3 = "TP53 gene"
|
86
|
+
gene3.extend Segment
|
87
|
+
gene3.offset = a.index gene3
|
88
|
+
|
89
|
+
index = Segment.index([gene1, gene2, gene3])
|
90
|
+
assert_equal %w(CDK5R1), index[gene2.offset + 1]
|
91
|
+
|
92
|
+
TmpFile.with_file do |fwt|
|
93
|
+
index = Segment.index([gene1, gene2, gene3], fwt)
|
94
|
+
assert_equal %w(CDK5R1), index[gene2.offset + 1]
|
95
|
+
index = Segment.index([gene1, gene2, gene3], fwt)
|
96
|
+
assert_equal %w(CDK5R1), index[gene2.offset + 1]
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
101
|
+
|
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
|
|
30
30
|
|
31
31
|
TmpFile.with_file(lexicon) do |file|
|
32
32
|
|
33
|
-
index = TokenTrieNER.process({}, TSV.
|
33
|
+
index = TokenTrieNER.process({}, TSV.open(file, :flat, :sep => ';'))
|
34
34
|
|
35
35
|
assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
|
36
36
|
assert_equal [:END], index['aa'].keys
|
@@ -47,7 +47,7 @@ C2;11;22;3 3;bb
|
|
47
47
|
|
48
48
|
|
49
49
|
TmpFile.with_file(lexicon) do |file|
|
50
|
-
index = TokenTrieNER.process({}, TSV.
|
50
|
+
index = TokenTrieNER.process({}, TSV.open(file, :sep => ';', :type => :flat ))
|
51
51
|
|
52
52
|
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
|
53
53
|
assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
|
@@ -71,9 +71,8 @@ C2;11;22;3 3;bb
|
|
71
71
|
EOF
|
72
72
|
|
73
73
|
TmpFile.with_file(lexicon) do |file|
|
74
|
-
index = TokenTrieNER.new("test", TSV.
|
74
|
+
index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
|
75
75
|
|
76
|
-
index.match(' asdfa dsf asdf aa asdfasdf ')
|
77
76
|
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
78
77
|
end
|
79
78
|
end
|
@@ -81,18 +80,19 @@ C2;11;22;3 3;bb
|
|
81
80
|
def test_slack
|
82
81
|
lexicon =<<-EOF
|
83
82
|
C1;aa;AA;bb cc cc b
|
84
|
-
C2;11;22;3 3;bb
|
83
|
+
C2;11;22;3 3;bb;bbbb
|
85
84
|
EOF
|
86
85
|
|
87
86
|
TmpFile.with_file(lexicon) do |file|
|
88
87
|
index = TokenTrieNER.new({})
|
89
88
|
index.slack = Proc.new{|t| t =~ /^c*$/}
|
90
89
|
|
91
|
-
index.merge TSV.
|
90
|
+
index.merge TSV.open(file, :flat, :sep => ';')
|
92
91
|
|
93
92
|
assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
|
94
93
|
assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
|
95
94
|
assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
|
95
|
+
assert index.match(' BBBB b').select{|m| m.code.include? 'C2'}.any?
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
@@ -106,7 +106,7 @@ C2;11;22;3 3;bb
|
|
106
106
|
index = TokenTrieNER.new({})
|
107
107
|
index.slack = Proc.new{|t| t =~ /^c*$/}
|
108
108
|
|
109
|
-
index.merge TSV.
|
109
|
+
index.merge TSV.open(file, :flat, :sep => ';')
|
110
110
|
|
111
111
|
assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
|
112
112
|
end
|
@@ -126,9 +126,8 @@ C2;11;22;3 3;bb
|
|
126
126
|
EOF
|
127
127
|
|
128
128
|
TmpFile.with_file(lexicon) do |file|
|
129
|
-
index = TokenTrieNER.new("test", TSV.
|
129
|
+
index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'), :persistence => true)
|
130
130
|
|
131
|
-
index.match(' asdfa dsf asdf aa asdfasdf ')
|
132
131
|
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
133
132
|
end
|
134
133
|
end
|
data/test/test_helper.rb
CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
3
3
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
4
|
|
5
5
|
require 'rbbt'
|
6
|
-
require 'rbbt/
|
6
|
+
require 'rbbt/persist'
|
7
7
|
require 'rbbt/util/tmpfile'
|
8
8
|
require 'rbbt/util/log'
|
9
9
|
require 'rbbt/corpus/document_repo'
|
@@ -15,15 +15,15 @@ class Test::Unit::TestCase
|
|
15
15
|
|
16
16
|
def setup
|
17
17
|
FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
|
18
|
-
|
18
|
+
Persist.cachedir = Rbbt.tmp.test.persistence.find :user
|
19
19
|
end
|
20
20
|
|
21
21
|
def teardown
|
22
22
|
FileUtils.rm_rf Rbbt.tmp.test.find :user
|
23
|
-
|
24
|
-
|
25
|
-
DocumentRepo::
|
26
|
-
DocumentRepo::
|
23
|
+
Persist::TC_CONNECTIONS.values.each do |c| c.close end
|
24
|
+
Persist::TC_CONNECTIONS.clear
|
25
|
+
DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
|
26
|
+
DocumentRepo::TC_CONNECTIONS.clear
|
27
27
|
end
|
28
28
|
|
29
29
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 7
|
5
|
+
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 6
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-07
|
18
|
+
date: 2011-09-07 00:00:00 +02:00
|
19
19
|
default_executable: get_ppis.rb
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -26,10 +26,12 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 63
|
30
30
|
segments:
|
31
|
+
- 4
|
31
32
|
- 0
|
32
|
-
|
33
|
+
- 0
|
34
|
+
version: 4.0.0
|
33
35
|
type: :runtime
|
34
36
|
version_requirements: *id001
|
35
37
|
- !ruby/object:Gem::Dependency
|
@@ -74,6 +76,20 @@ dependencies:
|
|
74
76
|
version: "0"
|
75
77
|
type: :runtime
|
76
78
|
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
name: rjb
|
81
|
+
prerelease: false
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :runtime
|
92
|
+
version_requirements: *id005
|
77
93
|
description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
|
78
94
|
email: miguel.vazquez@fdi.ucm.es
|
79
95
|
executables:
|
@@ -92,12 +108,6 @@ files:
|
|
92
108
|
- lib/rbbt/corpus/sources/pubmed.rb
|
93
109
|
- lib/rbbt/ner/NER.rb
|
94
110
|
- lib/rbbt/ner/abner.rb
|
95
|
-
- lib/rbbt/ner/annotations.rb
|
96
|
-
- lib/rbbt/ner/annotations/annotated.rb
|
97
|
-
- lib/rbbt/ner/annotations/named_entity.rb
|
98
|
-
- lib/rbbt/ner/annotations/relations.rb
|
99
|
-
- lib/rbbt/ner/annotations/token.rb
|
100
|
-
- lib/rbbt/ner/annotations/transformed.rb
|
101
111
|
- lib/rbbt/ner/banner.rb
|
102
112
|
- lib/rbbt/ner/chemical_tagger.rb
|
103
113
|
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
@@ -108,6 +118,12 @@ files:
|
|
108
118
|
- lib/rbbt/ner/rnorm.rb
|
109
119
|
- lib/rbbt/ner/rnorm/cue_index.rb
|
110
120
|
- lib/rbbt/ner/rnorm/tokens.rb
|
121
|
+
- lib/rbbt/ner/segment.rb
|
122
|
+
- lib/rbbt/ner/segment/named_entity.rb
|
123
|
+
- lib/rbbt/ner/segment/relationship.rb
|
124
|
+
- lib/rbbt/ner/segment/segmented.rb
|
125
|
+
- lib/rbbt/ner/segment/token.rb
|
126
|
+
- lib/rbbt/ner/segment/transformed.rb
|
111
127
|
- lib/rbbt/ner/token_trieNER.rb
|
112
128
|
- lib/rbbt/nlp/genia/sentence_splitter.rb
|
113
129
|
- lib/rbbt/nlp/nlp.rb
|
@@ -130,12 +146,13 @@ files:
|
|
130
146
|
- test/rbbt/ner/test_regexpNER.rb
|
131
147
|
- test/rbbt/ner/test_abner.rb
|
132
148
|
- test/rbbt/ner/test_banner.rb
|
133
|
-
- test/rbbt/ner/
|
134
|
-
- test/rbbt/ner/annotations/test_named_entity.rb
|
149
|
+
- test/rbbt/ner/test_NER.rb
|
135
150
|
- test/rbbt/ner/test_token_trieNER.rb
|
136
|
-
- test/rbbt/ner/test_annotations.rb
|
137
151
|
- test/rbbt/ner/test_patterns.rb
|
138
|
-
- test/rbbt/ner/
|
152
|
+
- test/rbbt/ner/segment/test_named_entity.rb
|
153
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
154
|
+
- test/rbbt/ner/segment/test_transformed.rb
|
155
|
+
- test/rbbt/ner/test_segment.rb
|
139
156
|
- test/rbbt/ner/test_rnorm.rb
|
140
157
|
- test/rbbt/ner/test_oscar4.rb
|
141
158
|
- test/rbbt/ner/test_chemical_tagger.rb
|
@@ -174,7 +191,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
174
191
|
requirements: []
|
175
192
|
|
176
193
|
rubyforge_project:
|
177
|
-
rubygems_version: 1.
|
194
|
+
rubygems_version: 1.3.7
|
178
195
|
signing_key:
|
179
196
|
specification_version: 3
|
180
197
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
@@ -186,12 +203,13 @@ test_files:
|
|
186
203
|
- test/rbbt/ner/test_regexpNER.rb
|
187
204
|
- test/rbbt/ner/test_abner.rb
|
188
205
|
- test/rbbt/ner/test_banner.rb
|
189
|
-
- test/rbbt/ner/
|
190
|
-
- test/rbbt/ner/annotations/test_named_entity.rb
|
206
|
+
- test/rbbt/ner/test_NER.rb
|
191
207
|
- test/rbbt/ner/test_token_trieNER.rb
|
192
|
-
- test/rbbt/ner/test_annotations.rb
|
193
208
|
- test/rbbt/ner/test_patterns.rb
|
194
|
-
- test/rbbt/ner/
|
209
|
+
- test/rbbt/ner/segment/test_named_entity.rb
|
210
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
211
|
+
- test/rbbt/ner/segment/test_transformed.rb
|
212
|
+
- test/rbbt/ner/test_segment.rb
|
195
213
|
- test/rbbt/ner/test_rnorm.rb
|
196
214
|
- test/rbbt/ner/test_oscar4.rb
|
197
215
|
- test/rbbt/ner/test_chemical_tagger.rb
|