rbbt-text 0.2.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/get_ppis.rb +52 -0
- data/lib/rbbt/bow/dictionary.rb +9 -9
- data/lib/rbbt/bow/misc.rb +86 -2
- data/lib/rbbt/corpus/corpus.rb +55 -0
- data/lib/rbbt/corpus/document.rb +289 -0
- data/lib/rbbt/corpus/document_repo.rb +115 -0
- data/lib/rbbt/corpus/sources/pubmed.rb +26 -0
- data/lib/rbbt/ner/NER.rb +7 -5
- data/lib/rbbt/ner/abner.rb +13 -2
- data/lib/rbbt/ner/annotations.rb +182 -51
- data/lib/rbbt/ner/annotations/annotated.rb +15 -0
- data/lib/rbbt/ner/annotations/named_entity.rb +37 -0
- data/lib/rbbt/ner/annotations/relations.rb +25 -0
- data/lib/rbbt/ner/annotations/token.rb +28 -0
- data/lib/rbbt/ner/annotations/transformed.rb +170 -0
- data/lib/rbbt/ner/banner.rb +8 -5
- data/lib/rbbt/ner/chemical_tagger.rb +34 -0
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +136 -0
- data/lib/rbbt/ner/oscar3.rb +1 -1
- data/lib/rbbt/ner/oscar4.rb +41 -0
- data/lib/rbbt/ner/patterns.rb +132 -0
- data/lib/rbbt/ner/rnorm.rb +141 -0
- data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
- data/lib/rbbt/ner/rnorm/tokens.rb +218 -0
- data/lib/rbbt/ner/token_trieNER.rb +185 -51
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +214 -0
- data/lib/rbbt/nlp/nlp.rb +235 -0
- data/share/install/software/ABNER +0 -4
- data/share/install/software/ChemicalTagger +81 -0
- data/share/install/software/Gdep +115 -0
- data/share/install/software/Geniass +118 -0
- data/share/install/software/OSCAR4 +16 -0
- data/share/install/software/StanfordParser +15 -0
- data/share/patterns/drug_induce_disease +22 -0
- data/share/rnorm/cue_default +10 -0
- data/share/rnorm/tokens_default +86 -0
- data/share/{stopwords → wordlists/stopwords} +0 -0
- data/test/rbbt/bow/test_bow.rb +1 -1
- data/test/rbbt/bow/test_dictionary.rb +1 -1
- data/test/rbbt/bow/test_misc.rb +1 -1
- data/test/rbbt/corpus/test_corpus.rb +99 -0
- data/test/rbbt/corpus/test_document.rb +222 -0
- data/test/rbbt/ner/annotations/test_named_entity.rb +14 -0
- data/test/rbbt/ner/annotations/test_transformed.rb +175 -0
- data/test/rbbt/ner/test_abner.rb +1 -1
- data/test/rbbt/ner/test_annotations.rb +64 -2
- data/test/rbbt/ner/test_banner.rb +1 -1
- data/test/rbbt/ner/test_chemical_tagger.rb +56 -0
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +20 -0
- data/test/rbbt/ner/{test_oscar3.rb → test_oscar4.rb} +12 -13
- data/test/rbbt/ner/test_patterns.rb +66 -0
- data/test/rbbt/ner/test_regexpNER.rb +1 -1
- data/test/rbbt/ner/test_rnorm.rb +47 -0
- data/test/rbbt/ner/test_token_trieNER.rb +60 -35
- data/test/rbbt/nlp/test_nlp.rb +88 -0
- data/test/test_helper.rb +20 -0
- metadata +93 -20
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/rnorm'
|
3
|
+
require 'rbbt/util/open'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
class TestRNORM < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup
|
10
|
+
tmp = TmpFile.tmp_file("test-rnorm-")
|
11
|
+
lexicon =<<-EOT
|
12
|
+
S000000029 YAL031C GIP4 FUN21
|
13
|
+
S000000030 YAL032C PRP45 FUN20
|
14
|
+
S000000031 YAL033W POP5 FUN53
|
15
|
+
S000000374 YBR170C NPL4 HRD4
|
16
|
+
S000000375 GENE1 BBB CCC
|
17
|
+
S000000376 AAA GENE1 DDD
|
18
|
+
EOT
|
19
|
+
|
20
|
+
Open.write(tmp, lexicon)
|
21
|
+
|
22
|
+
@norm = Normalizer.new(tmp)
|
23
|
+
FileUtils.rm tmp
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_match
|
27
|
+
assert_equal(["S000000029"], @norm.match("FUN21"))
|
28
|
+
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
|
29
|
+
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
|
30
|
+
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
|
31
|
+
assert_equal([], @norm.match("GER4"))
|
32
|
+
|
33
|
+
@norm.match("FUN21")
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_select
|
37
|
+
assert_equal(["S000000029"], @norm.select(["S000000030", "S000000029", "S000000031"],"FUN 21"))
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_resolve
|
41
|
+
assert_equal(["S000000029"], @norm.resolve("FUN 21"))
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_order
|
45
|
+
assert_equal(["S000000375"], @norm.resolve("GENE1"))
|
46
|
+
end
|
47
|
+
end
|
@@ -10,16 +10,16 @@ class TestTokenTrieNER < Test::Unit::TestCase
|
|
10
10
|
assert_equal 10, TokenTrieNER.tokenize('123456789 12345').last.offset
|
11
11
|
assert_equal 0, TokenTrieNER.tokenize('123456789 12345').first.offset
|
12
12
|
|
13
|
-
|
14
13
|
text = '123456789 12345'
|
15
14
|
assert_equal '12345', text[TokenTrieNER.tokenize('123456789 12345').last.range]
|
16
15
|
end
|
17
16
|
|
18
17
|
def test_merge
|
19
18
|
tokens = %w(a b c)
|
20
|
-
|
19
|
+
tokens.extend TokenTrieNER::EnumeratedArray
|
20
|
+
index = {'a' => {'b' => {'c' => {:END => [TokenTrieNER::Code.new('CODE')]}}}}
|
21
21
|
|
22
|
-
assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.
|
22
|
+
assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.code
|
23
23
|
end
|
24
24
|
|
25
25
|
def test_process
|
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
|
|
30
30
|
|
31
31
|
TmpFile.with_file(lexicon) do |file|
|
32
32
|
|
33
|
-
index = TokenTrieNER.process(TSV.new(file, :sep => ';'
|
33
|
+
index = TokenTrieNER.process({}, TSV.new(file, :flat, :sep => ';'))
|
34
34
|
|
35
35
|
assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
|
36
36
|
assert_equal [:END], index['aa'].keys
|
@@ -47,20 +47,20 @@ C2;11;22;3 3;bb
|
|
47
47
|
|
48
48
|
|
49
49
|
TmpFile.with_file(lexicon) do |file|
|
50
|
-
index = TokenTrieNER.process(TSV.new(file, :sep => ';', :
|
50
|
+
index = TokenTrieNER.process({}, TSV.new(file, :sep => ';', :type => :flat ))
|
51
51
|
|
52
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).first.collect{|c| c.
|
53
|
-
assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).last
|
52
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
|
53
|
+
assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
|
54
54
|
|
55
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), true).first.collect{|c| c.
|
55
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
|
56
56
|
|
57
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).first.collect{|c| c.
|
58
|
-
assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).last
|
57
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
|
58
|
+
assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).last
|
59
59
|
|
60
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).first.collect{|c| c.
|
61
|
-
assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).last
|
60
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
|
61
|
+
assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).last
|
62
62
|
|
63
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf'), false).first.collect{|c| c.
|
63
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
@@ -71,42 +71,67 @@ C2;11;22;3 3;bb
|
|
71
71
|
EOF
|
72
72
|
|
73
73
|
TmpFile.with_file(lexicon) do |file|
|
74
|
-
index = TokenTrieNER.new(file,
|
74
|
+
index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'))
|
75
75
|
|
76
|
+
index.match(' asdfa dsf asdf aa asdfasdf ')
|
76
77
|
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
77
78
|
end
|
78
79
|
end
|
79
80
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
81
|
+
def test_slack
|
82
|
+
lexicon =<<-EOF
|
83
|
+
C1;aa;AA;bb cc cc b
|
84
|
+
C2;11;22;3 3;bb
|
85
|
+
EOF
|
86
|
+
|
87
|
+
TmpFile.with_file(lexicon) do |file|
|
88
|
+
index = TokenTrieNER.new({})
|
89
|
+
index.slack = Proc.new{|t| t =~ /^c*$/}
|
90
|
+
|
91
|
+
index.merge TSV.new(file, :flat, :sep => ';')
|
92
|
+
|
93
|
+
assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
|
94
|
+
assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
|
95
|
+
assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
|
87
96
|
end
|
97
|
+
end
|
88
98
|
|
89
|
-
|
99
|
+
def test_own_tokens
|
100
|
+
lexicon =<<-EOF
|
101
|
+
C1;aa;AA;bb cc cc b
|
102
|
+
C2;11;22;3 3;bb
|
103
|
+
EOF
|
90
104
|
|
91
|
-
|
92
|
-
|
105
|
+
TmpFile.with_file(lexicon) do |file|
|
106
|
+
index = TokenTrieNER.new({})
|
107
|
+
index.slack = Proc.new{|t| t =~ /^c*$/}
|
93
108
|
|
94
|
-
|
95
|
-
assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
|
109
|
+
index.merge TSV.new(file, :flat, :sep => ';')
|
96
110
|
|
97
|
-
|
98
|
-
|
111
|
+
assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
|
112
|
+
end
|
113
|
+
end
|
99
114
|
|
100
|
-
|
101
|
-
|
115
|
+
def test_proc_index
|
116
|
+
index = TokenTrieNER.new({})
|
117
|
+
index.merge({ "aa" => {:PROCS => {Proc.new{|c| c == 'c'} => {:END => [TokenTrieNER::Code.new(:entity, :C1)]}}}})
|
102
118
|
|
103
|
-
index
|
104
|
-
assert index.match(sentence).collect{|m| m.code}.flatten.include? 'OR00063'
|
105
|
-
index.merge Rbbt.find_datafile('disease')
|
106
|
-
assert ! index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')
|
107
|
-
assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
|
119
|
+
assert index.match(Token.tokenize('3 cc 3 aa c ddd')).select{|m| m.code.include? :entity}.any?
|
108
120
|
end
|
109
121
|
|
122
|
+
def test_persistence
|
123
|
+
lexicon =<<-EOF
|
124
|
+
C1;aa;AA;bb b
|
125
|
+
C2;11;22;3 3;bb
|
126
|
+
EOF
|
127
|
+
|
128
|
+
TmpFile.with_file(lexicon) do |file|
|
129
|
+
index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'), :persistence => true)
|
130
|
+
|
131
|
+
index.match(' asdfa dsf asdf aa asdfasdf ')
|
132
|
+
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
133
|
+
end
|
134
|
+
end
|
110
135
|
|
111
136
|
end
|
112
137
|
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/nlp/nlp'
|
3
|
+
|
4
|
+
text=<<-EOF
|
5
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
|
6
|
+
of early childhood poorly responding to therapy. The majority of cases show
|
7
|
+
inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
|
8
|
+
triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
|
9
|
+
report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
|
10
|
+
retained SMARCB1 staining on immunohistochemistry and lacked genetic
|
11
|
+
alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
|
12
|
+
another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
|
13
|
+
(BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
|
14
|
+
findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
|
15
|
+
AT/RT and the usefulness of antibodies directed against SMARCA4 in this
|
16
|
+
diagnostic setting.
|
17
|
+
EOF
|
18
|
+
|
19
|
+
class TestClass < Test::Unit::TestCase
|
20
|
+
|
21
|
+
def test_sentences
|
22
|
+
text =<<-EOF
|
23
|
+
This is a
|
24
|
+
sentence. This is
|
25
|
+
another sentence.
|
26
|
+
EOF
|
27
|
+
|
28
|
+
assert_equal 2, NLP.geniass_sentence_splitter(text).length
|
29
|
+
assert_equal "This is a \nsentence. ", NLP.geniass_sentence_splitter(text).first
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_gdep_parse_sentences
|
33
|
+
text =<<-EOF
|
34
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs)
|
35
|
+
are highly aggressive brain
|
36
|
+
tumors of early childhood poorly
|
37
|
+
responding to therapy.
|
38
|
+
EOF
|
39
|
+
|
40
|
+
NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
|
41
|
+
segment_list.each do |segment|
|
42
|
+
assert_equal sentence[segment.range], segment
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_gdep_chunks
|
48
|
+
text =<<-EOF
|
49
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs)
|
50
|
+
are highly aggressive brain
|
51
|
+
tumors of early childhood poorly
|
52
|
+
responding to therapy.
|
53
|
+
EOF
|
54
|
+
|
55
|
+
NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
|
56
|
+
chunk_list = NLP.gdep_chunks(sentence, segment_list)
|
57
|
+
chunk_list.each do |segment|
|
58
|
+
assert_equal sentence[segment.range], segment
|
59
|
+
end
|
60
|
+
|
61
|
+
assert chunk_list.select{|c| c =~ /rhabdoid/}.first.parts.include? "tumors"
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_merge_chunks
|
67
|
+
text =<<-EOF
|
68
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs)
|
69
|
+
where found to be like highly aggressive brain
|
70
|
+
tumors of early childhood poorly
|
71
|
+
responding to therapy.
|
72
|
+
EOF
|
73
|
+
|
74
|
+
NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
|
75
|
+
chunk_list = NLP.gdep_chunks(sentence, segment_list)
|
76
|
+
new_chunk_list = NLP.merge_vp_chunks(chunk_list)
|
77
|
+
|
78
|
+
new_chunk_list.each do |segment|
|
79
|
+
assert_equal sentence[segment.range], segment
|
80
|
+
end
|
81
|
+
|
82
|
+
assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "found"
|
83
|
+
assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "to"
|
84
|
+
assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "be"
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
data/test/test_helper.rb
CHANGED
@@ -2,8 +2,28 @@ require 'test/unit'
|
|
2
2
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
3
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
4
|
|
5
|
+
require 'rbbt'
|
6
|
+
require 'rbbt/util/persistence'
|
7
|
+
require 'rbbt/util/tmpfile'
|
8
|
+
require 'rbbt/util/log'
|
9
|
+
require 'rbbt/corpus/document_repo'
|
10
|
+
|
5
11
|
class Test::Unit::TestCase
|
6
12
|
def test_datafile(file)
|
7
13
|
File.join(File.dirname(__FILE__), 'data', file)
|
8
14
|
end
|
15
|
+
|
16
|
+
def setup
|
17
|
+
FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
|
18
|
+
Persistence.cachedir = Rbbt.tmp.test.persistence.find :user
|
19
|
+
end
|
20
|
+
|
21
|
+
def teardown
|
22
|
+
FileUtils.rm_rf Rbbt.tmp.test.find :user
|
23
|
+
TCHash::CONNECTIONS.values.each do |c| c.close end
|
24
|
+
TCHash::CONNECTIONS.clear
|
25
|
+
DocumentRepo::CONNECTIONS.values.each do |c| c.close end
|
26
|
+
DocumentRepo::CONNECTIONS.clear
|
27
|
+
end
|
28
|
+
|
9
29
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 5
|
9
|
+
- 0
|
10
|
+
version: 0.5.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,8 +15,8 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
19
|
-
default_executable:
|
18
|
+
date: 2011-07-05 00:00:00 +02:00
|
19
|
+
default_executable: get_ppis.rb
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
name: rbbt-util
|
@@ -46,10 +46,38 @@ dependencies:
|
|
46
46
|
version: "0"
|
47
47
|
type: :runtime
|
48
48
|
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: libxml-ruby
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: json
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
type: :runtime
|
76
|
+
version_requirements: *id004
|
49
77
|
description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
|
50
78
|
email: miguel.vazquez@fdi.ucm.es
|
51
|
-
executables:
|
52
|
-
|
79
|
+
executables:
|
80
|
+
- get_ppis.rb
|
53
81
|
extensions: []
|
54
82
|
|
55
83
|
extra_rdoc_files: []
|
@@ -58,28 +86,64 @@ files:
|
|
58
86
|
- lib/rbbt/bow/bow.rb
|
59
87
|
- lib/rbbt/bow/dictionary.rb
|
60
88
|
- lib/rbbt/bow/misc.rb
|
89
|
+
- lib/rbbt/corpus/corpus.rb
|
90
|
+
- lib/rbbt/corpus/document.rb
|
91
|
+
- lib/rbbt/corpus/document_repo.rb
|
92
|
+
- lib/rbbt/corpus/sources/pubmed.rb
|
61
93
|
- lib/rbbt/ner/NER.rb
|
62
94
|
- lib/rbbt/ner/abner.rb
|
63
95
|
- lib/rbbt/ner/annotations.rb
|
96
|
+
- lib/rbbt/ner/annotations/annotated.rb
|
97
|
+
- lib/rbbt/ner/annotations/named_entity.rb
|
98
|
+
- lib/rbbt/ner/annotations/relations.rb
|
99
|
+
- lib/rbbt/ner/annotations/token.rb
|
100
|
+
- lib/rbbt/ner/annotations/transformed.rb
|
64
101
|
- lib/rbbt/ner/banner.rb
|
102
|
+
- lib/rbbt/ner/chemical_tagger.rb
|
103
|
+
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
65
104
|
- lib/rbbt/ner/oscar3.rb
|
105
|
+
- lib/rbbt/ner/oscar4.rb
|
106
|
+
- lib/rbbt/ner/patterns.rb
|
66
107
|
- lib/rbbt/ner/regexpNER.rb
|
108
|
+
- lib/rbbt/ner/rnorm.rb
|
109
|
+
- lib/rbbt/ner/rnorm/cue_index.rb
|
110
|
+
- lib/rbbt/ner/rnorm/tokens.rb
|
67
111
|
- lib/rbbt/ner/token_trieNER.rb
|
112
|
+
- lib/rbbt/nlp/genia/sentence_splitter.rb
|
113
|
+
- lib/rbbt/nlp/nlp.rb
|
68
114
|
- share/install/software/ABNER
|
69
115
|
- share/install/software/BANNER
|
116
|
+
- share/install/software/ChemicalTagger
|
117
|
+
- share/install/software/Gdep
|
118
|
+
- share/install/software/Geniass
|
70
119
|
- share/install/software/OSCAR3
|
71
|
-
- share/
|
120
|
+
- share/install/software/OSCAR4
|
121
|
+
- share/install/software/StanfordParser
|
122
|
+
- share/patterns/drug_induce_disease
|
123
|
+
- share/rnorm/cue_default
|
124
|
+
- share/rnorm/tokens_default
|
125
|
+
- share/wordlists/stopwords
|
126
|
+
- test/test_helper.rb
|
72
127
|
- test/rbbt/bow/test_bow.rb
|
73
128
|
- test/rbbt/bow/test_dictionary.rb
|
74
129
|
- test/rbbt/bow/test_misc.rb
|
75
|
-
- test/rbbt/ner/
|
130
|
+
- test/rbbt/ner/test_regexpNER.rb
|
76
131
|
- test/rbbt/ner/test_abner.rb
|
77
|
-
- test/rbbt/ner/test_annotations.rb
|
78
132
|
- test/rbbt/ner/test_banner.rb
|
79
|
-
- test/rbbt/ner/
|
80
|
-
- test/rbbt/ner/
|
133
|
+
- test/rbbt/ner/annotations/test_transformed.rb
|
134
|
+
- test/rbbt/ner/annotations/test_named_entity.rb
|
81
135
|
- test/rbbt/ner/test_token_trieNER.rb
|
82
|
-
- test/
|
136
|
+
- test/rbbt/ner/test_annotations.rb
|
137
|
+
- test/rbbt/ner/test_patterns.rb
|
138
|
+
- test/rbbt/ner/test_NER.rb
|
139
|
+
- test/rbbt/ner/test_rnorm.rb
|
140
|
+
- test/rbbt/ner/test_oscar4.rb
|
141
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
142
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
143
|
+
- test/rbbt/nlp/test_nlp.rb
|
144
|
+
- test/rbbt/corpus/test_corpus.rb
|
145
|
+
- test/rbbt/corpus/test_document.rb
|
146
|
+
- bin/get_ppis.rb
|
83
147
|
has_rdoc: true
|
84
148
|
homepage: http://github.com/mikisvaz/rbbt-util
|
85
149
|
licenses: []
|
@@ -110,19 +174,28 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
174
|
requirements: []
|
111
175
|
|
112
176
|
rubyforge_project:
|
113
|
-
rubygems_version: 1.
|
177
|
+
rubygems_version: 1.6.2
|
114
178
|
signing_key:
|
115
179
|
specification_version: 3
|
116
180
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
117
181
|
test_files:
|
182
|
+
- test/test_helper.rb
|
118
183
|
- test/rbbt/bow/test_bow.rb
|
119
184
|
- test/rbbt/bow/test_dictionary.rb
|
120
185
|
- test/rbbt/bow/test_misc.rb
|
121
|
-
- test/rbbt/ner/
|
186
|
+
- test/rbbt/ner/test_regexpNER.rb
|
122
187
|
- test/rbbt/ner/test_abner.rb
|
123
|
-
- test/rbbt/ner/test_annotations.rb
|
124
188
|
- test/rbbt/ner/test_banner.rb
|
125
|
-
- test/rbbt/ner/
|
126
|
-
- test/rbbt/ner/
|
189
|
+
- test/rbbt/ner/annotations/test_transformed.rb
|
190
|
+
- test/rbbt/ner/annotations/test_named_entity.rb
|
127
191
|
- test/rbbt/ner/test_token_trieNER.rb
|
128
|
-
- test/
|
192
|
+
- test/rbbt/ner/test_annotations.rb
|
193
|
+
- test/rbbt/ner/test_patterns.rb
|
194
|
+
- test/rbbt/ner/test_NER.rb
|
195
|
+
- test/rbbt/ner/test_rnorm.rb
|
196
|
+
- test/rbbt/ner/test_oscar4.rb
|
197
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
198
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
199
|
+
- test/rbbt/nlp/test_nlp.rb
|
200
|
+
- test/rbbt/corpus/test_corpus.rb
|
201
|
+
- test/rbbt/corpus/test_document.rb
|