rbbt-text 0.2.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/get_ppis.rb +52 -0
- data/lib/rbbt/bow/dictionary.rb +9 -9
- data/lib/rbbt/bow/misc.rb +86 -2
- data/lib/rbbt/corpus/corpus.rb +55 -0
- data/lib/rbbt/corpus/document.rb +289 -0
- data/lib/rbbt/corpus/document_repo.rb +115 -0
- data/lib/rbbt/corpus/sources/pubmed.rb +26 -0
- data/lib/rbbt/ner/NER.rb +7 -5
- data/lib/rbbt/ner/abner.rb +13 -2
- data/lib/rbbt/ner/annotations.rb +182 -51
- data/lib/rbbt/ner/annotations/annotated.rb +15 -0
- data/lib/rbbt/ner/annotations/named_entity.rb +37 -0
- data/lib/rbbt/ner/annotations/relations.rb +25 -0
- data/lib/rbbt/ner/annotations/token.rb +28 -0
- data/lib/rbbt/ner/annotations/transformed.rb +170 -0
- data/lib/rbbt/ner/banner.rb +8 -5
- data/lib/rbbt/ner/chemical_tagger.rb +34 -0
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +136 -0
- data/lib/rbbt/ner/oscar3.rb +1 -1
- data/lib/rbbt/ner/oscar4.rb +41 -0
- data/lib/rbbt/ner/patterns.rb +132 -0
- data/lib/rbbt/ner/rnorm.rb +141 -0
- data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
- data/lib/rbbt/ner/rnorm/tokens.rb +218 -0
- data/lib/rbbt/ner/token_trieNER.rb +185 -51
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +214 -0
- data/lib/rbbt/nlp/nlp.rb +235 -0
- data/share/install/software/ABNER +0 -4
- data/share/install/software/ChemicalTagger +81 -0
- data/share/install/software/Gdep +115 -0
- data/share/install/software/Geniass +118 -0
- data/share/install/software/OSCAR4 +16 -0
- data/share/install/software/StanfordParser +15 -0
- data/share/patterns/drug_induce_disease +22 -0
- data/share/rnorm/cue_default +10 -0
- data/share/rnorm/tokens_default +86 -0
- data/share/{stopwords → wordlists/stopwords} +0 -0
- data/test/rbbt/bow/test_bow.rb +1 -1
- data/test/rbbt/bow/test_dictionary.rb +1 -1
- data/test/rbbt/bow/test_misc.rb +1 -1
- data/test/rbbt/corpus/test_corpus.rb +99 -0
- data/test/rbbt/corpus/test_document.rb +222 -0
- data/test/rbbt/ner/annotations/test_named_entity.rb +14 -0
- data/test/rbbt/ner/annotations/test_transformed.rb +175 -0
- data/test/rbbt/ner/test_abner.rb +1 -1
- data/test/rbbt/ner/test_annotations.rb +64 -2
- data/test/rbbt/ner/test_banner.rb +1 -1
- data/test/rbbt/ner/test_chemical_tagger.rb +56 -0
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +20 -0
- data/test/rbbt/ner/{test_oscar3.rb → test_oscar4.rb} +12 -13
- data/test/rbbt/ner/test_patterns.rb +66 -0
- data/test/rbbt/ner/test_regexpNER.rb +1 -1
- data/test/rbbt/ner/test_rnorm.rb +47 -0
- data/test/rbbt/ner/test_token_trieNER.rb +60 -35
- data/test/rbbt/nlp/test_nlp.rb +88 -0
- data/test/test_helper.rb +20 -0
- metadata +93 -20
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/rnorm'
|
3
|
+
require 'rbbt/util/open'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
class TestRNORM < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup
|
10
|
+
tmp = TmpFile.tmp_file("test-rnorm-")
|
11
|
+
lexicon =<<-EOT
|
12
|
+
S000000029 YAL031C GIP4 FUN21
|
13
|
+
S000000030 YAL032C PRP45 FUN20
|
14
|
+
S000000031 YAL033W POP5 FUN53
|
15
|
+
S000000374 YBR170C NPL4 HRD4
|
16
|
+
S000000375 GENE1 BBB CCC
|
17
|
+
S000000376 AAA GENE1 DDD
|
18
|
+
EOT
|
19
|
+
|
20
|
+
Open.write(tmp, lexicon)
|
21
|
+
|
22
|
+
@norm = Normalizer.new(tmp)
|
23
|
+
FileUtils.rm tmp
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_match
|
27
|
+
assert_equal(["S000000029"], @norm.match("FUN21"))
|
28
|
+
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
|
29
|
+
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
|
30
|
+
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
|
31
|
+
assert_equal([], @norm.match("GER4"))
|
32
|
+
|
33
|
+
@norm.match("FUN21")
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_select
|
37
|
+
assert_equal(["S000000029"], @norm.select(["S000000030", "S000000029", "S000000031"],"FUN 21"))
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_resolve
|
41
|
+
assert_equal(["S000000029"], @norm.resolve("FUN 21"))
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_order
|
45
|
+
assert_equal(["S000000375"], @norm.resolve("GENE1"))
|
46
|
+
end
|
47
|
+
end
|
@@ -10,16 +10,16 @@ class TestTokenTrieNER < Test::Unit::TestCase
|
|
10
10
|
assert_equal 10, TokenTrieNER.tokenize('123456789 12345').last.offset
|
11
11
|
assert_equal 0, TokenTrieNER.tokenize('123456789 12345').first.offset
|
12
12
|
|
13
|
-
|
14
13
|
text = '123456789 12345'
|
15
14
|
assert_equal '12345', text[TokenTrieNER.tokenize('123456789 12345').last.range]
|
16
15
|
end
|
17
16
|
|
18
17
|
def test_merge
|
19
18
|
tokens = %w(a b c)
|
20
|
-
|
19
|
+
tokens.extend TokenTrieNER::EnumeratedArray
|
20
|
+
index = {'a' => {'b' => {'c' => {:END => [TokenTrieNER::Code.new('CODE')]}}}}
|
21
21
|
|
22
|
-
assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.
|
22
|
+
assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.code
|
23
23
|
end
|
24
24
|
|
25
25
|
def test_process
|
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
|
|
30
30
|
|
31
31
|
TmpFile.with_file(lexicon) do |file|
|
32
32
|
|
33
|
-
index = TokenTrieNER.process(TSV.new(file, :sep => ';'
|
33
|
+
index = TokenTrieNER.process({}, TSV.new(file, :flat, :sep => ';'))
|
34
34
|
|
35
35
|
assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
|
36
36
|
assert_equal [:END], index['aa'].keys
|
@@ -47,20 +47,20 @@ C2;11;22;3 3;bb
|
|
47
47
|
|
48
48
|
|
49
49
|
TmpFile.with_file(lexicon) do |file|
|
50
|
-
index = TokenTrieNER.process(TSV.new(file, :sep => ';', :
|
50
|
+
index = TokenTrieNER.process({}, TSV.new(file, :sep => ';', :type => :flat ))
|
51
51
|
|
52
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).first.collect{|c| c.
|
53
|
-
assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).last
|
52
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
|
53
|
+
assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
|
54
54
|
|
55
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), true).first.collect{|c| c.
|
55
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
|
56
56
|
|
57
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).first.collect{|c| c.
|
58
|
-
assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).last
|
57
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
|
58
|
+
assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).last
|
59
59
|
|
60
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).first.collect{|c| c.
|
61
|
-
assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).last
|
60
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
|
61
|
+
assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).last
|
62
62
|
|
63
|
-
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf'), false).first.collect{|c| c.
|
63
|
+
assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
@@ -71,42 +71,67 @@ C2;11;22;3 3;bb
|
|
71
71
|
EOF
|
72
72
|
|
73
73
|
TmpFile.with_file(lexicon) do |file|
|
74
|
-
index = TokenTrieNER.new(file,
|
74
|
+
index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'))
|
75
75
|
|
76
|
+
index.match(' asdfa dsf asdf aa asdfasdf ')
|
76
77
|
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
77
78
|
end
|
78
79
|
end
|
79
80
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
81
|
+
def test_slack
|
82
|
+
lexicon =<<-EOF
|
83
|
+
C1;aa;AA;bb cc cc b
|
84
|
+
C2;11;22;3 3;bb
|
85
|
+
EOF
|
86
|
+
|
87
|
+
TmpFile.with_file(lexicon) do |file|
|
88
|
+
index = TokenTrieNER.new({})
|
89
|
+
index.slack = Proc.new{|t| t =~ /^c*$/}
|
90
|
+
|
91
|
+
index.merge TSV.new(file, :flat, :sep => ';')
|
92
|
+
|
93
|
+
assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
|
94
|
+
assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
|
95
|
+
assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
|
87
96
|
end
|
97
|
+
end
|
88
98
|
|
89
|
-
|
99
|
+
def test_own_tokens
|
100
|
+
lexicon =<<-EOF
|
101
|
+
C1;aa;AA;bb cc cc b
|
102
|
+
C2;11;22;3 3;bb
|
103
|
+
EOF
|
90
104
|
|
91
|
-
|
92
|
-
|
105
|
+
TmpFile.with_file(lexicon) do |file|
|
106
|
+
index = TokenTrieNER.new({})
|
107
|
+
index.slack = Proc.new{|t| t =~ /^c*$/}
|
93
108
|
|
94
|
-
|
95
|
-
assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
|
109
|
+
index.merge TSV.new(file, :flat, :sep => ';')
|
96
110
|
|
97
|
-
|
98
|
-
|
111
|
+
assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
|
112
|
+
end
|
113
|
+
end
|
99
114
|
|
100
|
-
|
101
|
-
|
115
|
+
def test_proc_index
|
116
|
+
index = TokenTrieNER.new({})
|
117
|
+
index.merge({ "aa" => {:PROCS => {Proc.new{|c| c == 'c'} => {:END => [TokenTrieNER::Code.new(:entity, :C1)]}}}})
|
102
118
|
|
103
|
-
index
|
104
|
-
assert index.match(sentence).collect{|m| m.code}.flatten.include? 'OR00063'
|
105
|
-
index.merge Rbbt.find_datafile('disease')
|
106
|
-
assert ! index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')
|
107
|
-
assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
|
119
|
+
assert index.match(Token.tokenize('3 cc 3 aa c ddd')).select{|m| m.code.include? :entity}.any?
|
108
120
|
end
|
109
121
|
|
122
|
+
def test_persistence
|
123
|
+
lexicon =<<-EOF
|
124
|
+
C1;aa;AA;bb b
|
125
|
+
C2;11;22;3 3;bb
|
126
|
+
EOF
|
127
|
+
|
128
|
+
TmpFile.with_file(lexicon) do |file|
|
129
|
+
index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'), :persistence => true)
|
130
|
+
|
131
|
+
index.match(' asdfa dsf asdf aa asdfasdf ')
|
132
|
+
assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
|
133
|
+
end
|
134
|
+
end
|
110
135
|
|
111
136
|
end
|
112
137
|
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/nlp/nlp'
|
3
|
+
|
4
|
+
text=<<-EOF
|
5
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
|
6
|
+
of early childhood poorly responding to therapy. The majority of cases show
|
7
|
+
inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
|
8
|
+
triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
|
9
|
+
report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
|
10
|
+
retained SMARCB1 staining on immunohistochemistry and lacked genetic
|
11
|
+
alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
|
12
|
+
another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
|
13
|
+
(BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
|
14
|
+
findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
|
15
|
+
AT/RT and the usefulness of antibodies directed against SMARCA4 in this
|
16
|
+
diagnostic setting.
|
17
|
+
EOF
|
18
|
+
|
19
|
+
class TestClass < Test::Unit::TestCase
|
20
|
+
|
21
|
+
def test_sentences
|
22
|
+
text =<<-EOF
|
23
|
+
This is a
|
24
|
+
sentence. This is
|
25
|
+
another sentence.
|
26
|
+
EOF
|
27
|
+
|
28
|
+
assert_equal 2, NLP.geniass_sentence_splitter(text).length
|
29
|
+
assert_equal "This is a \nsentence. ", NLP.geniass_sentence_splitter(text).first
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_gdep_parse_sentences
|
33
|
+
text =<<-EOF
|
34
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs)
|
35
|
+
are highly aggressive brain
|
36
|
+
tumors of early childhood poorly
|
37
|
+
responding to therapy.
|
38
|
+
EOF
|
39
|
+
|
40
|
+
NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
|
41
|
+
segment_list.each do |segment|
|
42
|
+
assert_equal sentence[segment.range], segment
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_gdep_chunks
|
48
|
+
text =<<-EOF
|
49
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs)
|
50
|
+
are highly aggressive brain
|
51
|
+
tumors of early childhood poorly
|
52
|
+
responding to therapy.
|
53
|
+
EOF
|
54
|
+
|
55
|
+
NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
|
56
|
+
chunk_list = NLP.gdep_chunks(sentence, segment_list)
|
57
|
+
chunk_list.each do |segment|
|
58
|
+
assert_equal sentence[segment.range], segment
|
59
|
+
end
|
60
|
+
|
61
|
+
assert chunk_list.select{|c| c =~ /rhabdoid/}.first.parts.include? "tumors"
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_merge_chunks
|
67
|
+
text =<<-EOF
|
68
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs)
|
69
|
+
where found to be like highly aggressive brain
|
70
|
+
tumors of early childhood poorly
|
71
|
+
responding to therapy.
|
72
|
+
EOF
|
73
|
+
|
74
|
+
NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
|
75
|
+
chunk_list = NLP.gdep_chunks(sentence, segment_list)
|
76
|
+
new_chunk_list = NLP.merge_vp_chunks(chunk_list)
|
77
|
+
|
78
|
+
new_chunk_list.each do |segment|
|
79
|
+
assert_equal sentence[segment.range], segment
|
80
|
+
end
|
81
|
+
|
82
|
+
assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "found"
|
83
|
+
assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "to"
|
84
|
+
assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "be"
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
data/test/test_helper.rb
CHANGED
@@ -2,8 +2,28 @@ require 'test/unit'
|
|
2
2
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
3
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
4
|
|
5
|
+
require 'rbbt'
|
6
|
+
require 'rbbt/util/persistence'
|
7
|
+
require 'rbbt/util/tmpfile'
|
8
|
+
require 'rbbt/util/log'
|
9
|
+
require 'rbbt/corpus/document_repo'
|
10
|
+
|
5
11
|
class Test::Unit::TestCase
|
6
12
|
def test_datafile(file)
|
7
13
|
File.join(File.dirname(__FILE__), 'data', file)
|
8
14
|
end
|
15
|
+
|
16
|
+
def setup
|
17
|
+
FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
|
18
|
+
Persistence.cachedir = Rbbt.tmp.test.persistence.find :user
|
19
|
+
end
|
20
|
+
|
21
|
+
def teardown
|
22
|
+
FileUtils.rm_rf Rbbt.tmp.test.find :user
|
23
|
+
TCHash::CONNECTIONS.values.each do |c| c.close end
|
24
|
+
TCHash::CONNECTIONS.clear
|
25
|
+
DocumentRepo::CONNECTIONS.values.each do |c| c.close end
|
26
|
+
DocumentRepo::CONNECTIONS.clear
|
27
|
+
end
|
28
|
+
|
9
29
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 5
|
9
|
+
- 0
|
10
|
+
version: 0.5.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,8 +15,8 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
19
|
-
default_executable:
|
18
|
+
date: 2011-07-05 00:00:00 +02:00
|
19
|
+
default_executable: get_ppis.rb
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
name: rbbt-util
|
@@ -46,10 +46,38 @@ dependencies:
|
|
46
46
|
version: "0"
|
47
47
|
type: :runtime
|
48
48
|
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: libxml-ruby
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: json
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
type: :runtime
|
76
|
+
version_requirements: *id004
|
49
77
|
description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
|
50
78
|
email: miguel.vazquez@fdi.ucm.es
|
51
|
-
executables:
|
52
|
-
|
79
|
+
executables:
|
80
|
+
- get_ppis.rb
|
53
81
|
extensions: []
|
54
82
|
|
55
83
|
extra_rdoc_files: []
|
@@ -58,28 +86,64 @@ files:
|
|
58
86
|
- lib/rbbt/bow/bow.rb
|
59
87
|
- lib/rbbt/bow/dictionary.rb
|
60
88
|
- lib/rbbt/bow/misc.rb
|
89
|
+
- lib/rbbt/corpus/corpus.rb
|
90
|
+
- lib/rbbt/corpus/document.rb
|
91
|
+
- lib/rbbt/corpus/document_repo.rb
|
92
|
+
- lib/rbbt/corpus/sources/pubmed.rb
|
61
93
|
- lib/rbbt/ner/NER.rb
|
62
94
|
- lib/rbbt/ner/abner.rb
|
63
95
|
- lib/rbbt/ner/annotations.rb
|
96
|
+
- lib/rbbt/ner/annotations/annotated.rb
|
97
|
+
- lib/rbbt/ner/annotations/named_entity.rb
|
98
|
+
- lib/rbbt/ner/annotations/relations.rb
|
99
|
+
- lib/rbbt/ner/annotations/token.rb
|
100
|
+
- lib/rbbt/ner/annotations/transformed.rb
|
64
101
|
- lib/rbbt/ner/banner.rb
|
102
|
+
- lib/rbbt/ner/chemical_tagger.rb
|
103
|
+
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
65
104
|
- lib/rbbt/ner/oscar3.rb
|
105
|
+
- lib/rbbt/ner/oscar4.rb
|
106
|
+
- lib/rbbt/ner/patterns.rb
|
66
107
|
- lib/rbbt/ner/regexpNER.rb
|
108
|
+
- lib/rbbt/ner/rnorm.rb
|
109
|
+
- lib/rbbt/ner/rnorm/cue_index.rb
|
110
|
+
- lib/rbbt/ner/rnorm/tokens.rb
|
67
111
|
- lib/rbbt/ner/token_trieNER.rb
|
112
|
+
- lib/rbbt/nlp/genia/sentence_splitter.rb
|
113
|
+
- lib/rbbt/nlp/nlp.rb
|
68
114
|
- share/install/software/ABNER
|
69
115
|
- share/install/software/BANNER
|
116
|
+
- share/install/software/ChemicalTagger
|
117
|
+
- share/install/software/Gdep
|
118
|
+
- share/install/software/Geniass
|
70
119
|
- share/install/software/OSCAR3
|
71
|
-
- share/
|
120
|
+
- share/install/software/OSCAR4
|
121
|
+
- share/install/software/StanfordParser
|
122
|
+
- share/patterns/drug_induce_disease
|
123
|
+
- share/rnorm/cue_default
|
124
|
+
- share/rnorm/tokens_default
|
125
|
+
- share/wordlists/stopwords
|
126
|
+
- test/test_helper.rb
|
72
127
|
- test/rbbt/bow/test_bow.rb
|
73
128
|
- test/rbbt/bow/test_dictionary.rb
|
74
129
|
- test/rbbt/bow/test_misc.rb
|
75
|
-
- test/rbbt/ner/
|
130
|
+
- test/rbbt/ner/test_regexpNER.rb
|
76
131
|
- test/rbbt/ner/test_abner.rb
|
77
|
-
- test/rbbt/ner/test_annotations.rb
|
78
132
|
- test/rbbt/ner/test_banner.rb
|
79
|
-
- test/rbbt/ner/
|
80
|
-
- test/rbbt/ner/
|
133
|
+
- test/rbbt/ner/annotations/test_transformed.rb
|
134
|
+
- test/rbbt/ner/annotations/test_named_entity.rb
|
81
135
|
- test/rbbt/ner/test_token_trieNER.rb
|
82
|
-
- test/
|
136
|
+
- test/rbbt/ner/test_annotations.rb
|
137
|
+
- test/rbbt/ner/test_patterns.rb
|
138
|
+
- test/rbbt/ner/test_NER.rb
|
139
|
+
- test/rbbt/ner/test_rnorm.rb
|
140
|
+
- test/rbbt/ner/test_oscar4.rb
|
141
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
142
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
143
|
+
- test/rbbt/nlp/test_nlp.rb
|
144
|
+
- test/rbbt/corpus/test_corpus.rb
|
145
|
+
- test/rbbt/corpus/test_document.rb
|
146
|
+
- bin/get_ppis.rb
|
83
147
|
has_rdoc: true
|
84
148
|
homepage: http://github.com/mikisvaz/rbbt-util
|
85
149
|
licenses: []
|
@@ -110,19 +174,28 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
174
|
requirements: []
|
111
175
|
|
112
176
|
rubyforge_project:
|
113
|
-
rubygems_version: 1.
|
177
|
+
rubygems_version: 1.6.2
|
114
178
|
signing_key:
|
115
179
|
specification_version: 3
|
116
180
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
117
181
|
test_files:
|
182
|
+
- test/test_helper.rb
|
118
183
|
- test/rbbt/bow/test_bow.rb
|
119
184
|
- test/rbbt/bow/test_dictionary.rb
|
120
185
|
- test/rbbt/bow/test_misc.rb
|
121
|
-
- test/rbbt/ner/
|
186
|
+
- test/rbbt/ner/test_regexpNER.rb
|
122
187
|
- test/rbbt/ner/test_abner.rb
|
123
|
-
- test/rbbt/ner/test_annotations.rb
|
124
188
|
- test/rbbt/ner/test_banner.rb
|
125
|
-
- test/rbbt/ner/
|
126
|
-
- test/rbbt/ner/
|
189
|
+
- test/rbbt/ner/annotations/test_transformed.rb
|
190
|
+
- test/rbbt/ner/annotations/test_named_entity.rb
|
127
191
|
- test/rbbt/ner/test_token_trieNER.rb
|
128
|
-
- test/
|
192
|
+
- test/rbbt/ner/test_annotations.rb
|
193
|
+
- test/rbbt/ner/test_patterns.rb
|
194
|
+
- test/rbbt/ner/test_NER.rb
|
195
|
+
- test/rbbt/ner/test_rnorm.rb
|
196
|
+
- test/rbbt/ner/test_oscar4.rb
|
197
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
198
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
199
|
+
- test/rbbt/nlp/test_nlp.rb
|
200
|
+
- test/rbbt/corpus/test_corpus.rb
|
201
|
+
- test/rbbt/corpus/test_document.rb
|