rbbt-text 0.6.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/corpus/document.rb +1 -0
- data/lib/rbbt/entity/document.rb +62 -18
- data/lib/rbbt/ner/abner.rb +6 -3
- data/lib/rbbt/ner/banner.rb +10 -7
- data/lib/rbbt/ner/chemical_tagger.rb +5 -3
- data/lib/rbbt/ner/finder.rb +60 -0
- data/lib/rbbt/ner/linnaeus.rb +38 -0
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +42 -48
- data/lib/rbbt/ner/oscar3.rb +9 -6
- data/lib/rbbt/ner/oscar4.rb +21 -7
- data/lib/rbbt/ner/rnorm.rb +57 -33
- data/lib/rbbt/ner/rnorm/cue_index.rb +4 -3
- data/lib/rbbt/ner/rnorm/tokens.rb +10 -4
- data/lib/rbbt/ner/segment.rb +19 -8
- data/lib/rbbt/ner/segment/docid.rb +46 -0
- data/lib/rbbt/ner/segment/named_entity.rb +1 -1
- data/lib/rbbt/ner/segment/transformed.rb +5 -3
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +22 -1
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +74 -0
- data/share/install/software/Linnaeus +21 -0
- data/share/install/software/OpenNLP +12 -0
- data/share/rnorm/tokens_default +1 -2
- data/test/rbbt/entity/test_document.rb +66 -0
- data/test/rbbt/ner/segment/test_transformed.rb +10 -0
- data/test/rbbt/ner/test_finder.rb +34 -0
- data/test/rbbt/ner/test_linnaeus.rb +16 -0
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +22 -0
- data/test/rbbt/ner/test_oscar4.rb +3 -3
- data/test/rbbt/ner/test_rnorm.rb +3 -3
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +45 -0
- data/test/test_helper.rb +1 -1
- metadata +101 -99
- data/test/rbbt/corpus/test_corpus.rb +0 -99
- data/test/rbbt/corpus/test_document.rb +0 -236
@@ -101,7 +101,7 @@ module NLP
|
|
101
101
|
a, b, d, c = $1, $2, $3, $4
|
102
102
|
events << eventCount.to_s << "\t"
|
103
103
|
events << returnFeatures(a, b, c)
|
104
|
-
(" "
|
104
|
+
(" " << a << b << "__" << eventCount.to_s << "____" << d << "__" << c << " ")
|
105
105
|
}
|
106
106
|
eventCount += 1
|
107
107
|
end
|
@@ -111,6 +111,27 @@ module NLP
|
|
111
111
|
[events, marks]
|
112
112
|
end
|
113
113
|
|
114
|
+
def self.event_extraction(text)
|
115
|
+
events = ""
|
116
|
+
marks = ""
|
117
|
+
|
118
|
+
eventCount = 0
|
119
|
+
|
120
|
+
pat = / ([^ ]+)([.!\?\)\]\"])( +)([^ ]+) /
|
121
|
+
for line in text.split(/\n/) do
|
122
|
+
while line.match(pat) do
|
123
|
+
a, b, d, c = $1, $2, $3, $4
|
124
|
+
events << eventCount.to_s << "\t"
|
125
|
+
events << returnFeatures(a, b, c)
|
126
|
+
line = $` + (" " << a << b << "__" << eventCount.to_s << "____" << d << "__" << c << " ") << $'
|
127
|
+
eventCount += 1
|
128
|
+
end
|
129
|
+
marks << line
|
130
|
+
end
|
131
|
+
|
132
|
+
[events, marks]
|
133
|
+
end
|
134
|
+
|
114
135
|
def self.process_labels(marked_text, labels)
|
115
136
|
out = ""
|
116
137
|
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rjb'
|
3
|
+
require 'rbbt/ner/segment'
|
4
|
+
require 'rbbt/resource'
|
5
|
+
|
6
|
+
module OpenNLP
|
7
|
+
Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find
|
8
|
+
|
9
|
+
Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"
|
10
|
+
|
11
|
+
MAX = 5
|
12
|
+
|
13
|
+
@@FileInputStream = Rjb::import('java.io.FileInputStream')
|
14
|
+
@@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
|
15
|
+
@@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
|
16
|
+
|
17
|
+
def self.sentence_split_detector
|
18
|
+
@@sentence_split_detector ||= begin
|
19
|
+
modelIn = @@FileInputStream.new(Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce.find);
|
20
|
+
|
21
|
+
model = @@SentenceModel.new(modelIn);
|
22
|
+
modelIn.close()
|
23
|
+
model
|
24
|
+
|
25
|
+
@@SentenceDetectorME.new(model)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.sentence_splitter(text)
|
30
|
+
return [] if text.nil? or text.empty?
|
31
|
+
|
32
|
+
last = 0
|
33
|
+
begin
|
34
|
+
sentence_split_detector = self.sentence_split_detector
|
35
|
+
|
36
|
+
sentences = nil
|
37
|
+
TmpFile.with_file do |tmpfile|
|
38
|
+
start_time = Time.now
|
39
|
+
|
40
|
+
begin
|
41
|
+
pid = Process.fork do
|
42
|
+
sent = sentence_split_detector.sentDetect(text)
|
43
|
+
Open.write(tmpfile, sent * "#OpenNLP:SENTENCE#")
|
44
|
+
end
|
45
|
+
|
46
|
+
while not Process.waitpid(pid)
|
47
|
+
if Time.now - start_time > MAX
|
48
|
+
Process.kill(9, pid)
|
49
|
+
raise "Taking to long (> #{MAX} seconds)"
|
50
|
+
end
|
51
|
+
sleep 0.1
|
52
|
+
end
|
53
|
+
|
54
|
+
begin
|
55
|
+
Process.waitpid(pid)
|
56
|
+
end
|
57
|
+
rescue Errno::ECHILD
|
58
|
+
end
|
59
|
+
|
60
|
+
sentences = Open.read(tmpfile).split("#OpenNLP:SENTENCE#")
|
61
|
+
end
|
62
|
+
|
63
|
+
sentences.collect{|sentence|
|
64
|
+
start = text.index(sentence, last)
|
65
|
+
Segment.setup sentence, start
|
66
|
+
last = start + sentence.length - 1
|
67
|
+
sentence
|
68
|
+
}
|
69
|
+
rescue Exception
|
70
|
+
raise $!
|
71
|
+
raise "Sentence splitter raised exception: #{$!.message}"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
name='Linnaeus'
|
4
|
+
url="http://sourceforge.net/projects/linnaeus/files/Linnaeus/linnaeus-2.0.tar.gz/download"
|
5
|
+
species_url="http://sourceforge.net/projects/linnaeus/files/Entity_packs/species-proxy-1.2.tar.gz/download"
|
6
|
+
|
7
|
+
install_src "$name" "$url"
|
8
|
+
ln -s "$OPT_DIR/$name/bin/"*.jar "$OPT_JAR_DIR/$name.jar"
|
9
|
+
|
10
|
+
echo "GET SPECIES" > /tmp/foo
|
11
|
+
get_src "Linnaeus-species-proxy" "$species_url"
|
12
|
+
pkg_dir="`opt_dir \"$name\"`"
|
13
|
+
build_dir=`build_dir`
|
14
|
+
echo $pkg_dir >> /tmp/foo
|
15
|
+
echo $build_dir >> /tmp/foo
|
16
|
+
mv "$build_dir" "$pkg_dir"
|
17
|
+
tmp_file="/tmp/species-proxy-properties.tmp"
|
18
|
+
cat "$pkg_dir/species-proxy/properties.conf" |grep -v "^.dir =" >> $tmp_file
|
19
|
+
echo "\$dir = $pkg_dir/species-proxy/" > "$pkg_dir/species-proxy/properties.conf"
|
20
|
+
cat $tmp_file | grep -v "^#" >> "$pkg_dir/species-proxy/properties.conf"
|
21
|
+
|
data/share/rnorm/tokens_default
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'rbbt/util/misc'
|
2
2
|
|
3
|
-
|
4
3
|
plural = Proc.new do |t| t.sub(/s$/,'') end
|
5
4
|
|
6
5
|
tokens do
|
@@ -14,7 +13,7 @@ tokens do
|
|
14
13
|
|
15
14
|
# Some words for removal
|
16
15
|
stopword do |w| $stopwords.include?( w.downcase_first) end
|
17
|
-
gene /genes?/i
|
16
|
+
gene /genes?/i
|
18
17
|
dna
|
19
18
|
cdna
|
20
19
|
rna
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/entity'
|
3
|
+
require 'rbbt/entity/pmid'
|
4
|
+
require 'rbbt/entity/document'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
require 'rbbt/workflow'
|
8
|
+
|
9
|
+
Workflow.require_workflow "TextMining"
|
10
|
+
|
11
|
+
module Document
|
12
|
+
self.corpus = Persist.open_tokyocabinet("/tmp/corpus", false, :string, "BDB")
|
13
|
+
|
14
|
+
property :banner => :single do |*args|
|
15
|
+
normalize, organism = args
|
16
|
+
TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :banner, :normalize => normalize, :organism => organism).exec.each{|e| SegmentWithDocid.setup(e, self.docid)}
|
17
|
+
end
|
18
|
+
|
19
|
+
property :abner => :single do |*args|
|
20
|
+
normalize, organism = args
|
21
|
+
TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :banner, :normalize => normalize, :organism => organism).exec.each{|e| SegmentWithDocid.setup(e, self.docid)}
|
22
|
+
end
|
23
|
+
|
24
|
+
persist :abner, :annotations, :dir => Rbbt.tmp.test.find(:user).entity_property
|
25
|
+
end
|
26
|
+
|
27
|
+
class TestDocument < Test::Unit::TestCase
|
28
|
+
def test_pmid
|
29
|
+
pmid = "21904853"
|
30
|
+
PMID.setup(pmid)
|
31
|
+
|
32
|
+
assert_match /^PMID/, pmid.id
|
33
|
+
assert_match /TET2/, pmid.text
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_abner
|
37
|
+
pmid = "21904853"
|
38
|
+
PMID.setup(pmid)
|
39
|
+
|
40
|
+
genes = pmid.abner.reject{|ne| ne.offset.nil?}
|
41
|
+
genes.each do |ne|
|
42
|
+
orig = ne
|
43
|
+
orig_range = ne.range
|
44
|
+
ne.mask
|
45
|
+
assert ne.masked?
|
46
|
+
assert ne =~ /^MASKED/
|
47
|
+
assert_equal orig_range, ne.range
|
48
|
+
assert_equal ne, ne.unmask
|
49
|
+
end
|
50
|
+
assert pmid.abner.include? "TET2"
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_free_text
|
54
|
+
text = "Free text including a mention to TET2."
|
55
|
+
Document.setup(text)
|
56
|
+
|
57
|
+
assert text.abner.include? "TET2"
|
58
|
+
|
59
|
+
docid = text.docid
|
60
|
+
assert_match /TET2/, Document.setup(docid).text
|
61
|
+
|
62
|
+
assert Document.setup(docid).abner.include? "TET2"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
|
@@ -74,6 +74,16 @@ class TestClass < Test::Unit::TestCase
|
|
74
74
|
assert_equal original.gsub(/TP53/, 'GN'), a
|
75
75
|
end
|
76
76
|
|
77
|
+
Transformed.with_transform(a, [gene1], "GN") do
|
78
|
+
Transformed.with_transform(a, [gene2], "LONG_GENE_PLACEHOLDER") do
|
79
|
+
assert_equal original.gsub(/TP53/, 'GN').sub('CDK5R1', "LONG_GENE_PLACEHOLDER"), a
|
80
|
+
end
|
81
|
+
assert_equal original.gsub(/TP53/, 'GN'), a
|
82
|
+
end
|
83
|
+
|
84
|
+
assert_equal original, a
|
85
|
+
|
86
|
+
|
77
87
|
assert_equal original, a
|
78
88
|
|
79
89
|
exp1, exp2 = nil, nil
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/entity'
|
3
|
+
require 'rbbt/ner/finder'
|
4
|
+
require 'rbbt/ner/finder'
|
5
|
+
require 'rbbt/sources/organism'
|
6
|
+
require 'rbbt/sources/kegg'
|
7
|
+
require 'rbbt/sources/NCI'
|
8
|
+
|
9
|
+
class TestFinder < Test::Unit::TestCase
|
10
|
+
|
11
|
+
def _test_namespace_and_format
|
12
|
+
#f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers("Hsa/jun2011").find)))
|
13
|
+
f = Finder.new(KEGG.pathways, :grep => "^hsa")
|
14
|
+
assert_equal "Hsa/jun2011", f.instances.first.namespace
|
15
|
+
assert_equal "Ensembl Gene ID", f.instances.first.format
|
16
|
+
end
|
17
|
+
|
18
|
+
def _test_find
|
19
|
+
f = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["SF3B1"])
|
20
|
+
|
21
|
+
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
22
|
+
if defined? Entity
|
23
|
+
ddd f.find("SF3B1").first.info
|
24
|
+
assert_equal "Ensembl Gene ID", f.find("SF3B1").first.format
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_find
|
29
|
+
f = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["RASGRF2"])
|
30
|
+
|
31
|
+
ddd f.find("RAS").collect{|m| m.info}
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/linnaeus'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
class TestLinnaeus < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_match
|
8
|
+
begin
|
9
|
+
mentions = Linnaeus.match("Human HeLa cells and murine models")
|
10
|
+
["Human", "HeLa cells", "murine"].each{|mention|
|
11
|
+
assert(mentions.include? mention)
|
12
|
+
}
|
13
|
+
rescue
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -3,6 +3,7 @@ require 'rbbt/ner/ngram_prefix_dictionary'
|
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
|
5
5
|
class TestNGramPrefixDictionary < Test::Unit::TestCase
|
6
|
+
|
6
7
|
def test_match
|
7
8
|
lexicon =<<-EOF
|
8
9
|
C1;aa;AA;bb b
|
@@ -17,6 +18,27 @@ C2;11;22;3 3;bb
|
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
21
|
+
def test_case_insensitive_match
|
22
|
+
lexicon =<<-EOF
|
23
|
+
C1;aa
|
24
|
+
C2;bb
|
25
|
+
EOF
|
26
|
+
|
27
|
+
TmpFile.with_file(lexicon) do |file|
|
28
|
+
index = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test", true)
|
29
|
+
|
30
|
+
matches = index.match('AA oo')
|
31
|
+
assert matches.select{|m| m.code.include? 'C1'}.any?
|
32
|
+
assert matches.include? 'AA'
|
33
|
+
|
34
|
+
matches = index.match('AA')
|
35
|
+
assert matches.select{|m| m.code.include? 'C1'}.any?
|
36
|
+
assert matches.include? 'AA'
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
|
20
42
|
def test_stream
|
21
43
|
lexicon =<<-EOF
|
22
44
|
C1;aa;AA;bb b
|
@@ -8,9 +8,9 @@ class TestOSCAR4 < Test::Unit::TestCase
|
|
8
8
|
def test_match
|
9
9
|
begin
|
10
10
|
ner = OSCAR4.new
|
11
|
-
str = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
11
|
+
str = "Alternatively, CO2 rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
12
12
|
|
13
|
-
mentions = ner.match(str, "CM"
|
13
|
+
mentions = ner.match(str, "CM")
|
14
14
|
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]
|
15
15
|
|
16
16
|
good_mentions.each{|mention|
|
@@ -22,7 +22,7 @@ class TestOSCAR4 < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
-
def test_ranges
|
25
|
+
def _test_ranges
|
26
26
|
begin
|
27
27
|
ner = OSCAR4.new
|
28
28
|
str =<<-EOF
|
data/test/rbbt/ner/test_rnorm.rb
CHANGED
@@ -25,9 +25,9 @@ S000000376 AAA GENE1 DDD
|
|
25
25
|
|
26
26
|
def test_match
|
27
27
|
assert_equal(["S000000029"], @norm.match("FUN21"))
|
28
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
|
29
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
|
30
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
|
28
|
+
assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
|
29
|
+
assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
|
30
|
+
assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
|
31
31
|
assert_equal([], @norm.match("GER4"))
|
32
32
|
|
33
33
|
@norm.match("FUN21")
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/nlp/open_nlp/sentence_splitter'
|
3
|
+
require 'rbbt/ner/segment'
|
4
|
+
|
5
|
+
$text=<<-EOF
|
6
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
|
7
|
+
of early childhood poorly responding to therapy. The majority of cases show
|
8
|
+
inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
|
9
|
+
triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
|
10
|
+
report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
|
11
|
+
retained SMARCB1 staining on immunohistochemistry and lacked genetic
|
12
|
+
alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
|
13
|
+
another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
|
14
|
+
(BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
|
15
|
+
findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
|
16
|
+
AT/RT and the usefulness of antibodies directed against SMARCA4 in this
|
17
|
+
diagnostic setting.
|
18
|
+
EOF
|
19
|
+
|
20
|
+
class TestClass < Test::Unit::TestCase
|
21
|
+
|
22
|
+
def test_sentences
|
23
|
+
text =<<-EOF
|
24
|
+
This is a sentence.
|
25
|
+
A funky character ™ in a sentence.
|
26
|
+
This is a sentence.
|
27
|
+
This is a
|
28
|
+
sentence. This is
|
29
|
+
another sentence.
|
30
|
+
EOF
|
31
|
+
|
32
|
+
assert_equal 5, OpenNLP.sentence_splitter(text).length
|
33
|
+
assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_text_sentences
|
37
|
+
Misc.benchmark(100) do
|
38
|
+
OpenNLP.sentence_splitter($text).include? "Our
|
39
|
+
findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
|
40
|
+
AT/RT and the usefulness of antibodies directed against SMARCA4 in this
|
41
|
+
diagnostic setting."
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,103 +1,104 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 6
|
9
|
-
- 3
|
10
|
-
version: 0.6.3
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Miguel Vazquez
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: rbbt-util
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 63
|
29
|
-
segments:
|
30
|
-
- 4
|
31
|
-
- 0
|
32
|
-
- 0
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
33
21
|
version: 4.0.0
|
34
22
|
type: :runtime
|
35
|
-
version_requirements: *id001
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: stemmer
|
38
23
|
prerelease: false
|
39
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 4.0.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: stemmer
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
40
33
|
none: false
|
41
|
-
requirements:
|
42
|
-
- -
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
45
|
-
segments:
|
46
|
-
- 0
|
47
|
-
version: "0"
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
48
38
|
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: libxml-ruby
|
52
39
|
prerelease: false
|
53
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: libxml-ruby
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
54
49
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
59
|
-
segments:
|
60
|
-
- 0
|
61
|
-
version: "0"
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
62
54
|
type: :runtime
|
63
|
-
version_requirements: *id003
|
64
|
-
- !ruby/object:Gem::Dependency
|
65
|
-
name: json
|
66
55
|
prerelease: false
|
67
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: json
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
68
65
|
none: false
|
69
|
-
requirements:
|
70
|
-
- -
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
|
73
|
-
segments:
|
74
|
-
- 0
|
75
|
-
version: "0"
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
76
70
|
type: :runtime
|
77
|
-
version_requirements: *id004
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
|
-
name: rjb
|
80
71
|
prerelease: false
|
81
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
73
|
none: false
|
83
|
-
requirements:
|
84
|
-
- -
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rjb
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
90
86
|
type: :runtime
|
91
|
-
|
92
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
description: ! 'Text mining tools: named entity recognition and normalization, document
|
95
|
+
classification, bag-of-words, dictionaries, etc'
|
93
96
|
email: miguel.vazquez@fdi.ucm.es
|
94
|
-
executables:
|
97
|
+
executables:
|
95
98
|
- get_ppis.rb
|
96
99
|
extensions: []
|
97
|
-
|
98
100
|
extra_rdoc_files: []
|
99
|
-
|
100
|
-
files:
|
101
|
+
files:
|
101
102
|
- lib/rbbt/bow/bow.rb
|
102
103
|
- lib/rbbt/bow/dictionary.rb
|
103
104
|
- lib/rbbt/bow/misc.rb
|
@@ -110,6 +111,8 @@ files:
|
|
110
111
|
- lib/rbbt/ner/abner.rb
|
111
112
|
- lib/rbbt/ner/banner.rb
|
112
113
|
- lib/rbbt/ner/chemical_tagger.rb
|
114
|
+
- lib/rbbt/ner/finder.rb
|
115
|
+
- lib/rbbt/ner/linnaeus.rb
|
113
116
|
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
114
117
|
- lib/rbbt/ner/oscar3.rb
|
115
118
|
- lib/rbbt/ner/oscar4.rb
|
@@ -119,6 +122,7 @@ files:
|
|
119
122
|
- lib/rbbt/ner/rnorm/cue_index.rb
|
120
123
|
- lib/rbbt/ner/rnorm/tokens.rb
|
121
124
|
- lib/rbbt/ner/segment.rb
|
125
|
+
- lib/rbbt/ner/segment/docid.rb
|
122
126
|
- lib/rbbt/ner/segment/named_entity.rb
|
123
127
|
- lib/rbbt/ner/segment/relationship.rb
|
124
128
|
- lib/rbbt/ner/segment/segmented.rb
|
@@ -127,13 +131,16 @@ files:
|
|
127
131
|
- lib/rbbt/ner/token_trieNER.rb
|
128
132
|
- lib/rbbt/nlp/genia/sentence_splitter.rb
|
129
133
|
- lib/rbbt/nlp/nlp.rb
|
134
|
+
- lib/rbbt/nlp/open_nlp/sentence_splitter.rb
|
130
135
|
- share/install/software/ABNER
|
131
136
|
- share/install/software/BANNER
|
132
137
|
- share/install/software/ChemicalTagger
|
133
138
|
- share/install/software/Gdep
|
134
139
|
- share/install/software/Geniass
|
140
|
+
- share/install/software/Linnaeus
|
135
141
|
- share/install/software/OSCAR3
|
136
142
|
- share/install/software/OSCAR4
|
143
|
+
- share/install/software/OpenNLP
|
137
144
|
- share/install/software/StanfordParser
|
138
145
|
- share/patterns/drug_induce_disease
|
139
146
|
- share/rnorm/cue_default
|
@@ -157,44 +164,37 @@ files:
|
|
157
164
|
- test/rbbt/ner/test_oscar4.rb
|
158
165
|
- test/rbbt/ner/test_chemical_tagger.rb
|
159
166
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
167
|
+
- test/rbbt/ner/test_finder.rb
|
168
|
+
- test/rbbt/ner/test_linnaeus.rb
|
169
|
+
- test/rbbt/entity/test_document.rb
|
160
170
|
- test/rbbt/nlp/test_nlp.rb
|
161
|
-
- test/rbbt/corpus/test_corpus.rb
|
162
|
-
- test/rbbt/corpus/test_document.rb
|
171
|
+
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
163
172
|
- bin/get_ppis.rb
|
164
173
|
homepage: http://github.com/mikisvaz/rbbt-util
|
165
174
|
licenses: []
|
166
|
-
|
167
175
|
post_install_message:
|
168
176
|
rdoc_options: []
|
169
|
-
|
170
|
-
require_paths:
|
177
|
+
require_paths:
|
171
178
|
- lib
|
172
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
179
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
173
180
|
none: false
|
174
|
-
requirements:
|
175
|
-
- -
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
|
178
|
-
|
179
|
-
- 0
|
180
|
-
version: "0"
|
181
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - ! '>='
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '0'
|
185
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
182
186
|
none: false
|
183
|
-
requirements:
|
184
|
-
- -
|
185
|
-
- !ruby/object:Gem::Version
|
186
|
-
|
187
|
-
segments:
|
188
|
-
- 0
|
189
|
-
version: "0"
|
187
|
+
requirements:
|
188
|
+
- - ! '>='
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: '0'
|
190
191
|
requirements: []
|
191
|
-
|
192
192
|
rubyforge_project:
|
193
|
-
rubygems_version: 1.8.
|
193
|
+
rubygems_version: 1.8.24
|
194
194
|
signing_key:
|
195
195
|
specification_version: 3
|
196
196
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
197
|
-
test_files:
|
197
|
+
test_files:
|
198
198
|
- test/test_helper.rb
|
199
199
|
- test/rbbt/bow/test_bow.rb
|
200
200
|
- test/rbbt/bow/test_dictionary.rb
|
@@ -213,6 +213,8 @@ test_files:
|
|
213
213
|
- test/rbbt/ner/test_oscar4.rb
|
214
214
|
- test/rbbt/ner/test_chemical_tagger.rb
|
215
215
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
216
|
+
- test/rbbt/ner/test_finder.rb
|
217
|
+
- test/rbbt/ner/test_linnaeus.rb
|
218
|
+
- test/rbbt/entity/test_document.rb
|
216
219
|
- test/rbbt/nlp/test_nlp.rb
|
217
|
-
- test/rbbt/corpus/test_corpus.rb
|
218
|
-
- test/rbbt/corpus/test_document.rb
|
220
|
+
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|