rbbt-text 0.6.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/corpus/document.rb +1 -0
- data/lib/rbbt/entity/document.rb +62 -18
- data/lib/rbbt/ner/abner.rb +6 -3
- data/lib/rbbt/ner/banner.rb +10 -7
- data/lib/rbbt/ner/chemical_tagger.rb +5 -3
- data/lib/rbbt/ner/finder.rb +60 -0
- data/lib/rbbt/ner/linnaeus.rb +38 -0
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +42 -48
- data/lib/rbbt/ner/oscar3.rb +9 -6
- data/lib/rbbt/ner/oscar4.rb +21 -7
- data/lib/rbbt/ner/rnorm.rb +57 -33
- data/lib/rbbt/ner/rnorm/cue_index.rb +4 -3
- data/lib/rbbt/ner/rnorm/tokens.rb +10 -4
- data/lib/rbbt/ner/segment.rb +19 -8
- data/lib/rbbt/ner/segment/docid.rb +46 -0
- data/lib/rbbt/ner/segment/named_entity.rb +1 -1
- data/lib/rbbt/ner/segment/transformed.rb +5 -3
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +22 -1
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +74 -0
- data/share/install/software/Linnaeus +21 -0
- data/share/install/software/OpenNLP +12 -0
- data/share/rnorm/tokens_default +1 -2
- data/test/rbbt/entity/test_document.rb +66 -0
- data/test/rbbt/ner/segment/test_transformed.rb +10 -0
- data/test/rbbt/ner/test_finder.rb +34 -0
- data/test/rbbt/ner/test_linnaeus.rb +16 -0
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +22 -0
- data/test/rbbt/ner/test_oscar4.rb +3 -3
- data/test/rbbt/ner/test_rnorm.rb +3 -3
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +45 -0
- data/test/test_helper.rb +1 -1
- metadata +101 -99
- data/test/rbbt/corpus/test_corpus.rb +0 -99
- data/test/rbbt/corpus/test_document.rb +0 -236
@@ -101,7 +101,7 @@ module NLP
|
|
101
101
|
a, b, d, c = $1, $2, $3, $4
|
102
102
|
events << eventCount.to_s << "\t"
|
103
103
|
events << returnFeatures(a, b, c)
|
104
|
-
(" "
|
104
|
+
(" " << a << b << "__" << eventCount.to_s << "____" << d << "__" << c << " ")
|
105
105
|
}
|
106
106
|
eventCount += 1
|
107
107
|
end
|
@@ -111,6 +111,27 @@ module NLP
|
|
111
111
|
[events, marks]
|
112
112
|
end
|
113
113
|
|
114
|
+
# Scans +text+ line by line for candidate sentence boundaries: a token ending
# in one of . ! ? ) ] " followed by spaces and another token, the whole thing
# delimited by single spaces.
#
# For every candidate found, "<n>\t" plus the output of +returnFeatures+ is
# appended to the events string, and the candidate is rewritten in the line as
# " <token><punct>__<n>____<spaces>__<next token> " so it can be located later
# by its counter <n>.
#
# Returns [events, marks]. The marked lines are concatenated as they come out
# of the split, i.e. without the newline separators.
def self.event_extraction(text)
  events = ""
  marks = ""
  event_count = 0

  boundary = / ([^ ]+)([.!\?\)\]\"])( +)([^ ]+) /

  text.split(/\n/).each do |line|
    # The rewritten marker no longer matches the pattern, so each candidate is
    # consumed exactly once and the loop terminates.
    while (m = boundary.match(line))
      before, punct, gap, after = m[1], m[2], m[3], m[4]

      events << event_count.to_s << "\t"
      events << returnFeatures(before, punct, after)

      line = "#{m.pre_match} #{before}#{punct}__#{event_count}____#{gap}__#{after} #{m.post_match}"
      event_count += 1
    end
    marks << line
  end

  [events, marks]
end
|
134
|
+
|
114
135
|
def self.process_labels(marked_text, labels)
|
115
136
|
out = ""
|
116
137
|
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rjb'
|
3
|
+
require 'rbbt/ner/segment'
|
4
|
+
require 'rbbt/resource'
|
5
|
+
|
6
|
+
# Sentence splitting through the OpenNLP SentenceDetectorME Java class, which
# is reached via Rjb.
module OpenNLP
  Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find

  # NOTE(review): the model is registered as "da-sent.bin" but downloaded from
  # the "de-sent.bin" URL; the two names disagree -- confirm which is intended.
  Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"

  # Seconds the forked detector may run before it is killed.
  MAX = 5

  @@FileInputStream = Rjb::import('java.io.FileInputStream')
  @@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
  @@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')

  # Returns the SentenceDetectorME instance, building it from the model file
  # on first use and reusing it afterwards.
  def self.sentence_split_detector
    @@sentence_split_detector ||= begin
      model_in = @@FileInputStream.new(Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce.find)
      begin
        model = @@SentenceModel.new(model_in)
      ensure
        # Close the stream even when the model fails to load.
        model_in.close
      end

      @@SentenceDetectorME.new(model)
    end
  end

  # Splits +text+ into sentences.
  #
  # The detector runs in a forked child that writes its result to a temporary
  # file; the parent polls the child and kills it after MAX seconds.
  #
  # Returns an array with the sentence strings, each one set up as a Segment
  # with its offset in +text+; an empty array for nil or empty input.
  # Raises RuntimeError when the child exceeds MAX seconds. Any other error is
  # propagated unchanged.
  def self.sentence_splitter(text)
    return [] if text.nil? || text.empty?

    detector = sentence_split_detector
    sentences = nil

    TmpFile.with_file do |tmpfile|
      start_time = Time.now

      begin
        pid = Process.fork do
          sent = detector.sentDetect(text)
          Open.write(tmpfile, sent * "#OpenNLP:SENTENCE#")
        end

        # WNOHANG makes waitpid return nil while the child is still running;
        # a plain waitpid would block here and the deadline would never be
        # checked.
        until Process.waitpid(pid, Process::WNOHANG)
          if Time.now - start_time > MAX
            Process.kill(9, pid)
            # Reap the killed child in the background so no zombie is left.
            Process.detach(pid)
            raise "Taking to long (> #{MAX} seconds)"
          end
          sleep 0.1
        end
      rescue Errno::ECHILD
        # The child has already been reaped; nothing left to wait for.
      end

      sentences = Open.read(tmpfile).split("#OpenNLP:SENTENCE#")
    end

    # Locate every sentence in the original text, searching forward from the
    # end of the previous one, to record its offset.
    last = 0
    sentences.collect do |sentence|
      start = text.index(sentence, last)
      Segment.setup sentence, start
      last = start + sentence.length - 1
      sentence
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/bin/bash
# Installs the Linnaeus tagger and its species-proxy entity pack.
#
# Relies on the install_src, get_src, opt_dir and build_dir functions and on
# the OPT_DIR / OPT_JAR_DIR variables, none of which are defined in this
# script (presumably provided by the installer that runs it -- confirm).

name='Linnaeus'
url="http://sourceforge.net/projects/linnaeus/files/Linnaeus/linnaeus-2.0.tar.gz/download"
species_url="http://sourceforge.net/projects/linnaeus/files/Entity_packs/species-proxy-1.2.tar.gz/download"

install_src "$name" "$url"
ln -s "$OPT_DIR/$name/bin/"*.jar "$OPT_JAR_DIR/$name.jar"

get_src "Linnaeus-species-proxy" "$species_url"
pkg_dir="$(opt_dir "$name")"
build_dir="$(build_dir)"
mv "$build_dir" "$pkg_dir"

# Rewrite properties.conf so that its $dir entry points at the installed
# entity pack, dropping the previous "dir =" line and all comment lines.
properties="$pkg_dir/species-proxy/properties.conf"

# A fresh, uniquely named scratch file: a fixed path that is appended to would
# carry content over from earlier runs into the generated configuration.
tmp_file="$(mktemp "${TMPDIR:-/tmp}/species-proxy-properties.XXXXXX")"

grep -v "^.dir =" "$properties" > "$tmp_file"
echo "\$dir = $pkg_dir/species-proxy/" > "$properties"
grep -v "^#" "$tmp_file" >> "$properties"

rm -f "$tmp_file"
|
21
|
+
|
data/share/rnorm/tokens_default
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'rbbt/util/misc'
|
2
2
|
|
3
|
-
|
4
3
|
plural = Proc.new do |t| t.sub(/s$/,'') end
|
5
4
|
|
6
5
|
tokens do
|
@@ -14,7 +13,7 @@ tokens do
|
|
14
13
|
|
15
14
|
# Some words for removal
|
16
15
|
stopword do |w| $stopwords.include?( w.downcase_first) end
|
17
|
-
gene /genes?/i
|
16
|
+
gene /genes?/i
|
18
17
|
dna
|
19
18
|
cdna
|
20
19
|
rna
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/entity'
|
3
|
+
require 'rbbt/entity/pmid'
|
4
|
+
require 'rbbt/entity/document'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
require 'rbbt/workflow'
|
8
|
+
|
9
|
+
Workflow.require_workflow "TextMining"
|
10
|
+
|
11
|
+
# Test-only additions to Document: a Tokyo Cabinet corpus under /tmp and two
# gene-mention properties computed through the TextMining workflow.
module Document
  self.corpus = Persist.open_tokyocabinet("/tmp/corpus", false, :string, "BDB")

  property :banner => :single do |*args|
    normalize, organism = args
    job = TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :banner, :normalize => normalize, :organism => organism)
    job.exec.each do |segment|
      SegmentWithDocid.setup(segment, self.docid)
    end
  end

  # NOTE(review): this property is named :abner but also passes
  # :method => :banner to the job -- confirm whether that is intentional.
  property :abner => :single do |*args|
    normalize, organism = args
    job = TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :banner, :normalize => normalize, :organism => organism)
    job.exec.each do |segment|
      SegmentWithDocid.setup(segment, self.docid)
    end
  end

  persist :abner, :annotations, :dir => Rbbt.tmp.test.find(:user).entity_property
end
|
26
|
+
|
27
|
+
# Exercises documents built from a PubMed id and from free text, using the
# :abner property added to Document for these tests.
class TestDocument < Test::Unit::TestCase
  # A PMID entity exposes an id starting with "PMID" and a text mentioning TET2.
  def test_pmid
    pmid = "21904853"
    PMID.setup(pmid)

    assert_match(/^PMID/, pmid.id)
    assert_match(/TET2/, pmid.text)
  end

  # Masking a mention keeps its range and marks it as masked.
  def test_abner
    pmid = "21904853"
    PMID.setup(pmid)

    genes = pmid.abner.reject { |ne| ne.offset.nil? }
    genes.each do |ne|
      orig_range = ne.range
      ne.mask
      assert ne.masked?
      assert ne =~ /^MASKED/
      assert_equal orig_range, ne.range
      # NOTE(review): this compares the mention with the result of its own
      # unmask call, not with a copy taken before masking -- confirm that this
      # is the intended check.
      assert_equal ne, ne.unmask
    end
    assert pmid.abner.include?("TET2")
  end

  # Free text can be set up as a Document and recovered later from its docid.
  def test_free_text
    text = "Free text including a mention to TET2."
    Document.setup(text)

    assert text.abner.include?("TET2")

    docid = text.docid
    assert_match(/TET2/, Document.setup(docid).text)

    assert Document.setup(docid).abner.include?("TET2")
  end
end
|
65
|
+
|
66
|
+
|
@@ -74,6 +74,16 @@ class TestClass < Test::Unit::TestCase
|
|
74
74
|
assert_equal original.gsub(/TP53/, 'GN'), a
|
75
75
|
end
|
76
76
|
|
77
|
+
Transformed.with_transform(a, [gene1], "GN") do
|
78
|
+
Transformed.with_transform(a, [gene2], "LONG_GENE_PLACEHOLDER") do
|
79
|
+
assert_equal original.gsub(/TP53/, 'GN').sub('CDK5R1', "LONG_GENE_PLACEHOLDER"), a
|
80
|
+
end
|
81
|
+
assert_equal original.gsub(/TP53/, 'GN'), a
|
82
|
+
end
|
83
|
+
|
84
|
+
assert_equal original, a
|
85
|
+
|
86
|
+
|
77
87
|
assert_equal original, a
|
78
88
|
|
79
89
|
exp1, exp2 = nil, nil
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/entity'
|
3
|
+
require 'rbbt/ner/finder'
|
4
|
+
require 'rbbt/ner/finder'
|
5
|
+
require 'rbbt/sources/organism'
|
6
|
+
require 'rbbt/sources/kegg'
|
7
|
+
require 'rbbt/sources/NCI'
|
8
|
+
|
9
|
+
# Tests for Finder. Methods prefixed with an underscore are not picked up by
# Test::Unit and therefore do not run.
class TestFinder < Test::Unit::TestCase

  def _test_namespace_and_format
    #f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers("Hsa/jun2011").find)))
    finder = Finder.new(KEGG.pathways, :grep => "^hsa")

    assert_equal("Hsa/jun2011", finder.instances.first.namespace)
    assert_equal("Ensembl Gene ID", finder.instances.first.format)
  end

  def _test_find
    finder = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["SF3B1"])

    assert_equal("ENSG00000115524", finder.find("SF3B1").first)

    # The entity-specific checks only apply when Entity has been loaded.
    if defined? Entity
      ddd finder.find("SF3B1").first.info
      assert_equal("Ensembl Gene ID", finder.find("SF3B1").first.format)
    end
  end

  # Dumps the info of every match for "RAS"; makes no assertions.
  def test_find
    finder = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["RASGRF2"])

    ddd finder.find("RAS").collect { |match| match.info }
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/linnaeus'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Checks that Linnaeus finds the species mentions in a short sentence.
class TestLinnaeus < Test::Unit::TestCase

  # Any StandardError raised while matching is rescued and ignored, so the
  # test does not fail on such errors.
  def test_match
    mentions = Linnaeus.match("Human HeLa cells and murine models")
    expected = ["Human", "HeLa cells", "murine"]
    expected.each do |mention|
      assert(mentions.include?(mention))
    end
  rescue
  end
end
|
@@ -3,6 +3,7 @@ require 'rbbt/ner/ngram_prefix_dictionary'
|
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
|
5
5
|
class TestNGramPrefixDictionary < Test::Unit::TestCase
|
6
|
+
|
6
7
|
def test_match
|
7
8
|
lexicon =<<-EOF
|
8
9
|
C1;aa;AA;bb b
|
@@ -17,6 +18,27 @@ C2;11;22;3 3;bb
|
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
21
|
+
# An upper-case query must match the lower-case lexicon entry "aa" (code C1),
# both inside a longer text and on its own. The dictionary is built with +true+
# as third argument (presumably the case-insensitivity switch -- confirm).
def test_case_insensitive_match
  lexicon =<<-EOF
C1;aa
C2;bb
  EOF

  TmpFile.with_file(lexicon) do |file|
    dictionary = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test", true)

    ['AA oo', 'AA'].each do |query|
      matches = dictionary.match(query)
      assert matches.select { |match| match.code.include?('C1') }.any?
      assert matches.include?('AA')
    end
  end
end
|
40
|
+
|
41
|
+
|
20
42
|
def test_stream
|
21
43
|
lexicon =<<-EOF
|
22
44
|
C1;aa;AA;bb b
|
@@ -8,9 +8,9 @@ class TestOSCAR4 < Test::Unit::TestCase
|
|
8
8
|
def test_match
|
9
9
|
begin
|
10
10
|
ner = OSCAR4.new
|
11
|
-
str = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
11
|
+
str = "Alternatively, CO2 rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
12
12
|
|
13
|
-
mentions = ner.match(str, "CM"
|
13
|
+
mentions = ner.match(str, "CM")
|
14
14
|
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]
|
15
15
|
|
16
16
|
good_mentions.each{|mention|
|
@@ -22,7 +22,7 @@ class TestOSCAR4 < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
-
def
|
25
|
+
def _test_ranges
|
26
26
|
begin
|
27
27
|
ner = OSCAR4.new
|
28
28
|
str =<<-EOF
|
data/test/rbbt/ner/test_rnorm.rb
CHANGED
@@ -25,9 +25,9 @@ S000000376 AAA GENE1 DDD
|
|
25
25
|
|
26
26
|
def test_match
|
27
27
|
assert_equal(["S000000029"], @norm.match("FUN21"))
|
28
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
|
29
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
|
30
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
|
28
|
+
assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
|
29
|
+
assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
|
30
|
+
assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
|
31
31
|
assert_equal([], @norm.match("GER4"))
|
32
32
|
|
33
33
|
@norm.match("FUN21")
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/nlp/open_nlp/sentence_splitter'
|
3
|
+
require 'rbbt/ner/segment'
|
4
|
+
|
5
|
+
$text=<<-EOF
|
6
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
|
7
|
+
of early childhood poorly responding to therapy. The majority of cases show
|
8
|
+
inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
|
9
|
+
triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
|
10
|
+
report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
|
11
|
+
retained SMARCB1 staining on immunohistochemistry and lacked genetic
|
12
|
+
alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
|
13
|
+
another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
|
14
|
+
(BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
|
15
|
+
findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
|
16
|
+
AT/RT and the usefulness of antibodies directed against SMARCA4 in this
|
17
|
+
diagnostic setting.
|
18
|
+
EOF
|
19
|
+
|
20
|
+
# Tests for OpenNLP.sentence_splitter.
class TestClass < Test::Unit::TestCase

  # Five sentences are expected, one of them spanning a line break.
  def test_sentences
    # Built line by line so that the significant trailing space in
    # "This is a " stays visible and cannot be stripped by an editor.
    text = [
      "This is a sentence.",
      "A funky character ™ in a sentence.",
      "This is a sentence.",
      "This is a ",
      "sentence. This is",
      "another sentence.",
    ].join("\n") + "\n"

    # Split once: every call forks a child process.
    sentences = OpenNLP.sentence_splitter(text)

    assert_equal 5, sentences.length
    assert_equal "This is a \nsentence.", sentences[3]
  end

  # Runs the splitter repeatedly over the abstract in $text.
  # NOTE(review): the result of include? is discarded, so this only measures
  # time and never fails on a wrong split -- confirm that this is intended.
  def test_text_sentences
    last_sentence = "Our\nfindings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive\nAT/RT and the usefulness of antibodies directed against SMARCA4 in this\ndiagnostic setting."

    Misc.benchmark(100) do
      OpenNLP.sentence_splitter($text).include? last_sentence
    end
  end
end
|
45
|
+
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,103 +1,104 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 6
|
9
|
-
- 3
|
10
|
-
version: 0.6.3
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Miguel Vazquez
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: rbbt-util
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 63
|
29
|
-
segments:
|
30
|
-
- 4
|
31
|
-
- 0
|
32
|
-
- 0
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
33
21
|
version: 4.0.0
|
34
22
|
type: :runtime
|
35
|
-
version_requirements: *id001
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: stemmer
|
38
23
|
prerelease: false
|
39
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 4.0.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: stemmer
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
40
33
|
none: false
|
41
|
-
requirements:
|
42
|
-
- -
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
45
|
-
segments:
|
46
|
-
- 0
|
47
|
-
version: "0"
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
48
38
|
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: libxml-ruby
|
52
39
|
prerelease: false
|
53
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: libxml-ruby
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
54
49
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
59
|
-
segments:
|
60
|
-
- 0
|
61
|
-
version: "0"
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
62
54
|
type: :runtime
|
63
|
-
version_requirements: *id003
|
64
|
-
- !ruby/object:Gem::Dependency
|
65
|
-
name: json
|
66
55
|
prerelease: false
|
67
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: json
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
68
65
|
none: false
|
69
|
-
requirements:
|
70
|
-
- -
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
|
73
|
-
segments:
|
74
|
-
- 0
|
75
|
-
version: "0"
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
76
70
|
type: :runtime
|
77
|
-
version_requirements: *id004
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
|
-
name: rjb
|
80
71
|
prerelease: false
|
81
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
73
|
none: false
|
83
|
-
requirements:
|
84
|
-
- -
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rjb
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
90
86
|
type: :runtime
|
91
|
-
|
92
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
description: ! 'Text mining tools: named entity recognition and normalization, document
|
95
|
+
classification, bag-of-words, dictionaries, etc'
|
93
96
|
email: miguel.vazquez@fdi.ucm.es
|
94
|
-
executables:
|
97
|
+
executables:
|
95
98
|
- get_ppis.rb
|
96
99
|
extensions: []
|
97
|
-
|
98
100
|
extra_rdoc_files: []
|
99
|
-
|
100
|
-
files:
|
101
|
+
files:
|
101
102
|
- lib/rbbt/bow/bow.rb
|
102
103
|
- lib/rbbt/bow/dictionary.rb
|
103
104
|
- lib/rbbt/bow/misc.rb
|
@@ -110,6 +111,8 @@ files:
|
|
110
111
|
- lib/rbbt/ner/abner.rb
|
111
112
|
- lib/rbbt/ner/banner.rb
|
112
113
|
- lib/rbbt/ner/chemical_tagger.rb
|
114
|
+
- lib/rbbt/ner/finder.rb
|
115
|
+
- lib/rbbt/ner/linnaeus.rb
|
113
116
|
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
114
117
|
- lib/rbbt/ner/oscar3.rb
|
115
118
|
- lib/rbbt/ner/oscar4.rb
|
@@ -119,6 +122,7 @@ files:
|
|
119
122
|
- lib/rbbt/ner/rnorm/cue_index.rb
|
120
123
|
- lib/rbbt/ner/rnorm/tokens.rb
|
121
124
|
- lib/rbbt/ner/segment.rb
|
125
|
+
- lib/rbbt/ner/segment/docid.rb
|
122
126
|
- lib/rbbt/ner/segment/named_entity.rb
|
123
127
|
- lib/rbbt/ner/segment/relationship.rb
|
124
128
|
- lib/rbbt/ner/segment/segmented.rb
|
@@ -127,13 +131,16 @@ files:
|
|
127
131
|
- lib/rbbt/ner/token_trieNER.rb
|
128
132
|
- lib/rbbt/nlp/genia/sentence_splitter.rb
|
129
133
|
- lib/rbbt/nlp/nlp.rb
|
134
|
+
- lib/rbbt/nlp/open_nlp/sentence_splitter.rb
|
130
135
|
- share/install/software/ABNER
|
131
136
|
- share/install/software/BANNER
|
132
137
|
- share/install/software/ChemicalTagger
|
133
138
|
- share/install/software/Gdep
|
134
139
|
- share/install/software/Geniass
|
140
|
+
- share/install/software/Linnaeus
|
135
141
|
- share/install/software/OSCAR3
|
136
142
|
- share/install/software/OSCAR4
|
143
|
+
- share/install/software/OpenNLP
|
137
144
|
- share/install/software/StanfordParser
|
138
145
|
- share/patterns/drug_induce_disease
|
139
146
|
- share/rnorm/cue_default
|
@@ -157,44 +164,37 @@ files:
|
|
157
164
|
- test/rbbt/ner/test_oscar4.rb
|
158
165
|
- test/rbbt/ner/test_chemical_tagger.rb
|
159
166
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
167
|
+
- test/rbbt/ner/test_finder.rb
|
168
|
+
- test/rbbt/ner/test_linnaeus.rb
|
169
|
+
- test/rbbt/entity/test_document.rb
|
160
170
|
- test/rbbt/nlp/test_nlp.rb
|
161
|
-
- test/rbbt/
|
162
|
-
- test/rbbt/corpus/test_document.rb
|
171
|
+
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
163
172
|
- bin/get_ppis.rb
|
164
173
|
homepage: http://github.com/mikisvaz/rbbt-util
|
165
174
|
licenses: []
|
166
|
-
|
167
175
|
post_install_message:
|
168
176
|
rdoc_options: []
|
169
|
-
|
170
|
-
require_paths:
|
177
|
+
require_paths:
|
171
178
|
- lib
|
172
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
179
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
173
180
|
none: false
|
174
|
-
requirements:
|
175
|
-
- -
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
|
178
|
-
|
179
|
-
- 0
|
180
|
-
version: "0"
|
181
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - ! '>='
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '0'
|
185
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
182
186
|
none: false
|
183
|
-
requirements:
|
184
|
-
- -
|
185
|
-
- !ruby/object:Gem::Version
|
186
|
-
|
187
|
-
segments:
|
188
|
-
- 0
|
189
|
-
version: "0"
|
187
|
+
requirements:
|
188
|
+
- - ! '>='
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: '0'
|
190
191
|
requirements: []
|
191
|
-
|
192
192
|
rubyforge_project:
|
193
|
-
rubygems_version: 1.8.
|
193
|
+
rubygems_version: 1.8.24
|
194
194
|
signing_key:
|
195
195
|
specification_version: 3
|
196
196
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
197
|
-
test_files:
|
197
|
+
test_files:
|
198
198
|
- test/test_helper.rb
|
199
199
|
- test/rbbt/bow/test_bow.rb
|
200
200
|
- test/rbbt/bow/test_dictionary.rb
|
@@ -213,6 +213,8 @@ test_files:
|
|
213
213
|
- test/rbbt/ner/test_oscar4.rb
|
214
214
|
- test/rbbt/ner/test_chemical_tagger.rb
|
215
215
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
216
|
+
- test/rbbt/ner/test_finder.rb
|
217
|
+
- test/rbbt/ner/test_linnaeus.rb
|
218
|
+
- test/rbbt/entity/test_document.rb
|
216
219
|
- test/rbbt/nlp/test_nlp.rb
|
217
|
-
- test/rbbt/
|
218
|
-
- test/rbbt/corpus/test_document.rb
|
220
|
+
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|