rbbt 1.1.8 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +12 -12
- data/bin/rbbt_config +2 -3
- data/install_scripts/norm/Rakefile +4 -4
- data/install_scripts/organisms/{tair.Rakefile → Ath.Rakefile} +4 -3
- data/install_scripts/organisms/{cgd.Rakefile → Cal.Rakefile} +0 -0
- data/install_scripts/organisms/{worm.Rakefile → Cel.Rakefile} +0 -0
- data/install_scripts/organisms/{human.Rakefile → Hsa.Rakefile} +4 -8
- data/install_scripts/organisms/{mgi.Rakefile → Mmu.Rakefile} +0 -0
- data/install_scripts/organisms/{rgd.Rakefile → Rno.Rakefile} +0 -0
- data/install_scripts/organisms/{sgd.Rakefile → Sce.Rakefile} +0 -0
- data/install_scripts/organisms/{pombe.Rakefile → Spo.Rakefile} +0 -0
- data/install_scripts/organisms/rake-include.rb +15 -19
- data/lib/rbbt.rb +0 -3
- data/lib/rbbt/ner/rnorm.rb +2 -2
- data/lib/rbbt/sources/go.rb +48 -3
- data/lib/rbbt/sources/organism.rb +12 -17
- data/lib/rbbt/util/open.rb +27 -27
- data/lib/rbbt/util/tmpfile.rb +16 -0
- data/tasks/install.rake +1 -1
- data/test/rbbt/bow/test_bow.rb +33 -0
- data/test/rbbt/bow/test_classifier.rb +72 -0
- data/test/rbbt/bow/test_dictionary.rb +91 -0
- data/test/rbbt/ner/rnorm/test_cue_index.rb +57 -0
- data/test/rbbt/ner/rnorm/test_tokens.rb +70 -0
- data/test/rbbt/ner/test_abner.rb +17 -0
- data/test/rbbt/ner/test_banner.rb +17 -0
- data/test/rbbt/ner/test_dictionaryNER.rb +122 -0
- data/test/rbbt/ner/test_regexpNER.rb +33 -0
- data/test/rbbt/ner/test_rner.rb +126 -0
- data/test/rbbt/ner/test_rnorm.rb +47 -0
- data/test/rbbt/sources/test_biocreative.rb +38 -0
- data/test/rbbt/sources/test_biomart.rb +31 -0
- data/test/rbbt/sources/test_entrez.rb +49 -0
- data/test/rbbt/sources/test_go.rb +24 -0
- data/test/rbbt/sources/test_organism.rb +59 -0
- data/test/rbbt/sources/test_polysearch.rb +27 -0
- data/test/rbbt/sources/test_pubmed.rb +29 -0
- data/test/rbbt/util/test_arrayHash.rb +257 -0
- data/test/rbbt/util/test_filecache.rb +37 -0
- data/test/rbbt/util/test_index.rb +31 -0
- data/test/rbbt/util/test_misc.rb +20 -0
- data/test/rbbt/util/test_open.rb +97 -0
- data/test/rbbt/util/test_simpleDSL.rb +57 -0
- data/test/rbbt/util/test_tmpfile.rb +21 -0
- data/test/test_helper.rb +4 -0
- data/test/test_rbbt.rb +11 -0
- metadata +39 -12
data/lib/rbbt/util/tmpfile.rb
CHANGED
@@ -16,4 +16,20 @@ module TmpFile
|
|
16
16
|
def self.tmp_file(s = "",max=10000000)
|
17
17
|
File.join(Rbbt.tmpdir,random_name(s,max))
|
18
18
|
end
|
19
|
+
|
20
|
+
# Yields the path of a fresh temporary file and removes that file afterwards.
#
# content:: when not nil, it is written to the file before the block runs;
#           when nil, no file is created and the block only gets the path.
#
# Returns whatever the block returns. The file (if it exists) is removed
# even when the block raises, so a failing block no longer leaks it.
def self.with_file(content = nil)
  tmpfile = tmp_file

  File.open(tmpfile, 'w') { |f| f.write content } unless content.nil?

  begin
    yield(tmpfile)
  ensure
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    FileUtils.rm tmpfile if File.exist? tmpfile
  end
end
|
31
|
+
|
32
|
+
# Let callers use +new+ as a synonym for +tmp_file+ on this module.
class << self
  alias_method :new, :tmp_file
end
|
19
35
|
end
|
data/tasks/install.rake
CHANGED
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/bow/bow'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for the bag-of-words helpers: String#words and the BagOfWords
# terms/features functions.
class TestBow < Test::Unit::TestCase

  # Words come back lower-cased.
  def test_words
    words = "Hello World".words
    assert_equal(["hello", "world"], words)
  end

  # The second argument of BagOfWords.terms switches the two-word term
  # ("hello world") on and off.
  def test_terms
    sentence = "Hello World"

    single_terms = BagOfWords.terms(sentence, false).keys.sort
    assert_equal(["hello", "world"], single_terms)

    with_bigrams = BagOfWords.terms(sentence, true).keys.sort
    assert_equal(["hello", "hello world", "world"], with_bigrams)
  end

  # Each vocabulary word occurs twice in the concatenated text.
  def test_features
    sentence = "Hello world!" + "Hello World Again!"
    vocabulary = "Hello World".words.uniq.sort

    assert_equal([2, 2], BagOfWords.features(sentence, vocabulary))
  end

  # Plural forms are reduced to their stem.
  def test_stem
    words = "Proteins".words
    assert_equal(["protein"], words)
  end

end
|
32
|
+
|
33
|
+
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/bow/classifier'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/util/open'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
# Tests for Classifier: building a model file from a feature table and
# classifying feature arrays, feature hashes and raw text with it.
class TestClassifier < Test::Unit::TestCase

  # Feature table used by every test. The heredoc body is kept flush-left so
  # the text written to disk is exactly what the tests always used.
  FEATURES = <<-EOT
Name Class hello world
row1 - 2 0
row2 + 0 2
  EOT

  # Creating a model must leave a model file on disk.
  def test_build_model
    with_model do |modelfile|
      assert(File.exist?(modelfile))
    end
  end

  # A classifier loaded from the model labels "hello"-like input as "-" and
  # "world"-like input as "+", whichever entry point is used.
  def test_classifier
    with_model do |modelfile|
      classifier = Classifier.new(modelfile)

      assert_equal(["hello", "world"], classifier.terms)

      assert_equal(["-", "+"], classifier.classify_feature_array([[1,0],[0,1]]))

      assert_equal({"negative"=>"-", "positive"=>"+"}, classifier.classify_feature_hash({:positive => [0,1], :negative => [1,0]}))

      assert_equal(["-", "+"], classifier.classify_text_array(["Hello","World"]))

      assert_equal({"negative"=>"-", "positive"=>"+"}, classifier.classify_text_hash({:negative => "Hello", :positive =>"World"}))

      # The generic entry point dispatches on the shape of its argument.
      assert_equal('-', classifier.classify("Hello"))
      assert_equal(["-", "+"],classifier.classify([[1,0],[0,1]]))
      assert_equal({"negative"=>"-", "positive"=>"+"},classifier.classify({:positive => [0,1], :negative => [1,0]}))
      assert_equal(["-", "+"],classifier.classify(["Hello","World"]))
      #assert_equal({"negative"=>"-", "positive"=>"+"},classifier.classify({:negative => "Hello", :positive => "World"}))


      #assert_nothing_raised do classifier.classify("Unknown terms") end
      #assert_nothing_raised do classifier.classify([]) end
    end
  end

  private

  # Writes FEATURES to a temporary file, builds a model from it and yields the
  # model file path. The features file is removed as soon as the model has
  # been built; the model file is removed once the block finishes. Both are
  # removed even when model creation or an assertion fails, so failing tests
  # do not leave files behind.
  def with_model
    featuresfile = TmpFile.tmp_file("test_classifier")
    modelfile    = TmpFile.tmp_file("test_classifier")

    begin
      Open.write(featuresfile, FEATURES)
      Classifier.create_model(featuresfile, modelfile)
    ensure
      FileUtils.rm featuresfile if File.exist?(featuresfile)
    end

    yield modelfile
  ensure
    FileUtils.rm modelfile if modelfile && File.exist?(modelfile)
  end

end
|
72
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/bow/dictionary'
|
3
|
+
require 'rbbt/bow/bow'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
# Tests for Dictionary and its TF_IDF and KL variants.
class TestDictionary < Test::Unit::TestCase

  # Tolerance for comparing computed floating point statistics; exact
  # equality on floats breaks on harmless changes in evaluation order.
  DELTA = 1e-9

  # Raw term counts are accumulated over all added documents.
  def test_standard
    dict = Dictionary.new
    sample_docs.each { |doc| dict.add doc }

    assert_equal(2, dict.terms["hello"])
    assert_equal(2, dict.terms["yin"])
    assert_equal(0, dict.terms["bye"])
    assert_equal(1, dict.terms["world"])
  end

  # Checks term counts plus the df, tf, idf and tf_idf statistics.
  def test_tf_idf
    dict = Dictionary::TF_IDF.new
    sample_docs.each { |doc| dict.add doc }

    assert_equal(2, dict.terms["hello"])
    assert_equal(2, dict.terms["yin"])
    assert_equal(0, dict.terms["bye"])
    assert_equal(1, dict.terms["world"])

    assert_in_delta(1,   dict.df["hello"], DELTA)
    assert_in_delta(0.5, dict.df["yin"],   DELTA)
    assert_in_delta(0,   dict.df["bye"],   DELTA)
    assert_in_delta(0.5, dict.df["world"], DELTA)

    assert_in_delta(2.0/5, dict.tf["hello"], DELTA)
    assert_in_delta(2.0/5, dict.tf["yin"],   DELTA)
    assert_in_delta(0,     dict.tf["bye"],   DELTA)
    assert_in_delta(1.0/5, dict.tf["world"], DELTA)

    assert_in_delta(Math::log(1), dict.idf["hello"], DELTA)
    assert_in_delta(Math::log(2), dict.idf["yin"],   DELTA)
    assert_in_delta(0,            dict.idf["bye"],   DELTA)
    assert_in_delta(Math::log(2), dict.idf["world"], DELTA)

    assert_in_delta(2.0/5 * Math::log(1), dict.tf_idf["hello"], DELTA)
    assert_in_delta(2.0/5 * Math::log(2), dict.tf_idf["yin"],   DELTA)
    assert_in_delta(0,                    dict.tf_idf["bye"],   DELTA)
    assert_in_delta(1.0/5 * Math::log(2), dict.tf_idf["world"], DELTA)
  end

  # With a limit of one, the single best term is "yin".
  def test_best
    dict = Dictionary::TF_IDF.new
    sample_docs.each { |doc| dict.add doc }

    best = dict.best(:limit => 1)
    assert_equal(1, best.length)
    assert(best.include?("yin"))
  end

  # KL dictionary: documents are added together with a class label.
  def test_kl
    labelled_docs = [
      [BagOfWords.terms("Hello World", false), :+],
      [BagOfWords.terms("Hello Cruel World", false), :+],
      [BagOfWords.terms("Hello Yan Yan", false), :-],
      [BagOfWords.terms("Hello Yin Yin", false), :-],
    ]

    dict = Dictionary::KL.new
    labelled_docs.each { |terms, label| dict.add(terms, label) }

    assert_equal(0, dict.kl["hello"])
    assert_equal(dict.kl['yan'], dict.kl['yin'])
    assert_in_delta(1 * Math::log(1 / 0.000001), dict.kl["world"],0.01)
    assert_in_delta(0.5 * Math::log(0.5 / 0.000001), dict.kl["cruel"],0.01)
  end

  private

  # The two single-word term tables shared by the unlabelled tests.
  def sample_docs
    [
      BagOfWords.terms("Hello World", false),
      BagOfWords.terms("Hello Yin Yin", false),
    ]
  end

end
|
90
|
+
|
91
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../../test_helper'
|
2
|
+
require 'rbbt/ner/rnorm/cue_index'
|
3
|
+
require 'rbbt/util/misc'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
# Tests for CueIndex: cue generation from a set of rules and lexicon lookup.
class TestCUE < Test::Unit::TestCase

  # Builds an index with four cue rules: equal, standard, special and words.
  def setup
    @index = CueIndex.new do
      equal do |name| [name] end
      standard do |name| [name.downcase.split(/\s+/).sort.join("")] end
      special do |name|
        name.split.select{|word| word.is_special?}.collect{|word| word.downcase.sub(/p$/,'')}
      end
      words do |name|
        name.scan(/[a-z]+/i).
          select{|word| word.length > 2}.
          sort{|a,b| b.length <=> a.length}.
          collect{|word| word.downcase}
      end
    end
  end

  # One list of cues per rule, in rule order.
  def test_cue
    assert_equal([["Hsp70 gene"], ["genehsp70"], ["hsp70"], ["gene", "hsp"]], @index.cues("Hsp70 gene"))
  end

  # Matching raises until a lexicon has been loaded; afterwards it returns
  # the codes of the matching entries.
  def test_load
    tmp = TmpFile.tmp_file("test_cue")

    lexicon =<<-EOT
code1\tNAME1\tname 1
code2\tNAME2\tname 2
    EOT
    Open.write(tmp,lexicon)

    assert_raise(CueIndex::LexiconMissingError){@index.match("NAME2")}
    @index.load(tmp)
    assert_equal(["code2"], @index.match("NAME2"))
  ensure
    # Remove the lexicon file even when an assertion above fails.
    FileUtils.rm tmp if tmp && File.exist?(tmp)
  end

  #def test_yeast
  #  index = CueIndex.new
  #  index.load(File.join(Rbbt.datadir,'biocreative','BC1GN','yeast','synonyms.list'))
  #  assert(index.match("Met - 31").include? 'S0005959')
  #end

  #def test_mouse
  #  index = CueIndex.new
  #  index.load(File.join(Rbbt.datadir,'biocreative','BC1GN','mouse','synonyms.list'))
  #  puts index.match("kreisler gene").length
  #end


end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../../test_helper'
|
2
|
+
require 'rbbt/ner/rnorm/tokens'
|
3
|
+
require 'rbbt/util/misc'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
# Tests for Tokenizer: token typing, transforms, comparison operations and
# the overall similarity score between two names.
class TestCompare < Test::Unit::TestCase

  def setup
    @index = Tokenizer.new
  end

  # Single tokens are mapped to a type symbol.
  def test_type
    [
      [:gene,   "gene"],
      [:dna,    "dna"],
      [:number, "121"],
    ].each do |expected, token|
      assert_equal(expected, @index.type(token))
    end
  end

  # Names are split into [token, type] pairs.
  def test_token_types
    [
      [[["dna", :dna], ["12", :number]], "dna12"],
      [[["REX", :special], ["12", :number]], "REX12"],
      [[["SSH", :special], ["3", :number], ["BP", :special]], "SSH3BP"],
      [[["HP", :special], ["1", :number], ["gamma", :greek]], "HP1gamma"],
      [[["HP", :special], ["1", :number], ["GAMMA", :greek]], "HP1-GAMMA"],
    ].each do |expected, name|
      assert_equal(expected, @index.token_types(name))
    end
  end

  def test_eval
    left  = @index.token_types("1")
    right = @index.token_types("1")
    assert_equal(3, @index.evaluate_tokens(left, right))
  end

  # A transform registered for :unknown retypes short tokens as :special.
  def test_transforms
    transform = Tokenizer::Transform.new.unknown do |token|
      [token, if token.length < 4 then :special else :unknown end]
    end
    assert_equal(["BP", :special], transform.transform(["BP",:unknown]))
  end

  def test_comparisons
    same_number = Tokenizer::Operation.new(:same).number(3)
    assert_equal(0, same_number.eval(@index.token_types("SSH1"),@index.token_types("SSH2")))

    same_number = Tokenizer::Operation.new(:same).number(3)
    assert_equal(3, same_number.eval(@index.token_types("SSH1"),@index.token_types("SSH1")))

    assert_equal(0,  Tokenizer::Operation.new(:same).special(1).eval(ssh1_tokens, ssh3bp_tokens))
    assert_equal(-1, Tokenizer::Operation.new(:diff).special(-1).eval(ssh1_tokens, ssh3bp_tokens))
    assert_equal(-1, Tokenizer::Operation.new(:extr).special(-1).eval(ssh1_tokens, ssh3bp_tokens))
    assert_equal(-1, Tokenizer::Operation.new(:miss).special(-1).eval(ssh3bp_tokens, ssh1_tokens))
  end

  def test_ignore_case
    case_sensitive = Tokenizer::Operation.new(:diff).ignore_case(false).special(-1)
    assert_equal(-1, case_sensitive.eval([["ssh", :special]],[["SSH", :special]]))

    case_insensitive = Tokenizer::Operation.new(:diff).ignore_case(true).special(-1)
    assert_equal(0, case_insensitive.eval([["ssh", :special]],[["SSH", :special]]))
  end

  # Scores produced by the index built in setup.
  def test_compare
    [
      [-10, "DNA1",    "GENE2"],
      [3,   "DNA1",    "GENE1"],
      [3,   "DNA1",    "RNA1"],
      [-1,  "SSH",     "SSH1"],
      [7,   "pol III", "POL3"],
    ].each do |expected, left, right|
      assert_equal(expected, @index.evaluate(left, right))
    end
  end

  # Same kind of checks against a freshly created tokenizer.
  def test_default
    index = Tokenizer.new

    assert(index.evaluate("SSH", "SSH1") > index.evaluate("SSH", "SSH3BP"))

    [
      ["HP1gamma", "HP1-GAMMA"],
      ["HP1alpha", "HP1 alpha"],
      ["IL-1beta", "IL-1 beta"],
      ["IL-1RI",   "IL-1R-1"],
      ["MODI",     "MOD 1"],
      ["MOD 1",    "MODI"],
      ["Ubc3",     "Ubc3b"],
    ].each do |left, right|
      assert(index.evaluate(left, right) > 1)
    end
  end

  private

  # Fresh token list for "SSH1"; rebuilt on every call so no assertion
  # shares arrays with another one.
  def ssh1_tokens
    [["SSH", :special],["1", :number]]
  end

  # Fresh token list for "SSH3BP".
  def ssh3bp_tokens
    [["SSH", :special],["3", :number],["BP",:special]]
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/ner/abner'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Checks that Abner#extract finds the expected mentions in a sample sentence.
class TestAbner < Test::Unit::TestCase

  def test_extract
    sentence = " The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates."
    found = Abner.new.extract(sentence)

    ["SHP-2", "SHIP", "Shc"].each do |expected|
      assert(found.include?(expected))
    end
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/ner/banner'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Checks that Banner#extract finds the expected mentions in a sample sentence.
class TestBanner < Test::Unit::TestCase

  def test_extract
    sentence = " The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates."
    found = Banner.new.extract(sentence)

    # The first mention is expected with spaces around the dash ("SHP - 2").
    ["SHP - 2", "SHIP", "Shc"].each do |expected|
      assert(found.include?(expected))
    end
  end

end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rbbt/util/tmpfile'
|
3
|
+
require 'rbbt/util/open'
|
4
|
+
require 'rbbt/ner/dictionaryNER'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
# Tests for DictionaryNER: name simplification and chunking, building the
# nested word dictionary, and matching text against it.
class TestDictionaryNER < Test::Unit::TestCase

  # @dictionary is the tab separated source text, @dict the nested structure
  # that loading it is expected to produce.
  def setup
    @dictionary =<<-EOT
DICT1\tWord1 Word2\tWord1
DICT2\tWord3-Word4\tWord4
    EOT

    @dict = {
      "word1" => [{'word2' => ['DICT1'] }, 'DICT1'],
      "word3" => [{'word4' => ['DICT2'] }],
      "word4" => ['DICT2'],
    }
  end

  def test_simplify
    assert_equal('word1', DictionaryNER.simplify( "Word1"))
    assert_equal('ACL', DictionaryNER.simplify("ACL"))
  end

  def test_chunk
    assert_equal(["Word1","Word2"], DictionaryNER.chunk('Word1-Word2'))
    assert_equal(["Word1-1"], DictionaryNER.chunk('Word1-1'))
  end

  # Each case is [text, dictionary, expected matches].
  def test_match
    cases = [

      ["Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],

      ["Word1 Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],

      ["Word2 Word1 Word3", {"word1" => ["D1"]}, {"Word1" => ["D1"]} ],

      ["Word2 Word1 Word4", {"word1" => ["D1","D2"]}, {"Word1" => ["D1","D2"]} ],

      ["Word2 Word1 Word4",
        {"word1" => [{'word2' => ['D1']}]},
        {} ],

      [
        "Word2 Word1 Word4",
        {"word1" => [ {'word4' => ['D1']} ] },
        {"Word1 Word4" => ["D1"]},
      ],

      [
        "Word2 Word1 Word4",
        {"word1" => [ {'word4' => ['D1']} ], "word4" => ['D2'] },
        {"Word1 Word4" => ["D1"], "Word4" => ['D2']},
      ],

    ]

    cases.each do |text, dict, result|
      assert_equal(result, DictionaryNER.match(dict, text))
    end
  end

  # Each case is [name, dictionary expected after adding it with 'code'].
  def test_add_name
    cases = [

      ["Word1", {"word1" => ['code']}],

      ["Word1 Word2", {"word1" => [{"word2" => ['code']}]}],

      ["Cerebellar stroke syndrome", {"cerebellar" => [{'stroke' => [{'syndrome' => ['code']}]}]}]

    ]

    cases.each do |name, result|
      dict = {}
      DictionaryNER.add_name(dict, name, 'code')
      assert_equal(result, dict)
    end
  end

  def test_load
    assert_equal(@dict, DictionaryNER.load(@dictionary))
  end

  # Each case is [text, expected keys of the match result].
  def test_class
    ner = DictionaryNER.new(@dictionary)

    [
      [ "Word1 Word2", ["Word1 Word2", "Word1"] ],
      [ "foo Word1 Word2 foo", ["Word1 Word2", "Word1"] ],
      [ "Word1-Word2", ["Word1 Word2", "Word1"] ],
      [ "Word1\nWord2", ["Word1 Word2", "Word1"] ],
    ].each do |text, keys|
      assert_equal(keys.sort, ner.match(text).keys.sort)
    end
  end

  # The constructor also accepts the path of a file holding the dictionary.
  def test_load_from_file
    tmpfile = TmpFile.tmp_file

    Open.write(tmpfile, @dictionary)

    ner = DictionaryNER.new(tmpfile)

    assert(ner.match("Word1").any?)
  ensure
    # Remove the dictionary file written above, whether or not the test passed.
    File.delete(tmpfile) if tmpfile && File.exist?(tmpfile)
  end

end
|
122
|
+
|