rbbt 1.1.8 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +12 -12
- data/bin/rbbt_config +2 -3
- data/install_scripts/norm/Rakefile +4 -4
- data/install_scripts/organisms/{tair.Rakefile → Ath.Rakefile} +4 -3
- data/install_scripts/organisms/{cgd.Rakefile → Cal.Rakefile} +0 -0
- data/install_scripts/organisms/{worm.Rakefile → Cel.Rakefile} +0 -0
- data/install_scripts/organisms/{human.Rakefile → Hsa.Rakefile} +4 -8
- data/install_scripts/organisms/{mgi.Rakefile → Mmu.Rakefile} +0 -0
- data/install_scripts/organisms/{rgd.Rakefile → Rno.Rakefile} +0 -0
- data/install_scripts/organisms/{sgd.Rakefile → Sce.Rakefile} +0 -0
- data/install_scripts/organisms/{pombe.Rakefile → Spo.Rakefile} +0 -0
- data/install_scripts/organisms/rake-include.rb +15 -19
- data/lib/rbbt.rb +0 -3
- data/lib/rbbt/ner/rnorm.rb +2 -2
- data/lib/rbbt/sources/go.rb +48 -3
- data/lib/rbbt/sources/organism.rb +12 -17
- data/lib/rbbt/util/open.rb +27 -27
- data/lib/rbbt/util/tmpfile.rb +16 -0
- data/tasks/install.rake +1 -1
- data/test/rbbt/bow/test_bow.rb +33 -0
- data/test/rbbt/bow/test_classifier.rb +72 -0
- data/test/rbbt/bow/test_dictionary.rb +91 -0
- data/test/rbbt/ner/rnorm/test_cue_index.rb +57 -0
- data/test/rbbt/ner/rnorm/test_tokens.rb +70 -0
- data/test/rbbt/ner/test_abner.rb +17 -0
- data/test/rbbt/ner/test_banner.rb +17 -0
- data/test/rbbt/ner/test_dictionaryNER.rb +122 -0
- data/test/rbbt/ner/test_regexpNER.rb +33 -0
- data/test/rbbt/ner/test_rner.rb +126 -0
- data/test/rbbt/ner/test_rnorm.rb +47 -0
- data/test/rbbt/sources/test_biocreative.rb +38 -0
- data/test/rbbt/sources/test_biomart.rb +31 -0
- data/test/rbbt/sources/test_entrez.rb +49 -0
- data/test/rbbt/sources/test_go.rb +24 -0
- data/test/rbbt/sources/test_organism.rb +59 -0
- data/test/rbbt/sources/test_polysearch.rb +27 -0
- data/test/rbbt/sources/test_pubmed.rb +29 -0
- data/test/rbbt/util/test_arrayHash.rb +257 -0
- data/test/rbbt/util/test_filecache.rb +37 -0
- data/test/rbbt/util/test_index.rb +31 -0
- data/test/rbbt/util/test_misc.rb +20 -0
- data/test/rbbt/util/test_open.rb +97 -0
- data/test/rbbt/util/test_simpleDSL.rb +57 -0
- data/test/rbbt/util/test_tmpfile.rb +21 -0
- data/test/test_helper.rb +4 -0
- data/test/test_rbbt.rb +11 -0
- metadata +39 -12
data/lib/rbbt/util/tmpfile.rb
CHANGED
@@ -16,4 +16,20 @@ module TmpFile
|
|
16
16
|
# Builds the path of a temporary file located under Rbbt.tmpdir.
#
# s   - value forwarded to random_name (default "").
# max - value forwarded to random_name (default 10000000).
#
# Returns the directory and the generated name joined into a single path.
def self.tmp_file(s = "", max = 10000000)
  directory = Rbbt.tmpdir
  File.join(directory, random_name(s, max))
end
|
19
|
+
|
20
|
+
# Runs a block with the path of a temporary file and removes the file afterwards.
#
# content - optional data written to the file before the block runs. When nil
#           (the default) nothing is written and the block only receives the
#           path.
#
# Yields the temporary file path.
# Returns whatever the block returns.
#
# The file is removed (when it exists) even if the block raises, so a failing
# caller does not leave stray files behind.
def self.with_file(content = nil)
  tmpfile = tmp_file

  File.open(tmpfile, 'w') { |f| f.write content } unless content.nil?

  yield(tmpfile)
ensure
  # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2.
  # tmpfile is nil only if tmp_file itself raised.
  FileUtils.rm tmpfile if tmpfile && File.exist?(tmpfile)
end
|
31
|
+
|
32
|
+
# Make the path generator reachable as `new` too, so that calling new(...)
# on the module is the same as calling tmp_file(...).
class << self
  alias_method :new, :tmp_file
end
|
19
35
|
end
|
data/tasks/install.rake
CHANGED
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/bow/bow'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for String#words and for BagOfWords.terms / BagOfWords.features.
class TestBow < Test::Unit::TestCase

  # "Hello World" must yield the two words in lower case.
  def test_words
    tokens = "Hello World".words
    assert_equal(["hello", "world"], tokens)
  end

  # With the second argument false only the single words are keys; with true
  # the pair "hello world" shows up as an additional key.
  def test_terms
    sentence = "Hello World"

    single_terms = BagOfWords.terms(sentence, false).keys.sort
    assert_equal(["hello", "world"], single_terms)

    all_terms = BagOfWords.terms(sentence, true).keys.sort
    assert_equal(["hello", "hello world", "world"], all_terms)
  end

  # Both vocabulary words appear twice in the concatenated sample text.
  def test_features
    sample     = "Hello world!" + "Hello World Again!"
    vocabulary = "Hello World".words.uniq.sort

    assert_equal([2, 2], BagOfWords.features(sample, vocabulary))
  end

  # The plural "Proteins" must come back as "protein".
  def test_stem
    tokens = "Proteins".words
    assert_equal(["protein"], tokens)
  end

end
|
32
|
+
|
33
|
+
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/bow/classifier'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/util/open'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
# Tests for Classifier: building a model file from a feature table and using
# that model to classify feature vectors and text.
class TestClassifier < Test::Unit::TestCase

  # Feature table used by every test: a "-" row that only has "hello" counts
  # and a "+" row that only has "world" counts.
  FEATURES = <<-EOT
Name Class hello world
row1 - 2 0
row2 + 0 2
  EOT

  # Creating a model must leave a model file on disk.
  def test_build_model
    with_model do |modelfile|
      assert(File.exist?(modelfile))
    end
  end

  # A classifier loaded from the model must expose the terms of the feature
  # table and label "hello"-like input as "-" and "world"-like input as "+",
  # through each of the classify_* entry points and the generic classify.
  def test_classifier
    with_model do |modelfile|
      classifier = Classifier.new(modelfile)

      assert_equal(["hello", "world"], classifier.terms)

      assert_equal(["-", "+"], classifier.classify_feature_array([[1,0],[0,1]]))

      assert_equal({"negative"=>"-", "positive"=>"+"}, classifier.classify_feature_hash({:positive => [0,1], :negative => [1,0]}))

      assert_equal(["-", "+"], classifier.classify_text_array(["Hello","World"]))

      assert_equal({"negative"=>"-", "positive"=>"+"}, classifier.classify_text_hash({:negative => "Hello", :positive =>"World"}))

      assert_equal('-', classifier.classify("Hello"))
      assert_equal(["-", "+"],classifier.classify([[1,0],[0,1]]))
      assert_equal({"negative"=>"-", "positive"=>"+"},classifier.classify({:positive => [0,1], :negative => [1,0]}))
      assert_equal(["-", "+"],classifier.classify(["Hello","World"]))
      #assert_equal({"negative"=>"-", "positive"=>"+"},classifier.classify({:negative => "Hello", :positive => "World"}))

      #assert_nothing_raised do classifier.classify("Unknown terms") end
      #assert_nothing_raised do classifier.classify([]) end
    end
  end

  private

  # Writes FEATURES to a temporary file, builds a model from it and yields the
  # path of the model file.
  #
  # Both temporary files are deleted in the ensure clause, so they are cleaned
  # up even when model creation or an assertion inside the block raises.
  def with_model
    featuresfile = TmpFile.tmp_file("test_classifier")
    modelfile    = TmpFile.tmp_file("test_classifier")

    Open.write(featuresfile, FEATURES)
    Classifier.create_model(featuresfile, modelfile)

    yield modelfile
  ensure
    [featuresfile, modelfile].compact.each do |path|
      FileUtils.rm path if File.exist?(path)
    end
  end

end
|
72
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/bow/dictionary'
|
3
|
+
require 'rbbt/bow/bow'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
# Tests for Dictionary and its TF_IDF and KL variants, fed with term hashes
# produced by BagOfWords.terms.
class TestDictionary < Test::Unit::TestCase

  # Term counts accumulate over every added document; unseen terms count 0.
  def test_standard
    docs = ["Hello World", "Hello Yin Yin"].map { |text| BagOfWords.terms(text, false) }

    dict = Dictionary.new
    docs.each { |doc| dict.add(doc) }

    [["hello", 2], ["yin", 2], ["bye", 0], ["world", 1]].each do |term, expected|
      assert_equal(expected, dict.terms[term])
    end
  end

  # Checks the terms, df, tf, idf and tf_idf tables of a TF_IDF dictionary
  # built from two documents.
  def test_tf_idf
    docs = ["Hello World", "Hello Yin Yin"].map { |text| BagOfWords.terms(text, false) }

    dict = Dictionary::TF_IDF.new
    docs.each { |doc| dict.add(doc) }

    [["hello", 2], ["yin", 2], ["bye", 0], ["world", 1]].each do |term, expected|
      assert_equal(expected, dict.terms[term])
    end

    [["hello", 1], ["yin", 0.5], ["bye", 0], ["world", 0.5]].each do |term, expected|
      assert_equal(expected, dict.df[term])
    end

    [["hello", 2.0/5], ["yin", 2.0/5], ["bye", 0], ["world", 1.0/5]].each do |term, expected|
      assert_equal(expected, dict.tf[term])
    end

    [["hello", Math::log(1)], ["yin", Math::log(2)], ["bye", 0], ["world", Math::log(2)]].each do |term, expected|
      assert_equal(expected, dict.idf[term])
    end

    [
      ["hello", 2.0/5 * Math::log(1)],
      ["yin",   2.0/5 * Math::log(2)],
      ["bye",   0],
      ["world", 1.0/5 * Math::log(2)],
    ].each do |term, expected|
      assert_equal(expected, dict.tf_idf[term])
    end
  end

  # Asking for a single best term must return exactly one entry: "yin".
  def test_best
    docs = ["Hello World", "Hello Yin Yin"].map { |text| BagOfWords.terms(text, false) }

    dict = Dictionary::TF_IDF.new
    docs.each { |doc| dict.add(doc) }

    assert_equal(1, dict.best(:limit => 1).length)

    winners = dict.best(:limit => 1)
    assert(winners.include?("yin"))
  end

  # KL dictionary fed with documents labelled :+ or :-. "hello" occurs in
  # every document and must score 0; "yan" and "yin" must score the same.
  def test_kl
    labelled = [
      ["Hello World",       :+],
      ["Hello Cruel World", :+],
      ["Hello Yan Yan",     :-],
      ["Hello Yin Yin",     :-],
    ].map { |text, label| [BagOfWords.terms(text, false), label] }

    dict = Dictionary::KL.new
    labelled.each { |terms, label| dict.add(terms, label) }

    tiny = 0.000001

    assert_equal(0, dict.kl["hello"])
    assert_equal(dict.kl['yan'], dict.kl['yin'])
    assert_in_delta(1 * Math::log(1 / tiny), dict.kl["world"], 0.01)
    assert_in_delta(0.5 * Math::log(0.5 / tiny), dict.kl["cruel"], 0.01)
  end

end
|
90
|
+
|
91
|
+
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../../test_helper'
|
2
|
+
require 'rbbt/ner/rnorm/cue_index'
|
3
|
+
require 'rbbt/util/misc'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
# Tests for CueIndex: cue generation from declared rules and lookups against
# a lexicon loaded from a file.
class TestCUE < Test::Unit::TestCase

  # Builds an index with four rules (equal, standard, special, words); each
  # rule maps a name to a list of cues.
  def setup
    @index = CueIndex.new do
      equal do |w| [w] end
      standard do |w| [w.downcase.split(/\s+/).sort.join("")] end
      special do |w| w.split.select{|t| t.is_special?}.collect{|t| t.downcase.sub(/p$/,'')} end
      words do |w|
        w.scan(/[a-z]+/i).
          select{|t| t.length > 2}.
          sort{|a,b| b.length <=> a.length}.
          collect{|t| t.downcase}
      end
    end
  end

  # One cue list per rule, in the order the rules were declared.
  def test_cue
    assert_equal([["Hsp70 gene"], ["genehsp70"], ["hsp70"], ["gene", "hsp"]], @index.cues("Hsp70 gene"))
  end

  # Matching before a lexicon is loaded must raise LexiconMissingError; after
  # loading, a name resolves to its code.
  def test_load
    tmp = TmpFile.tmp_file("test_cue")

    lexicon =<<-EOT
code1\tNAME1\tname 1
code2\tNAME2\tname 2
    EOT
    Open.write(tmp,lexicon)

    assert_raise(CueIndex::LexiconMissingError){@index.match("NAME2")}
    @index.load(tmp)
    assert_equal(["code2"], @index.match("NAME2"))
  ensure
    # Clean up even when one of the assertions above fails.
    FileUtils.rm tmp if tmp && File.exist?(tmp)
  end

  #def test_yeast
  #  index = CueIndex.new
  #  index.load(File.join(Rbbt.datadir,'biocreative','BC1GN','yeast','synonyms.list'))
  #  assert(index.match("Met - 31").include? 'S0005959')
  #end

  #def test_mouse
  #  index = CueIndex.new
  #  index.load(File.join(Rbbt.datadir,'biocreative','BC1GN','mouse','synonyms.list'))
  #  puts index.match("kreisler gene").length
  #end

end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../../test_helper'
|
2
|
+
require 'rbbt/ner/rnorm/tokens'
|
3
|
+
require 'rbbt/util/misc'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
# Tests for Tokenizer: token typing, Transform and Operation rules, and the
# score returned by evaluate for pairs of names.
class TestCompare < Test::Unit::TestCase

  def setup
    @index = Tokenizer.new
  end

  # Single tokens map to the expected type symbol.
  def test_type
    [["gene", :gene], ["dna", :dna], ["121", :number]].each do |token, kind|
      assert_equal(kind, @index.type(token))
    end
  end

  # Names are split into [token, type] pairs.
  def test_token_types
    [
      ["dna12",     [["dna", :dna], ["12", :number]]],
      ["REX12",     [["REX", :special], ["12", :number]]],
      ["SSH3BP",    [["SSH", :special], ["3", :number], ["BP", :special]]],
      ["HP1gamma",  [["HP", :special], ["1", :number], ["gamma", :greek]]],
      ["HP1-GAMMA", [["HP", :special], ["1", :number], ["GAMMA", :greek]]],
    ].each do |name, expected|
      assert_equal(expected, @index.token_types(name))
    end
  end

  def test_eval
    left  = @index.token_types("1")
    right = @index.token_types("1")
    assert_equal(3, @index.evaluate_tokens(left, right))
  end

  # A transform for :unknown tokens that relabels short ones as :special.
  def test_transforms
    transform = Tokenizer::Transform.new.unknown do |token|
      kind = if token.length < 4 then :special else :unknown end
      [token, kind]
    end
    assert_equal(["BP", :special], transform.transform(["BP", :unknown]))
  end

  # Each assertion builds a fresh Operation and fresh token lists.
  def test_comparisons
    operation = Tokenizer::Operation.new(:same).number(3)
    assert_equal(0, operation.eval(@index.token_types("SSH1"), @index.token_types("SSH2")))

    operation = Tokenizer::Operation.new(:same).number(3)
    assert_equal(3, operation.eval(@index.token_types("SSH1"), @index.token_types("SSH1")))

    operation = Tokenizer::Operation.new(:same).special(1)
    assert_equal(0, operation.eval(ssh1_tokens, ssh3bp_tokens))

    operation = Tokenizer::Operation.new(:diff).special(-1)
    assert_equal(-1, operation.eval(ssh1_tokens, ssh3bp_tokens))

    operation = Tokenizer::Operation.new(:extr).special(-1)
    assert_equal(-1, operation.eval(ssh1_tokens, ssh3bp_tokens))

    operation = Tokenizer::Operation.new(:miss).special(-1)
    assert_equal(-1, operation.eval(ssh3bp_tokens, ssh1_tokens))
  end

  def test_ignore_case
    case_sensitive = Tokenizer::Operation.new(:diff).ignore_case(false).special(-1)
    assert_equal(-1, case_sensitive.eval([["ssh", :special]], [["SSH", :special]]))

    case_insensitive = Tokenizer::Operation.new(:diff).ignore_case(true).special(-1)
    assert_equal(0, case_insensitive.eval([["ssh", :special]], [["SSH", :special]]))
  end

  # Scores of the tokenizer built in setup for a few name pairs.
  def test_compare
    [
      [-10, "DNA1",    "GENE2"],
      [3,   "DNA1",    "GENE1"],
      [3,   "DNA1",    "RNA1"],
      [-1,  "SSH",     "SSH1"],
      [7,   "pol III", "POL3"],
    ].each do |score, first, second|
      assert_equal(score, @index.evaluate(first, second))
    end
  end

  # Relative and minimum scores of a newly created tokenizer.
  def test_default
    index = Tokenizer.new

    assert(index.evaluate("SSH", "SSH1") > index.evaluate("SSH", "SSH3BP"))

    [
      ["HP1gamma", "HP1-GAMMA"],
      ["HP1alpha", "HP1 alpha"],
      ["IL-1beta", "IL-1 beta"],
      ["IL-1RI",   "IL-1R-1"],
      ["MODI",     "MOD 1"],
      ["MOD 1",    "MODI"],
      ["Ubc3",     "Ubc3b"],
    ].each do |first, second|
      assert(index.evaluate(first, second) > 1)
    end
  end

  private

  # Fresh token list for "SSH1" on every call.
  def ssh1_tokens
    [["SSH", :special], ["1", :number]]
  end

  # Fresh token list for "SSH3BP" on every call.
  def ssh3bp_tokens
    [["SSH", :special], ["3", :number], ["BP", :special]]
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/ner/abner'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Test for the Abner wrapper: mentions extracted from a sample sentence.
class TestAbner < Test::Unit::TestCase

  # Every expected mention must be among the extracted ones.
  def test_extract
    sentence = " The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates."

    ner   = Abner.new
    found = ner.extract(sentence)

    ["SHP-2", "SHIP", "Shc"].each do |expected|
      assert(found.include?(expected))
    end
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/ner/banner'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Test for the Banner wrapper: mentions extracted from a sample sentence.
class TestBanner < Test::Unit::TestCase

  # Every expected mention must be among the extracted ones. Note that the
  # first mention is expected with spaces around the dash ("SHP - 2").
  def test_extract
    sentence = " The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates."

    ner   = Banner.new
    found = ner.extract(sentence)

    ["SHP - 2", "SHIP", "Shc"].each do |expected|
      assert(found.include?(expected))
    end
  end

end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rbbt/util/tmpfile'
|
3
|
+
require 'rbbt/util/open'
|
4
|
+
require 'rbbt/ner/dictionaryNER'
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
# Tests for DictionaryNER: name simplification and chunking, building and
# loading the nested dictionary, and matching text against it.
class TestDictionaryNER < Test::Unit::TestCase

  # @dictionary is the textual lexicon (code, then names, separated by tabs);
  # @dict is the nested structure DictionaryNER.load must produce from it.
  def setup
    @dictionary =<<-EOT
DICT1\tWord1 Word2\tWord1
DICT2\tWord3-Word4\tWord4
    EOT

    @dict = {
      "word1" => [{'word2' => ['DICT1'] }, 'DICT1'],
      "word3" => [{'word4' => ['DICT2'] }],
      "word4" => ['DICT2'],
    }
  end

  # "Word1" is lower-cased while "ACL" is left untouched.
  def test_simplify
    assert_equal('word1', DictionaryNER.simplify( "Word1"))
    assert_equal('ACL', DictionaryNER.simplify("ACL"))
  end

  # A dash between two words splits them; "Word1-1" stays in one piece.
  def test_chunk
    assert_equal(["Word1","Word2"], DictionaryNER.chunk('Word1-Word2'))
    assert_equal(["Word1-1"], DictionaryNER.chunk('Word1-1'))
  end

  # Each case is [text, dictionary, expected matches].
  def test_match
    [
      ["Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],

      ["Word1 Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],

      ["Word2 Word1 Word3", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],

      ["Word2 Word1 Word4", {"word1" => ["D1","D2"]}, {"Word1" => ["D1","D2"]}],

      ["Word2 Word1 Word4",
       {"word1" => [{'word2' => ['D1']}]},
       {}],

      ["Word2 Word1 Word4",
       {"word1" => [{'word4' => ['D1']}]},
       {"Word1 Word4" => ["D1"]}],

      ["Word2 Word1 Word4",
       {"word1" => [{'word4' => ['D1']}], "word4" => ['D2']},
       {"Word1 Word4" => ["D1"], "Word4" => ['D2']}],
    ].each do |text, dict, expected|
      assert_equal(expected, DictionaryNER.match(dict, text))
    end
  end

  # Each case is [name, dictionary expected after adding name with 'code'].
  def test_add_name
    [
      ["Word1", {"word1" => ['code']}],

      ["Word1 Word2", {"word1" => [{"word2" => ['code']}]}],

      ["Cerebellar stroke syndrome", {"cerebellar" => [{'stroke' => [{'syndrome' => ['code']}]}]}],
    ].each do |name, expected|
      dict = {}
      DictionaryNER.add_name(dict, name, 'code')
      assert_equal(expected, dict)
    end
  end

  def test_load
    assert_equal(@dict, DictionaryNER.load(@dictionary))
  end

  # Each case is [text, expected match keys]; space, dash and newline between
  # the two words must all give the same matches.
  def test_class
    ner = DictionaryNER.new(@dictionary)

    [
      ["Word1 Word2",         ["Word1 Word2", "Word1"]],
      ["foo Word1 Word2 foo", ["Word1 Word2", "Word1"]],
      ["Word1-Word2",         ["Word1 Word2", "Word1"]],
      ["Word1\nWord2",        ["Word1 Word2", "Word1"]],
    ].each do |text, keys|
      assert_equal(keys.sort, ner.match(text).keys.sort)
    end
  end

  # A DictionaryNER can also be built from the path of a lexicon file.
  def test_load_from_file
    tmpfile = TmpFile.tmp_file

    Open.write(tmpfile, @dictionary)

    ner = DictionaryNER.new(tmpfile)

    assert(ner.match("Word1").any?)
  ensure
    # Remove the lexicon file whether or not the assertion passed.
    File.delete(tmpfile) if tmpfile && File.exist?(tmpfile)
  end

end
|
122
|
+
|