rbbt 1.2.5 → 2.0.0
- checksums.yaml +7 -0
- data/README.rdoc +2 -138
- metadata +69 -214
- data/LICENSE +0 -20
- data/bin/rbbt_config +0 -245
- data/install_scripts/classifier/R/classify.R +0 -36
- data/install_scripts/classifier/Rakefile +0 -140
- data/install_scripts/get_abner.sh +0 -2
- data/install_scripts/get_banner.sh +0 -25
- data/install_scripts/get_biocreative.sh +0 -72
- data/install_scripts/get_crf++.sh +0 -26
- data/install_scripts/get_entrez.sh +0 -4
- data/install_scripts/get_go.sh +0 -4
- data/install_scripts/get_polysearch.sh +0 -8
- data/install_scripts/ner/Rakefile +0 -206
- data/install_scripts/ner/config/default.rb +0 -52
- data/install_scripts/norm/Rakefile +0 -219
- data/install_scripts/norm/config/cue_default.rb +0 -10
- data/install_scripts/norm/config/tokens_default.rb +0 -86
- data/install_scripts/norm/functions.sh +0 -23
- data/install_scripts/organisms/Ath.Rakefile +0 -55
- data/install_scripts/organisms/Cal.Rakefile +0 -84
- data/install_scripts/organisms/Cel.Rakefile +0 -109
- data/install_scripts/organisms/Hsa.Rakefile +0 -140
- data/install_scripts/organisms/Mmu.Rakefile +0 -77
- data/install_scripts/organisms/Rakefile +0 -43
- data/install_scripts/organisms/Rno.Rakefile +0 -88
- data/install_scripts/organisms/Sce.Rakefile +0 -66
- data/install_scripts/organisms/Spo.Rakefile +0 -40
- data/install_scripts/organisms/rake-include.rb +0 -252
- data/install_scripts/wordlists/consonants +0 -897
- data/install_scripts/wordlists/stopwords +0 -1
- data/lib/rbbt.rb +0 -83
- data/lib/rbbt/bow/bow.rb +0 -88
- data/lib/rbbt/bow/classifier.rb +0 -116
- data/lib/rbbt/bow/dictionary.rb +0 -187
- data/lib/rbbt/ner/abner.rb +0 -34
- data/lib/rbbt/ner/banner.rb +0 -73
- data/lib/rbbt/ner/dictionaryNER.rb +0 -98
- data/lib/rbbt/ner/regexpNER.rb +0 -70
- data/lib/rbbt/ner/rner.rb +0 -227
- data/lib/rbbt/ner/rnorm.rb +0 -143
- data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
- data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
- data/lib/rbbt/sources/biocreative.rb +0 -75
- data/lib/rbbt/sources/biomart.rb +0 -105
- data/lib/rbbt/sources/entrez.rb +0 -211
- data/lib/rbbt/sources/go.rb +0 -85
- data/lib/rbbt/sources/gscholar.rb +0 -74
- data/lib/rbbt/sources/organism.rb +0 -241
- data/lib/rbbt/sources/polysearch.rb +0 -117
- data/lib/rbbt/sources/pubmed.rb +0 -248
- data/lib/rbbt/util/arrayHash.rb +0 -266
- data/lib/rbbt/util/filecache.rb +0 -72
- data/lib/rbbt/util/index.rb +0 -47
- data/lib/rbbt/util/misc.rb +0 -106
- data/lib/rbbt/util/open.rb +0 -251
- data/lib/rbbt/util/rake.rb +0 -183
- data/lib/rbbt/util/simpleDSL.rb +0 -87
- data/lib/rbbt/util/tmpfile.rb +0 -35
- data/tasks/install.rake +0 -124
- data/test/rbbt/bow/test_bow.rb +0 -33
- data/test/rbbt/bow/test_classifier.rb +0 -72
- data/test/rbbt/bow/test_dictionary.rb +0 -91
- data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
- data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
- data/test/rbbt/ner/test_abner.rb +0 -17
- data/test/rbbt/ner/test_banner.rb +0 -17
- data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
- data/test/rbbt/ner/test_regexpNER.rb +0 -33
- data/test/rbbt/ner/test_rner.rb +0 -126
- data/test/rbbt/ner/test_rnorm.rb +0 -47
- data/test/rbbt/sources/test_biocreative.rb +0 -38
- data/test/rbbt/sources/test_biomart.rb +0 -31
- data/test/rbbt/sources/test_entrez.rb +0 -49
- data/test/rbbt/sources/test_go.rb +0 -24
- data/test/rbbt/sources/test_organism.rb +0 -59
- data/test/rbbt/sources/test_polysearch.rb +0 -27
- data/test/rbbt/sources/test_pubmed.rb +0 -39
- data/test/rbbt/util/test_arrayHash.rb +0 -257
- data/test/rbbt/util/test_filecache.rb +0 -37
- data/test/rbbt/util/test_index.rb +0 -31
- data/test/rbbt/util/test_misc.rb +0 -20
- data/test/rbbt/util/test_open.rb +0 -110
- data/test/rbbt/util/test_simpleDSL.rb +0 -57
- data/test/rbbt/util/test_tmpfile.rb +0 -21
- data/test/test_helper.rb +0 -4
- data/test/test_rbbt.rb +0 -11
data/lib/rbbt/util/tmpfile.rb
DELETED
@@ -1,35 +0,0 @@
-require 'fileutils'
-require 'rbbt'
-
-
-module TmpFile
-
-  # Creates a random file name, with the given suffix and a random number
-  # up to +max+
-  def self.random_name( s="",max=10000000)
-    n = rand(max)
-    s << n.to_s
-    s
-  end
-
-  # Creates a random filename in the temporary directory
-  def self.tmp_file(s = "",max=10000000)
-    File.join(Rbbt.tmpdir,random_name(s,max))
-  end
-
-  def self.with_file(content = nil)
-    tmpfile = tmp_file
-
-    File.open(tmpfile, 'w') do |f| f.write content end if content != nil
-
-    result = yield(tmpfile)
-
-    FileUtils.rm tmpfile if File.exists? tmpfile
-
-    result
-  end
-
-  class << self
-    alias :new :tmp_file
-  end
-end
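For context, below is a minimal usage sketch of the TmpFile module removed above (illustrative, not code taken from the gem): with_file writes the optional content to a fresh temporary path, yields that path, deletes the file afterwards, and returns the block's result. It assumes the pre-2.0 layout where require 'rbbt' has configured Rbbt.tmpdir.

    require 'rbbt/util/tmpfile'

    # Write two lines to a throwaway file, count them, and let with_file
    # remove the file once the block returns.
    lines = TmpFile.with_file("line one\nline two\n") do |path|
      File.read(path).split("\n").length
    end
    puts lines   # => 2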
data/tasks/install.rake
DELETED
@@ -1,124 +0,0 @@
-require 'rbbt'
-
-$datadir = Rbbt.datadir
-$scriptdir = File.join(File.expand_path(Rbbt.rootdir), '/install_scripts')
-
-
-task 'abner' do
-  directory = "#{$datadir}/third_party/abner/"
-  if !File.exists?(File.join(directory, 'abner.jar')) || $force
-    FileUtils.mkdir_p directory
-    `cd #{directory};rm -Rf *; #{$scriptdir}/get_abner.sh;cd -`
-  end
-end
-
-task 'banner' do
-  directory = "#{$datadir}/third_party/banner/"
-  if !File.exists?(File.join(directory, 'banner.jar')) || $force
-    FileUtils.mkdir_p directory
-    `cd #{directory};rm -Rf *; #{$scriptdir}/get_banner.sh;cd -`
-  end
-end
-
-task 'crf++' do
-  directory = "#{$datadir}/third_party/crf++/"
-  if !File.exists?(File.join(directory, 'ruby/CRFPP.so')) || $force
-    FileUtils.mkdir_p directory
-    `cd #{directory};rm -Rf *; #{$scriptdir}/get_crf++.sh;cd -`
-  end
-end
-
-
-
-task 'wordlists' do
-  FileUtils.cp_r File.join($scriptdir, 'wordlists/'), $datadir
-end
-
-task 'polysearch' do
-  directory = "#{$datadir}/dbs/polysearch/"
-  if !File.exists?(File.join(directory,'disease.txt')) || $force
-    FileUtils.mkdir_p directory
-    `cd #{directory}/; rm * -Rf; #{$scriptdir}/get_polysearch.sh;cd -`
-  end
-end
-
-
-task '3party' => %w(abner banner crf++)
-
-task 'entrez' do
-  directory = "#{$datadir}/dbs/entrez/"
-  if !File.exists?(File.join(directory,'gene_info')) || $force
-    FileUtils.mkdir_p directory
-    `cd #{directory}/; rm * -Rf; #{$scriptdir}/get_entrez.sh;cd -`
-  end
-end
-
-task 'go' do
-  directory = "#{$datadir}/dbs/go/"
-  if !File.exists?(File.join(directory,'gene_ontology.obo')) || $force
-    FileUtils.mkdir_p directory
-    `cd #{directory}/; rm * -Rf; #{$scriptdir}/get_go.sh;cd -`
-  end
-end
-
-task 'biocreative' do
-  directory = "#{$datadir}/biocreative/"
-  if !File.exists?(File.join(directory, 'BC2GN')) || $force
-    FileUtils.mkdir_p directory
-    `cd #{directory};rm -Rf *; #{$scriptdir}/get_biocreative.sh;cd -`
-  end
-end
-
-
-task 'datasets' => %w(entrez biocreative)
-
-task 'organisms' do
-  directory = "#{$datadir}/organisms"
-  FileUtils.mkdir_p directory
-  %w(Rakefile rake-include.rb).each{|f|
-    FileUtils.cp_r File.join($scriptdir, "organisms/#{ f }"), directory
-  }
-  Dir.glob(File.join($scriptdir, "organisms/*.Rakefile")).each{|f|
-    org = File.basename(f).sub(/.Rakefile/,'')
-    if !File.exists?(File.join(directory, org))
-      FileUtils.mkdir_p File.join(directory, org)
-    end
-    FileUtils.cp f , File.join(directory, "#{ org }/Rakefile")
-  }
-  `cd #{directory}; rake names`
-end
-
-task 'ner' do
-  directory = "#{$datadir}/ner"
-  FileUtils.mkdir_p directory
-  %w(Rakefile config).each{|f|
-    FileUtils.cp_r File.join($scriptdir, "ner/#{ f }"), directory
-  }
-
-  %w(data model results).each{|d|
-    FileUtils.mkdir_p File.join(directory, d)
-  }
-end
-
-task 'norm' do
-  directory = "#{$datadir}/norm"
-  FileUtils.mkdir_p directory
-  %w(Rakefile config functions.sh).each{|f|
-    FileUtils.cp_r File.join($scriptdir, "norm/#{ f }"), directory
-  }
-  %w(results models).each{|d|
-    FileUtils.mkdir_p File.join(directory, d)
-  }
-end
-
-task 'classifier' do
-  directory = "#{$datadir}/classifier"
-  FileUtils.mkdir_p directory
-  %w(Rakefile R).each{|f|
-    FileUtils.cp_r File.join($scriptdir, "classifier/#{ f }"), directory
-  }
-  %w(data model results).each{|d|
-    FileUtils.mkdir_p File.join(directory, d)
-  }
-end
-
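Every download task in the removed install.rake follows the same guard pattern: create the target directory, then shell out to one of the install_scripts/ helpers only when a sentinel file is missing or $force is set. A minimal sketch of that pattern follows; it assumes it lives in a .rake file loaded by Rake, and the 'example' task, the example.jar sentinel, and get_example.sh are illustrative placeholders rather than files shipped with the gem ($datadir and $scriptdir are the globals set at the top of the removed file).

    task 'example' do
      directory = "#{$datadir}/third_party/example/"               # hypothetical install target
      if !File.exist?(File.join(directory, 'example.jar')) || $force
        FileUtils.mkdir_p directory
        # wipe the directory and re-run the hypothetical fetch script
        `cd #{directory}; rm -Rf *; #{$scriptdir}/get_example.sh; cd -`
      end
    end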
data/test/rbbt/bow/test_bow.rb
DELETED
@@ -1,33 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/bow/bow'
-require 'test/unit'
-
-class TestBow < Test::Unit::TestCase
-
-  def test_words
-    assert_equal(["hello", "world"], "Hello World".words)
-
-  end
-
-  def test_terms
-    text = "Hello World"
-    assert_equal(["hello", "world"], BagOfWords.terms(text,false).keys.sort)
-    assert_equal(["hello", "hello world", "world"], BagOfWords.terms(text,true).keys.sort)
-  end
-
-  def test_features
-
-    text = "Hello world!"
-    text += "Hello World Again!"
-
-    assert_equal([2, 2], BagOfWords.features(text, "Hello World".words.uniq.sort))
-  end
-
-  def test_stem
-    assert_equal(["protein"], "Proteins".words)
-  end
-
-
-end
-
-
data/test/rbbt/bow/test_classifier.rb
DELETED
@@ -1,72 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/bow/classifier'
-require 'rbbt/util/tmpfile'
-require 'rbbt/util/open'
-require 'test/unit'
-
-class TestClassifier < Test::Unit::TestCase
-
-  def test_build_model
-    features =<<-EOT
-Name Class hello world
-row1 - 2 0
-row2 + 0 2
-    EOT
-
-    featuresfile = TmpFile.tmp_file("test_classifier")
-    modelfile = TmpFile.tmp_file("test_classifier")
-    Open.write(featuresfile, features)
-    Classifier.create_model(featuresfile, modelfile)
-
-    assert(File.exist? modelfile)
-
-    FileUtils.rm featuresfile
-    FileUtils.rm modelfile
-
-  end
-
-  def test_classifier
-    features =<<-EOT
-Name Class hello world
-row1 - 2 0
-row2 + 0 2
-    EOT
-
-    featuresfile = TmpFile.tmp_file("test_classifier")
-    modelfile = TmpFile.tmp_file("test_classifier")
-    Open.write(featuresfile, features)
-    Classifier.create_model(featuresfile, modelfile)
-
-    FileUtils.rm featuresfile
-
-    classifier = Classifier.new(modelfile)
-
-    assert_equal(["hello", "world"], classifier.terms)
-
-    assert_equal(["-", "+"], classifier.classify_feature_array([[1,0],[0,1]]))
-
-
-    assert_equal({"negative"=>"-", "positive"=>"+"}, classifier.classify_feature_hash({:positive => [0,1], :negative => [1,0]}))
-    assert_equal({"negative"=>"-", "positive"=>"+"}, classifier.classify_feature_hash({:positive => [0,1], :negative => [1,0]}))
-
-    assert_equal(["-", "+"], classifier.classify_text_array(["Hello","World"]))
-
-    assert_equal({"negative"=>"-", "positive"=>"+"}, classifier.classify_text_hash({:negative => "Hello", :positive =>"World"}))
-
-    assert_equal('-', classifier.classify("Hello"))
-    assert_equal(["-", "+"],classifier.classify([[1,0],[0,1]]))
-    assert_equal({"negative"=>"-", "positive"=>"+"},classifier.classify({:positive => [0,1], :negative => [1,0]}))
-    assert_equal(["-", "+"],classifier.classify(["Hello","World"]))
-    #assert_equal({"negative"=>"-", "positive"=>"+"},classifier.classify({:negative => "Hello", :positive => "World"}))
-
-
-    #assert_nothing_raised do classifier.classify("Unknown terms") end
-    #assert_nothing_raised do classifier.classify([]) end
-
-    FileUtils.rm modelfile
-
-
-  end
-
-end
-
data/test/rbbt/bow/test_dictionary.rb
DELETED
@@ -1,91 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/bow/dictionary'
-require 'rbbt/bow/bow'
-require 'test/unit'
-
-class TestDictionary < Test::Unit::TestCase
-
-  def test_standard
-    docs = []
-    docs << BagOfWords.terms("Hello World", false)
-    docs << BagOfWords.terms("Hello Yin Yin", false)
-
-    dict = Dictionary.new
-    docs.each{|doc| dict.add doc}
-
-    assert_equal(2, dict.terms["hello"])
-    assert_equal(2, dict.terms["yin"])
-    assert_equal(0, dict.terms["bye"])
-    assert_equal(1, dict.terms["world"])
-  end
-
-  def test_tf_idf
-    docs = []
-    docs << BagOfWords.terms("Hello World", false)
-    docs << BagOfWords.terms("Hello Yin Yin", false)
-
-
-    dict = Dictionary::TF_IDF.new
-    docs.each{|doc| dict.add doc}
-
-    assert_equal(2, dict.terms["hello"])
-    assert_equal(2, dict.terms["yin"])
-    assert_equal(0, dict.terms["bye"])
-    assert_equal(1, dict.terms["world"])
-
-
-    assert_equal(1, dict.df["hello"])
-    assert_equal(0.5, dict.df["yin"])
-    assert_equal(0, dict.df["bye"])
-    assert_equal(0.5, dict.df["world"])
-
-    assert_equal(2.0/5, dict.tf["hello"])
-    assert_equal(2.0/5, dict.tf["yin"])
-    assert_equal(0, dict.tf["bye"])
-    assert_equal(1.0/5, dict.tf["world"])
-
-    assert_equal(Math::log(1), dict.idf["hello"])
-    assert_equal(Math::log(2), dict.idf["yin"])
-    assert_equal(0, dict.idf["bye"])
-    assert_equal(Math::log(2), dict.idf["world"])
-
-    assert_equal(2.0/5 * Math::log(1), dict.tf_idf["hello"])
-    assert_equal(2.0/5 * Math::log(2), dict.tf_idf["yin"])
-    assert_equal(0, dict.tf_idf["bye"])
-    assert_equal(1.0/5 * Math::log(2), dict.tf_idf["world"])
-  end
-
-  def test_best
-    docs = []
-    docs << BagOfWords.terms("Hello World", false)
-    docs << BagOfWords.terms("Hello Yin Yin", false)
-
-
-    dict = Dictionary::TF_IDF.new
-    docs.each{|doc| dict.add doc}
-
-    assert_equal(1, dict.best(:limit => 1).length)
-    assert(dict.best(:limit => 1).include? "yin")
-  end
-
-  def test_kl
-    docs = []
-    docs << [BagOfWords.terms("Hello World", false), :+]
-    docs << [BagOfWords.terms("Hello Cruel World", false), :+]
-    docs << [BagOfWords.terms("Hello Yan Yan", false), :-]
-    docs << [BagOfWords.terms("Hello Yin Yin", false), :-]
-
-
-    dict = Dictionary::KL.new
-    docs.each{|doc| dict.add *doc}
-
-    assert_equal(0, dict.kl["hello"])
-    assert_equal(dict.kl['yan'], dict.kl['yin'])
-    assert_in_delta(1 * Math::log(1 / 0.000001), dict.kl["world"],0.01)
-    assert_in_delta(0.5 * Math::log(0.5 / 0.000001), dict.kl["cruel"],0.01)
-  end
-
-
-end
-
-
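Read as arithmetic, the expectations in the removed test_tf_idf come down to term counts over the two documents "Hello World" and "Hello Yin Yin": five term occurrences in total, so tf("hello") = 2/5; df is the fraction of documents containing a term, and idf = log(1/df). Below is a plain Ruby re-derivation of those numbers (an illustration, not code from the gem).

    # Term counts across the two test documents.
    counts = { "hello" => 2, "world" => 1, "yin" => 2 }
    total  = counts.values.sum                                   # 5 occurrences in total
    tf     = counts.transform_values { |c| c.to_f / total }      # hello 0.4, world 0.2, yin 0.4
    df     = { "hello" => 1.0, "world" => 0.5, "yin" => 0.5 }    # fraction of docs containing the term
    idf    = df.transform_values { |d| Math.log(1 / d) }         # hello 0, world log(2), yin log(2)
    tf_idf = tf.merge(idf) { |_term, t, i| t * i }
    # tf_idf => { "hello" => 0.0, "world" => 0.2 * log(2), "yin" => 0.4 * log(2) }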
data/test/rbbt/ner/rnorm/test_cue_index.rb
DELETED
@@ -1,57 +0,0 @@
-require File.dirname(__FILE__) + '/../../../test_helper'
-require 'rbbt/ner/rnorm/cue_index'
-require 'rbbt/util/misc'
-require 'rbbt/util/tmpfile'
-require 'rbbt/util/open'
-require 'test/unit'
-
-class TestCUE < Test::Unit::TestCase
-
-  def setup
-    @index = CueIndex.new do
-      equal do |w| [w] end
-      standard do |w| [w.downcase.split(/\s+/).sort.join("")] end
-      special do |w| s = w.split.select{|w| w.is_special?}.collect{|w| w.downcase.sub(/p$/,'')} end
-      words do |w|
-        w.scan(/[a-z]+/i).
-          select{|w| w.length > 2}.
-          sort{|a,b| b.length <=> a.length}.
-          collect{|n| n.downcase}
-      end
-    end
-  end
-
-  def test_cue
-    assert_equal([["Hsp70 gene"], ["genehsp70"], ["hsp70"], ["gene", "hsp"]], @index.cues("Hsp70 gene"))
-  end
-
-  def test_load
-    tmp = TmpFile.tmp_file("test_cue")
-
-    lexicon =<<-EOT
-code1\tNAME1\tname 1
-code2\tNAME2\tname 2
-    EOT
-    Open.write(tmp,lexicon)
-
-    assert_raise(CueIndex::LexiconMissingError){@index.match("NAME2")}
-    @index.load(tmp)
-    assert_equal(["code2"], @index.match("NAME2"))
-
-    FileUtils.rm tmp
-  end
-
-  #def test_yeast
-  # index = CueIndex.new
-  # index.load(File.join(Rbbt.datadir,'biocreative','BC1GN','yeast','synonyms.list'))
-  # assert(index.match("Met - 31").include? 'S0005959')
-  #end
-
-  #def test_mouse
-  # index = CueIndex.new
-  # index.load(File.join(Rbbt.datadir,'biocreative','BC1GN','mouse','synonyms.list'))
-  # puts index.match("kreisler gene").length
-  #end
-
-
-end
data/test/rbbt/ner/rnorm/test_tokens.rb
DELETED
@@ -1,70 +0,0 @@
-require File.dirname(__FILE__) + '/../../../test_helper'
-require 'rbbt/ner/rnorm/tokens'
-require 'rbbt/util/misc'
-require 'rbbt/util/tmpfile'
-require 'rbbt/util/open'
-require 'test/unit'
-
-class TestCompare < Test::Unit::TestCase
-
-  def setup
-    @index = Tokenizer.new
-  end
-
-  def test_type
-    assert_equal(:gene, @index.type("gene"))
-    assert_equal(:dna, @index.type("dna"))
-    assert_equal(:number, @index.type("121"))
-  end
-
-  def test_token_types
-    assert_equal([["dna", :dna], ["12", :number]], @index.token_types("dna12"))
-    assert_equal([["REX", :special], ["12", :number]], @index.token_types("REX12"))
-    assert_equal([["SSH", :special], ["3", :number], ["BP", :special]], @index.token_types("SSH3BP"))
-    assert_equal([["HP", :special], ["1", :number], ["gamma", :greek]], @index.token_types("HP1gamma"))
-    assert_equal([["HP", :special], ["1", :number], ["GAMMA", :greek]], @index.token_types("HP1-GAMMA"))
-  end
-
-  def test_eval
-    assert_equal(3, @index.evaluate_tokens(@index.token_types("1"), @index.token_types("1")))
-  end
-
-  def test_transforms
-    t = Tokenizer::Transform.new.unknown do |t| [t, if t.length < 4 then :special else :unknown end] end
-    assert_equal(["BP", :special], t.transform(["BP",:unknown]))
-  end
-  def test_comparisons
-    assert_equal(0, Tokenizer::Operation.new(:same).number(3).eval(@index.token_types("SSH1"),@index.token_types("SSH2")))
-    assert_equal(3, Tokenizer::Operation.new(:same).number(3).eval(@index.token_types("SSH1"),@index.token_types("SSH1")))
-    assert_equal(0, Tokenizer::Operation.new(:same).special(1).eval([["SSH", :special],["1", :number]],[["SSH", :special],["3", :number],["BP",:special]]))
-    assert_equal(-1, Tokenizer::Operation.new(:diff).special(-1).eval([["SSH", :special],["1", :number]],[["SSH", :special],["3", :number],["BP",:special]]))
-    assert_equal(-1, Tokenizer::Operation.new(:extr).special(-1).eval([["SSH", :special],["1", :number]],[["SSH", :special],["3", :number],["BP",:special]]))
-    assert_equal(-1, Tokenizer::Operation.new(:miss).special(-1).eval([["SSH", :special],["3", :number],["BP",:special]],[["SSH", :special],["1", :number]]))
-  end
-  def test_ignore_case
-    assert_equal(-1, Tokenizer::Operation.new(:diff).ignore_case(false).special(-1).eval([["ssh", :special]],[["SSH", :special]]))
-    assert_equal(0, Tokenizer::Operation.new(:diff).ignore_case(true).special(-1).eval([["ssh", :special]],[["SSH", :special]]))
-  end
-
-  def test_compare
-    assert_equal(-10, @index.evaluate("DNA1", "GENE2"))
-    assert_equal(3, @index.evaluate("DNA1", "GENE1"))
-    assert_equal(3, @index.evaluate("DNA1", "RNA1"))
-    assert_equal(-1, @index.evaluate("SSH", "SSH1"))
-    assert_equal(7, @index.evaluate("pol III", "POL3"))
-  end
-
-  def test_default
-    index = Tokenizer.new
-    assert(index.evaluate("SSH", "SSH1") > index.evaluate("SSH", "SSH3BP"))
-    assert(index.evaluate("HP1gamma", "HP1-GAMMA") > 1)
-    assert(index.evaluate("HP1alpha", "HP1 alpha") > 1)
-    assert(index.evaluate("IL-1beta", "IL-1 beta") > 1)
-    assert(index.evaluate("IL-1RI", "IL-1R-1") > 1)
-    assert(index.evaluate("MODI", "MOD 1") > 1)
-    assert(index.evaluate("MOD 1", "MODI") > 1)
-    assert(index.evaluate("Ubc3", "Ubc3b") > 1)
-  end
-
-
-end
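The removed test_default gives a feel for how the rnorm Tokenizer scores candidate gene-name pairs: higher values mean a closer match, and disagreeing tokens push the score negative. A short usage sketch based only on calls that appear in the deleted tests (the exact scores depend on the default token weights, so treat the numbers as those asserted by the tests rather than guaranteed values):

    require 'rbbt/ner/rnorm/tokens'

    index = Tokenizer.new
    index.evaluate("pol III", "POL3")        # 7 in the test: the tokens align after normalization
    index.evaluate("HP1gamma", "HP1-GAMMA")  # > 1: special token, number and greek letter all match
    index.evaluate("DNA1", "GENE2")          # -10 in the test: the trailing numbers disagree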