rbbt 1.2.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.rdoc +2 -138
- metadata +69 -214
- data/LICENSE +0 -20
- data/bin/rbbt_config +0 -245
- data/install_scripts/classifier/R/classify.R +0 -36
- data/install_scripts/classifier/Rakefile +0 -140
- data/install_scripts/get_abner.sh +0 -2
- data/install_scripts/get_banner.sh +0 -25
- data/install_scripts/get_biocreative.sh +0 -72
- data/install_scripts/get_crf++.sh +0 -26
- data/install_scripts/get_entrez.sh +0 -4
- data/install_scripts/get_go.sh +0 -4
- data/install_scripts/get_polysearch.sh +0 -8
- data/install_scripts/ner/Rakefile +0 -206
- data/install_scripts/ner/config/default.rb +0 -52
- data/install_scripts/norm/Rakefile +0 -219
- data/install_scripts/norm/config/cue_default.rb +0 -10
- data/install_scripts/norm/config/tokens_default.rb +0 -86
- data/install_scripts/norm/functions.sh +0 -23
- data/install_scripts/organisms/Ath.Rakefile +0 -55
- data/install_scripts/organisms/Cal.Rakefile +0 -84
- data/install_scripts/organisms/Cel.Rakefile +0 -109
- data/install_scripts/organisms/Hsa.Rakefile +0 -140
- data/install_scripts/organisms/Mmu.Rakefile +0 -77
- data/install_scripts/organisms/Rakefile +0 -43
- data/install_scripts/organisms/Rno.Rakefile +0 -88
- data/install_scripts/organisms/Sce.Rakefile +0 -66
- data/install_scripts/organisms/Spo.Rakefile +0 -40
- data/install_scripts/organisms/rake-include.rb +0 -252
- data/install_scripts/wordlists/consonants +0 -897
- data/install_scripts/wordlists/stopwords +0 -1
- data/lib/rbbt.rb +0 -83
- data/lib/rbbt/bow/bow.rb +0 -88
- data/lib/rbbt/bow/classifier.rb +0 -116
- data/lib/rbbt/bow/dictionary.rb +0 -187
- data/lib/rbbt/ner/abner.rb +0 -34
- data/lib/rbbt/ner/banner.rb +0 -73
- data/lib/rbbt/ner/dictionaryNER.rb +0 -98
- data/lib/rbbt/ner/regexpNER.rb +0 -70
- data/lib/rbbt/ner/rner.rb +0 -227
- data/lib/rbbt/ner/rnorm.rb +0 -143
- data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
- data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
- data/lib/rbbt/sources/biocreative.rb +0 -75
- data/lib/rbbt/sources/biomart.rb +0 -105
- data/lib/rbbt/sources/entrez.rb +0 -211
- data/lib/rbbt/sources/go.rb +0 -85
- data/lib/rbbt/sources/gscholar.rb +0 -74
- data/lib/rbbt/sources/organism.rb +0 -241
- data/lib/rbbt/sources/polysearch.rb +0 -117
- data/lib/rbbt/sources/pubmed.rb +0 -248
- data/lib/rbbt/util/arrayHash.rb +0 -266
- data/lib/rbbt/util/filecache.rb +0 -72
- data/lib/rbbt/util/index.rb +0 -47
- data/lib/rbbt/util/misc.rb +0 -106
- data/lib/rbbt/util/open.rb +0 -251
- data/lib/rbbt/util/rake.rb +0 -183
- data/lib/rbbt/util/simpleDSL.rb +0 -87
- data/lib/rbbt/util/tmpfile.rb +0 -35
- data/tasks/install.rake +0 -124
- data/test/rbbt/bow/test_bow.rb +0 -33
- data/test/rbbt/bow/test_classifier.rb +0 -72
- data/test/rbbt/bow/test_dictionary.rb +0 -91
- data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
- data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
- data/test/rbbt/ner/test_abner.rb +0 -17
- data/test/rbbt/ner/test_banner.rb +0 -17
- data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
- data/test/rbbt/ner/test_regexpNER.rb +0 -33
- data/test/rbbt/ner/test_rner.rb +0 -126
- data/test/rbbt/ner/test_rnorm.rb +0 -47
- data/test/rbbt/sources/test_biocreative.rb +0 -38
- data/test/rbbt/sources/test_biomart.rb +0 -31
- data/test/rbbt/sources/test_entrez.rb +0 -49
- data/test/rbbt/sources/test_go.rb +0 -24
- data/test/rbbt/sources/test_organism.rb +0 -59
- data/test/rbbt/sources/test_polysearch.rb +0 -27
- data/test/rbbt/sources/test_pubmed.rb +0 -39
- data/test/rbbt/util/test_arrayHash.rb +0 -257
- data/test/rbbt/util/test_filecache.rb +0 -37
- data/test/rbbt/util/test_index.rb +0 -31
- data/test/rbbt/util/test_misc.rb +0 -20
- data/test/rbbt/util/test_open.rb +0 -110
- data/test/rbbt/util/test_simpleDSL.rb +0 -57
- data/test/rbbt/util/test_tmpfile.rb +0 -21
- data/test/test_helper.rb +0 -4
- data/test/test_rbbt.rb +0 -11
data/test/rbbt/ner/test_abner.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt/ner/abner'
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
class TestAbner < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_extract
|
8
|
-
ner = Abner.new
|
9
|
-
|
10
|
-
mentions = ner.extract(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
|
11
|
-
["SHP-2", "SHIP", "Shc"].each{|mention|
|
12
|
-
assert(mentions.include? mention)
|
13
|
-
}
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt/ner/banner'
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
class TestBanner < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_extract
|
8
|
-
ner = Banner.new
|
9
|
-
|
10
|
-
mentions = ner.extract(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
|
11
|
-
["SHP - 2", "SHIP", "Shc"].each{|mention|
|
12
|
-
assert(mentions.include? mention)
|
13
|
-
}
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
@@ -1,122 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/util/tmpfile'
|
3
|
-
require 'rbbt/util/open'
|
4
|
-
require 'rbbt/ner/dictionaryNER'
|
5
|
-
require 'test/unit'
|
6
|
-
|
7
|
-
class TestDictionaryNER < Test::Unit::TestCase
|
8
|
-
|
9
|
-
def setup
|
10
|
-
@dictionary =<<-EOT
|
11
|
-
DICT1\tWord1 Word2\tWord1
|
12
|
-
DICT2\tWord3-Word4\tWord4
|
13
|
-
EOT
|
14
|
-
|
15
|
-
@dict = {
|
16
|
-
"word1" => [{'word2' => ['DICT1'] }, 'DICT1'],
|
17
|
-
"word3" => [{'word4' => ['DICT2'] }],
|
18
|
-
"word4" => ['DICT2'],
|
19
|
-
}
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_simplify
|
23
|
-
assert_equal('word1', DictionaryNER.simplify( "Word1"))
|
24
|
-
assert_equal('ACL', DictionaryNER.simplify("ACL"))
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_chunk
|
28
|
-
assert_equal(["Word1","Word2"], DictionaryNER.chunk('Word1-Word2'))
|
29
|
-
assert_equal(["Word1-1"], DictionaryNER.chunk('Word1-1'))
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_match
|
33
|
-
|
34
|
-
[
|
35
|
-
|
36
|
-
["Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],
|
37
|
-
|
38
|
-
["Word1 Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],
|
39
|
-
|
40
|
-
["Word2 Word1 Word3", {"word1" => ["D1"]}, {"Word1" => ["D1"]} ],
|
41
|
-
|
42
|
-
["Word2 Word1 Word4", {"word1" => ["D1","D2"]}, {"Word1" => ["D1","D2"]} ],
|
43
|
-
|
44
|
-
["Word2 Word1 Word4",
|
45
|
-
{"word1" => [{'word2' => ['D1']}]},
|
46
|
-
{} ],
|
47
|
-
|
48
|
-
[
|
49
|
-
"Word2 Word1 Word4",
|
50
|
-
{"word1" => [ {'word4' => ['D1']} ] },
|
51
|
-
{"Word1 Word4" => ["D1"]},
|
52
|
-
],
|
53
|
-
|
54
|
-
[
|
55
|
-
"Word2 Word1 Word4",
|
56
|
-
{"word1" => [ {'word4' => ['D1']} ], "word4" => ['D2'] },
|
57
|
-
{"Word1 Word4" => ["D1"], "Word4" => ['D2']},
|
58
|
-
],
|
59
|
-
|
60
|
-
|
61
|
-
].each{|match_info|
|
62
|
-
text = match_info[0]
|
63
|
-
dict = match_info[1]
|
64
|
-
result = match_info[2]
|
65
|
-
assert_equal(result, DictionaryNER.match(dict, text))
|
66
|
-
}
|
67
|
-
|
68
|
-
end
|
69
|
-
|
70
|
-
def test_add_name
|
71
|
-
|
72
|
-
[
|
73
|
-
|
74
|
-
["Word1", {"word1" => ['code']}],
|
75
|
-
|
76
|
-
["Word1 Word2", {"word1" => [{"word2" => ['code']}]}],
|
77
|
-
|
78
|
-
["Cerebellar stroke syndrome", {"cerebellar" => [{'stroke' => [{'syndrome' => ['code']}]}]}]
|
79
|
-
|
80
|
-
].each{|info|
|
81
|
-
name = info[0]
|
82
|
-
result = info[1]
|
83
|
-
|
84
|
-
dict = {}
|
85
|
-
DictionaryNER.add_name(dict, name, 'code')
|
86
|
-
assert_equal(result, dict)
|
87
|
-
}
|
88
|
-
|
89
|
-
end
|
90
|
-
|
91
|
-
def test_load
|
92
|
-
assert_equal(@dict, DictionaryNER.load(@dictionary))
|
93
|
-
end
|
94
|
-
|
95
|
-
def test_class
|
96
|
-
ner = DictionaryNER.new(@dictionary)
|
97
|
-
|
98
|
-
[
|
99
|
-
[ "Word1 Word2", ["Word1 Word2", "Word1"] ],
|
100
|
-
[ "foo Word1 Word2 foo", ["Word1 Word2", "Word1"] ],
|
101
|
-
[ "Word1-Word2", ["Word1 Word2", "Word1"] ],
|
102
|
-
[ "Word1\nWord2", ["Word1 Word2", "Word1"] ],
|
103
|
-
].each{|info|
|
104
|
-
text = info[0]
|
105
|
-
keys = info[1]
|
106
|
-
|
107
|
-
assert_equal(keys.sort, ner.match(text).keys.sort)
|
108
|
-
}
|
109
|
-
end
|
110
|
-
|
111
|
-
def test_load_from_file
|
112
|
-
tmpfile = TmpFile.tmp_file
|
113
|
-
|
114
|
-
Open.write(tmpfile, @dictionary)
|
115
|
-
|
116
|
-
ner = DictionaryNER.new(tmpfile)
|
117
|
-
|
118
|
-
assert(ner.match("Word1").any?)
|
119
|
-
end
|
120
|
-
|
121
|
-
end
|
122
|
-
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt'
|
3
|
-
require 'rbbt/util/tmpfile'
|
4
|
-
require 'rbbt/ner/regexpNER'
|
5
|
-
require 'test/unit'
|
6
|
-
|
7
|
-
class TestRegExpNER < Test::Unit::TestCase
|
8
|
-
|
9
|
-
def test_class
|
10
|
-
text = "a bc d e f g h i j k l m n o p q one two"
|
11
|
-
|
12
|
-
lexicon =<<-EOF
|
13
|
-
C1,a,x,xx,xxx
|
14
|
-
C2,bc,y,yy,yyy
|
15
|
-
C3,i,z,zz,zzz,m,one two
|
16
|
-
EOF
|
17
|
-
|
18
|
-
file = TmpFile.tmp_file
|
19
|
-
File.open(file, 'w'){|f| f.write lexicon}
|
20
|
-
|
21
|
-
r = RegExpNER.new(file, :sep => ',', :stopwords => false)
|
22
|
-
assert_equal(['a', 'bc', 'i', 'm','one two'].sort,r.match_hash(text).values.flatten.sort)
|
23
|
-
|
24
|
-
r = RegExpNER.new(file, :sep => ',', :stopwords => true)
|
25
|
-
assert_equal(['bc', 'm','one two'].sort,r.match_hash(text).values.flatten.sort)
|
26
|
-
|
27
|
-
|
28
|
-
FileUtils.rm file
|
29
|
-
end
|
30
|
-
|
31
|
-
end
|
32
|
-
|
33
|
-
|
data/test/rbbt/ner/test_rner.rb
DELETED
@@ -1,126 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt'
|
3
|
-
require 'rbbt/ner/rner'
|
4
|
-
require 'test/unit'
|
5
|
-
|
6
|
-
class TestRNer < Test::Unit::TestCase
|
7
|
-
|
8
|
-
def setup
|
9
|
-
@parser = NERFeatures.new do
|
10
|
-
isLetters /^[A-Z]+$/i
|
11
|
-
context prefix_3 /^(...)/
|
12
|
-
downcase do |w| w.downcase end
|
13
|
-
|
14
|
-
context %w(downcase)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_config
|
19
|
-
config = <<-EOC
|
20
|
-
isLetters(/^[A-Z]+$/i)
|
21
|
-
context(prefix_3(/^(...)/))
|
22
|
-
downcase { |w| w.downcase }
|
23
|
-
context(["downcase"])
|
24
|
-
EOC
|
25
|
-
|
26
|
-
assert(@parser.config == config)
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_reverse
|
30
|
-
assert_equal("protein P53", NERFeatures.reverse("P53 protein"))
|
31
|
-
assert_equal(
|
32
|
-
". LH of assay - radioimmuno serum the with compared was LH urinary for ) GONAVIS - HI ( test hemagglutination direct new A",
|
33
|
-
NERFeatures.reverse(
|
34
|
-
"A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum\n radioimmuno-assay of LH."
|
35
|
-
))
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_features
|
39
|
-
assert(@parser.features("abCdE"),["abCdE",true,'abc','abcde'])
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_template
|
43
|
-
template =<<-EOT
|
44
|
-
UisLetters: %x[0,1]
|
45
|
-
Uprefix_3: %x[0,2]
|
46
|
-
Uprefix_3#1: %x[1,2]
|
47
|
-
Uprefix_3#-1: %x[-1,2]
|
48
|
-
Udowncase: %x[0,3]
|
49
|
-
Udowncase#1: %x[1,3]
|
50
|
-
Udowncase#-1: %x[-1,3]
|
51
|
-
B
|
52
|
-
EOT
|
53
|
-
|
54
|
-
assert(@parser.template == template)
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_tokens
|
58
|
-
assert( NERFeatures.tokens("A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum\n radioimmuno-assay of LH.")==
|
59
|
-
["A", "new", "direct", "hemagglutination", "test", "(", "HI", "-", "GONAVIS", ")", "for", "urinary", "LH", "was", "compared", "with", "the", "serum", "radioimmuno", "-", "assay", "of", "LH", "."])
|
60
|
-
|
61
|
-
|
62
|
-
end
|
63
|
-
def test_text_features
|
64
|
-
|
65
|
-
assert(@parser.text_features("abCdE 1234") == [["abCdE",true, "abC", "abcde"], ["1234",false, "123", "1234"]])
|
66
|
-
assert(@parser.text_features("abCdE 1234",true) == [["abCdE",true, "abC", "abcde",1], ["1234",false, "123", "1234",2]])
|
67
|
-
assert(@parser.text_features("abCdE 1234",false) == [["abCdE",true, "abC", "abcde",0], ["1234",false, "123", "1234",0]])
|
68
|
-
|
69
|
-
end
|
70
|
-
|
71
|
-
def test_tagged_features
|
72
|
-
assert_equal(
|
73
|
-
[["phosphorilation",true, "pho", "phosphorilation", 0],
|
74
|
-
["of",true, false, "of", 0],
|
75
|
-
["GENE1",false, "GEN", "gene1", 1],
|
76
|
-
[".", false, false, ".", 0]],
|
77
|
-
@parser.tagged_features("phosphorilation of GENE1.",['GENE1']))
|
78
|
-
|
79
|
-
assert_equal(
|
80
|
-
[["GENE1",false, "GEN", "gene1", 1],
|
81
|
-
["phosphorilation",true, "pho", "phosphorilation", 0]],
|
82
|
-
@parser.tagged_features("GENE1 phosphorilation",['GENE1']))
|
83
|
-
|
84
|
-
|
85
|
-
assert_equal(
|
86
|
-
[["phosphorilation",true, "pho", "phosphorilation", 0],
|
87
|
-
["of",true, false, "of", 0],
|
88
|
-
["GENE",true, "GEN", "gene", 1],
|
89
|
-
["1",false, false, "1", 2],
|
90
|
-
[".", false, false, ".", 0]],
|
91
|
-
@parser.tagged_features("phosphorilation of GENE 1.",['GENE 1']))
|
92
|
-
end
|
93
|
-
|
94
|
-
def test_tagged_features_reverse
|
95
|
-
@parser.reverse = true
|
96
|
-
assert_equal(
|
97
|
-
[
|
98
|
-
["GENE1",false, "GEN", "gene1", 1],
|
99
|
-
["of",true, false, "of", 0],
|
100
|
-
["phosphorilation",true, "pho", "phosphorilation", 0]
|
101
|
-
],
|
102
|
-
@parser.tagged_features("phosphorilation of GENE1",['GENE1']))
|
103
|
-
|
104
|
-
assert_equal(
|
105
|
-
[
|
106
|
-
[".", false, false, ".", 0],
|
107
|
-
["1",false, false, "1", 1],
|
108
|
-
["GENE",true, "GEN", "gene", 2],
|
109
|
-
["of",true, false, "of", 0],
|
110
|
-
["phosphorilation",true, "pho", "phosphorilation", 0]
|
111
|
-
],
|
112
|
-
@parser.tagged_features("phosphorilation of GENE 1.",['GENE 1']))
|
113
|
-
end
|
114
|
-
|
115
|
-
|
116
|
-
def test_NER_default
|
117
|
-
parser = NERFeatures.new
|
118
|
-
|
119
|
-
assert(parser.template =~ /UisLetter/)
|
120
|
-
end
|
121
|
-
|
122
|
-
def test_CRFPP_install
|
123
|
-
assert(require File.join(Rbbt.datadir, 'third_party/crf++/ruby/CRFPP'))
|
124
|
-
end
|
125
|
-
|
126
|
-
end
|
data/test/rbbt/ner/test_rnorm.rb
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt/ner/rnorm'
|
3
|
-
require 'rbbt/util/open'
|
4
|
-
require 'rbbt/util/tmpfile'
|
5
|
-
require 'test/unit'
|
6
|
-
|
7
|
-
class TestRNORM < Test::Unit::TestCase
|
8
|
-
|
9
|
-
def setup
|
10
|
-
tmp = TmpFile.tmp_file("test-rnorm-")
|
11
|
-
lexicon =<<-EOT
|
12
|
-
S000000029 YAL031C GIP4 FUN21
|
13
|
-
S000000030 YAL032C PRP45 FUN20
|
14
|
-
S000000031 YAL033W POP5 FUN53
|
15
|
-
S000000374 YBR170C NPL4 HRD4
|
16
|
-
S000000375 GENE1 BBB CCC
|
17
|
-
S000000376 AAA GENE1 DDD
|
18
|
-
EOT
|
19
|
-
|
20
|
-
Open.write(tmp, lexicon)
|
21
|
-
|
22
|
-
@norm = Normalizer.new(tmp)
|
23
|
-
FileUtils.rm tmp
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_match
|
27
|
-
assert_equal(["S000000029"], @norm.match("FUN21"))
|
28
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
|
29
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
|
30
|
-
assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
|
31
|
-
assert_equal([], @norm.match("GER4"))
|
32
|
-
|
33
|
-
@norm.match("FUN21")
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_select
|
37
|
-
assert_equal(["S000000029"], @norm.select(["S000000030", "S000000029", "S000000031"],"FUN 21"))
|
38
|
-
end
|
39
|
-
|
40
|
-
def test_resolve
|
41
|
-
assert_equal(["S000000029"], @norm.resolve("FUN 21"))
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_order
|
45
|
-
assert_equal(["S000000375"], @norm.resolve("GENE1"))
|
46
|
-
end
|
47
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt/sources/biocreative'
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
class TestBiocreative < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_BC2GM
|
8
|
-
assert(Biocreative.BC2GM(:test)['BC2GM000008491'][:text] == "Phenotypic analysis demonstrates that trio and Abl cooperate in regulating axon outgrowth in the embryonic central nervous system (CNS).")
|
9
|
-
assert(Biocreative.BC2GM(:test)['BC2GM000008491'][:mentions] == ["trio", "Abl"] )
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_position
|
13
|
-
mention = "IgA"
|
14
|
-
text = "Early complement components, C1q and C4, and IgA secretory piece were absent."
|
15
|
-
pos = [[38, 40]]
|
16
|
-
assert(Biocreative.position(text,mention) == pos)
|
17
|
-
|
18
|
-
mention = "tyrosine-specific phosphatase"
|
19
|
-
text = "When expressed in Escherichia coli, SH-PTP2 displays tyrosine-specific phosphatase activity."
|
20
|
-
pos = [[46, 73]]
|
21
|
-
assert(Biocreative.position(text,mention) == pos)
|
22
|
-
|
23
|
-
mention = "tyrosine - specific phosphatase"
|
24
|
-
text = "When expressed in Escherichia coli, SH-PTP2 displays tyrosine-specific phosphatase activity."
|
25
|
-
pos = [[46, 73]]
|
26
|
-
assert(Biocreative.position(text,mention) == pos)
|
27
|
-
|
28
|
-
mention = "LH"
|
29
|
-
text = "A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum radioimmuno-assay of LH."
|
30
|
-
pos = [[52, 53],[96, 97]]
|
31
|
-
assert(Biocreative.position(text,mention) == pos)
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
|
@@ -1,31 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt/sources/biomart'
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
class TestBioMart < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_get
|
8
|
-
assert_raise BioMart::QueryError do
|
9
|
-
BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],['with_unknownattr'])
|
10
|
-
end
|
11
|
-
|
12
|
-
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[])
|
13
|
-
assert(data['856452']['protein_id'].include? 'AAB68382')
|
14
|
-
|
15
|
-
data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data )
|
16
|
-
assert(data['856452']['protein_id'].include? 'AAB68382')
|
17
|
-
assert(data['856452']['external_gene_id'].include? 'CUP1-2')
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_query
|
22
|
-
data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'],[])
|
23
|
-
|
24
|
-
assert(data['856452']['protein_id'].include? 'AAB68382')
|
25
|
-
assert(data['856452']['external_gene_id'].include? 'CUP1-2')
|
26
|
-
|
27
|
-
end
|
28
|
-
|
29
|
-
end
|
30
|
-
|
31
|
-
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt/sources/entrez'
|
3
|
-
require 'test/unit'
|
4
|
-
|
5
|
-
class TestEntrez < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_entrez2native
|
8
|
-
tax = 4932
|
9
|
-
fix = proc{|code| code.sub(/SGD:S0/,'S0') }
|
10
|
-
check = proc{|code| code.match(/^S0/)}
|
11
|
-
|
12
|
-
lexicon = Entrez.entrez2native(tax, 5, fix, check)
|
13
|
-
assert(lexicon['855611'].include? 'S000005056')
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_entrez2pubmed
|
17
|
-
tax = 4932
|
18
|
-
|
19
|
-
data = Entrez.entrez2pubmed(tax)
|
20
|
-
assert(data['850320'].include? '15102838')
|
21
|
-
end
|
22
|
-
|
23
|
-
def test_getonline
|
24
|
-
geneids = 9129
|
25
|
-
|
26
|
-
assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids))
|
27
|
-
|
28
|
-
geneids = [9129,9]
|
29
|
-
assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids)[9129])
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_getgene
|
33
|
-
geneids = 9129
|
34
|
-
assert_equal([["PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"]], Entrez.get_gene(geneids).description)
|
35
|
-
|
36
|
-
geneids = [9129,728049]
|
37
|
-
assert_equal([["PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"]], Entrez.get_gene(geneids)[9129].description)
|
38
|
-
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_similarity
|
42
|
-
assert(Entrez.gene_text_similarity(9129, "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)") > 0)
|
43
|
-
assert_equal(0, Entrez.gene_text_similarity("NON EXISTEN GENEID", "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"))
|
44
|
-
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
|
49
|
-
|