rbbt 1.2.5 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +69 -214
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -245
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -140
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -86
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Ath.Rakefile +0 -55
  22. data/install_scripts/organisms/Cal.Rakefile +0 -84
  23. data/install_scripts/organisms/Cel.Rakefile +0 -109
  24. data/install_scripts/organisms/Hsa.Rakefile +0 -140
  25. data/install_scripts/organisms/Mmu.Rakefile +0 -77
  26. data/install_scripts/organisms/Rakefile +0 -43
  27. data/install_scripts/organisms/Rno.Rakefile +0 -88
  28. data/install_scripts/organisms/Sce.Rakefile +0 -66
  29. data/install_scripts/organisms/Spo.Rakefile +0 -40
  30. data/install_scripts/organisms/rake-include.rb +0 -252
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -83
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -85
  49. data/lib/rbbt/sources/gscholar.rb +0 -74
  50. data/lib/rbbt/sources/organism.rb +0 -241
  51. data/lib/rbbt/sources/polysearch.rb +0 -117
  52. data/lib/rbbt/sources/pubmed.rb +0 -248
  53. data/lib/rbbt/util/arrayHash.rb +0 -266
  54. data/lib/rbbt/util/filecache.rb +0 -72
  55. data/lib/rbbt/util/index.rb +0 -47
  56. data/lib/rbbt/util/misc.rb +0 -106
  57. data/lib/rbbt/util/open.rb +0 -251
  58. data/lib/rbbt/util/rake.rb +0 -183
  59. data/lib/rbbt/util/simpleDSL.rb +0 -87
  60. data/lib/rbbt/util/tmpfile.rb +0 -35
  61. data/tasks/install.rake +0 -124
  62. data/test/rbbt/bow/test_bow.rb +0 -33
  63. data/test/rbbt/bow/test_classifier.rb +0 -72
  64. data/test/rbbt/bow/test_dictionary.rb +0 -91
  65. data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
  66. data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
  67. data/test/rbbt/ner/test_abner.rb +0 -17
  68. data/test/rbbt/ner/test_banner.rb +0 -17
  69. data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
  70. data/test/rbbt/ner/test_regexpNER.rb +0 -33
  71. data/test/rbbt/ner/test_rner.rb +0 -126
  72. data/test/rbbt/ner/test_rnorm.rb +0 -47
  73. data/test/rbbt/sources/test_biocreative.rb +0 -38
  74. data/test/rbbt/sources/test_biomart.rb +0 -31
  75. data/test/rbbt/sources/test_entrez.rb +0 -49
  76. data/test/rbbt/sources/test_go.rb +0 -24
  77. data/test/rbbt/sources/test_organism.rb +0 -59
  78. data/test/rbbt/sources/test_polysearch.rb +0 -27
  79. data/test/rbbt/sources/test_pubmed.rb +0 -39
  80. data/test/rbbt/util/test_arrayHash.rb +0 -257
  81. data/test/rbbt/util/test_filecache.rb +0 -37
  82. data/test/rbbt/util/test_index.rb +0 -31
  83. data/test/rbbt/util/test_misc.rb +0 -20
  84. data/test/rbbt/util/test_open.rb +0 -110
  85. data/test/rbbt/util/test_simpleDSL.rb +0 -57
  86. data/test/rbbt/util/test_tmpfile.rb +0 -21
  87. data/test/test_helper.rb +0 -4
  88. data/test/test_rbbt.rb +0 -11
@@ -1,24 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
-
3
- require 'rbbt/sources/go'
4
- require 'test/unit'
5
-
6
- class TestGo < Test::Unit::TestCase
7
-
8
- def test_go
9
- assert_match('vacuole inheritance',GO::id2name('GO:0000011'))
10
- assert_equal(['vacuole inheritance','alpha-glucoside transport'], GO::id2name(['GO:0000011','GO:0000017']))
11
- end
12
-
13
- def test_ancestors
14
- assert GO.id2ancestors('GO:0000001').include? 'GO:0048308'
15
- end
16
-
17
- def test_namespace
18
- assert_equal 'biological_process', GO.id2namespace('GO:0000001')
19
- end
20
-
21
-
22
- end
23
-
24
-
@@ -1,59 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt/sources/organism'
3
- require 'test/unit'
4
-
5
- class TestOrganism < Test::Unit::TestCase
6
-
7
- def test_all
8
- assert Organism.all.include? 'Sce'
9
- end
10
-
11
- def test_ner
12
- assert(Organism.ner(:Sce, :abner).is_a? Abner)
13
- end
14
-
15
- def test_norm
16
- assert_equal(["S000003008"], Organism.norm(:Sce).select(['S000029454','S000003008'],'SLU1', 'SLU1 has been used in the literature to refer to both HEM2/YGL040C, which encodes a porphobilinogen synthase and SLU1, which is essential for splicing.'))
17
-
18
- end
19
-
20
- def test_supported_ids
21
-
22
- ids = Organism.supported_ids('Sce', :examples => true)
23
- assert(ids.first[0] == 'SGD DB Id' && ids.first[1] =~ /^S00/)
24
-
25
- ids = Organism.supported_ids('Sce')
26
- assert(ids.first == 'SGD DB Id')
27
- end
28
-
29
- def test_index
30
- index = Organism.id_index('Sce')
31
- assert_equal("S000004431", index['851160'])
32
- end
33
-
34
- def test_index_partial
35
- index = Organism.id_index('Sce',:other => ['Ensembl Gene ID', 'Protein ID'])
36
- assert_nil(index['851160'])
37
- assert_equal("S000000838", index['YER036C'])
38
-
39
- index = Organism.id_index('Sce',:other => ['Ensembl Gene ID', 'Protein ID'], :native => "Entrez Gene ID")
40
- assert_equal("856758", index['YER036C'])
41
-
42
- end
43
-
44
- def test_go_terms
45
-
46
- begin
47
- goterms = Organism.goterms('Sce')
48
- assert(goterms["S000000838"].include? "GO:0016887")
49
- rescue
50
- puts $!
51
- puts "No goterms produced, see if it is all installed"
52
- end
53
-
54
- end
55
-
56
-
57
- end
58
-
59
-
@@ -1,27 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt'
3
- require 'rbbt/util/tmpfile'
4
- require 'rbbt/sources/polysearch'
5
- require 'test/unit'
6
-
7
- class TestPolysearch < Test::Unit::TestCase
8
-
9
- def test_match
10
- text =<<-EOT
11
-
12
- Analysis of sorted peripheral blood lymphocytes (CD8 T cells, CD4 T cells,
13
- B cells, NK cells) from patients with melanoma. These subpopulations are
14
- involved in antitumor responses and negatively impacted by cancer. Results
15
- provide insight into molecular mechanisms of immune dysfunction in cancer.
16
-
17
- EOT
18
-
19
- assert_equal(["B cells", "T cells", "blood", "lymphocytes", "peripheral blood", "peripheral blood lymphocytes"].sort, Polysearch.match(text,nil).values.flatten.uniq.sort)
20
- end
21
-
22
- def test_name
23
- assert_equal('ligament', Polysearch.name('organ','OR00039'))
24
- end
25
- end
26
-
27
-
@@ -1,39 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt/sources/pubmed'
3
- require 'test/unit'
4
-
5
- class TestPubMed < Test::Unit::TestCase
6
-
7
- def test_get_online
8
- pmid = '16438716'
9
- assert(PubMed.get_online(pmid) =~ /Discovering semantic features in the literature: a foundation for building functional associations./)
10
-
11
- pmids = ['16438716', 17204154]
12
- assert(PubMed.get_online(pmids)[pmid] =~ /Discovering semantic features in the literature: a foundation for building functional associations./)
13
- end
14
-
15
- def test_get_article
16
- pmid = '16438716'
17
- assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
18
-
19
- pmids = ['16438716', 17204154]
20
- assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
21
- end
22
-
23
- def test_full_text
24
- pmid = '16438716'
25
- assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
26
- end
27
-
28
- def test_query
29
- assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
30
- end
31
-
32
- def test_bibentry
33
- assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
34
- assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
35
- end
36
-
37
- end
38
-
39
-
@@ -1,257 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt/util/arrayHash'
3
- require 'test/unit'
4
-
5
- class TestArrayHash < Test::Unit::TestCase
6
-
7
- def test_merge_values
8
- list1 = ["A|B","C"]
9
- list2 = ["a|b","c"]
10
- list3 = ["a|b",""]
11
- list4 = nil
12
-
13
- assert_equal(["A|B|a|b","C|c"], ArrayHash.merge_values(list1,list2))
14
-
15
- assert_equal(["A|B|a|b","C"], ArrayHash.merge_values(list1,list3))
16
-
17
- assert_equal(["a|b|A|B","C"], ArrayHash.merge_values(list3,list1))
18
-
19
- assert_equal(["A|B","C"], ArrayHash.merge_values(list4,list1))
20
- end
21
-
22
- def test_pullout
23
- data_in = {
24
- "1" => ['A|B','C'],
25
- "2" => ['a|b','c']
26
- }
27
-
28
- data_out0 = {
29
- 'A' => ["1",'C'],
30
- 'B' => ["1",'C'],
31
- 'a' => ["2",'c'],
32
- 'b' => ["2",'c'],
33
- }
34
-
35
- data_out0_ci = {
36
- 'a' => ["1|2",'C|c'],
37
- 'b' => ["1|2",'C|c'],
38
- }
39
-
40
-
41
-
42
- data_out1 = {
43
- 'C' => ["1",'A|B'],
44
- 'c' => ["2",'a|b'],
45
- }
46
-
47
-
48
- assert_equal(data_out0, ArrayHash.pullout(data_in,0, :case_insensitive => false))
49
- assert_equal(data_out1, ArrayHash.pullout(data_in,1, :case_insensitive => false))
50
- assert_equal(data_out0_ci, ArrayHash.pullout(data_in,0,:case_insensitive => true))
51
-
52
- assert_equal("1|2", ArrayHash.pullout(data_in,0,:case_insensitive => true, :index => true)['A'])
53
- assert_equal("1|2", ArrayHash.pullout(data_in,0,:case_insensitive => true, :index => true)['a'])
54
-
55
- end
56
-
57
- def test_merge
58
- hash1 = {
59
- '1' => ['A','B'],
60
- '2' => ['a','b'],
61
- }
62
-
63
- hash2 = {
64
- '1' => ['C']
65
- }
66
-
67
- hash_merged1 = {
68
- '1' => ['A','B','C'],
69
- '2' => ['a','b','']
70
- }
71
-
72
- hash3 = {
73
- 'A' => ['D']
74
- }
75
-
76
- hash_merged2 = {
77
- '1' => ['A','B','D'],
78
- '2' => ['a','b','']
79
- }
80
-
81
- hash4 = {
82
- 'D' => ['1']
83
- }
84
-
85
-
86
- assert_equal(hash_merged1, ArrayHash.merge(hash1, hash2, 'main', 'main', :case_insensitive => false))
87
- assert_equal(hash_merged2, ArrayHash.merge(hash1, hash3, 0, 'main', :case_insensitive => false))
88
- assert_equal(hash_merged2, ArrayHash.merge(hash1, hash4, 'main', 0, :case_insensitive => false))
89
- end
90
-
91
- def test_case_insensitive
92
- hash1 = {
93
- 'c' => ['A','B'],
94
- 'd' => ['a','b'],
95
- }
96
-
97
- hash2 = {
98
- 'C' => ['D']
99
- }
100
-
101
- hash_merged1 = {
102
- 'c' => ['A','B',''],
103
- 'd' => ['a','b',''],
104
- 'C' => ['','','D']
105
- }
106
-
107
- hash_merged2 = {
108
- 'c' => ['A','B','D'],
109
- 'd' => ['a','b',''],
110
- }
111
-
112
- assert_equal(hash_merged1, ArrayHash.merge(hash1, hash2, 'main', 'main', :case_insensitive => false))
113
- assert_equal(hash_merged2, ArrayHash.merge(hash1, hash2, 'main', 'main', :case_insensitive => true))
114
-
115
- end
116
-
117
- def test_clean
118
- data = {
119
- '1' => ['A','B'],
120
- '2' => ['a','A'],
121
- }
122
- data_clean = {
123
- '1' => ['A','B'],
124
- '2' => ['a',''],
125
- }
126
- assert_equal(data_clean, ArrayHash.clean(data))
127
-
128
- data = {
129
- '1' => ['A','B'],
130
- '2' => ['a','A|b'],
131
- }
132
- data_clean = {
133
- '1' => ['A','B'],
134
- '2' => ['a','b'],
135
- }
136
- assert_equal(data_clean, ArrayHash.clean(data))
137
-
138
- data = {
139
- '1' => ['A','B'],
140
- '2' => ['A|a','b'],
141
- }
142
- data_clean = {
143
- '1' => ['A','B'],
144
- '2' => ['a','b'],
145
- }
146
- assert_equal(data_clean, ArrayHash.clean(data))
147
-
148
-
149
- data = {
150
- '1' => ['a1','a2'],
151
- '2' => ['a3','a4|A1'],
152
- }
153
- data_clean = {
154
- '1' => ['a1','a2'],
155
- '2' => ['a3','a4'],
156
- }
157
- assert_equal(data, ArrayHash.clean(data))
158
- assert_equal(data_clean, ArrayHash.clean(data, :case_sensitive => true))
159
-
160
-
161
- end
162
-
163
-
164
- def test_field_pos
165
- data = {
166
- '1' => ['A','B'],
167
- '2' => ['a','b'],
168
- }
169
-
170
- table = ArrayHash.new(table, 'Entrez', ['FA', 'FB'])
171
-
172
- assert_equal(0, table.field_pos('FA'))
173
- assert_equal(:main, table.field_pos('Entrez'))
174
- assert_equal(:main, table.field_pos('entrez'))
175
-
176
- end
177
-
178
- def test_object_merge
179
- data1 = {
180
- '1' => ['A','B'],
181
- '2' => ['a','b'],
182
- }
183
- table1 = ArrayHash.new(data1, 'Entrez', ['FA', 'FB'])
184
-
185
- data2 = {
186
- '1' => ['C']
187
- }
188
- table2 = ArrayHash.new(data2, 'Entrez', ['FC'])
189
-
190
- hash_merged1 = {
191
- '1' => ['A','B','C'],
192
- '2' => ['a','b','']
193
- }
194
- names1 = %w(FA FB FC)
195
-
196
- table1.merge(table2, 'Entrez', :case_insensitive => false)
197
- assert_equal(hash_merged1, table1.data)
198
- assert_equal(names1, table1.fields)
199
-
200
-
201
-
202
- data3 = {
203
- 'b' => ['d']
204
- }
205
- table3 = ArrayHash.new(data3, 'FB', ['FD'])
206
-
207
- hash_merged2 = {
208
- '1' => ['A','B','C',''],
209
- '2' => ['a','b','','d']
210
- }
211
- names2 = %w(FA FB FC FD)
212
-
213
-
214
- table1.merge(table3, 'FB', :case_insensitive => false)
215
- assert_equal(hash_merged2, table1.data)
216
- assert_equal(names2, table1.fields)
217
- end
218
-
219
- def test_remove
220
- data = {
221
- '1' => ['A','B'],
222
- '2' => ['a','b'],
223
- }
224
- data2 = {
225
- '1' => ['B'],
226
- '2' => ['b'],
227
- }
228
-
229
-
230
- table = ArrayHash.new(data, 'Entrez', ['FA', 'FB'])
231
- table.remove('FA')
232
-
233
- assert_equal(nil, table.field_pos('FA'))
234
- assert_equal(['FB'], table.fields)
235
- assert_equal(data2, table.data)
236
- end
237
-
238
- def test_process
239
- data_in = {
240
- '1' => ['A','B'],
241
- '2' => ['a','b'],
242
- }
243
- data_out = {
244
- '1' => ['FA(A)','B'],
245
- '2' => ['FA(a)','b'],
246
- }
247
-
248
- table = ArrayHash.new(data_in, 'Entrez', ['FA', 'FB'])
249
-
250
- table.process('FA'){|n| "FA(#{n})"}
251
-
252
- assert_equal(data_out, table.data)
253
- end
254
-
255
- end
256
-
257
-
@@ -1,37 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt'
3
- require 'rbbt/util/filecache'
4
- require 'test/unit'
5
-
6
- class TestFileCache < Test::Unit::TestCase
7
-
8
- def setup
9
- @cachedir = Rbbt.cachedir
10
- end
11
-
12
- def test_escape
13
- path = '/etc/password'
14
- assert_equal('_SLASH_etc_SLASH_password',FileCache.clean_path(path))
15
- end
16
-
17
- def test_path
18
- assert_equal(File.expand_path(FileCache.path('123456789.xml')), File.expand_path(File.join(@cachedir, '/5/6/7/8/9/123456789.xml')))
19
- assert_equal(File.expand_path(FileCache.path('12.xml')), File.expand_path(File.join(@cachedir, '/1/2/12.xml')))
20
-
21
- assert_raise(FileCache::BadPathError){FileCache.path('/etc/passwd')}
22
- end
23
-
24
- def test_add_read
25
- filename = 'test_file_cache.txt'
26
- content = 'hello'
27
-
28
- FileCache.del_file(filename)
29
- FileCache.add_file(filename, content)
30
- assert_raise(FileCache::FileExistsError){FileCache.add_file(filename,'')}
31
- assert_nothing_raised{FileCache.add_file(filename,'',:force => true)}
32
- FileCache.del_file(filename)
33
-
34
- end
35
-
36
-
37
- end