rbbt 1.2.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +69 -214
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -245
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -140
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -86
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Ath.Rakefile +0 -55
  22. data/install_scripts/organisms/Cal.Rakefile +0 -84
  23. data/install_scripts/organisms/Cel.Rakefile +0 -109
  24. data/install_scripts/organisms/Hsa.Rakefile +0 -140
  25. data/install_scripts/organisms/Mmu.Rakefile +0 -77
  26. data/install_scripts/organisms/Rakefile +0 -43
  27. data/install_scripts/organisms/Rno.Rakefile +0 -88
  28. data/install_scripts/organisms/Sce.Rakefile +0 -66
  29. data/install_scripts/organisms/Spo.Rakefile +0 -40
  30. data/install_scripts/organisms/rake-include.rb +0 -252
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -83
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -85
  49. data/lib/rbbt/sources/gscholar.rb +0 -74
  50. data/lib/rbbt/sources/organism.rb +0 -241
  51. data/lib/rbbt/sources/polysearch.rb +0 -117
  52. data/lib/rbbt/sources/pubmed.rb +0 -248
  53. data/lib/rbbt/util/arrayHash.rb +0 -266
  54. data/lib/rbbt/util/filecache.rb +0 -72
  55. data/lib/rbbt/util/index.rb +0 -47
  56. data/lib/rbbt/util/misc.rb +0 -106
  57. data/lib/rbbt/util/open.rb +0 -251
  58. data/lib/rbbt/util/rake.rb +0 -183
  59. data/lib/rbbt/util/simpleDSL.rb +0 -87
  60. data/lib/rbbt/util/tmpfile.rb +0 -35
  61. data/tasks/install.rake +0 -124
  62. data/test/rbbt/bow/test_bow.rb +0 -33
  63. data/test/rbbt/bow/test_classifier.rb +0 -72
  64. data/test/rbbt/bow/test_dictionary.rb +0 -91
  65. data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
  66. data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
  67. data/test/rbbt/ner/test_abner.rb +0 -17
  68. data/test/rbbt/ner/test_banner.rb +0 -17
  69. data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
  70. data/test/rbbt/ner/test_regexpNER.rb +0 -33
  71. data/test/rbbt/ner/test_rner.rb +0 -126
  72. data/test/rbbt/ner/test_rnorm.rb +0 -47
  73. data/test/rbbt/sources/test_biocreative.rb +0 -38
  74. data/test/rbbt/sources/test_biomart.rb +0 -31
  75. data/test/rbbt/sources/test_entrez.rb +0 -49
  76. data/test/rbbt/sources/test_go.rb +0 -24
  77. data/test/rbbt/sources/test_organism.rb +0 -59
  78. data/test/rbbt/sources/test_polysearch.rb +0 -27
  79. data/test/rbbt/sources/test_pubmed.rb +0 -39
  80. data/test/rbbt/util/test_arrayHash.rb +0 -257
  81. data/test/rbbt/util/test_filecache.rb +0 -37
  82. data/test/rbbt/util/test_index.rb +0 -31
  83. data/test/rbbt/util/test_misc.rb +0 -20
  84. data/test/rbbt/util/test_open.rb +0 -110
  85. data/test/rbbt/util/test_simpleDSL.rb +0 -57
  86. data/test/rbbt/util/test_tmpfile.rb +0 -21
  87. data/test/test_helper.rb +0 -4
  88. data/test/test_rbbt.rb +0 -11
@@ -1,24 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
-
3
- require 'rbbt/sources/go'
4
- require 'test/unit'
5
-
6
- class TestGo < Test::Unit::TestCase
7
-
8
- def test_go
9
- assert_match('vacuole inheritance',GO::id2name('GO:0000011'))
10
- assert_equal(['vacuole inheritance','alpha-glucoside transport'], GO::id2name(['GO:0000011','GO:0000017']))
11
- end
12
-
13
- def test_ancestors
14
- assert GO.id2ancestors('GO:0000001').include? 'GO:0048308'
15
- end
16
-
17
- def test_namespace
18
- assert_equal 'biological_process', GO.id2namespace('GO:0000001')
19
- end
20
-
21
-
22
- end
23
-
24
-
@@ -1,59 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt/sources/organism'
3
- require 'test/unit'
4
-
5
- class TestOrganism < Test::Unit::TestCase
6
-
7
- def test_all
8
- assert Organism.all.include? 'Sce'
9
- end
10
-
11
- def test_ner
12
- assert(Organism.ner(:Sce, :abner).is_a? Abner)
13
- end
14
-
15
- def test_norm
16
- assert_equal(["S000003008"], Organism.norm(:Sce).select(['S000029454','S000003008'],'SLU1', 'SLU1 has been used in the literature to refer to both HEM2/YGL040C, which encodes a porphobilinogen synthase and SLU1, which is essential for splicing.'))
17
-
18
- end
19
-
20
- def test_supported_ids
21
-
22
- ids = Organism.supported_ids('Sce', :examples => true)
23
- assert(ids.first[0] == 'SGD DB Id' && ids.first[1] =~ /^S00/)
24
-
25
- ids = Organism.supported_ids('Sce')
26
- assert(ids.first == 'SGD DB Id')
27
- end
28
-
29
- def test_index
30
- index = Organism.id_index('Sce')
31
- assert_equal("S000004431", index['851160'])
32
- end
33
-
34
- def test_index_partial
35
- index = Organism.id_index('Sce',:other => ['Ensembl Gene ID', 'Protein ID'])
36
- assert_nil(index['851160'])
37
- assert_equal("S000000838", index['YER036C'])
38
-
39
- index = Organism.id_index('Sce',:other => ['Ensembl Gene ID', 'Protein ID'], :native => "Entrez Gene ID")
40
- assert_equal("856758", index['YER036C'])
41
-
42
- end
43
-
44
- def test_go_terms
45
-
46
- begin
47
- goterms = Organism.goterms('Sce')
48
- assert(goterms["S000000838"].include? "GO:0016887")
49
- rescue
50
- puts $!
51
- puts "No goterms produced, see if it is all installed"
52
- end
53
-
54
- end
55
-
56
-
57
- end
58
-
59
-
@@ -1,27 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt'
3
- require 'rbbt/util/tmpfile'
4
- require 'rbbt/sources/polysearch'
5
- require 'test/unit'
6
-
7
- class TestPolysearch < Test::Unit::TestCase
8
-
9
- def test_match
10
- text =<<-EOT
11
-
12
- Analysis of sorted peripheral blood lymphocytes (CD8 T cells, CD4 T cells,
13
- B cells, NK cells) from patients with melanoma. These subpopulations are
14
- involved in antitumor responses and negatively impacted by cancer. Results
15
- provide insight into molecular mechanisms of immune dysfunction in cancer.
16
-
17
- EOT
18
-
19
- assert_equal(["B cells", "T cells", "blood", "lymphocytes", "peripheral blood", "peripheral blood lymphocytes"].sort, Polysearch.match(text,nil).values.flatten.uniq.sort)
20
- end
21
-
22
- def test_name
23
- assert_equal('ligament', Polysearch.name('organ','OR00039'))
24
- end
25
- end
26
-
27
-
@@ -1,39 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt/sources/pubmed'
3
- require 'test/unit'
4
-
5
- class TestPubMed < Test::Unit::TestCase
6
-
7
- def test_get_online
8
- pmid = '16438716'
9
- assert(PubMed.get_online(pmid) =~ /Discovering semantic features in the literature: a foundation for building functional associations./)
10
-
11
- pmids = ['16438716', 17204154]
12
- assert(PubMed.get_online(pmids)[pmid] =~ /Discovering semantic features in the literature: a foundation for building functional associations./)
13
- end
14
-
15
- def test_get_article
16
- pmid = '16438716'
17
- assert(PubMed.get_article(pmid).title == "Discovering semantic features in the literature: a foundation for building functional associations.")
18
-
19
- pmids = ['16438716', 17204154]
20
- assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
21
- end
22
-
23
- def test_full_text
24
- pmid = '16438716'
25
- assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
26
- end
27
-
28
- def test_query
29
- assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
30
- end
31
-
32
- def test_bibentry
33
- assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
34
- assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
35
- end
36
-
37
- end
38
-
39
-
@@ -1,257 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt/util/arrayHash'
3
- require 'test/unit'
4
-
5
- class TestArrayHash < Test::Unit::TestCase
6
-
7
- def test_merge_values
8
- list1 = ["A|B","C"]
9
- list2 = ["a|b","c"]
10
- list3 = ["a|b",""]
11
- list4 = nil
12
-
13
- assert_equal(["A|B|a|b","C|c"], ArrayHash.merge_values(list1,list2))
14
-
15
- assert_equal(["A|B|a|b","C"], ArrayHash.merge_values(list1,list3))
16
-
17
- assert_equal(["a|b|A|B","C"], ArrayHash.merge_values(list3,list1))
18
-
19
- assert_equal(["A|B","C"], ArrayHash.merge_values(list4,list1))
20
- end
21
-
22
- def test_pullout
23
- data_in = {
24
- "1" => ['A|B','C'],
25
- "2" => ['a|b','c']
26
- }
27
-
28
- data_out0 = {
29
- 'A' => ["1",'C'],
30
- 'B' => ["1",'C'],
31
- 'a' => ["2",'c'],
32
- 'b' => ["2",'c'],
33
- }
34
-
35
- data_out0_ci = {
36
- 'a' => ["1|2",'C|c'],
37
- 'b' => ["1|2",'C|c'],
38
- }
39
-
40
-
41
-
42
- data_out1 = {
43
- 'C' => ["1",'A|B'],
44
- 'c' => ["2",'a|b'],
45
- }
46
-
47
-
48
- assert_equal(data_out0, ArrayHash.pullout(data_in,0, :case_insensitive => false))
49
- assert_equal(data_out1, ArrayHash.pullout(data_in,1, :case_insensitive => false))
50
- assert_equal(data_out0_ci, ArrayHash.pullout(data_in,0,:case_insensitive => true))
51
-
52
- assert_equal("1|2", ArrayHash.pullout(data_in,0,:case_insensitive => true, :index => true)['A'])
53
- assert_equal("1|2", ArrayHash.pullout(data_in,0,:case_insensitive => true, :index => true)['a'])
54
-
55
- end
56
-
57
- def test_merge
58
- hash1 = {
59
- '1' => ['A','B'],
60
- '2' => ['a','b'],
61
- }
62
-
63
- hash2 = {
64
- '1' => ['C']
65
- }
66
-
67
- hash_merged1 = {
68
- '1' => ['A','B','C'],
69
- '2' => ['a','b','']
70
- }
71
-
72
- hash3 = {
73
- 'A' => ['D']
74
- }
75
-
76
- hash_merged2 = {
77
- '1' => ['A','B','D'],
78
- '2' => ['a','b','']
79
- }
80
-
81
- hash4 = {
82
- 'D' => ['1']
83
- }
84
-
85
-
86
- assert_equal(hash_merged1, ArrayHash.merge(hash1, hash2, 'main', 'main', :case_insensitive => false))
87
- assert_equal(hash_merged2, ArrayHash.merge(hash1, hash3, 0, 'main', :case_insensitive => false))
88
- assert_equal(hash_merged2, ArrayHash.merge(hash1, hash4, 'main', 0, :case_insensitive => false))
89
- end
90
-
91
- def test_case_insensitive
92
- hash1 = {
93
- 'c' => ['A','B'],
94
- 'd' => ['a','b'],
95
- }
96
-
97
- hash2 = {
98
- 'C' => ['D']
99
- }
100
-
101
- hash_merged1 = {
102
- 'c' => ['A','B',''],
103
- 'd' => ['a','b',''],
104
- 'C' => ['','','D']
105
- }
106
-
107
- hash_merged2 = {
108
- 'c' => ['A','B','D'],
109
- 'd' => ['a','b',''],
110
- }
111
-
112
- assert_equal(hash_merged1, ArrayHash.merge(hash1, hash2, 'main', 'main', :case_insensitive => false))
113
- assert_equal(hash_merged2, ArrayHash.merge(hash1, hash2, 'main', 'main', :case_insensitive => true))
114
-
115
- end
116
-
117
- def test_clean
118
- data = {
119
- '1' => ['A','B'],
120
- '2' => ['a','A'],
121
- }
122
- data_clean = {
123
- '1' => ['A','B'],
124
- '2' => ['a',''],
125
- }
126
- assert_equal(data_clean, ArrayHash.clean(data))
127
-
128
- data = {
129
- '1' => ['A','B'],
130
- '2' => ['a','A|b'],
131
- }
132
- data_clean = {
133
- '1' => ['A','B'],
134
- '2' => ['a','b'],
135
- }
136
- assert_equal(data_clean, ArrayHash.clean(data))
137
-
138
- data = {
139
- '1' => ['A','B'],
140
- '2' => ['A|a','b'],
141
- }
142
- data_clean = {
143
- '1' => ['A','B'],
144
- '2' => ['a','b'],
145
- }
146
- assert_equal(data_clean, ArrayHash.clean(data))
147
-
148
-
149
- data = {
150
- '1' => ['a1','a2'],
151
- '2' => ['a3','a4|A1'],
152
- }
153
- data_clean = {
154
- '1' => ['a1','a2'],
155
- '2' => ['a3','a4'],
156
- }
157
- assert_equal(data, ArrayHash.clean(data))
158
- assert_equal(data_clean, ArrayHash.clean(data, :case_sensitive => true))
159
-
160
-
161
- end
162
-
163
-
164
- def test_field_pos
165
- data = {
166
- '1' => ['A','B'],
167
- '2' => ['a','b'],
168
- }
169
-
170
- table = ArrayHash.new(table, 'Entrez', ['FA', 'FB'])
171
-
172
- assert_equal(0, table.field_pos('FA'))
173
- assert_equal(:main, table.field_pos('Entrez'))
174
- assert_equal(:main, table.field_pos('entrez'))
175
-
176
- end
177
-
178
- def test_object_merge
179
- data1 = {
180
- '1' => ['A','B'],
181
- '2' => ['a','b'],
182
- }
183
- table1 = ArrayHash.new(data1, 'Entrez', ['FA', 'FB'])
184
-
185
- data2 = {
186
- '1' => ['C']
187
- }
188
- table2 = ArrayHash.new(data2, 'Entrez', ['FC'])
189
-
190
- hash_merged1 = {
191
- '1' => ['A','B','C'],
192
- '2' => ['a','b','']
193
- }
194
- names1 = %w(FA FB FC)
195
-
196
- table1.merge(table2, 'Entrez', :case_insensitive => false)
197
- assert_equal(hash_merged1, table1.data)
198
- assert_equal(names1, table1.fields)
199
-
200
-
201
-
202
- data3 = {
203
- 'b' => ['d']
204
- }
205
- table3 = ArrayHash.new(data3, 'FB', ['FD'])
206
-
207
- hash_merged2 = {
208
- '1' => ['A','B','C',''],
209
- '2' => ['a','b','','d']
210
- }
211
- names2 = %w(FA FB FC FD)
212
-
213
-
214
- table1.merge(table3, 'FB', :case_insensitive => false)
215
- assert_equal(hash_merged2, table1.data)
216
- assert_equal(names2, table1.fields)
217
- end
218
-
219
- def test_remove
220
- data = {
221
- '1' => ['A','B'],
222
- '2' => ['a','b'],
223
- }
224
- data2 = {
225
- '1' => ['B'],
226
- '2' => ['b'],
227
- }
228
-
229
-
230
- table = ArrayHash.new(data, 'Entrez', ['FA', 'FB'])
231
- table.remove('FA')
232
-
233
- assert_equal(nil, table.field_pos('FA'))
234
- assert_equal(['FB'], table.fields)
235
- assert_equal(data2, table.data)
236
- end
237
-
238
- def test_process
239
- data_in = {
240
- '1' => ['A','B'],
241
- '2' => ['a','b'],
242
- }
243
- data_out = {
244
- '1' => ['FA(A)','B'],
245
- '2' => ['FA(a)','b'],
246
- }
247
-
248
- table = ArrayHash.new(data_in, 'Entrez', ['FA', 'FB'])
249
-
250
- table.process('FA'){|n| "FA(#{n})"}
251
-
252
- assert_equal(data_out, table.data)
253
- end
254
-
255
- end
256
-
257
-
@@ -1,37 +0,0 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
2
- require 'rbbt'
3
- require 'rbbt/util/filecache'
4
- require 'test/unit'
5
-
6
- class TestFileCache < Test::Unit::TestCase
7
-
8
- def setup
9
- @cachedir = Rbbt.cachedir
10
- end
11
-
12
- def test_escape
13
- path = '/etc/password'
14
- assert_equal('_SLASH_etc_SLASH_password',FileCache.clean_path(path))
15
- end
16
-
17
- def test_path
18
- assert_equal(File.expand_path(FileCache.path('123456789.xml')), File.expand_path(File.join(@cachedir, '/5/6/7/8/9/123456789.xml')))
19
- assert_equal(File.expand_path(FileCache.path('12.xml')), File.expand_path(File.join(@cachedir, '/1/2/12.xml')))
20
-
21
- assert_raise(FileCache::BadPathError){FileCache.path('/etc/passwd')}
22
- end
23
-
24
- def test_add_read
25
- filename = 'test_file_cache.txt'
26
- content = 'hello'
27
-
28
- FileCache.del_file(filename)
29
- FileCache.add_file(filename, content)
30
- assert_raise(FileCache::FileExistsError){FileCache.add_file(filename,'')}
31
- assert_nothing_raised{FileCache.add_file(filename,'',:force => true)}
32
- FileCache.del_file(filename)
33
-
34
- end
35
-
36
-
37
- end