rbbt 1.2.5 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.rdoc +2 -138
- metadata +69 -214
- data/LICENSE +0 -20
- data/bin/rbbt_config +0 -245
- data/install_scripts/classifier/R/classify.R +0 -36
- data/install_scripts/classifier/Rakefile +0 -140
- data/install_scripts/get_abner.sh +0 -2
- data/install_scripts/get_banner.sh +0 -25
- data/install_scripts/get_biocreative.sh +0 -72
- data/install_scripts/get_crf++.sh +0 -26
- data/install_scripts/get_entrez.sh +0 -4
- data/install_scripts/get_go.sh +0 -4
- data/install_scripts/get_polysearch.sh +0 -8
- data/install_scripts/ner/Rakefile +0 -206
- data/install_scripts/ner/config/default.rb +0 -52
- data/install_scripts/norm/Rakefile +0 -219
- data/install_scripts/norm/config/cue_default.rb +0 -10
- data/install_scripts/norm/config/tokens_default.rb +0 -86
- data/install_scripts/norm/functions.sh +0 -23
- data/install_scripts/organisms/Ath.Rakefile +0 -55
- data/install_scripts/organisms/Cal.Rakefile +0 -84
- data/install_scripts/organisms/Cel.Rakefile +0 -109
- data/install_scripts/organisms/Hsa.Rakefile +0 -140
- data/install_scripts/organisms/Mmu.Rakefile +0 -77
- data/install_scripts/organisms/Rakefile +0 -43
- data/install_scripts/organisms/Rno.Rakefile +0 -88
- data/install_scripts/organisms/Sce.Rakefile +0 -66
- data/install_scripts/organisms/Spo.Rakefile +0 -40
- data/install_scripts/organisms/rake-include.rb +0 -252
- data/install_scripts/wordlists/consonants +0 -897
- data/install_scripts/wordlists/stopwords +0 -1
- data/lib/rbbt.rb +0 -83
- data/lib/rbbt/bow/bow.rb +0 -88
- data/lib/rbbt/bow/classifier.rb +0 -116
- data/lib/rbbt/bow/dictionary.rb +0 -187
- data/lib/rbbt/ner/abner.rb +0 -34
- data/lib/rbbt/ner/banner.rb +0 -73
- data/lib/rbbt/ner/dictionaryNER.rb +0 -98
- data/lib/rbbt/ner/regexpNER.rb +0 -70
- data/lib/rbbt/ner/rner.rb +0 -227
- data/lib/rbbt/ner/rnorm.rb +0 -143
- data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
- data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
- data/lib/rbbt/sources/biocreative.rb +0 -75
- data/lib/rbbt/sources/biomart.rb +0 -105
- data/lib/rbbt/sources/entrez.rb +0 -211
- data/lib/rbbt/sources/go.rb +0 -85
- data/lib/rbbt/sources/gscholar.rb +0 -74
- data/lib/rbbt/sources/organism.rb +0 -241
- data/lib/rbbt/sources/polysearch.rb +0 -117
- data/lib/rbbt/sources/pubmed.rb +0 -248
- data/lib/rbbt/util/arrayHash.rb +0 -266
- data/lib/rbbt/util/filecache.rb +0 -72
- data/lib/rbbt/util/index.rb +0 -47
- data/lib/rbbt/util/misc.rb +0 -106
- data/lib/rbbt/util/open.rb +0 -251
- data/lib/rbbt/util/rake.rb +0 -183
- data/lib/rbbt/util/simpleDSL.rb +0 -87
- data/lib/rbbt/util/tmpfile.rb +0 -35
- data/tasks/install.rake +0 -124
- data/test/rbbt/bow/test_bow.rb +0 -33
- data/test/rbbt/bow/test_classifier.rb +0 -72
- data/test/rbbt/bow/test_dictionary.rb +0 -91
- data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
- data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
- data/test/rbbt/ner/test_abner.rb +0 -17
- data/test/rbbt/ner/test_banner.rb +0 -17
- data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
- data/test/rbbt/ner/test_regexpNER.rb +0 -33
- data/test/rbbt/ner/test_rner.rb +0 -126
- data/test/rbbt/ner/test_rnorm.rb +0 -47
- data/test/rbbt/sources/test_biocreative.rb +0 -38
- data/test/rbbt/sources/test_biomart.rb +0 -31
- data/test/rbbt/sources/test_entrez.rb +0 -49
- data/test/rbbt/sources/test_go.rb +0 -24
- data/test/rbbt/sources/test_organism.rb +0 -59
- data/test/rbbt/sources/test_polysearch.rb +0 -27
- data/test/rbbt/sources/test_pubmed.rb +0 -39
- data/test/rbbt/util/test_arrayHash.rb +0 -257
- data/test/rbbt/util/test_filecache.rb +0 -37
- data/test/rbbt/util/test_index.rb +0 -31
- data/test/rbbt/util/test_misc.rb +0 -20
- data/test/rbbt/util/test_open.rb +0 -110
- data/test/rbbt/util/test_simpleDSL.rb +0 -57
- data/test/rbbt/util/test_tmpfile.rb +0 -21
- data/test/test_helper.rb +0 -4
- data/test/test_rbbt.rb +0 -11
data/lib/rbbt/util/arrayHash.rb
DELETED
@@ -1,266 +0,0 @@
|
|
1
|
-
|
2
|
-
# Helpers for "array hashes": hashes that map a key to an array of fields,
# where each field is a String holding one or more values joined by "|".
# The class methods work on plain hashes; instances wrap such a hash together
# with the name of the key column (+main+) and the names of the fields.
class ArrayHash

  # Returns a new hash with the keys of +hash+ turned into downcased strings.
  # The returned object also downcases the key given to #[] before the
  # lookup. Only #[] is patched; other readers keep their usual behavior.
  def self.make_case_insensitive(hash)
    result = {}
    hash.each { |k, v| result[k.to_s.downcase] = v }

    singleton = class << result; self; end
    singleton.send(:alias_method, :old_get, :[])
    singleton.send(:define_method, :[]) { |key| old_get(key.to_s.downcase) }

    result
  end

  # Take two strings of elements separated by the character sep_char and join
  # them into one, removing repetitions and empty elements.
  def self.merge_values_string(list1, list2, sep_char ='|')
    elements = list1.to_s.split(sep_char) + list2.to_s.split(sep_char)
    elements.reject { |e| e.to_s == "" }.uniq.join(sep_char)
  end

  # Merge two lists of elements. Elements could be strings of elements
  # separated by the character sep_char, or arrays of such strings. A nil
  # list is treated as a list of empty strings as long as the other one;
  # two nil lists give an empty array. The result has one entry per element
  # of list1.
  def self.merge_values(list1, list2, sep_char = "|")
    if String === list1 || String === list2
      # sep_char has to be forwarded, otherwise custom separators are ignored
      return merge_values_string(list1, list2, sep_char)
    end

    return [] if list1.nil? && list2.nil?

    list1 = [''] * list2.length if list1.nil?
    list2 = [''] * list1.length if list2.nil?

    merged = []
    list1.each_with_index do |elem, i|
      merged << merge_values_string(elem, list2[i], sep_char)
    end
    merged
  end


  # Take an hash of arrays and a position and use the value at that position
  # of the arrays to build a new hash with that value as key, and the original
  # key prepended to the arrays. Values holding several "|" separated codes
  # produce one entry per code; entries with an empty value are skipped. The
  # options hash accepts the following keys :case_insensitive, which defaults
  # to true, and :index, which indicates that the original key should be the
  # value of the hash entry, instead of the complete array of values.
  def self.pullout(hash, pos, options = {})
    index = options[:index]
    index = false if index.nil?
    case_insensitive = options[:case_insensitive]
    case_insensitive = true if case_insensitive.nil?

    result = {}
    hash.each do |key, values|
      code = values[pos].to_s
      next if code == ""

      if index
        list = key
      else
        # The key goes first, so the pulled out field now sits at pos + 1
        list = [key] + values
        list.delete_at(pos + 1)
      end

      code.split("|").each do |c|
        c = c.downcase if case_insensitive
        result[c] = merge_values(result[c], list)
      end
    end

    result = make_case_insensitive(result) if case_insensitive

    result
  end

  # Merge one hash of arrays into another. Each hash contains a number of
  # fields for each entry. The pos1 and pos2 indicate what fields should be
  # used to match entries, the values for pos1 and pos2 can be an integer
  # indicating the position in the array or the symbol :main to refer to the
  # key of the hash. Entries missing from one of the hashes are padded with
  # empty strings. The options hash accepts the key :case_insensitive, which
  # defaults to true.
  #
  # Raises a RuntimeError if pos1 or pos2 is neither an Integer nor :main.
  def self.merge(hash1, hash2, pos1 = :main, pos2 = :main, options = {})
    case_insensitive = options[:case_insensitive]
    case_insensitive = true if case_insensitive.nil?

    main1 = pos1.to_s.downcase == 'main'
    main2 = pos2.to_s.downcase == 'main'

    # Integer instead of Fixnum: Fixnum is gone from current Ruby versions
    raise "Key #{ pos1 } should be an Interger or :main" unless Integer === pos1 || main1
    raise "Key #{ pos2 } should be an Interger or :main" unless Integer === pos2 || main2

    # Pullout if pos2 is not :main, honoring the requested case handling
    hash2 = pullout(hash2, pos2, :case_insensitive => case_insensitive) unless main2

    # Translate if pos1 is not :main
    unless main1
      index = pullout(hash1, pos1, options.merge(:index => true))
      translated = {}
      hash2.each do |key, list|
        target = index[key]
        next unless target
        translated[target] = list
      end
      hash2 = translated
    end

    # Get the lengths of the arrays on each hash (they should be the same
    # for every entry). An empty hash contributes no fields.
    length1 = (hash1.values.first || []).length
    length2 = (hash2.values.first || []).length

    if case_insensitive
      hash1 = make_case_insensitive(hash1)
      hash2 = make_case_insensitive(hash2)
    end

    merged = {}
    (hash1.keys + hash2.keys).uniq.each do |key|
      list1 = hash1[key].nil? ? [''] * length1 : hash1[key]
      list2 = hash2[key].nil? ? [''] * length2 : hash2[key]

      merged[key] = list1 + list2
    end

    merged
  end

  # For a given hash of arrays, filter the position pos of each array with the
  # block of code. The block is called once for each "|" separated value.
  # Returns a new hash; the arrays of the original hash are left untouched.
  def self.process(hash, pos, &block)
    result = {}
    hash.each do |key, values|
      v = values.dup # do not modify the caller's arrays
      v[pos] = v[pos].to_s.split("|").collect { |n| block.call(n) }.join("|")
      result[key] = v
    end
    result
  end

  # Clean structure for repeated values. If the same value appears two times
  # keep only the one that appears earlier on the values list (columns of the
  # ArrayHash are assumed to be sorted for importance); if they appear on the
  # same position, keep the one whose key is smaller after turning it into an
  # integer. Only String fields are filtered, other fields are copied as they
  # are. The options hash accepts the key :case_sensitive, which defaults to
  # true; when false values are compared after downcasing them.
  def self.clean(hash, options = {})
    case_sensitive = options[:case_sensitive]
    case_sensitive = true if case_sensitive.nil?

    split = lambda { |values| String === values ? values.split("|") : values }
    fold  = lambda { |v| case_sensitive ? v : v.downcase }

    # First pass: find the [key, position] that owns each value
    owner = {}
    hash.each do |k, list|
      list.each_with_index do |values, i|
        split.call(values).each do |v|
          v = fold.call(v)
          current = owner[v]
          if current.nil?
            owner[v] = [k, i]
          else
            last_k, last_i = current
            if last_i > i || (last_i == i && last_k.to_i > k.to_i)
              owner[v] = [k, i]
            end
          end
        end
      end
    end

    # Second pass: keep each value only in the place that owns it
    cleaned = {}
    hash.each do |k, list|
      new_list = []
      list.each_with_index do |values, i|
        if String === values
          kept = values.split("|").select { |v| owner[fold.call(v)] == [k, i] }
          new_list << kept.join("|")
        else
          new_list << values
        end
      end
      cleaned[k] = new_list
    end
    cleaned
  end

  attr_reader :main, :fields, :data

  # Wraps +hash+. +main+ is the name of the key column and +fields+ the names
  # of the positions in the value arrays. When no field names are given they
  # are generated as F0, F1, ... from the length of the first entry.
  def initialize(hash, main, fields = nil)
    @data = hash
    @main = main.to_s

    if fields.nil? || fields.empty?
      width = (hash.values.first || []).length
      fields = (0...width).collect { |i| "F#{i}" }
    end

    @fields = fields.collect { |f| f.to_s }
  end

  # Wrapper for ArrayHash.process using a field name. Returns self.
  def process(field, &block)
    pos = field_pos(field)
    @data = ArrayHash.process(data, pos, &block)
    self
  end

  # Returns the position of a given field in the value arrays, :main if the
  # field is the key column, or nil if it is not known. Names are compared
  # ignoring case.
  def field_pos(field)
    return :main if field == :main

    wanted = field.to_s.downcase
    return :main if wanted == main.to_s.downcase

    @fields.collect { |f| f.downcase }.index(wanted)
  end


  # Merge two ArrayHashes using the specified field. The data and fields of
  # +other+ are added to this object, +other+ itself is not modified.
  # Returns self. Raises a RuntimeError if the field is missing from either
  # side.
  def merge(other, field = :main, options = {} )
    field = main if field == :main

    pos1 = field_pos(field)
    pos2 = other.field_pos(field)

    raise "Field #{ field } not found in target hash" if pos1.nil?
    raise "Field #{ field } not found in added hash" if pos2.nil?

    @data = ArrayHash.merge(data, other.data, pos1, pos2, options)

    # Work on a copy so the field list of +other+ stays intact
    new_fields = other.fields.dup
    unless pos2 == :main
      new_fields.delete_at(pos2)
      new_fields.unshift(other.main)
    end
    @fields += new_fields
    self
  end

  # Remove a field from the ArrayHash. Returns self, or nil if the field is
  # not known. The main field cannot be removed.
  def remove(field)
    pos = field_pos(field)
    return if pos.nil?
    raise "Field #{ field } is the main field and cannot be removed" if pos == :main

    data.each { |_key, values| values.delete_at(pos) }
    @fields.delete_at(pos)
    self
  end

  # Wrapper for ArrayHash.clean with the default options. Returns self.
  def clean
    @data = ArrayHash.clean(@data)
    self
  end
end
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
data/lib/rbbt/util/filecache.rb
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
require 'rbbt'
|
3
|
-
|
4
|
-
# Provides caching functionality for files downloaded from the internet.
# Files are stored under Rbbt.cachedir, in a directory tree built from the
# last characters of the file name.
#
# All methods are module-level and public. Bare +private+/+public+ markers
# do not affect methods defined with <tt>def self.</tt>, so none are used.
module FileCache

  class BadPathError < StandardError; end
  class FileExistsError < StandardError; end

  # Replace slash characters in filename.
  def self.clean_path(filename)
    filename.gsub(/\//,'_SLASH_')
  end

  # Check that the file name is safe and is in the correct format. Raises
  # BadPathError if it contains a slash or lacks the name.ext form.
  def self.sanity_check(filename)
    if filename =~ /\//
      raise FileCache::BadPathError, "Character / not allowed in name: #{ filename }"
    end
    if filename !~ /.+\..+/
      raise FileCache::BadPathError, "Filename '#{filename}' must have name and extension: name.ext"
    end
  end

  # Find the path that a particular file would have in the cache. Each of
  # the last five characters of the name (extension excluded) becomes one
  # directory level.
  def self.path(filename)
    sanity_check(filename)

    name = filename.match(/(.+)\.(.+)/)[1]
    dirs = name.scan(/./).last(5).join('/')

    File.join(Rbbt.cachedir, dirs, filename)
  end

  # Add a file in the cache. Raise FileExistsError if it exists, unless the
  # option force (symbol or string key) is used. Returns nil.
  def self.add_file(filename, content, options = {})
    target = path(filename) # path already validates the file name
    FileUtils.makedirs(File.dirname(target), :mode => 0777)

    if File.exist?(target) && !(options[:force] || options['force'])
      raise FileCache::FileExistsError, "File #{filename} already in cache"
    end

    File.open(target, 'w') { |f| f.write(content) }
    FileUtils.chmod 0666, target

    nil
  end

  # Removes the file from cache, if it is there. Returns nil.
  def self.del_file(filename)
    target = path(filename) # path already validates the file name

    FileUtils.rm target if File.exist? target

    nil
  end

end
|
data/lib/rbbt/util/index.rb
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
require 'rbbt/util/open'
|
2
|
-
require 'rbbt/util/arrayHash'
|
3
|
-
|
4
|
-
module Index

  # Builds an inverse index from the file +lexicon+. The file is read with
  # Open.to_hash using +options+ (by default fields are separated by tabs,
  # :sep2 is '\|' and :case_sensitive is true). Every identifier found in a
  # row points to the code of that row, and every code points to itself;
  # codes are registered last, so they win over identifiers with the same
  # text. With :clean the data goes through ArrayHash.clean first. When
  # :case_sensitive is false the keys are downcased, and #[] and #values_at
  # of the returned hash downcase the requested keys.
  def self.index(lexicon, options = {})
    defaults = {:sep => "\t", :sep2 => '\|', :case_sensitive => true}
    options = defaults.merge(options)

    data = Open.to_hash(lexicon, options)
    data = ArrayHash.clean(data) if options[:clean]

    fold_case = !options[:case_sensitive]
    blank = lambda { |code| code.nil? || code == "" }

    index = {}

    # Identifiers first ...
    data.each do |code, id_lists|
      next if blank.call(code)
      id_lists.flatten.compact.uniq.each do |id|
        id = id.downcase if fold_case
        index[id] = code
      end
    end

    # ... then the codes themselves, overriding any identifier that clashes
    data.each do |code, id_lists|
      next if blank.call(code)
      id = fold_case ? code.downcase : code
      index[id] = code
    end

    if fold_case
      singleton = class << index; self; end
      singleton.instance_eval do
        alias_method :old_get, :[]
        define_method(:[], proc{|key| old_get(key.to_s.downcase)})

        alias_method :old_values_at, :values_at
        define_method(:values_at, proc{|*keys| old_values_at(*keys.collect{|key| key.to_s.downcase }) })
      end
    end

    index
  end
end
|
data/lib/rbbt/util/misc.rb
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/util/open'
|
3
|
-
|
4
|
-
# Extensions to String used for text mining heuristics.
class String
  # Consonant clusters regarded as normal, one per line of the
  # wordlists/consonants file in the Rbbt data directory. Stays empty when
  # the file is not installed.
  CONSONANTS = []
  consonants_file = File.join(Rbbt.datadir, 'wordlists/consonants')
  # File.exist? rather than File.exists?, which current Ruby no longer has
  if File.exist? consonants_file
    Object::Open.read(consonants_file).each_line{|l| CONSONANTS << l.chomp}
  end

  # Uses heuristics to check if a string seems like a special word, like a
  # gene name. The checks run in order and the first one that matches
  # decides the result.
  def is_special?
    # Only consonants
    return true if self =~ /^[bcdfghjklmnpqrstvwxz]+$/i

    # Not a word
    return false if self =~ /[^\s]\s[^\s]/
    return false if self.length < 3
    # Alphanumeric
    return true if self =~ /[0-9]/ && self =~ /[a-z]/i
    # All Caps
    return true if self =~ /[A-Z]{2,}/
    # Caps Mix
    return true if self =~ /[a-z][A-Z]/
    # All consonants
    # NOTE(review): the pattern only matches a line made of a single letter,
    # so this check rarely applies; /^[a-z]+$/i may have been intended.
    return true if self =~ /^[a-z]$/i && self !~ /[aeiou]/i
    # Dashed word
    return true if self =~ /(^\w-|-\w$)/
    # Too many consonants (very heuristic): a run of three or more that is
    # not in the list of known clusters
    if self =~ /([^aeiouy]{3,})/i && !CONSONANTS.include?($1.downcase)
      return true
    end

    return false
  end

  # Returns a copy of the string with the first letter turned to lowercase.
  # The rest of the string, line breaks included, is kept as it is.
  def downcase_first
    return "" if self == ""
    self[0, 1].downcase + self[1..-1]
  end

  # Turns a roman number into arabic form if possible. Just simple
  # romans only (I, II, III, IV, V and X); returns nil for anything else.
  def arabic
    return 1 if self =~ /^I$/
    return 2 if self =~ /^II$/
    return 3 if self =~ /^III$/
    return 4 if self =~ /^IV$/
    return 5 if self =~ /^V$/
    return 10 if self =~ /^X$/

    return nil
  end
end
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
# Names of the greek letters and the latin transliteration used for each.
$greek = {
  "alpha"   => "a",
  "beta"    => "b",
  "gamma"   => "g",
  "delta"   => "d",
  "epsilon" => "e",
  "zeta"    => "z",
  "eta"     => "e",
  "theta"   => "th",
  "iota"    => "i",
  "kappa"   => "k",
  "lambda"  => "l",
  "mu"      => "m",
  "nu"      => "n",
  "xi"      => "x",
  "omicron" => "o",
  "pi"      => "p",
  "rho"     => "r",
  "sigma"   => "s",
  "tau"     => "t",
  "upsilon" => "u",
  "phi"     => "ph",
  "chi"     => "ch",
  "psi"     => "ps",
  "omega"   => "o"
}

# Transliteration back to letter name. Two transliterations are shared by
# two letters ("e" and "o"); the letter listed later in $greek is kept.
$inverse_greek = $greek.invert
|
90
|
-
|
91
|
-
# Words listed in the wordlists/stopwords file of the Rbbt data directory.
# Left unset when the file is not installed. File.exist? is used because
# File.exists? is no longer available in current Ruby versions.
stopwords_file = File.join(Rbbt.datadir, 'wordlists/stopwords')
$stopwords = Open.read(stopwords_file).scan(/\w+/) if File.exist? stopwords_file
|
92
|
-
|
93
|
-
class Array

  # Divides the array into up to +num+ chunks of similar size by placing one
  # element in each chunk iteratively, so element i goes to chunk i % num.
  # Arrays with fewer than +num+ elements give fewer chunks.
  #
  # This method has the same name as Enumerable#chunk. When it is called
  # without +num+ the call is handed over to that method, so code that uses
  # the standard block form keeps working.
  def chunk(num = nil, &block)
    return super(&block) if num.nil?

    chunks = []
    each_with_index do |e, i|
      (chunks[i % num] ||= []) << e
    end
    chunks
  end
end
|