mosta-raramorph 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +56 -0
- data/bin/raramorph +6 -0
- data/lib/dictionaries/dictPrefixes +421 -0
- data/lib/dictionaries/dictStems +135989 -0
- data/lib/dictionaries/dictSuffixes +1170 -0
- data/lib/dictionaries/marshal_stems +0 -0
- data/lib/dictionaries/tableAB +2276 -0
- data/lib/dictionaries/tableAC +743 -0
- data/lib/dictionaries/tableBC +1584 -0
- data/lib/raramorph/arabic_latin_translator.rb +38 -0
- data/lib/raramorph/dictionary_entry.rb +40 -0
- data/lib/raramorph/in_memory_dictionary_handler.rb +325 -0
- data/lib/raramorph/in_memory_solutions_handler.rb +78 -0
- data/lib/raramorph/latin_arabic_translator.rb +35 -0
- data/lib/raramorph/logger.rb +20 -0
- data/lib/raramorph/raramorph.rb +417 -0
- data/lib/raramorph/solution.rb +592 -0
- data/lib/raramorph/translator.rb +40 -0
- data/lib/raramorph.rb +16 -0
- data/lib/raramorph_main.rb +34 -0
- data/lib/test_input/UTF-8.txt +32 -0
- data/raramorph.gemspec +42 -0
- metadata +75 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
# Class for Arabic-to-Latin transliteration (Buckwalter romanization)
|
2
|
+
# Author:: eSpace technologies www.eSpace.com.eg
|
3
|
+
# Copyright:: 2008
|
4
|
+
#
|
5
|
+
class ArabicLatinTranslator
  # * Table used to transliterate Arabic to Latin letters (i.e. romanize a word)
  # * according to the Buckwalter system.
  # * TATWEEL (U+0640) maps to the empty string: the original table listed it
  #   twice ("_" and then ""), and the later entry was the one that took effect.
  TABLE = {
    "\u0621" => "'", "\u0622" => "|", "\u0623" => ">", "\u0624" => "&", "\u0625" => "<", "\u0626" => "}",
    "\u0627" => "A", "\u0628" => "b", "\u0629" => "p", "\u062A" => "t", "\u062B" => "v", "\u062C" => "j",
    "\u062D" => "H", "\u062E" => "x", "\u062F" => "d", "\u0630" => "*", "\u0631" => "r", "\u0632" => "z",
    "\u0633" => "s", "\u0634" => "$", "\u0635" => "S", "\u0636" => "D", "\u0637" => "T", "\u0638" => "Z",
    "\u0639" => "E", "\u063A" => "g", "\u0641" => "f", "\u0642" => "q", "\u0643" => "k",
    "\u0644" => "l", "\u0645" => "m", "\u0646" => "n", "\u0647" => "h", "\u0648" => "w", "\u0649" => "Y",
    "\u064A" => "y", "\u064B" => "F", "\u064C" => "N", "\u064D" => "K", "\u064E" => "a", "\u064F" => "u",
    "\u0650" => "i", "\u0651" => "~", "\u0652" => "o", "\u0670" => "`", "\u0671" => "{", "\u067E" => "P",
    "\u0686" => "J", "\u06A4" => "V", "\u06AF" => "G", "\u0698" => "R", "\u060C" => ",", "\u061B" => ";",
    "\u061F" => "?", "\u0640" => ""
  }.freeze

  # Vowels/diacritics are not suitable for morphological analysis, so their
  # romanized forms are removed from the transliterated result.
  VOWEL_REMOVER = Regexp.compile("[FNKaui~o]")
  # Romanized SUPERSCRIPT ALEF and ALEF WASLA are stripped as well.
  STRIPER = Regexp.compile("[`\\{]")

  # * Translate : transliterate the Arabic word to a Roman-lettered word
  # * [word] Word String to be processed; it is NOT modified (a copy is
  #   re-tagged as UTF-8), so frozen strings are accepted
  # * @return transliterated word, with romanized vowels/diacritics removed
  #
  # The vowel and strip filters run on the romanized text. Running them on the
  # Arabic input (as before) left Arabic diacritics in the output as a/u/i/...
  def self.translate(word)
    source = word.dup.force_encoding("UTF-8")
    result = "".dup
    # Characters without a table entry are passed through unchanged.
    source.each_char { |char| result << (TABLE[char] || char) }
    result.gsub!(VOWEL_REMOVER, "")
    result.gsub!(STRIPER, "")
    result
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Class For Storing Dictionary Entries
|
2
|
+
# Author:: eSpace technologies www.eSpace.com.eg
|
3
|
+
# Copyright:: 2008
|
4
|
+
|
5
|
+
|
6
|
+
class DictionaryEntry
  # One dictionary record: the surface entry, its lemma id, vocalization and
  # morphological category, plus the gloss and part-of-speech information both
  # as given (gloss) and split into "+"-separated parts (glosses, pos).
  attr_reader :entry, :lemma_id, :vocalization, :morphology, :gloss, :glosses, :pos

  # Separator between the individual glosses / POS tags of a record.
  SPLIT_REGEX = Regexp.compile("\\+")

  # * Initialize a new dictionary entry
  # * [entry] entry text (stripped)
  # * [lemma_id] lemma identifier (stripped)
  # * [vocalization] vocalized form (stripped)
  # * [morphology] morphological category (stripped)
  # * [gloss] gloss string, kept as given; also split on "+" into +glosses+
  # * [pos] POS string, split on "+" into +pos+
  #
  # Note: Ruby always makes +initialize+ private, so no visibility keyword is
  # needed here (the former +protected+ marker had no effect).
  def initialize(entry, lemma_id, vocalization, morphology, gloss, pos)
    @entry = entry.strip
    @lemma_id = lemma_id.strip
    @vocalization = vocalization.strip
    @morphology = morphology.strip
    @gloss = gloss
    @glosses = fill_instance_array_from_sent_array(gloss.split(SPLIT_REGEX))
    @pos = fill_instance_array_from_sent_array(pos.split(SPLIT_REGEX))
  end

  private

  # * Builds the stored parts list from the split parts
  # * [sent_array] parts produced by splitting on "+"
  # * @return new array of stripped parts, without a leading empty part
  #   (which appears when the source string starts with "+")
  #
  # The previous implementation assigned the stripped value to the block
  # variable only, so the parts were never actually stripped.
  def fill_instance_array_from_sent_array(sent_array)
    parts = sent_array.map { |value| value.strip }
    parts.shift if parts.first == ""
    parts
  end
end
|
@@ -0,0 +1,325 @@
|
|
1
|
+
# Class For Storing And Loading Dictionaries
|
2
|
+
# Author:: eSpace technologies www.eSpace.com.eg
|
3
|
+
# Copyright:: 2008
|
4
|
+
|
5
|
+
|
6
|
+
require 'rubygems'
require 'set'
|
7
|
+
class InMemoryDictionaryHandler
  # Singleton class: the instance is obtained through
  # InMemoryDictionaryHandler.create (+new+ is private).
  #
  # All state is class-level and therefore shared by every caller.
  # Dictionaries are HASHES OF ARRAYS: entry text => [DictionaryEntry, ...].

  # Dictionary of prefixes
  @@prefixes = {}
  # Dictionary of stems
  @@stems = {}
  # Dictionary of suffixes
  @@suffixes = {}
  # The unique handler instance; nil until .create has completed once.
  @@handler = nil

  private_class_method :new

  # Extracts an explicit POS annotation (<pos>...</pos>) from a gloss field.
  POS_TAG_REGEX = /.*<pos>(.+?)<\/pos>.*/
  # Morphology categories that carry no POS of their own.
  EMPTY_AFFIX_REGEX = /^(Pref-0|Suff-0)$/
  # Morphology prefix => POS suffix, checked in this order.
  POS_BY_MORPHOLOGY = [
    [/^F.*/, "/FUNC_WORD"],
    [/^IV.*/, "/VERB_IMPERFECT"],
    [/^PV.*/, "/VERB_PERFECT"],
    [/^CV.*/, "/VERB_IMPERATIVE"]
  ].freeze
  NOUN_MORPHOLOGY_REGEX = /^N.*/
  # A gloss starting with a capital letter is taken to be a proper name.
  CAPITALIZED_GLOSS_REGEX = /^[A-Z].*/
  # Runs of whitespace separating the two columns of a compatibility table.
  COMPATIBILITY_SPLITTER = /\s+/

  # * Loads the dictionaries and compatibility tables, then returns the handler
  # * @return the unique InMemoryDictionaryHandler instance
  #
  # Loading happens only on the first call. Previously every call reset the
  # handler and loaded the prefix/suffix files again into the same hashes,
  # which duplicated their entries.
  # Side effect: sets Thread.abort_on_exception = true for the whole process.
  def self.create
    return @@handler if @@handler

    # Compatibility tables for prefix-stem, prefix-suffix and stem-suffix
    # combinations; each member is "<morphology A> <morphology B>".
    @@prefixes_stems_compatibility = Set.new
    @@prefixes_suffixes_compatibility = Set.new
    @@stems_suffixes_compatibility = Set.new

    puts "Initializing in-memory dictionary handler..."
    Thread.abort_on_exception = true
    load_dictionary(@@prefixes, "dictPrefixes", dictionary_path("dictPrefixes"))
    load_stems_marshaled_dictionary
    load_dictionary(@@suffixes, "dictSuffixes", dictionary_path("dictSuffixes"))
    load_compatibility_table(@@prefixes_stems_compatibility, "prefixes_stems_compatibility", dictionary_path("tableAB"))
    load_compatibility_table(@@prefixes_suffixes_compatibility, "prefixes_suffixes_compatibility", dictionary_path("tableAC"))
    load_compatibility_table(@@stems_suffixes_compatibility, "stems_suffixes_compatibility", dictionary_path("tableBC"))
    puts "... Done ... "
    @@handler = new
  end

  # * Path of a file inside the bundled dictionaries directory
  # * [file_name] file name, e.g. "dictStems"
  def self.dictionary_path(file_name)
    File.join(File.dirname(__FILE__), "..", "dictionaries", file_name)
  end

  # * Loads the marshaled stems dictionary if available and readable;
  #   otherwise loads the stems from the original dictionary file.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects, so the cache file
  # must only ever come from a trusted location (it is written by marshal_stems).
  def self.load_stems_marshaled_dictionary
    cache_path = dictionary_path("marshal_stems")
    return reload_stems_dictionary unless File.exist?(cache_path)

    begin
      # Marshal data is binary: read it in binary mode.
      @@stems = File.open(cache_path, "rb") { |f| Marshal.load(f) }
    rescue ArgumentError, TypeError, EOFError
      # Empty, truncated or incompatible cache: rebuild it from the source file.
      return reload_stems_dictionary
    end
    puts("#{@@stems.length} entries totalizing")
  end

  # * Marshals the stems dictionary into the cache file (binary mode)
  def self.marshal_stems
    File.open(dictionary_path("marshal_stems"), "wb") do |f|
      Marshal.dump(@@stems, f)
    end
  end

  # * Loads the stems dictionary from the original file, then marshals it
  #   for faster access next time.
  # * Starts from an empty hash so that a repeated reload does not append
  #   duplicate entries to the previous content.
  def self.reload_stems_dictionary
    @@stems = {}
    load_dictionary(@@stems, "dictStems", dictionary_path("dictStems"))
    marshal_stems
  end

  # * Check if the transliterated word is a known prefix
  # * [translitered] transliterated word to be checked
  def has_prefix?(translitered)
    @@prefixes.has_key?(translitered)
  end

  # * Check if the transliterated word is a known stem
  # * [translitered] transliterated word to be checked
  def has_stem?(translitered)
    @@stems.has_key?(translitered)
  end

  # * Check if the transliterated word is a known suffix
  # * [translitered] transliterated word to be checked
  def has_suffix?(translitered)
    @@suffixes.has_key?(translitered)
  end

  # * Check if prefix and stem are compatible
  # * [prefix] prefix morphology (String)
  # * [stem] stem morphology (String)
  def prefixes_stems_compatible?(prefix, stem)
    @@prefixes_stems_compatibility.member?(prefix + " " + stem)
  end

  # * Check if prefix and suffix are compatible
  # * [prefix] prefix morphology (String)
  # * [suffix] suffix morphology (String)
  def prefixes_suffixes_compatible?(prefix, suffix)
    @@prefixes_suffixes_compatibility.member?(prefix + " " + suffix)
  end

  # * Check if stem and suffix are compatible
  # * [stem] stem morphology (String)
  # * [suffix] suffix morphology (String)
  def stems_suffixes_compatible?(stem, suffix)
    @@stems_suffixes_compatibility.member?(stem + " " + suffix)
  end

  # * Returns the prefixes dictionary
  def prefixes
    @@prefixes
  end

  # * Replaces the prefixes dictionary (shared by all users of the class)
  def prefixes=(prefixes)
    @@prefixes = prefixes
  end

  # * Returns the stems dictionary
  def stems
    @@stems
  end

  # * Replaces the stems dictionary (shared by all users of the class)
  def stems=(stems)
    @@stems = stems
  end

  # * Returns the suffixes dictionary
  def suffixes
    @@suffixes
  end

  # * Replaces the suffixes dictionary (shared by all users of the class)
  def suffixes=(suffixes)
    @@suffixes = suffixes
  end

  # * Looks up one prefix/stem/suffix segmentation and records every mutually
  #   compatible combination of dictionary entries as a solution
  # * [segmented_word] object responding to +prefix+, +stem+ and +suffix+
  # * [word_solutions] collection the Solution objects are appended to (<<)
  # * [verbose] passed through to Solution.new
  # * [count] number of solutions found so far
  # * @return the updated solution count
  def analyze_word_in_dictionaries(segmented_word, word_solutions, verbose, count)
    # All three segments must be known before any compatibility check.
    return count unless has_prefix?(segmented_word.prefix) &&
                        has_stem?(segmented_word.stem) &&
                        has_suffix?(segmented_word.suffix)

    @@prefixes[segmented_word.prefix].each do |prefix|
      @@stems[segmented_word.stem].each do |stem|
        next unless prefixes_stems_compatible?(prefix.morphology, stem.morphology)

        @@suffixes[segmented_word.suffix].each do |suffix|
          next unless prefixes_suffixes_compatible?(prefix.morphology, suffix.morphology)
          next unless stems_suffixes_compatible?(stem.morphology, suffix.morphology)

          # All tests passed: it is a solution.
          count += 1
          word_solutions << Solution.new(verbose, count, prefix, stem, suffix)
        end
      end
    end
    count
  end

  # The loaders below are public class methods. The former +private+ marker in
  # front of them had no effect, because +private+ does not apply to methods
  # defined with +def self.+.

  # * Loads a dictionary from a file
  # * [dictionary] Hash of Arrays the entries are added to
  # * [name] dictionary name (used in messages)
  # * [file] file path
  #
  # Lines starting with ";; " introduce a lemma id, other lines starting with
  # ";" are comments, and every remaining line must hold 4 tab-separated fields.
  # Raises ArgumentError on a repeated lemma id or a malformed entry line.
  # Line numbers in messages are 1-based positions in the file (previously they
  # were 0-based positions in the list left after dropping comment lines).
  def self.load_dictionary(dictionary, name, file)
    lemmas = Set.new
    forms = 0
    lemma_id = ""
    puts "Loading Dictionary : #{ name }"

    IO.readlines(file).each_with_index do |line, index|
      line_number = index + 1
      if line.start_with?(";; ")
        lemma_id = line[3..-1]
        raise ArgumentError, "Lemma #{lemma_id } in #{name} #{line_number} isn't unique" if lemmas.member?(lemma_id)

        lemmas << lemma_id
      elsif !line.start_with?(";")
        splited_line = line.split("\t", -1)
        unless splited_line.length == 4
          raise ArgumentError, "Entry In #{name} line #{line_number} doesn't have 4 fields ( 3 tabs )"
        end

        de = construct_dictionary_entry(splited_line, name, line_number, lemma_id)
        (dictionary[de.entry] ||= []) << de
        forms += 1
      end
    end

    puts "#{lemmas.size()} lemmas and " unless lemma_id == ""
    puts("#{dictionary.length} entries totalizing #{forms} forms")
  end

  # * Loads a compatibility table
  # * [set] Set the table rows are added to
  # * [name] table name (used in messages)
  # * [file] file path
  #
  # Lines starting with ";" are ignored. Every other line is stripped and its
  # whitespace runs are collapsed to a single space before being stored.
  def self.load_compatibility_table(set, name, file)
    puts "Loading compatibility table : #{name} "
    IO.readlines(file).each do |line|
      next if line.start_with?(";") # ignore comments

      set << line.strip.gsub(COMPATIBILITY_SPLITTER, " ")
    end
    puts "#{set.size()} entries"
  end

  # * Constructs a DictionaryEntry from the fields of a dictionary line
  # * [splited_line] the 4 fields: entry, vocalization, morphology, gloss(+POS)
  # * [name] dictionary name (used in error messages)
  # * [line_count] line number (used in error messages)
  # * [lemma_id] lemma id the entry belongs to
  #
  # The POS comes either (1) explicitly from a <pos>...</pos> tag in the gloss
  # field, or (2) by deduction from the morphology (see deduce_pos).
  def self.construct_dictionary_entry(splited_line, name, line_count, lemma_id)
    entry, vocalization, morphology, gloss_pos = splited_line

    matcher = POS_TAG_REGEX.match(gloss_pos)
    pos = matcher ? matcher[1] : deduce_pos(vocalization, morphology, gloss_pos, name, line_count)

    # Clean up the gloss: remove the POS info and surrounding space, then let
    # the Translator post-process it (Translator is defined elsewhere).
    gloss = gloss_pos.sub(/<pos>.+?<\/pos>/, "").strip
    gloss = Translator.new.translate(gloss)
    DictionaryEntry.new(entry, lemma_id, vocalization, morphology, gloss, pos)
  end

  # * Deduces the POS string from the morphology (and, for nouns, the gloss)
  # * @return "" for Pref-0/Suff-0, otherwise "<vocalization> <POS suffix>"
  # * Raises RuntimeError when the morphology matches no known category
  #
  # NOTE(review): the vocalization and the "/TAG" suffix are joined with a
  # space (e.g. "katab /VERB_PERFECT"); kept as is because the consumers of
  # +pos+ are not visible here - confirm this is what they expect.
  def self.deduce_pos(vocalization, morphology, gloss, name, line_count)
    return "" if morphology =~ EMPTY_AFFIX_REGEX

    POS_BY_MORPHOLOGY.each do |pattern, suffix|
      return "#{vocalization} #{suffix}" if morphology =~ pattern
    end

    unless morphology =~ NOUN_MORPHOLOGY_REGEX
      raise "No POS can be deduced in #{name} (line #{line_count})"
    end

    # Educated guess: a capitalized gloss indicates a proper noun. Every other
    # noun - including vocalizations ending in "iy~", which were once tagged
    # NOUN_ADJ - is tagged as a plain NOUN.
    suffix = gloss =~ CAPITALIZED_GLOSS_REGEX ? "/NOUN_PROP" : "/NOUN"
    "#{vocalization} #{suffix}"
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# An in-memory handler for managing solutions found by the morphological analyzer.
|
2
|
+
#
|
3
|
+
# Author:: eSpace technologies www.eSpace.com.eg
|
4
|
+
# Copyright:: 2008
|
5
|
+
|
6
|
+
|
7
|
+
class InMemorySolutionsHandler
  # In-memory cache of analysis results, keyed by transliterated word.
  #
  # .create returns one shared instance, but +new+ is deliberately left public,
  # so other instances can exist. All of them share the same class-level
  # hashes below, so they all see the same cached data.
  public_class_method :new

  # The shared instance handed out by .create
  @@handler = nil
  # Hash of solutions for analyzed words
  @@solutions = {}
  # Hash of alternative spellings
  @@alternative_spellings = {}

  # * @return the shared handler instance, created on first use
  def self.create
    @@handler ||= new
  end

  # Add solutions for a given word
  # * [translitered] The transliterated word.
  # * [sol] The solutions for the transliterated word.
  def add_solutions(translitered, sol)
    @@solutions[translitered] = sol
  end

  # Whether or not solutions were already stored for the word
  # * [translitered] The transliterated word
  # * @return true if solutions are stored for it, false otherwise
  def has_solutions(translitered)
    @@solutions.has_key?(translitered)
  end

  # Return the solutions of a given word
  # * [translitered] The transliterated word
  # * @return The stored solutions, or nil when none were stored
  def get_solutions(translitered)
    # A single lookup is enough: a missing key already yields nil.
    @@solutions[translitered]
  end

  # Add alternative spellings for the given word
  # * [translitered] The transliterated word
  # * [alt] The alternative spellings
  def add_alternative_spellings(translitered, alt)
    @@alternative_spellings[translitered] = alt
  end

  # Whether or not alternative spellings were already stored for the word
  # * [translitered] The transliterated word
  # * @return true if alternative spellings are stored for it, false otherwise
  def has_alternative_spellings(translitered)
    @@alternative_spellings.has_key?(translitered)
  end

  # Return the alternative spellings of the word
  # * [translitered] The transliterated word
  # * @return The stored alternative spellings, or nil when none were stored
  def get_alternative_spellings(translitered)
    @@alternative_spellings[translitered]
  end
end
|
78
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Class for Latin-to-Arabic transliteration (Buckwalter system)
|
2
|
+
# Author:: eSpace technologies www.eSpace.com.eg
|
3
|
+
# Copyright:: 2008
|
4
|
+
|
5
|
+
|
6
|
+
class LatinArabicTranslator
  # * Table used to transliterate Latin letters to Arabic (i.e. arabize a word)
  # * according to the Buckwalter system.
  TABLE = {
    "'" => "\u0621", "|" => "\u0622", ">" => "\u0623", "&" => "\u0624",
    "<" => "\u0625", "}" => "\u0626", "A" => "\u0627", "b" => "\u0628",
    "p" => "\u0629", "t" => "\u062A", "v" => "\u062B", "j" => "\u062C",
    "H" => "\u062D", "x" => "\u062E", "d" => "\u062F", "*" => "\u0630",
    "r" => "\u0631", "z" => "\u0632", "s" => "\u0633", "$" => "\u0634", "S" => "\u0635",
    "D" => "\u0636", "T" => "\u0637", "Z" => "\u0638", "E" => "\u0639", "g" => "\u063A",
    "_" => "\u0640", "f" => "\u0641", "q" => "\u0642", "k" => "\u0643", "l" => "\u0644",
    "m" => "\u0645", "n" => "\u0646", "h" => "\u0647", "w" => "\u0648", "Y" => "\u0649", "y" => "\u064A",
    "F" => "\u064B", "N" => "\u064C", "K" => "\u064D", "a" => "\u064E", "u" => "\u064F", "i" => "\u0650",
    "~" => "\u0651", "o" => "\u0652", "`" => "\u0670", "{" => "\u0671", "P" => "\u067E", "J" => "\u0686",
    "V" => "\u06A4", "G" => "\u06AF", "R" => "\u0698", "," => "\u060C", ";" => "\u061B", "?" => "\u061F"
  }.freeze

  # * Translate : transliterate the Roman-lettered word to an Arabic word
  # * [word] Word String to be processed; it is NOT modified (a copy is
  #   re-tagged as UTF-8), so frozen strings are accepted
  # * @return transliterated word; characters without a table entry are kept
  #
  def self.translate(word)
    source = word.dup.force_encoding("UTF-8")
    result = "".dup
    source.each_char { |char| result << (TABLE[char] || char) }
    result
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# StringIO is used for the in-memory buffer and is not loaded by default.
require 'stringio'

# Buffers log messages in memory and emits them all at once, either to
# standard output or to a file.
class Logger
  attr_reader :verbose, :output

  # * [verbose] verbosity flag; stored and exposed, not consulted by this class
  # * [output] path of the file +log+ writes to, or nil to print to stdout
  def initialize(verbose = nil, output = nil)
    @verbose = verbose
    @output = output
    @stream = StringIO.new
  end

  # * Appends a message (followed by a newline) to the buffer
  # * [string] the message
  # * [require_verbose] accepted for compatibility but currently ignored:
  #   every message is buffered regardless of the verbosity setting
  def info(string, require_verbose = false)
    @stream.puts(string)
  end

  # * Emits everything buffered so far: prints it to stdout when no output
  #   path was given, otherwise (over)writes the output file with it.
  #   The buffer is not cleared.
  def log
    return puts(@stream.string) if @output.nil?

    File.open(@output, "w") { |f| f.puts @stream.string }
  end
end
|