espace-raramorph 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,592 @@
1
+ # A class to find the solution of the word
2
+ # Author:: eSpace technologies www.eSpace.com.eg
3
+ # Copyright:: 2008
4
+
5
+
6
+
7
class Solution

  attr_reader :prefix, :stem, :suffix, :cnt

  # Grammatical tags which, when they end the first part of a stem, cause that
  # part to be treated as a prefix.
  @@ends_with_set_for_pos_one = Set.new([
    "CONJ", "EMPHATIC_PARTICLE", "FUNC_WORD",
    "FUT_PART", "INTERJ", "INTERROG_PART", "IV1S", "IV2MS",
    "IV2FS", "IV3MS", "IV3FS", "IV2D", "IV2FD", "IV3MD", "IV3FD",
    "IV1P", "IV2MP", "IV2FP", "IV3MP", "IV3FP", "NEG_PART",
    "PREP", "RESULT_CLAUSE_PARTICLE"
  ])

  # Grammatical tags which, when they end the last part of a stem, cause that
  # part to be treated as a suffix.
  @@ends_with_set_for_pos_two = Set.new([
    "CASE_INDEF_NOM", "CASE_INDEF_ACC",
    "CASE_INDEF_ACCGEN", "CASE_INDEF_GEN", "CASE_DEF_NOM",
    "CASE_DEF_ACC", "CASE_DEF_ACCGEN", "CASE_DEF_GEN",
    "NSUFF_MASC_SG_ACC_INDEF", "NSUFF_FEM_SG", "NSUFF_MASC_DU_NOM",
    "NSUFF_MASC_DU_NOM_POSS", "NSUFF_MASC_DU_ACCGEN",
    "NSUFF_MASC_DU_ACCGEN_POSS", "NSUFF_FEM_DU_NOM",
    "NSUFF_FEM_DU_NOM_POSS", "NSUFF_FEM_DU_ACCGEN",
    "NSUFF_FEM_DU_ACCGEN_POSS", "NSUFF_MASC_PL_NOM",
    "NSUFF_MASC_PL_NOM_POSS", "NSUFF_MASC_PL_ACCGEN",
    "NSUFF_MASC_PL_ACCGEN_POSS", "NSUFF_FEM_PL", "POSS_PRON_1S",
    "POSS_PRON_2MS", "POSS_PRON_2FS", "POSS_PRON_3MS",
    "POSS_PRON_3FS", "POSS_PRON_2D", "POSS_PRON_3D", "POSS_PRON_1P",
    "POSS_PRON_2MP", "POSS_PRON_2FP", "POSS_PRON_3MP", "POSS_PRON_3FP",
    "IVSUFF_DO:1S", "IVSUFF_DO:2MS", "IVSUFF_DO:2FS", "IVSUFF_DO:3MS",
    "IVSUFF_DO:3FS", "IVSUFF_DO:2D", "IVSUFF_DO:3D", "IVSUFF_DO:1P",
    "IVSUFF_DO:2MP", "IVSUFF_DO:2FP", "IVSUFF_DO:3MP", "IVSUFF_DO:3FP",
    "IVSUFF_MOOD:I", "IVSUFF_SUBJ:2FS_MOOD:I", "IVSUFF_SUBJ:D_MOOD:I",
    "IVSUFF_SUBJ:3D_MOOD:I", "IVSUFF_SUBJ:MP_MOOD:I", "IVSUFF_MOOD:S",
    "IVSUFF_SUBJ:2FS_MOOD:SJ", "IVSUFF_SUBJ:D_MOOD:SJ", "IVSUFF_SUBJ:MP_MOOD:SJ",
    "IVSUFF_SUBJ:3MP_MOOD:SJ", "IVSUFF_SUBJ:FP", "PVSUFF_DO:1S", "PVSUFF_DO:2MS",
    "PVSUFF_DO:2FS", "PVSUFF_DO:3MS", "PVSUFF_DO:3FS", "PVSUFF_DO:2D",
    "PVSUFF_DO:3D", "PVSUFF_DO:1P", "PVSUFF_DO:2MP", "PVSUFF_DO:2FP",
    "PVSUFF_DO:3MP", "PVSUFF_DO:3FP", "PVSUFF_SUBJ:1S", "PVSUFF_SUBJ:2MS",
    "PVSUFF_SUBJ:2FS", "PVSUFF_SUBJ:3MS", "PVSUFF_SUBJ:3FS", "PVSUFF_SUBJ:2MD",
    "PVSUFF_SUBJ:2FD", "PVSUFF_SUBJ:3MD", "PVSUFF_SUBJ:3FD", "PVSUFF_SUBJ:1P",
    "PVSUFF_SUBJ:2MP", "PVSUFF_SUBJ:2FP", "PVSUFF_SUBJ:3MP", "PVSUFF_SUBJ:3FP",
    "CVSUFF_DO:1S", "CVSUFF_DO:3MS", "CVSUFF_DO:3FS", "CVSUFF_DO:3D",
    "CVSUFF_DO:1P", "CVSUFF_DO:3MP", "CVSUFF_DO:3FP", "CVSUFF_SUBJ:2MS",
    "CVSUFF_SUBJ:2FS", "CVSUFF_SUBJ:2MP"
  ])

  protected

  # Constructs a solution for a word. Note that the prefix, stem and suffix combination is <b>recomputed</b>
  # and may not necessarily match with the information provided by the dictionaries.
  # * [debug] Whether or not the dictionaries inconsistencies should be output
  # * [cnt] Order in sequence
  # * [prefix] The prefix as provided by the prefixes dictionary
  # * [stem] The stem as provided by the stems dictionary
  # * [suffix] The suffix as provided by the suffixes dictionary
  #
  # Each entry must respond to +pos+, +glosses+ and +morphology+; the stem
  # entry must also respond to +lemma_id+.
  #
  def initialize(debug, cnt, prefix, stem, suffix)
    @debug = debug
    @cnt = cnt
    @prefix = prefix
    @stem = stem
    @suffix = suffix

    # The normalization below moves elements between these lists. Work on
    # copies so that the arrays owned by the entries passed in are not altered.
    # NOTE(review): presumably the entries are shared dictionary objects;
    # confirm against the dictionary handler.
    @prefixesPOS = prefix.pos.dup
    @stemsPOS = stem.pos.dup
    @suffixesPOS = suffix.pos.dup
    @prefixesGlosses = prefix.glosses.dup
    @stemsGlosses = stem.glosses.dup
    @suffixesGlosses = suffix.glosses.dup

    if @stemsPOS.length != @stemsGlosses.length and @debug
      puts "\"#{get_lemma}\" : stem's sizes for POS (\"#{@stemsPOS.length}\") and GLOSS (\"#{@stemsGlosses.length}\") do not match"
    end

    # Normalize stems since some of them can contain prefixes:
    # move leading stem parts to the end of the prefixes.
    until @stemsPOS.empty?
      stem_pos = force_utf8(@stemsPOS.first)
      stem_gloss = force_utf8(@stemsGlosses.first)
      break unless stem_pos.ends_with_suffix_set?(@@ends_with_set_for_pos_one)

      @prefixesPOS.push(@stemsPOS.shift)
      @prefixesGlosses.push(@stemsGlosses.shift) if stem_gloss
    end

    # Normalize stems since some of them can contain suffixes:
    # move trailing stem parts to the front of the suffixes.
    until @stemsPOS.empty?
      stem_pos = force_utf8(@stemsPOS.last)
      stem_gloss = force_utf8(@stemsGlosses.last)
      break unless stem_pos.ends_with_suffix_set?(@@ends_with_set_for_pos_two)

      @suffixesPOS.unshift(@stemsPOS.pop)
      @suffixesGlosses.unshift(@stemsGlosses.pop) if stem_gloss
    end

    # Normalization of bayon, bayona, bayoni
    if @stemsPOS.length > 1
      pos0 = @stemsPOS[0]
      pos1 = @stemsPOS[1]
      if ["bayon", "bayona", "bayoni"].include?(pos1)
        puts "Merging \"#{pos1}\" into first part of stem \"#{pos0}\"" if @debug
        parts = pos0.split("/")
        # The remaining pieces are appended without a separator, which is what
        # the former index loop would have produced had it advanced its index.
        merged = parts.first.to_s + pos1 + "/" + parts.drop(1).join
        @stemsPOS.shift
        @stemsPOS[0] = merged
      end
    end

    # Sanity check
    puts "More than one stem for " + @stemsPOS.to_s if @stemsPOS.length > 1 and @debug
  end

  # Returns the lemma id in the stems dictionary, with everything from the
  # first "_" or "-" onwards removed.
  # * @return The lemma ID
  #
  def get_lemma
    @stem.lemma_id.sub(/(_|-).*$/, "")
  end

  # Returns the vocalizations of the <b>recomputed</b> prefixes in the Buckwalter transliteration system
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations
  #
  def get_prefixes_vocalizations
    vocalizations(false, @prefixesPOS, false)
  end

  # Returns the vocalizations of the <b>recomputed</b> prefixes in arabic
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations
  #
  def get_prefixes_arabic_vocalizations
    vocalizations(true, @prefixesPOS, false)
  end

  # Returns the vocalization of the <b>recomputed</b> stem in the Buckwalter transliteration system
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization
  #
  def get_stem_vocalization
    vocalizations(false, @stemsPOS, true)
  end

  # Returns the vocalization of the <b>recomputed</b> stem in arabic
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization
  #
  def get_stem_arabic_vocalization
    vocalizations(true, @stemsPOS, true)
  end

  # Returns the vocalizations of the <b>recomputed</b> suffixes in the Buckwalter transliteration system
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations
  #
  def get_suffixes_vocalizations
    vocalizations(false, @suffixesPOS, false)
  end

  # Returns the vocalizations of the <b>recomputed</b> suffixes in arabic
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations
  #
  def get_suffixes_arabic_vocalizations
    vocalizations(true, @suffixesPOS, false)
  end

  # Returns the vocalization of the word in the Buckwalter transliteration system:
  # the first prefix vocalization, the stem vocalization and the first suffix
  # vocalization, concatenated.
  # * @return The vocalization
  #
  def get_word_vocalization
    join_vocalizations(get_prefixes_vocalizations,
                       get_stem_vocalization,
                       get_suffixes_vocalizations)
  end

  # Returns the vocalization of the word in arabic, built the same way as
  # get_word_vocalization.
  # * @return The vocalization
  #
  def get_word_arabic_vocalization
    join_vocalizations(get_prefixes_arabic_vocalizations,
                       get_stem_arabic_vocalization,
                       get_suffixes_arabic_vocalizations)
  end

  # Returns the morphology of the prefix.
  # * @return The morphology
  #
  def get_prefix_morphology
    @prefix.morphology
  end

  # Returns the morphology of the stem.
  # * @return The morphology
  #
  def get_stem_morphology
    @stem.morphology
  end

  # Returns the morphology of the suffix.
  # * @return The morphology
  #
  def get_suffix_morphology
    @suffix.morphology
  end

  # Returns the morphology of the word: one tab-indented line per part
  # (prefix, stem, suffix) whose morphology is neither nil nor empty.
  # * @return The morphology
  #
  def get_word_morphology
    sb = ""
    sb.force_encoding "UTF-8"
    sb += morphology_line("prefix", @prefix.morphology)
    sb += morphology_line("stem", @stem.morphology)
    sb += morphology_line("suffix", @suffix.morphology)
    sb
  end

  # Returns the grammatical categories of the <b>recomputed</b> prefixes
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The grammatical categories
  #
  def get_prefixes_POS
    perform_on_POS(1, @prefixesPOS, 1)
  end

  # Returns the vocalizations using the Buckwalter transliteration system of the <b>recomputed</b> prefixes and their grammatical categories
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations and the grammatical categories
  #
  def get_prefixes_long_POS
    perform_on_POS(2, @prefixesPOS, 1)
  end

  # Returns the vocalizations in arabic of the <b>recomputed</b> prefixes and their grammatical categories
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_prefixes_arabic_long_POS
    perform_on_POS(3, @prefixesPOS, 1)
  end

  # Returns the grammatical category of the <b>recomputed</b> stem
  # or <b>nil</b> if there is no stem for the word.
  # * @return The grammatical category
  #
  def get_stem_POS
    perform_on_POS(1, @stemsPOS, 2)
  end

  # Returns the vocalization using the Buckwalter transliteration system of the <b>recomputed</b> stem and its grammatical category
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization and the grammatical category.
  #
  def get_stem_long_POS
    perform_on_POS(2, @stemsPOS, 2)
  end

  # Returns the vocalization in arabic of the <b>recomputed</b> stem and its grammatical category
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization and the grammatical category.
  #
  def get_stem_arabic_long_POS
    perform_on_POS(3, @stemsPOS, 2)
  end

  # Returns the grammatical categories of the <b>recomputed</b> suffixes
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The grammatical categories
  #
  def get_suffixes_POS
    perform_on_POS(1, @suffixesPOS, 3)
  end

  # Returns the vocalizations using the Buckwalter transliteration system of the <b>recomputed</b> suffixes and their grammatical categories
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_suffixes_long_POS
    perform_on_POS(2, @suffixesPOS, 3)
  end

  # Returns the vocalizations in arabic of the <b>recomputed</b> suffixes and their grammatical categories
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_suffixes_arabic_long_POS
    perform_on_POS(3, @suffixesPOS, 3)
  end

  # Returns the vocalization of the word in the Buckwalter transliteration system and its grammatical categories.
  # * @return The vocalization and the grammatical categories
  #
  def get_word_long_POS
    word_POS(false)
  end

  # Returns the vocalization of the word in arabic and its grammatical categories.
  # * @return The vocalization and the grammatical categories
  #
  def get_word_arabic_long_POS
    word_POS(true)
  end

  # Returns the english glosses of the prefixes
  # or <b>nil</b> if there are none.
  # * @return The glosses.
  #
  def get_prefixes_glosses
    @prefixesGlosses.empty? ? nil : @prefixesGlosses
  end

  # Returns the english gloss of the stem (the first one when there are
  # several) or <b>nil</b> if there is none.
  # * @return The gloss.
  #
  def get_stem_gloss
    return nil if @stemsGlosses.empty?

    puts "More than one gloss for " + @stemsGlosses.to_s if @stemsGlosses.length > 1 and @debug
    # return the first anyway :-(
    @stemsGlosses[0]
  end

  # Returns the english glosses of the suffixes
  # or <b>nil</b> if there are none.
  # * @return The glosses.
  #
  def get_suffixes_glosses
    @suffixesGlosses.empty? ? nil : @suffixesGlosses
  end

  # Returns the english glosses of the word: one tab-indented line for the
  # first prefix gloss, the stem gloss and the first suffix gloss, with every
  # ";" replaced by "/".
  # * @return The glosses.
  #
  def get_word_glosses
    sb = ""
    sb.force_encoding "UTF-8"

    prefix_glosses = get_prefixes_glosses
    if prefix_glosses and prefix_glosses[0] != nil
      sb += "\tprefix : #{prefix_glosses[0].gsub(";", "/")}\n"
    end

    # Fetched once so that the debug message it may print is not duplicated.
    stem_gloss = get_stem_gloss
    sb += "\tstem : #{stem_gloss.gsub(";", "/")}\n" if stem_gloss != nil

    suffix_glosses = get_suffixes_glosses
    if suffix_glosses and suffix_glosses[0] != nil
      sb += "\tsuffix : #{suffix_glosses[0].gsub(";", "/")}\n"
    end

    sb
  end

  public

  # Returns a string representation of how the word can be analyzed using the Buckwalter transliteration system for the vocalizations.
  # * @return The representation
  #
  def to_s
    render_solution(get_word_vocalization, get_word_long_POS)
  end

  # Returns a string representation of how the word can be analyzed using arabic for the vocalizations.
  # * @return The representation
  #
  def to_arabized_string
    render_solution(get_word_arabic_vocalization, get_word_arabic_long_POS)
  end

  private

  # Marks +text+ as UTF-8 in place and returns it; nil is passed through.
  def force_utf8(text)
    text.force_encoding("UTF-8") if text
    text
  end

  # Concatenates the first prefix vocalization, the stem vocalization and the
  # first suffix vocalization, skipping whichever of them is nil.
  def join_vocalizations(prefixes, stem, suffixes)
    sb = ""
    sb.force_encoding "UTF-8"
    sb += prefixes[0].to_s if prefixes != nil
    sb += stem if stem != nil
    sb += suffixes[0].to_s if suffixes != nil
    sb
  end

  # Formats one line of the word morphology, or returns an empty string when
  # +morphology+ is nil or empty. The nil test comes first so that a missing
  # morphology is skipped instead of raising.
  def morphology_line(label, morphology)
    return "" if morphology.nil? || morphology.empty?

    "\t#{label} : #{morphology}\n"
  end

  # Builds the text shared by to_s and to_arabized_string.
  # * [vocalization] The word vocalization to display
  # * [long_pos] The vocalizations and grammatical categories to display
  def render_solution(vocalization, long_pos)
    "\n SOLUTION # #{@cnt.to_s} \n Lemma : #{get_lemma} \n\n" \
    "Vocalized as : \t #{vocalization} \n\n" \
    "Morphology : \n #{get_word_morphology}\n" \
    "Grammatical category : \n\n" \
    "#{long_pos} Glossed as : \n\n" \
    "#{get_word_glosses} "
  end

  # Returns the vocalizations (the text before the first "/") of the given
  # parts, or <b>nil</b> if +arr+ is empty.
  # * [arabic] Whether each vocalization is passed through LatinArabicTranslator.translate
  # * [arr] The array utilized, either of prefixes, stems, suffixes
  # * [one] Whether only the first vocalization is returned (true for stems) or the whole array (prefixes and suffixes)
  #
  def vocalizations(arabic, arr, one)
    return nil if arr.empty?

    found = arr.map do |pos|
      head = pos.split("/")[0]
      if arabic
        translated = LatinArabicTranslator.translate(head)
        translated.force_encoding "UTF-8"
        translated
      else
        head
      end
    end
    return found unless one

    puts "More than one stem for " + found.to_s if found.length > 1 and @debug
    found[0]
  end

  # Returns the grammatical categories of the given parts, optionally preceded
  # by their vocalization and a tab, or <b>nil</b> if +arr+ is empty.
  # * [type] Specifies the type of the function to perform, (1 for regular, 2 for long, 3 for arabic)
  # * [arr] The array utilized, either of prefixes, stems, suffixes
  # * [pre_stem_suff] Specifying which type of arrays are being handled (1 for prefixes, 2 for stems, 3 for suffixes);
  #   for stems only the first result is returned, otherwise the whole array
  #
  def perform_on_POS(type, arr, pre_stem_suff)
    return nil if arr.empty?

    temp_POS = arr.map do |pos|
      array = pos.split("/")
      if type == 1
        sb = ""
      elsif type == 2
        sb = "#{array[0]}\t"
      else
        sb = "#{LatinArabicTranslator.translate(array[0])}\t"
        sb.force_encoding "UTF-8"
      end
      sb << array[1..array.length].join(" / ")
    end
    return temp_POS unless pre_stem_suff == 2

    puts "More than one stem for #{temp_POS.to_s}" if temp_POS.length > 1 and @debug
    puts "Empty POS for stem #{get_stem_long_POS}" if type == 1 and temp_POS[0].empty?
    # return the first anyway :-(
    temp_POS[0]
  end

  # Returns the vocalizations and the grammatical categories of the word, one
  # tab-indented line for the first prefix, the stem and the first suffix.
  # * [arabic] Boolean to choose, Buckwalter transliteration system or arabic
  #
  def word_POS(arabic)
    sb = ""

    prefixes = arabic ? get_prefixes_arabic_long_POS : get_prefixes_long_POS
    sb << "\tprefix : #{prefixes[0]}\n" if prefixes != nil and prefixes[0] != nil

    stem = arabic ? get_stem_arabic_long_POS : get_stem_long_POS
    sb << "\tstem : #{stem} \n" if stem != nil

    suffixes = arabic ? get_suffixes_arabic_long_POS : get_suffixes_long_POS
    sb << "\tsuffix : #{suffixes[0]}\n" if suffixes != nil and suffixes[0] != nil

    sb
  end

end
582
+
583
class String
  # Tells whether any trailing portion of the receiver, from the whole string
  # down to its last character, is a member of the given collection.
  # * [ends_with_suffix_set] Collection responding to member? (a Set in this file)
  # * @return true if at least one trailing portion is a member, false otherwise
  #   (always false for an empty string)
  #
  def ends_with_suffix_set?(ends_with_suffix_set)
    (0...size).any? do |offset|
      ends_with_suffix_set.member?(self[offset..-1])
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # Class For Translation
2
+ # Author:: eSpace technologies www.eSpace.com.eg
3
+ # Copyright:: 2008
4
+
5
class Translator

  # Replacement table: maps a single character to its one- or two-letter
  # Latin replacement.
  TABLE = { "ہ"=>"A" , "ء"=>"A","آ"=>"A" ,"أ"=>"A","ؤ"=>"A", "إ"=>"A",
            "ا"=>"C" ,
            "ب"=>"E", "ة"=>"E" , "ت"=>"E" , "ث"=>"E",
            "ج"=>"I" , "ح"=>"I" , "خ"=>"I" , "د"=>"I",
            "ر"=>"N" ,
            "ز"=>"O" , "س"=>"O" , "ش"=>"O" , "ص"=>"O" , "ض"=>"O" ,
            "ظ"=>"U" , "ع"=>"U" , "غ"=>"U" , "ـ"=>"U" ,
            "à"=>"a" , "ل"=>"a" , "â"=>"a" , "م"=>"a" , "ن"=>"a" , "ه"=>"a" ,
            "ç"=>"c" ,
            "è"=>"e" , "é"=>"e" , "ê"=>"e" , "ë"=>"e" ,
            "ى"=>"i" , "ي"=>"i" , "î"=>"i" , "ï"=>"i" ,
            "ٌ"=>"n" ,
            "ٍ"=>"o" , "َ"=>"o" , "ô"=>"o" , "ُ"=>"o" , "ِ"=>"o" ,
            "ù"=>"u" , "ْ"=>"u" , "û"=>"u" , "ü"=>"u" ,
            "ئ"=>"AE" , "ٹ"=>"Sh" , "ژ"=>"Zh" , "ك"=>"ss" , "و"=>"ae" , "ڑ"=>"sh" , "‍"=>"zh" }

  # * Translate The String
  #
  # A string whose length is odd is returned as is. Otherwise the string is
  # read two positions at a time and each pair is replaced by its TABLE entry
  # when there is one, or copied unchanged when there is not.
  #
  # NOTE(review): the pairing looks like it assumes two bytes per character;
  # where String#length counts characters every TABLE key is a single
  # character, so no pair can match. Confirm the intended behaviour before
  # relying on this method.
  #
  # * [string] The text to translate
  # * @return The translated text
  def translate(string)
    return string if string.length.odd?

    string.each_char.each_slice(2).inject("") do |output, pair|
      chunk = pair.join
      output + TABLE.fetch(chunk, chunk)
    end
  end

  # Looks +str+ up in TABLE.
  # * @return The replacement, or nil when +str+ is not a key
  def table(str)
    TABLE[str]
  end
end
data/lib/raramorph.rb ADDED
@@ -0,0 +1,16 @@
1
+ #Dir[File.join(File.dirname(__FILE__), 'raramorph/**/*.rb')].sort.each { |lib| require lib }
2
+
3
# Puts the directory containing this file at the front of the load path so
# that the 'raramorph/...' requires below resolve relative to it.
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))

# Wall-clock time at which loading started, used for the report printed below.
load_started_at = Time.now

require 'set'
require 'stringio'

# Project files, required in this exact order.
%w[
  logger
  translator
  arabic_latin_translator
  latin_arabic_translator
  in_memory_dictionary_handler
  in_memory_solutions_handler
  solution
  dictionary_entry
  raramorph
].each { |component| require "raramorph/#{component}" }

# Reports how long the requires above took.
puts "Time Elapsed loading dictionaries= #{Time.now - load_started_at}"
@@ -0,0 +1,34 @@
1
# Command-line entry point.
#
# ARGV[0]     Input file name
# ARGV[1]     Output file name
# ARGV[2..3]  Optional flags, accepted in any order:
#               -v  verbose (default false)
#               -a  passes false as the last argument of Raramorph.execute
#                   (described by the usage text as Arabic output)
#
# With fewer than two or more than four arguments the usage text is printed.
$:.unshift File.expand_path(File.dirname(__FILE__))
if ARGV.length >= 2 && ARGV.length <= 4
  require 'raramorph'
  start = Time.now
  # Look for the flags anywhere after the two file names, so that "-a -v"
  # behaves the same as "-v -a".
  flags = ARGV[2..-1]
  verbose = flags.include?("-v")
  not_arabic = !flags.include?("-a")
  Raramorph.execute(ARGV[0], ARGV[1], verbose, not_arabic)
  puts "Time Elapsed= " + (Time.now - start).to_s
else
  puts("Arabic Morphological Analyzer for Ruby")
  puts("Ported to Ruby by Moustafa Emara and Hany Salah El din , eSpace-technologies.(www.espace.com.eg) , 2008.")
  puts("Based on :")
  puts("BUCKWALTER ARABIC MORPHOLOGICAL ANALYZER")
  puts("This program is developed under the MIT-Licences")
  puts("Usage :")
  puts("")
  # The synopsis lists exactly what this script reads: two file names, then flags.
  puts("raraMorph inFile outFile [-v] [-a]")
  puts("")
  puts("inFile : file to be analyzed")
  puts("outFile : result file ")
  puts("-v : verbose mode")
  puts("-a : Aarbic Output" )
end
32
+
33
+
34
+