mosta-raramorph 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,592 @@
1
+ # A class to find the solution of the word
2
+ # Author:: eSpace technologies www.eSpace.com.eg
3
+ # Copyright:: 2008
4
+
5
+
6
+
7
# One candidate analysis ("solution") of a word: a prefix, a stem and a suffix
# dictionary entry, plus the POS tags and glosses recomputed from them.
#
# The dictionary entries are duck-typed; this class reads +pos+, +glosses+ and
# +morphology+ from all three and +lemma_id+ from the stem.
class Solution

  attr_reader :prefix, :stem, :suffix, :cnt

  # POS tags that mark a leading stem segment as belonging to the prefixes.
  @@ends_with_set_for_pos_one = Set.new([
    "CONJ", "EMPHATIC_PARTICLE", "FUNC_WORD",
    "FUT_PART", "INTERJ", "INTERROG_PART", "IV1S", "IV2MS",
    "IV2FS", "IV3MS", "IV3FS", "IV2D", "IV2FD", "IV3MD", "IV3FD",
    "IV1P", "IV2MP", "IV2FP", "IV3MP", "IV3FP", "NEG_PART",
    "PREP", "RESULT_CLAUSE_PARTICLE"
  ])

  # POS tags that mark a trailing stem segment as belonging to the suffixes.
  @@ends_with_set_for_pos_two = Set.new([
    "CASE_INDEF_NOM", "CASE_INDEF_ACC",
    "CASE_INDEF_ACCGEN", "CASE_INDEF_GEN", "CASE_DEF_NOM",
    "CASE_DEF_ACC", "CASE_DEF_ACCGEN", "CASE_DEF_GEN",
    "NSUFF_MASC_SG_ACC_INDEF", "NSUFF_FEM_SG", "NSUFF_MASC_DU_NOM",
    "NSUFF_MASC_DU_NOM_POSS", "NSUFF_MASC_DU_ACCGEN",
    "NSUFF_MASC_DU_ACCGEN_POSS", "NSUFF_FEM_DU_NOM",
    "NSUFF_FEM_DU_NOM_POSS", "NSUFF_FEM_DU_ACCGEN",
    "NSUFF_FEM_DU_ACCGEN_POSS", "NSUFF_MASC_PL_NOM",
    "NSUFF_MASC_PL_NOM_POSS", "NSUFF_MASC_PL_ACCGEN",
    "NSUFF_MASC_PL_ACCGEN_POSS", "NSUFF_FEM_PL", "POSS_PRON_1S",
    "POSS_PRON_2MS", "POSS_PRON_2FS", "POSS_PRON_3MS",
    "POSS_PRON_3FS", "POSS_PRON_2D", "POSS_PRON_3D", "POSS_PRON_1P",
    "POSS_PRON_2MP", "POSS_PRON_2FP", "POSS_PRON_3MP", "POSS_PRON_3FP",
    "IVSUFF_DO:1S", "IVSUFF_DO:2MS", "IVSUFF_DO:2FS", "IVSUFF_DO:3MS",
    "IVSUFF_DO:3FS", "IVSUFF_DO:2D", "IVSUFF_DO:3D", "IVSUFF_DO:1P",
    "IVSUFF_DO:2MP", "IVSUFF_DO:2FP", "IVSUFF_DO:3MP", "IVSUFF_DO:3FP",
    "IVSUFF_MOOD:I", "IVSUFF_SUBJ:2FS_MOOD:I", "IVSUFF_SUBJ:D_MOOD:I",
    "IVSUFF_SUBJ:3D_MOOD:I", "IVSUFF_SUBJ:MP_MOOD:I", "IVSUFF_MOOD:S",
    "IVSUFF_SUBJ:2FS_MOOD:SJ", "IVSUFF_SUBJ:D_MOOD:SJ", "IVSUFF_SUBJ:MP_MOOD:SJ",
    "IVSUFF_SUBJ:3MP_MOOD:SJ", "IVSUFF_SUBJ:FP", "PVSUFF_DO:1S", "PVSUFF_DO:2MS",
    "PVSUFF_DO:2FS", "PVSUFF_DO:3MS", "PVSUFF_DO:3FS", "PVSUFF_DO:2D",
    "PVSUFF_DO:3D", "PVSUFF_DO:1P", "PVSUFF_DO:2MP", "PVSUFF_DO:2FP",
    "PVSUFF_DO:3MP", "PVSUFF_DO:3FP", "PVSUFF_SUBJ:1S", "PVSUFF_SUBJ:2MS",
    "PVSUFF_SUBJ:2FS", "PVSUFF_SUBJ:3MS", "PVSUFF_SUBJ:3FS", "PVSUFF_SUBJ:2MD",
    "PVSUFF_SUBJ:2FD", "PVSUFF_SUBJ:3MD", "PVSUFF_SUBJ:3FD", "PVSUFF_SUBJ:1P",
    "PVSUFF_SUBJ:2MP", "PVSUFF_SUBJ:2FP", "PVSUFF_SUBJ:3MP", "PVSUFF_SUBJ:3FP",
    "CVSUFF_DO:1S", "CVSUFF_DO:3MS", "CVSUFF_DO:3FS", "CVSUFF_DO:3D",
    "CVSUFF_DO:1P", "CVSUFF_DO:3MP", "CVSUFF_DO:3FP", "CVSUFF_SUBJ:2MS",
    "CVSUFF_SUBJ:2FS", "CVSUFF_SUBJ:2MP"
  ])

  protected

  # Constructs a solution for a word. Note that the prefix, stem and suffix combination is <b>recomputed</b>
  # and may not necessarily match with the information provided by the dictionaries.
  # * [debug] Whether or not the dictionaries' inconsistencies should be output
  # * [cnt] Order in sequence ; not very useful actually
  # * [prefix] The prefix as provided by the prefixes dictionary
  # * [stem] The stem as provided by the stems dictionary
  # * [suffix] The suffix as provided by the suffixes dictionary
  #
  def initialize(debug, cnt, prefix, stem, suffix)
    @debug = debug
    @cnt = cnt
    @prefix = prefix
    @stem = stem
    @suffix = suffix

    # The normalization below moves elements between these arrays, so work on
    # copies: the arrays handed out by the dictionary entries stay untouched.
    @prefixesPOS = prefix.pos.dup
    @stemsPOS = stem.pos.dup
    @suffixesPOS = suffix.pos.dup
    @prefixesGlosses = prefix.glosses.dup
    @stemsGlosses = stem.glosses.dup
    @suffixesGlosses = suffix.glosses.dup

    if @debug && @stemsPOS.length != @stemsGlosses.length
      puts "\"#{get_lemma}\" : stem's sizes for POS (\"#{@stemsPOS.length}\") and GLOSS (\"#{@stemsGlosses.length}\") do not match"
    end

    move_leading_stem_segments_to_prefixes
    move_trailing_stem_segments_to_suffixes
    merge_bayon_into_stem

    # Sanity check
    puts "More than one stem for " + @stemsPOS.to_s if @stemsPOS.length > 1 && @debug
  end

  # Returns the lemma id in the stems dictionary, i.e. the stem's lemma_id
  # with everything from the first "_" or "-" onwards removed.
  # * @return The lemma ID
  #
  def get_lemma
    @stem.lemma_id.sub(/(_|-).*$/, "")
  end

  # Returns the vocalizations of the <b>recomputed</b> prefixes in the Buckwalter transliteration system
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations
  #
  def get_prefixes_vocalizations
    vocalizations(false, @prefixesPOS, false)
  end

  # Returns the vocalizations of the <b>recomputed</b> prefixes in arabic
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations
  #
  def get_prefixes_arabic_vocalizations
    vocalizations(true, @prefixesPOS, false)
  end

  # Returns the vocalization of the <b>recomputed</b> stem in the Buckwalter transliteration system
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization
  #
  def get_stem_vocalization
    vocalizations(false, @stemsPOS, true)
  end

  # Returns the vocalization of the <b>recomputed</b> stem in arabic
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization
  #
  def get_stem_arabic_vocalization
    vocalizations(true, @stemsPOS, true)
  end

  # Returns the vocalizations of the <b>recomputed</b> suffixes in the Buckwalter transliteration system
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations
  #
  def get_suffixes_vocalizations
    vocalizations(false, @suffixesPOS, false)
  end

  # Returns the vocalizations of the <b>recomputed</b> suffixes in arabic
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations
  #
  def get_suffixes_arabic_vocalizations
    vocalizations(true, @suffixesPOS, false)
  end

  # Returns the vocalization of the word in the Buckwalter transliteration system.
  # Only the first prefix and the first suffix vocalization are included.
  # * @return The vocalization
  #
  def get_word_vocalization
    join_word_parts(get_prefixes_vocalizations, get_stem_vocalization, get_suffixes_vocalizations)
  end

  # Returns the vocalization of the word in arabic.
  # Only the first prefix and the first suffix vocalization are included.
  # * @return The vocalization
  #
  def get_word_arabic_vocalization
    join_word_parts(get_prefixes_arabic_vocalizations, get_stem_arabic_vocalization,
                    get_suffixes_arabic_vocalizations)
  end

  # Returns the morphology of the prefix.
  # * @return The morphology
  #
  def get_prefix_morphology
    @prefix.morphology
  end

  # Returns the morphology of the stem.
  # * @return The morphology
  #
  def get_stem_morphology
    @stem.morphology
  end

  # Returns the morphology of the suffix.
  # * @return The morphology
  #
  def get_suffix_morphology
    @suffix.morphology
  end

  # Returns the morphology of the word: one tab-indented line per part
  # (prefix, stem, suffix) whose morphology is neither nil nor empty.
  # * @return The morphology
  #
  def get_word_morphology
    sb = utf8_buffer
    [["prefix", @prefix], ["stem", @stem], ["suffix", @suffix]].each do |label, entry|
      morphology = entry.morphology
      # nil must be tested first: nil does not respond to empty?
      next if morphology.nil? || morphology.empty?
      sb += "\t#{label} : #{morphology}\n"
    end
    sb
  end

  # Returns the grammatical categories of the <b>recomputed</b> prefixes
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The grammatical categories
  #
  def get_prefixes_POS
    perform_on_POS(1, @prefixesPOS, 1)
  end

  # Returns The vocalizations using the Buckwalter transliteration system of the <b>recomputed</b> prefixes and their grammatical categories
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations and the grammatical categories
  #
  def get_prefixes_long_POS
    perform_on_POS(2, @prefixesPOS, 1)
  end

  # Returns The vocalizations in arabic of the <b>recomputed</b> prefixes and their grammatical categories
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_prefixes_arabic_long_POS
    perform_on_POS(3, @prefixesPOS, 1)
  end

  # Returns the grammatical category of the <b>recomputed</b> stem
  # or <b>nil</b> if there is no stem for the word.
  # * @return The grammatical category
  #
  def get_stem_POS
    perform_on_POS(1, @stemsPOS, 2)
  end

  # Returns The vocalization using the Buckwalter transliteration system of the <b>recomputed</b> stem and its grammatical category
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization and the grammatical category.
  #
  def get_stem_long_POS
    perform_on_POS(2, @stemsPOS, 2)
  end

  # Returns The vocalization in arabic of the <b>recomputed</b> stem and its grammatical category
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization and the grammatical category.
  #
  def get_stem_arabic_long_POS
    perform_on_POS(3, @stemsPOS, 2)
  end

  # Returns the grammatical categories of the <b>recomputed</b> suffixes
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The grammatical categories
  #
  def get_suffixes_POS
    perform_on_POS(1, @suffixesPOS, 3)
  end

  # Returns The vocalizations using the Buckwalter transliteration system of the <b>recomputed</b> suffixes and their grammatical categories
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_suffixes_long_POS
    perform_on_POS(2, @suffixesPOS, 3)
  end

  # Returns The vocalizations in arabic of the <b>recomputed</b> suffixes and their grammatical categories
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_suffixes_arabic_long_POS
    perform_on_POS(3, @suffixesPOS, 3)
  end

  # Returns The vocalization of the word in the Buckwalter transliteration system and its grammatical categories.
  # * @return The vocalization and the grammatical categories
  #
  def get_word_long_POS
    word_POS(false)
  end

  # Returns The vocalization of the word in arabic and its grammatical categories.
  # * @return The vocalization and the grammatical categories
  #
  def get_word_arabic_long_POS
    word_POS(true)
  end

  # Returns the english glosses of the prefixes, or <b>nil</b> if there are none.
  # * @return The glosses.
  #
  def get_prefixes_glosses
    @prefixesGlosses.empty? ? nil : @prefixesGlosses
  end

  # Returns the english gloss of the stem, or <b>nil</b> if there is none.
  # When several glosses remain, the first one is returned (and a message is
  # printed in debug mode).
  # * @return The gloss.
  #
  def get_stem_gloss
    return nil if @stemsGlosses.empty?
    puts "More than one gloss for " + @stemsGlosses.to_s if @stemsGlosses.length > 1 && @debug
    # return the first anyway :-(
    @stemsGlosses[0]
  end

  # Returns the english glosses of the suffixes, or <b>nil</b> if there are none.
  # * @return The glosses.
  #
  def get_suffixes_glosses
    @suffixesGlosses.empty? ? nil : @suffixesGlosses
  end

  # Returns the english glosses of the word: one tab-indented line for the
  # first prefix gloss, the stem gloss and the first suffix gloss, with every
  # ";" replaced by "/".
  # * @return The glosses.
  #
  def get_word_glosses
    sb = utf8_buffer
    glosses = get_prefixes_glosses
    sb += "\tprefix : #{glosses[0].gsub(";", "/")}\n" if glosses && !glosses[0].nil?
    # Fetched once so the debug message of get_stem_gloss is not printed twice.
    stem_gloss = get_stem_gloss
    sb += "\tstem : #{stem_gloss.gsub(";", "/")}\n" unless stem_gloss.nil?
    glosses = get_suffixes_glosses
    sb += "\tsuffix : #{glosses[0].gsub(";", "/")}\n" if glosses && !glosses[0].nil?
    sb
  end

  public

  # Returns a string representation of how the word can be analyzed using the Buckwalter transliteration system for the vocalizations.
  # * @return The representation
  #
  def to_s
    solution_report(false)
  end

  # Returns a string representation of how the word can be analyzed using arabic for the vocalizations.
  # * @return The representation
  #
  def to_arabized_string
    solution_report(true)
  end

  private

  # Returns the vocalizations (the text before the first "/" of each entry)
  # of the given array, or <b>nil</b> if the array is empty.
  # * [arabic] Whether or not the vocalizations are converted with LatinArabicTranslator
  # * [arr] The array utilized, either of prefixes, stems, suffixes
  # * [one] Whether a single vocalization is returned (true for the stem) or the whole array (prefixes and suffixes)
  #
  def vocalizations(arabic, arr, one)
    return nil if arr.empty?

    found = arr.map do |pos|
      vocalization = pos.split("/")[0]
      if arabic
        translated = LatinArabicTranslator.translate(vocalization)
        translated.force_encoding("UTF-8")
        translated
      else
        vocalization
      end
    end
    return found unless one

    puts "More than one stem for " + found.to_s if found.length > 1 && @debug
    found[0]
  end

  # Returns the grammatical categories of the given array, optionally preceded
  # by the vocalization, or <b>nil</b> if the array is empty.
  # * [type] Specifies the output (1 for categories only, 2 for Buckwalter vocalization + categories, 3 for arabic vocalization + categories)
  # * [arr] The array utilized, either of prefixes, stems, suffixes
  # * [pre_stem_suff] Specifying which type of arrays are being handled (1 for prefixes, 2 for stems, 3 for suffixes); for stems only the first result is returned
  #
  def perform_on_POS(type, arr, pre_stem_suff)
    return nil if arr.empty?

    temp_POS = arr.map do |pos|
      array = pos.split("/")
      if type == 1
        sb = utf8_buffer
      elsif type == 2
        sb = "#{array[0]}\t"
      else
        sb = "#{LatinArabicTranslator.translate(array[0])}\t"
        sb.force_encoding("UTF-8")
      end
      # drop(1) also copes with an empty entry, where array[1..n] would be nil.
      sb << array.drop(1).join(" / ")
    end
    return temp_POS unless pre_stem_suff == 2

    puts "More than one stem for #{temp_POS}" if temp_POS.length > 1 && @debug
    puts "Empty POS for stem #{get_stem_long_POS}" if type == 1 && temp_POS[0].empty?
    # return the first anyway :-(
    temp_POS[0]
  end

  # Returns the vocalizations and the grammatical categories of the word, one
  # tab-indented line for the first prefix, the stem and the first suffix.
  # * [arabic] Boolean to choose, Buckwalter transliteration system or arabic
  #
  def word_POS(arabic)
    sb = utf8_buffer
    prefixes = arabic ? get_prefixes_arabic_long_POS : get_prefixes_long_POS
    sb << "\tprefix : #{prefixes[0]}\n" if prefixes && !prefixes[0].nil?
    stem = arabic ? get_stem_arabic_long_POS : get_stem_long_POS
    sb << "\tstem : #{stem} \n" unless stem.nil?
    suffixes = arabic ? get_suffixes_arabic_long_POS : get_suffixes_long_POS
    sb << "\tsuffix : #{suffixes[0]}\n" if suffixes && !suffixes[0].nil?
    sb
  end

  # Returns a new, empty, mutable UTF-8 string to accumulate output in.
  def utf8_buffer
    String.new.force_encoding("UTF-8")
  end

  # Stems can start with segments that are really prefixes: moves every
  # leading stem segment whose POS is in the prefix tag set (and its gloss,
  # when there is one) to the end of the prefixes.
  def move_leading_stem_segments_to_prefixes
    until @stemsPOS.empty?
      segment = @stemsPOS.first
      segment.force_encoding("UTF-8") if segment
      gloss = @stemsGlosses.first
      gloss.force_encoding("UTF-8") if gloss
      break unless segment && segment.ends_with_suffix_set?(@@ends_with_set_for_pos_one)

      @stemsPOS.shift
      @prefixesPOS.push(segment)
      next unless gloss
      @stemsGlosses.shift
      @prefixesGlosses.push(gloss)
    end
  end

  # Stems can end with segments that are really suffixes: moves every
  # trailing stem segment whose POS is in the suffix tag set (and its gloss,
  # when there is one) to the front of the suffixes.
  def move_trailing_stem_segments_to_suffixes
    until @stemsPOS.empty?
      segment = @stemsPOS.last
      segment.force_encoding("UTF-8") if segment
      gloss = @stemsGlosses.last
      gloss.force_encoding("UTF-8") if gloss
      break unless segment && segment.ends_with_suffix_set?(@@ends_with_set_for_pos_two)

      @stemsPOS.pop
      @suffixesPOS.unshift(segment)
      next unless gloss
      @stemsGlosses.pop
      @suffixesGlosses.unshift(gloss)
    end
  end

  # Normalization of bayon, bayona, bayoni: when the second stem segment is
  # one of these, it is appended to the vocalization of the first segment and
  # the two segments are replaced by the merged one.
  def merge_bayon_into_stem
    return unless @stemsPOS.length > 1

    pos0 = @stemsPOS[0]
    pos1 = @stemsPOS[1]
    return unless ["bayon", "bayona", "bayoni"].include?(pos1)

    puts "Merging \"" + pos1 + "\" into first part of stem \"" + pos0 + "\"" if @debug
    parts = pos0.split("/")
    # The previous index loop never advanced and so never terminated; the
    # remaining parts are appended one after the other, as it intended.
    merged = parts[0].to_s + pos1 + "/" + parts.drop(1).join
    @stemsPOS.slice!(0)
    @stemsPOS[0] = merged
  end

  # Concatenates the first prefix vocalization, the stem vocalization and the
  # first suffix vocalization, skipping the parts that are nil.
  def join_word_parts(prefixes, stem, suffixes)
    word = utf8_buffer
    word += prefixes[0].to_s if prefixes
    word += stem if stem
    word += suffixes[0].to_s if suffixes
    word
  end

  # Builds the text shared by to_s and to_arabized_string.
  # * [arabic] Whether the vocalizations are rendered in arabic or in the Buckwalter transliteration system
  def solution_report(arabic)
    "\n SOLUTION # #{@cnt} \n Lemma : #{get_lemma} \n\n" \
    "Vocalized as : \t #{arabic ? get_word_arabic_vocalization : get_word_vocalization} \n\n" \
    "Morphology : \n #{get_word_morphology}\n" \
    "Grammatical category : \n\n" \
    "#{word_POS(arabic)} Glossed as : \n\n" \
    "#{get_word_glosses} "
  end

end
582
+
583
class String
  # Tells whether some non-empty tail of this string (the whole string, the
  # string without its first character, ... down to its last character) is a
  # member of the given collection.
  # * [ends_with_suffix_set] Any object responding to member?, typically a Set of POS tags
  # * @return true or false; always false for an empty string
  #
  def ends_with_suffix_set?(ends_with_suffix_set)
    (0...size).any? { |start| ends_with_suffix_set.member?(self[start..-1]) }
  end
end
@@ -0,0 +1,40 @@
1
+ # Class For Translation
2
+ # Author:: eSpace technologies www.eSpace.com.eg
3
+ # Copyright:: 2008
4
+
5
# Replaces accented Latin letters and ligatures with plain ASCII equivalents.
class Translator

  # Maps one accented character (or ligature) to its ASCII replacement.
  #
  # NOTE(review): in the reviewed copy of this file the keys were Arabic
  # letters. Each of them was exactly what the Latin-1 letter below becomes
  # when its byte is decoded as Windows-1256 (0xC0 "À" -> U+06C1, 0xC7 "Ç" ->
  # U+0627, 0xDF "ß" -> U+0643, ...), so the table is restored to the accented
  # letters it evidently maps.
  TABLE = {
    "À" => "A", "Á" => "A", "Â" => "A", "Ã" => "A", "Ä" => "A", "Å" => "A",
    "Ç" => "C",
    "È" => "E", "É" => "E", "Ê" => "E", "Ë" => "E",
    "Ì" => "I", "Í" => "I", "Î" => "I", "Ï" => "I",
    "Ñ" => "N",
    "Ò" => "O", "Ó" => "O", "Ô" => "O", "Õ" => "O", "Ö" => "O",
    "Ù" => "U", "Ú" => "U", "Û" => "U", "Ü" => "U",
    "à" => "a", "á" => "a", "â" => "a", "ã" => "a", "ä" => "a", "å" => "a",
    "ç" => "c",
    "è" => "e", "é" => "e", "ê" => "e", "ë" => "e",
    "ì" => "i", "í" => "i", "î" => "i", "ï" => "i",
    "ñ" => "n",
    "ò" => "o", "ó" => "o", "ô" => "o", "õ" => "o", "ö" => "o",
    "ù" => "u", "ú" => "u", "û" => "u", "ü" => "u",
    "Æ" => "AE", "Š" => "Sh", "Ž" => "Zh", "ß" => "ss", "æ" => "ae", "š" => "sh", "ž" => "zh"
  }

  # * Translate The String
  #
  # Returns a new string in which every character that is a key of TABLE is
  # replaced by its mapping; all other characters are copied unchanged.
  #
  # The lookup is done per character. The previous version stepped through
  # the string two units at a time and returned odd-length strings untouched,
  # which only works when strings are indexed by byte and every accented
  # letter occupies two bytes; on character-indexed strings it never matched.
  # * [string] The text to translate
  # * @return The translated text
  #
  def translate(string)
    string.each_char.map { |char| TABLE.fetch(char, char) }.join
  end

  # Returns the replacement registered for the given character, or nil.
  # * [str] A single-character key of TABLE
  #
  def table(str)
    TABLE[str]
  end
end
data/lib/raramorph.rb ADDED
@@ -0,0 +1,16 @@
1
# Loads the Raramorph library: puts this directory on the load path, requires
# the standard libraries and the library's own files in a fixed, explicit
# order, then prints how long the loading took.
#
# The glob-based loader is left disabled:
#Dir[File.join(File.dirname(__FILE__), 'raramorph/**/*.rb')].sort.each { |lib| require lib }

$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
start = Time.now

%w[set stringio].each { |lib| require lib }

%w[
  logger
  translator
  arabic_latin_translator
  latin_arabic_translator
  in_memory_dictionary_handler
  in_memory_solutions_handler
  solution
  dictionary_entry
  raramorph
].each { |name| require "raramorph/#{name}" }

puts "Time Elapsed loading dictionaries= #{Time.now - start}"
@@ -0,0 +1,34 @@
1
# Command-line front end of the analyzer.
#
#   ARGV[0]      input file name
#   ARGV[1]      output file name
#   ARGV[2..3]   optional flags, in any order:
#                  -v  verbose (default: off)
#                  -a  arabic output (default: off)
$:.unshift File.expand_path(File.dirname(__FILE__))

# Reads the optional flags that follow the two file names.
# * [argv] The full argument list (file names included)
# * @return [verbose, not_arabic] - verbose is true when "-v" is present,
#   not_arabic is false when "-a" is present
#
# Flags are recognised wherever they appear after the file names; previously
# "-v" was only honoured as the third argument, so "in out -a -v" silently
# ran without verbose mode.
def raramorph_cli_flags(argv)
  flags = argv[2..-1] || []
  [flags.include?("-v"), !flags.include?("-a")]
end

if ARGV.length.between?(2, 4)
  require 'raramorph'
  start = Time.now
  verbose, not_arabic = raramorph_cli_flags(ARGV)
  Raramorph.execute(ARGV[0], ARGV[1], verbose, not_arabic)
  puts "Time Elapsed= " + (Time.now - start).to_s
else
  puts("Arabic Morphological Analyzer for Ruby")
  puts("Ported to Ruby by Moustafa Emara and Hany Salah El din , eSpace-technologies.(www.espace.com.eg) , 2008.")
  puts("Based on :")
  puts("BUCKWALTER ARABIC MORPHOLOGICAL ANALYZER")
  puts("This program is developed under the MIT-Licences")
  puts("Usage :")
  puts("")
  # The usage text now matches the arguments actually read above: two
  # mandatory file names and no encoding argument.
  puts("raraMorph inFile outFile [-v] [-a]")
  puts("")
  puts("inFile : file to be analyzed")
  puts("outFile : result file ")
  puts("-v : verbose mode")
  puts("-a : Aarbic Output" )
end
32
+
33
+
34
+