raramorph 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,675 @@
1
+ # A class to find the solution of the word
2
+ #
3
+ # Author:: eSpace technologies www.eSpace.com.eg
4
+ # Copyright:: 2008
5
+ #
6
+
7
+
8
class Solution
  # One candidate analysis of a word as a prefix / stem / suffix combination.
  #
  # The combination is <b>recomputed</b> at construction time: POS entries
  # (strings of the form "vocalization/TAG[/TAG...]") found at the start or end
  # of the stem entry are moved to the prefix or suffix lists when their tag is
  # one of the tags listed below, so the result may differ from what the
  # dictionaries provided.

  # Tags which, when found at the start of the stem's POS list, are moved to
  # the prefixes.
  PREFIX_POS_TAGS = %w[
    CONJ EMPHATIC_PARTICLE FUNC_WORD FUT_PART INTERJ INTERROG_PART
    IV1S IV2MS IV2FS IV3MS IV3FS
    IV2D IV2FD IV3MD IV3FD
    IV1P IV2MP IV2FP IV3MP IV3FP
    NEG_PART PREP RESULT_CLAUSE_PARTICLE
  ].freeze

  # Tags which, when found at the end of the stem's POS list, are moved to
  # the suffixes.
  SUFFIX_POS_TAGS = %w[
    CASE_INDEF_NOM CASE_INDEF_ACC CASE_INDEF_ACCGEN CASE_INDEF_GEN
    CASE_DEF_NOM CASE_DEF_ACC CASE_DEF_ACCGEN CASE_DEF_GEN
    NSUFF_MASC_SG_ACC_INDEF NSUFF_FEM_SG
    NSUFF_MASC_DU_NOM NSUFF_MASC_DU_NOM_POSS
    NSUFF_MASC_DU_ACCGEN NSUFF_MASC_DU_ACCGEN_POSS
    NSUFF_FEM_DU_NOM NSUFF_FEM_DU_NOM_POSS
    NSUFF_FEM_DU_ACCGEN NSUFF_FEM_DU_ACCGEN_POSS
    NSUFF_MASC_PL_NOM NSUFF_MASC_PL_NOM_POSS
    NSUFF_MASC_PL_ACCGEN NSUFF_MASC_PL_ACCGEN_POSS
    NSUFF_FEM_PL
    POSS_PRON_1S POSS_PRON_2MS POSS_PRON_2FS POSS_PRON_3MS POSS_PRON_3FS
    POSS_PRON_2D POSS_PRON_3D
    POSS_PRON_1P POSS_PRON_2MP POSS_PRON_2FP POSS_PRON_3MP POSS_PRON_3FP
    IVSUFF_DO:1S IVSUFF_DO:2MS IVSUFF_DO:2FS IVSUFF_DO:3MS IVSUFF_DO:3FS
    IVSUFF_DO:2D IVSUFF_DO:3D
    IVSUFF_DO:1P IVSUFF_DO:2MP IVSUFF_DO:2FP IVSUFF_DO:3MP IVSUFF_DO:3FP
    IVSUFF_MOOD:I IVSUFF_SUBJ:2FS_MOOD:I IVSUFF_SUBJ:D_MOOD:I
    IVSUFF_SUBJ:3D_MOOD:I IVSUFF_SUBJ:MP_MOOD:I
    IVSUFF_MOOD:S IVSUFF_SUBJ:2FS_MOOD:SJ IVSUFF_SUBJ:D_MOOD:SJ
    IVSUFF_SUBJ:MP_MOOD:SJ IVSUFF_SUBJ:3MP_MOOD:SJ IVSUFF_SUBJ:FP
    PVSUFF_DO:1S PVSUFF_DO:2MS PVSUFF_DO:2FS PVSUFF_DO:3MS PVSUFF_DO:3FS
    PVSUFF_DO:2D PVSUFF_DO:3D
    PVSUFF_DO:1P PVSUFF_DO:2MP PVSUFF_DO:2FP PVSUFF_DO:3MP PVSUFF_DO:3FP
    PVSUFF_SUBJ:1S PVSUFF_SUBJ:2MS PVSUFF_SUBJ:2FS PVSUFF_SUBJ:3MS PVSUFF_SUBJ:3FS
    PVSUFF_SUBJ:2MD PVSUFF_SUBJ:2FD PVSUFF_SUBJ:3MD PVSUFF_SUBJ:3FD
    PVSUFF_SUBJ:1P PVSUFF_SUBJ:2MP PVSUFF_SUBJ:2FP PVSUFF_SUBJ:3MP PVSUFF_SUBJ:3FP
    CVSUFF_DO:1S CVSUFF_DO:3MS CVSUFF_DO:3FS CVSUFF_DO:3D
    CVSUFF_DO:1P CVSUFF_DO:3MP CVSUFF_DO:3FP
    CVSUFF_SUBJ:2MS CVSUFF_SUBJ:2FS CVSUFF_SUBJ:2MP
  ].freeze

  # Second stem entries that get merged into the first stem entry.
  BAYON_FORMS = %w[bayon bayona bayoni].freeze

  # Everything from the first "_" or "-" of a lemma id onwards.
  LEMMA_ID_SUFFIX = /(_|-).*$/

  attr_reader :prefix, :stem, :suffix, :cnt

  # Constructs a solution for a word. Note that the prefix, stem and suffix
  # combination is <b>recomputed</b> and may not necessarily match with the
  # information provided by the dictionaries.
  # * [debug] Whether or not the dictionaries inconsistencies should be output
  # * [cnt] Order in the solutions' sequence
  # * [prefix] The prefix as provided by the prefixes dictionary
  # * [stem] The stem as provided by the stems dictionary
  # * [suffix] The suffix as provided by the suffixes dictionary
  #
  # Each entry must respond to +pos+ and +glosses+ (arrays of strings); the
  # stem must also respond to +lemma_id+, and +morphology+ is read by
  # get_word_morphology.
  def initialize(debug, cnt, prefix, stem, suffix)
    @debug = debug
    @cnt = cnt
    @prefix = prefix
    @stem = stem
    @suffix = suffix

    # Work on copies: the normalization below moves elements between these
    # lists and must not alter the arrays owned by the dictionary entries.
    @prefixesPOS = prefix.pos.dup
    @stemsPOS = stem.pos.dup
    @suffixesPOS = suffix.pos.dup
    @prefixesGlosses = prefix.glosses.dup
    @stemsGlosses = stem.glosses.dup
    @suffixesGlosses = suffix.glosses.dup

    if @stemsPOS.length != @stemsGlosses.length && @debug
      puts "\"#{get_lemma}\" : stem's sizes for POS (#{@stemsPOS.length}) and GLOSS (#{@stemsGlosses.length}) do not match"
    end

    move_leading_stem_entries_to_prefixes
    move_trailing_stem_entries_to_suffixes
    merge_bayon_into_first_stem

    # Sanity check
    if @stemsPOS.length > 1 && @debug
      puts "More than one stem for " + @stemsPOS.to_s
    end
  end

  # Every method from here down to the +public+ marker is protected.
  protected

  # Returns the lemma id in the stems dictionary, with everything from the
  # first "_" or "-" onwards removed.
  # * @return The lemma ID
  #
  def get_lemma
    @stem.lemma_id.sub(LEMMA_ID_SUFFIX, "")
  end

  # Returns the vocalizations of the <b>recomputed</b> prefixes in the Buckwalter transliteration system
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations
  #
  def get_prefixes_vocalizations
    vocalizations(false, @prefixesPOS, false)
  end

  # Returns the vocalizations of the <b>recomputed</b> prefixes in arabic
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations
  #
  def get_prefixes_arabic_vocalizations
    vocalizations(true, @prefixesPOS, false)
  end

  # Returns the vocalization of the <b>recomputed</b> stem in the Buckwalter transliteration system
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization
  #
  def get_stem_vocalization
    vocalizations(false, @stemsPOS, true)
  end

  # Returns the vocalization of the <b>recomputed</b> stem in arabic
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization
  #
  def get_stem_arabic_vocalization
    vocalizations(true, @stemsPOS, true)
  end

  # Returns the vocalizations of the <b>recomputed</b> suffixes in the Buckwalter transliteration system
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations
  #
  def get_suffixes_vocalizations
    vocalizations(false, @suffixesPOS, false)
  end

  # Returns the vocalizations of the <b>recomputed</b> suffixes in arabic
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations
  #
  def get_suffixes_arabic_vocalizations
    vocalizations(true, @suffixesPOS, false)
  end

  # Returns the vocalization of the word in the Buckwalter transliteration system.
  # Only the first prefix and the first suffix vocalization are used.
  # * @return The vocalization
  #
  def get_word_vocalization
    assemble_word(get_prefixes_vocalizations,
                  get_stem_vocalization,
                  get_suffixes_vocalizations)
  end

  # Returns the vocalization of the word in arabic.
  # Only the first prefix and the first suffix vocalization are used.
  # * @return The vocalization
  #
  def get_word_arabic_vocalization
    assemble_word(get_prefixes_arabic_vocalizations,
                  get_stem_arabic_vocalization,
                  get_suffixes_arabic_vocalizations)
  end

  # Returns the morphology of the prefix.
  # * @return The morphology
  #
  def get_prefix_morphology
    @prefix.morphology
  end

  # Returns the morphology of the stem.
  # * @return The morphology
  #
  def get_stem_morphology
    @stem.morphology
  end

  # Returns the morphology of the suffix.
  # * @return The morphology
  #
  def get_suffix_morphology
    @suffix.morphology
  end

  # Returns the morphology of the word: one labelled line per part whose
  # morphology is neither nil nor empty.
  # * @return The morphology
  #
  def get_word_morphology
    sb = utf8_buffer
    [["prefix : ", @prefix], ["stem : ", @stem], ["suffix : ", @suffix]].each do |label, entry|
      morphology = entry.morphology
      # nil must be tested before empty?, otherwise a nil morphology raises.
      next if morphology.nil? || morphology.empty?
      sb << labelled_line(label, morphology)
    end
    sb
  end

  # Returns the grammatical categories of the <b>recomputed</b> prefixes
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The grammatical categories
  #
  def get_prefixes_POS
    perform_on_POS(1, @prefixesPOS, 1)
  end

  # Returns The vocalizations using the Buckwalter transliteration system of the <b>recomputed</b> prefixes and their grammatical categories
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations and the grammatical categories
  #
  def get_prefixes_long_POS
    perform_on_POS(2, @prefixesPOS, 1)
  end

  # Returns The vocalizations in arabic of the <b>recomputed</b> prefixes and their grammatical categories
  # or <b>nil</b> if there are no prefixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_prefixes_arabic_long_POS
    perform_on_POS(3, @prefixesPOS, 1)
  end

  # Returns the grammatical category of the <b>recomputed</b> stem
  # or <b>nil</b> if there is no stem for the word.
  # * @return The grammatical category
  #
  def get_stem_POS
    perform_on_POS(1, @stemsPOS, 2)
  end

  # Returns The vocalization using the Buckwalter transliteration system of the <b>recomputed</b> stem and its grammatical category
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization and the grammatical category.
  #
  def get_stem_long_POS
    perform_on_POS(2, @stemsPOS, 2)
  end

  # Returns The vocalization in arabic of the <b>recomputed</b> stem and its grammatical category
  # or <b>nil</b> if there is no stem for the word.
  # * @return The vocalization and the grammatical category.
  #
  def get_stem_arabic_long_POS
    perform_on_POS(3, @stemsPOS, 2)
  end

  # Returns the grammatical categories of the <b>recomputed</b> suffixes
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The grammatical categories
  #
  def get_suffixes_POS
    perform_on_POS(1, @suffixesPOS, 3)
  end

  # Returns The vocalizations using the Buckwalter transliteration system of the <b>recomputed</b> suffixes and their grammatical categories
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_suffixes_long_POS
    perform_on_POS(2, @suffixesPOS, 3)
  end

  # Returns The vocalizations in arabic of the <b>recomputed</b> suffixes and their grammatical categories
  # or <b>nil</b> if there are no suffixes for the word.
  # * @return The vocalizations and the grammatical categories.
  #
  def get_suffixes_arabic_long_POS
    perform_on_POS(3, @suffixesPOS, 3)
  end

  # Returns The vocalization of the word in the Buckwalter transliteration system and its grammatical categories.
  # * @return The vocalization and the grammatical categories
  #
  def get_word_long_POS
    word_POS(false)
  end

  # Returns The vocalization of the word in arabic and its grammatical categories.
  # * @return The vocalization and the grammatical categories
  #
  def get_word_arabic_long_POS
    word_POS(true)
  end

  # Returns the english glosses of the prefixes
  # or <b>nil</b> if there are none.
  # * @return The glosses.
  #
  def get_prefixes_glosses
    @prefixesGlosses.empty? ? nil : @prefixesGlosses
  end

  # Returns the english gloss of the stem or <b>nil</b> if there is none.
  # When several glosses remain, the first one is returned.
  # * @return The gloss.
  #
  def get_stem_gloss
    return nil if @stemsGlosses.empty?
    if @stemsGlosses.length > 1 && @debug
      puts "More than one gloss for " + @stemsGlosses.to_s
    end
    # return the first anyway :-(
    @stemsGlosses[0]
  end

  # Returns the english glosses of the suffixes
  # or <b>nil</b> if there are none.
  # * @return The glosses.
  #
  def get_suffixes_glosses
    @suffixesGlosses.empty? ? nil : @suffixesGlosses
  end

  # Returns the english glosses of the word, one labelled line per part, with
  # ";" replaced by "/". Only the first prefix and suffix gloss are used.
  # * @return The glosses.
  #
  def get_word_glosses
    sb = utf8_buffer
    glosses = get_prefixes_glosses
    sb << labelled_line("prefix : ", glosses[0].tr(";", "/")) if glosses && !glosses[0].nil?
    # Fetched once so the "more than one gloss" debug line is not printed twice.
    stem_gloss = get_stem_gloss
    sb << labelled_line("stem : ", stem_gloss.tr(";", "/")) unless stem_gloss.nil?
    glosses = get_suffixes_glosses
    sb << labelled_line("suffix : ", glosses[0].tr(";", "/")) if glosses && !glosses[0].nil?
    sb
  end

  public

  # Returns a string representation of how the word can be analyzed using the Buckwalter transliteration system for the vocalizations.
  # * @return The representation
  #
  def to_s
    describe(false)
  end

  # Returns a string representation of how the word can be analyzed using arabic for the vocalizations.
  # * @return The representation
  #
  def to_arabized_string
    describe(true)
  end

  private

  # Returns a new, empty, mutable UTF-8 string to accumulate output in.
  def utf8_buffer
    String.new("").force_encoding("UTF-8")
  end

  # Tags +str+ as UTF-8 in place and returns it; nil is passed through.
  def force_utf8(str)
    str.force_encoding("UTF-8") if str
  end

  # Formats one "\t<label><text>\n" line of the textual reports.
  def labelled_line(label, text)
    "\t" + label + text + "\n"
  end

  # Normalize stems since some of them can contain prefixes: while the first
  # stem POS ends with a prefix tag, move it (and the first stem gloss, if
  # any) to the end of the prefix lists.
  def move_leading_stem_entries_to_prefixes
    until @stemsPOS.empty?
      pos = force_utf8(@stemsPOS.first)
      gloss = force_utf8(@stemsGlosses.first)
      break unless pos && pos.end_with?(*PREFIX_POS_TAGS)
      @prefixesPOS.push(@stemsPOS.shift)
      @prefixesGlosses.push(@stemsGlosses.shift) if gloss
    end
  end

  # Normalize stems since some of them can contain suffixes: while the last
  # stem POS ends with a suffix tag, move it (and the last stem gloss, if
  # any) to the front of the suffix lists.
  def move_trailing_stem_entries_to_suffixes
    until @stemsPOS.empty?
      pos = force_utf8(@stemsPOS.last)
      gloss = force_utf8(@stemsGlosses.last)
      break unless pos && pos.end_with?(*SUFFIX_POS_TAGS)
      @suffixesPOS.unshift(@stemsPOS.pop)
      @suffixesGlosses.unshift(@stemsGlosses.pop) if gloss
    end
  end

  # Normalization of bayon, bayona, bayoni: when the second stem entry is one
  # of these forms, it is appended to the vocalization (the part before the
  # first "/") of the first entry and the two entries become one.
  def merge_bayon_into_first_stem
    return unless @stemsPOS.length > 1
    head = @stemsPOS[0]
    bayon = @stemsPOS[1]
    return unless BAYON_FORMS.include?(bayon)
    puts "Merging \"#{bayon}\" into first part of stem \"#{head}\"" if @debug
    parts = head.split("/")
    # The remaining "/"-separated pieces are appended one after the other with
    # no separator, as the original loop body did (it just never advanced).
    merged = parts.first.to_s + bayon + "/" + parts.drop(1).join
    @stemsPOS.shift
    @stemsPOS[0] = merged
  end

  # Concatenates the first prefix vocalization, the stem vocalization and the
  # first suffix vocalization, skipping the parts that are nil.
  def assemble_word(prefixes, stem, suffixes)
    word = utf8_buffer
    word << prefixes[0].to_s unless prefixes.nil?
    word << stem unless stem.nil?
    word << suffixes[0].to_s unless suffixes.nil?
    word
  end

  # Builds the multi-line report shared by to_s and to_arabized_string.
  # * [arabic] Whether vocalizations are rendered in arabic or in the Buckwalter transliteration system
  #
  def describe(arabic)
    lemma = get_lemma
    vocalization = arabic ? get_word_arabic_vocalization : get_word_vocalization
    morphology = get_word_morphology
    long_pos = arabic ? get_word_arabic_long_POS : get_word_long_POS
    glosses = get_word_glosses
    "\n SOLUTION # #{@cnt} \n Lemma : #{lemma} \n\n" \
      "Vocalized as : \t #{vocalization} \n\n" \
      "Morphology : \n #{morphology}\n" \
      "Grammatical category : \n\n" \
      "#{long_pos} Glossed as : \n\n" \
      "#{glosses} "
  end

  # Returns the vocalizations (the part before the first "/") of the given POS entries
  # or <b>nil</b> if +arr+ is empty.
  # * [arabic] Whether or not each vocalization is passed through LatinArabicTranslator.translate
  # * [arr] The array utilized, either of prefixes, stems, suffixes
  # * [one] Whether or not we are manipulating single vocalization (only true for stem vocalizations, false for suffixes and prefixes)
  # * @return The first vocalization when +one+ is true, the whole array otherwise
  #
  def vocalizations(arabic, arr, one)
    return nil if arr.empty?
    forms = arr.map do |pos|
      head = pos.split("/")[0]
      arabic ? force_utf8(LatinArabicTranslator.translate(head)) : head
    end
    return forms unless one
    puts "More than one stem for " + forms.to_s if forms.length > 1 && @debug
    forms[0]
  end

  # Returns the grammatical categories of the given POS entries, optionally
  # preceded by their vocalization, or <b>nil</b> if +arr+ is empty.
  # * [type] Specifies the type of the function to perform, (1 for regular, 2 for long, 3 for arabic)
  # * [arr] The array utilized, either of prefixes, stems, suffixes
  # * [pre_stem_suff] Specifying which type of arrays are being handled (1 for prefixes, 2 for stems, 3 for suffixes)
  # * @return The first element for stems, the whole array otherwise
  #
  def perform_on_POS(type, arr, pre_stem_suff)
    return nil if arr.empty?
    temp_POS = arr.map do |pos|
      parts = pos.split("/")
      lead =
        case type
        when 1 then ""
        when 2 then parts[0] + "\t"
        else force_utf8(LatinArabicTranslator.translate(parts[0]) + "\t")
        end
      # Everything after the vocalization is the list of categories.
      lead + parts.drop(1).join(" / ")
    end
    return temp_POS unless pre_stem_suff == 2

    puts "More than one stem for " + temp_POS.to_s if temp_POS.length > 1 && @debug
    # Printed regardless of @debug, as before.
    puts "Empty POS for stem " + get_stem_long_POS.to_s if type == 1 && temp_POS[0].empty?
    # return the first anyway :-(
    temp_POS[0]
  end

  # Returns the vocalizations and the grammatical categories of the word, one
  # labelled line per part. Only the first prefix and suffix are used.
  # * [arabic] Boolean to choose, Buckwalter transliteration system or arabic
  #
  def word_POS(arabic)
    sb = utf8_buffer
    affixes = arabic ? get_prefixes_arabic_long_POS : get_prefixes_long_POS
    sb << labelled_line("prefix : ", affixes[0]) if affixes && !affixes[0].nil?
    stem_pos = arabic ? get_stem_arabic_long_POS : get_stem_long_POS
    sb << labelled_line("stem : ", stem_pos) unless stem_pos.nil?
    affixes = arabic ? get_suffixes_arabic_long_POS : get_suffixes_long_POS
    sb << labelled_line("suffix : ", affixes[0]) if affixes && !affixes[0].nil?
    sb
  end
end