SinaTools 0.1.35__py2.py3-none-any.whl → 0.1.36__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,662 +0,0 @@
1
-
2
- # The Imply algorithm takes two words as input and produces the matching tuple defined by (Words Matching).
3
- # The matching between two words is defined as a tuple:
4
- # <w1, w2, implication direction, distance, conflicts, verdict, preferredWord> .
5
-
6
- from sinatools.utils.parser import arStrip
7
class Implication:
    """
    Decide whether two Arabic words are the same word regardless of how they
    are diacritized.

    Constructing an instance runs the comparison; the outcome is exposed via
    ``get_verdict()``, ``get_direction()``, ``get_distance()``,
    ``get_conflicts()`` and ``get_result()``.

    Args:
        :obj:`str` inputWord1: first word.
        :obj:`str` inputWord2: second word.

    Diacritic codes (as produced by ``get_diacritics_array``):
        0 none, 1 sukun, 2 fatha, 3 kasra, 4 damma, 5 fathatan, 6 kasratan,
        7 dammatan, 8 shadda, 9 shadda+fatha, 10 shadda+kasra,
        11 shadda+damma, 12-15 marker for a word-initial alif
        (12 bare alif, 13 hamza above, 14 hamza below, 15 madda).

    Direction codes:
        -3 different letters / invalid input, -2 letters differ (set by
        ``calculate_words_implication``), -1 incompatible diacritics,
        0 imply each other, 1 w1 implies w2, 2 w2 implies w1, 3 exactly equal.
    """

    # Diacritic Pair Distance Map: distanceTable[code1][code2]
    distanceTable = [
        [0, 0, 1, 1, 1, 1, 1, 1, 15, 16, 16, 16, 0, 0, 0, 0],
        [0, 0, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0],
        [1, 101, 0, 101, 101, 101, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0],
        [1, 101, 101, 0, 101, 101, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0],
        [1, 101, 101, 101, 0, 101, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0],
        [1, 101, 101, 101, 101, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0],
        [1, 101, 101, 101, 101, 101, 0, 101, 101, 101, 101, 101, 0, 0, 0, 0],
        [1, 101, 101, 101, 101, 101, 101, 0, 101, 101, 101, 101, 0, 0, 0, 0],
        [15, 101, 101, 101, 101, 101, 101, 101, 0, 1, 1, 1, 0, 0, 0, 0],
        [16, 101, 101, 101, 101, 101, 101, 101, 1, 0, 101, 101, 0, 0, 0, 0],
        [16, 101, 101, 101, 101, 101, 101, 101, 1, 101, 0, 101, 0, 0, 0, 0],
        [16, 101, 101, 101, 101, 101, 101, 101, 1, 101, 101, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 100, 100],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 100, 0, 100],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 100, 100, 0],
    ]

    # Implication direction Map: directionTable[code1][code2]
    directionTable = [
        [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        [2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0],
        [2, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0],
        [2, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0],
        [2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0],
        [2, -1, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0],
        [2, -1, -1, -1, -1, -1, 3, -1, -1, -1, -1, -1, 0, 0, 0, 0],
        [2, -1, -1, -1, -1, -1, -1, 3, -1, -1, -1, -1, 0, 0, 0, 0],
        [2, -1, -1, -1, -1, -1, -1, -1, 3, 1, 1, 1, 0, 0, 0, 0],
        [2, -1, -1, -1, -1, -1, -1, -1, 2, 3, -1, -1, 0, 0, 0, 0],
        [2, -1, -1, -1, -1, -1, -1, -1, 2, -1, 3, -1, 0, 0, 0, 0],
        [2, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, 3, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, -1, -1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -1, 3, -1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -1, -1, 3],
    ]

    # Ordered substitutions turning diacritics into digit codes. Order
    # matters: the two-digit combinations are folded only after every single
    # mark has been replaced by its digit.
    _SUBSTITUTIONS = (
        (" ", ""),           # spaces are dropped
        ("\u0652", "1"),     # SUKUN
        ("\u064e", "2"),     # FATHA
        ("\u0650", "3"),     # KASRA
        ("\u064f", "4"),     # DAMMA
        ("\u064b", "5"),     # FATHATAN
        ("\u064d", "6"),     # KASRATAN
        ("\u064c", "7"),     # DAMMATAN
        ("\u0651", "8"),     # SHADDA
        ("11", "100"),       # SUKUN with SUKUN
        ("12", "100"),       # SUKUN with FATHA
        ("13", "100"),       # SUKUN with KASRA
        ("14", "100"),       # SUKUN with DAMMA
        ("15", "100"),       # SUKUN with FATHATAN
        ("82", "9"),         # SHADDA with FATHA
        ("83", "10"),        # SHADDA with KASRA
        ("84", "11"),        # SHADDA with DAMMA
    )

    # Marker code emitted for each form of a word-initial alif.
    _INITIAL_ALIF_CODES = {
        "\u0627": "12",      # ALEF
        "\u0623": "13",      # ALEF WITH HAMZA ABOVE
        "\u0625": "14",      # ALEF WITH HAMZA BELOW
        "\u0622": "15",      # ALEF WITH MADDA ABOVE
    }

    # Class-level defaults; __init__ overrides them per instance on the paths
    # that need them. The early-return paths leave these defaults in place.
    word1, word2 = "", ""            # two words to be compared
    conflictFlags = [False for i in range(5)]  # index = direction code + 1
    verdict = "null"                 # "Compatible" or "Incompatible" once computed
    word1Undiac = ""                 # word1 after arStrip (see __init__)
    word2Undiac = ""                 # word2 after arStrip (see __init__)
    word1Diacritics = []             # diacritic codes of the first word
    word2Diacritics = []             # diacritic codes of the second word
    direction = -2147483648          # arbitrary low sentinel: not computed yet
    distance = -2147483648           # arbitrary low sentinel: not computed yet
    conflicts = -2147483648          # arbitrary low sentinel: not computed yet
    lettersDirection = []            # implication direction per letter position

    def __init__(self, inputWord1, inputWord2):
        """
        Compare ``inputWord1`` with ``inputWord2`` and store the result on
        the instance.

        Args:
            :obj:`str` inputWord1: first word.
            :obj:`str` inputWord2: second word.
        """
        # Exactly one of the two inputs is empty/None: nothing to compare.
        if bool(inputWord1) != bool(inputWord2):
            self._mark_different_letters()
            return

        self.conflictFlags = [False] * 5
        self.word1 = Implication.normalize_alef(inputWord1)  # unify alif
        self.word2 = Implication.normalize_alef(inputWord2)  # unify alif

        if self.word1 == self.word2:
            self._mark_identical()
            return

        self.lettersDirection = []
        try:
            self.word1Diacritics = Implication.get_diacritics_array(self.word1)
            self.word2Diacritics = Implication.get_diacritics_array(self.word2)
        except Exception:
            # A word that cannot be encoded (e.g. starts with a diacritic or
            # has no letters) is reported as incompatible, not as an error.
            self._mark_different_letters()
            return
        # Slot 0 holds the first-letter (alif) direction; slot i + 1 holds
        # the direction of diacritic pair i.
        self.lettersDirection = [0] * (len(self.word1Diacritics) + 1)

        if (Implication.diacritics_syntax_error_in(self.word1Diacritics)
                or Implication.diacritics_syntax_error_in(self.word2Diacritics)):
            self._mark_different_letters()
            return

        # NOTE(review): arStrip lives in sinatools.utils.parser and is not
        # visible here; the rest of the class treats its result as the bare
        # letters of the word - confirm the flags give that behaviour.
        self.word1Undiac = arStrip(self.word1, diacs=False, shaddah=False)
        self.word2Undiac = arStrip(self.word2, diacs=False, shaddah=False)

        # Two one-letter words with the same letter are compatible regardless
        # of the diacritic carried by that letter.
        if (len(self.word1Undiac) == 1 and len(self.word2Undiac) == 1
                and self.word1Undiac == self.word2Undiac):
            self._mark_identical()
        else:
            self.lettersDirection[0] = 3
            self.calculate_words_implication()

    def _mark_different_letters(self):
        """Record the 'different letters / invalid input' outcome."""
        self.verdict = "Incompatible"
        self.direction = -3  # the two words have different letters
        self.distance = 3000
        self.conflicts = 0

    def _mark_identical(self):
        """Record the 'exactly the same' outcome."""
        self.verdict = "Compatible"
        self.direction = 3
        self.distance = 0
        self.conflicts = 0

    def get_non_preferred_word(self, word1, word2):
        """
        Return the word that ``get_preferred_word`` did not pick.

        Args:
            :obj:`str` word1: The first word.
            :obj:`str` word2: The second word.

        Returns:
            :obj:`str`: The non-preferred word. If only one word is
            non-empty that word is returned; ``None`` if both are empty.
        """
        word1 = (word1 or "").strip()
        word2 = (word2 or "").strip()
        if not word1:
            return word2 or None
        if not word2:
            return word1
        preferred = self.get_preferred_word(word1, word2)
        return word2 if word1 == preferred else word1

    def get_preferred_word(self, word1, word2):
        """
        Return the preferred word out of two given words.

        Args:
            :obj:`str` word1: The first word.
            :obj:`str` word2: The second word.

        Returns:
            :obj:`str`: The preferred word; ``""`` when the words are not
            close enough (distance >= 15) or have no usable direction. If
            only one word is non-empty that word is returned; ``None`` if
            both are empty.
        """
        word1 = (word1 or "").strip()
        word2 = (word2 or "").strip()
        if not word1:
            return word2 or None
        if not word2:
            return word1

        implication = Implication(word1, word2)
        direction = implication.get_direction()
        if implication.get_distance() < 15:
            if direction in (0, 2):
                return word1
            if direction == 1:
                return word2
            if direction == 3:
                # Equal words: keep word1 only if it ends with FATHA or DAMMA.
                if not word1.endswith(("\u064e", "\u064f")):
                    return word2
                return word1
        return ""

    @staticmethod
    def normalize_alef(word):
        """
        Normalize alif-related spelling variants in ``word``.

        A trailing FATHATAN written before a final alef maksura or alef is
        moved after it, and a leading alef wasla is replaced by a bare alef.

        Args:
            word (:obj:`str`): The input word to be normalized.

        Returns:
            :obj:`str`: The normalized word.
        """
        # FATHATAN + ALEF MAKSURA -> ALEF MAKSURA + FATHATAN
        if word.endswith("\u064b\u0649"):
            word = word[:-2] + "\u0649\u064b"
        # FATHATAN + ALEF -> ALEF + FATHATAN
        if word.endswith("\u064b\u0627"):
            word = word[:-2] + "\u0627\u064b"
        # Leading ALEF WASLA -> ALEF
        if word.startswith("\u0671"):
            word = "\u0627" + word[1:]
        return word

    @staticmethod
    def diacritics_syntax_error_in(diacriticsArray):
        """
        Check whether an array of diacritic codes contains an invalid code.

        The last entry is validated with ``wrong_end_diacritic`` and every
        other entry with ``wrong_middle_iacritic``.

        Args:
            diacriticsArray (:obj:`list`): diacritic codes (integers) as
                returned by ``get_diacritics_array``.

        Returns:
            :obj:`bool`: True if a code is invalid for its position, False
            otherwise (also False when the array cannot be checked, e.g. it
            is empty).

        **Example:**

        .. highlight:: python
        .. code-block:: python

            from sinatools.utils.implication import Implication

            Implication.diacritics_syntax_error_in([2, 4, 3, 0])  # False
            Implication.diacritics_syntax_error_in([5, 0])        # True (tanween mid-word)
        """
        try:
            if Implication.wrong_end_diacritic(diacriticsArray[-1]):
                return True
            return any(
                Implication.wrong_middle_iacritic(code)
                for code in diacriticsArray[:-1]
            )
        except Exception:
            # Best effort: an uncheckable array is reported as error-free.
            return False

    @staticmethod
    def wrong_end_diacritic(diac):
        """
        Check whether ``diac`` is invalid as the diacritic of the last letter.

        Args:
            diac (:obj:`int`): The diacritic code to be checked.

        Returns:
            :obj:`bool`: False for codes 0-11 and for 85-87 (shadda with
            fathatan / kasratan / dammatan); True for any other code.

        **Example:**

        .. highlight:: python
        .. code-block:: python

            from sinatools.utils.implication import Implication

            Implication.wrong_end_diacritic(0)   # False
            Implication.wrong_end_diacritic(85)  # False
            Implication.wrong_end_diacritic(12)  # True
        """
        if 0 <= diac <= 11:
            return False
        return diac < 85 or diac > 87

    @staticmethod
    def wrong_middle_iacritic(diac):
        """
        Check whether ``diac`` is invalid as a non-final diacritic.

        Args:
            diac (:obj:`int`): The diacritic code to be checked.

        Returns:
            :obj:`bool`: False for codes 0-4 and 8-15; True otherwise
            (tanween codes 5-7, negatives, and anything above 15).
        """
        if 0 <= diac <= 4:
            return False
        return diac < 8 or diac > 15

    def calculate_words_implication(self):
        """
        Compute verdict, direction, distance and conflicts for the two words.

        Requires ``word1Undiac``/``word2Undiac``, the diacritics arrays and
        ``lettersDirection`` to be set (done by ``__init__``). Note that it
        calls ``equal_words``, which strips the first letter from both
        undiacritized words.

        Returns:
            None
        """
        self.verdict = "Incompatible"
        self.direction = -2
        self.distance = 1000

        if not self.equal_words():
            # Letters differ. equal_words() already removed the first letter,
            # so two empty remainders mean both words had at most one letter.
            if len(self.word1Undiac) == 0 and len(self.word2Undiac) == 0:
                if self.word1 == self.word2:
                    self.conflicts = 0
                    self.distance = 0
                    self.direction = 3
                else:
                    self.conflicts = 1
                    self.distance = 1000
                    self.direction = -2
            else:
                self.conflicts = max(len(self.word1Undiac), len(self.word2Undiac))
            return

        if self.calculate_letters_implication():
            self.direction = self.calculate_direction()
            if self.direction == -1:
                self.distance = 101
            else:
                self.verdict = "Compatible"
        else:
            self.direction = -3  # the two words have different letters
            self.distance = 3000
            self.conflicts = 0

    def equal_words(self):
        """
        Check whether the two undiacritized words have the same letters,
        treating a bare alif and a hamza/madda alif as matching first letters.

        Side effects: removes the first letter from ``word1Undiac`` and
        ``word2Undiac``; when the first letters are different alif forms it
        sets ``lettersDirection[0]`` and the matching conflict flag.

        Returns:
            :obj:`bool`: True if the words are equal, False otherwise.

        **Example:**

        .. highlight:: python
        .. code-block:: python

            from sinatools.utils.implication import Implication

            implication = Implication(word1, word2)
            result = implication.equal_words()
        """
        hamza_alifs = ("\u0622", "\u0623", "\u0625")  # madda, hamza above, hamza below
        bare_alif = "\u0627"

        first1 = self.word1Undiac[0:1]
        first2 = self.word2Undiac[0:1]
        self.word1Undiac = self.word1Undiac[1:]
        self.word2Undiac = self.word2Undiac[1:]

        if self.word1Undiac != self.word2Undiac:
            return False
        if first1 == first2:
            return True

        if first1 == bare_alif and first2 in hamza_alifs:
            self.lettersDirection[0] = 1  # w1 implies w2
            self.conflictFlags[2] = True
            return True
        if first1 in hamza_alifs and first2 == bare_alif:
            self.lettersDirection[0] = 2  # w2 implies w1
            self.conflictFlags[3] = True
            return True
        return False

    def _record_pair(self, position, word1Diac, word2Diac):
        """Fold one diacritic pair into lettersDirection, conflictFlags and distance."""
        pair_direction = self.directionTable[word1Diac][word2Diac]
        self.lettersDirection[position] = pair_direction
        # Direction codes run from -1 to 3, so shift by one to index the flags.
        self.conflictFlags[pair_direction + 1] = True
        self.distance = self.distance + self.distanceTable[word1Diac][word2Diac]

    def calculate_letters_implication(self):
        """
        Compare the two diacritics arrays position by position and update
        ``lettersDirection``, ``conflictFlags`` and ``distance``.

        The last position only counts when one of the words carries a shadda
        (code 8) there.

        Returns:
            :obj:`bool`: True when the comparison was carried out, False when
            the two diacritics arrays have different lengths and therefore
            cannot be aligned.
        """
        self.distance = 0
        if len(self.word1Diacritics) != len(self.word2Diacritics):
            return False

        last = len(self.word1Diacritics) - 1
        for i in range(last):
            self._record_pair(i + 1, self.word1Diacritics[i], self.word2Diacritics[i])

        word1Diac = int(self.word1Diacritics[last])  # last letter diacritic of word1
        word2Diac = int(self.word2Diacritics[last])  # last letter diacritic of word2
        # 8: expresses the presence of shaddah
        if word1Diac == 8 or word2Diac == 8:
            self._record_pair(len(self.lettersDirection) - 1, word1Diac, word2Diac)
        return True

    def calculate_direction(self):
        """
        Derive the overall direction from the conflict flags; also resets
        ``conflicts`` to 0.

        Returns:
            :obj:`int`: The direction of compatibility:
                -1: Incompatible-diacritics
                0: Compatible-imply each other
                1: Compatible-w1 implies w2
                2: Compatible-w2 implies w1
                3: Compatible-exactly equal
                -2147483648: Default value for an invalid direction
        """
        self.conflicts = 0
        incompatible = self.conflictFlags[0]
        w1_implies_w2 = self.conflictFlags[2]
        w2_implies_w1 = self.conflictFlags[3]

        if incompatible:
            return -1
        if w1_implies_w2 and w2_implies_w1:
            return 0
        if w1_implies_w2:
            return 1
        if w2_implies_w1:
            return 2
        if self.conflictFlags[4]:
            return 3
        return -2147483648

    @staticmethod
    def get_diacritics_array(word):
        """
        Convert the diacritics of ``word`` to integer codes, one per letter.

        For a word starting with an alif form, an extra leading entry
        (12-15) encodes which alif it is. A plain diacritic on the last
        letter is reported as 0 and any shadda combination on the last
        letter as 8.

        Args:
            word (:obj:`str`): The word with diacritics.

        Returns:
            :obj:`list`: The diacritic codes as integers.

        Raises:
            ValueError: If the word starts with a diacritic or digit.
            IndexError: If the word contains no letters at all.

        **Example:**

        .. highlight:: python
        .. code-block:: python

            from sinatools.utils.implication import Implication

            Implication.get_diacritics_array("كتب")     # [0, 0, 0]
            Implication.get_diacritics_array("كَتَبَ")  # [2, 2, 0]
        """
        for mark, code in Implication._SUBSTITUTIONS:
            word = word.replace(mark, code)

        # Tag a word-initial alif with its form and normalize it to bare alif.
        alif_code = Implication._INITIAL_ALIF_CODES.get(word[0:1])
        if alif_code is not None:
            word = "\u0627" + alif_code + "," + word[1:]

        if word[0:1].isdigit():  # a word should not begin with a diacritic
            raise ValueError("Sorry, First char is digit")

        # Every letter becomes a separator; digit codes and commas are kept.
        word = "".join(
            ch if (ch.isdigit() or ch == ",") else "," for ch in word
        )
        # A trailing separator means the last letter has no diacritic.
        if word[len(word) - 1] == ",":
            word = word + ","
        while ",," in word:
            word = word.replace(",,", ",0,")  # No-DIACRITIC

        word = word[1:]  # drop the separator produced by the first letter
        diacritics = word.split(",")
        if "" in diacritics:
            diacritics.remove("")

        # Case endings on the last letter are ignored; only shadda is kept.
        last_code = diacritics[-1]
        if last_code in ("8", "9", "10", "11", "85", "86", "87"):
            diacritics[-1] = "8"
        elif last_code in ("1", "2", "3", "4", "5", "6", "7"):
            diacritics[-1] = "0"

        return [int(code) for code in diacritics]

    @staticmethod
    def get_letters_array(word):
        """
        Return the characters of ``word`` after ``arStrip``.

        Args:
            word (:obj:`str`): The word from which to extract the letters.

        Returns:
            :obj:`list`: The array of letters.

        **Example:**

        .. highlight:: python
        .. code-block:: python

            from sinatools.utils.implication import Implication

            letters = Implication.get_letters_array("مرحبا")
        """
        # NOTE(review): the exact characters removed depend on arStrip
        # (defined in sinatools.utils.parser, not visible here).
        word = arStrip(word, diacs=False, shaddah=False)
        return list(word)

    def get_verdict(self):
        """Return the verdict ("Compatible", "Incompatible" or the default "null")."""
        return self.verdict

    def get_direction(self):
        """Return the implication direction code."""
        return self.direction

    def get_distance(self):
        """Return the accumulated diacritic distance."""
        return self.distance

    def get_conflicts(self):
        """Return the number of conflicts."""
        return self.conflicts

    def get_word1(self):
        """Return the (alif-normalized) first word."""
        return self.word1

    def get_word2(self):
        """Return the (alif-normalized) second word."""
        return self.word2

    def get_result(self):
        """
        Summarize the comparison as *Same* or *Different*.

        Returns:
            :obj:`str`: "Same" when the direction is non-negative and the
            distance is below 15, otherwise "Different". The value is also
            stored in ``self.result``.

        **Example:**

        .. highlight:: python
        .. code-block:: python

            from sinatools.utils.implication import Implication

            Implication("ذهب", "ذهب").get_result()  # "Same"
            Implication("ذهب", "").get_result()     # "Different"
        """
        if self.get_direction() >= 0 and self.get_distance() < 15:
            self.result = "Same"
        else:
            self.result = "Different"
        return self.result

    def toString(self):
        """Return word1, word2, verdict, direction, distance and conflicts, tab-separated."""
        return (self.word1 + "\t" + self.word2 + "\t" + str(self.verdict) + "\t"
                + str(self.direction) + "\t" + str(self.distance) + "\t"
                + str(self.conflicts))