sharedkernel 1.6.1__tar.gz → 1.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/PKG-INFO +5 -1
  2. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/README.md +4 -0
  3. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/setup.py +1 -1
  4. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel.egg-info/PKG-INFO +5 -1
  5. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel.egg-info/SOURCES.txt +0 -1
  6. sharedkernel-1.6.1/sharedkernel/normalizer.py +0 -498
  7. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/setup.cfg +0 -0
  8. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/common.py +0 -0
  9. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/database/__init__.py +0 -0
  10. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/database/mongo_generic_repository.py +0 -0
  11. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/database/vector_database_repository/__init__.py +0 -0
  12. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/database/vector_database_repository/chroma_startegy.py +0 -0
  13. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/database/vector_database_repository/milvus_strategy.py +0 -0
  14. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/database/vector_database_repository/vector_database_repository.py +0 -0
  15. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/database/vector_database_repository/vector_database_strategy.py +0 -0
  16. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/date_converter.py +0 -0
  17. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/enum/__init__.py +0 -0
  18. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/enum/error_code.py +0 -0
  19. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/enum/vector_database_type.py +0 -0
  20. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/exception/__init__.py +0 -0
  21. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/exception/exception.py +0 -0
  22. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/exception/exception_handlers.py +0 -0
  23. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/jwt_service.py +0 -0
  24. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/objects/__init__.py +0 -0
  25. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/objects/base_document.py +0 -0
  26. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/objects/jwt_model.py +0 -0
  27. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/objects/result.py +0 -0
  28. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/regex_masking.py +0 -0
  29. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel/string_extentions.py +0 -0
  30. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel.egg-info/dependency_links.txt +0 -0
  31. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel.egg-info/requires.txt +0 -0
  32. {sharedkernel-1.6.1 → sharedkernel-1.6.3}/sharedkernel.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sharedkernel
3
- Version: 1.6.1
3
+ Version: 1.6.3
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -20,6 +20,10 @@ Requires-Dist: persiantools
20
20
  this a shared kernel package
21
21
 
22
22
  # Change Log
23
+ ### Version 1.6.3
24
+ - Fix minor bug in phone normalizer
25
+ ### Version 1.6.2
26
+ - Minor update: normalize function name
23
27
  ### Version 1.6.1
24
28
  - Minor update normalize functions
25
29
  ### Version 1.6
@@ -2,6 +2,10 @@
2
2
  this a shared kernel package
3
3
 
4
4
  # Change Log
5
+ ### Version 1.6.3
6
+ - Fix minor bug in phone normalizer
7
+ ### Version 1.6.2
8
+ - Minor update: normalize function name
5
9
  ### Version 1.6.1
6
10
  - Minor update normalize functions
7
11
  ### Version 1.6
@@ -33,7 +33,7 @@ setup(
33
33
  "persiantools"
34
34
  ],
35
35
  # *strongly* suggested for sharing
36
- version="1.6.1",
36
+ version="1.6.3",
37
37
  description="sharekernel is a shared package between all python projects",
38
38
  long_description=long_description,
39
39
  long_description_content_type="text/markdown",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sharedkernel
3
- Version: 1.6.1
3
+ Version: 1.6.3
4
4
  Summary: sharekernel is a shared package between all python projects
5
5
  Author: Smilinno
6
6
  Description-Content-Type: text/markdown
@@ -20,6 +20,10 @@ Requires-Dist: persiantools
20
20
  this a shared kernel package
21
21
 
22
22
  # Change Log
23
+ ### Version 1.6.3
24
+ - Fix minor bug in phone normalizer
25
+ ### Version 1.6.2
26
+ - Minor update: normalize function name
23
27
  ### Version 1.6.1
24
28
  - Minor update normalize functions
25
29
  ### Version 1.6
@@ -3,7 +3,6 @@ setup.py
3
3
  sharedkernel/common.py
4
4
  sharedkernel/date_converter.py
5
5
  sharedkernel/jwt_service.py
6
- sharedkernel/normalizer.py
7
6
  sharedkernel/regex_masking.py
8
7
  sharedkernel/string_extentions.py
9
8
  sharedkernel.egg-info/PKG-INFO
@@ -1,498 +0,0 @@
1
- import re
2
- import itertools
3
-
4
- """
5
- Speech-to-Text Processing Systems for Iranian Dialects:
6
-
7
- Speech-to-text processing systems often struggle to accurately transcribe phone numbers spoken by Iranians.
8
- As a result, phone numbers and sequential digits may not be written accurately.
9
- This is because individuals sometimes read phone numbers in a shortened or rounded manner.
10
- Therefore, it is necessary to design a module that considers all these variations, normalizes the text,
11
- and constructs a correct phone number.
12
-
13
- Input and Output Specifications:
14
-
15
- Input: The input will be in str format.
16
- Output: The output will also be in str format.
17
- If a phone number is identified, it will be processed and replaced with a corrected version.
18
- If no phone number is found, the text will be returned in its original format and structure.
19
- """
20
-
21
-
22
- class DigitMapping:
23
- PERSIAN_DIGITS = "۰۱۲۳۴۵۶۷۸۹"
24
- WESTERN_DIGITS = "0123456789"
25
- PHONE_NUMBER_LENGTH_FULL = 11
26
- PHONE_NUMBER_LENGTH_PARTIAL = 10
27
- SEVEN_DIGIT_PREFIX_LENGTH = 7
28
- MIN_NUMBER_FOR_CHECK = 8
29
- MIN_NUMBER_FOR_GENERATE = 5
30
- MAX_ITERATIONS_CHECK = 4
31
- PICK_FIRST_OR_LAST_LENGTH_NUMBER = 4
32
- MAX_CHAR_CHECK = 30
33
- MIN_CHAR_CHECK = 7
34
- FIND_MIN_REPEAT_NUM = 3
35
- GENERATIVE_VALID_NUMBER_COUNT = 1
36
- number_words = {
37
- # Persian
38
- "صفر": "0",
39
- "یک": "1",
40
- "دو": "2",
41
- "سه": "3",
42
- "چهار": "4",
43
- "پنج": "5",
44
- "شش": "6",
45
- "هفت": "7",
46
- "هشت": "8",
47
- "نه": "9",
48
- }
49
-
50
-
51
-
52
-
53
- # Creating an instance of the DigitMapping class
54
- digit_mapping = DigitMapping()
55
-
56
-
57
- class PhoneNumberProcessor:
58
- # Iranian mobile phone number area code
59
-
60
- def __replace_number_words(self, text):
61
- pattern = r"(\d+)([a-zA-Z]+)"
62
- text = re.sub(pattern, r"\1 \2", text)
63
- for word, digit in digit_mapping.number_words.items():
64
- text = re.sub(r"\b" + word + r"\b", digit, text, flags=re.IGNORECASE)
65
- return text
66
-
67
- def __first_to_in_phone_number(self, number_str):
68
- """
69
- Identify patterns where a number is followed by "تا" and another number.
70
- Generate all combinations for numbers preceding and following "تا"
71
- and return them as a list.
72
- """
73
- generated_numbers = []
74
- ta_pattern = re.compile(r"(\d)(?=\s*تا\s*(\d+))")
75
- matches = ta_pattern.findall(number_str)
76
-
77
- if matches:
78
- num1, num2 = matches[0] # Extract the first match
79
- combinations = self.__generate_prefix_combinations(num1, num2)
80
-
81
- for combo in combinations:
82
- repeated_number = combo[1] * int(combo[0])
83
- pattern = re.escape(f"{combo[0]} تا {combo[1]}")
84
- generated_numbers.append(re.sub(pattern, repeated_number, number_str))
85
-
86
- return generated_numbers
87
- return number_str
88
-
89
- def __process_to_constructions(self, number_str):
90
- """
91
- Process occurrences of 'تا' in the input string to generate possible number combinations.
92
- This function calls `__first_to_in_phone_number` to handle the first 'تا' occurrence and
93
- iteratively processes further occurrences if necessary.
94
- """
95
- # Start processing the first 'تا'
96
- generated_numbers = self.__first_to_in_phone_number(number_str)
97
-
98
- if isinstance(generated_numbers, str):
99
- return generated_numbers
100
-
101
- ta_pattern = re.compile(r"(\d+)(?=\s*تا\s*(\d+))")
102
-
103
- i = 0
104
- while i < len(generated_numbers):
105
- current_number = generated_numbers[i]
106
- if ta_pattern.search(current_number):
107
- new_combinations = self.__first_to_in_phone_number(current_number)
108
- generated_numbers.pop(i)
109
- generated_numbers[i:i] = new_combinations
110
- else:
111
- i += 1
112
-
113
- # Select the appropriate number from the generated list
114
- selected_number = next(
115
- (
116
- num
117
- for num in generated_numbers
118
- if len("".join(num.split())) == digit_mapping.PHONE_NUMBER_LENGTH_FULL
119
- ),
120
- next(
121
- (
122
- num
123
- for num in generated_numbers
124
- if len("".join(num.split()))
125
- == digit_mapping.PHONE_NUMBER_LENGTH_PARTIAL
126
- ),
127
- None,
128
- ),
129
- )
130
-
131
- return "".join(selected_number.split()) if selected_number else number_str
132
-
133
- def __generate_prefix_combinations(self, prefix1: str, prefix2: str):
134
- """
135
- Generate all possible combinations of the prefixes of `prefix1` and `prefix2`
136
- using itertools to avoid explicit loops.
137
- """
138
- len1, len2 = len(prefix1), len(prefix2)
139
- indices_product = itertools.product(range(1, len1 + 1), range(1, len2 + 1))
140
-
141
- # Generate the combinations using the indices
142
- return [(prefix1[:i], prefix2[:j]) for i, j in indices_product]
143
-
144
- def __add_spaces_around_to(self, input_text):
145
- """
146
- Ensure "تا" has spaces around it and remove any redundant spaces.
147
- """
148
- # Use regular expressions to add spaces around "تا"
149
- modified_text = re.sub(r"\s*تا\s*", " تا ", input_text)
150
-
151
- return modified_text
152
-
153
- def __generate_next_numbers(self, num):
154
- str_num = str(num)
155
- next_numbers = set()
156
- for i in range(len(str_num) - 1):
157
- current_digit = int(str_num[i])
158
- next_digit = int(str_num[i + 1])
159
- new_num = (
160
- str_num[:i]
161
- + str(int(current_digit) * str(next_digit))
162
- + str_num[i + 2 :]
163
- )
164
- if len(new_num) <= 7:
165
- next_numbers.add(str(new_num))
166
- return next_numbers
167
-
168
- def __find_seven_chain_numbers(self, start_num):
169
- current_numbers = {start_num}
170
- all_numbers = set(current_numbers)
171
- while len(all_numbers) < digit_mapping.SEVEN_DIGIT_PREFIX_LENGTH:
172
- next_numbers = set()
173
-
174
- # Using a for loop to generate new numbers
175
- for num in current_numbers:
176
- generated_numbers = self.__generate_next_numbers(num)
177
- new_numbers = generated_numbers - all_numbers
178
- next_numbers.update(new_numbers)
179
-
180
- if not next_numbers: # Exit loop if no new numbers are generated
181
- break
182
-
183
- all_numbers.update(next_numbers)
184
- current_numbers = next_numbers
185
-
186
- # Early exit if we find any 7-digit number
187
- if any(
188
- len(str(num)) == digit_mapping.SEVEN_DIGIT_PREFIX_LENGTH
189
- for num in next_numbers
190
- ):
191
- break
192
- # Collect all 7-digit numbers and return them sorted
193
- seven_digit_numbers = sorted(
194
- num
195
- for num in all_numbers
196
- if len(str(num)) == digit_mapping.SEVEN_DIGIT_PREFIX_LENGTH
197
- )
198
- return seven_digit_numbers
199
-
200
- def __apply_filter(self, numbers, index, condition):
201
- return (
202
- [num for num in numbers if condition(num)] if len(numbers) >= 2 else numbers
203
- )
204
-
205
- def __generate_valid_numbers(self, start_num):
206
- numbers_list = self.__find_seven_chain_numbers(start_num)
207
-
208
- # If the number list length matches the valid criteria, return the first number
209
- if len(numbers_list) == digit_mapping.GENERATIVE_VALID_NUMBER_COUNT:
210
- return numbers_list[0] if numbers_list else start_num
211
-
212
- start_num_str = str(start_num)
213
- unique_count = len(set(start_num_str))
214
- # Step 1: Filter numbers with the same or one less unique digit count
215
- filtered_numbers = [
216
- num
217
- for num in numbers_list
218
- if len(set(str(num))) in {unique_count, unique_count - 1}
219
- ]
220
- # Apply additional filters only if there are at least 2 candidates remaining
221
- # Step 2: Filter by last digit
222
- filtered_numbers = self.__apply_filter(
223
- filtered_numbers, -1, lambda num: str(num)[-1] == start_num_str[-1]
224
- )
225
- # Step 3: Filter by second digit
226
- filtered_numbers = self.__apply_filter(
227
- filtered_numbers, 1, lambda num: str(num)[1] == start_num_str[1]
228
- )
229
- # Step 4: Filter by last 4 digits
230
- filtered_numbers = self.__apply_filter(
231
- filtered_numbers, -4, lambda num: str(num)[-4:] == start_num_str[-4:]
232
- )
233
-
234
- # Return the first valid number or fallback
235
- return (
236
- filtered_numbers[0]
237
- if filtered_numbers
238
- else (numbers_list[0] if numbers_list else start_num)
239
- )
240
-
241
- def __check_area_code(self, input_text):
242
- """
243
- Processes the input Persian text to ensure it has a valid area
244
- code and valid number format.
245
- """
246
- # Remove all non-digit characters from the input
247
- digits_only = re.sub(r"\D", "", input_text)
248
-
249
- # Ensure there are enough digits to process
250
- if len(digits_only) <= digit_mapping.PICK_FIRST_OR_LAST_LENGTH_NUMBER:
251
- return digits_only
252
-
253
- # Extract the area code (first 4 digits) and the remaining number
254
- area_code = digits_only[: digit_mapping.PICK_FIRST_OR_LAST_LENGTH_NUMBER]
255
- number_part = digits_only[digit_mapping.PICK_FIRST_OR_LAST_LENGTH_NUMBER :]
256
-
257
- # Generate valid number formats based on the remaining part
258
- valid_number = self.__generate_valid_numbers(number_part)
259
- # Return the formatted result
260
- if not valid_number:
261
- return digits_only
262
- return f"{area_code}{valid_number}"
263
-
264
- def __insert_repeated_number(self, number_str):
265
- """
266
- Finds the longest repeated sequence in the string and inserts one more instance of the repeated number
267
- to extend the sequence, without using explicit `for` loops.
268
- """
269
- # Group consecutive identical characters and find the longest group
270
- grouped = [
271
- (char, len(list(group))) for char, group in itertools.groupby(number_str)
272
- ]
273
-
274
- # Find the max repeated group
275
- max_repeated_char, max_repeated_len = max(grouped, key=lambda x: x[1])
276
-
277
- # Only extend the sequence if the maximum repeated length is greater than 1
278
- if max_repeated_len > 1:
279
- # Find the start index of the sequence
280
- start_index = number_str.find(max_repeated_char * max_repeated_len)
281
-
282
- # Extend the sequence by one character and return the modified string
283
- extended_sequence = max_repeated_char * (max_repeated_len + 1)
284
- return (
285
- number_str[:start_index]
286
- + extended_sequence
287
- + number_str[start_index + max_repeated_len :]
288
- )
289
-
290
- return number_str
291
-
292
- def __add_single_repeating_digit_between_repeats(self, number_str):
293
- # If the input string is already longer than the partial phone number length, return it as is
294
- if len(number_str) > digit_mapping.PHONE_NUMBER_LENGTH_PARTIAL:
295
- return number_str
296
-
297
- # Split the string into two parts: the prefix (e.g., area code) and the remainder
298
- prefix = number_str[: digit_mapping.PICK_FIRST_OR_LAST_LENGTH_NUMBER]
299
- number_str = number_str[digit_mapping.PICK_FIRST_OR_LAST_LENGTH_NUMBER :]
300
-
301
- result = []
302
- i = 0
303
- repeat_count = 1 # Track the count of consecutive repeating digits
304
-
305
- while i < len(number_str):
306
- # Check if the next digit is the same as the current one
307
- if i > 0 and number_str[i] == number_str[i - 1]:
308
- repeat_count += 1
309
- else:
310
- repeat_count = 1 # Reset the repeat count if the digit changes
311
-
312
- # If we have a sequence of three or more repeating digits
313
- if repeat_count == digit_mapping.FIND_MIN_REPEAT_NUM:
314
- # Add a single extra repeating digit and then reset the counter
315
- result.append(number_str[i])
316
- repeat_count = 1
317
-
318
- # Add the current digit to the result
319
- result.append(number_str[i])
320
- i += 1
321
-
322
- # Concatenate the prefix with the processed result and return the final string
323
- return prefix + "".join(result)
324
-
325
- def __process_phone_number(self, phone_number):
326
- """
327
- Process the Persian phone number string by adding a leading zero if missing,
328
- removing spaces, and checking the area code.
329
- """
330
- # Add leading zero to the area code if it's missing
331
- if phone_number[0] != "0":
332
- phone_number = "0" + phone_number
333
-
334
- # Remove any spaces from the phone number
335
- phone_number = "".join(phone_number.split())
336
-
337
- # If the phone number is 10 digits or fewer, validate the area code
338
- if len(phone_number) <= digit_mapping.PHONE_NUMBER_LENGTH_PARTIAL:
339
- validated_number = self.__check_area_code(phone_number)
340
- return validated_number, phone_number
341
-
342
- # If the phone number is longer than expected, return it as-is
343
- return phone_number, phone_number
344
-
345
- def __clean_and_concatenate_numbers(self, text):
346
- """
347
- Process the input text by adding spaces around specific constructions,
348
- removing unnecessary spaces, handling special constructions, and concatenating numbers.
349
- """
350
- # Add spaces around the Persian word "تا"
351
- text = self.__add_spaces_around_to(text)
352
- # Remove spaces between numbers
353
- text = self.__remove_spaces_between_numbers(text)
354
- """ Handle special constructions like replacing "تا"
355
- with corresponding numbers and removing it"""
356
- text = self.__process_to_constructions(text)
357
- # Concatenate digits without any spaces between them
358
- text = re.sub(r"(\d)\s+(\d)", r"\1\2", text)
359
-
360
- return text
361
-
362
- def __english_to_persian(self, text):
363
- farsi_to_latin = str.maketrans(
364
- digit_mapping.WESTERN_DIGITS, digit_mapping.PERSIAN_DIGITS
365
- )
366
- return text.translate(farsi_to_latin)
367
-
368
- def __persian_to_western(self, persian_number):
369
- translation_table = str.maketrans(
370
- digit_mapping.PERSIAN_DIGITS, digit_mapping.WESTERN_DIGITS
371
- )
372
- return persian_number.translate(translation_table)
373
-
374
- def __remove_spaces_between_numbers(self, text):
375
- # This regex will match spaces that are between two digits
376
- return re.sub(r"(\d)\s+(\d)", r"\1\2", text)
377
-
378
- def __process_patterned_numbers(self, number_sequence):
379
- if number_sequence is None:
380
- return number_sequence
381
- # Convert Persian numbers to Western (English) numbers
382
- western_number_sequence = self.__persian_to_western(number_sequence)
383
-
384
- # Remove spaces between numbers to form a continuous sequence
385
- contiguous_numbers = self.__remove_spaces_between_numbers(western_number_sequence)
386
-
387
- # Clean up any remaining spaces
388
- clean_number_sequence = "".join(contiguous_numbers.split())
389
-
390
- # Clean and concatenate numbers to form the phone number
391
- concatenated_numbers = self.__clean_and_concatenate_numbers(clean_number_sequence)
392
-
393
- # Add single repeating digit between repeats in the phone number
394
- processed_sequence = self.__add_single_repeating_digit_between_repeats(
395
- concatenated_numbers
396
- )
397
-
398
- # Process and validate the phone number
399
- final_number, original_sequence = self.__process_phone_number(processed_sequence)
400
-
401
- # Limit the number of iterations to avoid infinite loops
402
-
403
- iterations = 0
404
-
405
- # Loop to handle sequences of length 8 to 10 digits
406
- while (
407
- digit_mapping.MIN_NUMBER_FOR_CHECK
408
- <= len(str(final_number))
409
- <= digit_mapping.PHONE_NUMBER_LENGTH_PARTIAL
410
- and iterations < digit_mapping.MAX_ITERATIONS_CHECK
411
- ):
412
- if len(final_number) == 0:
413
- modified_sequence = self.__insert_repeated_number(str(original_sequence))
414
- else:
415
- modified_sequence = self.__insert_repeated_number(str(final_number))
416
-
417
- # Re-process the phone number
418
- final_number, original_sequence = self.__process_phone_number(
419
- modified_sequence
420
- )
421
-
422
- iterations += 1
423
- # Handle case when final_number is a list
424
- if isinstance(final_number, list):
425
- final_number = final_number[0]
426
-
427
- return final_number
428
-
429
- def __update_text_with_number(self, text, new_number, old_prefix, new_prefix):
430
- """
431
- Update the text by replacing the old prefix with the new prefix and converting it to Persian.
432
- """
433
- processed_text = self.__process_patterned_numbers(new_number)
434
- persian_text = self.__english_to_persian(processed_text)
435
- text = re.sub(old_prefix, new_prefix, text, count=1)
436
- return re.sub(re.escape(new_number), persian_text, text)
437
-
438
- def __update_general_case(self, text, number):
439
- """
440
- Handle general number replacement cases and update the text.
441
- """
442
- processed_text = self.__process_patterned_numbers(number)
443
- persian_text = self.__english_to_persian(processed_text)
444
- return re.sub(re.escape(number), persian_text, text)
445
-
446
- def __process_number_replacement(self, text, number):
447
- """
448
- Process specific number patterns like '۰۹۹' and '۹۹' and replace them with correct forms.
449
- """
450
- first_digits = number[:3]
451
-
452
- if first_digits == "۰۹۹":
453
- new_number = number.replace("۰۹۹", "۰۹۹۹", 1)
454
- return self.__update_text_with_number(text, new_number, "۰۹۹", "۰۹۹۹")
455
-
456
- return self.__update_general_case(text, number)
457
-
458
- @staticmethod
459
- def normalize(text):
460
- """
461
- Normalize phone numbers in the input Persian text by handling specific patterns
462
- and replacing them with the correct numeric forms.
463
- """
464
-
465
- # Compile regex pattern to match number patterns
466
- pattern_regex = re.compile(
467
- r"((0?[1-9][0-9]{1,3})|(۰?[۱-۹][۰-۹]{1,3}))[\s۰-۹0-9تا]*"
468
- )
469
-
470
- # Clean up text by removing extra spaces
471
- cleaned_text = re.sub(r"\s{2,}", " ", text)
472
-
473
- processor = PhoneNumberProcessor()
474
-
475
- # Replace Persian number words with numeric equivalents
476
- cleaned_text = processor.__replace_number_words(cleaned_text)
477
-
478
- # Find all matching number patterns
479
- matches = pattern_regex.finditer(cleaned_text)
480
- results = [
481
- match.group()
482
- for match in matches
483
- if digit_mapping.MIN_CHAR_CHECK
484
- <= len(match.group())
485
- <= digit_mapping.MAX_CHAR_CHECK
486
- ]
487
-
488
- # Process each match to handle specific cases
489
- for result in results:
490
- digits = re.findall(r"\d", result)
491
- if (
492
- digit_mapping.MIN_NUMBER_FOR_GENERATE
493
- <= len(digits)
494
- <= digit_mapping.PHONE_NUMBER_LENGTH_PARTIAL
495
- ):
496
- cleaned_text = processor.__process_number_replacement(cleaned_text, result)
497
-
498
- return cleaned_text if results else text
File without changes