idvpackage 3.0.10__py3-none-any.whl → 3.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,992 +0,0 @@
1
- import cv2
2
- import numpy as np
3
- from google.cloud import vision_v1
4
- from googletrans import Translator
5
- import re
6
- from idvpackage.common import *
7
- import io
8
- import os
9
- from PIL import Image
10
- from deep_translator import GoogleTranslator
11
- import imghdr
12
- import tempfile
13
-
14
- translator = Translator()
15
-
16
def crop_second_part(img):
    """Return the right-hand half of ``img``.

    ``img`` must expose ``size`` as ``(width, height)`` and a ``crop(box)``
    method taking ``(left, upper, right, lower)``.  The crop starts at
    ``width // 2`` and keeps the full height.
    """
    full_width, full_height = img.size
    return img.crop((full_width // 2, 0, full_width, full_height))
21
-
22
-
23
def crop_third_part(img):
    """Return the bottom third of ``img``.

    The crop spans the full width and runs from ``2 * (height // 3)`` down
    to the bottom edge, so any remainder rows from the integer division end
    up in the returned strip.
    """
    full_width, full_height = img.size
    top_edge = 2 * (full_height // 3)
    return img.crop((0, top_edge, full_width, full_height))
28
-
29
-
30
def extract_text_from_image_data(client, image):
    """Run text detection on ``image`` and return the first annotation's text.

    ``image`` is serialised to PNG in memory, wrapped in a
    ``vision_v1.types.Image`` and sent through ``client.text_detection``.

    Raises:
        IndexError: when the response carries no text annotations.  This is
            left to propagate; ``detect_id_card`` in this module catches it
            to fall back to the text it already has.
    """
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    vision_image = vision_v1.types.Image(content=png_bytes)
    annotations = client.text_detection(image=vision_image).text_annotations
    return annotations[0].description
43
-
44
-
45
def detect_image_format(image_data):
    """Return the image type ``imghdr`` recognises in the bytes ``image_data``.

    The result is whatever ``imghdr.what`` reports for the byte string
    (``None`` when the header is not recognised).

    NOTE(review): a second function with this same name is defined further
    down in this module; once the module is fully imported that later
    definition is the one bound to ``detect_image_format``.
    """
    return imghdr.what(None, image_data)
48
-
49
-
50
def create_temporary_file(image_data, image_format):
    """Write ``image_data`` to a new temporary file and return its path.

    The file name ends in ``"." + image_format``.  The file is created with
    ``delete=False``, so it stays on disk after this function returns and
    the caller is responsible for removing it.
    """
    extension = '.' + image_format
    with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as handle:
        handle.write(image_data)
        created_path = handle.name
    return created_path
56
-
57
def detect_image_format(pil_img):
    """Write ``pil_img`` to a temporary image file and return the file path.

    Despite its name, this function returns a *path*, not a format string.
    The file suffix is the lower-cased ``pil_img.format`` (``'jpg'`` when
    the image carries no format).  The file is created with
    ``delete=False``; the caller owns its removal.

    Args:
        pil_img: a PIL image (anything exposing ``format``, ``mode`` and
            ``convert`` and convertible with ``np.array``).

    Returns:
        Path of the temporary file written with ``cv2.imwrite``.
    """
    # Default to 'jpg' if the format is not recognised.
    image_format = pil_img.format.lower() if pil_img.format else 'jpg'

    # The channel flip below assumes an (H, W, 3) RGB array.  Grayscale and
    # palette images are 2-D (the slice would raise) and RGBA would be
    # reversed into ABGR, so normalise everything to RGB first.
    rgb_img = pil_img if pil_img.mode == 'RGB' else pil_img.convert('RGB')
    bgr_pixels = np.array(rgb_img)[:, :, ::-1].copy()  # RGB -> BGR for OpenCV

    # Reserve the name, then close the handle before OpenCV writes to it:
    # an open NamedTemporaryFile cannot be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(suffix='.' + image_format, delete=False) as temp_file:
        temp_file_path = temp_file.name
    cv2.imwrite(temp_file_path, bgr_pixels)

    return temp_file_path
68
-
69
def extract_family_number(arabic_text):
    """Extract the family number from OCR text of an Iraqi ID.

    A series of regular expressions is tried in order and the first hit
    wins.  The first pattern may also capture a 4-6 digit run printed
    *before* the label; when present it is prepended to the value found
    after the label.  The remaining patterns capture a single group.

    Returns:
        ``{"family_number": <str>}`` for the first matching pattern, or
        ``{"family_number": None}`` when none of them match.
    """
    # Generalised label pattern: optional leading digits, label word(s),
    # optional colon, then the alphanumeric value.
    labelled = re.search(
        r'(\d{4,6})?\s*(?:الرقم|العائلي|زماره|زمارة|ژماردی|ژماره|از)\s*(?:العاملي|العائلي|خيزانى|خبرائی|خيرالي|خیزانی|خیزاني|خیزانی)?\s*[::]?\s*([A-Za-z0-9]+)',
        arabic_text,
    )
    if labelled:
        leading_digits = labelled.group(1) or ''
        return {"family_number": leading_digits + labelled.group(2)}

    single_group_patterns = (
        # Value directly after the family label, no leading digits.
        r'(?:الرقم العائلي|ژماردی خیزانی|ژماره ی خیزانی|العائلى از|العائلي)\s*[::]?\s*([A-Za-z0-9]+)',
        # Value on a new line, separated from the label.
        r'(?:الرقم العائلي|ژماردی خیزانی|العائلي|العائلى از)\s*[::]?\s*\n*([A-Za-z0-9]+)',
        # Label written without a colon.
        r'(?:الرقم العائلي|ژماردی خیزانی|العائلي|العائلى از)\s*\n*([A-Za-z0-9]+)',
        # Unlabelled value followed by a colon or stray accent mark.
        r'(\d{4,6}[A-Za-z0-9]+)\s*[:ˋˋˋˋˋˋˋˋ]',
        # Unlabelled value with no clear delimiter.
        r'(\d{4,6}[A-Za-z0-9]+)\s*[:P]?\s*',
    )
    for candidate_pattern in single_group_patterns:
        hit = re.search(candidate_pattern, arabic_text)
        if hit:
            return {"family_number": hit.group(1)}

    return {"family_number": None}
124
-
125
-
126
-
127
-
128
def extract_mother_surname(text):
    """Extract the value printed after the grandfather label that follows the mother label.

    The text is searched for a mother label followed (possibly several
    lines later) by a grandfather label; the rest of that grandfather line
    is taken, stray repetitions of the label are stripped from it, and the
    result is reported as the mother's last name.

    Returns:
        ``{"mother_last_name": <str>}``, or ``{"mother_last_name": None}``
        when the labels are not found or the cleaned value is empty.
    """
    label_pattern = r"(?:الأم|دايك|اديك|دایك)\s*[::]?\s*(\S+)?(?:\n.*?)*(?:الجد|باپير|بابير|بايير|باپیر)\s*[::]?\s*([^\n]*)"
    found = re.search(label_pattern, text, re.DOTALL)
    if not found:
        return {"mother_last_name": None}

    grandfather_value = (found.group(2) or "").strip()
    # Drop label fragments (with surrounding '/', ':' and whitespace) that
    # OCR sometimes repeats inside the captured value.
    surname = re.sub(r"[/:\s]*(بابير|ابير|باپير)[:/\s]*", "", grandfather_value).strip()

    return {"mother_last_name": surname or None}
151
-
152
-
153
-
154
-
155
def extract_mother_name(text):
    """Extract the mother's first name from OCR text.

    Every occurrence of a mother label is inspected.  The Arabic-script
    run after the label (optionally preceded by a ``YYYY-MM-DD`` date) is
    stripped of known label fragments, and the first line of what remains
    is the candidate name.  When several occurrences yield a name, the
    last one wins.

    Returns:
        ``{"mother_first_name": <str>}``, or
        ``{"mother_first_name": None}`` when no usable name is found.  The
        result is initialised up front so a text without any match no
        longer fails on an unassigned local at ``return``.
    """
    pattern_mother_name = r"(?:الأم|دايك|دایك)\s*[::]?\s*(?:[\n\s]*(\d{4}-\d{2}-\d{2}))?\s*([\u0621-\u064A\s]+)"

    result = {"mother_first_name": None}
    for match in re.finditer(pattern_mother_name, text):
        candidate = match.group(2).strip()
        if not candidate:
            continue

        # Remove neighbouring field labels that the greedy capture swallowed.
        cleaned_name = re.sub(r"(الجد|الام|با|بابير|فصيل|الدم)", "", candidate).strip()
        if cleaned_name:
            result = {"mother_first_name": cleaned_name.split('\n')[0]}

    return result
181
-
182
def extract_paternal_grandfather_name(text):
    """Extract the value printed after the first grandfather label.

    All grandfather labels are located; for each, the text up to the next
    newline, ``/`` or ``:`` is captured.  The first non-blank capture is
    returned.

    Returns:
        ``{"third_name": <str>}``, or ``{"third_name": None}`` when nothing
        usable is found (the lookup error is printed in that case).
    """
    label_pattern = r"(?:الجد|باپير|بابير|بايير|باپیر)\s*[::]?\s*([^\n/:]*)"

    candidates = []
    for captured in re.findall(label_pattern, text):
        trimmed = captured.strip()
        if trimmed:
            candidates.append(trimmed)

    try:
        return {"third_name": candidates[0]}
    except Exception as e:
        print("error:", e)
        return {'third_name': None}
194
-
195
def update_family_number_cases(dictt, text_back):
    """Try to repair a short ``family_number`` using the back-side text.

    When ``dictt["family_number"]`` is shorter than 11 characters, the
    back text is searched for a labelled family number made of two
    space-separated parts; the parts are swapped and concatenated and the
    result replaces ``dictt["family_number"]`` in place.

    Any failure (missing key, ``None`` value, value without a second part)
    is swallowed after printing ``"None!"``.

    Returns:
        The same ``dictt`` object, modified or not.
    """
    try:
        if len(dictt["family_number"]) >= 11:
            print("value greater than 11")
            return dictt

        found = re.search(
            r'(?:الرقم العائلي|رقم العائلة)\s*/?\s*(?:زمارهی خیزانی)?\s*:\s*([\dA-Z\s]+)',
            text_back,
        )
        if not found:
            print("Family number not found.")
            return dictt

        pieces = found.group(1).split(" ")
        # The card prints the two halves in the opposite order.
        dictt['family_number'] = pieces[1] + pieces[0]
    except Exception as e:
        print("None!")
    return dictt
214
-
215
def detect_id_card(client, image_data, id_texts, image_format, part=None, country=None):
    """Crop the ID card out of ``image_data`` and optionally OCR one region of it.

    The bounding polygon of the first text annotation, padded by 30 pixels,
    defines the card area.  A portrait-oriented crop is rotated by 90
    degrees so the card is landscape.

    Args:
        client: Vision client, only used when ``part`` is given.
        image_data: raw image bytes.
        id_texts: text annotations; the first one must expose
            ``bounding_poly.vertices`` and ``description``.
        image_format: file extension used for the intermediate file.
        part: ``None``, ``'second'`` (right half) or ``'third'`` (bottom third).
        country: unused here; kept for interface compatibility.

    Returns:
        * ``(id_card, part_img, part_text, tampered_result)`` when ``part`` is given,
        * ``(id_card, tampered_result)`` when it is not,
        * ``None`` (after printing a message) when ``id_texts`` is empty.

        ``tampered_result`` is currently always ``'clear'``.

    Raises:
        ValueError: if ``part`` is truthy but not ``'second'`` or ``'third'``
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if not id_texts:
        print('No text found in the image.')
        return None

    if part and part not in ('second', 'third'):
        raise ValueError(f"Unsupported part: {part!r}; expected 'second' or 'third'")

    id_text = id_texts[0]
    vertices = id_text.bounding_poly.vertices
    left, top = vertices[0].x, vertices[0].y
    right, bottom = vertices[2].x, vertices[2].y

    padding = 30
    crop_box = (
        max(0, left - padding),
        max(0, top - padding),
        right + padding,
        bottom + padding,
    )

    with Image.open(io.BytesIO(image_data)) as img:
        id_card = img.crop(crop_box)

    # The crop is still written out once in the requested format, as before,
    # but inside a TemporaryDirectory so that both the file and the directory
    # are removed (the old mkdtemp() directory was left behind on every call).
    with tempfile.TemporaryDirectory() as temp_dir:
        id_card_path = os.path.join(temp_dir, f"cropped_img_original.{image_format}")
        id_card.save(id_card_path)

    width, height = id_card.size
    if width < height:
        id_card = id_card.rotate(90, expand=True)

    # Tamper detection is disabled; every card is reported as clear.
    tampered_result = 'clear'

    if not part:
        return id_card, tampered_result

    if part == 'second':
        part_img = crop_second_part(id_card)
    else:
        part_img = crop_third_part(id_card)

    # Second call to Vision AI; fall back to the full-image text on failure.
    try:
        part_text = extract_text_from_image_data(client, part_img)
    except Exception:
        part_text = id_texts[0].description

    return id_card, part_img, part_text, tampered_result
272
-
273
-
274
def extract_name_fields_from_raw(text):
    """Extract name and gender fields from the OCR text of the card front.

    Every ``": value"`` fragment in ``text`` is collected, digits are
    removed, and fragments that are blank, longer than three words, or
    contain ``/`` or ``|`` are dropped.  The remaining list is read
    positionally: index 0 is the given name, index 1 the father's name,
    index 3 the surname and the second-to-last entry the gender.  The
    Arabic full name is translated to English and split into parts.

    Returns:
        A dict with ``gender``, ``gender_ar``, ``name``, ``first_name``,
        ``father_name``, ``last_name`` and their ``*_en`` counterparts, or
        ``{}`` when anything goes wrong (too few fields, translation
        failure, non-string input).
    """
    try:
        generic_field_matches = re.findall(r':\s*([^:\n]*)', text)

        generic_fields_result = []
        for item in generic_field_matches:
            no_digits = ''.join(char for char in item if not char.isdigit())
            if no_digits.strip():
                generic_fields_result.append(no_digits)

        generic_fields_result = [
            word for word in generic_fields_result
            if len(word.split()) <= 3 and '/' not in word and '|' not in word
        ]

        if len(generic_fields_result[0].split()) <= 2 and len(generic_fields_result) <= 8:
            given_name = generic_fields_result[0]
        else:
            given_name = ''

        if len(generic_fields_result[1].split()) <= 2:
            fathers_name = generic_fields_result[1]
        else:
            fathers_name = ''

        if len(generic_fields_result[3].split()) <= 2:
            surname = generic_fields_result[3]
            # Strip neighbouring labels that OCR glued onto the surname
            # (order matters: it mirrors the original replace chain).
            for noise in ("الأم", "دايك", "مديرية ال", "/", "البطاقة الو", "ذكر", "الام", "]"):
                surname = surname.replace(noise, "")
        else:
            surname = ''

        try:
            gender_ar = generic_fields_result[-2]
            try:
                gender = translator.translate(gender_ar, src='ar', dest='en').text
            except Exception:
                gender = GoogleTranslator('ar', 'en').translate(gender_ar)

            normalized = str(gender).lower()
            if normalized == 'feminine':
                gender = normalized = 'female'

            # Only fall back to keyword spotting when the translation did not
            # already give a usable answer.  The previous condition
            # (`!= 'male' or != 'female'`) was always true and threw away
            # every translated value.
            if normalized not in ('male', 'female'):
                stripped_fields = {field.strip() for field in generic_fields_result}
                if 'ذكر' in stripped_fields:
                    gender = 'male'
                elif 'انثى' in stripped_fields:
                    gender = 'female'
                else:
                    gender = ''
        except Exception:
            gender_ar, gender = '', None

        name = f"{given_name} {fathers_name} {surname}"

        first_name_en, fathers_name_en, last_name_en = '', '', ''
        name_en = ''

        if name:
            name_en = GoogleTranslator('ar', 'en').translate(name).upper()
            if name_en:
                name_list = name_en.split(' ')
                if len(name_list) >= 2:
                    first_name_en = name_list[0]
                    fathers_name_en = name_list[1]
                    last_name_en = name_list[-1]

                    # Edge case: surnames with prefixes such as Al- / Bin- / Abu-
                    # come back split, leaving a last token that starts with '-'.
                    if last_name_en.startswith('-'):
                        last_name_en = name_list[-2] + name_list[-1]

        return {
            "gender": gender,
            "gender_ar": gender_ar,
            "name": name,
            "first_name": given_name,
            "father_name": fathers_name,
            "last_name": surname,
            "first_name_en": first_name_en,
            "father_name_en": fathers_name_en,
            "last_name_en": last_name_en,
            "name_en": name_en,
        }

    except Exception:
        # Best-effort extraction: callers treat {} as "nothing found".
        return {}
362
-
363
-
364
def identify_front(text):
    """Tell whether ``text`` contains one of the front-side marker phrases.

    The check is a case-insensitive search for any of three English
    phrases.

    Returns:
        ``True`` or ``False``, or the string ``'error'`` when the search
        itself fails (for example when ``text`` is not a string).  Note
        that ``'error'`` is truthy.
    """
    markers = ("The Republic of Iraq", "The Ministry of Interior", "National Card")
    alternation = '|'.join(re.escape(marker) for marker in markers)

    try:
        return re.search(alternation, text, re.IGNORECASE) is not None
    except:
        return 'error'
375
-
376
def extract_numeric_fields_from_raw(ar_front_data, front_data):
    """Extract gender, ID number and card number from the front-side texts.

    Args:
        ar_front_data: the original OCR text.
        front_data: the translated text.

    The gender is the word after a ``Sex ... :`` label in ``front_data``.
    The 12-digit ID number is looked up in ``front_data`` first and in
    ``ar_front_data`` second, both with spaces removed.  The card number
    (two letters + 7 digits, or one letter + 8 digits) is looked up in
    ``ar_front_data`` after removing spaces and colons and turning ``$``
    into ``S``.

    Returns:
        ``{"gender", "id_number_front", "card_number_front"}``; each value
        is ``''`` when not found.
    """
    id_number_pattern = r"\b\d{12}\b"

    sex_hit = re.search(r"Sex.*?:\s*(\w+)", front_data, re.IGNORECASE)
    gender = sex_hit.group(1) if sex_hit else ''

    id_hit = re.search(id_number_pattern, front_data.replace(" ", ""), re.IGNORECASE)
    if not id_hit:
        try:
            id_hit = re.search(id_number_pattern, ar_front_data.replace(" ", ""), re.IGNORECASE)
            id_number = id_hit.group(0)
        except:
            id_number = ''
    else:
        id_number = id_hit.group(0)

    # '$' is a common OCR misread of 'S' in the card number.
    card_source = ar_front_data.replace(" ", "").replace(":", "").replace("$", "S")
    card_hit = re.search(r"\b[A-Za-z]{2}\d{7}\b|\b[A-Za-z]\d{8}\b", card_source, re.IGNORECASE)
    rfid_number = card_hit.group(0).upper() if card_hit else ''

    return {
        "gender": gender,
        "id_number_front": id_number,
        "card_number_front": rfid_number
    }
415
-
416
-
417
def iraq_front_id_extraction(client, image_data, front_id_text, front_id_text_description, front_translated_data, image_format):
    """Build the front-side result dict for an Iraqi ID card.

    The card is cropped and its right half is OCR'd via ``detect_id_card``;
    name fields are parsed from that text (falling back to the full-image
    text when nothing is found) and numeric fields from the full-image
    original and translated texts.  The gender is taken from the name
    fields when it is male/female, otherwise from the numeric fields when
    that one is male/female, otherwise left empty.

    Returns:
        A dict that always contains ``first_name``, ``last_name``, ``name``,
        ``gender`` (upper-cased) and ``front_tampered_result``, plus
        whatever the two parsers produced.
    """
    _card, _second_part, second_part_text, tampered_result_front = detect_id_card(
        client, image_data, front_id_text, image_format, part='second'
    )
    front_data = extract_name_fields_from_raw(second_part_text)
    numeric_fields = extract_numeric_fields_from_raw(front_id_text_description, front_translated_data)

    if not front_data:
        front_data = extract_name_fields_from_raw(front_id_text_description)

    # First source that yields a clean male/female value wins.
    gender = ''
    for candidate in (front_data.get('gender', ''), numeric_fields.get('gender', '')):
        if candidate and candidate.lower() in ('male', 'female'):
            gender = candidate
            break

    front_data.update(numeric_fields)
    front_data['gender'] = gender
    if gender:
        try:
            front_data['gender_ar'] = translator.translate(gender, src='en', dest='ar').text
        except:
            front_data['gender_ar'] = GoogleTranslator('en', 'ar').translate(gender)

    front_data['front_tampered_result'] = tampered_result_front

    front_data_final = {'first_name': '', 'last_name': '', 'name': ''}
    front_data_final.update(front_data)

    # Normalise to upper case, expanding single-letter codes.
    normalized = front_data_final["gender"].strip().upper()
    front_data_final["gender"] = {"F": "FEMALE", "M": "MALE"}.get(normalized, normalized)

    return front_data_final
463
-
464
-
465
def find_mrz2_from_original(back_data):
    """Return the first candidate for the second MRZ line in ``back_data``.

    A candidate starts at a word boundary with six or more digits and runs
    up to and including the first run of two or more ``<`` (or the
    look-alike ``く`` that OCR sometimes produces).

    Returns:
        The matched text, or ``None`` when there is no candidate.
    """
    found = re.search(r'\b\d{6,}.*?<{2,}|\b\d{6,}.*?く{2,}', back_data, re.MULTILINE)
    if found is None:
        return None
    return found.group(0)
468
-
469
-
470
def handle_name_extraction(third_part_text):
    """Extract first and last name from the name line of the MRZ.

    The MRZ block is located by its ``IDIRQ`` / ``IDIRC`` prefix (the
    second alternative used to be a duplicate of the first, so blocks whose
    prefix was read as ``IDIRC`` were never found here, unlike in
    ``handle_mrz_extraction``).  The name line is the first line of the
    block made only of letters and ``<``; its first token is the last name
    and its second token the first name.  The letter ``X`` is removed from
    both.

    Returns:
        ``{"first_name_back": ..., "last_name_back": ...}`` when a name
        line is found (values may be ``''``), otherwise ``{}``.
    """
    mrz_pattern = r'(IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'

    try:
        mrz = re.findall(mrz_pattern, third_part_text.replace(" ", "").strip(), re.MULTILINE)
        mrz_str = mrz[0].replace(" ", "")

        name_line = re.search(r'[\n](?:[a-zA-Z<]{6,})', mrz_str)
        if not name_line:
            return {}
        mrz3 = name_line.group(0).replace("\n", "")

        first_name, last_name = '', ''
        name_list = re.findall(r'\b[^<\s]+\b', mrz3)
        if len(name_list) > 1:
            first_name = name_list[1].upper().replace("X", "")
            last_name = name_list[0].upper().replace("X", "")
        elif name_list:
            first_name = name_list[0].upper().replace("X", "")

        return {
            "first_name_back": first_name,
            "last_name_back": last_name,
        }

    except Exception:
        # No MRZ block (or non-string input): nothing to report.
        return {}
499
-
500
-
501
def handle_mrz_extraction(third_part_text, back_data, back_data_dict):
    """Parse the MRZ of an Iraqi ID and return the fields derived from it.

    The MRZ is first looked up in ``third_part_text`` (OCR of the bottom
    third of the card).  Values already present in ``back_data_dict`` for
    ``id_number``, ``card_number`` and ``mrz1`` take precedence.  Whatever
    is still missing afterwards is retried against the full back-side text
    ``back_data`` with looser patterns.

    ``convert_dob`` and ``convert_expiry_date`` are not defined in the
    visible part of this file; presumably they come from the
    ``idvpackage.common`` star import.

    Args:
        third_part_text: OCR text of the card's bottom third.
        back_data: OCR text of the whole back side.
        back_data_dict: previously extracted back-side fields (read only).

    Returns:
        A dict with ``id_number``, ``card_number``, ``nationality``,
        ``mrz`` (one-element list), ``mrz1``, ``mrz2``, ``mrz3``,
        ``dob_mrz`` and ``expiry_date_mrz``.
    """
    mrz_pattern = r'(IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'
    # group(1) = card number, group(2) = check digit + 12-digit ID number.
    # The old pattern wrapped both alternatives in one extra capture group,
    # so group(1) was the whole match, group(2) the card number, and for
    # IDIRC cards group(2) was None (crashing on the slice below).
    mrz1_data_pattern = r'IDIR[QC]([\S]{2}\d{7}|[\S]\d{8}).*?(\d{13})'
    # Three capitals preceded by one character and seven digits,
    # i.e. the nationality code that follows the expiry date in line 2.
    nationality_lookup = r'(?<=[\S]\d{7})[A-Z]{3}'

    mrz = []
    try:
        mrz = re.findall(mrz_pattern, third_part_text.replace(" ", "").strip(), re.MULTILINE)
        mrz_str = mrz[0].replace(" ", "")
    except Exception:
        mrz_str = ''

    mrz1_match = re.search(r'(IDIRQ.*?<{2,}|IDIRC.*?<{2,})', mrz_str, re.DOTALL)
    mrz1 = mrz1_match.group(1) if mrz1_match else None

    mrz2_match = re.search(r'\b\d{6,}.*?<{2,}', mrz_str, re.MULTILINE)
    mrz2 = mrz2_match.group(0) if mrz2_match else None

    mrz3_match = re.search(r'[\n](?:[a-zA-Z<]{6,})', mrz_str)
    mrz3 = mrz3_match.group(0).replace("\n", "") if mrz3_match else None

    rfid_number = ''
    id_number = ''

    mrz1_data_match = re.search(mrz1_data_pattern, mrz_str)
    if mrz1_data_match:
        rfid_number = mrz1_data_match.group(1).upper()
        # Drop the leading check digit, keep the 12-digit ID number.
        id_number = mrz1_data_match.group(2)[1:14]

    try:
        compact = mrz[0].replace(" ", "")
        national = re.search(nationality_lookup, compact)
        if not national:
            national = re.search(nationality_lookup, compact.replace("\n", ""))
        nationality = national.group() if national else ''
    except Exception:
        nationality = ''

    try:
        dob_match = re.search(r'(\d+)[MF]', mrz2)
        dob_mrz = convert_dob(dob_match.group(1)) if dob_match else ''

        doe_match = re.search(r'[MF](\d+)', mrz2)
        expiry_date_mrz = convert_expiry_date(doe_match.group(1)) if doe_match else ''
    except Exception:
        dob_mrz, expiry_date_mrz = '', ''

    # Values extracted earlier from the full back side win over the MRZ crop.
    if back_data_dict.get('id_number'):
        id_number = back_data_dict['id_number']

    if back_data_dict.get('card_number'):
        rfid_number = back_data_dict['card_number']

    if back_data_dict.get('mrz1'):
        mrz1 = back_data_dict['mrz1']

    back_data_new = {
        "id_number": id_number,
        "card_number": rfid_number,
        "nationality": nationality,
        "mrz": [mrz_str],
        "mrz1": mrz1,
        "mrz2": mrz2,
        "mrz3": mrz3,
        "dob_mrz": dob_mrz,
        "expiry_date_mrz": expiry_date_mrz
    }

    ## HANDLING EDGE CASES FOR ID NUMBER AND CARD NUMBER
    # Neither number found: retry on the full back text, tolerating two
    # misread characters after "IDI".
    if not (back_data_new.get('id_number') or back_data_new.get('card_number')):
        mrz_pattern = r'(IDI[\S]{2}.*\n*.*\n*.*\n*.*|IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'
        mrz1_data_pattern = r'IDI[\S]{2}([\S]{2}\d{7}|[\S]\d{8}).*?(\d{13})'

        try:
            mrz = re.findall(mrz_pattern, back_data.replace(" ", "").strip(), re.MULTILINE)
            mrz_str = mrz[0].replace(" ", "")
        except Exception:
            mrz_str = ''

        back_data_new['mrz'] = [mrz_str]

        mrz1_match = re.search(r'(IDI[\S]{2}.*?<{2,})', mrz_str, re.DOTALL)
        mrz1 = mrz1_match.group(1) if mrz1_match else None
        back_data_new['mrz1'] = mrz1

        mrz2_match = re.search(r'\b\d{7}.*?(?:<<\d|<<\n)', mrz_str)
        mrz2 = mrz2_match.group(0) if mrz2_match else None
        back_data_new['mrz2'] = mrz2

        mrz3_match = re.search(r'[\n](?:[a-zA-Z<]{6,})', mrz_str)
        mrz3 = mrz3_match.group(0).replace("\n", "") if mrz3_match else None
        back_data_new['mrz3'] = mrz3

        rfid_number = ''
        id_number = ''

        mrz1_data_match = re.search(mrz1_data_pattern, mrz_str)
        if mrz1_data_match:
            rfid_number = mrz1_data_match.group(1).upper()
            id_number = mrz1_data_match.group(2)[1:14]

        back_data_new['id_number'] = id_number
        back_data_new['card_number'] = rfid_number

    ## HANDLE DOB AND DOE FROM MRZ
    if not (back_data_new.get('dob_mrz') or back_data_new.get('expiry_date_mrz')):
        if not mrz2:
            mrz2_match = re.search(r'\b\d{6,}.*?<{2,}|\b\d{6,}.*?く{2,}', mrz_str, re.MULTILINE)
            mrz2 = mrz2_match.group(0) if mrz2_match else find_mrz2_from_original(back_data.replace(" ", "").strip())

        if mrz2:
            # Fallback shape: the sex letter was lost, leaving one long digit
            # run followed by a three-character code with 'R' in the middle.
            merged_dates_pattern = r'(\d{12,})[\S]R[\S]\b'

            dob_match = re.search(r"(\d{7})[MF]", mrz2)
            if dob_match:
                back_data_new['dob_mrz'] = convert_dob(dob_match.group(1))
            else:
                dob_match = re.search(merged_dates_pattern, mrz2)
                if dob_match:
                    back_data_new['dob_mrz'] = convert_dob(dob_match.group(1)[:7])

            doe_match = re.search(r"[MF](\d+)", mrz2)
            if doe_match:
                back_data_new['expiry_date_mrz'] = convert_expiry_date(doe_match.group(1))
            else:
                doe_match = re.search(merged_dates_pattern, mrz2)
                if doe_match:
                    expiry = doe_match.group(1)[8:]
                    if len(expiry) < 7:
                        expiry = doe_match.group(1)[7:]
                    back_data_new['expiry_date_mrz'] = convert_expiry_date(expiry)

    if not back_data_new.get('nationality'):
        # The previous pattern contained "||", i.e. an empty alternative that
        # matches at every position, so the first "match" was usually ''.
        mrz_pattern = r'(IDI[\S]{2}.*\n*.*\n*.*\n*.*|IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'
        try:
            mrz = re.findall(mrz_pattern, back_data.replace(" ", "").strip(), re.MULTILINE)
        except Exception:
            mrz = []

        if mrz:
            national = re.search(nationality_lookup, mrz[0].replace(" ", "").replace("\n", ""))
            nationality = national.group() if national else ''

        back_data_new['nationality'] = nationality

    # Last resort: three characters right after a long digit run in line 2.
    # Guarded because mrz2 can still be None here.
    if not back_data_new.get('nationality') and mrz2:
        nationality_match = re.search(r'\d{6,}([\S]{3})\b', mrz2)
        if nationality_match:
            back_data_new['nationality'] = nationality_match.group(1)

    return back_data_new
675
-
676
def count_digits(text):
    """Return how many digit characters (regex ``\\d``) occur in ``text``."""
    return sum(1 for _ in re.finditer(r'\d', text))
678
-
679
def find_gender_from_back(text):
    """Return the sex letter found between two digits in ``text``.

    In the MRZ the sex letter sits between the birth-date check digit and
    the expiry date, so a digit-letter-digit sequence is searched for.  An
    ``M``/``F`` (either case) match is preferred; only if there is none is
    any other letter between two digits accepted.  Previously the generic
    pattern was tried first, which made the ``M``/``F`` pattern unreachable
    and let an unrelated letter earlier in the text win.

    Returns:
        The matched letter as found in the text, or ``''`` when there is
        no digit-letter-digit sequence.
    """
    for pattern in (r'(\d)([MFmf])(\d)', r'(\d)([A-Za-z])(\d)'):
        found = re.search(pattern, text)
        if found:
            return found.group(2)
    return ''
695
-
696
def iraq_back_id_extraction(client, image_data, back_id_text, back_data, image_format):
    """Build the back-side result dict for an Iraqi ID card.

    The three MRZ lines are located in ``back_data`` and normalised, and
    the card number, ID number, nationality, names, dates and gender are
    derived from them.  Dates fall back to the ``YYYY/MM/DD`` values
    printed on the card (earliest = birth, second = issue, latest =
    expiry).  Issuing authority and place of birth are read from their
    Arabic labels and translated to English.

    ``remove_special_characters1``, ``remove_special_characters_mrz2``,
    ``func_dob``, ``func_expiry_date``, ``convert_dob`` and
    ``convert_expiry_date`` are not defined in the visible part of this
    file; presumably they come from the ``idvpackage.common`` star import.

    Compared with the previous version:
      * a name line holding only ``<`` filler no longer raises
        ``IndexError``; both names stay ``''``;
      * a card without any printed ``YYYY/MM/DD`` date no longer raises
        ``IndexError`` in the date fallbacks; the date stays ``''``;
      * unused locals and commented-out code were removed.

    Args:
        client: Vision client, forwarded to ``detect_id_card``.
        image_data: raw bytes of the back-side image.
        back_id_text: text annotations of the back side.
        back_data: full OCR text of the back side.
        image_format: image file extension, forwarded to ``detect_id_card``.

    Returns:
        The back-side field dict, including ``back_tampered_result``.
    """
    mrz_pattern = r'(IDIRQA.*\n*.*\n*.*\n*.*|IDIRQC.*\n*.*\n*.*\n*.*|IDIR.*\n*.*\n*.*\n*.*)'
    place_of_birth_pattern = r'(?:محل|الولادة)[^:]*:\s*(.*?)\n'
    issuing_authority_pattern_1 = r"مديرية الجنسية والمعلومات المدنية"
    issuing_authority_pattern_2 = r"دائرة احوال -.*?(?=\n|\r|$)"

    mrz1, mrz2, mrz3 = '', '', ''

    try:
        mrz = re.findall(mrz_pattern, back_data.replace(" ", "").strip(), re.MULTILINE)
        mrz_str = mrz[0].replace(" ", "")
    except Exception:
        mrz_str = ''

    if mrz_str:
        mrz_list = mrz_str.replace(" ", "").split("\n")
        mrz1 = mrz_list[0]

        # Name line: a cleaned line made only of letters and angle brackets
        # with at least two '<'.  OCR sometimes reads '<' as '>'.
        try:
            cleaned_lines = [remove_special_characters1(ele).replace(' ', '') for ele in back_data.split('\n')]
            mrz3 = [
                s.replace('>', '<') for s in cleaned_lines
                if len(re.findall(r'<', s)) >= 2 and re.fullmatch(r'[A-Za-z<>]+', s)
            ][0]
        except Exception:
            mrz3 = ''

        # Second line: whatever is left after removing lines 1 and 3.
        try:
            leftovers = [ele for ele in mrz_list if ele not in [mrz1, mrz3]]
            leftovers = [ele for ele in leftovers if remove_special_characters_mrz2(ele) != '']
            if len(leftovers) > 1:
                # Line 2 was split across OCR lines: put the digit-heavy
                # fragment first and append the first other fragment.
                digit_heavy = max(leftovers, key=count_digits)
                mrz2 = digit_heavy + [ele for ele in leftovers if ele != digit_heavy][0]

                mrz2_temp = re.search(r'\d{7}[MF]\d{7}[\S]{3}<+?\d', mrz2.replace(">", ""))
                if mrz2_temp:
                    mrz2 = mrz2_temp.group(0)

                mrz2 = mrz2.split('<')[0] + '<<<<<<<<<<' + mrz2.split('<')[-1]
            else:
                mrz2 = leftovers[0].split('<')[0] + '<<<<<<<<<<' + leftovers[0][-1]
        except Exception:
            mrz2 = ''

        # Letter 'O' misread in place of the digit '0' at the start of the
        # card number.
        mrz1 = re.sub(r'(IDIRQ[A-Z]{1,2})O(?=[0-9])', r'\g<1>0', mrz1)

        # A trailing '>' is a misread final check digit '7'.
        if mrz2 and mrz2.endswith('>'):
            mrz2 = mrz2.split('<')[0] + '<<<<<<<<<<' + '7'

        # Pad the name line with filler up to 30 characters.
        if len(mrz3) < 30:
            mrz3 = mrz3.ljust(30, '<')

    # After "IDIR": [0] document letter, [1:10] card number,
    # [10] check digit, [11:23] ID number.
    mrz1_tail = mrz1.split('IDIR')[-1]
    rfid_number = mrz1_tail[1:10]
    id_number = mrz1_tail[11:23]

    printed_dates = sorted(re.findall(r'\d{4}/\d{2}/\d{2}', back_data))

    dob = func_dob(mrz_str)
    if not dob:
        dob = printed_dates[0] if printed_dates else ''

    expiry = func_expiry_date(mrz_str)
    if not expiry:
        expiry = printed_dates[-1] if printed_dates else ''

    ## handle issue date
    issue_date = printed_dates[1] if len(printed_dates) > 1 else ''

    nationality = mrz2.split('<')[0][-3:]

    first_name, last_name = '', ''
    if mrz3:
        name_list = re.findall(r'\b[^<\s]+\b', mrz3)
        if len(name_list) > 1:
            first_name = name_list[1].upper().replace("X", "")
            last_name = name_list[0].upper().replace("X", "")
        elif name_list:
            first_name = name_list[0].upper().replace("X", "")
    else:
        mrz3 = ''

    # The second authority pattern wins when both are present.
    issuing_authority = ''
    issuing_authority_match_1 = re.search(issuing_authority_pattern_1, back_data)
    issuing_authority_match_2 = re.search(issuing_authority_pattern_2, back_data)

    if issuing_authority_match_1:
        issuing_authority = issuing_authority_match_1.group(0)

    if issuing_authority_match_2:
        issuing_authority = issuing_authority_match_2.group(0)

    place_of_birth_match = re.search(place_of_birth_pattern, back_data)
    if place_of_birth_match:
        place_of_birth = place_of_birth_match.group(1).strip()
        place_of_birth_list = place_of_birth.split(":")
        if len(place_of_birth_list) >= 2:
            place_of_birth = place_of_birth_list[1].strip()
        elif len(place_of_birth_list) == 1:
            place_of_birth = place_of_birth_list[0]
        else:
            place_of_birth = ''
    else:
        place_of_birth = ''

    issuing_authority_en = place_of_birth_en = ''

    if issuing_authority:
        try:
            issuing_authority_en = translator.translate(issuing_authority, src='ar', dest='en').text.upper()
        except Exception:
            issuing_authority_en = GoogleTranslator('ar', 'en').translate(issuing_authority)

    if place_of_birth:
        try:
            place_of_birth_en = translator.translate(place_of_birth, src='ar', dest='en').text.upper()
        except Exception:
            place_of_birth_en = GoogleTranslator('ar', 'en').translate(place_of_birth)

    try:
        dob_match = re.search(r'(\d+)[MF]', mrz2)
        dob_mrz = convert_dob(dob_match.group(1)) if dob_match else ''

        doe_match = re.search(r'[MF](\d+)', mrz2)
        expiry_date_mrz = convert_expiry_date(doe_match.group(1)) if doe_match else ''
    except Exception:
        dob_mrz, expiry_date_mrz = '', ''

    try:
        gender = find_gender_from_back(mrz2)
    except Exception:
        gender = find_gender_from_back(back_data)

    # From here on "mrz" is reported as the three normalised lines.
    mrz_str = f"{mrz1}\n{mrz2}\n{mrz3}"

    # Common OCR misreads of the country code.
    if nationality and (nationality == '1RQ' or nationality == 'IRG'):
        nationality = 'IRQ'

    # Upper-case the gender, expanding single-letter codes.
    gender = gender.strip().upper()
    gender = {"F": "FEMALE", "M": "MALE"}.get(gender, gender)

    back_data_dict = {
        "mrz": [mrz_str],
        "mrz1": mrz1,
        "mrz2": mrz2,
        "mrz3": mrz3,
        "id_number": id_number,
        "card_number": rfid_number,
        "dob": dob,
        "issue_date": issue_date,
        "first_name_back": first_name,
        "last_name_back": last_name,
        "expiry_date": expiry,
        "nationality": nationality,
        "issuing_authority": issuing_authority,
        "place_of_birth": place_of_birth,
        "issuing_authority_en": issuing_authority_en,
        "place_of_birth_en": place_of_birth_en,
        "issuing_country": "IRQ",
        "dob_mrz": dob_mrz,
        "expiry_date_mrz": expiry_date_mrz,
        "gender_back": gender
    }

    cropped_id_card, tampered_result_back = detect_id_card(client, image_data, back_id_text, image_format)
    back_data_dict['back_tampered_result'] = tampered_result_back

    return back_data_dict