idvpackage 3.0.11__py3-none-any.whl → 3.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- idvpackage/common.py +8 -966
- idvpackage/iraq_id_extraction_withopenai.py +374 -893
- idvpackage/jor_passport_extraction.py +1 -6
- idvpackage/liveness_spoofing_v2.py +2 -45
- idvpackage/ocr.py +1016 -2430
- idvpackage/ocr_utils.py +148 -489
- idvpackage/pse_passport_extraction.py +18 -292
- idvpackage/qatar_id_extraction.py +4 -956
- idvpackage/sudan_passport_extraction.py +0 -928
- idvpackage/syr_passport_extraction.py +27 -402
- idvpackage/uae_id_extraction.py +87 -151
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/METADATA +1 -1
- idvpackage-3.0.13.dist-info/RECORD +34 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/WHEEL +1 -1
- idvpackage/ekyc.py +0 -78
- idvpackage/genai_utils.py +0 -309
- idvpackage/iraq_id_extraction.py +0 -992
- idvpackage/iraq_passport_extraction.py +0 -588
- idvpackage/lazy_imports.py +0 -44
- idvpackage/lebanon_passport_extraction.py +0 -161
- idvpackage/sau_id_extraction.py +0 -248
- idvpackage/sudan_id_extraction.py +0 -764
- idvpackage-3.0.11.dist-info/RECORD +0 -42
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/licenses/LICENSE +0 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/top_level.txt +0 -0
idvpackage/iraq_id_extraction.py
DELETED
|
@@ -1,992 +0,0 @@
|
|
|
1
|
-
import cv2
|
|
2
|
-
import numpy as np
|
|
3
|
-
from google.cloud import vision_v1
|
|
4
|
-
from googletrans import Translator
|
|
5
|
-
import re
|
|
6
|
-
from idvpackage.common import *
|
|
7
|
-
import io
|
|
8
|
-
import os
|
|
9
|
-
from PIL import Image
|
|
10
|
-
from deep_translator import GoogleTranslator
|
|
11
|
-
import imghdr
|
|
12
|
-
import tempfile
|
|
13
|
-
|
|
14
|
-
# Module-level googletrans client, created once at import time and shared by
# the gender/name extraction helpers in this module.
translator = Translator()
|
|
15
|
-
|
|
16
|
-
def crop_second_part(img):
    """Return the right-hand half of *img*.

    Args:
        img: Object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method (used here with PIL images).

    Returns:
        Whatever ``img.crop`` returns for the box that starts at the
        horizontal midpoint (floor division) and spans the full height.
    """
    full_width, full_height = img.size
    return img.crop((full_width // 2, 0, full_width, full_height))
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def crop_third_part(img):
    """Return the bottom third of *img*.

    Args:
        img: Object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method (used here with PIL images).

    Returns:
        Whatever ``img.crop`` returns for the box that starts at
        ``2 * (height // 3)`` and runs to the bottom edge, full width.
    """
    full_width, full_height = img.size
    top_edge = 2 * (full_height // 3)
    return img.crop((0, top_edge, full_width, full_height))
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def extract_text_from_image_data(client, image):
    """Run Vision text detection on *image* and return the first annotation's text.

    Args:
        client: Object providing ``text_detection(image=...)``.
        image: Image object with a ``save(fp, format=...)`` method; it is
            serialised to PNG bytes before being sent.

    Returns:
        The ``description`` of the first entry in the response's
        ``text_annotations``.

    Raises:
        IndexError: If the response carries no text annotations.
    """
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    vision_image = vision_v1.types.Image(content=png_bytes)
    annotations = client.text_detection(image=vision_image).text_annotations
    return annotations[0].description
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def detect_image_format(image_data):
    """Return the image type ``imghdr`` recognises in the raw bytes *image_data*.

    NOTE: this module defines ``detect_image_format`` a second time further
    down; after import the name is bound to that later definition.

    Returns:
        The type name reported by ``imghdr.what`` (for example ``'png'``),
        or ``None`` when the bytes are not recognised.
    """
    return imghdr.what(None, image_data)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def create_temporary_file(image_data, image_format):
    """Write *image_data* to a new temporary file and return the file's path.

    Args:
        image_data: Bytes to write.
        image_format: Extension (without the dot) used as the file suffix.

    Returns:
        Path of the created file. The file is created with ``delete=False``,
        so removing it is left to the caller.
    """
    handle = tempfile.NamedTemporaryFile(suffix='.' + image_format, delete=False)
    with handle:
        handle.write(image_data)
        return handle.name
|
|
56
|
-
|
|
57
|
-
def detect_image_format(pil_img):
    """Write *pil_img* to a temporary image file with OpenCV and return its path.

    Despite its name, this function returns a file path, not a format name.
    The file suffix is the lower-cased ``pil_img.format``, or ``'jpg'`` when
    the image carries no format. The file is created with ``delete=False``,
    so removing it is left to the caller.

    Args:
        pil_img: PIL image to persist.

    Returns:
        Path of the temporary file written by ``cv2.imwrite``.
    """
    # Default to 'jpg' if the image does not report a format.
    image_format = pil_img.format.lower() if pil_img.format else 'jpg'

    # The RGB -> BGR flip below indexes a third axis and assumes exactly three
    # channels: grayscale/palette images would raise on the indexing and RGBA
    # would come out in the wrong channel order. Normalise to RGB first.
    rgb_img = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")
    bgr_pixels = np.array(rgb_img)[:, :, ::-1].copy()

    # Reserve the file name, then write once our own handle is closed so the
    # write does not depend on the platform allowing a second open.
    with tempfile.NamedTemporaryFile(suffix='.' + image_format, delete=False) as temp_file:
        temp_file_path = temp_file.name
    cv2.imwrite(temp_file_path, bgr_pixels)

    return temp_file_path
|
|
68
|
-
|
|
69
|
-
def extract_family_number(arabic_text):
    """Extract a family number from OCR text using a cascade of regex patterns.

    The patterns are tried in a fixed order and the first one that matches
    decides the result.

    Args:
        arabic_text: Text to search.

    Returns:
        ``{"family_number": value}`` where ``value`` is the matched string,
        or ``None`` when no pattern matches.
    """
    # Label followed by the number; an optional run of 4-6 digits found
    # before the label is prepended to the number.
    labelled_with_prefix = r'(\d{4,6})?\s*(?:الرقم|العائلي|زماره|زمارة|ژماردی|ژماره|از)\s*(?:العاملي|العائلي|خيزانى|خبرائی|خيرالي|خیزانی|خیزاني|خیزانی)?\s*[::]?\s*([A-Za-z0-9]+)'

    # Fallbacks, each contributing its first capture group unchanged.
    fallback_patterns = (
        # Number directly after the family label, no leading digits.
        r'(?:الرقم العائلي|ژماردی خیزانی|ژماره ی خیزانی|العائلى از|العائلي)\s*[::]?\s*([A-Za-z0-9]+)',
        # Number on a following line, separated from the label.
        r'(?:الرقم العائلي|ژماردی خیزانی|العائلي|العائلى از)\s*[::]?\s*\n*([A-Za-z0-9]+)',
        # Label that ends without a colon.
        r'(?:الرقم العائلي|ژماردی خیزانی|العائلي|العائلى از)\s*\n*([A-Za-z0-9]+)',
        # Number followed by a colon or stray accent-like symbol.
        r'(\d{4,6}[A-Za-z0-9]+)\s*[:ˋˋˋˋˋˋˋˋ]',
        # Bare number with no label or clear delimiter.
        r'(\d{4,6}[A-Za-z0-9]+)\s*[:P]?\s*',
    )

    hit = re.search(labelled_with_prefix, arabic_text)
    if hit is not None:
        leading_digits, number = hit.groups()
        return {"family_number": (leading_digits or '') + number}

    for candidate in fallback_patterns:
        hit = re.search(candidate, arabic_text)
        if hit is not None:
            return {"family_number": hit.group(1)}

    return {"family_number": None}
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def extract_mother_surname(text):
    """Extract the value printed after the grandfather label that follows the mother label.

    The text is searched for a mother label and then, either immediately
    after the mother's value or anywhere after a later newline, a
    grandfather label. Whatever follows that label on the same line is
    returned after label residue is stripped from it.

    Args:
        text: OCR text to search.

    Returns:
        ``{"mother_last_name": value}`` where ``value`` is the cleaned string,
        or ``None`` when the labels are not found or nothing is left after
        cleaning.
    """
    # The gap between the two labels is written as an optional "newline then
    # anything" group. Repeating that group (``(?:\n.*?)*``) under DOTALL
    # accepts the same strings but lets the engine split the gap in
    # exponentially many ways when the grandfather label is missing.
    labels_pattern = r"(?:الأم|دايك|اديك|دایك)\s*[::]?\s*(\S+)?(?:\n.*?)?(?:الجد|باپير|بابير|بايير|باپیر)\s*[::]?\s*([^\n]*)"
    label_residue = r"[/:\s]*(بابير|ابير|باپير)[:/\s]*"

    match = re.search(labels_pattern, text, re.DOTALL)
    if match is None:
        return {"mother_last_name": None}

    grandfather_value = (match.group(2) or "").strip()
    cleaned_text = re.sub(label_residue, "", grandfather_value).strip()

    # An empty result is reported as None rather than "".
    return {"mother_last_name": cleaned_text or None}
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def extract_mother_name(text):
    """Extract the mother's first name that follows a mother label in *text*.

    Every mother label is examined; when several yield a usable name, the
    last one wins.

    Args:
        text: OCR text to search.

    Returns:
        ``{"mother_first_name": value}`` where ``value`` is the first line of
        the cleaned name, or ``None`` when no label yields a non-empty name.
    """
    pattern_mother_name = r"(?:الأم|دايك|دایك)\s*[::]?\s*(?:[\n\s]*(\d{4}-\d{2}-\d{2}))?\s*([\u0621-\u064A\s]+)"
    # Neighbouring label fragments that the capture group can swallow.
    label_fragments = r"(الجد|الام|با|بابير|فصيل|الدم)"

    # Start from a defined "not found" result so a text without a usable
    # match returns a dict instead of hitting an unbound local.
    result = {"mother_first_name": None}

    for match in re.finditer(pattern_mother_name, text):
        candidate = match.group(2).strip()
        if not candidate:
            continue
        cleaned_name = re.sub(label_fragments, "", candidate).strip()
        if cleaned_name:
            # Only the first line of a multi-line capture is kept.
            result = {"mother_first_name": cleaned_name.split('\n')[0]}

    return result
|
|
181
|
-
|
|
182
|
-
def extract_paternal_grandfather_name(text):
    """Extract the value printed after the first usable grandfather label.

    Args:
        text: OCR text to search.

    Returns:
        ``{"third_name": value}`` where ``value`` is the first non-blank
        capture (stripped), or ``None`` when no label has a non-blank value.
    """
    pattern_paternal_grandfather = r"(?:الجد|باپير|بابير|بايير|باپیر)\s*[::]?\s*([^\n/:]*)"

    # "No match" is an ordinary outcome, so it is handled by falling through
    # the loop rather than by raising and catching an IndexError.
    for captured in re.findall(pattern_paternal_grandfather, text):
        name = captured.strip()
        if name:
            return {"third_name": name}

    return {"third_name": None}
|
|
194
|
-
|
|
195
|
-
def update_family_number_cases(dictt, text_back):
    """Replace a short family number with one re-read from *text_back*.

    When ``dictt["family_number"]`` is shorter than 11 characters, the text
    is searched for a labelled family number written as space-separated
    pieces; the second piece followed by the first becomes the new value.
    Any failure along the way (missing key, non-sized value, fewer than two
    pieces) leaves *dictt* unchanged.

    Args:
        dictt: Dict holding a ``"family_number"`` entry; mutated in place.
        text_back: Text to search.

    Returns:
        The same *dictt* object.
    """
    labelled_number = r'(?:الرقم العائلي|رقم العائلة)\s*/?\s*(?:زمارهی خیزانی)?\s*:\s*([\dA-Z\s]+)'
    try:
        if len(dictt["family_number"]) >= 11:
            print("value greater than 11")
            return dictt

        found = re.search(labelled_number, text_back)
        if found is None:
            print("Family number not found.")
            return dictt

        # The two printed pieces are joined in swapped order.
        pieces = found.group(1).split(" ")
        dictt['family_number'] = pieces[1] + pieces[0]
    except Exception:
        print("None!")
    return dictt
|
|
214
|
-
|
|
215
|
-
def detect_id_card(client, image_data, id_texts, image_format, part=None, country=None):
    """Crop the detected text region out of an image and optionally OCR a sub-part.

    The crop box is the bounding polygon of the first annotation (vertices 0
    and 2) padded by 30 pixels; the left/top edges are clamped at 0. A crop
    that is taller than wide is rotated by 90 degrees.

    Args:
        client: Client passed on to ``extract_text_from_image_data``.
        image_data: Encoded image bytes.
        id_texts: Text annotations; only the first entry is used.
        image_format: Extension used for the temporary copy of the crop.
        part: ``'second'`` (right half), ``'third'`` (bottom third) or a
            falsy value for no sub-part.
        country: Unused.

    Returns:
        ``None`` when *id_texts* is empty; ``(id_card, tampered_result)``
        when *part* is falsy; otherwise
        ``(id_card, part_img, part_text, tampered_result)``.
        ``tampered_result`` is always ``'clear'``.

    Raises:
        ValueError: If *part* is truthy but not ``'second'`` or ``'third'``.
    """
    if not id_texts:
        print('No text found in the image.')
        return None

    # Fail with a clear message instead of an unbound-local error later on.
    if part and part not in ('second', 'third'):
        raise ValueError(f"part must be 'second', 'third' or None, got {part!r}")

    vertices = id_texts[0].bounding_poly.vertices
    left, top = vertices[0].x, vertices[0].y
    right, bottom = vertices[2].x, vertices[2].y

    padding = 30
    crop_box = (
        max(0, left - padding),
        max(0, top - padding),
        right + padding,
        bottom + padding,
    )

    with Image.open(io.BytesIO(image_data)) as img:
        id_card = img.crop(crop_box)

    # The crop is still written out in the requested format, although nothing
    # reads the file while the tamper check is disabled. The context manager
    # removes both the file and its directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        id_card.save(os.path.join(temp_dir, f"cropped_img_original.{image_format}"))

    width, height = id_card.size
    if width < height:
        id_card = id_card.rotate(90, expand=True)

    # Tamper detection is currently disabled; every card is reported clear.
    tampered_result = 'clear'

    if not part:
        return id_card, tampered_result

    part_img = crop_second_part(id_card) if part == 'second' else crop_third_part(id_card)

    # Second OCR call on the sub-part; fall back to the text of the whole
    # image when that call fails.
    try:
        part_text = extract_text_from_image_data(client, part_img)
    except Exception:
        part_text = id_texts[0].description

    return id_card, part_img, part_text, tampered_result
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
def extract_name_fields_from_raw(text):
    """Extract name and gender fields from the colon-separated values in *text*.

    Values following a colon are collected, digits are removed, and values
    with more than three words or containing ``/`` or ``|`` are dropped.
    The remaining values are read positionally: index 0 is the given name,
    index 1 the father's name, index 3 the surname and the second-to-last
    value is the gender.

    Args:
        text: OCR text to parse.

    Returns:
        A dict with the keys ``gender``, ``gender_ar``, ``name``,
        ``first_name``, ``father_name``, ``last_name``, ``first_name_en``,
        ``father_name_en``, ``last_name_en`` and ``name_en``; or ``{}`` when
        anything fails (too few values, translation errors, ...).
    """
    try:
        raw_values = re.findall(r':\s*([^:\n]*)', text)

        generic_fields_result = []
        for item in raw_values:
            no_digits = ''.join(char for char in item if not char.isdigit())
            if no_digits.strip():
                generic_fields_result.append(no_digits)

        generic_fields_result = [
            word for word in generic_fields_result
            if len(word.split()) <= 3 and '/' not in word and '|' not in word
        ]

        # Positional reads below raise IndexError when there are too few
        # values; the outer handler turns that into an empty result.
        if len(generic_fields_result[0].split()) <= 2 and len(generic_fields_result) <= 8:
            given_name = generic_fields_result[0]
        else:
            given_name = ''

        if len(generic_fields_result[1].split()) <= 2:
            fathers_name = generic_fields_result[1]
        else:
            fathers_name = ''

        if len(generic_fields_result[3].split()) <= 2:
            surname = generic_fields_result[3]
            # Strip label fragments and stray symbols picked up with the value.
            for fragment in ("الأم", "دايك", "مديرية ال", "/", "البطاقة الو", "ذكر", "الام", "]"):
                surname = surname.replace(fragment, "")
        else:
            surname = ''

        try:
            gender = ''
            gender_ar = generic_fields_result[-2]
            try:
                gender = translator.translate(gender_ar, src='ar', dest='en').text
            except Exception:
                gender = GoogleTranslator('ar', 'en').translate(gender_ar)

            if str(gender).lower() == 'feminine':
                gender = 'female'

            if gender.lower() in ('male', 'female'):
                # Keep a usable translation. The previous `!= 'male' or
                # != 'female'` test was always true and discarded it.
                gender = gender.lower()
            elif 'ذكر' in generic_fields_result:
                gender = 'male'
            elif 'انثى' in generic_fields_result:
                gender = 'female'
            else:
                gender = ''
        except Exception:
            gender_ar, gender = '', None

        # Always non-empty: the f-string contributes the separating spaces.
        name = f"{given_name} {fathers_name} {surname}"

        first_name_en, fathers_name_en, last_name_en = '', '', ''

        name_en = GoogleTranslator('ar', 'en').translate(name).upper()
        if name_en:
            name_list = name_en.split(' ')
            if len(name_list) >= 2:
                first_name_en = name_list[0]
                fathers_name_en = name_list[1]
                last_name_en = name_list[-1]

                # Surnames with a prefix such as Al-, Bin- or Abu- may be
                # split so that the last token starts with the hyphen.
                if last_name_en.startswith('-'):
                    last_name_en = name_list[-2] + name_list[-1]

        return {
            "gender": gender,
            "gender_ar": gender_ar,
            "name": name,
            "first_name": given_name,
            "father_name": fathers_name,
            "last_name": surname,
            "first_name_en": first_name_en,
            "father_name_en": fathers_name_en,
            "last_name_en": last_name_en,
            "name_en": name_en,
        }

    except Exception:
        return {}
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
def identify_front(text):
    """Report whether *text* contains one of the front-side keywords.

    The keywords are matched literally and case-insensitively.

    Returns:
        ``True`` or ``False`` for a successful search, or the string
        ``'error'`` when the search itself fails (for example on non-string
        input).
    """
    keywords = ("The Republic of Iraq", "The Ministry of Interior", "National Card")
    alternation = '|'.join(re.escape(keyword) for keyword in keywords)

    try:
        return re.search(alternation, text, re.IGNORECASE) is not None
    except Exception:
        return 'error'
|
|
375
|
-
|
|
376
|
-
def extract_numeric_fields_from_raw(ar_front_data, front_data):
    """Extract gender, 12-digit id number and card number from front-side text.

    Args:
        ar_front_data: Original OCR text; searched for the card number and,
            as a fallback, for the id number.
        front_data: Translated text; searched for the ``Sex`` field and,
            first, for the id number.

    Returns:
        Dict with ``gender``, ``id_number_front`` and ``card_number_front``;
        each is ``''`` when not found. The card number is upper-cased.
    """
    twelve_digits = r"\b\d{12}\b"
    card_number_shape = r"\b[A-Za-z]{2}\d{7}\b|\b[A-Za-z]\d{8}\b"

    sex_hit = re.search(r"Sex.*?:\s*(\w+)", front_data, re.IGNORECASE)
    gender = sex_hit.group(1) if sex_hit else ''

    # Spaces are removed before searching so digit groups split by OCR join up.
    id_hit = re.search(twelve_digits, front_data.replace(" ", ""), re.IGNORECASE)
    if id_hit:
        id_number = id_hit.group(0)
    else:
        try:
            id_number = re.search(
                twelve_digits, ar_front_data.replace(" ", ""), re.IGNORECASE
            ).group(0)
        except Exception:
            id_number = ''

    # Colons are dropped and '$' is read as 'S' before matching the card number.
    card_source = ar_front_data.replace(" ", "").replace(":", "").replace("$", "S")
    card_hit = re.search(card_number_shape, card_source, re.IGNORECASE)
    rfid_number = card_hit.group(0).upper() if card_hit else ''

    return {
        "gender": gender,
        "id_number_front": id_number,
        "card_number_front": rfid_number,
    }
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
def iraq_front_id_extraction(client, image_data, front_id_text, front_id_text_description, front_translated_data, image_format):
    """Assemble the front-side fields of an id card into one dict.

    Names are parsed from the OCR text of the card's right half, falling
    back to the full-image text when that yields nothing. Numeric fields
    come from the full-image text and its translation. The gender is taken
    from the name fields when it is male/female, otherwise from the numeric
    fields, and is returned upper-cased.

    Args:
        client: Client passed on to ``detect_id_card``.
        image_data: Encoded image bytes.
        front_id_text: Text annotations of the front image.
        front_id_text_description: Full OCR text of the front image.
        front_translated_data: Translated OCR text of the front image.
        image_format: Image extension passed on to ``detect_id_card``.

    Returns:
        Dict that always contains ``first_name``, ``last_name``, ``name``,
        ``gender`` and ``front_tampered_result`` plus whatever the name and
        numeric extractors produced.
    """
    _, _, second_part_text, tampered_result_front = detect_id_card(
        client, image_data, front_id_text, image_format, part='second'
    )

    front_data = extract_name_fields_from_raw(second_part_text)
    numeric_fields = extract_numeric_fields_from_raw(front_id_text_description, front_translated_data)

    if not front_data:
        front_data = extract_name_fields_from_raw(front_id_text_description)

    # First source that reports a plain male/female value wins.
    gender = ''
    for candidate in (front_data.get('gender', ''), numeric_fields.get('gender', '')):
        if candidate and candidate.lower() in ('male', 'female'):
            gender = candidate
            break

    front_data.update(numeric_fields)
    front_data['gender'] = gender
    if gender:
        try:
            front_data['gender_ar'] = translator.translate(gender, src='en', dest='ar').text
        except Exception:
            front_data['gender_ar'] = GoogleTranslator('en', 'ar').translate(gender)

    front_data['front_tampered_result'] = tampered_result_front

    front_data_final = {'first_name': '', 'last_name': '', 'name': ''}
    front_data_final.update(front_data)

    # Upper-case the gender and expand a single-letter code to the full word.
    normalised = front_data_final["gender"].strip().upper()
    front_data_final["gender"] = {"F": "FEMALE", "M": "MALE"}.get(normalised, normalised)

    return front_data_final
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
def find_mrz2_from_original(back_data):
    """Find a run of six or more digits followed, on the same line, by filler characters.

    The filler is two or more ``<`` characters, or two or more ``く``
    characters (accepted as an alternative filler).

    Returns:
        The matched text, or ``None`` when nothing matches.
    """
    digits_then_filler = r'\b\d{6,}.*?<{2,}|\b\d{6,}.*?く{2,}'
    hit = re.search(digits_then_filler, back_data, re.MULTILINE)
    if hit is None:
        return None
    return hit.group(0)
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
def handle_name_extraction(third_part_text):
    """Read the holder's names from the name line of the MRZ in *third_part_text*.

    The MRZ is located by its ``IDIRQ`` prefix, or by ``IDIRC``, the variant
    that ``handle_mrz_extraction`` also accepts. The name line is the first
    MRZ line after a newline that starts with six or more letters or ``<``.
    Its first ``<``-separated token is the last name and the second the
    first name; a single token is treated as the first name. Every ``X`` is
    removed from the upper-cased names.

    Args:
        third_part_text: OCR text of the part of the card holding the MRZ.

    Returns:
        ``{"first_name_back": ..., "last_name_back": ...}`` when a name line
        is found (values may be empty strings), otherwise ``{}``.
    """
    mrz_pattern = r'(IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'

    try:
        mrz = re.findall(mrz_pattern, third_part_text.replace(" ", "").strip(), re.MULTILINE)
        # Raises IndexError when no MRZ was found; handled below.
        mrz_str = mrz[0].replace(" ", "")

        name_line = re.search(r'[\n](?:[a-zA-Z<]{6,})', mrz_str)
        if not name_line:
            return {}
        mrz3 = name_line.group(0).replace("\n", "")

        first_name, last_name = '', ''
        name_list = re.findall(r'\b[^<\s]+\b', mrz3)
        if len(name_list) > 1:
            first_name = name_list[1].upper().replace("X", "")
            last_name = name_list[0].upper().replace("X", "")
        elif name_list:
            first_name = name_list[0].upper().replace("X", "")

        return {
            "first_name_back": first_name,
            "last_name_back": last_name,
        }

    except Exception:
        return {}
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
def handle_mrz_extraction(third_part_text, back_data, back_data_dict):
    """Parse the MRZ of the card's back side into its individual fields.

    The MRZ is first looked up in *third_part_text*. Several fallbacks then
    re-read individual fields from *back_data* when the first pass leaves
    them empty. Values already present in *back_data_dict* (``id_number``,
    ``card_number``, ``mrz1``) take precedence over the first pass.

    Args:
        third_part_text: OCR text of the part of the card holding the MRZ.
        back_data: Full OCR text of the back side.
        back_data_dict: Dict of previously extracted fields.

    Returns:
        Dict with the keys ``id_number``, ``card_number``, ``nationality``,
        ``mrz`` (one-element list), ``mrz1``, ``mrz2``, ``mrz3``,
        ``dob_mrz`` and ``expiry_date_mrz``. Missing text fields are ``''``;
        missing MRZ lines are ``None``.
    """
    mrz_pattern = r'(IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'
    # Group 1 is the whole match. Groups 2/3 hold the card number and the
    # 13-digit run of the IDIRQ alternative, groups 4/5 those of IDIRC.
    mrz1_data_pattern = r'(IDIRQ([\S]{2}\d{7}|[\S]\d{8}).*?(\d{13})|IDIRC([\S]{2}\d{7}|[\S]\d{8}).*?(\d{13}))'
    # Three capitals directly after a non-space character and seven digits.
    nationality_in_mrz = r'(?<=[\S]\d{7})[A-Z]{3}'

    mrz = []
    try:
        mrz = re.findall(mrz_pattern, third_part_text.replace(" ", "").strip(), re.MULTILINE)
        mrz_str = mrz[0].replace(" ", "")
    except Exception:
        mrz_str = ''

    mrz1 = re.search(r'(IDIRQ.*?<{2,}|IDIRC.*?<{2,})', mrz_str, re.DOTALL)
    mrz1 = mrz1.group(1) if mrz1 else None

    mrz2 = re.search(r'\b\d{6,}.*?<{2,}', mrz_str, re.MULTILINE)
    mrz2 = mrz2.group(0) if mrz2 else None

    mrz3 = re.search(r'[\n](?:[a-zA-Z<]{6,})', mrz_str)
    mrz3 = mrz3.group(0).replace("\n", "") if mrz3 else None

    rfid_number = ''
    id_number = ''

    mrz1_data_match = re.search(mrz1_data_pattern, mrz_str)
    if mrz1_data_match:
        # Read the inner groups of whichever alternative matched. Reading
        # groups 1 and 2 would give the whole match and the card number.
        rfid_number = mrz1_data_match.group(2) or mrz1_data_match.group(4)
        id_number = mrz1_data_match.group(3) or mrz1_data_match.group(5)

    rfid_number = rfid_number.upper()
    # The first digit of the 13-digit run is dropped.
    id_number = id_number[1:14]

    nationality = ''
    if mrz:
        compact_mrz = mrz[0].replace(" ", "")
        national = (
            re.search(nationality_in_mrz, compact_mrz)
            or re.search(nationality_in_mrz, compact_mrz.replace("\n", ""))
        )
        if national:
            nationality = national.group()

    dob_mrz, expiry_date_mrz = '', ''
    if mrz2:
        try:
            dob_match = re.search(r'(\d+)[MF]', mrz2)
            dob_mrz = convert_dob(dob_match.group(1)) if dob_match else ''

            doe_match = re.search(r'[MF](\d+)', mrz2)
            expiry_date_mrz = convert_expiry_date(doe_match.group(1)) if doe_match else ''
        except Exception:
            # A failed conversion discards both dates.
            dob_mrz, expiry_date_mrz = '', ''

    if back_data_dict.get('id_number'):
        id_number = back_data_dict['id_number']

    if back_data_dict.get('card_number'):
        rfid_number = back_data_dict['card_number']

    if back_data_dict.get('mrz1'):
        mrz1 = back_data_dict['mrz1']

    back_data_new = {
        "id_number": id_number,
        "card_number": rfid_number,
        "nationality": nationality,
        "mrz": [mrz_str],
        "mrz1": mrz1,
        "mrz2": mrz2,
        "mrz3": mrz3,
        "dob_mrz": dob_mrz,
        "expiry_date_mrz": expiry_date_mrz
    }

    # Fallback for id and card number: re-read the MRZ from the full back
    # text, accepting any two characters after "IDI".
    if not (back_data_new.get('id_number') or back_data_new.get('card_number')):
        loose_mrz_pattern = r'(IDI[\S]{2}.*\n*.*\n*.*\n*.*|IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'
        loose_mrz1_data_pattern = r'IDI[\S]{2}([\S]{2}\d{7}|[\S]\d{8}).*?(\d{13})'

        try:
            mrz = re.findall(loose_mrz_pattern, back_data.replace(" ", "").strip(), re.MULTILINE)
            mrz_str = mrz[0].replace(" ", "")
        except Exception:
            mrz_str = ''

        back_data_new['mrz'] = [mrz_str]

        mrz1 = re.search(r'(IDI[\S]{2}.*?<{2,})', mrz_str, re.DOTALL)
        mrz1 = mrz1.group(1) if mrz1 else None
        back_data_new['mrz1'] = mrz1

        mrz2 = re.search(r'\b\d{7}.*?(?:<<\d|<<\n)', mrz_str)
        mrz2 = mrz2.group(0) if mrz2 else None
        back_data_new['mrz2'] = mrz2

        mrz3 = re.search(r'[\n](?:[a-zA-Z<]{6,})', mrz_str)
        mrz3 = mrz3.group(0).replace("\n", "") if mrz3 else None
        back_data_new['mrz3'] = mrz3

        rfid_number = ''
        id_number = ''

        mrz1_data_match = re.search(loose_mrz1_data_pattern, mrz_str)
        if mrz1_data_match:
            rfid_number = mrz1_data_match.group(1)
            id_number = mrz1_data_match.group(2)

        back_data_new['id_number'] = id_number[1:14]
        back_data_new['card_number'] = rfid_number.upper()

    # Fallback for date of birth and expiry date.
    if not (back_data_new.get('dob_mrz') or back_data_new.get('expiry_date_mrz')):
        if not mrz2:
            mrz2 = re.search(r'\b\d{6,}.*?<{2,}|\b\d{6,}.*?く{2,}', mrz_str, re.MULTILINE)
            mrz2 = mrz2.group(0) if mrz2 else find_mrz2_from_original(back_data.replace(" ", "").strip())

        if mrz2:
            dob_match = re.search(r"(\d{7})[MF]", mrz2)
            if dob_match:
                back_data_new['dob_mrz'] = convert_dob(dob_match.group(1))
            else:
                # No M/F marker: take the dates from a long digit run that
                # is followed by a three-character code with 'R' in the middle.
                dob_match = re.search(r'(\d{12,})[\S]R[\S]\b', mrz2)
                if dob_match:
                    back_data_new['dob_mrz'] = convert_dob(dob_match.group(1)[:7])

            doe_match = re.search(r"[MF](\d+)", mrz2)
            if doe_match:
                back_data_new['expiry_date_mrz'] = convert_expiry_date(doe_match.group(1))
            else:
                doe_match = re.search(r'(\d{12,})[\S]R[\S]\b', mrz2)
                if doe_match:
                    expiry = doe_match.group(1)[8:]
                    if len(expiry) < 7:
                        expiry = doe_match.group(1)[7:]
                    back_data_new['expiry_date_mrz'] = convert_expiry_date(expiry)

    # Fallback for nationality, first from the MRZ in the full back text.
    if not back_data_new.get('nationality'):
        # No empty alternative here: one would add empty-string matches to the
        # findall result, typically ahead of the real MRZ.
        loose_mrz_pattern = r'(IDI[\S]{2}.*\n*.*\n*.*\n*.*|IDIRQ[\S].*\n*.*\n*.*\n*.*|IDIRC[\S].*\n*.*\n*.*\n*.*)'
        try:
            mrz = re.findall(loose_mrz_pattern, back_data.replace(" ", "").strip(), re.MULTILINE)
        except Exception:
            mrz = []

        if mrz:
            national = re.search(nationality_in_mrz, mrz[0].replace(" ", "").replace("\n", ""))
            back_data_new['nationality'] = national.group() if national else ''

    # Last resort: three non-space characters after the digits of the second
    # MRZ line. Skipped when that line is unknown (searching None would raise).
    if not back_data_new.get('nationality') and mrz2:
        nationality_match = re.search(r'\d{6,}([\S]{3})\b', mrz2)
        if nationality_match:
            back_data_new['nationality'] = nationality_match.group(1)

    return back_data_new
|
|
675
|
-
|
|
676
|
-
def count_digits(text):
    """Return how many characters of *text* match the regex class ``\\d``."""
    return sum(1 for _ in re.finditer(r'\d', text))
|
|
678
|
-
|
|
679
|
-
def find_gender_from_back(text):
    """Return the gender letter found between two digits in *text*.

    An ``M``/``F`` (either case) enclosed by digits is preferred. Only when
    none exists does the function fall back to the first letter of any kind
    enclosed by digits.

    Args:
        text: Text to search.

    Returns:
        The matched letter as found in the text, or ``''`` when no letter
        sits between two digits.
    """
    # The specific pattern must come first: every M/F hit also satisfies the
    # generic one, so trying the generic pattern first would make the M/F
    # pattern unreachable and return whichever letter appears earliest.
    for letter_between_digits in (r'\d([MFmf])\d', r'\d([A-Za-z])\d'):
        hit = re.search(letter_between_digits, text)
        if hit:
            return hit.group(1)
    return ''
|
|
695
|
-
|
|
696
|
-
def _translate_ar_to_en(text):
    """Translate Arabic *text* to English.

    The module-level ``translator`` object is tried first and its result is
    upper-cased.  If that call fails for any reason, ``GoogleTranslator`` is
    used instead; its result is returned as-is (not upper-cased), which keeps
    the behaviour this module has always had.
    """
    try:
        return translator.translate(text, src='ar', dest='en').text.upper()
    except Exception:
        return GoogleTranslator('ar', 'en').translate(text)


def iraq_back_id_extraction(client, image_data, back_id_text, back_data, image_format):
    """Extract structured fields from the OCR text of an Iraqi ID card's back side.

    The function locates the three-line MRZ inside ``back_data``, repairs a few
    common OCR mistakes in it, and derives the document fields from the MRZ and
    from the dates / Arabic labels printed on the card.

    Args:
        client: Passed through unchanged to ``detect_id_card``.
        image_data: Passed through unchanged to ``detect_id_card``.
        back_id_text: Passed through unchanged to ``detect_id_card``.
        back_data: OCR text of the back side, as a single string with
            newline-separated lines.
        image_format: Passed through unchanged to ``detect_id_card``.

    Returns:
        dict with the keys ``mrz``, ``mrz1``, ``mrz2``, ``mrz3``, ``id_number``,
        ``card_number``, ``dob``, ``issue_date``, ``first_name_back``,
        ``last_name_back``, ``expiry_date``, ``nationality``,
        ``issuing_authority``, ``place_of_birth``, ``issuing_authority_en``,
        ``place_of_birth_en``, ``issuing_country``, ``dob_mrz``,
        ``expiry_date_mrz``, ``gender_back`` and ``back_tampered_result``.
        Fields that cannot be determined are returned as empty strings.
    """
    mrz_pattern = r'(IDIRQA.*\n*.*\n*.*\n*.*|IDIRQC.*\n*.*\n*.*\n*.*|IDIR.*\n*.*\n*.*\n*.*)'
    place_of_birth_pattern = r'(?:محل|الولادة)[^:]*:\s*(.*?)\n'
    issuing_authority_pattern_1 = r"مديرية الجنسية والمعلومات المدنية"
    issuing_authority_pattern_2 = r"دائرة احوال -.*?(?=\n|\r|$)"
    date_pattern = r'\d{4}/\d{2}/\d{2}'
    mrz2_filler = '<<<<<<<<<<'

    mrz1, mrz2, mrz3 = '', '', ''

    # Grab the raw MRZ block (spaces removed); '' when nothing resembles one.
    try:
        mrz_blocks = re.findall(mrz_pattern, back_data.replace(" ", "").strip(), re.MULTILINE)
        raw_mrz = mrz_blocks[0].replace(" ", "")
    except Exception:
        raw_mrz = ''

    if raw_mrz:
        mrz_list = raw_mrz.split("\n")
        mrz1 = mrz_list[0]

        # MRZ line 3 (names): first OCR line made only of letters and chevrons
        # that contains at least two '<'.  A '>' is treated as a misread '<'.
        try:
            cleaned_lines = [
                remove_special_characters1(raw_line).replace(' ', '')
                for raw_line in back_data.split('\n')
            ]
            mrz3 = next(
                (
                    line.replace('>', '<')
                    for line in cleaned_lines
                    if line.count('<') >= 2 and re.fullmatch(r'[A-Za-z<>]+', line)
                ),
                '',
            )
        except Exception:
            mrz3 = ''

        # MRZ line 2: whatever remains once lines 1 and 3 are excluded.
        try:
            candidates = [
                line for line in mrz_list
                if line not in (mrz1, mrz3) and remove_special_characters_mrz2(line) != ''
            ]
            if len(candidates) > 1:
                # OCR split the line: put the digit-heavy fragment first and
                # append the first fragment that differs from it.
                digit_line = max(candidates, key=count_digits)
                other_line = [line for line in candidates if line != digit_line][0]
                mrz2 = digit_line + other_line

                strict_match = re.search(r'\d{7}[MF]\d{7}[\S]{3}<+?\d', mrz2.replace(">", ""))
                if strict_match:
                    mrz2 = strict_match.group(0)

                pieces = mrz2.split('<')
                mrz2 = pieces[0] + mrz2_filler + pieces[-1]
            else:
                # Zero candidates raises IndexError below and leaves mrz2 empty.
                only_line = candidates[0]
                mrz2 = only_line.split('<')[0] + mrz2_filler + only_line[-1]
        except Exception:
            mrz2 = ''

        # OCR fix: a letter 'O' right after the IDIRQ prefix letters and before
        # a digit is really a zero.
        mrz1 = re.sub(r'(IDIRQ[A-Z]{1,2})O(?=[0-9])', r'\g<1>0', mrz1)

        # OCR fix: a trailing '>' on line 2 is a misread '7'.
        if mrz2.endswith('>'):
            mrz2 = mrz2.split('<')[0] + mrz2_filler + '7'

        # Pad line 3 with fillers up to 30 characters.
        mrz3 = mrz3.ljust(30, '<')

    # Fixed-position fields of line 1, counted from the text after 'IDIR'.
    mrz1_payload = mrz1.split('IDIR')[-1]
    rfid_number = mrz1_payload[1:10]
    id_number = mrz1_payload[11:23]

    # Dates printed on the card.  The 'YYYY/MM/DD' layout makes the plain
    # string sort chronological: earliest is used as the birth-date fallback,
    # second as the issue date, latest as the expiry fallback.
    printed_dates = sorted(re.findall(date_pattern, back_data))

    dob = func_dob(raw_mrz)
    if not dob:
        # Previously an IndexError when the card showed no date at all.
        dob = printed_dates[0] if printed_dates else ''

    expiry = func_expiry_date(raw_mrz)
    if not expiry:
        expiry = printed_dates[-1] if printed_dates else ''

    issue_date = printed_dates[1] if len(printed_dates) >= 2 else ''

    # Nationality: last three characters before the first filler of line 2.
    nationality = mrz2.split('<')[0][-3:]

    first_name, last_name = '', ''
    name_list = re.findall(r'\b[^<\s]+\b', mrz3) if mrz3 else []
    if len(name_list) > 1:
        first_name = name_list[1].upper().replace("X", "")
        last_name = name_list[0].upper().replace("X", "")
    elif name_list:
        # Guarded: a filler-only line 3 yields no names instead of raising.
        first_name = name_list[0].upper().replace("X", "")

    # Issuing authority: the second pattern wins when both are present.
    issuing_authority = ''
    issuing_authority_match_1 = re.search(issuing_authority_pattern_1, back_data)
    issuing_authority_match_2 = re.search(issuing_authority_pattern_2, back_data)
    if issuing_authority_match_1:
        issuing_authority = issuing_authority_match_1.group(0)
    if issuing_authority_match_2:
        issuing_authority = issuing_authority_match_2.group(0)

    place_of_birth = ''
    place_of_birth_match = re.search(place_of_birth_pattern, back_data)
    if place_of_birth_match:
        pob_parts = place_of_birth_match.group(1).strip().split(":")
        # When the captured value itself contains a colon, keep what follows it.
        place_of_birth = pob_parts[1].strip() if len(pob_parts) >= 2 else pob_parts[0]

    issuing_authority_en = _translate_ar_to_en(issuing_authority) if issuing_authority else ''
    place_of_birth_en = _translate_ar_to_en(place_of_birth) if place_of_birth else ''

    # Dates encoded in line 2: digits before / after the M-or-F sex marker.
    try:
        dob_match = re.search(r'(\d+)[MF]', mrz2)
        dob_mrz = convert_dob(dob_match.group(1)) if dob_match else ''

        doe_match = re.search(r'[MF](\d+)', mrz2)
        expiry_date_mrz = convert_expiry_date(doe_match.group(1)) if doe_match else ''
    except Exception:
        dob_mrz, expiry_date_mrz = '', ''

    gender = find_gender_from_back(mrz2).strip().upper()
    gender = {"F": "FEMALE", "M": "MALE"}.get(gender, gender)

    # Common OCR confusions of 'IRQ'.
    if nationality in ('1RQ', 'IRG'):
        nationality = 'IRQ'

    back_data_dict = {
        "mrz": [f"{mrz1}\n{mrz2}\n{mrz3}"],
        "mrz1": mrz1,
        "mrz2": mrz2,
        "mrz3": mrz3,
        "id_number": id_number,
        "card_number": rfid_number,
        "dob": dob,
        "issue_date": issue_date,
        "first_name_back": first_name,
        "last_name_back": last_name,
        "expiry_date": expiry,
        "nationality": nationality,
        "issuing_authority": issuing_authority,
        "place_of_birth": place_of_birth,
        "issuing_authority_en": issuing_authority_en,
        "place_of_birth_en": place_of_birth_en,
        "issuing_country": "IRQ",
        "dob_mrz": dob_mrz,
        "expiry_date_mrz": expiry_date_mrz,
        "gender_back": gender,
    }

    # Only the tampering verdict is used; the first returned value is ignored.
    _, tampered_result_back = detect_id_card(client, image_data, back_id_text, image_format)
    back_data_dict['back_tampered_result'] = tampered_result_back

    return back_data_dict
|