idvpackage 3.0.11__py3-none-any.whl → 3.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- idvpackage/common.py +4 -962
- idvpackage/iraq_id_extraction_withopenai.py +374 -893
- idvpackage/jor_passport_extraction.py +1 -6
- idvpackage/liveness_spoofing_v2.py +2 -45
- idvpackage/ocr.py +1011 -2427
- idvpackage/ocr_utils.py +144 -486
- idvpackage/pse_passport_extraction.py +18 -292
- idvpackage/qatar_id_extraction.py +4 -956
- idvpackage/sudan_passport_extraction.py +0 -928
- idvpackage/syr_passport_extraction.py +27 -402
- idvpackage/uae_id_extraction.py +87 -151
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/METADATA +1 -1
- idvpackage-3.0.12.dist-info/RECORD +34 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/WHEEL +1 -1
- idvpackage/ekyc.py +0 -78
- idvpackage/genai_utils.py +0 -309
- idvpackage/iraq_id_extraction.py +0 -992
- idvpackage/iraq_passport_extraction.py +0 -588
- idvpackage/lazy_imports.py +0 -44
- idvpackage/lebanon_passport_extraction.py +0 -161
- idvpackage/sau_id_extraction.py +0 -248
- idvpackage/sudan_id_extraction.py +0 -764
- idvpackage-3.0.11.dist-info/RECORD +0 -42
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/licenses/LICENSE +0 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/top_level.txt +0 -0
|
@@ -1,588 +0,0 @@
|
|
|
1
|
-
from googletrans import Translator
|
|
2
|
-
import re
|
|
3
|
-
from datetime import datetime
|
|
4
|
-
import gender_guesser.detector as gender
|
|
5
|
-
import pycountry
|
|
6
|
-
from rapidfuzz import fuzz
|
|
7
|
-
from idvpackage.common import *
|
|
8
|
-
|
|
9
|
-
# Module-level googletrans client, created at import time.
# NOTE(review): translated_gender_identifier() builds its own Translator, so
# nothing visible in this module reads this instance - confirm whether other
# modules import it before removing.
translator = Translator()
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def convert_expiry_date(input_date):
    """Convert an MRZ-style ``YYMMDD`` expiry string to ``DD/MM/YYYY``.

    The two-digit year is always placed in the current century (taken from
    ``datetime.now()``). Earlier revisions compared the year with the current
    two-digit year, but both branches picked the same century, so that dead
    comparison has been removed without changing any result.

    Args:
        input_date: String whose first six characters are ``YYMMDD``. Extra
            trailing characters are ignored; shorter strings yield empty
            day/month parts.

    Returns:
        The date as ``"DD/MM/YYYY"``. No calendar validation is performed.

    Raises:
        ValueError: If the two-digit year slice is not numeric.
    """
    year = input_date[0:2]
    month = input_date[2:4]
    day = input_date[4:6]

    # Kept only for its side effect: reject non-numeric years exactly as the
    # previous implementation did.
    int(year)

    century = datetime.now().year // 100
    return f"{day}/{month}/{century}{year}"
|
|
29
|
-
|
|
30
|
-
def extract_mother_name_and_surname(text):
    """Pull the mother's first name and surname out of passport OCR text.

    Looks for a "Mother's Name" / "Mother Name" label (case-insensitive),
    optionally followed by a "Date of Birth" label and an ISO date, and takes
    the run of two or more words after it. The first word is the first name;
    the rest, with known label fragments removed, is the surname.

    Returns:
        dict with keys ``mother_first_name`` and ``mother_last_name``; both
        are ``None`` when no label/name run is found.
    """
    label_regex = r"(?:Mother's Name\.?|Mother Name)\s*[::.]?\s*(?:Date of Birth\s*)?\s*(?:\d{4}-\d{2}-\d{2})?\s*([A-Z]+(?:\s+[A-Z]+)+)"
    noise_regex = r'\b(Date of Expiry|Date of Issue|Issuing Authority|Date|P|REPUBLIC OF IRAQ|Passport|Type|BAGHDAD|ananda|INTER|Issuing|Author|OF IRAQ assport|assport)\b'

    first, surname = None, None

    found = re.search(label_regex, text, re.IGNORECASE)
    tokens = found.group(1).strip().split() if found else []
    if len(tokens) >= 2:
        first = tokens[0]
        surname = " ".join(tokens[1:])

    if surname:
        # Strip neighbouring field labels that OCR glued onto the name, then
        # collapse the whitespace left behind.
        without_labels = re.sub(noise_regex, '', surname, flags=re.IGNORECASE)
        surname = re.sub(r'\s+', ' ', without_labels).strip()

    return {"mother_first_name": first, "mother_last_name": surname}
|
|
53
|
-
|
|
54
|
-
def get_dates_to_generic_format(date):
    """Normalise a slash-separated date string to ``DD/MM/YYYY``.

    Accepts ``DD/MM/YYYY`` or ``YYYY/MM/DD`` (tried in that order) and returns
    the date re-rendered as ``DD/MM/YYYY``. Returns ``None`` when the string
    fits neither layout.
    """
    for layout in ("%d/%m/%Y", "%Y/%m/%d"):
        try:
            parsed = datetime.strptime(date, layout)
        except ValueError:
            continue
        return parsed.strftime("%d/%m/%Y")
    return None
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def validate_date(date):
    """Return ``date`` re-rendered in its own layout if it is a real date.

    Two layouts are accepted, tried in order: ``DD-MM-YYYY`` then
    ``DD/MM/YYYY``. The result keeps the separator of the layout that matched.

    Args:
        date: Candidate date string.

    Returns:
        The zero-padded date string, or ``''`` when the value is not a valid
        calendar date in either layout. Non-string input such as ``None``
        also yields ``''`` (previously a ``TypeError`` escaped from the first
        parse attempt).
    """
    for layout in ("%d-%m-%Y", "%d/%m/%Y"):
        try:
            return datetime.strptime(date, layout).strftime(layout)
        except (ValueError, TypeError):
            # ValueError: wrong layout or impossible date.
            # TypeError: caller passed a non-string.
            continue
    return ''
|
|
74
|
-
|
|
75
|
-
def identify_gender(name):
    """Return gender_guesser's prediction for ``name``.

    A new ``gender.Detector`` is created on every call and the value of its
    ``get_gender`` method is returned unchanged.
    """
    return gender.Detector().get_gender(name)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def translated_gender_identifier(passport_text):
    """Infer gender by translating Arabic passport text to English.

    The text is translated with googletrans; if that raises, deep_translator's
    ``GoogleTranslator`` is used as a fallback. The English result is then
    searched (case-insensitively) for "female" / "male".

    "female" is tested first because the substring "male" is contained in
    "female"; the previous order made the 'F' result unreachable.

    Args:
        passport_text: Raw passport text, expected to contain Arabic.

    Returns:
        ``'F'``, ``'M'``, or ``''`` when neither word appears or the
        translation is empty.
    """
    translator = Translator()
    try:
        trans_res = translator.translate(passport_text, src='ar', dest='en').text
    except Exception:
        # Best-effort fallback to the second translation backend; errors from
        # the fallback itself still propagate to the caller, as before.
        from deep_translator import GoogleTranslator
        trans_res = GoogleTranslator('ar', 'en').translate(passport_text)

    if not trans_res:
        # A backend may hand back None/'' for empty input; nothing to search.
        return ''

    if re.search('female', trans_res, re.IGNORECASE):
        return 'F'
    if re.search('male', trans_res, re.IGNORECASE):
        return 'M'

    return ''
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def extract_names(passport_text):
    """Extract (given names, surname) from the printed part of passport text.

    Strategy, in order:
      1. A strict "Full Name <CAPS>\\nSurname <CAPS>" match.
      2. A looser findall over "Full Name"/"Name"/"Sumame"/"Surname" labels.
      3. If the surname is still missing or shorter than 3 characters, the
         first token of an MRZ-like "AAA<<BBB<" run is used as the surname.

    Returns:
        Tuple ``(name, last_name)`` of strings. Any exception raised while
        parsing is swallowed and ``('', '')`` is returned instead.

    NOTE(review): block nesting below was reconstructed from a flattened
    listing - confirm against the original file.
    """
    try:
        pattern = r'Full Name\s+([A-Z\s-]+)\nSurname\s+([A-Z\s-]+)'
        matches = re.search(pattern, passport_text)
        if matches:
            name = matches.group(1).strip()
            last_name = matches.group(2).strip()

            return name, last_name
        else:
            # "Sumame" is included alongside "Surname"; presumably a common
            # OCR misread of the label - confirm.
            pattern = r'Full Name\s+([A-Z][A-Za-z\s-]+)|Name\s+([A-Z][A-Za-z\s-]+)|Sumame\s+([A-Z]+)|Surname\s+([A-Z]+)'

            matches = re.findall(pattern, passport_text)
            # Only capture groups 1 and 2 are consulted here; groups 3 and 4
            # (the Sumame/Surname captures) are never read.
            clean_matches = [match[0].strip() if match[0] else match[1] for match in matches]

            if len(clean_matches) > 1:
                name, last_name = clean_matches[0], clean_matches[1]

                if '\n' in name:
                    name_list = name.split("\n")
                    # list.remove raises ValueError when 'IRQ' is absent; that
                    # is caught by the outer except and yields ('', '').
                    name_list.remove('IRQ')
                    name_list = [word for word in name_list if word.isupper()]

                    if not name_list[0] == 'IRQ':
                        if len(name_list) > 1:
                            name = name_list[0]
                            last_name = name_list[1]
                            if len(last_name)<2:
                                last_name = ''
                        if len(name_list) == 1:
                            name = name_list[0]

            elif len(matches) == 1:
                # Unpacking requires exactly one "Surname" separator; any
                # other count raises ValueError -> ('', '').
                name, last_name = clean_matches[0].split("Surname")
                if len(name.split(" ")) > 3:
                    name_list = name.split("\n")
                    name = name_list[1]
                if len(last_name)>1:
                    # NOTE(review): the result of this expression is
                    # discarded, so last_name is not changed here.
                    last_name.split("\n")[0]

            # When no label matched at all, last_name is unbound here; the
            # resulting NameError is absorbed by the outer except.
            if not last_name or len(last_name)<3:
                name_pattern = r"[A-Z<]+<<[A-Z<]+<"
                name_match = re.search(name_pattern, passport_text)
                if name_match:
                    names = name_match.group(0)

                    extracted_text = names.replace('<', ' ')
                    # print(f"TEXT: {extracted_text}")
                    name_list = extracted_text.strip().split()
                    # name = ' '.join(name_list[1:])
                    last_name = name_list[0]

            return name.replace("\n", ""), last_name.replace("\n", "")
    except:
        return '', ''
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
def load_nationality_keywords():
    """Build an upper-case set of country names, codes and guessed demonyms.

    For every pycountry entry the set receives the country name, its alpha-3
    code, the official name when the entry has one, and each name with every
    demonym suffix appended (all upper-cased).
    """
    suffixes = ('ian', 'ese', 'ish', 'i', 'ic', 'an', 'nian')
    keywords = set()

    for entry in pycountry.countries:
        keywords.add(entry.alpha_3.upper())

        # The common name is always used; the official name only when present.
        stems = [entry.name.upper()]
        if hasattr(entry, 'official_name'):
            stems.append(entry.official_name.upper())

        for stem in stems:
            keywords.add(stem)
            # Guessed demonyms: name + suffix, upper-cased as a whole.
            keywords.update((stem + tail).upper() for tail in suffixes)

    return keywords
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
def _standalone_caps_words(text, start_keyword):
    """Return single all-caps words found between ``start_keyword`` and 'Exp'."""
    window_pattern = fr"(?<={re.escape(start_keyword)})(.*?)(?={re.escape('Exp')})"
    window = re.search(window_pattern, text, re.DOTALL)
    if not window:
        return []
    caps_runs = re.findall(r"\b[A-Z][A-Z ]+\b", window.group(1))
    return [run for run in caps_runs if ' ' not in run]


def extract_names_exception(passport_text, only_last_name=False):
    """Fallback name extraction based on runs of upper-case words.

    Lines that fuzzily resemble 'WHEN NEEDED' (rapidfuzz partial ratio >= 50)
    are removed first. A name is then taken to be three or four consecutive
    upper-case words, followed by an upper-case surname.

    Args:
        passport_text: Raw passport text.
        only_last_name: When the name pattern does not match, search for a
            surname only (between 'Name' and 'Exp'), skipping nationality
            keywords.

    Returns:
        * ``(name, last_name)`` when the name pattern matches. Note this holds
          even if ``only_last_name`` is True.
        * ``last_name`` (a plain ``str``, possibly ``''``) when the pattern
          does not match and ``only_last_name`` is True.
        * ``('', '')`` otherwise.
        The mixed return shape is kept because callers depend on it.
    """
    ## REMOVE REDUNDANT WORDS AFFECTING ALGO
    threshold = 50
    keyword = 'WHEN NEEDED'
    for line in passport_text.split("\n"):
        if fuzz.partial_ratio(line.lower(), keyword.lower()) >= threshold:
            passport_text = passport_text.replace(line, "")

    ## Find 3 consecutive uppercase words as name and single uppercase word as last name
    name_pattern = r'([A-Z]{3,} [A-Z]{3,} [A-Z]{1,}|[A-Z]{3,} [A-Z]{3,} [A-Z]{1,} [A-Z]{1,})\s+([A-Z-]+)'
    match = re.search(name_pattern, passport_text)

    if match:
        name = match.group(1).strip()
        last_name = match.group(2).strip()

        if len(last_name) < 3:
            # Surname looks truncated: take the first standalone upper-case
            # word between the name and the 'Exp' label instead.
            try:
                candidates = _standalone_caps_words(passport_text, name)
            except Exception:
                # Best-effort refinement only; keep the short surname.
                candidates = []
            if candidates:
                last_name = candidates[0]

        return name, last_name

    if only_last_name:
        last_name = ''
        candidates = _standalone_caps_words(passport_text, 'Name')
        if candidates:
            nationality_keywords = load_nationality_keywords()
            for word in candidates:
                if word not in nationality_keywords:
                    last_name = word
                    break
        return last_name

    return '', ''
|
|
246
|
-
|
|
247
|
-
def convert_to_mrz_date(date_str):
    """Convert a ``DD/MM/YYYY`` date string to the MRZ ``YYMMDD`` form.

    The dates this module produces (see ``convert_expiry_date``) are
    day-first, so the parts are unpacked as day, month, year. The previous
    version unpacked them as month, day, year and therefore emitted YYDDMM.

    Args:
        date_str: Date as ``DD/MM/YYYY``. One-digit day/month are zero-padded.

    Returns:
        Six-character ``YYMMDD`` string, or ``''`` when ``date_str`` is empty,
        not a string, or does not split into exactly three ``/`` parts.
    """
    if not date_str:
        return ''

    try:
        day, month, year = date_str.split('/')
    except (ValueError, AttributeError):
        # ValueError: wrong number of parts; AttributeError: not a string.
        return ''

    return year[-2:] + month.zfill(2) + day.zfill(2)
|
|
261
|
-
|
|
262
|
-
def find_nationality_in_text(text, nationality_keywords):
    """Return the first nationality keyword that occurs in ``text`` as a whole word.

    Args:
        text: Text to search (matching is case-sensitive).
        nationality_keywords: Iterable of keywords. Ordered iterables are
            scanned in the order given. Sets have no stable order, so they
            are scanned longest-keyword-first (ties alphabetical); this makes
            the result reproducible and prefers the most specific match.

    Returns:
        The matching keyword, or ``None`` when nothing matches.
    """
    if isinstance(nationality_keywords, (set, frozenset)):
        candidates = sorted(nationality_keywords, key=lambda kw: (-len(kw), kw))
    else:
        candidates = nationality_keywords

    for keyword in candidates:
        if re.search(r'\b' + re.escape(keyword) + r'\b', text):
            return keyword
    return None
|
|
268
|
-
|
|
269
|
-
def iraq_passport_extraction(passport_text):
    """Extract passport fields from the OCR text of an Iraqi passport.

    Builds and returns a dict that is filled, in this order, with:
      * 'nationality' / 'full_name' / 'last_name' parsed from the first MRZ
        line (several progressively looser patterns are tried);
      * 'full_name_generic' / 'surname_generic' from the printed text;
      * 'passport_number', 'passport_number_mrz', 'dob_mrz',
        'expiry_date_mrz', 'gender' from regexes over the text / second MRZ
        line, with fallbacks;
      * 'passport_date_of_birth_generic', 'passport_date_of_expiry_generic',
        'issue_date' from dates found in the printed text;
      * 'mrz', 'mrz1', 'mrz2'.

    ``convert_dob`` is not defined in this module; presumably it comes from
    ``from idvpackage.common import *`` - confirm.

    NOTE(review): block nesting below was reconstructed from a flattened
    listing - confirm against the original file before relying on it.
    """
    passport_details = {}

    # Field name -> (regex, transform applied to the match object).
    patterns = {
        'passport_number': (r"([A-Da-d]\d{8}|[A-Da-d]\d{7})", lambda match: match.group(1) if match else ''),
        'passport_number_mrz': (r"([A-Za-z]\d{8}|[A-Za-z]\d{7})", lambda match: match.group(1) if match else ''),
        'dob_mrz': (r'(\d+)[MF]', lambda match: convert_dob(match.group(1)) if match else ''),
        'expiry_date_mrz': (r'[MF](\d+)', lambda match: convert_expiry_date(match.group(1)) if match else ''),
        'gender': (r'(\d)([A-Za-z])(\d)', lambda match: match.group(2) if match else '')
    }

    # Space-free copy of the text, used for MRZ matching.
    passport_text_clean = passport_text.replace(" ", "")

    mrz1_pattern = r"P<{COUNTRY_CODE}[A-Z<]+<<[A-Z<]+<"

    iso_nationalities = [country.alpha_3 for country in pycountry.countries]

    # Attempt 1: try "P<XXX..." for every ISO alpha-3 code until one matches.
    name_dict = {}
    for country_code in iso_nationalities:
        current_pattern = mrz1_pattern.format(COUNTRY_CODE=country_code)

        mrz1_match = re.search(current_pattern, passport_text_clean)
        if mrz1_match:
            mrz1 = mrz1_match.group(0)

            # NOTE(review): replace(country_code, '') removes every occurrence
            # of the code, including any inside the names - confirm intended.
            extracted_text = mrz1.replace('P<','').replace(country_code,'').replace('<', ' ')
            name_list = extracted_text.strip().split()
            name = ' '.join(name_list[1:])
            passport_surname = name_list[0]

            # A surname token that is exactly "al" is joined with the next token.
            if re.search(r'\bal\b', passport_surname.lower()):
                passport_surname = '-'.join(name_list[0:2])
                name = ' '.join(name_list[2:])

            name_dict = {
                'nationality': country_code,
                'full_name': name,
                'last_name': passport_surname
            }

            passport_details.update(name_dict)

            break
        else:
            mrz1 = None

    # Attempt 2: looser MRZ line 1 pattern on the raw (spaced) text.
    if not mrz1:
        pattern = r"P[<\w@<]+<<[\w<]+<"
        matches = re.findall(pattern, passport_text)

        try:
            mrz1 = matches[0]
        except:
            mrz1 = ''

        if matches:
            # Skip the first five characters (the "P<" + 3-letter code prefix).
            processed_matches = matches[0][5:]

            extracted_text = processed_matches.replace('@', '').replace('<', ' ')
            name_list = extracted_text.strip().split()
            name = ' '.join(name_list[1:])
            passport_surname = name_list[0]
            if re.search(r'\bal\b', passport_surname.lower()) or re.search(r'\bl\b', passport_surname.lower()):
                passport_surname = '-'.join(name_list[0:2])
                name = ' '.join(name_list[2:])

            name_dict = {
                'full_name': name,
                'last_name': passport_surname
            }

            passport_details.update(name_dict)

    # Attempt 3: used when no MRZ line 1 was found or the given names are empty.
    # `name` is only evaluated when mrz1 is truthy, in which case it was bound
    # by one of the attempts above.
    if not mrz1 or not name:
        pattern = r"(P<([A-Z]{3})((?:[<A-Z]+)+)<<)"
        matches = re.findall(pattern, passport_text)

        if matches:
            mrz1, raw_names = matches[0][0], matches[0][2]
            processed_names = raw_names.replace('<', ' ').strip()
            name_parts = processed_names.split()

            if len(name_parts) > 1 and re.search(r'\b(al|el)\b', name_parts[1].lower()):
                surname = ' '.join(name_parts[:2])
                given_names = ' '.join(name_parts[2:])
            else:
                surname = name_parts[0]
                given_names = ' '.join(name_parts[1:])

            name_dict = {
                'full_name': given_names,
                'last_name': surname
            }

            passport_details.update(name_dict)

    ## HANDLE NAME GENERIC FOR VALIDATION
    name_generic, passport_surname_generic = extract_names(passport_text)
    if len(name_generic.split(" "))<3:
        name_generic_temp, passport_surname_generic_temp = extract_names_exception(passport_text)
        if name_generic_temp:
            name_generic, passport_surname_generic = name_generic_temp, passport_surname_generic_temp
        if not passport_surname_generic:
            # NOTE(review): extract_names_exception returns a (name, surname)
            # tuple rather than a string whenever its name pattern matches,
            # even with only_last_name=True - confirm this cannot happen here.
            passport_surname_generic = extract_names_exception(passport_text, only_last_name=True)

    if not passport_surname_generic or passport_surname_generic=='IRAQI':
        pattern = r"(([A-Z]{3})((?:[<A-Z]+)+)<<)"
        matches = re.findall(pattern, passport_text.replace(' ', ''))

        if matches:
            surname = ''
            # Note: this also overwrites mrz1 with the matched fragment.
            mrz1, raw_names = matches[0][0], matches[0][2]
            processed_names = raw_names.replace('<', ' ').strip()
            name_parts = processed_names.split()

            if len(name_parts) > 1 and re.search(r'\b(al|el)\b', name_parts[1].lower()):
                surname = ' '.join(name_parts[:2])
                given_names = ' '.join(name_parts[2:])
            else:
                surname = name_parts[0]
                given_names = ' '.join(name_parts[1:])

            passport_surname_generic = surname

    # Keep only upper-case letters, hyphens and spaces in the generic names.
    name_generic, passport_surname_generic = ''.join(filter(lambda x: x.isupper() or x == '-' or x == ' ', name_generic)), ''.join(filter(lambda x: x.isupper() or x == '-' or x == ' ', passport_surname_generic))
    name_dict = {
        'full_name_generic': name_generic,
        'surname_generic': passport_surname_generic
    }

    passport_details.update(name_dict)

    # Second MRZ line: a line starting with a letter then digits, taken up to
    # (not including) the first run of two or more '<'.
    mrz2_pattern = r"\n[A-Z]\d+.*?(?=[<]{2,})"
    mrz2_matches = re.findall(mrz2_pattern, passport_text_clean)

    if mrz2_matches:
        # Drop the leading newline captured by the pattern.
        mrz2 = mrz2_matches[0][1:]
    else:
        mrz2 = ''

    ## EXTRACTING FIELDS FROM MRZ2
    mrz2_keys = ['gender', 'passport_number_mrz', 'dob_mrz', 'expiry_date_mrz']

    for key, value in patterns.items():
        pattern = value[0]
        transform_func = value[1]

        # MRZ-derived keys are searched in mrz2; the rest in the full text.
        text = passport_text
        if key in mrz2_keys:
            text = mrz2

        match = re.search(pattern, text)
        passport_details[key] = transform_func(match) if match else ''

    # The MRZ passport number, when present, overrides the printed one.
    if passport_details['passport_number_mrz'] and (passport_details['passport_number_mrz']!=passport_details['passport_number']):
        passport_details['passport_number'] = passport_details['passport_number_mrz']

    ## HANDLE PASSPORT NO FROM MRZ
    if not passport_details.get('passport_number_mrz'):
        passport_number_pattern = r"([A-Za-z]\d{8,}[A-Za-z]{2,}.*?|[A-Za-z]*\d{8,}[A-Za-z]{2,}.*?)"
        passport_number_match = re.search(passport_number_pattern, passport_text_clean)
        if passport_number_match:
            passport_number = passport_number_match.group(1)
            passport_details['passport_number_mrz'] = passport_number[:9]

    ## HANDLE DOB DOE FROM MRZ
    # This fallback runs only when BOTH dob_mrz and expiry_date_mrz are empty.
    if not (passport_details.get('dob_mrz') or passport_details.get('expiry_date_mrz')):
        dob_pattern = r"(\d{7})[MF]"
        dob_match = re.search(dob_pattern, passport_text_clean)
        if dob_match:
            dob = dob_match.group(1)
            passport_details['dob_mrz'] = convert_dob(dob)
        else:
            dob_pattern = r'.*?[\S]R[\S](\d{9,})\b'
            dob_match = re.search(dob_pattern, passport_text_clean)
            if dob_match:
                dob = dob_match.group(1)[:7]
                passport_details['dob_mrz'] = validate_date(convert_dob(dob))

        doe_pattern = r"[MF](\d+)"
        doe_match = re.search(doe_pattern, passport_text_clean)
        if doe_match:
            expiry = doe_match.group(1)
            passport_details['expiry_date_mrz'] = validate_date(convert_expiry_date(expiry))
        else:
            doe_pattern = r'.*?[\S]R[\S](\d{9,})\b'
            doe_match = re.search(doe_pattern, passport_text_clean)
            if doe_match:
                # Digits from index 8 onward are treated as the expiry date.
                expiry = doe_match.group(1)[8:]
                passport_details['expiry_date_mrz'] = validate_date(convert_expiry_date(expiry))

    ## HANDLE DOB AND DOE CASES FROM GENERIC DATA FOR VALIDATION
    dob = ''
    expiry = ''
    issue_date = ''

    # Earliest date found is taken as DOB, the second as issue date and the
    # latest as expiry.
    try:
        # NOTE(review): the regex collects YYYY/MM/DD strings but they are
        # parsed with '%d/%m/%Y', which looks like it raises ValueError for
        # every match; with no matches the [0] index raises IndexError. So
        # this try body appears to always fall through to the except branch.
        matches = re.findall(r'\d{4}/\d{2}/\d{2}', passport_text)
        date_objects = [datetime.strptime(date, '%d/%m/%Y') for date in matches]
        sorted_dates = sorted(set(date_objects))
        sorted_date_strings = [date.strftime('%d/%m/%Y') for date in sorted_dates]

        # print(f"DATES: {sorted_date_strings}")
        dob = sorted_date_strings[0]
        issue_date = sorted_date_strings[1]
        expiry = sorted_date_strings[-1]
    except:
        # DD/MM/YYYY or DD.MM.YYYY
        matches = re.findall(r'\b\d{2}[./]\d{2}[./]\d{4}\b', passport_text)
        # try:
        date_objects = [datetime.strptime(date.replace('.', '/'), '%d/%m/%Y') for date in matches]
        sorted_dates = sorted(set(date_objects))
        sorted_date_strings = [date.strftime('%d/%m/%Y') for date in sorted_dates]

        # print(f"DATES 2: {sorted_date_strings}")
        if len(sorted_date_strings)>1:
            dob = sorted_date_strings[0]
            issue_date = sorted_date_strings[1]
            expiry = sorted_date_strings[-1]
        else:
            # try:
            # YYYY-MM-DD
            matches = re.findall(r'\d{4}-\d{2}-\d{2}', passport_text)
            date_objects = [datetime.strptime(date, '%Y-%m-%d') for date in matches]
            sorted_dates = sorted(set(date_objects))
            sorted_date_strings = [date.strftime('%Y-%m-%d') for date in sorted_dates]

            # print(f"DATES 3: {sorted_date_strings}")
            if len(sorted_date_strings)>1:
                dob = sorted_date_strings[0].replace('-', '/')
                issue_date = sorted_date_strings[1].replace('-', '/')
                expiry = sorted_date_strings[-1].replace('-', '/')

            else:
                # DD-MM-YYYY
                matches = re.findall(r'\d{2}-\d{2}-\d{4}', passport_text)
                date_objects = [datetime.strptime(date, '%d-%m-%Y') for date in matches]
                sorted_dates = sorted(set(date_objects))
                sorted_date_strings = [date.strftime('%d-%m-%Y') for date in sorted_dates]

                if sorted_date_strings:
                    dob = sorted_date_strings[0].replace('-', '/')
                    # NOTE(review): with exactly one date found, index [1]
                    # raises IndexError and nothing here catches it.
                    issue_date = sorted_date_strings[1].replace('-', '/')
                    expiry = sorted_date_strings[-1].replace('-', '/')
            # except:
            #     dob, expiry = '', ''
        # except:
        #     dob, expiry = '', ''

    passport_details['passport_date_of_birth_generic'] = get_dates_to_generic_format(dob)
    passport_details['passport_date_of_expiry_generic'] = get_dates_to_generic_format(expiry)
    passport_details['issue_date'] = get_dates_to_generic_format(issue_date)

    ## HANDLE GENDER CASES EXCEPTIONS
    if not (passport_details['gender']):
        gender_pattern = r'(\d)([MFmf])(\d)'
        gender_match = re.search(gender_pattern, passport_text_clean)
        if gender_match:
            passport_details['gender'] = gender_match.group(2)
        else:
            # Arabic words for "male" (two spellings of the same word).
            if re.search(r'ذكر', passport_text) or re.search(r'ذکر', passport_text):
                passport_details['gender'] = 'M'

            # Arabic words for "female" (with and without hamza).
            elif re.search(r'انثى', passport_text) or re.search(r'أنثى', passport_text):
                passport_details['gender'] = 'F'

            else:
                # Last resort: guess from the first given name, then fall
                # back to translating the whole text.
                if passport_details.get('full_name'):
                    first_name = passport_details['full_name'].split()[0].capitalize()
                    predicted_gender = identify_gender(first_name)
                    passport_details['gender'] = 'M' if predicted_gender.lower() == 'male' else 'F' if predicted_gender.lower() == 'female' and predicted_gender != 'unknown' else translated_gender_identifier(passport_text)

    if not passport_details.get('nationality', ''):
        nationality_keywords = load_nationality_keywords()
        # Note: this rebinds passport_text for the rest of the function.
        passport_text = passport_text.replace('REPUBLIC OF IRAQ', '').replace('IRQ', '')
        nationality = find_nationality_in_text(passport_text, nationality_keywords)

        passport_details['nationality'] = nationality

    # Try to recover a fuller MRZ line 1 when the current one is short.
    if len(mrz1) < 40:
        mrz1_pattern = r'P<[A-Z]{3}[A-Z0-9<]{5,44}'
        match = re.search(mrz1_pattern, passport_text.replace(" ", ""))
        if match:
            mrz1 = match.group(0)

    if not mrz1:
        mrz1 = ''

    # Right-pad MRZ line 1 with '<' to 44 characters.
    if mrz1 and len(mrz1) < 44:
        mrz1 = mrz1 = f"{mrz1}{'<' * (44 - len(mrz1))}"

    # print(passport_details)
    if passport_details.get('nationality', ''):
        if passport_details['nationality'] == 'IRAQI' or passport_details['nationality'] == 'IRAQ':
            passport_details['nationality'] = 'IRQ'

    # No MRZ line 2 in the text: synthesise one from the extracted fields.
    if not mrz2:
        try:
            mrz2 = passport_details.get('passport_number', '') + passport_details.get('nationality', '') + convert_to_mrz_date(passport_details.get('dob_mrz', '')) + passport_details.get('gender', '') + convert_to_mrz_date(passport_details.get('expiry_date_mrz', ''))
        except:
            mrz2 = ''

    # NOTE(review): only lengths 28-39 are padded to 44; lengths 40-43 are
    # left as they are - confirm intended.
    if len(mrz2) >= 28 and len(mrz2) < 40:
        mrz2 = mrz2 = f"{mrz2}{'<' * (44 - len(mrz2))}"

    passport_details['mrz'] = mrz1 + mrz2
    passport_details['mrz1'] = mrz1
    passport_details['mrz2'] = mrz2

    # Expand single-letter gender codes to FEMALE / MALE.
    if "gender" in passport_details:
        gender = passport_details["gender"].strip().upper()
        if gender == "F":
            passport_details["gender"] = "FEMALE"
        elif gender == "M":
            passport_details["gender"] = "MALE"

    # Any other gender value is still stripped and upper-cased.
    if 'gender' in passport_details:
        passport_details["gender"] = passport_details["gender"].strip().upper()

    # print(f"PASSPORT DETAILS HERE: {passport_details}")

    return passport_details
|
idvpackage/lazy_imports.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
"""Module to handle lazy loading of heavy dependencies"""
|
|
2
|
-
|
|
3
|
-
import psutil
|
|
4
|
-
import os
|
|
5
|
-
|
|
6
|
-
# Module-level caches for the lazily imported dependencies. Each stays None
# until the matching get_*() accessor below is called for the first time.
_deepface = None
_face_recognition = None
_pycountry = None
_translator = None
|
|
10
|
-
|
|
11
|
-
def log_memory_usage(operation):
    """Print the current process's resident memory (in MB), labelled with ``operation``."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    print(f"Memory usage after {operation}: {rss_bytes / 1024 / 1024:.2f} MB")
|
|
15
|
-
|
|
16
|
-
def get_deepface():
    """Return the ``DeepFace`` class, importing it on first use.

    Memory usage is printed immediately before and after the first import;
    later calls return the cached class without logging.
    """
    global _deepface
    if _deepface is not None:
        return _deepface

    log_memory_usage("before DeepFace import")
    from deepface import DeepFace
    _deepface = DeepFace
    log_memory_usage("after DeepFace import")
    return _deepface
|
|
24
|
-
|
|
25
|
-
def get_face_recognition():
    """Return the ``face_recognition`` module, importing it on first use."""
    global _face_recognition
    if _face_recognition is not None:
        return _face_recognition

    import face_recognition
    _face_recognition = face_recognition
    return _face_recognition
|
|
31
|
-
|
|
32
|
-
def get_pycountry():
    """Return the ``pycountry`` module, importing it on first use."""
    global _pycountry
    if _pycountry is not None:
        return _pycountry

    import pycountry
    _pycountry = pycountry
    return _pycountry
|
|
38
|
-
|
|
39
|
-
def get_translator():
    """Return a shared googletrans ``Translator``, creating it on first use."""
    global _translator
    if _translator is not None:
        return _translator

    from googletrans import Translator
    _translator = Translator()
    return _translator
|