idvpackage 3.0.11__py3-none-any.whl → 3.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- idvpackage/common.py +8 -966
- idvpackage/iraq_id_extraction_withopenai.py +374 -893
- idvpackage/jor_passport_extraction.py +1 -6
- idvpackage/liveness_spoofing_v2.py +2 -45
- idvpackage/ocr.py +1016 -2430
- idvpackage/ocr_utils.py +148 -489
- idvpackage/pse_passport_extraction.py +18 -292
- idvpackage/qatar_id_extraction.py +4 -956
- idvpackage/sudan_passport_extraction.py +0 -928
- idvpackage/syr_passport_extraction.py +27 -402
- idvpackage/uae_id_extraction.py +87 -151
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/METADATA +1 -1
- idvpackage-3.0.13.dist-info/RECORD +34 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/WHEEL +1 -1
- idvpackage/ekyc.py +0 -78
- idvpackage/genai_utils.py +0 -309
- idvpackage/iraq_id_extraction.py +0 -992
- idvpackage/iraq_passport_extraction.py +0 -588
- idvpackage/lazy_imports.py +0 -44
- idvpackage/lebanon_passport_extraction.py +0 -161
- idvpackage/sau_id_extraction.py +0 -248
- idvpackage/sudan_id_extraction.py +0 -764
- idvpackage-3.0.11.dist-info/RECORD +0 -42
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/licenses/LICENSE +0 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.13.dist-info}/top_level.txt +0 -0
idvpackage/genai_utils.py
DELETED
|
@@ -1,309 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from datetime import timedelta
|
|
3
|
-
from dateutil.parser import parse
|
|
4
|
-
from pydantic import BaseModel
|
|
5
|
-
from typing import Type
|
|
6
|
-
from datetime import datetime
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def find_gender_from_back(text):
    """Return the single ASCII letter that sits between two digits in ``text``.

    The first occurrence of ``<digit><letter><digit>`` is used. The original
    code had a second attempt restricted to ``[MFmf]``; it could never match
    when the broader ``[A-Za-z]`` search had already failed, so it was removed.

    Parameters:
        text (str): Text to search.

    Returns:
        str: The matched letter (case preserved), or "" when nothing matches.
    """
    gender_match = re.search(r"(\d)([A-Za-z])(\d)", text)
    return gender_match.group(2) if gender_match else ""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def is_valid_date(date_str):
    """Return True if ``date_str`` can be parsed as a date by dateutil.

    Parsing is strict (``fuzzy=False``). Any parse failure yields False.

    Parameters:
        date_str: Candidate date string.

    Returns:
        bool: True when ``dateutil.parser.parse`` accepts the value.
    """
    try:
        parse(date_str, fuzzy=False)
        return True
    # OverflowError is raised by dateutil for absurdly large numeric fields;
    # treat it like any other unparseable input instead of letting it escape.
    except (ValueError, TypeError, OverflowError):
        return False
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def is_expiry_issue_diff_valid(issue_date_str, expiry_date_str, time_period):
    """Check that expiry date == issue date + ``time_period`` years - 1 day.

    Parameters:
        issue_date_str (str): Issue date in 'YYYY/MM/DD' format.
        expiry_date_str (str): Expiry date in 'YYYY/MM/DD' format.
        time_period (int): Validity period in years.

    Returns:
        bool: True when the expiry date is exactly the expected one. False
        when it differs or when either input is not a 'YYYY/MM/DD' string
        (previously a valid date in another format raised ValueError).
    """
    try:
        issue_date = datetime.strptime(issue_date_str, "%Y/%m/%d")
        expiry_date = datetime.strptime(expiry_date_str, "%Y/%m/%d")
    except (ValueError, TypeError):
        return False

    target_year = issue_date.year + time_period
    try:
        anniversary = issue_date.replace(year=target_year)
    except ValueError:
        if (issue_date.month, issue_date.day) != (2, 29):
            raise
        # Issued on 29 Feb and the target year is not a leap year: use 1 Mar
        # as the anniversary so the expected expiry becomes 28 Feb.
        # NOTE(review): confirm this matches the issuing authority's rule.
        anniversary = issue_date.replace(year=target_year, month=3, day=1)

    return expiry_date == anniversary - timedelta(days=1)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def is_mrz_dob_mrz_field_match(dob_str, mrz_line2):
    """Check whether the first six characters of ``mrz_line2`` match the DOB.

    The first six characters are read as YYMMDD. Because that field carries
    only a two-digit year, the comparison is done on the YYMMDD form of the
    printed date rather than by guessing a century from today's date (the
    guess was wrong for birth years 100 or more years in the past).

    Parameters:
        dob_str (str): Printed date of birth in 'YYYY/MM/DD' format.
        mrz_line2 (str): MRZ line whose first six characters are YYMMDD.

    Returns:
        bool: True when the two dates agree on year (last two digits),
        month and day.

    Raises:
        ValueError: If ``dob_str`` is not 'YYYY/MM/DD' or the MRZ field is
        not six ASCII digits.
    """
    dob = datetime.strptime(dob_str, "%Y/%m/%d")
    mrz_dob_raw = mrz_line2[:6]  # First 6 characters (YYMMDD)
    if (
        len(mrz_dob_raw) != 6
        or not mrz_dob_raw.isascii()
        or not mrz_dob_raw.isdigit()
    ):
        raise ValueError(f"Invalid MRZ date of birth field: {mrz_dob_raw!r}")
    return mrz_dob_raw == dob.strftime("%y%m%d")
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def is_age_18_above(dob_str):
    """
    Tell whether someone born on ``dob_str`` is at least 18 today.

    Parameters:
    dob_str (str): Date of birth in 'YYYY-MM-DD', 'DD.MM.YYYY', 'YYYY/MM/DD', or 'DD/MM/YYYY' format.

    Returns:
    bool: True if the person is 18 or older, False otherwise.
    str: The literal "invalid_format" when no supported format matches.
         Note that this string is truthy, so callers must compare explicitly.
    """
    for layout in ("%Y-%m-%d", "%d.%m.%Y", "%Y/%m/%d", "%d/%m/%Y"):
        try:
            born = datetime.strptime(dob_str, layout)
        except ValueError:
            # Not this layout; try the next one.
            continue

        now = datetime.today()
        years = now.year - born.year
        # Birthday has not happened yet this year -> one year younger.
        if (now.month, now.day) < (born.month, born.day):
            years -= 1
        return years >= 18

    return "invalid_format"
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def is_age_less_than_100(dob_str):
    """
    Check if the person is less than 100 years old as of today.

    The comparison is strict (``age < 100``), matching the function name; the
    previous ``age <= 100`` also accepted people who are exactly 100.

    Parameters:
    dob_str (str): Date of birth in 'YYYY-MM-DD', 'DD.MM.YYYY', 'YYYY/MM/DD', or 'DD/MM/YYYY' format.

    Returns:
    bool: True if the person is less than 100 years old, False otherwise.
    str: The literal "invalid_format" when no supported format matches.
         Note that this string is truthy, so callers must compare explicitly.
    """
    date_formats = ("%Y-%m-%d", "%d.%m.%Y", "%Y/%m/%d", "%d/%m/%Y")
    today = datetime.today()

    for fmt in date_formats:
        try:
            dob = datetime.strptime(dob_str, fmt)
        except ValueError:
            continue
        # Subtract one when this year's birthday has not been reached yet.
        age = (
            today.year
            - dob.year
            - ((today.month, today.day) < (dob.month, dob.day))
        )
        return age < 100

    return "invalid_format"
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def is_expired_id(expiry_date):
    """
    Checks if an ID is expired.

    Parameters:
    expiry_date (str): Expiry date in 'DD-MM-YYYY', 'YYYY-MM-DD', 'DD.MM.YYYY',
                       'YYYY/MM/DD', or 'DD/MM/YYYY' format.

    Returns:
    bool: True if the expiry date is strictly before today, False otherwise
          (a document expiring today is not yet expired).

    Raises:
    ValueError: If ``expiry_date`` matches none of the supported formats.
    """
    date_formats = ("%d-%m-%Y", "%Y-%m-%d", "%d.%m.%Y", "%Y/%m/%d", "%d/%m/%Y")
    today = datetime.today().date()

    for fmt in date_formats:
        try:
            expiry = datetime.strptime(expiry_date, fmt).date()
        except ValueError:
            continue
        return expiry < today

    # The message lists every format actually accepted above.
    raise ValueError(
        "Invalid date format. Expected 'DD-MM-YYYY', 'YYYY-MM-DD', "
        "'DD.MM.YYYY', 'YYYY/MM/DD', or 'DD/MM/YYYY'."
    )
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def parse_yymmdd(yymmdd_str):
    """
    Converts a 'YYMMDD' string to a 'YYYY-MM-DD' formatted string.
    Assumes years < 50 are 2000s, otherwise 1900s.

    The century is applied explicitly. The previous implementation relied on
    ``strptime('%y')`` (which maps 00-68 to 20xx) followed by a ``year < 1950``
    adjustment that could never trigger, so 50-68 came out as 2050-2068.

    Parameters:
    yymmdd_str (str): A string in 'YYMMDD' format.

    Returns:
    str: A date string in 'YYYY-MM-DD' format.

    Raises:
    ValueError: If the input is not six digits or is not a real calendar date.
    """
    if len(yymmdd_str) != 6 or not yymmdd_str.isdigit():
        raise ValueError("Invalid YYMMDD format")

    try:
        two_digit_year = int(yymmdd_str[:2])
        century = 2000 if two_digit_year < 50 else 1900
        parsed_date = datetime(
            century + two_digit_year,
            int(yymmdd_str[2:4]),
            int(yymmdd_str[4:]),
        )
    except ValueError as exc:
        raise ValueError(f"Could not parse YYMMDD string: {yymmdd_str}") from exc

    return parsed_date.strftime("%Y-%m-%d")
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
def convert_pydantic_to_openai_function2(model: Type[BaseModel]) -> dict:
    """
    Convert a Pydantic model into OpenAI function calling format,
    inferring the function name and description from the model.

    - Function name is derived from the class name in snake_case.
    - Description is taken from the class docstring.
    - Parameters are the model's JSON schema.

    Args:
        model (BaseModel): The Pydantic model class.

    Returns:
        dict: A dictionary with "name", "description" and "parameters" keys.
    """

    def camel_to_snake(name: str) -> str:
        # Insert "_" before every capital letter except the first character.
        return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()

    # Pydantic v2 deprecates ``schema()`` in favour of ``model_json_schema()``;
    # use the new method when the installed version provides it.
    schema_builder = getattr(model, "model_json_schema", None) or model.schema

    return {
        "name": camel_to_snake(model.__name__),
        "description": model.__doc__ or "No description provided.",
        "parameters": schema_builder(),
    }
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
def fix_family_number(ocr_output):
    """Rebuild an OCR'd family number by re-inserting its letter markers.

    Steps, in order:
      1. Spaces are removed; an 18-character result is returned unchanged.
      2. Everything except digits is dropped.
      3. If the 5th digit is not "0", "E" is inserted after the 4th digit and
         the result is returned.
      4. Otherwise "L" is inserted after the 4th digit; an 18-character
         result is returned.
      5. Otherwise "M" is inserted after the run of zeros that follows "L".

    Parameters:
        ocr_output (str): Raw OCR text of the family number.

    Returns:
        str: The repaired value. When four or fewer digits are present there
        is nothing to repair, so the space-stripped input is returned as-is
        (this case previously raised IndexError).
    """
    ocr_output = ocr_output.replace(" ", "")

    if len(ocr_output) == 18:
        return ocr_output

    # Remove all non-digit characters before re-inserting the letters.
    digits_only = "".join(filter(str.isdigit, ocr_output))

    # Need at least one digit after the 4-digit prefix to decide E vs L.
    if len(digits_only) <= 4:
        return ocr_output

    prefix, remainder = digits_only[:4], digits_only[4:]

    if remainder[0] != "0":
        return prefix + "E" + remainder

    fixed = prefix + "L" + remainder
    if len(fixed) == 18:
        return fixed

    # Count the zeros directly after 'L'; 'M' goes right after them.
    zero_count = len(remainder) - len(remainder.lstrip("0"))
    insertion_index = 5 + zero_count  # 5 = index right after 'L'
    return fixed[:insertion_index] + "M" + fixed[insertion_index:]
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
def convert_dob_to_standard(date_str):
    """Re-format a date string as 'DD/MM/YYYY'.

    Accepted inputs are 'YYYY-MM-DD', 'DD.MM.YYYY', 'YYYY/MM/DD' and
    'DD/MM/YYYY'; they are tried in that order.

    Raises:
        ValueError: If none of the accepted formats matches.
    """
    for layout in ("%Y-%m-%d", "%d.%m.%Y", "%Y/%m/%d", "%d/%m/%Y"):
        try:
            moment = datetime.strptime(date_str, layout)
        except ValueError:
            # Wrong layout; fall through to the next candidate.
            continue
        else:
            return moment.strftime("%d/%m/%Y")

    raise ValueError("Date format not recognized.")
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
def check_irq_back_mrz1_format(s):
    """Validate, and if possible repair, MRZ line 1.

    A valid line is "IDIRQ", then two capital letters or a capital letter and
    a digit, then 20 digits, then "<<<" (30 characters in total).

    If ``s`` is not valid as given, leading/trailing "<" are stripped and,
    when exactly 27 characters remain, "<<<" is appended and the result is
    validated again.

    Parameters:
        s (str): Candidate MRZ line 1.

    Returns:
        str | bool: The valid (possibly repaired) line, or False. The repair
        branch previously executed ``raise False``, which itself raised
        TypeError; it now returns False like the other failure path.
    """
    pattern = r"^IDIRQ([A-Z]{2}|[A-Z][0-9])[0-9]{20}<<<$"
    if re.fullmatch(pattern, s) and len(s) == 30:
        return s

    mrz1_stripped = s.strip("<")
    if len(mrz1_stripped) == 27:
        mrz1 = mrz1_stripped + "<<<"
        if re.fullmatch(pattern, mrz1) and len(mrz1) == 30:
            return mrz1

    return False
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
def check_irq_back_mrz3_format(s):
    """Validate, and if possible repair, MRZ line 3.

    The line is split on "<" into its non-empty parts:
      * two parts  -> expected shape ``PART1<<PART2`` padded with "<" to 30;
      * one part   -> expected shape ``<<PART`` padded with "<" to 30.
    Parts must consist of capital letters A-Z. ``s`` is returned when it
    already has the expected shape; otherwise the line is rebuilt from its
    parts and returned if the rebuilt line is valid.

    Parameters:
        s (str): Candidate MRZ line 3.

    Returns:
        str | bool: The valid (possibly rebuilt) 30-character line, or False.
        Inputs with zero or more than two parts now return False explicitly
        (they previously fell through and returned None).
    """
    parts = [part for part in s.strip("<").split("<") if part]

    if len(parts) == 2:
        pattern = r"[A-Z]+<<[A-Z]+<*"
        rebuilt = parts[0] + "<<" + parts[1]
    elif len(parts) == 1:
        pattern = r"<<[A-Z]+<*"
        rebuilt = "<<" + parts[0]
    else:
        return False

    # Try the line as given first, then the version rebuilt from its parts.
    for candidate in (s, rebuilt.ljust(30, "<")):
        if re.fullmatch(pattern, candidate) and len(candidate) == 30:
            return candidate

    return False
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
def check_irq_back_mrz2_format(s):
    """Validate, and if possible repair, MRZ line 2.

    A valid line is 7 digits, "M" or "F", 7 digits, three capital letters,
    eleven "<" fillers and one final digit (30 characters in total).

    If ``s`` is not valid but ends in a digit, the line is rebuilt from its
    first 18 characters, eleven "<" and that last digit, then validated again.

    Parameters:
        s (str): Candidate MRZ line 2.

    Returns:
        str | bool: The valid (possibly rebuilt) line, or False. An empty
        string returns False (it previously raised IndexError on ``s[-1]``).
    """
    pattern = r"^\d{7}[MF]\d{7}[A-Z]{3}<{11}\d$"
    if re.fullmatch(pattern, s) and len(s) == 30:
        return s

    if not s or not s[-1].isdigit():
        return False

    # Re-insert the filler block manually and see if that fixes the line.
    head = s[:18]  # 6 DOB + 1 + 1 Gender + 6 Expiry + 1 + 3 'IRQ' = 18
    tail_digit = s[-1]
    mrz2 = head + ("<" * 11) + tail_digit
    if re.fullmatch(pattern, mrz2) and len(mrz2) == 30:
        return mrz2
    return False
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
def normalise_dates_for_nfc(date_str):
    """
    Convert date from 'YYYY/MM/DD' to 'DD/MM/YYYY' format for NFC usage.

    Parameters:
    date_str (str): Date in 'YYYY/MM/DD' format.

    Returns:
    str: Date in 'DD/MM/YYYY' format.

    Raises:
    ValueError: If ``date_str`` is not a 'YYYY/MM/DD' string.
    """
    try:
        dt = datetime.strptime(date_str, "%Y/%m/%d")
    # strptime raises ValueError for a bad format and TypeError for non-str
    # input; both are reported uniformly, keeping the original cause attached.
    except (ValueError, TypeError) as exc:
        raise ValueError("Input date must be in 'YYYY/MM/DD' format.") from exc
    return dt.strftime("%d/%m/%Y")
|