idvpackage 3.0.11__py3-none-any.whl → 3.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- idvpackage/common.py +4 -962
- idvpackage/iraq_id_extraction_withopenai.py +374 -893
- idvpackage/jor_passport_extraction.py +1 -6
- idvpackage/liveness_spoofing_v2.py +2 -45
- idvpackage/ocr.py +1011 -2427
- idvpackage/ocr_utils.py +144 -486
- idvpackage/pse_passport_extraction.py +18 -292
- idvpackage/qatar_id_extraction.py +4 -956
- idvpackage/sudan_passport_extraction.py +0 -928
- idvpackage/syr_passport_extraction.py +27 -402
- idvpackage/uae_id_extraction.py +87 -151
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/METADATA +1 -1
- idvpackage-3.0.12.dist-info/RECORD +34 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/WHEEL +1 -1
- idvpackage/ekyc.py +0 -78
- idvpackage/genai_utils.py +0 -309
- idvpackage/iraq_id_extraction.py +0 -992
- idvpackage/iraq_passport_extraction.py +0 -588
- idvpackage/lazy_imports.py +0 -44
- idvpackage/lebanon_passport_extraction.py +0 -161
- idvpackage/sau_id_extraction.py +0 -248
- idvpackage/sudan_id_extraction.py +0 -764
- idvpackage-3.0.11.dist-info/RECORD +0 -42
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/licenses/LICENSE +0 -0
- {idvpackage-3.0.11.dist-info → idvpackage-3.0.12.dist-info}/top_level.txt +0 -0
|
@@ -1,397 +1,4 @@
|
|
|
1
|
-
# import google.generativeai as genai
|
|
2
|
-
# import base64
|
|
3
|
-
# import json
|
|
4
|
-
# import re
|
|
5
|
-
# import io
|
|
6
|
-
# from PIL import Image
|
|
7
|
-
# from datetime import datetime
|
|
8
|
-
|
|
9
|
-
# def configure_genai(api_key):
|
|
10
|
-
# genai.configure(api_key=api_key)
|
|
11
|
-
# model = genai.GenerativeModel(model_name="gemini-2.0-flash-lite")
|
|
12
|
-
# return model
|
|
13
|
-
|
|
14
|
-
# def base64_to_image(base64_string):
|
|
15
|
-
# image_data = base64.b64decode(base64_string)
|
|
16
|
-
# image = Image.open(io.BytesIO(image_data))
|
|
17
|
-
# return image
|
|
18
|
-
|
|
19
|
-
# def crop_image_in_half(image, offset=90):
|
|
20
|
-
# width, height = image.size
|
|
21
|
-
# split_line = (height // 2) - offset # Make the first half smaller by 'offset' pixels
|
|
22
|
-
|
|
23
|
-
# first_half = image.crop((0, 0, width, split_line))
|
|
24
|
-
# second_half = image.crop((0, split_line, width, height))
|
|
25
|
-
|
|
26
|
-
# return first_half, second_half
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# def is_valid_date(date_str):
|
|
30
|
-
# date_pattern = re.compile(r'^(\d{2}/\d{2}/\d{4}|\d{4}/\d{2}/\d{2}|\d{4}-\d{2}-\d{2})$')
|
|
31
|
-
# if date_str is None or date_pattern.match(date_str):
|
|
32
|
-
# return True
|
|
33
|
-
# else:
|
|
34
|
-
# return False
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
# def genai_image_second_half(image, model):
|
|
38
|
-
# result = model.generate_content(
|
|
39
|
-
# [image, "\n\n", "give me issue_number, name, surname, father name, mother name, date_of_birth, place_of_birth, nationality, gender(M/F), and both lines of the MRZ from provided photo, please give me output as just dictionary - issue_number, full_name, first_name, last_name, father_name, mother_name, dob, place_of_birth, nationality,gender, mrz1, mrz2. Note that mrz1 is the line that starts with P"]
|
|
40
|
-
# )
|
|
41
|
-
# return result.text
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
# def genai_vision_first_half(detected_text, model):
|
|
45
|
-
# result = model.generate_content(
|
|
46
|
-
# [detected_text,"\n\n", "Give me No from {detected_text}, output must be just dictionary - No"]
|
|
47
|
-
# )#If the prompt includes the word "passport," it is flagged as harmful content.
|
|
48
|
-
# return result.text
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# def genai_vision_second_half(detected_text, model):
|
|
52
|
-
# result = model.generate_content(
|
|
53
|
-
# [detected_text,"\n\n", "give me issue_number, passport_number, name, surname, father name, mother name, date_of_birth, place_of_birth, nationality, gender(M/F), and both lines of the MRZ from {detected_text}, please give me output as just dictionary - issue_number, passport_number, full_name, first_name, last_name, father_name, mother_name, dob, place_of_birth, nationality, gender, mrz1, mrz2. Note that mrz1 is the line that starts with P and contains name"]
|
|
54
|
-
# )
|
|
55
|
-
# return result.text
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
# def genai_vision_mrz(detected_text, model):
|
|
59
|
-
# result = model.generate_content(
|
|
60
|
-
# [detected_text,"\n\n", "give me 'document_number', 'nationality', 'birth_date'(dd/mm/yyyy format), 'gender', 'expiration_date'(dd/mm/yyyy format) as dictionary from provided mrz. Dont write anything just return dictionary"]
|
|
61
|
-
# )
|
|
62
|
-
# return result.text
|
|
63
|
-
|
|
64
|
-
# def fix_dob(passport_text):
|
|
65
|
-
# dob = ''
|
|
66
|
-
# expiry = ''
|
|
67
|
-
# issue_date = ''
|
|
68
|
-
# try:
|
|
69
|
-
# matches = re.findall(r'\b\d{2}[\s/\-.]+\d{2}[\s/\-.]+\d{4}\b', passport_text, re.DOTALL)
|
|
70
|
-
# date_objects = [datetime.strptime(re.sub(r'[\s/\-.]+', ' ', date).strip(), '%d %m %Y') for date in matches]
|
|
71
|
-
# sorted_dates = sorted(date_objects)
|
|
72
|
-
# sorted_date_strings = [date.strftime('%d %m %Y') for date in sorted_dates]
|
|
73
|
-
|
|
74
|
-
# if len(sorted_date_strings) > 1:
|
|
75
|
-
# dob = sorted_date_strings[0]
|
|
76
|
-
# issue_date = sorted_date_strings[1]
|
|
77
|
-
# expiry = sorted_date_strings[-1]
|
|
78
|
-
# except:
|
|
79
|
-
# matches = re.findall(r'\b\d{2}[./]\d{2}[./]\d{4}\b', passport_text)
|
|
80
|
-
# date_objects = [datetime.strptime(date.replace('.', '/'), '%d/%m/%Y') for date in matches]
|
|
81
|
-
# sorted_dates = sorted(date_objects)
|
|
82
|
-
# sorted_date_strings = [date.strftime('%d/%m/%Y') for date in sorted_dates]
|
|
83
|
-
|
|
84
|
-
# if len(sorted_date_strings)>1:
|
|
85
|
-
# dob = sorted_date_strings[0]
|
|
86
|
-
# issue_date = sorted_date_strings[1]
|
|
87
|
-
# expiry = sorted_date_strings[-1]
|
|
88
|
-
# else:
|
|
89
|
-
# matches = re.findall(r'\d{4}-\d{2}-\d{2}', passport_text)
|
|
90
|
-
# date_objects = [datetime.strptime(date, '%Y-%m-%d') for date in matches]
|
|
91
|
-
# sorted_dates = sorted(date_objects)
|
|
92
|
-
# sorted_date_strings = [date.strftime('%Y-%m-%d') for date in sorted_dates]
|
|
93
|
-
|
|
94
|
-
# if len(sorted_date_strings)>1:
|
|
95
|
-
# dob = sorted_date_strings[0].replace('-', '/')
|
|
96
|
-
# issue_date = sorted_date_strings[1].replace('-', '/')
|
|
97
|
-
# expiry = sorted_date_strings[-1].replace('-', '/')
|
|
98
|
-
|
|
99
|
-
# else:
|
|
100
|
-
# matches = re.findall(r'\d{2}-\d{2}-\d{4}', passport_text)
|
|
101
|
-
# date_objects = [datetime.strptime(date, '%d-%m-%Y') for date in matches]
|
|
102
|
-
# sorted_dates = sorted(date_objects)
|
|
103
|
-
# sorted_date_strings = [date.strftime('%d-%m-%Y') for date in sorted_dates]
|
|
104
|
-
|
|
105
|
-
# if sorted_date_strings:
|
|
106
|
-
# dob = sorted_date_strings[0].replace('-', '/')
|
|
107
|
-
# issue_date = sorted_date_strings[1].replace('-', '/')
|
|
108
|
-
# expiry = sorted_date_strings[-1].replace('-', '/')
|
|
109
|
-
|
|
110
|
-
# print(f"\nDOB: {dob}, Issue Date: {issue_date}, Expiry: {expiry}\n")
|
|
111
|
-
# return dob, issue_date, expiry
|
|
112
|
-
|
|
113
|
-
# def mrz_data(merged_dict, model):
|
|
114
|
-
# # try:
|
|
115
|
-
# input_mrz_2 = merged_dict['mrz2']
|
|
116
|
-
# match = re.match(
|
|
117
|
-
# r"(\d{10})([A-Z]{3})(\d{6})(\d)([MF])(\d{6})(\d)",
|
|
118
|
-
# input_mrz_2
|
|
119
|
-
# )
|
|
120
|
-
|
|
121
|
-
# if match:
|
|
122
|
-
# birth_date_raw = match.group(3)
|
|
123
|
-
# expiration_date_raw = match.group(6)
|
|
124
|
-
|
|
125
|
-
# birth_year_prefix = '19' if int(birth_date_raw[:2]) > 23 else '20'
|
|
126
|
-
# birth_date = f"{birth_date_raw[4:]}/{birth_date_raw[2:4]}/{birth_year_prefix}{birth_date_raw[:2]}"
|
|
127
|
-
|
|
128
|
-
# exp_year_prefix = '19' if int(expiration_date_raw[:2]) > 50 else '20'
|
|
129
|
-
# expiration_date = f"{expiration_date_raw[4:]}/{expiration_date_raw[2:4]}/{exp_year_prefix}{expiration_date_raw[:2]}"
|
|
130
|
-
|
|
131
|
-
# result_dict = {
|
|
132
|
-
# 'passport_number': match.group(1),
|
|
133
|
-
# 'nationality': match.group(2),
|
|
134
|
-
# 'dob': birth_date,
|
|
135
|
-
# 'gender': match.group(5),
|
|
136
|
-
# 'expiry_date': expiration_date
|
|
137
|
-
# }
|
|
138
|
-
# print(f"\nResult_dict from MRZ: {result_dict}\n")
|
|
139
|
-
# else:
|
|
140
|
-
# mrz_json = genai_vision_mrz(input_mrz_2, model)
|
|
141
|
-
# json_str = mrz_json.replace('```json', '').replace('```', '').strip()
|
|
142
|
-
# json_str = json_str.replace('null', 'None')
|
|
143
|
-
# result_dict = eval(json_str)
|
|
144
|
-
|
|
145
|
-
# result_dict_name = {}
|
|
146
|
-
# input_mrz_1 = merged_dict['mrz1']
|
|
147
|
-
# match = re.match(r"P[<N]SYR([A-Z<]+)<<*([A-Z]+)<<*", input_mrz_1)
|
|
148
|
-
# if match:
|
|
149
|
-
# result_dict_name = {
|
|
150
|
-
# 'last_name': match.group(1),
|
|
151
|
-
# 'first_name': match.group(2)
|
|
152
|
-
# }
|
|
153
|
-
# result_dict_name['last_name'] = result_dict_name['last_name'].replace('<', ' ').strip()
|
|
154
|
-
# result_dict_name['first_name'] = result_dict_name['first_name'].replace('<', ' ').strip()
|
|
155
|
-
|
|
156
|
-
# else:
|
|
157
|
-
# match = re.match(r"PNSYR\s*([A-Za-z]+)(?:<+([A-Za-z]+))?<<*", input_mrz_1)
|
|
158
|
-
# if match:
|
|
159
|
-
# try:
|
|
160
|
-
# result_dict_name = {
|
|
161
|
-
# 'last_name': match.group(1),
|
|
162
|
-
# 'first_name': match.group(2)
|
|
163
|
-
# }
|
|
164
|
-
# result_dict_name['last_name'] = result_dict_name['last_name'].replace('<', ' ').strip()
|
|
165
|
-
# result_dict_name['first_name'] = result_dict_name['first_name'].replace('<', ' ').strip()
|
|
166
|
-
# except Exception as e:
|
|
167
|
-
# result_dict_name = {}
|
|
168
|
-
# print(f"Error: {e}")
|
|
169
|
-
|
|
170
|
-
# # Merge the name data and other MRZ data into dict_gemini
|
|
171
|
-
# merged_dict_mrz = {**merged_dict, **result_dict_name, **result_dict}
|
|
172
|
-
|
|
173
|
-
# # except Exception as e:
|
|
174
|
-
# # print(f"Error: {e}")
|
|
175
|
-
|
|
176
|
-
# return merged_dict_mrz
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
# def fill_with_mrz(dict_gemini, mrz_dict_final):
|
|
180
|
-
# fields_to_fill = ['last_name', 'first_name', 'nationality', 'dob', 'gender']
|
|
181
|
-
# for field in fields_to_fill:
|
|
182
|
-
# if not dict_gemini.get(field, ''):
|
|
183
|
-
# dict_gemini[field] = mrz_dict_final.get(field, '')
|
|
184
|
-
# return dict_gemini
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
# def extract_passport_number(input_string):
|
|
188
|
-
# pattern = r'^(\d+)(?=[A-Z]{3})'
|
|
189
|
-
|
|
190
|
-
# match = re.search(pattern, input_string)
|
|
191
|
-
|
|
192
|
-
# if match:
|
|
193
|
-
# return match.group(1)
|
|
194
|
-
# else:
|
|
195
|
-
# return None
|
|
196
|
-
|
|
197
|
-
# def syr_passport_extraction_front(passport_text_first, api_key):
|
|
198
|
-
# model = configure_genai(api_key)
|
|
199
|
-
# try:
|
|
200
|
-
# ## Process first half of the image
|
|
201
|
-
# passport_first_ai_result = genai_vision_second_half(passport_text_first, model)
|
|
202
|
-
# json_match = re.search(r'```(json|python|plaintext)?\s*(.*?)\s*```', passport_first_ai_result, re.DOTALL)
|
|
203
|
-
# if json_match:
|
|
204
|
-
# json_str = json_match.group(2)
|
|
205
|
-
# dictionary_first_half = json.loads(json_str)
|
|
206
|
-
|
|
207
|
-
# else:
|
|
208
|
-
# json_str = passport_first_ai_result.replace('```json', '').replace('```', '').strip()
|
|
209
|
-
# json_str = json_str.replace('null', 'None')
|
|
210
|
-
# dictionary_first_half= eval(json_str)
|
|
211
|
-
|
|
212
|
-
# if dictionary_first_half.get('nationality', ''):
|
|
213
|
-
# if dictionary_first_half['nationality'].lower().startswith('syria'):
|
|
214
|
-
# dictionary_first_half['nationality'] = 'SYR'
|
|
215
|
-
|
|
216
|
-
# except Exception as e:
|
|
217
|
-
# print(f"Error occured in GenAI first half {e}")
|
|
218
|
-
|
|
219
|
-
# if dictionary_first_half and dictionary_first_half.get('passport_number', ''):
|
|
220
|
-
# passport_number = dictionary_first_half.pop('passport_number')
|
|
221
|
-
# passport_number = re.sub(r'\D', '', passport_number)
|
|
222
|
-
|
|
223
|
-
# dictionary_first_half['passport_number'] = passport_number
|
|
224
|
-
|
|
225
|
-
# merged_dict = {**dictionary_first_half}
|
|
226
|
-
|
|
227
|
-
# if merged_dict and merged_dict.get('birth_date', ''):
|
|
228
|
-
# merged_dict['dob'] = merged_dict.pop('birth_date')
|
|
229
|
-
|
|
230
|
-
# if merged_dict and merged_dict.get('birth_place', ''):
|
|
231
|
-
# merged_dict['place_of_birth'] = merged_dict.pop('birth_place')
|
|
232
|
-
|
|
233
|
-
# if merged_dict and (
|
|
234
|
-
# not merged_dict.get('dob') or
|
|
235
|
-
# not merged_dict.get('full_name') or
|
|
236
|
-
# not merged_dict.get('nationality') or
|
|
237
|
-
# not merged_dict.get('first_name') or
|
|
238
|
-
# not merged_dict.get('last_name')
|
|
239
|
-
# ):
|
|
240
|
-
# mrz_dict_final = mrz_data(merged_dict, model)
|
|
241
|
-
# merged_dict = fill_with_mrz(merged_dict, mrz_dict_final)
|
|
242
|
-
|
|
243
|
-
# passport_text = passport_text_first
|
|
244
|
-
# if merged_dict and not merged_dict.get('dob', ''):
|
|
245
|
-
# dob, issue_date, expiry = fix_dob(passport_text)
|
|
246
|
-
# merged_dict['dob'] = dob
|
|
247
|
-
|
|
248
|
-
# if not merged_dict.get('full_name', ''):
|
|
249
|
-
# merged_dict['full_name'] = f"{merged_dict.get('first_name', '')} {merged_dict.get('last_name', '')}"
|
|
250
|
-
|
|
251
|
-
# if not merged_dict.get('passport_number', ''):
|
|
252
|
-
# passport_number = extract_passport_number(merged_dict.get('mrz2', ''))
|
|
253
|
-
# merged_dict['passport_number'] = passport_number
|
|
254
|
-
|
|
255
|
-
# if merged_dict.get('passport_number', ''):
|
|
256
|
-
# passport_number = merged_dict['passport_number']
|
|
257
|
-
# if len(passport_number) < 9:
|
|
258
|
-
# passport_number = f"0{passport_number}"
|
|
259
|
-
# merged_dict['passport_number'] = passport_number
|
|
260
|
-
|
|
261
|
-
# if merged_dict.get('passport_number', ''):
|
|
262
|
-
# merged_dict['id_number'] = merged_dict['passport_number']
|
|
263
|
-
|
|
264
|
-
# if not merged_dict.get('mrz', ''):
|
|
265
|
-
# mrz1 = merged_dict.get('mrz1', '')
|
|
266
|
-
# mrz2 = merged_dict.get('mrz2', '')
|
|
267
|
-
# if mrz1 and mrz2:
|
|
268
|
-
# merged_dict['mrz'] = f"{mrz1} {mrz2}"
|
|
269
|
-
|
|
270
|
-
# if "gender" in merged_dict:
|
|
271
|
-
# gender = merged_dict["gender"].strip().upper()
|
|
272
|
-
# if gender == "F":
|
|
273
|
-
# merged_dict["gender"] = "FEMALE"
|
|
274
|
-
# elif gender == "M":
|
|
275
|
-
# merged_dict["gender"] = "MALE"
|
|
276
|
-
|
|
277
|
-
# if 'gender' in merged_dict:
|
|
278
|
-
# merged_dict["gender"] = merged_dict["gender"].strip().upper()
|
|
279
|
-
|
|
280
|
-
# if merged_dict.get('nationality', ''):
|
|
281
|
-
# nationality = merged_dict.get('nationality', '')
|
|
282
|
-
# if nationality and len(nationality.split(' ')) > 1:
|
|
283
|
-
# merged_dict['nationality'] = 'SYR'
|
|
284
|
-
|
|
285
|
-
# if not merged_dict.get('nationality', ''):
|
|
286
|
-
# merged_dict['nationality'] = 'SYR'
|
|
287
|
-
|
|
288
|
-
# merged_dict['issuing_country'] = 'SYR'
|
|
289
1
|
|
|
290
|
-
# return merged_dict
|
|
291
|
-
|
|
292
|
-
# def genai_vision_back(detected_text, model):
|
|
293
|
-
# result = model.generate_content(
|
|
294
|
-
# [detected_text,"\n\n", "give me date of issue(in dd/mm/yy format), expiry date (in dd/mm/yy format), place of issue, and national number from {detected_text}, please give me output as just dictionary - issuing_date, expiry_date, place_of_issue, national_number"]
|
|
295
|
-
# )
|
|
296
|
-
# return result.text
|
|
297
|
-
|
|
298
|
-
# def find_issue_date_and_expiry(passport_text_back):
|
|
299
|
-
# date_pattern = re.compile(r'\b\d{2}/\d{2}/\d{4}\b')
|
|
300
|
-
# matches = date_pattern.findall(passport_text_back)
|
|
301
|
-
|
|
302
|
-
# if not matches:
|
|
303
|
-
# return None, None
|
|
304
|
-
|
|
305
|
-
# date_objects = [datetime.strptime(date, '%d/%m/%Y') for date in matches]
|
|
306
|
-
# sorted_dates = sorted(date_objects)
|
|
307
|
-
|
|
308
|
-
# issuing_date = sorted_dates[0].strftime('%d/%m/%Y')
|
|
309
|
-
# expiry_date = sorted_dates[-1].strftime('%d/%m/%Y')
|
|
310
|
-
|
|
311
|
-
# return issuing_date, expiry_date
|
|
312
|
-
|
|
313
|
-
# def extract_national_number(passport_text_back):
|
|
314
|
-
# national_number_pattern = re.compile(r'\b\d{3}-\d{8}\b')
|
|
315
|
-
# match = national_number_pattern.search(passport_text_back)
|
|
316
|
-
|
|
317
|
-
# if match:
|
|
318
|
-
# return match.group(0)
|
|
319
|
-
# else:
|
|
320
|
-
# return None
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
# def syr_passport_extraction_back(passport_text_back, api_key):
|
|
324
|
-
# model = configure_genai(api_key)
|
|
325
|
-
# place_of_issue = ''
|
|
326
|
-
# result_ai = genai_vision_back(passport_text_back, model)
|
|
327
|
-
# try:
|
|
328
|
-
# json_str = result_ai.replace('```json', '').replace('```', '').strip()
|
|
329
|
-
# json_str = json_str.replace('null', 'None')
|
|
330
|
-
# try:
|
|
331
|
-
# passport_back_data = eval(json_str)
|
|
332
|
-
# issue_date = passport_back_data.get('issuing_date', '')
|
|
333
|
-
# expiry_date = passport_back_data.get('expiry_date', '')
|
|
334
|
-
# # Validate date format
|
|
335
|
-
# if not is_valid_date(issue_date) or not is_valid_date(expiry_date):
|
|
336
|
-
# raise ValueError("Invalid date format")
|
|
337
|
-
|
|
338
|
-
# except Exception as e:
|
|
339
|
-
# print(f"Error in parsing or validating dates: {e}")
|
|
340
|
-
# passport_back_data = {'issuing_date': '', 'expiry_date': '', 'national_number': '', 'place_of_issue': ''}
|
|
341
|
-
# try:
|
|
342
|
-
# issue_date, expiry = find_issue_date_and_expiry(passport_text_back)
|
|
343
|
-
# if issue_date and expiry:
|
|
344
|
-
# passport_back_data = {
|
|
345
|
-
# 'issuing_date': issue_date,
|
|
346
|
-
# 'expiry_date': expiry
|
|
347
|
-
# }
|
|
348
|
-
|
|
349
|
-
# national_number = extract_national_number(passport_text_back)
|
|
350
|
-
# if national_number:
|
|
351
|
-
# passport_back_data['national_number'] = national_number
|
|
352
|
-
|
|
353
|
-
# except Exception as e:
|
|
354
|
-
# print(f"Error occurred in finding dates: {e}")
|
|
355
|
-
# passport_back_data = {}
|
|
356
|
-
|
|
357
|
-
# except Exception as e:
|
|
358
|
-
# print(f"Error occured in GenAI back {e}")
|
|
359
|
-
# passport_back_data = {}
|
|
360
|
-
# try:
|
|
361
|
-
# issue_date, expiry = find_issue_date_and_expiry(passport_text_back)
|
|
362
|
-
# if issue_date and expiry:
|
|
363
|
-
# passport_back_data = {
|
|
364
|
-
# 'issuing_date': issue_date,
|
|
365
|
-
# 'expiry_date': expiry
|
|
366
|
-
# }
|
|
367
|
-
|
|
368
|
-
# national_number = extract_national_number(passport_text_back)
|
|
369
|
-
# if national_number:
|
|
370
|
-
# passport_back_data['national_number'] = national_number
|
|
371
|
-
|
|
372
|
-
# except Exception as e:
|
|
373
|
-
# print(f"Error occured in finding dates {e}")
|
|
374
|
-
# passport_back_data = {}
|
|
375
|
-
|
|
376
|
-
# if not passport_back_data.get('place_of_issue', ''):
|
|
377
|
-
# json_match = re.search(r'```(json|python|plaintext)?\s*(.*?)\s*```', result_ai, re.DOTALL)
|
|
378
|
-
# if json_match:
|
|
379
|
-
# json_str = json_match.group(2)
|
|
380
|
-
# dictionary_second_half = json.loads(json_str)
|
|
381
|
-
# place_of_issue = dictionary_second_half.get('place_of_issue', '')
|
|
382
|
-
# passport_back_data['place_of_issue'] = place_of_issue
|
|
383
|
-
|
|
384
|
-
# else:
|
|
385
|
-
# json_str = result_ai.replace('```json', '').replace('```', '').strip()
|
|
386
|
-
# json_str = json_str.replace('null', 'None')
|
|
387
|
-
# dictionary_second_half= eval(json_str)
|
|
388
|
-
|
|
389
|
-
# place_of_issue = dictionary_second_half.get('place_of_issue', '')
|
|
390
|
-
# passport_back_data['place_of_issue'] = place_of_issue
|
|
391
|
-
# else:
|
|
392
|
-
# passport_back_data['place_of_issue'] = ''
|
|
393
|
-
|
|
394
|
-
# return passport_back_data
|
|
395
2
|
|
|
396
3
|
|
|
397
4
|
import base64
|
|
@@ -422,6 +29,7 @@ Return a JSON object with the following fields (use the exact field names):
|
|
|
422
29
|
- father_name: Father's Name (extract exactly as written on the card)
|
|
423
30
|
- mother_name: Mother's Name (extract exactly as written on the card)
|
|
424
31
|
- dob: Date of birth exactly as shown on the card (preserve original format)
|
|
32
|
+
- passport_number: Passport number as printed beside N° or No (exactly 9 characters) above side of the photo
|
|
425
33
|
- place_of_birth: Place of birth in English (extract exactly as written on the card)
|
|
426
34
|
- mrz1: First line of the MRZ, exactly 44 characters, pad with '<' at the end if shorter
|
|
427
35
|
- mrz2: Second line of the MRZ, exactly 44 characters
|
|
@@ -429,6 +37,10 @@ Return a JSON object with the following fields (use the exact field names):
|
|
|
429
37
|
- header_verified: True if both 'SYRIAN ARAB REPUBLIC' and 'PASSPORT' are clearly visible, else False
|
|
430
38
|
- country_code: Country code as printed below the text 'country code' (extract exactly as written)
|
|
431
39
|
- issue_number: Issue number as printed on the card as 'Issue no' (extract exactly as written)
|
|
40
|
+
- passport_number_mrz: Passport number as extracted from MRZ present in starting of mrz line 1
|
|
41
|
+
- dob_mrz: Date of birth as extracted from MRZ in DD/MM/YYYY format
|
|
42
|
+
- gender_mrz: Gender as extracted from MRZ (M or F) if M return MALE else if F return FEMALE
|
|
43
|
+
- expiry_date_mrz: Expiry date as extracted from MRZ line 2 in DD/MM/YYYY format
|
|
432
44
|
|
|
433
45
|
Instructions:
|
|
434
46
|
- Do NOT guess or hallucinate any values. If unclear, return null.
|
|
@@ -476,6 +88,11 @@ class SyriaPassportFront(BaseModel):
|
|
|
476
88
|
...,
|
|
477
89
|
description="The date of birth exactly as shown on the card (preserve original format)",
|
|
478
90
|
)
|
|
91
|
+
|
|
92
|
+
passport_number : str = Field(
|
|
93
|
+
..., min_length=9, max_length=9,
|
|
94
|
+
description="Passport number as printed beside N° or No (exactly 9 characters) present above side of the photo",
|
|
95
|
+
)
|
|
479
96
|
|
|
480
97
|
place_of_birth: str = Field(
|
|
481
98
|
...,
|
|
@@ -506,20 +123,29 @@ class SyriaPassportFront(BaseModel):
|
|
|
506
123
|
|
|
507
124
|
issue_number: str = Field(
|
|
508
125
|
...,
|
|
509
|
-
description="Issue number as printed on the card as 'Issue no'
|
|
126
|
+
min_length=14, max_length=14, description="Issue number as printed on the card as 'Issue no' extract exactly as written on the card",
|
|
510
127
|
)
|
|
511
|
-
|
|
512
|
-
class SyriaPassportBack(BaseModel):
|
|
513
|
-
|
|
514
128
|
|
|
129
|
+
|
|
130
|
+
passport_number_mrz: str = Field(
|
|
131
|
+
...,min_length = 9, max_length = 9, description="Passport number as extracted from MRZ present in starting of mrz line 1"
|
|
132
|
+
)
|
|
133
|
+
dob_mrz: str = Field(
|
|
134
|
+
..., description="Date of birth as extracted from MRZ (in DD/MM/YYYY format)"
|
|
135
|
+
)
|
|
136
|
+
gender_mrz: str = Field(
|
|
137
|
+
..., description="Gender as extracted from MRZ (M or F) if M return MALE else if F return FEMALE"
|
|
138
|
+
)
|
|
139
|
+
expiry_date_mrz: str = Field(
|
|
140
|
+
..., description="Expiry date as extracted from MRZ line 2(in DD/MM/YYYY format)"
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
class SyriaPassportBack(BaseModel):
|
|
515
144
|
issue_date: str = Field(..., description="Issue date in DD/MM/YYYY format")
|
|
516
145
|
place_of_issue: str = Field(..., description="Place of issue as printed on the card")
|
|
517
146
|
expiry_date: str = Field(..., description="Expiry date in DD/MM/YYYY format")
|
|
518
|
-
|
|
519
147
|
national_number: str = Field(..., description="national number as printed on the card example: 123-12345678")
|
|
520
148
|
|
|
521
|
-
|
|
522
|
-
|
|
523
149
|
def process_image(side):
|
|
524
150
|
|
|
525
151
|
if side == "first" or side == "page1":
|
|
@@ -530,7 +156,6 @@ def process_image(side):
|
|
|
530
156
|
prompt = PROMPT_BACK
|
|
531
157
|
model = SyriaPassportBack
|
|
532
158
|
|
|
533
|
-
|
|
534
159
|
else:
|
|
535
160
|
raise ValueError("Invalid document side specified. Use 'front', 'back', or 'passport'.")
|
|
536
161
|
|
|
@@ -547,7 +172,7 @@ def get_openai_response(prompt: str, model_type, image: BytesIO, openai_key):
|
|
|
547
172
|
{"role": "system", "content": "You are an expert at extracting information from identity documents."},
|
|
548
173
|
{"role": "user", "content": [
|
|
549
174
|
{"type": "input_text", "text": prompt},
|
|
550
|
-
{"type": "input_image", "image_url": f"data:image/jpeg;base64,{b64_image}", "detail": "
|
|
175
|
+
{"type": "input_image", "image_url": f"data:image/jpeg;base64,{b64_image}", "detail": "high"},
|
|
551
176
|
]},
|
|
552
177
|
],
|
|
553
178
|
text_format=model_type,
|