idvpackage 3.0.11__py3-none-any.whl → 3.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,397 +1,4 @@
1
- # import google.generativeai as genai
2
- # import base64
3
- # import json
4
- # import re
5
- # import io
6
- # from PIL import Image
7
- # from datetime import datetime
8
-
9
- # def configure_genai(api_key):
10
- # genai.configure(api_key=api_key)
11
- # model = genai.GenerativeModel(model_name="gemini-2.0-flash-lite")
12
- # return model
13
-
14
- # def base64_to_image(base64_string):
15
- # image_data = base64.b64decode(base64_string)
16
- # image = Image.open(io.BytesIO(image_data))
17
- # return image
18
-
19
- # def crop_image_in_half(image, offset=90):
20
- # width, height = image.size
21
- # split_line = (height // 2) - offset # Make the first half smaller by 'offset' pixels
22
-
23
- # first_half = image.crop((0, 0, width, split_line))
24
- # second_half = image.crop((0, split_line, width, height))
25
-
26
- # return first_half, second_half
27
-
28
-
29
- # def is_valid_date(date_str):
30
- # date_pattern = re.compile(r'^(\d{2}/\d{2}/\d{4}|\d{4}/\d{2}/\d{2}|\d{4}-\d{2}-\d{2})$')
31
- # if date_str is None or date_pattern.match(date_str):
32
- # return True
33
- # else:
34
- # return False
35
-
36
-
37
- # def genai_image_second_half(image, model):
38
- # result = model.generate_content(
39
- # [image, "\n\n", "give me issue_number, name, surname, father name, mother name, date_of_birth, place_of_birth, nationality, gender(M/F), and both lines of the MRZ from provided photo, please give me output as just dictionary - issue_number, full_name, first_name, last_name, father_name, mother_name, dob, place_of_birth, nationality,gender, mrz1, mrz2. Note that mrz1 is the line that starts with P"]
40
- # )
41
- # return result.text
42
-
43
-
44
- # def genai_vision_first_half(detected_text, model):
45
- # result = model.generate_content(
46
- # [detected_text,"\n\n", "Give me No from {detected_text}, output must be just dictionary - No"]
47
- # )#If the prompt includes the word "passport," it is flagged as harmful content.
48
- # return result.text
49
-
50
-
51
- # def genai_vision_second_half(detected_text, model):
52
- # result = model.generate_content(
53
- # [detected_text,"\n\n", "give me issue_number, passport_number, name, surname, father name, mother name, date_of_birth, place_of_birth, nationality, gender(M/F), and both lines of the MRZ from {detected_text}, please give me output as just dictionary - issue_number, passport_number, full_name, first_name, last_name, father_name, mother_name, dob, place_of_birth, nationality, gender, mrz1, mrz2. Note that mrz1 is the line that starts with P and contains name"]
54
- # )
55
- # return result.text
56
-
57
-
58
- # def genai_vision_mrz(detected_text, model):
59
- # result = model.generate_content(
60
- # [detected_text,"\n\n", "give me 'document_number', 'nationality', 'birth_date'(dd/mm/yyyy format), 'gender', 'expiration_date'(dd/mm/yyyy format) as dictionary from provided mrz. Dont write anything just return dictionary"]
61
- # )
62
- # return result.text
63
-
64
- # def fix_dob(passport_text):
65
- # dob = ''
66
- # expiry = ''
67
- # issue_date = ''
68
- # try:
69
- # matches = re.findall(r'\b\d{2}[\s/\-.]+\d{2}[\s/\-.]+\d{4}\b', passport_text, re.DOTALL)
70
- # date_objects = [datetime.strptime(re.sub(r'[\s/\-.]+', ' ', date).strip(), '%d %m %Y') for date in matches]
71
- # sorted_dates = sorted(date_objects)
72
- # sorted_date_strings = [date.strftime('%d %m %Y') for date in sorted_dates]
73
-
74
- # if len(sorted_date_strings) > 1:
75
- # dob = sorted_date_strings[0]
76
- # issue_date = sorted_date_strings[1]
77
- # expiry = sorted_date_strings[-1]
78
- # except:
79
- # matches = re.findall(r'\b\d{2}[./]\d{2}[./]\d{4}\b', passport_text)
80
- # date_objects = [datetime.strptime(date.replace('.', '/'), '%d/%m/%Y') for date in matches]
81
- # sorted_dates = sorted(date_objects)
82
- # sorted_date_strings = [date.strftime('%d/%m/%Y') for date in sorted_dates]
83
-
84
- # if len(sorted_date_strings)>1:
85
- # dob = sorted_date_strings[0]
86
- # issue_date = sorted_date_strings[1]
87
- # expiry = sorted_date_strings[-1]
88
- # else:
89
- # matches = re.findall(r'\d{4}-\d{2}-\d{2}', passport_text)
90
- # date_objects = [datetime.strptime(date, '%Y-%m-%d') for date in matches]
91
- # sorted_dates = sorted(date_objects)
92
- # sorted_date_strings = [date.strftime('%Y-%m-%d') for date in sorted_dates]
93
-
94
- # if len(sorted_date_strings)>1:
95
- # dob = sorted_date_strings[0].replace('-', '/')
96
- # issue_date = sorted_date_strings[1].replace('-', '/')
97
- # expiry = sorted_date_strings[-1].replace('-', '/')
98
-
99
- # else:
100
- # matches = re.findall(r'\d{2}-\d{2}-\d{4}', passport_text)
101
- # date_objects = [datetime.strptime(date, '%d-%m-%Y') for date in matches]
102
- # sorted_dates = sorted(date_objects)
103
- # sorted_date_strings = [date.strftime('%d-%m-%Y') for date in sorted_dates]
104
-
105
- # if sorted_date_strings:
106
- # dob = sorted_date_strings[0].replace('-', '/')
107
- # issue_date = sorted_date_strings[1].replace('-', '/')
108
- # expiry = sorted_date_strings[-1].replace('-', '/')
109
-
110
- # print(f"\nDOB: {dob}, Issue Date: {issue_date}, Expiry: {expiry}\n")
111
- # return dob, issue_date, expiry
112
-
113
- # def mrz_data(merged_dict, model):
114
- # # try:
115
- # input_mrz_2 = merged_dict['mrz2']
116
- # match = re.match(
117
- # r"(\d{10})([A-Z]{3})(\d{6})(\d)([MF])(\d{6})(\d)",
118
- # input_mrz_2
119
- # )
120
-
121
- # if match:
122
- # birth_date_raw = match.group(3)
123
- # expiration_date_raw = match.group(6)
124
-
125
- # birth_year_prefix = '19' if int(birth_date_raw[:2]) > 23 else '20'
126
- # birth_date = f"{birth_date_raw[4:]}/{birth_date_raw[2:4]}/{birth_year_prefix}{birth_date_raw[:2]}"
127
-
128
- # exp_year_prefix = '19' if int(expiration_date_raw[:2]) > 50 else '20'
129
- # expiration_date = f"{expiration_date_raw[4:]}/{expiration_date_raw[2:4]}/{exp_year_prefix}{expiration_date_raw[:2]}"
130
-
131
- # result_dict = {
132
- # 'passport_number': match.group(1),
133
- # 'nationality': match.group(2),
134
- # 'dob': birth_date,
135
- # 'gender': match.group(5),
136
- # 'expiry_date': expiration_date
137
- # }
138
- # print(f"\nResult_dict from MRZ: {result_dict}\n")
139
- # else:
140
- # mrz_json = genai_vision_mrz(input_mrz_2, model)
141
- # json_str = mrz_json.replace('```json', '').replace('```', '').strip()
142
- # json_str = json_str.replace('null', 'None')
143
- # result_dict = eval(json_str)
144
-
145
- # result_dict_name = {}
146
- # input_mrz_1 = merged_dict['mrz1']
147
- # match = re.match(r"P[<N]SYR([A-Z<]+)<<*([A-Z]+)<<*", input_mrz_1)
148
- # if match:
149
- # result_dict_name = {
150
- # 'last_name': match.group(1),
151
- # 'first_name': match.group(2)
152
- # }
153
- # result_dict_name['last_name'] = result_dict_name['last_name'].replace('<', ' ').strip()
154
- # result_dict_name['first_name'] = result_dict_name['first_name'].replace('<', ' ').strip()
155
-
156
- # else:
157
- # match = re.match(r"PNSYR\s*([A-Za-z]+)(?:<+([A-Za-z]+))?<<*", input_mrz_1)
158
- # if match:
159
- # try:
160
- # result_dict_name = {
161
- # 'last_name': match.group(1),
162
- # 'first_name': match.group(2)
163
- # }
164
- # result_dict_name['last_name'] = result_dict_name['last_name'].replace('<', ' ').strip()
165
- # result_dict_name['first_name'] = result_dict_name['first_name'].replace('<', ' ').strip()
166
- # except Exception as e:
167
- # result_dict_name = {}
168
- # print(f"Error: {e}")
169
-
170
- # # Merge the name data and other MRZ data into dict_gemini
171
- # merged_dict_mrz = {**merged_dict, **result_dict_name, **result_dict}
172
-
173
- # # except Exception as e:
174
- # # print(f"Error: {e}")
175
-
176
- # return merged_dict_mrz
177
-
178
-
179
- # def fill_with_mrz(dict_gemini, mrz_dict_final):
180
- # fields_to_fill = ['last_name', 'first_name', 'nationality', 'dob', 'gender']
181
- # for field in fields_to_fill:
182
- # if not dict_gemini.get(field, ''):
183
- # dict_gemini[field] = mrz_dict_final.get(field, '')
184
- # return dict_gemini
185
-
186
-
187
- # def extract_passport_number(input_string):
188
- # pattern = r'^(\d+)(?=[A-Z]{3})'
189
-
190
- # match = re.search(pattern, input_string)
191
-
192
- # if match:
193
- # return match.group(1)
194
- # else:
195
- # return None
196
-
197
- # def syr_passport_extraction_front(passport_text_first, api_key):
198
- # model = configure_genai(api_key)
199
- # try:
200
- # ## Process first half of the image
201
- # passport_first_ai_result = genai_vision_second_half(passport_text_first, model)
202
- # json_match = re.search(r'```(json|python|plaintext)?\s*(.*?)\s*```', passport_first_ai_result, re.DOTALL)
203
- # if json_match:
204
- # json_str = json_match.group(2)
205
- # dictionary_first_half = json.loads(json_str)
206
-
207
- # else:
208
- # json_str = passport_first_ai_result.replace('```json', '').replace('```', '').strip()
209
- # json_str = json_str.replace('null', 'None')
210
- # dictionary_first_half= eval(json_str)
211
-
212
- # if dictionary_first_half.get('nationality', ''):
213
- # if dictionary_first_half['nationality'].lower().startswith('syria'):
214
- # dictionary_first_half['nationality'] = 'SYR'
215
-
216
- # except Exception as e:
217
- # print(f"Error occured in GenAI first half {e}")
218
-
219
- # if dictionary_first_half and dictionary_first_half.get('passport_number', ''):
220
- # passport_number = dictionary_first_half.pop('passport_number')
221
- # passport_number = re.sub(r'\D', '', passport_number)
222
-
223
- # dictionary_first_half['passport_number'] = passport_number
224
-
225
- # merged_dict = {**dictionary_first_half}
226
-
227
- # if merged_dict and merged_dict.get('birth_date', ''):
228
- # merged_dict['dob'] = merged_dict.pop('birth_date')
229
-
230
- # if merged_dict and merged_dict.get('birth_place', ''):
231
- # merged_dict['place_of_birth'] = merged_dict.pop('birth_place')
232
-
233
- # if merged_dict and (
234
- # not merged_dict.get('dob') or
235
- # not merged_dict.get('full_name') or
236
- # not merged_dict.get('nationality') or
237
- # not merged_dict.get('first_name') or
238
- # not merged_dict.get('last_name')
239
- # ):
240
- # mrz_dict_final = mrz_data(merged_dict, model)
241
- # merged_dict = fill_with_mrz(merged_dict, mrz_dict_final)
242
-
243
- # passport_text = passport_text_first
244
- # if merged_dict and not merged_dict.get('dob', ''):
245
- # dob, issue_date, expiry = fix_dob(passport_text)
246
- # merged_dict['dob'] = dob
247
-
248
- # if not merged_dict.get('full_name', ''):
249
- # merged_dict['full_name'] = f"{merged_dict.get('first_name', '')} {merged_dict.get('last_name', '')}"
250
-
251
- # if not merged_dict.get('passport_number', ''):
252
- # passport_number = extract_passport_number(merged_dict.get('mrz2', ''))
253
- # merged_dict['passport_number'] = passport_number
254
-
255
- # if merged_dict.get('passport_number', ''):
256
- # passport_number = merged_dict['passport_number']
257
- # if len(passport_number) < 9:
258
- # passport_number = f"0{passport_number}"
259
- # merged_dict['passport_number'] = passport_number
260
-
261
- # if merged_dict.get('passport_number', ''):
262
- # merged_dict['id_number'] = merged_dict['passport_number']
263
-
264
- # if not merged_dict.get('mrz', ''):
265
- # mrz1 = merged_dict.get('mrz1', '')
266
- # mrz2 = merged_dict.get('mrz2', '')
267
- # if mrz1 and mrz2:
268
- # merged_dict['mrz'] = f"{mrz1} {mrz2}"
269
-
270
- # if "gender" in merged_dict:
271
- # gender = merged_dict["gender"].strip().upper()
272
- # if gender == "F":
273
- # merged_dict["gender"] = "FEMALE"
274
- # elif gender == "M":
275
- # merged_dict["gender"] = "MALE"
276
-
277
- # if 'gender' in merged_dict:
278
- # merged_dict["gender"] = merged_dict["gender"].strip().upper()
279
-
280
- # if merged_dict.get('nationality', ''):
281
- # nationality = merged_dict.get('nationality', '')
282
- # if nationality and len(nationality.split(' ')) > 1:
283
- # merged_dict['nationality'] = 'SYR'
284
-
285
- # if not merged_dict.get('nationality', ''):
286
- # merged_dict['nationality'] = 'SYR'
287
-
288
- # merged_dict['issuing_country'] = 'SYR'
289
1
 
290
- # return merged_dict
291
-
292
- # def genai_vision_back(detected_text, model):
293
- # result = model.generate_content(
294
- # [detected_text,"\n\n", "give me date of issue(in dd/mm/yy format), expiry date (in dd/mm/yy format), place of issue, and national number from {detected_text}, please give me output as just dictionary - issuing_date, expiry_date, place_of_issue, national_number"]
295
- # )
296
- # return result.text
297
-
298
- # def find_issue_date_and_expiry(passport_text_back):
299
- # date_pattern = re.compile(r'\b\d{2}/\d{2}/\d{4}\b')
300
- # matches = date_pattern.findall(passport_text_back)
301
-
302
- # if not matches:
303
- # return None, None
304
-
305
- # date_objects = [datetime.strptime(date, '%d/%m/%Y') for date in matches]
306
- # sorted_dates = sorted(date_objects)
307
-
308
- # issuing_date = sorted_dates[0].strftime('%d/%m/%Y')
309
- # expiry_date = sorted_dates[-1].strftime('%d/%m/%Y')
310
-
311
- # return issuing_date, expiry_date
312
-
313
- # def extract_national_number(passport_text_back):
314
- # national_number_pattern = re.compile(r'\b\d{3}-\d{8}\b')
315
- # match = national_number_pattern.search(passport_text_back)
316
-
317
- # if match:
318
- # return match.group(0)
319
- # else:
320
- # return None
321
-
322
-
323
- # def syr_passport_extraction_back(passport_text_back, api_key):
324
- # model = configure_genai(api_key)
325
- # place_of_issue = ''
326
- # result_ai = genai_vision_back(passport_text_back, model)
327
- # try:
328
- # json_str = result_ai.replace('```json', '').replace('```', '').strip()
329
- # json_str = json_str.replace('null', 'None')
330
- # try:
331
- # passport_back_data = eval(json_str)
332
- # issue_date = passport_back_data.get('issuing_date', '')
333
- # expiry_date = passport_back_data.get('expiry_date', '')
334
- # # Validate date format
335
- # if not is_valid_date(issue_date) or not is_valid_date(expiry_date):
336
- # raise ValueError("Invalid date format")
337
-
338
- # except Exception as e:
339
- # print(f"Error in parsing or validating dates: {e}")
340
- # passport_back_data = {'issuing_date': '', 'expiry_date': '', 'national_number': '', 'place_of_issue': ''}
341
- # try:
342
- # issue_date, expiry = find_issue_date_and_expiry(passport_text_back)
343
- # if issue_date and expiry:
344
- # passport_back_data = {
345
- # 'issuing_date': issue_date,
346
- # 'expiry_date': expiry
347
- # }
348
-
349
- # national_number = extract_national_number(passport_text_back)
350
- # if national_number:
351
- # passport_back_data['national_number'] = national_number
352
-
353
- # except Exception as e:
354
- # print(f"Error occurred in finding dates: {e}")
355
- # passport_back_data = {}
356
-
357
- # except Exception as e:
358
- # print(f"Error occured in GenAI back {e}")
359
- # passport_back_data = {}
360
- # try:
361
- # issue_date, expiry = find_issue_date_and_expiry(passport_text_back)
362
- # if issue_date and expiry:
363
- # passport_back_data = {
364
- # 'issuing_date': issue_date,
365
- # 'expiry_date': expiry
366
- # }
367
-
368
- # national_number = extract_national_number(passport_text_back)
369
- # if national_number:
370
- # passport_back_data['national_number'] = national_number
371
-
372
- # except Exception as e:
373
- # print(f"Error occured in finding dates {e}")
374
- # passport_back_data = {}
375
-
376
- # if not passport_back_data.get('place_of_issue', ''):
377
- # json_match = re.search(r'```(json|python|plaintext)?\s*(.*?)\s*```', result_ai, re.DOTALL)
378
- # if json_match:
379
- # json_str = json_match.group(2)
380
- # dictionary_second_half = json.loads(json_str)
381
- # place_of_issue = dictionary_second_half.get('place_of_issue', '')
382
- # passport_back_data['place_of_issue'] = place_of_issue
383
-
384
- # else:
385
- # json_str = result_ai.replace('```json', '').replace('```', '').strip()
386
- # json_str = json_str.replace('null', 'None')
387
- # dictionary_second_half= eval(json_str)
388
-
389
- # place_of_issue = dictionary_second_half.get('place_of_issue', '')
390
- # passport_back_data['place_of_issue'] = place_of_issue
391
- # else:
392
- # passport_back_data['place_of_issue'] = ''
393
-
394
- # return passport_back_data
395
2
 
396
3
 
397
4
  import base64
@@ -422,6 +29,7 @@ Return a JSON object with the following fields (use the exact field names):
422
29
  - father_name: Father's Name (extract exactly as written on the card)
423
30
  - mother_name: Mother's Name (extract exactly as written on the card)
424
31
  - dob: Date of birth exactly as shown on the card (preserve original format)
32
+ - passport_number: Passport number as printed beside N° or No (exactly 9 characters) above side of the photo
425
33
  - place_of_birth: Place of birth in English (extract exactly as written on the card)
426
34
  - mrz1: First line of the MRZ, exactly 44 characters, pad with '<' at the end if shorter
427
35
  - mrz2: Second line of the MRZ, exactly 44 characters
@@ -429,6 +37,10 @@ Return a JSON object with the following fields (use the exact field names):
429
37
  - header_verified: True if both 'SYRIAN ARAB REPUBLIC' and 'PASSPORT' are clearly visible, else False
430
38
  - country_code: Country code as printed below the text 'country code' (extract exactly as written)
431
39
  - issue_number: Issue number as printed on the card as 'Issue no' (extract exactly as written)
40
+ - passport_number_mrz: Passport number as extracted from MRZ present in starting of mrz line 1
41
+ - dob_mrz: Date of birth as extracted from MRZ in DD/MM/YYYY format
42
+ - gender_mrz: Gender as extracted from MRZ (M or F) if M return MALE else if F return FEMALE
43
+ - expiry_date_mrz: Expiry date as extracted from MRZ line 2 in DD/MM/YYYY format
432
44
 
433
45
  Instructions:
434
46
  - Do NOT guess or hallucinate any values. If unclear, return null.
@@ -476,6 +88,11 @@ class SyriaPassportFront(BaseModel):
476
88
  ...,
477
89
  description="The date of birth exactly as shown on the card (preserve original format)",
478
90
  )
91
+
92
+ passport_number : str = Field(
93
+ ..., min_length=9, max_length=9,
94
+ description="Passport number as printed beside N° or No (exactly 9 characters) present above side of the photo",
95
+ )
479
96
 
480
97
  place_of_birth: str = Field(
481
98
  ...,
@@ -506,20 +123,29 @@ class SyriaPassportFront(BaseModel):
506
123
 
507
124
  issue_number: str = Field(
508
125
  ...,
509
- description="Issue number as printed on the card as 'Issue no' (extract exactly as written)",
126
+ min_length=14, max_length=14, description="Issue number as printed on the card as 'Issue no' extract exactly as written on the card",
510
127
  )
511
-
512
- class SyriaPassportBack(BaseModel):
513
-
514
128
 
129
+
130
+ passport_number_mrz: str = Field(
131
+ ...,min_length = 9, max_length = 9, description="Passport number as extracted from MRZ present in starting of mrz line 1"
132
+ )
133
+ dob_mrz: str = Field(
134
+ ..., description="Date of birth as extracted from MRZ (in DD/MM/YYYY format)"
135
+ )
136
+ gender_mrz: str = Field(
137
+ ..., description="Gender as extracted from MRZ (M or F) if M return MALE else if F return FEMALE"
138
+ )
139
+ expiry_date_mrz: str = Field(
140
+ ..., description="Expiry date as extracted from MRZ line 2(in DD/MM/YYYY format)"
141
+ )
142
+
143
+ class SyriaPassportBack(BaseModel):
515
144
  issue_date: str = Field(..., description="Issue date in DD/MM/YYYY format")
516
145
  place_of_issue: str = Field(..., description="Place of issue as printed on the card")
517
146
  expiry_date: str = Field(..., description="Expiry date in DD/MM/YYYY format")
518
-
519
147
  national_number: str = Field(..., description="national number as printed on the card example: 123-12345678")
520
148
 
521
-
522
-
523
149
  def process_image(side):
524
150
 
525
151
  if side == "first" or side == "page1":
@@ -530,7 +156,6 @@ def process_image(side):
530
156
  prompt = PROMPT_BACK
531
157
  model = SyriaPassportBack
532
158
 
533
-
534
159
  else:
535
160
  raise ValueError("Invalid document side specified. Use 'front', 'back', or 'passport'.")
536
161
 
@@ -547,7 +172,7 @@ def get_openai_response(prompt: str, model_type, image: BytesIO, openai_key):
547
172
  {"role": "system", "content": "You are an expert at extracting information from identity documents."},
548
173
  {"role": "user", "content": [
549
174
  {"type": "input_text", "text": prompt},
550
- {"type": "input_image", "image_url": f"data:image/jpeg;base64,{b64_image}", "detail": "low"},
175
+ {"type": "input_image", "image_url": f"data:image/jpeg;base64,{b64_image}", "detail": "high"},
551
176
  ]},
552
177
  ],
553
178
  text_format=model_type,