idvpackage 3.0.10__py3-none-any.whl → 3.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,961 +1,9 @@
1
- # from PIL import Image
2
- # from deep_translator import GoogleTranslator
3
- # import pycountry
4
- # from rapidfuzz import process, fuzz
5
- # from idvpackage.common import extract_text_from_image_data
6
- # from io import BytesIO
7
- # import re
8
- # import time
9
- # import datetime
10
- # from langchain.tools import tool
11
- # from langchain.prompts import ChatPromptTemplate
12
- # from langchain.chat_models import ChatOpenAI
13
- # from pydantic import BaseModel, Field, validator
14
- # from langchain.utils.openai_functions import convert_pydantic_to_openai_function
15
- # from typing import Optional, Literal
16
- # from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
17
- # from datetime import datetime, timedelta
18
- # from langchain.schema.agent import AgentFinish
19
- # import openai
20
- # import json
21
-
22
-
23
- # class QatarIDInfo(BaseModel):
24
- # """
25
- # Extract info from ocr-extracted text from a Qatar ID
26
- # """
27
- # name: str = Field(..., description="Full name in English")
28
- # name_ar: str = Field(..., description="Full name in Arabic")
29
- # nationality: str = Field(...,
30
- # description="Nationality in ISO 3166-1 alpha-3 format (e.g., 'PAK' 'QAT', 'SYR', 'PHL')",
31
- # example="SYR")
32
- # id_number: str = Field(..., description="National ID number")
33
- # dob: str = Field(..., description="Date of birth")
34
- # expiry_date: str = Field(..., description="Card expiry date")
35
- # occupation: str = Field(..., description="Occupation in Arabic")
36
- # occupation_en: str = Field(..., description="Occupation, translated from Arabic to English")
37
-
38
-
39
- # # @tool(args_schema=QatarIDInfo)
40
- # # def verify_qatar_id_info(name='', name_ar='', nationality='', id_number='', dob='', expiry_date='', occupation='',
41
- # # occupation_en=''):
42
- # # if occupation_en == '':
43
- # # occupation_en = GoogleTranslator('ar', 'en').translate(occupation)
44
- # #
45
- # # return {**locals()}
46
- # #
47
- # #
48
- # # def route(result):
49
- # # if isinstance(result, AgentFinish):
50
- # # return result.return_values['output']
51
- # # else:
52
- # # tools = {
53
- # # "verify_qatar_id_info": verify_qatar_id_info
54
- # # }
55
- # # return tools[result.tool].run(result.tool_input)
56
- # #
57
-
58
- # def qatar_id_info_chain(ocr_text, openai_key):
59
- # gpt_model = 'gpt-4o'
60
-
61
- # prompt = ChatPromptTemplate.from_messages([
62
- # ("system",
63
- # "Extract the relevant information, if not explicitly provided do not guess, leave empty string. Extract partial info. Translate where explicity stated."
64
- # ),
65
- # ("user", "{ocr_text}")
66
- # ])
67
-
68
- # model = ChatOpenAI(model=gpt_model, temperature=0,
69
- # openai_api_key=openai_key)
70
- # functions = [convert_pydantic_to_openai_function(QatarIDInfo)]
71
- # verification_model = model.bind(functions=functions)
72
- # verification_chain = prompt | verification_model | JsonOutputFunctionsParser()
73
-
74
- # result = verification_chain.invoke({"ocr_text": ocr_text})
75
- # return result
76
-
77
-
78
- # def extract_name_line(ocr_text):
79
- # """Try to extract the English name line explicitly from OCR."""
80
- # match = re.search(r'(?i)\bname\b\s*[:\-]?\s*([A-Z][A-Z\s]+)', ocr_text)
81
- # return match.group(1).strip() if match else None
82
-
83
-
84
- # # ISO3166 nationality mapping
85
- # ISO3166_nationality_mapping = {
86
- # "004": "AFG", "008": "ALB", "012": "DZA", "016": "ASM", "020": "AND", "024": "AGO", "660": "AIA",
87
- # "010": "ATA", "028": "ATG", "032": "ARG", "051": "ARM", "533": "ABW", "036": "AUS", "040": "AUT",
88
- # "031": "AZE", "044": "BHS", "048": "BHR", "050": "BGD", "052": "BRB", "112": "BLR", "056": "BEL",
89
- # "084": "BLZ", "204": "BEN", "060": "BMU", "064": "BTN", "068": "BOL", "535": "BES", "070": "BIH",
90
- # "072": "BWA", "074": "BVT", "076": "BRA", "086": "IOT", "096": "BRN", "100": "BGR", "854": "BFA",
91
- # "108": "BDI", "132": "CPV", "116": "KHM", "120": "CMR", "124": "CAN", "136": "CYM", "140": "CAF",
92
- # "148": "TCD", "152": "CHL", "156": "CHN", "162": "CXR", "166": "CCK", "170": "COL", "174": "COM",
93
- # "180": "COD", "178": "COG", "184": "COK", "188": "CRI", "191": "HRV", "192": "CUB", "531": "CUW",
94
- # "196": "CYP", "203": "CZE", "384": "CIV", "208": "DNK", "262": "DJI", "212": "DMA", "214": "DOM",
95
- # "218": "ECU", "818": "EGY", "222": "SLV", "226": "GNQ", "232": "ERI", "080": "ERI", "233": "EST",
96
- # "748": "SWZ", "231": "ETH", "238": "FLK", "234": "FRO", "242": "FJI", "246": "FIN", "250": "FRA",
97
- # "254": "GUF", "258": "PYF", "260": "ATF", "266": "GAB", "270": "GMB", "268": "GEO", "276": "DEU",
98
- # "288": "GHA", "292": "GIB", "300": "GRC", "304": "GRL", "308": "GRD", "312": "GLP", "316": "GUM",
99
- # "320": "GTM", "831": "GGY", "324": "GIN", "624": "GNB", "328": "GUY", "332": "HTI", "334": "HMD",
100
- # "336": "VAT", "340": "HND", "344": "HKG", "348": "HUN", "352": "ISL", "356": "IND", "360": "IDN",
101
- # "364": "IRN", "368": "IRQ", "372": "IRL", "833": "IMN", "376": "ISR", "380": "ITA", "388": "JAM",
102
- # "392": "JPN", "832": "JEY", "400": "JOR", "398": "KAZ", "404": "KEN", "296": "KIR", "408": "PRK",
103
- # "410": "KOR", "414": "KWT", "417": "KGZ", "418": "LAO", "428": "LVA", "422": "LBN", "426": "LSO",
104
- # "430": "LBR", "434": "LBY", "438": "LIE", "440": "LTU", "442": "LUX", "446": "MAC", "450": "MDG",
105
- # "454": "MWI", "458": "MYS", "462": "MDV", "466": "MLI", "470": "MLT", "584": "MHL", "474": "MTQ",
106
- # "478": "MRT", "480": "MUS", "175": "MYT", "484": "MEX", "583": "FSM", "498": "MDA", "492": "MCO",
107
- # "496": "MNG", "499": "MNE", "500": "MSR", "504": "MAR", "508": "MOZ", "104": "MMR", "516": "NAM",
108
- # "520": "NRU", "524": "NPL", "528": "NLD", "540": "NCL", "554": "NZL", "558": "NIC", "562": "NER",
109
- # "566": "NGA", "570": "NIU", "574": "NFK", "580": "MNP", "578": "NOR", "512": "OMN", "586": "PAK",
110
- # "585": "PLW", "275": "PSE", "591": "PAN", "598": "PNG", "600": "PRY", "604": "PER", "608": "PHL",
111
- # "612": "PCN", "616": "POL", "620": "PRT", "630": "PRI", "634": "QAT", "807": "MKD", "642": "ROU",
112
- # "643": "RUS", "646": "RWA", "638": "REU", "652": "BLM", "654": "SHN", "659": "KNA", "662": "LCA",
113
- # "663": "MAF", "666": "SPM", "670": "VCT", "882": "WSM", "674": "SMR", "678": "STP", "682": "SAU",
114
- # "686": "SEN", "688": "SRB", "690": "SYC", "694": "SLE", "702": "SGP", "534": "SXM", "703": "SVK",
115
- # "705": "SVN", "090": "SLB", "706": "SOM", "710": "ZAF", "239": "SGS", "728": "SSD", "724": "ESP",
116
- # "144": "LKA", "736": "SDN", "740": "SUR", "744": "SJM", "752": "SWE", "756": "CHE", "760": "SYR",
117
- # "158": "TWN", "762": "TJK", "834": "TZA", "764": "THA", "626": "TLS", "768": "TGO", "772": "TKL",
118
- # "776": "TON", "780": "TTO", "788": "TUN", "792": "TUR", "795": "TKM", "796": "TCA", "798": "TUV",
119
- # "800": "UGA", "804": "UKR", "784": "ARE", "826": "GBR", "581": "UMI", "840": "USA", "858": "URY",
120
- # "860": "UZB", "548": "VUT", "862": "VEN", "704": "VNM", "092": "VGB", "850": "VIR", "876": "WLF",
121
- # "732": "ESH", "887": "YEM", "894": "ZMB", "716": "ZWE", "248": "ALA", "999": "PSE", "544": "BIH",
122
- # "230": "ETH", "886": "YEM", "901": "TWN"
123
- # }
124
-
125
-
126
- # def crop_second_part(img):
127
- # width, height = img.size
128
- # half_width = width // 2
129
- # second_part = img.crop((half_width, 0, width, height))
130
- # return second_part
131
-
132
-
133
- # def crop_third_part(img):
134
- # width, height = img.size
135
- # part_height = height // 6
136
- # third_part = img.crop((0, 3.7 * part_height, width, height))
137
- # return third_part
138
-
139
-
140
- # def detect_id_card(client, image_data, id_text, part=None):
141
- # if id_text:
142
- # vertices = id_text[0].bounding_poly.vertices
143
- # left = vertices[0].x
144
- # top = vertices[0].y
145
- # right = vertices[2].x
146
- # bottom = vertices[2].y
147
-
148
- # padding = 40
149
- # left -= padding
150
- # top -= padding
151
- # right += padding
152
- # bottom += padding
153
-
154
- # # img = image_data
155
- # # with Image.open(io.BytesIO(image_data)) as img:
156
- # # id_card = img.crop((max(0, left), max(0, top), right, bottom))
157
-
158
- # pil_image = Image.open(BytesIO(image_data))
159
- # compressed_image = BytesIO()
160
- # pil_image.save(compressed_image, format="JPEG", quality=50, optimize=True)
161
- # compressed_image_data = compressed_image.getvalue()
162
- # compressed_pil_image = Image.open(BytesIO(compressed_image_data))
163
- # id_card = compressed_pil_image.crop((max(0, left), max(0, top), right, bottom))
164
-
165
- # width, height = id_card.size
166
- # if width < height:
167
- # id_card = id_card.rotate(90, expand=True)
168
-
169
- # if part == 'second':
170
- # part_img = crop_second_part(id_card)
171
- # if part == 'third':
172
- # part_img = crop_third_part(id_card)
173
-
174
- # # 2nd call to vision AI
175
- # part_text = extract_text_from_image_data(client, part_img)
176
-
177
- # return id_card, part_img, part_text
178
- # else:
179
- # print('No text found in the image.')
180
-
181
-
182
- # def is_arabic(word):
183
- # return re.search(r'[\u0600-\u06FF]', word) is not None
184
-
185
-
186
- # def extract_name_ar(text):
187
- # # patterns = [
188
- # # r"(?:الاسم|الإسم):\s*([^\n]+)",
189
- # # r"الاسم\s+([^\n]+)"
190
- # # ]
191
-
192
- # patterns = [
193
- # r"(?:الإسم|الاسم):\s*([^\n]+)",
194
- # r"(?:الإسم|الاسم)\s+([^\n]+)",
195
- # ]
196
-
197
- # for pattern in patterns:
198
- # regex = re.compile(pattern, re.MULTILINE)
199
- # match = regex.search(text)
200
- # if match:
201
- # return match.group(1).strip()
202
-
203
- # return None
204
-
205
-
206
- # def extract_name_fields_from_cropped_part(text):
207
- # pattern = r"Name:\s*([A-Z\s-]+)"
208
- # name_dict = {}
209
- # match = re.search(pattern, text)
210
-
211
- # if match:
212
- # extracted_name = match.group(1).strip()
213
- # extracted_name = extracted_name.replace("\n", " ")
214
- # unnecessary_words = ['OF', 'THE']
215
- # extracted_name = [word for word in extracted_name.split() if word.upper() not in unnecessary_words]
216
- # if len(extracted_name[-1]) <= 2:
217
- # extracted_name = extracted_name[:-1]
218
-
219
- # extracted_name = ' '.join(extracted_name)
220
-
221
- # name_dict["name"] = extracted_name.strip()
222
- # name_parts = extracted_name.split()
223
-
224
- # first_name = name_parts[0].upper()
225
- # last_name = name_parts[-1].upper()
226
-
227
- # name_dict["first_name"] = first_name
228
- # name_dict["last_name"] = last_name
229
- # return name_dict
230
-
231
-
232
- # def identify_front(text):
233
- # front_id_keywords = ["State of Qatar"]
234
- # pattern = '|'.join(map(re.escape, front_id_keywords))
235
-
236
- # try:
237
- # if re.search(pattern, text, re.IGNORECASE):
238
- # return True
239
- # else:
240
- # return False
241
- # except:
242
- # return 'error'
243
-
244
-
245
- # def sort_dates_by_datetime(dates):
246
- # return sorted(dates, key=lambda x: datetime.strptime(x, '%d/%m/%Y'))
247
-
248
-
249
- # def extract_and_check_country(words):
250
- # for word in words:
251
- # try:
252
- # country = pycountry.countries.lookup(word)
253
- # if country:
254
- # return country.name.upper()
255
- # except LookupError:
256
- # pass
257
-
258
- # return ''
259
-
260
-
261
- # def extract_and_check_country_normalized(words):
262
- # normalized_words = [re.sub(r'\s+|-', '', word).lower() for word in words]
263
-
264
- # for country in pycountry.countries:
265
- # common_name_normalized = re.sub(r'\s+|-', '', country.name).lower()
266
- # official_name_normalized = re.sub(r'\s+|-', '', getattr(country, 'official_name', '')).lower()
267
-
268
- # if common_name_normalized in normalized_words or official_name_normalized in normalized_words:
269
- # return country.name.upper()
270
-
271
- # return ''
272
-
273
-
274
- # def extract_name_after_nationality(word_list, nationality):
275
- # nationality_index = word_list.index(nationality) if nationality in word_list else -1
276
-
277
- # if nationality_index != -1 and nationality_index < len(word_list) - 1:
278
- # words_after_nationality = word_list[nationality_index + 1:]
279
- # return words_after_nationality
280
- # else:
281
- # return []
282
-
283
-
284
- # def get_fuzzy_match_score(line, patterns, threshold=80):
285
- # result = process.extractOne(line, patterns, scorer=fuzz.WRatio)
286
- # if result and result[1] > threshold:
287
- # return result[1]
288
- # return None
289
-
290
-
291
- # def extract_occupation_in_empty_case(text):
292
- # pattern = re.compile(r'المهنة\s*[:]*\s*(\S*)', re.IGNORECASE)
293
- # lines = text.split('\n')
294
-
295
- # for i, line in enumerate(lines):
296
- # match = pattern.search(line)
297
- # if match:
298
- # if match.group(1):
299
- # return match.group(1).strip()
300
- # if i + 1 < len(lines):
301
- # return lines[i + 1].strip()
302
-
303
- # return ''
304
-
305
-
306
- # def extract_occupation_in_empty_case_v2(text):
307
- # pattern = re.compile(r'occupation\s*[:]*\s*(\S*)', re.IGNORECASE)
308
- # lines = text.split('\n')
309
-
310
- # for i, line in enumerate(lines):
311
- # match = pattern.search(line)
312
- # if match:
313
- # if match.group(1):
314
- # return match.group(1).strip()
315
- # if i + 1 < len(lines):
316
- # return lines[i + 1].strip()
317
-
318
- # return ''
319
-
320
-
321
- # def genAI(ar_front_data, model):
322
- # query = f"Please extract the nationality from the following text and provide the corresponding ISO 3166-1 alpha-3 country code for that nationality: {ar_front_data}"
323
- # response = model.generate_content(query)
324
- # nationality_ai = re.findall(r'\*\*(.*?)\*\*', response.text)[1]
325
- # return nationality_ai
326
-
327
-
328
- # def genAI_for_occupation(dct, model):
329
- # query = f"""
330
- # You are provided with the following front_data: {dct}.
331
-
332
- # Check if 'occupation_en' information is valid and correct. Please review this broadly without focusing on the specifics.
333
- # for example if (doctor teacher employee and etc it is occupation as well)
334
- # If 'occupation_en' match the expected values, respond with 'correct'.
335
- # If it is incorrect, respond with 'not_correct', if you are not able to determine then respond with 'undetermined'.
336
- # as a response give me 'not_correct','undetermined' or 'correct' nothing else
337
- # """
338
- # response = model.generate_content(query)
339
- # value = response.candidates[0].content.parts[0].text.strip()
340
-
341
- # return value
342
-
343
-
344
- # def genAI_for_occupation_correct(passport_details, model):
345
- # query = f"""
346
- # Please extract the occupation from the following text and provide it in this format:
347
- # - English: **occupation**
348
- # - Arabic: **occupation** return only these 2 nothing else.
349
- # So you will get occupation in arabic and translate it into english and send it
350
- # if no info about occupation then 'not_provided', for both English and Arabic: {passport_details}
351
- # """
352
- # response = model.generate_content(query)
353
- # occupation_ai = re.findall(r'\*\*(.*?)\*\*', response.text)
354
-
355
- # return occupation_ai
356
-
357
-
358
- # def genAI_for_expiry_date(ar_front_data, model):
359
- # query = f"""
360
- # Please extract the expiry date from the following text and provide it in this format(dd/mm/yyyy):
361
- # - expiry_date, return only this 1 variable, nothing else.
362
- # if no info about expiry_date found then return 'expiry_date': 'not_provided': {ar_front_data}
363
- # """
364
- # response = model.generate_content(query)
365
- # expiry_ai = re.findall(r'\*\*(.*?)\*\*', response.text)[1]
366
-
367
- # return expiry_ai
368
-
369
-
370
- # def make_api_request_with_retries(prompt: str, max_retries: int = 3, delay_seconds: float = 2):
371
- # """
372
- # Helper function to make API requests with retry logic using OpenAI
373
- # """
374
- # for attempt in range(max_retries):
375
- # try:
376
- # response = openai.ChatCompletion.create(
377
- # model="gpt-4o-mini",
378
- # temperature=0.4,
379
- # max_tokens=2000,
380
- # messages=[
381
- # {
382
- # "role": "user",
383
- # "content": prompt
384
- # }
385
- # ]
386
- # )
387
- # result = response.choices[0].message.content
388
-
389
- # try:
390
- # return json.loads(result)
391
- # except json.JSONDecodeError:
392
- # try:
393
- # json_match = re.search(r'```(json|python|plaintext)?\s*(.*?)\s*```|\s*({.*?})', result, re.DOTALL)
394
- # if json_match:
395
- # json_str = json_match.group(2) or json_match.group(3)
396
- # try:
397
- # return json.loads(json_str)
398
- # except:
399
- # return eval(json_str.replace("'", '"'))
400
- # except:
401
- # pass
402
-
403
- # return json.loads(result)
404
-
405
- # except Exception as e:
406
- # print(f"Error during API request (attempt {attempt + 1} of {max_retries}): {str(e)}")
407
- # if attempt < max_retries - 1:
408
- # time.sleep(delay_seconds)
409
- # else:
410
- # raise Exception(f"Max retries exceeded. Last error: {str(e)}")
411
-
412
-
413
- # def extract_numeric_fields_from_raw(ar_front_data, third_part_text, name_extracted, extract_names=False):
414
- # front_data = GoogleTranslator(dest='en').translate(ar_front_data)
415
- # id_number_pattern = r"\b\d{11}\b"
416
-
417
- # words = re.findall(r'\b[A-Z]{4,}\b', ar_front_data)
418
- # nationality = extract_and_check_country(words)
419
-
420
- # nationality_iso = ''
421
- # if not nationality:
422
- # nationality = extract_and_check_country_normalized(words)
423
-
424
- # if nationality:
425
- # try:
426
- # country = pycountry.countries.lookup(nationality)
427
- # nationality_iso = country.alpha_3
428
- # except:
429
- # nationality_iso = ''
430
-
431
- # print(f'------------Nationality from OCR: {nationality_iso}')
432
- # # Extract nationality from ID number
433
- # id_number_match = re.search(id_number_pattern, ar_front_data, re.IGNORECASE)
434
- # if id_number_match:
435
- # id_number = id_number_match.group(0)
436
- # # Extract nationality code from ID number (digits 4-6)
437
- # if len(id_number) >= 6:
438
- # nationality_code = id_number[3:6] # 0-based indexing, so 3:6 gives us digits 4-6
439
- # nationality_from_id = ISO3166_nationality_mapping.get(nationality_code, '')
440
- # print(f'------------Nationality from ID number code: {nationality_from_id}')
441
-
442
- # # Use nationality from ID if OCR nationality is empty, invalid, or different
443
- # if nationality_from_id:
444
- # if not nationality_iso or len(nationality_iso) != 3:
445
- # nationality_iso = nationality_from_id
446
- # elif nationality_iso != nationality_from_id:
447
- # nationality_iso = nationality_from_id
448
- # else:
449
- # try:
450
- # id_number_match = re.search(id_number_pattern, ar_front_data, re.IGNORECASE)
451
- # id_number = id_number_match.group(0)
452
- # except:
453
- # id_number = ''
454
-
455
- # names_list = extract_name_after_nationality(words, nationality)
456
- # name = ' '.join(names_list)
457
- # if not name:
458
- # name = name_extracted
459
-
460
- # dates = sort_dates_by_datetime(re.findall(r'\d{2}/\d{2}/\d{4}', ar_front_data))
461
- # combined_back_pattern = r'(Director General of the General Department|Directorate of Passports|Passport Number|Passport Expiry)'
462
- # back_match = re.search(combined_back_pattern, ar_front_data, re.IGNORECASE)
463
-
464
- # try:
465
- # if back_match:
466
- # if 'Passport' in ar_front_data:
467
- # ar_front_data = ar_front_data.split("Name")[0]
468
-
469
- # dates = sort_dates_by_datetime(re.findall(r'\d{2}/\d{2}/\d{4}', ar_front_data))
470
-
471
- # if len(dates) > 2:
472
- # dob = dates[0]
473
- # expiry = dates[1]
474
- # elif len(dates) <= 2:
475
- # dob = dates[0]
476
- # expiry = dates[-1]
477
- # else:
478
- # dob = dates[0]
479
- # expiry = dates[-1]
480
- # except:
481
- # try:
482
- # dob = dates[0]
483
- # expiry = dates[-1]
484
- # except:
485
- # dob = ''
486
- # expiry = ''
487
-
488
- # if 'Passport' in ar_front_data:
489
- # ar_front_data = ar_front_data.split("Name")[0]
490
-
491
- # ar_front_data_filtered = [
492
- # re.sub(r'\b[a-zA-Z0-9]+\b', '',
493
- # line.replace(':', '').replace('/', '').replace('.', '').replace('المهنة', '').replace('تاريخ الميلاد',
494
- # '').replace(
495
- # 'دولة قطر', '').replace('الرقم الشخصي', '').replace('الصلاحية', '').replace('الجنسية', '').replace(
496
- # 'رخصة إقامة', '').replace('الرقم', '').replace('اللى', '').replace('طو', '').replace('دولة',
497
- # '').replace(
498
- # 'الهند', '').replace('بطاقة', '').replace('إثبات', '').replace('شخصية', '').replace('ہے',
499
- # '').replace('۔',
500
- # ''))
501
- # for line in ar_front_data.split('\n')
502
- # ]
503
-
504
- # cleaned_lines = [line for line in ar_front_data_filtered if line.strip()]
505
-
506
- # patterns_to_remove = [
507
- # r"State Of Qatar", r"Residency Permit", r"ID\.No:", r"D\.O\.B\.:", r"D\.O\.B:",
508
- # r"Expiry:", r"Nationality:", r"\d{9}", r"\d{2}/\d{2}/\d{4}", r"بنغلاديش", r"الهند",
509
- # r"on", r"الرقم الشخصي:", r"تاريخ الميلاد:", r"الصلاحية:",
510
- # r"الجنسية:", r"دولة قطر", r"رخصة إقامة", r"المهنة:", r"الاسم:", r"Name:"
511
- # ]
512
-
513
- # if nationality:
514
- # patterns_to_remove.append(re.escape(nationality))
515
-
516
- # if name:
517
- # patterns_to_remove.append(re.escape(name))
518
-
519
- # compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in patterns_to_remove]
520
-
521
- # countries_list = ['أفغانستان', 'جزر أولاند', 'ألبانيا', 'یمنی', 'الجزائر', 'ساموا الأمريكية', 'مغربي', 'أندورا',
522
- # 'أنغولا', 'أنغويلا', 'القارة القطبية الجنوبية', 'أنتيغوا وبربودا', 'الأرجنتين', 'أرمينيا',
523
- # 'أروبا', 'أستراليا', 'النمسا', 'أذربيجان', 'باهاماس', 'البحرين', 'بنغلاديش', 'بربادوس',
524
- # 'بيلاروسيا', 'بلجيكا', 'بليز', 'بنين', 'برمودا', 'بوتان', 'بوليفيا', 'البوسنة والهرسك',
525
- # 'بوتسوانا', 'جزيرة بوفيه', 'البرازيل', 'إقليم المحيط الهندي البريطاني', 'جزر العذراء البريطانية',
526
- # 'بروناي', 'بلغاريا', 'بوركينا فاسو', 'بوروندي', 'كابو فيردي', 'كمبوديا', 'الكاميرون', 'كندا',
527
- # 'الجزر الكاريبية الهولندية', 'جزر كايمان', 'جمهورية أفريقيا الوسطى', 'تشاد', 'تشيلي', 'الصين',
528
- # 'جزيرة الكريسماس', 'جزر كوكوس', 'كولومبيا', 'جزر القمر', 'جمهورية الكونغو', 'جزر كوك',
529
- # 'كوستاريكا', 'كرواتيا', 'كوبا', 'كوراساو', 'قبرص', 'التشيك', 'الدنمارك', 'جيبوتي', 'دومينيكا',
530
- # 'جمهورية الدومينيكان', 'جمهورية الكونغو الديمقراطية', 'الاكوادور', 'السلفادور',
531
- # 'غينيا الاستوائية', 'إريتريا', 'إستونيا', 'إسواتيني', 'إثيوبيا', 'جزر فوكلاند', 'جزر فارو',
532
- # 'فيجي', 'فنلندا', 'فرنسا', 'غويانا الفرنسية', 'بولينزيا الفرنسية', 'أراض فرنسية جنوبية',
533
- # 'الجابون', 'غامبيا', '\u202bجورجيا', 'ألمانيا', 'غانا', 'جبل طارق', 'اليونان', 'جرينلاند',
534
- # 'غرينادا', 'غوادلوب', 'غوام', 'غواتيمالا', 'غيرنزي', 'غينيا', 'غينيا بيساو', 'غيانا', 'هايتي',
535
- # 'جزيرة هيرد وجزر ماكدونالد', 'هندوراس', 'هونج كونج', 'هنجاريا', 'آيسلندا', 'الهند', 'أندونيسيا',
536
- # 'إيران', 'العراق', 'أيرلندا', 'جزيرة مان', 'إيطاليا', 'ساحل العاج', 'جامايكا', 'اليابان', 'جيرسي',
537
- # 'الأردن', 'كازاخستان', 'كينيا', 'كيريباتي', 'كوسوفو', 'الكويت', 'قيرغيزستان', 'لاوس', 'لاتفيا',
538
- # 'لبنان', 'ليسوتو', 'ليبيريا', 'ليبيا', 'ليختنشتاين', 'ليتوانيا', 'لوكسمبورغ', 'ماكاو', 'مدغشقر',
539
- # 'مالاوي', 'ماليزيا', 'المالديف', 'مالي', 'مالطا', 'جزر مارشال', 'مارتينيك', 'موريتانيا',
540
- # 'موريشيوس', 'مايوت', 'المكسيك', 'ولايات ميكرونيسيا المتحدة', 'مولدوفا', 'موناكو', 'منغوليا',
541
- # 'مونتينيغرو', 'مونتسرات', 'المغرب', 'موزمبيق', 'ميانمار', 'ناميبيا', 'ناورو', 'نيبال', 'هولندا',
542
- # 'جزر الأنتيل الهولندية', 'كاليدونيا الجديدة', 'نيوزيلندا', 'نيكاراغوا', 'النيجر', 'نيجيريا',
543
- # 'نييوي', 'جزيرة نورفولك', 'كوريا الشمالية', 'مقدونيا الشمالية', 'جزر ماريانا الشمالية', 'النرويج',
544
- # 'سلطنة عمان', 'باكستان', 'بالاو', 'فلسطين', 'بنما', 'بابوا غينيا الجديدة', 'باراغواي', 'بيرو',
545
- # 'الفلبين', 'جزر بيتكيرن', 'بولندا', 'البرتغال', 'بورتوريكو', 'قطر', 'ريونيون', 'رومانيا', 'روسيا',
546
- # 'رواندا', 'سان بارتيلمي', 'سانت هيلينا', 'سانت كيتس ونيفيس', 'سانت لوسيا', 'سانت مارتن',
547
- # 'سان بيير وميكلون', 'سانت فينسنت والغرينادين', 'ساموا', 'سان مارينو', 'ساو تومي وبرينسيب',
548
- # 'السعودية', 'السنغال', 'صربيا', 'سيشل', 'سيراليون', 'سنغافورة', 'سانت مارتن', 'سلوفاكيا',
549
- # 'سلوفينيا', 'جزر سليمان', 'الصومال', 'جنوب أفريقيا', 'جورجيا الجنوبية وجزر ساندويتش الجنوبية',
550
- # 'كوريا الجنوبية', 'جنوب السودان', 'إسبانيا', 'سريلانكا', 'السودان', 'سورينام',
551
- # 'سفالبارد ويان ماين', 'السويد', 'سويسرا', 'سوريا', 'تايوان', 'طاجيكستان', 'تنزانيا', 'تايلاند',
552
- # 'تيمور الشرقية', 'توجو', 'توكيلاو', 'تونغا', 'ترينيداد وتوباغو', 'تونس', 'تركيا', 'تركمانستان',
553
- # 'جزر توركس وكايكوس', 'توفالو', 'جزر الولايات المتحدة الصغيرة النائية', 'جزر العذراء الأمريكية',
554
- # 'أوغندا', 'أوكرانيا', 'الإمارات العربية المتحدة', 'المملكة المتحدة', 'الولايات المتحدة الأمريكية',
555
- # 'أوروغواي', 'أوزبكستان', 'فانواتو', 'مدينة الفاتيكان', 'فنزويلا', 'فيتنام', 'واليس وفوتونا',
556
- # 'الصحراء الغربية', 'اليمن', 'زامبيا', 'زيمبابوي', 'اردني', 'اردنی', 'سریلانکا', 'پاکستان',
557
- # 'بيكور', 'ایران', 'المهلة']
558
-
559
- # arabic_keywords_to_remove = [
560
- # "الرقم الشخصي", "تاريخ الميلاد", "الصلاحية", "لدولة", "الجنسية", "دولة قطر", "رخصة إقامة", "المهنة", "الإسم",
561
- # "بطاقة", "إثبات", "شخصية", "ـلـة قـ", "ـة", "سلاحية"
562
- # ]
563
-
564
- # filtered_lines = []
565
- # for line in cleaned_lines:
566
- # match_score = get_fuzzy_match_score(line, arabic_keywords_to_remove)
567
- # match_score1 = get_fuzzy_match_score(line, countries_list)
568
-
569
- # if match_score or match_score1:
570
- # score = match_score if match_score else match_score1
571
- # elif not any(pattern.search(line) for pattern in compiled_patterns):
572
- # filtered_lines.append(line)
573
-
574
- # occupation, occupation_en = '', ''
575
-
576
- # front_data = {
577
- # "nationality": nationality_iso,
578
- # "id_number": id_number,
579
- # "dob": dob,
580
- # "expiry_date": expiry,
581
- # "occupation": occupation,
582
- # "occupation_en": occupation_en
583
- # }
584
-
585
- # try:
586
- # if extract_names:
587
- # prompt = f"""Please extract the following information from the text and provide it in a structured dictionary format: {{'occupation': 'abc', 'occupation_en': 'abc', 'nationality': 'XXX', 'name': 'FULL NAME', 'first_name': 'FIRST', 'last_name': 'LAST', 'name_ar': 'ARABIC NAME'}}
588
- # For the name fields:
589
- # - Extract the full name in English and split it into first and last name
590
- # - Extract the full name in Arabic (name_ar)
591
- # For occupation:
592
- # - Extract in both Arabic and English
593
- # For nationality:
594
- # - Provide the ISO 3166-1 alpha-3 country code
595
- # Here's the text: {ar_front_data}"""
596
- # else:
597
- # prompt = f"""Please extract the occupation and nationality(ISO 3166-1 alpha-3 country code) from the following text and provide it in a structured dictionary format: {{'occupation': 'abc', 'occupation_en': 'abc', 'nationality': 'XXX'}}
598
- # So you will get occupation in arabic and translate it into english as well and send it as part of your response. The results should always be a dictionary with only 3 keys as mentioned above and nothing else. Here's the text for your task: {ar_front_data}"""
599
-
600
- # response = make_api_request_with_retries(prompt)
601
-
602
- # if response.get('occupation', ''):
603
- # front_data['occupation'] = response['occupation']
604
-
605
- # if response.get('occupation_en', ''):
606
- # front_data['occupation_en'] = response['occupation_en']
607
-
608
- # if extract_names:
609
- # if response.get('name', ''):
610
- # front_data['name'] = response['name']
611
- # if response.get('first_name', ''):
612
- # front_data['first_name'] = response['first_name']
613
- # if response.get('last_name', ''):
614
- # front_data['last_name'] = response['last_name']
615
- # if response.get('name_ar', ''):
616
- # front_data['name_ar'] = response['name_ar']
617
-
618
- # if front_data.get('occupation_en', ''):
619
- # if front_data['occupation_en'].lower() in ['not available', 'unspecified', 'not specified',
620
- # 'not provided'] or front_data[
621
- # 'occupation_en'].lower().startswith('director of nationality'):
622
- # front_data['occupation'], front_data['occupation_en'] = '', ''
623
-
624
- # except Exception as e:
625
- # print(f"Error in processing the extracted data: {e}")
626
- # front_data['occupation'], front_data['occupation_en'] = '', ''
627
-
628
- # return front_data
629
-
630
-
631
- # def qatar_front_id_extraction(client, image_data, front_id_text, front_id_text_description, openai_key):
632
- # # cropped_id_card, third_part, third_part_text = detect_id_card(client, image_data, front_id_text, part='third')
633
- # # front_data = extract_name_fields_from_cropped_part(third_part_text.replace("\n", ""))
634
- # try:
635
- # english_name_raw = extract_name_line(front_id_text_description)
636
- # if not english_name_raw:
637
- # return {'error': 'covered_photo', 'error_details': 'English name not found in OCR'}
638
-
639
-
640
- # result = qatar_id_info_chain(front_id_text_description, openai_key)
641
-
642
- # from idvpackage.genai_utils import is_age_less_than_100, is_age_18_above
643
- # age_check = is_age_less_than_100(result.get('dob', ''))
644
- # if not age_check:
645
- # return {'error': 'dob_glare'}
646
- # if age_check == 'invalid_format':
647
- # return {'error':'dob_glare'}
648
-
649
- # age_check_2 = is_age_18_above(result.get('dob', ''))
650
- # if age_check_2=='invalid_format':
651
- # return {'error':'dob_glare'}
652
-
653
-
654
- # name = result.get("name", "")
655
- # name_parts = name.split()
656
- # first_name = name_parts[0]
657
- # last_name = name_parts[-1]
658
-
659
- # front_data = {
660
- # 'name': name,
661
- # 'first_name': first_name,
662
- # 'last_name': last_name,
663
- # 'name_ar': result.get('name_ar', ''),
664
- # 'nationality': result.get('nationality', ''),
665
- # 'id_number': result.get('id_number', ''),
666
- # 'dob': result.get('dob', ''),
667
- # 'expiry_date': result.get('expiry_date', ''),
668
- # 'occupation': result.get('occupation', ''),
669
- # 'occupation_en': result.get('occupation_en', '')
670
- # }
671
-
672
-
673
- # except Exception as e:
674
- # return {'error': 'covered_photo', 'error_details': f'Exception Thrown {e}'}
675
- # # if 'error' in front_data.keys():
676
- # # return front_data
677
- # # if not front_data.get('name', '') or not front_data.get('first_name', '') or not front_data.get('last_name', '') or len(front_data.get('name', '').split(' ')) < 2:
678
- # # front_data_temp = extract_name_fields_from_cropped_part(front_id_text_description)
679
- # # front_data['name'] = front_data_temp.get('name', '')
680
- # # front_data['first_name'] = front_data_temp.get('first_name', '')
681
- # # front_data['last_name'] = front_data_temp.get('last_name', '') if len(front_data_temp.get('last_name', ''))>1 else ''
682
- # #
683
- # # name_ar = extract_name_ar(front_id_text_description)
684
- # # if name_ar:
685
- # # front_data["name_ar"] = name_ar
686
- # # else:
687
- # # front_data["name_ar"] = ''
688
-
689
- # # # Check if we need to extract names using GPT
690
- # # need_name_extraction = not front_data.get('name', '') or not front_data.get('first_name', '') or not front_data.get('last_name', '') or not front_data.get('name_ar', '') or len(front_data.get('name', '').split(' ')) < 2
691
- # #
692
- # # numeric_fields = extract_numeric_fields_from_raw(front_id_text_description, third_part_text, front_data.get('name', ''), extract_names=need_name_extraction)
693
- # #
694
- # # #If names were extracted via GPT, update front_data with the new values
695
- # # if need_name_extraction:
696
- # # if numeric_fields.get('name', ''):
697
- # # front_data['name'] = numeric_fields['name']
698
- # # if numeric_fields.get('first_name', ''):
699
- # # front_data['first_name'] = numeric_fields['first_name']
700
- # # if numeric_fields.get('last_name', ''):
701
- # # front_data['last_name'] = numeric_fields['last_name']
702
- # # if numeric_fields.get('name_ar', ''):
703
- # # front_data['name_ar'] = numeric_fields['name_ar']
704
- # #
705
- # # #Update the rest of the fields
706
- # # front_data.update({k: v for k, v in numeric_fields.items() if k not in ['name', 'first_name', 'last_name', 'name_ar']})
707
-
708
- # if not front_data.get('expiry_date', ''):
709
- # try:
710
- # # Find all dates in dd-mm-yyyy format
711
- # date_pattern = r'\d{2}-\d{2}-\d{4}'
712
- # dates = re.findall(date_pattern, front_id_text_description)
713
-
714
- # if dates:
715
- # # Convert strings to datetime objects
716
- # date_objects = []
717
- # for date_str in dates:
718
- # try:
719
- # date_obj = datetime.strptime(date_str, '%d-%m-%Y')
720
- # date_objects.append(date_obj)
721
- # except ValueError:
722
- # continue
723
-
724
- # if date_objects:
725
- # # Get the latest date as expiry
726
- # max_date = max(date_objects)
727
- # front_data['expiry_date'] = max_date.strftime('%d-%m-%Y')
728
- # else:
729
- # front_data['expiry_date'] = ''
730
- # else:
731
- # front_data['expiry_date'] = ''
732
- # except Exception as e:
733
- # print(f"Error extracting expiry date: {e}")
734
- # front_data['expiry_date'] = ''
735
- # return front_data
736
-
737
-
738
- # def qatar_front_id_extraction_old(client, image_data, front_id_text, front_id_text_description):
739
- # cropped_id_card, third_part, third_part_text = detect_id_card(client, image_data, front_id_text, part='third')
740
- # front_data = extract_name_fields_from_cropped_part(third_part_text.replace("\n", ""))
741
- # if not front_data.get('name', '') or not front_data.get('first_name', '') or not front_data.get('last_name',
742
- # '') or len(
743
- # front_data.get('name', '').split(' ')) < 2:
744
- # front_data_temp = extract_name_fields_from_cropped_part(front_id_text_description)
745
- # front_data['name'] = front_data_temp.get('name', '')
746
- # front_data['first_name'] = front_data_temp.get('first_name', '')
747
- # front_data['last_name'] = front_data_temp.get('last_name', '') if len(
748
- # front_data_temp.get('last_name', '')) > 1 else ''
749
-
750
- # name_ar = extract_name_ar(front_id_text_description)
751
- # if name_ar:
752
- # front_data["name_ar"] = name_ar
753
- # else:
754
- # front_data["name_ar"] = ''
755
-
756
- # # Check if we need to extract names using GPT
757
- # need_name_extraction = not front_data.get('name', '') or not front_data.get('first_name', '') or not front_data.get(
758
- # 'last_name', '') or not front_data.get('name_ar', '') or len(front_data.get('name', '').split(' ')) < 2
759
-
760
- # numeric_fields = extract_numeric_fields_from_raw(front_id_text_description, third_part_text,
761
- # front_data.get('name', ''), extract_names=need_name_extraction)
762
-
763
- # # If names were extracted via GPT, update front_data with the new values
764
- # if need_name_extraction:
765
- # if numeric_fields.get('name', ''):
766
- # front_data['name'] = numeric_fields['name']
767
- # if numeric_fields.get('first_name', ''):
768
- # front_data['first_name'] = numeric_fields['first_name']
769
- # if numeric_fields.get('last_name', ''):
770
- # front_data['last_name'] = numeric_fields['last_name']
771
- # if numeric_fields.get('name_ar', ''):
772
- # front_data['name_ar'] = numeric_fields['name_ar']
773
-
774
- # # Update the rest of the fields
775
- # front_data.update(
776
- # {k: v for k, v in numeric_fields.items() if k not in ['name', 'first_name', 'last_name', 'name_ar']})
777
-
778
- # if not front_data.get('expiry_date', ''):
779
- # try:
780
- # # Find all dates in dd-mm-yyyy format
781
- # date_pattern = r'\d{2}-\d{2}-\d{4}'
782
- # dates = re.findall(date_pattern, front_id_text_description)
783
-
784
- # if dates:
785
- # # Convert strings to datetime objects
786
- # date_objects = []
787
- # for date_str in dates:
788
- # try:
789
- # date_obj = datetime.strptime(date_str, '%d-%m-%Y')
790
- # date_objects.append(date_obj)
791
- # except ValueError:
792
- # continue
793
-
794
- # if date_objects:
795
- # # Get the latest date as expiry
796
- # max_date = max(date_objects)
797
- # front_data['expiry_date'] = max_date.strftime('%d-%m-%Y')
798
- # else:
799
- # front_data['expiry_date'] = ''
800
- # else:
801
- # front_data['expiry_date'] = ''
802
- # except Exception as e:
803
- # print(f"Error extracting expiry date: {e}")
804
- # front_data['expiry_date'] = ''
805
-
806
- # return front_data
807
-
808
-
809
- # def extract_employer_from_back(data, passport_number, passport_date, serial_no):
810
- # patterns_to_remove = [r"\b[a-zA-Z0-9]+\b",
811
- # r"توقع حامل البطاقة",
812
- # r"مدير عام الجنسية والمنافذ وشؤون الوالدين",
813
- # r"المستقدم", r"توقع", r"حامل", r"البطاقة", r"مدير", r"عام", r"الإدارة",
814
- # r"الجوازات", r"مدير عام الجنسية والمناقة وشؤون الوافدين",
815
- # r"صل", r"تاريخ النهاء الجواز", r"تاريخ", r"الجواز", r"البطاقة", r"توقع حامل البطاقة",
816
- # r"رق[ـم]* ج[ـوا]*ز السفر", r"تاريخ انتهاء ?الجواز", r"الرقم المسلسل",
817
- # r"ن[ـو]*ع الرخص[ـة]*", r"مدير عام الإدارة العامة( للجوازات| الجورت)?",
818
- # r"عمل",
819
- # r"الارة البا",
820
- # r"وزارة الله",
821
- # r"مدير عام الجنسية والمنافذ وشؤون الوافدين",
822
- # r"مدير إدارة الجنسية و وثائق السفر",
823
- # r"العنوان منطقة",
824
- # r"General Director of Nationality",
825
- # r"Borders & Expatriates Affairs",
826
- # r"Passport expiry date",
827
- # r"تاریخ انتهاء الجواز",
828
- # r"Drectorate of Passports",
829
- # r"Directorate of Passports",
830
- # r"Holder's Signature",
831
- # r"Authority's signature",
832
- # r"Residericy Type",
833
- # r"ترفيع حامل البطاقة", r"توقيع حامل البطاقة", r"passport_number|passport_date|serial_no",
834
- # r"Holder's signature", r"Passport Number", r"Passport Expiry",
835
- # r"Serial No", r"Residency Type", r"Employer", r"Directorate of Passports",
836
- # r"General Director of the General", re.escape(passport_number),
837
- # re.escape(passport_date), re.escape(serial_no), r":",
838
- # ]
839
-
840
- # if 'employer' not in data.lower() or 'passport' not in data.lower():
841
- # employer = ''
842
- # return employer
843
-
844
- # # compiled_patterns = [re.compile(pattern) for pattern in patterns_to_remove]
845
- # compiled_patterns = [re.compile(pattern) for pattern in patterns_to_remove if pattern.strip()]
846
- # data = data.replace("Employer", "").replace("Employe", "").replace("المستقدم :", "").replace("المستقدم", "")
847
-
848
- # address_keywords = ["العنوان", "منطقة", "شارع"]
849
- # lines = [
850
- # line.strip() for line in data.split("\n")
851
- # if line.strip() and not any(keyword in line for keyword in address_keywords)
852
- # ]
853
-
854
- # filtered_lines = []
855
- # for line in lines:
856
- # matched = False
857
- # for pattern in compiled_patterns:
858
- # if pattern.search(line):
859
- # # print(f'Pattern: {pattern.pattern} matched line: {line}')
860
- # matched = True
861
- # break
862
-
863
- # if not matched:
864
- # filtered_lines.append(line)
865
-
866
- # # print(f'FILTERED LINES: {filtered_lines}\n')
867
-
868
- # lines = [re.sub(r'[A-Za-z0-9]', '', i) for i in filtered_lines]
869
-
870
- # # print(f'FILTERED LINES 2: {lines}\n')
871
-
872
- # try:
873
- # employer = max(lines, key=len)
874
- # except:
875
- # employer = ''
876
-
877
- # if employer:
878
- # employer.strip().replace("'", '')
879
- # else:
880
- # employer = ''
881
-
882
- # return employer
883
-
884
-
885
- # def qatar_back_id_extraction(back_id_text_description):
886
- # serial_no_pattern = r"\b\d{14}\b|\b[A-Za-z0-9]{13,16}\b"
887
- # passport_no_pattern = r"([A-Za-z]\d{8}|[A-Za-z]{2}\d{7}|[A-Za-z]\d{7}|[A-Za-z]\d{6})"
888
- # # emp_pattern = r'Employer:\s*([\w\s.]+.)\n\b'
889
-
890
- # serial_no_match = re.search(serial_no_pattern, back_id_text_description, re.IGNORECASE)
891
-
892
- # try:
893
- # if serial_no_match:
894
- # serial_no = serial_no_match.group(0)
895
- # else:
896
- # serial_no = serial_no_match.group(1)
897
- # except:
898
- # serial_no = ''
899
-
900
- # passport_no_match = re.search(passport_no_pattern, back_id_text_description, re.IGNORECASE)
901
- # if passport_no_match:
902
- # passport_no = passport_no_match.group(0)
903
- # else:
904
- # passport_no = ''
905
-
906
- # dates = sort_dates_by_datetime(re.findall(r'\d{2}/\d{2}/\d{4}', back_id_text_description))
907
- # passport_expiry = dates[0] if dates else ''
908
-
909
- # try:
910
- # back_id_text_description_original = back_id_text_description
911
- # if 'Name' in back_id_text_description:
912
- # back_id_text_description = back_id_text_description.split("Serial")[1]
913
-
914
- # employer = extract_employer_from_back(back_id_text_description, passport_no, passport_expiry, serial_no)
915
- # # print(f'Employer here 1: {employer}\n')
916
-
917
- # if employer is None or employer == '':
918
- # back_id_text_description_splitted_2 = back_id_text_description_original.split("Name")[1]
919
- # employer = extract_employer_from_back(back_id_text_description_splitted_2, passport_no, passport_expiry,
920
- # serial_no)
921
- # # print(f'Employer here 2: {employer}\n')
922
-
923
- # if not is_arabic(employer):
924
- # employer = extract_employer_from_back(back_id_text_description, passport_no, passport_expiry, serial_no)
925
- # # print(f'Employer here 3: {employer}\n')
926
- # except:
927
- # try:
928
- # employer = extract_employer_from_back(back_id_text_description, passport_no, passport_expiry, serial_no)
929
- # # print(f'Employer here 4: {employer}\n')
930
- # except:
931
- # employer = ''
932
-
933
- # employer_en = ''
934
- # if employer:
935
- # try:
936
- # employer_en = GoogleTranslator(dest='en').translate(employer)
937
- # if employer_en and (employer_en.startswith('Director of the Nationality') or employer_en.startswith(
938
- # 'Director of Nationality') or employer_en.startswith('Director General')) or employer_en == None:
939
- # employer, employer_en = '', ''
940
- # except:
941
- # pass
942
-
943
- # back_data = {
944
- # "passport_number": passport_no,
945
- # "passport_expiry": passport_expiry,
946
- # "card_number": serial_no,
947
- # "employer": str(employer),
948
- # "employer_en": employer_en,
949
- # "issuing_country": "QAT"
950
- # }
951
-
952
- # return back_data
953
1
 
954
2
 
955
3
  import base64
956
4
  import time
957
5
  from io import BytesIO
958
- from typing import Optional
6
+
959
7
  import cv2
960
8
 
961
9
  from openai import OpenAI
@@ -1016,7 +64,7 @@ Instructions:
1016
64
 
1017
65
  class QatarFront(BaseModel):
1018
66
 
1019
- id_number: str = Field(...,
67
+ id_number: str = Field(...,min_length=9, max_length=11,
1020
68
  description = "The ID number exactly as shown on the card (preserve original format)",
1021
69
  )
1022
70
 
@@ -1075,7 +123,7 @@ class QatarBack(BaseModel):
1075
123
  )
1076
124
 
1077
125
  passport_number: str = Field(...,
1078
- description = "Passport number extract exactly as written on the card ex: EA0605652"
126
+ description = "Passport number extract exactly as written on the card ex: EA0605652."
1079
127
  )
1080
128
 
1081
129
  passport_expiry: str = Field(...,
@@ -1166,7 +214,7 @@ def _image_to_jpeg_bytesio(image) -> BytesIO:
1166
214
 
1167
215
  def get_response_from_openai_qat(image, side, country, openai_key):
1168
216
 
1169
- logging.info("Processing image for Qatari passport extraction OPENAI......")
217
+ logging.info("Processing image for Qatari NID extraction OPENAI......")
1170
218
  logging.info(f" and type: {type(image)}")
1171
219
  try:
1172
220
  image = _image_to_jpeg_bytesio(image)