idvpackage 3.0.10__py3-none-any.whl → 3.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,764 +0,0 @@
1
- from datetime import datetime, timedelta
2
- import re
3
- from google.cloud import vision_v1
4
- from googletrans import Translator
5
- from deep_translator import GoogleTranslator
6
- import io
7
- from PIL import Image
8
- import json
9
- import openai
10
- import time
11
-
12
- import json
13
- import openai
14
- import time
15
- translator = Translator()
16
- import base64
17
-
18
def find_gender_from_back(text):
    """Return the gender letter that sits between two digits in ``text``.

    An explicit ``M``/``F`` marker (either case) flanked by digits is
    preferred. If there is none, the first letter of any kind flanked by
    digits is returned, which keeps the permissive result the function
    produced before.

    Args:
        text: Text to search (callers pass MRZ line 2 or the full OCR text).

    Returns:
        The matched letter exactly as it appears in ``text``, or ``""`` when
        no digit-letter-digit sequence exists.
    """
    # Order matters: the generic pattern is a superset of the M/F one, so it
    # must only be tried after the specific marker has been ruled out.
    for pattern in (r"\d([MFmf])\d", r"\d([A-Za-z])\d"):
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return ""
32
-
33
-
34
def func_common_dates(extract_no_space):
    """Pick date of birth and expiry date from the first two DD/MM/YYYY dates.

    Only the first two matches are considered. The one with the smaller year
    is treated as the date of birth and the other as the expiry date; when
    the years are equal the second match is returned as the date of birth.

    Args:
        extract_no_space: Text with spaces already removed.

    Returns:
        ``(dob, expiry_date)`` as strings, or ``("", "")`` when fewer than
        two dates are present or the input is not searchable text.
    """
    try:
        found = re.findall(r"\d{2}/\d{2}/\d{4}", extract_no_space)
    except TypeError:
        # Non-text input: keep the historical "no dates" answer.
        return "", ""

    if len(found) < 2:
        return "", ""

    first, second = found[0], found[1]
    if int(first[-4:]) < int(second[-4:]):
        return first, second
    return second, first
52
-
53
-
54
def convert_dob(input_date):
    """Expand a ``YYMMDD`` string into ``DD/MM/YYYY``.

    The century is the current one unless the two-digit year is greater than
    the current two-digit year, in which case the previous century is used.
    Only the first six characters are read; the date is not validated.

    Args:
        input_date: String whose first six characters are ``YYMMDD``.

    Returns:
        The date formatted as ``DD/MM/YYYY``.

    Raises:
        ValueError: If the two-digit year is not numeric.
    """
    yy, mm, dd = input_date[0:2], input_date[2:4], input_date[4:6]

    century, current_yy = divmod(datetime.now().year, 100)
    if int(yy) > current_yy:
        # A year "in the future" must belong to the previous century.
        century -= 1

    return f"{dd}/{mm}/{century}{yy}"
71
-
72
-
73
def func_expiry_date(extract):
    """Extract the expiry date from OCR/MRZ text as ``DD/MM/YYYY``.

    Spaces are removed first. If ``func_common_dates`` finds an expiry date
    it is returned as is. Otherwise every run of seven digits followed by two
    or three capital letters is tried: its first six digits are read as
    ``YYMMDD`` in the 2000s, and the first candidate that is a real calendar
    date wins.

    Args:
        extract: Raw text to search.

    Returns:
        The expiry date string, or ``""`` when nothing valid is found.
    """
    compact = extract.replace(" ", "")
    _, expiry_date = func_common_dates(compact)
    if expiry_date != "":
        return expiry_date

    for candidate in re.findall(r"\d{7}[A-Z]{2,3}", compact):
        yymmdd = candidate[0:6]
        expiry_date = yymmdd[4:6] + "/" + yymmdd[2:4] + "/20" + yymmdd[0:2]
        try:
            # Parsed only to validate that the digits form a real date.
            datetime.strptime(expiry_date, "%d/%m/%Y")
        except ValueError:
            expiry_date = ""
        else:
            break

    return expiry_date
89
-
90
-
91
def convert_expiry_date(input_date):
    """Expand a ``YYMMDD`` expiry string into ``DD/MM/YYYY``.

    The current century is always used, whatever the two-digit year is.
    Only the first six characters are read; the date is not validated.

    Args:
        input_date: String whose first six characters are ``YYMMDD``.

    Returns:
        The date formatted as ``DD/MM/YYYY``.

    Raises:
        ValueError: If the two-digit year is not numeric.
    """
    yy, mm, dd = input_date[0:2], input_date[2:4], input_date[4:6]
    century = datetime.now().year // 100

    # The year is still parsed so that non-numeric input raises ValueError.
    int(yy)

    return f"{dd}/{mm}/{century}{yy}"
108
-
109
-
110
def func_dob(extract):
    """Extract the date of birth from OCR/MRZ text as ``DD/MM/YYYY``.

    Lookup order, after removing spaces:
      1. the date of birth reported by ``func_common_dates``;
      2. the first ``YYMMDD`` taken from a "7 digits, M or F, digit" run
         that forms a real calendar date (years above the current two-digit
         year are placed in the 1900s, the rest in the 2000s);
      3. the leading digits of a 14-digit run, expanded by ``convert_dob``.

    Args:
        extract: Raw text to search.

    Returns:
        The date of birth string, or ``""`` when nothing is found.
    """
    compact = extract.replace(" ", "")
    dob, _ = func_common_dates(compact)
    if dob != "":
        return dob

    current_yy = int(str(datetime.today().year)[2:4])
    for hit in re.findall(r"\d{7}(?:M|F)\d", compact):
        yymmdd = hit[0:6]
        century = "19" if int(yymmdd[0:2]) > current_yy else "20"
        dob = yymmdd[4:6] + "/" + yymmdd[2:4] + "/" + century + yymmdd[0:2]
        try:
            datetime.strptime(dob, "%d/%m/%Y")
        except ValueError:
            dob = ""
            continue
        return dob

    # Reached only when no candidate above was a valid date.
    long_run = re.search(r"\b(\d{14}).*?\b", compact)
    if long_run:
        dob = convert_dob(long_run.group(1)[:7])

    return dob
143
-
144
-
145
def remove_special_characters_mrz2(string):
    """Return ``string`` with everything except ASCII letters, digits and whitespace removed."""
    disallowed = re.compile(r"[^a-zA-Z0-9\s]")
    return disallowed.sub("", string)
149
-
150
-
151
def count_digits(element):
    """Return how many characters of ``element`` are digits (per ``str.isdigit``)."""
    return sum(1 for char in element if char.isdigit())
154
-
155
-
156
def _sdn_merge_mrz2(mrz_lines, mrz1, mrz3):
    """Rebuild MRZ line 2 from the captured lines that are neither line 1 nor line 3."""
    candidates = [
        line
        for line in mrz_lines
        if line not in (mrz1, mrz3) and remove_special_characters_mrz2(line) != ""
    ]
    if not candidates:
        return ""

    if len(candidates) == 1:
        only = candidates[0]
        return only.split("<")[0] + "<<<<<<<<<<" + only[-1]

    # OCR sometimes splits line 2: glue the digit-richest fragment to the
    # first remaining fragment.
    densest = max(candidates, key=count_digits)
    others = [line for line in candidates if line != densest]
    if not others:
        return ""
    joined = densest + others[0]

    found = re.search(r"\d{7}[MF]\d{7}[\S]{3}<+?\d", joined.replace(">", ""))
    if found:
        joined = found.group(0)

    pieces = joined.split("<")
    return pieces[0] + "<<<<<<<<<<" + pieces[-1]


def _sdn_issue_date_from_expiry(expiry_text):
    """Return expiry minus five years plus one day as DD/MM/YYYY, or "" if unparsable."""
    from dateutil.relativedelta import relativedelta

    # "%Y/%m/%d" covers the expiry taken from the dates printed on the card.
    for fmt in ("%d/%m/%Y", "%d-%m-%Y", "%Y/%m/%d"):
        try:
            exp = datetime.strptime(expiry_text, fmt)
        except ValueError:
            continue
        issued = exp - relativedelta(years=5) + timedelta(days=1)
        return issued.strftime("%d/%m/%Y")
    return ""


def sdn_back_id_extraction(back_id_data):
    """Extract structured fields from the OCR text of a Sudanese ID card back.

    The three MRZ lines are located (starting at ``IDSDN``) and repaired,
    then dates, nationality, name and gender are derived from them, with the
    ``YYYY/MM/DD`` dates printed on the card as a fallback. When no issue
    date is printed it is computed as expiry minus five years plus one day;
    if the expiry cannot be parsed the issue date is left empty.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened listing; confirm against the packaged source.

    Args:
        back_id_data: OCR text of the back side of the card.

    Returns:
        dict with keys ``mrz``, ``mrz1``, ``mrz2``, ``mrz3``,
        ``full_name_generic``, ``first_name``, ``middle_name``,
        ``last_name``, ``issuing_country``, ``nationality``, ``dob_back``,
        ``issue_date``, ``expiry_date`` and ``gender``. Values that could
        not be extracted are empty strings.
    """
    mrz1, mrz2, mrz3 = "", "", ""

    mrz_hit = re.search(
        r"(IDSDN.*\n*.*\n*.*\n*.*)",
        back_id_data.replace(" ", "").strip(),
        re.MULTILINE,
    )
    mrz_str = mrz_hit.group(1) if mrz_hit else ""

    if mrz_str:
        mrz_list = mrz_str.split("\n")
        mrz1 = mrz_list[0]
        if len(mrz_list) == 3:
            mrz3 = mrz_list[2]
        mrz2 = _sdn_merge_mrz2(mrz_list, mrz1, mrz3)

    # OCR confuses the letter O with zero right after the document prefix.
    mrz1 = re.sub(r"(IDSDN[A-Z]{1,2})O(?=[0-9])", r"\g<1>0", mrz1)

    # A trailing '>' on line 2 is replaced by the digit 7.
    if mrz2 and mrz2.endswith(">"):
        mrz2 = mrz2.split("<")[0] + "<<<<<<<<<<" + "7"

    if not mrz3 or mrz3.startswith((">", "<")):
        name_line = r"^[A-Za-z]+<+[A-Za-z]+.*$"
        hits = re.findall(name_line, mrz_str, re.MULTILINE) or re.findall(
            name_line, back_id_data, re.MULTILINE
        )
        mrz3 = hits[0] if hits else ""

    # Pad line 3 with filler up to 30 characters.
    mrz3 = mrz3.ljust(30, "<")

    printed_dates = sorted(re.findall(r"\d{4}/\d{2}/\d{2}", back_id_data))

    try:
        dob = func_dob(mrz2)
    except Exception:
        dob = ""
    if not dob:
        dob = printed_dates[0] if printed_dates else ""

    expiry = func_expiry_date(mrz_str)
    if not expiry:
        expiry = printed_dates[-1] if printed_dates else ""

    # With three or more printed dates the middle one is the issue date.
    issue_date = printed_dates[1] if len(printed_dates) > 2 else ""

    nationality = mrz2.split("<")[0][-3:]
    full_name_mrz = mrz3.replace("<", " ").replace(">", " ").strip()

    name_hit = re.search(
        r"(?<=Name: )\w+(?: \w+)*|(?<=Name )\w+(?: \w+)*",
        back_id_data,
        re.IGNORECASE,
    )
    if name_hit:
        name = name_hit.group(0)
    else:
        label_hit = re.search(
            r"(?<=NAME):*[ \n]*([A-Z ]+)", back_id_data, re.IGNORECASE
        )
        name = label_hit.group(1).strip().replace(":", "") if label_hit else ""

    if full_name_mrz and not name:
        # MRZ order is surname first; move it to the end.
        mrz_words = full_name_mrz.split(" ")
        name = " ".join(mrz_words[1:]) + " " + mrz_words[0]
    name = name.strip()

    if name:
        words = name.split(" ")
        first_name, last_name = words[0], words[-1]
        middle_name = " ".join(words[1:-1])
    else:
        first_name, last_name, middle_name = "", "", ""

    try:
        dob_match = re.search(r"(\d+)[MF]", mrz2)
        dob_mrz = convert_dob(dob_match.group(1)) if dob_match else ""

        doe_match = re.search(r"[MF](\d+)", mrz2)
        expiry_date_mrz = convert_expiry_date(doe_match.group(1)) if doe_match else ""
    except Exception:
        dob_mrz, expiry_date_mrz = "", ""

    gender = find_gender_from_back(mrz2)

    mrz_str = f"{mrz1}\n{mrz2}\n{mrz3}"

    if expiry and not expiry_date_mrz:
        expiry_date_mrz = expiry
    if dob and not dob_mrz:
        dob_mrz = dob

    if issue_date == "":
        print(f"Calculating issue date....")
        issue_date = _sdn_issue_date_from_expiry(expiry_date_mrz)

    # Only bare M/F markers are expanded; anything else is passed through.
    gender_label = {"F": "FEMALE", "M": "MALE"}.get(gender.strip().upper(), gender)

    return {
        "mrz": [mrz_str],
        "mrz1": mrz1.replace("*", "<"),
        "mrz2": mrz2,
        "mrz3": mrz3,
        "full_name_generic": name,
        "first_name": first_name,
        "middle_name": middle_name,
        "last_name": last_name,
        "issuing_country": "SDN",
        "nationality": nationality,
        "dob_back": dob_mrz,
        "issue_date": issue_date,
        "expiry_date": expiry_date_mrz,
        "gender": gender_label,
    }
373
-
374
-
375
def crop_second_part(img):
    """Return the right half of ``img`` (from the integer mid-width to the right edge)."""
    full_width, full_height = img.size
    return img.crop((full_width // 2, 0, full_width, full_height))
380
-
381
-
382
def extract_text_from_image_data(client, image):
    """Run text detection on ``image`` and return the first annotation's description.

    Args:
        client: Object exposing ``text_detection(image=...)``.
        image: Image object exposing ``save(fp, format=...)``.

    Returns:
        ``description`` of the first entry in the response's
        ``text_annotations``.

    Raises:
        IndexError: If the response contains no text annotations.
    """
    # The image is serialised to PNG bytes in memory before being sent.
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    request_image = vision_v1.types.Image(content=png_bytes)
    annotations = client.text_detection(image=request_image).text_annotations
    return annotations[0].description
395
-
396
-
397
def detect_id_card(client, image_data, id_text, part=None):
    """OCR the right half of the ID card located in ``image_data``.

    The card is cropped to the bounding polygon of the first annotation
    (padded by 30 pixels), rotated to landscape when taller than wide, and
    its right half is sent through ``extract_text_from_image_data``.

    Args:
        client: Client forwarded to ``extract_text_from_image_data``.
        image_data: Encoded image bytes.
        id_text: Text annotations; only the first one's bounding polygon is
            used.
        part: Unused; kept for backward compatibility.

    Returns:
        Text detected in the right half of the card, or ``""`` when
        ``id_text`` is empty.
    """
    if not id_text:
        # No annotations means there is nothing to locate the card with.
        return ""

    corners = id_text[0].bounding_poly.vertices
    padding = 30
    left = corners[0].x - padding
    top = corners[0].y - padding
    right = corners[2].x + padding
    bottom = corners[2].y + padding

    with Image.open(io.BytesIO(image_data)) as img:
        # Padding may push the box past the top/left edge; clamp at zero.
        id_card = img.crop((max(0, left), max(0, top), right, bottom))
        card_width, card_height = id_card.size
        if card_width < card_height:
            id_card = id_card.rotate(90, expand=True)

        right_half = crop_second_part(id_card)
        return extract_text_from_image_data(client, right_half)
424
-
425
-
426
def extract_occupation(text):
    """Return the text that follows the Arabic occupation label, or ``None``.

    The full label followed by content is tried first; its capture is
    stripped and colons are removed. Otherwise a (possibly truncated) label
    followed by a line break is tried, and label remnants are removed from
    the captured line.

    Args:
        text: Arabic OCR text.

    Returns:
        The occupation string, or ``None`` when neither pattern matches.
    """
    same_line = re.search(r"المهنة\s*([^\n]+)", text)
    if same_line:
        return same_line.group(1).strip().replace(":", "")

    next_line = re.search(r"المهن[ةــ]*\s*\n\s*([^\n]+)", text)
    if next_line is None:
        return None

    cleaned = next_line.group(1)
    # Removal order matters: longer fragments go before their substrings.
    for fragment in (":", "ــة", "ة.", "ة", "العقــ", "ـ"):
        cleaned = cleaned.replace(fragment, "")
    return cleaned.strip()
445
-
446
-
447
def extract_occupation_from_text(part_text):
    """Guess the occupation from the line above a line with Arabic-Indic digits.

    Lines are scanned bottom-up (the first line is never tested for digits).
    For each line containing Arabic-Indic digits, the line above it, with
    digits and Latin letters removed, becomes the candidate. If the candidate
    is a known label fragment, the line above that label is taken and the
    scan stops; otherwise scanning continues and later candidates overwrite
    earlier ones.

    The label's own position is used to find the line above it, so a label
    line with surrounding whitespace or digits no longer raises
    ``ValueError``, and a label on the first line yields ``""`` instead of
    wrapping around to the last line.

    Args:
        part_text: Arabic OCR text.

    Returns:
        The cleaned occupation, or ``""`` when none is found.
    """
    label_fragments = (
        "العنــ",
        "الغد",
        "المهنــ",
        "العنوان",
        "العلب",
        "العنب",
        "ـوان",
        "العيد",
        "العز",
        "العن",
        "العد",
    )
    lines = part_text.split("\n")
    arabic_number_pattern = re.compile(r"[\u0660-\u0669]+")
    final_occupation = ""

    for i in range(len(lines) - 1, 0, -1):
        if not arabic_number_pattern.search(lines[i]):
            continue

        occupation = lines[i - 1].strip()
        occupation = re.sub(r"\d+", "", occupation)
        occupation = re.sub(r"[A-Za-z]+", "", occupation)

        if occupation in label_fragments:
            # The candidate is a label, so the value sits one line higher.
            final_occupation = lines[i - 2] if i >= 2 else ""
            break
        final_occupation = occupation

    if not final_occupation:
        return ""

    # Removal order matters: longer fragments go before their substrings.
    for fragment in (":", "المهنة", "ــة", "ة.", "ة", "العقــ", "ـ"):
        final_occupation = final_occupation.replace(fragment, "")
    return final_occupation.strip()
489
-
490
-
491
def extract_occupation_v2(client, image_data, texts):
    """Extract the occupation, preferring the cropped card text over the full OCR text.

    The positional heuristic (``extract_occupation_from_text``) runs first on
    the cropped text, then on the full text. If it yields nothing, or yields
    the national-number label, the label-based ``extract_occupation`` is
    tried on the same two sources in the same order.

    Args:
        client: Client forwarded to ``detect_id_card``.
        image_data: Encoded image bytes.
        texts: Text annotations; ``texts[0].description`` is the full text.

    Returns:
        The occupation string; may be empty or ``None`` when nothing matched.
    """
    cropped_text = detect_id_card(client, image_data, texts)

    result = extract_occupation_from_text(cropped_text)
    if not result:
        result = extract_occupation_from_text(texts[0].description)

    if not result or result in ["الرقم الوطني"]:
        result = extract_occupation(cropped_text)
        if not result:
            result = extract_occupation(texts[0].description)

    return result
503
-
504
-
505
def extract_place_of_birth(text):
    """Return the text following the Arabic place-of-birth label, or ``None``.

    Two spellings of the label are accepted. The captured rest of the line is
    stripped and colons are removed.

    Args:
        text: Arabic OCR text.

    Returns:
        The place of birth, or ``None`` when the label is not found.
    """
    found = re.search(r"مكان الميلاد\s*([^\n]+)|مكان الميادد\s*([^\n]+)", text)
    if not found:
        return None

    # Exactly one of the two alternatives captured something.
    captured = found.group(1)
    if captured is None:
        captured = found.group(2)
    return captured.strip().replace(":", "")
514
-
515
-
516
def extract_dob(text):
    """Return the first ``YYYY/MM/DD`` date in ``text`` (spaces ignored), or ``""``."""
    compact = text.replace(" ", "")
    first_date = re.search(r"\d{4}/\d{2}/\d{2}", compact)
    return first_date.group(0) if first_date else ""
525
-
526
-
527
def extract_name_from_front(text, dob):
    """Return the Arabic name printed above the date-of-birth line.

    The first line containing ``dob`` is located. The line above it, with
    digits, Latin letters and name-label remnants removed, is the candidate.
    If the candidate is a known label or has two words or fewer, the line two
    above the date is used instead, cleaned the same way.

    An empty ``dob`` returns ``""`` (it would otherwise match every line),
    and lines above the start of the text are treated as missing rather than
    wrapping around to the end.

    Args:
        text: Arabic OCR text of the card front.
        dob: Date-of-birth string exactly as it appears in ``text``.

    Returns:
        The extracted name, or ``""`` when it cannot be determined.
    """
    if not dob:
        return ""

    label_remnants = ("الإســــــــــــم", "الاسم", "الإسم", "ـم", ":")
    rejected_names = [
        "تاريخ الميلاد",
        "الإســ",
        "الإسـ",
        "الإس",
        "الإصـ",
        "تاریخ",
        "الرقم الوطني",
    ]

    def drop_labels(value):
        for remnant in label_remnants:
            value = value.replace(remnant, "")
        return value.strip()

    lines = text.split("\n")
    for position, line in enumerate(lines):
        if dob not in line:
            continue

        name = ""
        candidate = lines[position - 1] if position >= 1 else ""
        if candidate:
            candidate = re.sub(r"\d+", "", candidate)
            candidate = re.sub(r"[A-Za-z]+", "", candidate)
            name = drop_labels(candidate)

            if name in rejected_names or len(name.split(" ")) <= 2:
                # Too short or a label: the real name is one line higher.
                name = lines[position - 2] if position >= 2 else ""
                if name:
                    name = drop_labels(name)
                    name = re.sub(r"\d+", "", name)
                    name = re.sub(r"[A-Za-z]+", "", name)
        return name.strip()

    return ""
572
-
573
-
574
def _sdn_translate_to_english(text):
    """Translate ``text`` to English with GoogleTranslator; return "" on any failure."""
    try:
        return GoogleTranslator(source="auto", target="en").translate(text)
    except Exception:
        return ""


def sdn_front_id_extraction(
    client, ar_front_id_data, image_data, texts, compressed_image
):
    """Extract structured fields from the OCR text of a Sudanese ID card front.

    Fields are first extracted with regex and heuristics. If any of them is
    an empty string, the translated text and the image are sent to
    ``get_openai_response_with_retries`` and its answer replaces the
    heuristic result. When that answer is not a usable dict (``None`` or an
    ``error`` dict) the heuristic result is returned instead of failing.

    Args:
        client: Client forwarded to ``extract_occupation_v2``.
        ar_front_id_data: Arabic OCR text of the card front.
        image_data: Encoded image bytes.
        texts: Text annotations of the image.
        compressed_image: In-memory image buffer forwarded to
            ``get_openai_response_with_retries``.

    Returns:
        dict with keys ``id_number``, ``occupation_ar``, ``occupation_en``,
        ``occupation``, ``place_of_birth``, ``place_of_birth_en``, ``dob``
        and ``name_ar``. NOTE(review): when the model answers, the keys are
        whatever it returned plus ``occupation``.
    """
    try:
        front_id_data = translator.translate(ar_front_id_data, src="ar", dest="en").text
    except Exception:
        front_id_data = GoogleTranslator("ar", "en").translate(ar_front_id_data)

    # An 11-digit run, looked up in the translation first, then the original.
    id_number = ""
    for haystack in (front_id_data, ar_front_id_data):
        id_number_match = re.search(r"\b\d{11}\b", haystack.replace(" ", ""))
        if id_number_match:
            id_number = id_number_match.group(0)
            break

    try:
        occupation = extract_occupation_v2(client, image_data, texts) or ""
    except Exception:
        occupation = ""
    occupation_en = _sdn_translate_to_english(occupation) if occupation else ""

    try:
        place_of_birth = extract_place_of_birth(ar_front_id_data) or ""
    except Exception:
        place_of_birth = ""
    place_of_birth_en = (
        _sdn_translate_to_english(place_of_birth) if place_of_birth else ""
    )

    try:
        dob = extract_dob(ar_front_id_data)
    except Exception:
        dob = ""

    full_name = ""
    try:
        full_name = extract_name_from_front(ar_front_id_data, dob)
        if full_name and "مكان الميلاد" in full_name:
            # The place-of-birth line was picked up; the name is one line above.
            lines = ar_front_id_data.split("\n")
            full_name = lines[lines.index(full_name) - 1]
    except Exception:
        # Best effort: keep whatever was extracted before the failure.
        pass

    front_data_dict = {
        "id_number": id_number,
        "occupation_ar": occupation,
        "occupation_en": occupation_en,
        "occupation": occupation_en,
        "place_of_birth": place_of_birth,
        "place_of_birth_en": place_of_birth_en,
        "dob": dob,
        "name_ar": full_name,
    }

    if any(value == "" for value in front_data_dict.values()):
        prompt = (
            """
        From the provided text: " %s ", extract and structure the following fields as a dictionary:

        - 'id_number': The ID number (e.g., national ID, passport number, etc.)
        - 'occupation_ar': The occupation in Arabic
        - 'occupation_en': The occupation in English
        - 'place_of_birth': The place of birth in Arabic
        - 'place_of_birth_en': The place of birth in English
        - 'dob': The date of birth (in the format YYYY-MM-DD or any standard date format provided)
        - 'name_ar': The full name in Arabic

        The response should STRICTLY follow this format:
        {
        "id_number": "<value>",
        "occupation_ar": "<value>",
        "occupation_en": "<value>",
        "place_of_birth": "<value>",
        "place_of_birth_en": "<value>",
        "dob": "<value>",
        "name_ar": "<value>"
        }
        Ensure that all values are accurately extracted and formatted. If a value is missing, return `null` for that field.

        Example:
        {
        "id_number": "12345678901",
        "occupation_ar": "مهندس",
        "occupation_en": "Engineer",
        "place_of_birth": "الرياض",
        "place_of_birth_en": "Riyadh",
        "dob": "1990-05-15",
        "name_ar": "محمد بن أحمد"
        }
        """
            % front_id_data
        )

        start = time.time()
        llm_fields = get_openai_response_with_retries(
            prompt=prompt, compressed_image=compressed_image
        )
        end = time.time() - start
        print(f"Openai api call took an additional {end}s")

        if isinstance(llm_fields, dict) and "error" not in llm_fields:
            llm_fields["occupation"] = llm_fields.get("occupation_en")
            front_data_dict = llm_fields

    return front_data_dict
701
-
702
-
703
def get_openai_response_with_retries(
    max_retries=3, prompt="", delay_seconds: float = 2, compressed_image=""
):
    """Send ``prompt`` plus an image to the chat model and parse its reply as a dict.

    The reply is parsed as JSON. If that fails, a fenced block or the first
    ``{...}`` span is extracted and parsed as JSON, then as a Python literal.
    A failed request is retried up to ``max_retries`` times with
    ``delay_seconds`` between attempts; a reply with no extractable JSON is
    re-requested immediately.

    Args:
        max_retries: Maximum number of API attempts.
        prompt: Text part of the user message.
        delay_seconds: Pause between attempts after a failed request.
        compressed_image: Buffer exposing ``getvalue()`` that returns the
            image bytes, sent as a base64 JPEG data URL.

    Returns:
        The parsed reply, or a dict with ``error`` and ``error_details`` when
        the reply could not be parsed.

    Raises:
        Exception: When the last attempt fails with an API error.
    """
    import ast  # local import: only needed for the literal fallback below

    img_bytes = compressed_image.getvalue()
    img_base64_bytes = base64.b64encode(img_bytes).decode("utf-8")

    for attempt in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4.1-nano",
                temperature=0.4,
                max_tokens=2000,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{img_base64_bytes}",
                                },
                            },
                        ],
                    }
                ],
            )

            result = response.choices[0].message.content

            try:
                return json.loads(result)
            except json.JSONDecodeError:
                try:
                    json_match = re.search(
                        r"```(json|python|plaintext)?\s*(.*?)\s*```|\s*({.*?})",
                        result,
                        re.DOTALL,
                    )
                    if json_match:
                        json_str = json_match.group(2) or json_match.group(3)
                        try:
                            return json.loads(json_str)
                        except (ValueError, TypeError):
                            # SECURITY: this used to be eval() on model
                            # output. literal_eval accepts the same literals
                            # without executing code.
                            return ast.literal_eval(json_str.replace("'", '"'))
                except Exception as e:
                    return {
                        "error": "GPT's response incorrectly formatted.",
                        "error_details": e,
                    }

        except Exception as e:
            print(
                f"Error during API request (attempt {attempt + 1} of {max_retries}): {str(e)}"
            )

            if attempt < max_retries - 1:
                time.sleep(delay_seconds)
            else:
                raise Exception(f"Max retries exceeded. Last error: {str(e)}") from e

    # No attempt produced parseable JSON: report it instead of returning None.
    return {
        "error": "GPT's response incorrectly formatted.",
        "error_details": "No JSON object found in the model response.",
    }