idvpackage 3.0.9__py3-none-any.whl → 3.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- idvpackage/common.py +118 -140
- idvpackage/jor_passport_extraction.py +0 -257
- idvpackage/ocr.py +140 -493
- idvpackage/ocr_utils.py +2 -1
- {idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/METADATA +1 -1
- {idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/RECORD +9 -9
- {idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/WHEEL +0 -0
- {idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/licenses/LICENSE +0 -0
- {idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/top_level.txt +0 -0
idvpackage/common.py
CHANGED
|
@@ -780,196 +780,174 @@ def load_and_process_image_deepface_topup(image_input):
|
|
|
780
780
|
|
|
781
781
|
|
|
782
782
|
def load_and_process_image_deepface(image_input, country=None):
|
|
783
|
-
DeepFace = get_deepface()
|
|
784
|
-
face_recognition = get_face_recognition()
|
|
783
|
+
DeepFace = get_deepface()
|
|
784
|
+
face_recognition = get_face_recognition()
|
|
785
|
+
|
|
786
|
+
CONFIDENCE_THRESHOLD = 0.90 if country == "SDN" else 0.97
|
|
787
|
+
|
|
785
788
|
def process_angle(img, angle):
|
|
789
|
+
img_to_process = None
|
|
790
|
+
img_rgb = None
|
|
791
|
+
img_pil = None
|
|
792
|
+
rotated = None
|
|
793
|
+
|
|
786
794
|
try:
|
|
787
|
-
#
|
|
795
|
+
# Rotate only if needed
|
|
788
796
|
if angle != 0:
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
img_pil =
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
img_to_process = cv2.cvtColor(rotated, cv2.COLOR_RGB2BGR)
|
|
796
|
-
# Clear references to intermediate arrays
|
|
797
|
-
del img_rgb, img_pil, rotated
|
|
797
|
+
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
|
798
|
+
img_pil = Image.fromarray(img_rgb)
|
|
799
|
+
rotated = np.ascontiguousarray(
|
|
800
|
+
img_pil.rotate(angle, expand=True)
|
|
801
|
+
)
|
|
802
|
+
img_to_process = cv2.cvtColor(rotated, cv2.COLOR_RGB2BGR)
|
|
798
803
|
else:
|
|
799
804
|
img_to_process = img
|
|
800
805
|
|
|
801
|
-
# Extract faces with memory optimization
|
|
802
806
|
face_objs = DeepFace.extract_faces(
|
|
803
807
|
img_to_process,
|
|
804
|
-
detector_backend=
|
|
808
|
+
detector_backend="fastmtcnn",
|
|
805
809
|
enforce_detection=False,
|
|
806
|
-
align=True
|
|
810
|
+
align=True,
|
|
807
811
|
)
|
|
808
812
|
|
|
809
|
-
if
|
|
810
|
-
|
|
813
|
+
if not face_objs:
|
|
814
|
+
return None, None, 0.0
|
|
811
815
|
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
print(f"Rejecting face at {angle} degrees due to small size of Sudanese Document: {facial_area['w']}x{facial_area['h']} (minimum 40x50)")
|
|
818
|
-
return None, None, 0
|
|
819
|
-
elif country != 'SDN' and (facial_area['w'] < 80 or facial_area['h'] < 90):
|
|
820
|
-
print(f"Rejecting face at {angle} degrees due to small size: {facial_area['w']}x{facial_area['h']} (minimum 100x100)")
|
|
821
|
-
return None, None, 0
|
|
822
|
-
|
|
823
|
-
# Immediately reject if confidence is below threshold
|
|
824
|
-
if confidence < 0.95 and country != 'SDN':
|
|
825
|
-
print(f"Rejecting face at {angle} degrees due to low confidence: {confidence:.3f}")
|
|
826
|
-
return None, None, 0
|
|
827
|
-
elif confidence >= 0.90 and country == 'SDN':
|
|
828
|
-
return face_objs, img_to_process, confidence
|
|
816
|
+
#get largest face
|
|
817
|
+
biggest_face = max(
|
|
818
|
+
face_objs,
|
|
819
|
+
key=lambda f: f["facial_area"]["w"] * f["facial_area"]["h"],
|
|
820
|
+
)
|
|
829
821
|
|
|
830
|
-
|
|
822
|
+
facial_area = biggest_face["facial_area"]
|
|
823
|
+
confidence = biggest_face.get("confidence", 0.0)
|
|
824
|
+
|
|
825
|
+
logging.info(f"Angle {angle}: Detected face with confidence {confidence}")
|
|
826
|
+
|
|
827
|
+
if country == "SDN":
|
|
828
|
+
if confidence < CONFIDENCE_THRESHOLD:
|
|
829
|
+
logging.info(f"Low confidence for SDN at angle: {confidence} at angle {angle}")
|
|
830
|
+
return None, None, 0.0
|
|
831
|
+
else:
|
|
832
|
+
if confidence < 0.95:
|
|
833
|
+
logging.info(f"Low confidence: for country : {country} -> {confidence} at angle {angle}")
|
|
834
|
+
return None, None, 0.0
|
|
835
|
+
|
|
836
|
+
# Size validation (only when confidence < 1)
|
|
837
|
+
w, h = facial_area["w"], facial_area["h"]
|
|
838
|
+
if country == "SDN":
|
|
839
|
+
if w < 40 or h < 50:
|
|
840
|
+
logging.info(f"Face too small for SDN: w={w}, h={h}")
|
|
841
|
+
return None, None, 0.0
|
|
842
|
+
else:
|
|
843
|
+
if w < 80 or h < 90:
|
|
844
|
+
logging.info(f"Face too small: w={w}, h={h}")
|
|
845
|
+
return None, None, 0.0
|
|
846
|
+
|
|
847
|
+
# All checks passed
|
|
848
|
+
return biggest_face, img_to_process, confidence
|
|
831
849
|
|
|
832
|
-
# Clear memory if no face found
|
|
833
|
-
del img_to_process
|
|
834
|
-
return None, None, 0
|
|
835
850
|
except Exception as e:
|
|
836
|
-
print(f"Error
|
|
837
|
-
return None, None, 0
|
|
851
|
+
print(f"[DeepFace] Error at angle {angle}: {e}")
|
|
852
|
+
return None, None, 0.0
|
|
853
|
+
|
|
838
854
|
finally:
|
|
839
|
-
#
|
|
840
|
-
if
|
|
841
|
-
del
|
|
855
|
+
# Aggressive memory cleanup
|
|
856
|
+
if img_rgb is not None:
|
|
857
|
+
del img_rgb
|
|
858
|
+
if img_pil is not None:
|
|
859
|
+
del img_pil
|
|
860
|
+
if rotated is not None:
|
|
861
|
+
del rotated
|
|
862
|
+
|
|
863
|
+
# -------------------- INPUT HANDLING --------------------
|
|
842
864
|
|
|
843
865
|
try:
|
|
844
|
-
# Process input image efficiently
|
|
845
866
|
if isinstance(image_input, np.ndarray):
|
|
846
|
-
# Use view when possible
|
|
847
867
|
image = np.ascontiguousarray(image_input)
|
|
848
868
|
if image.dtype != np.uint8:
|
|
849
869
|
image = image.astype(np.uint8, copy=False)
|
|
870
|
+
|
|
850
871
|
elif isinstance(image_input, str):
|
|
851
|
-
# Decode base64 directly to numpy array
|
|
852
872
|
image_data = base64.b64decode(image_input)
|
|
853
|
-
image = cv2.imdecode(
|
|
854
|
-
|
|
873
|
+
image = cv2.imdecode(
|
|
874
|
+
np.frombuffer(image_data, np.uint8),
|
|
875
|
+
cv2.IMREAD_COLOR,
|
|
876
|
+
)
|
|
877
|
+
del image_data
|
|
878
|
+
|
|
855
879
|
else:
|
|
856
|
-
print(
|
|
880
|
+
print("Unsupported image input type")
|
|
857
881
|
return [], []
|
|
858
882
|
|
|
859
883
|
if image is None or image.size == 0:
|
|
860
|
-
print("Empty image")
|
|
884
|
+
print("Empty image input")
|
|
861
885
|
return [], []
|
|
862
886
|
|
|
863
|
-
if country == 'SDN':
|
|
864
|
-
CONFIDENCE_THRESHOLD = 0.90
|
|
865
|
-
else:
|
|
866
|
-
CONFIDENCE_THRESHOLD = 0.97
|
|
867
887
|
|
|
868
|
-
#
|
|
869
|
-
face_objs, processed_image, confidence = process_angle(image, 0)
|
|
870
|
-
if face_objs is not None and confidence >= CONFIDENCE_THRESHOLD:
|
|
871
|
-
try:
|
|
872
|
-
biggest_face = max(face_objs, key=lambda face: face['facial_area']['w'] * face['facial_area']['h'])
|
|
873
|
-
facial_area = biggest_face['facial_area']
|
|
888
|
+
# -------------------- ANGLE LOOP (NO THREADS) --------------------
|
|
874
889
|
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
if country == 'SDN' and (facial_area['w'] < 40 or facial_area['h'] < 50):
|
|
879
|
-
print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 40x50)")
|
|
880
|
-
return [], []
|
|
881
|
-
elif country != 'SDN' and (facial_area['w'] < 80 or facial_area['h'] < 90):
|
|
882
|
-
print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 100x100)")
|
|
883
|
-
return [], []
|
|
890
|
+
best_face_objs = None
|
|
891
|
+
best_image = None
|
|
892
|
+
best_confidence = 0.0
|
|
884
893
|
|
|
885
|
-
|
|
894
|
+
for angle in (0, 90, 180, 270):
|
|
895
|
+
face_objs, processed_image, confidence = process_angle(image, angle)
|
|
886
896
|
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
897
|
+
if confidence > best_confidence:
|
|
898
|
+
best_face_objs = face_objs
|
|
899
|
+
best_image = processed_image
|
|
900
|
+
best_confidence = confidence
|
|
901
|
+
best_angle = angle
|
|
891
902
|
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
#
|
|
897
|
-
del processed_image, image_rgb
|
|
898
|
-
if 'face_objs' in locals():
|
|
899
|
-
del face_objs
|
|
900
|
-
if country=='QAT':
|
|
901
|
-
return 0,0
|
|
902
|
-
|
|
903
|
-
# Try other angles in parallel
|
|
904
|
-
angles = [90, 180, 270]
|
|
905
|
-
best_confidence = confidence if face_objs is not None else 0
|
|
906
|
-
best_face_objs = face_objs
|
|
907
|
-
best_image = processed_image
|
|
903
|
+
if face_objs is None:
|
|
904
|
+
continue
|
|
905
|
+
|
|
906
|
+
else:
|
|
907
|
+
break # Exit loop on first valid detection
|
|
908
908
|
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
executor.submit(process_angle, image, angle): angle
|
|
912
|
-
for angle in angles
|
|
913
|
-
}
|
|
909
|
+
# Keep best fallback (just in case)
|
|
910
|
+
|
|
914
911
|
|
|
915
|
-
try:
|
|
916
|
-
for future in as_completed(futures):
|
|
917
|
-
face_objs, processed_image, confidence = future.result()
|
|
918
|
-
if face_objs is not None:
|
|
919
|
-
if confidence >= CONFIDENCE_THRESHOLD:
|
|
920
|
-
# Cancel remaining tasks
|
|
921
|
-
for f in futures:
|
|
922
|
-
if not f.done():
|
|
923
|
-
f.cancel()
|
|
924
|
-
best_face_objs = face_objs
|
|
925
|
-
best_image = processed_image
|
|
926
|
-
best_confidence = confidence
|
|
927
|
-
break
|
|
928
|
-
finally:
|
|
929
|
-
for future in futures:
|
|
930
|
-
future.cancel()
|
|
931
912
|
|
|
932
913
|
if best_face_objs is None or best_confidence < CONFIDENCE_THRESHOLD:
|
|
933
|
-
print(f"No
|
|
914
|
+
print(f"No valid face found (threshold={CONFIDENCE_THRESHOLD})")
|
|
934
915
|
return [], []
|
|
935
916
|
|
|
936
|
-
|
|
937
|
-
biggest_face = max(best_face_objs, key=lambda face: face['facial_area']['w'] * face['facial_area']['h'])
|
|
938
|
-
facial_area = biggest_face['facial_area']
|
|
939
|
-
|
|
940
|
-
# Final size check for rotated face
|
|
941
|
-
if country != 'SDN' and confidence < 1:
|
|
942
|
-
if facial_area['w'] < 80 or facial_area['h'] < 90:
|
|
943
|
-
print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 100x100)")
|
|
944
|
-
return [], []
|
|
945
|
-
elif country == 'SDN' and confidence < CONFIDENCE_THRESHOLD:
|
|
946
|
-
print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 40x50)")
|
|
947
|
-
return [], []
|
|
948
|
-
|
|
949
|
-
x, y, w, h = facial_area['x'], facial_area['y'], facial_area['w'], facial_area['h']
|
|
950
|
-
|
|
951
|
-
# Minimize memory during final processing
|
|
952
|
-
image_rgb = cv2.cvtColor(best_image, cv2.COLOR_BGR2RGB)
|
|
953
|
-
face_locations = [(y, x + w, y + h, x)]
|
|
954
|
-
face_encodings = face_recognition.face_encodings(image_rgb, face_locations)
|
|
917
|
+
# -------------------- FINAL ENCODING --------------------
|
|
955
918
|
|
|
956
|
-
|
|
957
|
-
|
|
919
|
+
|
|
920
|
+
logging.info(f"Using best angle: {best_angle} detected with confidence {best_confidence} for encodings")
|
|
921
|
+
fa = best_face_objs["facial_area"]
|
|
922
|
+
x, y, w, h = fa["x"], fa["y"], fa["w"], fa["h"]
|
|
923
|
+
|
|
924
|
+
image_rgb = cv2.cvtColor(best_image, cv2.COLOR_BGR2RGB)
|
|
925
|
+
face_locations = [(y, x + w, y + h, x)]
|
|
926
|
+
face_encodings = face_recognition.face_encodings(
|
|
927
|
+
image_rgb, face_locations
|
|
928
|
+
)
|
|
958
929
|
|
|
959
|
-
|
|
930
|
+
if not face_encodings:
|
|
960
931
|
return [], []
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
del image_rgb, best_image, best_face_objs
|
|
932
|
+
|
|
933
|
+
return face_locations, face_encodings
|
|
964
934
|
|
|
965
935
|
except Exception as e:
|
|
966
|
-
print(f"
|
|
936
|
+
print(f"[FacePipeline] Fatal error: {e}")
|
|
967
937
|
return [], []
|
|
938
|
+
|
|
968
939
|
finally:
|
|
969
|
-
#
|
|
970
|
-
if
|
|
940
|
+
# Final memory cleanup
|
|
941
|
+
if "image_rgb" in locals():
|
|
942
|
+
del image_rgb
|
|
943
|
+
if "best_image" in locals():
|
|
944
|
+
del best_image
|
|
945
|
+
if "best_face_objs" in locals():
|
|
946
|
+
del best_face_objs
|
|
947
|
+
if "image" in locals():
|
|
971
948
|
del image
|
|
972
949
|
|
|
950
|
+
|
|
973
951
|
def calculate_similarity(face_encoding1, face_encoding2):
|
|
974
952
|
face_recognition = get_face_recognition()
|
|
975
953
|
similarity_score = 1 - face_recognition.face_distance([face_encoding1], face_encoding2)[0]
|
|
@@ -1,260 +1,3 @@
|
|
|
1
|
-
# import google.generativeai as genai
|
|
2
|
-
# import re
|
|
3
|
-
# from datetime import datetime
|
|
4
|
-
# from googletrans import Translator
|
|
5
|
-
# import json
|
|
6
|
-
# import openai
|
|
7
|
-
# import time
|
|
8
|
-
|
|
9
|
-
# def configure_genai(api_key):
|
|
10
|
-
# genai.configure(api_key=api_key)
|
|
11
|
-
# model = genai.GenerativeModel(model_name="gemini-1.5-flash")
|
|
12
|
-
# return model
|
|
13
|
-
|
|
14
|
-
# def genai_vision_jor(detected_text, model):
|
|
15
|
-
# result = model.generate_content(
|
|
16
|
-
# [detected_text,"\n\n", "From provided {detected_text} give me all required information in english. full_name, first_name, last_name, mother_name, passport_number, dob(Date of Birth dd/mm/yy format), Place of Birth, gender(M/F), issuing_date(dd/mm/yy format), expiry_date (dd/mm/yy format), Place of Issue, nationality, and both lines of the MRZ, please give me just dictionary dont write anything else - full_name, first_name, last_name, mother_name, passport_number, dob, place_of_birth, gender, issuing_date, expiry_date, issuing_place, nationality, mrz1, mrz2. Note that mrz1 is the line that starts with P<JOR and mrz2 is the line that starts with passport number, Also note if you are unable to find the passport number directly then use mrz2 inital words that comes before the symbol '<' as the passport number"]
|
|
17
|
-
# )
|
|
18
|
-
# return result.text
|
|
19
|
-
|
|
20
|
-
# def reformat_date(date_str):
|
|
21
|
-
# try:
|
|
22
|
-
# date_obj = datetime.strptime(date_str, '%d-%m-%Y')
|
|
23
|
-
|
|
24
|
-
# return date_obj.strftime('%d/%m/%Y')
|
|
25
|
-
# except ValueError:
|
|
26
|
-
# return date_str
|
|
27
|
-
|
|
28
|
-
# def swap_dates_if_needed(data_dict):
|
|
29
|
-
# try:
|
|
30
|
-
# # Parse the dates
|
|
31
|
-
# issuing_date = datetime.strptime(data_dict['issuing_date'], '%d/%m/%Y')
|
|
32
|
-
# expiry_date = datetime.strptime(data_dict['expiry_date'], '%d/%m/%Y')
|
|
33
|
-
|
|
34
|
-
# if issuing_date > expiry_date:
|
|
35
|
-
# data_dict['issuing_date'], data_dict['expiry_date'] = data_dict['expiry_date'], data_dict['issuing_date']
|
|
36
|
-
# print("Dates swapped: Issuing date and expiry date were in the wrong order.")
|
|
37
|
-
|
|
38
|
-
# except ValueError as e:
|
|
39
|
-
# print(f"Error parsing dates: {e}")
|
|
40
|
-
|
|
41
|
-
# return data_dict
|
|
42
|
-
|
|
43
|
-
# def mrz_add(dictionary_image_half):
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# mrz_2 = dictionary_image_half['mrz2']
|
|
47
|
-
# mrz_1 = dictionary_image_half['mrz1']
|
|
48
|
-
|
|
49
|
-
# mrz_data_dict = {}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
# pattern_surname = r'P<JOR([^<]+)'
|
|
53
|
-
# match_surname = re.search(pattern_surname, mrz_1)
|
|
54
|
-
# if match_surname:
|
|
55
|
-
# mrz_data_dict['last_name_mrz'] = match_surname.group(1)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
# pattern_given_names = r'<([^<]+)<([^<]+)<([^<]+)<<'
|
|
59
|
-
# match_given_names = re.search(pattern_given_names, mrz_1)
|
|
60
|
-
# if match_given_names:
|
|
61
|
-
# mrz_data_dict['first_name_mrz'] = match_given_names.group(1)
|
|
62
|
-
# mrz_data_dict['middle_name_1'] = match_given_names.group(2)
|
|
63
|
-
# mrz_data_dict['middle_name_2'] = match_given_names.group(3)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
# pattern_passport = r'^([A-Z0-9]+)<'
|
|
67
|
-
# match_passport = re.search(pattern_passport, mrz_2)
|
|
68
|
-
# if match_passport:
|
|
69
|
-
# passport_number = match_passport.group(1)
|
|
70
|
-
# mrz_data_dict['passport_number'] = passport_number
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
# pattern_nationality = r'<.[A-Z]{3}'
|
|
74
|
-
|
|
75
|
-
# match_nationality = re.search(pattern_nationality, mrz_2)
|
|
76
|
-
# if match_nationality:
|
|
77
|
-
# nationality = match_nationality.group(0)[2:]
|
|
78
|
-
# mrz_data_dict['nationality'] = nationality
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# pattern_birth_date = r'\d{7}<([0-9]{6})'
|
|
82
|
-
# match_birth_date = re.search(pattern_birth_date, mrz_2)
|
|
83
|
-
# if match_birth_date:
|
|
84
|
-
# birth_date_raw = match_birth_date.group(1)
|
|
85
|
-
# year_prefix = '19' if int(birth_date_raw[:2]) > 23 else '20'
|
|
86
|
-
# birth_date = f"{birth_date_raw[4:]}/{birth_date_raw[2:4]}/{year_prefix}{birth_date_raw[:2]}"
|
|
87
|
-
# mrz_data_dict['dob'] = birth_date
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
# pattern_gender = r'([MF])'
|
|
91
|
-
# match_gender = re.search(pattern_gender, mrz_2)
|
|
92
|
-
# if match_gender:
|
|
93
|
-
# gender = match_gender.group(1)
|
|
94
|
-
# mrz_data_dict['gender'] = gender
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
# pattern_expiry_date = r'[MF](\d{6})'
|
|
98
|
-
# match_expiry_date = re.search(pattern_expiry_date, mrz_2)
|
|
99
|
-
# if match_expiry_date:
|
|
100
|
-
# expiry_date_raw = match_expiry_date.group(1)
|
|
101
|
-
# year_prefix = '19' if int(expiry_date_raw[:2]) > 50 else '20'
|
|
102
|
-
# expiry_date = f"{expiry_date_raw[4:]}/{expiry_date_raw[2:4]}/{year_prefix}{expiry_date_raw[:2]}"
|
|
103
|
-
# mrz_data_dict['expiry_date'] = expiry_date
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
# for key, value in mrz_data_dict.items():
|
|
107
|
-
# if key in dictionary_image_half and dictionary_image_half[key] in ['None', None, 'N/A', '', ' ', 'NaN', 'nan', 'null']:
|
|
108
|
-
# dictionary_image_half[key] = value
|
|
109
|
-
# elif key not in dictionary_image_half:
|
|
110
|
-
# dictionary_image_half[key] = value
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
# if len(dictionary_image_half['last_name']) > 1:
|
|
114
|
-
# # Substitute last_name with last_name_mrz
|
|
115
|
-
# dictionary_image_half['last_name'] = dictionary_image_half['last_name_mrz']
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
# return dictionary_image_half
|
|
119
|
-
|
|
120
|
-
# def translate_arabic_words(dictionary):
|
|
121
|
-
# translator = Translator()
|
|
122
|
-
# translated_dict = {}
|
|
123
|
-
# for key, value in dictionary.items():
|
|
124
|
-
# if key not in ['mrz1', 'mrz2']:
|
|
125
|
-
# if isinstance(value, str):
|
|
126
|
-
|
|
127
|
-
# detected_lang = translator.detect(value).lang
|
|
128
|
-
# if detected_lang == 'ar':
|
|
129
|
-
# translated_text = translator.translate(value, src='ar', dest='en').text
|
|
130
|
-
# translated_dict[key] = translated_text
|
|
131
|
-
# else:
|
|
132
|
-
# translated_dict[key] = value
|
|
133
|
-
# else:
|
|
134
|
-
# translated_dict[key] = value
|
|
135
|
-
# else:
|
|
136
|
-
|
|
137
|
-
# translated_dict[key] = value
|
|
138
|
-
# return translated_dict
|
|
139
|
-
|
|
140
|
-
# def make_api_request_with_retries(prompt: str, max_retries: int = 3, delay_seconds: float = 2):
|
|
141
|
-
# """
|
|
142
|
-
# Helper function to make API requests with retry logic using OpenAI
|
|
143
|
-
# """
|
|
144
|
-
# start_time = time.time()
|
|
145
|
-
# for attempt in range(max_retries):
|
|
146
|
-
# try:
|
|
147
|
-
# response = openai.ChatCompletion.create(
|
|
148
|
-
# model="gpt-4o",
|
|
149
|
-
# temperature=0.4,
|
|
150
|
-
# max_tokens=2000,
|
|
151
|
-
# messages=[
|
|
152
|
-
# {
|
|
153
|
-
# "role": "user",
|
|
154
|
-
# "content": prompt
|
|
155
|
-
# }
|
|
156
|
-
# ]
|
|
157
|
-
# )
|
|
158
|
-
# result = response.choices[0].message.content
|
|
159
|
-
|
|
160
|
-
# try:
|
|
161
|
-
# api_response = json.loads(result)
|
|
162
|
-
# except json.JSONDecodeError:
|
|
163
|
-
# try:
|
|
164
|
-
# json_match = re.search(r'```(json|python|plaintext)?\s*(.*?)\s*```|\s*({.*?})', result, re.DOTALL)
|
|
165
|
-
# if json_match:
|
|
166
|
-
# json_str = json_match.group(2) or json_match.group(3)
|
|
167
|
-
# try:
|
|
168
|
-
# api_response = json.loads(json_str)
|
|
169
|
-
# except:
|
|
170
|
-
# api_response = eval(json_str.replace("'", '"'))
|
|
171
|
-
# else:
|
|
172
|
-
# raise json.JSONDecodeError("No JSON found in response", result, 0)
|
|
173
|
-
# except Exception as e:
|
|
174
|
-
# print(f"Error parsing response: {str(e)}")
|
|
175
|
-
# raise
|
|
176
|
-
|
|
177
|
-
# # print(f"GenAI request took {time.time() - start_time:.2f} seconds")
|
|
178
|
-
# return api_response
|
|
179
|
-
|
|
180
|
-
# except Exception as e:
|
|
181
|
-
# print(f"Error during API request (attempt {attempt + 1} of {max_retries}): {str(e)}")
|
|
182
|
-
# if attempt < max_retries - 1:
|
|
183
|
-
# time.sleep(delay_seconds)
|
|
184
|
-
# else:
|
|
185
|
-
# raise Exception(f"Max retries exceeded. Last error: {str(e)}")
|
|
186
|
-
|
|
187
|
-
# def jordan_passport_extraction(passport_text, api_key):
|
|
188
|
-
# start_time = time.time()
|
|
189
|
-
# try:
|
|
190
|
-
# prompt = f"From provided text, give me all required information in english only. full_name, first_name, last_name, mother_name, passport_number, national_number, dob(Date of Birth dd/mm/yyyy format), Place of Birth, gender(M/F), issuing_date(dd/mm/yyyy format), expiry_date (dd/mm/yyyy format), Place of Issue, nationality, and both lines of the MRZ(mrz1, mrz2). Please give me just dictionary dont write anything else - full_name, first_name, last_name, mother_name, passport_number, national_number, dob, place_of_birth, gender, issuing_date, expiry_date, issuing_place, nationality, mrz1, mrz2. Note that mrz1 is the line that starts with P<JOR and mrz2 is the line that starts with passport number. Also note if you are unable to find the passport number directly then use mrz2 initial words that comes before the symbol '<' as the passport number. If there are any arabic words in mother_name, or place_of_birth, or authority, just keep the english words, do not ever include arabic words in the output. Leave National No. empty if not found. Here's the text: {passport_text}"
|
|
191
|
-
|
|
192
|
-
# passport_final_result = make_api_request_with_retries(prompt)
|
|
193
|
-
|
|
194
|
-
# if 'national_number' in passport_final_result:
|
|
195
|
-
# passport_final_result['passport_national_number'] = passport_final_result.get('national_number', '')
|
|
196
|
-
|
|
197
|
-
# # print(f"\nPassport GenAI result: {passport_final_result}\n")
|
|
198
|
-
|
|
199
|
-
# # try:
|
|
200
|
-
# # passport_final_result = swap_dates_if_needed(passport_final_result)
|
|
201
|
-
# # except Exception as e:
|
|
202
|
-
# # print(f"Error swapping dates: {e}")
|
|
203
|
-
|
|
204
|
-
# # try:
|
|
205
|
-
# # passport_final_result = translate_arabic_words(passport_final_result)
|
|
206
|
-
# # except Exception as e:
|
|
207
|
-
# # print(f"Error translating: {e}")
|
|
208
|
-
|
|
209
|
-
# if passport_final_result and not passport_final_result.get('passport_number', ''):
|
|
210
|
-
# passport_number_pattern = r"([A-Za-z]\d{8}|[A-Za-z]\d{7}|[A-Za-z]\d{6})"
|
|
211
|
-
# passport_number_match = re.search(passport_number_pattern, passport_text)
|
|
212
|
-
# if passport_number_match:
|
|
213
|
-
# passport_number = passport_number_match.group(0)
|
|
214
|
-
|
|
215
|
-
# if passport_number:
|
|
216
|
-
# passport_final_result['passport_number'] = passport_number
|
|
217
|
-
# else:
|
|
218
|
-
# passport_number_match = re.search(passport_number_pattern, passport_final_result.get('mrz2', ''))
|
|
219
|
-
# if passport_number_match:
|
|
220
|
-
# passport_number = passport_number_match.group(0)
|
|
221
|
-
# passport_final_result['passport_number'] = passport_number
|
|
222
|
-
|
|
223
|
-
# mrz1 = passport_final_result.get('mrz1', '')
|
|
224
|
-
# mrz2 = passport_final_result.get('mrz2', '')
|
|
225
|
-
# if mrz1 and mrz2:
|
|
226
|
-
# passport_final_result['mrz'] = f"{mrz1} {mrz2}"
|
|
227
|
-
|
|
228
|
-
# if "gender" in passport_final_result:
|
|
229
|
-
# gender = passport_final_result["gender"].strip().upper()
|
|
230
|
-
# if gender == "F":
|
|
231
|
-
# passport_final_result["gender"] = "FEMALE"
|
|
232
|
-
# elif gender == "M":
|
|
233
|
-
# passport_final_result["gender"] = "MALE"
|
|
234
|
-
|
|
235
|
-
# if 'gender' in passport_final_result:
|
|
236
|
-
# passport_final_result["gender"] = passport_final_result["gender"].strip().upper()
|
|
237
|
-
|
|
238
|
-
# if 'issuing_place' in passport_final_result:
|
|
239
|
-
# passport_final_result['place_of_issue'] = passport_final_result['issuing_place'].strip().upper()
|
|
240
|
-
|
|
241
|
-
# if passport_final_result.get('nationality', '') and len(passport_final_result['nationality']) > 3:
|
|
242
|
-
# passport_final_result['nationality'] = 'JOR'
|
|
243
|
-
|
|
244
|
-
# if not passport_final_result.get('nationality', ''):
|
|
245
|
-
# passport_final_result['nationality'] = 'JOR'
|
|
246
|
-
|
|
247
|
-
# passport_final_result['issuing_country'] = 'JOR'
|
|
248
|
-
|
|
249
|
-
# processing_time = time.time() - start_time
|
|
250
|
-
|
|
251
|
-
# return passport_final_result
|
|
252
|
-
|
|
253
|
-
# except Exception as e:
|
|
254
|
-
# processing_time = time.time() - start_time
|
|
255
|
-
# print(f"Error occurred in passport extraction: {e}")
|
|
256
|
-
# print(f"Failed processing took {processing_time:.2f} seconds")
|
|
257
|
-
# return {}
|
|
258
1
|
|
|
259
2
|
import base64
|
|
260
3
|
import time
|