PyPI - idvpackage - Versions diffs - 3.0.9__py3-none-any.whl → 3.0.11__py3-none-any.whl - Mend

idvpackage-3.0.9-py3-none-any.whl → idvpackage-3.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

idvpackage/common.py +118 -140
idvpackage/jor_passport_extraction.py +0 -257
idvpackage/ocr.py +140 -493
idvpackage/ocr_utils.py +2 -1
{idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/METADATA +1 -1
{idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/RECORD +9 -9
{idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/WHEEL +0 -0
{idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/licenses/LICENSE +0 -0
{idvpackage-3.0.9.dist-info → idvpackage-3.0.11.dist-info}/top_level.txt +0 -0

idvpackage/common.py CHANGED Viewed

@@ -780,196 +780,174 @@ def load_and_process_image_deepface_topup(image_input):
 def load_and_process_image_deepface(image_input, country=None):
-    DeepFace = get_deepface()  # Only load when needed
-    face_recognition = get_face_recognition()  # Only load when needed
+    DeepFace = get_deepface()
+    face_recognition = get_face_recognition()
+    CONFIDENCE_THRESHOLD = 0.90 if country == "SDN" else 0.97
     def process_angle(img, angle):
+        img_to_process = None
+        img_rgb = None
+        img_pil = None
+        rotated = None
         try:
-            # Create a view instead of copy when possible
+            # Rotate only if needed
             if angle != 0:
-                # Minimize memory usage during rotation
-                with np.errstate(all='ignore'):
-                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-                    img_pil = Image.fromarray(img_rgb)
-                    # Use existing buffer when possible
-                    rotated = np.ascontiguousarray(img_pil.rotate(angle, expand=True))
-                    img_to_process = cv2.cvtColor(rotated, cv2.COLOR_RGB2BGR)
-                    # Clear references to intermediate arrays
-                    del img_rgb, img_pil, rotated
+                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+                img_pil = Image.fromarray(img_rgb)
+                rotated = np.ascontiguousarray(
+                    img_pil.rotate(angle, expand=True)
+                )
+                img_to_process = cv2.cvtColor(rotated, cv2.COLOR_RGB2BGR)
             else:
                 img_to_process = img
-            # Extract faces with memory optimization
             face_objs = DeepFace.extract_faces(
                 img_to_process,
-                detector_backend='fastmtcnn',
+                detector_backend="fastmtcnn",
                 enforce_detection=False,
-                align=True
+                align=True,
             )
-            if face_objs and len(face_objs) > 0:
-                confidence = face_objs[0].get('confidence', 0)
+            if not face_objs:
+                return None, None, 0.0
-                # Check face frame size only if confidence is less than 1
-                if confidence < 1:
-                    facial_area = face_objs[0]['facial_area']
-                    # Sudanese Edge Case. They have smaller pictures.
-                    if country == 'SDN' and (facial_area['w'] < 40 or facial_area['h'] < 50):
-                        print(f"Rejecting face at {angle} degrees due to small size of Sudanese Document: {facial_area['w']}x{facial_area['h']} (minimum 40x50)")
-                        return None, None, 0
-                    elif country != 'SDN' and (facial_area['w'] < 80 or facial_area['h'] < 90):
-                        print(f"Rejecting face at {angle} degrees due to small size: {facial_area['w']}x{facial_area['h']} (minimum 100x100)")
-                        return None, None, 0
-                # Immediately reject if confidence is below threshold
-                if confidence < 0.95 and country != 'SDN':
-                    print(f"Rejecting face at {angle} degrees due to low confidence: {confidence:.3f}")
-                    return None, None, 0
-                elif confidence >= 0.90 and country == 'SDN':
-                    return face_objs, img_to_process, confidence
+            #get largest face
+            biggest_face = max(
+                face_objs,
+                key=lambda f: f["facial_area"]["w"] * f["facial_area"]["h"],
+            )
-                return face_objs, img_to_process, confidence
+            facial_area = biggest_face["facial_area"]
+            confidence = biggest_face.get("confidence", 0.0)
+            logging.info(f"Angle {angle}: Detected face with confidence {confidence}")
+            if country == "SDN":
+                if confidence < CONFIDENCE_THRESHOLD:
+                    logging.info(f"Low confidence for SDN at angle: {confidence} at angle {angle}")
+                    return None, None, 0.0
+            else:
+                if confidence < 0.95:
+                    logging.info(f"Low confidence: for country : {country} -> {confidence} at angle {angle}")
+                    return None, None, 0.0
+            # Size validation (only when confidence < 1)
+            w, h = facial_area["w"], facial_area["h"]
+            if country == "SDN":
+                if w < 40 or h < 50:
+                    logging.info(f"Face too small for SDN: w={w}, h={h}")
+                    return None, None, 0.0
+            else:
+                if w < 80 or h < 90:
+                    logging.info(f"Face too small: w={w}, h={h}")
+                    return None, None, 0.0
+            # All checks passed
+            return biggest_face, img_to_process, confidence
-            # Clear memory if no face found
-            del img_to_process
-            return None, None, 0
         except Exception as e:
-            print(f"Error processing angle {angle}: {e}")
-            return None, None, 0
+            print(f"[DeepFace] Error at angle {angle}: {e}")
+            return None, None, 0.0
         finally:
-            # Ensure memory is cleared
-            if 'img_to_process' in locals():
-                del img_to_process
+            # Aggressive memory cleanup
+            if img_rgb is not None:
+                del img_rgb
+            if img_pil is not None:
+                del img_pil
+            if rotated is not None:
+                del rotated
+    # -------------------- INPUT HANDLING --------------------
     try:
-        # Process input image efficiently
         if isinstance(image_input, np.ndarray):
-            # Use view when possible
             image = np.ascontiguousarray(image_input)
             if image.dtype != np.uint8:
                 image = image.astype(np.uint8, copy=False)
         elif isinstance(image_input, str):
-            # Decode base64 directly to numpy array
             image_data = base64.b64decode(image_input)
-            image = cv2.imdecode(np.frombuffer(image_data, np.uint8), cv2.IMREAD_COLOR)
-            del image_data  # Clear decoded data
+            image = cv2.imdecode(
+                np.frombuffer(image_data, np.uint8),
+                cv2.IMREAD_COLOR,
+            )
+            del image_data
         else:
-            print(f"Unexpected input type: {type(image_input)}")
+            print("Unsupported image input type")
             return [], []
         if image is None or image.size == 0:
-            print("Empty image")
+            print("Empty image input")
             return [], []
-        if country == 'SDN':
-            CONFIDENCE_THRESHOLD = 0.90
-        else:
-            CONFIDENCE_THRESHOLD = 0.97
-        # Try original orientation first to avoid unnecessary processing
-        face_objs, processed_image, confidence = process_angle(image, 0)
-        if face_objs is not None and confidence >= CONFIDENCE_THRESHOLD:
-            try:
-                biggest_face = max(face_objs, key=lambda face: face['facial_area']['w'] * face['facial_area']['h'])
-                facial_area = biggest_face['facial_area']
+        # -------------------- ANGLE LOOP (NO THREADS) --------------------
-                # Double check size requirements for biggest face
-                if confidence < 1:
-                    # print(f"Confidence less than 1: {confidence}")
-                    if country == 'SDN' and (facial_area['w'] < 40 or facial_area['h'] < 50):
-                        print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 40x50)")
-                        return [], []
-                    elif country != 'SDN' and (facial_area['w'] < 80 or facial_area['h'] < 90):
-                        print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 100x100)")
-                        return [], []
+        best_face_objs = None
+        best_image = None
+        best_confidence = 0.0
-                x, y, w, h = facial_area['x'], facial_area['y'], facial_area['w'], facial_area['h']
+        for angle in (0, 90, 180, 270):
+            face_objs, processed_image, confidence = process_angle(image, angle)
-                # Minimize memory usage during color conversion
-                image_rgb = cv2.cvtColor(processed_image, cv2.COLOR_BGR2RGB)
-                face_locations = [(y, x + w, y + h, x)]
-                face_encodings = face_recognition.face_encodings(image_rgb, face_locations)
+            if confidence > best_confidence:
+                best_face_objs = face_objs
+                best_image = processed_image
+                best_confidence = confidence
+                best_angle = angle
-                if face_encodings:
-                    # print(f"Found face in original orientation with confidence {confidence}")
-                    return face_locations, face_encodings
-            finally:
-                # Clear memory
-                del processed_image, image_rgb
-                if 'face_objs' in locals():
-                    del face_objs
-        if country=='QAT':
-            return 0,0
-        # Try other angles in parallel
-        angles = [90, 180, 270]
-        best_confidence = confidence if face_objs is not None else 0
-        best_face_objs = face_objs
-        best_image = processed_image
+            if face_objs is None:
+                continue
+            else:
+                break  # Exit loop on first valid detection
-        with ThreadPoolExecutor(max_workers=3) as executor:
-            futures = {
-                executor.submit(process_angle, image, angle): angle
-                for angle in angles
-            }
+            # Keep best fallback (just in case)
-            try:
-                for future in as_completed(futures):
-                    face_objs, processed_image, confidence = future.result()
-                    if face_objs is not None:
-                        if confidence >= CONFIDENCE_THRESHOLD:
-                            # Cancel remaining tasks
-                            for f in futures:
-                                if not f.done():
-                                    f.cancel()
-                            best_face_objs = face_objs
-                            best_image = processed_image
-                            best_confidence = confidence
-                            break
-            finally:
-                for future in futures:
-                    future.cancel()
         if best_face_objs is None or best_confidence < CONFIDENCE_THRESHOLD:
-            print(f"No faces detected with confidence >= {CONFIDENCE_THRESHOLD}")
+            print(f"No valid face found (threshold={CONFIDENCE_THRESHOLD})")
             return [], []
-        try:
-            biggest_face = max(best_face_objs, key=lambda face: face['facial_area']['w'] * face['facial_area']['h'])
-            facial_area = biggest_face['facial_area']
-            # Final size check for rotated face
-            if country != 'SDN' and confidence < 1:
-                if facial_area['w'] < 80 or facial_area['h'] < 90:
-                    print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 100x100)")
-                    return [], []
-            elif country == 'SDN' and confidence < CONFIDENCE_THRESHOLD:
-                print(f"Face validation failed: Face frame too small {facial_area['w']}x{facial_area['h']} (minimum 40x50)")
-                return [], []
-            x, y, w, h = facial_area['x'], facial_area['y'], facial_area['w'], facial_area['h']
-            # Minimize memory during final processing
-            image_rgb = cv2.cvtColor(best_image, cv2.COLOR_BGR2RGB)
-            face_locations = [(y, x + w, y + h, x)]
-            face_encodings = face_recognition.face_encodings(image_rgb, face_locations)
+        # -------------------- FINAL ENCODING --------------------
-            if face_encodings:
-                return face_locations, face_encodings
+        logging.info(f"Using best angle: {best_angle} detected with confidence {best_confidence} for encodings")
+        fa = best_face_objs["facial_area"]
+        x, y, w, h = fa["x"], fa["y"], fa["w"], fa["h"]
+        image_rgb = cv2.cvtColor(best_image, cv2.COLOR_BGR2RGB)
+        face_locations = [(y, x + w, y + h, x)]
+        face_encodings = face_recognition.face_encodings(
+            image_rgb, face_locations
+        )
-            print("Failed to extract face encodings")
+        if not face_encodings:
             return [], []
-        finally:
-            # Clear final processing memory
-            del image_rgb, best_image, best_face_objs
+        return face_locations, face_encodings
     except Exception as e:
-        print(f"Error in face detection: {e}")
+        print(f"[FacePipeline] Fatal error: {e}")
         return [], []
     finally:
-        # Ensure main image is cleared
-        if 'image' in locals():
+        # Final memory cleanup
+        if "image_rgb" in locals():
+            del image_rgb
+        if "best_image" in locals():
+            del best_image
+        if "best_face_objs" in locals():
+            del best_face_objs
+        if "image" in locals():
             del image
 def calculate_similarity(face_encoding1, face_encoding2):
     face_recognition = get_face_recognition()
     similarity_score = 1 - face_recognition.face_distance([face_encoding1], face_encoding2)[0]

idvpackage/jor_passport_extraction.py CHANGED Viewed

@@ -1,260 +1,3 @@
-# import google.generativeai as genai
-# import re
-# from datetime import datetime
-# from googletrans import Translator
-# import json
-# import openai
-# import time
-# def configure_genai(api_key):
-#     genai.configure(api_key=api_key)
-#     model = genai.GenerativeModel(model_name="gemini-1.5-flash")
-#     return model
-# def genai_vision_jor(detected_text, model):
-#         result = model.generate_content(
-#             [detected_text,"\n\n", "From provided {detected_text} give me all required information in english. full_name, first_name, last_name, mother_name, passport_number, dob(Date of Birth dd/mm/yy format), Place of Birth, gender(M/F), issuing_date(dd/mm/yy format), expiry_date (dd/mm/yy format), Place of Issue, nationality,  and both lines of the MRZ, please give me  just dictionary dont write anything else - full_name, first_name, last_name, mother_name, passport_number, dob, place_of_birth, gender, issuing_date, expiry_date, issuing_place, nationality, mrz1, mrz2. Note that mrz1 is the line that starts with P<JOR and mrz2 is the line that starts with passport number, Also note if you are unable to find the passport number directly then use mrz2 inital words that comes before the symbol '<' as the passport number"]
-#         )
-#         return  result.text
-# def reformat_date(date_str):
-#     try:
-#         date_obj = datetime.strptime(date_str, '%d-%m-%Y')
-#         return date_obj.strftime('%d/%m/%Y')
-#     except ValueError:
-#         return date_str
-# def swap_dates_if_needed(data_dict):
-#     try:
-#         # Parse the dates
-#         issuing_date = datetime.strptime(data_dict['issuing_date'], '%d/%m/%Y')
-#         expiry_date = datetime.strptime(data_dict['expiry_date'], '%d/%m/%Y')
-#         if issuing_date > expiry_date:
-#             data_dict['issuing_date'], data_dict['expiry_date'] = data_dict['expiry_date'], data_dict['issuing_date']
-#             print("Dates swapped: Issuing date and expiry date were in the wrong order.")
-#     except ValueError as e:
-#         print(f"Error parsing dates: {e}")
-#     return  data_dict
-# def mrz_add(dictionary_image_half):
-#     mrz_2 = dictionary_image_half['mrz2']
-#     mrz_1 = dictionary_image_half['mrz1']
-#     mrz_data_dict = {}
-#     pattern_surname = r'P<JOR([^<]+)'
-#     match_surname = re.search(pattern_surname, mrz_1)
-#     if match_surname:
-#         mrz_data_dict['last_name_mrz'] = match_surname.group(1)
-#     pattern_given_names = r'<([^<]+)<([^<]+)<([^<]+)<<'
-#     match_given_names = re.search(pattern_given_names, mrz_1)
-#     if match_given_names:
-#         mrz_data_dict['first_name_mrz'] = match_given_names.group(1)
-#         mrz_data_dict['middle_name_1'] = match_given_names.group(2)
-#         mrz_data_dict['middle_name_2'] = match_given_names.group(3)
-#     pattern_passport = r'^([A-Z0-9]+)<'
-#     match_passport = re.search(pattern_passport, mrz_2)
-#     if match_passport:
-#         passport_number = match_passport.group(1)
-#         mrz_data_dict['passport_number'] = passport_number
-#     pattern_nationality = r'<.[A-Z]{3}'
-#     match_nationality = re.search(pattern_nationality, mrz_2)
-#     if match_nationality:
-#         nationality = match_nationality.group(0)[2:]
-#         mrz_data_dict['nationality'] = nationality
-#     pattern_birth_date = r'\d{7}<([0-9]{6})'
-#     match_birth_date = re.search(pattern_birth_date, mrz_2)
-#     if match_birth_date:
-#         birth_date_raw = match_birth_date.group(1)
-#         year_prefix = '19' if int(birth_date_raw[:2]) > 23 else '20'
-#         birth_date = f"{birth_date_raw[4:]}/{birth_date_raw[2:4]}/{year_prefix}{birth_date_raw[:2]}"
-#         mrz_data_dict['dob'] = birth_date
-#     pattern_gender = r'([MF])'
-#     match_gender = re.search(pattern_gender, mrz_2)
-#     if match_gender:
-#         gender = match_gender.group(1)
-#         mrz_data_dict['gender'] = gender
-#     pattern_expiry_date = r'[MF](\d{6})'
-#     match_expiry_date = re.search(pattern_expiry_date, mrz_2)
-#     if match_expiry_date:
-#         expiry_date_raw = match_expiry_date.group(1)
-#         year_prefix = '19' if int(expiry_date_raw[:2]) > 50 else '20'
-#         expiry_date = f"{expiry_date_raw[4:]}/{expiry_date_raw[2:4]}/{year_prefix}{expiry_date_raw[:2]}"
-#         mrz_data_dict['expiry_date'] = expiry_date
-#         for key, value in mrz_data_dict.items():
-#             if key in dictionary_image_half and dictionary_image_half[key] in ['None', None, 'N/A', '', ' ', 'NaN', 'nan', 'null']:
-#                 dictionary_image_half[key] = value
-#             elif key not in dictionary_image_half:
-#                 dictionary_image_half[key] = value
-#         if len(dictionary_image_half['last_name']) > 1:
-#             # Substitute last_name with last_name_mrz
-#             dictionary_image_half['last_name'] = dictionary_image_half['last_name_mrz']
-#     return dictionary_image_half
-# def translate_arabic_words(dictionary):
-#     translator = Translator()
-#     translated_dict = {}
-#     for key, value in dictionary.items():
-#         if key not in ['mrz1', 'mrz2']:
-#             if isinstance(value, str):
-#                 detected_lang = translator.detect(value).lang
-#                 if detected_lang == 'ar':
-#                     translated_text = translator.translate(value, src='ar', dest='en').text
-#                     translated_dict[key] = translated_text
-#                 else:
-#                     translated_dict[key] = value
-#             else:
-#                 translated_dict[key] = value
-#         else:
-#             translated_dict[key] = value
-#     return translated_dict
-# def make_api_request_with_retries(prompt: str, max_retries: int = 3, delay_seconds: float = 2):
-#     """
-#     Helper function to make API requests with retry logic using OpenAI
-#     """
-#     start_time = time.time()
-#     for attempt in range(max_retries):
-#         try:
-#             response = openai.ChatCompletion.create(
-#                 model="gpt-4o",
-#                 temperature=0.4,
-#                 max_tokens=2000,
-#                 messages=[
-#                     {
-#                         "role": "user",
-#                         "content": prompt
-#                     }
-#                 ]
-#             )
-#             result = response.choices[0].message.content
-#             try:
-#                 api_response = json.loads(result)
-#             except json.JSONDecodeError:
-#                 try:
-#                     json_match = re.search(r'```(json|python|plaintext)?\s*(.*?)\s*```|\s*({.*?})', result, re.DOTALL)
-#                     if json_match:
-#                         json_str = json_match.group(2) or json_match.group(3)
-#                         try:
-#                             api_response = json.loads(json_str)
-#                         except:
-#                             api_response = eval(json_str.replace("'", '"'))
-#                     else:
-#                         raise json.JSONDecodeError("No JSON found in response", result, 0)
-#                 except Exception as e:
-#                     print(f"Error parsing response: {str(e)}")
-#                     raise
-#             # print(f"GenAI request took {time.time() - start_time:.2f} seconds")
-#             return api_response
-#         except Exception as e:
-#             print(f"Error during API request (attempt {attempt + 1} of {max_retries}): {str(e)}")
-#             if attempt < max_retries - 1:
-#                 time.sleep(delay_seconds)
-#             else:
-#                 raise Exception(f"Max retries exceeded. Last error: {str(e)}")
-# def jordan_passport_extraction(passport_text, api_key):
-#     start_time = time.time()
-#     try:
-#         prompt = f"From provided text, give me all required information in english only. full_name, first_name, last_name, mother_name, passport_number, national_number, dob(Date of Birth dd/mm/yyyy format), Place of Birth, gender(M/F), issuing_date(dd/mm/yyyy format), expiry_date (dd/mm/yyyy format), Place of Issue, nationality, and both lines of the MRZ(mrz1, mrz2). Please give me just dictionary dont write anything else - full_name, first_name, last_name, mother_name, passport_number, national_number, dob, place_of_birth, gender, issuing_date, expiry_date, issuing_place, nationality, mrz1, mrz2. Note that mrz1 is the line that starts with P<JOR and mrz2 is the line that starts with passport number. Also note if you are unable to find the passport number directly then use mrz2 initial words that comes before the symbol '<' as the passport number. If there are any arabic words in mother_name, or place_of_birth, or authority, just keep the english words, do not ever include arabic words in the output. Leave National No. empty if not found. Here's the text: {passport_text}"
-#         passport_final_result = make_api_request_with_retries(prompt)
-#         if 'national_number' in passport_final_result:
-#             passport_final_result['passport_national_number'] = passport_final_result.get('national_number', '')
-#         # print(f"\nPassport GenAI result: {passport_final_result}\n")
-#         # try:
-#         #     passport_final_result = swap_dates_if_needed(passport_final_result)
-#         # except Exception as e:
-#         #     print(f"Error swapping dates: {e}")
-#         # try:
-#         #     passport_final_result = translate_arabic_words(passport_final_result)
-#         # except Exception as e:
-#         #     print(f"Error translating: {e}")
-#         if passport_final_result and not passport_final_result.get('passport_number', ''):
-#             passport_number_pattern = r"([A-Za-z]\d{8}|[A-Za-z]\d{7}|[A-Za-z]\d{6})"
-#             passport_number_match = re.search(passport_number_pattern, passport_text)
-#             if passport_number_match:
-#                 passport_number = passport_number_match.group(0)
-#                 if passport_number:
-#                     passport_final_result['passport_number'] = passport_number
-#                 else:
-#                     passport_number_match = re.search(passport_number_pattern, passport_final_result.get('mrz2', ''))
-#                     if passport_number_match:
-#                         passport_number = passport_number_match.group(0)
-#                         passport_final_result['passport_number'] = passport_number
-#         mrz1 = passport_final_result.get('mrz1', '')
-#         mrz2 = passport_final_result.get('mrz2', '')
-#         if mrz1 and mrz2:
-#             passport_final_result['mrz'] = f"{mrz1} {mrz2}"
-#         if "gender" in passport_final_result:
-#             gender = passport_final_result["gender"].strip().upper()
-#             if gender == "F":
-#                 passport_final_result["gender"] = "FEMALE"
-#             elif gender == "M":
-#                 passport_final_result["gender"] = "MALE"
-#         if 'gender' in passport_final_result:
-#             passport_final_result["gender"] = passport_final_result["gender"].strip().upper()
-#         if 'issuing_place' in passport_final_result:
-#             passport_final_result['place_of_issue'] = passport_final_result['issuing_place'].strip().upper()
-#         if passport_final_result.get('nationality', '') and len(passport_final_result['nationality']) > 3:
-#             passport_final_result['nationality'] = 'JOR'
-#         if not passport_final_result.get('nationality', ''):
-#             passport_final_result['nationality'] = 'JOR'
-#         passport_final_result['issuing_country'] = 'JOR'
-#         processing_time = time.time() - start_time
-#         return passport_final_result
-#     except Exception as e:
-#         processing_time = time.time() - start_time
-#         print(f"Error occurred in passport extraction: {e}")
-#         print(f"Failed processing took {processing_time:.2f} seconds")
-#         return {}
 import base64
 import time

idvpackage 3.0.9__py3-none-any.whl → 3.0.11__py3-none-any.whl

idvpackage-3.0.9-py3-none-any.whl → idvpackage-3.0.11-py3-none-any.whl