idvpackage 3.0.11__py3-none-any.whl → 3.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
idvpackage/ocr.py CHANGED
@@ -1,39 +1,30 @@
1
- from google.oauth2.service_account import Credentials
2
- from google.cloud import vision_v1
3
- import json
4
- import pycountry
5
- from googletrans import Translator
6
- from PIL import Image, ImageEnhance, ImageOps, ImageFilter
7
1
  import base64
8
2
  import io
9
- import numpy as np
10
- import cv2
11
- from datetime import datetime, timedelta
12
- import tempfile
13
- from rapidfuzz import fuzz
14
- import face_recognition
15
- import re
3
+ import json
4
+ import logging
16
5
  import os
17
- from idvpackage.constants import BRIGHTNESS_THRESHOLD, BLUR_THRESHOLD
18
- from io import BytesIO
6
+ import tempfile
19
7
  import time
20
- import logging
21
-
22
- # import anthropic
23
- import openai
24
- from idvpackage.blur_detection import is_image_blur
25
- # from idvpackage.common import (
26
- # # load_and_process_image_deepface,
27
- # load_and_process_image_deepface_all_orientations
28
- # )
8
+ from datetime import datetime, timedelta
9
+ from io import BytesIO
29
10
 
11
+ import cv2
12
+ import face_recognition
13
+ import numpy as np
14
+ import pycountry
15
+ from google.cloud import vision_v1
16
+ from google.oauth2.service_account import Credentials
17
+ from PIL import Image, ImageEnhance
30
18
 
31
- import openai
32
- from concurrent.futures import ThreadPoolExecutor, as_completed
33
- # from idvpackage.blur_detection import is_image_blur
19
+ from idvpackage.constants import BLUR_THRESHOLD, BRIGHTNESS_THRESHOLD
34
20
 
35
21
 
36
- logging.basicConfig(level=logging.INFO)
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s",
25
+ datefmt="%Y-%m-%d %H:%M:%S",
26
+ force=True,
27
+ )
37
28
 
38
29
  google_client_dict = {}
39
30
 
@@ -44,54 +35,24 @@ class IdentityVerification:
44
35
  This is the initialization function of a class that imports a spoof model and loads an OCR
45
36
  reader.
46
37
  """
47
-
48
- # logs all keys
49
- # logging.info(f"API Key: {api_key[0:8]}, GenAI Key: {genai_key}")
50
38
  try:
51
- # TODO: Make gemni and openai keys work for stg and dev environment. It's different argument placement. One code to work for both stg and dev.
52
39
  st = time.time()
53
40
  credentials_dict = json.loads(credentials_string)
54
41
  credentials = Credentials.from_service_account_info(credentials_dict)
55
-
56
42
  self.client = google_client_dict.get(credentials)
43
+
57
44
  if not self.client:
58
45
  self.client = vision_v1.ImageAnnotatorClient(credentials=credentials)
59
46
  google_client_dict[credentials] = self.client
60
47
 
61
-
62
- # self.genai_client = anthropic.Anthropic(
63
- # api_key=genai_key,
64
- # )
65
-
66
48
  self.openai_key = genai_key
67
49
 
68
- self.translator = Translator()
69
50
  self.iso_nationalities = [country.alpha_3 for country in pycountry.countries]
70
51
  logging.info(f"\nInitialization time inside IDV Package: {time.time() - st}")
71
52
 
72
53
  except Exception as e:
73
54
  logging.error(f"Error during initialization: {e}")
74
55
 
75
-
76
- def preprocess_image(
77
- self, image, sharpness=1.0, contrast=2.0, radius=2, percent=150, threshold=3
78
- ):
79
- """Preprocess the image by sharpening and enhancing contrast."""
80
-
81
- # Apply sharpening
82
- enhancer = ImageEnhance.Sharpness(image)
83
- image = enhancer.enhance(sharpness) # Sharpen the image (increase sharpness)
84
-
85
- # Enhance the contrast
86
- enhancer = ImageEnhance.Contrast(image)
87
- image = enhancer.enhance(contrast) # Increase contrast
88
-
89
- image = image.filter(
90
- ImageFilter.UnsharpMask(radius=radius, percent=percent, threshold=threshold)
91
- )
92
-
93
- return image
94
-
95
56
  def image_conversion(self, image):
96
57
  """
97
58
  This function decodes a base64 string data and returns an image object.
@@ -113,7 +74,7 @@ class IdentityVerification:
113
74
  img = img.copy()
114
75
  return img
115
76
 
116
- def rgb2yuv(self, img):
77
+
117
78
  """
118
79
  Convert an RGB image to YUV format.
119
80
  """
@@ -123,50 +84,11 @@ class IdentityVerification:
123
84
  except Exception as e:
124
85
  raise Exception(f"Error: {e}")
125
86
 
126
- def find_bright_areas(self, image, brightness_threshold):
127
- gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
128
- thresh_image = cv2.threshold(
129
- gray_image, brightness_threshold, 255, cv2.THRESH_BINARY
130
- )[1]
131
- contours, hierarchy = cv2.findContours(
132
- thresh_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
133
- )
134
-
135
- bright_areas = []
136
-
137
- for contour in contours:
138
- bounding_box = cv2.boundingRect(contour)
139
-
140
- area = bounding_box[2] * bounding_box[3]
141
-
142
- if area > 800:
143
- bright_areas.append(bounding_box)
144
-
145
- return len(bright_areas)
146
-
147
87
  def is_blurry(self, image):
148
88
  gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
149
-
150
89
  laplacian_variance = cv2.Laplacian(gray_image, cv2.CV_64F).var()
151
-
152
90
  return laplacian_variance
153
-
154
- def identify_input_type(self, data):
155
- if isinstance(data, bytes):
156
- return "video_bytes"
157
- else:
158
- pass
159
-
160
- try:
161
- decoded_data = base64.b64decode(data)
162
-
163
- if decoded_data:
164
- return "base_64"
165
- except Exception:
166
- pass
167
-
168
- return "unknown"
169
-
91
+
170
92
  def sharpen_image(self, image):
171
93
  kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
172
94
  return cv2.filter2D(image, -1, kernel)
@@ -183,16 +105,8 @@ class IdentityVerification:
183
105
  enhanced_image = enhancer.enhance(factor)
184
106
  return np.array(enhanced_image)
185
107
 
186
- def enhance_quality(self, image):
187
- sharpened_image = self.sharpen_image(image)
188
- enhanced_image = self.adjust_brightness(sharpened_image, 1.2)
189
- enhanced_contrast = self.adjust_contrast(enhanced_image, 1.2)
190
- # grayscale_image = cv2.cvtColor(enhanced_contrast, cv2.COLOR_BGR2GRAY)
191
-
192
- return enhanced_contrast
193
-
194
-
195
108
  def check_document_quality(self, data):
109
+
196
110
  video_quality = {"error": ""}
197
111
  temp_video_file = tempfile.NamedTemporaryFile(delete=False)
198
112
  temp_video_file_path = temp_video_file.name
@@ -234,14 +148,12 @@ class IdentityVerification:
234
148
  return video_quality
235
149
 
236
150
  finally:
237
-
238
151
  if video_capture is not None:
239
152
  try:
240
153
  video_capture.release()
241
154
  except Exception:
242
155
  pass
243
156
  del video_capture
244
-
245
157
  # Remove temp file
246
158
  if temp_video_file_path and os.path.exists(temp_video_file_path):
247
159
  try:
@@ -252,13 +164,10 @@ class IdentityVerification:
252
164
  # Force cleanup of OpenCV / numpy memory
253
165
  import gc
254
166
  gc.collect()
255
-
256
167
 
257
168
  def extract_selfie_from_video(self, video_capture):
258
169
  """Extract the best quality selfie from video with speed optimizations for frontal faces."""
259
170
  video_dict = {'error': ''}
260
-
261
-
262
171
  try:
263
172
  # Get rotation metadata from video
264
173
  try:
@@ -287,7 +196,6 @@ class IdentityVerification:
287
196
 
288
197
  best_face = None
289
198
  best_score = -1
290
- best_frame_position = None
291
199
  best_frame = None
292
200
 
293
201
  logging.info(f"Analyzing video with {total_frames} frames")
@@ -321,10 +229,6 @@ class IdentityVerification:
321
229
  encode_params = [cv2.IMWRITE_JPEG_QUALITY, 90]
322
230
  _, buffer = cv2.imencode(".jpg", small_frame, encode_params)
323
231
 
324
- # image = vision_v1.Image(content=buffer.tobytes())
325
- # response = self.client.face_detection(image=image, max_results=2)
326
- # faces = response.face_annotations
327
-
328
232
  image_bytes = buffer.tobytes()
329
233
  del buffer
330
234
 
@@ -442,14 +346,6 @@ class IdentityVerification:
442
346
  }
443
347
 
444
348
  if frame_best_face is not None:
445
-
446
-
447
- # frame_results.append({
448
- # 'frame': target_frame,
449
- # 'face': frame_best_face,
450
- # 'score': frame_best_score,
451
- # 'frame_data': frame.copy()
452
- # })
453
349
  logging.info(f"Frame {target_frame}: Best face score {frame_best_score:.2f} "
454
350
  f"(Frontal: {frame_best_face['frontal_score']:.2f}, "
455
351
  f"Center: {frame_best_face['center_score']:.2f}, "
@@ -460,29 +356,11 @@ class IdentityVerification:
460
356
  best_score = frame_best_score
461
357
  best_face = frame_best_face
462
358
  best_frame = frame.copy()
463
- best_frame_position = target_frame
464
359
 
465
360
  except Exception as e:
466
361
  logging.info(f"Error processing frame {target_frame}: {e}")
467
362
  continue
468
-
469
- # # Process results
470
- # if len(frame_results) > 0:
471
- # # Sort faces by score
472
- # frame_results.sort(key=lambda x: x['score'], reverse=True)
473
-
474
- # for i, result in enumerate(frame_results[:min(3, len(frame_results))]):
475
- # face_info = result['face']
476
- # print(f"Rank {i+1}: Frame {face_info['frame']}, "
477
- # f"Score: {result['score']:.2f}, "
478
- # f"Frontal: {face_info['frontal_score']:.2f}, "
479
- # f"Center: {face_info['center_score']:.2f}")
480
-
481
- # Use the best frame
482
- # best_frame = frame_best_face['frame']
483
-
484
- # print(f"Selected frame {best_face['frame']} as best selfie")
485
-
363
+
486
364
  if best_face and best_frame is not None:
487
365
  try:
488
366
  left = best_face['left']
@@ -499,7 +377,7 @@ class IdentityVerification:
499
377
  video_dict['error'] = 'invalid_cropped_face'
500
378
  return video_dict
501
379
 
502
- print(f"Face shape: {cropped_face.shape}")
380
+ logging.info(f"Face shape: {cropped_face.shape}")
503
381
  return cropped_face
504
382
 
505
383
  except Exception as e:
@@ -514,8 +392,6 @@ class IdentityVerification:
514
392
  video_dict['error'] = 'video_processing_error'
515
393
  return video_dict
516
394
 
517
-
518
-
519
395
  def is_colored(self, base64_image):
520
396
  img = self.image_conversion(base64_image)
521
397
  img = np.array(img)
@@ -541,46 +417,7 @@ class IdentityVerification:
541
417
 
542
418
  return blurred, glare
543
419
 
544
- def standardize_date(self, input_date):
545
- input_formats = [
546
- "%Y/%m/%d",
547
- "%m/%d/%Y",
548
- "%m-%d-%Y",
549
- "%Y-%m-%d",
550
- "%d-%m-%Y",
551
- "%d/%m/%Y",
552
- "%Y%m%d",
553
- "%m%d%Y",
554
- "%d%m%Y",
555
- "%Y.%m.%d",
556
- "%d.%m.%Y",
557
- "%m.%d.%Y",
558
- "%Y %m %d",
559
- "%d %m %Y",
560
- "%m %d %Y",
561
- ]
562
-
563
- for format in input_formats:
564
- try:
565
- parsed_date = datetime.strptime(input_date, format)
566
- standardized_date = parsed_date.strftime("%d/%m/%Y")
567
- return standardized_date
568
- except ValueError:
569
- pass
570
-
571
- return None
572
-
573
- def compare_dates(self, date_str1, date_str2):
574
- date_format = "%d/%m/%Y"
575
-
576
- date1 = datetime.strptime(date_str1, date_format)
577
- date2 = datetime.strptime(date_str2, date_format)
578
-
579
- if date1 == date2:
580
- return True
581
- else:
582
- return False
583
-
420
+
584
421
  def check_nationality_in_iso_list(self, nationality):
585
422
  try:
586
423
  if len(nationality) > 3:
@@ -589,185 +426,22 @@ class IdentityVerification:
589
426
  nationality = country.alpha_3
590
427
  except:
591
428
  return "consider"
592
-
593
429
  ## Handling case for OMN as it comes as MN, due to O being considered as 0
594
430
  if nationality.upper() == "MN":
595
431
  nationality = "OMN"
596
-
597
432
  if nationality.upper() in self.iso_nationalities:
598
433
  return "clear"
599
434
  else:
600
435
  return "consider"
601
-
602
436
  except:
603
437
  return "consider"
604
438
 
605
- def get_face_orientation(self, face_landmarks):
606
- left_eye = np.array(face_landmarks["left_eye"]).mean(axis=0)
607
- right_eye = np.array(face_landmarks["right_eye"]).mean(axis=0)
608
-
609
- eye_slope = (right_eye[1] - left_eye[1]) / (right_eye[0] - left_eye[0])
610
- angle = np.degrees(np.arctan(eye_slope))
611
-
612
- return angle
613
-
614
- def rotate_image(self, img):
615
- from skimage.transform import radon
616
-
617
- img_array = np.array(img)
618
-
619
- if len(img_array.shape) == 2:
620
- gray = img_array
621
- else:
622
- gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
623
-
624
- h, w = gray.shape
625
- if w > 640:
626
- gray = cv2.resize(gray, (640, int((h / w) * 640)))
627
- gray = gray - np.mean(gray)
628
- sinogram = radon(gray)
629
- r = np.array(
630
- [np.sqrt(np.mean(np.abs(line) ** 2)) for line in sinogram.transpose()]
631
- )
632
- rotation = np.argmax(r)
633
- angle = round(abs(90 - rotation) + 0.5)
634
-
635
- if abs(angle) > 5:
636
- rotated_img = img.rotate(angle, expand=True)
637
- return rotated_img
638
-
639
- return img
640
-
641
- def load_and_process_image_fr(self, base64_image, arr=False):
642
- try:
643
- if not arr:
644
- img = self.image_conversion(base64_image)
645
- img = np.array(img)
646
- image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
647
- else:
648
- if base64_image.dtype != np.uint8:
649
- base64_image = base64_image.astype(np.uint8)
650
-
651
- image = cv2.cvtColor(base64_image, cv2.COLOR_BGR2RGB)
652
-
653
- # base64_image = base64_image.split(',')[-1]
654
- # image_data = base64.b64decode(base64_image)
655
- # image_file = io.BytesIO(image_data)
656
-
657
- # image = face_recognition.load_image_file(image_file)
658
-
659
- face_locations = []
660
- face_locations = face_recognition.face_locations(image)
661
-
662
- if not face_locations:
663
- return [], []
664
-
665
- face_encodings = []
666
- face_encodings = face_recognition.face_encodings(image, face_locations)
667
-
668
- return face_locations, face_encodings
669
- except:
670
- return [], []
671
-
672
439
  def calculate_similarity(self, face_encoding1, face_encoding2):
673
440
  similarity_score = (
674
441
  1 - face_recognition.face_distance([face_encoding1], face_encoding2)[0]
675
442
  )
676
443
  return round(similarity_score + 0.25, 2)
677
444
 
678
- def try_detect_face_at_angles(self, image_array):
679
- """Try detecting faces at different angles and return the first successful detection"""
680
- angles = [0, 90, 180, 270]
681
- best_result = ([], [])
682
-
683
- for angle in angles:
684
- if angle == 0:
685
- rotated = image_array
686
- else:
687
- rotated = np.array(
688
- Image.fromarray(image_array).rotate(angle, expand=True)
689
- )
690
-
691
- face_locs, face_encs = self.load_and_process_image_fr(rotated, arr=True)
692
- if face_locs:
693
- return face_locs, face_encs, angle
694
-
695
- return [], [], None
696
-
697
- def try_detect_face_parallel(self, image_array):
698
- """Try detecting faces at different angles in parallel"""
699
-
700
- def check_angle(angle):
701
- try:
702
- # Create a copy of the image array to prevent memory issues
703
- image_copy = np.copy(image_array)
704
-
705
- if angle == 0:
706
- rotated = image_copy
707
- else:
708
- # Convert to PIL, rotate, and convert back to numpy
709
- pil_image = Image.fromarray(image_copy)
710
- rotated_pil = pil_image.rotate(angle, expand=True)
711
- rotated = np.array(rotated_pil)
712
-
713
- # Ensure the array is contiguous
714
- if not rotated.flags["C_CONTIGUOUS"]:
715
- rotated = np.ascontiguousarray(rotated)
716
-
717
- face_locs, face_encs = self.load_and_process_image_fr(rotated, arr=True)
718
- return (face_locs, face_encs, angle)
719
- except Exception as e:
720
- print(f"Error processing angle {angle}: {e}")
721
- return ([], [], None)
722
-
723
- angles = [0, 90, 180, 270]
724
- results = []
725
-
726
- try:
727
- with ThreadPoolExecutor(max_workers=4) as executor:
728
- futures = [executor.submit(check_angle, angle) for angle in angles]
729
-
730
- for future in as_completed(futures):
731
- try:
732
- face_locs, face_encs, angle = future.result()
733
- if face_locs:
734
- # Cancel remaining futures
735
- for f in futures:
736
- f.cancel()
737
- return face_locs, face_encs, angle
738
- results.append((face_locs, face_encs, angle))
739
- except Exception as e:
740
- print(f"Error getting future result: {e}")
741
- continue
742
- except Exception as e:
743
- print(f"Error in parallel processing: {e}")
744
-
745
- return [], [], None
746
-
747
- def try_detect_face_deepface(self, image_array):
748
- """Try detecting faces using DeepFace"""
749
- try:
750
- from idvpackage.common import load_and_process_image_deepface
751
-
752
- # Ensure image is in correct format
753
- if image_array is None:
754
- print("Image array is None")
755
- return [], [], None
756
-
757
- # Ensure we're working with RGB
758
- if len(image_array.shape) == 3 and image_array.shape[2] == 3:
759
- # If image is BGR, convert to RGB
760
- if image_array.dtype != np.uint8:
761
- image_array = image_array.astype(np.uint8)
762
- # No need to convert color space as the image should already be in RGB
763
- face_locs, face_encs = load_and_process_image_deepface(image_array)
764
- return face_locs, face_encs, None
765
- else:
766
- print(f"Unexpected image shape: {image_array.shape}")
767
- return [], [], None
768
- except Exception as e:
769
- print(f"Error in try_detect_face_deepface: {e}")
770
- return [], [], None
771
445
 
772
446
  def extract_face_and_compute_similarity(
773
447
  self, selfie, front_face_locations, front_face_encodings
@@ -776,7 +450,7 @@ class IdentityVerification:
776
450
 
777
451
  try:
778
452
  if selfie is None:
779
- print("Error: Selfie image is None")
453
+ logging.info("Error: Selfie image is None")
780
454
  return 0
781
455
 
782
456
  # Ensure the input array is contiguous and in the correct format
@@ -790,103 +464,31 @@ class IdentityVerification:
790
464
  else:
791
465
  selfie = selfie.astype(np.uint8)
792
466
 
793
- # Try DeepFace first as it's generally more reliable
794
- # start_time = time.time()
795
467
  face_locations1, face_encodings1 = load_and_process_image_deepface(selfie)
796
- # end_time = time.time()
797
-
468
+
798
469
  if not face_locations1 or not face_encodings1:
799
- print("No face detected in Selfie Video by DeepFace")
470
+ logging.info("No face detected in Selfie Video by DeepFace")
800
471
  return 0
801
-
802
- # print(f"Face detection took {end_time - start_time:.3f} seconds")
803
-
804
472
  face_locations2, face_encodings2 = (
805
473
  front_face_locations,
806
474
  front_face_encodings,
807
475
  )
808
-
476
+
809
477
  if not face_encodings2.any():
810
- print("No face detected in front ID")
478
+ logging.info("No face detected in front ID")
811
479
  return 0
812
480
 
813
- largest_face_index1 = face_locations1.index(
814
- max(
815
- face_locations1,
816
- key=lambda loc: (loc[2] - loc[0]) * (loc[3] - loc[1]),
817
- )
818
- )
819
- largest_face_index2 = face_locations2.index(
820
- max(
821
- face_locations2,
822
- key=lambda loc: (loc[2] - loc[0]) * (loc[3] - loc[1]),
823
- )
824
- )
825
-
826
- face_encoding1 = face_encodings1[largest_face_index1]
827
- face_encoding2 = face_encodings2[largest_face_index2]
828
-
829
- similarity_score = self.calculate_similarity(face_encoding1, face_encoding2)
830
- # print(f"Calculated similarity score: {similarity_score}")
831
-
481
+ similarity_score = self.calculate_similarity(face_encodings1[0], face_encodings2[0])
482
+ logging.info(f"Similarity score between selfie and front ID: {similarity_score}")
832
483
  return min(1, similarity_score)
833
484
 
834
485
  except Exception as e:
835
- print(f"Error in extract_face_and_compute_similarity: {e}")
836
- import traceback
837
-
838
- traceback.print_exc()
486
+ logging.info(f"Error in extract_face_and_compute_similarity: {e}")
487
+
839
488
  return 0
840
489
 
841
- def calculate_landmarks_movement(self, current_landmarks, previous_landmarks):
842
- return sum(
843
- abs(cur_point.position.x - prev_point.position.x)
844
- + abs(cur_point.position.y - prev_point.position.y)
845
- for cur_point, prev_point in zip(current_landmarks, previous_landmarks)
846
- )
847
-
848
- def calculate_face_movement(self, current_face, previous_face):
849
- return abs(current_face[0].x - previous_face[0].x) + abs(
850
- current_face[0].y - previous_face[0].y
851
- )
852
-
853
- def calculate_liveness_result(
854
- self, eyebrow_movement, nose_movement, lip_movement, face_movement
855
- ):
856
- eyebrow_movement_threshold = 15.0
857
- nose_movement_threshold = 15.0
858
- lip_movement_threshold = 15.0
859
- face_movement_threshold = 10.0
860
-
861
- if (
862
- eyebrow_movement > eyebrow_movement_threshold
863
- or nose_movement > nose_movement_threshold
864
- or lip_movement > lip_movement_threshold
865
- or face_movement > face_movement_threshold
866
- ):
867
- return True
868
- else:
869
- return False
870
-
871
- def detect_image_format(self, base64_image):
872
- import imghdr
873
490
 
874
- decoded_image = base64.b64decode(base64_image)
875
- format = imghdr.what(None, decoded_image)
876
-
877
- return format
878
-
879
- def frame_count_and_save(self, cap):
880
- frames = []
881
- status, frame = cap.read()
882
- while status:
883
- frames.append(frame)
884
- status, frame = cap.read()
885
-
886
- cap.release()
887
- return frames
888
-
889
- def check_for_liveness(self, similarity, video_bytes, face_match_threshold=0.59):
491
+ def check_for_liveness(self, video_bytes):
890
492
  st = time.time()
891
493
  from idvpackage.liveness_spoofing_v2 import test
892
494
 
@@ -905,109 +507,20 @@ class IdentityVerification:
905
507
  if result:
906
508
  return result
907
509
  except Exception as e:
908
- print(f"\nError in Liveness: {e}")
510
+ logging.info(f"\nError in Liveness: {e}")
909
511
  return None
910
512
 
911
513
  finally:
912
514
  # Ensure the temporary file is deleted
913
515
  if os.path.exists(temp_video_file_path):
914
516
  os.remove(temp_video_file_path)
915
- # print(f"Temporary file {temp_video_file_path} has been deleted.")
517
+ # logging.info(f"Temporary file {temp_video_file_path} has been deleted.")
916
518
  logging.info(
917
519
  f"--------------Time taken for Liveness and Spoofing in IDV package: {time.time() - st} seconds\n"
918
520
  )
919
521
 
920
- def reverse_date(self, date_str: str) -> str:
921
- """
922
- Convert a date from yyyy/mm/dd to dd/mm/yyyy, or vice versa.
923
- Returns empty string if invalid.
924
-
925
- TODO: vice versa conversion not implemented yet.
926
- """
927
- try:
928
- dt = datetime.strptime(date_str.strip(), "%Y/%m/%d")
929
- return dt.strftime("%d/%m/%Y")
930
- except ValueError:
931
- return ""
932
-
933
- # Example
934
-
935
- def convert_dob(self, input_date):
936
- day = input_date[4:6]
937
- month = input_date[2:4]
938
- year = input_date[0:2]
939
-
940
- current_year = datetime.now().year
941
- current_century = current_year // 100
942
- current_year_last_two_digits = current_year % 100
943
-
944
- century = current_century
945
- # If the given year is greater than the last two digits of the current year, assume last century
946
- if int(year) > current_year_last_two_digits:
947
- century = current_century - 1
948
-
949
- final_date = f"{day}/{month}/{century}{year}"
950
-
951
- return final_date
952
-
953
- def convert_expiry_date(self, input_date):
954
- day = input_date[4:6]
955
- month = input_date[2:4]
956
- year = input_date[0:2]
957
-
958
- current_year = datetime.now().year
959
- current_century = current_year // 100
960
- current_year_last_two_digits = current_year % 100
961
- century = current_century
962
-
963
- if int(year) <= current_year_last_two_digits:
964
- century = current_century
965
- else:
966
- century = current_century
967
- final_date = f"{day}/{month}/{century}{year}"
968
-
969
- return final_date
970
-
971
- def clean_string(self, input_string):
972
- cleaned_string = re.sub(r"[^\w\s]", " ", input_string)
973
- return cleaned_string.strip()
974
-
975
- def find_and_slice_number(input_number, digits):
976
- from itertools import permutations
977
-
978
- # Generate all possible permutations of the digits
979
- perms = ["".join(p) for p in permutations(digits)]
980
-
981
- # Initialize variables to keep track of the found pattern and its index
982
- found_pattern = None
983
- found_index = -1
984
-
985
- # Search for any permutation of the digits in the input_number
986
- for perm in perms:
987
- found_index = input_number.find(perm)
988
- if found_index != -1:
989
- found_pattern = perm
990
- break
991
-
992
- # If a pattern is found, slice the number accordingly
993
- if found_pattern:
994
- if found_index > len(input_number) - found_index - len(found_pattern):
995
- # Slice to the left
996
- sliced_number = input_number[: found_index + len(found_pattern)]
997
- else:
998
- # Slice to the right
999
- sliced_number = input_number[found_index:]
1000
-
1001
- return sliced_number
1002
- else:
1003
- return ""
1004
-
1005
522
  def get_ocr_results(self, processed_image, country=None, side=None):
1006
- # with io.BytesIO() as output:
1007
- # processed_image.save(output, format="PNG")
1008
- # image_data = output.getvalue()
1009
-
1010
- # image = vision_v1.types.Image(content=image_data)
523
+
1011
524
  logging.info(f"Getting OCR results for country: {country}, side: {side}")
1012
525
 
1013
526
  if country == "QAT" or country == "LBN" or country == "IRQ" or country == "SDN":
@@ -1031,36 +544,36 @@ class IdentityVerification:
1031
544
  ):
1032
545
  st = time.time()
1033
546
  document_data = {}
1034
-
547
+
1035
548
  logging.info(f"Starting extraction for document_type: {document_type}, country: {country}, side: {side}, nationality: {nationality}, \n step data: {step_data}")
1036
549
 
1037
550
  if country == "IRQ":
1038
551
  document_data = self.agent_extraction(
1039
- image, country, nationality, side, step_data
552
+ image, country,document_type, nationality, side, step_data
1040
553
  )
1041
554
  logging.info(
1042
- f"--------------Time taken for Front ID Extraction in IDV package: {time.time() - st} seconds\n"
555
+ f"--------------Time taken for {side} ID Extraction in IDV package: {time.time() - st} seconds\n"
1043
556
  )
1044
557
  return document_data
1045
558
 
1046
559
  if document_type == "national_id" and side == "front":
1047
560
  document_data = self.extract_front_id_info(image, country, nationality)
1048
561
  logging.info(
1049
- f"--------------Time taken for Front ID Extraction in IDV package: {time.time() - st} seconds\n"
562
+ f"--------------Time taken for {side} ID Extraction in IDV package: {time.time() - st} seconds\n"
1050
563
  )
1051
564
  return document_data
1052
565
 
1053
566
  if document_type == "national_id" and side == "back":
1054
567
  document_data = self.extract_back_id_info(image, country, nationality, step_data)
1055
568
  logging.info(
1056
- f"--------------Time taken for Back ID Extraction in IDV package: {time.time() - st} seconds\n"
569
+ f"--------------Time taken for {side} ID Extraction in IDV package: {time.time() - st} seconds\n"
1057
570
  )
1058
571
 
1059
572
  if document_type == "passport" and (
1060
573
  side == "first" or side == "page1" or side == ""
1061
574
  ):
1062
575
  logging.info(f"Starting passport extraction for side: {side} country: {country} nationality: {nationality}")
1063
- document_data = self.exract_passport_info(
576
+ document_data = self.extract_passport_info(
1064
577
  image, country, nationality, step_data
1065
578
  )
1066
579
  logging.info(
@@ -1078,326 +591,123 @@ class IdentityVerification:
1078
591
 
1079
592
  return document_data
1080
593
 
1081
-
1082
- def agent_extraction(self, front_id, country, nationality,side, step_data=None):
1083
- from idvpackage.ocr_utils import detect_photo_on_screen, detect_screenshot, document_on_printed_paper
1084
- from idvpackage.common import get_facial_encodings_deepface_irq, load_and_process_image_deepface, cosine_similarity
1085
- from idvpackage.iraq_id_extraction_withopenai import extraction_chain
1086
- import idvpackage.genai_utils as sanity_utils
1087
- result = {'error': '', "error_details": ''}
1088
-
1089
- logging.info(f'Starting agent_extraction for country: {country}, nationality: {nationality}, side: {side}')
1090
- try:
1091
- st = time.time()
1092
- processed_front_id = self.image_conversion(front_id)
1093
- logging.info(f'----------------Time taken for image conversion: {time.time() - st} seconds\n')
1094
- compressed_image = BytesIO()
1095
- processed_front_id.save(compressed_image, format="JPEG", quality=85, optimize=True)
1096
- compressed_image_data = compressed_image.getvalue()
1097
-
1098
- st = time.time()
1099
- front_id_text = self.get_ocr_results(compressed_image_data, country=country)
1100
- front_id_text_desc = front_id_text[0].description
1101
- logging.info(f'----------------Time taken for Google Vision API call: {time.time() - st} seconds\n')
594
def agent_extraction(self, image, country, document_type, nationality=None, side=None, step_data=None):
    """Route an extraction request to the matching document extractor.

    Routing rules, evaluated in order:

    1. ``nationality`` is set and is not ``"IRQ"``, and ``side`` is
       ``"page1"``/``"first"``  -> ``extract_passport_info``
    2. ``nationality`` is set and is not ``"IRQ"``, and ``side`` is
       ``"page2"``/``"last"``   -> ``extract_passport_info_back``
    3. ``document_type == "national_id"`` and ``side == "front"``
       -> ``extract_front_id_info``
    4. ``document_type == "national_id"`` and ``side == "back"``
       -> ``extract_back_id_info``
    5. ``document_type == "passport"`` and ``side`` is
       ``"first"``/``"page1"``  -> ``extract_passport_info``

    :param image: image payload, forwarded unchanged to the selected extractor.
    :param country: country code, forwarded unchanged to the selected extractor.
    :param document_type: ``"national_id"`` or ``"passport"``; only consulted
        by rules 3-5.
    :param nationality: holder nationality code, or ``None`` when unknown.
    :param side: document side/page identifier (see rules above).
    :param step_data: optional data from an earlier step, forwarded to the
        extractors that accept it.
    :return: whatever the selected extractor returns, or an empty dict when
        no rule matches.
    """
    is_foreign_national = nationality is not None and nationality != "IRQ"
    is_first_page = side in ("page1", "first")

    # Non-Iraqi passports are routed on nationality and side alone;
    # document_type is deliberately not consulted for these two rules.
    if is_foreign_national and is_first_page:
        logging.info(f"Starting non-iraqi passport extraction for side: {side} country: {country} nationality: {nationality}")
        return self.extract_passport_info(image, country, nationality, step_data)

    if is_foreign_national and side in ("page2", "last"):
        logging.info(f"Starting non-iraqi passport extraction for side: {side} country: {country} nationality: {nationality}")
        return self.extract_passport_info_back(image, country, nationality, step_data)

    # Iraqi national ID, front and back
    if document_type == "national_id" and side == "front":
        return self.extract_front_id_info(image, country, nationality)

    if document_type == "national_id" and side == "back":
        return self.extract_back_id_info(image, country, nationality, step_data)

    # Iraqi passport, first page
    if document_type == "passport" and is_first_page:
        return self.extract_passport_info(image, country, nationality, step_data)

    # No rule matched. Previously the function fell off the end and returned
    # None; return an empty dict instead so callers always receive a dict.
    logging.warning(
        "agent_extraction: no extractor for document_type=%s side=%s nationality=%s",
        document_type,
        side,
        nationality,
    )
    return {}
1110
623
 
1111
- if front_data.get("name", ''):
1112
- front_data['full_name'] = front_data.get('name', '')
1113
- elif front_data.get('first_name') and front_data.get("last_name"):
1114
- front_data['full_name'] = front_data.get("first_name", '') + ' ' + front_data.get("last_name", "")
624
def image_conversion_and_compression(self, image):
    """Decode a base64 image and produce a JPEG-compressed copy of it.

    The input string is base64-decoded and opened with PIL. Images that
    carry transparency are flattened onto a white RGB background, and images
    in any other mode the JPEG writer cannot encode are converted to RGB, so
    the JPEG save below does not fail on palette or alpha images.

    :param image: base64-encoded image data as a ``str``.
    :return: a ``(img, compressed_image_data)`` tuple, where ``img`` is the
        decoded PIL image (detached from the input buffer) and
        ``compressed_image_data`` is that image encoded as JPEG bytes
        (quality 75, optimized).
    """
    # Modes the JPEG writer accepts as-is; anything else is converted first.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    img_data = io.BytesIO(base64.decodebytes(bytes(image, "utf-8")))
    with Image.open(img_data) as opened:
        has_alpha = opened.mode in ("RGBA", "LA", "PA") or (
            opened.mode == "P" and "transparency" in opened.info
        )
        if has_alpha:
            # Normalise to RGBA, then paste onto white using the alpha
            # channel (index 3) as the mask.
            rgba = opened.convert("RGBA")
            img = Image.new("RGB", rgba.size, (255, 255, 255))
            img.paste(rgba, mask=rgba.split()[3])
        elif opened.mode in jpeg_writable_modes:
            # Copy so the image stays usable after the file handle closes.
            img = opened.copy()
        else:
            img = opened.convert("RGB")

    compressed_image = io.BytesIO()
    img.save(compressed_image, format="JPEG", quality=75, optimize=True)
    compressed_image_data = compressed_image.getvalue()

    return img, compressed_image_data
1126
649
 
1127
- return front_data
650
+ def extract_front_id_info(self, front_id, country, nationality=None,):
1128
651
 
1129
- if side in ['page2','last'] and nationality!='IRQ':
1130
- front_data = self.exract_passport_info_back(front_id,country,nationality)
652
+ if country == "UAE":
653
+ try:
1131
654
 
1132
- if front_data.get("name",''):
1133
- front_data['full_name'] = front_data.get('name','')
1134
- elif front_data.get('first_name') and front_data.get("last_name"):
1135
- front_data['full_name'] = front_data.get("first_name",'')+ ' ' + front_data.get("last_name","")
655
+ from idvpackage.common import load_and_process_image_deepface
1136
656
 
1137
- #handle expired passports
1138
- expiry_date = (
1139
- front_data.get("expiry_date")
1140
- or front_data.get("date_of_expiry")
1141
- )
1142
- if expiry_date:
1143
- is_doc_expired = sanity_utils.is_expired_id(expiry_date)
657
+ from idvpackage.uae_id_extraction import get_response_from_openai_uae
1144
658
 
1145
- if is_doc_expired:
1146
- return {"error": "expired_id", "error_details": "expired ID"}
659
+ front_data = {"error": "", "doc_type": "national_identity_card"}
1147
660
 
1148
- return front_data
661
+ start_time = time.time()
662
+ logging.info(f"Starting extraction for UAE front ID")
663
+ output = get_response_from_openai_uae(front_id, "front", country, self.openai_key)
664
+ logging.info(f"Time taken for UAE front ID extraction: {time.time() - start_time} seconds")
665
+
666
+ logging.info(f"UAE front ID extraction output: {json.dumps(output, ensure_ascii=False, indent=2)}")
667
+ front_data.update(output)
668
+
669
+ if not output.get("header_verified", False):
670
+ front_data["error"] = "not_front_id"
671
+ return front_data
1149
672
 
1150
- #the extra side here is for testing. So that when we test, we can pass in side='auto', instead of passing front and back seperately.
1151
- st = time.time()
1152
- result_extraction, side = extraction_chain(ocr_text=front_id_text_desc, openai_key=self.openai_key, side=side)
1153
- logging.info(f'----------------Time taken for Extraction Chain (openAI + langchain call): {time.time() - st} seconds\n')
673
+ dob = front_data.get('dob', '')
674
+ try:
675
+ if dob:
676
+ logging.info(f"Extracted DOB for age verification: {dob}")
677
+ from idvpackage.ocr_utils import is_age_18_above
678
+ is_legal_age = is_age_18_above(dob)
679
+ logging.info(f"Is legal age (18+): {is_legal_age}")
680
+ if not is_legal_age:
681
+ front_data['error'] = 'under_age'
682
+ return front_data
1154
683
 
1155
- if result_extraction['error']:
1156
- return result_extraction
1157
- result.update(result_extraction)
684
+ except Exception as e:
685
+ logging.info(f"Error in age calculation: {e}")
686
+
687
+
688
+ expiry_date = front_data.get('expiry_date', '')
689
+ try:
690
+ if expiry_date:
691
+ logging.info(f"Extracted Expiry Date for expiry verification: {expiry_date}")
692
+ from idvpackage.ocr_utils import is_expired_id
693
+ if is_expired_id(expiry_date):
694
+ front_data['error'] = 'expired_id'
695
+ logging.info(f"ID is expired with expiry date: {expiry_date}")
696
+ return front_data
697
+ except Exception as e:
698
+ logging.info(f"Error in expiry date calculation: {e}")
1158
699
 
700
+
1159
701
 
1160
- st = time.time()
1161
- image = np.array(processed_front_id)
1162
- doc_on_pp_result = 'clear'
1163
- template_result = 'clear'
1164
- logo_result = 'clear'
702
+ front_face_locations, front_face_encodings = (
703
+ load_and_process_image_deepface(front_id)
704
+ )
1165
705
 
1166
- #uses google vision api
1167
- st = time.time()
1168
- # screenshot_result = detect_screenshot(self.client, front_id)
1169
- # #
1170
- # # #uses google vision api
1171
- # photo_on_screen_result = detect_photo_on_screen(self.client, front_id)
1172
- # valid_nationality_result = self.check_nationality_in_iso_list(result.get('nationality',''))
1173
- # front_blurred, front_glare = self.get_blurred_and_glared_for_doc(image)
1174
- #logging.info(f'----------------Time taken for fraud detection attributes: {time.time() - st} seconds\n')
1175
-
1176
-
1177
- logging.info(f'----------------Time taken for gvision api calls for: detect screensot, detect_photo_on_screen, get_blurred_and_glared_for_doc: {time.time() - st} seconds\n')
1178
-
1179
- # if side=='front' or side=='page1':
1180
- # st = time.time()
1181
- # front_face_locations, front_face_encodings = load_and_process_image_deepface(front_id)
1182
- # logging.info(f'----------------Time taken for face extraction: {time.time() - st} seconds\n')
1183
- #
1184
- # front_face_locations_str = json.dumps([tuple(face_loc) for face_loc in front_face_locations])
1185
- # front_face_encodings_str = json.dumps([face_enc.tolist() for face_enc in front_face_encodings])
1186
- #
1187
- #
1188
- # if step_data:
1189
- # try:
1190
- # print(f"Matching face from Passport with National ID")
1191
- # national_id_front_face_locations = step_data.get("front_face_locations")
1192
- # national_id_front_face_encodings = json.loads(step_data.get("front_face_encodings"))
1193
- # st = time.time()
1194
- # national_id_front_face_encodings = np.array(national_id_front_face_encodings[0], dtype=np.float32)
1195
- # similarity = self.calculate_similarity(national_id_front_face_encodings, front_face_encodings[0])
1196
- # logging.info(f'----------------Time taken for face extraction for matching passport with front id: {time.time() - st} seconds\n')
1197
- # result["similarity_score"] = similarity
1198
- # print(f"Front ID and Passport similarity score: {similarity}")
1199
- # if similarity <= 0.65:
1200
- # result["error"] = 'face_mismatch'
1201
- # return {"error":"face_mismatch", "error_details":"Front ID and Passport Face dont match."}
1202
- # except Exception as e:
1203
- # result["error"] = 'National ID Image Not Found'
1204
- # result["error_details"] = e
1205
- # return result
1206
-
1207
-
1208
- if side=='front' or side=='page1':
706
+ if front_face_encodings is None or len(front_face_encodings) == 0:
707
+ front_data['error'] = 'face_not_detected'
708
+ logging.info("No face detected in front image")
709
+ return front_data
1209
710
 
1210
- st = time.time()
1211
- front_face_locations, front_face_encodings = load_and_process_image_deepface(front_id)
1212
- logging.info(f'----------------Time taken for face extraction: {time.time() - st} seconds\n')
1213
-
1214
- front_face_locations_str = json.dumps([tuple(face_loc) for face_loc in front_face_locations])
1215
- front_face_encodings_str = json.dumps([face_enc.tolist() for face_enc in front_face_encodings])
1216
-
1217
-
1218
- if step_data:
1219
- try:
1220
- print(f"Matching face from Passport with National ID")
1221
- national_id_front_face_locations = json.loads(step_data.get("front_face_locations"))
1222
- national_id_front_face_encodings = json.loads(step_data.get("front_face_encodings"))
1223
- st = time.time()
1224
-
1225
- largest_face_index1 = national_id_front_face_locations.index(
1226
- max(national_id_front_face_locations, key=lambda loc: (loc[2] - loc[0]) * (loc[3] - loc[1])))
1227
- largest_face_index2 = front_face_locations.index(
1228
- max(front_face_locations, key=lambda loc: (loc[2] - loc[0]) * (loc[3] - loc[1])))
1229
-
1230
- face_encoding1 = national_id_front_face_encodings[largest_face_index1]
1231
- face_encoding2 = front_face_encodings[largest_face_index2]
1232
-
1233
- similarity = self.calculate_similarity(face_encoding1, face_encoding2)
1234
-
1235
- logging.info(
1236
- f'----------------Time taken for face extraction for matching passport with front id: {time.time() - st} seconds\n')
1237
- result["similarity_score"] = similarity
1238
- print(f"Front ID and Passport similarity score: {similarity}")
1239
- if similarity <= 0.65:
1240
- result["error"] = 'face_mismatch'
1241
- return {"error": "face_mismatch",
1242
- "error_details": "Front ID and Passport Face dont match."}
1243
-
1244
- except Exception as e:
1245
- result["error"] = 'covered_photo'
1246
- result["error_details"] = e
1247
- return result
1248
-
1249
- if side=='front':
1250
- data_temp = {
1251
- 'front_extracted_data': front_id_text_desc,
1252
- f'translated_{side}_id_text':'',
1253
- 'front_coloured': True,
1254
- 'back_coloured':True,
1255
- 'front_doc_on_pp': doc_on_pp_result,
1256
- 'front_logo_result': logo_result,
1257
- 'front_template_result': template_result,
1258
- 'front_screenshot_result': '',
1259
- 'front_photo_on_screen_result': '',
1260
- 'front_blurred': '',
1261
- 'front_glare': '',
1262
- 'front_face_locations': front_face_locations_str,
1263
- 'front_face_encodings': front_face_encodings_str,
1264
- 'front_tampered_result': 'clear',
1265
- 'issuing_country':'IRQ',
1266
- 'valid_nationality': 'valid_nationality_result'
1267
- }
1268
-
1269
- elif side=='page1':
1270
- data_temp = {
1271
- # 'back_tampered_result': tampered_result_back,
1272
- 'passport_data': front_id_text_desc,
1273
- 'front_coloured': True,
1274
- 'back_coloured': True,
1275
- 'front_logo_result': 'clear',
1276
- 'front_doc_on_pp': doc_on_pp_result,
1277
- 'front_screenshot_result': 'screenshot_result',
1278
- 'front_photo_on_screen_result': 'photo_on_screen_result',
1279
- 'doc_on_pp': doc_on_pp_result,
1280
- 'screenshot_result': 'screenshot_result',
1281
- 'photo_on_screen_result': 'photo_on_screen_result',
1282
- 'front_blurred': 'front_blurred',
1283
- 'front_glare': 'front_glare',
1284
- 'back_blurred': 'front_blurred',
1285
- 'back_glare': 'front_glare',
1286
- 'front_face_locations': front_face_locations_str,
1287
- 'front_face_encodings': front_face_encodings_str,
1288
- 'valid_nationality': 'valid_nationality_result',
1289
- 'issuing_country': 'IRQ',
1290
- 'nationality_received': nationality
1291
- }
1292
- else:
1293
- data_temp={}
1294
-
1295
- result.update(data_temp)
1296
- required_keys = ["front_face_locations", "front_face_encodings"]
1297
- empty_string_keys = [key for key, value in result.items() if key in required_keys and value == '']
1298
- empty_string_keys = [key for key, value in result.items() if key in required_keys and value == '[]']
1299
-
1300
-
1301
-
1302
- if empty_string_keys:
1303
- result['error'] = 'covered_photo'
1304
- result['error_details'] = f"Missing fields: {empty_string_keys}"
1305
- if side=='page1':
1306
- result['passport_data'] = front_id_text_desc
1307
- if side=='back':
1308
- valid_nationality_result = self.check_nationality_in_iso_list(result.get('nationality'))
1309
- back_data_update = {
1310
- # 'back_tampered_result': tampered_result_back,
1311
- 'valid_nationality': valid_nationality_result,
1312
- 'back_extracted_data': front_id_text_desc,
1313
- 'translated_back_id_text': '',
1314
- 'back_coloured': True,
1315
- 'occupation': '',
1316
- 'employer': '',
1317
- 'doc_on_pp': doc_on_pp_result,
1318
- 'screenshot_result': 'screenshot_result',
1319
- 'photo_on_screen_result': 'photo_on_screen_result',
1320
- 'back_blurred': 'front_blurred',
1321
- 'back_glare': 'front_glare',
1322
- 'back_tampered_result':'clear'
1323
- }
1324
-
1325
- result.update(back_data_update)
1326
-
1327
-
1328
- except Exception as e:
1329
- result['error'] = 'bad_image'
1330
- result['error_details'] = e
1331
-
1332
- logging.info(f'result from agent extraction: {result}')
1333
- return result
1334
-
1335
-
1336
-
1337
- def extract_front_id_info(self, front_id, country, nationality=None):
1338
- if country == "UAE":
1339
- print("working on UAE")
1340
- from idvpackage.ocr_utils import (
1341
- detect_logo,
1342
- detect_photo_on_screen,
1343
- detect_screenshot,
1344
- document_on_printed_paper,
1345
- )
1346
- from idvpackage.common import (
1347
- detect_id_card_uae,
1348
- load_and_process_image_deepface,
1349
- )
1350
- # from idvpackage.uae_id_extraction import extract_uae_front_id
1351
- from idvpackage.uae_id_extraction import get_response_from_openai_uae
1352
-
1353
- front_data = {"error": "", "doc_type": "national_identity_card"}
1354
-
1355
- try:
1356
- # output = extract_uae_front_id(front_id)
1357
- processed_front_id = self.image_conversion(front_id)
1358
- output = get_response_from_openai_uae(processed_front_id, "front", country, self.openai_key)
1359
-
1360
- logging.info(f"UAE Front ID extraction output: {output}")
1361
- # if isinstance(output, dict):
1362
- # if output.get("error", "") == "covered_photo":
1363
- # return {
1364
- # "error": "covered_photo",
1365
- # "error_details": "Issue in extracting id number from ID card",
1366
- # }
1367
-
1368
- # processed_front_id = self.image_conversion(front_id)
1369
- # front_id_text = self.get_ocr_results(processed_front_id)
1370
- # front_id_text_desc = front_id_text[0].description
1371
- # combined_pattern = r'(Resident Identity|Identity Card|Golden Card|FEDERAL AUTHORITY FOR IDENTITY)'
1372
- # match = re.search(combined_pattern, front_id_text_desc, re.IGNORECASE)
1373
-
1374
- if not output.get("header_verified", False):
1375
- front_data["error"] = "not_front_id"
1376
- return front_data
1377
-
1378
- # img = processed_front_id
1379
- # image = np.array(img)
1380
- # pil_image = Image.fromarray(image)
1381
- #
1382
- # doc_on_pp_result = document_on_printed_paper(image)
1383
-
1384
- # with io.BytesIO() as output:
1385
- # pil_image.save(output, format="PNG")
1386
- # image_data = output.getvalue()
1387
-
1388
- # logo_result = detect_logo(self.client, image_data, country)
1389
- logo_result = "clear"
1390
- #
1391
- # screenshot_result = detect_screenshot(self.client, front_id)
1392
- # photo_on_screen_result = detect_photo_on_screen(self.client, front_id)
1393
- #
1394
- # front_blurred, front_glare = self.get_blurred_and_glared_for_doc(image)
1395
- # print(f"blurred, glare: {front_blurred, front_glare}")
1396
-
1397
- front_face_locations, front_face_encodings = (
1398
- load_and_process_image_deepface(front_id)
1399
- )
1400
- # front_face_locations, front_face_encodings = self.load_and_process_image_fr(front_id)
1401
711
 
1402
712
  front_face_locations_str = json.dumps(
1403
713
  [tuple(face_loc) for face_loc in front_face_locations]
@@ -1406,43 +716,13 @@ class IdentityVerification:
1406
716
  [face_enc.tolist() for face_enc in front_face_encodings]
1407
717
  )
1408
718
 
1409
- # tampered_result, part_text = detect_id_card_uae(self.client, image_data, front_id_text)
1410
-
1411
- # dob, expiry = '', ''
1412
- # date_matches = re.findall(r'\d{2}/\d{2}/\d{4}', front_id_text_desc)
1413
- # sorted_dates = sorted(date_matches)
1414
-
1415
- # if len(sorted_dates) > 1:
1416
- # dob = sorted_dates[0]
1417
- # expiry = sorted_dates[-1]
1418
-
1419
- # front_data = {
1420
- # 'front_extracted_data': front_id_text_desc,
1421
- # 'front_coloured': True,
1422
- # 'front_doc_on_pp': doc_on_pp_result,
1423
- # 'front_logo_result': logo_result,
1424
- # 'front_screenshot_result': screenshot_result,
1425
- # 'front_photo_on_screen_result': photo_on_screen_result,
1426
- # 'front_blurred': front_blurred,
1427
- # 'front_glare': front_glare,
1428
- # 'front_face_locations': front_face_locations_str,
1429
- # 'front_face_encodings': front_face_encodings_str,
1430
- # 'front_tampered_result': tampered_result
1431
- # }
1432
-
1433
- # front_id_text_desc = (
1434
- # str(output.id_number)
1435
- # + " "
1436
- # + "is header verified:"
1437
- # + str(output.is_header_verified)
1438
- # )
1439
- # id_number_front = str(outputid_number).replace("-", "")
1440
- front_data = {
1441
- "id_number_front": output.get("id_number",""),
1442
- "front_extracted_data": "",
719
+
720
+ front_data_update = {
721
+ "id_number_front": output.get("id_number", ""),
722
+ "front_extracted_data": "front_extracted_data",
1443
723
  "front_coloured": True,
1444
724
  "front_doc_on_pp": "clear",
1445
- "front_logo_result": logo_result,
725
+ "front_logo_result": 'clear',
1446
726
  "front_screenshot_result": "clear",
1447
727
  "front_photo_on_screen_result": "clear",
1448
728
  "front_blurred": "clear",
@@ -1452,6 +732,8 @@ class IdentityVerification:
1452
732
  "front_tampered_result": "clear",
1453
733
  }
1454
734
 
735
+ front_data.update(front_data_update)
736
+
1455
737
  non_optional_keys = [
1456
738
  "front_face_locations",
1457
739
  "front_face_encodings",
@@ -1464,275 +746,174 @@ class IdentityVerification:
1464
746
  ]
1465
747
 
1466
748
  if empty_string_keys:
1467
- front_data["error"] = "covered_photo"
749
+ front_data["error"] = "missing_key_fields"
750
+ logging.info(f"Empty string fields found in front_data: {empty_string_keys}")
751
+ return front_data
1468
752
 
753
+
1469
754
  except Exception as e:
1470
755
  front_data["error"] = "bad_image"
756
+ logging.info(f"Exception in UAE front ID extraction: {e}")
1471
757
  front_data["error_details"] = e
1472
758
 
1473
759
  return front_data
1474
760
 
1475
761
  if country == "IRQ":
1476
762
  logging.info("-------------Working on IRQ \n")
763
+ from idvpackage.common import load_and_process_image_deepface
764
+ from idvpackage.iraq_id_extraction_withopenai import (
765
+ get_response_from_openai_irq,
766
+ )
1477
767
  from idvpackage.ocr_utils import (
1478
768
  detect_photo_on_screen,
1479
769
  detect_screenshot,
1480
770
  document_on_printed_paper,
1481
771
  )
1482
- from idvpackage.iraq_id_extraction import (
1483
- iraq_front_id_extraction,
1484
- extract_mother_surname,
1485
- extract_mother_name,
1486
- extract_paternal_grandfather_name,
1487
- )
1488
- from idvpackage.common import load_and_process_image_deepface
1489
- from deep_translator import GoogleTranslator
1490
772
 
1491
- front_data = {"error": "", "doc_type": "national_identity_card"}
773
+ result = {"error": "", "doc_type": "national_identity_card"}
1492
774
 
1493
775
  try:
1494
776
  st = time.time()
1495
- processed_front_id = self.image_conversion(front_id)
1496
- logging.info(
1497
- f"----------------Time taken for image conversion: {time.time() - st} seconds\n"
1498
- )
1499
-
777
+ processed_front_id, compressed_image_data = self.image_conversion_and_compression(front_id)
778
+ logging.info(f'----------------Time taken for image conversion: {time.time() - st} seconds\n')
779
+
780
+
781
+ logging.info(f"starting the extraction using openai for IRQ front_id")
1500
782
  st = time.time()
1501
- compressed_image = BytesIO()
1502
- processed_front_id.save(
1503
- compressed_image, format="JPEG", quality=75, optimize=True
1504
- )
1505
- compressed_image_data = compressed_image.getvalue()
1506
-
1507
- front_id_text = self.get_ocr_results(
1508
- compressed_image_data, country="IRQ", side="front"
1509
- )
1510
- front_id_text_desc = front_id_text[0].description
1511
- logging.info(
1512
- f"----------------Time taken for vision: {time.time() - st} seconds\n"
1513
- )
1514
-
1515
- try:
1516
- translated_id_text = self.translator.translate(
1517
- front_id_text_desc, src="ar", dest="en"
1518
- ).text
1519
- except Exception as e:
1520
- logging.info(
1521
- f"--------------Fallback for translation keyword matching\n"
1522
- )
1523
- translated_id_text = GoogleTranslator("ar", "en").translate(
1524
- front_id_text_desc
1525
- )
1526
783
 
1527
- # logging.info(f'\n----------------Time taken for translation: {time.time() - st} seconds')
784
+ front_data = get_response_from_openai_irq(compressed_image_data, openai_key=self.openai_key,side="front")
785
+
1528
786
 
1529
- combined_pattern = r"(Ministry of Interior|Republic of Iraq|National Card|Passports and Residence|Republic|Ministry|Iraq)"
1530
- match = re.search(combined_pattern, translated_id_text, re.IGNORECASE)
787
+ logging.info(f'----------------Time taken for OpenAI and final extraction front_id: {time.time() - st} seconds\n')
1531
788
 
1532
- if not match:
1533
- front_data["error"] = "not_front_id"
1534
- return front_data
789
+ logging.info(f"front_data: {json.dumps(front_data, ensure_ascii=False, indent=2)}")
790
+
1535
791
 
1536
- st = time.time()
1537
- image = np.array(processed_front_id)
1538
- # pil_image = Image.fromarray(image)
792
+ if not front_data.get('header_verified', False):
793
+ result["error"] = "not_front_id"
794
+ return result
1539
795
 
1540
- ## TODO: uncomment this later with more sophisticated approach for doc_on_pp
1541
- # doc_on_pp_result = document_on_printed_paper(image)
1542
- doc_on_pp_result = "clear"
796
+ if not front_data.get("id_number", '').isdigit() and len(front_data.get("id_number", '')) == 12:
797
+ result["error"] = "invalid_national_number"
798
+ logging.info(f" invalid_national_number number found in front_data: {front_data.get('id_number')}")
799
+ return result
1543
800
 
1544
- # with io.BytesIO() as output:
1545
- # pil_image.save(output, format="PNG")
1546
- # image_data = output.getvalue()
801
+ if len(front_data.get("card_number", '')) != 9:
802
+ result["error"] = "invalid_document_number"
803
+ logging.info(f" invalid_document_number number found in front_data: {front_data.get('card_number')}")
804
+ return result
1547
805
 
1548
- template_result = "clear"
1549
- logo_result = "clear"
806
+ if front_data.get("gender", '').lower() not in ['male', 'female']:
807
+ logging.info(f" invalid_gender found in front_data: {front_data.get('gender')}")
808
+ front_data["gender"] = ""
1550
809
 
1551
- screenshot_result = detect_screenshot(self.client, front_id)
1552
- photo_on_screen_result = detect_photo_on_screen(self.client, front_id)
810
+ image= np.array(processed_front_id)
1553
811
 
1554
- front_blurred, front_glare = self.get_blurred_and_glared_for_doc(image)
1555
- logging.info(
1556
- f"----------------Time taken for fraud detection attributes: {time.time() - st} seconds\n"
1557
- )
812
+ if not front_data['last_name'] or not front_data['last_name_en']:
1558
813
 
1559
- st = time.time()
1560
- front_face_locations, front_face_encodings = (
1561
- load_and_process_image_deepface(front_id)
1562
- )
1563
- logging.info(
1564
- f"----------------Time taken for face extraction: {time.time() - st} seconds\n"
1565
- )
1566
- # front_face_locations, front_face_encodings = self.load_and_process_image_fr(front_id)
814
+ front_data['name'] = front_data.get('first_name', '') + " " + front_data.get('father_name', '')
815
+ front_data['name_en']= front_data.get('first_name_en', '') + " " + front_data.get('father_name_en', '')
1567
816
 
1568
- front_face_locations_str = json.dumps(
1569
- [tuple(face_loc) for face_loc in front_face_locations]
1570
- )
1571
- front_face_encodings_str = json.dumps(
1572
- [face_enc.tolist() for face_enc in front_face_encodings]
1573
- )
817
+ else:
818
+ front_data['name'] = front_data.get('first_name', '') + " " + front_data.get('father_name','') + " " + front_data.get('last_name', '')
819
+ front_data['name_en'] = front_data.get('first_name_en', '') + " " + front_data.get('father_name_en', '') + " " + front_data.get("last_name_en", '')
1574
820
 
1575
- # image_format = 'jpg'
1576
821
  st = time.time()
1577
- image_format = self.detect_image_format(front_id)
1578
-
1579
- front_data_fields = iraq_front_id_extraction(
1580
- self.client,
1581
- compressed_image_data,
1582
- front_id_text,
1583
- front_id_text_desc,
1584
- translated_id_text,
1585
- image_format,
1586
- )
1587
- logging.info(
1588
- f"----------------Time taken for iraq data formatting and final extraction: {time.time() - st} seconds\n"
1589
- )
1590
-
1591
- front_data_temp = {
1592
- "front_extracted_data": front_id_text_desc,
1593
- "translated_front_id_text": translated_id_text,
1594
- "front_coloured": True,
1595
- "front_doc_on_pp": doc_on_pp_result,
1596
- "front_logo_result": logo_result,
1597
- "front_template_result": template_result,
1598
- # 'front_tampered_result': tampered_result_front,
1599
- "front_screenshot_result": screenshot_result,
1600
- "front_photo_on_screen_result": photo_on_screen_result,
1601
- "front_blurred": front_blurred,
1602
- "front_glare": front_glare,
1603
- "front_face_locations": front_face_locations_str,
1604
- "front_face_encodings": front_face_encodings_str,
1605
- }
822
+ from idvpackage.common import load_and_process_image_deepface
823
+ front_face_locations, front_face_encodings = load_and_process_image_deepface(front_id)
824
+ logging.info(f'----------------Time taken for face extraction: {time.time() - st} seconds\n')
1606
825
 
1607
- front_data.update(front_data_fields)
1608
826
 
1609
- front_data.update(front_data_temp)
827
+ if front_face_encodings is None or len(front_face_encodings) == 0:
828
+ result['error'] = 'face_not_detected'
829
+ logging.info("No face detected in front image")
830
+ return result
1610
831
 
1611
- required_keys = [
1612
- "front_face_locations",
1613
- "front_face_encodings",
1614
- "id_number",
1615
- "name",
1616
- ]
1617
- empty_string_keys = [
1618
- key
1619
- for key, value in front_data.items()
1620
- if key in required_keys and value == ""
1621
- ]
832
+ front_face_locations_str = json.dumps([tuple(face_loc) for face_loc in front_face_locations])
833
+ front_face_encodings_str = json.dumps([face_enc.tolist() for face_enc in front_face_encodings])
834
+
835
+ # optional_fields = ('last_name', 'last_name_en','serial_number','blood_type','gender','gender_ar')
836
+ non_optional_keys = ['id_number_front',
837
+ 'card_number_front',
838
+ 'first_name',
839
+ 'first_name_en',
840
+ 'father_name',
841
+ 'father_name_en',
842
+ 'third_name',
843
+ 'third_name_en',
844
+ 'mother_first_name',
845
+ 'mother_first_name_en',
846
+ 'mother_last_name',
847
+ 'mother_last_name_en',
848
+ 'doc_type',
849
+ 'nationality',
850
+ 'nationality_en']
851
+ #check if any non optional key has empty string value
852
+ empty_string_keys = [ key for key, value in front_data.items() if key in non_optional_keys and (value == "" or value == [] or value == "[]")]
853
+ logging.info(f"Empty string keys in back_data: {empty_string_keys}")
1622
854
 
1623
855
  if empty_string_keys:
1624
- front_data["error"] = "covered_photo"
1625
-
1626
- except Exception as e:
1627
- front_data["error"] = "bad_image"
1628
- front_data["error_details"] = e
1629
-
1630
- try:
1631
- dict_mother_surname = extract_mother_surname(front_id_text_desc)
1632
- dict_mother_surname["mother_last_name_en"] = (
1633
- GoogleTranslator("ar", "en")
1634
- .translate(f"Name: {dict_mother_surname['mother_last_name']}")
1635
- .upper()
1636
- )
1637
- # Check if "NAME: " is in the translated string, and remove it
1638
- if "NAME: " in dict_mother_surname["mother_last_name_en"]:
1639
- dict_mother_surname["mother_last_name_en"] = dict_mother_surname[
1640
- "mother_last_name_en"
1641
- ].replace("NAME: ", "")
1642
-
1643
- except Exception as e:
1644
- dict_mother_surname = {
1645
- "mother_last_name": "",
1646
- "mother_last_name_en": "",
1647
- }
1648
-
1649
- front_data.update(dict_mother_surname)
1650
-
1651
- try:
1652
- dict_third_name = extract_paternal_grandfather_name(front_id_text_desc)
1653
- dict_third_name["third_name_en"] = (
1654
- GoogleTranslator("ar", "en")
1655
- .translate(dict_third_name["third_name"])
1656
- .upper()
1657
- )
1658
- except Exception as e:
1659
- dict_third_name = {"third_name_en": "", "third_name": ""}
856
+ result['error'] = 'missing_key_fields'
857
+ logging.info(f"Empty string fields found in front_data: {empty_string_keys}")
858
+ return result
1660
859
 
1661
- front_data.update(dict_third_name)
860
+ data_temp = {
861
+ 'front_extracted_data': '',
862
+ 'translated_front_id_text':'',
863
+ 'front_coloured': True,
864
+ 'back_coloured':True,
865
+ 'nationality': 'IRQ',
866
+ 'nationality_en': 'IRQ',
867
+ "doc_on_pp_result": "clear",
868
+ "template_result": "clear",
869
+ "logo_result": "clear",
870
+ 'front_doc_on_pp': "clear",
871
+ 'front_logo_result': "clear",
872
+ 'front_template_result': "clear",
873
+ 'front_screenshot_result': "clear",
874
+ 'front_photo_on_screen_result': "clear",
875
+ 'front_blurred': "clear",
876
+ 'front_glare': "clear",
877
+ 'front_face_locations': front_face_locations_str,
878
+ 'front_face_encodings': front_face_encodings_str,
879
+ 'front_tampered_result': 'clear',
880
+ 'issuing_country':'IRQ',
881
+ 'valid_nationality': "clear"
882
+ }
883
+ front_data.update(data_temp)
884
+ result.update(front_data)
885
+ return result
1662
886
 
1663
- try:
1664
- dict_mother_name = extract_mother_name(front_id_text_desc)
1665
- dict_mother_name["mother_first_name_en"] = (
1666
- GoogleTranslator("ar", "en")
1667
- .translate(f"Name: {dict_mother_name['mother_first_name']}")
1668
- .upper()
1669
- )
1670
- # Check if "NAME: " is in the string, and remove it
1671
- if "NAME: " in dict_mother_name["mother_first_name_en"]:
1672
- dict_mother_name["mother_first_name_en"] = dict_mother_name[
1673
- "mother_first_name_en"
1674
- ].replace("NAME: ", "")
1675
887
 
1676
888
  except Exception as e:
1677
- dict_mother_name = {"mother_first_name": "", "mother_first_name_en": ""}
1678
-
1679
- front_data.update(dict_mother_name)
1680
-
1681
- if front_data.get("last_name", "") and front_data.get(
1682
- "mother_first_name", ""
1683
- ):
1684
- if front_data.get("last_name") == front_data.get("mother_last_name"):
1685
- front_data["last_name"], front_data["last_name_en"] = "", ""
1686
-
1687
- return front_data
889
+ result["error"] = "bad_image"
890
+ result["error_details"] = e
891
+ return result
1688
892
 
1689
893
  if country == "QAT":
1690
- print("working on QAT with compression")
1691
- # from idvpackage.qatar_id_extraction import qatar_front_id_extraction
1692
- from idvpackage.qatar_id_extraction import get_response_from_openai_qat
894
+ logging.info(f"Working on QAT NID ....")
1693
895
  from idvpackage.common import load_and_process_image_deepface
1694
896
  from idvpackage.ocr_utils import document_on_printed_paper
897
+ from idvpackage.qatar_id_extraction import get_response_from_openai_qat
1695
898
 
1696
899
  front_data = {"error": "", "doc_type": "national_identity_card"}
1697
900
 
1698
901
  try:
902
+ processed_front_id ,compressed_image_data= self.image_conversion_and_compression(front_id)
1699
903
  st = time.time()
1700
- processed_front_id = self.image_conversion(front_id)
1701
- logging.info(
1702
- f"----------------Time taken for image conversion front: {time.time() - st} seconds\n"
1703
- )
1704
-
1705
- st = time.time()
1706
- compressed_image = BytesIO()
1707
- processed_front_id.save(
1708
- compressed_image, format="JPEG", quality=70, optimize=True
1709
- )
1710
- compressed_image_data = compressed_image.getvalue()
1711
-
1712
- st = time.time()
1713
- # front_data_fields = qatar_front_id_extraction(
1714
- # self.client,
1715
- # compressed_image_data,
1716
- # front_id_text,
1717
- # front_id_text_desc,
1718
- # openai.api_key,
1719
- # )
1720
904
  front_data_fields = get_response_from_openai_qat(
1721
905
  compressed_image_data, "front", country, self.openai_key
1722
906
  )
907
+ logging.info(f"----------------Time taken for vision front: {time.time() - st} seconds\n")
908
+ logging.info(f"front_data_fields: {json.dumps(front_data_fields, indent=2, ensure_ascii=False)}")
1723
909
 
1724
910
  front_data_fields['issuing_country'] = 'QAT'
1725
911
 
1726
912
  if not front_data_fields["header_verified"]:
1727
913
  front_data["error"] = "not_front_id"
1728
914
  return front_data
1729
-
1730
-
1731
- logging.info(f"front_data_fields: {json.dumps(front_data_fields, indent=2, ensure_ascii=False)}")
1732
915
 
1733
- logging.info(
1734
- f"----------------Time taken for qatar data formatting and final extraction front: {time.time() - st} seconds\n"
1735
- )
916
+ logging.info( f"----------------Time taken for qatar OpenAI and final extraction front: {time.time() - st} seconds\n")
1736
917
 
1737
918
  expiry_date = front_data_fields.get("expiry_date", "")
1738
919
  if expiry_date:
@@ -1744,53 +925,28 @@ class IdentityVerification:
1744
925
  except:
1745
926
  pass
1746
927
 
1747
- # front_id_text = self.get_ocr_results(
1748
- # compressed_image_data, country="QAT", side="front"
1749
- # )
1750
- # front_id_text_desc = front_id_text[0].description
1751
- # logging.info(
1752
- # f"----------------Time taken for vision front: {time.time() - st} seconds\n"
1753
- # )
1754
- # combined_pattern = r"(State of Qatar|Residency Permit)"
1755
- # match = re.search(combined_pattern, front_id_text_desc, re.IGNORECASE)
1756
-
1757
- # if not match:
1758
- # front_data["error"] = "not_front_id"
1759
- # return front_data
1760
-
1761
928
  image = np.array(processed_front_id)
1762
929
  doc_on_pp_result = document_on_printed_paper(image)
1763
930
 
1764
931
  st = time.time()
1765
- # qatar's original face encodings code
1766
932
  front_face_locations, front_face_encodings = (
1767
933
  load_and_process_image_deepface(front_id, country="QAT")
1768
934
  )
1769
935
 
1770
- if front_face_locations == 0:
1771
- return {
1772
- "error": "face_not_found",
1773
- "error_details": "Face not found at angle 0.",
1774
- }
1775
-
1776
- # front_face_locations, front_face_encodings = load_and_process_image_deepface_optimized(front_id)
1777
- logging.info(
1778
- f"----------------Time taken for face extraction front: {time.time() - st} seconds\n"
1779
- )
1780
- # front_face_locations, front_face_encodings = self.load_and_process_image_fr(front_id)
936
+
937
+ if front_face_encodings is None or len(front_face_encodings) == 0:
938
+ front_data['error'] = 'face_not_detected'
939
+ logging.info("No face detected in front image")
940
+ return front_data
941
+
942
+ logging.info(f"----------------Time taken for face extraction front: {time.time() - st} seconds\n")
1781
943
 
1782
944
  front_face_locations_str = json.dumps(
1783
945
  [tuple(face_loc) for face_loc in front_face_locations]
1784
946
  )
1785
947
  front_face_encodings_str = json.dumps(
1786
948
  [face_enc.tolist() for face_enc in front_face_encodings]
1787
- )
1788
-
1789
-
1790
-
1791
- # print(f"TAMPERING: {tampered_result_front}")
1792
- # valid_nationality_result = self.check_nationality_in_iso_list(front_data_fields.get('nationality'))
1793
-
949
+ )
1794
950
  front_data_temp = {
1795
951
  "front_extracted_data": "",
1796
952
  "valid_nationality": "clear",
@@ -1798,7 +954,6 @@ class IdentityVerification:
1798
954
  "front_doc_on_pp": "clear",
1799
955
  "front_logo_result": "clear",
1800
956
  "front_template_result": "clear",
1801
- # 'front_tampered_result': tampered_result_front,
1802
957
  "front_screenshot_result": "clear",
1803
958
  "front_photo_on_screen_result": "clear",
1804
959
  "front_blurred": "clear",
@@ -1809,9 +964,8 @@ class IdentityVerification:
1809
964
 
1810
965
  front_data_fields.update(front_data_temp)
1811
966
  front_data.update(front_data_fields)
1812
-
1813
967
 
1814
- required_keys = ["expiry", "name", "id_number"]
968
+ required_keys = ["expiry_date", "name", "id_number"]
1815
969
  empty_string_keys = [
1816
970
  key
1817
971
  for key, value in front_data.items()
@@ -1819,14 +973,14 @@ class IdentityVerification:
1819
973
  ]
1820
974
 
1821
975
  if empty_string_keys:
1822
- front_data["error"] = "covered_photo"
976
+ front_data["error"] = "missing_key_fields"
1823
977
 
1824
978
  if front_data.get("error"):
1825
979
  return front_data
1826
980
 
1827
981
 
1828
982
  except Exception as e:
1829
- print(e)
983
+ logging.info(f"exception in QAT front ID extraction: {e}")
1830
984
  front_data["error"] = "bad_image"
1831
985
  front_data["error_details"] = e
1832
986
 
@@ -1834,43 +988,28 @@ class IdentityVerification:
1834
988
 
1835
989
  if country == "LBN":
1836
990
  logging.info("----------------Working on LBN\n")
1837
- from idvpackage.ocr_utils import (
1838
- detect_logo,
1839
- detect_photo_on_screen,
991
+ from idvpackage.blur_detection import is_image_blur
992
+ from idvpackage.common import load_and_process_image_deepface
993
+ from idvpackage.lebanon_id_extraction import lebanon_id_extraction_from_text
994
+ from idvpackage.ocr_utils import (
1840
995
  detect_screenshot,
1841
996
  document_on_printed_paper,
1842
997
  )
1843
- # from idvpackage.lebanon_id_extraction import lebanon_front_id_extraction
1844
- from idvpackage.lebanon_id_extraction import lebanon_id_extraction_from_text
1845
- from idvpackage.common import load_and_process_image_deepface
1846
- from idvpackage.blur_detection import is_image_blur
1847
998
 
1848
999
  front_data = {"error": "", "doc_type": "national_identity_card"}
1849
1000
 
1850
1001
  try:
1851
1002
  st = time.time()
1852
- processed_front_id = self.image_conversion(front_id)
1853
- logging.info(
1854
- f"----------------Time taken for image conversion front: {time.time() - st} seconds\n"
1855
- )
1856
-
1857
- st = time.time()
1858
- compressed_image = BytesIO()
1859
- processed_front_id.save(
1860
- compressed_image, format="JPEG", quality=100, optimize=True
1861
- )
1862
- compressed_image_data = compressed_image.getvalue()
1003
+ processed_front_id ,compressed_image_data= self.image_conversion_and_compression(front_id)
1004
+ logging.info(f"----------------Time taken for image conversion front: {time.time() - st} seconds\n")
1863
1005
 
1006
+ st = time.time()
1864
1007
  front_id_text = self.get_ocr_results(
1865
1008
  compressed_image_data, country="LBN"
1866
1009
  )
1867
1010
  front_id_text_desc = front_id_text[0].description
1868
-
1869
1011
  logging.info(f"Extracted LBN front OCR text: {front_id_text_desc}")
1870
-
1871
- logging.info(
1872
- f"----------------Time taken for vision front: {time.time() - st} seconds\n"
1873
- )
1012
+ logging.info(f"----------------Time taken for vision front: {time.time() - st} seconds\n")
1874
1013
 
1875
1014
  # Check for blur using the new comprehensive method
1876
1015
  image = np.array(processed_front_id)
@@ -1881,36 +1020,35 @@ class IdentityVerification:
1881
1020
  fft_threshold=120,
1882
1021
  bright_reflection_min_area=1.0,
1883
1022
  ):
1884
- print(
1023
+ logging.info(
1885
1024
  f"Blur/Brightness issue detected in front image, marking as covered photo"
1886
1025
  )
1887
1026
  front_data["error"] = "blur_photo"
1888
1027
  return front_data
1889
1028
 
1890
- # Extract data and check for necessary keys
1891
1029
  st = time.time()
1892
1030
  front_data_fields = lebanon_id_extraction_from_text(front_id_text_desc, compressed_image_data, 'front', self.openai_key)
1893
1031
 
1894
- logging.info(
1895
- f"----------------Time taken for data formatting and final extraction front: {time.time() - st} seconds\n"
1896
- )
1897
-
1032
+ logging.info(f"----------------Time taken for OpenAI and final extraction front: {time.time() - st} seconds\n")
1898
1033
  logging.info(f"Extracted LBN front data fields: {json.dumps(front_data_fields, indent=2, ensure_ascii=False)}")
1899
1034
 
1900
- if front_data_fields.get("header_verified", False) is False:
1035
+ if front_data_fields.get('header_verified') is not True:
1901
1036
  front_data["error"] = "not_front_id"
1902
- return front_data
1903
-
1037
+ return front_data
1904
1038
  if 'id_number' in front_data_fields:
1905
1039
  id_number = front_data_fields['id_number']
1906
1040
  if id_number and len(id_number) < 12:
1907
1041
  front_data_fields['id_number'] = id_number.zfill(12)
1908
-
1042
+
1043
+ logging.info(f"ID Number after padding: {front_data_fields.get('id_number', '')}")
1044
+
1909
1045
  if 'id_number_ar' in front_data_fields:
1910
1046
  id_number_ar = front_data_fields['id_number_ar']
1911
1047
  if id_number_ar and len(id_number_ar) < 12:
1912
1048
  front_data_fields['id_number_ar'] = id_number_ar.rjust(12, '٠')
1913
-
1049
+
1050
+ logging.info(f"ID Number AR after padding: {front_data_fields.get('id_number_ar', '')}")
1051
+
1914
1052
  front_data_fields['issuing_country'] = 'LBN'
1915
1053
  front_data_fields['nationality'] = 'LBN'
1916
1054
  required_keys = [
@@ -1919,7 +1057,6 @@ class IdentityVerification:
1919
1057
  "dob_ar",
1920
1058
  "place_of_birth_ar",
1921
1059
  "id_number",
1922
- "expiry_date",
1923
1060
  ]
1924
1061
 
1925
1062
  dob = front_data_fields.get('dob', '')
@@ -1944,7 +1081,7 @@ class IdentityVerification:
1944
1081
  ]
1945
1082
 
1946
1083
  if empty_string_keys:
1947
- front_data["error"] = "covered_photo"
1084
+ front_data["error"] = "missing_key_fields"
1948
1085
  logging.info(
1949
1086
  f"Missing required keys in LBN front data: {empty_string_keys}"
1950
1087
  )
@@ -1953,25 +1090,16 @@ class IdentityVerification:
1953
1090
  image = np.array(processed_front_id)
1954
1091
 
1955
1092
  st = time.time()
1956
- ## TODO: doc_on_pp and detect_photo_on_screen for LBN
1957
- # doc_on_pp_result = document_on_printed_paper(image)
1958
1093
  doc_on_pp_result = "clear"
1959
- ## no logo for Lebanon ID's
1960
1094
  logo_result = "clear"
1961
1095
  template_result = "clear"
1962
- ## TODO: template matching for Lebanon ID's
1963
- # template_result = detect_logo(self.client, compressed_image_data, country, compare_type='template', side='front')
1964
- ## TODO: tampering result for Lebanon ID's - pending tampered samples
1965
- # tampered_result_front = calculate_error_difference(np.array(Image.open(io.BytesIO(base64.decodebytes(bytes(front_id, "utf-8"))))))
1966
1096
  screenshot_result = detect_screenshot(self.client, front_id)
1967
- # photo_on_screen_result = detect_photo_on_screen(self.client, image)
1968
1097
  photo_on_screen_result = "clear"
1969
1098
  front_blurred, front_glare = self.get_blurred_and_glared_for_doc(image)
1970
1099
  logging.info(
1971
1100
  f"----------------Time taken for fraud detection attributes front: {time.time() - st} seconds\n"
1972
1101
  )
1973
1102
 
1974
- # front_face_locations, front_face_encodings = self.load_and_process_image_fr(front_id)
1975
1103
  st = time.time()
1976
1104
  front_face_locations, front_face_encodings = (
1977
1105
  load_and_process_image_deepface(front_id)
@@ -1980,6 +1108,11 @@ class IdentityVerification:
1980
1108
  f"----------------Time taken for face extraction front: {time.time() - st} seconds\n"
1981
1109
  )
1982
1110
 
1111
+ if front_face_encodings is None or len(front_face_encodings) == 0:
1112
+ front_data['error'] = 'face_not_detected'
1113
+ logging.info("No face detected in front image")
1114
+ return front_data
1115
+
1983
1116
  front_face_locations_str = json.dumps(
1984
1117
  [tuple(face_loc) for face_loc in front_face_locations]
1985
1118
  )
@@ -1987,7 +1120,6 @@ class IdentityVerification:
1987
1120
  [face_enc.tolist() for face_enc in front_face_encodings]
1988
1121
  )
1989
1122
 
1990
- # print(f"TAMPERING: {tampered_result_front}")
1991
1123
 
1992
1124
  front_data_temp = {
1993
1125
  "front_extracted_data": "",
@@ -1996,7 +1128,6 @@ class IdentityVerification:
1996
1128
  "front_doc_on_pp": doc_on_pp_result,
1997
1129
  "front_logo_result": logo_result,
1998
1130
  "front_template_result": template_result,
1999
- # 'front_tampered_result': tampered_result_front,
2000
1131
  "front_screenshot_result": screenshot_result,
2001
1132
  "front_photo_on_screen_result": photo_on_screen_result,
2002
1133
  "front_blurred": front_blurred,
@@ -2016,7 +1147,6 @@ class IdentityVerification:
2016
1147
  "dob_ar",
2017
1148
  "place_of_birth_ar",
2018
1149
  "id_number",
2019
- "expiry_date",
2020
1150
  ]
2021
1151
 
2022
1152
  empty_string_keys = [
@@ -2027,7 +1157,7 @@ class IdentityVerification:
2027
1157
  ]
2028
1158
 
2029
1159
  if empty_string_keys:
2030
- front_data["error"] = "covered_photo"
1160
+ front_data["error"] = "missing_key_fields"
2031
1161
  logging.info(
2032
1162
  f"Missing required keys in LBN front data after update: {empty_string_keys}"
2033
1163
  )
@@ -2040,43 +1170,33 @@ class IdentityVerification:
2040
1170
 
2041
1171
  if country == "SDN":
2042
1172
  logging.info("----------------Working on SDN\n")
1173
+ from idvpackage.blur_detection import is_image_blur
1174
+ from idvpackage.common import load_and_process_image_deepface
2043
1175
  from idvpackage.ocr_utils import (
2044
- detect_logo,
2045
- detect_photo_on_screen,
2046
1176
  detect_screenshot,
2047
1177
  document_on_printed_paper,
2048
1178
  )
2049
- from idvpackage.sudan_passport_extraction import get_response_from_openai_sdn
2050
- from idvpackage.common import load_and_process_image_deepface
2051
- from idvpackage.blur_detection import is_image_blur
1179
+ from idvpackage.sudan_passport_extraction import (
1180
+ get_response_from_openai_sdn,
1181
+ )
2052
1182
 
2053
1183
  front_data = {"error": "", "doc_type": "national_identity_card"}
2054
1184
 
2055
1185
  try:
2056
1186
  st = time.time()
2057
- processed_front_id = self.image_conversion(front_id)
2058
- logging.info(
2059
- f"----------------Time taken for image conversion front: {time.time() - st} seconds\n"
2060
- )
2061
-
1187
+ processed_front_id,compressed_image_data = self.image_conversion_and_compression(front_id)
1188
+ logging.info(f"----------------Time taken for image conversion front: {time.time() - st} seconds\n")
1189
+
2062
1190
  st = time.time()
2063
- compressed_image = BytesIO()
2064
- processed_front_id.save(
2065
- compressed_image, format="JPEG", quality=90, optimize=True
2066
- )
2067
- compressed_image_data = compressed_image.getvalue()
2068
1191
  image = np.array(processed_front_id)
2069
-
2070
1192
  doc_on_pp_result = "clear"
2071
1193
  logo_result = "clear"
2072
1194
  template_result = "clear"
2073
1195
 
2074
- front_data_fields = get_response_from_openai_sdn(compressed_image, 'front', self.openai_key)
1196
+ front_data_fields = get_response_from_openai_sdn(compressed_image_data, 'front', self.openai_key)
2075
1197
 
2076
1198
  logging.info(f"Extracted SDN front data fields: {json.dumps(front_data_fields, indent=2, ensure_ascii=False)}")
2077
- logging.info(
2078
- f"----------------Time taken for data formatting and final extraction front: {time.time() - st} seconds\n"
2079
- )
1199
+ logging.info(f"----------------Time taken for OpenAI and final extraction front: {time.time() - st} seconds\n")
2080
1200
 
2081
1201
  if not front_data_fields['header_verified']:
2082
1202
  front_data_fields['error'] = 'not_front_id'
@@ -2086,7 +1206,6 @@ class IdentityVerification:
2086
1206
  dob = front_data_fields.get('dob', '')
2087
1207
 
2088
1208
  if dob:
2089
-
2090
1209
  try:
2091
1210
  from idvpackage.ocr_utils import is_age_18_above
2092
1211
  is_legal_age = is_age_18_above(dob)
@@ -2097,11 +1216,6 @@ class IdentityVerification:
2097
1216
  except Exception as e:
2098
1217
  logging.error(f"Error in age calculation: {e}")
2099
1218
 
2100
- else:
2101
- front_data['error'] = 'covered_photo'
2102
- logging.error(f"DOB date not found in the extracted data.")
2103
- return front_data
2104
-
2105
1219
 
2106
1220
  front_data_fields['occupation'] = front_data_fields.get('occupation_en', '')
2107
1221
 
@@ -2118,6 +1232,12 @@ class IdentityVerification:
2118
1232
  front_face_locations, front_face_encodings = (
2119
1233
  load_and_process_image_deepface(front_id, country="SDN")
2120
1234
  )
1235
+
1236
+ if front_face_encodings is None or len(front_face_encodings) == 0:
1237
+ front_data['error'] = 'face_not_detected'
1238
+ logging.info("No face detected in front image")
1239
+ return front_data
1240
+
2121
1241
  logging.info(
2122
1242
  f"----------------Time taken for face extraction front: {time.time() - st} seconds\n"
2123
1243
  )
@@ -2129,7 +1249,6 @@ class IdentityVerification:
2129
1249
  [face_enc.tolist() for face_enc in front_face_encodings]
2130
1250
  )
2131
1251
 
2132
-
2133
1252
  front_data_temp = {
2134
1253
  "front_extracted_data": "",
2135
1254
  "translated_front_id_text": "",
@@ -2166,9 +1285,10 @@ class IdentityVerification:
2166
1285
 
2167
1286
  except Exception as e:
2168
1287
  front_data["error"] = "bad_image"
2169
- print(f"-------------->> Something went wrong error trace:: {e}")
1288
+ logging.info(f"-------------->> Something went wrong error trace:: {e}")
2170
1289
  front_data["error_details"] = e
2171
1290
 
1291
+
2172
1292
 
2173
1293
  try:
2174
1294
  list_1 = front_data["name_ar"].split(" ")
@@ -2227,217 +1347,101 @@ class IdentityVerification:
2227
1347
 
2228
1348
  return front_data
2229
1349
 
2230
- def extract_back_id_info(self, back_id, country, nationality=None, step_data=None):
2231
- if country == "UAE":
2232
- from idvpackage.ocr_utils import (
2233
- detect_photo_on_screen,
2234
- detect_screenshot,
2235
- document_on_printed_paper,
2236
- )
2237
- from idvpackage.common import (
2238
- remove_special_characters1,
2239
- remove_special_characters2,
2240
- func_id_number,
2241
- convert_date_format,
2242
- detect_id_card_uae,
2243
- convert_gender,
2244
- count_digits_after_pattern,
2245
- remove_special_characters_mrz2,
2246
- validate_string,
2247
- )
2248
- from idvpackage.ocr_utils import is_valid_and_not_expired
2249
- import pytesseract
2250
- import sys
2251
-
2252
- back_data = {"error": "", "doc_type": "national_identity_card"}
1350
+ def extract_back_id_info(self, back_id, country, nationality, step_data):
1351
+
1352
+ step_data = step_data if step_data is not None else {}
2253
1353
 
1354
+ if country == 'UAE':
2254
1355
  try:
2255
- processed_back_id = self.image_conversion(back_id)
2256
- id_infos = self.get_ocr_results(processed_back_id)
2257
- text = id_infos[0].description
2258
- pattern4 = r"(Card Number|<<|ILARE|IDARE|(?=.*\bOccupation\b).*|(?=.*\bEmployer\b).*|(?=.*\bIssuing Place\b).*)"
2259
- k = re.search(pattern4, text.replace(" ", ""), re.IGNORECASE)
2260
-
2261
- if not k:
2262
- back_data["error"] = "not_back_id"
1356
+ from idvpackage.ocr_utils import (
1357
+ detect_photo_on_screen,
1358
+ detect_screenshot,
1359
+ document_on_printed_paper,
1360
+
1361
+ )
1362
+ from idvpackage.uae_id_extraction import get_response_from_openai_uae
1363
+ back_data = {"error": "", "doc_type": "national_identity_card"}
1364
+ back_data['country'] = "UAE"
1365
+
1366
+ st = time.time()
1367
+ output = get_response_from_openai_uae(back_id, 'back', 'UAE', self.openai_key)
1368
+ logging.info(f"Time Taken to get back id raw output from openai: {time.time() - st}")
1369
+
1370
+ logging.info(f"back_data output from openai: {json.dumps(output, indent=2, ensure_ascii=False)}")
1371
+
1372
+ if not output.get('back_header_verified', False):
1373
+ back_data['error'] = 'not_back_id'
2263
1374
  return back_data
2264
1375
 
2265
- original_text = text
2266
-
2267
- # print('this is original text:',original_text)
2268
-
2269
- patterns = {
2270
- "id_number": (
2271
- r"(?:ILARE|IDARE)\s*([\d\s]+)",
2272
- lambda match: match.group(0).replace(" ", "")[15:30]
2273
- if match
2274
- else "",
2275
- ),
2276
- "card_number": (
2277
- r"(?:ILARE|IDARE)(\d{1,9})",
2278
- lambda match: match.group(1) if match else "",
2279
- ),
2280
- "nationality": (
2281
- r"([A-Z]+)<<",
2282
- lambda match: match.group(1) if match else "",
2283
- ),
2284
- "gender": (
2285
- r"(?<=\d)[A-Z](?=\d)",
2286
- lambda match: match.group(0) if match else "",
2287
- ),
2288
- "dob": (
2289
- r"(\d+)[MF]",
2290
- lambda match: self.convert_dob(match.group(1)) if match else "",
2291
- ),
2292
- "expiry_date": (
2293
- r"[MF](\d+)",
2294
- lambda match: self.convert_expiry_date(match.group(1))
2295
- if match
2296
- else "",
2297
- ),
2298
- "name": (
2299
- r"(.*[A-Za-z]+<[<]+[A-Za-z].*)",
2300
- lambda match: match.group(0).replace("<", " ").strip()
2301
- if match
2302
- else "",
2303
- ),
2304
- # 'first_name': (r'<<([^<]+)', lambda match: match.group(0).replace("<", "") if match else ''),
2305
- # 'last_name': (r'([^<]+)(?=<<)', lambda match: match.group(0).replace("<", "") if match else ''),
2306
- # 'occupation': (r'Occupation:\s*([-\w\s.]+)', lambda match: match.group(1).strip().split('\n', 1)[0] if match else '', re.IGNORECASE),
2307
- # 'employer': (r'Employer:\s*([\w\s.]+)', lambda match: match.group(1).strip().split('\n', 1)[0] if match else '', re.IGNORECASE),
2308
- "place_of_issuance": (
2309
- r"Issuing Place:\s*([\w\s.]+)",
2310
- lambda match: match.group(1).strip().split("\n", 1)[0]
2311
- if match
2312
- else "",
2313
- re.IGNORECASE,
2314
- ),
2315
- "issuing_place": (
2316
- r"Issuing Place:\s*([\w\s.]+)",
2317
- lambda match: match.group(1).strip().split("\n", 1)[0]
2318
- if match
2319
- else "",
2320
- re.IGNORECASE,
2321
- ),
1376
+ update_doe_dob_gender_from_back_data = {
1377
+ "dob": step_data.get('dob', '') if step_data.get('dob', '') != '' else output.get('dob', ''),
1378
+ "expiry_date": step_data.get('expiry_date', '') if step_data.get('expiry_date',
1379
+ '') != '' else output.get(
1380
+ 'expiry_date', ''),
1381
+ "gender": step_data.get('gender', '') if step_data.get('gender', '') != '' else output.get('gender', '')
1382
+ }
1383
+
1384
+ back_date_update_from_stepdata = {
1385
+ "name": step_data.get('name', ''),
1386
+ "first_name": step_data.get('first_name', ''),
1387
+ "last_name": step_data.get('last_name', ''),
1388
+ "nationality": step_data.get('nationality', '')
2322
1389
  }
2323
1390
 
2324
- mrz_pattern = r"(ILAR.*\n*.*\n*.*\n*.*|IDAR.*\n*.*\n*.*\n*.*)"
1391
+ logging.info("Time Taken to get back id info from openai: {}".format(time.time() - st))
1392
+
1393
+ back_data.update(output)
1394
+ back_data.update(update_doe_dob_gender_from_back_data)
1395
+ back_data.update(back_date_update_from_stepdata)
1396
+
1397
+ # hard-coded as per Ola and Maryem's request.
1398
+ back_data["issuing_country"] = "ARE"
1399
+
1400
+ from idvpackage.ocr_utils import (
1401
+ normalize_date_generic,
1402
+ normalize_mrz_date
1403
+ )
2325
1404
 
1405
+ dob = back_data.get('dob', '')
2326
1406
  try:
2327
- mrz = re.findall(
2328
- mrz_pattern,
2329
- original_text.replace(" ", "").strip(),
2330
- re.MULTILINE,
2331
- )
2332
- mrz_list = mrz[0].replace(" ", "").split("\n", 3)
2333
- mrz1 = mrz_list[0]
1407
+ if dob:
1408
+ logging.info(f"Extracted DOB for age verification: {dob}")
1409
+ from idvpackage.ocr_utils import is_age_18_above
1410
+ is_legal_age = is_age_18_above(dob)
1411
+ logging.info(f"Is legal age (18+): {is_legal_age}")
1412
+ if not is_legal_age:
1413
+ back_data['error'] = 'under_age'
1414
+ return back_data
2334
1415
 
2335
- except:
2336
- mrz1 = ""
1416
+ except Exception as e:
1417
+ logging.info(f"Error in age calculation: {e}")
2337
1418
 
2338
- #### EXTRACT mrz2
2339
1419
 
2340
- # try:
2341
- # mrz2=mrz_list[1]
2342
- # except:
2343
- # mrz2=''
1420
+ expiry_date = back_data.get('expiry_date', '')
2344
1421
  try:
2345
- mrz2 = [
2346
- s
2347
- for s in [
2348
- remove_special_characters1(ele).replace(" ", "")
2349
- for ele in original_text.split("\n")
2350
- ]
2351
- if len(re.findall(r"<", s)) >= 2
2352
- and not (re.fullmatch(r"[A-Za-z<]+", s))
2353
- ][0]
1422
+ if expiry_date:
1423
+ logging.info(f"Extracted Expiry Date for expiry verification: {expiry_date}")
1424
+ from idvpackage.ocr_utils import is_expired_id
1425
+ if is_expired_id(expiry_date):
1426
+ back_data['error'] = 'expired_id'
1427
+ logging.info(f"ID is expired with expiry date: {expiry_date}")
1428
+ return back_data
1429
+ except Exception as e:
1430
+ logging.info(f"Error in expiry date calculation: {e}")
1431
+
1432
+ back_data['mrz'] = [
1433
+ back_data.get('mrz1', '') + back_data.get('mrz2', '') + back_data.get('mrz3', '')]
2354
1434
 
2355
- except:
2356
- mrz2 = ""
2357
- ### Extract mrz3
2358
- try:
2359
- mrz3 = [
2360
- s
2361
- for s in [
2362
- remove_special_characters1(ele).replace(" ", "")
2363
- for ele in original_text.split("\n")
2364
- ]
2365
- if len(re.findall(r"<", s)) >= 2
2366
- and re.fullmatch(r"[A-Za-z<]+", s)
2367
- ][0]
2368
- back_data["name"] = remove_special_characters2(mrz3[0]).strip()
2369
- back_data["last_name"] = (
2370
- remove_special_characters2(
2371
- re.search(r"([^<]+)(?=<<)", mrz3).group(0)
2372
- ).strip()
2373
- if re.search(r"([^<]+)(?=<<)", mrz3)
2374
- else ""
2375
- )
2376
- back_data["first_name"] = (
2377
- remove_special_characters2(
2378
- re.search(r"<<([^<]+)", mrz3).group(0)
2379
- ).strip()
2380
- if re.search(r"<<([^<]+)", mrz3)
2381
- else ""
2382
- )
1435
+ mrz1 = back_data.get('mrz1', '')
1436
+
1437
+ mrz1 = mrz1.strip()
1438
+ back_data['id_number_mrz'] = mrz1[-15:]
1439
+ back_data['card_number_mrz'] = mrz1[5:14]
2383
1440
 
2384
- except:
2385
- (
2386
- mrz3,
2387
- back_data["name"],
2388
- back_data["last_name"],
2389
- back_data["first_name"],
2390
- ) = "", "", "", ""
2391
-
2392
- pattern = r"ARE\d{25}"
2393
-
2394
- extracted_data_tesseract = ""
2395
-
2396
- if not re.search(pattern, original_text.replace(" ", "")):
2397
- img = self.image_conversion(back_id)
2398
- # Decode the base64 string
2399
- image_data = base64.b64decode(back_id)
2400
- # Convert to an image
2401
- with Image.open(io.BytesIO(image_data)) as image:
2402
- # Use PyTesseract to do OCR on the image
2403
- try:
2404
- extracted_data_tesseract = pytesseract.image_to_string(
2405
- image
2406
- )
2407
- match = re.search(
2408
- pattern, extracted_data_tesseract.replace(" ", "")
2409
- )
2410
- mrz1 = (mrz1[:2] + match[0]).strip()
2411
- except:
2412
- pass
2413
-
2414
- mrz1_keys = ["id_number", "card_number"]
2415
- mrz2_keys = ["nationality", "gender", "dob", "expiry_date"]
2416
- # mrz3_keys = [ 'first_name', 'last_name']
2417
-
2418
- for key, value in patterns.items():
2419
- pattern = value[0]
2420
- transform_func = value[1]
2421
- flags = value[2] if len(value) > 2 else 0
2422
-
2423
- text = original_text
2424
- if key in mrz1_keys:
2425
- text = mrz1
2426
- if key in mrz2_keys:
2427
- text = mrz2
2428
- # if key in mrz3_keys:
2429
- # text = mrz3
2430
-
2431
- match = re.search(pattern, text, flags)
2432
- back_data[key] = transform_func(match) if match else ""
2433
-
2434
- back_data.update({"mrz1": mrz1, "mrz2": mrz2, "mrz3": mrz3})
2435
-
2436
- # print("ths is gender :",back_data['gender'])
2437
1441
 
2438
1442
  try:
2439
- front_id_number = step_data.get("id_number_front", "")
2440
- back_id_number = back_data.get("id_number", "")
1443
+ front_id_number = step_data.get("id_number", "")
1444
+ back_id_number = back_data.get("id_number_mrz", "")
2441
1445
 
2442
1446
  logging.info(
2443
1447
  f"Front ID number: {front_id_number}, Back ID number: {back_id_number}"
@@ -2458,263 +1462,83 @@ class IdentityVerification:
2458
1462
  "error_details": f"Exception Thrown while comparing front id number with back id number: {e}",
2459
1463
  }
2460
1464
 
2461
- ## extracting occupation and employer
2462
- occ_word = "Occupation"
2463
- occ = ""
2464
- emp_word = "Employer"
2465
- emp = ""
2466
- try:
2467
- lines = original_text.split("\n")
2468
- for line in lines:
2469
- if occ_word in line:
2470
- start_index = line.find(occ_word)
2471
- end_index = start_index + len(occ_word)
2472
- occ = line[end_index:]
2473
- occ = self.clean_string(occ)
2474
-
2475
- if emp_word in line:
2476
- start_index1 = line.find(emp_word)
2477
- end_index1 = start_index1 + len(emp_word)
2478
- emp = line[end_index1:]
2479
- emp = self.clean_string(emp)
2480
- except:
2481
- occ = ""
2482
- emp = ""
2483
-
2484
- family_sponsor_word = "Family Sponsor"
2485
- family_sponsor = ""
2486
- try:
2487
- lines = original_text.split("\n")
2488
- for line in lines:
2489
- if family_sponsor_word in line:
2490
- start_index = line.find(family_sponsor_word)
2491
- end_index = start_index + len(family_sponsor_word)
2492
- family_sponsor = line[end_index:]
2493
- family_sponsor = self.clean_string(family_sponsor)
2494
- except:
2495
- family_sponsor = ""
2496
-
2497
- ### new rule
2498
- if len(str(back_data["id_number"])) != 15:
2499
- back_data["id_number"] = ""
2500
-
2501
- ### new rule
2502
- if len(str(back_data["card_number"])) != 9:
2503
- back_data["card_number"] = ""
2504
-
2505
- current_module = sys.modules[__name__]
2506
-
2507
- for key in ["dob", "expiry_date", "card_number", "name", "nationality"]:
2508
- # if not back_data[key] and key not in ['occupation', 'employer', 'first_name', 'last_name', 'issuing_place', 'error']:
2509
-
2510
- if not back_data[key]:
2511
- transform_func_new = getattr(current_module, f"func_{key}")
2512
- back_data[key] = transform_func_new(original_text)
2513
-
2514
- for key in ["dob", "expiry_date"]:
2515
- if not back_data[key]:
2516
- transform_func_new = getattr(current_module, f"find_{key}")
2517
- back_data[key] = transform_func_new(
2518
- original_text, back_data["mrz2"]
2519
- )
2520
-
2521
- if not back_data["id_number"]:
2522
- back_data["id_number"] = func_id_number(
2523
- original_text, back_data["dob"]
2524
- )
2525
-
2526
- if (
2527
- is_valid_and_not_expired(back_data.get("expiry_date"), country)
2528
- == "consider"
2529
- ):
2530
- back_data["error"] = "expired_document"
2531
-
2532
- ### convert the date format
2533
- if back_data["dob"]:
2534
- try:
2535
- back_data["dob"] = convert_date_format(back_data["dob"])
2536
- except:
2537
- back_data["dob"] = ""
2538
-
2539
- if back_data["expiry_date"]:
2540
- try:
2541
- back_data["expiry_date"] = convert_date_format(
2542
- back_data["expiry_date"]
2543
- )
2544
- except:
2545
- back_data["expiry_date"] = ""
2546
-
2547
- img = self.image_conversion(back_id)
2548
- if hasattr(img, "_getexif"):
2549
- orientation = 0x0112
2550
- exif = img._getexif()
2551
- if exif is not None and orientation in exif:
2552
- orientation = exif[orientation]
2553
- rotations = {
2554
- 3: Image.ROTATE_180,
2555
- 6: Image.ROTATE_270,
2556
- 8: Image.ROTATE_90,
2557
- }
2558
- if orientation in rotations:
2559
- img = img.transpose(rotations[orientation])
2560
-
2561
- image = np.array(img)
2562
- pil_image = Image.fromarray(image)
2563
-
2564
- with io.BytesIO() as output:
2565
- pil_image.save(output, format="PNG")
2566
- image_data = output.getvalue()
1465
+ back_data['id_number_front_back_mrz_match'] = False
1466
+ back_data["card_number_back_mrz_match"] = False
1467
+ back_data['dob_mrz_match']= False
1468
+ back_data['expiry_date_mrz_match']= False
1469
+ back_data['gender_mrz_match']= False
1470
+ back_data['name_mrz_match']= False
1471
+
1472
+ back_data['place_of_issuance'] = output.get('issuing_place'," ")
1473
+
1474
+ id_number_mrz_str = back_data.get('id_number_mrz', '')
1475
+ id_number_str= step_data.get('id_number', '')
1476
+ logging.info(f"ID Number from front: {id_number_str} ID Number from MRZ: {id_number_mrz_str}")
1477
+ if id_number_mrz_str and id_number_str and id_number_mrz_str == id_number_str:
1478
+ back_data['id_number_front_back_mrz_match'] = True
1479
+
1480
+ card_number_mrz_str = back_data.get('card_number_mrz', '')
1481
+ card_number_str= back_data.get('card_number', '')
1482
+
1483
+ logging.info(f"Card Number from back: {card_number_str} Card Number from MRZ: {card_number_mrz_str}")
1484
+ if card_number_mrz_str and card_number_str and card_number_mrz_str == card_number_str:
1485
+ back_data['card_number_back_mrz_match'] = True
2567
1486
 
2568
- tampered_result, third_part_text = detect_id_card_uae(
2569
- self.client, image_data, id_infos, part="third"
2570
- )
2571
- back_data["back_tampered_result"] = tampered_result
2572
-
2573
- ### layer of gender extraction
2574
- if not back_data["gender"]:
2575
- # print(f"TEXT: {third_part_text}")
2576
- mrz2 = re.search(r"\b\d{7}.*?(?:<<\d|<<\n)", third_part_text)
2577
- mrz2 = mrz2.group(0) if mrz2 else None
2578
-
2579
- gender_ptrn = r"\d{7}([A-Z])\d{4,}"
2580
- if mrz2:
2581
- gender_match = re.search(gender_ptrn, mrz2)
2582
- gender = gender_match.group(1)
2583
- back_data["gender"] = gender
2584
- else:
2585
- gender_match = re.search(gender_ptrn, third_part_text)
2586
- gender = gender_match.group(0)
2587
- back_data["gender"] = gender
2588
-
2589
- ### another layer of gender extraction + formatting
2590
- if not back_data["gender"]:
2591
- extract_no_space = original_text.replace(" ", "")
2592
- try:
2593
- pattern = r"\sM|F"
2594
- m = re.search(pattern, original_text)
2595
- back_data["gender"] = m.group(0)[-1]
2596
- except:
2597
- pattern = r"\d{3}(?:M|F)\d"
2598
- m = re.findall(pattern, extract_no_space)
2599
- if len(m) != 0:
2600
- back_data["gender"] = m[0][3:4]
2601
- else:
2602
- back_data["gender"] = ""
2603
-
2604
- ### if still no gender then one more layer of gender extraction + formatting
2605
- if not back_data["gender"]:
2606
- if not extracted_data_tesseract:
2607
- # Decode the base64 string
2608
- image_data = base64.b64decode(back_id)
2609
- # Convert to an image
2610
- with Image.open(io.BytesIO(image_data)) as image:
2611
- # Use PyTesseract to do OCR on the image
2612
- extracted_data_tesseract = pytesseract.image_to_string(
2613
- image
2614
- )
2615
-
2616
- mrzs_tesseract = [
2617
- s
2618
- for s in [
2619
- ele.replace(" ", "")
2620
- for ele in extracted_data_tesseract.split("\n")
2621
- ]
2622
- if re.search(r"<<{2,}", s)
2623
- ]
2624
- mrz3_tesseract = [
2625
- s for s in mrzs_tesseract if re.fullmatch(r"[A-Za-z<]+", s)
2626
- ]
2627
-
2628
- if mrzs_tesseract and mrz3_tesseract:
2629
- mrz2_tesseract = list(
2630
- set(mrzs_tesseract) - set(mrz3_tesseract)
2631
- )[0]
2632
- gender = mrz2_tesseract[7].lower()
2633
- if gender in ["f", "m"]:
2634
- back_data["gender"] = convert_gender(gender)
2635
- else:
2636
- back_data["gender"] = convert_gender(back_data["gender"])
1487
+ dob_back_str = back_data.get("dob", "")
1488
+ dob_back_mrz_str = back_data.get("dob_mrz", "")
1489
+ dob_back = normalize_date_generic(dob_back_str)
1490
+ dob_back_mrz = normalize_date_generic(dob_back_mrz_str)
2637
1491
 
2638
- if back_data["name"]:
2639
- back_data["name"] = re.sub(
2640
- "[^a-zA-Z]", " ", back_data["name"]
2641
- ).strip()
1492
+ logging.info(f"Normalized DOB from back: {dob_back}, from MRZ: {dob_back_mrz}")
2642
1493
 
2643
- ### new rule
2644
- if len(str(back_data["id_number"])) != 15:
2645
- back_data["id_number"] = ""
1494
+ if dob_back == dob_back_mrz:
1495
+ back_data['dob_mrz_match']= True
2646
1496
 
2647
- ### new rule
2648
- if len(str(back_data["card_number"])) != 9:
2649
- back_data["card_number"] = ""
1497
+ exp_back_str = back_data.get("expiry_date", "")
1498
+ exp_back_mrz_str = back_data.get("expiry_date_mrz", "")
1499
+ exp_back = normalize_date_generic(exp_back_str)
1500
+ exp_back_mrz = normalize_date_generic(exp_back_mrz_str)
1501
+ logging.info(f"Normalized Expiry Date from back: {exp_back}, from MRZ: {exp_back_mrz}")
1502
+ if exp_back == exp_back_mrz_str:
1503
+ back_data['expiry_date_mrz_match']= True
2650
1504
 
2651
- count = count_digits_after_pattern(mrz2)
1505
+
1506
+ if back_data.get('gender_mrz','')=='M':
1507
+ back_data['gender_mrz'] = 'Male'
1508
+ elif back_data.get('gender_mrz','')=='F':
1509
+ back_data['gender_mrz'] = 'Female'
2652
1510
 
2653
- if count > 1:
2654
- mrz2 = mrz2[: -int(count - 1)]
1511
+ gender_mrz = back_data.get("gender_mrz", "")
1512
+ gender_back = back_data.get('gender', '')
1513
+ logging.info(f"Gender from card {gender_back} and gender from mrz: {gender_mrz}")
1514
+ if gender_back == gender_mrz:
1515
+ logging.info(f"Gender from back: {gender_back}, gender from MRZ: {gender_mrz}")
1516
+ back_data['gender_mrz_match'] = True
2655
1517
 
2656
- ## fix a special case where O comes as zero
2657
1518
 
2658
- if re.sub(r"O([A-Z]{3})", r"0\1", mrz2):
2659
- mrz2 = re.sub(r"O([A-Z]{3})", r"0\1", mrz2)
1519
+ from idvpackage.ocr_utils import get_name_match_mrz
2660
1520
 
2661
- if not validate_string(remove_special_characters_mrz2(mrz2)):
2662
- if (
2663
- (back_data["mrz2"])
2664
- and (back_data["gender"])
2665
- and (back_data["mrz2"][-1].isdigit())
2666
- ):
2667
- try:
2668
- # Regular expression to extract two sequences of 7 digits
2669
- matches = re.findall(r"\d{7}", back_data["mrz2"])
2670
-
2671
- # Check if we found two sequences
2672
- extracted_digits = (
2673
- matches[:2] if len(matches) >= 2 else None
2674
- )
2675
-
2676
- if extracted_digits:
2677
- mrz2 = (
2678
- extracted_digits[0]
2679
- + back_data["gender"][:1]
2680
- + extracted_digits[-1]
2681
- + "<<<<<<<<<<<"
2682
- + back_data["mrz2"][-1]
2683
- )
2684
- except:
2685
- mrz2 = ""
2686
- else:
2687
- mrz2 = ""
2688
-
2689
- if len(back_data["nationality"]) > 3:
2690
- back_data["nationality"] = back_data["nationality"][-3:]
2691
-
2692
- ### check if teh extracted nationality is valid
2693
- valid_nationality_result = self.check_nationality_in_iso_list(
2694
- back_data.get("nationality")
2695
- )
1521
+ back_data["is_name_match_mrz"],back_data["name_mrz"] = get_name_match_mrz(back_data,'national_identity_card')
1522
+ logging.info(f"Name from back: {back_data.get('name','')}, name from MRZ: {back_data.get('name_mrz','')}")
2696
1523
 
2697
1524
  img = self.image_conversion(back_id)
2698
1525
  image = np.array(img)
2699
- # pil_image = Image.fromarray(image)
2700
1526
 
2701
1527
  doc_on_pp_result = document_on_printed_paper(image)
2702
1528
  screenshot_result = detect_screenshot(self.client, back_id)
2703
1529
  photo_on_screen_result = detect_photo_on_screen(self.client, back_id)
2704
1530
  back_blurred, back_glare = self.get_blurred_and_glared_for_doc(image)
2705
- # print(f"blurred, glare: {back_blurred, back_glare}")
1531
+
1532
+ valid_nationality_result = self.check_nationality_in_iso_list(
1533
+ back_data.get("nationality")
1534
+ )
2706
1535
 
2707
1536
  back_data_update = {
1537
+ "back_extracted_data":"back_extracted_data",
2708
1538
  "valid_nationality": valid_nationality_result,
2709
- "back_extracted_data": original_text,
1539
+ "back_extracted_data": 'original_text',
2710
1540
  "back_coloured": True,
2711
- "mrz": mrz,
2712
- "mrz1": mrz1,
2713
- "mrz2": mrz2,
2714
- "mrz3": mrz3,
2715
- "occupation": occ,
2716
- "employer": emp,
2717
- "family_sponsor": family_sponsor,
1541
+
2718
1542
  "doc_on_pp": doc_on_pp_result,
2719
1543
  "screenshot_result": screenshot_result,
2720
1544
  "photo_on_screen_result": photo_on_screen_result,
@@ -2723,7 +1547,7 @@ class IdentityVerification:
2723
1547
  }
2724
1548
 
2725
1549
  back_data.update(back_data_update)
2726
- back_data["issuing_country"] = "ARE"
1550
+
2727
1551
 
2728
1552
  non_optional_keys = [
2729
1553
  "id_number",
@@ -2745,194 +1569,240 @@ class IdentityVerification:
2745
1569
  ]
2746
1570
 
2747
1571
  if empty_string_keys:
2748
- back_data["error"] = "covered_photo"
1572
+ back_data["error"] = "missing_key_fields"
1573
+ logging.info(f"Missing fields: {empty_string_keys}")
1574
+
2749
1575
 
2750
1576
  except Exception as e:
2751
- back_data["error"] = "bad_image"
2752
- back_data["error_details"] = e
1577
+ back_data = {"error": "bad_image"}
1578
+ back_data["error_details"] = f"Exception Thrown: {e}"
1579
+ return back_data
2753
1580
 
2754
1581
  return back_data
2755
1582
 
2756
1583
  if country == "IRQ":
2757
- back_data = {"error": "", "doc_type": "national_identity_card"}
1584
+ result = {"error": "", "doc_type": "national_identity_card"}
1585
+ from idvpackage.iraq_id_extraction_withopenai import (
1586
+ get_response_from_openai_irq,
1587
+ )
2758
1588
  from idvpackage.ocr_utils import (
2759
1589
  detect_photo_on_screen,
2760
1590
  detect_screenshot,
2761
1591
  document_on_printed_paper,
2762
1592
  )
2763
- from idvpackage.iraq_id_extraction import (
2764
- iraq_back_id_extraction,
2765
- update_family_number_cases,
2766
- extract_family_number,
2767
- )
2768
-
2769
1593
  try:
2770
- processed_back_id = self.image_conversion(back_id)
2771
-
2772
1594
  st = time.time()
2773
- compressed_image = BytesIO()
2774
- processed_back_id.save(
2775
- compressed_image, format="JPEG", quality=75, optimize=True
2776
- )
2777
- compressed_image_data = compressed_image.getvalue()
2778
-
2779
- id_infos = self.get_ocr_results(
2780
- compressed_image_data, country="IRQ", side="front"
2781
- )
2782
- text = id_infos[0].description
2783
- logging.info(
2784
- f"----------------Time taken for vision: {time.time() - st} seconds\n"
2785
- )
2786
-
2787
- # print(f"\nORIGINAL TEXT: {text}\n")
2788
- # translated_id_text = self.translator.translate(text, from_lang='ar', to_lang='en').text
2789
- try:
2790
- translated_id_text = self.translator.translate(
2791
- text, src="ar", dest="en"
2792
- ).text
2793
- except Exception as e:
2794
- logging.info(
2795
- f"--------------Fallback for translation keyword matching\n"
2796
- )
2797
- from deep_translator import GoogleTranslator
2798
-
2799
- translated_id_text = GoogleTranslator("ar", "en").translate(text)
2800
-
2801
- # print(f"\nTRANS: {translated_id_text}\n")
2802
-
2803
- pattern4 = r"(Register|Signature|IDIRQ|Family number|The Directorate of Nationality|IDIR)"
2804
- k = re.search(pattern4, translated_id_text, re.IGNORECASE)
2805
-
2806
- if not k:
2807
- k1 = re.search(pattern4, text, re.IGNORECASE)
2808
- if not k1:
2809
- back_data["error"] = "not_back_id"
2810
- return back_data
2811
-
2812
- original_text = text
2813
-
2814
- # print('this is original text:',original_text)
2815
- image = np.array(processed_back_id)
2816
- # pil_image = Image.fromarray(image)
2817
-
2818
- # with io.BytesIO() as output:
2819
- # pil_image.save(output, format="PNG")
2820
- # image_data = output.getvalue()
2821
-
2822
- # template_result='clear'
2823
- # tampered_result_back = calculate_error_difference(np.array(Image.open(io.BytesIO(base64.decodebytes(bytes(back_id, "utf-8"))))))
2824
-
2825
- # image_format = 'jpg'
2826
- image_format = self.detect_image_format(back_id)
2827
-
1595
+ processed_back_id,compressed_image_data = self.image_conversion_and_compression(back_id)
1596
+ logging.info(f'----------------Time taken for image conversion: {time.time() - st} seconds\n')
1597
+
1598
+ logging.info(f"starting the extraction using openai for IRQ back id")
2828
1599
  st = time.time()
2829
- back_extraction_result = iraq_back_id_extraction(
2830
- self.client,
2831
- compressed_image_data,
2832
- id_infos,
2833
- original_text,
2834
- image_format,
2835
- )
2836
- logging.info(
2837
- f"--------------Time taken for back id data formatting final extraction: {time.time() - st}\n"
2838
- )
1600
+ back_data = get_response_from_openai_irq(compressed_image_data, openai_key=self.openai_key,side="back")
1601
+ logging.info(f'----------------Time taken for OpenAI and final extraction back id: {time.time() - st} seconds\n')
2839
1602
 
2840
- error = ""
2841
- expiry_date = back_extraction_result.get("expiry_date", "")
2842
-
2843
- if expiry_date:
2844
- try:
2845
- dt_obj = datetime.strptime(expiry_date, "%d/%m/%Y")
2846
- tomorrow = datetime.today() + timedelta(days=1)
2847
- if dt_obj < tomorrow:
2848
- back_data["error"] = "expired_id"
2849
- except:
2850
- pass
2851
-
2852
- back_data.update(back_extraction_result)
2853
- valid_nationality_result = self.check_nationality_in_iso_list(
2854
- back_data.get("nationality")
2855
- )
2856
- # doc_on_pp_result = document_on_printed_paper(image)
2857
- doc_on_pp_result = "clear"
2858
- screenshot_result = detect_screenshot(self.client, back_id)
2859
- photo_on_screen_result = detect_photo_on_screen(self.client, back_id)
2860
- back_blurred, back_glare = self.get_blurred_and_glared_for_doc(image)
2861
-
2862
- back_data_update = {
2863
- # 'back_tampered_result': tampered_result_back,
2864
- "valid_nationality": valid_nationality_result,
2865
- "back_extracted_data": original_text,
2866
- "translated_back_id_text": translated_id_text,
2867
- "back_coloured": True,
2868
- "occupation": "",
2869
- "employer": "",
2870
- "doc_on_pp": doc_on_pp_result,
2871
- "screenshot_result": screenshot_result,
2872
- "photo_on_screen_result": photo_on_screen_result,
2873
- "back_blurred": back_blurred,
2874
- "back_glare": back_glare,
2875
- }
1603
+ logging.info(f"back_data: {json.dumps(back_data, ensure_ascii=False, indent=2)}")
1604
+ if not back_data.get('header_verified', False):
1605
+ back_data["error"] = "not_back_id"
1606
+ return back_data
1607
+
1608
+ doc_on_pp_result="clear"
1609
+
1610
+ from idvpackage.ocr_utils import (
1611
+ normalize_date_generic,
1612
+ )
2876
1613
 
2877
- back_data.update(back_data_update)
1614
+ if back_data.get('dob', None):
1615
+ from idvpackage.ocr_utils import is_age_18_above
1616
+ if not is_age_18_above(back_data.get('dob', None)):
1617
+ result['error'] = 'under_age'
1618
+ logging.info("Passport holder is under age")
1619
+ return result
2878
1620
 
2879
- non_optional_keys = [
2880
- "id_number",
2881
- "card_number",
2882
- "dob",
2883
- "expiry_date",
2884
- "nationality",
2885
- ]
2886
- empty_string_keys = [
2887
- key
2888
- for key, value in back_data.items()
2889
- if key in non_optional_keys and value == ""
2890
- ]
1621
+ if back_data.get('expiry_date'):
1622
+ from idvpackage.ocr_utils import is_expired_id
1623
+ if is_expired_id(back_data.get('expiry_date', None)):
1624
+ result['error'] = 'expired_id'
1625
+ logging.info(f"ID is expired with expiry date: {back_data.get('expiry_date','')}")
1626
+ return result
2891
1627
 
2892
- if empty_string_keys:
2893
- back_data["error"] = "covered_photo"
2894
1628
 
2895
- except Exception as e:
2896
- back_data["error"] = "bad_image"
2897
- back_data["error_details"] = e
1629
+ try:
1630
+ expiry_date_obj = datetime.strptime(back_data.get("expiry_date", ""), "%d/%m/%Y")
1631
+ except Exception as e:
1632
+ logging.info(f"Error in parsing expiry date for IRQ ID: {e}")
1633
+ expiry_date_obj = None
1634
+ try:
1635
+ expiry_date_mrz_obj = datetime.strptime(back_data.get("expiry_date_mrz", ""), "%d/%m/%Y")
1636
+ except Exception as e:
1637
+ logging.info(f"Error in parsing expiry date mrz for IRQ ID: {e}")
1638
+ expiry_date_mrz_obj = None
1639
+ try:
1640
+ issue_date_obj = datetime.strptime(back_data.get("issue_date", ""), "%d/%m/%Y")
1641
+ except Exception as e:
1642
+ logging.info(f"Error in parsing issue date for IRQ ID: {e}")
1643
+ issue_date_obj = None
1644
+
1645
+ if expiry_date_obj and issue_date_obj:
1646
+ try:
1647
+ difference_in_days_mrz_obj = (expiry_date_obj - issue_date_obj).days
1648
+ back_data["valid_id_duration_mrz"] = difference_in_days_mrz_obj in [3651, 3652, 3653]
2898
1649
 
2899
- try:
2900
- family_num_dict = extract_family_number(text)
2901
- family_num_dict = update_family_number_cases(family_num_dict, text)
1650
+ except Exception as e:
1651
+ result['error'] = 'expiry_issue_date_difference_error'
1652
+ logging.info(f"Error in calculating difference between expiry and issue date for IRQ ID: {e}")
1653
+ return result
1654
+
1655
+ dob_back_str = back_data.get("dob", "")
1656
+ dob_back_mrz_str = back_data.get("dob_mrz", "")
1657
+ dob_back = normalize_date_generic(dob_back_str)
1658
+ dob_back_mrz = normalize_date_generic(dob_back_mrz_str)
1659
+
1660
+ if dob_back == dob_back_mrz:
1661
+ logging.info(f"Normalized DOB from back: {dob_back}, from MRZ: {dob_back_mrz}")
1662
+ back_data['dob_back_and_mrz_match']= True
1663
+ else:
1664
+ logging.info(f"Mismatch in DOB from back: {dob_back}, from MRZ: {dob_back_mrz}")
1665
+ back_data['dob_back_and_mrz_match']= False
2902
1666
 
2903
- back_data["family_number_en"] = family_num_dict["family_number"]
2904
- back_data["family_number"] = family_num_dict["family_number"]
2905
- except Exception as e:
2906
- back_data["family_number_en"] = None
2907
- back_data["family_number"] = None
1667
+ back_data['exp_date_back_and_mrz_match']= False
1668
+ back_data['gender_front_and_back_match']= False
1669
+ back_data['id_number_front_back_match'] = False
1670
+ back_data['card_number_front_back_match'] = False
1671
+
1672
+ card_number_mrz_str = back_data.get('card_number_mrz', '')
1673
+ card_number_str= step_data.get('card_number', '')
1674
+ if card_number_mrz_str and card_number_str and card_number_mrz_str == card_number_str:
1675
+ back_data['card_number_front_back_match'] = True
1676
+
1677
+ mrz1= back_data.get('mrz1', '')
1678
+ mrz1 = mrz1.strip()
1679
+ back_data['id_number_mrz'] = mrz1[15:27]
1680
+ id_number_mrz_str = back_data.get('id_number_mrz', '')
1681
+ logging.info(f"ID Number from front: {step_data.get('id_number', '')} ID Number from MRZ: {id_number_mrz_str}")
1682
+
1683
+ id_number_str= step_data.get('id_number', '')
1684
+ if id_number_mrz_str and id_number_str and id_number_mrz_str == id_number_str:
1685
+ back_data['id_number_front_back_match'] = True
1686
+
1687
+ exp_back_str = back_data.get("expiry_date", "")
1688
+ exp_back_mrz_str = back_data.get("expiry_date_mrz", "")
1689
+ exp_back = normalize_date_generic(exp_back_str)
1690
+ exp_back_mrz = normalize_date_generic(exp_back_mrz_str)
1691
+ logging.info(f"Normalized Expiry Date from back: {exp_back}, from MRZ: {exp_back_mrz}")
1692
+
1693
+ if exp_back == exp_back_mrz:
1694
+ logging.info(f"Normalized Expiry Date from back: {exp_back}, from MRZ: {exp_back_mrz}")
1695
+ back_data['exp_date_back_and_mrz_match']= True
1696
+
1697
+
1698
+ if back_data.get('gender_mrz','')=='M':
1699
+ back_data['gender_mrz'] = 'Male'
1700
+ elif back_data.get('gender_mrz','')=='F':
1701
+ back_data['gender_mrz'] = 'Female'
1702
+
1703
+ gender_mrz = back_data.get("gender_mrz", "")
1704
+ gender_front = step_data.get('gender','') if step_data else ''
1705
+
1706
+ if gender_front:
1707
+ if gender_front == gender_mrz:
1708
+ back_data['gender_front_and_back_match']= True
1709
+ else:
1710
+ back_data['gender'] = gender_mrz
2908
1711
 
2909
- print("error in family number:", e)
1712
+ back_data['mrz'] = [back_data.get('mrz1', '') + back_data.get('mrz2', '') + back_data.get('mrz3', '')]
2910
1713
 
2911
- return back_data
1714
+ if len(back_data['mrz']) == 0 or not back_data['mrz'][0].strip():
1715
+ result['error']= "missing_mrz"
1716
+ result['error_details'] = "Missing or empty MRZ fields"
1717
+ return result
1718
+
1719
+ nationality_to_check = "IRQ"
1720
+ logging.info(f"Nationality to check for back id: {nationality_to_check}")
1721
+ valid_nationality_result = self.check_nationality_in_iso_list(nationality_to_check)
1722
+ logging.info(f"valid_nationality_result for passport: {valid_nationality_result}")
1723
+ back_data['valid_nationality'] = valid_nationality_result
1724
+
1725
+ result.update(back_data)
1726
+
1727
+ non_optional_keys =['issuing_authority',
1728
+ 'issuing_authority_en',
1729
+ 'issue_date',
1730
+ 'expiry_date',
1731
+ 'place_of_birth',
1732
+ 'place_of_birth_en',
1733
+ 'dob',
1734
+ 'mrz1',
1735
+ 'mrz2',
1736
+ 'mrz3',
1737
+ 'family_number',
1738
+ 'doc_type',
1739
+ 'family_number_en',
1740
+ 'nationality',
1741
+ 'issuing_country',
1742
+ 'card_number',
1743
+ 'card_number_back',
1744
+ 'id_number',
1745
+ 'mrz',
1746
+ 'valid_expiry_issue',
1747
+ 'age_check',
1748
+ 'dob_match_mrz_dob',
1749
+ 'is_doc_expired',
1750
+ 'gender_back']
1751
+
1752
+ empty_string_keys = [
1753
+ key for key, value in result.items() if key in non_optional_keys and (value == "" or value == [] or value == "[]")
1754
+ ]
1755
+ logging.info(f"Empty string keys in back_data: {empty_string_keys}")
1756
+
1757
+ if empty_string_keys:
1758
+ result['error'] = 'missing_key_fields'
1759
+ logging.info(f"Empty string fields found in back_data: {empty_string_keys}")
1760
+ return result
2912
1761
 
1762
+ back_data_update = {
1763
+ 'valid_nationality': valid_nationality_result,
1764
+ 'back_extracted_data': '',
1765
+ 'translated_back_id_text': '',
1766
+ 'back_coloured': True,
1767
+ 'occupation': '',
1768
+ 'employer': '',
1769
+ 'doc_on_pp': doc_on_pp_result,
1770
+ 'screenshot_result': 'screenshot_result',
1771
+ 'photo_on_screen_result': 'photo_on_screen_result',
1772
+ 'back_blurred': 'front_blurred',
1773
+ 'back_glare': 'front_glare',
1774
+ 'back_tampered_result': 'clear'
1775
+ }
1776
+ logging.info(f"Back data update before final update: ")
1777
+ result.update(back_data_update)
1778
+ logging.info(f"Final back_data completed succesfully")
1779
+
1780
+ return result
1781
+
1782
+ except Exception as e:
1783
+ result['error'] = 'bad_image'
1784
+ result['error_details'] = str(e)
1785
+ logging.info(f"Exception details: {e}")
1786
+ return result
1787
+
2913
1788
  if country == "QAT":
2914
1789
  from idvpackage.qatar_id_extraction import get_response_from_openai_qat
2915
-
2916
1790
  back_data = {"error": "", "doc_type": "national_identity_card"}
2917
- # is_colored2 = self.is_colored(back_id)
2918
- # if is_colored2:
2919
1791
  try:
2920
1792
  processed_back_id = self.image_conversion(back_id)
2921
-
2922
1793
  compressed_image = BytesIO()
2923
1794
  processed_back_id.save(
2924
1795
  compressed_image, format="JPEG", quality=80, optimize=True
2925
1796
  )
2926
1797
  compressed_image_data = compressed_image.getvalue()
2927
-
1798
+
1799
+ st = time.time()
2928
1800
  back_extraction_result = get_response_from_openai_qat(compressed_image_data, "back", country, self.openai_key)
1801
+ logging.info(f"----------------Time taken for OpenAI and final extraction back: {time.time() - st} seconds\n")
1802
+ logging.info(f"back_extraction_result: {json.dumps(back_extraction_result, ensure_ascii=False, indent=2)}")
1803
+
2929
1804
  back_data.update(back_extraction_result)
2930
-
2931
- # image = np.array(processed_back_id)
2932
-
2933
-
2934
1805
  back_data_update = {
2935
- # 'back_tampered_result': tampered_result_back,
2936
1806
  "back_extracted_data": "",
2937
1807
  "back_coloured": True,
2938
1808
  "doc_on_pp": "clear",
@@ -2943,8 +1813,7 @@ class IdentityVerification:
2943
1813
  }
2944
1814
 
2945
1815
  back_data.update(back_data_update)
2946
-
2947
-
1816
+
2948
1817
  except Exception as e:
2949
1818
  back_data["error"] = "bad_image"
2950
1819
  back_data["error_details"] = e
@@ -2952,37 +1821,30 @@ class IdentityVerification:
2952
1821
  return back_data
2953
1822
 
2954
1823
  if country == "LBN":
1824
+ from idvpackage.blur_detection import is_image_blur
1825
+ from idvpackage.lebanon_id_extraction import lebanon_id_extraction_from_text
2955
1826
  from idvpackage.ocr_utils import (
2956
1827
  detect_photo_on_screen,
2957
1828
  detect_screenshot,
2958
1829
  document_on_printed_paper,
2959
1830
  )
2960
- from idvpackage.lebanon_id_extraction import lebanon_id_extraction_from_text
2961
- from idvpackage.blur_detection import is_image_blur
2962
1831
 
2963
1832
  back_data = {"error": "", "doc_type": "national_identity_card"}
2964
-
2965
1833
  try:
2966
1834
  st = time.time()
2967
1835
  processed_back_id = self.image_conversion(back_id)
2968
- logging.info(
2969
- f"----------------Time taken for image conversion back: {time.time() - st} seconds\n"
2970
- )
2971
-
1836
+ logging.info(f"----------------Time taken for image conversion back: {time.time() - st} seconds\n")
2972
1837
  st = time.time()
2973
1838
  compressed_image = BytesIO()
2974
1839
  processed_back_id.save(
2975
1840
  compressed_image, format="JPEG", quality=100, optimize=True
2976
1841
  )
2977
1842
  compressed_image_data = compressed_image.getvalue()
2978
-
2979
1843
  back_id_text = self.get_ocr_results(compressed_image_data, country="LBN")
2980
1844
  back_id_text_desc = back_id_text[0].description
2981
1845
 
2982
1846
  logging.info(f"Extracted LBN back OCR text: {back_id_text_desc}")
2983
- logging.info(
2984
- f"----------------Time taken for vision back: {time.time() - st} seconds\n"
2985
- )
1847
+ logging.info(f"----------------Time taken for vision back: {time.time() - st} seconds\n")
2986
1848
 
2987
1849
  # Check for blur using the new comprehensive method
2988
1850
  image = np.array(processed_back_id)
@@ -2993,13 +1855,10 @@ class IdentityVerification:
2993
1855
  fft_threshold=120,
2994
1856
  bright_reflection_min_area=1.0,
2995
1857
  ):
2996
- print(
2997
- f"Blur/Brightness issue detected in front image, marking as covered photo"
2998
- )
1858
+ logging.info(f"Blur/Brightness issue detected in front image, marking as covered photo")
2999
1859
  back_data["error"] = "blur_photo"
3000
1860
  return back_data
3001
1861
 
3002
-
3003
1862
  st = time.time()
3004
1863
  back_extraction_result = lebanon_id_extraction_from_text(back_id_text_desc, compressed_image_data, "back", self.openai_key)
3005
1864
 
@@ -3021,18 +1880,15 @@ class IdentityVerification:
3021
1880
  return back_data
3022
1881
 
3023
1882
  logging.info(
3024
- f"----------------Time taken for data formatting and final extraction back: {time.time() - st} seconds\n"
1883
+ f"----------------Time taken for OpenAI and final extraction back: {time.time() - st} seconds\n"
3025
1884
  )
3026
1885
 
3027
1886
  back_data.update(back_extraction_result)
3028
1887
  image = np.array(processed_back_id)
3029
1888
 
3030
1889
  st = time.time()
3031
- ## TODO: doc_on_pp and detect_photo_on_screen for LBN
3032
- # doc_on_pp_result = document_on_printed_paper(image)
3033
1890
  doc_on_pp_result = "clear"
3034
1891
  screenshot_result = detect_screenshot(self.client, back_id)
3035
- # photo_on_screen_result = detect_photo_on_screen(self.client, image)
3036
1892
  photo_on_screen_result = "clear"
3037
1893
  back_blurred, back_glare = self.get_blurred_and_glared_for_doc(image)
3038
1894
  logging.info(
@@ -3040,9 +1896,7 @@ class IdentityVerification:
3040
1896
  )
3041
1897
 
3042
1898
  back_data_update = {
3043
- # 'back_tampered_result': tampered_result_back,
3044
1899
  "back_extracted_data": "",
3045
- # "translated_back_id_text": "",
3046
1900
  "back_coloured": True,
3047
1901
  "doc_on_pp": doc_on_pp_result,
3048
1902
  "screenshot_result": screenshot_result,
@@ -3052,15 +1906,7 @@ class IdentityVerification:
3052
1906
  }
3053
1907
 
3054
1908
  back_data.update(back_data_update)
3055
-
3056
- if (
3057
- not back_data.get("gender")
3058
- or not back_data.get("issue_date")
3059
- or not back_data.get("card_number")
3060
- ):
3061
- back_data["error"] = "covered_photo"
3062
-
3063
- non_optional_keys = ["gender", "issue_date"]
1909
+ non_optional_keys = ["gender", "issue_date", "card_number"]
3064
1910
  empty_string_keys = [
3065
1911
  key
3066
1912
  for key, value in back_data.items()
@@ -3068,7 +1914,7 @@ class IdentityVerification:
3068
1914
  ]
3069
1915
 
3070
1916
  if empty_string_keys:
3071
- back_data["error"] = "covered_photo"
1917
+ back_data["error"] = "missing_key_fields"
3072
1918
  logging.info(f"Empty non-optional keys: {empty_string_keys}")
3073
1919
 
3074
1920
  except Exception as e:
@@ -3079,19 +1925,18 @@ class IdentityVerification:
3079
1925
  return back_data
3080
1926
 
3081
1927
  if country == "SDN":
1928
+ from idvpackage.blur_detection import is_image_blur
3082
1929
  from idvpackage.ocr_utils import (
3083
1930
  detect_photo_on_screen,
3084
1931
  detect_screenshot,
3085
1932
  document_on_printed_paper,
3086
1933
  )
3087
- from idvpackage.blur_detection import is_image_blur
3088
1934
 
3089
1935
  back_data = {"error": "", "doc_type": "national_identity_card"}
3090
-
3091
- logging.info(f"Starting Sudan ID back side processing with step_data:{step_data.keys() if step_data else 'None'}")
1936
+ logging.info(f"Starting Sudan ID back side processing with step_data:{step_data}")
3092
1937
  try:
3093
1938
  st = time.time()
3094
- processed_back_id = self.image_conversion(back_id)
1939
+ processed_back_id ,compressed_image_data = self.image_conversion_and_compression(back_id)
3095
1940
  logging.info(
3096
1941
  f"----------------Time taken for image conversion back: {time.time() - st} seconds\n"
3097
1942
  )
@@ -3102,19 +1947,18 @@ class IdentityVerification:
3102
1947
  )
3103
1948
  compressed_image_data = compressed_image.getvalue()
3104
1949
  image = np.array(processed_back_id)
3105
- st = time.time()
1950
+
3106
1951
  from idvpackage.sudan_passport_extraction import get_response_from_openai_sdn
1952
+ st = time.time()
3107
1953
  back_extraction_result = get_response_from_openai_sdn(compressed_image_data, "back", self.openai_key)
3108
-
1954
+ logging.info(f"----------------Time taken for vision back: {time.time() - st} seconds\n")
3109
1955
  logging.info(f"back_extraction_result: {json.dumps(back_extraction_result, ensure_ascii=False, indent=2)}")
3110
1956
 
3111
1957
  if not back_extraction_result.get('idsdn_verified', False):
3112
1958
  back_data["error"] = "not_back_id"
3113
1959
  return back_data
3114
- logging.info(
3115
- f"----------------Time taken for data formatting and final extraction back: {time.time() - st} seconds\n"
3116
- )
3117
-
1960
+ logging.info(f"----------------Time taken for OpenAI and final extraction back: {time.time() - st} seconds\n")
1961
+
3118
1962
  if back_extraction_result.get("expiry_date"):
3119
1963
  try:
3120
1964
  expiry_date = back_extraction_result["expiry_date"]
@@ -3129,7 +1973,6 @@ class IdentityVerification:
3129
1973
 
3130
1974
  back_data.update(back_extraction_result)
3131
1975
  back_data['issuing_country'] = 'SDN'
3132
-
3133
1976
  from idvpackage.ocr_utils import normalize_date_generic
3134
1977
 
3135
1978
  dob_front_str = step_data.get("dob", "") if step_data else ""
@@ -3178,35 +2021,28 @@ class IdentityVerification:
3178
2021
 
3179
2022
  if back_data.get('issue_date', '') and back_data.get("expiry_date_mrz",''):
3180
2023
  try:
3181
-
3182
2024
  logging.info(f"difference_in_days issue_date_mrz_obj: {issue_date_obj}, expiry_date_mrz_obj: {expiry_date_mrz_obj} differece is : {(expiry_date_mrz_obj - issue_date_obj).days}")
3183
2025
  difference_in_days_mrz_obj = (expiry_date_mrz_obj - issue_date_obj).days
3184
2026
  back_data["valid_id_duration_mrz"] = difference_in_days_mrz_obj in [1825, 1826, 1827]
3185
-
3186
2027
  except:
3187
2028
  logging.info("Error in calculating date difference between issue and expiry dates for SDN ID")
3188
2029
  pass
3189
2030
 
3190
-
3191
2031
  if back_data.get("issue_date",'') and back_data.get("expiry_date",''):
3192
2032
  try:
3193
-
3194
2033
  logging.info(f"difference_in_days issue_date_obj: {issue_date_obj}, expiry_date_obj: {expiry_date_obj} differece is : {(expiry_date_obj - issue_date_obj).days}")
3195
2034
  difference_in_days_obj = (expiry_date_obj - issue_date_obj).days
3196
2035
  back_data["valid_id_duration"] = difference_in_days_obj in [1825, 1826, 1827]
3197
-
3198
2036
  except:
3199
2037
  logging.info("Error in calculating date difference between issue and expiry dates from MRZ for SDN ID")
3200
2038
  pass
3201
2039
 
3202
2040
  logging.info(f"dob_front_str: {dob_front_str}, dob_back_str: {dob_back_str}, dob_back_mrz_str: {dob_back_mrz_str}")
3203
-
3204
2041
  if dob_front_str and dob_back_str:
3205
2042
  try:
3206
2043
  logging.info(f"dob_front: {dob_front}, dob_back: {dob_back}")
3207
2044
  if dob_front == dob_back:
3208
2045
  back_data["is_dob_front_back_match"] = True
3209
-
3210
2046
  except:
3211
2047
  logging.info("Error in comparing DOB between front and back for SDN ID")
3212
2048
  pass
@@ -3216,18 +2052,15 @@ class IdentityVerification:
3216
2052
  logging.info(f"dob_front: {dob_front}, dob_back_mrz: {dob_back_mrz}")
3217
2053
  if dob_front == dob_back_mrz:
3218
2054
  back_data["is_dob_front_back_mrz_match"] = True
3219
-
3220
2055
  except:
3221
2056
  logging.info("Error in comparing DOB between front and back MRZ for SDN ID")
3222
2057
  pass
3223
2058
 
3224
-
3225
2059
  if back_data.get('gender') and back_data.get('gender_mrz'):
3226
2060
  try:
3227
2061
  logging.info(f"gender: {back_data['gender']}, gender_mrz: {back_data['gender_mrz']}")
3228
2062
  if back_data['gender'] == back_data['gender_mrz']:
3229
2063
  back_data["is_gender_mrz_match"] = True
3230
-
3231
2064
  except:
3232
2065
  logging.info("Error in comparing gender between front and back MRZ for SDN ID")
3233
2066
  pass
@@ -3239,9 +2072,7 @@ class IdentityVerification:
3239
2072
  from idvpackage.ocr_utils import get_name_match_mrz
3240
2073
  back_data['nationality'] = 'SDN'
3241
2074
  back_data['is_name_match_mrz'], back_data['name_mrz'] = get_name_match_mrz(back_data, "nationality_identity_card")
3242
-
3243
-
3244
- # get in this format "2022/08/01",
2075
+ logging.info(f"name from back: {back_data.get('name','')}, name from mrz: {back_data.get('name_mrz','')}")
3245
2076
  back_data['issuance_date'] = back_data.get('issue_date', '')
3246
2077
  if back_data.get("issuance_date",""):
3247
2078
  try:
@@ -3253,11 +2084,8 @@ class IdentityVerification:
3253
2084
  image = np.array(processed_back_id)
3254
2085
 
3255
2086
  st = time.time()
3256
- ## TODO: doc_on_pp and detect_photo_on_screen for LBN
3257
- # doc_on_pp_result = document_on_printed_paper(image)
3258
2087
  doc_on_pp_result = "clear"
3259
2088
  screenshot_result = detect_screenshot(self.client, back_id)
3260
- # photo_on_screen_result = detect_photo_on_screen(self.client, image)
3261
2089
  photo_on_screen_result = "clear"
3262
2090
  back_blurred, back_glare = self.get_blurred_and_glared_for_doc(image)
3263
2091
  logging.info(
@@ -3265,7 +2093,6 @@ class IdentityVerification:
3265
2093
  )
3266
2094
 
3267
2095
  back_data_update = {
3268
- # 'back_tampered_result': tampered_result_back,
3269
2096
  "back_extracted_data": "",
3270
2097
  "translated_back_id_text": "",
3271
2098
  "back_coloured": True,
@@ -3277,8 +2104,6 @@ class IdentityVerification:
3277
2104
  }
3278
2105
 
3279
2106
  back_data.update(back_data_update)
3280
-
3281
-
3282
2107
  mrz1 = back_data.get('mrz1', '')
3283
2108
  mrz2 = back_data.get('mrz2', '')
3284
2109
  mrz3 = back_data.get('mrz3', '')
@@ -3292,286 +2117,208 @@ class IdentityVerification:
3292
2117
  for key, value in back_data.items()
3293
2118
  if key in non_optional_keys and value == ""
3294
2119
  ]
3295
-
3296
2120
  if empty_string_keys:
3297
2121
  back_data["error"] = "missing_key_fields"
3298
2122
 
3299
2123
  except Exception as e:
3300
2124
  back_data["error"] = "bad_image"
3301
- print(f"-------------->> Something went wrong error trace:: {e}")
2125
+ logging.info(f"-------------->> Something went wrong error trace:: {e}")
3302
2126
  back_data["error_details"] = e
3303
2127
 
3304
2128
  return back_data
3305
2129
 
3306
- def exract_passport_info(self, passport, country, nationality, step_data=None):
3307
- if (nationality and nationality == "RUS") or (
3308
- not nationality and country == "RUS"
3309
- ):
3310
- logging.info("-------------Working on RUS Passport \n")
3311
- processed_passport = self.image_conversion(passport)
3312
- passport_text = self.get_ocr_results(processed_passport)
3313
- passport_text = passport_text[0].description
3314
-
3315
- passport_details = {}
3316
-
3317
- patterns = {
3318
- "passport_given_name": (
3319
- r"Имя Given names\n(.*?)/",
3320
- lambda match: self.translator.translate(
3321
- match.group(1), src="ru", dest="en"
3322
- ).text
3323
- if match
3324
- else "",
3325
- ),
3326
- "passport_surname": (
3327
- r"RUS(.*?)<<(.*?)<.*",
3328
- lambda match: match.group(1) if match else "",
3329
- ),
3330
- "passport_number": (
3331
- r"(\d{7})",
3332
- lambda match: match.group(1) if match else "",
3333
- ),
3334
- "passport_date_of_birth": (
3335
- r"(\d+)[MF]",
3336
- lambda match: self.convert_dob(match.group(1)) if match else "",
3337
- ),
3338
- "passport_date_of_expiry": (
3339
- r"[MF](\d+)",
3340
- lambda match: self.convert_expiry_date(match.group(1))
3341
- if match
3342
- else "",
3343
- ),
3344
- "passport_gender": (
3345
- r"(\d)([A-Za-z])(\d)",
3346
- lambda match: match.group(2) if match else "",
3347
- ),
3348
- }
3349
-
3350
- mrz1_pattern = r"([A-Z<]+)<<([A-Z<]+)<<([\dA-Z<]+)"
3351
- mrz2_pattern = r"(\d{10}[A-Z]{3}\d{7}[\dA-Z<]+)"
3352
-
3353
- mrz1_matches = re.findall(mrz1_pattern, passport_text)
3354
- mrz2_matches = re.findall(mrz2_pattern, passport_text)
3355
-
3356
- if mrz1_matches:
3357
- mrz1 = " ".join(mrz1_matches[0])
3358
- else:
3359
- mrz1 = ""
3360
-
3361
- if mrz2_matches:
3362
- mrz2 = mrz2_matches[0]
3363
- else:
3364
- mrz2 = ""
3365
-
3366
- mrz1_keys = ["passport_surname"]
3367
- mrz2_keys = [
3368
- "passport_date_of_birth",
3369
- "passport_date_of_expiry",
3370
- "passport_gender",
3371
- ]
3372
-
3373
- for key, value in patterns.items():
3374
- pattern = value[0]
3375
- transform_func = value[1]
3376
-
3377
- text = passport_text
3378
- if key in mrz1_keys:
3379
- text = mrz1
3380
- if key in mrz2_keys:
3381
- text = mrz2
3382
-
3383
- match = re.search(pattern, text)
3384
- passport_details[key] = transform_func(match) if match else ""
2130
+ def extract_passport_info(self, passport, country, nationality, step_data=None):
3385
2131
 
3386
- passport_details["doc_type"] = "passport"
3387
- passport_details["nationality_received"] = nationality
3388
-
3389
- return passport_details
2132
+ step_data = step_data if step_data is not None else {}
3390
2133
 
3391
2134
  if (nationality and nationality == "IRQ") or (
3392
- not nationality and country == "IRQ"
3393
- ):
2135
+ not nationality and country == "IRQ"):
3394
2136
  logging.info("-------------Working on IRQ Passport \n")
3395
- from idvpackage.ocr_utils import detect_photo_on_screen, detect_screenshot, document_on_printed_paper
3396
- from idvpackage.iraq_passport_extraction import iraq_passport_extraction, extract_mother_name_and_surname
3397
- from idvpackage.common import load_and_process_image_deepface
3398
2137
 
3399
- passport_data = {
3400
- 'error': '',
3401
- 'doc_type': 'passport'
3402
- }
2138
+ from idvpackage.iraq_id_extraction_withopenai import (
2139
+ get_response_from_openai_irq,
2140
+ )
2141
+
2142
+ result = {'error': '', "error_details": '', 'doc_type': 'passport'}
2143
+ if nationality == '':
2144
+ nationality=None
3403
2145
 
3404
2146
  try:
3405
- processed_passport = self.image_conversion(passport)
3406
- id_infos = self.get_ocr_results(processed_passport)
3407
- passport_text = id_infos[0].description
3408
- print(passport_text)
3409
- pattern4 = r'(Republic of Iraq|Iraq)' # |Passport
3410
- k = re.search(pattern4, passport_text, re.IGNORECASE)
3411
-
3412
- if not k:
2147
+ st = time.time()
2148
+ processed_passport, compressed_image_data = self.image_conversion_and_compression(passport)
2149
+ logging.info(f'----------------Time taken for image conversion: {time.time() - st} seconds\n')
2150
+ logging.info(f"starting the extraction using openai for IRQ passport")
2151
+
2152
+ st = time.time()
2153
+ passport_data = get_response_from_openai_irq(compressed_image_data, openai_key=self.openai_key,side="page1")
2154
+ logging.info(f'----------------Time taken for OpenAI and final extraction passport: {time.time() - st} seconds\n')
2155
+ logging.info(f"passport_data: {json.dumps(passport_data, ensure_ascii=False, indent=2)}")
2156
+
2157
+ if not passport_data.get('header_verified', False):
3413
2158
  passport_data["error"] = "not_passport"
3414
2159
  return passport_data
2160
+
2161
+ if passport_data.get('full_name', '') and passport_data.get('last_name', ''):
2162
+ if passport_data['last_name'] not in passport_data['full_name']:
2163
+ passport_data['full_name'] = passport_data.get('full_name', '') +' '+ passport_data.get('last_name', '')
2164
+
2165
+ if passport_data.get('dob', None):
2166
+ from idvpackage.ocr_utils import is_age_18_above
2167
+ if not is_age_18_above(passport_data.get('dob', None)):
2168
+ passport_data['error'] = 'under_age'
2169
+ logging.info("Passport holder is under age")
2170
+ return passport_data
3415
2171
 
3416
- original_text = passport_text
3417
-
3418
- passport_details = iraq_passport_extraction(passport_text)
3419
-
3420
- passport_details['id_number'] = passport_details.get('passport_number',
3421
- passport_details.get('passport_number_mrz'))
3422
- if passport_details.get('passport_number_mrz') and passport_details['id_number'] != passport_details[
3423
- 'passport_number_mrz']:
3424
- passport_details['id_number'] = passport_details['passport_number_mrz']
2172
+ if passport_data.get('expiry_date'):
2173
+ from idvpackage.ocr_utils import is_expired_id
2174
+ if is_expired_id(passport_data.get('expiry_date', None)):
2175
+ passport_data['error'] = 'expired_id'
2176
+ logging.info(f"ID is expired with expiry date: {passport_data.get('expiry_date','')}")
2177
+ return passport_data
2178
+
2179
+ if passport_data.get('mrz2', ''):
2180
+ mrz2 = passport_data.get('mrz2')
2181
+ logging.info(f"Original mrz2: {mrz2}")
2182
+ mrz2 = mrz2.replace('<', '')
2183
+ logging.info(f"Cleaned mrz2: {mrz2}")
2184
+ if len(mrz2) <28:
2185
+ passport_data['error'] = 'missing_mrz'
2186
+ logging.info("MRZ2 length is less than expected after cleaning")
2187
+ return passport_data
3425
2188
 
3426
- if not passport_details.get('passport_date_of_birth_generic') and passport_details.get('dob_mrz'):
3427
- passport_details['dob'] = passport_details['dob_mrz']
3428
- else:
3429
- passport_details['dob'] = passport_details['passport_date_of_birth_generic']
3430
-
3431
- if (not passport_details.get('full_name') and passport_details.get('full_name_generic')) or (
3432
- len(passport_details.get('full_name')) < len(passport_details.get('full_name_generic'))):
3433
- if passport_details['full_name_generic'].startswith('IRQ'):
3434
- passport_details['full_name'] = passport_details['full_name_generic'][3:].strip()
3435
- else:
3436
- passport_details['full_name'] = passport_details['full_name_generic']
3437
-
3438
- # if len(passport_details.get('full_name')) < len(passport_details.get('full_name_generic')):
3439
- # passport_details['full_name'] = passport_details['full_name_generic']
3440
-
3441
- if not passport_details.get('last_name') and passport_details.get('surname_generic'):
3442
- if passport_details['surname_generic'].startswith('IRQ'):
3443
- passport_details['last_name'] = passport_details['surname_generic'][3:].strip()
3444
- else:
3445
- passport_details['last_name'] = passport_details['surname_generic']
3446
-
3447
- if not passport_details.get('passport_date_of_expiry_generic') and passport_details.get(
3448
- 'expiry_date_mrz'):
3449
- passport_details['expiry_date'] = passport_details['expiry_date_mrz']
3450
- else:
3451
- passport_details['expiry_date'] = passport_details['passport_date_of_expiry_generic']
2189
+ passport_data['gender'] = ''
2190
+ if passport_data.get('gender_letter', None):
2191
+ if passport_data.get('gender_letter', '') in ['M', 'm']:
2192
+ passport_data['gender'] = 'Male'
2193
+ elif passport_data.get('gender_letter', '') in ['F', 'f']:
2194
+ passport_data['gender'] = 'Female'
2195
+
2196
+ if passport_data.get('mrz1') and passport_data.get('mrz2'):
2197
+ passport_data['mrz'] = passport_data['mrz1'] + passport_data['mrz2']
3452
2198
 
3453
- keys_to_delete = ['expiry_date_mrz', 'passport_date_of_expiry_generic', 'dob_mrz',
3454
- 'passport_date_of_birth_generic',
3455
- 'passport_number', 'passport_number_mrz', 'full_name_generic', 'surname_generic']
2199
+ if len(passport_data['mrz']) == 0 or not passport_data['mrz'][0].strip():
2200
+ return {
2201
+ "error": "missing_mrz",
2202
+ "error_details": "Missing or empty MRZ fields"
2203
+ }
2204
+
2205
+ passport_data['issuing_country'] = 'IRQ'
2206
+ nationality_to_check = passport_data.get('nationality')
2207
+ valid_nationality_result = self.check_nationality_in_iso_list(nationality_to_check)
2208
+ logging.info(f"valid_nationality_result for passport: {valid_nationality_result}")
2209
+ passport_data['valid_nationality'] = valid_nationality_result
2210
+
2211
+ logging.info(f"checking all passport_data fields for empty non-optional keys{passport_data.keys()}")
2212
+ empty_string_keys = [ key for key, value in passport_data.items() if value == "" or value == [] or value == "[]"]
2213
+ if empty_string_keys:
2214
+ passport_data['error'] = 'missing_key_fields'
2215
+ logging.warning(f"Empty string fields found in passport_data: {empty_string_keys}")
3456
2216
 
3457
- for key in keys_to_delete:
3458
- if key in passport_details.keys():
3459
- del passport_details[key]
2217
+ st = time.time()
2218
+ from idvpackage.common import load_and_process_image_deepface
2219
+ front_face_locations, front_face_encodings = load_and_process_image_deepface(passport)
2220
+ logging.info(f'----------------Time taken for face extraction: {time.time() - st} seconds\n')
3460
2221
 
3461
- passport_data.update(passport_details)
3462
2222
 
3463
- image = np.array(processed_passport)
3464
- doc_on_pp_result = document_on_printed_paper(image)
3465
- screenshot_result = detect_screenshot(self.client, passport)
3466
- photo_on_screen_result = detect_photo_on_screen(self.client, passport)
3467
- blurred, glare = self.get_blurred_and_glared_for_doc(image)
3468
- valid_nationality_result = self.check_nationality_in_iso_list(passport_details.get('nationality'))
2223
+ if front_face_encodings is None or len(front_face_encodings) == 0:
2224
+ passport_data['error'] = 'face_not_detected'
2225
+ logging.info("No face detected in passport image")
2226
+ return passport_data
3469
2227
 
3470
- front_face_locations, front_face_encodings = load_and_process_image_deepface(passport)
2228
+ front_face_locations_str = json.dumps([tuple(face_loc) for face_loc in front_face_locations])
2229
+ front_face_encodings_str = json.dumps([face_enc.tolist() for face_enc in front_face_encodings])
2230
+
2231
+ result.update(passport_data)
3471
2232
 
3472
2233
  if step_data:
3473
2234
  try:
3474
- print(f"Matching face from Passport with National ID")
3475
- national_id_front_face_locations = step_data.get("front_face_locations")
2235
+ logging.info(f"Matching face from Passport with National ID")
2236
+ national_id_front_face_locations = json.loads(step_data.get("front_face_locations"))
3476
2237
  national_id_front_face_encodings = json.loads(step_data.get("front_face_encodings"))
3477
- national_id_front_face_encodings = np.array(national_id_front_face_encodings[0],
3478
- dtype=np.float32)
3479
- similarity = self.calculate_similarity(national_id_front_face_encodings,
3480
- front_face_encodings[0])
3481
- passport_data["similarity_score"] = similarity
3482
- if similarity <= 0.65:
3483
- passport_data["error"] = 'face_mismatch'
3484
- except Exception as e:
3485
- passport_data["error"] = 'National ID Image Not Found'
3486
- passport_data["error_details"] = e
2238
+ st = time.time()
3487
2239
 
3488
- front_face_locations_str = json.dumps([tuple(face_loc) for face_loc in front_face_locations])
3489
- front_face_encodings_str = json.dumps([face_enc.tolist() for face_enc in front_face_encodings])
2240
+ largest_face_index1 = national_id_front_face_locations.index(
2241
+ max(national_id_front_face_locations, key=lambda loc: (loc[2] - loc[0]) * (loc[3] - loc[1])))
2242
+ largest_face_index2 = front_face_locations.index(
2243
+ max(front_face_locations, key=lambda loc: (loc[2] - loc[0]) * (loc[3] - loc[1])))
3490
2244
 
3491
- passport_data_update = {
3492
- # 'back_tampered_result': tampered_result_back,
3493
- 'passport_data': original_text,
3494
- 'front_coloured': True,
3495
- 'back_coloured': True,
3496
- 'front_logo_result': 'clear',
3497
- 'front_doc_on_pp': doc_on_pp_result,
3498
- 'front_screenshot_result': screenshot_result,
3499
- 'front_photo_on_screen_result': photo_on_screen_result,
3500
- 'doc_on_pp': doc_on_pp_result,
3501
- 'screenshot_result': screenshot_result,
3502
- 'photo_on_screen_result': photo_on_screen_result,
3503
- 'front_blurred': blurred,
3504
- 'front_glare': glare,
3505
- 'back_blurred': blurred,
3506
- 'back_glare': glare,
3507
- 'front_face_locations': front_face_locations_str,
3508
- 'front_face_encodings': front_face_encodings_str,
3509
- 'valid_nationality': valid_nationality_result,
3510
- 'issuing_country': 'IRQ',
3511
- 'nationality_received': nationality
3512
- }
2245
+ face_encoding1 = national_id_front_face_encodings[largest_face_index1]
2246
+ face_encoding2 = front_face_encodings[largest_face_index2]
3513
2247
 
3514
- passport_data.update(passport_data_update)
2248
+ similarity = self.calculate_similarity(face_encoding1, face_encoding2)
3515
2249
 
3516
- non_optional_keys = ["gender", "passport_date_of_birth", "id_number", "passport_date_of_expiry"]
3517
- empty_string_keys = [key for key, value in passport_data.items() if
3518
- key in non_optional_keys and value == '']
2250
+ logging.info(
2251
+ f'----------------Time taken for face extraction for matching passport with front id: {time.time() - st} seconds\n')
2252
+ result["similarity_score"] = similarity
2253
+ logging.info(f"Front ID and Passport similarity score: {similarity}")
2254
+ if similarity < 0.5:
2255
+ result["error"] = 'face_mismatch'
2256
+ return {"error": "face_mismatch",
2257
+ "error_details": "Front ID and Passport Face dont match."}
2258
+
2259
+ except Exception as e:
2260
+ logging.info(f"Error in face matching between passport and national id: {e}")
2261
+ result["error"] = 'covered_photo'
2262
+ result["error_details"] = e
2263
+ return result
2264
+
2265
+ data_temp = {
2266
+ 'passport_data': "",
2267
+ 'front_coloured': True,
2268
+ 'back_coloured': True,
2269
+ 'front_logo_result': 'clear',
2270
+ 'front_doc_on_pp': "clear",
2271
+ 'front_screenshot_result': 'clear',
2272
+ 'front_photo_on_screen_result': 'clear',
2273
+ 'doc_on_pp': "clear",
2274
+ 'screenshot_result': 'clear',
2275
+ 'photo_on_screen_result': 'clear',
2276
+ 'front_blurred': 'front_blurred',
2277
+ 'front_glare': 'front_glare',
2278
+ 'back_blurred': 'front_blurred',
2279
+ 'back_glare': 'front_glare',
2280
+ 'front_face_locations': front_face_locations_str,
2281
+ 'front_face_encodings': front_face_encodings_str,
2282
+ 'issuing_country': 'IRQ',
2283
+ 'nationality_received': nationality_to_check
2284
+ }
2285
+
2286
+ result.update(data_temp)
3519
2287
 
3520
- if empty_string_keys:
3521
- passport_data['error'] = 'covered_photo'
2288
+ from idvpackage.ocr_utils import validation_checks_passport
2289
+ validation_results = validation_checks_passport(passport_data, 'id_number', 'IRQ')
2290
+ logging.info(f"validation_results: {json.dumps(validation_results, ensure_ascii=False, indent=2)}")
2291
+ result.update(validation_results)
3522
2292
 
3523
- except Exception as e:
3524
- passport_data['error'] = 'bad_image'
3525
- passport_data['error_details'] = e
2293
+ from idvpackage.ocr_utils import get_name_match_mrz
2294
+ result['is_name_match_mrz'], result['name_mrz'] = get_name_match_mrz(result, "passport")
3526
2295
 
3527
- try:
3528
- new_dict = extract_mother_name_and_surname(passport_text)
3529
- new_dict['mother_first_name_en'] = new_dict.get('mother_first_name', '')
3530
- new_dict['mother_last_name_en'] = new_dict.get('mother_last_name', '')
3531
-
3532
- from deep_translator import GoogleTranslator
3533
- new_dict['mother_first_name'] = GoogleTranslator('en', 'ar').translate(
3534
- new_dict.get('mother_first_name', ''))
3535
- new_dict['mother_last_name'] = GoogleTranslator('en', 'ar').translate(
3536
- new_dict.get('mother_last_name', ''))
3537
- passport_data.update(new_dict)
2296
+ return result
2297
+
3538
2298
  except Exception as e:
3539
- passport_data['mother_first_name'], passport_data['mother_first_name_en'] = '', ''
3540
- passport_data['mother_last_name'], passport_data['mother_last_name_en'] = '', ''
3541
- print("error:", e)
3542
-
3543
- return passport_data
2299
+ result['error'] = 'bad_image'
2300
+ result['error_details'] = str(e)
2301
+ logging.info(f"Exception details: {e}")
2302
+ return result
3544
2303
 
3545
2304
  if (nationality and nationality == "LBN") or (
3546
2305
  not nationality and country == "LBN"
3547
2306
  ):
3548
2307
  logging.info("-------------Working on LBN Passport \n")
3549
- from idvpackage.ocr_utils import (
3550
- detect_photo_on_screen,
3551
- detect_screenshot,
3552
- document_on_printed_paper,
3553
- )
3554
-
3555
- from idvpackage.lebanon_id_extraction import get_response_from_openai_lbn
3556
- from idvpackage.common import load_and_process_image_deepface
3557
2308
  from idvpackage.blur_detection import is_image_blur
2309
+ from idvpackage.common import load_and_process_image_deepface
2310
+ from idvpackage.lebanon_id_extraction import get_response_from_openai_lbn
2311
+ from idvpackage.ocr_utils import detect_screenshot
3558
2312
 
3559
2313
  passport_data = {"error": "", "doc_type": "passport"}
3560
2314
 
3561
2315
  try:
3562
- st = time.time()
3563
- processed_passport = self.image_conversion(passport)
3564
- logging.info(
3565
- f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n"
3566
- )
3567
-
3568
- st = time.time()
3569
- compressed_image = BytesIO()
3570
- processed_passport.save(
3571
- compressed_image, format="PNG", quality=100, optimize=True
3572
- )
3573
- compressed_image_data = compressed_image.getvalue()
2316
+ st = time.time()
2317
+ processed_passport, compressed_image_data = self.image_conversion_and_compression(passport)
2318
+ logging.info(f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n")
3574
2319
 
2320
+ st = time.time()
2321
+
3575
2322
  image = np.array(processed_passport)
3576
2323
  if country == "LBN":
3577
2324
  if is_image_blur(
@@ -3581,21 +2328,17 @@ class IdentityVerification:
3581
2328
  fft_threshold=120,
3582
2329
  bright_reflection_min_area=1.0,
3583
2330
  ):
3584
- print(
3585
- f"Blur/Brightness issue detected in front image, marking as covered photo"
3586
- )
2331
+ logging.info(f"Blur/Brightness issue detected in front image, marking as covered photo")
3587
2332
  passport_data["error"] = "blur_photo"
3588
2333
  return passport_data
3589
2334
 
3590
2335
  st = time.time()
3591
2336
  passport_details = get_response_from_openai_lbn(compressed_image_data,"first", self.openai_key)
3592
2337
  logging.info(
3593
- f"----------------Time taken for data formatting and final extraction passport: {time.time() - st} seconds\n"
2338
+ f"----------------Time taken for OpenAI and final extraction passport: {time.time() - st} seconds\n"
3594
2339
  )
3595
-
3596
2340
  logging.info(f"passport_details: {json.dumps(passport_details, ensure_ascii=False, indent=2)}")
3597
2341
 
3598
-
3599
2342
  if not passport_details['header_verified'] :
3600
2343
  passport_data["error"] = "not_passport"
3601
2344
  return passport_data
@@ -3609,7 +2352,7 @@ class IdentityVerification:
3609
2352
  passport_details['error'] = 'under_age'
3610
2353
  return passport_details
3611
2354
  else:
3612
- passport_details['error'] = 'covered_photo'
2355
+ passport_details['error'] = 'missing_key_fields'
3613
2356
  logging.error(f"DOB date not found in the extracted data.")
3614
2357
  return passport_details
3615
2358
 
@@ -3625,51 +2368,40 @@ class IdentityVerification:
3625
2368
  logging.info(f"Error in expiry date check: {e}")
3626
2369
 
3627
2370
  passport_details['mrz'] = passport_details.get('mrz1', '') + passport_details.get('mrz2', '')
3628
-
2371
+ passport_details['full_name'] = passport_details.get('first_name', '') + ' ' + passport_details.get('last_name', '')
3629
2372
  passport_details['issuing_country'] = 'LBN'
3630
-
3631
2373
  passport_data.update(passport_details)
3632
2374
 
3633
- # if passport_data["id_number"][:2] == "PR":
3634
- # passport_data["error"] = "not_allowed"
3635
- # return passport_data
3636
-
3637
2375
  from idvpackage.ocr_utils import validation_checks_passport
3638
-
3639
-
3640
2376
  validation_results = validation_checks_passport(passport_data, 'id_number', 'LBN')
3641
2377
  logging.info(f"validation_results: {json.dumps(validation_results, ensure_ascii=False, indent=2)}")
3642
2378
  passport_data.update(validation_results)
3643
2379
 
3644
-
3645
2380
  from idvpackage.ocr_utils import get_name_match_mrz
3646
2381
  passport_data['is_name_match_mrz'], passport_data['name_mrz'] = get_name_match_mrz(passport_data, "passport")
3647
2382
 
3648
-
3649
2383
  image = np.array(processed_passport)
3650
-
3651
2384
  st = time.time()
3652
-
3653
2385
  doc_on_pp_result = "clear"
3654
2386
  screenshot_result = detect_screenshot(self.client, passport)
3655
- # photo_on_screen_result = detect_photo_on_screen(self.client, passport)
3656
2387
  photo_on_screen_result = "clear"
3657
2388
  blurred, glare = self.get_blurred_and_glared_for_doc(image)
3658
- print(f"Nationality: {passport_details.get('nationality')}")
2389
+ logging.info(f"Nationality: {passport_details.get('nationality')}")
3659
2390
  valid_nationality_result = self.check_nationality_in_iso_list(
3660
2391
  passport_details.get("nationality")
3661
2392
  )
3662
- logging.info(
3663
- f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n"
3664
- )
2393
+ logging.info(f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n")
3665
2394
 
3666
2395
  st = time.time()
3667
- front_face_locations, front_face_encodings = (
3668
- load_and_process_image_deepface(passport)
3669
- )
2396
+ front_face_locations, front_face_encodings = (load_and_process_image_deepface(passport))
3670
2397
  logging.info(
3671
2398
  f"----------------Time taken for face extraction passport: {time.time() - st} seconds\n"
3672
2399
  )
2400
+
2401
+ if front_face_encodings is None or len(front_face_encodings) == 0:
2402
+ passport_data["error"] = "face_not_detected"
2403
+ logging.info("No face detected in passport image")
2404
+ return passport_data
3673
2405
 
3674
2406
  front_face_locations_str = json.dumps(
3675
2407
  [tuple(face_loc) for face_loc in front_face_locations]
@@ -3679,7 +2411,6 @@ class IdentityVerification:
3679
2411
  )
3680
2412
 
3681
2413
  passport_data_update = {
3682
- # 'back_tampered_result': tampered_result_back,
3683
2414
  "passport_data": "",
3684
2415
  "front_coloured": True,
3685
2416
  "back_coloured": True,
@@ -3701,7 +2432,6 @@ class IdentityVerification:
3701
2432
  }
3702
2433
 
3703
2434
  passport_data.update(passport_data_update)
3704
-
3705
2435
  non_optional_keys = [
3706
2436
  "front_face_locations",
3707
2437
  "id_number",
@@ -3718,11 +2448,10 @@ class IdentityVerification:
3718
2448
  ]
3719
2449
 
3720
2450
  if empty_string_keys:
3721
- passport_data["error"] = "covered_photo"
2451
+ passport_data["error"] = "missing_key_fields"
3722
2452
  logging.info(f"Empty keys found: {empty_string_keys}")
3723
2453
 
3724
2454
 
3725
-
3726
2455
  except Exception as e:
3727
2456
  passport_data["error"] = "bad_image"
3728
2457
  passport_data["error_details"] = e
@@ -3734,34 +2463,20 @@ class IdentityVerification:
3734
2463
  not nationality and country == "SDN"
3735
2464
  ):
3736
2465
  logging.info("-------------Working on SDN Passport \n")
3737
- from idvpackage.ocr_utils import (
3738
- detect_photo_on_screen,
3739
- detect_screenshot,
3740
- document_on_printed_paper,
3741
- )
3742
- from idvpackage.sudan_passport_extraction import get_response_from_openai_sdn
3743
- from idvpackage.common import load_and_process_image_deepface
3744
2466
  from idvpackage.blur_detection import is_image_blur
3745
-
2467
+ from idvpackage.common import load_and_process_image_deepface
2468
+ from idvpackage.ocr_utils import detect_screenshot
2469
+ from idvpackage.sudan_passport_extraction import get_response_from_openai_sdn
2470
+
3746
2471
  passport_data = {"error": "", "doc_type": "passport"}
3747
-
3748
2472
  try:
3749
2473
  st = time.time()
3750
- processed_passport = self.image_conversion(passport)
2474
+ processed_passport, compressed_image_data = self.image_conversion_and_compression(passport)
3751
2475
  logging.info(
3752
2476
  f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n"
3753
2477
  )
3754
2478
 
3755
2479
  st = time.time()
3756
- compressed_image = BytesIO()
3757
-
3758
-
3759
- processed_passport.save(
3760
- compressed_image, format="JPEG", quality=95, optimize=True
3761
- )
3762
- compressed_image_data = compressed_image.getvalue()
3763
-
3764
-
3765
2480
  image = np.array(processed_passport)
3766
2481
  if country == "SDN":
3767
2482
  if is_image_blur(
@@ -3772,18 +2487,15 @@ class IdentityVerification:
3772
2487
  bright_reflection_threshold=100,
3773
2488
  bright_reflection_min_area=1.0,
3774
2489
  ):
3775
- print(f"Passport Document is blurry, marking as covered photo")
2490
+ logging.info(f"Passport Document is blurry, marking as covered photo")
3776
2491
  passport_data["error"] = "blur_photo"
3777
2492
  return passport_data
3778
2493
 
3779
2494
  st = time.time()
3780
- # passport_details = sdn_passport_extraction(passport_text)
3781
2495
  passport_details = get_response_from_openai_sdn(compressed_image_data, "passport", self.openai_key)
3782
2496
 
3783
2497
  logging.info(f"Passport details extracted: {json.dumps(passport_details, ensure_ascii=False, indent=2)}")
3784
- logging.info(
3785
- f"----------------Time taken for data formatting and final extraction passport: {time.time() - st} seconds\n"
3786
- )
2498
+ logging.info(f"----------------Time taken for OpenAI and final extraction passport: {time.time() - st} seconds\n")
3787
2499
 
3788
2500
  if not passport_details['header_verified']:
3789
2501
  passport_data["error"] = "not_passport"
@@ -3794,6 +2506,8 @@ class IdentityVerification:
3794
2506
  passport_details['nationality_received'] = 'SDN'
3795
2507
  passport_details['nationality'] = 'SDN'
3796
2508
 
2509
+ passport_details['full_name'] = passport_details.get('first_name','')+ passport_details.get('last_name','')
2510
+
3797
2511
  from idvpackage.ocr_utils import is_expired_id
3798
2512
  if is_expired_id(passport_details.get('expiry_date', None)):
3799
2513
  passport_data['error'] = 'expired_id'
@@ -3810,15 +2524,11 @@ class IdentityVerification:
3810
2524
 
3811
2525
  if passport_details.get("name_en", ""):
3812
2526
  passport_details['full_name_generic'] = passport_details.get("name_en","")
3813
-
3814
2527
  else:
3815
2528
  passport_details['full_name_generic'] = passport_details.get("first_name","") + " " + passport_details.get("middle_name","") + " " + passport_details.get("last_name","")
3816
2529
 
3817
2530
  passport_data.update(passport_details)
3818
2531
 
3819
-
3820
-
3821
-
3822
2532
  from idvpackage.ocr_utils import validation_checks_passport
3823
2533
  validation_results = validation_checks_passport(passport_data, 'passport_number', 'SDN')
3824
2534
  logging.info(f"validation_results: {json.dumps(validation_results, ensure_ascii=False, indent=2)}")
@@ -3828,27 +2538,18 @@ class IdentityVerification:
3828
2538
  from idvpackage.ocr_utils import get_name_match_mrz
3829
2539
  passport_data['is_name_match_mrz'], passport_data['name_mrz'] = get_name_match_mrz(passport_data, "passport")
3830
2540
 
3831
-
3832
2541
  if passport_data.get("issue_date"):
3833
2542
  passport_data["issuance_date"] = passport_details["issue_date"]
3834
2543
 
3835
-
3836
-
3837
-
3838
2544
  image = np.array(processed_passport)
3839
2545
 
3840
2546
  st = time.time()
3841
-
3842
2547
  doc_on_pp_result = "clear"
3843
2548
  screenshot_result = detect_screenshot(self.client, passport)
3844
2549
  photo_on_screen_result = "clear"
3845
2550
  blurred, glare = self.get_blurred_and_glared_for_doc(image)
3846
- valid_nationality_result = self.check_nationality_in_iso_list(
3847
- passport_details.get("nationality")
3848
- )
3849
- logging.info(
3850
- f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n"
3851
- )
2551
+ valid_nationality_result = self.check_nationality_in_iso_list(passport_details.get("nationality"))
2552
+ logging.info(f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n")
3852
2553
 
3853
2554
  st = time.time()
3854
2555
  front_face_locations, front_face_encodings = (
@@ -3857,6 +2558,11 @@ class IdentityVerification:
3857
2558
  logging.info(
3858
2559
  f"----------------Time taken for face extraction passport: {time.time() - st} seconds\n"
3859
2560
  )
2561
+
2562
+ if front_face_encodings is None or len(front_face_encodings) == 0:
2563
+ passport_data["error"] = "face_not_detected"
2564
+ logging.info("No face detected in passport image")
2565
+ return passport_data
3860
2566
 
3861
2567
  front_face_locations_str = json.dumps(
3862
2568
  [tuple(face_loc) for face_loc in front_face_locations]
@@ -3866,7 +2572,6 @@ class IdentityVerification:
3866
2572
  )
3867
2573
 
3868
2574
  passport_data_update = {
3869
- # 'back_tampered_result': tampered_result_back,
3870
2575
  "passport_data": "",
3871
2576
  "front_coloured": True,
3872
2577
  "back_coloured": True,
@@ -3889,11 +2594,10 @@ class IdentityVerification:
3889
2594
  }
3890
2595
 
3891
2596
  passport_data.update(passport_data_update)
3892
-
3893
2597
  non_optional_keys = [
3894
- "passport_number_mrz",
3895
- "dob_mrz",
3896
- "expiry_date_mrz",
2598
+ "passport_number",
2599
+ "dob",
2600
+ "expiry_date",
3897
2601
  "gender",
3898
2602
  ]
3899
2603
  empty_string_keys = [
@@ -3904,10 +2608,11 @@ class IdentityVerification:
3904
2608
 
3905
2609
  if empty_string_keys:
3906
2610
  passport_data["error"] = "missing_key_fields"
2611
+ logging.info(f"Empty keys found: {empty_string_keys}")
3907
2612
 
3908
2613
  except Exception as e:
3909
2614
  passport_data["error"] = "bad_image"
3910
- print(f"-------------->> Something went wrong error trace:: {e}")
2615
+ logging.info(f"-------------->> Something went wrong error trace:: {e}")
3911
2616
  passport_data["error_details"] = e
3912
2617
 
3913
2618
  return passport_data
@@ -3916,97 +2621,93 @@ class IdentityVerification:
3916
2621
  not nationality and country == "SYR"
3917
2622
  ):
3918
2623
  logging.info("-------------Working on SYR Passport \n")
3919
- from idvpackage.ocr_utils import (
3920
- detect_photo_on_screen,
3921
- detect_screenshot,
3922
- document_on_printed_paper,
3923
- )
3924
- # from idvpackage.syr_passport_extraction import (
3925
- # syr_passport_extraction_front,
3926
- # syr_passport_extraction_back,
3927
- # )
3928
-
3929
- from idvpackage.syr_passport_extraction import get_response_from_openai_syr
3930
-
3931
2624
  from idvpackage.common import load_and_process_image_deepface
2625
+ from idvpackage.ocr_utils import detect_screenshot
3932
2626
 
2627
+ from idvpackage.syr_passport_extraction import get_response_from_openai_syr
3933
2628
  passport_data = {"error": "", "doc_type": "passport"}
3934
2629
 
3935
2630
  try:
3936
2631
  st = time.time()
3937
2632
  processed_passport = self.image_conversion(passport)
3938
- # logging.info(
3939
- # f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n"
3940
- # )
3941
-
3942
- # st = time.time()
3943
- # id_infos = self.get_ocr_results(processed_passport)
3944
- # passport_text = id_infos[0].description
3945
- # logging.info(
3946
- # f"----------------Time taken for vision passport: {time.time() - st} seconds\n"
3947
- # )
3948
-
3949
- # # print(f"\nPassport text: {passport_text}\n")
3950
-
3951
- # pattern4 = r"(Syrian Arab Republic|REPUBLIQUE ARABE SYRIEH|Syrian|Syrienne|SYR)"
3952
- # k = re.search(pattern4, passport_text, re.IGNORECASE)
3953
-
3954
- # if not k:
3955
- # passport_data["error"] = "not_passport"
3956
- # return passport_data
3957
-
3958
2633
  st = time.time()
3959
- # passport_details = syr_passport_extraction_front(
3960
- # passport_text, self.gemini_key
3961
- # )
3962
2634
  passport_details = get_response_from_openai_syr(processed_passport, "page1", country, self.openai_key)
2635
+ logging.info(
2636
+ f"----------------Time taken for OpenAI and final extraction passport: {time.time() - st} seconds\n"
2637
+ )
2638
+ logging.info(f"passport_details: {json.dumps(passport_details, ensure_ascii=False, indent=2)}")
3963
2639
 
3964
2640
  if not passport_details['header_verified']:
3965
2641
  passport_data["error"] = "not_passport"
3966
2642
  return passport_data
3967
-
2643
+
2644
+ if passport_details.get('dob', None):
2645
+ from idvpackage.ocr_utils import is_age_18_above
2646
+ if not is_age_18_above(passport_details.get('dob', None)):
2647
+ passport_data['error'] = 'under_age'
2648
+ logging.info("Passport holder is under age")
2649
+ return passport_data
2650
+
2651
+ passport_details['full_name'] = passport_details.get('first_name','') + ' ' + passport_details.get('last_name','')
3968
2652
  passport_details['nationality'] = "SYR"
3969
2653
  passport_details['mrz'] = passport_details.get('mrz1','') + passport_details.get('mrz2','')
2654
+
2655
+ #check is passport_number is all digits and len is 9
2656
+ if passport_details.get('passport_number', '').isdigit() and len(passport_details.get('passport_number', '')) == 9:
2657
+ passport_details['id_number'] = passport_details.get('passport_number','')
2658
+ elif passport_details.get('passport_number_mrz', '').isdigit() and len(passport_details.get('passport_number_mrz', '')) == 9:
2659
+ passport_details['id_number'] = passport_details.get('passport_number_mrz','')
2660
+ else:
2661
+ passport_details['id_number'] = ''
2662
+
2663
+ mrz2 = passport_details.get('mrz2','')
2664
+ mrz2 = mrz2.strip()
2665
+ logging.info(f"mrz2: {mrz2}")
2666
+ mrz2 = mrz2[-16:]
2667
+ logging.info(f"mrz2: last 16 chars: {mrz2}")
2668
+ mrz2 = mrz2.split("<<")[0].replace('<','')
2669
+ passport_details['national_number_mrz_regex'] = mrz2
2670
+
2671
+ if passport_details.get('gender_mrz','').upper() =='M':
2672
+ passport_details['gender_mrz'] = 'MALE'
2673
+ elif passport_details.get('gender_mrz','').upper() =='F':
2674
+ passport_details['gender_mrz'] = 'FEMALE'
2675
+
2676
+ if passport_details.get('gender','').upper() =='M':
2677
+ passport_details['gender'] = 'MALE'
2678
+ elif passport_details.get('gender','').upper() =='F':
2679
+ passport_details['gender'] = 'FEMALE'
3970
2680
 
3971
- try:
3972
- passport_details['passport_number'] = passport_details.get('mrz2')[0:9]
3973
- passport_details['id_number'] = passport_details['passport_number']
3974
- except KeyError:
3975
- passport_details['passport_number'] = ""
3976
- passport_details['id_number'] = ""
3977
-
2681
+ from idvpackage.ocr_utils import get_name_match_mrz
2682
+ passport_details['is_name_match_mrz'], passport_details['name_mrz'] = get_name_match_mrz(passport_details, "passport")
2683
+
3978
2684
  logging.info(f"----------------Passport details front: {json.dumps(passport_details, indent=2, ensure_ascii=False)}\n")
3979
- logging.info(
3980
- f"----------------Time taken for data formatting and final extraction passport: {time.time() - st} seconds\n"
3981
- )
2685
+ logging.info(f"----------------Time taken for OpenAI and final extraction passport: {time.time() - st} seconds\n")
3982
2686
 
3983
2687
  passport_data.update(passport_details)
3984
2688
 
3985
2689
  image = np.array(processed_passport)
3986
2690
 
3987
2691
  st = time.time()
3988
- ## TODO: doc_on_pp and detect_photo_on_screen for LBN
3989
- # doc_on_pp_result = document_on_printed_paper(image)
2692
+
3990
2693
  doc_on_pp_result = "clear"
3991
2694
  screenshot_result = detect_screenshot(self.client, passport)
3992
- # photo_on_screen_result = detect_photo_on_screen(self.client, passport)
3993
2695
  photo_on_screen_result = "clear"
3994
2696
  blurred, glare = self.get_blurred_and_glared_for_doc(image)
3995
2697
  valid_nationality_result = self.check_nationality_in_iso_list(
3996
2698
  passport_details.get("nationality")
3997
2699
  )
3998
- logging.info(
3999
- f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n"
4000
- )
4001
-
2700
+ logging.info(f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n")
4002
2701
  st = time.time()
4003
- front_face_locations, front_face_encodings = (
4004
- load_and_process_image_deepface(passport)
4005
- )
4006
- logging.info(
4007
- f"----------------Time taken for face extraction passport: {time.time() - st} seconds\n"
4008
- )
2702
+ front_face_locations, front_face_encodings = (load_and_process_image_deepface(passport))
2703
+ logging.info(f"----------------Time taken for face extraction passport: {time.time() - st} seconds\n")
2704
+
4009
2705
 
2706
+ if front_face_encodings is None or len(front_face_encodings) == 0:
2707
+ passport_data["error"] = "face_not_detected"
2708
+ logging.info("No face detected in passport image")
2709
+ return passport_data
2710
+
4010
2711
  front_face_locations_str = json.dumps(
4011
2712
  [tuple(face_loc) for face_loc in front_face_locations]
4012
2713
  )
@@ -4015,7 +2716,6 @@ class IdentityVerification:
4015
2716
  )
4016
2717
 
4017
2718
  passport_data_update = {
4018
- # 'back_tampered_result': tampered_result_back,
4019
2719
  "passport_data": "",
4020
2720
  "front_coloured": True,
4021
2721
  "back_coloured": True,
@@ -4045,9 +2745,8 @@ class IdentityVerification:
4045
2745
  for key, value in passport_data.items()
4046
2746
  if key in non_optional_keys and value == ""
4047
2747
  ]
4048
-
4049
2748
  if empty_string_keys:
4050
- passport_data["error"] = "covered_photo"
2749
+ passport_data["error"] = "missing_key_fields"
4051
2750
 
4052
2751
  except Exception as e:
4053
2752
  passport_data["error"] = "bad_image"
@@ -4060,58 +2759,23 @@ class IdentityVerification:
4060
2759
  not nationality and country == "JOR"
4061
2760
  ):
4062
2761
  logging.info("-------------Working on JOR Passport \n")
4063
- from idvpackage.ocr_utils import (
4064
- detect_photo_on_screen,
4065
- detect_screenshot,
4066
- document_on_printed_paper,
4067
- )
4068
- from idvpackage.jor_passport_extraction import get_response_from_openai_jor
4069
2762
  from idvpackage.common import load_and_process_image_deepface
4070
-
2763
+ from idvpackage.jor_passport_extraction import get_response_from_openai_jor
2764
+ from idvpackage.ocr_utils import detect_screenshot
2765
+
4071
2766
  passport_data = {"error": "", "doc_type": "passport"}
4072
-
4073
2767
  try:
4074
2768
  st = time.time()
4075
2769
  logging.info("Performing image conversion for passport")
4076
2770
  processed_passport = self.image_conversion(passport)
4077
- logging.info("Image conversion completed for passport")
4078
-
4079
- logging.info(
4080
- f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n"
4081
- )
4082
-
4083
- # try:
4084
- # st = time.time()
4085
- # id_infos = self.get_ocr_results(processed_passport)
4086
- # passport_text = id_infos[0].description
4087
- # logging.info(f"Passport text extracted: {passport_text}")
4088
- # logging.info(
4089
- # f"----------------Time taken for vision passport: {time.time() - st} seconds\n"
4090
- # )
4091
- # except Exception as e:
4092
- # logging.error(f"Error during get_ocr_results: {e}")
4093
- # passport_text = ""
4094
- # pass
4095
-
4096
- # pattern4 = r"(Hashemite Kingdom of Jordan|Hashemite Kingdom|Jordan)"
4097
- # k = re.search(pattern4, passport_text, re.IGNORECASE)
4098
-
4099
- # if not k:
4100
- # passport_data["error"] = "not_passport"
4101
- # return passport_data
2771
+ logging.info(f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n")
4102
2772
 
4103
2773
  logging.info("Performing OCR for passport using openai.......")
4104
2774
  st = time.time()
4105
- passport_details = get_response_from_openai_jor(
4106
- processed_passport, "first", nationality, self.openai_key
4107
- )
2775
+ passport_details = get_response_from_openai_jor(processed_passport, "first", nationality, self.openai_key)
4108
2776
 
4109
- logging.info(
4110
- f"----------------Passport details: {json.dumps(passport_details, indent=4, ensure_ascii=False)}\n"
4111
- )
4112
- logging.info(
4113
- f"----------------Time taken for openai final extraction passport: {time.time() - st} seconds\n"
4114
- )
2777
+ logging.info(f"----------------Passport details: {json.dumps(passport_details, indent=4, ensure_ascii=False)}\n")
2778
+ logging.info(f"----------------Time taken for openai final extraction passport: {time.time() - st} seconds\n")
4115
2779
 
4116
2780
  if (
4117
2781
  passport_details.get("header_verified", "")
@@ -4120,77 +2784,59 @@ class IdentityVerification:
4120
2784
  passport_data["error"] = "not_passport"
4121
2785
  return passport_data
4122
2786
 
2787
+ from idvpackage.ocr_utils import is_expired_id
2788
+ if is_expired_id(passport_details.get('expiry_date', None)):
2789
+ passport_data['error'] = 'expired_id'
2790
+ logging.info(f"ID is expired with expiry date: {passport_details.get('expiry_date','')}")
2791
+ return passport_data
4123
2792
 
4124
- # append mrz1 and mrz2 from passport_details and make it as list
2793
+ if passport_details.get('dob', None):
2794
+ from idvpackage.ocr_utils import is_age_18_above
2795
+ if not is_age_18_above(passport_details.get('dob', None)):
2796
+ passport_data['error'] = 'under_age'
2797
+ logging.info("Passport holder is under age")
2798
+ return passport_data
2799
+
2800
+ # append mrz1 and mrz2 from passport_details and make it as list
2801
+ passport_details['full_name'] = passport_details.get('first_name','') + ' ' + passport_details.get('last_name','')
4125
2802
  passport_details['mrz'] = [passport_details.get('mrz1','') + ' ' + passport_details.get('mrz2','')]
4126
2803
  passport_details['issuing_place'] = passport_details.get('place_of_issue','')
2804
+ passport_details['issue_date'] = passport_details.get('issuing_date','')
2805
+
4127
2806
  passport_data.update(passport_details)
4128
- passport_data['issue_date'] = passport_data.get('issuing_date','')
4129
-
2807
+
4130
2808
  from idvpackage.ocr_utils import validation_checks_passport
4131
-
4132
-
4133
2809
  validation_results = validation_checks_passport(passport_data, 'passport_number', 'JOR')
4134
2810
  logging.info(f"validation_results: {json.dumps(validation_results, ensure_ascii=False, indent=2)}")
4135
2811
  passport_data.update(validation_results)
4136
2812
  logging.info(f"Passport data after validation checks: {json.dumps(passport_data, ensure_ascii=False, indent=2)}")
4137
- #extract name from mrz1
4138
- # mrz1 = passport_details.get('mrz1', '')
4139
-
4140
2813
 
4141
- # logging.info(f"MRZ1 extracted: {mrz1}")
4142
- # if mrz1:
4143
- # try:
4144
- # mrz1 = mrz1[5:]
4145
- # logging.info(f"Processed MRZ1: {mrz1}")
4146
- # name_mrz = []
4147
- # for word in mrz1.split("<"):
4148
- # if word and word.isalpha():
4149
- # name_mrz.append(word)
4150
-
4151
- # passport_data['name_mrz'] = " ".join(name_mrz)
4152
- # logging.info(f"Name from MRZ1 parts: {name_mrz}")
4153
-
4154
- # name = passport_data.get("name", "")
4155
- # name = name.split(" ")
4156
- # from idvpackage.ocr_utils import get_name_match_mrz
4157
- # passport_data['is_name_match_mrz'] = get_name_match_mrz(name, name_mrz)
4158
- # logging.info(f"is_name_match_mrz: {passport_data['is_name_match_mrz']}")
4159
-
4160
- # except Exception as e:
4161
- # logging.info(f"Error in processing Name from MRZ1: {e}")
4162
- # passport_data['is_name_match_mrz'] = False
4163
- # pass
4164
2814
  from idvpackage.ocr_utils import get_name_match_mrz
4165
2815
  passport_data['is_name_match_mrz'], passport_data['name_mrz'] = get_name_match_mrz(passport_data, "passport")
4166
2816
 
4167
-
4168
-
4169
2817
  image = np.array(processed_passport)
4170
2818
 
4171
2819
  st = time.time()
4172
- ## TODO: doc_on_pp and detect_photo_on_screen for LBN
4173
- # doc_on_pp_result = document_on_printed_paper(image)
4174
2820
  doc_on_pp_result = "clear"
4175
2821
  screenshot_result = detect_screenshot(self.client, passport)
4176
- # photo_on_screen_result = detect_photo_on_screen(self.client, passport)
4177
2822
  photo_on_screen_result = "clear"
4178
2823
  blurred, glare = self.get_blurred_and_glared_for_doc(image)
4179
2824
  valid_nationality_result = self.check_nationality_in_iso_list(
4180
2825
  passport_details.get("nationality")
4181
2826
  )
4182
- logging.info(
4183
- f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n"
4184
- )
2827
+ logging.info(f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n")
4185
2828
 
4186
2829
  st = time.time()
4187
2830
  front_face_locations, front_face_encodings = (
4188
2831
  load_and_process_image_deepface(passport)
4189
2832
  )
4190
- logging.info(
4191
- f"----------------Time taken for face extraction passport: {time.time() - st} seconds\n"
4192
- )
2833
+ logging.info(f"----------------Time taken for face extraction passport: {time.time() - st} seconds\n")
4193
2834
 
2835
+ if front_face_encodings is None or len(front_face_encodings) == 0:
2836
+ passport_data["error"] = "face_not_detected"
2837
+ logging.info("No face detected in passport image")
2838
+ return passport_data
2839
+
4194
2840
  front_face_locations_str = json.dumps(
4195
2841
  [tuple(face_loc) for face_loc in front_face_locations]
4196
2842
  )
@@ -4199,7 +2845,6 @@ class IdentityVerification:
4199
2845
  )
4200
2846
 
4201
2847
  passport_data_update = {
4202
- # 'back_tampered_result': tampered_result_back,
4203
2848
  "passport_data": "",
4204
2849
  "front_coloured": True,
4205
2850
  "back_coloured": True,
@@ -4231,7 +2876,7 @@ class IdentityVerification:
4231
2876
  ]
4232
2877
 
4233
2878
  if empty_string_keys:
4234
- passport_data["error"] = "covered_photo"
2879
+ passport_data["error"] = "missing_key_fields"
4235
2880
 
4236
2881
  except Exception as e:
4237
2882
  passport_data["error"] = "bad_image"
@@ -4244,44 +2889,16 @@ class IdentityVerification:
4244
2889
  not nationality and country == "PSE"
4245
2890
  ):
4246
2891
  logging.info("-------------Working on PSE Passport \n")
4247
- from idvpackage.ocr_utils import (
4248
- detect_photo_on_screen,
4249
- detect_screenshot,
4250
- document_on_printed_paper,
4251
- )
4252
- # from idvpackage.pse_passport_extraction import palestine_passport_extraction
4253
- from idvpackage.pse_passport_extraction import get_response_from_openai_pse
4254
-
4255
2892
  from idvpackage.common import load_and_process_image_deepface
4256
-
2893
+ from idvpackage.ocr_utils import detect_screenshot
2894
+ from idvpackage.pse_passport_extraction import get_response_from_openai_pse
4257
2895
  passport_data = {"error": "", "doc_type": "passport"}
4258
2896
 
4259
2897
  try:
4260
2898
  st = time.time()
4261
2899
  processed_passport = self.image_conversion(passport)
4262
- # logging.info(
4263
- # f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n"
4264
- # )
4265
-
4266
- # st = time.time()
4267
- # id_infos = self.get_ocr_results(processed_passport)
4268
- # passport_text = id_infos[0].description
4269
- # logging.info(
4270
- # f"----------------Time taken for vision passport: {time.time() - st} seconds\n"
4271
- # )
4272
-
4273
- # pattern4 = r"(PALESTINIAN AUTHORITY|PALESTINE|P<PSE|PSE)" # PASSPORT
4274
- # k = re.search(pattern4, passport_text, re.IGNORECASE)
4275
-
4276
- # if not k:
4277
- # passport_data["error"] = "not_passport"
4278
- # return passport_data
4279
-
4280
2900
  st = time.time()
4281
2901
  try:
4282
- # passport_details = palestine_passport_extraction(
4283
- # passport_text, self.gemini_key
4284
- # )
4285
2902
  passport_details = get_response_from_openai_pse(
4286
2903
  processed_passport,"first",nationality,self.openai_key
4287
2904
  )
@@ -4289,29 +2906,39 @@ class IdentityVerification:
4289
2906
  except Exception as e:
4290
2907
  logging.error(f"Error in PSE passport extraction: {e}")
4291
2908
  logging.info(
4292
- f"----------------Time taken for data formatting and final extraction passport: {time.time() - st} seconds\n"
2909
+ f"----------------Time taken for OpenAI and final extraction passport: {time.time() - st} seconds\n"
4293
2910
  )
4294
2911
 
4295
2912
  if passport_details.get("header_verified", "") == "False":
4296
2913
  passport_data["error"] = "not_passport"
4297
2914
  return passport_data
4298
2915
 
2916
+ if passport_details.get('dob', None):
2917
+ from idvpackage.ocr_utils import is_age_18_above
2918
+ if not is_age_18_above(passport_details.get('dob', None)):
2919
+ passport_data['error'] = 'under_age'
2920
+ logging.info("Passport holder is under age")
2921
+ return passport_data
2922
+
2923
+ from idvpackage.ocr_utils import is_expired_id
2924
+ if is_expired_id(passport_details.get('expiry_date', None)):
2925
+ passport_data['error'] = 'expired_id'
2926
+ logging.info(f"ID is expired with expiry date: {passport_details.get('expiry_date','')}")
2927
+ return passport_data
2928
+
4299
2929
  passport_details['nationality'] = 'PSE'
4300
-
2930
+ passport_details['full_name_openai'] = passport_details.get('full_name','')
2931
+ passport_details['full_name'] = passport_details.get('first_name','') + ' ' + passport_details.get('last_name','')
4301
2932
  passport_details['mrz'] = passport_details.get('mrz1','') + passport_details.get('mrz2','')
2933
+ passport_details['issue_date'] = passport_details.get('issuing_date','')
2934
+
4302
2935
 
4303
2936
  passport_data.update(passport_details)
4304
-
4305
-
4306
-
4307
2937
  image = np.array(processed_passport)
4308
2938
 
4309
2939
  st = time.time()
4310
- ## TODO: doc_on_pp and detect_photo_on_screen for LBN
4311
- # doc_on_pp_result = document_on_printed_paper(image)
4312
2940
  doc_on_pp_result = "clear"
4313
2941
  screenshot_result = detect_screenshot(self.client, passport)
4314
- # photo_on_screen_result = detect_photo_on_screen(self.client, passport)
4315
2942
  photo_on_screen_result = "clear"
4316
2943
  blurred, glare = self.get_blurred_and_glared_for_doc(image)
4317
2944
  valid_nationality_result = self.check_nationality_in_iso_list(
@@ -4320,7 +2947,6 @@ class IdentityVerification:
4320
2947
  logging.info(
4321
2948
  f"----------------Time taken for fraud detection attributes passport: {time.time() - st} seconds\n"
4322
2949
  )
4323
-
4324
2950
  st = time.time()
4325
2951
  front_face_locations, front_face_encodings = (
4326
2952
  load_and_process_image_deepface(passport)
@@ -4328,7 +2954,12 @@ class IdentityVerification:
4328
2954
  logging.info(
4329
2955
  f"----------------Time taken for face extraction passport: {time.time() - st} seconds\n"
4330
2956
  )
4331
-
2957
+
2958
+ if front_face_encodings is None or len(front_face_encodings) == 0:
2959
+ passport_data["error"] = "face_not_detected"
2960
+ logging.info("No face detected in passport image")
2961
+ return passport_data
2962
+
4332
2963
  front_face_locations_str = json.dumps(
4333
2964
  [tuple(face_loc) for face_loc in front_face_locations]
4334
2965
  )
@@ -4337,7 +2968,6 @@ class IdentityVerification:
4337
2968
  )
4338
2969
 
4339
2970
  passport_data_update = {
4340
- # 'back_tampered_result': tampered_result_back,
4341
2971
  "passport_data": "",
4342
2972
  "front_coloured": True,
4343
2973
  "back_coloured": True,
@@ -4361,15 +2991,23 @@ class IdentityVerification:
4361
2991
 
4362
2992
  passport_data.update(passport_data_update)
4363
2993
 
2994
+ from idvpackage.ocr_utils import validation_checks_passport
2995
+ validation_results = validation_checks_passport(passport_details, 'id_number', 'PSE')
2996
+ logging.info(f"validation_results: {json.dumps(validation_results, ensure_ascii=False, indent=2)}")
2997
+ passport_data.update(validation_results)
2998
+
2999
+ from idvpackage.ocr_utils import get_name_match_mrz
3000
+ passport_data['is_name_match_mrz'], passport_data['name_mrz'] = get_name_match_mrz(passport_data, "passport")
3001
+
3002
+
4364
3003
  non_optional_keys = ["passport_number", "dob", "expiry_date", "gender"]
4365
3004
  empty_string_keys = [
4366
3005
  key
4367
3006
  for key, value in passport_data.items()
4368
3007
  if key in non_optional_keys and value == ""
4369
3008
  ]
4370
-
4371
3009
  if empty_string_keys:
4372
- passport_data["error"] = "covered_photo"
3010
+ passport_data["error"] = "missing_key_fields"
4373
3011
 
4374
3012
  except Exception as e:
4375
3013
  passport_data["error"] = "bad_image"
@@ -4377,11 +3015,10 @@ class IdentityVerification:
4377
3015
 
4378
3016
  return passport_data
4379
3017
 
4380
- def exract_passport_info_back(self, passport, country, nationality, step_data=None):
3018
+ def extract_passport_info_back(self, passport, country, nationality, step_data=None):
4381
3019
  if (nationality and nationality == "SYR") or (
4382
3020
  not nationality and country == "SYR"
4383
3021
  ):
4384
- # from idvpackage.syr_passport_extraction import syr_passport_extraction_back
4385
3022
  from idvpackage.syr_passport_extraction import get_response_from_openai_syr
4386
3023
 
4387
3024
  passport_data = {"error": "", "doc_type": "passport"}
@@ -4389,34 +3026,33 @@ class IdentityVerification:
4389
3026
  try:
4390
3027
  st = time.time()
4391
3028
  processed_passport = self.image_conversion(passport)
4392
- # logging.info(
4393
- # f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n"
4394
- # )
4395
-
4396
- # st = time.time()
4397
- # id_infos = self.get_ocr_results(processed_passport)
4398
- # passport_text = id_infos[0].description
4399
- # logging.info(
4400
- # f"----------------Time taken for vision passport: {time.time() - st} seconds\n"
4401
- # )
4402
-
4403
- # # print(f"\nPassport text: {passport_text}\n")
4404
-
3029
+ logging.info(f"----------------Time taken for image conversion passport: {time.time() - st} seconds\n")
3030
+
4405
3031
  st = time.time()
4406
- # passport_details = syr_passport_extraction_back(
4407
- # passport_text, self.gemini_key
4408
- # )
4409
3032
  passport_details = get_response_from_openai_syr(processed_passport, "page2", country, self.openai_key)
4410
-
4411
3033
  logging.info(f"----------------Passport details back: {json.dumps(passport_details, indent=2, ensure_ascii=False)}\n")
4412
- logging.info(
4413
- f"----------------Time taken for data formatting and final extraction passport: {time.time() - st} seconds\n"
4414
- )
3034
+ logging.info(f"----------------Time taken for OpenAI and final extraction passport: {time.time() - st} seconds\n")
4415
3035
 
4416
3036
  passport_details['issuing_date'] = passport_details.get('issue_date','')
4417
3037
  passport_data.update(passport_details)
4418
-
4419
-
3038
+
3039
+ from idvpackage.ocr_utils import is_expired_id
3040
+ if is_expired_id(passport_details.get('expiry_date', None)):
3041
+ passport_data['error'] = 'expired_id'
3042
+ logging.info(f"ID is expired with expiry date: {passport_details.get('expiry_date','')}")
3043
+ return passport_data
3044
+
3045
+ temp_data = {}
3046
+ if step_data:
3047
+ temp_data.update(step_data)
3048
+ temp_data.update(passport_details)
3049
+
3050
+ logging.info(f"temp_data for validation: {json.dumps(temp_data, ensure_ascii=False, indent=2)}")
3051
+ from idvpackage.ocr_utils import validation_checks_passport
3052
+ validation_results = validation_checks_passport(temp_data, 'passport_number', 'SYR')
3053
+ logging.info(f"validation_results: {json.dumps(validation_results, ensure_ascii=False, indent=2)}")
3054
+ passport_data.update(validation_results)
3055
+
4420
3056
  non_optional_keys = ["issuing_date", "expiry_date"]
4421
3057
  empty_string_keys = [
4422
3058
  key
@@ -4425,7 +3061,7 @@ class IdentityVerification:
4425
3061
  ]
4426
3062
 
4427
3063
  if empty_string_keys:
4428
- passport_data["error"] = "covered_photo"
3064
+ passport_data["error"] = "missing_key_fields"
4429
3065
 
4430
3066
  except Exception as e:
4431
3067
  passport_data["error"] = "bad_image"
@@ -4433,25 +3069,8 @@ class IdentityVerification:
4433
3069
 
4434
3070
  return passport_data
4435
3071
 
4436
- def replace_keywords(self, report, target="consider", replacement="clear"):
4437
- if isinstance(report, dict):
4438
- # Iterate through dictionary items
4439
- for key, value in report.items():
4440
- # If the value matches the target, replace it
4441
- if value == target:
4442
- report[key] = replacement
4443
- else:
4444
- # Recursively call for nested dictionaries or lists
4445
- self.replace_keywords(value, target, replacement)
4446
- elif isinstance(report, list):
4447
- # If the report is a list, iterate through elements
4448
- for index, item in enumerate(report):
4449
- # Recursively call for each element
4450
- self.replace_keywords(item, target, replacement)
4451
-
4452
3072
  def extract_ocr_info(self, data, video, country, report_names, back_img=None):
4453
3073
 
4454
-
4455
3074
  try:
4456
3075
  if data.get("uae_pass_data", ""):
4457
3076
  uae_pass_data = data.get("uae_pass_data", {})
@@ -4505,11 +3124,6 @@ class IdentityVerification:
4505
3124
  if not data.get("gender") and data.get("gender_back"):
4506
3125
  data["gender"] = data.get("gender_back")
4507
3126
 
4508
- # validation_result = self.validate_fields_id(data, country)
4509
- # if not validation_result:
4510
- # tampering_result = 'consider'
4511
- # data['tampering_result'] = tampering_result
4512
-
4513
3127
  colour_picture = "consider"
4514
3128
  if data.get("front_coloured") and data.get("back_coloured"):
4515
3129
  colour_picture = "clear"
@@ -4553,7 +3167,7 @@ class IdentityVerification:
4553
3167
  selfie, front_face_locations, front_face_encodings
4554
3168
  )
4555
3169
  except Exception as e:
4556
- print("issue in extracting face and computing similarity")
3170
+ logging.info("issue in extracting face and computing similarity")
4557
3171
  selfie = None
4558
3172
  similarity = 0
4559
3173
 
@@ -4561,8 +3175,6 @@ class IdentityVerification:
4561
3175
  selfie = None
4562
3176
  similarity = 0
4563
3177
 
4564
- # front_face_locations, front_face_encodings = data.get('front_face_locations'), data.get('front_face_encodings')
4565
- # processed_selfie = self.process_image(selfie)
4566
3178
  if country == "SAU" or data.get("doc_type") == "passport":
4567
3179
  back_id_text = ""
4568
3180
  else:
@@ -4599,7 +3211,7 @@ class IdentityVerification:
4599
3211
  back_img,
4600
3212
  )
4601
3213
  else:
4602
- print(
3214
+ logging.info(
4603
3215
  f"\nNationality not present, picking country: {country} for generating Document Report"
4604
3216
  )
4605
3217
  document_report = form_final_data_document_report(
@@ -4627,25 +3239,18 @@ class IdentityVerification:
4627
3239
  if country == "IRQ" and nationality in ["SDN"]:
4628
3240
  document_report["properties"].pop("passport_number_mrz", None)
4629
3241
 
4630
- if "facial_similarity_video" in report_names:
3242
+ if 'facial_similarity_video' in report_names:
4631
3243
  if video:
4632
3244
  logging.info("--------------Checking for liveness-----------\n")
4633
- liveness_result = self.check_for_liveness(
4634
- similarity, video, face_match_threshold
4635
- )
4636
- logging.info(
4637
- f"--------------Liveness Result from portal video: {liveness_result}\n"
4638
- )
3245
+ liveness_result = self.check_for_liveness(video)
3246
+ logging.info(f"--------------Liveness Result from portal video: {liveness_result}\n")
4639
3247
  if country == "IRQ":
4640
3248
  liveness_result = "clear"
4641
- # print(f"LIVE RES: {liveness_result}")
4642
3249
  else:
4643
3250
  liveness_result = None
4644
3251
 
4645
3252
  if nationality:
4646
- print(
4647
- f"\nNationality present, pikcing nationality: {nationality} for generating Video Report"
4648
- )
3253
+ logging.info(f"Nationality present, picking nationality: {nationality} for generating Video Report")
4649
3254
  facial_report = form_final_facial_similarity_report(
4650
3255
  data,
4651
3256
  selfie,
@@ -4655,9 +3260,7 @@ class IdentityVerification:
4655
3260
  nationality,
4656
3261
  )
4657
3262
  else:
4658
- print(
4659
- f"\nNationality not present, pikcing country: {country} for generating Document Report"
4660
- )
3263
+ logging.info(f"Nationality not present, picking country: {country} for generating Video Report")
4661
3264
  facial_report = form_final_facial_similarity_report(
4662
3265
  data,
4663
3266
  selfie,
@@ -4667,10 +3270,7 @@ class IdentityVerification:
4667
3270
  country,
4668
3271
  )
4669
3272
 
4670
- logging.info(
4671
- f"--------------Time taken for Extract Ocr Function in IDV package: {time.time() - st} seconds\n"
4672
- )
4673
-
3273
+ logging.info(f"--------------Time taken for Extract Ocr Function in IDV package: {time.time() - st} seconds\n")
4674
3274
  return document_report, facial_report
4675
3275
 
4676
3276
  except Exception as e:
@@ -4678,15 +3278,13 @@ class IdentityVerification:
4678
3278
  raise Exception("Error occurred in extract_ocr_info")
4679
3279
 
4680
3280
  def generate_facial_report_portal(self, data, latest_portal_video):
4681
- from idvpackage.ocr_utils import form_final_facial_similarity_report
4682
3281
  from idvpackage.common import load_and_process_image_deepface_topup
3282
+ from idvpackage.ocr_utils import form_final_facial_similarity_report
4683
3283
 
4684
3284
  try:
4685
3285
  if latest_portal_video:
4686
3286
  logging.info("--------------Checking for liveness-----------\n")
4687
- latest_vid_liveness_result = self.check_for_liveness(
4688
- 0, latest_portal_video
4689
- )
3287
+ latest_vid_liveness_result = self.check_for_liveness(latest_portal_video)
4690
3288
  logging.info(
4691
3289
  f"--------------Liveness Result from portal video: {latest_vid_liveness_result}\n"
4692
3290
  )
@@ -4703,10 +3301,7 @@ class IdentityVerification:
4703
3301
  else:
4704
3302
  latest_selfie = latest_selfie_str
4705
3303
 
4706
-
4707
-
4708
3304
  onboarding_face_encodings = load_and_process_image_deepface_topup(initial_selfie)
4709
-
4710
3305
  top_up_face_encodings = load_and_process_image_deepface_topup(latest_selfie)
4711
3306
 
4712
3307
  if not onboarding_face_encodings:
@@ -4719,13 +3314,9 @@ class IdentityVerification:
4719
3314
  similarity_score = self.calculate_similarity(
4720
3315
  onboarding_face_encodings[0], top_up_face_encodings[0]
4721
3316
  )
4722
-
4723
3317
  similarity_score = min(1, similarity_score)
4724
3318
 
4725
- logging.info(
4726
- f"--------------Face Similarity Computed: {similarity_score}\n"
4727
- )
4728
-
3319
+ logging.info(f"--------------Face Similarity Computed: {similarity_score}\n")
4729
3320
 
4730
3321
  facial_report = form_final_facial_similarity_report(
4731
3322
  data,
@@ -4737,15 +3328,8 @@ class IdentityVerification:
4737
3328
  )
4738
3329
 
4739
3330
  logging.info(f"Facial Report: {facial_report}")
4740
-
4741
3331
  return facial_report
4742
3332
 
4743
3333
  except Exception as e:
4744
- logging.info(
4745
- f"--------------Error occurred in generate_facial_report_portal: {e}\n"
4746
- )
4747
- raise Exception("Error occurred in generate_facial_report for portal")
4748
-
4749
-
4750
-
4751
-
3334
+ logging.info(f"--------------Error occurred in generate_facial_report_portal: {e}\n")
3335
+ raise Exception("Error occurred in generate_facial_report for portal")