document-analyzer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ from .analyzers import (
2
+ DocumentAnalyzer,
3
+ CedulaAnalyzer,
4
+ PassportAnalyzer,
5
+ analyze_document,
6
+ analyze_cedula,
7
+ analyze_passport,
8
+ )
9
+ from .services import PaddleOCRService
10
+ from .startup import startup_services
11
+ from .config import (
12
+ DocumentAnalyzerLoggerAdapter,
13
+ logger,
14
+ )
15
+
16
+ __all__ = [
17
+ # Core analyzers
18
+ "DocumentAnalyzer",
19
+ "CedulaAnalyzer",
20
+ "PassportAnalyzer",
21
+ # Analyzer convenience functions
22
+ "analyze_document",
23
+ "analyze_cedula",
24
+ "analyze_passport",
25
+ # Services
26
+ "PaddleOCRService",
27
+ "startup_services",
28
+ # Logging
29
+ "DocumentAnalyzerLoggerAdapter",
30
+ "logger",
31
+ ]
@@ -0,0 +1,5 @@
1
+ import sys
2
+ from .cli import main
3
+
4
+ if __name__ == "__main__":
5
+ sys.exit(main())
@@ -0,0 +1,14 @@
1
+ from .document_analyzer import DocumentAnalyzer, analyze_document
2
+ from .cedula_analyzer import CedulaAnalyzer, analyze_cedula
3
+ from .passport_analyzer import PassportAnalyzer, analyze_passport
4
+
5
+ __all__ = [
6
+ # Core analyzers
7
+ "DocumentAnalyzer",
8
+ "CedulaAnalyzer",
9
+ "PassportAnalyzer",
10
+ # Convenience functions
11
+ "analyze_document",
12
+ "analyze_cedula",
13
+ "analyze_passport",
14
+ ]
@@ -0,0 +1,412 @@
1
+ import re
2
+ import cv2
3
+ import time
4
+ import base64
5
+ import numpy as np
6
+
7
+ from ..config import DocumentAnalyzerLoggerAdapter, logger
8
+ from ..utils import (
9
+ ensure_bytesio,
10
+ preprocess_image,
11
+ identify_signature_box,
12
+ extract_signature_image,
13
+ extract_data_with_boxes,
14
+ convert_spanish_date_to_english,
15
+ )
16
+
17
+ # Start and end messages for logging
18
+ START_MSG = "======= CedulaAnalyzer Started ======="
19
+ END_MSG = "======= CedulaAnalyzer Ended ======="
20
+ ERROR_END_MSG = "======= CedulaAnalyzer Ended With Error ======="
21
+
22
+
23
class CedulaAnalyzer:
    """Analyzer for Panamanian cédula (identity card) images.

    The analyzer decodes the input image, runs OCR on it, parses the
    recognised text into document fields (dates, place of birth, nationality,
    ID number) and tries to locate and crop the handwritten signature.

    Attributes:
        start_time (float): ``time.time()`` captured at construction; used to
            report the elapsed analysis time.
        user_email (str | None): User email attached to every log record.
        logger (DocumentAnalyzerLoggerAdapter): Logger carrying the user context.
        cedula_np (np.ndarray): Image decoded with ``cv2.imdecode`` and
            ``cv2.IMREAD_COLOR``.
        ocr: OCR engine used for text extraction (the provided instance or
            the shared Spanish ``"es"`` instance).

    Examples:
        >>> analyzer = CedulaAnalyzer("cedula_image.jpg", "user@example.com")
        >>> results = analyzer.analyze_cedula()
        >>> print(results['cedula_info']['id_number'])
        >>> # Using with file-like object
        >>> with open("cedula.jpg", "rb") as f:
        ...     analyzer = CedulaAnalyzer(f)
        ...     results = analyzer.analyze_cedula()
    """

    # Patterns are compiled once instead of once per OCR item.
    # ID number: [letters or digits]-[digits]-[digits], matched on upper-cased text.
    _CEDULA_NUMBER_RE = re.compile(r"([A-Z]+|\d+)-(\d+)-(\d+)")
    # Dates such as 14-AGO-1947 (DD-MMM-YYYY).
    _DATE_RE = re.compile(r"(\d{2}-\w{3}-\d{4})")
    # Field labels are removed case-insensitively so the value keeps the
    # casing the OCR produced while the label is stripped regardless of case.
    _BIRTHPLACE_LABEL_RE = re.compile(r"LUGAR DE NACIMIENTO:", re.IGNORECASE)
    _NATIONALITY_LABEL_RE = re.compile(r"NACIONALIDAD:", re.IGNORECASE)

    def __init__(
        self,
        cedula_file,
        user_email=None,
        ocr_instance=None,
        normalize_input=True,
        preprocess_image=True,
    ):
        """Load the cédula image and select the OCR engine.

        Args:
            cedula_file: Input cédula image; anything ``ensure_bytesio``
                accepts (documented upstream as file path, file-like object
                or BytesIO).
            user_email (str, optional): User email for logging context.
            ocr_instance (optional): Pre-initialized OCR instance. When
                omitted, ``PaddleOCRService.get_instance("es")`` is used.
            normalize_input (bool): Accepted for interface compatibility;
                not read by this class.
            preprocess_image (bool): Accepted for interface compatibility;
                not read by this class (preprocessing always runs).

        Raises:
            ValueError: If the image bytes cannot be decoded.
            Exception: Any other error raised while reading or decoding the
                input is logged and re-raised unchanged.
        """
        self.start_time = time.time()
        self.user_email = user_email

        # Custom logger adapter
        self.logger = DocumentAnalyzerLoggerAdapter(logger, {"user_email": user_email})

        try:
            # Convert to BytesIO (if not already)
            cedula_stream = ensure_bytesio(cedula_file)
            # Decode the raw bytes into an OpenCV image
            self.cedula_np = cv2.imdecode(
                np.frombuffer(cedula_stream.read(), np.uint8), cv2.IMREAD_COLOR
            )

            if self.cedula_np is None:
                raise ValueError(
                    "Could not decode image - invalid format or corrupted file"
                )
        except Exception as e:
            self.logger.error(f"Failed to load input file: {e}")
            raise

        # Use provided OCR instance or get default
        if ocr_instance is not None:
            self.ocr = ocr_instance
            self.logger.debug("Using provided PaddleOCR instance for cedula analysis")
        else:
            from ..services.paddleocr_service import PaddleOCRService

            self.ocr = PaddleOCRService.get_instance("es")
            self.logger.debug(
                "Using default Spanish 'es' PaddleOCR instance for cedula analysis"
            )

    def parse_cedula_information(self, extracted_data):
        """Parse the cédula fields out of OCR text items.

        Args:
            extracted_data (list): OCR items; each must be a mapping with a
                ``"text"`` string. Other keys are ignored here.

        Returns:
            dict: Parsed values (empty string when a field is not found):
                - fecha_nacimiento (str): Birth date as printed (DD-MMM-YYYY)
                - lugar_nacimiento (str): Place of birth
                - nacionalidad (str): Nationality
                - fecha_expiracion (str): Expiry date as printed
                - cedula_number (str): First ID-number-like match

        Note:
            - Dates are kept in their original form (e.g. "AGO" for August).
            - Labels are detected and stripped case-insensitively.
            - A date on a line mentioning "NACIMIENTO" / "EXPIRA" is assigned
              accordingly; an unlabeled date fills the birth date first, then
              the expiry date.

        Examples:
            >>> info = analyzer.parse_cedula_information(
            ...     [{"text": "8-123-4567"}, {"text": "14-AGO-1947"}]
            ... )
            >>> print(info['cedula_number'])     # "8-123-4567"
            >>> print(info['fecha_nacimiento'])  # "14-AGO-1947"
        """
        self.logger.debug("Starting cedula information parsing")

        cedula_info = {
            "fecha_nacimiento": "",  # Date of birth
            "lugar_nacimiento": "",  # Place of birth
            "nacionalidad": "",  # Nationality
            "fecha_expiracion": "",  # Expiry date
            "cedula_number": "",  # ID number
        }

        for item in extracted_data:
            original_text = item["text"].strip()
            text = original_text.upper()

            # Skip very short text
            if len(text) < 2:
                continue

            # ID number: only the first match in the document is kept.
            if not cedula_info["cedula_number"]:
                cedula_match = self._CEDULA_NUMBER_RE.search(text)
                if cedula_match:
                    cedula_info["cedula_number"] = "-".join(cedula_match.groups())
                    self.logger.info(
                        f"Found cedula number: '{cedula_info['cedula_number']}'"
                    )

            # Dates (format: DD-MMM-YYYY, e.g., 14-AGO-1947)
            date_match = self._DATE_RE.search(original_text)
            if date_match:
                date_str = date_match.group(1)

                # A label on the same line decides which date this is.
                if "NACIMIENTO" in text:
                    cedula_info["fecha_nacimiento"] = date_str
                    self.logger.info(f"Found birth date: '{date_str}'")
                elif "EXPIRA" in text:
                    cedula_info["fecha_expiracion"] = date_str
                    self.logger.info(f"Found expiry date: '{date_str}'")
                elif not cedula_info["fecha_nacimiento"]:
                    # Unlabeled: the first such date is taken as the birth date
                    cedula_info["fecha_nacimiento"] = date_str
                    self.logger.info(f"Assumed birth date: '{date_str}'")
                elif not cedula_info["fecha_expiracion"]:
                    # ... and the next one as the expiry date
                    cedula_info["fecha_expiracion"] = date_str
                    self.logger.info(f"Assumed expiry date: '{date_str}'")

            # Place of birth ("LUGAR DE NACIMIENTO:" label anywhere in the line)
            if "LUGAR DE NACIMIENTO:" in text and not cedula_info["lugar_nacimiento"]:
                place = self._BIRTHPLACE_LABEL_RE.sub("", original_text).strip()
                if place:
                    cedula_info["lugar_nacimiento"] = place
                    self.logger.info(f"Found place of birth: '{place}'")

            # Nationality ("NACIONALIDAD:" label anywhere in the line)
            if "NACIONALIDAD:" in text and not cedula_info["nacionalidad"]:
                nationality = self._NATIONALITY_LABEL_RE.sub("", original_text).strip()
                if nationality:
                    cedula_info["nacionalidad"] = nationality
                    self.logger.info(f"Found nationality: '{nationality}'")

        self.logger.debug(f"Parsed cedula info: {cedula_info}")

        return cedula_info

    @staticmethod
    def _empty_result(error=None):
        """Build the result returned when nothing could be extracted."""
        result = {
            "success": "none",
            "cedula_info": {},
            "signature": None,
            "raw_extracted_data": [],
        }
        if error is not None:
            result["error"] = error
        return result

    def _load_working_image(self):
        """Return a private copy of the image to analyze."""
        image_path_or_array = self.cedula_np
        if isinstance(image_path_or_array, str):
            # Only reachable if ``cedula_np`` was replaced with a path after
            # construction; ``__init__`` always stores a decoded array.
            self.logger.debug(f"Loading image from path: '{image_path_or_array}'")
            image = cv2.imread(image_path_or_array)
            if image is None:
                self.logger.error(f"Couldn't load image from '{image_path_or_array}'")
                raise ValueError(f"Couldn't load image from '{image_path_or_array}'")
            return image

        self.logger.debug("Using provided image array")
        return image_path_or_array.copy()

    def _encode_signature(self, signature_image):
        """Encode a cropped signature as base64 PNG text, or None on failure."""
        try:
            encoded_ok, buffer = cv2.imencode(".png", signature_image)
            # imencode signals failure through its first return value.
            if not encoded_ok:
                raise ValueError("PNG encoding failed")
            signature_base64 = base64.b64encode(buffer).decode("utf-8")
            self.logger.debug("Signature converted to base64")
            return signature_base64
        except Exception as e:
            self.logger.error(f"Failed to convert signature to base64: {str(e)}")
            return None

    def analyze_cedula(self):
        """Run the full cédula analysis pipeline.

        Steps: copy the image, preprocess it, run OCR, parse the fields,
        convert dates with ``convert_spanish_date_to_english``, then locate,
        crop and base64-encode the signature.

        Returns:
            dict: Analysis results containing:
                - success (str): "both", "cedula_info", "signature" or "none"
                - cedula_info (dict): ``type``, ``dob``, ``pob``,
                  ``nationality``, ``expiry``, ``id_number`` (empty dict when
                  nothing was extracted)
                - signature (str | None): Base64-encoded PNG of the signature
                - raw_extracted_data (list): OCR items, for debugging
                - error (str): Present only when an exception interrupted the
                  analysis

        Note:
            ``success`` is "both" / "cedula_info" only when ``dob``, ``pob``,
            ``expiry`` and ``id_number`` are all non-empty; ``nationality``
            is not required.

            Exceptions are caught, logged and reported through the ``error``
            key; this method does not raise.

        Examples:
            >>> analyzer = CedulaAnalyzer("cedula.jpg")
            >>> result = analyzer.analyze_cedula()
            >>> if result['success'] == 'both':
            ...     print(f"ID: {result['cedula_info']['id_number']}")
            ...     with open("signature.png", "wb") as f:
            ...         f.write(base64.b64decode(result['signature']))
        """
        try:
            self.logger.info(START_MSG)

            image = self._load_working_image()

            # Preprocess the image
            processed_image = preprocess_image(image, logger=self.logger)

            # Extract data with bounding boxes
            extracted_data = extract_data_with_boxes(
                processed_image, ocr=self.ocr, logger=self.logger
            )

            if not extracted_data:
                self.logger.warning("Couldn't extract data from the cedula image")
                self.logger.info(ERROR_END_MSG)
                return self._empty_result()

            self.logger.info(f"Extracted {len(extracted_data)} data boxes")

            # Parse cédula information
            raw_cedula_info = self.parse_cedula_information(extracted_data)

            # Convert to desired field names
            cedula_info = {
                "type": "cedula",
                "dob": convert_spanish_date_to_english(
                    raw_cedula_info.get("fecha_nacimiento", "")
                ),
                "pob": raw_cedula_info.get("lugar_nacimiento", ""),
                "nationality": raw_cedula_info.get("nacionalidad", ""),
                "expiry": convert_spanish_date_to_english(
                    raw_cedula_info.get("fecha_expiracion", "")
                ),
                "id_number": raw_cedula_info.get("cedula_number", ""),
            }

            # Signature is located on the original (unprocessed) image.
            signature_box = identify_signature_box(
                extracted_data, image.shape, logger=self.logger
            )
            signature_base64 = None

            if signature_box:
                self.logger.info("Signature box identified")
                signature_image = extract_signature_image(
                    image, signature_box, logger=self.logger
                )
                if signature_image is not None:
                    signature_base64 = self._encode_signature(signature_image)
                else:
                    self.logger.warning("Failed to extract signature image")
            else:
                self.logger.warning("No signature box identified")

            # Determine success status
            has_all_cedula_fields = all(
                cedula_info[field] for field in ("dob", "pob", "expiry", "id_number")
            )
            has_signature = signature_base64 is not None

            if has_all_cedula_fields and has_signature:
                success_status = "both"
            elif has_all_cedula_fields:
                success_status = "cedula_info"
            elif has_signature:
                success_status = "signature"
            else:
                success_status = "none"

            # Only report whether a signature was obtained: the encoded image
            # is large and is biometric data that does not belong in logs.
            signature_state = "extracted" if has_signature else "missing"
            self.logger.info(
                f"Success: '{success_status.capitalize()}' | Date of Birth: '{cedula_info['dob']}' "
                f"| Place of Birth: '{cedula_info['pob']}' | Nationality: '{cedula_info['nationality']}' "
                f"| Expiry: '{cedula_info['expiry']}' | ID Number: '{cedula_info['id_number']}' "
                f"| Signature: '{signature_state}'"
            )

            elapsed_time = time.time() - self.start_time
            self.logger.info(f"Cedula analysis took: {elapsed_time:.2f} seconds")
            self.logger.info(END_MSG)

            return {
                "success": success_status,
                "cedula_info": cedula_info,
                "signature": signature_base64,
                "raw_extracted_data": extracted_data,  # For debugging
            }

        except Exception as e:
            self.logger.error(f"Error in CedulaAnalyzer: {str(e)}")
            self.logger.info(ERROR_END_MSG)
            return self._empty_result(error=str(e))
397
+
398
+
399
+ # Convenience function for easy import and use
400
def analyze_cedula(cedula_file, user_email=None, ocr_instance=None):
    """Analyze a cédula image in a single call.

    Builds a ``CedulaAnalyzer`` for the given input and immediately runs its
    ``analyze_cedula`` method. Exceptions raised while constructing the
    analyzer propagate to the caller.

    Args:
        cedula_file: Input cedula image, forwarded to ``CedulaAnalyzer``.
        user_email (str, optional): User email used as logging context.
        ocr_instance (optional): OCR instance to use instead of the default.

    Returns:
        dict: The result dictionary produced by ``CedulaAnalyzer.analyze_cedula``.
    """
    return CedulaAnalyzer(
        cedula_file,
        user_email=user_email,
        ocr_instance=ocr_instance,
    ).analyze_cedula()
@@ -0,0 +1,187 @@
1
+ import cv2
2
+ import time
3
+ import numpy as np
4
+
5
+ from ..utils import ensure_bytesio
6
+ from ..config import (
7
+ DocumentAnalyzerLoggerAdapter,
8
+ logger,
9
+ CEDULA_INDICATORS,
10
+ PASSPORT_INDICATORS,
11
+ )
12
+
13
+ # Start and end messages for logging
14
+ START_MSG = "======= DocumentAnalyzer Started ======="
15
+ END_MSG = "======= DocumentAnalyzer Ended ======="
16
+ ERROR_END_MSG = "======= DocumentAnalyzer Ended With Error ======="
17
+
18
+
19
class DocumentAnalyzer:
    """Unified analyzer for cédula and passport images.

    The document type is detected from OCR text first; the image is then
    handed to ``CedulaAnalyzer`` or ``PassportAnalyzer`` and the result is
    returned with the type-specific info stored under ``document_info``.

    Attributes:
        start_time (float): ``time.time()`` captured at construction.
        document_file: The input exactly as supplied by the caller.
        user_email (str | None): User email attached to every log record.
        logger (DocumentAnalyzerLoggerAdapter): Logger carrying the user context.
        document_np (np.ndarray): Image decoded with ``cv2.imdecode``.
        ocr: OCR engine used for type detection (the provided instance or the
            shared Spanish ``"es"`` instance).
    """

    def __init__(
        self,
        document_file,
        user_email=None,
        ocr_instance=None,
        normalize_input=True,
        preprocess_image=True,
    ):
        """Load the document image and select the OCR engine.

        Args:
            document_file: Document image; anything ``ensure_bytesio`` accepts.
            user_email: Optional user email for logging.
            ocr_instance: Optional pre-initialized OCR instance.
            normalize_input (bool): Accepted for interface compatibility;
                not read by this class.
            preprocess_image (bool): Accepted for interface compatibility;
                not read by this class.

        Raises:
            ValueError: If the image bytes cannot be decoded.
            Exception: Any other error raised while reading or decoding the
                input is logged and re-raised unchanged.
        """
        self.start_time = time.time()
        self.document_file = document_file
        self.user_email = user_email

        # Custom logger adapter
        self.logger = DocumentAnalyzerLoggerAdapter(
            logger, {"user_email": self.user_email}
        )
        self.logger.info(START_MSG)

        try:
            # Load and prepare image for type detection
            document_stream = ensure_bytesio(self.document_file)
            self.document_np = cv2.imdecode(
                np.frombuffer(document_stream.read(), np.uint8), cv2.IMREAD_COLOR
            )

            if self.document_np is None:
                raise ValueError(
                    "Could not decode image - invalid format or corrupted file"
                )
        except Exception as e:
            # Close the log section opened by START_MSG before propagating.
            self.logger.error(f"Failed to load input file: {e}")
            self.logger.info(ERROR_END_MSG)
            raise

        # Use provided OCR instance or get default
        if ocr_instance is not None:
            self.ocr = ocr_instance
            self.logger.debug(
                "Using provided PaddleOCR instance for document detection"
            )
        else:
            from ..services.paddleocr_service import PaddleOCRService

            self.ocr = PaddleOCRService.get_instance("es")
            self.logger.debug(
                "Using default Spanish 'es' PaddleOCR instance for document detection"
            )

    def detect_document_type(self):
        """Classify the loaded image as a cédula or a passport.

        A recognised line longer than 20 characters containing ``<`` is taken
        as a machine-readable zone and yields "passport" immediately.
        Otherwise the upper-cased text is checked against
        ``CEDULA_INDICATORS`` and ``PASSPORT_INDICATORS`` and the type with
        strictly more hits wins.

        Returns:
            str: "cedula", "passport", or "unknown" (no text, a tie, no
            indicator found, or any error during detection).
        """
        self.logger.debug("Starting document type detection")

        try:
            # Get text from image
            results = self.ocr.predict(self.document_np)
            if not results or not results[0]:
                self.logger.warning("No text detected for document type detection")
                return "unknown"

            rec_texts = results[0]["rec_texts"]
            all_text = " ".join(text.upper() for text in rec_texts).strip()

            self.logger.debug(f"Detected text sample: '{all_text[:100]}...'")

            # Check for MRZ pattern (strong passport indicator)
            if any("<" in text and len(text) > 20 for text in rec_texts):
                self.logger.info("MRZ pattern detected - identifying as passport")
                return "passport"

            # Count indicators
            cedula_score = sum(
                1 for indicator in CEDULA_INDICATORS if indicator in all_text
            )
            passport_score = sum(
                1 for indicator in PASSPORT_INDICATORS if indicator in all_text
            )

            self.logger.debug(
                f"Cedula score: {cedula_score}, Passport score: {passport_score}"
            )

            # Scores are non-negative counts, so a strict win implies > 0.
            if cedula_score > passport_score:
                self.logger.info("Document identified as cedula")
                return "cedula"
            if passport_score > cedula_score:
                self.logger.info("Document identified as passport")
                return "passport"

            self.logger.warning("Document type could not be determined confidently")
            return "unknown"

        except Exception as e:
            self.logger.error(f"Error in document type detection: {str(e)}")
            return "unknown"

    def analyze_document(self):
        """Detect the document type and run the matching analyzer.

        Returns:
            dict: Result of the type-specific analyzer with its
            ``cedula_info`` / ``passport_info`` entry renamed to
            ``document_info``. When the type is unknown or an exception
            occurs, ``success`` is "none", ``document_info`` is empty,
            ``signature`` is None and ``raw_extracted_data`` is empty; on an
            exception an ``error`` message is included as well.
        """
        try:
            # Detect document type
            doc_type = self.detect_document_type()

            if doc_type == "unknown":
                self.logger.warning("Could not determine document type")
                self.logger.info(ERROR_END_MSG)
                return {
                    "success": "none",
                    "document_info": {},
                    "signature": None,
                    "raw_extracted_data": [],
                }

            # Rewind the input so the delegated analyzer reads it from the
            # start. Inputs without ``seek`` (e.g. a path string) need no
            # rewinding and must not abort the analysis.
            rewind = getattr(self.document_file, "seek", None)
            if callable(rewind):
                rewind(0)

            if doc_type == "cedula":
                from .cedula_analyzer import CedulaAnalyzer

                # Reuse the engine chosen at construction so a caller-supplied
                # OCR instance is honoured for the actual analysis too.
                analyzer = CedulaAnalyzer(
                    self.document_file, self.user_email, ocr_instance=self.ocr
                )
                results = analyzer.analyze_cedula()
                results = results.copy()
                results["document_info"] = results.pop("cedula_info")
            elif doc_type == "passport":
                from .passport_analyzer import PassportAnalyzer

                # NOTE(review): PassportAnalyzer's signature and default OCR
                # language are not visible here, so no OCR instance is
                # forwarded - confirm whether it should be.
                analyzer = PassportAnalyzer(self.document_file, self.user_email)
                results = analyzer.analyze_passport()
                results = results.copy()
                results["document_info"] = results.pop("passport_info")

            elapsed_time = time.time() - self.start_time
            self.logger.info(f"Document analysis took: {elapsed_time:.2f} seconds")
            self.logger.info(END_MSG)

            return results

        except Exception as e:
            self.logger.error(f"Error in DocumentAnalyzer: {str(e)}")
            self.logger.info(ERROR_END_MSG)
            return {
                "success": "none",
                "document_info": {},
                "signature": None,
                "raw_extracted_data": [],
                "error": str(e),
            }
172
+
173
+
174
+ # Convenience function for easy import and use
175
def analyze_document(document_file, user_email=None, ocr_instance=None):
    """Analyze an identity document image in a single call.

    Builds a ``DocumentAnalyzer`` for the given input and immediately runs
    its ``analyze_document`` method. Exceptions raised while constructing the
    analyzer propagate to the caller.

    Args:
        document_file: Input document image, forwarded to ``DocumentAnalyzer``.
        user_email (str, optional): User email used as logging context.
        ocr_instance (optional): OCR instance to use instead of the default.

    Returns:
        dict: The result dictionary produced by
        ``DocumentAnalyzer.analyze_document``.
    """
    return DocumentAnalyzer(
        document_file,
        user_email=user_email,
        ocr_instance=ocr_instance,
    ).analyze_document()