document-analyzer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,294 @@
1
+ import cv2
2
+ import time
3
+ import numpy as np
4
+
5
+ from ..config import DocumentAnalyzerLoggerAdapter, logger
6
+ from ..utils import (
7
+ ensure_bytesio,
8
+ preprocess_image,
9
+ extract_mrz_data,
10
+ extract_place_of_birth,
11
+ extract_data_with_boxes,
12
+ detect_passport_language,
13
+ )
14
+
15
# Banner messages written to the log by PassportAnalyzer.analyze_passport:
# START_MSG when a run begins, END_MSG when it completes normally, and
# ERROR_END_MSG when the run ends inside the exception handler.
START_MSG = "======= PassportAnalyzer Started ======="
END_MSG = "======= PassportAnalyzer Ended ======="
ERROR_END_MSG = "======= PassportAnalyzer Ended With Error ======="
19
+
20
+
21
class PassportAnalyzer:
    """A comprehensive analyzer for passport documents.

    This class provides complete functionality for analyzing passports including
    OCR text extraction, MRZ parsing, and field extraction.

    The analyzer can process various image formats and extract key information:
    - Personal details (dates, places, nationality)
    - Document identifiers (passport numbers, expiry dates)
    - MRZ data parsing

    Attributes:
        start_time (float): ``time.time()`` captured at construction; used to
            report the elapsed time at the end of a successful analysis.
        logger (DocumentAnalyzerLoggerAdapter): Logger adapter carrying the
            ``user_email`` passed to the constructor as context.
        passport_np (np.ndarray): Image decoded with ``cv2.IMREAD_COLOR``.
        ocr: OCR engine used for text extraction; either the instance given to
            the constructor or the one returned by
            ``PaddleOCRService.get_instance`` for the detected language.

    Examples:
        >>> analyzer = PassportAnalyzer("passport_image.jpg", "user@example.com")
        >>> results = analyzer.analyze_passport()
        >>> print(results['passport_info']['id_number'])
    """

    def __init__(
        self,
        passport_file,
        user_email=None,
        ocr_instance=None,
        lang_detector_instance=None,
        normalize_input=True,
        preprocess_image=True,
    ):
        """Initialize the PassportAnalyzer with an image file.

        Args:
            passport_file: Input passport image in various formats:
                - File path (str)
                - File-like object (Django upload, etc.)
                - BytesIO object
            user_email (str, optional): User email for logging context.
            ocr_instance (PaddleOCR, optional): Pre-initialized PaddleOCR instance.
                If not provided, language will be detected and appropriate model used.
            lang_detector_instance: Language detector instance for passport language
                detection; forwarded to ``detect_passport_language`` as ``ocr_instance``.
            normalize_input (bool): Accepted for interface compatibility; this
                class does not currently read it.
            preprocess_image (bool): When False, ``analyze_passport`` skips the
                ``preprocess_image`` step and runs OCR on the decoded image.

        Raises:
            ValueError: If image cannot be decoded or is corrupted.
            Exception: Anything raised while reading the input (for example by
                ``ensure_bytesio``) is logged and re-raised unchanged.
        """
        self.start_time = time.time()

        # Custom logger adapter
        self.logger = DocumentAnalyzerLoggerAdapter(logger, {"user_email": user_email})

        # The parameter name shadows the imported preprocess_image() helper inside
        # this method only, so keep the flag under a distinct attribute name.
        self._preprocess_enabled = bool(preprocess_image)

        try:
            # Convert to BytesIO (if not already)
            passport_stream = ensure_bytesio(passport_file)
            # Read image bytes into OpenCV-compatible format (BGR)
            self.passport_np = cv2.imdecode(
                np.frombuffer(passport_stream.read(), np.uint8), cv2.IMREAD_COLOR
            )

            if self.passport_np is None:
                raise ValueError(
                    "Could not decode image - invalid format or corrupted file"
                )
        except Exception as e:
            self.logger.error(f"Failed to load input file: {e}")
            raise

        # Use provided OCR instance or detect language and use appropriate model
        if ocr_instance is not None:
            # Explicit OCR instance takes priority
            self.ocr = ocr_instance
            self.logger.debug("Using provided PaddleOCR instance for passport analysis")
        else:
            # Always run language detection (except if explicit OCR provided).
            # Rewind only inputs that support it: a str path has no seek(), and
            # calling it unconditionally raised AttributeError for path inputs.
            rewind = getattr(passport_file, "seek", None)
            if callable(rewind):
                rewind(0)
            detected_lang = detect_passport_language(
                passport_file, ocr_instance=lang_detector_instance, logger=self.logger
            )
            self.logger.info(f"Detected passport language: '{detected_lang}'")

            # Imported here rather than at module level, as in the original code
            # (presumably to avoid an import cycle - confirm before moving).
            from ..services.paddleocr_service import PaddleOCRService

            self.ocr = PaddleOCRService.get_instance(detected_lang)
            self.logger.debug(
                f"Using PaddleOCR instance for language: '{detected_lang}'"
            )

    @staticmethod
    def _empty_result(error=None):
        """Build the result dict returned when no passport data is available.

        Args:
            error (str, optional): Error message; when given it is included
                under the ``error`` key.

        Returns:
            dict: Result with ``success`` set to ``"none"`` and empty payloads.
        """
        result = {
            "success": "none",
            "passport_info": {},
            "signature": None,
            "raw_extracted_data": [],
        }
        if error is not None:
            result["error"] = error
        return result

    def parse_passport_information(self, extracted_data):
        """Parse required passport fields from OCR extracted data.

        Delegates to ``extract_mrz_data`` for the MRZ-derived fields and to
        ``extract_place_of_birth`` for the place of birth, then assembles the
        values into a single dictionary. Missing MRZ fields default to "".

        Args:
            extracted_data (list): OCR extraction results, as produced by
                ``extract_data_with_boxes``.

        Returns:
            dict: Dictionary containing parsed passport information with keys:
                - date_of_birth (str): Value of ``date_of_birth`` from the MRZ data
                - place_of_birth: Value returned by ``extract_place_of_birth``
                - nationality (str): Value of ``nationality`` from the MRZ data
                - expiry_date (str): Value of ``expiry_date`` from the MRZ data
                - passport_number (str): Value of ``passport_number`` from the MRZ data

        Note:
            Value formats (e.g. DD-MMM-YYYY dates, 3-letter nationality codes)
            are determined by ``extract_mrz_data``; this method passes them
            through unchanged.

        Examples:
            >>> extracted = extract_data_with_boxes(image, ocr=analyzer.ocr)
            >>> info = analyzer.parse_passport_information(extracted)
            >>> print(info['passport_number'])  # e.g., "AB1234567"
            >>> print(info['date_of_birth'])  # e.g., "15-MAR-1985"
        """
        self.logger.debug("Starting passport information parsing")

        # Extract MRZ data (passport number, nationality, dates)
        mrz_data = extract_mrz_data(extracted_data, logger=self.logger)

        # Extract place of birth from text fields
        place_of_birth = extract_place_of_birth(extracted_data, logger=self.logger)

        passport_info = {
            "date_of_birth": mrz_data.get("date_of_birth", ""),
            "place_of_birth": place_of_birth,
            "nationality": mrz_data.get("nationality", ""),
            "expiry_date": mrz_data.get("expiry_date", ""),
            "passport_number": mrz_data.get("passport_number", ""),
        }

        self.logger.debug(f"Parsed passport info: {passport_info}")

        return passport_info

    def analyze_passport(self):
        """Main function to analyze a passport image.

        Orchestrates the complete analysis pipeline: optional image preprocessing,
        OCR text extraction, field parsing, and result compilation.
        This is the primary entry point for passport analysis.

        Returns:
            dict: Analysis results containing:
                - success (str): "passport_info" when OCR returned data and it was
                  parsed; "none" when OCR returned nothing or an exception occurred
                - passport_info (dict): Parsed fields under the keys ``type``,
                  ``dob``, ``pob``, ``nationality``, ``expiry`` and ``id_number``
                  (empty dict when success is "none")
                - signature (None): Always None for passports
                - raw_extracted_data (list): OCR results for debugging
                - error (str): Present only when an exception was caught

        Note:
            Exceptions raised during analysis are caught, logged, and reported
            through the ``error`` key instead of propagating to the caller.

        Examples:
            >>> analyzer = PassportAnalyzer("passport.jpg")
            >>> result = analyzer.analyze_passport()
            >>> if result['success'] == 'passport_info':
            ...     print("Analysis successful")
            ...     print(f"Passport: {result['passport_info']['id_number']}")
            ...     print(f"DOB: {result['passport_info']['dob']}")
            ...     print(f"POB: {result['passport_info']['pob']}")
        """
        try:
            self.logger.info(START_MSG)

            # __init__ always stores a decoded array; the str branch is kept for
            # callers that reassign passport_np to a path after construction.
            image_path_or_array = self.passport_np
            if isinstance(image_path_or_array, str):
                self.logger.debug(f"Loading image from path: '{image_path_or_array}'")
                image = cv2.imread(image_path_or_array)
                if image is None:
                    self.logger.error(
                        f"Couldn't load image from '{image_path_or_array}'"
                    )
                    raise ValueError(
                        f"Couldn't load image from '{image_path_or_array}'"
                    )
            else:
                self.logger.debug("Using provided image array")
                # Copy so later steps never mutate the stored original
                image = image_path_or_array.copy()

            # Preprocess the image unless the caller opted out. getattr keeps
            # instances created without __init__ on the default (enabled) path.
            if getattr(self, "_preprocess_enabled", True):
                processed_image = preprocess_image(image, logger=self.logger)
            else:
                self.logger.debug(
                    "Skipping image preprocessing (preprocess_image=False)"
                )
                processed_image = image

            # Extract data with bounding boxes
            extracted_data = extract_data_with_boxes(
                processed_image, ocr=self.ocr, logger=self.logger
            )

            if not extracted_data:
                self.logger.warning("Couldn't extract data from the passport image")
                return self._empty_result()

            self.logger.info(f"Extracted {len(extracted_data)} data boxes")

            # Parse passport information
            raw_passport_info = self.parse_passport_information(extracted_data)

            # Convert to desired field names (matching cedula format)
            passport_info = {
                "type": "passport",
                "dob": raw_passport_info.get("date_of_birth", ""),
                "pob": raw_passport_info.get("place_of_birth", ""),
                "nationality": raw_passport_info.get("nationality", ""),
                "expiry": raw_passport_info.get("expiry_date", ""),
                "id_number": raw_passport_info.get("passport_number", ""),
            }

            success_status = "passport_info"

            self.logger.info(
                f"Success: '{success_status.capitalize()}' | Date of Birth: '{passport_info['dob']}' "
                f"| Place of Birth: '{passport_info['pob']}' | Nationality: '{passport_info['nationality']}' "
                f"| Expiry: '{passport_info['expiry']}' | Passport Number: '{passport_info['id_number']}'"
            )

            # Elapsed time is measured from construction, so it includes image
            # loading and language detection done in __init__.
            end_time = time.time()
            elapsed_time = end_time - self.start_time
            self.logger.info(f"Passport analysis took: {elapsed_time:.2f} seconds")
            self.logger.info(END_MSG)

            return {
                "success": success_status,
                "passport_info": passport_info,
                "signature": None,
                "raw_extracted_data": extracted_data,  # For debugging
            }

        except Exception as e:
            self.logger.error(f"Error in PassportAnalyzer: {str(e)}")
            self.logger.info(ERROR_END_MSG)
            # Surface the failure reason to the caller, as the docstring promises
            return self._empty_result(error=str(e))
274
+
275
+
276
# Convenience function for easy import and use
def analyze_passport(
    passport_file, user_email=None, ocr_instance=None, lang_detector_instance=True
):
    """Convenience function for passport analysis using PassportAnalyzer.

    Builds a ``PassportAnalyzer`` for ``passport_file`` and returns the result
    of its ``analyze_passport`` method.

    Args:
        passport_file: Input passport image.
        user_email (str, optional): User email for logging.
        ocr_instance (PaddleOCR, optional): Pre-initialized OCR instance.
        lang_detector_instance: Language detector instance. A bool (including
            the historical default of True) is not a detector object and is
            treated as "not provided".

    Returns:
        dict: Analysis results.
    """
    # PassportAnalyzer forwards this value to detect_passport_language as the
    # detector object; a bare bool there cannot be a usable detector, so map it
    # to None, which is PassportAnalyzer's own default.
    if isinstance(lang_detector_instance, bool):
        lang_detector_instance = None

    analyzer = PassportAnalyzer(
        passport_file,
        user_email=user_email,
        ocr_instance=ocr_instance,
        lang_detector_instance=lang_detector_instance,
    )
    return analyzer.analyze_passport()
@@ -0,0 +1,401 @@
1
+ import sys
2
+ import json
3
+ import logging
4
+ import argparse
5
+ from pathlib import Path
6
+ from typing import Optional, Dict, Any
7
+ from importlib.metadata import version, PackageNotFoundError
8
+
9
+ from .analyzers import DocumentAnalyzer, CedulaAnalyzer, PassportAnalyzer
10
+ from .config import logger as project_logger
11
+
12
# Resolve the version of the installed "document-analyzer" distribution; fall
# back to "unknown" when its package metadata cannot be found.
try:
    __version__ = version("document-analyzer")
except PackageNotFoundError:
    __version__ = "unknown"

# File extensions accepted by validate_input_file (matched against the
# lower-cased suffix, including the leading dot).
# NOTE(review): ".pdf" and ".gif" are listed here - confirm the analyzers can
# actually decode these formats.
SUPPORTED_FORMATS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif", ".pdf"}
19
+
20
+
21
class CLIError(Exception):
    """Failure raised by the CLI helpers.

    Attributes:
        message: Human-readable text printed to stderr by ``main``.
        exit_code: Process exit status that ``main`` returns for this error.
    """

    def __init__(self, message: str, exit_code: int = 1):
        # Initialise the base exception first so str(err) yields the message
        super().__init__(message)
        self.exit_code = exit_code
        self.message = message
28
+
29
+
30
def validate_input_file(file_path: str) -> Path:
    """
    Check that the input path names an existing, non-empty, readable file
    whose extension is listed in SUPPORTED_FORMATS.

    Args:
        file_path: Path to the input file

    Returns:
        Path object for the validated file

    Raises:
        CLIError: With exit code 1 when any check fails, or when an
            unexpected exception occurs while validating
    """
    try:
        candidate = Path(file_path)

        # Existence, kind and size checks, in that order
        if not candidate.exists():
            raise CLIError(f"Error: File not found: {file_path}", exit_code=1)
        if not candidate.is_file():
            raise CLIError(f"Error: Path is not a file: {file_path}", exit_code=1)
        if candidate.stat().st_size == 0:
            raise CLIError(f"Error: File is empty: {file_path}", exit_code=1)

        # Extension check is case-insensitive
        if candidate.suffix.lower() not in SUPPORTED_FORMATS:
            raise CLIError(
                f"Error: Unsupported file format '{candidate.suffix}'. "
                f"Supported formats: {', '.join(sorted(SUPPORTED_FORMATS))}",
                exit_code=1,
            )

        # Reading a single byte proves the file can actually be opened
        try:
            with open(candidate, "rb") as handle:
                handle.read(1)
        except PermissionError:
            raise CLIError(
                f"Error: Permission denied reading file: {file_path}", exit_code=1
            )

        return candidate

    except Exception as e:
        # Errors we raised ourselves pass through untouched
        if isinstance(e, CLIError):
            raise
        raise CLIError(f"Error: Failed to validate file: {str(e)}", exit_code=1)
81
+
82
+
83
def validate_output_path(output_path: str) -> Path:
    """
    Validate that the directory of the output path exists (creating it if
    needed) and is writable.

    Writability is probed with a uniquely named temporary file, so an existing
    file in the directory is never touched or removed by the check.

    Args:
        output_path: Path to the output file

    Returns:
        Path object for ``output_path``

    Raises:
        CLIError: With exit code 1 when the directory cannot be created, is not
            writable, or an unexpected exception occurs while validating
    """
    # Function-scope import: only this probe needs tempfile
    import tempfile

    try:
        path = Path(output_path)
        output_dir = path.parent

        # Create parent directories if they don't exist
        if not output_dir.exists():
            try:
                output_dir.mkdir(parents=True, exist_ok=True)
            except PermissionError as exc:
                raise CLIError(
                    f"Error: Permission denied creating directory: {output_dir}",
                    exit_code=1,
                ) from exc

        # Probe writability with a temp file that is deleted on close. A fixed
        # probe name would delete a real file that happened to share the name.
        try:
            with tempfile.NamedTemporaryFile(
                dir=str(output_dir), prefix=".write_test_"
            ):
                pass
        except PermissionError as exc:
            raise CLIError(
                f"Error: Output directory is not writable: {output_dir}", exit_code=1
            ) from exc

        return path

    except CLIError:
        raise
    except Exception as e:
        raise CLIError(
            f"Error: Failed to validate output path: {str(e)}", exit_code=1
        ) from e
126
+
127
+
128
def detect_and_analyze(
    file_path: Path, user_email: Optional[str] = None
) -> Dict[str, Any]:
    """
    Detect the document type with DocumentAnalyzer, then run the matching
    analyzer and tag its result with the detected type.

    Args:
        file_path: Path to the document file
        user_email: Optional user email for logging

    Returns:
        Analyzer result dictionary with an added ``document_type`` key

    Raises:
        CLIError: Exit code 1 when the type is reported as "unknown";
            exit code 2 for any other detection or analysis failure
    """
    try:
        project_logger.debug(f"Attempting auto-detection on: {file_path}")

        # Detection pass
        detector = DocumentAnalyzer(str(file_path), user_email=user_email)
        doc_type = detector.detect_document_type()

        project_logger.debug(f"Detected document type: {doc_type}")

        # Guard clauses for the outcomes that cannot be analyzed
        if doc_type == "unknown":
            raise CLIError(
                "Error: Could not determine document type. "
                "Please specify --type (cedula or passport).",
                exit_code=1,
            )

        if doc_type == "cedula":
            result = CedulaAnalyzer(
                str(file_path), user_email=user_email
            ).analyze_cedula()
            result["document_type"] = "cedula"
            return result

        if doc_type == "passport":
            result = PassportAnalyzer(
                str(file_path), user_email=user_email
            ).analyze_passport()
            result["document_type"] = "passport"
            return result

        raise CLIError("Error: Unknown document type after detection.", exit_code=2)

    except CLIError:
        # Already in CLI form; let it through unchanged
        raise
    except ValueError as e:
        # Invalid image format or corrupted file
        raise CLIError(f"Error: Invalid or corrupted image file: {str(e)}", exit_code=2)
    except Exception as e:
        project_logger.error(f"Auto-detection failed: {str(e)}", exc_info=True)
        raise CLIError(f"Error: Failed to analyze document: {str(e)}", exit_code=2)
182
+
183
+
184
def analyze_cedula(file_path: Path, user_email: Optional[str] = None) -> Dict[str, Any]:
    """
    Run CedulaAnalyzer on a file and tag the result as a cédula.

    Args:
        file_path: Path to the cédula image
        user_email: Optional user email for logging

    Returns:
        Analyzer result dictionary with ``document_type`` set to "cedula"

    Raises:
        CLIError: Exit code 2 when the analyzer raises (ValueError is
            reported as an invalid or corrupted image)
    """
    try:
        project_logger.debug(f"Analyzing cédula: {file_path}")
        result = CedulaAnalyzer(
            str(file_path), user_email=user_email
        ).analyze_cedula()
        result["document_type"] = "cedula"
    except ValueError as e:
        raise CLIError(f"Error: Invalid or corrupted image file: {str(e)}", exit_code=2)
    except Exception as e:
        project_logger.error(f"Cédula analysis failed: {str(e)}", exc_info=True)
        raise CLIError(f"Error: Failed to analyze cédula: {str(e)}", exit_code=2)

    return result
210
+
211
+
212
def analyze_passport(
    file_path: Path, user_email: Optional[str] = None
) -> Dict[str, Any]:
    """
    Run PassportAnalyzer on a file and tag the result as a passport.

    Args:
        file_path: Path to the passport image
        user_email: Optional user email for logging

    Returns:
        Analyzer result dictionary with ``document_type`` set to "passport"

    Raises:
        CLIError: Exit code 2 when the analyzer raises (ValueError is
            reported as an invalid or corrupted image)
    """
    try:
        project_logger.debug(f"Analyzing passport: {file_path}")
        result = PassportAnalyzer(
            str(file_path), user_email=user_email
        ).analyze_passport()
        result["document_type"] = "passport"
    except ValueError as e:
        raise CLIError(f"Error: Invalid or corrupted image file: {str(e)}", exit_code=2)
    except Exception as e:
        project_logger.error(f"Passport analysis failed: {str(e)}", exc_info=True)
        raise CLIError(f"Error: Failed to analyze passport: {str(e)}", exit_code=2)

    return result
240
+
241
+
242
def format_result_json(result: Dict[str, Any]) -> str:
    """Serialize an analysis result to JSON indented by four spaces.

    Values the json module cannot encode natively are converted with ``str``.
    """
    dump_options = {"indent": 4, "default": str}
    return json.dumps(result, **dump_options)
245
+
246
+
247
def setup_logging(verbose: bool) -> None:
    """Set the log level of the "document_analyzer" logger and its handlers.

    Verbose mode selects DEBUG; otherwise WARNING. On the first call a stderr
    handler with a "LEVEL: message" format is attached; on later calls the
    handlers already present are only re-levelled, so no duplicates appear.
    Only the "document_analyzer" logger is configured here.
    """
    package_logger = logging.getLogger("document_analyzer")

    level = logging.WARNING
    if verbose:
        level = logging.DEBUG

    if package_logger.handlers:
        # Already configured: just move every handler to the requested level
        for existing in package_logger.handlers:
            existing.setLevel(level)
    else:
        stderr_handler = logging.StreamHandler(sys.stderr)
        stderr_handler.setLevel(level)
        stderr_handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
        package_logger.addHandler(stderr_handler)

    package_logger.setLevel(level)
274
+
275
+
276
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    The parser has a global ``--version`` flag and a single ``analyze``
    subcommand taking a document path plus ``--type``, ``--save`` and
    ``-v/--verbose`` options.
    """
    usage_examples = """
Examples:
%(prog)s analyze doc.jpg
%(prog)s analyze cedula.jpg --type cedula
%(prog)s analyze doc.jpg --save result.json -v
"""

    root = argparse.ArgumentParser(
        prog="document-analyzer",
        description="Analyze Panamanian identity cards (cédulas) and passports using OCR.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Global options
    root.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    # Subcommands; the chosen one is stored in args.command
    commands = root.add_subparsers(dest="command", help="Commands")
    analyze = commands.add_parser("analyze", help="Analyze a document")

    analyze.add_argument("path", help="Path to the document image file")
    analyze.add_argument(
        "--type",
        choices=["auto", "cedula", "passport"],
        default="auto",
        help="Document type (default: auto-detect)",
    )
    analyze.add_argument(
        "--save",
        metavar="FILE",
        help="Save result to file instead of printing to stdout",
    )
    analyze.add_argument(
        "-v", "--verbose", action="store_true", help="Enable debug-level logging"
    )

    return root
321
+
322
+
323
def main(argv: Optional[list] = None) -> int:
    """
    Run the CLI: parse arguments, validate the input file, analyze the
    document, and print or save the JSON result.

    Args:
        argv: Command-line arguments (default: sys.argv[1:])

    Returns:
        Exit code: 0 on success (or when only help is shown), the exit code
        carried by a CLIError, 1 on keyboard interrupt, 2 on any other
        unexpected exception
    """
    parser = create_parser()

    try:
        args = parser.parse_args(argv)

        # No subcommand given: show help and stop
        if not args.command:
            parser.print_help()
            return 0

        setup_logging(args.verbose)

        project_logger.debug(f"Validating input file: {args.path}")
        input_path = validate_input_file(args.path)

        # Pick the analysis routine for the requested document type
        requested = args.type
        if requested == "auto":
            project_logger.debug("Using auto-detection for document type")
            result = detect_and_analyze(input_path)
        else:
            explicit = {"cedula": analyze_cedula, "passport": analyze_passport}
            if requested not in explicit:
                raise CLIError(f"Unknown document type: {args.type}", exit_code=1)
            result = explicit[requested](input_path)

        output_json = format_result_json(result)

        # Without --save the JSON goes straight to stdout
        if not args.save:
            print(output_json)
            return 0

        project_logger.debug(f"Validating output path: {args.save}")
        output_path = validate_output_path(args.save)

        try:
            with open(output_path, "w") as out_file:
                out_file.write(output_json)
            print(f"Result saved to: {output_path}")
            project_logger.debug(f"Result saved to: {output_path}")
        except PermissionError:
            raise CLIError(
                f"Error: Permission denied writing to: {args.save}", exit_code=1
            )
        except Exception as e:
            raise CLIError(
                f"Error: Failed to write output file: {str(e)}", exit_code=1
            )

        return 0

    except CLIError as e:
        # Expected failures: message to stderr, exit code chosen by the raiser
        print(e.message, file=sys.stderr)
        return e.exit_code
    except KeyboardInterrupt:
        print("\nOperation cancelled by user.", file=sys.stderr)
        return 1
    except Exception as e:
        project_logger.error(f"Unexpected error: {str(e)}", exc_info=True)
        print(f"Error: An unexpected error occurred: {str(e)}", file=sys.stderr)
        return 2
398
+
399
+
400
# When executed as a script, run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())