paddleocr-skills 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/README.md +220 -0
  2. package/bin/paddleocr-skills.js +20 -0
  3. package/lib/copy.js +39 -0
  4. package/lib/installer.js +70 -0
  5. package/lib/prompts.js +67 -0
  6. package/lib/python.js +75 -0
  7. package/lib/verify.js +121 -0
  8. package/package.json +42 -0
  9. package/templates/.env.example +12 -0
  10. package/templates/paddleocr-vl/references/paddleocr-vl/layout_schema.md +64 -0
  11. package/templates/paddleocr-vl/references/paddleocr-vl/output_format.md +154 -0
  12. package/templates/paddleocr-vl/references/paddleocr-vl/vl_model_spec.md +157 -0
  13. package/templates/paddleocr-vl/scripts/paddleocr-vl/_lib.py +780 -0
  14. package/templates/paddleocr-vl/scripts/paddleocr-vl/configure.py +270 -0
  15. package/templates/paddleocr-vl/scripts/paddleocr-vl/optimize_file.py +226 -0
  16. package/templates/paddleocr-vl/scripts/paddleocr-vl/requirements-optimize.txt +8 -0
  17. package/templates/paddleocr-vl/scripts/paddleocr-vl/requirements.txt +7 -0
  18. package/templates/paddleocr-vl/scripts/paddleocr-vl/smoke_test.py +199 -0
  19. package/templates/paddleocr-vl/scripts/paddleocr-vl/vl_caller.py +232 -0
  20. package/templates/paddleocr-vl/skills/paddleocr-vl/SKILL.md +481 -0
  21. package/templates/ppocrv5/references/ppocrv5/agent_policy.md +258 -0
  22. package/templates/ppocrv5/references/ppocrv5/normalized_schema.md +257 -0
  23. package/templates/ppocrv5/references/ppocrv5/provider_api.md +140 -0
  24. package/templates/ppocrv5/scripts/ppocrv5/_lib.py +635 -0
  25. package/templates/ppocrv5/scripts/ppocrv5/configure.py +346 -0
  26. package/templates/ppocrv5/scripts/ppocrv5/ocr_caller.py +684 -0
  27. package/templates/ppocrv5/scripts/ppocrv5/requirements.txt +4 -0
  28. package/templates/ppocrv5/scripts/ppocrv5/smoke_test.py +139 -0
  29. package/templates/ppocrv5/skills/ppocrv5/SKILL.md +272 -0
@@ -0,0 +1,780 @@
1
+ """
2
+ Core library for PaddleOCR-VL API Skill
3
+ - Config: Configuration manager for VL API
4
+ - VLClient: HTTP client with retry logic and caching
5
+ - QualityEvaluator: Confidence scoring and quality assessment
6
+ - Utility functions for error handling
7
+ """
8
+
9
+ import hashlib
10
+ import json
11
+ import logging
12
+ import os
13
+ import re
14
+ import sys
15
+ import time
16
+ from pathlib import Path
17
+ from typing import Any, Dict, Optional, Tuple
18
+
19
+ import httpx
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # =============================================================================
24
+ # Constants
25
+ # =============================================================================
26
+
27
# Request defaults used when the matching VL_* environment variable is unset
# or cannot be parsed as an integer.
DEFAULT_TIMEOUT_MS = 30000  # VL processing takes longer
DEFAULT_MAX_RETRY = 2
DEFAULT_CACHE_TTL_SEC = 600

# Quality scoring thresholds (lower bounds on the average region confidence)
QUALITY_EXCELLENT = 0.90
QUALITY_GOOD = 0.75
QUALITY_ACCEPTABLE = 0.60

# Unified error codes (aligned with ppocrv5)
ERROR_CONFIG = "CONFIG_ERROR"
ERROR_AUTH = "PROVIDER_AUTH_ERROR"
ERROR_QUOTA = "PROVIDER_QUOTA_EXCEEDED"
ERROR_OVERLOADED = "PROVIDER_OVERLOADED"
ERROR_TIMEOUT = "PROVIDER_TIMEOUT"
ERROR_BAD_REQUEST = "PROVIDER_BAD_REQUEST"
ERROR_PROVIDER = "PROVIDER_ERROR"
ERROR_NETWORK = "NETWORK_ERROR"
ERROR_PARSE = "PARSE_ERROR"
46
+
47
+
48
+ # =============================================================================
49
+ # Configuration
50
+ # =============================================================================
51
+
52
class Config:
    """
    Settings accessor for PaddleOCR-VL.

    Every getter reads ``os.environ``. Before the first read, the ``.env``
    file in the project root is loaded into the environment through
    python-dotenv (when that package is installed). Variables that are
    already set are expected to win over ``.env`` entries, which is
    python-dotenv's default behaviour.

    Required settings (API URL, token) raise ``ValueError`` when missing;
    numeric settings fall back to their defaults when unset or unparsable.
    """

    # Set once load_env() has run, whether or not a .env file was found.
    _env_loaded = False

    @staticmethod
    def load_env():
        """Load the project's .env file once per process using python-dotenv."""
        if Config._env_loaded:
            return

        try:
            from dotenv import load_dotenv

            # The project root sits two directories above this file's folder.
            env_file = Path(__file__).parents[2] / ".env"

            if not env_file.exists():
                logger.debug(f".env file not found at {env_file}")
            else:
                load_dotenv(env_file)
                logger.debug(f"Loaded .env from {env_file}")

            Config._env_loaded = True

        except ImportError:
            # Without python-dotenv only real environment variables are used.
            logger.warning("python-dotenv not installed")
            logger.warning("Install with: pip install python-dotenv")
            Config._env_loaded = True

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Read integer variable *name*; return *default* if unset or not an int."""
        Config.load_env()
        raw_value = os.getenv(name, str(default))
        try:
            return int(raw_value)
        except ValueError:
            return default

    @staticmethod
    def get_vl_api_url() -> str:
        """
        Return the PaddleOCR-VL endpoint from ``VL_API_URL``.

        Returns:
            The URL, with ``https://`` prepended when no scheme was given

        Raises:
            ValueError: If VL_API_URL is unset or blank
        """
        Config.load_env()

        configured_url = os.getenv("VL_API_URL", "").strip()
        if not configured_url:
            raise ValueError(
                "API not configured. Get your API at: https://aistudio.baidu.com/paddleocr/task"
            )

        if configured_url.startswith(("http://", "https://")):
            return configured_url
        return f"https://{configured_url}"

    @staticmethod
    def get_vl_token() -> str:
        """
        Return the PaddleOCR-VL access token from ``VL_TOKEN``.

        Returns:
            The token with surrounding whitespace removed

        Raises:
            ValueError: If VL_TOKEN is unset or blank
        """
        Config.load_env()

        access_token = os.getenv("VL_TOKEN", "").strip()
        if access_token:
            return access_token

        raise ValueError(
            "TOKEN not configured. Get your API at: https://aistudio.baidu.com/paddleocr/task"
        )

    @staticmethod
    def is_configured() -> bool:
        """Return True when both VL_API_URL and VL_TOKEN are non-blank."""
        Config.load_env()
        return all(
            os.getenv(variable, "").strip()
            for variable in ("VL_API_URL", "VL_TOKEN")
        )

    @staticmethod
    def get_timeout_ms() -> int:
        """Return the request timeout in milliseconds (``VL_TIMEOUT_MS``)."""
        return Config._env_int("VL_TIMEOUT_MS", DEFAULT_TIMEOUT_MS)

    @staticmethod
    def get_max_retry() -> int:
        """Return the maximum retry count (``VL_MAX_RETRY``)."""
        return Config._env_int("VL_MAX_RETRY", DEFAULT_MAX_RETRY)

    @staticmethod
    def get_cache_ttl_sec() -> int:
        """Return the cache time-to-live in seconds (``VL_CACHE_TTL_SEC``)."""
        return Config._env_int("VL_CACHE_TTL_SEC", DEFAULT_CACHE_TTL_SEC)

    @staticmethod
    def get_max_file_size_mb() -> int:
        """
        Return the maximum local file size in MB (``VL_MAX_FILE_SIZE_MB``).

        Default: 0 (no limit)
        Set VL_MAX_FILE_SIZE_MB in .env to enforce a limit if needed
        """
        return Config._env_int("VL_MAX_FILE_SIZE_MB", 0)
192
+
193
+
194
+ # =============================================================================
195
+ # Simple Cache
196
+ # =============================================================================
197
+
198
class SimpleCache:
    """
    Minimal in-memory cache whose entries expire after a fixed TTL.

    Used to avoid repeating identical API requests. Expired entries are only
    dropped when they are looked up again.
    """

    def __init__(self, ttl_seconds: int = DEFAULT_CACHE_TTL_SEC):
        # key -> (stored value, time.time() at insertion)
        self.cache: Dict[str, Tuple[Any, float]] = {}
        self.ttl = ttl_seconds

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, or None if absent or expired."""
        entry = self.cache.get(key)
        if entry is None:
            return None

        stored_value, stored_at = entry
        age = time.time() - stored_at
        if age > self.ttl:
            # Stale: evict so the next set() starts fresh.
            del self.cache[key]
            return None

        logger.debug(f"Cache hit: {key[:16]}...")
        return stored_value

    def set(self, key: str, value: Any):
        """Store *value* under *key*, stamped with the current time."""
        now = time.time()
        self.cache[key] = (value, now)
        logger.debug(f"Cache set: {key[:16]}...")

    def clear(self):
        """Remove every entry."""
        self.cache.clear()
        logger.debug("Cache cleared")

    @staticmethod
    def make_key(file_path: Optional[str] = None, file_url: Optional[str] = None) -> str:
        """
        Build a SHA-256 hex cache key from the request target.

        The URL takes precedence over the path when both are given. The key
        is derived from the path/URL text only, not from file contents.
        """
        if file_url:
            material = f"url:{file_url}"
        else:
            material = f"path:{file_path}" if file_path else "empty"

        digest = hashlib.sha256(material.encode())
        return digest.hexdigest()
243
+
244
+
245
+ # =============================================================================
246
+ # Quality Evaluator
247
+ # =============================================================================
248
+
249
class QualityEvaluator:
    """
    Evaluates confidence and quality of VL parsing results
    """

    @staticmethod
    def evaluate(api_response: Dict[str, Any]) -> Dict[str, Any]:
        """
        Evaluate quality of API response

        Regions are read from ``api_response["result"]["layout"]["regions"]``.
        A region without a numeric ``confidence`` is scored with the default
        of 0.8. Missing or ``null`` ``result`` / ``layout`` / ``regions``
        values are treated as "no content" rather than raising.

        Args:
            api_response: Complete API response from VL

        Returns:
            Quality assessment dict with:
            - overall_confidence: 0.0-1.0 (mean of region confidences)
            - quality_level: "excellent" | "good" | "acceptable" | "poor",
              or "error" when the response is not ok
            - warnings: List of quality warnings
            - region_stats: Statistics about regions
        """
        if not api_response.get("ok", False):
            return {
                "overall_confidence": 0.0,
                "quality_level": "error",
                "warnings": ["API request failed"],
                "region_stats": {}
            }

        # Walk the nesting defensively: any level may be absent or null.
        result = api_response.get("result")
        layout = result.get("layout") if isinstance(result, dict) else None
        regions = layout.get("regions") if isinstance(layout, dict) else None

        if not regions:
            return {
                "overall_confidence": 0.0,
                "quality_level": "poor",
                "warnings": ["No content detected in document"],
                "region_stats": {"total_regions": 0}
            }

        default_confidence = 0.8  # Used when a region carries no usable score

        confidences = []
        region_types: Dict[str, int] = {}
        for region in regions:
            confidence = region.get("confidence")
            if not isinstance(confidence, (int, float)):
                confidence = default_confidence
            confidences.append(confidence)

            region_type = region.get("type", "unknown")
            region_types[region_type] = region_types.get(region_type, 0) + 1

        overall_confidence = sum(confidences) / len(confidences)

        # Determine quality level
        if overall_confidence >= QUALITY_EXCELLENT:
            quality_level = "excellent"
        elif overall_confidence >= QUALITY_GOOD:
            quality_level = "good"
        elif overall_confidence >= QUALITY_ACCEPTABLE:
            quality_level = "acceptable"
        else:
            quality_level = "poor"

        # Count from the normalised scores so the default applies consistently.
        low_confidence_count = sum(1 for c in confidences if c < QUALITY_ACCEPTABLE)

        # Generate warnings
        warnings = []
        if overall_confidence < QUALITY_ACCEPTABLE:
            warnings.append(f"Low confidence score: {overall_confidence:.2f}")
        if low_confidence_count:
            warnings.append(f"{low_confidence_count} regions have low confidence")

        return {
            "overall_confidence": overall_confidence,
            "quality_level": quality_level,
            "warnings": warnings,
            "region_stats": {
                "total_regions": len(regions),
                "by_type": region_types,
                "low_confidence_count": low_confidence_count
            }
        }
331
+
332
+
333
+ # =============================================================================
334
+ # VL API Client
335
+ # =============================================================================
336
+
337
class VLClient:
    """
    HTTP client for PaddleOCR-VL API with retry and caching

    Each instance owns its own SimpleCache (when caching is enabled), so
    cached responses are only shared between calls made on the same instance.
    """

    def __init__(
        self,
        api_url: Optional[str] = None,
        token: Optional[str] = None,
        timeout_ms: Optional[int] = None,
        max_retry: Optional[int] = None,
        enable_cache: bool = True
    ):
        """
        Initialize VL client

        Args:
            api_url: API endpoint URL (auto-loaded from config if None)
            token: Access token (auto-loaded from config if None)
            timeout_ms: Request timeout in milliseconds (None or 0 uses config)
            max_retry: Maximum retry count for transient errors; 0 disables
                retries, None uses config
            enable_cache: Enable response caching

        Raises:
            ValueError: If the URL or token is not given and not configured
        """
        self.api_url = api_url or Config.get_vl_api_url()
        self.token = token or Config.get_vl_token()
        self.timeout_ms = timeout_ms or Config.get_timeout_ms()
        # 0 is a legitimate "never retry" request, so only None falls back.
        self.max_retry = Config.get_max_retry() if max_retry is None else max_retry
        self.cache = SimpleCache(Config.get_cache_ttl_sec()) if enable_cache else None

        logger.info(f"VLClient initialized: {self.api_url}")
        logger.debug(f"Timeout: {self.timeout_ms}ms, Max retry: {self.max_retry}")

    def call_api(
        self,
        file_path: Optional[str] = None,
        file_url: Optional[str] = None,
        use_cache: bool = True
    ) -> Dict[str, Any]:
        """
        Call PaddleOCR-VL API with automatic retry

        When both inputs are given, ``file_url`` is the one that is sent.

        Args:
            file_path: Local file path
            file_url: URL to file
            use_cache: Use cached response if available

        Returns:
            Complete API response with standardized error codes. Invalid
            input (nothing provided, unsupported format, missing or oversized
            file) is reported as an ERROR_BAD_REQUEST response, not raised.

        Raises:
            OSError: If the local file cannot be read when it is uploaded
        """
        # Validate input parameters
        is_valid, error_msg = validate_file_input(file_path, file_url)
        if not is_valid:
            return self._make_error_response(ERROR_BAD_REQUEST, error_msg)

        # Check cache (cache_key stays None when caching is off for this call)
        cache_key = None
        if use_cache and self.cache is not None:
            cache_key = SimpleCache.make_key(file_path, file_url)
            cached = self.cache.get(cache_key)
            if cached:
                logger.info("Returning cached response")
                return cached

        # Build request payload
        payload = {
            "parse_all": True,
            "include_layout": True,
            "include_all_elements": True
        }

        if file_url:
            # URL mode: the options payload plus the file URL is sent as JSON.
            payload["file_url"] = file_url
            result = self._request_with_retry(payload)
        else:
            # Local file mode: validation guarantees file_path is set here.
            # The file is sent base64-encoded in a JSON body.
            result = self._request_with_retry(
                payload, upload_file_path=str(Path(file_path).absolute())
            )

        # Cache successful response
        if cache_key is not None and result.get("ok", False):
            self.cache.set(cache_key, result)

        return result

    @staticmethod
    def _build_upload_payload(upload_file_path: str) -> Dict[str, Any]:
        """Read a local file and return the JSON body used to upload it."""
        import base64

        with open(upload_file_path, 'rb') as f:
            file_data = base64.b64encode(f.read()).decode('utf-8')

        ext = Path(upload_file_path).suffix.lower()
        return {
            "file": file_data,
            "fileType": 0 if ext == '.pdf' else 1,
        }

    def _request_with_retry(self, payload: Dict[str, Any], upload_file_path: Optional[str] = None) -> Dict[str, Any]:
        """
        Make HTTP request with automatic retry on transient failures

        HTTP 503/504 are retried with exponential backoff (1s, 2s, 4s, ...);
        timeouts are retried immediately. Authentication errors, quota errors,
        other HTTP errors and non-timeout network errors are returned without
        retrying.

        Args:
            payload: Request payload (sent as-is for URL-based input)
            upload_file_path: If provided, this file is base64-encoded and
                sent in a JSON body *instead of* ``payload``

        Returns:
            API response dict

        Raises:
            OSError: If ``upload_file_path`` cannot be read
        """
        timeout_sec = self.timeout_ms / 1000.0
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.token}"
        }

        # Build the body once: re-reading and re-encoding the file on every
        # retry would only repeat identical work.
        if upload_file_path:
            body = self._build_upload_payload(upload_file_path)
        else:
            body = payload

        # Always make at least one attempt, even for a negative retry count.
        total_attempts = max(0, self.max_retry) + 1

        for attempt in range(1, total_attempts + 1):
            can_retry = attempt < total_attempts

            try:
                logger.debug(f"API request attempt {attempt}/{total_attempts}")
                with httpx.Client(timeout=timeout_sec) as client:
                    response = client.post(self.api_url, json=body, headers=headers)

            except httpx.TimeoutException:
                # Must precede RequestError: timeouts are a subclass of it.
                logger.error(f"Request timeout after {timeout_sec}s")
                if can_retry:
                    logger.warning(f"Retrying after timeout (attempt {attempt})...")
                    continue
                return self._make_error_response(
                    ERROR_TIMEOUT,
                    f"Request timeout after {timeout_sec}s. Document may be too large."
                )

            except httpx.RequestError as e:
                logger.error(f"Network error: {str(e)}")
                return self._make_error_response(
                    ERROR_NETWORK,
                    f"Network error: {str(e)}"
                )

            status = response.status_code

            # Success
            if status == 200:
                try:
                    result = response.json()
                except ValueError as e:
                    # Covers json.JSONDecodeError and undecodable body bytes.
                    return self._make_error_response(
                        ERROR_PARSE,
                        f"Invalid JSON response: {str(e)}"
                    )
                logger.info(f"API request successful (attempt {attempt})")
                return self._wrap_success(result)

            # Authentication errors (no retry)
            if status in (401, 403):
                logger.error(f"Authentication failed: HTTP {status}")
                return self._make_error_response(
                    ERROR_AUTH,
                    f"Authentication failed (HTTP {status}). Check your VL_TOKEN."
                )

            # Quota exceeded (no retry)
            if status == 429:
                logger.error("API quota exceeded")
                return self._make_error_response(
                    ERROR_QUOTA,
                    "API quota exceeded. Please wait or upgrade your plan."
                )

            # Transient errors (retry)
            if status in (503, 504):
                if can_retry:
                    wait_time = 2 ** (attempt - 1)  # Exponential backoff
                    logger.warning(f"Transient error HTTP {status}, retrying in {wait_time}s...")
                    time.sleep(wait_time)
                    continue

                logger.error(f"Max retries exceeded for HTTP {status}")
                return self._make_error_response(
                    ERROR_OVERLOADED if status == 503 else ERROR_TIMEOUT,
                    f"Service unavailable (HTTP {status}) after {self.max_retry} retries"
                )

            # Other errors
            logger.error(f"API request failed: HTTP {status}")
            return self._make_error_response(
                ERROR_PROVIDER,
                f"API request failed (HTTP {status}): {response.text[:200]}"
            )

        # Should not reach here: the final attempt always returns above.
        return self._make_error_response(
            ERROR_PROVIDER,
            "Unexpected error: max retries logic failed"
        )

    def _wrap_success(self, api_response: Any) -> Dict[str, Any]:
        """
        Wrap successful API response in standard format

        A dict that already carries an 'ok' field is returned unchanged; any
        other decoded JSON value (dict, list, string, null, ...) is placed
        under "result" so callers always receive a dict.
        """
        if isinstance(api_response, dict) and "ok" in api_response:
            return api_response

        return {
            "ok": True,
            "result": api_response,
            "error": None
        }

    def _make_error_response(self, error_code: str, message: str) -> Dict[str, Any]:
        """Create standardized error response"""
        return {
            "ok": False,
            "result": None,
            "error": {
                "code": error_code,
                "message": message
            }
        }
576
+
577
+
578
+ # =============================================================================
579
+ # High-level API Functions
580
+ # =============================================================================
581
+
582
def make_api_request(
    file_path: Optional[str] = None,
    file_url: Optional[str] = None,
    timeout_ms: Optional[int] = None,
    use_cache: bool = True
) -> Dict[str, Any]:
    """
    Convenience wrapper: build a VLClient and run a single API call.

    A new client is constructed on every invocation, with URL, token and
    retry settings taken from configuration.

    Args:
        file_path: Local file path
        file_url: URL to file
        timeout_ms: Request timeout in milliseconds
        use_cache: Use cached response if available

    Returns:
        Complete API response dict with standardized format

    Raises:
        ValueError: If configuration is invalid
    """
    return VLClient(timeout_ms=timeout_ms).call_api(
        file_path=file_path,
        file_url=file_url,
        use_cache=use_cache,
    )
612
+
613
+
614
+ # =============================================================================
615
+ # File Type Detection
616
+ # =============================================================================
617
+
618
def detect_file_format(file_input: str) -> Tuple[bool, str]:
    """
    Detect and validate file format from URL or path.

    Detection is purely name-based: the input is lower-cased and stripped,
    the path component is extracted from http(s) URLs, anything from the
    first ``?`` or ``#`` onward is dropped, and the remaining text is matched
    against the supported extensions. File contents are never inspected.

    Args:
        file_input: File URL or path

    Returns:
        Tuple of (is_valid, format_type)
        - is_valid: True if format is supported
        - format_type: "pdf", "png", "jpg", "jpeg", etc. or "unknown"

    Supported formats:
    - PDF: .pdf
    - Images: .png, .jpg, .jpeg, .bmp, .tiff, .tif, .webp
    """
    from urllib.parse import unquote, urlparse

    # Clean up the input
    file_str = file_input.lower().strip()

    # If it's a URL, extract the path
    if file_str.startswith(('http://', 'https://')):
        try:
            file_str = unquote(urlparse(file_str).path)
        except ValueError:
            # urlparse rejects malformed URLs (e.g. an unbalanced "[" in the
            # host); fall back to matching against the raw string.
            pass

    # Remove query parameters and fragments
    file_str = re.sub(r'[?#].*$', '', file_str)

    # Supported extensions mapped to the format name that is reported
    supported_formats = (
        ('.pdf', 'pdf'),
        ('.png', 'png'),
        ('.jpg', 'jpg'),
        ('.jpeg', 'jpeg'),
        ('.bmp', 'bmp'),
        ('.tiff', 'tiff'),
        ('.tif', 'tif'),
        ('.webp', 'webp'),
    )

    for ext, format_name in supported_formats:
        if file_str.endswith(ext):
            return (True, format_name)

    # Unsupported format
    return (False, "unknown")
670
+
671
+
672
def validate_file_input(
    file_path: Optional[str] = None,
    file_url: Optional[str] = None
) -> Tuple[bool, str]:
    """
    Validate file input parameters

    Checks, in order: that at least one input is given, that the name has a
    supported extension (the URL is checked when both are given), that a
    local file exists, and that it does not exceed VL_MAX_FILE_SIZE_MB.
    A limit of 0 (the default) or below means "no limit".

    Args:
        file_path: Local file path
        file_url: File URL

    Returns:
        Tuple of (is_valid, error_message); error_message is "" when valid
    """
    if not file_path and not file_url:
        return (False, "Either file_path or file_url must be provided")

    file_input = file_url if file_url else file_path

    # Validate format
    is_valid, _format_type = detect_file_format(file_input)
    if not is_valid:
        return (False, "Unsupported file format. Supported: PDF, PNG, JPG, JPEG, BMP, TIFF, WEBP")

    # Validate local file exists
    if file_path:
        local_file = Path(file_path)
        if not local_file.exists():
            return (False, f"File not found: {file_path}")

        # Check file size (configurable via VL_MAX_FILE_SIZE_MB).
        # Only enforce a positive limit: comparing against 0 bytes would
        # reject every non-empty file even though 0 means "no limit".
        max_size_mb = Config.get_max_file_size_mb()
        if max_size_mb > 0:
            file_size = local_file.stat().st_size
            max_size_bytes = max_size_mb * 1024 * 1024

            if file_size > max_size_bytes:
                file_size_mb = file_size / 1024 / 1024
                return (False, f"File too large: {file_size_mb:.1f}MB (max {max_size_mb}MB). Use --file-url or increase VL_MAX_FILE_SIZE_MB")

    return (True, "")
711
+
712
+
713
+ # =============================================================================
714
+ # Utility Functions
715
+ # =============================================================================
716
+
717
def format_error_output(error: Exception, error_code: str = ERROR_PROVIDER) -> Dict[str, Any]:
    """
    Convert an exception into the standard error envelope.

    Args:
        error: Exception to format
        error_code: Unified error code

    Returns:
        Dict with ok=False, result=None and an "error" entry holding the
        code, the exception class name and the exception message
    """
    error_details = {
        "code": error_code,
        "type": type(error).__name__,
        "message": str(error),
    }
    return {"ok": False, "result": None, "error": error_details}
737
+
738
+
739
def wrap_success_output(result: Dict[str, Any]) -> Dict[str, Any]:
    """
    Put a successful result into the standard envelope.

    Args:
        result: API response

    Returns:
        ``result`` itself when it already has an 'ok' field, otherwise a new
        dict with ok=True, the result under "result" and error=None
    """
    already_wrapped = "ok" in result
    return result if already_wrapped else {"ok": True, "result": result, "error": None}
759
+
760
+
761
def setup_logging(level: Optional[str] = None):
    """
    Setup logging configuration

    Calls ``logging.basicConfig`` with the module's log format and sets this
    module's logger to the same level.

    Args:
        level: Log level name (DEBUG, INFO, WARNING, ERROR), matched
               case-insensitively; a numeric level is also accepted.
               If None, reads from VL_LOG_LEVEL environment variable.
               Unrecognised values fall back to INFO.
    """
    if level is None:
        level = os.getenv("VL_LOG_LEVEL", "INFO")

    if isinstance(level, int):
        numeric_level = level
    else:
        # Normalise explicit arguments too, and accept only attributes that
        # are real numeric levels: a bare getattr() would hand back functions
        # such as logging.debug for "debug" and break setLevel().
        candidate = getattr(logging, str(level).strip().upper(), None)
        numeric_level = candidate if isinstance(candidate, int) else logging.INFO

    logging.basicConfig(
        level=numeric_level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )

    # Same object as the module-level ``logger`` (created with __name__).
    logging.getLogger(__name__).setLevel(numeric_level)