gemini-ocr-cli 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/PKG-INFO +1 -1
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/gemini_ocr/__init__.py +1 -1
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/gemini_ocr/processor.py +32 -3
- gemini_ocr_cli-0.3.2/gemini_ocr/retry.py +104 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/pyproject.toml +1 -1
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/.env.example +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/.github/workflows/ci.yml +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/.gitignore +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/.pre-commit-config.yaml +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/CHANGELOG.md +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/LICENSE +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/README.md +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/gemini_ocr/__main__.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/gemini_ocr/cli.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/gemini_ocr/config.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/gemini_ocr/metadata.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/gemini_ocr/utils.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/__init__.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/conftest.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/test_cli.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/test_config.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/test_import.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/test_integration.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/test_metadata.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/test_processor.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/tests/test_utils.py +0 -0
- {gemini_ocr_cli-0.3.1 → gemini_ocr_cli-0.3.2}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gemini-ocr-cli
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: CLI tool for OCR processing using Google Gemini's vision capabilities
|
|
5
5
|
Project-URL: Homepage, https://github.com/r-uben/gemini-ocr-cli
|
|
6
6
|
Project-URL: Repository, https://github.com/r-uben/gemini-ocr-cli
|
|
@@ -115,6 +115,37 @@ class OCRProcessor:
|
|
|
115
115
|
|
|
116
116
|
return types.GenerateContentConfig(**kwargs)
|
|
117
117
|
|
|
118
|
+
@staticmethod
def _extract_text(response: Any) -> str:
    """Return the visible text of the first candidate in ``response``.

    The first candidate's parts are walked explicitly instead of relying
    on a ``.text`` shortcut. A part contributes when its ``text``
    attribute is truthy and its ``thought`` attribute is missing or
    falsy. Contributions are concatenated in order and the result is
    stripped of surrounding whitespace.

    Args:
        response: Object exposing ``candidates`` and, optionally,
            ``prompt_feedback``. Presumably a GenerateContentResponse;
            only ``getattr`` access is relied upon here.

    Returns:
        The stripped, concatenated text of the non-thought parts.

    Raises:
        RuntimeError: If the response has no candidates, or if nothing
            is left after filtering. The message carries diagnostics
            (prompt feedback, finish reason, part types, safety ratings).
    """
    candidates = getattr(response, "candidates", None)
    if not candidates:
        feedback = getattr(response, "prompt_feedback", None)
        raise RuntimeError(f"Empty response: no candidates (prompt_feedback={feedback})")

    first = candidates[0]
    body = getattr(first, "content", None)
    parts = getattr(body, "parts", None) or []

    # Keep only parts with real text that are not flagged as thoughts.
    pieces = []
    for part in parts:
        chunk = getattr(part, "text", None)
        if chunk and not getattr(part, "thought", False):
            pieces.append(chunk)

    joined = "".join(pieces).strip()
    if joined:
        return joined

    # Nothing usable: surface everything that might explain why.
    finish = getattr(first, "finish_reason", None)
    safety = getattr(first, "safety_ratings", None)
    part_types = [type(part).__name__ for part in parts]
    raise RuntimeError(
        f"Empty response: finish_reason={finish}, "
        f"len(parts)={len(parts)}, part_types={part_types}, "
        f"safety_ratings={safety}"
    )
|
|
148
|
+
|
|
118
149
|
def _call_with_retry(self, contents: list[Any], prompt: str) -> str:
|
|
119
150
|
"""Call generate_content with exponential backoff on transient errors."""
|
|
120
151
|
max_attempts = self.config.max_retries + 1
|
|
@@ -128,9 +159,7 @@ class OCRProcessor:
|
|
|
128
159
|
contents=[prompt, *contents],
|
|
129
160
|
config=config,
|
|
130
161
|
)
|
|
131
|
-
|
|
132
|
-
return response.text.strip()
|
|
133
|
-
return ""
|
|
162
|
+
return self._extract_text(response)
|
|
134
163
|
except Exception as e:
|
|
135
164
|
is_last = attempt == max_attempts - 1
|
|
136
165
|
if is_last or not self._is_retryable(e):
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Retry logic with exponential backoff for API calls."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
from functools import wraps
|
|
6
|
+
from typing import Callable, Tuple, Type, TypeVar
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class RetryError(Exception):
    """Raised when all retry attempts are exhausted.

    Attributes:
        last_exception: The exception raised by the final attempt, or
            ``None`` when no attempt produced one.
    """

    def __init__(self, message: str, last_exception: "Exception | None" = None):
        """Store the message and the exception from the final attempt.

        Args:
            message: Human-readable failure description.
            last_exception: Exception raised by the last attempt. It is
                optional because only ``message`` is forwarded to
                ``Exception.__init__``: ``copy`` and ``pickle`` rebuild
                the instance from ``self.args`` alone and then restore
                this attribute from the instance ``__dict__``. A required
                second argument would make that rebuild raise TypeError.
        """
        super().__init__(message)
        self.last_exception = last_exception
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def retry(
    max_attempts: int = 3,
    backoff_factor: float = 2.0,
    initial_delay: float = 1.0,
    max_delay: float = 60.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,),
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator for retrying functions with exponential backoff.

    The wrapped function is called up to ``max_attempts`` times. After
    each failed attempt except the last, the wrapper sleeps for the
    current delay, then multiplies the delay by ``backoff_factor`` and
    caps it at ``max_delay``.

    Args:
        max_attempts: Maximum number of attempts (including first try).
            Values below 1 are treated as 1 so the function is always
            called at least once.
        backoff_factor: Multiplier for delay between retries
        initial_delay: Initial delay in seconds
        max_delay: Maximum delay in seconds
        exceptions: Tuple of exception types to catch and retry; any
            other exception propagates immediately.

    Returns:
        Decorated function with retry logic

    Raises:
        RetryError: From the wrapped call, when the final attempt raises
            one of ``exceptions``; that exception is attached as
            ``last_exception`` and as ``__cause__``.
    """
    # A non-positive count would otherwise skip the loop entirely and fail
    # without ever invoking the function.
    attempts = max(1, max_attempts)

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Not every callable has __name__ (e.g. functools.partial objects).
        name = getattr(func, "__name__", repr(func))

        @wraps(func)
        def wrapper(*args, **kwargs) -> T:
            delay = initial_delay

            # Every attempt except the last one may be retried.
            for attempt in range(1, attempts):
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:
                    logger.warning(
                        "Attempt %d/%d failed for %s: %s. Retrying in %.1fs...",
                        attempt,
                        attempts,
                        name,
                        exc,
                        delay,
                    )
                    # time.sleep rejects negative values; clamp defensively.
                    time.sleep(max(delay, 0.0))
                    delay = min(delay * backoff_factor, max_delay)

            # Final attempt: a failure here is terminal.
            try:
                return func(*args, **kwargs)
            except exceptions as exc:
                logger.error("All %d attempts failed for %s: %s", attempts, name, exc)
                raise RetryError(f"Failed after {attempts} attempts", exc) from exc

        return wrapper

    return decorator
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def is_retryable_error(error: Exception) -> bool:
    """Check if an error is retryable.

    The decision is a heuristic over the lower-cased ``str(error)``:

    * rate limiting: the word ``rate`` together with ``limit``, the phrase
      ``too many requests``, or status code 429;
    * server errors: status code 500, 502 or 503, or both ``internal``
      and ``error``;
    * connection problems: ``timeout`` or ``connection``.

    ``rate`` must not be preceded by a letter and status codes must not
    be adjacent to other digits. This keeps messages such as
    "generateContent ... limit" or "15000 bytes" from being classified as
    transient.

    Args:
        error: The exception to check

    Returns:
        True if the error is typically transient and retryable
    """
    # Local import: `re` is not among the imports this module already has.
    import re

    message = str(error).lower()

    # Rate limit errors
    if "limit" in message and re.search(r"(?<![a-z])rate", message):
        return True
    if "too many requests" in message:
        return True

    # HTTP status codes (429 rate limit; 500/502/503 server errors),
    # matched only as standalone numbers.
    if re.search(r"(?<!\d)(?:429|500|502|503)(?!\d)", message):
        return True

    # Server errors described in words
    if "internal" in message and "error" in message:
        return True

    # Connection errors
    return "timeout" in message or "connection" in message
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|