gemini-ocr-cli 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/.env.example +2 -2
  2. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/PKG-INFO +15 -3
  3. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/README.md +14 -2
  4. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/gemini_ocr/__init__.py +1 -1
  5. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/gemini_ocr/cli.py +11 -8
  6. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/gemini_ocr/config.py +1 -1
  7. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/gemini_ocr/processor.py +70 -18
  8. gemini_ocr_cli-0.3.2/gemini_ocr/retry.py +104 -0
  9. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/gemini_ocr/utils.py +4 -2
  10. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/pyproject.toml +1 -1
  11. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/conftest.py +1 -1
  12. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/test_config.py +1 -1
  13. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/test_metadata.py +3 -3
  14. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/.github/workflows/ci.yml +0 -0
  15. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/.gitignore +0 -0
  16. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/.pre-commit-config.yaml +0 -0
  17. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/CHANGELOG.md +0 -0
  18. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/LICENSE +0 -0
  19. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/gemini_ocr/__main__.py +0 -0
  20. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/gemini_ocr/metadata.py +0 -0
  21. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/__init__.py +0 -0
  22. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/test_cli.py +0 -0
  23. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/test_import.py +0 -0
  24. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/test_integration.py +0 -0
  25. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/test_processor.py +0 -0
  26. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/tests/test_utils.py +0 -0
  27. {gemini_ocr_cli-0.3.0 → gemini_ocr_cli-0.3.2}/uv.lock +0 -0
@@ -5,8 +5,8 @@
5
5
  # Get one at: https://aistudio.google.com/apikey
6
6
  GEMINI_API_KEY=your-api-key-here
7
7
 
8
- # Optional: Model to use (default: gemini-3.1-flash-lite-preview)
9
- # GEMINI_MODEL=gemini-3.1-flash-lite-preview
8
+ # Optional: Model to use (default: gemini-3-flash-preview)
9
+ # GEMINI_MODEL=gemini-3-flash-preview
10
10
 
11
11
  # Optional: Maximum file size in MB (default: 50)
12
12
  # GEMINI_MAX_FILE_SIZE_MB=50
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemini-ocr-cli
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: CLI tool for OCR processing using Google Gemini's vision capabilities
5
5
  Project-URL: Homepage, https://github.com/r-uben/gemini-ocr-cli
6
6
  Project-URL: Repository, https://github.com/r-uben/gemini-ocr-cli
@@ -45,6 +45,18 @@ Description-Content-Type: text/markdown
45
45
 
46
46
  A command-line tool for OCR processing using Google Gemini's vision capabilities. Process PDFs and images to extract text, tables, equations, and figures.
47
47
 
48
+ ## Choosing an OCR tool
49
+
50
+ This is one of five OCR CLI tools with a shared design: clean Markdown output, batch processing, and figure extraction. Pick based on your constraints:
51
+
52
+ | Tool | Engine | Runs | Cost | Best for |
53
+ |------|--------|------|------|----------|
54
+ | [deepseek-ocr-cli](https://github.com/r-uben/deepseek-ocr-cli) | DeepSeek vision | Local (Ollama / vLLM) | Free | General-purpose local OCR with multi-backend flexibility |
55
+ | **gemini-ocr-cli** (this repo) | Google Gemini | Cloud API | Free tier / Pay-per-use | Fast cloud OCR with concurrent processing |
56
+ | [marker-ocr-cli](https://github.com/r-uben/marker-ocr-cli) | Marker (Surya + Texify) | Local | Free | Academic papers with equations, tables, complex layouts |
57
+ | [mistral-ocr-cli](https://github.com/r-uben/mistral-ocr-cli) | Mistral OCR API | Cloud API | ~$1/1k pages | Structured extraction (tables, headers, footers) |
58
+ | [nougat-ocr-cli](https://github.com/r-uben/nougat-ocr-cli) | Meta Nougat | Local (GPU) | Free | Academic papers, GPU-accelerated batch processing |
59
+
48
60
  ## Installation
49
61
 
50
62
  Requires Python 3.11+ and a [Google Gemini API key](https://aistudio.google.com/apikey).
@@ -88,7 +100,7 @@ Usage: gemini-ocr [OPTIONS] INPUT_PATH
88
100
  Options:
89
101
  -o, --output-dir PATH Output directory (default: <input_dir>/gemini_ocr_output/)
90
102
  --api-key TEXT Gemini API key (or set GEMINI_API_KEY env var)
91
- --model TEXT Model to use (default: gemini-3.1-flash-lite-preview)
103
+ --model TEXT Model to use (default: gemini-3-flash-preview)
92
104
  --task [convert|extract|table|describe_figure]
93
105
  OCR task type (default: convert)
94
106
  --prompt TEXT Custom prompt for OCR processing
@@ -136,7 +148,7 @@ All CLI options can also be set via environment variables or a `.env` file:
136
148
  | CLI flag | Environment variable | Default |
137
149
  |----------|---------------------|---------|
138
150
  | `--api-key` | `GEMINI_API_KEY` | (required) |
139
- | `--model` | `GEMINI_MODEL` | `gemini-3.1-flash-lite-preview` |
151
+ | `--model` | `GEMINI_MODEL` | `gemini-3-flash-preview` |
140
152
  | `--include-images` | `GEMINI_INCLUDE_IMAGES` | `true` |
141
153
  | `--save-originals` | `GEMINI_SAVE_ORIGINAL_IMAGES` | `true` |
142
154
  | `--workers` | `GEMINI_MAX_WORKERS` | `1` |
@@ -7,6 +7,18 @@
7
7
 
8
8
  A command-line tool for OCR processing using Google Gemini's vision capabilities. Process PDFs and images to extract text, tables, equations, and figures.
9
9
 
10
+ ## Choosing an OCR tool
11
+
12
+ This is one of five OCR CLI tools with a shared design: clean Markdown output, batch processing, and figure extraction. Pick based on your constraints:
13
+
14
+ | Tool | Engine | Runs | Cost | Best for |
15
+ |------|--------|------|------|----------|
16
+ | [deepseek-ocr-cli](https://github.com/r-uben/deepseek-ocr-cli) | DeepSeek vision | Local (Ollama / vLLM) | Free | General-purpose local OCR with multi-backend flexibility |
17
+ | **gemini-ocr-cli** (this repo) | Google Gemini | Cloud API | Free tier / Pay-per-use | Fast cloud OCR with concurrent processing |
18
+ | [marker-ocr-cli](https://github.com/r-uben/marker-ocr-cli) | Marker (Surya + Texify) | Local | Free | Academic papers with equations, tables, complex layouts |
19
+ | [mistral-ocr-cli](https://github.com/r-uben/mistral-ocr-cli) | Mistral OCR API | Cloud API | ~$1/1k pages | Structured extraction (tables, headers, footers) |
20
+ | [nougat-ocr-cli](https://github.com/r-uben/nougat-ocr-cli) | Meta Nougat | Local (GPU) | Free | Academic papers, GPU-accelerated batch processing |
21
+
10
22
  ## Installation
11
23
 
12
24
  Requires Python 3.11+ and a [Google Gemini API key](https://aistudio.google.com/apikey).
@@ -50,7 +62,7 @@ Usage: gemini-ocr [OPTIONS] INPUT_PATH
50
62
  Options:
51
63
  -o, --output-dir PATH Output directory (default: <input_dir>/gemini_ocr_output/)
52
64
  --api-key TEXT Gemini API key (or set GEMINI_API_KEY env var)
53
- --model TEXT Model to use (default: gemini-3.1-flash-lite-preview)
65
+ --model TEXT Model to use (default: gemini-3-flash-preview)
54
66
  --task [convert|extract|table|describe_figure]
55
67
  OCR task type (default: convert)
56
68
  --prompt TEXT Custom prompt for OCR processing
@@ -98,7 +110,7 @@ All CLI options can also be set via environment variables or a `.env` file:
98
110
  | CLI flag | Environment variable | Default |
99
111
  |----------|---------------------|---------|
100
112
  | `--api-key` | `GEMINI_API_KEY` | (required) |
101
- | `--model` | `GEMINI_MODEL` | `gemini-3.1-flash-lite-preview` |
113
+ | `--model` | `GEMINI_MODEL` | `gemini-3-flash-preview` |
102
114
  | `--include-images` | `GEMINI_INCLUDE_IMAGES` | `true` |
103
115
  | `--save-originals` | `GEMINI_SAVE_ORIGINAL_IMAGES` | `true` |
104
116
  | `--workers` | `GEMINI_MAX_WORKERS` | `1` |
@@ -1,6 +1,6 @@
1
1
  """Gemini OCR CLI - Document processing using Google Gemini's vision capabilities."""
2
2
 
3
- __version__ = "0.3.0"
3
+ __version__ = "0.3.2"
4
4
 
5
5
  from gemini_ocr.config import Config
6
6
  from gemini_ocr.processor import OCRProcessor
@@ -23,7 +23,8 @@ from gemini_ocr.utils import (
23
23
  console = Console()
24
24
 
25
25
  # Get original working directory if set (for wrapper scripts)
26
- ORIGINAL_CWD = os.environ.get("GEMINI_OCR_CWD", os.getcwd())
26
+ _cwd_override = os.environ.get("GEMINI_OCR_CWD", "")
27
+ ORIGINAL_CWD = _cwd_override if _cwd_override and Path(_cwd_override).is_absolute() else os.getcwd()
27
28
 
28
29
 
29
30
  def _resolve_path(path: Path) -> Path:
@@ -50,8 +51,8 @@ def _resolve_path(path: Path) -> Path:
50
51
  @click.option(
51
52
  "--model",
52
53
  type=str,
53
- default="gemini-3.1-flash-lite-preview",
54
- help="Gemini model to use (default: gemini-3.1-flash-lite-preview)",
54
+ default="gemini-3-flash-preview",
55
+ help="Gemini model to use (default: gemini-3-flash-preview)",
55
56
  )
56
57
  @click.option(
57
58
  "--task",
@@ -174,10 +175,12 @@ def cli(
174
175
  if env_file:
175
176
  config = Config.from_env(env_file)
176
177
  else:
177
- if api_key:
178
- os.environ["GEMINI_API_KEY"] = api_key
179
178
  config = Config.from_env()
180
179
 
180
+ # Pass CLI api_key directly to config (don't pollute os.environ)
181
+ if api_key:
182
+ config.api_key = api_key
183
+
181
184
  # Override with CLI options
182
185
  config.model = model
183
186
  config.include_images = include_images
@@ -213,7 +216,7 @@ def cli(
213
216
  if verbose:
214
217
  import traceback
215
218
 
216
- traceback.print_exc()
219
+ traceback.print_exc(file=sys.stderr)
217
220
  sys.exit(1)
218
221
 
219
222
 
@@ -260,9 +263,9 @@ def _show_info(api_key: str | None = None) -> None:
260
263
  console.print()
261
264
 
262
265
  try:
263
- if api_key:
264
- os.environ["GEMINI_API_KEY"] = api_key
265
266
  config = Config.from_env()
267
+ if api_key:
268
+ config.api_key = api_key
266
269
 
267
270
  config_table = Table(title="Configuration")
268
271
  config_table.add_column("Setting", style="cyan")
@@ -42,7 +42,7 @@ class Config(BaseSettings):
42
42
 
43
43
  # Model Configuration
44
44
  model: str = Field(
45
- default="gemini-3.1-flash-lite-preview",
45
+ default="gemini-3-flash-preview",
46
46
  description="Gemini model to use for OCR",
47
47
  )
48
48
 
@@ -2,6 +2,7 @@
2
2
 
3
3
  import io
4
4
  import logging
5
+ import re
5
6
  import shutil
6
7
  import threading
7
8
  import time
@@ -35,15 +36,12 @@ console = Console()
35
36
 
36
37
  # OCR prompts for different tasks
37
38
  OCR_PROMPTS = {
38
- "convert": """Extract all text from this document and convert it to clean markdown format.
39
-
40
- Rules:
41
- - Preserve the document structure (headings, paragraphs, lists, tables)
42
- - Convert tables to markdown table format
43
- - Preserve mathematical equations in LaTeX format where possible
44
- - Include figure/image captions if present
45
- - Do not describe images, just note their presence as [Figure X] or [Image]
46
- - Output ONLY the extracted text in markdown, no commentary""",
39
+ "convert": """Convert this document into well-structured markdown.
40
+
41
+ - Maintain headings, paragraphs, lists, and tables (use markdown table format).
42
+ - Represent equations in LaTeX syntax.
43
+ - Preserve figure captions as [Figure N: <caption>]. Do not describe figure contents.
44
+ - Output only the resulting markdown, no commentary.""",
47
45
  "extract": """Extract all visible text from this document exactly as it appears.
48
46
  Output only the extracted text, preserving line breaks and spacing.""",
49
47
  "describe_figure": """Analyze this figure/chart/diagram in detail:
@@ -101,23 +99,67 @@ class OCRProcessor:
101
99
  error_str = str(error).lower()
102
100
  return "429" in error_str or "rate limit" in error_str or "quota" in error_str
103
101
 
102
# Pattern for Gemini 3.x Flash model names (e.g. "gemini-3-flash-preview",
# "gemini-3.1-flash-lite-preview"). Per the original author's note, these
# models use a thinking architecture and need an explicit thinking config to
# avoid empty responses (thinking stalls at low temperature).
# Intentionally does NOT match gemini-2.x (different thinking API) or
# gemini-3-pro (not a Flash model).
_GEMINI_3_FLASH_RE = re.compile(r"gemini-3(?:\.\d+)?-flash")

def _build_generation_config(self) -> types.GenerateContentConfig:
    """Return the generation config used for every OCR request.

    The temperature is always 0.1. When ``self.model_name`` matches
    ``_GEMINI_3_FLASH_RE``, a ``ThinkingConfig`` with
    ``thinking_level="MINIMAL"`` is attached as well.
    """
    is_gemini_3_flash = self._GEMINI_3_FLASH_RE.search(self.model_name)
    if not is_gemini_3_flash:
        return types.GenerateContentConfig(temperature=0.1)

    minimal_thinking = types.ThinkingConfig(
        thinking_level="MINIMAL",
    )
    return types.GenerateContentConfig(
        temperature=0.1,
        thinking_config=minimal_thinking,
    )
117
+
118
+ @staticmethod
119
+ def _extract_text(response: Any) -> str:
120
+ """Extract text from a GenerateContentResponse by walking parts explicitly.
121
+
122
+ The `.text` shortcut returns None when parts include thought summaries,
123
+ non-text parts, or when finish_reason != STOP — which is common with
124
+ Gemini 3.x thinking models. Walking parts is the reliable path.
125
+ """
126
+ candidates = getattr(response, "candidates", None) or []
127
+ if not candidates:
128
+ feedback = getattr(response, "prompt_feedback", None)
129
+ raise RuntimeError(f"Empty response: no candidates (prompt_feedback={feedback})")
130
+
131
+ candidate = candidates[0]
132
+ content = getattr(candidate, "content", None)
133
+ parts = getattr(content, "parts", None) or []
134
+ text = "".join(
135
+ p.text for p in parts if getattr(p, "text", None) and not getattr(p, "thought", False)
136
+ ).strip()
137
+
138
+ if not text:
139
+ finish = getattr(candidate, "finish_reason", None)
140
+ safety = getattr(candidate, "safety_ratings", None)
141
+ part_types = [type(p).__name__ for p in parts]
142
+ raise RuntimeError(
143
+ f"Empty response: finish_reason={finish}, "
144
+ f"len(parts)={len(parts)}, part_types={part_types}, "
145
+ f"safety_ratings={safety}"
146
+ )
147
+ return text
148
+
104
149
  def _call_with_retry(self, contents: list[Any], prompt: str) -> str:
105
150
  """Call generate_content with exponential backoff on transient errors."""
106
151
  max_attempts = self.config.max_retries + 1
107
152
  base_delay = self.config.retry_base_delay
153
+ config = self._build_generation_config()
108
154
 
109
155
  for attempt in range(max_attempts):
110
156
  try:
111
157
  response = self.client.models.generate_content(
112
158
  model=self.model_name,
113
159
  contents=[prompt, *contents],
114
- config=types.GenerateContentConfig(
115
- temperature=0.1,
116
- ),
160
+ config=config,
117
161
  )
118
- if response.text:
119
- return response.text.strip()
120
- return ""
162
+ return self._extract_text(response)
121
163
  except Exception as e:
122
164
  is_last = attempt == max_attempts - 1
123
165
  if is_last or not self._is_retryable(e):
@@ -204,6 +246,7 @@ class OCRProcessor:
204
246
  start_time = time.time()
205
247
  self.config.validate_file_size(pdf_path)
206
248
 
249
+ uploaded_file = None
207
250
  try:
208
251
  if show_progress and not self.config.quiet:
209
252
  with Progress(
@@ -245,6 +288,13 @@ class OCRProcessor:
245
288
  error=str(e),
246
289
  processing_time=time.time() - start_time,
247
290
  )
291
+ finally:
292
+ # Clean up uploaded file from Gemini Files API (48hr retention)
293
+ if uploaded_file is not None:
294
+ try:
295
+ self.client.files.delete(name=uploaded_file.name)
296
+ except Exception as del_err:
297
+ logger.debug(f"Failed to delete uploaded file: {del_err}")
248
298
 
249
299
  def process_file(
250
300
  self,
@@ -281,9 +331,11 @@ class OCRProcessor:
281
331
  shutil.copy2(result.file_path, original_output)
282
332
 
283
333
  # Write clean markdown — just the OCR text, no headers
284
- markdown_path.write_text(
285
- result.text if result.success else f"*[OCR Failed: {result.error}]*", encoding="utf-8"
286
- )
334
+ if result.success:
335
+ markdown_path.write_text(result.text, encoding="utf-8")
336
+ else:
337
+ # Sanitize error: don't leak raw exception details to output files
338
+ markdown_path.write_text("*[OCR Failed]*", encoding="utf-8")
287
339
 
288
340
  # Save extracted images
289
341
  if result.extracted_images and self.config.include_images:
@@ -0,0 +1,104 @@
1
+ """Retry logic with exponential backoff for API calls."""
2
+
3
import logging
import re
import time
from functools import wraps
from typing import Callable, Tuple, Type, TypeVar

logger = logging.getLogger(__name__)

T = TypeVar("T")

# HTTP status codes treated as transient. The digit look-arounds stop a code
# from matching inside a longer number (e.g. "500" inside "15000 bytes").
_RETRYABLE_STATUS_RE = re.compile(r"(?<!\d)(?:429|500|502|503|504)(?!\d)")


class RetryError(Exception):
    """Raised when all retry attempts are exhausted.

    Attributes:
        last_exception: The exception raised by the final attempt.
    """

    def __init__(self, message: str, last_exception: Exception):
        super().__init__(message)
        self.last_exception = last_exception


def retry(
    max_attempts: int = 3,
    backoff_factor: float = 2.0,
    initial_delay: float = 1.0,
    max_delay: float = 60.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,),
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator for retrying functions with exponential backoff.

    Args:
        max_attempts: Maximum number of attempts (including first try).
            Must be at least 1.
        backoff_factor: Multiplier for delay between retries
        initial_delay: Initial delay in seconds (capped at ``max_delay``)
        max_delay: Maximum delay in seconds
        exceptions: Tuple of exception types to catch and retry; any other
            exception propagates immediately without a retry

    Returns:
        Decorated function with retry logic

    Raises:
        ValueError: If ``max_attempts`` is less than 1. Without this check
            the wrapped function would never be called and a ``RetryError``
            carrying no underlying exception would be raised instead.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be at least 1, got {max_attempts}")

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Callables such as functools.partial have no __name__.
        func_name = getattr(func, "__name__", repr(func))

        @wraps(func)
        def wrapper(*args, **kwargs) -> T:
            # Honour max_delay from the very first sleep, not only after
            # the first multiplication.
            delay = min(initial_delay, max_delay)

            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:
                    if attempt == max_attempts:
                        logger.error(
                            "All %d attempts failed for %s: %s",
                            max_attempts,
                            func_name,
                            exc,
                        )
                        raise RetryError(
                            f"Failed after {max_attempts} attempts", exc
                        ) from exc

                    logger.warning(
                        "Attempt %d/%d failed for %s: %s. Retrying in %.1fs...",
                        attempt,
                        max_attempts,
                        func_name,
                        exc,
                        delay,
                    )
                    time.sleep(delay)
                    delay = min(delay * backoff_factor, max_delay)

            # Every iteration either returns or raises, and max_attempts >= 1
            # guarantees at least one iteration; kept for type checkers.
            raise AssertionError("retry loop exited without returning or raising")

        return wrapper

    return decorator


def is_retryable_error(error: Exception) -> bool:
    """Check if an error is retryable.

    The decision is a heuristic over the lower-cased ``str(error)``: it looks
    for transient HTTP status codes (429, 500, 502, 503, 504) as standalone
    numbers, and for rate-limit, internal-error, timeout and connection
    keywords.

    Args:
        error: The exception to check

    Returns:
        True if the error is typically transient and retryable
    """
    message = str(error).lower()

    # Rate limiting and server-side failures reported by status code.
    if _RETRYABLE_STATUS_RE.search(message):
        return True

    # Rate limit errors described in words.
    if "rate" in message and "limit" in message:
        return True
    if "too many requests" in message:
        return True

    # Server errors described in words.
    if "internal" in message and "error" in message:
        return True

    # Connection errors.
    return "timeout" in message or "connection" in message
@@ -56,10 +56,12 @@ def get_supported_files(directory: Path, recursive: bool = True) -> list[Path]:
56
56
 
57
57
  def sanitize_filename(filename: str, max_length: int | None = 200) -> str:
58
58
  """Sanitize filename for safe filesystem usage."""
59
- sanitized = re.sub(r'[<>:"/\\|?*]', "_", filename)
59
+ # Strip null bytes and leading dots (prevent hidden files / path tricks)
60
+ sanitized = filename.replace("\x00", "")
61
+ sanitized = re.sub(r'[<>:"/\\|?*]', "_", sanitized)
60
62
  sanitized = re.sub(r"\s+", "_", sanitized)
61
63
  sanitized = re.sub(r"_+", "_", sanitized)
62
- sanitized = sanitized.strip("_")
64
+ sanitized = sanitized.strip("_.")
63
65
  if max_length and len(sanitized) > max_length:
64
66
  sanitized = sanitized[:max_length]
65
67
  return sanitized or "unnamed"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "gemini-ocr-cli"
3
- version = "0.3.0"
3
+ version = "0.3.2"
4
4
  description = "CLI tool for OCR processing using Google Gemini's vision capabilities"
5
5
  authors = [
6
6
  {name = "Ruben Fernandez-Fuertes", email = "fernandezfuertesruben@gmail.com"}
@@ -66,7 +66,7 @@ def mock_config():
66
66
  with patch.dict(os.environ, {"GEMINI_API_KEY": "test-api-key"}):
67
67
  config = Config()
68
68
  config.api_key = "test-api-key"
69
- config.model = "gemini-3.1-flash-lite-preview"
69
+ config.model = "gemini-3-flash-preview"
70
70
  config.verbose = False
71
71
  config.quiet = False
72
72
  config.max_workers = 1
@@ -55,7 +55,7 @@ class TestConfigDefaults:
55
55
  def test_default_model(self):
56
56
  with patch.dict(os.environ, {"GEMINI_API_KEY": "test"}, clear=True):
57
57
  config = Config()
58
- assert config.model == "gemini-3.1-flash-lite-preview"
58
+ assert config.model == "gemini-3-flash-preview"
59
59
 
60
60
  def test_default_max_file_size(self):
61
61
  with patch.dict(os.environ, {"GEMINI_API_KEY": "test"}, clear=True):
@@ -40,7 +40,7 @@ class TestMetadataManager:
40
40
  f.write_bytes(b"fake pdf content")
41
41
 
42
42
  meta = MetadataManager(tmp_path)
43
- meta.record(f, processing_time=1.5, model="gemini-3.1-flash-lite-preview", output_path="test/test.md")
43
+ meta.record(f, processing_time=1.5, model="gemini-3-flash-preview", output_path="test/test.md")
44
44
 
45
45
  assert meta.is_processed(f)
46
46
 
@@ -98,12 +98,12 @@ class TestMetadataManager:
98
98
  f.write_bytes(b"data")
99
99
 
100
100
  meta = MetadataManager(tmp_path)
101
- meta.record(f, processing_time=2.5, model="gemini-3.1-flash-lite-preview", output_path="test/test.md")
101
+ meta.record(f, processing_time=2.5, model="gemini-3-flash-preview", output_path="test/test.md")
102
102
 
103
103
  entry = meta.files["test.pdf"]
104
104
  assert entry["status"] == "completed"
105
105
  assert entry["processing_time"] == 2.5
106
- assert entry["model"] == "gemini-3.1-flash-lite-preview"
106
+ assert entry["model"] == "gemini-3-flash-preview"
107
107
  assert entry["output_path"] == "test/test.md"
108
108
  assert "checksum" in entry
109
109
  assert "timestamp" in entry
File without changes
File without changes