ocrcontext 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/CHANGELOG.md +10 -1
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/PKG-INFO +1 -1
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/pyproject.toml +1 -1
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/cli.py +25 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/.gitignore +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/LICENSE +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/README.md +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/01_quickstart.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/02_refine_openai.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/03_structured_invoice.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/04_local_ollama.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/image_smoke_test.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/pdf_smoke_test.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/structured_smoke_test.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/__init__.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/analyzer.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/config.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/__init__.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/base.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/handwriting.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/paddle.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/pdf_text.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/registry.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/trocr.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/vision.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/exceptions.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/__init__.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/drift.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/extractor.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/formatting.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/literal_preserve.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/prompts.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/refiner.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/schemas.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/loaders.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/pipeline.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/preprocessing/__init__.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/preprocessing/image.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/py.typed +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/quality.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/schemas.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/types.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/utils/__init__.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/utils/files.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/utils/lang.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/__init__.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/conftest.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_cli.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_langchain_loader.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_literal_preserve.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_llm.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_pipeline_analyzer.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_schemas.py +0 -0
- {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_text_helpers.py +0 -0
|
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.5] - 2026-06-27
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- CLI now shows a clear error message when an LLM provider API key is missing
|
|
14
|
+
instead of a raw traceback (e.g. `OPENAI_API_KEY` not set).
|
|
15
|
+
- CLI prints a first-run warning before the OCR step when PaddleOCR models
|
|
16
|
+
have not been downloaded yet, so users know the ~90 MB download is expected.
|
|
17
|
+
|
|
10
18
|
## [0.1.4] - 2026-06-27
|
|
11
19
|
|
|
12
20
|
### Added
|
|
@@ -95,7 +103,8 @@ into a standalone, LLM-agnostic library.
|
|
|
95
103
|
- **Packaging** — optional extras `[paddle]`, `[trocr]`, `[vision]`, `[all]`;
|
|
96
104
|
PEP 561 typed (`py.typed`); examples and a GPU/network-free test suite.
|
|
97
105
|
|
|
98
|
-
[Unreleased]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.4...HEAD
|
|
106
|
+
[Unreleased]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.5...HEAD
|
|
107
|
+
[0.1.5]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.4...v0.1.5
|
|
99
108
|
[0.1.4]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.3...v0.1.4
|
|
100
109
|
[0.1.3]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.2...v0.1.3
|
|
101
110
|
[0.1.2]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.1...v0.1.2
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ocrcontext
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Decoupled, LLM-agnostic document OCR + structured extraction. Vision and LLM parsing in 3 lines of code.
|
|
5
5
|
Project-URL: Homepage, https://github.com/BahadirKarsli/OCRContext
|
|
6
6
|
Project-URL: Repository, https://github.com/BahadirKarsli/OCRContext
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ocrcontext"
|
|
7
|
-
version = "0.1.4"
|
|
7
|
+
version = "0.1.5"
|
|
8
8
|
description = "Decoupled, LLM-agnostic document OCR + structured extraction. Vision and LLM parsing in 3 lines of code."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -116,6 +116,13 @@ _SCHEMA_NAMES = list(_SCHEMAS)
|
|
|
116
116
|
|
|
117
117
|
def _build_llm(provider: str, model: str):
|
|
118
118
|
"""Dynamically import the right LangChain provider class."""
|
|
119
|
+
_API_KEY_HINTS = {
|
|
120
|
+
"openai": ("OPENAI_API_KEY", "platform.openai.com/api-keys"),
|
|
121
|
+
"anthropic": ("ANTHROPIC_API_KEY", "console.anthropic.com/settings/keys"),
|
|
122
|
+
"google": ("GOOGLE_API_KEY", "aistudio.google.com/apikey"),
|
|
123
|
+
"ollama": (None, None),
|
|
124
|
+
}
|
|
125
|
+
|
|
119
126
|
try:
|
|
120
127
|
if provider == "openai":
|
|
121
128
|
from langchain_openai import ChatOpenAI # type: ignore[import-untyped]
|
|
@@ -136,6 +143,19 @@ def _build_llm(provider: str, model: str):
|
|
|
136
143
|
err=True,
|
|
137
144
|
)
|
|
138
145
|
raise typer.Exit(code=1)
|
|
146
|
+
except Exception as exc:
|
|
147
|
+
msg = str(exc)
|
|
148
|
+
if "api_key" in msg.lower() or "credentials" in msg.lower() or "auth" in msg.lower():
|
|
149
|
+
env_var, url = _API_KEY_HINTS.get(provider, (None, None))
|
|
150
|
+
hint = f"Set it with: $env:{env_var} = \"...\"" if env_var else ""
|
|
151
|
+
url_hint = f"\nGet a key at: {url}" if url else ""
|
|
152
|
+
typer.echo(
|
|
153
|
+
f"[ERROR] No API key found for '{provider}'.\n{hint}{url_hint}",
|
|
154
|
+
err=True,
|
|
155
|
+
)
|
|
156
|
+
else:
|
|
157
|
+
typer.echo(f"[ERROR] Failed to initialize '{provider}': {exc}", err=True)
|
|
158
|
+
raise typer.Exit(code=1)
|
|
139
159
|
|
|
140
160
|
typer.echo(
|
|
141
161
|
f"[ERROR] Unknown provider '{provider}'. "
|
|
@@ -213,6 +233,11 @@ def extract(
|
|
|
213
233
|
|
|
214
234
|
try:
|
|
215
235
|
_info(f"file: {file_path.name}")
|
|
236
|
+
|
|
237
|
+
paddlex_cache = Path(os.environ.get("PADDLE_PDX_CACHE_HOME", Path.home() / ".paddlex"))
|
|
238
|
+
if not (paddlex_cache / "official_models").exists():
|
|
239
|
+
_info("first run: downloading OCR model (~90 MB), this may take a minute...")
|
|
240
|
+
|
|
216
241
|
_info("OCR...")
|
|
217
242
|
|
|
218
243
|
ocr_result = analyzer.analyze(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|