ocrcontext 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/CHANGELOG.md +10 -1
  2. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/PKG-INFO +1 -1
  3. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/pyproject.toml +1 -1
  4. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/cli.py +25 -0
  5. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/.gitignore +0 -0
  6. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/LICENSE +0 -0
  7. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/README.md +0 -0
  8. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/01_quickstart.py +0 -0
  9. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/02_refine_openai.py +0 -0
  10. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/03_structured_invoice.py +0 -0
  11. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/04_local_ollama.py +0 -0
  12. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/image_smoke_test.py +0 -0
  13. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/pdf_smoke_test.py +0 -0
  14. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/examples/structured_smoke_test.py +0 -0
  15. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/__init__.py +0 -0
  16. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/analyzer.py +0 -0
  17. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/config.py +0 -0
  18. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/__init__.py +0 -0
  19. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/base.py +0 -0
  20. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/handwriting.py +0 -0
  21. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/paddle.py +0 -0
  22. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/pdf_text.py +0 -0
  23. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/registry.py +0 -0
  24. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/trocr.py +0 -0
  25. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/engines/vision.py +0 -0
  26. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/exceptions.py +0 -0
  27. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/__init__.py +0 -0
  28. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/drift.py +0 -0
  29. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/extractor.py +0 -0
  30. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/formatting.py +0 -0
  31. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/literal_preserve.py +0 -0
  32. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/prompts.py +0 -0
  33. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/refiner.py +0 -0
  34. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/llm/schemas.py +0 -0
  35. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/loaders.py +0 -0
  36. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/pipeline.py +0 -0
  37. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/preprocessing/__init__.py +0 -0
  38. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/preprocessing/image.py +0 -0
  39. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/py.typed +0 -0
  40. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/quality.py +0 -0
  41. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/schemas.py +0 -0
  42. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/types.py +0 -0
  43. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/utils/__init__.py +0 -0
  44. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/utils/files.py +0 -0
  45. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/src/ocrcontext/utils/lang.py +0 -0
  46. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/__init__.py +0 -0
  47. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/conftest.py +0 -0
  48. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_cli.py +0 -0
  49. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_langchain_loader.py +0 -0
  50. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_literal_preserve.py +0 -0
  51. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_llm.py +0 -0
  52. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_pipeline_analyzer.py +0 -0
  53. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_schemas.py +0 -0
  54. {ocrcontext-0.1.4 → ocrcontext-0.1.5}/tests/test_text_helpers.py +0 -0
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.1.5] - 2026-06-27
11
+
12
+ ### Fixed
13
+ - CLI now shows a clear error message when an LLM provider API key is missing
14
+ instead of a raw traceback (e.g. `OPENAI_API_KEY` not set).
15
+ - CLI prints a first-run warning before the OCR step when PaddleOCR models
16
+ have not been downloaded yet, so users know the ~90 MB download is expected.
17
+
10
18
  ## [0.1.4] - 2026-06-27
11
19
 
12
20
  ### Added
@@ -95,7 +103,8 @@ into a standalone, LLM-agnostic library.
95
103
  - **Packaging** — optional extras `[paddle]`, `[trocr]`, `[vision]`, `[all]`;
96
104
  PEP 561 typed (`py.typed`); examples and a GPU/network-free test suite.
97
105
 
98
- [Unreleased]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.4...HEAD
106
+ [Unreleased]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.5...HEAD
107
+ [0.1.5]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.4...v0.1.5
99
108
  [0.1.4]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.3...v0.1.4
100
109
  [0.1.3]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.2...v0.1.3
101
110
  [0.1.2]: https://github.com/bahadirkarsli/ocrcontext/compare/v0.1.1...v0.1.2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocrcontext
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Decoupled, LLM-agnostic document OCR + structured extraction. Vision and LLM parsing in 3 lines of code.
5
5
  Project-URL: Homepage, https://github.com/BahadirKarsli/OCRContext
6
6
  Project-URL: Repository, https://github.com/BahadirKarsli/OCRContext
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "ocrcontext"
7
- version = "0.1.4"
7
+ version = "0.1.5"
8
8
  description = "Decoupled, LLM-agnostic document OCR + structured extraction. Vision and LLM parsing in 3 lines of code."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -116,6 +116,13 @@ _SCHEMA_NAMES = list(_SCHEMAS)
116
116
 
117
117
  def _build_llm(provider: str, model: str):
118
118
  """Dynamically import the right LangChain provider class."""
119
+ _API_KEY_HINTS = {
120
+ "openai": ("OPENAI_API_KEY", "platform.openai.com/api-keys"),
121
+ "anthropic": ("ANTHROPIC_API_KEY", "console.anthropic.com/settings/keys"),
122
+ "google": ("GOOGLE_API_KEY", "aistudio.google.com/apikey"),
123
+ "ollama": (None, None),
124
+ }
125
+
119
126
  try:
120
127
  if provider == "openai":
121
128
  from langchain_openai import ChatOpenAI # type: ignore[import-untyped]
@@ -136,6 +143,19 @@ def _build_llm(provider: str, model: str):
136
143
  err=True,
137
144
  )
138
145
  raise typer.Exit(code=1)
146
+ except Exception as exc:
147
+ msg = str(exc)
148
+ if "api_key" in msg.lower() or "credentials" in msg.lower() or "auth" in msg.lower():
149
+ env_var, url = _API_KEY_HINTS.get(provider, (None, None))
150
+ hint = f"Set it with: $env:{env_var} = \"...\"" if env_var else ""
151
+ url_hint = f"\nGet a key at: {url}" if url else ""
152
+ typer.echo(
153
+ f"[ERROR] No API key found for '{provider}'.\n{hint}{url_hint}",
154
+ err=True,
155
+ )
156
+ else:
157
+ typer.echo(f"[ERROR] Failed to initialize '{provider}': {exc}", err=True)
158
+ raise typer.Exit(code=1)
139
159
 
140
160
  typer.echo(
141
161
  f"[ERROR] Unknown provider '{provider}'. "
@@ -213,6 +233,11 @@ def extract(
213
233
 
214
234
  try:
215
235
  _info(f"file: {file_path.name}")
236
+
237
+ paddlex_cache = Path(os.environ.get("PADDLE_PDX_CACHE_HOME", Path.home() / ".paddlex"))
238
+ if not (paddlex_cache / "official_models").exists():
239
+ _info("first run: downloading OCR model (~90 MB), this may take a minute...")
240
+
216
241
  _info("OCR...")
217
242
 
218
243
  ocr_result = analyzer.analyze(
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes