extractforms 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. extractforms-0.1.0/.gitignore +277 -0
  2. extractforms-0.1.0/LICENSE +22 -0
  3. extractforms-0.1.0/PKG-INFO +78 -0
  4. extractforms-0.1.0/README.md +51 -0
  5. extractforms-0.1.0/docs/adr/README.md +17 -0
  6. extractforms-0.1.0/docs/engineering/README.md +7 -0
  7. extractforms-0.1.0/pyproject.toml +90 -0
  8. extractforms-0.1.0/src/extractforms/__init__.py +32 -0
  9. extractforms-0.1.0/src/extractforms/_bootstrap.py +10 -0
  10. extractforms-0.1.0/src/extractforms/async_runner.py +64 -0
  11. extractforms-0.1.0/src/extractforms/backends/__init__.py +7 -0
  12. extractforms-0.1.0/src/extractforms/backends/multimodal_openai.py +254 -0
  13. extractforms-0.1.0/src/extractforms/backends/ocr_document_intelligence.py +48 -0
  14. extractforms-0.1.0/src/extractforms/cli.py +150 -0
  15. extractforms-0.1.0/src/extractforms/dependencies.py +66 -0
  16. extractforms-0.1.0/src/extractforms/exceptions.py +83 -0
  17. extractforms-0.1.0/src/extractforms/extractor.py +646 -0
  18. extractforms-0.1.0/src/extractforms/logging.py +96 -0
  19. extractforms-0.1.0/src/extractforms/pdf_render.py +88 -0
  20. extractforms-0.1.0/src/extractforms/pricing.py +27 -0
  21. extractforms-0.1.0/src/extractforms/prompts.py +96 -0
  22. extractforms-0.1.0/src/extractforms/schema_store.py +138 -0
  23. extractforms-0.1.0/src/extractforms/settings.py +522 -0
  24. extractforms-0.1.0/src/extractforms/typing/__init__.py +32 -0
  25. extractforms-0.1.0/src/extractforms/typing/enums.py +65 -0
  26. extractforms-0.1.0/src/extractforms/typing/models.py +205 -0
  27. extractforms-0.1.0/src/extractforms/typing/protocol.py +59 -0
@@ -0,0 +1,277 @@
1
+ # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
2
+ # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos,dotenv,linux,python,windows
3
+ # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,macos,dotenv,linux,python,windows
4
+
5
+ ### dotenv ###
6
+ .env
7
+
8
+ ### Linux ###
9
+ *~
10
+
11
+ # temporary files which can be created if a process still has a handle open of a deleted file
12
+ .fuse_hidden*
13
+
14
+ # KDE directory preferences
15
+ .directory
16
+
17
+ # Linux trash folder which might appear on any partition or disk
18
+ .Trash-*
19
+
20
+ # .nfs files are created when an open file is removed but is still being accessed
21
+ .nfs*
22
+
23
+ ### macOS ###
24
+ # General
25
+ .DS_Store
26
+ .AppleDouble
27
+ .LSOverride
28
+
29
+ # Icon must end with two \r
30
+ Icon
31
+
32
+
33
+ # Thumbnails
34
+ ._*
35
+
36
+ # Files that might appear in the root of a volume
37
+ .DocumentRevisions-V100
38
+ .fseventsd
39
+ .Spotlight-V100
40
+ .TemporaryItems
41
+ .Trashes
42
+ .VolumeIcon.icns
43
+ .com.apple.timemachine.donotpresent
44
+
45
+ # Directories potentially created on remote AFP share
46
+ .AppleDB
47
+ .AppleDesktop
48
+ Network Trash Folder
49
+ Temporary Items
50
+ .apdisk
51
+
52
+ ### macOS Patch ###
53
+ # iCloud generated files
54
+ *.icloud
55
+
56
+ ### Python ###
57
+ # Byte-compiled / optimized / DLL files
58
+ __pycache__/
59
+ *.py[cod]
60
+ *$py.class
61
+
62
+ # C extensions
63
+ *.so
64
+
65
+ # Distribution / packaging
66
+ .Python
67
+ build/
68
+ develop-eggs/
69
+ dist/
70
+ downloads/
71
+ eggs/
72
+ .eggs/
73
+ lib/
74
+ lib64/
75
+ parts/
76
+ sdist/
77
+ var/
78
+ wheels/
79
+ share/python-wheels/
80
+ *.egg-info/
81
+ .installed.cfg
82
+ *.egg
83
+ MANIFEST
84
+
85
+ # PyInstaller
86
+ # Usually these files are written by a python script from a template
87
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
88
+ *.manifest
89
+ *.spec
90
+
91
+ # Installer logs
92
+ pip-log.txt
93
+ pip-delete-this-directory.txt
94
+
95
+ # Unit test / coverage reports
96
+ htmlcov/
97
+ .tox/
98
+ .nox/
99
+ .coverage
100
+ .coverage.*
101
+ .cache
102
+ nosetests.xml
103
+ coverage.xml
104
+ *.cover
105
+ *.py,cover
106
+ .hypothesis/
107
+ .pytest_cache/
108
+ cover/
109
+
110
+ # Translations
111
+ *.mo
112
+ *.pot
113
+
114
+ # Django stuff:
115
+ *.log
116
+ local_settings.py
117
+ db.sqlite3
118
+ db.sqlite3-journal
119
+
120
+ # Flask stuff:
121
+ instance/
122
+ .webassets-cache
123
+
124
+ # Scrapy stuff:
125
+ .scrapy
126
+
127
+ # Sphinx documentation
128
+ docs/_build/
129
+
130
+ # PyBuilder
131
+ .pybuilder/
132
+ target/
133
+
134
+ # Jupyter Notebook
135
+ .ipynb_checkpoints
136
+
137
+ # IPython
138
+ profile_default/
139
+ ipython_config.py
140
+
141
+ # pyenv
142
+ # For a library or package, you might want to ignore these files since the code is
143
+ # intended to run in multiple environments; otherwise, check them in:
144
+ # .python-version
145
+
146
+ # pipenv
147
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
148
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
149
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
150
+ # install all needed dependencies.
151
+ #Pipfile.lock
152
+
153
+ # poetry
154
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
155
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
156
+ # commonly ignored for libraries.
157
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
158
+ #poetry.lock
159
+
160
+ # pdm
161
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
162
+ #pdm.lock
163
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
164
+ # in version control.
165
+ # https://pdm.fming.dev/#use-with-ide
166
+ .pdm.toml
167
+
168
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
169
+ __pypackages__/
170
+
171
+ # Celery stuff
172
+ celerybeat-schedule
173
+ celerybeat.pid
174
+
175
+ # SageMath parsed files
176
+ *.sage.py
177
+
178
+ # Environments
179
+ .venv
180
+ env/
181
+ venv/
182
+ ENV/
183
+ env.bak/
184
+ venv.bak/
185
+
186
+ # Spyder project settings
187
+ .spyderproject
188
+ .spyproject
189
+
190
+ # Rope project settings
191
+ .ropeproject
192
+
193
+ # mkdocs documentation
194
+ /site
195
+
196
+ # mypy
197
+ .mypy_cache/
198
+ .dmypy.json
199
+ dmypy.json
200
+
201
+ # Pyre type checker
202
+ .pyre/
203
+
204
+ # pytype static type analyzer
205
+ .pytype/
206
+
207
+ # Cython debug symbols
208
+ cython_debug/
209
+
210
+ # PyCharm
211
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
212
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
213
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
214
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
215
+ #.idea/
216
+
217
+ ### Python Patch ###
218
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
219
+ poetry.toml
220
+
221
+ # ruff
222
+ .ruff_cache/
223
+
224
+ # LSP config files
225
+ pyrightconfig.json
226
+
227
+ ### VisualStudioCode ###
228
+ .vscode/*
229
+ !.vscode/settings.json
230
+ !.vscode/tasks.json
231
+ !.vscode/launch.json
232
+ !.vscode/extensions.json
233
+ !.vscode/*.code-snippets
234
+
235
+ # Local History for Visual Studio Code
236
+ .history/
237
+
238
+ # Built Visual Studio Code Extensions
239
+ *.vsix
240
+
241
+ ### VisualStudioCode Patch ###
242
+ # Ignore all local history of files
243
+ .history
244
+ .ionide
245
+
246
+ ### Windows ###
247
+ # Windows thumbnail cache files
248
+ Thumbs.db
249
+ Thumbs.db:encryptable
250
+ ehthumbs.db
251
+ ehthumbs_vista.db
252
+
253
+ # Dump file
254
+ *.stackdump
255
+
256
+ # Folder config file
257
+ [Dd]esktop.ini
258
+
259
+ # Recycle Bin used on file shares
260
+ $RECYCLE.BIN/
261
+
262
+ # Windows Installer files
263
+ *.cab
264
+ *.msi
265
+ *.msix
266
+ *.msm
267
+ *.msp
268
+
269
+ # Windows shortcuts
270
+ *.lnk
271
+
272
+ # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos,dotenv,linux,python,windows
273
+
274
+ # Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
275
+
276
+ data/*
277
+ results/*
@@ -0,0 +1,22 @@
1
+
2
+ MIT License
3
+
4
+ Copyright (c) Guillaume Lombardo
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
@@ -0,0 +1,78 @@
1
+ Metadata-Version: 2.4
2
+ Name: extractforms
3
+ Version: 0.1.0
4
+ Summary: A python project to turn scanned forms into a list of key-value pairs.
5
+ Project-URL: Homepage, https://github.com/Guillaume-Lombardo/extractforms
6
+ Project-URL: Repository, https://github.com/Guillaume-Lombardo/extractforms
7
+ Project-URL: Issues, https://github.com/Guillaume-Lombardo/extractforms/issues
8
+ Author-email: Guillaume Lombardo <g1lom@later.day>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: automation,cli,package,python
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.13
18
+ Requires-Dist: certifi>=2026.1.4
19
+ Requires-Dist: httpx>=0.28.1
20
+ Requires-Dist: openai>=2.1.0
21
+ Requires-Dist: pydantic-settings>=2.13.0
22
+ Requires-Dist: pydantic>=2.12.0
23
+ Requires-Dist: pymupdf>=1.26.5
24
+ Requires-Dist: python-dotenv>=1.1.0
25
+ Requires-Dist: structlog>=25.5.0
26
+ Description-Content-Type: text/markdown
27
+
28
+ # ExtractForms
29
+
30
+ `extractforms` is a Python package and CLI to extract key/value fields from PDF forms.
31
+
32
+ ## Quickstart
33
+
34
+ ```bash
35
+ uv sync --group dev
36
+ uv run pre-commit install
37
+ uv run ruff format .
38
+ uv run ruff check .
39
+ uv run ty check src tests
40
+ uv run pytest
41
+ uv run pre-commit run --all-files
42
+ ```
43
+
44
+ ## CLI
45
+
46
+ ```bash
47
+ extractforms extract --input form.pdf --output results/result.json --passes 2
48
+ ```
49
+
50
+ Supported options include:
51
+ - `--no-cache`
52
+ - `--dpi`, `--image-format`, `--page-start`, `--page-end`, `--max-pages`
53
+ - `--chunk-pages`
54
+ - `--extra-instructions`
55
+ - `--schema-id`, `--schema-path`, `--match-schema`
56
+
57
+ ## Environment
58
+
59
+ Copy `.env.template` to `.env` and configure:
60
+ - logging (`LOG_LEVEL`, `LOG_JSON`, `LOG_FILE`)
61
+ - enterprise network/TLS (`HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY`, `CERT_PATH`)
62
+ - model endpoint (`OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL`)
63
+
64
+ ## Project Layout
65
+
66
+ - `src/extractforms`: package code
67
+ - `tests/unit`: fast default tests
68
+ - `tests/integration`: component-level tests
69
+ - `tests/end2end`: user-facing behavior tests
70
+ - `skills`: AI helper skills for coding workflows
71
+
72
+ ## Release
73
+
74
+ 1. Bump `version` in `pyproject.toml`.
75
+ 2. Create and push a git tag: `vX.Y.Z`.
76
+ 3. GitHub Action publishes to PyPI.
77
+
78
+ For manual validation, use workflow dispatch with `publish_target=testpypi`.
@@ -0,0 +1,51 @@
1
+ # ExtractForms
2
+
3
+ `extractforms` is a Python package and CLI to extract key/value fields from PDF forms.
4
+
5
+ ## Quickstart
6
+
7
+ ```bash
8
+ uv sync --group dev
9
+ uv run pre-commit install
10
+ uv run ruff format .
11
+ uv run ruff check .
12
+ uv run ty check src tests
13
+ uv run pytest
14
+ uv run pre-commit run --all-files
15
+ ```
16
+
17
+ ## CLI
18
+
19
+ ```bash
20
+ extractforms extract --input form.pdf --output results/result.json --passes 2
21
+ ```
22
+
23
+ Supported options include:
24
+ - `--no-cache`
25
+ - `--dpi`, `--image-format`, `--page-start`, `--page-end`, `--max-pages`
26
+ - `--chunk-pages`
27
+ - `--extra-instructions`
28
+ - `--schema-id`, `--schema-path`, `--match-schema`
29
+
30
+ ## Environment
31
+
32
+ Copy `.env.template` to `.env` and configure:
33
+ - logging (`LOG_LEVEL`, `LOG_JSON`, `LOG_FILE`)
34
+ - enterprise network/TLS (`HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY`, `CERT_PATH`)
35
+ - model endpoint (`OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL`)
36
+
37
+ ## Project Layout
38
+
39
+ - `src/extractforms`: package code
40
+ - `tests/unit`: fast default tests
41
+ - `tests/integration`: component-level tests
42
+ - `tests/end2end`: user-facing behavior tests
43
+ - `skills`: AI helper skills for coding workflows
44
+
45
+ ## Release
46
+
47
+ 1. Bump `version` in `pyproject.toml`.
48
+ 2. Create and push a git tag: `vX.Y.Z`.
49
+ 3. GitHub Action publishes to PyPI.
50
+
51
+ For manual validation, use workflow dispatch with `publish_target=testpypi`.
@@ -0,0 +1,17 @@
1
+ # Architecture Decision Records (ADR)
2
+
3
+ ## Why ADRs
4
+
5
+ - Track architectural decisions.
6
+ - Preserve context and rejected alternatives.
7
+ - Improve onboarding and future reviews.
8
+
9
+ ## Naming convention
10
+
11
+ - `NNNN-short-title.md`
12
+ - Example: `0001-http-client-choice.md`
13
+
14
+ ## Process
15
+
16
+ - Create an ADR for any long-term architectural decision.
17
+ - Link the ADR in the corresponding PR.
@@ -0,0 +1,7 @@
1
+ # Engineering Docs
2
+
3
+ - [Definition of Done](./DEFINITION_OF_DONE.md)
4
+ - [Review Runbook](./REVIEW_RUNBOOK.md)
5
+ - [ADR](../adr/README.md)
6
+ - [Review Guide](../../.github/review/REVIEW_GUIDE.md)
7
+ - [Repo Coherence Playbook](../../.github/review/REPO_COHERENCE_PLAYBOOK.md)
@@ -0,0 +1,90 @@
1
+ [project]
2
+ name = "extractforms"
3
+ version = "0.1.0"
4
+ description = "A python project to turn scanned forms into a list of key-value pairs."
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ authors = [
8
+ { name = "Guillaume Lombardo", email = "g1lom@later.day" },
9
+ ]
10
+ license = { text = "MIT" }
11
+ keywords = ["automation", "cli", "package", "python"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Environment :: Console",
15
+ "Intended Audience :: Developers",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.13",
18
+ ]
19
+ dependencies = [
20
+ "certifi>=2026.1.4",
21
+ "httpx>=0.28.1",
22
+ "openai>=2.1.0",
23
+ "pydantic-settings>=2.13.0",
24
+ "pydantic>=2.12.0",
25
+ "pymupdf>=1.26.5",
26
+ "python-dotenv>=1.1.0",
27
+ "structlog>=25.5.0",
28
+ ]
29
+
30
+ [dependency-groups]
31
+ dev = [
32
+ "build>=1.3.0",
33
+ "detect-secrets>=1.5.0",
34
+ "pre-commit>=4.5.1",
35
+ "pytest-cov>=7.0.0",
36
+ "pytest-mock>=3.15.1",
37
+ "pytest>=9.0.2",
38
+ "ruff>=0.15.0",
39
+ "twine>=6.2.0",
40
+ "ty>=0.0.15",
41
+ ]
42
+
43
+ [project.urls]
44
+ Homepage = "https://github.com/Guillaume-Lombardo/extractforms"
45
+ Repository = "https://github.com/Guillaume-Lombardo/extractforms"
46
+ Issues = "https://github.com/Guillaume-Lombardo/extractforms/issues"
47
+
48
+ [project.scripts]
49
+ extractforms = "extractforms.cli:main"
50
+
51
+ [build-system]
52
+ requires = ["hatchling>=1.27.0"]
53
+ build-backend = "hatchling.build"
54
+
55
+ [tool.hatch.build.targets.wheel]
56
+ packages = ["src/extractforms"]
57
+
58
+ [tool.hatch.build.targets.sdist]
59
+ include = [
60
+ "LICENSE",
61
+ "README.md",
62
+ "pyproject.toml",
63
+ "src/extractforms",
64
+ ]
65
+
66
+ [tool.pytest.ini_options]
67
+ minversion = "9.0"
68
+ testpaths = ["tests"]
69
+ addopts = "-ra -q -m unit --strict-markers --cov=src --cov-report=term-missing:skip-covered --cov-report=html"
70
+ pythonpath = ["src"]
71
+ markers = [
72
+ "end2end: mark a test as an end-to-end test.",
73
+ "integration: mark a test as an integration test.",
74
+ "unit: mark a test as a unit test.",
75
+ ]
76
+
77
+ [tool.coverage.run]
78
+ branch = true
79
+ source = ["src/extractforms"]
80
+ omit = ["*/tests/*"]
81
+
82
+ [tool.coverage.report]
83
+ show_missing = true
84
+ skip_covered = true
85
+ precision = 2
86
+ fail_under = 80
87
+
88
+ [tool.coverage.html]
89
+ directory = "htmlcov"
90
+ title = "ExtractForms coverage"
@@ -0,0 +1,32 @@
1
+ """ExtractForms package."""
2
+
3
+ from extractforms._bootstrap import logger
4
+ from extractforms.async_runner import run_async
5
+ from extractforms.exceptions import (
6
+ AsyncExecutionError,
7
+ BackendError,
8
+ DependencyError,
9
+ ExtractionError,
10
+ PackageError,
11
+ SettingsError,
12
+ )
13
+ from extractforms.logging import configure_logging, get_logger
14
+ from extractforms.settings import Settings, get_settings
15
+
16
+ __version__ = "0.1.0"
17
+
18
+ __all__ = [
19
+ "AsyncExecutionError",
20
+ "BackendError",
21
+ "DependencyError",
22
+ "ExtractionError",
23
+ "PackageError",
24
+ "Settings",
25
+ "SettingsError",
26
+ "__version__",
27
+ "configure_logging",
28
+ "get_logger",
29
+ "get_settings",
30
+ "logger",
31
+ "run_async",
32
+ ]
@@ -0,0 +1,10 @@
1
+ """Package bootstrap helpers."""
2
+
3
+ from extractforms.dependencies import ensure_package_dependencies
4
+ from extractforms.logging import get_logger
5
+
6
+ ensure_package_dependencies()
7
+
8
+ logger = get_logger("extractforms")
9
+
10
+ __all__ = ["logger"]
@@ -0,0 +1,64 @@
1
+ """Helpers to run async operations from sync or async contexts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import threading
7
+ from queue import Queue
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ from extractforms.exceptions import AsyncExecutionError
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Coroutine
14
+
15
+
16
+ def _run_in_background_thread[T](coro: Coroutine[Any, Any, T]) -> T:
17
+ """Run a coroutine in a dedicated thread with its own event loop.
18
+
19
+ Args:
20
+ coro (Coroutine[Any, Any, T]): The coroutine to run.
21
+
22
+ Raises:
23
+ AsyncExecutionError: If the coroutine raises an exception.
24
+
25
+ Returns:
26
+ T: The result of the coroutine.
27
+ """
28
+ output: Queue[T | BaseException] = Queue(maxsize=1)
29
+
30
+ def _runner() -> None:
31
+ try:
32
+ output.put(asyncio.run(coro))
33
+ except BaseException as exc:
34
+ output.put(exc)
35
+
36
+ thread = threading.Thread(target=_runner, daemon=True)
37
+ thread.start()
38
+ thread.join()
39
+
40
+ result = output.get()
41
+ if isinstance(result, BaseException):
42
+ raise AsyncExecutionError(result=result) from result
43
+ return result
44
+
45
+
46
+ def run_async[T](coro: Coroutine[Any, Any, T]) -> T:
47
+ """Run an async coroutine from both sync and async contexts.
48
+
49
+ If called from a sync context, the coroutine will be run in a dedicated thread
50
+ with its own event loop. If called from an async context, the coroutine will
51
+ be awaited directly.
52
+
53
+ Args:
54
+ coro (Coroutine[Any, Any, T]): The coroutine to run.
55
+
56
+ Returns:
57
+ T: The result of the coroutine.
58
+ """
59
+ try:
60
+ asyncio.get_running_loop()
61
+ except RuntimeError:
62
+ return asyncio.run(coro)
63
+
64
+ return _run_in_background_thread(coro)
@@ -0,0 +1,7 @@
1
+ """Extraction backends."""
2
+
3
+ from extractforms.backends.multimodal_openai import MultimodalLLMBackend
4
+ from extractforms.backends.ocr_document_intelligence import OCRBackend
5
+ from extractforms.typing.protocol import ExtractorBackend, PageSource
6
+
7
+ __all__ = ["ExtractorBackend", "MultimodalLLMBackend", "OCRBackend", "PageSource"]