llm-batch-annotate 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. llm_batch_annotate-0.1.0/LICENSE +21 -0
  2. llm_batch_annotate-0.1.0/PKG-INFO +97 -0
  3. llm_batch_annotate-0.1.0/README.md +68 -0
  4. llm_batch_annotate-0.1.0/pyproject.toml +65 -0
  5. llm_batch_annotate-0.1.0/setup.cfg +4 -0
  6. llm_batch_annotate-0.1.0/src/llm_batch_annotate/__init__.py +193 -0
  7. llm_batch_annotate-0.1.0/src/llm_batch_annotate/_model.py +12 -0
  8. llm_batch_annotate-0.1.0/src/llm_batch_annotate/_version.py +3 -0
  9. llm_batch_annotate-0.1.0/src/llm_batch_annotate/artifacts/__init__.py +23 -0
  10. llm_batch_annotate-0.1.0/src/llm_batch_annotate/artifacts/local.py +86 -0
  11. llm_batch_annotate-0.1.0/src/llm_batch_annotate/artifacts/naming.py +103 -0
  12. llm_batch_annotate-0.1.0/src/llm_batch_annotate/builders/__init__.py +19 -0
  13. llm_batch_annotate-0.1.0/src/llm_batch_annotate/builders/assets.py +48 -0
  14. llm_batch_annotate-0.1.0/src/llm_batch_annotate/builders/base.py +154 -0
  15. llm_batch_annotate-0.1.0/src/llm_batch_annotate/builders/programmatic.py +41 -0
  16. llm_batch_annotate-0.1.0/src/llm_batch_annotate/builders/template.py +70 -0
  17. llm_batch_annotate-0.1.0/src/llm_batch_annotate/cli/__init__.py +31 -0
  18. llm_batch_annotate-0.1.0/src/llm_batch_annotate/cli/__main__.py +5 -0
  19. llm_batch_annotate-0.1.0/src/llm_batch_annotate/cli/main.py +483 -0
  20. llm_batch_annotate-0.1.0/src/llm_batch_annotate/configs/__init__.py +33 -0
  21. llm_batch_annotate-0.1.0/src/llm_batch_annotate/configs/models.py +158 -0
  22. llm_batch_annotate-0.1.0/src/llm_batch_annotate/contracts/__init__.py +39 -0
  23. llm_batch_annotate-0.1.0/src/llm_batch_annotate/contracts/base.py +263 -0
  24. llm_batch_annotate-0.1.0/src/llm_batch_annotate/contracts/records.py +167 -0
  25. llm_batch_annotate-0.1.0/src/llm_batch_annotate/enums.py +75 -0
  26. llm_batch_annotate-0.1.0/src/llm_batch_annotate/execution/__init__.py +22 -0
  27. llm_batch_annotate-0.1.0/src/llm_batch_annotate/execution/base.py +256 -0
  28. llm_batch_annotate-0.1.0/src/llm_batch_annotate/execution/providers/__init__.py +6 -0
  29. llm_batch_annotate-0.1.0/src/llm_batch_annotate/execution/providers/openai_batch.py +697 -0
  30. llm_batch_annotate-0.1.0/src/llm_batch_annotate/grouping/__init__.py +7 -0
  31. llm_batch_annotate-0.1.0/src/llm_batch_annotate/grouping/fixed_size.py +65 -0
  32. llm_batch_annotate-0.1.0/src/llm_batch_annotate/manifests/__init__.py +19 -0
  33. llm_batch_annotate-0.1.0/src/llm_batch_annotate/manifests/models.py +108 -0
  34. llm_batch_annotate-0.1.0/src/llm_batch_annotate/orchestration/__init__.py +10 -0
  35. llm_batch_annotate-0.1.0/src/llm_batch_annotate/orchestration/offline.py +139 -0
  36. llm_batch_annotate-0.1.0/src/llm_batch_annotate/orchestration/run.py +1042 -0
  37. llm_batch_annotate-0.1.0/src/llm_batch_annotate/parsers/__init__.py +7 -0
  38. llm_batch_annotate-0.1.0/src/llm_batch_annotate/parsers/base.py +318 -0
  39. llm_batch_annotate-0.1.0/src/llm_batch_annotate/parsers/structured.py +67 -0
  40. llm_batch_annotate-0.1.0/src/llm_batch_annotate/tasks/__init__.py +7 -0
  41. llm_batch_annotate-0.1.0/src/llm_batch_annotate/tasks/base.py +326 -0
  42. llm_batch_annotate-0.1.0/src/llm_batch_annotate/units/__init__.py +7 -0
  43. llm_batch_annotate-0.1.0/src/llm_batch_annotate/units/materialization.py +108 -0
  44. llm_batch_annotate-0.1.0/src/llm_batch_annotate/validation/__init__.py +8 -0
  45. llm_batch_annotate-0.1.0/src/llm_batch_annotate/validation/coverage.py +115 -0
  46. llm_batch_annotate-0.1.0/src/llm_batch_annotate.egg-info/PKG-INFO +97 -0
  47. llm_batch_annotate-0.1.0/src/llm_batch_annotate.egg-info/SOURCES.txt +66 -0
  48. llm_batch_annotate-0.1.0/src/llm_batch_annotate.egg-info/dependency_links.txt +1 -0
  49. llm_batch_annotate-0.1.0/src/llm_batch_annotate.egg-info/entry_points.txt +2 -0
  50. llm_batch_annotate-0.1.0/src/llm_batch_annotate.egg-info/requires.txt +9 -0
  51. llm_batch_annotate-0.1.0/src/llm_batch_annotate.egg-info/top_level.txt +1 -0
  52. llm_batch_annotate-0.1.0/tests/test_artifact_naming.py +48 -0
  53. llm_batch_annotate-0.1.0/tests/test_artifact_store.py +87 -0
  54. llm_batch_annotate-0.1.0/tests/test_builders.py +184 -0
  55. llm_batch_annotate-0.1.0/tests/test_cli.py +205 -0
  56. llm_batch_annotate-0.1.0/tests/test_configs.py +81 -0
  57. llm_batch_annotate-0.1.0/tests/test_contracts.py +254 -0
  58. llm_batch_annotate-0.1.0/tests/test_coverage_validation.py +60 -0
  59. llm_batch_annotate-0.1.0/tests/test_enums.py +18 -0
  60. llm_batch_annotate-0.1.0/tests/test_execution.py +197 -0
  61. llm_batch_annotate-0.1.0/tests/test_grouping.py +55 -0
  62. llm_batch_annotate-0.1.0/tests/test_imports.py +26 -0
  63. llm_batch_annotate-0.1.0/tests/test_manifests.py +104 -0
  64. llm_batch_annotate-0.1.0/tests/test_openai_batch_provider.py +396 -0
  65. llm_batch_annotate-0.1.0/tests/test_orchestration.py +314 -0
  66. llm_batch_annotate-0.1.0/tests/test_parsers.py +152 -0
  67. llm_batch_annotate-0.1.0/tests/test_tasks.py +142 -0
  68. llm_batch_annotate-0.1.0/tests/test_units.py +54 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Felipe Paula
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-batch-annotate
3
+ Version: 0.1.0
4
+ Summary: Batch-oriented LLM annotation workflows for tabular datasets with OpenAI Batch support.
5
+ Author: Felipe Paula
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/felipepaula/batch_api_annotate
8
+ Project-URL: Documentation, https://llm-batch-annotate.readthedocs.io/
9
+ Project-URL: Repository, https://github.com/felipepaula/batch_api_annotate
10
+ Project-URL: Issues, https://github.com/felipepaula/batch_api_annotate/issues
11
+ Keywords: annotation,batch,llm,openai,pydantic,tabular-data
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.12
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: pydantic<3,>=2
22
+ Provides-Extra: test
23
+ Requires-Dist: pytest<9,>=8; extra == "test"
24
+ Provides-Extra: docs
25
+ Requires-Dist: furo>=2024.8.6; extra == "docs"
26
+ Requires-Dist: myst-parser<5,>=4; extra == "docs"
27
+ Requires-Dist: sphinx<9,>=8; extra == "docs"
28
+ Dynamic: license-file
29
+
30
+ # `llm-batch-annotate`
31
+
32
+ `llm-batch-annotate` is a Python package for running reproducible LLM annotation workflows over tabular datasets. It materializes units from source rows, groups them into provider requests, submits them through an execution adapter, parses structured outputs, validates coverage, and writes run artifacts for auditability.
33
+
34
+ ## Highlights
35
+
36
+ - single-unit and grouped annotation workflows
37
+ - provider-agnostic task, builder, parser, and artifact abstractions
38
+ - concrete OpenAI Batch adapter
39
+ - resumable CLI-driven runs with persisted manifests
40
+ - example configs, prompts, schemas, and sample data under `examples/`
41
+
42
+ ## Installation
43
+
44
+ When the package is published:
45
+
46
+ ```bash
47
+ pip install llm-batch-annotate
48
+ ```
49
+
50
+ From a local checkout:
51
+
52
+ ```bash
53
+ python3 -m venv .venv
54
+ .venv/bin/pip install -e ".[test,docs]"
55
+ ```
56
+
57
+ ## Quickstart
58
+
59
+ Single-unit example:
60
+
61
+ ```bash
62
+ export OPEN_AI_KEY="your-key"
63
+ llm-batch-annotate run examples/config/run_config.json --run-id example-single --no-poll-until-terminal
64
+ llm-batch-annotate resume examples/config/run_config.json example-single --poll-interval 2m
65
+ ```
66
+
67
+ Grouped example:
68
+
69
+ ```bash
70
+ export OPEN_AI_KEY="your-key"
71
+ llm-batch-annotate run examples/config/run_config_2.json --run-id example-grouped --no-poll-until-terminal
72
+ llm-batch-annotate resume examples/config/run_config_2.json example-grouped --poll-interval 2m
73
+ ```
74
+
75
+ ## Documentation
76
+
77
+ Project documentation is intended to be hosted on Read the Docs. The Sphinx source lives under `docs/`.
78
+
79
+ Planned public docs include:
80
+
81
+ - installation
82
+ - quickstart
83
+ - CLI reference
84
+ - config reference
85
+ - OpenAI Batch provider guide
86
+ - worked examples
87
+ - API reference
88
+ - development and release notes
89
+
90
+ ## Repository layout
91
+
92
+ - `src/llm_batch_annotate/`: package source
93
+ - `examples/`: tracked example inputs and configs
94
+ - `tests/`: pytest suite
95
+ - `docs/`: Sphinx documentation source
96
+
97
+ Generated example runs are written to `examples/runs/` and are intentionally excluded from version control.
@@ -0,0 +1,68 @@
1
+ # `llm-batch-annotate`
2
+
3
+ `llm-batch-annotate` is a Python package for running reproducible LLM annotation workflows over tabular datasets. It materializes units from source rows, groups them into provider requests, submits them through an execution adapter, parses structured outputs, validates coverage, and writes run artifacts for auditability.
4
+
5
+ ## Highlights
6
+
7
+ - single-unit and grouped annotation workflows
8
+ - provider-agnostic task, builder, parser, and artifact abstractions
9
+ - concrete OpenAI Batch adapter
10
+ - resumable CLI-driven runs with persisted manifests
11
+ - example configs, prompts, schemas, and sample data under `examples/`
12
+
13
+ ## Installation
14
+
15
+ When the package is published:
16
+
17
+ ```bash
18
+ pip install llm-batch-annotate
19
+ ```
20
+
21
+ From a local checkout:
22
+
23
+ ```bash
24
+ python3 -m venv .venv
25
+ .venv/bin/pip install -e ".[test,docs]"
26
+ ```
27
+
28
+ ## Quickstart
29
+
30
+ Single-unit example:
31
+
32
+ ```bash
33
+ export OPEN_AI_KEY="your-key"
34
+ llm-batch-annotate run examples/config/run_config.json --run-id example-single --no-poll-until-terminal
35
+ llm-batch-annotate resume examples/config/run_config.json example-single --poll-interval 2m
36
+ ```
37
+
38
+ Grouped example:
39
+
40
+ ```bash
41
+ export OPEN_AI_KEY="your-key"
42
+ llm-batch-annotate run examples/config/run_config_2.json --run-id example-grouped --no-poll-until-terminal
43
+ llm-batch-annotate resume examples/config/run_config_2.json example-grouped --poll-interval 2m
44
+ ```
45
+
46
+ ## Documentation
47
+
48
+ Project documentation is intended to be hosted on Read the Docs. The Sphinx source lives under `docs/`.
49
+
50
+ Planned public docs include:
51
+
52
+ - installation
53
+ - quickstart
54
+ - CLI reference
55
+ - config reference
56
+ - OpenAI Batch provider guide
57
+ - worked examples
58
+ - API reference
59
+ - development and release notes
60
+
61
+ ## Repository layout
62
+
63
+ - `src/llm_batch_annotate/`: package source
64
+ - `examples/`: tracked example inputs and configs
65
+ - `tests/`: pytest suite
66
+ - `docs/`: Sphinx documentation source
67
+
68
+ Generated example runs are written to `examples/runs/` and are intentionally excluded from version control.
@@ -0,0 +1,65 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77", "wheel"]  # the SPDX `license = "MIT"` string and `license-files` below (PEP 639) need setuptools >= 77
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "llm-batch-annotate"
7
+ dynamic = ["version"]
8
+ description = "Batch-oriented LLM annotation workflows for tabular datasets with OpenAI Batch support."
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [
14
+ {name = "Felipe Paula"},
15
+ ]
16
+ keywords = [
17
+ "annotation",
18
+ "batch",
19
+ "llm",
20
+ "openai",
21
+ "pydantic",
22
+ "tabular-data",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 3 - Alpha",
26
+ "Intended Audience :: Developers",
27
+ "Programming Language :: Python :: 3",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
30
+ "Topic :: Software Development :: Libraries :: Python Modules",
31
+ ]
32
+ dependencies = [
33
+ "pydantic>=2,<3",
34
+ ]
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/felipepaula/batch_api_annotate"
38
+ Documentation = "https://llm-batch-annotate.readthedocs.io/"
39
+ Repository = "https://github.com/felipepaula/batch_api_annotate"
40
+ Issues = "https://github.com/felipepaula/batch_api_annotate/issues"
41
+
42
+ [project.optional-dependencies]
43
+ test = [
44
+ "pytest>=8,<9",
45
+ ]
46
+ docs = [
47
+ "furo>=2024.8.6",
48
+ "myst-parser>=4,<5",
49
+ "sphinx>=8,<9",
50
+ ]
51
+
52
+ [project.scripts]
53
+ llm-batch-annotate = "llm_batch_annotate.cli:main"
54
+
55
+ [tool.setuptools]
56
+ package-dir = {"" = "src"}
57
+
58
+ [tool.setuptools.dynamic]
59
+ version = {attr = "llm_batch_annotate._version.__version__"}
60
+
61
+ [tool.setuptools.packages.find]
62
+ where = ["src"]
63
+
64
+ [tool.pytest.ini_options]
65
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,193 @@
1
+ """Public package exports for llm_batch_annotate."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ._version import __version__
6
+ from .artifacts.naming import (
7
+ ARTIFACT_REGISTRY,
8
+ ArtifactDefinition,
9
+ artifact_path,
10
+ artifact_ref,
11
+ artifact_refs_for_run,
12
+ artifact_relative_path,
13
+ get_artifact_definition,
14
+ run_directory,
15
+ )
16
+ from .artifacts.local import LocalArtifactStore
17
+ from .builders import (
18
+ BaseBuilder,
19
+ ProgrammaticBuilderBase,
20
+ PromptAssetBundle,
21
+ SimpleTemplateBuilder,
22
+ load_prompt_asset_text,
23
+ load_prompt_assets,
24
+ resolve_prompt_asset_path,
25
+ )
26
+ from .configs.models import (
27
+ ArtifactStoreConfig,
28
+ ArtifactStoreSelectionConfig,
29
+ BaseProviderConfig,
30
+ GenericProviderConfig,
31
+ GroupingConfig,
32
+ OpenAIBatchConfig,
33
+ OutputConfig,
34
+ PromptAssetsConfig,
35
+ ProviderConfig,
36
+ ProviderSelectionConfig,
37
+ RetryPolicyConfig,
38
+ RunConfig,
39
+ RunMetadataConfig,
40
+ SourceInputConfig,
41
+ )
42
+ from .contracts.base import ArtifactStore, BaseMessageBuilder, BaseParser, BaseTask, ExecutionProvider
43
+ from .contracts.records import (
44
+ AnnotationRecord,
45
+ ArtifactRef,
46
+ ComponentRef,
47
+ ExecutionHandle,
48
+ FailureRecord,
49
+ GroupRecord,
50
+ GroupMembershipRecord,
51
+ ParsedRequestRecord,
52
+ ProviderCapabilities,
53
+ RawErrorRecord,
54
+ RawOutputRecord,
55
+ RawResultRecord,
56
+ RequestRecord,
57
+ UnitRecord,
58
+ )
59
+ from .execution import (
60
+ ExecutionProviderBase,
61
+ OpenAIBatchProvider,
62
+ OpenAIBatchProviderError,
63
+ SUCCESSFUL_EXECUTION_STATUSES,
64
+ TERMINAL_EXECUTION_STATUSES,
65
+ is_successful_execution_status,
66
+ is_terminal_execution_status,
67
+ normalize_execution_status,
68
+ )
69
+ from .enums import (
70
+ ArtifactFormat,
71
+ ArtifactKind,
72
+ ArtifactStoreKind,
73
+ ExecutionStatus,
74
+ FailureKind,
75
+ GroupingStrategy,
76
+ ProviderKind,
77
+ RunStatus,
78
+ SourceFormat,
79
+ TaskKind,
80
+ )
81
+ from .manifests.models import (
82
+ ComponentIdentitySummary,
83
+ GroupingSummary,
84
+ InputSummary,
85
+ LineageSummary,
86
+ ParseSummary,
87
+ RunManifest,
88
+ ValidationSummary,
89
+ )
90
+ from .orchestration import OfflineTaskPipeline, OfflineTaskPipelineResult, TaskOrchestrator, TaskRunState, default_run_id
91
+ from .parsers import BaseOutputParser, StructuredOutputParser
92
+ from .tasks import ComposedTaskBase, GroupedTaskBase, SingleTaskBase
93
+ from .grouping.fixed_size import build_group_memberships, membership_map, plan_fixed_size_groups
94
+ from .units.materialization import derive_unit_id, materialize_units, validate_unique_unit_ids
95
+ from .validation.coverage import CoverageValidationResult, coverage_failures, validate_coverage, validate_group_coverage
96
+
97
+ __all__ = [
98
+ "ARTIFACT_REGISTRY",
99
+ "AnnotationRecord",
100
+ "ArtifactDefinition",
101
+ "ArtifactFormat",
102
+ "ArtifactKind",
103
+ "ArtifactRef",
104
+ "ArtifactStore",
105
+ "LocalArtifactStore",
106
+ "ArtifactStoreConfig",
107
+ "ArtifactStoreKind",
108
+ "ArtifactStoreSelectionConfig",
109
+ "BaseBuilder",
110
+ "BaseMessageBuilder",
111
+ "BaseOutputParser",
112
+ "BaseParser",
113
+ "BaseProviderConfig",
114
+ "BaseTask",
115
+ "ComposedTaskBase",
116
+ "ComponentIdentitySummary",
117
+ "ComponentRef",
118
+ "ExecutionHandle",
119
+ "ExecutionProvider",
120
+ "ExecutionProviderBase",
121
+ "ExecutionStatus",
122
+ "FailureKind",
123
+ "FailureRecord",
124
+ "GenericProviderConfig",
125
+ "GroupRecord",
126
+ "GroupMembershipRecord",
127
+ "GroupingConfig",
128
+ "GroupingStrategy",
129
+ "GroupingSummary",
130
+ "GroupedTaskBase",
131
+ "InputSummary",
132
+ "LineageSummary",
133
+ "OfflineTaskPipeline",
134
+ "OfflineTaskPipelineResult",
135
+ "OpenAIBatchConfig",
136
+ "OpenAIBatchProvider",
137
+ "OpenAIBatchProviderError",
138
+ "OutputConfig",
139
+ "ParseSummary",
140
+ "ParsedRequestRecord",
141
+ "ProgrammaticBuilderBase",
142
+ "PromptAssetBundle",
143
+ "PromptAssetsConfig",
144
+ "ProviderCapabilities",
145
+ "ProviderConfig",
146
+ "ProviderKind",
147
+ "ProviderSelectionConfig",
148
+ "RawErrorRecord",
149
+ "RawOutputRecord",
150
+ "RawResultRecord",
151
+ "RequestRecord",
152
+ "RetryPolicyConfig",
153
+ "RunConfig",
154
+ "RunManifest",
155
+ "RunMetadataConfig",
156
+ "RunStatus",
157
+ "SingleTaskBase",
158
+ "SimpleTemplateBuilder",
159
+ "SourceFormat",
160
+ "SourceInputConfig",
161
+ "StructuredOutputParser",
162
+ "SUCCESSFUL_EXECUTION_STATUSES",
163
+ "TaskKind",
164
+ "TaskOrchestrator",
165
+ "TaskRunState",
166
+ "TERMINAL_EXECUTION_STATUSES",
167
+ "UnitRecord",
168
+ "ValidationSummary",
169
+ "CoverageValidationResult",
170
+ "artifact_path",
171
+ "artifact_ref",
172
+ "artifact_refs_for_run",
173
+ "artifact_relative_path",
174
+ "build_group_memberships",
175
+ "coverage_failures",
176
+ "default_run_id",
177
+ "derive_unit_id",
178
+ "get_artifact_definition",
179
+ "load_prompt_asset_text",
180
+ "load_prompt_assets",
181
+ "materialize_units",
182
+ "membership_map",
183
+ "normalize_execution_status",
184
+ "plan_fixed_size_groups",
185
+ "resolve_prompt_asset_path",
186
+ "run_directory",
187
+ "is_successful_execution_status",
188
+ "is_terminal_execution_status",
189
+ "validate_coverage",
190
+ "validate_group_coverage",
191
+ "validate_unique_unit_ids",
192
+ "__version__",
193
+ ]
@@ -0,0 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel, ConfigDict
4
+
5
+
6
+ class FrameworkModel(BaseModel):  # Pydantic base class carrying one shared model_config for subclasses.
7
+     model_config = ConfigDict(
8
+         extra="forbid",  # unknown fields are a validation error instead of being ignored
9
+         populate_by_name=True,  # aliased fields may also be populated by their field name
10
+         str_strip_whitespace=True,  # leading/trailing whitespace is stripped from str values
11
+         validate_assignment=True,  # attribute assignment after construction is validated too
12
+     )
@@ -0,0 +1,3 @@
1
+ """Package version metadata."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,23 @@
1
+ from .naming import (
2
+ ARTIFACT_REGISTRY,
3
+ ArtifactDefinition,
4
+ artifact_path,
5
+ artifact_ref,
6
+ artifact_refs_for_run,
7
+ artifact_relative_path,
8
+ get_artifact_definition,
9
+ run_directory,
10
+ )
11
+ from .local import LocalArtifactStore
12
+
13
+ __all__ = [
14
+ "ARTIFACT_REGISTRY",
15
+ "ArtifactDefinition",
16
+ "LocalArtifactStore",
17
+ "artifact_path",
18
+ "artifact_ref",
19
+ "artifact_refs_for_run",
20
+ "artifact_relative_path",
21
+ "get_artifact_definition",
22
+ "run_directory",
23
+ ]
@@ -0,0 +1,86 @@
1
+ """Filesystem-backed artifact store implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from ..configs.models import ArtifactStoreConfig
8
+ from ..contracts.base import ArtifactStore
9
+ from ..contracts.records import ArtifactRef
10
+ from ..enums import ArtifactFormat, ArtifactKind, ArtifactStoreKind
11
+ from ..manifests.models import RunManifest
12
+ from .naming import ARTIFACT_REGISTRY, artifact_path, artifact_ref, run_directory
13
+
14
+
15
+ class LocalArtifactStore(ArtifactStore):
16
+     """Persist run artifacts under a canonical local directory tree."""
17
+
18
+     def validate_config(self, config: ArtifactStoreConfig) -> None:  # Raises ValueError unless config.kind is ArtifactStoreKind.LOCAL.
19
+         if config.kind is not ArtifactStoreKind.LOCAL:
20
+             msg = "LocalArtifactStore requires ArtifactStoreConfig.kind='local'"
21
+             raise ValueError(msg)
22
+
23
+     def run_path(self, run_id: str, config: ArtifactStoreConfig) -> Path:  # Directory of one run: <config.root_dir>/<run_id>.
24
+         self.validate_config(config)
25
+         return run_directory(run_id=run_id, runs_root=config.root_dir)
26
+
27
+     def artifact_path(self, run_id: str, artifact_kind: ArtifactKind, config: ArtifactStoreConfig) -> Path:  # File path of one artifact inside the run directory.
28
+         self.validate_config(config)
29
+         return artifact_path(run_id=run_id, artifact_kind=artifact_kind, runs_root=config.root_dir)  # the module-level helper imported from .naming, not this method
30
+
31
+     def initialize_run(self, run_id: str, config: ArtifactStoreConfig) -> Path:  # Create the run directory tree (idempotent) and return the run directory.
32
+         run_path = self.run_path(run_id, config)
33
+         run_path.mkdir(parents=True, exist_ok=True)
34
+
35
+         for definition in ARTIFACT_REGISTRY.values():
36
+             (run_path / definition.relative_path.parent).mkdir(parents=True, exist_ok=True)  # pre-create the parent directory of every registered artifact
37
+
38
+         return run_path
39
+
40
+     def write_artifact(  # Write (or overwrite) one artifact file and return its ArtifactRef.
41
+         self,
42
+         run_id: str,
43
+         artifact_kind: ArtifactKind,
44
+         content: str | bytes,
45
+         config: ArtifactStoreConfig,
46
+     ) -> ArtifactRef:
47
+         artifact_file = self.artifact_path(run_id, artifact_kind, config)
48
+         artifact_file.parent.mkdir(parents=True, exist_ok=True)  # works even when initialize_run() was never called
49
+
50
+         if isinstance(content, bytes):  # bytes are written verbatim; str is encoded as UTF-8
51
+             artifact_file.write_bytes(content)
52
+         else:
53
+             artifact_file.write_text(content, encoding="utf-8")
54
+
55
+         return artifact_ref(artifact_kind)  # the ref carries kind, format and run-relative path only
56
+
57
+     def read_artifact(  # Read one artifact file back from disk.
58
+         self,
59
+         run_id: str,
60
+         artifact_kind: ArtifactKind,
61
+         config: ArtifactStoreConfig,
62
+     ) -> str | bytes:
63
+         artifact_file = self.artifact_path(run_id, artifact_kind, config)
64
+         artifact = artifact_ref(artifact_kind)
65
+
66
+         if artifact.format in {ArtifactFormat.JSON, ArtifactFormat.JSONL}:  # JSON/JSONL come back as UTF-8 text, any other format as raw bytes
67
+             return artifact_file.read_text(encoding="utf-8")
68
+         return artifact_file.read_bytes()
69
+
70
+     def resolve_artifact(  # Return the ArtifactRef for a kind without touching the filesystem.
71
+         self,
72
+         run_id: str,
73
+         artifact_kind: ArtifactKind,
74
+         config: ArtifactStoreConfig,
75
+     ) -> ArtifactRef:
76
+         _ = self.artifact_path(run_id, artifact_kind, config)  # result discarded: called so config validation and the registry lookup can raise
77
+         return artifact_ref(artifact_kind)
78
+
79
+     def write_manifest(self, manifest: RunManifest, config: ArtifactStoreConfig) -> ArtifactRef:  # Serialize the manifest as indented JSON into the MANIFEST artifact.
80
+         self.initialize_run(manifest.run_id, config)  # make sure the run directory tree exists first
81
+         return self.write_artifact(
82
+             manifest.run_id,
83
+             ArtifactKind.MANIFEST,
84
+             manifest.model_dump_json(indent=2),
85
+             config,
86
+         )
@@ -0,0 +1,103 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path, PurePosixPath
5
+
6
+ from ..contracts.records import ArtifactRef
7
+ from ..enums import ArtifactFormat, ArtifactKind
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class ArtifactDefinition:  # Immutable registry entry describing one artifact type.
12
+     artifact_kind: ArtifactKind
13
+     format: ArtifactFormat
14
+     relative_path: PurePosixPath  # POSIX-style path that artifact_path() joins onto the run directory
15
+
16
+
17
+ ARTIFACT_REGISTRY: dict[ArtifactKind, ArtifactDefinition] = {  # maps each artifact kind to its format and run-relative file path
18
+     ArtifactKind.RUN_CONFIG: ArtifactDefinition(  # config/
19
+         artifact_kind=ArtifactKind.RUN_CONFIG,
20
+         format=ArtifactFormat.JSON,
21
+         relative_path=PurePosixPath("config/run_config.json"),
22
+     ),
23
+     ArtifactKind.MANIFEST: ArtifactDefinition(  # metadata/
24
+         artifact_kind=ArtifactKind.MANIFEST,
25
+         format=ArtifactFormat.JSON,
26
+         relative_path=PurePosixPath("metadata/manifest.json"),
27
+     ),
28
+     ArtifactKind.SUMMARY: ArtifactDefinition(
29
+         artifact_kind=ArtifactKind.SUMMARY,
30
+         format=ArtifactFormat.JSON,
31
+         relative_path=PurePosixPath("metadata/summary.json"),
32
+     ),
33
+     ArtifactKind.UNITS: ArtifactDefinition(  # tables/
34
+         artifact_kind=ArtifactKind.UNITS,
35
+         format=ArtifactFormat.JSONL,
36
+         relative_path=PurePosixPath("tables/units.jsonl"),
37
+     ),
38
+     ArtifactKind.GROUPS: ArtifactDefinition(
39
+         artifact_kind=ArtifactKind.GROUPS,
40
+         format=ArtifactFormat.JSONL,
41
+         relative_path=PurePosixPath("tables/groups.jsonl"),
42
+     ),
43
+     ArtifactKind.REQUESTS: ArtifactDefinition(
44
+         artifact_kind=ArtifactKind.REQUESTS,
45
+         format=ArtifactFormat.JSONL,
46
+         relative_path=PurePosixPath("tables/requests.jsonl"),
47
+     ),
48
+     ArtifactKind.RAW_OUTPUTS: ArtifactDefinition(  # raw/
49
+         artifact_kind=ArtifactKind.RAW_OUTPUTS,
50
+         format=ArtifactFormat.JSONL,
51
+         relative_path=PurePosixPath("raw/raw_outputs.jsonl"),
52
+     ),
53
+     ArtifactKind.RAW_ERRORS: ArtifactDefinition(
54
+         artifact_kind=ArtifactKind.RAW_ERRORS,
55
+         format=ArtifactFormat.JSONL,
56
+         relative_path=PurePosixPath("raw/raw_errors.jsonl"),
57
+     ),
58
+     ArtifactKind.PARSED_REQUESTS: ArtifactDefinition(  # parsed/
59
+         artifact_kind=ArtifactKind.PARSED_REQUESTS,
60
+         format=ArtifactFormat.JSONL,
61
+         relative_path=PurePosixPath("parsed/parsed_requests.jsonl"),
62
+     ),
63
+     ArtifactKind.FLATTENED_ANNOTATIONS: ArtifactDefinition(
64
+         artifact_kind=ArtifactKind.FLATTENED_ANNOTATIONS,
65
+         format=ArtifactFormat.JSONL,
66
+         relative_path=PurePosixPath("parsed/flattened_annotations.jsonl"),
67
+     ),
68
+     ArtifactKind.FAILURES: ArtifactDefinition(
69
+         artifact_kind=ArtifactKind.FAILURES,
70
+         format=ArtifactFormat.JSONL,
71
+         relative_path=PurePosixPath("parsed/failures.jsonl"),
72
+     ),
73
+ }
74
+
75
+
76
+ def get_artifact_definition(artifact_kind: ArtifactKind) -> ArtifactDefinition:  # Look up the registry entry for one artifact kind.
77
+     return ARTIFACT_REGISTRY[artifact_kind]  # plain dict indexing: KeyError for a kind missing from the registry
78
+
79
+
80
+ def run_directory(run_id: str, runs_root: str | Path = "runs") -> Path:  # Directory holding every artifact of one run.
81
+     return Path(runs_root) / run_id  # plain join; run_id is not validated or sanitised here
82
+
83
+
84
+ def artifact_relative_path(artifact_kind: ArtifactKind) -> PurePosixPath:  # Registered path of an artifact, relative to its run directory.
85
+     return get_artifact_definition(artifact_kind).relative_path
86
+
87
+
88
+ def artifact_path(run_id: str, artifact_kind: ArtifactKind, runs_root: str | Path = "runs") -> Path:  # Path of one artifact file: <runs_root>/<run_id>/<relative path>.
89
+     return run_directory(run_id=run_id, runs_root=runs_root) / artifact_relative_path(artifact_kind)
90
+
91
+
92
+ def artifact_ref(artifact_kind: ArtifactKind) -> ArtifactRef:  # Build the ArtifactRef record for one artifact kind from its registry entry.
93
+     definition = get_artifact_definition(artifact_kind)
94
+     return ArtifactRef(
95
+         artifact_kind=definition.artifact_kind,
96
+         format=definition.format,
97
+         relative_path=str(definition.relative_path),  # stored as a string relative to the run directory, with no run id or root
98
+     )
99
+
100
+
101
+ def artifact_refs_for_run(run_id: str, runs_root: str | Path = "runs") -> dict[ArtifactKind, ArtifactRef]:  # One ArtifactRef per registered artifact kind.
102
+     _ = run_directory(run_id=run_id, runs_root=runs_root)  # result discarded; the refs below do not depend on run_id or runs_root
103
+     return {artifact_kind: artifact_ref(artifact_kind) for artifact_kind in ARTIFACT_REGISTRY}