lightassay 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightassay-0.3.0/LICENSE +21 -0
- lightassay-0.3.0/PKG-INFO +163 -0
- lightassay-0.3.0/README.md +134 -0
- lightassay-0.3.0/pyproject.toml +54 -0
- lightassay-0.3.0/setup.cfg +4 -0
- lightassay-0.3.0/src/lightassay/__init__.py +134 -0
- lightassay-0.3.0/src/lightassay/adapter_pack/__init__.py +295 -0
- lightassay-0.3.0/src/lightassay/adapter_pack/command.py +84 -0
- lightassay-0.3.0/src/lightassay/adapter_pack/http_driver.py +75 -0
- lightassay-0.3.0/src/lightassay/adapter_pack/python_callable.py +63 -0
- lightassay-0.3.0/src/lightassay/analyzer.py +287 -0
- lightassay-0.3.0/src/lightassay/backends.py +144 -0
- lightassay-0.3.0/src/lightassay/bootstrap.py +469 -0
- lightassay-0.3.0/src/lightassay/builtin_adapters/__init__.py +27 -0
- lightassay-0.3.0/src/lightassay/builtin_adapters/_agent_cli_common.py +281 -0
- lightassay-0.3.0/src/lightassay/builtin_adapters/claude_cli.py +29 -0
- lightassay-0.3.0/src/lightassay/builtin_adapters/codex_cli.py +28 -0
- lightassay-0.3.0/src/lightassay/builtin_adapters/stub.py +361 -0
- lightassay-0.3.0/src/lightassay/cli.py +1077 -0
- lightassay-0.3.0/src/lightassay/comparer.py +197 -0
- lightassay-0.3.0/src/lightassay/diagnostics.py +104 -0
- lightassay-0.3.0/src/lightassay/errors.py +94 -0
- lightassay-0.3.0/src/lightassay/expert.py +440 -0
- lightassay-0.3.0/src/lightassay/orchestrator.py +1219 -0
- lightassay-0.3.0/src/lightassay/preparation_config.py +109 -0
- lightassay-0.3.0/src/lightassay/preparer.py +1218 -0
- lightassay-0.3.0/src/lightassay/run_artifact_io.py +407 -0
- lightassay-0.3.0/src/lightassay/run_models.py +70 -0
- lightassay-0.3.0/src/lightassay/runner.py +298 -0
- lightassay-0.3.0/src/lightassay/runtime_state.py +240 -0
- lightassay-0.3.0/src/lightassay/semantic_config.py +102 -0
- lightassay-0.3.0/src/lightassay/surface.py +2635 -0
- lightassay-0.3.0/src/lightassay/types.py +319 -0
- lightassay-0.3.0/src/lightassay/workbook_models.py +151 -0
- lightassay-0.3.0/src/lightassay/workbook_parser.py +824 -0
- lightassay-0.3.0/src/lightassay/workbook_renderer.py +405 -0
- lightassay-0.3.0/src/lightassay/workflow_config.py +239 -0
- lightassay-0.3.0/src/lightassay/workflow_config_builder.py +141 -0
- lightassay-0.3.0/src/lightassay.egg-info/PKG-INFO +163 -0
- lightassay-0.3.0/src/lightassay.egg-info/SOURCES.txt +53 -0
- lightassay-0.3.0/src/lightassay.egg-info/dependency_links.txt +1 -0
- lightassay-0.3.0/src/lightassay.egg-info/entry_points.txt +2 -0
- lightassay-0.3.0/src/lightassay.egg-info/top_level.txt +1 -0
- lightassay-0.3.0/tests/test_adapter_pack.py +1160 -0
- lightassay-0.3.0/tests/test_analyze.py +687 -0
- lightassay-0.3.0/tests/test_cli_library_parity.py +612 -0
- lightassay-0.3.0/tests/test_compare.py +1097 -0
- lightassay-0.3.0/tests/test_diagnostics.py +1218 -0
- lightassay-0.3.0/tests/test_expert.py +768 -0
- lightassay-0.3.0/tests/test_preparation.py +1446 -0
- lightassay-0.3.0/tests/test_quickstart_continue.py +1024 -0
- lightassay-0.3.0/tests/test_run.py +1123 -0
- lightassay-0.3.0/tests/test_smoke.py +465 -0
- lightassay-0.3.0/tests/test_surface.py +1361 -0
- lightassay-0.3.0/tests/test_workbook.py +904 -0
lightassay-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vadim Larin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lightassay
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: File-based orchestrator for structured evaluation of applied LLM workflows: humans declare intent, LLMs reason about quality, code runs and records raw facts
|
|
5
|
+
Author-email: Vadim Larin <vadimlarintech@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/vadimlarintech/lightassay
|
|
8
|
+
Project-URL: Repository, https://github.com/vadimlarintech/lightassay
|
|
9
|
+
Project-URL: Issues, https://github.com/vadimlarintech/lightassay/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/vadimlarintech/lightassay/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: llm,eval,evaluation,testing,workflow,ai,ai-agents
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
24
|
+
Classifier: Topic :: Software Development :: Testing
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# lightassay
|
|
31
|
+
|
|
32
|
+
`lightassay` is a simple first way to test an LLM workflow.
|
|
33
|
+
|
|
34
|
+
- You describe what worries you in plain language.
|
|
35
|
+
- Your agent, using the LLM access you already have, helps turn that into directions, test cases, and analysis.
|
|
36
|
+
- You do not need to build a formal eval system first.
|
|
37
|
+
- The code runs the workflow and records raw facts.
|
|
38
|
+
- The results are analyzed in terms that make sense to you.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## How it works
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
target → sources → intention → directions → cases → run → analysis → compare
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Use it to check and compare:
|
|
49
|
+
|
|
50
|
+
- different LLM models or providers for the same workflow
|
|
51
|
+
- different workflow architectures around the same model
|
|
52
|
+
- structured and free-text LLM responses
|
|
53
|
+
- any applied LLM workflow where behavior quality matters
|
|
54
|
+
|
|
55
|
+
It keeps the process visible in normal files:
|
|
56
|
+
|
|
57
|
+
- workbook: Markdown
|
|
58
|
+
- run artifact: JSON
|
|
59
|
+
- analysis and compare: Markdown
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Quick start
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
python -m venv .venv
|
|
67
|
+
source .venv/bin/activate
|
|
68
|
+
pip install -e .
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### One-shot `quickstart` (main self-serve entrypoint)
|
|
72
|
+
|
|
73
|
+
Start from one plain-language message. No hand-authored target,
|
|
74
|
+
preparation, semantic, or workflow config is required — use
|
|
75
|
+
`--backend` to pick the adapter bundle.
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
lightassay quickstart my-eval \
|
|
79
|
+
--message "Check myapp.pipeline.run. I care about obvious mistakes, over-correction, and preserving names and numbers." \
|
|
80
|
+
--target "myapp.pipeline.run" \
|
|
81
|
+
--backend claude-cli
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Quickstart performs the full first pass end-to-end:
|
|
85
|
+
|
|
86
|
+
- resolves the target and execution shape,
|
|
87
|
+
- generates a small, high-signal suite,
|
|
88
|
+
- runs the workflow,
|
|
89
|
+
- writes the analysis artifact with structured next-step
|
|
90
|
+
recommendations (each answering "to ensure what?"),
|
|
91
|
+
- leaves a canonical workbook ready for further iterations.
|
|
92
|
+
|
|
93
|
+
List the built-in backends:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
lightassay --list-backends
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Follow-up `continue`
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# Add instructions in the workbook's "## Continue Next Run" block,
|
|
103
|
+
# or pass --message, or both.
|
|
104
|
+
lightassay continue --backend claude-cli --compare-previous
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
`continue` extends/refines directions + cases, runs again, analyzes
|
|
108
|
+
again, and — when `--compare-previous` is set — compares with the
|
|
109
|
+
prior run. The active workbook pointer (`.lightassay/active_workbook.json`)
|
|
110
|
+
is updated after each successful `quickstart` or `continue`, so
|
|
111
|
+
`continue` picks up the right workbook automatically.
|
|
112
|
+
|
|
113
|
+
### Library path
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from lightassay import quickstart, continue_workbook
|
|
117
|
+
|
|
118
|
+
result = quickstart(
|
|
119
|
+
"my-eval",
|
|
120
|
+
message="Check myapp.pipeline.run for preservation of names and numbers.",
|
|
121
|
+
target_hint="myapp.pipeline.run",
|
|
122
|
+
backend="claude-cli",
|
|
123
|
+
)
|
|
124
|
+
print(result.workbook_path, result.analysis_artifact_path, result.conclusion)
|
|
125
|
+
|
|
126
|
+
next_result = continue_workbook(
|
|
127
|
+
message="Also check edge cases around very short inputs.",
|
|
128
|
+
backend="claude-cli",
|
|
129
|
+
compare_previous=True,
|
|
130
|
+
)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
For the earlier, explicit flow (init + prepare + run + analyze), see
|
|
134
|
+
[`docs/quickstart.md`](docs/quickstart.md).
|
|
135
|
+
|
|
136
|
+
For a runnable end-to-end example, see
|
|
137
|
+
[`examples/quickstart/`](examples/quickstart/).
|
|
138
|
+
|
|
139
|
+
The first-party Claude / Codex / stub adapters are packaged inside
|
|
140
|
+
`lightassay.builtin_adapters` and selected via `--backend <name>`. No
|
|
141
|
+
separate reference scripts are needed.
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Documentation
|
|
146
|
+
|
|
147
|
+
- [`quickstart.md`](docs/quickstart.md) — normal start path
|
|
148
|
+
- [`workbook_spec.md`](docs/workbook_spec.md) — workbook structure
|
|
149
|
+
- [`workflow_config_spec.md`](docs/workflow_config_spec.md) — workflow execution config
|
|
150
|
+
- [`semantic_adapter_spec.md`](docs/semantic_adapter_spec.md) — analysis and compare config
|
|
151
|
+
- [`code_architecture.md`](docs/code_architecture.md) — code structure
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Status
|
|
156
|
+
|
|
157
|
+
`0.3.0`
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
MIT — see [`LICENSE`](LICENSE).
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# lightassay
|
|
2
|
+
|
|
3
|
+
`lightassay` is a simple first way to test an LLM workflow.
|
|
4
|
+
|
|
5
|
+
- You describe what worries you in plain language.
|
|
6
|
+
- Your agent, using the LLM access you already have, helps turn that into directions, test cases, and analysis.
|
|
7
|
+
- You do not need to build a formal eval system first.
|
|
8
|
+
- The code runs the workflow and records raw facts.
|
|
9
|
+
- The results are analyzed in terms that make sense to you.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## How it works
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
target → sources → intention → directions → cases → run → analysis → compare
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Use it to check and compare:
|
|
20
|
+
|
|
21
|
+
- different LLM models or providers for the same workflow
|
|
22
|
+
- different workflow architectures around the same model
|
|
23
|
+
- structured and free-text LLM responses
|
|
24
|
+
- any applied LLM workflow where behavior quality matters
|
|
25
|
+
|
|
26
|
+
It keeps the process visible in normal files:
|
|
27
|
+
|
|
28
|
+
- workbook: Markdown
|
|
29
|
+
- run artifact: JSON
|
|
30
|
+
- analysis and compare: Markdown
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Quick start
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
python -m venv .venv
|
|
38
|
+
source .venv/bin/activate
|
|
39
|
+
pip install -e .
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### One-shot `quickstart` (main self-serve entrypoint)
|
|
43
|
+
|
|
44
|
+
Start from one plain-language message. No hand-authored target,
|
|
45
|
+
preparation, semantic, or workflow config is required — use
|
|
46
|
+
`--backend` to pick the adapter bundle.
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
lightassay quickstart my-eval \
|
|
50
|
+
--message "Check myapp.pipeline.run. I care about obvious mistakes, over-correction, and preserving names and numbers." \
|
|
51
|
+
--target "myapp.pipeline.run" \
|
|
52
|
+
--backend claude-cli
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Quickstart performs the full first pass end-to-end:
|
|
56
|
+
|
|
57
|
+
- resolves the target and execution shape,
|
|
58
|
+
- generates a small, high-signal suite,
|
|
59
|
+
- runs the workflow,
|
|
60
|
+
- writes the analysis artifact with structured next-step
|
|
61
|
+
recommendations (each answering "to ensure what?"),
|
|
62
|
+
- leaves a canonical workbook ready for further iterations.
|
|
63
|
+
|
|
64
|
+
List the built-in backends:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
lightassay --list-backends
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Follow-up `continue`
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Add instructions in the workbook's "## Continue Next Run" block,
|
|
74
|
+
# or pass --message, or both.
|
|
75
|
+
lightassay continue --backend claude-cli --compare-previous
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
`continue` extends/refines directions + cases, runs again, analyzes
|
|
79
|
+
again, and — when `--compare-previous` is set — compares with the
|
|
80
|
+
prior run. The active workbook pointer (`.lightassay/active_workbook.json`)
|
|
81
|
+
is updated after each successful `quickstart` or `continue`, so
|
|
82
|
+
`continue` picks up the right workbook automatically.
|
|
83
|
+
|
|
84
|
+
### Library path
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from lightassay import quickstart, continue_workbook
|
|
88
|
+
|
|
89
|
+
result = quickstart(
|
|
90
|
+
"my-eval",
|
|
91
|
+
message="Check myapp.pipeline.run for preservation of names and numbers.",
|
|
92
|
+
target_hint="myapp.pipeline.run",
|
|
93
|
+
backend="claude-cli",
|
|
94
|
+
)
|
|
95
|
+
print(result.workbook_path, result.analysis_artifact_path, result.conclusion)
|
|
96
|
+
|
|
97
|
+
next_result = continue_workbook(
|
|
98
|
+
message="Also check edge cases around very short inputs.",
|
|
99
|
+
backend="claude-cli",
|
|
100
|
+
compare_previous=True,
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
For the earlier, explicit flow (init + prepare + run + analyze), see
|
|
105
|
+
[`docs/quickstart.md`](docs/quickstart.md).
|
|
106
|
+
|
|
107
|
+
For a runnable end-to-end example, see
|
|
108
|
+
[`examples/quickstart/`](examples/quickstart/).
|
|
109
|
+
|
|
110
|
+
The first-party Claude / Codex / stub adapters are packaged inside
|
|
111
|
+
`lightassay.builtin_adapters` and selected via `--backend <name>`. No
|
|
112
|
+
separate reference scripts are needed.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Documentation
|
|
117
|
+
|
|
118
|
+
- [`quickstart.md`](docs/quickstart.md) — normal start path
|
|
119
|
+
- [`workbook_spec.md`](docs/workbook_spec.md) — workbook structure
|
|
120
|
+
- [`workflow_config_spec.md`](docs/workflow_config_spec.md) — workflow execution config
|
|
121
|
+
- [`semantic_adapter_spec.md`](docs/semantic_adapter_spec.md) — analysis and compare config
|
|
122
|
+
- [`code_architecture.md`](docs/code_architecture.md) — code structure
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Status
|
|
127
|
+
|
|
128
|
+
`0.3.0`
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## License
|
|
133
|
+
|
|
134
|
+
MIT — see [`LICENSE`](LICENSE).
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "lightassay"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "File-based orchestrator for structured evaluation of applied LLM workflows: humans declare intent, LLMs reason about quality, code runs and records raw facts"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Vadim Larin", email = "vadimlarintech@gmail.com" },
|
|
15
|
+
]
|
|
16
|
+
keywords = ["llm", "eval", "evaluation", "testing", "workflow", "ai", "ai-agents"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Programming Language :: Python :: 3.13",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
29
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
30
|
+
"Topic :: Software Development :: Testing",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/vadimlarintech/lightassay"
|
|
35
|
+
Repository = "https://github.com/vadimlarintech/lightassay"
|
|
36
|
+
Issues = "https://github.com/vadimlarintech/lightassay/issues"
|
|
37
|
+
Changelog = "https://github.com/vadimlarintech/lightassay/blob/main/CHANGELOG.md"
|
|
38
|
+
|
|
39
|
+
[project.scripts]
|
|
40
|
+
lightassay = "lightassay.cli:main"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
|
44
|
+
|
|
45
|
+
[tool.ruff]
|
|
46
|
+
line-length = 100
|
|
47
|
+
target-version = "py39"
|
|
48
|
+
|
|
49
|
+
[tool.ruff.lint]
|
|
50
|
+
select = ["E", "F", "W", "I", "UP", "B"]
|
|
51
|
+
|
|
52
|
+
[tool.ruff.lint.per-file-ignores]
|
|
53
|
+
"tests/*" = ["E501", "E402"]
|
|
54
|
+
"tests/fixtures/*" = ["E501", "UP"]
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""lightassay: file-based orchestrator for structured evaluation of applied LLM workflows.
|
|
2
|
+
|
|
3
|
+
One rule runs through the whole design: humans declare intent, LLMs do the
|
|
4
|
+
semantic reasoning, code orchestrates execution and measures raw facts — and
|
|
5
|
+
never judges output quality. The workbook (markdown), run artifact (JSON),
|
|
6
|
+
and analysis/compare artifacts (markdown) are the source of truth; the
|
|
7
|
+
library is an orchestrator around them.
|
|
8
|
+
|
|
9
|
+
The ordinary public entrypoint is the L1 library surface. Start here::
|
|
10
|
+
|
|
11
|
+
from lightassay import (
|
|
12
|
+
open_session,
|
|
13
|
+
init_workbook,
|
|
14
|
+
quick_try,
|
|
15
|
+
quick_try_workbook,
|
|
16
|
+
refine_workbook,
|
|
17
|
+
explore_workbook,
|
|
18
|
+
        compare_runs, EvalTarget,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Create a workbook (or use an existing one).
|
|
22
|
+
wb_path = init_workbook("my-eval", output_dir=".")
|
|
23
|
+
|
|
24
|
+
# Or run a one-shot quick try to see the full workbook shape.
|
|
25
|
+
quick = quick_try(
|
|
26
|
+
"my-quick-try",
|
|
27
|
+
target=EvalTarget(
|
|
28
|
+
kind="workflow",
|
|
29
|
+
name="summarize",
|
|
30
|
+
locator="myapp.pipeline.run",
|
|
31
|
+
boundary="high-level pipeline boundary",
|
|
32
|
+
sources=["myapp/pipeline.py", "myapp/prompts/summarize.py"],
|
|
33
|
+
),
|
|
34
|
+
user_request="Check how the pipeline handles obvious failures without over-correcting.",
|
|
35
|
+
preparation_config="prep.json",
|
|
36
|
+
output_dir=".",
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Open a session.
|
|
40
|
+
session = open_session(
|
|
41
|
+
wb_path,
|
|
42
|
+
preparation_config="prep.json",
|
|
43
|
+
workflow_config="wf.json",
|
|
44
|
+
semantic_config="sem.json",
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Inspect state, prepare, run, analyze.
|
|
48
|
+
state = session.state()
|
|
49
|
+
result = session.prepare()
|
|
50
|
+
...
|
|
51
|
+
|
|
52
|
+
# Compare runs (no session/workbook required).
|
|
53
|
+
compare_result = compare_runs(
|
|
54
|
+
["run_a.json", "run_b.json"],
|
|
55
|
+
semantic_config="sem.json",
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
Deeper engine internals are not part of the ordinary L1 surface.
|
|
59
|
+
Use ``open_diagnostics()`` on a session to enter the L2
|
|
60
|
+
diagnostics/recovery layer with structured reports, evidence, and
|
|
61
|
+
bounded recovery actions. The ``DiagnosticsHandle`` type returned
|
|
62
|
+
by ``open_diagnostics()`` lives in ``lightassay.types`` but
|
|
63
|
+
is not part of the ordinary top-level export set. L2 detail types
|
|
64
|
+
live in ``lightassay.diagnostics``.
|
|
65
|
+
|
|
66
|
+
For deep inspection and bounded low-level control, escalate from
|
|
67
|
+
L2 to L3 via ``diag.open_expert()``. L3 types live in
|
|
68
|
+
``lightassay.expert``.
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
__version__ = "0.3.0"
|
|
72
|
+
|
|
73
|
+
# L1 public surface ──────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
from .errors import EvalError
|
|
76
|
+
from .surface import (
|
|
77
|
+
EvalSession,
|
|
78
|
+
compare_runs,
|
|
79
|
+
continue_workbook,
|
|
80
|
+
explore_workbook,
|
|
81
|
+
init_workbook,
|
|
82
|
+
list_backends,
|
|
83
|
+
open_session,
|
|
84
|
+
quick_try,
|
|
85
|
+
quick_try_workbook,
|
|
86
|
+
quickstart,
|
|
87
|
+
refine_workbook,
|
|
88
|
+
)
|
|
89
|
+
from .types import (
|
|
90
|
+
AnalyzeResult,
|
|
91
|
+
CompareResult,
|
|
92
|
+
ContinueResult,
|
|
93
|
+
EvalState,
|
|
94
|
+
EvalTarget,
|
|
95
|
+
ExploreResult,
|
|
96
|
+
PreparationStage,
|
|
97
|
+
PrepareResult,
|
|
98
|
+
QuickstartResult,
|
|
99
|
+
QuickTryResult,
|
|
100
|
+
RefineResult,
|
|
101
|
+
RunResult,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
__all__ = [
|
|
105
|
+
# Version
|
|
106
|
+
"__version__",
|
|
107
|
+
# L1 control
|
|
108
|
+
"open_session",
|
|
109
|
+
"init_workbook",
|
|
110
|
+
"quick_try",
|
|
111
|
+
"quick_try_workbook",
|
|
112
|
+
"refine_workbook",
|
|
113
|
+
"explore_workbook",
|
|
114
|
+
"compare_runs",
|
|
115
|
+
"quickstart",
|
|
116
|
+
"continue_workbook",
|
|
117
|
+
"list_backends",
|
|
118
|
+
"EvalSession",
|
|
119
|
+
# L1 types
|
|
120
|
+
"EvalTarget",
|
|
121
|
+
"EvalState",
|
|
122
|
+
"ExploreResult",
|
|
123
|
+
"PreparationStage",
|
|
124
|
+
"PrepareResult",
|
|
125
|
+
"QuickstartResult",
|
|
126
|
+
"QuickTryResult",
|
|
127
|
+
"ContinueResult",
|
|
128
|
+
"RefineResult",
|
|
129
|
+
"RunResult",
|
|
130
|
+
"AnalyzeResult",
|
|
131
|
+
"CompareResult",
|
|
132
|
+
# L1 error boundary
|
|
133
|
+
"EvalError",
|
|
134
|
+
]
|