lightassay 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. lightassay-0.3.0/LICENSE +21 -0
  2. lightassay-0.3.0/PKG-INFO +163 -0
  3. lightassay-0.3.0/README.md +134 -0
  4. lightassay-0.3.0/pyproject.toml +54 -0
  5. lightassay-0.3.0/setup.cfg +4 -0
  6. lightassay-0.3.0/src/lightassay/__init__.py +134 -0
  7. lightassay-0.3.0/src/lightassay/adapter_pack/__init__.py +295 -0
  8. lightassay-0.3.0/src/lightassay/adapter_pack/command.py +84 -0
  9. lightassay-0.3.0/src/lightassay/adapter_pack/http_driver.py +75 -0
  10. lightassay-0.3.0/src/lightassay/adapter_pack/python_callable.py +63 -0
  11. lightassay-0.3.0/src/lightassay/analyzer.py +287 -0
  12. lightassay-0.3.0/src/lightassay/backends.py +144 -0
  13. lightassay-0.3.0/src/lightassay/bootstrap.py +469 -0
  14. lightassay-0.3.0/src/lightassay/builtin_adapters/__init__.py +27 -0
  15. lightassay-0.3.0/src/lightassay/builtin_adapters/_agent_cli_common.py +281 -0
  16. lightassay-0.3.0/src/lightassay/builtin_adapters/claude_cli.py +29 -0
  17. lightassay-0.3.0/src/lightassay/builtin_adapters/codex_cli.py +28 -0
  18. lightassay-0.3.0/src/lightassay/builtin_adapters/stub.py +361 -0
  19. lightassay-0.3.0/src/lightassay/cli.py +1077 -0
  20. lightassay-0.3.0/src/lightassay/comparer.py +197 -0
  21. lightassay-0.3.0/src/lightassay/diagnostics.py +104 -0
  22. lightassay-0.3.0/src/lightassay/errors.py +94 -0
  23. lightassay-0.3.0/src/lightassay/expert.py +440 -0
  24. lightassay-0.3.0/src/lightassay/orchestrator.py +1219 -0
  25. lightassay-0.3.0/src/lightassay/preparation_config.py +109 -0
  26. lightassay-0.3.0/src/lightassay/preparer.py +1218 -0
  27. lightassay-0.3.0/src/lightassay/run_artifact_io.py +407 -0
  28. lightassay-0.3.0/src/lightassay/run_models.py +70 -0
  29. lightassay-0.3.0/src/lightassay/runner.py +298 -0
  30. lightassay-0.3.0/src/lightassay/runtime_state.py +240 -0
  31. lightassay-0.3.0/src/lightassay/semantic_config.py +102 -0
  32. lightassay-0.3.0/src/lightassay/surface.py +2635 -0
  33. lightassay-0.3.0/src/lightassay/types.py +319 -0
  34. lightassay-0.3.0/src/lightassay/workbook_models.py +151 -0
  35. lightassay-0.3.0/src/lightassay/workbook_parser.py +824 -0
  36. lightassay-0.3.0/src/lightassay/workbook_renderer.py +405 -0
  37. lightassay-0.3.0/src/lightassay/workflow_config.py +239 -0
  38. lightassay-0.3.0/src/lightassay/workflow_config_builder.py +141 -0
  39. lightassay-0.3.0/src/lightassay.egg-info/PKG-INFO +163 -0
  40. lightassay-0.3.0/src/lightassay.egg-info/SOURCES.txt +53 -0
  41. lightassay-0.3.0/src/lightassay.egg-info/dependency_links.txt +1 -0
  42. lightassay-0.3.0/src/lightassay.egg-info/entry_points.txt +2 -0
  43. lightassay-0.3.0/src/lightassay.egg-info/top_level.txt +1 -0
  44. lightassay-0.3.0/tests/test_adapter_pack.py +1160 -0
  45. lightassay-0.3.0/tests/test_analyze.py +687 -0
  46. lightassay-0.3.0/tests/test_cli_library_parity.py +612 -0
  47. lightassay-0.3.0/tests/test_compare.py +1097 -0
  48. lightassay-0.3.0/tests/test_diagnostics.py +1218 -0
  49. lightassay-0.3.0/tests/test_expert.py +768 -0
  50. lightassay-0.3.0/tests/test_preparation.py +1446 -0
  51. lightassay-0.3.0/tests/test_quickstart_continue.py +1024 -0
  52. lightassay-0.3.0/tests/test_run.py +1123 -0
  53. lightassay-0.3.0/tests/test_smoke.py +465 -0
  54. lightassay-0.3.0/tests/test_surface.py +1361 -0
  55. lightassay-0.3.0/tests/test_workbook.py +904 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vadim Larin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,163 @@
1
+ Metadata-Version: 2.4
2
+ Name: lightassay
3
+ Version: 0.3.0
4
+ Summary: File-based orchestrator for structured evaluation of applied LLM workflows: humans declare intent, LLMs reason about quality, code runs and records raw facts
5
+ Author-email: Vadim Larin <vadimlarintech@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/vadimlarintech/lightassay
8
+ Project-URL: Repository, https://github.com/vadimlarintech/lightassay
9
+ Project-URL: Issues, https://github.com/vadimlarintech/lightassay/issues
10
+ Project-URL: Changelog, https://github.com/vadimlarintech/lightassay/blob/main/CHANGELOG.md
11
+ Keywords: llm,eval,evaluation,testing,workflow,ai,ai-agents
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Quality Assurance
24
+ Classifier: Topic :: Software Development :: Testing
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Dynamic: license-file
29
+
30
+ # lightassay
31
+
32
+ `lightassay` is a simple first way to test an LLM workflow.
33
+
34
+ - You describe what worries you in plain language.
35
+ - Your agent, using the LLM access you already have, helps turn that into directions, test cases, and analysis.
36
+ - You do not need to build a formal eval system first.
37
+ - The code runs the workflow and records raw facts.
38
+ - The results are analyzed in terms that make sense to you.
39
+
40
+ ---
41
+
42
+ ## How it works
43
+
44
+ ```
45
+ target → sources → intention → directions → cases → run → analysis → compare
46
+ ```
47
+
48
+ Use it to check and compare:
49
+
50
+ - different LLM models or providers for the same workflow
51
+ - different workflow architectures around the same model
52
+ - structured and free-text LLM responses
53
+ - any applied LLM workflow where behavior quality matters
54
+
55
+ It keeps the process visible in normal files:
56
+
57
+ - workbook: Markdown
58
+ - run artifact: JSON
59
+ - analysis and compare: Markdown
60
+
61
+ ---
62
+
63
+ ## Quick start
64
+
65
+ ```bash
66
+ python -m venv .venv
67
+ source .venv/bin/activate
68
+ pip install -e .  # from a source checkout; for the released package use: pip install lightassay
69
+ ```
70
+
71
+ ### One-shot `quickstart` (main self-serve entrypoint)
72
+
73
+ Start from one plain-language message. No hand-authored target,
74
+ preparation, semantic, or workflow config is required — use
75
+ `--backend` to pick the adapter bundle.
76
+
77
+ ```bash
78
+ lightassay quickstart my-eval \
79
+ --message "Check myapp.pipeline.run. I care about obvious mistakes, over-correction, and preserving names and numbers." \
80
+ --target "myapp.pipeline.run" \
81
+ --backend claude-cli
82
+ ```
83
+
84
+ Quickstart performs the full first pass end-to-end:
85
+
86
+ - resolves the target and execution shape,
87
+ - generates a small, high-signal suite,
88
+ - runs the workflow,
89
+ - writes the analysis artifact with structured next-step
90
+ recommendations (each answering "to ensure what?"),
91
+ - leaves a canonical workbook ready for further iterations.
92
+
93
+ List the built-in backends:
94
+
95
+ ```bash
96
+ lightassay --list-backends
97
+ ```
98
+
99
+ ### Follow-up `continue`
100
+
101
+ ```bash
102
+ # Add instructions in the workbook's "## Continue Next Run" block,
103
+ # or pass --message, or both.
104
+ lightassay continue --backend claude-cli --compare-previous
105
+ ```
106
+
107
+ `continue` extends or refines the directions and cases, runs again, analyzes
108
+ again, and — when `--compare-previous` is set — compares with the
109
+ prior run. The active workbook pointer (`.lightassay/active_workbook.json`)
110
+ is updated after each successful `quickstart` or `continue`, so
111
+ `continue` picks up the right workbook automatically.
112
+
113
+ ### Library path
114
+
115
+ ```python
116
+ from lightassay import quickstart, continue_workbook
117
+
118
+ result = quickstart(
119
+ "my-eval",
120
+ message="Check myapp.pipeline.run for preservation of names and numbers.",
121
+ target_hint="myapp.pipeline.run",
122
+ backend="claude-cli",
123
+ )
124
+ print(result.workbook_path, result.analysis_artifact_path, result.conclusion)
125
+
126
+ next_result = continue_workbook(
127
+ message="Also check edge cases around very short inputs.",
128
+ backend="claude-cli",
129
+ compare_previous=True,
130
+ )
131
+ ```
132
+
133
+ For the earlier, explicit flow (init + prepare + run + analyze), see
134
+ [`docs/quickstart.md`](docs/quickstart.md).
135
+
136
+ For a runnable end-to-end example, see
137
+ [`examples/quickstart/`](examples/quickstart/).
138
+
139
+ The first-party Claude / Codex / stub adapters are packaged inside
140
+ `lightassay.builtin_adapters` and selected via `--backend <name>`. No
141
+ separate reference scripts are needed.
142
+
143
+ ---
144
+
145
+ ## Documentation
146
+
147
+ - [`quickstart.md`](docs/quickstart.md) — normal start path
148
+ - [`workbook_spec.md`](docs/workbook_spec.md) — workbook structure
149
+ - [`workflow_config_spec.md`](docs/workflow_config_spec.md) — workflow execution config
150
+ - [`semantic_adapter_spec.md`](docs/semantic_adapter_spec.md) — analysis and compare config
151
+ - [`code_architecture.md`](docs/code_architecture.md) — code structure
152
+
153
+ ---
154
+
155
+ ## Status
156
+
157
+ Current release: `0.3.0` (alpha).
158
+
159
+ ---
160
+
161
+ ## License
162
+
163
+ MIT — see [`LICENSE`](LICENSE).
@@ -0,0 +1,134 @@
1
+ # lightassay
2
+
3
+ `lightassay` is a simple first way to test an LLM workflow.
4
+
5
+ - You describe what worries you in plain language.
6
+ - Your agent, using the LLM access you already have, helps turn that into directions, test cases, and analysis.
7
+ - You do not need to build a formal eval system first.
8
+ - The code runs the workflow and records raw facts.
9
+ - The results are analyzed in terms that make sense to you.
10
+
11
+ ---
12
+
13
+ ## How it works
14
+
15
+ ```
16
+ target → sources → intention → directions → cases → run → analysis → compare
17
+ ```
18
+
19
+ Use it to check and compare:
20
+
21
+ - different LLM models or providers for the same workflow
22
+ - different workflow architectures around the same model
23
+ - structured and free-text LLM responses
24
+ - any applied LLM workflow where behavior quality matters
25
+
26
+ It keeps the process visible in normal files:
27
+
28
+ - workbook: Markdown
29
+ - run artifact: JSON
30
+ - analysis and compare: Markdown
31
+
32
+ ---
33
+
34
+ ## Quick start
35
+
36
+ ```bash
37
+ python -m venv .venv
38
+ source .venv/bin/activate
39
+ pip install -e .  # from a source checkout; for the released package use: pip install lightassay
40
+ ```
41
+
42
+ ### One-shot `quickstart` (main self-serve entrypoint)
43
+
44
+ Start from one plain-language message. No hand-authored target,
45
+ preparation, semantic, or workflow config is required — use
46
+ `--backend` to pick the adapter bundle.
47
+
48
+ ```bash
49
+ lightassay quickstart my-eval \
50
+ --message "Check myapp.pipeline.run. I care about obvious mistakes, over-correction, and preserving names and numbers." \
51
+ --target "myapp.pipeline.run" \
52
+ --backend claude-cli
53
+ ```
54
+
55
+ Quickstart performs the full first pass end-to-end:
56
+
57
+ - resolves the target and execution shape,
58
+ - generates a small, high-signal suite,
59
+ - runs the workflow,
60
+ - writes the analysis artifact with structured next-step
61
+ recommendations (each answering "to ensure what?"),
62
+ - leaves a canonical workbook ready for further iterations.
63
+
64
+ List the built-in backends:
65
+
66
+ ```bash
67
+ lightassay --list-backends
68
+ ```
69
+
70
+ ### Follow-up `continue`
71
+
72
+ ```bash
73
+ # Add instructions in the workbook's "## Continue Next Run" block,
74
+ # or pass --message, or both.
75
+ lightassay continue --backend claude-cli --compare-previous
76
+ ```
77
+
78
+ `continue` extends or refines the directions and cases, runs again, analyzes
79
+ again, and — when `--compare-previous` is set — compares with the
80
+ prior run. The active workbook pointer (`.lightassay/active_workbook.json`)
81
+ is updated after each successful `quickstart` or `continue`, so
82
+ `continue` picks up the right workbook automatically.
83
+
84
+ ### Library path
85
+
86
+ ```python
87
+ from lightassay import quickstart, continue_workbook
88
+
89
+ result = quickstart(
90
+ "my-eval",
91
+ message="Check myapp.pipeline.run for preservation of names and numbers.",
92
+ target_hint="myapp.pipeline.run",
93
+ backend="claude-cli",
94
+ )
95
+ print(result.workbook_path, result.analysis_artifact_path, result.conclusion)
96
+
97
+ next_result = continue_workbook(
98
+ message="Also check edge cases around very short inputs.",
99
+ backend="claude-cli",
100
+ compare_previous=True,
101
+ )
102
+ ```
103
+
104
+ For the earlier, explicit flow (init + prepare + run + analyze), see
105
+ [`docs/quickstart.md`](docs/quickstart.md).
106
+
107
+ For a runnable end-to-end example, see
108
+ [`examples/quickstart/`](examples/quickstart/).
109
+
110
+ The first-party Claude / Codex / stub adapters are packaged inside
111
+ `lightassay.builtin_adapters` and selected via `--backend <name>`. No
112
+ separate reference scripts are needed.
113
+
114
+ ---
115
+
116
+ ## Documentation
117
+
118
+ - [`quickstart.md`](docs/quickstart.md) — normal start path
119
+ - [`workbook_spec.md`](docs/workbook_spec.md) — workbook structure
120
+ - [`workflow_config_spec.md`](docs/workflow_config_spec.md) — workflow execution config
121
+ - [`semantic_adapter_spec.md`](docs/semantic_adapter_spec.md) — analysis and compare config
122
+ - [`code_architecture.md`](docs/code_architecture.md) — code structure
123
+
124
+ ---
125
+
126
+ ## Status
127
+
128
+ Current release: `0.3.0` (alpha).
129
+
130
+ ---
131
+
132
+ ## License
133
+
134
+ MIT — see [`LICENSE`](LICENSE).
@@ -0,0 +1,54 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "lightassay"
7
+ version = "0.3.0"
8
+ description = "File-based orchestrator for structured evaluation of applied LLM workflows: humans declare intent, LLMs reason about quality, code runs and records raw facts"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [
14
+ { name = "Vadim Larin", email = "vadimlarintech@gmail.com" },
15
+ ]
16
+ keywords = ["llm", "eval", "evaluation", "testing", "workflow", "ai", "ai-agents"]
17
+ classifiers = [
18
+ "Development Status :: 3 - Alpha",
19
+ "Intended Audience :: Developers",
20
+ "Operating System :: OS Independent",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3 :: Only",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12",
27
+ "Programming Language :: Python :: 3.13",
28
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
29
+ "Topic :: Software Development :: Quality Assurance",
30
+ "Topic :: Software Development :: Testing",
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/vadimlarintech/lightassay"
35
+ Repository = "https://github.com/vadimlarintech/lightassay"
36
+ Issues = "https://github.com/vadimlarintech/lightassay/issues"
37
+ Changelog = "https://github.com/vadimlarintech/lightassay/blob/main/CHANGELOG.md"
38
+
39
+ [project.scripts]
40
+ lightassay = "lightassay.cli:main"
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["src"]
44
+
45
+ [tool.ruff]
46
+ line-length = 100
47
+ target-version = "py39"
48
+
49
+ [tool.ruff.lint]
50
+ select = ["E", "F", "W", "I", "UP", "B"]
51
+
52
+ [tool.ruff.lint.per-file-ignores]
53
+ "tests/*" = ["E501", "E402"]
54
+ "tests/fixtures/*" = ["E501", "UP"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,134 @@
1
+ """lightassay: file-based orchestrator for structured evaluation of applied LLM workflows.
2
+
3
+ One rule runs through the whole design: humans declare intent, LLMs do the
4
+ semantic reasoning, code orchestrates execution and measures raw facts — and
5
+ never judges output quality. The workbook (markdown), run artifact (JSON),
6
+ and analysis/compare artifacts (markdown) are the source of truth; the
7
+ library is an orchestrator around them.
8
+
9
+ The ordinary public entrypoint is the L1 library surface. Start here::
10
+
11
+ from lightassay import (
12
+ open_session,
13
+ init_workbook,
14
+ quick_try,
15
+ quick_try_workbook,
16
+ refine_workbook,
17
+ explore_workbook,
18
+ compare_runs, EvalTarget,
19
+ )
20
+
21
+ # Create a workbook (or use an existing one).
22
+ wb_path = init_workbook("my-eval", output_dir=".")
23
+
24
+ # Or run a one-shot quick try to see the full workbook shape.
25
+ quick = quick_try(
26
+ "my-quick-try",
27
+ target=EvalTarget(
28
+ kind="workflow",
29
+ name="summarize",
30
+ locator="myapp.pipeline.run",
31
+ boundary="high-level pipeline boundary",
32
+ sources=["myapp/pipeline.py", "myapp/prompts/summarize.py"],
33
+ ),
34
+ user_request="Check how the pipeline handles obvious failures without over-correcting.",
35
+ preparation_config="prep.json",
36
+ output_dir=".",
37
+ )
38
+
39
+ # Open a session.
40
+ session = open_session(
41
+ wb_path,
42
+ preparation_config="prep.json",
43
+ workflow_config="wf.json",
44
+ semantic_config="sem.json",
45
+ )
46
+
47
+ # Inspect state, prepare, run, analyze.
48
+ state = session.state()
49
+ result = session.prepare()
50
+ ...
51
+
52
+ # Compare runs (no session/workbook required).
53
+ compare_result = compare_runs(
54
+ ["run_a.json", "run_b.json"],
55
+ semantic_config="sem.json",
56
+ )
57
+
58
+ Deeper engine internals are not part of the ordinary L1 surface.
59
+ Use ``open_diagnostics()`` on a session to enter the L2
60
+ diagnostics/recovery layer with structured reports, evidence, and
61
+ bounded recovery actions. The ``DiagnosticsHandle`` type returned
62
+ by ``open_diagnostics()`` lives in ``lightassay.types`` but
63
+ is not part of the ordinary top-level export set. L2 detail types
64
+ live in ``lightassay.diagnostics``.
65
+
66
+ For deep inspection and bounded low-level control, escalate from
67
+ L2 to L3 via ``diag.open_expert()``. L3 types live in
68
+ ``lightassay.expert``.
69
+ """
70
+
71
+ __version__ = "0.3.0"
72
+
73
+ # L1 public surface ──────────────────────────────────────────────────────────
74
+
75
+ from .errors import EvalError
76
+ from .surface import (
77
+ EvalSession,
78
+ compare_runs,
79
+ continue_workbook,
80
+ explore_workbook,
81
+ init_workbook,
82
+ list_backends,
83
+ open_session,
84
+ quick_try,
85
+ quick_try_workbook,
86
+ quickstart,
87
+ refine_workbook,
88
+ )
89
+ from .types import (
90
+ AnalyzeResult,
91
+ CompareResult,
92
+ ContinueResult,
93
+ EvalState,
94
+ EvalTarget,
95
+ ExploreResult,
96
+ PreparationStage,
97
+ PrepareResult,
98
+ QuickstartResult,
99
+ QuickTryResult,
100
+ RefineResult,
101
+ RunResult,
102
+ )
103
+
104
+ __all__ = [
105
+ # Version
106
+ "__version__",
107
+ # L1 control
108
+ "open_session",
109
+ "init_workbook",
110
+ "quick_try",
111
+ "quick_try_workbook",
112
+ "refine_workbook",
113
+ "explore_workbook",
114
+ "compare_runs",
115
+ "quickstart",
116
+ "continue_workbook",
117
+ "list_backends",
118
+ "EvalSession",
119
+ # L1 types
120
+ "EvalTarget",
121
+ "EvalState",
122
+ "ExploreResult",
123
+ "PreparationStage",
124
+ "PrepareResult",
125
+ "QuickstartResult",
126
+ "QuickTryResult",
127
+ "ContinueResult",
128
+ "RefineResult",
129
+ "RunResult",
130
+ "AnalyzeResult",
131
+ "CompareResult",
132
+ # L1 error boundary
133
+ "EvalError",
134
+ ]