design-research-experiments 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- design_research_experiments-0.1.0/LICENSE +21 -0
- design_research_experiments-0.1.0/PKG-INFO +126 -0
- design_research_experiments-0.1.0/README.md +89 -0
- design_research_experiments-0.1.0/pyproject.toml +97 -0
- design_research_experiments-0.1.0/setup.cfg +4 -0
- design_research_experiments-0.1.0/src/design_research_experiments/__init__.py +87 -0
- design_research_experiments-0.1.0/src/design_research_experiments/adapters/__init__.py +16 -0
- design_research_experiments-0.1.0/src/design_research_experiments/adapters/agents.py +248 -0
- design_research_experiments-0.1.0/src/design_research_experiments/adapters/analysis.py +81 -0
- design_research_experiments-0.1.0/src/design_research_experiments/adapters/problems.py +196 -0
- design_research_experiments-0.1.0/src/design_research_experiments/artifacts.py +432 -0
- design_research_experiments-0.1.0/src/design_research_experiments/bundles.py +66 -0
- design_research_experiments-0.1.0/src/design_research_experiments/cli.py +280 -0
- design_research_experiments-0.1.0/src/design_research_experiments/conditions.py +459 -0
- design_research_experiments-0.1.0/src/design_research_experiments/designs.py +789 -0
- design_research_experiments-0.1.0/src/design_research_experiments/hypotheses.py +276 -0
- design_research_experiments-0.1.0/src/design_research_experiments/io/__init__.py +16 -0
- design_research_experiments-0.1.0/src/design_research_experiments/io/csv_io.py +47 -0
- design_research_experiments-0.1.0/src/design_research_experiments/io/json_io.py +22 -0
- design_research_experiments-0.1.0/src/design_research_experiments/io/sqlite_io.py +69 -0
- design_research_experiments-0.1.0/src/design_research_experiments/io/yaml_io.py +44 -0
- design_research_experiments-0.1.0/src/design_research_experiments/metrics.py +105 -0
- design_research_experiments-0.1.0/src/design_research_experiments/py.typed +1 -0
- design_research_experiments-0.1.0/src/design_research_experiments/recipes.py +525 -0
- design_research_experiments-0.1.0/src/design_research_experiments/reporting.py +109 -0
- design_research_experiments-0.1.0/src/design_research_experiments/runners.py +428 -0
- design_research_experiments-0.1.0/src/design_research_experiments/schemas.py +279 -0
- design_research_experiments-0.1.0/src/design_research_experiments/study.py +370 -0
- design_research_experiments-0.1.0/src/design_research_experiments.egg-info/PKG-INFO +126 -0
- design_research_experiments-0.1.0/src/design_research_experiments.egg-info/SOURCES.txt +43 -0
- design_research_experiments-0.1.0/src/design_research_experiments.egg-info/dependency_links.txt +1 -0
- design_research_experiments-0.1.0/src/design_research_experiments.egg-info/entry_points.txt +2 -0
- design_research_experiments-0.1.0/src/design_research_experiments.egg-info/requires.txt +14 -0
- design_research_experiments-0.1.0/src/design_research_experiments.egg-info/top_level.txt +1 -0
- design_research_experiments-0.1.0/tests/test_adapters_and_reporting.py +346 -0
- design_research_experiments-0.1.0/tests/test_artifacts.py +152 -0
- design_research_experiments-0.1.0/tests/test_cli_and_examples.py +128 -0
- design_research_experiments-0.1.0/tests/test_conditions.py +68 -0
- design_research_experiments-0.1.0/tests/test_conditions_and_designs_extended.py +237 -0
- design_research_experiments-0.1.0/tests/test_core.py +125 -0
- design_research_experiments-0.1.0/tests/test_designs.py +98 -0
- design_research_experiments-0.1.0/tests/test_doe_capabilities.py +145 -0
- design_research_experiments-0.1.0/tests/test_io_study_schemas_and_runners.py +299 -0
- design_research_experiments-0.1.0/tests/test_public_api.py +56 -0
- design_research_experiments-0.1.0/tests/test_recipes.py +144 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 The Design Research Collective
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: design-research-experiments
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Study-definition and orchestration layer for the cmudrc design research ecosystem
|
|
5
|
+
Author: The Design Research Collective
|
|
6
|
+
Maintainer-email: "Christopher C. McComb" <ccm@cmu.edu>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/cmudrc/design-research-experiments
|
|
9
|
+
Project-URL: Repository, https://github.com/cmudrc/design-research-experiments
|
|
10
|
+
Project-URL: Issues, https://github.com/cmudrc/design-research-experiments/issues
|
|
11
|
+
Keywords: design,research,experiments,orchestration,doe
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: PyYAML<7,>=6.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: build<2,>=1.2; extra == "dev"
|
|
26
|
+
Requires-Dist: mypy<2,>=1.10; extra == "dev"
|
|
27
|
+
Requires-Dist: pre-commit<5,>=3.7; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest<9,>=8.2; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov<8,>=7.0; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff<1,>=0.6.0; extra == "dev"
|
|
31
|
+
Requires-Dist: sphinx<9,>=7.4; extra == "dev"
|
|
32
|
+
Requires-Dist: sphinx-rtd-theme<4,>=2.0; extra == "dev"
|
|
33
|
+
Requires-Dist: twine<7,>=5.1; extra == "dev"
|
|
34
|
+
Requires-Dist: types-PyYAML<7,>=6.0; extra == "dev"
|
|
35
|
+
Requires-Dist: uv<1,>=0.6; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# design-research-experiments
|
|
39
|
+
[![CI](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml)
|
|
40
|
+
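[![Docs](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml)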
|
|
41
|
+
|
|
42
|
+
`design-research-experiments` is the hypothesis-first study-definition and
|
|
43
|
+
experiment-orchestration layer in the cmudrc design research ecosystem.
|
|
44
|
+
|
|
45
|
+
It composes sibling libraries rather than reimplementing them:
|
|
46
|
+
|
|
47
|
+
- `design-research-agents` for executable agent behavior, workflows, and traces
|
|
48
|
+
- `design-research-problems` for problem catalogs, registries, and evaluators
|
|
49
|
+
- `design-research-analysis` for downstream unified-table analysis and reporting
|
|
50
|
+
|
|
51
|
+
## Overview
|
|
52
|
+
|
|
53
|
+
This package centers on reproducible experiment structure and execution:
|
|
54
|
+
|
|
55
|
+
- typed schemas for studies, factors, blocks, hypotheses, outcomes, and analysis plans
|
|
56
|
+
- design-of-experiments materialization (full/constrained factorial, randomized block,
|
|
57
|
+
repeated measures, latin square, custom matrices)
|
|
58
|
+
- run orchestration with deterministic seeding, checkpointing, and resume support
|
|
59
|
+
- canonical artifact exports (`study.yaml`, `manifest.json`, `conditions.csv`,
|
|
60
|
+
`runs.csv`, `events.csv`, `evaluations.csv`, and machine-readable hypothesis/plan files)
|
|
61
|
+
- thin adapters that connect to the public APIs of sibling agent/problem/analysis libraries
|
|
62
|
+
|
|
63
|
+
## Quickstart
|
|
64
|
+
|
|
65
|
+
Requires Python 3.12+.
|
|
66
|
+
Reproducible release installs are pinned to Python `3.12.12` (`.python-version`).
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
python -m venv .venv
|
|
70
|
+
source .venv/bin/activate
|
|
71
|
+
make dev
|
|
72
|
+
make test
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Run a basic example:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
make run-example
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## CLI
|
|
82
|
+
|
|
83
|
+
The package installs a `drexp` CLI:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
drexp validate-study path/to/study.yaml
|
|
87
|
+
drexp materialize-design path/to/study.yaml
|
|
88
|
+
drexp generate-doe --kind lhs --factors-json '{"x": [0, 1], "y": [10, 20]}' --n-samples 12 --out artifacts/doe.csv
|
|
89
|
+
drexp run-study path/to/study.yaml
|
|
90
|
+
drexp resume-study path/to/study.yaml
|
|
91
|
+
drexp export-analysis path/to/study.yaml
|
|
92
|
+
drexp bundle-results path/to/output_dir
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Examples
|
|
96
|
+
|
|
97
|
+
See [examples/README.md](examples/README.md) for runnable scripts, including
|
|
98
|
+
end-to-end recipe executions.
|
|
99
|
+
|
|
100
|
+
## Docs
|
|
101
|
+
|
|
102
|
+
See the [published documentation](https://cmudrc.github.io/design-research-experiments/)
|
|
103
|
+
for guides and API reference.
|
|
104
|
+
|
|
105
|
+
Build docs locally with:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
make docs
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Public API
|
|
112
|
+
|
|
113
|
+
Top-level exports are intentionally small:
|
|
114
|
+
|
|
115
|
+
- `Study`, `Factor`, `Level`, `Constraint`, `Condition`, `Block`
|
|
116
|
+
- `RecipeStudyConfig`, recipe-specific typed config classes
|
|
117
|
+
- `Hypothesis`, `OutcomeSpec`, `AnalysisPlan`
|
|
118
|
+
- `RunSpec`, `RunResult`, `BenchmarkBundle`
|
|
119
|
+
- `build_design`, `generate_doe`, `materialize_conditions`
|
|
120
|
+
- `build_prompt_framing_study`, `build_optimization_benchmark_study`, and other recipe builders
|
|
121
|
+
- `run_study`, `resume_study`
|
|
122
|
+
- `export_analysis_tables`, `validate_study`
|
|
123
|
+
|
|
124
|
+
## Contributing
|
|
125
|
+
|
|
126
|
+
Contribution workflow and quality gates are documented in `CONTRIBUTING.md`.
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# design-research-experiments
|
|
2
|
+
[![CI](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml)
|
|
3
|
+
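[![Docs](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml)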
|
|
4
|
+
|
|
5
|
+
`design-research-experiments` is the hypothesis-first study-definition and
|
|
6
|
+
experiment-orchestration layer in the cmudrc design research ecosystem.
|
|
7
|
+
|
|
8
|
+
It composes sibling libraries rather than reimplementing them:
|
|
9
|
+
|
|
10
|
+
- `design-research-agents` for executable agent behavior, workflows, and traces
|
|
11
|
+
- `design-research-problems` for problem catalogs, registries, and evaluators
|
|
12
|
+
- `design-research-analysis` for downstream unified-table analysis and reporting
|
|
13
|
+
|
|
14
|
+
## Overview
|
|
15
|
+
|
|
16
|
+
This package centers on reproducible experiment structure and execution:
|
|
17
|
+
|
|
18
|
+
- typed schemas for studies, factors, blocks, hypotheses, outcomes, and analysis plans
|
|
19
|
+
- design-of-experiments materialization (full/constrained factorial, randomized block,
|
|
20
|
+
repeated measures, latin square, custom matrices)
|
|
21
|
+
- run orchestration with deterministic seeding, checkpointing, and resume support
|
|
22
|
+
- canonical artifact exports (`study.yaml`, `manifest.json`, `conditions.csv`,
|
|
23
|
+
`runs.csv`, `events.csv`, `evaluations.csv`, and machine-readable hypothesis/plan files)
|
|
24
|
+
- thin adapters that connect to the public APIs of sibling agent/problem/analysis libraries
|
|
25
|
+
|
|
26
|
+
## Quickstart
|
|
27
|
+
|
|
28
|
+
Requires Python 3.12+.
|
|
29
|
+
Reproducible release installs are pinned to Python `3.12.12` (`.python-version`).
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
python -m venv .venv
|
|
33
|
+
source .venv/bin/activate
|
|
34
|
+
make dev
|
|
35
|
+
make test
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Run a basic example:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
make run-example
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## CLI
|
|
45
|
+
|
|
46
|
+
The package installs a `drexp` CLI:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
drexp validate-study path/to/study.yaml
|
|
50
|
+
drexp materialize-design path/to/study.yaml
|
|
51
|
+
drexp generate-doe --kind lhs --factors-json '{"x": [0, 1], "y": [10, 20]}' --n-samples 12 --out artifacts/doe.csv
|
|
52
|
+
drexp run-study path/to/study.yaml
|
|
53
|
+
drexp resume-study path/to/study.yaml
|
|
54
|
+
drexp export-analysis path/to/study.yaml
|
|
55
|
+
drexp bundle-results path/to/output_dir
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Examples
|
|
59
|
+
|
|
60
|
+
See [examples/README.md](examples/README.md) for runnable scripts, including
|
|
61
|
+
end-to-end recipe executions.
|
|
62
|
+
|
|
63
|
+
## Docs
|
|
64
|
+
|
|
65
|
+
See the [published documentation](https://cmudrc.github.io/design-research-experiments/)
|
|
66
|
+
for guides and API reference.
|
|
67
|
+
|
|
68
|
+
Build docs locally with:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
make docs
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Public API
|
|
75
|
+
|
|
76
|
+
Top-level exports are intentionally small:
|
|
77
|
+
|
|
78
|
+
- `Study`, `Factor`, `Level`, `Constraint`, `Condition`, `Block`
|
|
79
|
+
- `RecipeStudyConfig`, recipe-specific typed config classes
|
|
80
|
+
- `Hypothesis`, `OutcomeSpec`, `AnalysisPlan`
|
|
81
|
+
- `RunSpec`, `RunResult`, `BenchmarkBundle`
|
|
82
|
+
- `build_design`, `generate_doe`, `materialize_conditions`
|
|
83
|
+
- `build_prompt_framing_study`, `build_optimization_benchmark_study`, and other recipe builders
|
|
84
|
+
- `run_study`, `resume_study`
|
|
85
|
+
- `export_analysis_tables`, `validate_study`
|
|
86
|
+
|
|
87
|
+
## Contributing
|
|
88
|
+
|
|
89
|
+
Contribution workflow and quality gates are documented in `CONTRIBUTING.md`.
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "design-research-experiments"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Study-definition and orchestration layer for the cmudrc design research ecosystem"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [{ name = "The Design Research Collective" }]
|
|
14
|
+
maintainers = [{ name = "Christopher C. McComb", email = "ccm@cmu.edu" }]
|
|
15
|
+
keywords = ["design", "research", "experiments", "orchestration", "doe"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Operating System :: OS Independent",
|
|
23
|
+
"Topic :: Scientific/Engineering",
|
|
24
|
+
"Typing :: Typed",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"PyYAML>=6.0,<7",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = [
|
|
32
|
+
"build>=1.2,<2",
|
|
33
|
+
"mypy>=1.10,<2",
|
|
34
|
+
"pre-commit>=3.7,<5",
|
|
35
|
+
"pytest>=8.2,<9",
|
|
36
|
+
"pytest-cov>=7.0,<8",
|
|
37
|
+
"ruff>=0.6.0,<1",
|
|
38
|
+
"sphinx>=7.4,<9",
|
|
39
|
+
"sphinx-rtd-theme>=2.0,<4",
|
|
40
|
+
"twine>=5.1,<7",
|
|
41
|
+
"types-PyYAML>=6.0,<7",
|
|
42
|
+
"uv>=0.6,<1",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/cmudrc/design-research-experiments"
|
|
47
|
+
Repository = "https://github.com/cmudrc/design-research-experiments"
|
|
48
|
+
Issues = "https://github.com/cmudrc/design-research-experiments/issues"
|
|
49
|
+
|
|
50
|
+
[project.scripts]
|
|
51
|
+
drexp = "design_research_experiments.cli:main"
|
|
52
|
+
|
|
53
|
+
[tool.setuptools]
|
|
54
|
+
package-dir = { "" = "src" }
|
|
55
|
+
include-package-data = true
|
|
56
|
+
|
|
57
|
+
[tool.setuptools.packages.find]
|
|
58
|
+
where = ["src"]
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.package-data]
|
|
61
|
+
design_research_experiments = ["py.typed"]
|
|
62
|
+
|
|
63
|
+
[tool.pytest.ini_options]
|
|
64
|
+
addopts = "-q"
|
|
65
|
+
testpaths = ["tests"]
|
|
66
|
+
|
|
67
|
+
[tool.ruff]
|
|
68
|
+
line-length = 100
|
|
69
|
+
target-version = "py312"
|
|
70
|
+
src = ["src", "tests", "examples", "scripts"]
|
|
71
|
+
|
|
72
|
+
[tool.ruff.format]
|
|
73
|
+
quote-style = "double"
|
|
74
|
+
indent-style = "space"
|
|
75
|
+
skip-magic-trailing-comma = false
|
|
76
|
+
line-ending = "auto"
|
|
77
|
+
|
|
78
|
+
[tool.ruff.lint]
|
|
79
|
+
select = ["E", "F", "I", "UP", "B", "SIM", "RUF", "D"]
|
|
80
|
+
|
|
81
|
+
[tool.ruff.lint.pydocstyle]
|
|
82
|
+
convention = "google"
|
|
83
|
+
|
|
84
|
+
[tool.ruff.lint.per-file-ignores]
|
|
85
|
+
"tests/*.py" = ["D"]
|
|
86
|
+
|
|
87
|
+
[tool.mypy]
|
|
88
|
+
packages = ["design_research_experiments"]
|
|
89
|
+
python_version = "3.12"
|
|
90
|
+
warn_unused_configs = true
|
|
91
|
+
check_untyped_defs = true
|
|
92
|
+
disallow_untyped_defs = true
|
|
93
|
+
no_implicit_optional = true
|
|
94
|
+
warn_redundant_casts = true
|
|
95
|
+
warn_unused_ignores = true
|
|
96
|
+
warn_return_any = true
|
|
97
|
+
strict_equality = true
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Public API exports for design-research-experiments."""
|
|
2
|
+
|
|
3
|
+
from .adapters.analysis import export_analysis_tables
|
|
4
|
+
from .adapters.problems import ProblemPacket
|
|
5
|
+
from .bundles import (
|
|
6
|
+
BenchmarkBundle,
|
|
7
|
+
grammar_problem_bundle,
|
|
8
|
+
human_vs_agent_bundle,
|
|
9
|
+
ideation_bundle,
|
|
10
|
+
optimization_bundle,
|
|
11
|
+
)
|
|
12
|
+
from .conditions import Condition, Constraint, Factor, FactorKind, Level, materialize_conditions
|
|
13
|
+
from .designs import build_design, generate_doe
|
|
14
|
+
from .hypotheses import AnalysisPlan, Hypothesis, OutcomeSpec
|
|
15
|
+
from .recipes import (
|
|
16
|
+
AgentArchitectureComparisonConfig,
|
|
17
|
+
DiversityAndExplorationConfig,
|
|
18
|
+
GrammarScaffoldConfig,
|
|
19
|
+
HumanVsAgentProcessConfig,
|
|
20
|
+
OptimizationBenchmarkConfig,
|
|
21
|
+
PromptFramingConfig,
|
|
22
|
+
RecipeStudyConfig,
|
|
23
|
+
build_agent_architecture_comparison_study,
|
|
24
|
+
build_diversity_and_exploration_study,
|
|
25
|
+
build_grammar_scaffold_study,
|
|
26
|
+
build_human_vs_agent_process_study,
|
|
27
|
+
build_optimization_benchmark_study,
|
|
28
|
+
build_prompt_framing_study,
|
|
29
|
+
)
|
|
30
|
+
from .reporting import (
|
|
31
|
+
render_codebook,
|
|
32
|
+
render_markdown_summary,
|
|
33
|
+
render_methods_scaffold,
|
|
34
|
+
render_significance_brief,
|
|
35
|
+
write_markdown_report,
|
|
36
|
+
)
|
|
37
|
+
from .runners import resume_study, run_study
|
|
38
|
+
from .schemas import RunBudget, SeedPolicy
|
|
39
|
+
from .study import Block, RunResult, RunSpec, Study, validate_study
|
|
40
|
+
|
|
41
|
+
__all__ = [
|
|
42
|
+
"AgentArchitectureComparisonConfig",
|
|
43
|
+
"AnalysisPlan",
|
|
44
|
+
"BenchmarkBundle",
|
|
45
|
+
"Block",
|
|
46
|
+
"Condition",
|
|
47
|
+
"Constraint",
|
|
48
|
+
"DiversityAndExplorationConfig",
|
|
49
|
+
"Factor",
|
|
50
|
+
"FactorKind",
|
|
51
|
+
"GrammarScaffoldConfig",
|
|
52
|
+
"HumanVsAgentProcessConfig",
|
|
53
|
+
"Hypothesis",
|
|
54
|
+
"Level",
|
|
55
|
+
"OptimizationBenchmarkConfig",
|
|
56
|
+
"OutcomeSpec",
|
|
57
|
+
"ProblemPacket",
|
|
58
|
+
"PromptFramingConfig",
|
|
59
|
+
"RecipeStudyConfig",
|
|
60
|
+
"RunBudget",
|
|
61
|
+
"RunResult",
|
|
62
|
+
"RunSpec",
|
|
63
|
+
"SeedPolicy",
|
|
64
|
+
"Study",
|
|
65
|
+
"build_agent_architecture_comparison_study",
|
|
66
|
+
"build_design",
|
|
67
|
+
"build_diversity_and_exploration_study",
|
|
68
|
+
"build_grammar_scaffold_study",
|
|
69
|
+
"build_human_vs_agent_process_study",
|
|
70
|
+
"build_optimization_benchmark_study",
|
|
71
|
+
"build_prompt_framing_study",
|
|
72
|
+
"export_analysis_tables",
|
|
73
|
+
"generate_doe",
|
|
74
|
+
"grammar_problem_bundle",
|
|
75
|
+
"human_vs_agent_bundle",
|
|
76
|
+
"ideation_bundle",
|
|
77
|
+
"materialize_conditions",
|
|
78
|
+
"optimization_bundle",
|
|
79
|
+
"render_codebook",
|
|
80
|
+
"render_markdown_summary",
|
|
81
|
+
"render_methods_scaffold",
|
|
82
|
+
"render_significance_brief",
|
|
83
|
+
"resume_study",
|
|
84
|
+
"run_study",
|
|
85
|
+
"validate_study",
|
|
86
|
+
"write_markdown_report",
|
|
87
|
+
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Integration adapters for agents, problems, and downstream analysis."""
|
|
2
|
+
|
|
3
|
+
from .agents import AgentExecution, execute_agent, resolve_agent
|
|
4
|
+
from .analysis import export_analysis_tables
|
|
5
|
+
from .problems import ProblemPacket, evaluate_problem, resolve_problem, sample_problem_packets
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"AgentExecution",
|
|
9
|
+
"ProblemPacket",
|
|
10
|
+
"evaluate_problem",
|
|
11
|
+
"execute_agent",
|
|
12
|
+
"export_analysis_tables",
|
|
13
|
+
"resolve_agent",
|
|
14
|
+
"resolve_problem",
|
|
15
|
+
"sample_problem_packets",
|
|
16
|
+
]
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""Agent-layer adapter utilities built on public agent APIs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
import inspect
|
|
7
|
+
from collections.abc import Callable, Mapping, Sequence
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from ..conditions import Condition
|
|
12
|
+
from ..schemas import Observation, ObservationLevel, ValidationError, hash_identifier, utc_now_iso
|
|
13
|
+
from ..study import RunSpec
|
|
14
|
+
from .problems import ProblemPacket
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(slots=True)
class AgentExecution:
    """Normalized agent execution bundle.

    Canonical shape returned by ``execute_agent``. Every field defaults to a
    fresh empty container.
    """

    # Agent output payload; plain-text results are stored under the "text" key.
    output: dict[str, Any] = field(default_factory=dict)
    # Copied from the raw result's "metrics" entry when the agent returns a mapping.
    metrics: dict[str, Any] = field(default_factory=dict)
    # Observation records normalized from the raw result's "events" entry.
    events: list[Observation] = field(default_factory=list)
    # Stringified values of the raw result's "trace_refs" entry.
    trace_refs: list[str] = field(default_factory=list)
    # Copied from the raw result's "metadata" entry when the agent returns a mapping.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def resolve_agent(
    agent_spec_ref: Any,
    *,
    condition: Condition,
    factories: Mapping[str, Callable[[Condition], Any]] | None = None,
) -> Any:
    """Turn an agent reference into an object that can be executed.

    Args:
        agent_spec_ref: A string identifier, or an already-executable object.
        condition: Condition passed to a registered factory when one matches.
        factories: Optional mapping from identifier to a factory that takes
            the condition and returns the agent.

    Returns:
        The factory result, the object resolved from ``design_research_agents``,
        or ``agent_spec_ref`` unchanged when it is not a string.

    Raises:
        ValidationError: If a string identifier cannot be resolved.
    """
    # Anything that is not a string is assumed to be ready to run as-is.
    if not isinstance(agent_spec_ref, str):
        return agent_spec_ref

    # Explicitly registered factories take precedence over the package lookup.
    if factories and agent_spec_ref in factories:
        build = factories[agent_spec_ref]
        return build(condition)

    resolved = _resolve_from_design_research_agents(agent_spec_ref)
    if resolved is None:
        message = (
            f"Unknown agent spec '{agent_spec_ref}'. "
            "Register a factory or pass an executable object."
        )
        raise ValidationError(message)
    return resolved
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def execute_agent(
    *,
    agent_spec_ref: Any,
    run_spec: RunSpec,
    condition: Condition,
    problem_packet: ProblemPacket,
    factories: Mapping[str, Callable[[Condition], Any]] | None = None,
) -> AgentExecution:
    """Run one agent and return its result in normalized form.

    Args:
        agent_spec_ref: String identifier or executable object for the agent.
        run_spec: Specification of the run being executed.
        condition: Condition the run belongs to.
        problem_packet: Problem handed to the agent.
        factories: Optional identifier-to-factory mapping used during resolution.

    Returns:
        The normalized output, metrics, events, trace refs, and metadata.
    """
    # Step 1: resolve the reference into something callable.
    agent = resolve_agent(agent_spec_ref, condition=condition, factories=factories)

    # Steps 2 and 3: invoke it, then coerce whatever came back into AgentExecution.
    return _normalize_agent_execution(
        raw=_invoke_agent(
            executable=agent,
            run_spec=run_spec,
            condition=condition,
            problem_packet=problem_packet,
        ),
        run_spec=run_spec,
        condition=condition,
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _resolve_from_design_research_agents(agent_id: str) -> Any | None:
|
|
71
|
+
"""Attempt loading a public constructor from design-research-agents."""
|
|
72
|
+
try:
|
|
73
|
+
module = importlib.import_module("design_research_agents")
|
|
74
|
+
except ImportError:
|
|
75
|
+
return None
|
|
76
|
+
|
|
77
|
+
if hasattr(module, agent_id):
|
|
78
|
+
constructor = getattr(module, agent_id)
|
|
79
|
+
if callable(constructor):
|
|
80
|
+
try:
|
|
81
|
+
return constructor()
|
|
82
|
+
except Exception:
|
|
83
|
+
return constructor
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _invoke_agent(
    *,
    executable: Any,
    run_spec: RunSpec,
    condition: Condition,
    problem_packet: ProblemPacket,
) -> Any:
    """Call the resolved agent through its ``run`` method or directly.

    Args:
        executable: Object with a callable ``run`` attribute, or a callable.
        run_spec: Specification of the run being executed.
        condition: Condition the run belongs to.
        problem_packet: Problem handed to the agent.

    Returns:
        Whatever the agent returns, unmodified.

    Raises:
        ValidationError: If ``executable`` has no callable ``run`` and is not
            itself callable.
    """
    # A callable ``run`` attribute is preferred over calling the object itself.
    if hasattr(executable, "run") and callable(executable.run):
        entry_point = executable.run
    elif callable(executable):
        entry_point = executable
    else:
        raise ValidationError("Resolved agent object is not executable.")

    return _invoke_callable(
        callable_obj=entry_point,
        run_spec=run_spec,
        condition=condition,
        problem_packet=problem_packet,
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _invoke_callable(
|
|
115
|
+
*,
|
|
116
|
+
callable_obj: Callable[..., Any],
|
|
117
|
+
run_spec: RunSpec,
|
|
118
|
+
condition: Condition,
|
|
119
|
+
problem_packet: ProblemPacket,
|
|
120
|
+
) -> Any:
|
|
121
|
+
"""Invoke a callable by matching supported keyword parameters."""
|
|
122
|
+
parameters = inspect.signature(callable_obj).parameters
|
|
123
|
+
kwargs: dict[str, Any] = {}
|
|
124
|
+
|
|
125
|
+
if "problem_packet" in parameters:
|
|
126
|
+
kwargs["problem_packet"] = problem_packet
|
|
127
|
+
if "problem" in parameters:
|
|
128
|
+
kwargs["problem"] = problem_packet
|
|
129
|
+
if "brief" in parameters:
|
|
130
|
+
kwargs["brief"] = problem_packet.brief
|
|
131
|
+
if "run_spec" in parameters:
|
|
132
|
+
kwargs["run_spec"] = run_spec
|
|
133
|
+
if "condition" in parameters:
|
|
134
|
+
kwargs["condition"] = condition
|
|
135
|
+
if "seed" in parameters:
|
|
136
|
+
kwargs["seed"] = run_spec.seed
|
|
137
|
+
|
|
138
|
+
if kwargs:
|
|
139
|
+
return callable_obj(**kwargs)
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
return callable_obj(problem_packet, run_spec.seed)
|
|
143
|
+
except TypeError:
|
|
144
|
+
return callable_obj(problem_packet)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _normalize_agent_execution(
    *,
    raw: Any,
    run_spec: RunSpec,
    condition: Condition,
) -> AgentExecution:
    """Coerce a raw agent result into an ``AgentExecution``.

    Args:
        raw: Value returned by the agent. Mappings are read key by key; any
            other value is stringified and stored as ``{"text": ...}``.
        run_spec: Run specification used when normalizing events.
        condition: Condition used when normalizing events.

    Returns:
        The normalized execution bundle.
    """
    # Non-mapping results carry no structure: keep only their string form.
    if not isinstance(raw, Mapping):
        text_only = {"text": str(raw)}
        fallback_events = _normalize_events(
            raw_events=[],
            run_spec=run_spec,
            condition=condition,
            output=text_only,
        )
        return AgentExecution(
            output=text_only,
            metrics={},
            events=fallback_events,
            trace_refs=[],
            metadata={},
        )

    # "output" wins over "outputs"; a bare "text" entry is the last resort.
    payload = dict(raw.get("output", raw.get("outputs", {})))
    if not payload and "text" in raw:
        payload = {"text": raw["text"]}

    return AgentExecution(
        output=payload,
        metrics=dict(raw.get("metrics", {})),
        trace_refs=[str(ref) for ref in raw.get("trace_refs", [])],
        metadata=dict(raw.get("metadata", {})),
        events=_normalize_events(
            raw_events=raw.get("events", []),
            run_spec=run_spec,
            condition=condition,
            output=payload,
        ),
    )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _normalize_events(
    *,
    raw_events: Sequence[Any],
    run_spec: RunSpec,
    condition: Condition,
    output: Mapping[str, Any],
) -> list[Observation]:
    """Convert raw event payloads into ``Observation`` records.

    Args:
        raw_events: Event payloads reported by the agent; entries that are not
            mappings are ignored.
        run_spec: Supplies ``run_id`` and ``study_id`` for every record.
        condition: Supplies ``condition_id`` for every record.
        output: Normalized agent output; its ``"text"`` entry fills the
            synthetic event created when no payload could be normalized.

    Returns:
        One observation per mapping payload, or a single auto-generated
        ``assistant_output`` observation when there are none.
    """
    normalized: list[Observation] = []

    for position, payload in enumerate(raw_events):
        if not isinstance(payload, Mapping):
            continue

        timestamp = str(payload.get("timestamp", utc_now_iso()))
        # Fallback identifier derived from the run, the position, and the event type.
        derived_id = hash_identifier(
            "evt",
            {
                "run_id": run_spec.run_id,
                "index": position,
                "event_type": payload.get("event_type", "event"),
            },
        )
        record_id = str(payload.get("record_id", derived_id))
        text = str(payload.get("text", ""))
        session_id = str(payload.get("session_id", run_spec.run_id))
        actor_id = str(payload.get("actor_id", "agent"))
        event_type = str(payload.get("event_type", "event"))
        meta_json = dict(payload.get("meta_json", {}))
        level = ObservationLevel(str(payload.get("level", ObservationLevel.STEP.value)))

        normalized.append(
            Observation(
                timestamp=timestamp,
                record_id=record_id,
                text=text,
                session_id=session_id,
                actor_id=actor_id,
                event_type=event_type,
                meta_json=meta_json,
                level=level,
                study_id=run_spec.study_id,
                run_id=run_spec.run_id,
                condition_id=condition.condition_id,
                trial_id=payload.get("trial_id"),
                step_id=payload.get("step_id"),
                tool_name=payload.get("tool_name"),
                evaluation_id=payload.get("evaluation_id"),
            )
        )

    # Guarantee at least one record by synthesizing one from the output text.
    if not normalized:
        normalized.append(
            Observation(
                timestamp=utc_now_iso(),
                record_id=hash_identifier("evt", {"run_id": run_spec.run_id, "index": 0}),
                text=str(output.get("text", "")),
                session_id=run_spec.run_id,
                actor_id="agent",
                event_type="assistant_output",
                meta_json={"auto_generated": True},
                level=ObservationLevel.STEP,
                study_id=run_spec.study_id,
                run_id=run_spec.run_id,
                condition_id=condition.condition_id,
            )
        )

    return normalized
|