design-research-experiments 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. design_research_experiments-0.1.0/LICENSE +21 -0
  2. design_research_experiments-0.1.0/PKG-INFO +126 -0
  3. design_research_experiments-0.1.0/README.md +89 -0
  4. design_research_experiments-0.1.0/pyproject.toml +97 -0
  5. design_research_experiments-0.1.0/setup.cfg +4 -0
  6. design_research_experiments-0.1.0/src/design_research_experiments/__init__.py +87 -0
  7. design_research_experiments-0.1.0/src/design_research_experiments/adapters/__init__.py +16 -0
  8. design_research_experiments-0.1.0/src/design_research_experiments/adapters/agents.py +248 -0
  9. design_research_experiments-0.1.0/src/design_research_experiments/adapters/analysis.py +81 -0
  10. design_research_experiments-0.1.0/src/design_research_experiments/adapters/problems.py +196 -0
  11. design_research_experiments-0.1.0/src/design_research_experiments/artifacts.py +432 -0
  12. design_research_experiments-0.1.0/src/design_research_experiments/bundles.py +66 -0
  13. design_research_experiments-0.1.0/src/design_research_experiments/cli.py +280 -0
  14. design_research_experiments-0.1.0/src/design_research_experiments/conditions.py +459 -0
  15. design_research_experiments-0.1.0/src/design_research_experiments/designs.py +789 -0
  16. design_research_experiments-0.1.0/src/design_research_experiments/hypotheses.py +276 -0
  17. design_research_experiments-0.1.0/src/design_research_experiments/io/__init__.py +16 -0
  18. design_research_experiments-0.1.0/src/design_research_experiments/io/csv_io.py +47 -0
  19. design_research_experiments-0.1.0/src/design_research_experiments/io/json_io.py +22 -0
  20. design_research_experiments-0.1.0/src/design_research_experiments/io/sqlite_io.py +69 -0
  21. design_research_experiments-0.1.0/src/design_research_experiments/io/yaml_io.py +44 -0
  22. design_research_experiments-0.1.0/src/design_research_experiments/metrics.py +105 -0
  23. design_research_experiments-0.1.0/src/design_research_experiments/py.typed +1 -0
  24. design_research_experiments-0.1.0/src/design_research_experiments/recipes.py +525 -0
  25. design_research_experiments-0.1.0/src/design_research_experiments/reporting.py +109 -0
  26. design_research_experiments-0.1.0/src/design_research_experiments/runners.py +428 -0
  27. design_research_experiments-0.1.0/src/design_research_experiments/schemas.py +279 -0
  28. design_research_experiments-0.1.0/src/design_research_experiments/study.py +370 -0
  29. design_research_experiments-0.1.0/src/design_research_experiments.egg-info/PKG-INFO +126 -0
  30. design_research_experiments-0.1.0/src/design_research_experiments.egg-info/SOURCES.txt +43 -0
  31. design_research_experiments-0.1.0/src/design_research_experiments.egg-info/dependency_links.txt +1 -0
  32. design_research_experiments-0.1.0/src/design_research_experiments.egg-info/entry_points.txt +2 -0
  33. design_research_experiments-0.1.0/src/design_research_experiments.egg-info/requires.txt +14 -0
  34. design_research_experiments-0.1.0/src/design_research_experiments.egg-info/top_level.txt +1 -0
  35. design_research_experiments-0.1.0/tests/test_adapters_and_reporting.py +346 -0
  36. design_research_experiments-0.1.0/tests/test_artifacts.py +152 -0
  37. design_research_experiments-0.1.0/tests/test_cli_and_examples.py +128 -0
  38. design_research_experiments-0.1.0/tests/test_conditions.py +68 -0
  39. design_research_experiments-0.1.0/tests/test_conditions_and_designs_extended.py +237 -0
  40. design_research_experiments-0.1.0/tests/test_core.py +125 -0
  41. design_research_experiments-0.1.0/tests/test_designs.py +98 -0
  42. design_research_experiments-0.1.0/tests/test_doe_capabilities.py +145 -0
  43. design_research_experiments-0.1.0/tests/test_io_study_schemas_and_runners.py +299 -0
  44. design_research_experiments-0.1.0/tests/test_public_api.py +56 -0
  45. design_research_experiments-0.1.0/tests/test_recipes.py +144 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 The Design Research Collective
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,126 @@
1
+ Metadata-Version: 2.4
2
+ Name: design-research-experiments
3
+ Version: 0.1.0
4
+ Summary: Study-definition and orchestration layer for the cmudrc design research ecosystem
5
+ Author: The Design Research Collective
6
+ Maintainer-email: "Christopher C. McComb" <ccm@cmu.edu>
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/cmudrc/design-research-experiments
9
+ Project-URL: Repository, https://github.com/cmudrc/design-research-experiments
10
+ Project-URL: Issues, https://github.com/cmudrc/design-research-experiments/issues
11
+ Keywords: design,research,experiments,orchestration,doe
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: >=3.12
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: PyYAML<7,>=6.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: build<2,>=1.2; extra == "dev"
26
+ Requires-Dist: mypy<2,>=1.10; extra == "dev"
27
+ Requires-Dist: pre-commit<5,>=3.7; extra == "dev"
28
+ Requires-Dist: pytest<9,>=8.2; extra == "dev"
29
+ Requires-Dist: pytest-cov<8,>=7.0; extra == "dev"
30
+ Requires-Dist: ruff<1,>=0.6.0; extra == "dev"
31
+ Requires-Dist: sphinx<9,>=7.4; extra == "dev"
32
+ Requires-Dist: sphinx-rtd-theme<4,>=2.0; extra == "dev"
33
+ Requires-Dist: twine<7,>=5.1; extra == "dev"
34
+ Requires-Dist: types-PyYAML<7,>=6.0; extra == "dev"
35
+ Requires-Dist: uv<1,>=0.6; extra == "dev"
36
+ Dynamic: license-file
37
+
38
+ # design-research-experiments
39
+ [![CI](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml)
40
+ [![Docs](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml)
41
+
42
+ `design-research-experiments` is the hypothesis-first study-definition and
43
+ experiment-orchestration layer in the cmudrc design research ecosystem.
44
+
45
+ It composes sibling libraries rather than reimplementing them:
46
+
47
+ - `design-research-agents` for executable agent behavior, workflows, and traces
48
+ - `design-research-problems` for problem catalogs, registries, and evaluators
49
+ - `design-research-analysis` for downstream unified-table analysis and reporting
50
+
51
+ ## Overview
52
+
53
+ This package centers on reproducible experiment structure and execution:
54
+
55
+ - typed schemas for studies, factors, blocks, hypotheses, outcomes, and analysis plans
56
+ - design-of-experiments materialization (full/constrained factorial, randomized block,
57
+ repeated measures, latin square, custom matrices)
58
+ - run orchestration with deterministic seeding, checkpointing, and resume support
59
+ - canonical artifact exports (`study.yaml`, `manifest.json`, `conditions.csv`,
60
+ `runs.csv`, `events.csv`, `evaluations.csv`, and machine-readable hypothesis/plan files)
61
+ - thin adapters that connect to the public APIs of sibling agent/problem/analysis libraries
62
+
63
+ ## Quickstart
64
+
65
+ Requires Python 3.12+.
66
+ Reproducible release installs are pinned to Python `3.12.12` (`.python-version`).
67
+
68
+ ```bash
69
+ python -m venv .venv
70
+ source .venv/bin/activate
71
+ make dev
72
+ make test
73
+ ```
74
+
75
+ Run a basic example:
76
+
77
+ ```bash
78
+ make run-example
79
+ ```
80
+
81
+ ## CLI
82
+
83
+ The package installs a `drexp` CLI:
84
+
85
+ ```bash
86
+ drexp validate-study path/to/study.yaml
87
+ drexp materialize-design path/to/study.yaml
88
+ drexp generate-doe --kind lhs --factors-json '{"x": [0, 1], "y": [10, 20]}' --n-samples 12 --out artifacts/doe.csv
89
+ drexp run-study path/to/study.yaml
90
+ drexp resume-study path/to/study.yaml
91
+ drexp export-analysis path/to/study.yaml
92
+ drexp bundle-results path/to/output_dir
93
+ ```
94
+
95
+ ## Examples
96
+
97
+ See [examples/README.md](examples/README.md) for runnable scripts, including
98
+ end-to-end recipe executions.
99
+
100
+ ## Docs
101
+
102
+ See the [published documentation](https://cmudrc.github.io/design-research-experiments/)
103
+ for guides and API reference.
104
+
105
+ Build docs locally with:
106
+
107
+ ```bash
108
+ make docs
109
+ ```
110
+
111
+ ## Public API
112
+
113
+ Top-level exports are intentionally small:
114
+
115
+ - `Study`, `Factor`, `Level`, `Constraint`, `Condition`, `Block`
116
+ - `RecipeStudyConfig`, recipe-specific typed config classes
117
+ - `Hypothesis`, `OutcomeSpec`, `AnalysisPlan`
118
+ - `RunSpec`, `RunResult`, `BenchmarkBundle`
119
+ - `build_design`, `generate_doe`, `materialize_conditions`
120
+ - `build_prompt_framing_study`, `build_optimization_benchmark_study`, and other recipe builders
121
+ - `run_study`, `resume_study`
122
+ - `export_analysis_tables`, `validate_study`
123
+
124
+ ## Contributing
125
+
126
+ Contribution workflow and quality gates are documented in `CONTRIBUTING.md`.
@@ -0,0 +1,89 @@
1
+ # design-research-experiments
2
+ [![CI](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/ci.yml)
3
+ [![Docs](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml/badge.svg)](https://github.com/cmudrc/design-research-experiments/actions/workflows/docs-pages.yml)
4
+
5
+ `design-research-experiments` is the hypothesis-first study-definition and
6
+ experiment-orchestration layer in the cmudrc design research ecosystem.
7
+
8
+ It composes sibling libraries rather than reimplementing them:
9
+
10
+ - `design-research-agents` for executable agent behavior, workflows, and traces
11
+ - `design-research-problems` for problem catalogs, registries, and evaluators
12
+ - `design-research-analysis` for downstream unified-table analysis and reporting
13
+
14
+ ## Overview
15
+
16
+ This package centers on reproducible experiment structure and execution:
17
+
18
+ - typed schemas for studies, factors, blocks, hypotheses, outcomes, and analysis plans
19
+ - design-of-experiments materialization (full/constrained factorial, randomized block,
20
+ repeated measures, latin square, custom matrices)
21
+ - run orchestration with deterministic seeding, checkpointing, and resume support
22
+ - canonical artifact exports (`study.yaml`, `manifest.json`, `conditions.csv`,
23
+ `runs.csv`, `events.csv`, `evaluations.csv`, and machine-readable hypothesis/plan files)
24
+ - thin adapters that connect to the public APIs of sibling agent/problem/analysis libraries
25
+
26
+ ## Quickstart
27
+
28
+ Requires Python 3.12+.
29
+ Reproducible release installs are pinned to Python `3.12.12` (`.python-version`).
30
+
31
+ ```bash
32
+ python -m venv .venv
33
+ source .venv/bin/activate
34
+ make dev
35
+ make test
36
+ ```
37
+
38
+ Run a basic example:
39
+
40
+ ```bash
41
+ make run-example
42
+ ```
43
+
44
+ ## CLI
45
+
46
+ The package installs a `drexp` CLI:
47
+
48
+ ```bash
49
+ drexp validate-study path/to/study.yaml
50
+ drexp materialize-design path/to/study.yaml
51
+ drexp generate-doe --kind lhs --factors-json '{"x": [0, 1], "y": [10, 20]}' --n-samples 12 --out artifacts/doe.csv
52
+ drexp run-study path/to/study.yaml
53
+ drexp resume-study path/to/study.yaml
54
+ drexp export-analysis path/to/study.yaml
55
+ drexp bundle-results path/to/output_dir
56
+ ```
57
+
58
+ ## Examples
59
+
60
+ See [examples/README.md](examples/README.md) for runnable scripts, including
61
+ end-to-end recipe executions.
62
+
63
+ ## Docs
64
+
65
+ See the [published documentation](https://cmudrc.github.io/design-research-experiments/)
66
+ for guides and API reference.
67
+
68
+ Build docs locally with:
69
+
70
+ ```bash
71
+ make docs
72
+ ```
73
+
74
+ ## Public API
75
+
76
+ Top-level exports are intentionally small:
77
+
78
+ - `Study`, `Factor`, `Level`, `Constraint`, `Condition`, `Block`
79
+ - `RecipeStudyConfig`, recipe-specific typed config classes
80
+ - `Hypothesis`, `OutcomeSpec`, `AnalysisPlan`
81
+ - `RunSpec`, `RunResult`, `BenchmarkBundle`
82
+ - `build_design`, `generate_doe`, `materialize_conditions`
83
+ - `build_prompt_framing_study`, `build_optimization_benchmark_study`, and other recipe builders
84
+ - `run_study`, `resume_study`
85
+ - `export_analysis_tables`, `validate_study`
86
+
87
+ ## Contributing
88
+
89
+ Contribution workflow and quality gates are documented in `CONTRIBUTING.md`.
@@ -0,0 +1,97 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "design-research-experiments"
7
+ version = "0.1.0"
8
+ description = "Study-definition and orchestration layer for the cmudrc design research ecosystem"
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [{ name = "The Design Research Collective" }]
14
+ maintainers = [{ name = "Christopher C. McComb", email = "ccm@cmu.edu" }]
15
+ keywords = ["design", "research", "experiments", "orchestration", "doe"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Science/Research",
19
+ "Programming Language :: Python :: 3 :: Only",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Operating System :: OS Independent",
23
+ "Topic :: Scientific/Engineering",
24
+ "Typing :: Typed",
25
+ ]
26
+ dependencies = [
27
+ "PyYAML>=6.0,<7",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = [
32
+ "build>=1.2,<2",
33
+ "mypy>=1.10,<2",
34
+ "pre-commit>=3.7,<5",
35
+ "pytest>=8.2,<9",
36
+ "pytest-cov>=7.0,<8",
37
+ "ruff>=0.6.0,<1",
38
+ "sphinx>=7.4,<9",
39
+ "sphinx-rtd-theme>=2.0,<4",
40
+ "twine>=5.1,<7",
41
+ "types-PyYAML>=6.0,<7",
42
+ "uv>=0.6,<1",
43
+ ]
44
+
45
+ [project.urls]
46
+ Homepage = "https://github.com/cmudrc/design-research-experiments"
47
+ Repository = "https://github.com/cmudrc/design-research-experiments"
48
+ Issues = "https://github.com/cmudrc/design-research-experiments/issues"
49
+
50
+ [project.scripts]
51
+ drexp = "design_research_experiments.cli:main"
52
+
53
+ [tool.setuptools]
54
+ package-dir = { "" = "src" }
55
+ include-package-data = true
56
+
57
+ [tool.setuptools.packages.find]
58
+ where = ["src"]
59
+
60
+ [tool.setuptools.package-data]
61
+ design_research_experiments = ["py.typed"]
62
+
63
+ [tool.pytest.ini_options]
64
+ addopts = "-q"
65
+ testpaths = ["tests"]
66
+
67
+ [tool.ruff]
68
+ line-length = 100
69
+ target-version = "py312"
70
+ src = ["src", "tests", "examples", "scripts"]
71
+
72
+ [tool.ruff.format]
73
+ quote-style = "double"
74
+ indent-style = "space"
75
+ skip-magic-trailing-comma = false
76
+ line-ending = "auto"
77
+
78
+ [tool.ruff.lint]
79
+ select = ["E", "F", "I", "UP", "B", "SIM", "RUF", "D"]
80
+
81
+ [tool.ruff.lint.pydocstyle]
82
+ convention = "google"
83
+
84
+ [tool.ruff.lint.per-file-ignores]
85
+ "tests/*.py" = ["D"]
86
+
87
+ [tool.mypy]
88
+ packages = ["design_research_experiments"]
89
+ python_version = "3.12"
90
+ warn_unused_configs = true
91
+ check_untyped_defs = true
92
+ disallow_untyped_defs = true
93
+ no_implicit_optional = true
94
+ warn_redundant_casts = true
95
+ warn_unused_ignores = true
96
+ warn_return_any = true
97
+ strict_equality = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,87 @@
1
+ """Public API exports for design-research-experiments."""
2
+
3
+ from .adapters.analysis import export_analysis_tables
4
+ from .adapters.problems import ProblemPacket
5
+ from .bundles import (
6
+ BenchmarkBundle,
7
+ grammar_problem_bundle,
8
+ human_vs_agent_bundle,
9
+ ideation_bundle,
10
+ optimization_bundle,
11
+ )
12
+ from .conditions import Condition, Constraint, Factor, FactorKind, Level, materialize_conditions
13
+ from .designs import build_design, generate_doe
14
+ from .hypotheses import AnalysisPlan, Hypothesis, OutcomeSpec
15
+ from .recipes import (
16
+ AgentArchitectureComparisonConfig,
17
+ DiversityAndExplorationConfig,
18
+ GrammarScaffoldConfig,
19
+ HumanVsAgentProcessConfig,
20
+ OptimizationBenchmarkConfig,
21
+ PromptFramingConfig,
22
+ RecipeStudyConfig,
23
+ build_agent_architecture_comparison_study,
24
+ build_diversity_and_exploration_study,
25
+ build_grammar_scaffold_study,
26
+ build_human_vs_agent_process_study,
27
+ build_optimization_benchmark_study,
28
+ build_prompt_framing_study,
29
+ )
30
+ from .reporting import (
31
+ render_codebook,
32
+ render_markdown_summary,
33
+ render_methods_scaffold,
34
+ render_significance_brief,
35
+ write_markdown_report,
36
+ )
37
+ from .runners import resume_study, run_study
38
+ from .schemas import RunBudget, SeedPolicy
39
+ from .study import Block, RunResult, RunSpec, Study, validate_study
40
+
41
+ __all__ = [
42
+ "AgentArchitectureComparisonConfig",
43
+ "AnalysisPlan",
44
+ "BenchmarkBundle",
45
+ "Block",
46
+ "Condition",
47
+ "Constraint",
48
+ "DiversityAndExplorationConfig",
49
+ "Factor",
50
+ "FactorKind",
51
+ "GrammarScaffoldConfig",
52
+ "HumanVsAgentProcessConfig",
53
+ "Hypothesis",
54
+ "Level",
55
+ "OptimizationBenchmarkConfig",
56
+ "OutcomeSpec",
57
+ "ProblemPacket",
58
+ "PromptFramingConfig",
59
+ "RecipeStudyConfig",
60
+ "RunBudget",
61
+ "RunResult",
62
+ "RunSpec",
63
+ "SeedPolicy",
64
+ "Study",
65
+ "build_agent_architecture_comparison_study",
66
+ "build_design",
67
+ "build_diversity_and_exploration_study",
68
+ "build_grammar_scaffold_study",
69
+ "build_human_vs_agent_process_study",
70
+ "build_optimization_benchmark_study",
71
+ "build_prompt_framing_study",
72
+ "export_analysis_tables",
73
+ "generate_doe",
74
+ "grammar_problem_bundle",
75
+ "human_vs_agent_bundle",
76
+ "ideation_bundle",
77
+ "materialize_conditions",
78
+ "optimization_bundle",
79
+ "render_codebook",
80
+ "render_markdown_summary",
81
+ "render_methods_scaffold",
82
+ "render_significance_brief",
83
+ "resume_study",
84
+ "run_study",
85
+ "validate_study",
86
+ "write_markdown_report",
87
+ ]
@@ -0,0 +1,16 @@
1
+ """Integration adapters for agents, problems, and downstream analysis."""
2
+
3
+ from .agents import AgentExecution, execute_agent, resolve_agent
4
+ from .analysis import export_analysis_tables
5
+ from .problems import ProblemPacket, evaluate_problem, resolve_problem, sample_problem_packets
6
+
7
+ __all__ = [
8
+ "AgentExecution",
9
+ "ProblemPacket",
10
+ "evaluate_problem",
11
+ "execute_agent",
12
+ "export_analysis_tables",
13
+ "resolve_agent",
14
+ "resolve_problem",
15
+ "sample_problem_packets",
16
+ ]
@@ -0,0 +1,248 @@
1
+ """Agent-layer adapter utilities built on public agent APIs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib
6
+ import inspect
7
+ from collections.abc import Callable, Mapping, Sequence
8
+ from dataclasses import dataclass, field
9
+ from typing import Any
10
+
11
+ from ..conditions import Condition
12
+ from ..schemas import Observation, ObservationLevel, ValidationError, hash_identifier, utc_now_iso
13
+ from ..study import RunSpec
14
+ from .problems import ProblemPacket
15
+
16
+
17
+ @dataclass(slots=True)
18
+ class AgentExecution:
19
+ """Normalized agent execution bundle."""
20
+
21
+ output: dict[str, Any] = field(default_factory=dict)
22
+ metrics: dict[str, Any] = field(default_factory=dict)
23
+ events: list[Observation] = field(default_factory=list)
24
+ trace_refs: list[str] = field(default_factory=list)
25
+ metadata: dict[str, Any] = field(default_factory=dict)
26
+
27
+
28
def resolve_agent(
    agent_spec_ref: Any,
    *,
    condition: Condition,
    factories: Mapping[str, Callable[[Condition], Any]] | None = None,
) -> Any:
    """Turn an agent reference into an executable object.

    Non-string references are returned unchanged. String references are looked
    up first in ``factories`` and then among the attributes of the optional
    ``design_research_agents`` package.

    Args:
        agent_spec_ref: Either a string identifier or an already-resolved object.
        condition: Condition passed to the matching factory, if one is used.
        factories: Optional mapping from identifier to a factory taking the
            condition.

    Returns:
        The resolved agent object.

    Raises:
        ValidationError: If a string identifier matches neither a factory nor
            the ``design_research_agents`` lookup.
    """
    # Anything that is not a string is passed through as-is.
    if not isinstance(agent_spec_ref, str):
        return agent_spec_ref

    # Caller-registered factories take precedence over the package lookup.
    if factories and agent_spec_ref in factories:
        build = factories[agent_spec_ref]
        return build(condition)

    packaged_agent = _resolve_from_design_research_agents(agent_spec_ref)
    if packaged_agent is None:
        raise ValidationError(
            "Unknown agent spec "
            f"'{agent_spec_ref}'. Register a factory or pass an executable object."
        )
    return packaged_agent
49
+
50
+
51
def execute_agent(
    *,
    agent_spec_ref: Any,
    run_spec: RunSpec,
    condition: Condition,
    problem_packet: ProblemPacket,
    factories: Mapping[str, Callable[[Condition], Any]] | None = None,
) -> AgentExecution:
    """Run one agent and return its result in normalized form.

    The reference is resolved, the resulting object is invoked, and the raw
    result is converted into an ``AgentExecution``.

    Args:
        agent_spec_ref: String identifier or executable agent object.
        run_spec: Specification of the run being executed.
        condition: Condition the run belongs to.
        problem_packet: Problem handed to the agent.
        factories: Optional identifier-to-factory mapping used during resolution.

    Returns:
        The normalized execution bundle.
    """
    # Arguments shared by the invocation and normalization steps.
    run_context = {"run_spec": run_spec, "condition": condition}

    agent = resolve_agent(agent_spec_ref, condition=condition, factories=factories)
    raw_result = _invoke_agent(executable=agent, problem_packet=problem_packet, **run_context)
    return _normalize_agent_execution(raw=raw_result, **run_context)
68
+
69
+
70
def _resolve_from_design_research_agents(agent_id: str) -> Any | None:
    """Look up ``agent_id`` as a callable attribute of ``design_research_agents``.

    Args:
        agent_id: Attribute name to look for on the package.

    Returns:
        ``None`` when the package cannot be imported or the attribute is
        missing or not callable. Otherwise the result of calling the attribute
        without arguments, or the attribute itself if that call raises.
    """
    try:
        agents_pkg = importlib.import_module("design_research_agents")
    except ImportError:
        # The sibling package is optional.
        return None

    candidate = getattr(agents_pkg, agent_id, None)
    if not callable(candidate):
        return None

    try:
        return candidate()
    except Exception:
        # Best effort: hand back the uninstantiated callable when a zero-argument
        # call does not succeed.
        return candidate
85
+
86
+
87
def _invoke_agent(
    *,
    executable: Any,
    run_spec: RunSpec,
    condition: Condition,
    problem_packet: ProblemPacket,
) -> Any:
    """Call a resolved agent through its ``run`` method or directly.

    Args:
        executable: Object exposing a callable ``run`` attribute, or a callable.
        run_spec: Specification of the run being executed.
        condition: Condition the run belongs to.
        problem_packet: Problem handed to the agent.

    Returns:
        Whatever the selected callable returns.

    Raises:
        ValidationError: If ``executable`` has no callable ``run`` attribute
            and is not callable itself.
    """
    # A callable ``run`` attribute wins over calling the object itself.
    run_method = getattr(executable, "run", None)
    if callable(run_method):
        target = run_method
    elif callable(executable):
        target = executable
    else:
        raise ValidationError("Resolved agent object is not executable.")

    return _invoke_callable(
        callable_obj=target,
        run_spec=run_spec,
        condition=condition,
        problem_packet=problem_packet,
    )
112
+
113
+
114
def _invoke_callable(
    *,
    callable_obj: Callable[..., Any],
    run_spec: RunSpec,
    condition: Condition,
    problem_packet: ProblemPacket,
) -> Any:
    """Invoke a callable by matching the parameters it declares.

    Recognized parameter names are filled by keyword: ``problem_packet`` and
    ``problem`` receive the packet, ``brief`` receives ``problem_packet.brief``,
    ``run_spec`` and ``condition`` receive the corresponding arguments, and
    ``seed`` receives ``run_spec.seed``. If none of these names is declared,
    the callable is invoked positionally with ``(problem_packet, seed)`` when
    its signature accepts two positional arguments, and with
    ``(problem_packet)`` otherwise.

    Args:
        callable_obj: The function or bound method to call.
        run_spec: Specification of the run being executed.
        condition: Condition the run belongs to.
        problem_packet: Problem handed to the callable.

    Returns:
        Whatever ``callable_obj`` returns.

    Raises:
        TypeError: If ``callable_obj`` is not callable or accepts none of the
            supported call shapes. Exceptions raised inside the callable
            propagate unchanged.
    """
    try:
        signature = inspect.signature(callable_obj)
    except ValueError:
        # Some callables expose no introspectable signature.
        signature = None

    kwargs: dict[str, Any] = {}
    if signature is not None:
        parameters = signature.parameters
        if "problem_packet" in parameters:
            kwargs["problem_packet"] = problem_packet
        if "problem" in parameters:
            kwargs["problem"] = problem_packet
        if "brief" in parameters:
            kwargs["brief"] = problem_packet.brief
        if "run_spec" in parameters:
            kwargs["run_spec"] = run_spec
        if "condition" in parameters:
            kwargs["condition"] = condition
        if "seed" in parameters:
            kwargs["seed"] = run_spec.seed

    if kwargs:
        return callable_obj(**kwargs)

    if signature is None:
        # Without a signature the arity can only be discovered by trying.
        try:
            return callable_obj(problem_packet, run_spec.seed)
        except TypeError:
            return callable_obj(problem_packet)

    # Decide the call shape before calling. Wrapping the call itself in
    # ``except TypeError`` would swallow a TypeError raised inside the agent
    # and run the agent a second time.
    try:
        signature.bind(problem_packet, run_spec.seed)
    except TypeError:
        return callable_obj(problem_packet)
    return callable_obj(problem_packet, run_spec.seed)
145
+
146
+
147
def _normalize_agent_execution(
    *,
    raw: Any,
    run_spec: RunSpec,
    condition: Condition,
) -> AgentExecution:
    """Convert a raw agent result into an ``AgentExecution``.

    Mapping results are read through the keys ``output`` (falling back to
    ``outputs``), ``text``, ``metrics``, ``trace_refs``, ``metadata`` and
    ``events``. Any other result is stringified and stored under ``"text"``.

    Args:
        raw: Value returned by the agent.
        run_spec: Specification of the run that produced ``raw``.
        condition: Condition the run belongs to.

    Returns:
        The normalized execution bundle.
    """
    if not isinstance(raw, Mapping):
        # Non-mapping results carry nothing but their string form.
        text_only = {"text": str(raw)}
        synthesized = _normalize_events(
            raw_events=[],
            run_spec=run_spec,
            condition=condition,
            output=text_only,
        )
        return AgentExecution(
            output=text_only,
            metrics={},
            events=synthesized,
            trace_refs=[],
            metadata={},
        )

    alternate_payload = raw.get("outputs", {})
    payload = dict(raw.get("output", alternate_payload))
    # An empty payload is replaced by the top-level text, when present.
    if not payload and "text" in raw:
        payload = {"text": raw["text"]}

    return AgentExecution(
        output=payload,
        metrics=dict(raw.get("metrics", {})),
        trace_refs=[str(ref) for ref in raw.get("trace_refs", [])],
        metadata=dict(raw.get("metadata", {})),
        events=_normalize_events(
            raw_events=raw.get("events", []),
            run_spec=run_spec,
            condition=condition,
            output=payload,
        ),
    )
184
+
185
+
186
def _normalize_events(
    *,
    raw_events: Sequence[Any],
    run_spec: RunSpec,
    condition: Condition,
    output: Mapping[str, Any],
) -> list[Observation]:
    """Build ``Observation`` records from externally supplied event payloads.

    Entries that are not mappings are skipped. When no entry yields a record,
    a single ``assistant_output`` observation is synthesized from the
    ``"text"`` value of ``output``.

    Args:
        raw_events: Event payloads reported by the agent.
        run_spec: Supplies the study and run identifiers.
        condition: Supplies the condition identifier.
        output: Normalized output used for the synthesized fallback event.

    Returns:
        A non-empty list of observations.
    """

    def _as_observation(position: int, payload: Mapping[str, Any]) -> Observation:
        """Convert one mapping payload, filling defaults for absent keys."""
        kind = payload.get("event_type", "event")
        # Identifier used when the payload carries no ``record_id`` of its own.
        derived_id = hash_identifier(
            "evt",
            {
                "run_id": run_spec.run_id,
                "index": position,
                "event_type": kind,
            },
        )
        level_value = str(payload.get("level", ObservationLevel.STEP.value))
        return Observation(
            timestamp=str(payload.get("timestamp", utc_now_iso())),
            record_id=str(payload.get("record_id", derived_id)),
            text=str(payload.get("text", "")),
            session_id=str(payload.get("session_id", run_spec.run_id)),
            actor_id=str(payload.get("actor_id", "agent")),
            event_type=str(kind),
            meta_json=dict(payload.get("meta_json", {})),
            level=ObservationLevel(level_value),
            study_id=run_spec.study_id,
            run_id=run_spec.run_id,
            condition_id=condition.condition_id,
            trial_id=payload.get("trial_id"),
            step_id=payload.get("step_id"),
            tool_name=payload.get("tool_name"),
            evaluation_id=payload.get("evaluation_id"),
        )

    # ``position`` counts every raw entry, including the skipped ones.
    normalized = [
        _as_observation(position, payload)
        for position, payload in enumerate(raw_events)
        if isinstance(payload, Mapping)
    ]
    if normalized:
        return normalized

    # Nothing usable was reported: emit one auto-generated event instead.
    fallback = Observation(
        timestamp=utc_now_iso(),
        record_id=hash_identifier("evt", {"run_id": run_spec.run_id, "index": 0}),
        text=str(output.get("text", "")),
        session_id=run_spec.run_id,
        actor_id="agent",
        event_type="assistant_output",
        meta_json={"auto_generated": True},
        level=ObservationLevel.STEP,
        study_id=run_spec.study_id,
        run_id=run_spec.run_id,
        condition_id=condition.condition_id,
    )
    return [fallback]