agent-apprenticeship 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +217 -0
- package/bin/agent-apprenticeship.js +131 -0
- package/package.json +30 -0
- package/pyproject.toml +23 -0
- package/src/agent_apprenticeship_trace/__init__.py +2 -0
- package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
- package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
- package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
- package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
- package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
- package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
- package/src/agent_apprenticeship_trace/certification.py +580 -0
- package/src/agent_apprenticeship_trace/cli.py +2979 -0
- package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
- package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
- package/src/agent_apprenticeship_trace/config.py +609 -0
- package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
- package/src/agent_apprenticeship_trace/env.py +46 -0
- package/src/agent_apprenticeship_trace/evaluator.py +64 -0
- package/src/agent_apprenticeship_trace/grader.py +194 -0
- package/src/agent_apprenticeship_trace/integration_status.py +193 -0
- package/src/agent_apprenticeship_trace/io.py +20 -0
- package/src/agent_apprenticeship_trace/learning.py +627 -0
- package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
- package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
- package/src/agent_apprenticeship_trace/loop.py +111 -0
- package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
- package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
- package/src/agent_apprenticeship_trace/progress.py +223 -0
- package/src/agent_apprenticeship_trace/public_run.py +1109 -0
- package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
- package/src/agent_apprenticeship_trace/recipes.py +129 -0
- package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
- package/src/agent_apprenticeship_trace/revision.py +21 -0
- package/src/agent_apprenticeship_trace/role_runners.py +7 -0
- package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
- package/src/agent_apprenticeship_trace/schemas.py +273 -0
- package/src/agent_apprenticeship_trace/session_events.py +99 -0
- package/src/agent_apprenticeship_trace/task_intake.py +112 -0
- package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
- package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
- package/src/agent_apprenticeship_trace/training_signals.py +30 -0
- package/src/agent_apprenticeship_trace/validation.py +210 -0
- package/src/agent_apprenticeship_trace/verifier.py +55 -0
|
@@ -0,0 +1,609 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shlex
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Literal
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from .env import load_local_env
|
|
12
|
+
from .command_discovery import resolve_command, resolve_agent_command, gui_app_hint
|
|
13
|
+
from .io import read_json, write_json
|
|
14
|
+
from .recipes import MODEL_PROVIDER_RECIPES, WORKER_AGENT_RECIPES
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Closed vocabularies for the public settings.  Each ``Literal`` alias is used
# for field typing and is paired with a tuple of the same values that the
# normalizers and validators use for runtime membership checks.
MentorMode = Literal["model_assisted", "expert_led", "hybrid"]
MENTOR_MODES: tuple[str, ...] = ("model_assisted", "expert_led", "hybrid")

SensitiveInfoMasking = Literal["standard", "no_masking"]
SENSITIVE_INFO_MASKING_LEVELS: tuple[str, ...] = ("standard", "no_masking")

EcosystemAutoShare = Literal["manual", "ask", "automatic"]
ECOSYSTEM_AUTO_SHARE_MODES: tuple[str, ...] = ("manual", "ask", "automatic")

# Hyphenated spellings that mirror MentorMode / SensitiveInfoMasking.
EvaluationMode = Literal["model-assisted", "expert-led", "hybrid"]
EVALUATION_MODES: tuple[str, ...] = ("model-assisted", "expert-led", "hybrid")

DataSharingLevel = Literal["standard", "full-context"]
DATA_SHARING_LEVELS: tuple[str, ...] = ("standard", "full-context")

# Application home directory used when AA_HOME is not set.
DEFAULT_APP_HOME = Path("~/.agent-apprenticeship").expanduser()

# Default public ecosystem repository and its GitHub URL.
DEFAULT_PUBLIC_ECOSYSTEM_REPO = "Forsy-AI/agent-apprenticeship"
DEFAULT_PUBLIC_ECOSYSTEM_URL = f"https://github.com/{DEFAULT_PUBLIC_ECOSYSTEM_REPO}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def normalize_mentor_mode(value: str | None, default: str = "model_assisted") -> str:
    """Translate a Mentor Mode spelling into its canonical name.

    Args:
        value: Raw setting; ``None`` or blank text selects ``default``.
        default: Value returned (unvalidated) when ``value`` is missing.

    Returns:
        One of the entries of ``MENTOR_MODES``, or ``default``.

    Raises:
        ValueError: If the value is neither an alias nor a canonical mode.
    """
    text = "" if value is None else str(value).strip()
    if not text:
        return default

    # Case, hyphens and spaces are not significant.
    key = text.lower().replace("-", "_").replace(" ", "_")
    alias_groups = {
        "model_assisted": ("model", "model_assisted", "model_assisted_automated", "automated"),
        "expert_led": ("expert", "expert_led", "expert_led_manual", "manual"),
        "hybrid": ("hybrid",),
    }
    canonical_by_alias = {
        alias: canonical
        for canonical, aliases in alias_groups.items()
        for alias in aliases
    }
    mode = canonical_by_alias.get(key, key)
    if mode in MENTOR_MODES:
        return mode
    raise ValueError(f"Unsupported Mentor Mode: {value}")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def mentor_mode_to_evaluation_mode(value: str | None) -> str:
    """Return the hyphenated evaluation-mode spelling for a Mentor Mode.

    The value is normalized first, so aliases are accepted and an
    unsupported value raises ``ValueError``.
    """
    hyphenated = dict(
        model_assisted="model-assisted",
        expert_led="expert-led",
        hybrid="hybrid",
    )
    return hyphenated[normalize_mentor_mode(value)]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def evaluation_mode_to_mentor_mode(value: str | None) -> str:
    """Convert an evaluation-mode spelling to the canonical Mentor Mode.

    Normalization already folds hyphens into underscores, so this simply
    delegates to ``normalize_mentor_mode``.
    """
    mentor_mode = normalize_mentor_mode(value)
    return mentor_mode
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def mentor_mode_display(value: str | None) -> str:
    """Return the hyphenated display label for a Mentor Mode.

    The value is normalized first; unsupported values raise ``ValueError``.
    """
    labels = dict(
        model_assisted="model-assisted",
        expert_led="expert-led",
        hybrid="hybrid",
    )
    return labels[normalize_mentor_mode(value)]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def normalize_sensitive_info_masking(value: str | None, default: str = "standard") -> str:
    """Translate a Sensitive Info Masking spelling into its canonical name.

    Args:
        value: Raw setting; ``None`` or blank text selects ``default``.
        default: Value returned (unvalidated) when ``value`` is missing.

    Returns:
        One of the entries of ``SENSITIVE_INFO_MASKING_LEVELS``, or ``default``.

    Raises:
        ValueError: If the value is neither an alias nor a canonical level.
    """
    text = "" if value is None else str(value).strip()
    if not text:
        return default

    # Case, hyphens and spaces are not significant.
    key = text.lower().replace("-", "_").replace(" ", "_")
    alias_groups = {
        "standard": ("standard", "mask", "masked"),
        "no_masking": ("full", "full_context", "no_masking", "none", "off"),
    }
    canonical_by_alias = {
        alias: canonical
        for canonical, aliases in alias_groups.items()
        for alias in aliases
    }
    level = canonical_by_alias.get(key, key)
    if level in SENSITIVE_INFO_MASKING_LEVELS:
        return level
    raise ValueError(f"Unsupported Sensitive Info Masking setting: {value}")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def sensitive_info_masking_to_data_sharing(value: str | None) -> str:
    """Map a masking setting to the matching data-sharing level.

    ``standard`` masking maps to ``"standard"``; any other normalized
    value maps to ``"full-context"``.
    """
    if normalize_sensitive_info_masking(value) == "standard":
        return "standard"
    return "full-context"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def data_sharing_to_sensitive_info_masking(value: str | None) -> str:
    """Convert a data-sharing level to the canonical masking setting.

    The masking normalizer already accepts the data-sharing spellings
    (``full-context`` becomes ``full_context``), so this delegates to it.
    """
    masking = normalize_sensitive_info_masking(value)
    return masking
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def normalize_ecosystem_auto_share(value: str | None, default: str = "manual") -> str:
    """Translate an Ecosystem Auto-Share spelling into its canonical name.

    Args:
        value: Raw setting; ``None`` or blank text selects ``default``.
        default: Value returned (unvalidated) when ``value`` is missing.

    Returns:
        One of the entries of ``ECOSYSTEM_AUTO_SHARE_MODES``, or ``default``.

    Raises:
        ValueError: If the value is neither an alias nor a canonical mode.
    """
    text = "" if value is None else str(value).strip()
    if not text:
        return default

    # Case, hyphens and spaces are not significant.
    key = text.lower().replace("-", "_").replace(" ", "_")
    alias_groups = {
        "manual": ("manual", "off", "none"),
        "ask": ("ask", "prompt", "ask_before_sharing"),
        "automatic": ("share_automatically", "automatic", "auto", "on"),
    }
    canonical_by_alias = {
        alias: canonical
        for canonical, aliases in alias_groups.items()
        for alias in aliases
    }
    mode = canonical_by_alias.get(key, key)
    if mode in ECOSYSTEM_AUTO_SHARE_MODES:
        return mode
    raise ValueError(f"Unsupported Ecosystem Auto-Share setting: {value}")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def ecosystem_auto_share_display(value: str | None) -> str:
    """Return the human-readable label for an Ecosystem Auto-Share setting.

    The value is normalized first; unsupported values raise ``ValueError``.
    """
    labels = dict(
        manual="Manual",
        ask="Ask before sharing",
        automatic="Share automatically",
    )
    return labels[normalize_ecosystem_auto_share(value)]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class Settings(BaseModel):
    # Effective configuration for a run.  Field order is kept stable because
    # it determines the key order of the dumped settings file.

    # --- Public product settings -----------------------------------------
    app_home: Path = DEFAULT_APP_HOME

    # Apprentice (worker) agent selection and invocation.
    worker_agent: str = "codex"
    worker_agent_command: str | None = None
    worker_agent_model: str | None = None
    worker_agent_extra_args: list[str] = Field(default_factory=list)

    # Mentor model provider; the API key itself is referenced by env-var name.
    model_provider: str | None = None
    model_provider_api_key_env: str | None = None
    model_provider_model: str | None = None

    # mentor_mode / sensitive_info_masking are the underscore spellings;
    # evaluation_mode / data_sharing_level carry the hyphenated equivalents.
    mentor_mode: MentorMode = "model_assisted"
    sensitive_info_masking: SensitiveInfoMasking = "standard"
    evaluation_mode: EvaluationMode = "model-assisted"
    data_sharing_level: DataSharingLevel = "standard"
    max_improvement_loops: int = 5

    # Only meaningful when worker_agent == "custom".
    custom_worker_display_name: str | None = None
    custom_worker_command_template: str | None = None
    custom_worker_can_write_files: bool = True

    # Last recorded readiness results.
    apprentice_agent_readiness_status: str | None = None
    apprentice_agent_readiness_reason: str | None = None
    mentor_model_provider_readiness_status: str | None = None
    mentor_model_provider_readiness_reason: str | None = None

    # Ecosystem sharing.
    ecosystem_repo: str | None = DEFAULT_PUBLIC_ECOSYSTEM_REPO
    ecosystem_repo_path: Path | None = None
    ecosystem_auto_share: EcosystemAutoShare = "manual"

    # --- Compatibility settings used by the existing internal pipeline ---
    openai_api_key: str | None = None
    openai_model: str = "gpt-5-mini"
    worker_runner: str = "codex"
    reviser_runner: str = "codex"
    allow_deterministic_fallback: bool = True
    task_timeout_seconds: int = 900
    max_iterations: int = 3
    codex_sandbox: str = "workspace-write"
    llm_evaluator_enabled: bool = True
    llm_grader_enabled: bool = True
    llm_verifier_enabled: bool = True
    llm_task_intake_model: str = "gpt-5-mini"
    llm_rubric_model: str = "gpt-5-mini"
    llm_evaluator_model: str = "gpt-5-mini"
    llm_grader_model: str = "gpt-5-mini"
    llm_verifier_model: str = "gpt-5-mini"
    llm_judge_count: int = 1
    llm_fail_closed: bool = False
    allow_deterministic_eval_fallback: bool = True
    rubric_mode: str = "hybrid"
    llm_task_intake_enabled: bool = True
    llm_rubric_generation_enabled: bool = True
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def app_home_from_env() -> Path:
    """Return the application home directory.

    Uses the ``AA_HOME`` environment variable when it is set and non-empty,
    otherwise ``DEFAULT_APP_HOME``; ``~`` is expanded either way.
    """
    configured = os.getenv("AA_HOME")
    base = configured if configured else DEFAULT_APP_HOME
    return Path(base).expanduser()
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def settings_path(app_home: Path | None = None) -> Path:
    """Return the location of ``settings.json``.

    Args:
        app_home: Directory holding the file; when omitted the directory is
            taken from ``app_home_from_env()``.
    """
    base = app_home if app_home else app_home_from_env()
    return base / "settings.json"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _bool_env(name: str, default: bool) -> bool:
|
|
195
|
+
value = os.getenv(name)
|
|
196
|
+
if value is None:
|
|
197
|
+
return default
|
|
198
|
+
return value.lower() == "true"
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _int_env(name: str, default: int) -> int:
|
|
202
|
+
value = os.getenv(name)
|
|
203
|
+
if value is None or value == "":
|
|
204
|
+
return default
|
|
205
|
+
return int(value)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _stored_settings() -> dict[str, Any]:
|
|
209
|
+
# Test suites and CI often set AA_DISABLE_LOCAL_ENV to avoid reading user
|
|
210
|
+
# machine state. Honor that unless AA_HOME intentionally points at a test dir.
|
|
211
|
+
if os.getenv("AA_DISABLE_LOCAL_ENV") == "1" and not os.getenv("AA_HOME"):
|
|
212
|
+
return {}
|
|
213
|
+
path = settings_path()
|
|
214
|
+
if not path.exists():
|
|
215
|
+
return {}
|
|
216
|
+
data = read_json(path)
|
|
217
|
+
return data if isinstance(data, dict) else {}
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def default_settings(app_home: Path | None = None) -> Settings:
    """Build a ``Settings`` object holding the model defaults.

    Args:
        app_home: Home directory to record; when omitted it is taken from
            ``app_home_from_env()``.
    """
    resolved_home = app_home if app_home else app_home_from_env()
    return Settings(app_home=resolved_home, max_iterations=3)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def get_settings(root: Path | None = None) -> Settings:
    """Build the effective ``Settings`` from environment, stored file and defaults.

    For most fields the precedence is: non-empty environment variable, then
    the value in the stored settings file, then a built-in default. An
    environment variable that is set but empty is treated as unset for every
    field, so an exported-but-blank variable can no longer produce an empty
    model name, sandbox or runner.

    Args:
        root: Passed to ``load_local_env`` before anything is read
            (presumably the project directory whose local env file should be
            loaded - confirm against ``load_local_env``).

    Returns:
        A fully populated ``Settings`` instance.

    Raises:
        ValueError: If a mode/masking/auto-share value is unsupported or an
            integer environment variable cannot be parsed.
    """
    load_local_env(root)
    stored = _stored_settings()
    home = Path(os.getenv("AA_HOME") or stored.get("app_home") or DEFAULT_APP_HOME).expanduser()
    load_local_env(home)

    def pick(env_names: tuple[str, ...], key: str, fallback: Any = None) -> Any:
        """Return the first non-empty env value, else the stored value, else fallback."""
        for env_name in env_names:
            env_value = os.getenv(env_name)
            if env_value:
                return env_value
        return stored.get(key) or fallback

    def flag(env_name: str, key: str, default: bool) -> bool:
        """Boolean setting: environment variable over stored value over default."""
        return _bool_env(env_name, bool(stored.get(key, default)))

    def number(env_name: str, key: str, default: int) -> int:
        """Integer setting: environment variable over stored value over default."""
        return _int_env(env_name, int(stored.get(key) or default))

    worker_agent = pick(("AA_WORKER_AGENT", "AA_TRACE_WORKER_AGENT"), "worker_agent", "codex")

    # The provider recipe supplies the default key env-var name and model.
    provider = pick(("AA_MODEL_PROVIDER",), "model_provider")
    provider_recipe = MODEL_PROVIDER_RECIPES.get(str(provider)) if provider else None
    provider_key_env = pick(
        ("AA_MODEL_PROVIDER_API_KEY_ENV",),
        "model_provider_api_key_env",
        provider_recipe.api_key_env_var if provider_recipe else None,
    )
    provider_model = pick(
        ("AA_MODEL_PROVIDER_MODEL",),
        "model_provider_model",
        provider_recipe.default_model if provider_recipe else None,
    )

    # max_improvement_loops falls back to the stored legacy max_iterations.
    loops = _int_env(
        "AA_MAX_IMPROVEMENT_LOOPS",
        int(stored.get("max_improvement_loops") or stored.get("max_iterations") or 5),
    )
    legacy_iterations = number("AA_MAX_ITERATIONS", "max_iterations", 3)
    openai_model = pick(("AA_OPENAI_MODEL",), "openai_model", "gpt-5-mini")

    # NOTE(review): the stored new-style key outranks the legacy environment
    # variable (AA_EVALUATION_MODE / AA_DATA_SHARING_LEVEL) here, unlike the
    # usual "environment beats stored" order. Kept as-is; confirm it is intended.
    mentor_mode = normalize_mentor_mode(
        os.getenv("AA_MENTOR_MODE")
        or stored.get("mentor_mode")
        or os.getenv("AA_EVALUATION_MODE")
        or stored.get("evaluation_mode")
        or "model_assisted"
    )
    sensitive_info_masking = normalize_sensitive_info_masking(
        os.getenv("AA_SENSITIVE_INFO_MASKING")
        or stored.get("sensitive_info_masking")
        or os.getenv("AA_DATA_SHARING_LEVEL")
        or stored.get("data_sharing_level")
        or "standard"
    )

    repo_path_raw = pick(("AA_ECOSYSTEM_REPO_PATH",), "ecosystem_repo_path")
    ecosystem_repo_path = Path(repo_path_raw).expanduser() if repo_path_raw else None

    return Settings(
        app_home=home,
        worker_agent=worker_agent,
        worker_agent_command=pick(("AA_WORKER_AGENT_COMMAND",), "worker_agent_command"),
        worker_agent_model=pick(("AA_WORKER_AGENT_MODEL",), "worker_agent_model"),
        worker_agent_extra_args=list(stored.get("worker_agent_extra_args") or []),
        model_provider=provider,
        model_provider_api_key_env=provider_key_env,
        model_provider_model=provider_model,
        mentor_mode=mentor_mode,
        sensitive_info_masking=sensitive_info_masking,
        # The hyphenated fields are always derived from the canonical ones.
        evaluation_mode=mentor_mode_to_evaluation_mode(mentor_mode),
        data_sharing_level=sensitive_info_masking_to_data_sharing(sensitive_info_masking),
        max_improvement_loops=loops,
        custom_worker_display_name=pick(("AA_CUSTOM_WORKER_DISPLAY_NAME",), "custom_worker_display_name"),
        custom_worker_command_template=pick(
            ("AA_CUSTOM_WORKER_COMMAND_TEMPLATE",), "custom_worker_command_template"
        ),
        custom_worker_can_write_files=flag(
            "AA_CUSTOM_WORKER_CAN_WRITE_FILES", "custom_worker_can_write_files", True
        ),
        # Readiness results come only from the stored file.
        apprentice_agent_readiness_status=stored.get("apprentice_agent_readiness_status"),
        apprentice_agent_readiness_reason=stored.get("apprentice_agent_readiness_reason"),
        mentor_model_provider_readiness_status=stored.get("mentor_model_provider_readiness_status"),
        mentor_model_provider_readiness_reason=stored.get("mentor_model_provider_readiness_reason"),
        ecosystem_repo=pick(("AA_ECOSYSTEM_REPO",), "ecosystem_repo", DEFAULT_PUBLIC_ECOSYSTEM_REPO),
        ecosystem_repo_path=ecosystem_repo_path,
        ecosystem_auto_share=normalize_ecosystem_auto_share(
            os.getenv("AA_ECOSYSTEM_AUTO_SHARE") or stored.get("ecosystem_auto_share") or "manual"
        ),
        # The API key is read from the environment only, never from the file.
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        openai_model=openai_model,
        worker_runner=pick(("AA_WORKER_RUNNER", "AA_TRACE_WORKER_RUNNER"), "worker_runner", worker_agent),
        reviser_runner=pick(("AA_REVISER_RUNNER", "AA_TRACE_REVISER_RUNNER"), "reviser_runner", worker_agent),
        allow_deterministic_fallback=flag(
            "AA_ALLOW_DETERMINISTIC_FALLBACK", "allow_deterministic_fallback", True
        ),
        task_timeout_seconds=number("AA_TASK_TIMEOUT_SECONDS", "task_timeout_seconds", 900),
        max_iterations=legacy_iterations,
        codex_sandbox=pick(("AA_CODEX_SANDBOX",), "codex_sandbox", "workspace-write"),
        llm_evaluator_enabled=flag("AA_LLM_EVALUATOR_ENABLED", "llm_evaluator_enabled", True),
        llm_grader_enabled=flag("AA_LLM_GRADER_ENABLED", "llm_grader_enabled", True),
        llm_verifier_enabled=flag("AA_LLM_VERIFIER_ENABLED", "llm_verifier_enabled", True),
        # Per-role models default to the general OpenAI model.
        llm_task_intake_model=pick(("AA_LLM_TASK_INTAKE_MODEL",), "llm_task_intake_model", openai_model),
        llm_rubric_model=pick(("AA_LLM_RUBRIC_MODEL",), "llm_rubric_model", openai_model),
        llm_evaluator_model=pick(("AA_LLM_EVALUATOR_MODEL",), "llm_evaluator_model", openai_model),
        llm_grader_model=pick(("AA_LLM_GRADER_MODEL",), "llm_grader_model", openai_model),
        llm_verifier_model=pick(("AA_LLM_VERIFIER_MODEL",), "llm_verifier_model", openai_model),
        llm_judge_count=number("AA_LLM_JUDGE_COUNT", "llm_judge_count", 1),
        llm_fail_closed=flag("AA_LLM_FAIL_CLOSED", "llm_fail_closed", False),
        allow_deterministic_eval_fallback=flag(
            "AA_ALLOW_DETERMINISTIC_EVAL_FALLBACK", "allow_deterministic_eval_fallback", True
        ),
        rubric_mode=pick(("AA_RUBRIC_MODE",), "rubric_mode", "hybrid"),
        llm_task_intake_enabled=flag("AA_LLM_TASK_INTAKE_ENABLED", "llm_task_intake_enabled", True),
        llm_rubric_generation_enabled=flag(
            "AA_LLM_RUBRIC_GENERATION_ENABLED", "llm_rubric_generation_enabled", True
        ),
    )
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def save_settings(settings: Settings) -> Path:
    """Write the settings to ``settings.json`` under ``settings.app_home``.

    The home directory is created if needed. The ``openai_api_key`` field is
    removed from the dumped data before writing, so the key is never
    persisted.

    Returns:
        The path of the written settings file.
    """
    settings.app_home.mkdir(parents=True, exist_ok=True)
    target = settings_path(settings.app_home)
    payload = {
        field: value
        for field, value in settings.model_dump(mode="json").items()
        if field != "openai_api_key"
    }
    write_json(target, payload)
    return target
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def init_settings(app_home: Path | None = None, overwrite: bool = False) -> Path:
    """Create the application home layout and a default settings file.

    Args:
        app_home: Home directory; when omitted it is taken from
            ``app_home_from_env()``.
        overwrite: When true, replace an existing settings file with defaults.

    Returns:
        The path of the settings file (whether or not it was rewritten).
    """
    home = app_home if app_home else app_home_from_env()
    home.mkdir(parents=True, exist_ok=True)
    runs_dir = home / "runs"
    runs_dir.mkdir(exist_ok=True)

    target = settings_path(home)
    needs_write = overwrite or not target.exists()
    if needs_write:
        save_settings(default_settings(home))
    return target
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def update_settings(**updates: Any) -> Settings:
    """Apply keyword updates to the effective settings, validate, and persist.

    Updates whose value is ``None`` are ignored. Paired fields are kept in
    sync: ``evaluation_mode`` is re-derived from ``mentor_mode`` and
    ``data_sharing_level`` from ``sensitive_info_masking``. Changing
    ``worker_agent`` without a ``worker_runner`` also points the worker
    runner at the new agent, and the reviser runner too unless an explicit
    ``reviser_runner`` was passed. ``max_iterations`` is always set to
    ``max_improvement_loops`` before saving.

    Returns:
        The validated, saved ``Settings``.

    Raises:
        ValueError: For an unsupported agent, provider, mode, masking or
            auto-share value, a custom command template missing its
            placeholders, or ``max_improvement_loops`` below 1.
    """
    settings = get_settings()
    data = settings.model_dump()
    clean_updates = {k: v for k, v in updates.items() if v is not None}

    # Legacy spellings feed the canonical fields, which then drive both.
    if "evaluation_mode" in clean_updates and "mentor_mode" not in clean_updates:
        clean_updates["mentor_mode"] = evaluation_mode_to_mentor_mode(clean_updates["evaluation_mode"])
    if "mentor_mode" in clean_updates:
        clean_updates["mentor_mode"] = normalize_mentor_mode(clean_updates["mentor_mode"])
        clean_updates["evaluation_mode"] = mentor_mode_to_evaluation_mode(clean_updates["mentor_mode"])
    if "data_sharing_level" in clean_updates and "sensitive_info_masking" not in clean_updates:
        clean_updates["sensitive_info_masking"] = data_sharing_to_sensitive_info_masking(
            clean_updates["data_sharing_level"]
        )
    if "sensitive_info_masking" in clean_updates:
        clean_updates["sensitive_info_masking"] = normalize_sensitive_info_masking(
            clean_updates["sensitive_info_masking"]
        )
        clean_updates["data_sharing_level"] = sensitive_info_masking_to_data_sharing(
            clean_updates["sensitive_info_masking"]
        )
    if "ecosystem_auto_share" in clean_updates:
        clean_updates["ecosystem_auto_share"] = normalize_ecosystem_auto_share(
            clean_updates["ecosystem_auto_share"]
        )
    if "worker_agent" in clean_updates and "worker_runner" not in clean_updates:
        clean_updates["worker_runner"] = clean_updates["worker_agent"]
        # setdefault keeps an explicitly requested reviser runner instead of
        # silently overwriting it with the new agent.
        clean_updates.setdefault("reviser_runner", clean_updates["worker_agent"])

    data.update(clean_updates)
    updated = Settings.model_validate(data)

    if updated.worker_agent not in WORKER_AGENT_RECIPES:
        raise ValueError(f"Unsupported Apprentice Agent: {updated.worker_agent}")
    if updated.model_provider is not None and updated.model_provider not in MODEL_PROVIDER_RECIPES:
        raise ValueError(f"Unsupported Mentor Model Provider: {updated.model_provider}")
    if updated.evaluation_mode not in EVALUATION_MODES:
        raise ValueError(f"Unsupported Mentor Mode: {updated.mentor_mode}")
    if updated.data_sharing_level not in DATA_SHARING_LEVELS:
        raise ValueError(f"Unsupported Sensitive Info Masking setting: {updated.sensitive_info_masking}")
    if updated.mentor_mode not in MENTOR_MODES:
        raise ValueError(f"Unsupported mentor mode: {updated.mentor_mode}")
    if updated.sensitive_info_masking not in SENSITIVE_INFO_MASKING_LEVELS:
        raise ValueError(f"Unsupported sensitive info masking setting: {updated.sensitive_info_masking}")
    if updated.ecosystem_auto_share not in ECOSYSTEM_AUTO_SHARE_MODES:
        raise ValueError(f"Unsupported Ecosystem Auto-Share setting: {updated.ecosystem_auto_share}")
    if updated.worker_agent == "custom":
        # A custom agent is driven by a command template that must name both
        # the workspace and the prompt file.
        template = updated.custom_worker_command_template or ""
        missing = [token for token in ("{workspace}", "{prompt_file}") if token not in template]
        if missing:
            raise ValueError(
                "custom Apprentice Agent command template must include placeholders: " + ", ".join(missing)
            )
    if updated.max_improvement_loops < 1:
        raise ValueError("max_improvement_loops must be at least 1")

    # Keep the legacy iteration count aligned with the public loop count.
    updated.max_iterations = updated.max_improvement_loops
    save_settings(updated)
    return updated
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def model_provider_ready(settings: Settings | None = None) -> bool:
    """Report whether the mentor model provider is usable for the current mode.

    In ``expert_led`` mode the answer is always ``True`` and the provider is
    not consulted; otherwise it is the ``ready`` flag of
    ``mentor_model_provider_readiness``.

    Args:
        settings: Settings to inspect; loaded via ``get_settings()`` if omitted.
    """
    s = settings or get_settings()
    return s.mentor_mode == "expert_led" or bool(mentor_model_provider_readiness(s).get("ready"))
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def configured_model_provider_ready(settings: Settings | None = None) -> bool:
    """Report the provider's own readiness, regardless of the Mentor Mode.

    Args:
        settings: Settings to inspect; loaded via ``get_settings()`` if omitted.
    """
    s = settings or get_settings()
    readiness = mentor_model_provider_readiness(s)
    return bool(readiness.get("ready"))
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def model_provider_display_name(settings: Settings | None = None) -> str:
    """Return the display name of the configured mentor model provider.

    Returns ``"Not configured"`` when no provider is set or the provider has
    no entry in ``MODEL_PROVIDER_RECIPES``.
    """
    s = settings or get_settings()
    recipe = MODEL_PROVIDER_RECIPES.get(s.model_provider or "")
    if recipe:
        return recipe.display_name
    return "Not configured"
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def mentor_model_provider_readiness(settings: Settings | None = None) -> dict[str, Any]:
    """Describe whether the mentor model provider can be used.

    Args:
        settings: Settings to inspect; loaded via ``get_settings()`` if omitted.

    Returns:
        A dict with keys ``status``, ``ready``, ``reason``, ``provider``,
        ``provider_display``, ``model``, ``api_key_env_var`` and
        ``api_key_visible``. ``status`` is ``"not_ready"`` when no provider
        is configured, ``"missing_api_key"`` when no key env var is known or
        the variable is not set, and otherwise the stored readiness status
        (``"untested"`` if none is stored). ``ready`` is true only for the
        status ``"ready"``.
    """
    s = settings or get_settings()
    if not s.model_provider:
        return {
            "status": "not_ready",
            "ready": False,
            "reason": None,
            "provider": None,
            "provider_display": "Not configured",
            "model": None,
            "api_key_env_var": None,
            "api_key_visible": False,
        }

    provider = MODEL_PROVIDER_RECIPES.get(s.model_provider)
    env_var = s.model_provider_api_key_env or (provider.api_key_env_var if provider else None)
    key_visible = bool(env_var and os.getenv(env_var))
    # For Google, GOOGLE_API_KEY is accepted when the configured variable is
    # not set.
    if s.model_provider == "google" and not key_visible and os.getenv("GOOGLE_API_KEY"):
        key_visible = True
        env_var = "GOOGLE_API_KEY"

    if not env_var:
        status = "missing_api_key"
        reason = "No API key environment variable is configured."
    elif not key_visible:
        status = "missing_api_key"
        reason = f"{env_var} is not visible."
    else:
        status = s.mentor_model_provider_readiness_status or "untested"
        reason = s.mentor_model_provider_readiness_reason
        if status == "missing_api_key":
            # The stored result predates the key becoming visible; drop the
            # stale reason along with the stale status.
            status = "untested"
            reason = None

    return {
        "status": status,
        "ready": status == "ready",
        "reason": reason,
        "provider": s.model_provider,
        "provider_display": provider.display_name if provider else s.model_provider,
        "model": s.model_provider_model or (provider.default_model if provider else None),
        "api_key_env_var": env_var,
        "api_key_visible": key_visible,
    }
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _configured_apprentice_command(settings: Settings) -> str | None:
|
|
448
|
+
if settings.worker_agent == "custom":
|
|
449
|
+
template = settings.custom_worker_command_template or ""
|
|
450
|
+
if template:
|
|
451
|
+
try:
|
|
452
|
+
parts = shlex.split(template)
|
|
453
|
+
except ValueError:
|
|
454
|
+
parts = template.split()
|
|
455
|
+
return parts[0] if parts else settings.worker_agent_command
|
|
456
|
+
return settings.worker_agent_command
|
|
457
|
+
recipe = WORKER_AGENT_RECIPES.get(settings.worker_agent)
|
|
458
|
+
return settings.worker_agent_command or (recipe.command_name if recipe else settings.worker_agent)
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def apprentice_agent_display_name(settings: Settings | None = None) -> str:
    """Return the name to show for the configured Apprentice Agent.

    A custom agent with a configured display name uses that name; otherwise
    the recipe's display name is used, falling back to the raw agent id when
    there is no recipe.
    """
    s = settings or get_settings()
    recipe = WORKER_AGENT_RECIPES.get(s.worker_agent)
    custom_name = s.custom_worker_display_name if s.worker_agent == "custom" else None
    if custom_name:
        return custom_name
    if recipe:
        return recipe.display_name
    return s.worker_agent
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def apprentice_agent_readiness_status(settings: Settings | None = None) -> dict[str, Any]:
    """Describe whether the configured Apprentice Agent can be run.

    Checks, in order: the custom command template placeholders, that a
    command is configured, that the command resolves via
    ``resolve_agent_command``, and finally the stored readiness result.

    Args:
        settings: Settings to inspect; loaded via ``get_settings()`` if omitted.

    Returns:
        A dict that always has the keys ``status``, ``ready``, ``reason``,
        ``command``, ``command_found`` and ``command_resolved``
        (``command_resolved`` is ``None`` whenever the command was not
        resolved). ``ready`` is true only when the stored status is
        ``"ready"`` and the command resolves.
    """
    s = settings or get_settings()
    command = _configured_apprentice_command(s)

    def report(
        status: str | None,
        reason: str | None,
        *,
        ready: bool = False,
        cmd: str | None = command,
        found: bool = False,
        resolved_path: Any = None,
    ) -> dict[str, Any]:
        """Build a result dict with the same key set on every path."""
        return {
            "status": status,
            "ready": ready,
            "reason": reason,
            "command": cmd,
            "command_found": found,
            "command_resolved": resolved_path,
        }

    if s.worker_agent == "custom":
        template = s.custom_worker_command_template or ""
        missing = [token for token in ("{workspace}", "{prompt_file}") if token not in template]
        if missing:
            return report(
                "not_ready",
                "Custom Apprentice Agent command template is missing placeholders: " + ", ".join(missing),
            )

    if not command:
        return report("not_ready", "Apprentice Agent command is not configured.", cmd=None)

    _, resolved = resolve_agent_command(s.worker_agent, command)
    if not resolved:
        hint = gui_app_hint(s.worker_agent or "")
        reason = f"Apprentice Agent command not found: {command}"
        if hint:
            reason = f"{reason}. {hint} Install or expose the headless CLI on PATH."
        return report("missing_command", reason)

    stored_status = s.apprentice_agent_readiness_status
    if stored_status == "ready":
        return report(
            "ready",
            s.apprentice_agent_readiness_reason,
            ready=True,
            found=True,
            resolved_path=resolved,
        )
    if stored_status in {"auth_error", "quota_or_credit_error", "timeout", "failed", "not_ready"}:
        return report(
            stored_status,
            s.apprentice_agent_readiness_reason,
            found=True,
            resolved_path=resolved,
        )
    # Any other stored value (including none) counts as "never checked".
    return report(
        "untested",
        "Command was found, but no readiness check has been recorded.",
        found=True,
        resolved_path=resolved,
    )
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _readiness_line(label: str, status: str | None, reason: str | None = None) -> str:
|
|
534
|
+
pretty = (status or "not_ready").replace("_", " ").title()
|
|
535
|
+
if status == "ready":
|
|
536
|
+
pretty = "Ready"
|
|
537
|
+
elif status == "not_ready":
|
|
538
|
+
pretty = "Not ready"
|
|
539
|
+
elif status == "missing_api_key":
|
|
540
|
+
pretty = "Not ready"
|
|
541
|
+
elif status == "missing_command":
|
|
542
|
+
pretty = "Not ready"
|
|
543
|
+
elif status == "untested":
|
|
544
|
+
pretty = "Untested"
|
|
545
|
+
suffix = f" - {reason}" if reason else ""
|
|
546
|
+
return f"{label}: {pretty}{suffix}"
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def public_settings(settings: Settings | None = None) -> dict[str, Any]:
    """Collect the user-facing settings summary as a plain dictionary.

    Args:
        settings: Settings object to summarise. When omitted (or falsy) the
            result of ``get_settings()`` is used instead.

    Returns:
        A dictionary with the apprentice agent name and readiness, the mentor
        model provider name and readiness, and the mentor-mode, masking,
        improvement-loop, ecosystem and app-home values read from the
        settings object. Path-like values are converted with ``str``.
    """
    cfg = settings or get_settings()
    agent = apprentice_agent_readiness_status(cfg)
    provider = mentor_model_provider_readiness(cfg)

    summary: dict[str, Any] = {}
    summary["apprentice_agent"] = apprentice_agent_display_name(cfg)
    summary["apprentice_agent_status"] = agent["status"]
    summary["apprentice_agent_status_reason"] = agent.get("reason")

    # The display name is only reported when a provider is actually set.
    if provider.get("provider"):
        summary["mentor_model_provider"] = provider["provider_display"]
    else:
        summary["mentor_model_provider"] = None
    summary["mentor_model_provider_status"] = provider["status"]
    summary["mentor_model_provider_status_reason"] = provider.get("reason")
    summary["mentor_model_provider_api_key_env"] = provider.get("api_key_env_var")
    summary["mentor_model_provider_api_key_visible"] = provider.get("api_key_visible")

    summary["mentor_mode"] = cfg.mentor_mode
    summary["sensitive_info_masking"] = cfg.sensitive_info_masking
    summary["max_improvement_loops"] = cfg.max_improvement_loops
    summary["ecosystem_repo"] = cfg.ecosystem_repo
    if cfg.ecosystem_repo_path:
        summary["ecosystem_repo_path"] = str(cfg.ecosystem_repo_path)
    else:
        summary["ecosystem_repo_path"] = None
    summary["ecosystem_auto_share"] = cfg.ecosystem_auto_share
    summary["app_home"] = str(cfg.app_home)
    return summary
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def public_settings_text(settings: Settings | None = None) -> str:
    """Render the public settings summary as newline-joined text.

    Args:
        settings: Settings object to describe. When omitted (or falsy) the
            result of ``get_settings()`` is used instead.

    Returns:
        A multi-line string without a trailing newline. When the mentor mode
        is ``"model_assisted"`` or ``"hybrid"`` and
        ``configured_model_provider_ready`` reports false, a blank line and a
        two-line hint on configuring the model are appended.
    """
    cfg = settings or get_settings()
    info = public_settings(cfg)
    provider_name = info["mentor_model_provider"] or "Not configured"

    out = ["Agent Apprenticeship Settings", ""]
    out.append(f"Apprentice Agent: {info['apprentice_agent']}")
    out.append(
        _readiness_line(
            "Apprentice Agent Status",
            info["apprentice_agent_status"],
            info.get("apprentice_agent_status_reason"),
        )
    )
    out.append(f"Mentor Model Provider: {provider_name}")
    out.append(
        _readiness_line(
            "Mentor Model Provider Status",
            info["mentor_model_provider_status"],
            info.get("mentor_model_provider_status_reason"),
        )
    )
    out.append(f"Mentor Mode: {mentor_mode_display(info['mentor_mode'])}")
    out.append(f"Sensitive Info Masking: {info['sensitive_info_masking']}")
    out.append(f"Maximum Improvement Loops: {info['max_improvement_loops']}")
    out.append(f"Public Ecosystem Repo: {info['ecosystem_repo'] or DEFAULT_PUBLIC_ECOSYSTEM_REPO}")
    # The local checkout path is listed only when one is configured.
    if info.get("ecosystem_repo_path"):
        out.append(f"Public Ecosystem Repo Path: {info['ecosystem_repo_path']}")
    out.append(f"Ecosystem Auto-Share: {ecosystem_auto_share_display(info['ecosystem_auto_share'])}")
    out.append(f"App Home: {info['app_home']}")

    if cfg.mentor_mode in {"model_assisted", "hybrid"}:
        if not configured_model_provider_ready(cfg):
            out += [
                "",
                "Model-assisted Mentor Mode needs a ready Mentor Model Provider.",
                "Run: apprentice configure model",
            ]
    return "\n".join(out)
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def debug_settings(settings: Settings | None = None) -> dict[str, Any]:
    """Return the JSON-mode settings dump extended with readiness details.

    Args:
        settings: Settings object to dump. When omitted (or falsy) the result
            of ``get_settings()`` is used instead.

    Returns:
        The ``model_dump(mode="json")`` dictionary with three readiness
        entries added and the ``"openai_api_key"`` entry removed.
    """
    cfg = settings or get_settings()
    dump = cfg.model_dump(mode="json")
    dump.update(
        {
            "model_provider_ready": configured_model_provider_ready(cfg),
            "mentor_model_provider_readiness": mentor_model_provider_readiness(cfg),
            "apprentice_agent_readiness": apprentice_agent_readiness_status(cfg),
        }
    )
    # NOTE(review): only "openai_api_key" is stripped here; confirm Settings
    # carries no other secret-bearing fields that would end up in this dump.
    dump.pop("openai_api_key", None)
    return dump
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from .env import redact_secrets
|
|
7
|
+
from .io import write_json
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Names reported under "expected_files" in the contract diagnostics; they
# mirror the entries build_contract_diagnostics() looks for in the attempt
# directory.
EXPECTED_AGENT_CONTRACT = ["agent_trace.json", "actual_outputs.json", "artifacts/"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def build_contract_diagnostics(
    attempt_dir: Path,
    *,
    command: list[str] | str,
    working_directory: Path,
    agent_display_name: str,
    prompt: str | None = None,
) -> dict[str, Any]:
    """Describe which output-contract entries exist in ``attempt_dir``.

    The resulting dictionary is also written to
    ``attempt_dir / "contract_diagnostics.json"`` via ``write_json``.

    Args:
        attempt_dir: Directory checked for ``agent_trace.json``,
            ``actual_outputs.json`` and ``artifacts``.
        command: Command that was run. List items equal to ``prompt`` are
            replaced by ``"<prompt>"``; every other item (or the whole
            string) is passed through ``redact_secrets``.
        working_directory: Recorded in the diagnostics as a string.
        agent_display_name: Agent name used in the ``likely_cause`` text.
        prompt: Prompt text to mask inside a list-form command, if any.

    Returns:
        The diagnostics dictionary.
    """
    sanitized: list[str] | str
    if not isinstance(command, list):
        sanitized = redact_secrets(command)
    else:
        sanitized = []
        for token in command:
            if prompt is not None and token == prompt:
                sanitized.append("<prompt>")
            else:
                sanitized.append(redact_secrets(str(token)))

    contract_paths = {
        "agent_trace.json": attempt_dir / "agent_trace.json",
        "actual_outputs.json": attempt_dir / "actual_outputs.json",
        "artifacts/": attempt_dir / "artifacts",
    }
    absent = []
    for entry_name, location in contract_paths.items():
        if not location.exists():
            absent.append(entry_name)

    # Directory entries get a trailing "/" in the listing.
    found = []
    if attempt_dir.exists():
        found = sorted(
            child.name + ("/" if child.is_dir() else "")
            for child in attempt_dir.iterdir()
        )

    if absent:
        cause = (
            f"{agent_display_name} ran but did not write the Agent Apprenticeship output contract "
            "in the current task workspace."
        )
    else:
        cause = f"{agent_display_name} wrote the expected output contract."

    # Point at the captured logs only when at least one of them is present.
    log_names = {"stdout.txt", "stderr.txt", "final_message.txt"}
    if absent and not log_names.isdisjoint(found):
        cause += " Inspect stdout/stderr/final_message for setup, auth, or unsupported headless-mode details."

    report = {
        "command_used": sanitized,
        "working_directory": str(working_directory),
        "expected_files": EXPECTED_AGENT_CONTRACT,
        "missing_expected_files": absent,
        "top_level_files_found": found,
        "stdout_ref": "stdout.txt",
        "stderr_ref": "stderr.txt",
        "final_message_ref": "final_message.txt",
        "likely_cause": cause,
    }
    write_json(attempt_dir / "contract_diagnostics.json", report)
    return report
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def diagnostics_text(diagnostics: dict[str, Any]) -> str:
    """Render a contract-diagnostics dictionary as a plain-text report.

    Args:
        diagnostics: Mapping read with ``.get``, so absent keys are rendered
            as ``None`` (scalar fields), as an empty list (expected files) or
            as ``"none"`` (missing / top-level file lists).

    Returns:
        The report, one field per line, ending with a newline.
    """

    def _joined(key: str, empty: str = "") -> str:
        # Comma-join a list field; an absent or empty list yields ``empty``.
        return ", ".join(diagnostics.get(key) or []) or empty

    report = "\n".join(
        (
            "Agent Apprenticeship output-contract diagnostics",
            f"Command used: {diagnostics.get('command_used')}",
            f"Working directory: {diagnostics.get('working_directory')}",
            "Expected files: " + _joined("expected_files"),
            "Missing expected files: " + _joined("missing_expected_files", "none"),
            "Top-level files found: " + _joined("top_level_files_found", "none"),
            f"stdout: {diagnostics.get('stdout_ref')}",
            f"stderr: {diagnostics.get('stderr_ref')}",
            f"final message: {diagnostics.get('final_message_ref')}",
            f"Likely cause: {diagnostics.get('likely_cause')}",
        )
    )
    return report + "\n"
|