agent-apprenticeship 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -15
- package/bin/agent-apprenticeship.js +92 -13
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/agent_apprenticeship_trace/__init__.py +1 -1
- package/src/agent_apprenticeship_trace/apprentice_adapters.py +2 -1
- package/src/agent_apprenticeship_trace/bundle_exporter.py +63 -3
- package/src/agent_apprenticeship_trace/cli.py +392 -70
- package/src/agent_apprenticeship_trace/codex_runner.py +46 -3
- package/src/agent_apprenticeship_trace/config.py +16 -0
- package/src/agent_apprenticeship_trace/openai_structured.py +6 -0
- package/src/agent_apprenticeship_trace/public_run.py +118 -57
- package/src/agent_apprenticeship_trace/task_intake.py +45 -2
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
import os
|
|
3
|
+
import signal
|
|
2
4
|
import shutil, subprocess
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
import re
|
|
@@ -16,6 +18,35 @@ class AttemptResult(dict): pass
|
|
|
16
18
|
|
|
17
19
|
CODEX_TRUST_RETRY_MESSAGE = "Codex refused to run because the workspace is not a trusted Git directory. Retrying with --skip-git-repo-check if supported."
|
|
18
20
|
|
|
21
|
+
|
|
22
|
+
def _signal_process_group(process: subprocess.Popen, signal_name: str) -> None:
    """Send the signal named ``signal_name`` to the child's process group.

    Falls back to ``process.kill()`` (direct child only) when the platform
    lacks ``os.killpg`` or the requested signal, or when the OS rejects the
    group signal (for example because the group no longer exists).
    """
    try:
        os.killpg(process.pid, getattr(signal, signal_name))
    except (AttributeError, OSError):
        process.kill()


def _stop_process_group(process: subprocess.Popen) -> tuple:
    """Terminate the child's process group and return ``(stdout, stderr)``.

    Sends SIGTERM first, waits up to five seconds for the output pipes to
    drain, then escalates to SIGKILL.
    """
    _signal_process_group(process, "SIGTERM")
    try:
        return process.communicate(timeout=5)
    except subprocess.TimeoutExpired:
        _signal_process_group(process, "SIGKILL")
        return process.communicate()
    except BaseException:
        # Interrupted during the grace period: do not leave the group alive.
        _signal_process_group(process, "SIGKILL")
        raise


def _run_with_process_group_timeout(command, *, cwd: Path, timeout: int | None, shell: bool = False) -> subprocess.CompletedProcess:
    """Run ``command`` in its own session and stop the whole group on failure.

    Args:
        command: Argument list, or a shell string when ``shell`` is true.
        cwd: Working directory for the child process.
        timeout: Seconds to wait for completion; ``None`` waits indefinitely.
        shell: Passed through to ``subprocess.Popen``.

    Returns:
        A ``CompletedProcess`` holding the return code and the captured
        text of stdout and stderr.

    Raises:
        subprocess.TimeoutExpired: When ``timeout`` elapses. The process
            group is terminated first and the output captured so far is
            attached to the exception.
    """
    process = subprocess.Popen(
        command,
        cwd=cwd,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=shell,
        start_new_session=True,
    )
    try:
        stdout, stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired as exc:
        stdout, stderr = _stop_process_group(process)
        raise subprocess.TimeoutExpired(command, timeout, output=stdout, stderr=stderr) from exc
    except BaseException:
        # The child runs in its own session, so nothing else stops it when
        # this call is interrupted (e.g. KeyboardInterrupt). Kill the group
        # before propagating, as subprocess.run does for its direct child.
        _signal_process_group(process, "SIGKILL")
        raise
    return subprocess.CompletedProcess(command, process.returncode, stdout, stderr)
|
|
49
|
+
|
|
19
50
|
def _attempt_dir(package_root: Path, attempt_kind: str) -> Path:
|
|
20
51
|
p=package_root/'attempts'/attempt_kind
|
|
21
52
|
(p/'artifacts').mkdir(parents=True, exist_ok=True)
|
|
@@ -292,7 +323,7 @@ def run_codex_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSp
|
|
|
292
323
|
stdout=''
|
|
293
324
|
stderr=''
|
|
294
325
|
try:
|
|
295
|
-
cp=
|
|
326
|
+
cp=_run_with_process_group_timeout(cmd, cwd=d, timeout=timeout)
|
|
296
327
|
returncode=cp.returncode
|
|
297
328
|
stdout=cp.stdout or ''
|
|
298
329
|
stderr=cp.stderr or ''
|
|
@@ -308,7 +339,7 @@ def run_codex_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSp
|
|
|
308
339
|
skip_git_repo_check_supported=True,
|
|
309
340
|
ask_for_approval_supported=ask_supported,
|
|
310
341
|
)
|
|
311
|
-
cp=
|
|
342
|
+
cp=_run_with_process_group_timeout(retry_cmd, cwd=d, timeout=timeout)
|
|
312
343
|
cmd=retry_cmd
|
|
313
344
|
returncode=cp.returncode
|
|
314
345
|
stdout=(stdout or '') + "\n" + (cp.stdout or '')
|
|
@@ -338,6 +369,18 @@ def run_codex_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSp
|
|
|
338
369
|
if contract_diagnostics:
|
|
339
370
|
actual.metadata_json['apprentice_agent_contract_diagnostics']=contract_diagnostics
|
|
340
371
|
op_error = _apprentice_operational_error(run_error, stdout, stderr, returncode)
|
|
372
|
+
if (
|
|
373
|
+
op_error
|
|
374
|
+
and isinstance(run_error, subprocess.TimeoutExpired)
|
|
375
|
+
and trace_valid
|
|
376
|
+
and actual.status == 'success'
|
|
377
|
+
):
|
|
378
|
+
actual.metadata_json['apprentice_agent_warning'] = (
|
|
379
|
+
'Apprentice Agent process timed out after producing required outputs; '
|
|
380
|
+
'the produced trace and artifacts were preserved.'
|
|
381
|
+
)
|
|
382
|
+
trace.metadata_json['apprentice_agent_warning'] = actual.metadata_json['apprentice_agent_warning']
|
|
383
|
+
op_error = None
|
|
341
384
|
if op_error and returncode not in (None, 0) and trace_valid and actual.status == 'success':
|
|
342
385
|
op_error = f"Apprentice Agent exited nonzero after producing required outputs (exit code {returncode})."
|
|
343
386
|
if op_error or not trace_valid:
|
|
@@ -387,7 +430,7 @@ def run_custom_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeS
|
|
|
387
430
|
run_error=None
|
|
388
431
|
cp=None
|
|
389
432
|
try:
|
|
390
|
-
cp=
|
|
433
|
+
cp=_run_with_process_group_timeout(command, cwd=d, timeout=timeout or settings.task_timeout_seconds, shell=True)
|
|
391
434
|
stdout=redact_secrets(cp.stdout or '')
|
|
392
435
|
stderr=redact_secrets(cp.stderr or '')
|
|
393
436
|
(d/'stdout.txt').write_text(stdout)
|
|
@@ -3,6 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
import os
|
|
4
4
|
import shlex
|
|
5
5
|
import shutil
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from contextvars import ContextVar
|
|
6
8
|
from pathlib import Path
|
|
7
9
|
from typing import Any, Literal
|
|
8
10
|
|
|
@@ -28,6 +30,7 @@ DATA_SHARING_LEVELS: tuple[str, ...] = ("standard", "full-context")
|
|
|
28
30
|
DEFAULT_APP_HOME = Path("~/.agent-apprenticeship").expanduser()
|
|
29
31
|
DEFAULT_PUBLIC_ECOSYSTEM_REPO = "Forsy-AI/agent-apprenticeship"
|
|
30
32
|
DEFAULT_PUBLIC_ECOSYSTEM_URL = f"https://github.com/{DEFAULT_PUBLIC_ECOSYSTEM_REPO}"
|
|
33
|
+
_SETTINGS_OVERRIDE: ContextVar[Any] = ContextVar("agent_apprenticeship_settings_override", default=None)
|
|
31
34
|
|
|
32
35
|
|
|
33
36
|
def normalize_mentor_mode(value: str | None, default: str = "model_assisted") -> str:
|
|
@@ -183,6 +186,15 @@ class Settings(BaseModel):
|
|
|
183
186
|
llm_rubric_generation_enabled: bool = True
|
|
184
187
|
|
|
185
188
|
|
|
189
|
+
@contextmanager
def settings_override(settings: Settings):
    """Install ``settings`` in ``_SETTINGS_OVERRIDE`` for the ``with`` body.

    The value that was in place before entry is restored on exit, whether
    the body finishes normally or raises.
    """
    restore_token = _SETTINGS_OVERRIDE.set(settings)
    try:
        yield
    finally:
        _SETTINGS_OVERRIDE.reset(restore_token)
|
|
196
|
+
|
|
197
|
+
|
|
186
198
|
def app_home_from_env() -> Path:
    """Return the app home directory, preferring the ``AA_HOME`` variable.

    An unset or empty ``AA_HOME`` falls back to ``DEFAULT_APP_HOME``; a
    leading ``~`` in the chosen value is expanded.
    """
    configured_home = os.getenv("AA_HOME")
    chosen_home = configured_home or DEFAULT_APP_HOME
    return Path(chosen_home).expanduser()
|
|
188
200
|
|
|
@@ -223,6 +235,10 @@ def default_settings(app_home: Path | None = None) -> Settings:
|
|
|
223
235
|
|
|
224
236
|
|
|
225
237
|
def get_settings(root: Path | None = None) -> Settings:
|
|
238
|
+
if root is None:
|
|
239
|
+
override = _SETTINGS_OVERRIDE.get()
|
|
240
|
+
if override is not None:
|
|
241
|
+
return override
|
|
226
242
|
load_local_env(root)
|
|
227
243
|
stored = _stored_settings()
|
|
228
244
|
home = Path(os.getenv("AA_HOME") or stored.get("app_home") or DEFAULT_APP_HOME).expanduser()
|
|
@@ -34,6 +34,12 @@ def _drop_local_pydantic_if_needed() -> dict[str, Any]:
|
|
|
34
34
|
`openai`.
|
|
35
35
|
"""
|
|
36
36
|
src = _repo_src_path()
|
|
37
|
+
# In an installed package, parents[1] is usually site-packages. Removing
|
|
38
|
+
# that path prevents the OpenAI SDK itself from being imported. Only strip
|
|
39
|
+
# the path when it looks like the old repo-local pydantic shim directory:
|
|
40
|
+
# it has pydantic but not the OpenAI SDK alongside it.
|
|
41
|
+
if not (Path(src) / "pydantic" / "__init__.py").exists() or (Path(src) / "openai").exists():
|
|
42
|
+
return {'removed_paths': [], 'removed_modules': {}}
|
|
37
43
|
removed_paths = []
|
|
38
44
|
for p in list(sys.path):
|
|
39
45
|
if Path(p or '.').resolve().as_posix() == Path(src).resolve().as_posix():
|
|
@@ -8,7 +8,7 @@ from datetime import datetime, timezone
|
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
|
|
10
10
|
from .bundle_exporter import create_contribution_bundle
|
|
11
|
-
from .config import Settings, apprentice_agent_display_name, apprentice_agent_readiness_status, get_settings
|
|
11
|
+
from .config import Settings, apprentice_agent_display_name, apprentice_agent_readiness_status, get_settings, settings_override
|
|
12
12
|
from .io import read_json, write_json
|
|
13
13
|
from .loop import run_task
|
|
14
14
|
from .mentor_checkpoints import write_mentor_checkpoints
|
|
@@ -18,6 +18,13 @@ from .schemas import RawTaskRecord
|
|
|
18
18
|
from .session_events import append_session_event, backfill_session_event_task_ids, next_followup_index
|
|
19
19
|
|
|
20
20
|
|
|
21
|
+
class RunInterrupted(Exception):
    """Signals that a run was interrupted before it finished.

    Attributes are documented inline: ``run_root`` is the run directory the
    exception was raised for and ``message`` is the human-readable reason.
    """

    def __init__(self, run_root: Path, message: str = "Run interrupted by user."):
        # Keep the message both as an attribute and as the exception's args.
        self.message = message
        self.run_root = run_root
        super().__init__(message)
|
|
26
|
+
|
|
27
|
+
|
|
21
28
|
def slugify(text: str, fallback: str = "task") -> str:
    """Turn ``text`` into a lowercase, dash-separated slug of at most 64 chars.

    Every run of characters outside ``a-z0-9`` becomes a single dash, and
    leading/trailing dashes are removed both before and after truncation.
    ``fallback`` is returned when nothing is left.
    """
    lowered = text.lower()
    dashed = re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")
    truncated = dashed[:64].strip("-")
    if truncated:
        return truncated
    return fallback
|
|
@@ -162,17 +169,15 @@ def _pre_attempt_checkpoint_callback(
|
|
|
162
169
|
progress_callback: ProgressCallback | None,
|
|
163
170
|
followup_index: int | None = None,
|
|
164
171
|
):
|
|
165
|
-
if not _human_checkpoint_mode(settings):
|
|
166
|
-
return None
|
|
167
|
-
|
|
168
172
|
def _callback(_pkg: Path) -> None:
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
173
|
+
if _human_checkpoint_mode(settings):
|
|
174
|
+
write_mentor_checkpoints(
|
|
175
|
+
run_root,
|
|
176
|
+
settings,
|
|
177
|
+
auto_approve=_checkpoint_auto_approve(settings),
|
|
178
|
+
stages=("task_intake", "rubric"),
|
|
179
|
+
preserve_interactive=followup_index is None,
|
|
180
|
+
)
|
|
176
181
|
append_progress_event(
|
|
177
182
|
run_root,
|
|
178
183
|
"apprentice_attempt_started",
|
|
@@ -188,6 +193,26 @@ def _pre_attempt_checkpoint_callback(
|
|
|
188
193
|
return _callback
|
|
189
194
|
|
|
190
195
|
|
|
196
|
+
def _append_mentor_preparation_started(
    run_root: Path,
    settings: Settings,
    *,
    progress_callback: ProgressCallback | None,
    followup_index: int | None = None,
) -> None:
    """Append a ``mentor_preparation_started`` progress event for the run.

    When ``followup_index`` is truthy the message is prefixed with the
    follow-up number and the index is recorded in the event metadata;
    otherwise the plain message is used and no metadata is attached.
    """
    if followup_index:
        event_message = f"Follow-up {followup_index} Mentor preparation started"
        event_metadata = {"followup_index": followup_index}
    else:
        event_message = "Mentor preparation started"
        event_metadata = None

    append_progress_event(
        run_root,
        "mentor_preparation_started",
        run_id=run_root.name,
        message=event_message,
        current_loop=1,
        maximum_improvement_loops=settings.max_improvement_loops,
        phase="mentor_preparation",
        metadata_json=event_metadata,
        callback=progress_callback,
    )
|
|
214
|
+
|
|
215
|
+
|
|
191
216
|
def _revision_decision_callback(
|
|
192
217
|
run_root: Path,
|
|
193
218
|
settings: Settings,
|
|
@@ -300,6 +325,36 @@ def runner_for_settings(settings: Settings, override: str | None = None) -> str:
|
|
|
300
325
|
raise RuntimeError(f"Unsupported Apprentice Agent: {settings.worker_agent}")
|
|
301
326
|
|
|
302
327
|
|
|
328
|
+
def _loop_settings_for_run(settings: Settings) -> Settings:
    """Return the settings to use while the task loop runs.

    For ``expert_led`` mentor mode a copy is returned with the rubric mode
    forced to ``deterministic`` and every LLM-backed stage flag switched
    off. Any other mentor mode gets the original object back unchanged.
    """
    if settings.mentor_mode == "expert_led":
        llm_free_overrides = {
            "rubric_mode": "deterministic",
            "llm_task_intake_enabled": False,
            "llm_rubric_generation_enabled": False,
            "llm_evaluator_enabled": False,
            "llm_grader_enabled": False,
            "llm_verifier_enabled": False,
        }
        return settings.model_copy(update=llm_free_overrides)
    return settings
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _settings_for_session(settings: Settings, session: dict) -> Settings:
    """Overlay values stored in a session record onto ``settings``.

    Truthy ``mentor_mode``, ``sensitive_info_masking`` and ``model_provider``
    entries are copied across as-is. A truthy ``max_improvement_loops`` is
    converted with ``int`` and applied to both ``max_improvement_loops`` and
    ``max_iterations``. The original object is returned when the session
    supplies none of these.
    """
    passthrough_keys = ("mentor_mode", "sensitive_info_masking", "model_provider")
    overrides = {key: session[key] for key in passthrough_keys if session.get(key)}

    if session.get("max_improvement_loops"):
        loop_count = int(session["max_improvement_loops"])
        overrides["max_improvement_loops"] = loop_count
        overrides["max_iterations"] = loop_count

    if not overrides:
        return settings
    return settings.model_copy(update=overrides)
|
|
356
|
+
|
|
357
|
+
|
|
303
358
|
def _session_status_for_package(pkg: Path) -> tuple[str, str | None]:
|
|
304
359
|
actual_paths = sorted((pkg / "attempts").glob("*/actual_outputs.json"))
|
|
305
360
|
if not actual_paths:
|
|
@@ -496,34 +551,44 @@ def run_prompt_task(
|
|
|
496
551
|
**_experience_session_fields(experience_pack_refs),
|
|
497
552
|
},
|
|
498
553
|
)
|
|
499
|
-
|
|
554
|
+
_append_mentor_preparation_started(run_root, settings, progress_callback=progress_callback)
|
|
555
|
+
revision_decider, checkpoint_state = _revision_decision_callback(
|
|
556
|
+
run_root,
|
|
557
|
+
settings,
|
|
558
|
+
progress_callback=progress_callback,
|
|
559
|
+
)
|
|
560
|
+
try:
|
|
561
|
+
with settings_override(_loop_settings_for_run(settings)):
|
|
562
|
+
pkg = run_task(
|
|
563
|
+
raw,
|
|
564
|
+
run_root,
|
|
565
|
+
runner=runner_for_settings(settings, runner),
|
|
566
|
+
max_iterations=settings.max_improvement_loops,
|
|
567
|
+
pre_attempt_callback=_pre_attempt_checkpoint_callback(
|
|
568
|
+
run_root,
|
|
569
|
+
settings,
|
|
570
|
+
progress_callback=progress_callback,
|
|
571
|
+
),
|
|
572
|
+
revision_decision_callback=revision_decider,
|
|
573
|
+
)
|
|
574
|
+
except KeyboardInterrupt as exc:
|
|
500
575
|
append_progress_event(
|
|
501
576
|
run_root,
|
|
502
|
-
"
|
|
577
|
+
"run_interrupted",
|
|
503
578
|
run_id=run_id,
|
|
504
|
-
message="
|
|
579
|
+
message="Run interrupted by user.",
|
|
505
580
|
current_loop=1,
|
|
506
581
|
maximum_improvement_loops=settings.max_improvement_loops,
|
|
507
|
-
phase="
|
|
582
|
+
phase="interrupted",
|
|
583
|
+
run_status="partial",
|
|
584
|
+
task_status="partial",
|
|
585
|
+
operational_error="Run interrupted by user.",
|
|
508
586
|
callback=progress_callback,
|
|
509
587
|
)
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
)
|
|
515
|
-
pkg = run_task(
|
|
516
|
-
raw,
|
|
517
|
-
run_root,
|
|
518
|
-
runner=runner_for_settings(settings, runner),
|
|
519
|
-
max_iterations=settings.max_improvement_loops,
|
|
520
|
-
pre_attempt_callback=_pre_attempt_checkpoint_callback(
|
|
521
|
-
run_root,
|
|
522
|
-
settings,
|
|
523
|
-
progress_callback=progress_callback,
|
|
524
|
-
),
|
|
525
|
-
revision_decision_callback=revision_decider,
|
|
526
|
-
)
|
|
588
|
+
session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
|
|
589
|
+
session.update({"run_status": "partial", "task_status": "partial", "status_reason": "Run interrupted by user."})
|
|
590
|
+
write_json(run_root / "session.json", {k: v for k, v in session.items() if v is not None})
|
|
591
|
+
raise RunInterrupted(run_root) from exc
|
|
527
592
|
manifest = read_json(pkg / "package_manifest.json") if (pkg / "package_manifest.json").exists() else {}
|
|
528
593
|
run_status, partial_reason = _session_status_for_package(pkg)
|
|
529
594
|
actual_iterations = int(manifest.get("actual_iterations") or 1)
|
|
@@ -771,6 +836,8 @@ def continue_session(
|
|
|
771
836
|
run_root = run_root_for(run_id, settings)
|
|
772
837
|
if not run_root.exists():
|
|
773
838
|
raise FileNotFoundError(f"Run not found: {run_id}")
|
|
839
|
+
session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
|
|
840
|
+
settings = _settings_for_session(settings, session)
|
|
774
841
|
followup_index = next_followup_index(run_root)
|
|
775
842
|
task_id = _session_task_id(run_root)
|
|
776
843
|
append_progress_event(
|
|
@@ -836,7 +903,6 @@ def continue_session(
|
|
|
836
903
|
)
|
|
837
904
|
bundle: Path | None = None
|
|
838
905
|
if run_loop:
|
|
839
|
-
session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
|
|
840
906
|
original = session.get("task_instruction") or ""
|
|
841
907
|
combined = (
|
|
842
908
|
"Continue the same Agent Apprenticeship session.\n\n"
|
|
@@ -848,37 +914,32 @@ def continue_session(
|
|
|
848
914
|
combined,
|
|
849
915
|
_asset_abs_refs(run_root, asset_refs),
|
|
850
916
|
)
|
|
851
|
-
|
|
852
|
-
append_progress_event(
|
|
853
|
-
run_root,
|
|
854
|
-
"apprentice_attempt_started",
|
|
855
|
-
run_id=run_root.name,
|
|
856
|
-
message=f"Follow-up {followup_index} Apprentice attempt started",
|
|
857
|
-
current_loop=1,
|
|
858
|
-
maximum_improvement_loops=settings.max_improvement_loops,
|
|
859
|
-
phase="apprentice_attempt",
|
|
860
|
-
metadata_json={"followup_index": followup_index},
|
|
861
|
-
callback=progress_callback,
|
|
862
|
-
)
|
|
863
|
-
revision_decider, checkpoint_state = _revision_decision_callback(
|
|
917
|
+
_append_mentor_preparation_started(
|
|
864
918
|
run_root,
|
|
865
919
|
settings,
|
|
866
920
|
progress_callback=progress_callback,
|
|
867
921
|
followup_index=followup_index,
|
|
868
922
|
)
|
|
869
|
-
|
|
870
|
-
raw,
|
|
923
|
+
revision_decider, checkpoint_state = _revision_decision_callback(
|
|
871
924
|
run_root,
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
run_root,
|
|
876
|
-
settings,
|
|
877
|
-
progress_callback=progress_callback,
|
|
878
|
-
followup_index=followup_index,
|
|
879
|
-
),
|
|
880
|
-
revision_decision_callback=revision_decider,
|
|
925
|
+
settings,
|
|
926
|
+
progress_callback=progress_callback,
|
|
927
|
+
followup_index=followup_index,
|
|
881
928
|
)
|
|
929
|
+
with settings_override(_loop_settings_for_run(settings)):
|
|
930
|
+
pkg = run_task(
|
|
931
|
+
raw,
|
|
932
|
+
run_root,
|
|
933
|
+
runner=runner_for_settings(settings, runner),
|
|
934
|
+
max_iterations=settings.max_improvement_loops,
|
|
935
|
+
pre_attempt_callback=_pre_attempt_checkpoint_callback(
|
|
936
|
+
run_root,
|
|
937
|
+
settings,
|
|
938
|
+
progress_callback=progress_callback,
|
|
939
|
+
followup_index=followup_index,
|
|
940
|
+
),
|
|
941
|
+
revision_decision_callback=revision_decider,
|
|
942
|
+
)
|
|
882
943
|
manifest = read_json(pkg / "package_manifest.json") if (pkg / "package_manifest.json").exists() else {}
|
|
883
944
|
status, _reason = _session_status_for_package(pkg)
|
|
884
945
|
actual_iterations = int(manifest.get("actual_iterations") or 1)
|
|
@@ -4,8 +4,8 @@ from pathlib import Path
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
from .schemas import RawTaskRecord, TaskIntakeSpec, TaskIntakeQualityReport
|
|
6
6
|
from .config import get_settings
|
|
7
|
-
from .io import read_json
|
|
8
|
-
from .openai_structured import get_model_provider_status, run_structured_role
|
|
7
|
+
from .io import read_json, write_json
|
|
8
|
+
from .openai_structured import extract_json_object, get_model_provider_status, run_structured_role
|
|
9
9
|
from .public_sanitizer import sanitize_public_obj, sha256_text
|
|
10
10
|
|
|
11
11
|
class LLMTaskIntakeOutput(BaseModel):
|
|
@@ -32,6 +32,46 @@ def _task_record_for_intake(raw: RawTaskRecord) -> dict:
|
|
|
32
32
|
data=raw.model_dump(mode='json')
|
|
33
33
|
return sanitize_public_obj(_drop_source_fields(data))
|
|
34
34
|
|
|
35
|
+
def _sanitize_intake_output_obj(obj):
    """Pass ``obj`` through ``_drop_source_fields`` and then ``sanitize_public_obj``."""
    without_source_fields = _drop_source_fields(obj)
    return sanitize_public_obj(without_source_fields)
|
|
37
|
+
|
|
38
|
+
def _sanitize_intake_role_artifacts(role_dir: Path) -> None:
    """Sanitize the intake role's saved output files in place.

    The two JSON outputs are re-written through
    ``_sanitize_intake_output_obj``; any error while doing so is ignored.
    The two raw text outputs are replaced by the sanitized JSON object
    extracted from them; when that fails, every ``SOURCE_FIELD_KEYS`` entry
    found in the text is replaced with ``reference_id`` instead.
    Files that do not exist are skipped.
    """
    json_outputs = ("parsed_output.json", "raw_parsed_output.json")
    for filename in json_outputs:
        json_path = role_dir / filename
        if not json_path.exists():
            continue
        try:
            write_json(json_path, _sanitize_intake_output_obj(read_json(json_path)))
        except Exception:
            # Errors on the structured outputs are swallowed on purpose here.
            pass

    raw_outputs = ("raw_output.txt", "raw_output.retry.txt")
    for filename in raw_outputs:
        raw_path = role_dir / filename
        if not raw_path.exists():
            continue
        try:
            extracted = extract_json_object(raw_path.read_text(errors="ignore"))
            sanitized_text = json.dumps(_sanitize_intake_output_obj(extracted), indent=2, sort_keys=True) + "\n"
            raw_path.write_text(sanitized_text)
        except Exception:
            # No usable JSON object: fall back to renaming source keys in the text.
            scrubbed = raw_path.read_text(errors="ignore")
            for source_key in SOURCE_FIELD_KEYS:
                scrubbed = scrubbed.replace(source_key, "reference_id")
            raw_path.write_text(scrubbed)
|
|
58
|
+
|
|
59
|
+
def _sanitize_spec_and_quality(
    spec: TaskIntakeSpec,
    quality: TaskIntakeQualityReport | None = None,
) -> tuple[TaskIntakeSpec, TaskIntakeQualityReport | None]:
    """Return sanitized copies of an intake spec and its optional quality report.

    The spec copy gets its ``metadata_json`` sanitized and its
    ``expected_pay`` / ``expected_apprentice_pay`` fields reset to ``None``.
    The quality report, when given, is copied with sanitized
    ``metadata_json``; otherwise ``None`` is returned in its place.
    """
    clean_spec_metadata = _sanitize_intake_output_obj(spec.metadata_json or {})

    if quality is None:
        clean_quality = None
    else:
        clean_quality_metadata = _sanitize_intake_output_obj(quality.metadata_json or {})
        clean_quality = quality.model_copy(update={"metadata_json": clean_quality_metadata})

    clean_spec = spec.model_copy(
        update={
            "metadata_json": clean_spec_metadata,
            "expected_pay": None,
            "expected_apprentice_pay": None,
        }
    )
    return clean_spec, clean_quality
|
|
74
|
+
|
|
35
75
|
def direct_task_sheet_metadata(raw: RawTaskRecord) -> dict[str, object]:
|
|
36
76
|
payload=raw.raw_payload or {}
|
|
37
77
|
expected_economic_value = raw.expected_economic_value or payload.get('expected_economic_value') or raw.expected_pay or payload.get('expected_pay')
|
|
@@ -94,12 +134,14 @@ def task_intake(raw: RawTaskRecord, role_root: Path | None=None) -> tuple[TaskIn
|
|
|
94
134
|
model_override=settings.llm_task_intake_model if provider == 'openai' else None
|
|
95
135
|
rr=run_structured_role('intake_agent', prompt, LLMTaskIntakeOutput, role_root/'intake_agent', allow_fallback=settings.allow_deterministic_eval_fallback, model_override=model_override, normalizer_context={'task_id': raw.raw_task_id.replace('raw_','task_'), 'task_title': raw.raw_title, 'task_instruction': raw.raw_description, 'model': model_override or settings.model_provider_model, 'provider':provider})
|
|
96
136
|
if rr.live_call_ok and rr.structured_output_validation_ok:
|
|
137
|
+
_sanitize_intake_role_artifacts(role_root/'intake_agent')
|
|
97
138
|
parsed=read_json(role_root/'intake_agent/parsed_output.json')
|
|
98
139
|
spec=TaskIntakeSpec.model_validate(parsed['task_intake_spec'])
|
|
99
140
|
spec=apply_direct_task_sheet_metadata(spec, raw)
|
|
100
141
|
q=TaskIntakeQualityReport.model_validate(parsed['task_intake_quality_report'])
|
|
101
142
|
spec.metadata_json.update({'intake_source':'llm','provider':rr.provider,'model':rr.model,'llm_prompt_ref_internal':str(role_root/'intake_agent/prompt.md'),'llm_response_ref_internal':str(role_root/'intake_agent/raw_output.txt'),'prompt_template_id':'task_intake_agent_v0','prompt_template_version':'0.1','prompt_hash':sha256_text(prompt),'public_response_summary':'Model-assisted task intake generated structured task spec.'})
|
|
102
143
|
q.metadata_json.update({'intake_source':'llm','role_result_ref_internal':str(role_root/'intake_agent/role_result.json')})
|
|
144
|
+
spec, q = _sanitize_spec_and_quality(spec, q)
|
|
103
145
|
return spec,q
|
|
104
146
|
if settings.rubric_mode == 'llm_required' or settings.llm_fail_closed:
|
|
105
147
|
raise RuntimeError(rr.error_message or 'Model task intake failed')
|
|
@@ -109,4 +151,5 @@ def task_intake(raw: RawTaskRecord, role_root: Path | None=None) -> tuple[TaskIn
|
|
|
109
151
|
spec,q=deterministic_intake(raw)
|
|
110
152
|
if settings.llm_task_intake_enabled and _mentor_provider_can_attempt():
|
|
111
153
|
spec.metadata_json.update({'intake_source':'deterministic_fallback','llm_unavailable':True,'provider':_mentor_provider_id()})
|
|
154
|
+
spec, q = _sanitize_spec_and_quality(spec, q)
|
|
112
155
|
return spec,q
|