npm - agent-apprenticeship - Versions diffs - 0.1.0 → 0.1.2 - Mend

agent-apprenticeship 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md +6 -15
package/bin/agent-apprenticeship.js +92 -13
package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/agent_apprenticeship_trace/__init__.py +1 -1
package/src/agent_apprenticeship_trace/apprentice_adapters.py +2 -1
package/src/agent_apprenticeship_trace/bundle_exporter.py +63 -3
package/src/agent_apprenticeship_trace/cli.py +392 -70
package/src/agent_apprenticeship_trace/codex_runner.py +46 -3
package/src/agent_apprenticeship_trace/config.py +16 -0
package/src/agent_apprenticeship_trace/openai_structured.py +6 -0
package/src/agent_apprenticeship_trace/public_run.py +118 -57
package/src/agent_apprenticeship_trace/task_intake.py +45 -2

package/src/agent_apprenticeship_trace/codex_runner.py CHANGED

@@ -1,4 +1,6 @@
 from __future__ import annotations
+import os
+import signal
 import shutil, subprocess
 from pathlib import Path
 import re
@@ -16,6 +18,35 @@ class AttemptResult(dict): pass
 CODEX_TRUST_RETRY_MESSAGE = "Codex refused to run because the workspace is not a trusted Git directory. Retrying with --skip-git-repo-check if supported."
+def _run_with_process_group_timeout(command, *, cwd: Path, timeout: int | None, shell: bool = False) -> subprocess.CompletedProcess:
+    process = subprocess.Popen(
+        command,
+        cwd=cwd,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        shell=shell,
+        start_new_session=True,
+    )
+    try:
+        stdout, stderr = process.communicate(timeout=timeout)
+        return subprocess.CompletedProcess(command, process.returncode, stdout, stderr)
+    except subprocess.TimeoutExpired as exc:
+        try:
+            os.killpg(process.pid, signal.SIGTERM)
+        except Exception:
+            process.kill()
+        try:
+            stdout, stderr = process.communicate(timeout=5)
+        except subprocess.TimeoutExpired:
+            try:
+                os.killpg(process.pid, signal.SIGKILL)
+            except Exception:
+                process.kill()
+            stdout, stderr = process.communicate()
+        raise subprocess.TimeoutExpired(command, timeout, output=stdout, stderr=stderr) from exc
 def _attempt_dir(package_root: Path, attempt_kind: str) -> Path:
     p=package_root/'attempts'/attempt_kind
     (p/'artifacts').mkdir(parents=True, exist_ok=True)
@@ -292,7 +323,7 @@ def run_codex_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSp
     stdout=''
     stderr=''
     try:
-        cp=subprocess.run(cmd, cwd=d, text=True, capture_output=True, timeout=timeout)
+        cp=_run_with_process_group_timeout(cmd, cwd=d, timeout=timeout)
         returncode=cp.returncode
         stdout=cp.stdout or ''
         stderr=cp.stderr or ''
@@ -308,7 +339,7 @@ def run_codex_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSp
                 skip_git_repo_check_supported=True,
                 ask_for_approval_supported=ask_supported,
             )
-            cp=subprocess.run(retry_cmd, cwd=d, text=True, capture_output=True, timeout=timeout)
+            cp=_run_with_process_group_timeout(retry_cmd, cwd=d, timeout=timeout)
             cmd=retry_cmd
             returncode=cp.returncode
             stdout=(stdout or '') + "\n" + (cp.stdout or '')
@@ -338,6 +369,18 @@ def run_codex_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeSp
     if contract_diagnostics:
         actual.metadata_json['apprentice_agent_contract_diagnostics']=contract_diagnostics
     op_error = _apprentice_operational_error(run_error, stdout, stderr, returncode)
+    if (
+        op_error
+        and isinstance(run_error, subprocess.TimeoutExpired)
+        and trace_valid
+        and actual.status == 'success'
+    ):
+        actual.metadata_json['apprentice_agent_warning'] = (
+            'Apprentice Agent process timed out after producing required outputs; '
+            'the produced trace and artifacts were preserved.'
+        )
+        trace.metadata_json['apprentice_agent_warning'] = actual.metadata_json['apprentice_agent_warning']
+        op_error = None
     if op_error and returncode not in (None, 0) and trace_valid and actual.status == 'success':
         op_error = f"Apprentice Agent exited nonzero after producing required outputs (exit code {returncode})."
     if op_error or not trace_valid:
@@ -387,7 +430,7 @@ def run_custom_attempt(package_root: Path, raw: RawTaskRecord, spec: TaskIntakeS
     run_error=None
     cp=None
     try:
-        cp=subprocess.run(command, cwd=d, text=True, capture_output=True, timeout=timeout or settings.task_timeout_seconds, shell=True)
+        cp=_run_with_process_group_timeout(command, cwd=d, timeout=timeout or settings.task_timeout_seconds, shell=True)
         stdout=redact_secrets(cp.stdout or '')
         stderr=redact_secrets(cp.stderr or '')
         (d/'stdout.txt').write_text(stdout)

package/src/agent_apprenticeship_trace/config.py CHANGED

@@ -3,6 +3,8 @@ from __future__ import annotations
 import os
 import shlex
 import shutil
+from contextlib import contextmanager
+from contextvars import ContextVar
 from pathlib import Path
 from typing import Any, Literal
@@ -28,6 +30,7 @@ DATA_SHARING_LEVELS: tuple[str, ...] = ("standard", "full-context")
 DEFAULT_APP_HOME = Path("~/.agent-apprenticeship").expanduser()
 DEFAULT_PUBLIC_ECOSYSTEM_REPO = "Forsy-AI/agent-apprenticeship"
 DEFAULT_PUBLIC_ECOSYSTEM_URL = f"https://github.com/{DEFAULT_PUBLIC_ECOSYSTEM_REPO}"
+_SETTINGS_OVERRIDE: ContextVar[Any] = ContextVar("agent_apprenticeship_settings_override", default=None)
 def normalize_mentor_mode(value: str | None, default: str = "model_assisted") -> str:
@@ -183,6 +186,15 @@ class Settings(BaseModel):
     llm_rubric_generation_enabled: bool = True
+@contextmanager
+def settings_override(settings: Settings):
+    token = _SETTINGS_OVERRIDE.set(settings)
+    try:
+        yield
+    finally:
+        _SETTINGS_OVERRIDE.reset(token)
 def app_home_from_env() -> Path:
     return Path(os.getenv("AA_HOME") or DEFAULT_APP_HOME).expanduser()
@@ -223,6 +235,10 @@ def default_settings(app_home: Path | None = None) -> Settings:
 def get_settings(root: Path | None = None) -> Settings:
+    if root is None:
+        override = _SETTINGS_OVERRIDE.get()
+        if override is not None:
+            return override
     load_local_env(root)
     stored = _stored_settings()
     home = Path(os.getenv("AA_HOME") or stored.get("app_home") or DEFAULT_APP_HOME).expanduser()

package/src/agent_apprenticeship_trace/openai_structured.py CHANGED

@@ -34,6 +34,12 @@ def _drop_local_pydantic_if_needed() -> dict[str, Any]:
     `openai`.
     """
     src = _repo_src_path()
+    # In an installed package, parents[1] is usually site-packages. Removing
+    # that path prevents the OpenAI SDK itself from being imported. Only strip
+    # the path when it looks like the old repo-local pydantic shim directory:
+    # it has pydantic but not the OpenAI SDK alongside it.
+    if not (Path(src) / "pydantic" / "__init__.py").exists() or (Path(src) / "openai").exists():
+        return {'removed_paths': [], 'removed_modules': {}}
     removed_paths = []
     for p in list(sys.path):
         if Path(p or '.').resolve().as_posix() == Path(src).resolve().as_posix():

package/src/agent_apprenticeship_trace/public_run.py CHANGED

@@ -8,7 +8,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 from .bundle_exporter import create_contribution_bundle
-from .config import Settings, apprentice_agent_display_name, apprentice_agent_readiness_status, get_settings
+from .config import Settings, apprentice_agent_display_name, apprentice_agent_readiness_status, get_settings, settings_override
 from .io import read_json, write_json
 from .loop import run_task
 from .mentor_checkpoints import write_mentor_checkpoints
@@ -18,6 +18,13 @@ from .schemas import RawTaskRecord
 from .session_events import append_session_event, backfill_session_event_task_ids, next_followup_index
+class RunInterrupted(Exception):
+    def __init__(self, run_root: Path, message: str = "Run interrupted by user."):
+        super().__init__(message)
+        self.run_root = run_root
+        self.message = message
 def slugify(text: str, fallback: str = "task") -> str:
     value = re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
     return (value[:64].strip("-") or fallback)
@@ -162,17 +169,15 @@ def _pre_attempt_checkpoint_callback(
     progress_callback: ProgressCallback | None,
     followup_index: int | None = None,
 ):
-    if not _human_checkpoint_mode(settings):
-        return None
     def _callback(_pkg: Path) -> None:
-        write_mentor_checkpoints(
-            run_root,
-            settings,
-            auto_approve=_checkpoint_auto_approve(settings),
-            stages=("task_intake", "rubric"),
-            preserve_interactive=followup_index is None,
-        )
+        if _human_checkpoint_mode(settings):
+            write_mentor_checkpoints(
+                run_root,
+                settings,
+                auto_approve=_checkpoint_auto_approve(settings),
+                stages=("task_intake", "rubric"),
+                preserve_interactive=followup_index is None,
+            )
         append_progress_event(
             run_root,
             "apprentice_attempt_started",
@@ -188,6 +193,26 @@ def _pre_attempt_checkpoint_callback(
     return _callback
+def _append_mentor_preparation_started(
+    run_root: Path,
+    settings: Settings,
+    *,
+    progress_callback: ProgressCallback | None,
+    followup_index: int | None = None,
+) -> None:
+    append_progress_event(
+        run_root,
+        "mentor_preparation_started",
+        run_id=run_root.name,
+        message=(f"Follow-up {followup_index} Mentor preparation started" if followup_index else "Mentor preparation started"),
+        current_loop=1,
+        maximum_improvement_loops=settings.max_improvement_loops,
+        phase="mentor_preparation",
+        metadata_json={"followup_index": followup_index} if followup_index else None,
+        callback=progress_callback,
+    )
 def _revision_decision_callback(
     run_root: Path,
     settings: Settings,
@@ -300,6 +325,36 @@ def runner_for_settings(settings: Settings, override: str | None = None) -> str:
     raise RuntimeError(f"Unsupported Apprentice Agent: {settings.worker_agent}")
+def _loop_settings_for_run(settings: Settings) -> Settings:
+    if settings.mentor_mode != "expert_led":
+        return settings
+    return settings.model_copy(
+        update={
+            "rubric_mode": "deterministic",
+            "llm_task_intake_enabled": False,
+            "llm_rubric_generation_enabled": False,
+            "llm_evaluator_enabled": False,
+            "llm_grader_enabled": False,
+            "llm_verifier_enabled": False,
+        }
+    )
+def _settings_for_session(settings: Settings, session: dict) -> Settings:
+    updates = {}
+    if session.get("mentor_mode"):
+        updates["mentor_mode"] = session["mentor_mode"]
+    if session.get("sensitive_info_masking"):
+        updates["sensitive_info_masking"] = session["sensitive_info_masking"]
+    if session.get("model_provider"):
+        updates["model_provider"] = session["model_provider"]
+    if session.get("max_improvement_loops"):
+        loops = int(session["max_improvement_loops"])
+        updates["max_improvement_loops"] = loops
+        updates["max_iterations"] = loops
+    return settings.model_copy(update=updates) if updates else settings
 def _session_status_for_package(pkg: Path) -> tuple[str, str | None]:
     actual_paths = sorted((pkg / "attempts").glob("*/actual_outputs.json"))
     if not actual_paths:
@@ -496,34 +551,44 @@ def run_prompt_task(
             **_experience_session_fields(experience_pack_refs),
         },
     )
-    if not _human_checkpoint_mode(settings):
+    _append_mentor_preparation_started(run_root, settings, progress_callback=progress_callback)
+    revision_decider, checkpoint_state = _revision_decision_callback(
+        run_root,
+        settings,
+        progress_callback=progress_callback,
+    )
+    try:
+        with settings_override(_loop_settings_for_run(settings)):
+            pkg = run_task(
+                raw,
+                run_root,
+                runner=runner_for_settings(settings, runner),
+                max_iterations=settings.max_improvement_loops,
+                pre_attempt_callback=_pre_attempt_checkpoint_callback(
+                    run_root,
+                    settings,
+                    progress_callback=progress_callback,
+                ),
+                revision_decision_callback=revision_decider,
+            )
+    except KeyboardInterrupt as exc:
         append_progress_event(
             run_root,
-            "apprentice_attempt_started",
+            "run_interrupted",
             run_id=run_id,
-            message="Apprentice attempt started",
+            message="Run interrupted by user.",
             current_loop=1,
             maximum_improvement_loops=settings.max_improvement_loops,
-            phase="apprentice_attempt",
+            phase="interrupted",
+            run_status="partial",
+            task_status="partial",
+            operational_error="Run interrupted by user.",
             callback=progress_callback,
         )
-    revision_decider, checkpoint_state = _revision_decision_callback(
-        run_root,
-        settings,
-        progress_callback=progress_callback,
-    )
-    pkg = run_task(
-        raw,
-        run_root,
-        runner=runner_for_settings(settings, runner),
-        max_iterations=settings.max_improvement_loops,
-        pre_attempt_callback=_pre_attempt_checkpoint_callback(
-            run_root,
-            settings,
-            progress_callback=progress_callback,
-        ),
-        revision_decision_callback=revision_decider,
-    )
+        session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
+        session.update({"run_status": "partial", "task_status": "partial", "status_reason": "Run interrupted by user."})
+        write_json(run_root / "session.json", {k: v for k, v in session.items() if v is not None})
+        raise RunInterrupted(run_root) from exc
     manifest = read_json(pkg / "package_manifest.json") if (pkg / "package_manifest.json").exists() else {}
     run_status, partial_reason = _session_status_for_package(pkg)
     actual_iterations = int(manifest.get("actual_iterations") or 1)
@@ -771,6 +836,8 @@ def continue_session(
     run_root = run_root_for(run_id, settings)
     if not run_root.exists():
         raise FileNotFoundError(f"Run not found: {run_id}")
+    session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
+    settings = _settings_for_session(settings, session)
     followup_index = next_followup_index(run_root)
     task_id = _session_task_id(run_root)
     append_progress_event(
@@ -836,7 +903,6 @@ def continue_session(
         )
     bundle: Path | None = None
     if run_loop:
-        session = read_json(run_root / "session.json") if (run_root / "session.json").exists() else {}
         original = session.get("task_instruction") or ""
         combined = (
             "Continue the same Agent Apprenticeship session.\n\n"
@@ -848,37 +914,32 @@ def continue_session(
             combined,
             _asset_abs_refs(run_root, asset_refs),
         )
-        if not _human_checkpoint_mode(settings):
-            append_progress_event(
-                run_root,
-                "apprentice_attempt_started",
-                run_id=run_root.name,
-                message=f"Follow-up {followup_index} Apprentice attempt started",
-                current_loop=1,
-                maximum_improvement_loops=settings.max_improvement_loops,
-                phase="apprentice_attempt",
-                metadata_json={"followup_index": followup_index},
-                callback=progress_callback,
-            )
-        revision_decider, checkpoint_state = _revision_decision_callback(
+        _append_mentor_preparation_started(
             run_root,
             settings,
             progress_callback=progress_callback,
             followup_index=followup_index,
         )
-        pkg = run_task(
-            raw,
+        revision_decider, checkpoint_state = _revision_decision_callback(
             run_root,
-            runner=runner_for_settings(settings, runner),
-            max_iterations=settings.max_improvement_loops,
-            pre_attempt_callback=_pre_attempt_checkpoint_callback(
-                run_root,
-                settings,
-                progress_callback=progress_callback,
-                followup_index=followup_index,
-            ),
-            revision_decision_callback=revision_decider,
+            settings,
+            progress_callback=progress_callback,
+            followup_index=followup_index,
         )
+        with settings_override(_loop_settings_for_run(settings)):
+            pkg = run_task(
+                raw,
+                run_root,
+                runner=runner_for_settings(settings, runner),
+                max_iterations=settings.max_improvement_loops,
+                pre_attempt_callback=_pre_attempt_checkpoint_callback(
+                    run_root,
+                    settings,
+                    progress_callback=progress_callback,
+                    followup_index=followup_index,
+                ),
+                revision_decision_callback=revision_decider,
+            )
         manifest = read_json(pkg / "package_manifest.json") if (pkg / "package_manifest.json").exists() else {}
         status, _reason = _session_status_for_package(pkg)
         actual_iterations = int(manifest.get("actual_iterations") or 1)

package/src/agent_apprenticeship_trace/task_intake.py CHANGED

@@ -4,8 +4,8 @@ from pathlib import Path
 from pydantic import BaseModel
 from .schemas import RawTaskRecord, TaskIntakeSpec, TaskIntakeQualityReport
 from .config import get_settings
-from .io import read_json
-from .openai_structured import get_model_provider_status, run_structured_role
+from .io import read_json, write_json
+from .openai_structured import extract_json_object, get_model_provider_status, run_structured_role
 from .public_sanitizer import sanitize_public_obj, sha256_text
 class LLMTaskIntakeOutput(BaseModel):
@@ -32,6 +32,46 @@ def _task_record_for_intake(raw: RawTaskRecord) -> dict:
     data=raw.model_dump(mode='json')
     return sanitize_public_obj(_drop_source_fields(data))
+def _sanitize_intake_output_obj(obj):
+    return sanitize_public_obj(_drop_source_fields(obj))
+def _sanitize_intake_role_artifacts(role_dir: Path) -> None:
+    for name in ("parsed_output.json", "raw_parsed_output.json"):
+        path = role_dir / name
+        if path.exists():
+            try:
+                write_json(path, _sanitize_intake_output_obj(read_json(path)))
+            except Exception:
+                pass
+    for name in ("raw_output.txt", "raw_output.retry.txt"):
+        path = role_dir / name
+        if not path.exists():
+            continue
+        try:
+            parsed = extract_json_object(path.read_text(errors="ignore"))
+            path.write_text(json.dumps(_sanitize_intake_output_obj(parsed), indent=2, sort_keys=True) + "\n")
+        except Exception:
+            text = path.read_text(errors="ignore")
+            for key in SOURCE_FIELD_KEYS:
+                text = text.replace(key, "reference_id")
+            path.write_text(text)
+def _sanitize_spec_and_quality(
+    spec: TaskIntakeSpec,
+    quality: TaskIntakeQualityReport | None = None,
+) -> tuple[TaskIntakeSpec, TaskIntakeQualityReport | None]:
+    spec_updates = {
+        "metadata_json": _sanitize_intake_output_obj(spec.metadata_json or {}),
+        "expected_pay": None,
+        "expected_apprentice_pay": None,
+    }
+    sanitized_quality = None
+    if quality is not None:
+        sanitized_quality = quality.model_copy(
+            update={"metadata_json": _sanitize_intake_output_obj(quality.metadata_json or {})}
+        )
+    return spec.model_copy(update=spec_updates), sanitized_quality
 def direct_task_sheet_metadata(raw: RawTaskRecord) -> dict[str, object]:
     payload=raw.raw_payload or {}
     expected_economic_value = raw.expected_economic_value or payload.get('expected_economic_value') or raw.expected_pay or payload.get('expected_pay')
@@ -94,12 +134,14 @@ def task_intake(raw: RawTaskRecord, role_root: Path | None=None) -> tuple[TaskIn
             model_override=settings.llm_task_intake_model if provider == 'openai' else None
             rr=run_structured_role('intake_agent', prompt, LLMTaskIntakeOutput, role_root/'intake_agent', allow_fallback=settings.allow_deterministic_eval_fallback, model_override=model_override, normalizer_context={'task_id': raw.raw_task_id.replace('raw_','task_'), 'task_title': raw.raw_title, 'task_instruction': raw.raw_description, 'model': model_override or settings.model_provider_model, 'provider':provider})
             if rr.live_call_ok and rr.structured_output_validation_ok:
+                _sanitize_intake_role_artifacts(role_root/'intake_agent')
                 parsed=read_json(role_root/'intake_agent/parsed_output.json')
                 spec=TaskIntakeSpec.model_validate(parsed['task_intake_spec'])
                 spec=apply_direct_task_sheet_metadata(spec, raw)
                 q=TaskIntakeQualityReport.model_validate(parsed['task_intake_quality_report'])
                 spec.metadata_json.update({'intake_source':'llm','provider':rr.provider,'model':rr.model,'llm_prompt_ref_internal':str(role_root/'intake_agent/prompt.md'),'llm_response_ref_internal':str(role_root/'intake_agent/raw_output.txt'),'prompt_template_id':'task_intake_agent_v0','prompt_template_version':'0.1','prompt_hash':sha256_text(prompt),'public_response_summary':'Model-assisted task intake generated structured task spec.'})
                 q.metadata_json.update({'intake_source':'llm','role_result_ref_internal':str(role_root/'intake_agent/role_result.json')})
+                spec, q = _sanitize_spec_and_quality(spec, q)
                 return spec,q
             if settings.rubric_mode == 'llm_required' or settings.llm_fail_closed:
                 raise RuntimeError(rr.error_message or 'Model task intake failed')
@@ -109,4 +151,5 @@ def task_intake(raw: RawTaskRecord, role_root: Path | None=None) -> tuple[TaskIn
     spec,q=deterministic_intake(raw)
     if settings.llm_task_intake_enabled and _mentor_provider_can_attempt():
         spec.metadata_json.update({'intake_source':'deterministic_fallback','llm_unavailable':True,'provider':_mentor_provider_id()})
+    spec, q = _sanitize_spec_and_quality(spec, q)
     return spec,q