npm - agent-apprenticeship - Versions diffs - 0.1.0 - Mend

agent-apprenticeship 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/LICENSE +21 -0
package/README.md +217 -0
package/bin/agent-apprenticeship.js +131 -0
package/package.json +30 -0
package/pyproject.toml +23 -0
package/src/agent_apprenticeship_trace/__init__.py +2 -0
package/src/agent_apprenticeship_trace/actual_outputs_normalizer.py +240 -0
package/src/agent_apprenticeship_trace/apprentice_adapters.py +348 -0
package/src/agent_apprenticeship_trace/artifact_capture.py +23 -0
package/src/agent_apprenticeship_trace/artifact_previews.py +80 -0
package/src/agent_apprenticeship_trace/artifact_resolver.py +142 -0
package/src/agent_apprenticeship_trace/batch_runner.py +116 -0
package/src/agent_apprenticeship_trace/bundle_exporter.py +254 -0
package/src/agent_apprenticeship_trace/certification.py +580 -0
package/src/agent_apprenticeship_trace/cli.py +2979 -0
package/src/agent_apprenticeship_trace/codex_runner.py +428 -0
package/src/agent_apprenticeship_trace/command_discovery.py +94 -0
package/src/agent_apprenticeship_trace/config.py +609 -0
package/src/agent_apprenticeship_trace/contract_diagnostics.py +69 -0
package/src/agent_apprenticeship_trace/env.py +46 -0
package/src/agent_apprenticeship_trace/evaluator.py +64 -0
package/src/agent_apprenticeship_trace/grader.py +194 -0
package/src/agent_apprenticeship_trace/integration_status.py +193 -0
package/src/agent_apprenticeship_trace/io.py +20 -0
package/src/agent_apprenticeship_trace/learning.py +627 -0
package/src/agent_apprenticeship_trace/lesson_extractor.py +5 -0
package/src/agent_apprenticeship_trace/llm_output_normalizer.py +467 -0
package/src/agent_apprenticeship_trace/loop.py +111 -0
package/src/agent_apprenticeship_trace/mentor_checkpoints.py +354 -0
package/src/agent_apprenticeship_trace/openai_structured.py +783 -0
package/src/agent_apprenticeship_trace/package_exporter.py +303 -0
package/src/agent_apprenticeship_trace/progress.py +223 -0
package/src/agent_apprenticeship_trace/public_run.py +1109 -0
package/src/agent_apprenticeship_trace/public_sanitizer.py +139 -0
package/src/agent_apprenticeship_trace/recipes.py +129 -0
package/src/agent_apprenticeship_trace/release_exporter.py +259 -0
package/src/agent_apprenticeship_trace/revision.py +21 -0
package/src/agent_apprenticeship_trace/role_runners.py +7 -0
package/src/agent_apprenticeship_trace/rubric_generation.py +75 -0
package/src/agent_apprenticeship_trace/schemas.py +273 -0
package/src/agent_apprenticeship_trace/session_events.py +99 -0
package/src/agent_apprenticeship_trace/task_intake.py +112 -0
package/src/agent_apprenticeship_trace/trace_normalizer.py +669 -0
package/src/agent_apprenticeship_trace/trace_prompt.py +51 -0
package/src/agent_apprenticeship_trace/training_signals.py +30 -0
package/src/agent_apprenticeship_trace/validation.py +210 -0
package/src/agent_apprenticeship_trace/verifier.py +55 -0

package/src/agent_apprenticeship_trace/batch_runner.py ADDED Viewed

@@ -0,0 +1,116 @@
+from __future__ import annotations
+import time, traceback, json
+from datetime import datetime, timezone
+from pathlib import Path
+from .schemas import RawTaskRecord
+from .io import read_jsonl, append_jsonl, write_json
+from .loop import run_task
+from .release_exporter import create_release
+from .public_sanitizer import classify_provider_failure
+from .validation import validate_release
+def _task_id_for_row(row: dict) -> str:
+    raw_id = str(row.get('raw_task_id') or row.get('task_id') or 'task_unknown')
+    payload = row.get('raw_payload') if isinstance(row.get('raw_payload'), dict) else {}
+    return str(payload.get('task_id') or row.get('task_id') or raw_id.replace('raw_', 'task_'))
+def _utc_now() -> str:
+    return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
+def _progress(run_root: Path, task_index: int, task_count: int, task_id: str, stage: str, status: str = 'ok', message: str | None = None) -> None:
+    msg = message or stage.replace('_', ' ')
+    print(f'[{task_index}/{task_count}] {msg}', flush=True)
+    append_jsonl(run_root/'progress.jsonl', {
+        'timestamp': _utc_now(),
+        'task_index': task_index,
+        'task_count': task_count,
+        'task_id': task_id,
+        'stage': stage,
+        'status': status,
+        'message': msg,
+    })
+def _task_scale_ready(package_path: Path) -> bool | None:
+    manifest = package_path/'package_manifest.json'
+    if not manifest.exists():
+        return None
+    try:
+        data=json.loads(manifest.read_text())
+        if 'scale_ready_task' in data:
+            return bool(data['scale_ready_task'])
+    except Exception:
+        return None
+    return None
+def run_batch(input_path: Path, output_root: Path, limit: int | None=None, resume=False, max_parallel=1, retry_limit=0, task_timeout_seconds=900, runner='deterministic', release_id: str | None=None, max_iterations: int | None=None) -> Path:
+    run_id=release_id or f'run_{int(time.time())}'
+    release_root=output_root/'releases'/run_id
+    if release_root.exists() and not resume:
+        raise RuntimeError('Release already exists. Use --resume or delete the existing output directory.')
+    rows=read_jsonl(input_path)[:limit]
+    task_ids=[_task_id_for_row(row) for row in rows]
+    duplicates=sorted({tid for tid in task_ids if task_ids.count(tid) > 1})
+    if duplicates:
+        raise RuntimeError('Duplicate task IDs found in input file: ' + ', '.join(duplicates))
+    run_root=output_root/'runs'/run_id; run_root.mkdir(parents=True, exist_ok=True); (run_root/'quarantine').mkdir(exist_ok=True)
+    checkpoint=run_root/'checkpoint.json'; done=set()
+    if resume and checkpoint.exists(): done=set(__import__('json').loads(checkpoint.read_text()).get('completed',[]))
+    completed=0; failed=0
+    total=len(rows)
+    for idx, row in enumerate(rows, start=1):
+        raw=RawTaskRecord.model_validate(row); tid=_task_id_for_row(row)
+        started=time.time()
+        _progress(run_root, idx, total, tid, 'starting', message=f'starting task_id={tid}')
+        if tid in done or raw.raw_task_id in done:
+            _progress(run_root, idx, total, tid, 'skipped', status='skipped', message='complete status=skipped')
+            append_jsonl(run_root/'batch_status.jsonl', {'task_id':tid,'status':'skipped','package_path':None,'error_type':None,'error_message':None,'duration_seconds':round(time.time()-started,3),'scale_ready_task':None})
+            continue
+        try:
+            _progress(run_root, idx, total, tid, 'baseline_started', message='baseline started')
+            pkg=run_task(raw, run_root, runner=runner, max_iterations=max_iterations)
+            _progress(run_root, idx, total, tid, 'baseline_complete', message='baseline complete')
+            _progress(run_root, idx, total, tid, 'evaluation_complete', message='evaluation complete')
+            _progress(run_root, idx, total, tid, 'revised_started', message='revised started')
+            _progress(run_root, idx, total, tid, 'revised_complete', message='revised complete')
+            done.add(pkg.name); done.add(tid); done.add(raw.raw_task_id)
+            error_type=None; error_message=None; final_status='completed'
+            try:
+                attempts=[json.loads((pkg/'attempts/baseline/actual_outputs.json').read_text()), json.loads((pkg/'attempts/revised/actual_outputs.json').read_text())]
+                if any((a.get('metadata_json') or {}).get('provider_failure_type')=='usage_limit' for a in attempts):
+                    final_status='failed'; error_type='ProviderUsageLimit'; error_message='Provider usage limit encountered during attempt.'
+                elif any(a.get('status') in ['failed','timeout','error'] for a in attempts):
+                    final_status='failed'; error_type='AttemptCompletedWithErrors'; error_message='One or more attempts completed with error status.'
+            except Exception as exc:
+                final_status='failed'; error_type=type(exc).__name__; error_message=str(exc)
+            if final_status == 'completed': completed += 1
+            else: failed += 1
+            _progress(run_root, idx, total, tid, 'package_exported', message='package exported')
+            _progress(run_root, idx, total, tid, 'complete', status=final_status, message=f'complete status={"ok" if final_status == "completed" else "failed"}')
+            append_jsonl(run_root/'batch_status.jsonl', {'task_id':pkg.name,'status':final_status,'package_path':str(pkg),'error_type':error_type,'error_message':error_message,'duration_seconds':round(time.time()-started,3),'scale_ready_task':_task_scale_ready(pkg)})
+        except Exception as e:
+            failed += 1
+            cls=classify_provider_failure(str(e))
+            etype=cls.get('error_type') or type(e).__name__
+            emsg=str(e)
+            _progress(run_root, idx, total, tid, 'complete', status='failed', message='complete status=failed')
+            append_jsonl(run_root/'batch_status.jsonl', {'task_id':tid,'status':'failed','package_path':None,'error_type':etype,'error_message':emsg,'duration_seconds':round(time.time()-started,3),'scale_ready_task':False})
+            (run_root/'quarantine'/f'{tid}.txt').write_text(traceback.format_exc())
+        write_json(checkpoint, {'completed':sorted(done)})
+    create_release(run_root, release_root)
+    counters=validate_release(release_root)
+    print('Run complete:', flush=True)
+    print(f'tasks_total={total}', flush=True)
+    print(f'tasks_completed={completed}', flush=True)
+    print(f'tasks_failed={failed}', flush=True)
+    print(f'release_path={release_root}', flush=True)
+    print(f'scale_ready={counters.get("scale_ready")}', flush=True)
+    print(f'scale_blockers={counters.get("scale_blockers")}', flush=True)
+    return run_root

package/src/agent_apprenticeship_trace/bundle_exporter.py ADDED Viewed

@@ -0,0 +1,254 @@
+from __future__ import annotations
+import shutil
+import tempfile
+import re
+from pathlib import Path
+from typing import Any
+from .config import Settings, get_settings, normalize_mentor_mode
+from .io import append_jsonl, read_json, read_jsonl, write_json
+from .public_sanitizer import sanitize_public_obj
+from .release_exporter import create_release
+def _count_jsonl(path: Path) -> int:
+    return len(read_jsonl(path))
+def _trace_step_count(path: Path) -> int:
+    return sum(len(row.get("steps") or []) for row in read_jsonl(path))
+def _is_nonempty_jsonl(path: Path) -> bool:
+    return _count_jsonl(path) > 0
+def _copy_jsonl_if_nonempty(src: Path, dst: Path) -> bool:
+    if not src.exists() or not _is_nonempty_jsonl(src):
+        return False
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    dst.write_text("")
+    for row in read_jsonl(src):
+        append_jsonl(dst, sanitize_public_obj(row) if isinstance(row, dict) else row)
+    return True
+def _sanitize_bundle_json(data: Any) -> Any:
+    if isinstance(data, dict):
+        return sanitize_public_obj(data)
+    if isinstance(data, list):
+        return [sanitize_public_obj(row) if isinstance(row, dict) else row for row in data]
+    return data
+def _copy_json_if_exists(src: Path, dst: Path) -> bool:
+    if not src.exists():
+        return False
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    write_json(dst, _sanitize_bundle_json(read_json(src)))
+    return True
+def _run_status(release_root: Path) -> str:
+    rows = read_jsonl(release_root / "packages_index.jsonl")
+    statuses = [row.get("task_status") for row in rows if row.get("task_status")]
+    if statuses and all(status == "completed" for status in statuses):
+        return "completed"
+    if statuses and all(status == "failed" for status in statuses):
+        return "failed"
+    return "partial" if statuses else "failed"
+def contribution_counts(release_root: Path) -> dict[str, int]:
+    return {
+        "attempts": _count_jsonl(release_root / "actual_outputs.jsonl"),
+        "traced_steps": _trace_step_count(release_root / "agent_traces.jsonl"),
+        "process_supervision_rows": _count_jsonl(release_root / "process_supervision.jsonl"),
+        "reward_modeling_rows": _count_jsonl(release_root / "reward_modeling.jsonl"),
+        "revision_preference_pairs": _count_jsonl(release_root / "revision_preference_pairs.jsonl"),
+    }
+def _write_card(bundle_root: Path) -> None:
+    (bundle_root / "contribution_card.md").write_text(
+        "# Agent Apprenticeship Contribution Bundle\n\n"
+        "This local bundle packages an apprenticeship session for review and later contribution. "
+        "It includes session events, task instructions, traces, actual outputs, evaluation results, "
+        "and learning-data views when available.\n\n"
+        "No automatic upload is performed. Review this folder locally before sharing.\n\n"
+        "To contribute, submit the bundle to the Agent Apprenticeship ecosystem repo, "
+        "or get help in Slack: https://join.slack.com/t/fsycommunity/shared_invite/zt-37417grrb-jFD6BQIYgC5wEMrW2bHssw\n"
+    )
+def _session_mentor_mode(session: dict[str, Any], settings: Settings) -> str:
+    return normalize_mentor_mode(session.get("mentor_mode") or session.get("evaluation_mode") or settings.mentor_mode)
+def _clean_session_metadata(session: dict[str, Any], settings: Settings) -> dict[str, Any]:
+    allowed = {
+        "run_id",
+        "session_id",
+        "run_status",
+        "task_status",
+        "task_id",
+        "task_instruction",
+        "task_assets",
+        "latest_package",
+        "latest_attempt_id",
+        "max_improvement_loops",
+        "apprentice_agent",
+        "model_provider",
+        "status_reason",
+        "session_status",
+    }
+    clean = {k: v for k, v in session.items() if k in allowed and v is not None}
+    clean["mentor_mode"] = _session_mentor_mode(session, settings)
+    return clean
+def _copy_session_files(run_root: Path, bundle_root: Path, settings: Settings) -> None:
+    if (run_root / "session_events.jsonl").exists():
+        shutil.copy2(run_root / "session_events.jsonl", bundle_root / "session_events.jsonl")
+    if (run_root / "session.json").exists():
+        write_json(bundle_root / "session_metadata.json", _clean_session_metadata(read_json(run_root / "session.json"), settings))
+    if (run_root / "task").exists():
+        shutil.copytree(run_root / "task", bundle_root / "task", dirs_exist_ok=True)
+    if (run_root / "mentor_checkpoints").exists():
+        shutil.copytree(run_root / "mentor_checkpoints", bundle_root / "mentor_checkpoints", dirs_exist_ok=True)
+def _copy_clean_bundle_files(release_root: Path, bundle_root: Path, include_debug: bool) -> None:
+    for dirname in ["task", "traces", "outputs", "evaluation", "learning_data"]:
+        (bundle_root / dirname).mkdir(parents=True, exist_ok=True)
+    _copy_jsonl_if_nonempty(release_root / "agent_traces.jsonl", bundle_root / "traces" / "agent_traces.jsonl")
+    _copy_jsonl_if_nonempty(release_root / "raw_agent_traces.jsonl", bundle_root / "traces" / "raw_agent_traces.jsonl")
+    _copy_jsonl_if_nonempty(release_root / "actual_outputs.jsonl", bundle_root / "outputs" / "actual_outputs.jsonl")
+    _copy_json_if_exists(release_root / "artifacts_index.json", bundle_root / "outputs" / "artifacts_index.json")
+    for name in [
+        "grader_results.jsonl",
+        "verifier_results.jsonl",
+        "evaluator_feedback.jsonl",
+        "revision_plans.jsonl",
+        "hillclimb_results.jsonl",
+    ]:
+        _copy_jsonl_if_nonempty(release_root / name, bundle_root / "evaluation" / name)
+    for name in [
+        "process_supervision.jsonl",
+        "reward_modeling.jsonl",
+        "revision_preference_pairs.jsonl",
+        "training_signals.jsonl",
+    ]:
+        _copy_jsonl_if_nonempty(release_root / name, bundle_root / "learning_data" / name)
+    if include_debug:
+        copied = False
+        for name in [
+            "trace_normalization_reports.jsonl",
+            "actual_outputs_normalization_reports.jsonl",
+            "role_results_index.jsonl",
+        ]:
+            copied = _copy_jsonl_if_nonempty(release_root / name, bundle_root / "debug" / name) or copied
+        copied = _copy_json_if_exists(release_root / "quality_report.json", bundle_root / "debug" / "quality_report.json") or copied
+        if not copied and (bundle_root / "debug").exists():
+            shutil.rmtree(bundle_root / "debug")
+def _first_jsonl_row(path: Path) -> dict[str, Any]:
+    rows = read_jsonl(path) if path.exists() else []
+    for row in rows:
+        if isinstance(row, dict):
+            return row
+    return {}
+def _slug(text: str) -> str:
+    return re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")[:80].strip("-") or "bundle"
+def _list_value(value: Any) -> list[str]:
+    if value is None:
+        return []
+    if isinstance(value, list):
+        return [str(v) for v in value if v]
+    return [str(value)]
+def _bundle_metadata_shape(bundle_root: Path, release_root: Path, settings: Settings) -> dict[str, Any]:
+    counts = contribution_counts(release_root)
+    session = read_json(bundle_root / "session_metadata.json") if (bundle_root / "session_metadata.json").exists() else {}
+    task = _first_jsonl_row(release_root / "tasks.jsonl")
+    run_status = session.get("run_status") or _run_status(release_root)
+    task_status = session.get("task_status") or run_status
+    instruction = str(session.get("task_instruction") or "")
+    title = (
+        task.get("normalized_title")
+        or task.get("raw_title")
+        or (instruction.strip().splitlines()[0][:90] if instruction.strip() else None)
+        or "Agent Apprenticeship session"
+    )
+    run_id = str(session.get("run_id") or bundle_root.parent.name or _slug(title))
+    role = task.get("agent_apprentice_role") or task.get("apprenticeship_role")
+    metadata = {
+        "bundle_id": f"aa-bundle-{_slug(run_id)}",
+        "title": title,
+        **counts,
+        "domains": _list_value(task.get("domain")),
+        "subdomains": _list_value(task.get("subdomain")),
+        "agent_apprentice_role": role,
+        "expected_economic_value": task.get("expected_economic_value"),
+        "expected_economic_value_for_agent_apprentice": task.get("expected_economic_value_for_agent_apprentice"),
+        "mentor_mode": _session_mentor_mode(session, settings),
+        "task_status": task_status,
+        "run_status": run_status,
+    }
+    return metadata
+def _write_manifest(bundle_root: Path, release_root: Path, settings: Settings, include_internal_schema: bool = False) -> None:
+    manifest = _bundle_metadata_shape(bundle_root, release_root, settings)
+    release_manifest: dict[str, Any] = {}
+    if (release_root / "dataset_manifest.json").exists():
+        release_manifest = read_json(release_root / "dataset_manifest.json")
+    session = read_json(bundle_root / "session_metadata.json") if (bundle_root / "session_metadata.json").exists() else {}
+    if session.get("status_reason") and manifest.get("run_status") != "completed":
+        manifest["status_reason"] = session["status_reason"]
+    if include_internal_schema:
+        manifest["source_release_schema_version"] = release_manifest.get("schema_version")
+    write_json(bundle_root / "contribution_manifest.json", manifest)
+def create_contribution_bundle(
+    run_root: Path,
+    bundle_root: Path | None = None,
+    settings: Settings | None = None,
+    include_debug: bool = False,
+    release_style: bool = False,
+) -> Path:
+    settings = settings or get_settings()
+    bundle_root = bundle_root or (run_root / "contribution_bundle")
+    if bundle_root.exists():
+        shutil.rmtree(bundle_root)
+    bundle_root.mkdir(parents=True, exist_ok=True)
+    if release_style:
+        create_release(run_root, bundle_root)
+        _copy_session_files(run_root, bundle_root, settings)
+        _write_manifest(bundle_root, bundle_root, settings, include_internal_schema=True)
+        _write_card(bundle_root)
+        return bundle_root
+    with tempfile.TemporaryDirectory(prefix="aa-release-for-bundle-") as tmp:
+        release_root = Path(tmp) / "release"
+        create_release(run_root, release_root)
+        _copy_session_files(run_root, bundle_root, settings)
+        _copy_clean_bundle_files(release_root, bundle_root, include_debug=include_debug)
+        _write_manifest(bundle_root, release_root, settings, include_internal_schema=include_debug)
+        _write_card(bundle_root)
+    return bundle_root