npm - agent-cli-runtime - Versions diffs - 0.1.0-alpha.0 - Mend

agent-cli-runtime 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (151) hide show

package/CHANGELOG.md +51 -0
package/CONTRIBUTING.md +60 -0
package/LICENSE +202 -0
package/README.md +573 -0
package/README.zh-CN.md +571 -0
package/SECURITY.md +35 -0
package/dist/adapters/adapter-types.d.ts +138 -0
package/dist/adapters/adapter-types.js +2 -0
package/dist/adapters/adapter-types.js.map +1 -0
package/dist/adapters/claude.d.ts +2 -0
package/dist/adapters/claude.js +97 -0
package/dist/adapters/claude.js.map +1 -0
package/dist/adapters/codex.d.ts +3 -0
package/dist/adapters/codex.js +120 -0
package/dist/adapters/codex.js.map +1 -0
package/dist/adapters/opencode.d.ts +4 -0
package/dist/adapters/opencode.js +111 -0
package/dist/adapters/opencode.js.map +1 -0
package/dist/adapters/registry.d.ts +9 -0
package/dist/adapters/registry.js +23 -0
package/dist/adapters/registry.js.map +1 -0
package/dist/cli/main.d.ts +2 -0
package/dist/cli/main.js +978 -0
package/dist/cli/main.js.map +1 -0
package/dist/core/async-queue.d.ts +10 -0
package/dist/core/async-queue.js +49 -0
package/dist/core/async-queue.js.map +1 -0
package/dist/core/diagnostics.d.ts +20 -0
package/dist/core/diagnostics.js +4 -0
package/dist/core/diagnostics.js.map +1 -0
package/dist/core/event-contract.d.ts +32 -0
package/dist/core/event-contract.js +128 -0
package/dist/core/event-contract.js.map +1 -0
package/dist/core/events.d.ts +147 -0
package/dist/core/events.js +4 -0
package/dist/core/events.js.map +1 -0
package/dist/core/ids.d.ts +1 -0
package/dist/core/ids.js +5 -0
package/dist/core/ids.js.map +1 -0
package/dist/core/redaction.d.ts +4 -0
package/dist/core/redaction.js +51 -0
package/dist/core/redaction.js.map +1 -0
package/dist/core/runtime.d.ts +41 -0
package/dist/core/runtime.js +83 -0
package/dist/core/runtime.js.map +1 -0
package/dist/core/schema-contract.d.ts +55 -0
package/dist/core/schema-contract.js +143 -0
package/dist/core/schema-contract.js.map +1 -0
package/dist/detection/detect.d.ts +14 -0
package/dist/detection/detect.js +293 -0
package/dist/detection/detect.js.map +1 -0
package/dist/detection/env.d.ts +2 -0
package/dist/detection/env.js +15 -0
package/dist/detection/env.js.map +1 -0
package/dist/detection/executable-resolution.d.ts +12 -0
package/dist/detection/executable-resolution.js +50 -0
package/dist/detection/executable-resolution.js.map +1 -0
package/dist/detection/invocation.d.ts +9 -0
package/dist/detection/invocation.js +22 -0
package/dist/detection/invocation.js.map +1 -0
package/dist/goals/goal-scheduler.d.ts +31 -0
package/dist/goals/goal-scheduler.js +518 -0
package/dist/goals/goal-scheduler.js.map +1 -0
package/dist/goals/goal-store.d.ts +37 -0
package/dist/goals/goal-store.js +300 -0
package/dist/goals/goal-store.js.map +1 -0
package/dist/goals/goal-types.d.ts +103 -0
package/dist/goals/goal-types.js +2 -0
package/dist/goals/goal-types.js.map +1 -0
package/dist/goals/planner-prompts.d.ts +3 -0
package/dist/goals/planner-prompts.js +26 -0
package/dist/goals/planner-prompts.js.map +1 -0
package/dist/goals/task-graph.d.ts +9 -0
package/dist/goals/task-graph.js +229 -0
package/dist/goals/task-graph.js.map +1 -0
package/dist/goals/validation-runner.d.ts +7 -0
package/dist/goals/validation-runner.js +63 -0
package/dist/goals/validation-runner.js.map +1 -0
package/dist/index.d.ts +11 -0
package/dist/index.js +2 -0
package/dist/index.js.map +1 -0
package/dist/parsers/claude-stream-json.d.ts +11 -0
package/dist/parsers/claude-stream-json.js +102 -0
package/dist/parsers/claude-stream-json.js.map +1 -0
package/dist/parsers/codex-json.d.ts +8 -0
package/dist/parsers/codex-json.js +107 -0
package/dist/parsers/codex-json.js.map +1 -0
package/dist/parsers/line-buffer.d.ts +7 -0
package/dist/parsers/line-buffer.js +28 -0
package/dist/parsers/line-buffer.js.map +1 -0
package/dist/parsers/opencode-json.d.ts +8 -0
package/dist/parsers/opencode-json.js +72 -0
package/dist/parsers/opencode-json.js.map +1 -0
package/dist/parsers/plain-lines.d.ts +6 -0
package/dist/parsers/plain-lines.js +9 -0
package/dist/parsers/plain-lines.js.map +1 -0
package/dist/public-types.d.ts +143 -0
package/dist/public-types.js +2 -0
package/dist/public-types.js.map +1 -0
package/dist/runs/process-runner.d.ts +35 -0
package/dist/runs/process-runner.js +97 -0
package/dist/runs/process-runner.js.map +1 -0
package/dist/runs/prompt-transport.d.ts +10 -0
package/dist/runs/prompt-transport.js +43 -0
package/dist/runs/prompt-transport.js.map +1 -0
package/dist/runs/run-result.d.ts +9 -0
package/dist/runs/run-result.js +22 -0
package/dist/runs/run-result.js.map +1 -0
package/dist/runs/run-scheduler.d.ts +25 -0
package/dist/runs/run-scheduler.js +552 -0
package/dist/runs/run-scheduler.js.map +1 -0
package/dist/runs/run-store.d.ts +42 -0
package/dist/runs/run-store.js +297 -0
package/dist/runs/run-store.js.map +1 -0
package/dist/runs/run-types.d.ts +59 -0
package/dist/runs/run-types.js +2 -0
package/dist/runs/run-types.js.map +1 -0
package/dist/smoke/parser-samples.d.ts +17 -0
package/dist/smoke/parser-samples.js +186 -0
package/dist/smoke/parser-samples.js.map +1 -0
package/dist/storage/file-storage.d.ts +35 -0
package/dist/storage/file-storage.js +271 -0
package/dist/storage/file-storage.js.map +1 -0
package/dist/storage/jsonl-store.d.ts +9 -0
package/dist/storage/jsonl-store.js +138 -0
package/dist/storage/jsonl-store.js.map +1 -0
package/dist/storage/manifest-validation.d.ts +11 -0
package/dist/storage/manifest-validation.js +102 -0
package/dist/storage/manifest-validation.js.map +1 -0
package/dist/storage/storage-lease.d.ts +40 -0
package/dist/storage/storage-lease.js +223 -0
package/dist/storage/storage-lease.js.map +1 -0
package/dist/storage/storage-types.d.ts +55 -0
package/dist/storage/storage-types.js +2 -0
package/dist/storage/storage-types.js.map +1 -0
package/dist/storage/store-inspection.d.ts +28 -0
package/dist/storage/store-inspection.js +941 -0
package/dist/storage/store-inspection.js.map +1 -0
package/docs/api-schema-contract.md +92 -0
package/docs/compatibility.md +832 -0
package/docs/daemon-ready-contract.md +283 -0
package/docs/production-readiness.md +281 -0
package/docs/release-checklist.md +257 -0
package/docs/release-publish-runbook.md +201 -0
package/docs/release-report.md +517 -0
package/docs/ssot.md +1257 -0
package/examples/cli-dogfood.md +113 -0
package/examples/library-goal.js +94 -0
package/examples/library-run.js +84 -0
package/package.json +79 -0
package/scripts/dogfood.mjs +243 -0

package/docs/daemon-ready-contract.md ADDED Viewed

@@ -0,0 +1,283 @@
+# Daemon-Ready Execution Kernel Contract
+Status: P3-7 API / CLI schema freeze
+Last updated: 2026-06-22
+This document freezes the contract that a desktop product shell, local daemon, or other embedding process can rely on when using Agent CLI Runtime as a local-first execution kernel. P3-2 added an executable offline stability gate for that contract; P3-3 added a long-lived runtime resource safety gate for repeated embedding, event consumption, cancellation churn, shutdown, and reopen behavior; P3-7 centralizes the API and CLI schema versioning policy in [docs/api-schema-contract.md](./api-schema-contract.md). It is not a daemon implementation and does not add a hosted control plane API.
+## Positioning
+Agent CLI Runtime owns local execution kernel behavior:
+- detect local agent CLIs;
+- create run and goal lifecycles;
+- normalize live events and replay events;
+- cancel, time out, shut down, and classify terminal outcomes;
+- export redacted diagnostics;
+- inspect and repair the local JSON store when `storageDir` is supplied.
+The embedding daemon or product shell owns everything outside that kernel:
+- HTTP, IPC, or RPC API serving;
+- authentication, users, tenants, teams, and admission policy;
+- queue admission, prioritization, and multi-client coordination;
+- remote workers, Docker/SSH runners, and distributed scheduling;
+- UI, artifact viewers, workspaces, and product-specific object models;
+- telemetry, metrics, traces, audit sinks, database/WAL, compaction, and backups.
+## Runtime Instance Lifecycle
+Create one runtime instance per local writer process:
+```ts
+import { createAgentRuntime } from "agent-cli-runtime";
+const runtime = createAgentRuntime({
+  storageDir: ".agent-runtime",
+  storage: { durability: "fsync" },
+});
+```
+If `storageDir` is omitted, runs and goals are memory-only. If `storageDir` is supplied, the runtime writes run/goal manifests and replay JSONL under the local store and acquires a same-machine single-writer lease in `runtime.lock.json`.
+Embedding lifecycle rules:
+- create the runtime during daemon/process startup;
+- keep one writer runtime per `storageDir`;
+- call `runtime.shutdown(reason)` before process exit when possible;
+- treat process crash recovery as replay/diagnostics recovery, not live process resume;
+- use read-only inspection commands or facade methods for dashboards that should not acquire the writer lease.
+## P3-2 Executable Gate
+`npm run daemon:verify` is the P3-2 daemon embedding stability gate. It packs the current package, installs that tarball into a temporary consumer, creates fake local CLIs and temp storage, then exercises the installed package path:
+1. create a runtime with `storageDir`;
+2. detect the fake adapter and run fake conformance through the installed CLI;
+3. run a fake task;
+4. create a fake goal;
+5. replay run and goal events;
+6. inspect store health;
+7. export run and goal diagnostics;
+8. call `shutdown()`;
+9. reopen the same store and query terminal run/goal records.
+The gate emits `schemaVersion: "agent-runtime.daemonVerification.v1"` JSON with only redacted summary fields. It does not require real Codex, Claude Code, or OpenCode credentials, and it does not launch authenticated real agent runs.
+P3-2 also locks regression coverage for read-only inspection, live-owner isolation, shutdown/recovery terminal-event idempotence, and daemon-facing schema compatibility. It still does not implement HTTP, IPC, RPC, auth, users, tenants, remote workers, Docker/SSH, telemetry, database, WAL, compaction, or OpenDesign daemon parity.
+## P3-3 Resource Safety Gate
+`npm run runtime:safety` is the P3-3 long-lived runtime resource safety gate. It packs the current package, installs that tarball into a temporary consumer, creates a fake local CLI and temp storage, then exercises the installed package path under one embedded runtime:
+1. execute repeated fake runs and fake goals without active run/goal leaks;
+2. hold a slow event consumer while the fake CLI emits many JSON and text events, then verify terminal events and replay counts remain stable;
+3. churn multiple cancellations and a timeout/process-close race, verifying one `run_finished` per run;
+4. cancel a goal with running and queued tasks, verifying stable task states and one `goal_finished`;
+5. export noisy failure diagnostics with bounded, redacted stdout/stderr tails;
+6. call `shutdown()` repeatedly, verify terminal event counts do not grow, verify active state is empty, and verify the durable lease closes;
+7. reopen the same store and verify terminal records are queryable while active records are not falsely recovered.
+The gate emits `schemaVersion: "agent-runtime.runtimeSafety.v1"` JSON with redacted summary counts and statuses only. It does not include temp paths, prompts, raw corrupt lines, auth env assignments, token-looking values, or Bearer values. It uses fake CLIs only and does not require real Codex, Claude Code, or OpenCode credentials.
+P3-3 is intentionally still local-kernel hardening. It does not implement HTTP, IPC, RPC, auth, users, tenants, queue admission, remote workers, Docker/SSH, telemetry, database, WAL, compaction, UI/artifact layers, or OpenDesign daemon parity.
+## Writer Lease And Store Ownership
+The local lease is a best-effort same-machine writer guard. It is not a distributed lock, daemon consensus protocol, WAL, database transaction, or multi-host scheduler.
+Lease owner fields:
+- `runtimeInstanceId`
+- `pid`
+- `startedAt`
+- `heartbeatAt`
+- optional `closedAt`
+Runtime behavior:
+- a second writer for the same `storageDir` is refused while the current owner is live;
+- stale, closed, missing, or invalid owners may be taken over;
+- takeover records a redacted `AGENT_STORAGE_LEASE_TAKEOVER` diagnostic;
+- active run/goal manifests carry owner metadata and are heartbeated while active;
+- read-only inspection does not acquire the writer lease and must not mutate active work.
+## Run Lifecycle
+Daemon embedding sequence:
+1. `runtime.detect({ includeUnavailable: true })` or `runtime.detectStream()` to show local adapter state.
+2. `runtime.run({ agentId, cwd, prompt, permissionPolicy, ... })` to start local execution.
+3. Consume `handle.events` until `run_finished`.
+4. Use `runtime.cancelRun(runId)` or `handle.cancel(reason)` for user cancellation.
+5. Use `runtime.replayRunEvents(runId, { afterEventId })` for durable replay.
+6. Use `runtime.getRun(runId)` / `runtime.listRuns()` for status queries.
+7. Use `runtime.exportDiagnostics({ kind: "run", runId })` for support bundles.
+Run state is terminal when status is `succeeded`, `failed`, or `canceled`. The run result keeps its historical values `success`, `failed`, or `cancelled`; the daemon-facing terminal reason canonicalizes the spelling to `canceled`.
+## Goal Lifecycle
+Daemon embedding sequence:
+1. `runtime.createGoal({ cwd, objective, defaultAgentId, ... })` starts a planner run.
+2. Planner text is parsed as a validated task graph before tasks start.
+3. Dependency-ready tasks execute through the run scheduler.
+4. Consume `handle.events` until `goal_finished`.
+5. Use `runtime.cancelGoal(goalId)` or `handle.cancel(reason)` for user cancellation.
+6. Use `runtime.replayGoalEvents(goalId, { afterEventId })` for durable replay.
+7. Use `runtime.getGoal(goalId)` / `runtime.listGoals()` for status queries.
+8. Use `runtime.exportDiagnostics({ kind: "goal", goalId })` for support bundles and task attempt evidence.
+Goal state is terminal when status is `succeeded`, `failed`, or `canceled`. Pending or running tasks become `canceled` when a stale active goal is recovered or when cancellation/shutdown wins.
+## Event Replay Contract
+Library replay APIs return source-compatible `ReplayEvent<T>` records:
+- `id`
+- `sequence`
+- `timestamp`
+- `runId` or `goalId`
+- `event`
+CLI stream/replay JSONL wraps those records in a versioned envelope:
+```json
+{
+  "schemaVersion": "agent-runtime.event.v1",
+  "id": 1,
+  "sequence": 1,
+  "timestamp": 1760000000000,
+  "scope": { "kind": "run", "id": "run_123" },
+  "event": { "type": "run_finished", "result": "success", "timestamp": 1760000000000 },
+  "terminal": { "result": "success", "reason": "success" }
+}
+```
+Event replay rules:
+- `id` and `sequence` are monotonic within one run or one goal;
+- `afterEventId` returns only events whose `id` is greater than the cursor;
+- `scope.kind` is `run` or `goal`;
+- terminal events are appended at most once by cancel/shutdown/recovery paths;
+- `terminal` is present only for terminal events or scheduler errors that carry terminal semantics.
+## Schema Versioning
+Stable daemon-facing schemas:
+| Surface | Schema version | Stable top-level fields |
+| --- | --- | --- |
+| Event envelope | `agent-runtime.event.v1` | `schemaVersion`, `id`, `sequence`, `timestamp`, `scope`, `event`, optional `terminal` |
+| Diagnostics bundle | `agent-runtime.diagnostics.v1` | `schemaVersion`, `exportedAt`, `storageDir`, `subject`, `manifest`, `events`, `diagnostics`, `storageDiagnostics`, `consistencyWarnings`, optional `attemptEvidence`, `supervisorSummary`, `adapterSummary` |
+| Conformance report | `agent-runtime.conformance.v1` | `schemaVersion`, `ok`, `mode`, `agents` |
+| Real smoke summary | `agent-runtime.realSmoke.v1` | `schemaVersion`, `type`, `ok`, `mode`, `adapter`, `version`, `auth`, `modelsSource`, `runClassification`, `expectedTextRequired`, `expectedTextMatched`, `observedTextDeltaCount`, `observedTextTail`, `cwdMutationChecked`, `cwdMutated`, `diagnosticsCount`, `diagnostics`, `skippedReason`, `failureReason` |
+| Store health | `agent-runtime.storeHealth.v1` | `schemaVersion`, `ok`, `storageDir`, `checkedAt`, `lock`, `totals`, `corruptManifests`, `corruptEventLogs`, `partialTails`, `activeRecords`, `activeInterrupted`, `warnings`, `storageDiagnostics`, `diagnostics` |
+| Store repair | `agent-runtime.storeRepair.v1` | `schemaVersion`, `storageDir`, `checkedAt`, `dryRun`, `applied`, `ok`, optional `blockedReason`, `actions`, `diagnostics` |
+| CLI JSON error | `agent-runtime.cliError.v1` | `schemaVersion`, `ok`, `error` |
+| Release verification | `agent-cli-runtime.releaseVerification.v1` | `schemaVersion`, `ok`, `checkedFiles`, `tarball`, `diagnostics`, `artifactNames`, `gateEvidence`, `packageName`, `version` |
+| Release gate evidence | `agent-cli-runtime.releaseGateEvidence.v1` | `schemaVersion`, `generatedAt`, `gates`, `noAuthenticatedRealRun`, `noNpmPublish`, `noNpmToken` |
+Compatibility rules:
+- adding optional fields is compatible within the same schema version;
+- removing or renaming a stable field, or changing its type or semantics, requires a schema bump;
+- daemon callers should ignore unknown fields and branch on `schemaVersion`;
+- redaction guarantees are part of the schema semantics and must not regress without a schema bump and release note.
+The complete schema inventory, redaction rules, and classification fields are maintained in [docs/api-schema-contract.md](./api-schema-contract.md).
+## Failure Taxonomy
+Event terminal reasons use the `EventTerminalReason` vocabulary:
+- `success`
+- `failed`
+- `timeout`
+- `canceled`
+- `interrupted`
+- `validation_failed`
+- `execution_failed`
+- `unavailable`
+- `auth_missing`
+- `task_graph_invalid`
+Daemon-facing CLI and conformance classifications additionally use:
+- `real_run_skipped`
+- `unsupported_flag`
+- `unexpected_output`
+- `cwd_mutated`
+- `needs_verification`
+- `unavailable_executable`
+Canonical mapping notes:
+- `cancelled` is the historical run result spelling; `canceled` is the daemon-facing terminal reason/status spelling.
+- `scheduler_error` is an event type. Its terminal reason is derived from the scheduler diagnostic code, for example `AGENT_TASK_GRAPH_INVALID` maps to `task_graph_invalid`.
+- `unsupported_flag`, `unexpected_output`, and `cwd_mutated` are conformance/smoke classifications, not normal run terminal reasons.
+- `auth_missing`, `unavailable`, and `timeout` should be surfaced as user-actionable states by the embedding daemon.
+## Redaction Contract
+Daemon-facing JSON must not expose:
+- provider tokens, token-looking values, or auth env assignment values;
+- Bearer values;
+- full prompts or raw email/log/source payloads;
+- inherited environment dumps;
+- raw corrupt JSONL lines;
+- private absolute paths such as home directories or project `cwd`.
+Diagnostics may include redacted argv shapes, prompt transport labels, stream format, parsed event counts, exit code, signal, short stdout/stderr tails, retryability, and actionable hints.
+## Store Health And Repair
+`runtime.inspectStore()` and `agent-runtime store-health --json` are read-only. They scan manifests, event logs, lock state, owner state, corrupt JSONL lines, partial tails, terminal manifest/event mismatches, storage diagnostics, and active/interrupted records.
+`store-repair --dry-run` is the default non-mutating plan. `store-repair --apply` is explicit and:
+- refuses live writer owners;
+- acquires the local store lease while writing;
+- backs up event logs under `repair-backups/<timestamp>/...`;
+- rewrites through temp-file-and-rename with best-effort fsync;
+- truncates partial tails and isolates corrupt middle lines while keeping later valid events;
+- reports terminal manifest/event mismatches as requiring manual review;
+- records redacted success/failure repair diagnostics;
+- is idempotent after successful repair.
+Repair is not WAL replay, database recovery, daemon resume, or semantic reconciliation of terminal records.
+## Shutdown Semantics
+`runtime.shutdown(reason)`:
+- cancels active goals first, then active runs;
+- waits a short grace window for terminal events;
+- emits cancellation diagnostics and terminal events for still-active work;
+- clears active scheduler state;
+- closes the local storage lease when held.
+Shutdown and cancellation are idempotent around terminal events. Daemons should still handle a process crash by reopening the same store and inspecting interrupted records.
+## Active Recovery
+On startup with durable storage:
+- active runs owned by missing/stale/closed/invalid owners become `failed` with `AGENT_RUNTIME_INTERRUPTED` and signal `RUNTIME_RESTART`;
+- active goals owned by missing/stale/closed/invalid owners become `failed`;
+- pending/running tasks in recovered goals become `canceled`;
+- active records owned by a live owner are left untouched and surfaced through store health.
+The runtime never resumes a live child process after restart. It only makes durable state queryable, replayable, and diagnosable.
+## Root API Boundary
+The package root value API remains intentionally small:
+- exported value: `createAgentRuntime`;
+- public TypeScript types are exported for facade requests, records, events, diagnostics, conformance, and store inspection;
+- built-in adapters, parser helpers, stores, schedulers, and storage internals are not package-root value exports.
+`getAdapter(id)` and `RuntimeOptions.adapters` remain pre-alpha adapter experimentation points. This contract freezes daemon-facing embedding semantics, not a stable hosted platform API.

package/docs/production-readiness.md ADDED Viewed

@@ -0,0 +1,281 @@
+# Production Readiness
+Status: P3-11 current-head alpha release candidate evidence uses non-package evidence storage; human gate required
+Last updated: 2026-06-23
+This project is still **pre-alpha / developer preview**. P2-11 through P2-13 established release-candidate artifact verification, remote evidence closure, and alpha publish-readiness docs without publishing npm. P3-1 froze daemon-ready execution-kernel contracts for embedders in [docs/daemon-ready-contract.md](./daemon-ready-contract.md); P3-2 added an executable daemon embedding stability gate for the installed-package fake-CLI path; P3-3 added an installed-package long-lived runtime resource safety gate; P3-4 aligned CI and release-candidate artifacts so those gates are represented in remote release artifacts; P3-5 verified its workflow head SHA through a successful remote release-candidate workflow and downloaded artifact re-verification; P3-6 added a redacted opt-in real smoke evidence format for Codex, Claude Code, and OpenCode while keeping default release gates on detection/profile certification only; P3-7 freezes the API / CLI schema inventory and versioning policy in [docs/api-schema-contract.md](./api-schema-contract.md); P3-8 refreshed remote release-candidate evidence for target SHA `eb8de0f9b1edfa3f94c35a50b31005c5d3c105d4`; P3-9 locked evidence-target release-candidate evidence for target SHA `65fac505ca3eb830a06d8656068cf4ed5f6dd46a`.
+P3-11 keeps volatile current-head release-candidate evidence out of the npm package. Fresh run ids, artifact ids, artifact digests, tarball shasums, and pack shasums belong under `.release-evidence/`, while packaged docs keep stable release rules and the human-gated publish packet that stops at `npm publish --dry-run --ignore-scripts --tag alpha`. It still does not publish npm, configure trusted publishing, claim provenance, or add daemon/API server/database/WAL/remote-worker/UI/telemetry/artifact layers.
+## Local-First Production Definition
+For this repository, "production-ready local runtime" means:
+- single-machine execution only;
+- local CLI adapters only: Codex CLI, Claude Code, OpenCode, or caller-supplied compatible adapters;
+- explicit `cwd`, permission policy, and optional `storageDir`;
+- no silent permission escalation and no silent adapter fallback;
+- durable local manifests/events when `storageDir` is supplied;
+- one writer runtime per `storageDir` by default, with read-only inspection paths that do not acquire the writer lease;
+- explicit `store-repair --apply` for partial/corrupt JSONL event-log repair, with backup, temp-file-and-rename writes, best-effort fsync, and idempotent no-op behavior after repair;
+- crash-consistency behavior verified by test-only storage fault injection for temp writes, rename, JSONL append, fsync/fdatasync fallback, repair backup/rewrite, and lock acquire/close;
+- active run/goal reload is conservative: only records owned by a missing/stale/closed owner are marked interrupted, not resumed;
+- diagnostics are auditable and redacted before storage/export;
+- CLI event JSONL is versioned as `agent-runtime.event.v1` for both live stream and replay output;
+- store health, store repair, diagnostics, conformance, and CLI JSON usage errors are versioned as `agent-runtime.storeHealth.v1`, `agent-runtime.storeRepair.v1`, `agent-runtime.diagnostics.v1`, `agent-runtime.conformance.v1`, and `agent-runtime.cliError.v1`;
+- daemon/product shell embedding semantics are documented without adding a hosted daemon surface;
+- `npm run daemon:verify` packs and installs the package into a temporary consumer, then verifies fake run, fake goal, replay, diagnostics, store inspection, shutdown, and reopen using temp storage and fake CLIs;
+- `npm run runtime:safety` packs and installs the package into a temporary consumer, then verifies repeated run/goal execution, slow event consumption, cancel/timeout churn, bounded redacted diagnostics, repeated shutdown, lease close, and reopen behavior using fake CLIs only;
+- real CLI conformance and smoke default to detection/profile certification only; authenticated real agent runs require explicit `--allow-real-run`;
+- real smoke evidence uses `schemaVersion: "agent-runtime.realSmoke.v1"`, requires expected text for success, checks cwd mutation, and omits prompts, raw stdout/stderr, private cwd, tokens, and final run records;
+- release artifact verification uses `agent-cli-runtime.releaseVerification.v1`, release gate evidence uses `agent-cli-runtime.releaseGateEvidence.v1`, and both are covered by the schema versioning policy in [docs/api-schema-contract.md](./api-schema-contract.md);
+- `npm run dogfood` is the default release-candidate gate and does not launch authenticated real agent runs;
+- `npm run dogfood` also installs the packed tarball into a temporary TypeScript consumer, runs `tsc --noEmit`, and executes fake-CLI library run/goal/replay/diagnostics smoke;
+- `npm run prepublish:check` is the local prepublish guard, includes `npm run daemon:verify` and `npm run runtime:safety`, and also avoids authenticated real agent runs;
+- `npm run release:candidate` creates local release-candidate artifacts without publishing npm;
+- `npm run release:verify` validates local or downloaded release artifacts and emits stable redacted JSON;
+- `docs/release-publish-runbook.md` records the future alpha publish command path, 2FA/trusted publishing/provenance decisions, dist-tag checks, and rollback boundaries without configuring real publishing;
+- CLI JSON success and error contracts are parseable, redacted, and covered for core release-facing commands;
+- `npm test` uses Vitest verbose output for default contract coverage; slower installed-package gates and install smokes run through single-Node release gates or explicit opt-in checks rather than every Node matrix entry;
+- GitHub Actions CI runs Node.js 20/22/24 matrix checks plus one single-Node release-gates job for `npm run daemon:verify`, `npm run runtime:safety`, and `npm run dogfood`;
+- the manual release-candidate workflow is configured to upload the packed tarball, pack metadata, package file list, gate evidence JSON, and verification JSON with explicit artifact retention;
+- the release report records local commands, remote workflow evidence, downloaded artifact verification, package boundary, real CLI evidence boundaries, known risks, and non-goals;
+- validation evidence is replayable through goal manifests and diagnostics export.
+## Production Readiness Gates
+Offline gates:
+```bash
+npm test
+npm run typecheck
+npm run lint
+npm run build
+npm run daemon:verify
+npm run runtime:safety
+npm run ci
+npm run dogfood
+npm run prepublish:check
+npm run package:check
+npm run release:candidate -- --out-dir release-candidate
+npm run release:verify -- --dir release-candidate
+node ./dist/cli/main.js conformance --mode fixtures --json
+node ./dist/cli/main.js conformance --mode fake --json
+node ./dist/cli/main.js conformance --mode real --agent all --json
+node ./dist/cli/main.js smoke --mode real --agent codex --json
+node ./dist/cli/main.js store-health --storage-dir <temp-dir> --json
+node ./dist/cli/main.js store-repair --storage-dir <corrupt-fixture-temp-dir> --dry-run --json
+node ./dist/cli/main.js store-repair --storage-dir <corrupt-fixture-temp-dir> --apply --json
+node ./dist/cli/main.js store-health --storage-dir <corrupt-fixture-temp-dir> --json
+node ./dist/cli/main.js agents --json
+node ./dist/cli/main.js doctor --json
+npm pack --dry-run
+npm publish --dry-run --ignore-scripts --tag alpha
+```
+Remote CI gates:
+- `.github/workflows/ci.yml`: Node.js 20/22/24 matrix for typecheck, lint, tests, build, production dependency audit, package boundary checks, and pack dry-run; single Node.js 22 release-gates job for `npm run daemon:verify`, `npm run runtime:safety`, and `npm run dogfood`.
+- `.github/workflows/release-candidate.yml`: manual `workflow_dispatch` gate that runs `npm ci`, `npm run ci`, `npm run dogfood`, and `npm run release:candidate -- --out-dir release-candidate`; it uploads `agent-cli-runtime-tarball`, `agent-cli-runtime-pack-metadata`, `agent-cli-runtime-package-files`, `agent-cli-runtime-gate-evidence`, and `agent-cli-runtime-release-verification`. For P3-11 and later, the fresh workflow head SHA must match the commit being considered, the downloaded artifacts must pass `npm run release:verify -- --dir <normalized-artifact-dir>`, and volatile evidence must be recorded under `.release-evidence/` instead of package docs. Historical runs only prove their own head SHAs. The workflow does not publish and does not require an npm token.
+`npm publish --dry-run --ignore-scripts --tag alpha` is a manual local dry-run check. The explicit `--tag alpha` keeps dry-run output aligned with the pre-alpha release intent even when npm does not apply `publishConfig.tag` in dry-run output. It is intentionally documented but not required as a remote CI gate because npm dry-run output can vary by npm version and registry context.
+Package install smoke:
+```bash
+tmp_dir="$(mktemp -d /tmp/agent-runtime-release-XXXXXX)"
+pack_info="$(npm pack --json --ignore-scripts --pack-destination "$tmp_dir")"
+package_file="$(printf '%s' "$pack_info" | node -e "const data = JSON.parse(require('node:fs').readFileSync(0, 'utf8')); process.stdout.write(data[0].filename);")"
+(
+  cd "$tmp_dir"
+  npm init -y
+  npm install "$tmp_dir/$package_file" --no-save --ignore-scripts --no-audit --no-fund
+  node -e "import('agent-cli-runtime').then((m) => { if (typeof m.createAgentRuntime !== 'function') process.exit(1); })"
+  node /path/to/typescript/bin/tsc --noEmit
+  node consumer.mjs
+  node ./node_modules/.bin/agent-runtime conformance --mode fixtures --json
+  node ./node_modules/.bin/agent-runtime conformance --mode fake --json
+  node ./node_modules/.bin/agent-runtime smoke --mode fixtures --json
+)
+```
+The checked-in automated version of this smoke is `npm run dogfood`; it creates the temporary `consumer.ts`, `consumer.mjs`, fake adapter binary, and fake CLI environment itself. `daemon:verify` and `runtime:safety` run in the single-Node CI release-gates job and in `release:candidate`, not in every Node matrix entry.
+Manual real CLI run gate, only on a machine where the selected CLI is installed, authorized, and safe to run:
+```bash
+node ./dist/cli/main.js conformance --mode real --agent all --json
+node ./dist/cli/main.js smoke --mode real --agent codex --allow-real-run --expect-text <safe_text> --json
+node ./dist/cli/main.js smoke --mode real --agent claude --allow-real-run --expect-text <safe_text> --json
+node ./dist/cli/main.js smoke --mode real --agent opencode --allow-real-run --expect-text <safe_text> --json
+```
+With `--agent all`, one adapter's failure or skip stays isolated in the conformance summary. Real mode without `--allow-real-run` never launches a real agent run; it performs executable/version/auth/model/profile certification and returns `runClassification: "real_run_skipped"` when a run would require explicit authorization.
+`--allow-real-run` is the safety boundary. When it is present, the runtime may consume the selected local CLI account/network path. Without `--cwd`, conformance/smoke real runs use an isolated temporary cwd, request read-only behavior, require expected text by default, and check cwd mutation evidence. A custom `--prompt` or `--prompt-file` without `--expect-text` is intentionally classified as `unexpected_output`.
+Conformance JSON uses `schemaVersion: "agent-runtime.conformance.v1"`. Stable summary fields per adapter:
+- `adapter`
+- `version`
+- `resolvedExecutable`
+- `auth`
+- `modelsSource`
+- `capabilities`
+- `argvProfile`
+- `promptTransport`
+- `parserMode`
+- `runClassification`
+- `expectedTextMatched`
+- `observedTextTail`
+- `cwdMutationChecked`
+- `cwdMutated`
+- `diagnosticsCount`
+- `diagnostics`
+- `skippedReason`
+- `failureReason`
+The conformance and real-smoke layers report `unsupported_flag`, unfamiliar version/help shapes, and parser/stream failures as actionable diagnostics instead of guessing replacements. Unknown or unproven flags stay in `argvProfile.needsVerification` and may classify real smoke as `needs_verification` before launch. JSON output is recursively redacted and must not contain tokens, Bearer values, auth-token environment assignments, full prompts, raw private absolute paths, raw stdout/stderr, final real-smoke run records, or unredacted observed text tails.
+## Examples And Package Boundary
+The npm package may include docs and examples, but not local reference material or test fixtures.
+Included release-candidate artifacts:
+- `dist/`
+- `README.md`
+- `README.zh-CN.md`
+- `LICENSE`
+- `docs/daemon-ready-contract.md`
+- `docs/ssot.md`
+- `docs/compatibility.md`
+- `docs/production-readiness.md`
+- `docs/release-checklist.md`
+- `docs/release-report.md`
+- `docs/release-publish-runbook.md`
+- `examples/library-run.js`
+- `examples/library-goal.js`
+- `examples/cli-dogfood.md`
+- `scripts/dogfood.mjs`
+Repository-only release verification scripts:
+- `scripts/create-release-candidate.mjs`
+- `scripts/verify-release-artifacts.mjs`
+Repository-only daemon embedding gates:
+- `scripts/verify-daemon-ready.mjs`
+- `scripts/verify-runtime-safety.mjs`
+Repository-only prepublish artifacts:
+- `scripts/check-package-boundary.mjs`
+Excluded artifacts:
+- `.reference/`
+- `tests/`
+- `tests/fixtures/`
+- fault fixtures
+- raw corrupt samples
+- `repair-backups/`
+- raw real CLI output
+- real private paths
+- real provider secrets or token-looking values
+`examples/library-run.js` and `examples/library-goal.js` create local fake CLIs and are intended to run after `npm run build`. They must not require real Codex, Claude, OpenCode, provider credentials, or user project paths.
+## Known Risks
+- Real CLI behavior can drift after this release candidate. Treat `docs/compatibility.md` as dated evidence, not a permanent guarantee.
+- P3-10 verifies one remote release-candidate run and downloaded artifact re-verification for pre-documentation SHA `fdba3ebccb2e57a0ad295101028a2a3937a92204`. Because release docs are packaged, final publish evidence must come from a fresh workflow run after this evidence packet is committed. Historical P3-9 run `27943672095` only proves target SHA `65fac505ca3eb830a06d8656068cf4ed5f6dd46a`; historical P3-9 interim run `27942743285` only proves target SHA `a0299a7d81bb614661922bebc8c75496cf0a3d11` before the strict `fixtures?` package-boundary lock; historical P3-8 run `27940814340` only proves target SHA `eb8de0f9b1edfa3f94c35a50b31005c5d3c105d4`; historical P3-5 run `27932628093` only proves workflow head SHA `8d7bc2a19c626caa1ad5223acbcd35df34aff18e`; historical P2-12 run `27869580048` only proves commit `2f8832119b4ebdb8393077052560589a398ebf56`. Internal files under `dist/` may exist in the tarball for declarations and CLI execution, but importing internal subpaths is not a documented contract.
+- A status-only real smoke run that exits 0, a wrong expected text, or a custom prompt without `--expect-text` remains intentionally non-passing: the classification is `unexpected_output`.
+- Real conformance preflight can classify a local CLI as unavailable/auth-missing because of machine-specific executable, auth, network, or proxy state. That skip is useful compatibility evidence but is not a successful real run.
+- OpenCode explicit read-only/workspace-write flags, extra dirs, and session/resume mappings remain in `needsVerification`.
+- Claude Code authenticated run smoke remains dependent on local auth or a correctly configured Anthropic-compatible provider environment.
+- P3-6 adds opt-in real smoke evidence, but does not add authenticated real runs to CI, dogfood, prepublish, or release-candidate gates and does not implement scheduler expansion, daemon/API server, database, WAL, remote workers, web UI, telemetry, npm publish, trusted publishing configuration, provenance publishing, or guaranteed authenticated real-run success certification. Repair and fault-injection hardening remains local JSONL-only within the existing store layout.
+## Durable Supervisor Contract
+The runtime does not resume live processes after a process restart. When `storageDir` is supplied, `createAgentRuntime()` opens a local single-writer lease in `runtime.lock.json`. The owner includes `runtimeInstanceId`, `pid`, `startedAt`, and `heartbeatAt`; active run/goal manifests carry the same owner metadata. Heartbeat is enabled only for durable storage; memory-only runtimes do not create a lease.
+- a second writer runtime for the same `storageDir` is refused while the owner is live;
+- stale or closed lock owners may be taken over, with a redacted `AGENT_STORAGE_LEASE_TAKEOVER` diagnostic;
+- read-only inspection commands (`runs`, `goals`, `run-status`, `goal-status`, `replay-run`, `replay-goal`, `store-health`, `store-lock`, `diagnostics`) do not acquire the writer lease and must not mutate active work;
+- repair apply (`store-repair --apply`) requires an explicit storage directory, refuses live writer owners, holds the local store lease while writing, creates an internal `repair-backups/<timestamp>/...` backup through temp-file-and-rename, rewrites event logs through temp-file-and-rename, records redacted success or failure repair storage diagnostics, and is idempotent after successful repair;
+- repair backup failure leaves the original `events.jsonl` unchanged and does not set `applied: true`; rewrite failure preserves the backup path and leaves the original event log readable;
+- repair apply truncates partial tails and removes corrupt JSONL lines while preserving later valid replay events; terminal manifest/event mismatches remain manual-review warnings and are not auto-reconciled;
+- active runs owned by a stale/missing/closed owner become `failed` with `AGENT_RUNTIME_INTERRUPTED` and signal `RUNTIME_RESTART`;
+- active goals owned by a stale/missing/closed owner become `failed`, pending/running tasks become `canceled`, and a scheduler error is replayed;
+- active records owned by another live runtime are left untouched and reported through health/diagnostics owner status;
+- terminal events are appended once and replay remains ordered by `sequence`;
+- `shutdown()`, `cancelRun()`, and `cancelGoal()` are idempotent around terminal events;
+- `shutdown()` releases or marks the storage lease closed.
+Store health uses `schemaVersion: "agent-runtime.storeHealth.v1"` and includes `ok`, `storageDir`, `checkedAt`, `lock`, `totals`, `corruptManifests`, `corruptEventLogs`, `partialTails`, `activeRecords`, `activeInterrupted`, `warnings`, `storageDiagnostics`, and `diagnostics`. Store repair uses `schemaVersion: "agent-runtime.storeRepair.v1"`. CLI JSON usage errors use `schemaVersion: "agent-runtime.cliError.v1"` with `ok: false` and a short redacted error object.
+Diagnostics export uses `schemaVersion: "agent-runtime.diagnostics.v1"` and includes `diagnostics`, `storageDiagnostics`, `supervisorSummary`, `adapterSummary`, and goal `attemptEvidence` when present. `supervisorSummary` includes terminal reason, terminal event count, active reload recovery, owner/lease status, and task status counts for goals. It intentionally omits env values, prompts, raw corrupt lines, raw private paths, and tokens.
+## Event Schema Contract
+CLI `run --stream jsonl`, `goal --stream jsonl`, `replay-run --jsonl`, and `replay-goal --jsonl` emit the same event envelope:
+```json
+{
+  "schemaVersion": "agent-runtime.event.v1",
+  "id": 1,
+  "sequence": 1,
+  "timestamp": 1760000000000,
+  "scope": { "kind": "run", "id": "run_123" },
+  "event": { "type": "run_finished", "result": "success", "timestamp": 1760000000000 },
+  "terminal": { "result": "success", "reason": "success" }
+}
+```
+Library replay APIs remain source-compatible and return `ReplayEvent<T>`. Terminal reasons use the stable vocabulary `success`, `failed`, `timeout`, `canceled`, `interrupted`, `validation_failed`, `execution_failed`, `unavailable`, `auth_missing`, and `task_graph_invalid`.
+The `runtime.lock.json` storage lease is a same-machine best-effort guard for local embedded runtimes. It is not a daemon coordination protocol, distributed lock, WAL, group commit, database transaction layer, multi-host scheduler, or live process resume/session attachment.
+## Validation Contract
+Runtime-side validation runs after a task run succeeds and before task success is committed. Validation failure keeps task status monotonic: a task is not marked `succeeded` and then failed.
+Validation evidence records:
+- redacted command text;
+- redacted logical `cwd`;
+- timeout;
+- redacted caller env overrides only, not the inherited process env;
+- exit code, signal, stdout/stderr tails, duration, pass/fail, and classification.
+Timeout validation failures are classified as `timeout` and produce `AGENT_TIMEOUT` diagnostics. Other command failures produce `AGENT_EXECUTION_FAILED`.
+## Parser Noise Contract
+Codex, Claude, and OpenCode parsers are line-oriented JSON parsers. The contract is:
+- recognized structured events become normalized runtime events;
+- warning/log/noise lines are ignored;
+- partial JSON lines are buffered until complete;
+- corrupt lines are ignored and do not become `text_delta`;
+- unknown future structured event types are ignored.
+Non-JSON CLI noise must not become user-visible assistant text. Parser diagnostics should stay short, redacted, and aggregate-friendly when surfaced by higher layers.
+## Remaining Gap To OpenDesign Daemon-Level Production
+This repository deliberately does not include the following OpenDesign daemon layers:
+- daemon/API server lifecycle and multi-client coordination;
+- WAL, segmented logs, compaction, and transactional repair;
+- live process resume/session attachment after restart;
+- distributed execution, distributed locking, remote workers, Docker/SSH runtime, or queue leasing;
+- browser/UI surfaces, artifact viewers, project workspaces, and media pipelines;
+- telemetry pipeline, metrics database, tracing backend, or hosted analytics;
+- database-backed auth, tenancy, teams, policy management, or audit log service;
+- artifact/object model parity with OpenDesign design workspaces;
+- plugin marketplace or skill installation runtime.
+The intended production path is to harden this local adapter runtime first, then let a daemon or product shell own these larger layers explicitly.