npm - cclaw-cli - Versions diffs - 0.15.1 → 0.21.0 - Mend

cclaw-cli 0.15.1 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/dist/artifact-linter.js +154 -0
package/dist/cli.js +2 -1
package/dist/constants.d.ts +2 -2
package/dist/constants.js +4 -3
package/dist/content/compound-command.d.ts +2 -0
package/dist/content/compound-command.js +72 -0
package/dist/content/contracts.js +1 -1
package/dist/content/doctor-references.js +7 -6
package/dist/content/feature-command.js +54 -51
package/dist/content/harnesses-doc.js +5 -3
package/dist/content/hooks.js +2 -2
package/dist/content/ideate-command.d.ts +2 -0
package/dist/content/ideate-command.js +73 -0
package/dist/content/learnings.d.ts +1 -1
package/dist/content/learnings.js +22 -5
package/dist/content/meta-skill.js +6 -3
package/dist/content/next-command.js +5 -5
package/dist/content/observe.js +3 -2
package/dist/content/ops-command.js +4 -4
package/dist/content/protocols.js +27 -38
package/dist/content/retro-command.js +2 -1
package/dist/content/rewind-command.d.ts +0 -1
package/dist/content/rewind-command.js +19 -33
package/dist/content/skills.js +14 -8
package/dist/content/stage-schema.js +3 -38
package/dist/content/stages/plan.js +16 -5
package/dist/content/stages/review.js +20 -0
package/dist/content/stages/scope.js +9 -3
package/dist/content/stages/ship.js +1 -0
package/dist/content/stages/tdd.js +5 -4
package/dist/content/templates.js +105 -9
package/dist/content/utility-skills.d.ts +3 -1
package/dist/content/utility-skills.js +91 -1
package/dist/delegation.d.ts +33 -3
package/dist/delegation.js +56 -3
package/dist/doctor.js +269 -88
package/dist/feature-system.d.ts +22 -5
package/dist/feature-system.js +267 -126
package/dist/harness-adapters.js +17 -1
package/dist/install.js +10 -8
package/dist/policy.js +13 -4
package/package.json +1 -1

package/dist/content/stages/review.js CHANGED

@@ -168,6 +168,26 @@ export const REVIEW = {
             ],
             stopGate: true
         },
+        {
+            title: "Specialist Lens: Data & Migration Safety",
+            evaluationPoints: [
+                "Schema/data migrations are reversible and include backfill/rollback strategy",
+                "Idempotency expectations are explicit for retryable flows",
+                "Data-loss scenarios (truncate/overwrite/drop) are guarded by checks or dry-runs",
+                "Boundary contracts (API/schema/event payload) maintain backward compatibility or are versioned"
+            ],
+            stopGate: false
+        },
+        {
+            title: "Specialist Lens: Developer Experience",
+            evaluationPoints: [
+                "New behavior includes discoverable docs/usage notes where needed",
+                "Error messages are actionable for on-call and local debugging",
+                "Default configuration remains safe and unsurprising",
+                "Change footprint stays minimal and avoids hidden coupling"
+            ],
+            stopGate: false
+        },
         {
             title: "Meta-Review: Verify the Verification",
             evaluationPoints: [

package/dist/content/stages/scope.js CHANGED

@@ -50,6 +50,7 @@ export const SCOPE = {
         "Run mode-specific analysis that matches the selected scope mode.",
         "Walk through scope review sections one at a time.",
         "Write explicit scope contract, discretion areas, and deferred items.",
+        "Freeze non-negotiable boundaries as stable Locked Decisions (D-XX IDs).",
         "Produce scope summary plus completion dashboard (checklist findings, number of resolved decisions, unresolved items or `None`)."
     ],
     requiredGates: [
@@ -65,6 +66,7 @@ export const SCOPE = {
         "In-scope and out-of-scope lists are explicit.",
         "Discretion areas are explicit (or marked as `None`).",
         "Selected mode and rationale are documented.",
+        "Locked Decisions section lists stable D-XX IDs for non-negotiable boundaries.",
         "Premise challenge findings documented.",
         "Deferred items list with one-line rationale for each.",
         "Completion dashboard lists checklist findings, decision count, and unresolved items (or `None`)."
@@ -90,6 +92,7 @@ export const SCOPE = {
         "discretion areas recorded explicitly",
         "required gates marked satisfied",
         "deferred list recorded explicitly",
+        "locked decisions captured with stable D-XX IDs",
         "completion dashboard produced",
         "scope summary produced"
     ],
@@ -100,7 +103,8 @@ export const SCOPE = {
         "Sycophantic agreement without evidence-based pushback",
         "Hedged recommendations that avoid taking a position",
         "Batching multiple scope issues into one question",
-        "Re-arguing for smaller scope after user rejects reduction"
+        "Re-arguing for smaller scope after user rejects reduction",
+        "Using scope-reduction placeholders (`v1`, `for now`, `we can do later`) instead of explicit user-approved boundaries"
     ],
     redFlags: [
         "No selected mode in artifact",
@@ -109,9 +113,10 @@ export const SCOPE = {
         "No deferred/not-in-scope section",
         "No user approval marker",
         "Premise challenge missing or superficial",
-        "No implementation alternatives evaluated"
+        "No implementation alternatives evaluated",
+        "Missing Locked Decisions section or decisions without D-XX IDs"
     ],
-    policyNeedles: ["Scope mode", "In Scope", "Out of Scope", "Discretion Areas", "NOT in scope", "Premise Challenge"],
+    policyNeedles: ["Scope mode", "In Scope", "Out of Scope", "Discretion Areas", "NOT in scope", "Premise Challenge", "Locked Decisions"],
     artifactFile: "02-scope.md",
     next: "design",
     reviewSections: [
@@ -173,6 +178,7 @@ export const SCOPE = {
         { section: "Prime Directives", required: true, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
         { section: "Premise Challenge", required: true, validationRule: "Must contain explicit answers to: right problem? direct path? what if nothing?" },
         { section: "Requirements", required: true, validationRule: "Table of stable requirement IDs (R1, R2, R3…) one per row with observable outcome, priority, and source. IDs are assigned once and never renumbered across scope/design/spec/plan/review; dropped requirements stay with Priority `DROPPED`." },
+        { section: "Locked Decisions (D-XX)", required: false, validationRule: "List of stable locked decisions with IDs D-01, D-02... Each ID appears once, includes rationale, and is intended for downstream cross-stage traceability." },
         { section: "Implementation Alternatives", required: true, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
         { section: "Scope Mode", required: true, validationRule: "Must state selected mode and rationale with default heuristic justification." },
         { section: "Mode-Specific Analysis", required: true, validationRule: "Must document the analysis matching the selected scope mode: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope baseline then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split)." },

package/dist/content/stages/ship.js CHANGED

@@ -26,6 +26,7 @@ export const SHIP = {
         "Re-run tests on merged result — if merging locally, run the full test suite AFTER the merge, not just before. Post-merge failures are common.",
         "Generate release notes — summarize what changed, why, and what it affects. Reference spec criteria. Include: breaking changes, new dependencies, migration steps if any.",
         "Write rollback plan — trigger conditions (what tells you it is broken), rollback steps (exact commands/git operations), and verification (how to confirm rollback worked).",
+        "Load utility skills — `verification-before-completion` for fresh evidence and `finishing-a-development-branch` for finalization workflow.",
         "Monitoring checklist — what should be watched after deploy? Error rates, latency, key business metrics. If no monitoring exists, flag it as a risk.",
         "Select finalization mode — exactly ONE enum: (A) FINALIZE_MERGE_LOCAL, (B) FINALIZE_OPEN_PR, (C) FINALIZE_KEEP_BRANCH, (D) FINALIZE_DISCARD_BRANCH. For discard: list what will be deleted, require typed confirmation.",
         "Execute finalization — perform the selected action. For merge: verify clean merge. For PR: include structured body (summary, test plan, rollback). For discard: verify deletion.",

package/dist/content/stages/tdd.js CHANGED

@@ -22,9 +22,9 @@ export const TDD = {
     checklist: [
         "Select plan slice — pick one task from the plan. Do not batch multiple tasks.",
         "Map to acceptance criterion — identify the specific spec criterion this test proves.",
-        "RED: Write behavior-focused test — test the expected behavior, not implementation details. Tests MUST fail.",
+        "Dispatch mandatory `test-author` subagent in `TEST_RED_ONLY` mode — produce failing behavior tests and RED evidence only (no production edits).",
         "RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
-        "GREEN: Minimal implementation — write the smallest code change that makes the RED tests pass. No extra features.",
+        "Dispatch `test-author` subagent in `BUILD_GREEN_REFACTOR` mode — minimal implementation + full-suite GREEN + refactor notes.",
         "GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
         "GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
         "REFACTOR: Improve code quality — without changing behavior. Document what you changed and why.",
@@ -34,6 +34,7 @@ export const TDD = {
     ],
     interactionProtocol: [
         "Pick one planned slice at a time.",
+        "Controller owns orchestration; execution runs through the mandatory `test-author` delegation for RED then GREEN/REFACTOR modes.",
         "Write behavior-focused tests before changing implementation (RED).",
         "Capture and store failing output as RED evidence.",
         "Apply minimal change to satisfy RED tests (GREEN).",
@@ -44,9 +45,9 @@ export const TDD = {
     ],
     process: [
         "Select slice and map to acceptance criterion.",
-        "Write test(s) that fail for expected reason (RED).",
+        "Dispatch `test-author` in TEST_RED_ONLY mode and produce failing test(s) for expected reason (RED).",
         "Run tests and capture failure output.",
-        "Implement smallest change needed for GREEN.",
+        "Dispatch `test-author` in BUILD_GREEN_REFACTOR mode and implement smallest change needed for GREEN.",
         "Run full tests and build checks.",
         "Perform refactor pass preserving behavior.",
         "Record RED, GREEN, and REFACTOR evidence in artifact.",

package/dist/content/templates.js CHANGED

@@ -1,7 +1,16 @@
 import { COMMAND_FILE_ORDER } from "../constants.js";
 import { orderedStageSchemas } from "./stage-schema.js";
 export const ARTIFACT_TEMPLATES = {
-    "01-brainstorm.md": `# Brainstorm Artifact
+    "01-brainstorm.md": `---
+stage: brainstorm
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Brainstorm Artifact
 ## Context
 - **Project state:**
@@ -37,7 +46,16 @@ export const ARTIFACT_TEMPLATES = {
 - **Assumptions:**
 - **Open questions (or "None"):**
 `,
-    "02-scope.md": `# Scope Artifact
+    "02-scope.md": `---
+stage: scope
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Scope Artifact
 ## Prime Directives
 - Zero silent failures:
@@ -94,6 +112,11 @@ export const ARTIFACT_TEMPLATES = {
 > is later dropped, keep the row and mark Priority \`DROPPED\`; if a new one is
 > added mid-flow, append with the next free R-number — do NOT reuse numbers.
+## Locked Decisions (D-XX)
+| Decision ID | Decision | Why locked now | Downstream impact |
+|---|---|---|---|
+| D-01 |  |  |  |
 ## In Scope / Out of Scope
 ### In Scope
@@ -126,7 +149,16 @@ export const ARTIFACT_TEMPLATES = {
 - Deferred:
 - Explicitly excluded:
 `,
-    "03-design.md": `# Design Artifact
+    "03-design.md": `---
+stage: design
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Design Artifact
 ## Codebase Investigation
 | File | Current responsibility | Patterns discovered |
@@ -210,7 +242,16 @@ export const ARTIFACT_TEMPLATES = {
 **Decisions made:** 0 | **Unresolved:** 0
 `,
-    "04-spec.md": `# Specification Artifact
+    "04-spec.md": `---
+stage: spec
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Specification Artifact
 ## Acceptance Criteria
 | ID | Requirement Ref (R#) | Criterion (observable/measurable/falsifiable) | Design Decision Ref |
@@ -254,7 +295,16 @@ export const ARTIFACT_TEMPLATES = {
 - Approved by:
 - Date:
 `,
-    "05-plan.md": `# Plan Artifact
+    "05-plan.md": `---
+stage: plan
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Plan Artifact
 ## Dependency Graph
 -
@@ -282,6 +332,7 @@ Execution rule: complete and verify each wave before starting the next wave.
 **Rules (apply before writing rows):**
 - Every task fits the **2-5 minute budget**. If \`[~Nm]\` is >5, split the task.
 - **No placeholders.** Forbidden tokens anywhere in this table: \`TODO\`, \`TBD\`, \`FIXME\`, \`<fill-in>\`, \`<your-*-here>\`, \`xxx\`, bare ellipsis. Every file path, test, and verification command must be copy-pasteable as written.
+- **No silent scope reduction.** Forbidden phrasing when locked decisions exist: \`v1\`, \`for now\`, \`later\`, \`temporary\`, \`placeholder\`, \`mock for now\`, \`hardcoded for now\`, \`will improve later\`.
 - If an estimate is genuinely uncertain (new library, unfamiliar subsystem), add a **spike task in wave 0** to de-risk — do NOT hide the uncertainty inside a large estimate.
 | Task ID | Description | Acceptance criterion | Verification command | Effort (S/M/L) | Minutes |
@@ -293,6 +344,11 @@ Execution rule: complete and verify each wave before starting the next wave.
 |---|---|
 | AC-1 | T-1 |
+## Locked Decision Coverage
+| Decision ID | Source section | Plan tasks implementing decision | Status |
+|---|---|---|---|
+| D-01 | 02-scope.md > Locked Decisions | T-1 | covered |
 ## Risk Assessment
 | Task/Wave | Risk | Likelihood | Impact | Mitigation |
 |---|---|---|---|---|
@@ -307,11 +363,24 @@ Execution rule: complete and verify each wave before starting the next wave.
 - Scanned tokens: \`TODO\`, \`TBD\`, \`FIXME\`, \`<fill-in>\`, \`<your-*-here>\`, \`xxx\`, bare ellipsis in task rows.
 - Hits: 0 (required for WAIT_FOR_CONFIRM to resolve).
+## No Scope Reduction Language Scan
+- Scanned phrases: \`v1\`, \`for now\`, \`later\`, \`temporary\`, \`placeholder\`, \`mock for now\`, \`hardcoded for now\`, \`will improve later\`.
+- Hits: 0 (required when Locked Decisions section is non-empty).
 ## WAIT_FOR_CONFIRM
 - Status: pending
 - Confirmed by:
 `,
-    "06-tdd.md": `# TDD Artifact
+    "06-tdd.md": `---
+stage: tdd
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# TDD Artifact
 ## RED Evidence
 | Slice | Test name | Command | Failure output summary |
@@ -366,7 +435,16 @@ Execution rule: complete and verify each wave before starting the next wave.
 |---|---|---|---|---|
 | S-1 |  |  |  |  |
 `,
-    "07-review.md": `# Review Artifact
+    "07-review.md": `---
+stage: review
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Review Artifact
 ## Layer 1 Verdict
 | Criterion | Verdict | Evidence |
@@ -444,7 +522,16 @@ Execution rule: complete and verify each wave before starting the next wave.
   }
 }
 `,
-    "08-ship.md": `# Ship Artifact
+    "08-ship.md": `---
+stage: ship
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Ship Artifact
 ## Preflight Results
 - Review verdict:
@@ -485,7 +572,16 @@ Execution rule: complete and verify each wave before starting the next wave.
 - Retro artifact path: \`.cclaw/artifacts/09-retro.md\`
 - Archive remains blocked until retro gate is complete.
 `,
-    "09-retro.md": `# Retro Artifact
+    "09-retro.md": `---
+stage: retro
+schema_version: 1
+version: 0.18.0
+feature: <feature-id>
+locked_decisions: []
+inputs_hash: sha256:pending
+---
+# Retro Artifact
 ## Run Summary
 - Flow track:

package/dist/content/utility-skills.d.ts CHANGED

@@ -10,6 +10,8 @@ export declare function ciCdSkill(): string;
 export declare function docsSkill(): string;
 export declare function executingPlansSkill(): string;
 export declare function contextEngineeringSkill(): string;
+export declare function verificationBeforeCompletionSkill(): string;
+export declare function finishingDevelopmentBranchSkill(): string;
 export declare function sourceDrivenDevelopmentSkill(): string;
 export declare function frontendAccessibilitySkill(): string;
 export declare function landscapeCheckSkill(): string;
@@ -44,5 +46,5 @@ export declare const LANGUAGE_RULE_PACK_GENERATORS: Record<string, () => string>
  * clean them up after the move to `.cclaw/rules/lang/`.
  */
 export declare const LEGACY_LANGUAGE_RULE_PACK_FOLDERS: readonly ["language-typescript", "language-python", "language-go"];
-export declare const UTILITY_SKILL_FOLDERS: readonly ["security", "debugging", "performance", "ci-cd", "docs", "executing-plans", "context-engineering", "source-driven-development", "frontend-accessibility", "landscape-check", "adversarial-review", "security-audit", "knowledge-curation", "retrospective", "document-review"];
+export declare const UTILITY_SKILL_FOLDERS: readonly ["security", "debugging", "performance", "ci-cd", "docs", "executing-plans", "verification-before-completion", "finishing-a-development-branch", "context-engineering", "source-driven-development", "frontend-accessibility", "landscape-check", "adversarial-review", "security-audit", "knowledge-curation", "retrospective", "document-review"];
 export declare const UTILITY_SKILL_MAP: Record<string, () => string>;

package/dist/content/utility-skills.js CHANGED

@@ -594,6 +594,92 @@ Modes are stored in \`.cclaw/contexts/\`:
 - Shipping decisions based on stale pre-compaction context.
 `;
 }
+export function verificationBeforeCompletionSkill() {
+    return `---
+name: verification-before-completion
+description: "Final verification discipline before stage closeout or ship. Use when preparing a completion claim."
+---
+# Verification Before Completion
+## Announce at start
+"Using verification-before-completion to validate fresh evidence before completion."
+## HARD-GATE
+Do not claim completion from memory. Every pass claim requires fresh, in-turn evidence.
+## Protocol
+1. Identify changed scope (files, modules, user-facing behaviors).
+2. Run the smallest command set that still proves the scope:
+   - tests for changed area
+   - typecheck/build/lint if the stack requires it
+3. Capture exact command + pass/fail output in the artifact.
+4. If this is a bug fix, include RED -> GREEN regression evidence.
+5. If any check fails, stop completion and return to fix loop.
+## Completion claim checklist
+- [ ] Commands were run in this turn (not reused from earlier output).
+- [ ] Output corresponds to the actual changed scope.
+- [ ] Failures (if any) are resolved or explicitly escalated.
+- [ ] Artifact includes evidence references.
+- [ ] Completion status reflects evidence (DONE / DONE_WITH_CONCERNS / BLOCKED).
+## Anti-patterns
+- "Tests passed earlier today" without rerunning.
+- Reporting only "PASS" without command context.
+- Running unrelated checks while skipping changed scope checks.
+- Marking DONE while blockers still fail.
+`;
+}
+export function finishingDevelopmentBranchSkill() {
+    return `---
+name: finishing-a-development-branch
+description: "Finalize implementation branch after review: verify, choose integration mode, execute safely, and clean up."
+---
+# Finishing a Development Branch
+## Announce at start
+"Using finishing-a-development-branch to complete this branch safely."
+## HARD-GATE
+Do not merge, open PR, or discard branch until verification and rollback notes are explicit.
+## Protocol
+1. Verify readiness:
+   - review verdict is APPROVED or APPROVED_WITH_CONCERNS
+   - verification-before-completion checklist is satisfied
+2. Choose one finalization mode:
+   - FINALIZE_MERGE_LOCAL
+   - FINALIZE_OPEN_PR
+   - FINALIZE_KEEP_BRANCH
+   - FINALIZE_DISCARD_BRANCH
+3. Execute only the chosen mode and record exact result.
+4. If merge or discard happened in a feature worktree, clean the worktree.
+5. Update ship artifact with release notes, rollback, and finalization evidence.
+## Rollback minimum
+- Trigger: what tells us release is wrong.
+- Steps: exact revert/reset/rollback commands.
+- Verification: how we confirm rollback worked.
+## Anti-patterns
+- Multiple finalization modes in one run.
+- Merge without rollback section.
+- PR without test/verification summary.
+- Discarding branch without explicit user confirmation.
+`;
+}
 export function sourceDrivenDevelopmentSkill() {
     return `---
 name: source-driven-development
@@ -1271,7 +1357,7 @@ For each lens, write either a knowledge entry **or** the explicit string
 ## Output protocol
 For every harvested insight, append one strict-schema JSON line to
-\`.cclaw/knowledge.jsonl\` (fields: \`type, trigger, action, confidence, domain, stage, created, project\`).
+\`.cclaw/knowledge.jsonl\` (fields: \`type, trigger, action, confidence, domain, stage, origin_stage, origin_feature, frequency, universality, maturity, created, first_seen_ts, last_seen_ts, project\`).
 See the \`learnings\` skill for the canonical shape. Choose \`type\`:
 - \`compound\` for process/speed accelerators.
@@ -1524,6 +1610,8 @@ export const UTILITY_SKILL_FOLDERS = [
     "ci-cd",
     "docs",
     "executing-plans",
+    "verification-before-completion",
+    "finishing-a-development-branch",
     "context-engineering",
     "source-driven-development",
     "frontend-accessibility",
@@ -1541,6 +1629,8 @@ export const UTILITY_SKILL_MAP = {
     "ci-cd": ciCdSkill,
     docs: docsSkill,
     "executing-plans": executingPlansSkill,
+    "verification-before-completion": verificationBeforeCompletionSkill,
+    "finishing-a-development-branch": finishingDevelopmentBranchSkill,
     "context-engineering": contextEngineeringSkill,
     "source-driven-development": sourceDrivenDevelopmentSkill,
     "frontend-accessibility": frontendAccessibilitySkill,

package/dist/delegation.d.ts CHANGED

@@ -1,12 +1,34 @@
 import type { FlowStage } from "./types.js";
+export type DelegationMode = "mandatory" | "proactive" | "conditional";
+export type DelegationStatus = "scheduled" | "completed" | "failed" | "waived";
+export interface DelegationTokenUsage {
+    input: number;
+    output: number;
+    model: string;
+}
 export type DelegationEntry = {
     stage: string;
     agent: string;
-    mode: "mandatory" | "proactive" | "conditional";
-    status: "scheduled" | "completed" | "failed" | "waived";
+    mode: DelegationMode;
+    status: DelegationStatus;
+    /**
+     * Span identifier for this delegation unit. Multiple status transitions for
+     * the same delegated unit should reuse the same spanId.
+     */
+    spanId?: string;
+    /** Parent span id when this delegation was spawned from another span. */
+    parentSpanId?: string;
+    /** ISO timestamp when the delegation span started. */
+    startTs?: string;
+    /** ISO timestamp when the delegation span ended (for terminal statuses). */
+    endTs?: string;
+    /**
+     * Legacy timestamp used by historical ledgers. New writers set both `ts` and
+     * `startTs` for backward compatibility.
+     */
     taskId?: string;
     waiverReason?: string;
-    ts: string;
+    ts?: string;
     /**
      * Run id the entry belongs to. Older ledgers written before 0.5.17 may omit this;
      * consumers treat missing runId as unscoped (conservatively excluded from current-run checks).
@@ -17,6 +39,14 @@ export type DelegationEntry = {
      * Recorded for audit so reviewers can see why the second pass was required.
      */
     conditionTrigger?: string;
+    /** Optional token usage captured from the delegated run. */
+    tokens?: DelegationTokenUsage;
+    /** Number of retries attempted for this span. */
+    retryCount?: number;
+    /** Optional references to evidence anchors in artifacts. */
+    evidenceRefs?: string[];
+    /** Schema version marker for span-compatible delegation logs. */
+    schemaVersion?: 1;
 };
 export type DelegationLedger = {
     runId: string;

package/dist/delegation.js CHANGED

@@ -12,6 +12,20 @@ function delegationLogPath(projectRoot) {
 function delegationLockPath(projectRoot) {
     return path.join(projectRoot, RUNTIME_ROOT, "state", ".delegation.lock");
 }
+function createSpanId() {
+    return `dspan-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
+}
+function isDelegationTokenUsage(value) {
+    if (!value || typeof value !== "object" || Array.isArray(value))
+        return false;
+    const o = value;
+    return (typeof o.input === "number" &&
+        Number.isFinite(o.input) &&
+        typeof o.output === "number" &&
+        Number.isFinite(o.output) &&
+        typeof o.model === "string" &&
+        o.model.trim().length > 0);
+}
 function isDelegationEntry(value) {
     if (!value || typeof value !== "object" || Array.isArray(value))
         return false;
@@ -21,15 +35,30 @@ function isDelegationEntry(value) {
         o.status === "completed" ||
         o.status === "failed" ||
         o.status === "waived";
+    const timestampOk = typeof o.ts === "string" ||
+        typeof o.startTs === "string";
+    const retryOk = o.retryCount === undefined ||
+        (typeof o.retryCount === "number" &&
+            Number.isFinite(o.retryCount) &&
+            Number.isInteger(o.retryCount) &&
+            o.retryCount >= 0);
     return (typeof o.stage === "string" &&
         typeof o.agent === "string" &&
         modeOk &&
         statusOk &&
-        typeof o.ts === "string" &&
+        timestampOk &&
+        (o.spanId === undefined || typeof o.spanId === "string") &&
+        (o.parentSpanId === undefined || typeof o.parentSpanId === "string") &&
+        (o.startTs === undefined || typeof o.startTs === "string") &&
+        (o.endTs === undefined || typeof o.endTs === "string") &&
         (o.taskId === undefined || typeof o.taskId === "string") &&
         (o.waiverReason === undefined || typeof o.waiverReason === "string") &&
         (o.runId === undefined || typeof o.runId === "string") &&
-        (o.conditionTrigger === undefined || typeof o.conditionTrigger === "string"));
+        (o.conditionTrigger === undefined || typeof o.conditionTrigger === "string") &&
+        (o.tokens === undefined || isDelegationTokenUsage(o.tokens)) &&
+        retryOk &&
+        (o.evidenceRefs === undefined || (Array.isArray(o.evidenceRefs) && o.evidenceRefs.every((item) => typeof item === "string"))) &&
+        (o.schemaVersion === undefined || o.schemaVersion === 1));
 }
 function parseLedger(raw, runId) {
     if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
@@ -41,7 +70,18 @@ function parseLedger(raw, runId) {
     if (Array.isArray(entriesRaw)) {
         for (const item of entriesRaw) {
             if (isDelegationEntry(item)) {
-                entries.push(item);
+                const ts = item.startTs ?? item.ts ?? new Date().toISOString();
+                entries.push({
+                    ...item,
+                    spanId: item.spanId ?? createSpanId(),
+                    startTs: ts,
+                    ts,
+                    retryCount: typeof item.retryCount === "number" && Number.isInteger(item.retryCount) && item.retryCount >= 0
+                        ? item.retryCount
+                        : 0,
+                    evidenceRefs: Array.isArray(item.evidenceRefs) ? item.evidenceRefs : [],
+                    schemaVersion: 1
+                });
             }
         }
     }
@@ -67,7 +107,20 @@ export async function appendDelegation(projectRoot, entry) {
     await withDirectoryLock(delegationLockPath(projectRoot), async () => {
         const filePath = delegationLogPath(projectRoot);
         const prior = await readDelegationLedger(projectRoot);
+        const startTs = entry.startTs ?? entry.ts ?? new Date().toISOString();
         const stamped = { ...entry, runId: entry.runId ?? activeRunId };
+        stamped.spanId = entry.spanId ?? createSpanId();
+        stamped.startTs = startTs;
+        stamped.ts = startTs;
+        stamped.schemaVersion = 1;
+        if (stamped.retryCount === undefined ||
+            !Number.isInteger(stamped.retryCount) ||
+            stamped.retryCount < 0) {
+            stamped.retryCount = 0;
+        }
+        if (!Array.isArray(stamped.evidenceRefs)) {
+            stamped.evidenceRefs = [];
+        }
         const ledger = {
             runId: activeRunId,
             entries: [...prior.entries, stamped]