npm - cclaw-cli - Versions diffs - 0.51.25 → 0.51.27 - Mend

cclaw-cli 0.51.25 → 0.51.27

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (23)

package/dist/artifact-linter.js +574 -0
package/dist/content/core-agents.js +21 -1
package/dist/content/examples.js +9 -8
package/dist/content/harness-doc.d.ts +1 -0
package/dist/content/harness-doc.js +47 -0
package/dist/content/hooks.d.ts +1 -0
package/dist/content/hooks.js +369 -0
package/dist/content/skills.d.ts +9 -0
package/dist/content/skills.js +132 -5
package/dist/content/stages/brainstorm.js +5 -5
package/dist/content/status-command.js +8 -2
package/dist/content/subagents.js +6 -2
package/dist/content/templates.js +312 -20
package/dist/content/tree-command.js +7 -1
package/dist/delegation.d.ts +62 -4
package/dist/delegation.js +218 -16
package/dist/doctor-registry.js +9 -0
package/dist/doctor.js +75 -2
package/dist/harness-adapters.d.ts +48 -0
package/dist/harness-adapters.js +123 -4
package/dist/install.js +3 -1
package/dist/internal/advance-stage.js +68 -18
package/package.json +1 -1

package/dist/content/skills.js CHANGED

@@ -5,7 +5,120 @@ import { stageExamples } from "./examples.js";
 import { reviewStackAwareRoutes, reviewStackAwareRoutingSummary, stageAutoSubagentDispatch, stageSchema, stageTrackRenderContext } from "./stage-schema.js";
 import { conversationLanguagePolicyMarkdown } from "./language-policy.js";
 import { referencePatternsForStage } from "./reference-patterns.js";
+import { harnessDelegationRecipes } from "../harness-adapters.js";
 const VERIFICATION_STAGES = ["tdd", "review", "ship"];
+// ---------- Cross-cutting universal mechanics (Layer 2 building blocks) ----------
+//
+// These are shared, structural blocks that get injected into every stage skill.
+// They check structural shape, not domain content. Each has a matching linter
+// rule in `src/artifact-linter.ts` so artifacts can fail when shape is missing.
+export const FORBIDDEN_SYCOPHANCY_PHRASES = [
+    "you're absolutely right",
+    "great point",
+    "absolutely!",
+    "thanks for catching",
+    "thanks for the great",
+    "good catch",
+    "love this",
+    "nailed it"
+];
+export const FORBIDDEN_PLACEHOLDER_TOKENS = [
+    "TBD",
+    "TODO",
+    "FIXME",
+    "implement later",
+    "similar to Task",
+    "add appropriate error handling",
+    "add proper logging",
+    "fill this in",
+    "<placeholder>"
+];
+export const CONFIDENCE_FINDING_REGEX_SOURCE = "\\[P[123]\\]\\s*\\(confidence:\\s*\\d{1,2}/10\\)\\s+[^\\s]+(?::\\d+)?\\s+—";
+export function stopPerIssueBlock() {
+    return `## STOP-per-issue Protocol
+After each critical section (premise / alternatives / mode pick / each review finding), STOP and record one decision marker before continuing:
+- \`Q<n>:\` — issue or open question
+- \`decision:\` — \`accept\` / \`reject\` / \`defer\` / \`skip — no issues\`
+- \`rationale:\` — one line, evidence-backed
+Do not batch decisions. Do not silently move on. The artifact MUST contain at least one \`decision:\` marker per critical section.
+`;
+}
+export function confidenceCalibrationBlock() {
+    return `## Confidence Calibration
+Findings, recommendations, and review notes use the calibrated finding format:
+\`[P1|P2|P3] (confidence: <n>/10) <repo-relative-path>[:<line>] — <one-line description>\`
+- \`P1\` blocks merge; \`P2\` should be addressed; \`P3\` is nice-to-have.
+- Confidence \`< 7\` — suppress unless severity is \`P1\`.
+- "What evidence would change this?" — every finding must answer it inline or in the next bullet.
+- Never assert "this is fine" without confidence; never assert confidence above \`8\` without a cited artifact, line, or test.
+`;
+}
+export function outsideVoiceSlotBlock() {
+    return `## Outside Voice Slot (optional)
+Reserve a section titled \`## Outside Voice\` (or \`## Outside Voice — <model/critic>\`) for a second-model or fresh-context critic perspective when used. Required shape when present:
+- \`source:\` — model id, critic agent name, or human reviewer handle
+- \`prompt:\` — exact frame sent (or reference to \`docs/quality-gates.md\` recipe)
+- \`tension:\` — at least one disagreement with the main draft, or \`none — converged\`
+- \`resolution:\` — accepted / rejected / merged / deferred + one-line rationale
+Empty when not used; do not fabricate an outside voice.
+`;
+}
+export function antiSycophancyBlock() {
+    const phrases = FORBIDDEN_SYCOPHANCY_PHRASES.map((p) => `\`${p}\``).join(", ");
+    return `## Anti-sycophancy
+Forbidden response openers when receiving review, critic output, or user feedback: ${phrases}.
+Replace agreement theater with one of:
+- \`Verified — <evidence>\` (you actually checked)
+- \`Disagree — <reason>\` (you push back with substance)
+- \`Investigating — <next step>\` (you do not yet know)
+Never agree before reading the cited evidence. Never apologize for asking a clarifying question.
+`;
+}
+export function noPlaceholdersBlock() {
+    const tokens = FORBIDDEN_PLACEHOLDER_TOKENS.map((p) => `\`${p}\``).join(", ");
+    return `## NO PLACEHOLDERS Rule
+Plans, specs, designs, and review artifacts MUST NOT contain placeholder tokens: ${tokens}. Use repo-relative paths and concrete commands; if a value is genuinely unknown, write the open question explicitly with a \`Q<n>:\` marker and a \`decision: defer — <reason>\` row instead of inserting a placeholder token.
+`;
+}
+export function watchedFailProofBlock() {
+    return `## Watched-fail Proof
+Any "the failure is real" claim (failing test, broken build, regression catch, deployment fail) MUST include a watched-fail proof line in the artifact:
+\`proof: <iso-ts> | <observed snippet — first 200 chars> | source: <command or log path>\`
+For TDD specifically, this is the watched-RED proof and is required per new test before \`stage-complete\` accepts the stage.
+`;
+}
+function crossCuttingMechanicsBlock(stage) {
+    // All stages share the universal mechanics, but each stage's matching
+    // linter rules decide what is mandatory vs. structural-only.
+    const blocks = [
+        stopPerIssueBlock(),
+        confidenceCalibrationBlock(),
+        outsideVoiceSlotBlock(),
+        antiSycophancyBlock(),
+        noPlaceholdersBlock()
+    ];
+    if (stage === "tdd" || stage === "review" || stage === "ship") {
+        blocks.push(watchedFailProofBlock());
+    }
+    return blocks.join("\n");
+}
 function whenNotToUseBlock(items) {
     if (items.length === 0) {
         return "";
@@ -67,14 +180,26 @@ function autoSubagentDispatchBlock(stage, track) {
     const mandatory = schema.mandatoryDelegations;
     const mandatoryList = mandatory.length > 0 ? mandatory.map((a) => `\`${a}\``).join(", ") : "none";
     const delegationLogRel = `${RUNTIME_ROOT}/state/delegation-log.json`;
-    const artifactRef = `${RUNTIME_ROOT}/artifacts/${schema.artifactRules.artifactFile}`;
+    const delegationEventsRel = `${RUNTIME_ROOT}/state/delegation-events.jsonl`;
     return `## Automatic Subagent Dispatch
 | Agent | Mode | Class | Return Schema | User Gate | Trigger | Purpose |
 |---|---|---|---|---|---|---|
 ${rows}
-Mandatory: ${mandatoryList}. Record scheduled/completed/waived lifecycle rows in \`${delegationLogRel}\` before completion.
-### Harness Dispatch Contract
-Use true harness dispatch: Claude native Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\`, Codex \`.codex/agents/<agent>.toml\`. Run independent read-only/review agents in parallel where safe, write evidence into \`${artifactRef}\`, then append \`${delegationLogRel}\` rows with matching \`fulfillmentMode: "isolated"\` or \`"generic-dispatch"\`. Each dispatched worker should have a scheduled row and a terminal row sharing \`spanId\`; stale scheduled spans block completion. Do not collapse OpenCode or Codex to role-switch by default; role-switch is degraded fallback and must carry non-empty \`evidenceRefs\`. Missing evidence blocks completion.
+Mandatory: ${mandatoryList}. Record lifecycle rows in \`${delegationLogRel}\` and append-only \`${delegationEventsRel}\` before completion.
+### Harness Dispatch Contract — use true harness dispatch: Claude Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\` via Task/@agent, Codex \`.codex/agents/<agent>.toml\`. Do not collapse OpenCode or Codex to role-switch by default. Worker ACK Contract: ACK must include \`spanId\`, \`dispatchId\`, \`dispatchSurface\`, \`agentDefinitionPath\`, and \`ackTs\`; never claim \`fulfillmentMode: "isolated"\` without matching lifecycle proof. Helper: \`.cclaw/hooks/delegation-record.mjs --status=<status> --span-id=<spanId> --dispatch-id=<dispatchId> --dispatch-surface=<surface> --agent-definition-path=<path> --json\`. Exact recipe: scheduled -> launched -> acknowledged -> completed with the same span; completed isolated/generic rows require a prior ACK event for that span or \`--ack-ts=<iso>\`.
+${perHarnessLifecycleRecipeBlock()}`;
+}
+function perHarnessLifecycleRecipeBlock() {
+    const recipes = harnessDelegationRecipes();
+    const rows = recipes
+        .map((recipe) => `| \`${recipe.harnessId}\` | \`${recipe.dispatchSurface}\` | \`${recipe.agentDefinitionExample}\` | \`${recipe.fulfillmentMode}\` |`)
+        .join("\n");
+    return `### Per-Harness Lifecycle Recipe — placeholders only
+Reuse the same \`<span-id>\` and \`<dispatch-id>\` across scheduled -> launched -> acknowledged -> completed; substitute neutral tokens \`<agent-name>\`, \`<stage>\`, \`<iso-ts>\`, \`<artifact-anchor>\`. Full command sequences live in \`docs/harnesses.md\`.
+| Harness | Dispatch surface | Agent definition path | fulfillmentMode |
+|---|---|---|---|
+${rows}
 `;
 }
 function researchPlaybooksBlock(playbooks) {
@@ -248,8 +373,9 @@ function completionParametersBlock(schema, track) {
 - \`completion helper\`: \`node .cclaw/hooks/stage-complete.mjs ${schema.stage}\`
 - \`completion helper with evidence\`: \`node .cclaw/hooks/stage-complete.mjs ${schema.stage} --evidence-json '{"<gate_id>":"<evidence note>"}' --passed=<gate_id>[,<gate_id>]\`
 - \`completion helper JSON diagnostics\`: append \`--json\` to receive a machine-readable validation failure summary.
+- \`delegation record helper\`: \`node .cclaw/hooks/delegation-record.mjs --stage=${schema.stage} --agent=<agent> --mode=<mandatory|proactive> --status=<scheduled|launched|acknowledged|completed|failed|waived|stale> --span-id=<spanId> --dispatch-id=<dispatchId> --dispatch-surface=<surface> --agent-definition-path=<path> --json\`. \`delegation helper recipe\`: call \`--status=scheduled\`, then \`--status=launched\`, then \`--status=acknowledged\`, then \`--status=completed\` with the same \`--span-id\`, \`--dispatch-id\`, \`--dispatch-surface\`, and \`--agent-definition-path\`; completed isolated/generic rows fail unless that same span already has an acknowledged event or the completed call includes \`--ack-ts=<iso>\`. For role-switch fallback, use \`--dispatch-surface=role-switch --evidence-ref=<artifact#anchor>\` instead of pretending isolated completion.
 - Fill \`## Learnings\` before closeout: either \`- None this stage.\` or JSON bullets with required keys \`type\`, \`trigger\`, \`action\`, \`confidence\` (knowledge-schema compatible).
-- Record mandatory delegation completion/waiver in \`${RUNTIME_ROOT}/state/delegation-log.json\` with rationale as needed.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""}
+- Record mandatory delegation lifecycle in \`${RUNTIME_ROOT}/state/delegation-log.json\` and append proof events to \`${RUNTIME_ROOT}/state/delegation-events.jsonl\`; the ledger is current state, the event log is audit proof.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""}
 - Never edit raw \`flow-state.json\` to complete a stage, even in advisory mode; that bypasses validation, gate evidence, and Learnings harvest. If the helper fails, stop and report the exact command/output instead of applying a manual state workaround.
 - Completion protocol: verify required gates, update the artifact, then use the completion helper with \`--evidence-json\` and \`--passed\` for every satisfied gate.
 `;
@@ -437,6 +563,7 @@ ${interactionFocus.length > 0 ? interactionFocus.map((item, i) => `${i + 1}. ${i
 Decision protocol: ask only decision-changing questions, record the chosen option, rationale, risk, and rollback when the stage makes a non-trivial call.
 ${batchExecutionModeBlock(stage, track)}
+${crossCuttingMechanicsBlock(stage)}
 ## Required Gates
 ${gateList}

package/dist/content/stages/brainstorm.js CHANGED

@@ -11,7 +11,7 @@ export const BRAINSTORM = {
     philosophy: {
         hardGate: "Do NOT invoke implementation skills, write code, scaffold projects, or mutate product behavior until a concrete direction is approved by the user.",
         ironLaw: "NO ARTIFACT IS COMPLETE WITHOUT AN EXPLICITLY APPROVED DIRECTION — SILENCE IS NOT APPROVAL.",
-        purpose: "Turn an initial idea into an approved problem frame and direction, using product or technical-maintenance discovery before proposing solutions.",
+        purpose: "Turn an initial idea into an approved problem frame and direction, using domain-neutral problem discovery (product, technical-maintenance, research, ops, or infrastructure framing) before proposing solutions.",
         whenToUse: [
             "Starting a new feature or behavior change",
             "Requirements are ambiguous or trade-offs are unclear",
@@ -37,8 +37,8 @@ export const BRAINSTORM = {
     executionModel: {
         checklist: [
             "**Explore project context** — inspect existing files/docs/recent activity before asking what to build; capture matching files/patterns/seeds in `Context > Discovered context` so downstream stages don't redo discovery.",
-            "**Classify stage depth** — choose `lite` for clear low-risk tasks, `standard` for normal product/engineering changes, or `deep` for ambiguity, architecture, external dependency, security/data risk, or explicit think-bigger requests.",
-            "**Write the Problem Decision Record** — product work captures persona/JTBD/pain/value/evidence/success/why-now/do-nothing/non-goals; technical-maintenance work captures affected operator/developer, failure mode, operational improvement, verification signal, do-nothing cost, and non-goals.",
+            "**Classify stage depth** — choose `lite` for clear low-risk tasks, `standard` for normal engineering/product changes, or `deep` for ambiguity, architecture, external dependency, security/data risk, or explicit think-bigger requests.",
+            "**Write the Problem Decision Record** — pick a free-form `Frame type` label that names how this work is framed (examples: product, technical-maintenance, research-spike, ops-incident, infrastructure), then fill the universal Framing fields: affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, and non-goals.",
             "**Premise check (one pass)** — answer the three gstack-style questions in the artifact body: *Right problem? Direct path? What if we do nothing?* Take a position; do not hedge.",
             "**Reframe with How Might We** — write a single `How Might We …?` line that names the user/operator, the desired outcome, and the constraint. This is the altitude check before approaches.",
             "**Run Clarity Gate** — record ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff before locking recommendations. If ambiguity remains high (>0.40), ask one decision-changing question before recommending.",
@@ -81,7 +81,7 @@ export const BRAINSTORM = {
         requiredEvidence: [
             "Artifact written to `.cclaw/artifacts/01-brainstorm-<slug>.md`.",
             "Project context was explored (files, docs, or recent activity referenced).",
-            "Problem Decision Record includes product framing or technical-maintenance framing.",
+            "Problem Decision Record includes a `Frame type` label and the universal Framing fields (affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, non-goals).",
             "Clarity Gate records ambiguity score, decision boundaries, reaffirmed non-goals, and residual-risk handoff.",
             "Clarifying questions are one-at-a-time and captured only when they change a decision or stop condition.",
             "2-3 approaches with trade-offs are recorded, including one higher-upside challenger option and reference-pattern source/disposition when applicable.",
@@ -130,7 +130,7 @@ export const BRAINSTORM = {
         },
         artifactValidation: [
             { section: "Context", required: true, validationRule: "Must reference project state and relevant existing code or patterns. A `Discovered context` subsection (or list) is recommended for downstream traceability." },
-            { section: "Problem Decision Record", required: true, validationRule: "Must include either product framing fields (persona/JTBD/pain/value/evidence/success/why-now/do-nothing/non-goals) or technical-maintenance fields (operator/developer, failure mode, operational improvement, verification signal, do-nothing cost, non-goals)." },
+            { section: "Problem Decision Record", required: true, validationRule: "Must include a free-form `Frame type` label (examples only: product, technical-maintenance, research-spike, ops-incident, infrastructure) and the universal Framing fields: affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, non-goals. The linter checks that the section has meaningful content; the field labels themselves are the structural contract." },
             { section: "Premise Check", required: false, validationRule: "Recommended: explicit answers to `Right problem?`, `Direct path?`, `What if we do nothing?` — take a position, do not hedge." },
             { section: "How Might We", required: false, validationRule: "Recommended: a single `How Might We …?` line naming the user, the outcome, and the binding constraint." },
             { section: "Clarity Gate", required: false, validationRule: "Recommended before recommendation lock: include ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff for scope." },

package/dist/content/status-command.js CHANGED

@@ -5,6 +5,12 @@ function flowStatePath() {
 function delegationLogPath() {
     return `${RUNTIME_ROOT}/state/delegation-log.json`;
 }
+function delegationEventsPath() {
+    return `${RUNTIME_ROOT}/state/delegation-events.jsonl`;
+}
+function subagentsPath() {
+    return `${RUNTIME_ROOT}/state/subagents.json`;
+}
 function knowledgePath() {
     return `${RUNTIME_ROOT}/knowledge.jsonl`;
 }
@@ -27,7 +33,7 @@ advancing or mutating anything. Safe to run at any point. The snapshot reflects:
 - progress across stages with per-stage markers,
 - gate coverage,
 - mandatory delegations with **fulfillmentMode** (isolated / generic-dispatch /
-  role-switch) plus explicit waived status and evidence gate,
+  role-switch), dispatch proof fields, explicit waived status, and evidence gate,
 - **closeout substate** after ship (retro → compound → archive),
 - **harness parity row** (tier + fallback) for the active harness set.
@@ -85,7 +91,7 @@ a read-only command.
   \`Current\`, \`Stage\`, \`Gates\`, \`Delegations\`, \`Blocked by\`, \`Next\`, \`Evidence needed\`.
 - When blocked, include a plain-English action block:
   \`Current: <stage or closeout substate>\`; \`Blocked by: <gate/delegation/blocker code>\`; \`Next: <exact command or managed remediation>\`; \`Evidence needed: <artifact/test/review/delegation evidence>\`.
-- Report counts, not full artifact contents.
+- Report counts, not full artifact contents. Include active subagent count from \`${subagentsPath()}\` and proof gaps from \`${delegationEventsPath()}\` when present.
 - If any data source is missing or corrupt, say so explicitly rather than guessing.
 - Include \`/cc-view tree\` for deep structure and \`/cc-view diff\` for before/after map in the final line.

package/dist/content/subagents.js CHANGED

@@ -175,6 +175,10 @@ Borrow the good part of Team/Ruflo-style orchestration without adding a swarm ru
 - **Checkpoint before synthesis.** Each agent returns status, files inspected/changed, evidence, and blockers before the parent acts.
 - **Consensus is for hard calls only.** Use two reviewers when severity or architecture is disputed; otherwise one evidence-backed reviewer is enough.
+## Parallelization Decision Gate
+Before parallel dispatch, answer yes to all gates: tasks are independent, write sets do not overlap, outputs can be reconciled by evidence, and failure in one lane will not invalidate hidden assumptions in another. If any answer is no, serialize. Coder/overseer work is contract-first: the coder implements only the pasted contract, the overseer reads code and verifies acceptance evidence before the controller marks work complete.
 ## When to Use
 - Mid/large plans with multiple discrete tasks, dependencies, or risky overlap.
@@ -1013,9 +1017,9 @@ Two patterns (skills under \`.cclaw/skills/\`):
 - **SDD** (subagent-driven-development): sequential implementer→reviewer loops. Paste self-contained task text; never point subagents at plan files.
 - **Parallel Agents** (dispatching-parallel-agents): parallel review/analysis lenses. Never parallelize implementers on same codebase.
-Status contract: DONE | DONE_WITH_CONCERNS | NEEDS_CONTEXT | BLOCKED. Worker returns must use the strict JSON schemas in \`subagent-driven-development\`.
+Status contract: ACK first, then DONE | DONE_WITH_CONCERNS | NEEDS_CONTEXT | BLOCKED. Worker returns must use the strict JSON schemas in \`subagent-driven-development\` and include matching spanId+dispatchId proof.
-- Controller sequentially dispatches **implementer → reviewer** loops per task.
+- Controller sequentially dispatches **implementer → reviewer** loops per task and records lifecycle events in \`.cclaw/state/delegation-events.jsonl\`.
 - HARD-GATE: paste **self-contained task text**; never point subagents at plan files to “discover” scope.
 - **Review fixers** are **fresh agents** after failed review passes — avoids parent-context pollution.
 - **Machine-only flow checks auto-dispatch** by stage (design/plan/tdd/review/ship) without asking the user to trigger each specialist manually.