npm - workflow-supervisor - Versions diffs - 0.1.3 → 0.2.0 - Mend

workflow-supervisor 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/CHANGELOG.md +139 -0
package/README.md +125 -28
package/bin/workflow-skills.mjs +201 -1
package/docs/artifacts.md +9 -0
package/docs/cli.md +3 -1
package/docs/portable-delegation.md +19 -1
package/docs/skill-reference.md +12 -2
package/docs/troubleshooting.md +34 -0
package/package.json +8 -2
package/schemas/dossier-v1.schema.json +38 -0
package/schemas/worker-report-v1.schema.json +120 -12
package/skills/acceptance-matrix/SKILL.md +114 -2
package/skills/acceptance-matrix/agents/openai.yaml +1 -1
package/skills/dossier-builder/SKILL.md +28 -0
package/skills/loop-policy/SKILL.md +29 -6
package/skills/work-unit/SKILL.md +46 -6
package/skills/workflow-docs/SKILL.md +2 -1
package/skills/workflow-docs/references/workflow-control.md +93 -6
package/skills/workflow-supervisor/SKILL.md +195 -46
package/skills/workflow-supervisor/agents/openai.yaml +2 -2

package/bin/workflow-skills.mjs CHANGED Viewed

@@ -19,6 +19,26 @@ const AGENTS = new Set([...INSTALLABLE_AGENTS, "generic"]);
 const DELEGATE_AGENTS = new Set(["codex", "claude-code"]);
 const WORKER_ROLES = new Set(["implementer", "verifier", "repair", "documenter"]);
 const REPORT_STATUSES = new Set(["PASS", "FAIL", "BLOCKED"]);
+const OUTCOME_VERDICTS = new Set(["PASS", "FAIL", "BLOCKED", "CONDITIONAL_PASS"]); // Verdicts allowed on an outcome_evaluations row; unlike REPORT_STATUSES, CONDITIONAL_PASS is included.
+const VERIFICATION_CAPABILITIES = new Set([ // Capability names accepted by validateCapabilityList; any other value is reported as unsupported.
+  "static_diff_inspection",
+  "diff_inspection",
+  "shell_command",
+  "unit_test",
+  "integration_test",
+  "contract_test",
+  "data_contract_test",
+  "jsdom_render",
+  "api_probe",
+  "file_snapshot",
+  "generated_html_snapshot",
+  "component_tree_snapshot",
+  "accessibility_tree_snapshot",
+  "state_machine_test",
+  "browser_snapshot",
+  "human_required",
+  "manual_review",
+]);
 const WORKFLOW_STATE_IGNORE_ENTRY = ".workflow/";
 function usage() {
@@ -483,6 +503,7 @@ function parseSimpleYaml(text) {
     }
     const items = [];
+    const object = {};
     for (i += 1; i < lines.length; i += 1) {
       const next = lines[i];
       if (!next.trim() || next.trimStart().startsWith("#")) continue;
@@ -492,8 +513,10 @@ function parseSimpleYaml(text) {
       }
       const item = next.match(/^\s*-\s*(.*)$/);
       if (item) items.push(unquoteScalar(item[1]));
+      const property = next.match(/^\s+([A-Za-z_][A-Za-z0-9_-]*):(?:\s*(.*))?$/);
+      if (property) object[property[1]] = parseDossierScalar(property[2] || "");
     }
-    result[key] = items.length > 0 ? items : "";
+    result[key] = items.length > 0 ? items : Object.keys(object).length > 0 ? object : "";
   }
   return result;
 }
@@ -556,6 +579,14 @@ const DOSSIER_CORE_ARRAY_FIELDS = [
 ];
 const DOSSIER_EXPLICIT_ARRAY_FIELDS = ["assumptions", "open_questions"];
+const FEEDBACK_LOOP_FIELDS = [ // feedback_loop keys that validateFeedbackLoop passes through isPlaceholder.
+  "command_or_evidence",
+  "red_capable",
+  "exact_symptom_or_behavior",
+  "deterministic",
+  "expected_runtime",
+  "agent_runnable",
+];
 function isPlaceholder(value, { allowNone = false } = {}) {
   const normalized = String(value || "").trim().toLowerCase().replace(/[.!]+$/, "");
@@ -584,6 +615,61 @@ function validateConcreteArray(data, field, errors, options = {}) {
   return values;
 }
+function dossierSearchText(data) { // Builds one lowercase, space-joined string from the dossier's descriptive fields for keyword matching.
+  return [
+    data.workflow, // Scalar fields are used as-is; join() renders undefined or null entries as empty strings.
+    data.work_unit,
+    data.title,
+    data.objective,
+    ...fieldArray(data.work_points), // List fields go through fieldArray (defined outside this hunk; presumably returns an array - confirm).
+    ...fieldArray(data.acceptance_matrix),
+    ...fieldArray(data.adversarial_checks),
+    ...fieldArray(data.required_commands_or_evidence),
+    ...fieldArray(data.stop_gates),
+  ]
+    .join(" ")
+    .toLowerCase(); // Lowercased so the all-lowercase keyword regex in dossierNeedsFeedbackLoop matches any casing.
+}
+function dossierNeedsFeedbackLoop(data) { // Returns true when the dossier text contains a bug-fix or behavior-change keyword or phrase.
+  return /\b(bug|fix|regression|defect|broken|crash|error|failure|failing|risky behavior|behavior change|behaviour change|change behavior|change behaviour)\b/.test(
+    dossierSearchText(data), // Word boundaries on both sides mean inflected forms such as "fixes" or "bugs" are not matched.
+  );
+}
+function validateFeedbackLoop(data, warnings) { // Appends advisory messages about data.feedback_loop to warnings; it takes no errors list and returns nothing.
+  const loop = data.feedback_loop;
+  const needsLoop = dossierNeedsFeedbackLoop(data);
+  if (!loop) { // Absent or falsy loop: warn only when the keyword heuristic flags the dossier.
+    if (needsLoop) {
+      warnings.push("feedback_loop is recommended for bug-fix or risky behavior-change dossiers");
+    }
+    return;
+  }
+  if (typeof loop !== "object" || Array.isArray(loop)) { // Truthy non-objects and arrays get one shape warning and no field checks.
+    warnings.push("feedback_loop should be an object with command_or_evidence, red_capable, exact_symptom_or_behavior, deterministic, expected_runtime, and agent_runnable");
+    return;
+  }
+  for (const field of FEEDBACK_LOOP_FIELDS) {
+    if (isPlaceholder(loop[field])) warnings.push(`feedback_loop.${field} should be concrete`); // isPlaceholder is defined outside this hunk.
+  }
+  if (loop.red_capable && !["yes", "no", "not_applicable"].includes(String(loop.red_capable))) { // Enum checks are skipped for falsy values; String() allows non-string values to be compared.
+    warnings.push("feedback_loop.red_capable should be yes, no, or not_applicable");
+  }
+  if (loop.deterministic && !["yes", "no"].includes(String(loop.deterministic))) {
+    warnings.push("feedback_loop.deterministic should be yes or no");
+  }
+  if (loop.agent_runnable && !["yes", "no"].includes(String(loop.agent_runnable))) {
+    warnings.push("feedback_loop.agent_runnable should be yes or no");
+  }
+  if (needsLoop && String(loop.red_capable) !== "yes") { // Flagged dossiers warn for every value other than exactly "yes", including "no" and "not_applicable".
+    warnings.push("bug-fix or risky behavior-change dossiers should name a red-capable feedback loop or explicit waiver");
+  }
+}
 function validateDossierData(data, { role, unitId } = {}) {
   const errors = [];
   const warnings = [];
@@ -649,6 +735,8 @@ function validateDossierData(data, { role, unitId } = {}) {
     if (!/\b[A-Z]+[0-9]+\b/.test(row)) warnings.push(`acceptance_matrix[${index}] should include a stable row ID`);
   });
+  validateFeedbackLoop(data, warnings);
   const unresolved = fieldArray(data.open_questions).filter((item) => !/^(none|no open questions|empty)$/i.test(item));
   if (unresolved.length > 0) {
     errors.push("open_questions must be explicitly none before delegation; create a discovery dossier or stop as BLOCKED");
@@ -877,6 +965,8 @@ function buildWorkerPrompt({ role, unitId, dossierText }) {
         findings: [],
         blocking_question: null,
         next_action: "",
+        verification_environment: null,
+        outcome_evaluations: [],
         adapter: null,
         guard: null,
         reason: null,
@@ -1044,6 +1134,109 @@ function ensureArray(value) {
   return Array.isArray(value) ? value : [value];
 }
+function isPlainObject(value) { // True for any truthy, non-array value whose typeof is "object"; the prototype is not inspected.
+  return Boolean(value && typeof value === "object" && !Array.isArray(value));
+}
+function validateCapabilityList(value, field, errors) { // Pushes messages onto errors when value is not an array of known capability names; field is the label used in messages.
+  if (!Array.isArray(value)) { // Also reached for undefined or null, so callers that want the list optional must guard before calling.
+    errors.push(`${field} must be an array`);
+    return;
+  }
+  for (const capability of value) {
+    if (!VERIFICATION_CAPABILITIES.has(capability)) { // One error per unsupported entry; non-string entries fail the Set lookup too.
+      errors.push(`${field} contains unsupported capability: ${capability}`);
+    }
+  }
+}
+function validateStringArray(value, field, errors) { // Pushes a message onto errors when value is not an array or holds a non-string entry; field is the label used in messages.
+  if (!Array.isArray(value)) {
+    errors.push(`${field} must be an array`);
+    return;
+  }
+  if (value.some((item) => typeof item !== "string")) { // A single error is reported no matter how many entries are not strings.
+    errors.push(`${field} must contain only strings`);
+  }
+}
+function validateVerificationEnvironment(environment, errors) { // Checks the optional verification_environment object and pushes one message per problem onto errors.
+  if (environment == null) return; // Loose equality accepts both null and undefined as "not provided".
+  if (!isPlainObject(environment)) {
+    errors.push("verification_environment must be an object or null");
+    return;
+  }
+  for (const field of ["shell", "filesystem", "git_diff", "browser", "playwright_mcp", "network"]) { // Each flag is optional but must be a boolean when present.
+    if (environment[field] != null && typeof environment[field] !== "boolean") {
+      errors.push(`verification_environment.${field} must be boolean`);
+    }
+  }
+  if (environment.capabilities != null) { // capabilities and limitations are validated only when supplied.
+    validateCapabilityList(environment.capabilities, "verification_environment.capabilities", errors);
+  }
+  if (environment.limitations != null) {
+    validateStringArray(environment.limitations, "verification_environment.limitations", errors);
+  }
+}
+function validateOutcomeEvaluations(report, errors) { // Validates report.outcome_evaluations row by row and pushes one message per problem onto errors.
+  const rows = report?.outcome_evaluations;
+  if (!Array.isArray(rows)) { // A missing or non-array value is an error here, so the field is effectively required.
+    errors.push("outcome_evaluations must be an array");
+    return;
+  }
+  if (report.status === "PASS" && rows.some((row) => row?.verdict !== "PASS")) { // Any row without a PASS verdict, including non-object rows, invalidates a top-level PASS.
+    errors.push("top-level PASS requires every outcome_evaluations row verdict to be PASS");
+  }
+  rows.forEach((row, index) => {
+    const prefix = `outcome_evaluations[${index}]`;
+    if (!isPlainObject(row)) { // Non-object rows get a single error and skip the field checks below.
+      errors.push(`${prefix} must be an object`);
+      return;
+    }
+    for (const field of ["id", "source_requirement", "expected_outcome", "verdict"]) { // Required string fields; whitespace-only values count as empty.
+      if (typeof row[field] !== "string" || row[field].trim() === "") {
+        errors.push(`${prefix}.${field} must be a non-empty string`);
+      }
+    }
+    if (!OUTCOME_VERDICTS.has(row.verdict)) { // Runs in addition to the string check above, so a missing verdict yields two errors.
+      errors.push(`${prefix}.verdict must be PASS, FAIL, BLOCKED, or CONDITIONAL_PASS`);
+    }
+    validateCapabilityList(row.preferred_verification, `${prefix}.preferred_verification`, errors); // Called unconditionally, so both capability lists are required on every row.
+    validateCapabilityList(row.available_verification, `${prefix}.available_verification`, errors);
+    if (!isPlainObject(row.evidence_strength)) {
+      errors.push(`${prefix}.evidence_strength must be an object`);
+    } else {
+      validateCapabilityList(row.evidence_strength.strongest_possible, `${prefix}.evidence_strength.strongest_possible`, errors);
+      validateCapabilityList(row.evidence_strength.strongest_available, `${prefix}.evidence_strength.strongest_available`, errors);
+      if (row.evidence_strength.limitation != null && typeof row.evidence_strength.limitation !== "string") { // limitation is optional inside evidence_strength.
+        errors.push(`${prefix}.evidence_strength.limitation must be a string`);
+      }
+    }
+    if (!Array.isArray(row.evidence)) errors.push(`${prefix}.evidence must be an array`); // Only the array shape is checked, not the entries.
+    validateStringArray(row.invalid_pass_conditions, `${prefix}.invalid_pass_conditions`, errors); // Required: undefined fails the array check.
+    if (row.verdict === "PASS" && Array.isArray(row.evidence) && row.evidence.length === 0) { // A PASS row needs at least one evidence entry.
+      errors.push(`${prefix}.PASS requires row evidence`);
+    }
+    if (row.verdict === "CONDITIONAL_PASS") { // Needs a non-blank row.limitation or a non-empty capability_limitations; evidence_strength.limitation is not consulted.
+      const hasLimitation = typeof row.limitation === "string" && row.limitation.trim() !== "";
+      const hasCapabilityLimitation = Array.isArray(row.capability_limitations) && row.capability_limitations.length > 0;
+      if (!hasLimitation && !hasCapabilityLimitation) {
+        errors.push(`${prefix}.CONDITIONAL_PASS requires limitation or capability_limitations`);
+      }
+    }
+    if (row.capability_limitations != null) { // The remaining fields are optional and validated only when present.
+      validateStringArray(row.capability_limitations, `${prefix}.capability_limitations`, errors);
+    }
+    if (row.required_external_check != null) {
+      validateStringArray(row.required_external_check, `${prefix}.required_external_check`, errors);
+    }
+    if (row.finding != null && typeof row.finding !== "string") {
+      errors.push(`${prefix}.finding must be a string`);
+    }
+  });
+}
 function reportAdapterMeta(adapter, result = {}) {
   return {
     agent: adapter?.agent || null,
@@ -1069,6 +1262,8 @@ function blockedReport({ role, unitId, reason, summary, adapter, guard, stdout,
     findings: reason ? [{ id: reason, severity: "blocking", summary }] : [],
     blocking_question: null,
     next_action: "supervisor_review",
+    verification_environment: null,
+    outcome_evaluations: [],
     adapter: adapter || null,
     guard: guard || { allowed_surface_violations: [], role_violations: [], warnings: [] },
     reason,
@@ -1092,8 +1287,11 @@ function normalizeReport(report, { role, unitId, adapter, guard }) {
     findings: ensureArray(report.findings),
     blocking_question: report.blocking_question ?? null,
     next_action: report.next_action || "",
+    verification_environment: report.verification_environment ?? null,
+    outcome_evaluations: ensureArray(report.outcome_evaluations),
     adapter,
     guard,
+    reason: report.reason ?? null,
   };
 }
@@ -1112,6 +1310,8 @@ function validateWorkerReport(report, { role, unitId }) {
     errors.push("blocking_question requires BLOCKED status");
   }
   if (role === "verifier" && report?.changed_surfaces?.length > 0) errors.push("verifier must not report changed surfaces");
+  validateVerificationEnvironment(report?.verification_environment, errors);
+  validateOutcomeEvaluations(report, errors);
   return errors;
 }

package/docs/artifacts.md CHANGED Viewed

@@ -8,6 +8,7 @@ In Git-backed codebases, `.workflow/` is local working state. Ensure `<workspace
 ## Workflow Control
+- `.workflow/LEDGER.md`
 - `.workflow/WORKFLOW.md`
 - `.workflow/SOURCE-CORPUS.md`
 - `.workflow/WORK-UNITS.md`
@@ -40,3 +41,11 @@ In Git-backed codebases, `.workflow/` is local working state. Ensure `<workspace
 ## State Medium
 Markdown is the default, but state may also be an inline brief, spreadsheet tab, ticket set, design annotation, CRM note, runbook, decision log, slide appendix, whiteboard note, or chat continuation note.
+For `lean_work_unit_runner`, prefer one compact ledger over multiple workflow documents. Each executable row should carry `id`, `source_ref`, `scope`, `done`, `check`, `status`, touched surfaces, and blockers. Escalated units may link to strict-mode SPEC, dossier, or verification artifacts only when needed.
+For product or integration implementation, `WORK-UNITS.md` and lean ledger rows should also carry `slice_type`, `observable_behavior`, `expected_outcome`, `demo_or_verification`, `layers_touched`, and `horizontal_slice_justification` where useful. Prefer `tracer_bullet` units for behavior work. Use horizontal slices only for prefactoring, migration safety, infrastructure, documentation, research, or risk-boundary work with a concrete justification.
+For outcome-bearing verification, `ACCEPTANCE-MATRIX.md` and `VERIFICATION-REPORT.md` should include a verification environment, outcome evaluation rows, preferred and available verification capabilities, evidence strength, invalid PASS conditions, and any required external checks. Row-level `CONDITIONAL_PASS` means strongly inferred but not fully observable; it must not be treated as final green status without explicit waiver evidence.
+For native thread or subagent delegation, `WORKER-MAP.md` must record the native resource id, terminal report, close action, and close result. Do not mark a native worker closed until the resource close is recorded.

package/docs/cli.md CHANGED Viewed

@@ -122,10 +122,12 @@ Options:
 ### `delegate`
-Run one role-scoped worker through an installed Codex or Claude Code CLI and print exactly one normalized `WorkerReportV1` JSON object. Missing or invalid `DossierV1` contracts, missing CLIs, invalid worker output, timeouts, non-zero PASS results, PASS without evidence, forbidden-surface changes, and verifier mutations become `BLOCKED` reports instead of unstructured prose.
+Run one role-scoped worker through an installed Codex or Claude Code CLI and print exactly one normalized `WorkerReportV1` JSON object. Missing or invalid `DossierV1` contracts, missing CLIs, invalid worker output, timeouts, non-zero PASS results, PASS without evidence, top-level `CONDITIONAL_PASS`, PASS with conditional outcome rows, forbidden-surface changes, and verifier mutations become `BLOCKED` reports instead of unstructured prose.
 The report schema lives at `schemas/worker-report-v1.schema.json`. The Codex adapter passes it via `--output-schema`; the Claude Code adapter passes it via `--json-schema`; every adapter is still wrapper-validated after the run.
+`WorkerReportV1.status` remains `PASS`, `FAIL`, or `BLOCKED`. Outcome-bearing verifier reports may include `verification_environment` and `outcome_evaluations`; `CONDITIONAL_PASS` is allowed only as an outcome row verdict to record strongly inferred but not fully observable behavior.
 `--dossier` is a hard preflight gate. It must parse as `DossierV1` and pass concrete-field checks before the worker process starts. The delegate command uses `allowed_surfaces` and `forbidden_surfaces` from the dossier as surface guards unless explicit CLI surface flags are provided.
 ```bash

package/docs/portable-delegation.md CHANGED Viewed

@@ -18,6 +18,10 @@ complete intake
 -> final supervisor report
 ```
+This document describes strict or explicitly delegated execution. `lean_work_unit_runner` normally stays in same-session phased execution with a compact ledger and targeted checks. It should enter portable delegation only when the user authorizes workers for a batch or a unit hits a strict-mode escalation trigger.
+Prefer portable delegation over native threads or subagents when it satisfies the work. Portable delegation is one-shot, so the worker process exits after the report. Native thread or subagent transports are allowed only when the supervisor can record the native resource id and call the matching close operation after terminal report, timeout, blocker, cancellation, or invalid output.
 The supervisor remains the only coordinator. Workers do not ask the human questions, choose final disposition, expand scope, approve plans, or talk to each other. If a worker needs a decision, it returns `BLOCKED` with a `blocking_question`; only the supervisor asks the user.
 ## Non-Goals
@@ -76,6 +80,17 @@ Every adapter must normalize into this shape:
   "findings": [],
   "blocking_question": null,
   "next_action": "",
+  "verification_environment": {
+    "shell": true,
+    "filesystem": true,
+    "git_diff": true,
+    "browser": false,
+    "playwright_mcp": false,
+    "network": false,
+    "capabilities": ["shell_command", "api_probe", "static_diff_inspection"],
+    "limitations": []
+  },
+  "outcome_evaluations": [],
   "adapter": {
     "agent": "codex",
     "command": "codex exec",
@@ -89,7 +104,7 @@ Every adapter must normalize into this shape:
 }
 ```
-`PASS`, `FAIL`, and `BLOCKED` mean the same thing on both platforms. A worker report without evidence for material acceptance rows is invalid. Invalid output is converted into a deterministic normalized `BLOCKED` report by default. The package does not make a second live worker call to repair formatting, because a second call can mutate state, consume budget, or produce another non-portable transcript.
+`PASS`, `FAIL`, and `BLOCKED` mean the same thing on both platforms. `CONDITIONAL_PASS` is valid only as a row-level `outcome_evaluations[].verdict`, not as top-level `WorkerReportV1.status`. A worker report without evidence for material acceptance rows is invalid. A top-level PASS with failed, blocked, or conditional outcome rows is invalid. Invalid output is converted into a deterministic normalized `BLOCKED` report by default. The package does not make a second live worker call to repair formatting, because a second call can mutate state, consume budget, or produce another non-portable transcript.
 The schema is a package artifact at `schemas/worker-report-v1.schema.json`. Codex receives it through `--output-schema`; Claude Code receives it through `--json-schema`; both adapters are still wrapper-validated after the run.
@@ -159,10 +174,13 @@ For git workspaces, the surface guard compares pre/post git status. Mutable role
 | Worker hangs | Timeout returns normalized `BLOCKED` with adapter timing evidence. |
 | Worker exits non-zero but printed useful text | Do not trust it as PASS. Normalize as `BLOCKED` unless a valid report and clean guards prove otherwise. |
 | Worker returns PASS without evidence | Invalid report. Return normalized `BLOCKED` with `reason: report_validation_failed`. |
+| Worker returns top-level `CONDITIONAL_PASS` | Invalid report. Use `BLOCKED` or `FAIL` top-level status and record `CONDITIONAL_PASS` only on the affected outcome row. |
+| Worker hides conditional outcome proof inside PASS | Invalid report. Top-level PASS requires every material outcome row verdict to be PASS. |
 | Tests cannot run | Verifier returns `BLOCKED` or `PASS` only with substitute evidence accepted by the acceptance matrix. |
 | Repair expands scope | Reject unless the repair dossier explicitly allowed the new surfaces and criteria. |
 | Units touch same surfaces | Run sequentially. Parallel delegation requires proven disjoint mutable surfaces. |
 | Platform has no native subagents | Fine. Each role is a fresh one-shot CLI process. |
+| Native subagent close is unavailable | Do not spawn it. Return `worker_resource_close_unavailable` and use portable delegation or same-session phased work only if intake allowed it. |
 | Platform output differs | Platform output is not the contract. `WorkerReportV1` is the only supervisor input. |
 | Platform cannot support a role safely | Adapter role is unsupported. Supervisor chooses another certified adapter or blocks. |
 | Full support is claimed but one CLI is absent | `delegate-doctor --agent all --probe --require-pass` exits nonzero and names the missing adapter. |

package/docs/skill-reference.md CHANGED Viewed

@@ -2,7 +2,17 @@
 ## `workflow-supervisor`
-Coordinate explicit supervised or agent-loop workflows. It always starts with a complete intake gate before planning, implementation, goal binding, worker delegation, or final disposition. The user must answer every intake item; the supervisor must not infer or skip steps from keywords. After complete intake, it creates a source-requirement coverage ledger and SPEC review gate before work units so controlling-source deliverables, roadmap phases, and exit criteria are either implemented, explicitly deferred, blocked, or marked non-material. In human-in-loop mode, the human can ask questions, request revisions, block, defer, or approve the SPEC before final work units. In autonomous goal mode, human clarification pauses resume from recorded workflow state after the answer updates Q&A, decisions, coverage, and affected downstream artifacts. It then selects either autonomous goal execution or human-in-the-loop execution, then orchestrates named workers from dossiers through the portable delegate command or an approved native adapter. Loading the skill itself does not spawn workers. It binds Codex goals only after complete intake and when the user or environment authorizes goal-oriented work, checks active goal state first, avoids unrelated active-goal collisions, and treats terminal blocked goals as history when resuming through workflow docs.
+Coordinate explicit supervised or agent-loop workflows with profile-based overhead. It starts by selecting `lean_work_unit_runner`, `strict_full_workflow`, or `planning_only`, then completes the intake needed for that profile before implementation, goal binding, worker delegation, or final disposition. The user must answer required intake items; the supervisor must not infer path, mode, delegation, final disposition, or boundaries from vague keywords. Lean mode is for large already-bounded work-unit backlogs: it keeps a compact ledger with unit id, source reference, scope, done signal, check, status, touched surfaces, and blockers, then executes one ready unit at a time with targeted checks and escalation gates. Strict mode creates a source-requirement coverage ledger and SPEC review gate before work units so controlling-source deliverables, roadmap phases, and exit criteria are either implemented, explicitly deferred, blocked, or marked non-material. In human-in-loop mode, the human can ask questions, request revisions, block, defer, or approve before execution. In autonomous goal mode, human clarification pauses resume from recorded workflow state after the answer updates only affected downstream artifacts. Strict mode can orchestrate named workers from dossiers through the portable delegate command or an approved native adapter. Native threads and subagents require a recorded native resource id plus a close result, such as `close_agent` for Codex subagents, before a worker is `closed`. Loading the skill itself does not spawn workers. It binds Codex goals only after complete intake and when the user or environment authorizes goal-oriented work, checks active goal state first, avoids unrelated active-goal collisions, and treats terminal blocked goals as history when resuming through workflow docs.
+Route first before profile selection. If Workflow Supervisor was not explicitly invoked and the task is a small, clear edit with obvious files and acceptance, do not use Workflow Supervisor; execute directly. If the user explicitly invokes `workflow-supervisor`, `$workflow-supervisor`, or says to use the skill, select the proportional profile instead of silently skipping the supervisor.
+| Situation | Route |
+|---|---|
+| Small, clear edit with obvious files and acceptance | Do not use Workflow Supervisor. Execute directly. |
+| Large bounded backlog with clear unit done signals | `lean_work_unit_runner`. |
+| Broad, ambiguous, source-of-truth, delegated, security-sensitive, dirty-state, release, resume, or externally published work | `strict_full_workflow`. |
+| Sequencing, risk review, or backlog shaping only | `planning_only`. |
+| Runnable uncertainty before implementation | Create a discovery or prototype unit first. |
 ## `source-corpus`
@@ -22,7 +32,7 @@ Define role contracts and solo-mode phase separation. It prevents role bleed: ve
 ## `acceptance-matrix`
-Create formal evidence-mapped acceptance rows for high-risk, supervised, ambiguous, resumable, or delegated workflows. Rows must preserve source requirement strength, including named systems, quantities, live integration language, and exit criteria; weaker proxy checks require explicit user waiver or scope narrowing.
+Create formal evidence-mapped acceptance rows for high-risk, supervised, ambiguous, resumable, or delegated workflows. Rows must preserve source requirement strength, including named systems, quantities, live integration language, and exit criteria; weaker proxy checks require explicit user waiver or scope narrowing. Outcome-bearing rows also name expected outcomes, preferred and available verification capabilities, evidence strength, invalid PASS conditions, and capability limitations. `CONDITIONAL_PASS` is row-level only and must not be treated as final green status without explicit waiver evidence.
 ## `loop-policy`

package/docs/troubleshooting.md CHANGED Viewed

@@ -4,6 +4,12 @@
 Keep `policy.allow_implicit_invocation: false`. Use explicit `$skill-name` invocation until live routing tests prove trigger precision.
+## Workflow Supervisor is used for a tiny edit
+If Workflow Supervisor was not explicitly invoked and the task has obvious files, obvious acceptance, and no hard supervisor trigger, do not invoke the skill. Execute directly and run the relevant check.
+If the user explicitly invoked `workflow-supervisor`, `$workflow-supervisor`, or said to use the skill, do not silently skip it. Select the lightest valid profile, usually `lean_work_unit_runner` for bounded unit work or `planning_only` when the user only needs sequencing, and explain that direct execution would normally fit a tiny edit.
 ## The agent cannot find the skills
 Run:
@@ -23,10 +29,38 @@ Use `.workflow/GOAL-STATE.md` or a workflow continuation document. The superviso
 Use `$workflow-docs` with a minimal artifact request. The skill must reject "create every document just in case."
+## Large backlogs run slowly or exhaust memory
+Use `lean_work_unit_runner` instead of `strict_full_workflow` when the source already contains clear work units and the user's priority is throughput. Keep one compact ledger with `id`, `source_ref`, `scope`, `done`, `check`, `status`, touched surfaces, and blockers. Run one unit at a time by default, avoid subagents unless explicitly authorized, avoid broad scans unless required for the current unit, and checkpoint by batch rather than rewriting full workflow docs after every unit.
+Do not remove work units to make the process lean. If a unit cannot name its boundary, done signal, or targeted check, mark it `blocked` or escalate that unit to strict mode.
+## Native subagents remain open after completion
+Treat this as a lifecycle bug, not a cosmetic cleanup task. A terminal report or completed notification does not close a native Codex subagent. Record every native worker id in `WORKER-MAP.md`, call the native close action such as `close_agent` after the terminal report or blocker is captured, and block the final outcome if any native worker lacks a close result. Prefer one-shot portable delegation when it satisfies the work.
+## Unsupported gauntlet summaries are used as proof
+Unsupported external gauntlet summaries are not validation evidence. Treat them as raw leads only unless they preserve per-scenario reports, commands, artifacts, and expected outcomes that another maintainer can inspect. Use repo-native tests, fixtures, `npm run validate`, and live adapter probes such as `workflow-supervisor delegate-doctor --agent all --probe --require-pass` for real confidence.
 ## Verification rubber-stamps the result
 Use `$acceptance-matrix` for formal evidence rows. A PASS requires row-by-row evidence or explicit waiver evidence.
+## Outcome evidence is only inferred
+Use row-level `CONDITIONAL_PASS` only when the strongest available checks strongly infer the expected outcome but cannot fully observe it. Record the missing capability, limitation, and required external check. Do not roll that row into a final PASS unless the user explicitly accepts the limitation as a waiver or narrowed scope.
+## Browser snapshots are unavailable
+Browser snapshots are a verifier adapter, not the core verification model. If browser, screenshot, Playwright, Storybook, visual diff, or manual-review capability is unavailable, use the strongest available lower-level observable contract such as jsdom render, API probe, state-machine test, file snapshot, route manifest, or static semantic diff inspection. If the source requirement truly depends on browser or visual proof, mark the row `BLOCKED` or `CONDITIONAL_PASS` with the limitation.
+## Bug fix passes with only related checks
+A related build, lint, broad test run, or inspection is not enough for a bug fix or risky behavior change unless it would catch the exact symptom. Add a red-capable feedback loop with the command, artifact, UI state, or manual check that would fail before the fix and pass after it.
+If no correct test surface exists, record an architecture or verification finding and either block the row or get explicit substitute-evidence waiver from the user. Do not hide this as a skipped check in a PASS report.
 ## A broad roadmap becomes one giant work unit
 Use the source-requirement coverage gate before work-unit finalization. Every material roadmap item, exit criterion, named integration, and numeric target should be mapped to a unit and acceptance row, explicitly deferred by the user, blocked for a decision, or marked non-material with a reason. Do not accept "future work" or residual risk notes as a substitute for work units.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "workflow-supervisor",
-  "version": "0.1.3",
+  "version": "0.2.0",
   "description": "Portable workflow supervision skills for Codex, Claude Code, and generic agent workspaces.",
   "type": "module",
   "repository": {
@@ -19,9 +19,15 @@
     "skills",
     "adapters",
     "schemas",
-    "docs",
+    "docs/artifacts.md",
+    "docs/cli.md",
+    "docs/compatibility.md",
+    "docs/portable-delegation.md",
+    "docs/skill-reference.md",
+    "docs/troubleshooting.md",
     "assets",
     "bin",
+    "CHANGELOG.md",
     "README.md",
     "LICENSE"
   ],

package/schemas/dossier-v1.schema.json CHANGED Viewed

@@ -114,6 +114,44 @@
     "required_commands_or_evidence": {
       "$ref": "#/$defs/stringList"
     },
+    "feedback_loop": {
+      "type": "object",
+      "required": [
+        "command_or_evidence",
+        "red_capable",
+        "exact_symptom_or_behavior",
+        "deterministic",
+        "expected_runtime",
+        "agent_runnable"
+      ],
+      "additionalProperties": true,
+      "properties": {
+        "command_or_evidence": {
+          "type": "string",
+          "minLength": 1
+        },
+        "red_capable": {
+          "type": "string",
+          "enum": ["yes", "no", "not_applicable"]
+        },
+        "exact_symptom_or_behavior": {
+          "type": "string",
+          "minLength": 1
+        },
+        "deterministic": {
+          "type": "string",
+          "enum": ["yes", "no"]
+        },
+        "expected_runtime": {
+          "type": "string",
+          "minLength": 1
+        },
+        "agent_runnable": {
+          "type": "string",
+          "enum": ["yes", "no"]
+        }
+      }
+    },
     "supervisor_checkpoints": {
       "$ref": "#/$defs/stringList"
     },