npm - theslopmachine - Versions diffs - 1.0.26-beta.0 → 1.0.26-beta.2 - Mend

theslopmachine 1.0.26-beta.0 → 1.0.26-beta.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (11)

package/assets/agents/developer.md +1 -1
package/assets/agents/slopmachine-clarifier.md +1 -1
package/assets/agents/slopmachine-claude.md +3 -0
package/assets/agents/slopmachine.md +3 -0
package/assets/skills/final-evaluation-orchestration/SKILL.md +3 -1
package/assets/skills/verification-gates/SKILL.md +1 -1
package/assets/slopmachine/exact-readme-template.md +2 -0
package/package.json +1 -1
package/plugin/index.ts +134 -7
package/plugin/state.ts +116 -22
package/plugin/types.ts +31 -9

package/assets/agents/developer.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: developer
 description: Senior implementation agent for software projects
-model: deepseek/deepseek-v4-flash
+model: openai/gpt-5.5
 variant: high
 mode: subagent
 thinkingLevel: high

package/assets/agents/slopmachine-clarifier.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: slopmachine-clarifier
 description: Product clarification agent for SlopMachine Phase 1
-model: deepseek/deepseek-v4-flash
+model: openai/gpt-5.5
 variant: medium
 mode: subagent
 thinkingLevel: high

package/assets/agents/slopmachine-claude.md CHANGED Viewed

@@ -295,6 +295,9 @@ Use these sequential names as the canonical workflow model. Legacy `P*` names ar
 - Run two strict audit/remediation cycles using evaluator sessions and the active bugfix lane.
 - In each audit cycle, send the complete installed evaluation prompt asset through the exact saved send packet verbatim. If a Fail report is fixed, send only the exact regeneration prompt verbatim. Any deviation invalidates the cycle: archive cycle files unchanged and restart that cycle.
 - Each audit cycle must close with both a rich 150+ line `./.tmp/audit_report-<N>.md` and `./.tmp/audit_report-<N>-fix_check.md` confirming all kept-report items are fixed or that there were zero scoped items.
+- After any evaluator claims a report was written, record it with `slopmachine_state`. The tool verifies kept audit/fix-check paths exist and are non-empty; if it rejects the record, make the evaluator write the exact missing file before continuing.
+- Record evaluator attempts with prompt metadata when available: `cycle|verdict|evaluatorSessionID|reportPath|kept|archived|promptKind|preparedPacketPath`.
+- Duplicate evaluator records and legacy report paths are normalized by plugin state writes/loads. If state still appears stale, stop and report the blocker instead of editing `../.ai/slopmachine-plugin-state.json` directly.
 - Preserve reports, extract complete issue sets, and route fixes in broad human language.
 - After both audit cycles, close the bugfix lane and start a test-coverage/final-reconciliation lane.
 - Exit only when both Audit Cycle 1 and Audit Cycle 2 are complete with kept audit reports and fix-check reports, the bugfix lane is closed, and the coverage/README audit passes with at least 90% test score.

package/assets/agents/slopmachine.md CHANGED Viewed

@@ -262,6 +262,9 @@ Use these sequential names as the canonical workflow model. Legacy `P*` names ar
 - Run two strict audit/remediation cycles using evaluator sessions and the active bugfix lane.
 - In each audit cycle, send the complete installed evaluation prompt asset through the exact saved send packet verbatim. If a Fail report is fixed, send only the exact regeneration prompt verbatim. Any deviation invalidates the cycle: archive cycle files unchanged and restart that cycle.
 - Each audit cycle must close with both a rich 150+ line `./.tmp/audit_report-<N>.md` and `./.tmp/audit_report-<N>-fix_check.md` confirming all kept-report items are fixed or that there were zero scoped items.
+- After any evaluator claims a report was written, record it with `slopmachine_state`. The tool verifies kept audit/fix-check paths exist and are non-empty; if it rejects the record, make the evaluator write the exact missing file before continuing.
+- Record evaluator attempts with prompt metadata when available: `cycle|verdict|evaluatorSessionID|reportPath|kept|archived|promptKind|preparedPacketPath`.
+- Duplicate evaluator records and legacy report paths are normalized by plugin state writes/loads. If state still appears stale, stop and report the blocker instead of editing `../.ai/slopmachine-plugin-state.json` directly.
 - Preserve reports, extract complete issue sets, and route fixes in broad human language.
 - After both audit cycles, close the bugfix lane and start a test-coverage/final-reconciliation lane.
 - Exit only when both Audit Cycle 1 and Audit Cycle 2 are complete with kept audit reports and fix-check reports, the bugfix lane is closed, and the coverage/README audit passes with at least 90% test score.

package/assets/skills/final-evaluation-orchestration/SKILL.md CHANGED Viewed

@@ -135,7 +135,7 @@ Required for each cycle:
 - the full saved send packet was read before send and sent word-for-word with no owner additions, omissions, summaries, path-only substitutions, or footers;
 - every failed, superseded, or invalid attempt report was archived unchanged;
 - no failed report was regenerated in the same evaluator session;
-- every full audit attempt was recorded with `slopmachine_state { evaluationAuditAttempt: "cycle|verdict|evaluatorSessionID|reportPath|kept|archived" }`;
+- every full audit attempt was recorded with `slopmachine_state { evaluationAuditAttempt: "cycle|verdict|evaluatorSessionID|reportPath|kept|archived|promptKind|preparedPacketPath" }` when packet metadata is available, or the 6-field form when it is not;
 - the kept audit report exists at `./.tmp/audit_report-<N>.md`;
 - the kept audit report is rich and complete: at least 150 lines and not materially shallower than the installed prompt's required output structure;
 - the kept audit report includes the required verdict, scope/boundary, prompt/repository mapping, section review or blocker/high panel as applicable, issues/suggestions or explicit no-issue statement, security/data-risk review where applicable, and test/logging/coverage sections required by the installed prompt;
@@ -150,6 +150,8 @@ Required for each cycle:
 No audit cycle is complete without both `./.tmp/audit_report-<N>.md` and `./.tmp/audit_report-<N>-fix_check.md` passing this validation gate.
+`slopmachine_state` rejects kept audit and fix-check records whose report path is missing or empty. If rejected, return to the evaluator and make it write the exact file before continuing. Do not repair this by editing workflow state JSON directly; duplicate evaluator records and legacy report paths are normalized by plugin state writes/loads.
 ## Fix-Check Prompt
 Use this exact fix-check instruction after a kept Pass or Partial Pass report's scoped fix-check items have been fixed. Send it verbatim to the same evaluator session after providing concise developer fix evidence, exact verification results when available, and the exact scoped fix-check issue list from the kept `audit_report-<N>.md`:

package/assets/skills/verification-gates/SKILL.md CHANGED Viewed

@@ -224,7 +224,7 @@ Block readiness if:
 Browser verification for web/fullstack must not give up. Find a working local startup/browser path, route blockers as module/issues only, retry after fixes, and keep going until the app is actually tested unless the user explicitly risk-accepts stopping.
-Plugin readiness validation should include Docker, `runTests`, browser, API/manual, README truth, unresolved strong issue count, accepted light High records, and readiness evidence path.
+Plugin readiness validation should include Docker, `runTests`, browser, API/manual, README truth, unresolved strong issue count, accepted light High records, readiness evidence path, structured readiness evidence entries, and stack-specific environment capability notes such as `dotnet=missing` or `docker=available`.
 Pass with notes if:
 - only accepted light High or lower bounded risks remain;

package/assets/slopmachine/exact-readme-template.md CHANGED Viewed

@@ -114,6 +114,7 @@ Important:
 - The primary runtime contract is `docker compose up --build` only for container-supported projects
 - The legacy compatibility string is included for users who still use the old Compose command name
 - Do not present `docker-compose up` as the primary startup contract
+- Do not include local non-container startup commands as an alternate primary path for backend/fullstack/container-supported projects unless the task explicitly requires them
 ### Additional startup notes
 - [example: first boot may take longer while containers build]
@@ -126,6 +127,7 @@ Important:
 - If env-file shape is required at runtime, it must be generated ephemerally by the controlled startup path and never committed
 - Startup and testing must not require the reviewer to create, copy, or edit any env file
 - Do **not** use manual primary setup steps such as `npm install`, `pnpm install`, `yarn install`, `pip install`, `apt-get`, or manual database setup
+- If local tool commands are mentioned for developer context, clearly label them as non-required and not part of reviewer startup or verification
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "theslopmachine",
-  "version": "1.0.26-beta.0",
+  "version": "1.0.26-beta.2",
   "description": "SlopMachine installer and project bootstrap CLI",
   "license": "MIT",
   "type": "module",

package/plugin/index.ts CHANGED Viewed

@@ -278,9 +278,9 @@ function parseBooleanToken(value: string): boolean | null {
   return null
 }
-function parseEvaluationAuditAttempt(value: string): { cycle: number; verdict: string; evaluatorSessionID: string; reportPath: string; kept: boolean; archived: boolean; at: string } | null {
+function parseEvaluationAuditAttempt(value: string): { cycle: number; verdict: string; evaluatorSessionID: string; reportPath: string; kept: boolean; archived: boolean; at: string; promptKind?: string | null; preparedPacketPath?: string | null } | null {
   const parts = value.split("|").map((part) => part.trim())
-  if (parts.length !== 6) return null
+  if (parts.length !== 6 && parts.length !== 8) return null
   const cycle = parseNonNegativeInteger(parts[0])
   const verdict = parts[1] || ""
   const evaluatorSessionID = parts[2] || ""
@@ -288,7 +288,15 @@ function parseEvaluationAuditAttempt(value: string): { cycle: number; verdict: s
   const kept = parseBooleanToken(parts[4] || "")
   const archived = parseBooleanToken(parts[5] || "")
   if (!cycle || !verdict || !evaluatorSessionID || !reportPath || kept === null || archived === null) return null
-  return { cycle, verdict, evaluatorSessionID, reportPath, kept, archived, at: new Date().toISOString() }
+  return { cycle, verdict, evaluatorSessionID, reportPath, kept, archived, at: new Date().toISOString(), promptKind: parts[6] || null, preparedPacketPath: parts[7] || null }
+}
+function parseReadinessEvidence(value: string): { key: string; status: string; command: string; artifact: string; note: string; at: string } | null {
+  const parts = value.split("|").map((part) => part.trim())
+  if (parts.length !== 5) return null
+  const [key, status, command, artifact, note] = parts
+  if (!key || !status) return null
+  return { key, status, command: command || "", artifact: artifact || "", note: note || "", at: new Date().toISOString() }
 }
 function parseEvaluationFixCheck(value: string): { cycle: number; evaluatorSessionID: string; reportPath: string; at: string } | null {
@@ -313,6 +321,43 @@ function formatSession(record: { status: string; sessionID: string; purpose: str
   return `${record.status}: ${record.sessionID} (${record.purpose})${record.closedAt ? ` closed=${record.closedAt}` : ""}`
 }
+function recordSessionPhase(
+  state: PluginState,
+  sessionID: string,
+  lane: string,
+  purpose: string,
+  source: "tool" | "event",
+) {
+  if (!sessionID || !lane || !state.currentPhase) return
+  const now = new Date().toISOString()
+  const existing = state.phaseSessions.find((item) => item.sessionID === sessionID && item.lane === lane && item.phase === state.currentPhase)
+  if (existing) {
+    existing.lastSeenAt = now
+    if (purpose) existing.purpose = purpose
+    return
+  }
+  state.phaseSessions.push({
+    sessionID,
+    lane,
+    phase: state.currentPhase,
+    purpose,
+    source,
+    firstSeenAt: now,
+    lastSeenAt: now,
+  })
+}
+function recordActiveSessionsForPhase(state: PluginState) {
+  for (const [lane, sessionID] of Object.entries(state.activeSessions)) {
+    if (!sessionID) continue
+    const record = [...sessionList(state, lane)].reverse().find((item) => item.sessionID === sessionID)
+    recordSessionPhase(state, sessionID, lane, record?.purpose || "active", record?.source || "tool")
+    if (record && !record.phases?.includes(state.currentPhase)) {
+      record.phases = [...(record.phases || []), state.currentPhase]
+    }
+  }
+}
 function registerActiveSession(
   state: PluginState,
   lane: keyof PluginState["activeSessions"],
@@ -342,13 +387,61 @@ function registerActiveSession(
     existing.purpose = purpose || existing.purpose
     if (activate) existing.closedAt = undefined
     existing.source = existing.source || source
+    if (!existing.phases?.includes(state.currentPhase)) existing.phases = [...(existing.phases || []), state.currentPhase]
+    recordSessionPhase(state, sid, lane, existing.purpose, existing.source || source)
     return { created: false, replaced: Boolean(previousActive && previousActive !== sid) }
   }
-  list.push({ sessionID: sid, lane, purpose, status: activate ? "active" : "closed", at: now, source })
+  list.push({ sessionID: sid, lane, purpose, status: activate ? "active" : "closed", at: now, source, phases: [state.currentPhase] })
+  recordSessionPhase(state, sid, lane, purpose, source)
   return { created: true, replaced: Boolean(previousActive && previousActive !== sid) }
 }
+function upsertEvaluationAuditAttempt(state: PluginState, attempt: ReturnType<typeof parseEvaluationAuditAttempt> & {}) {
+  if (!attempt) return
+  attempt.reportPath = canonicalEvaluationReportPath(state, attempt.reportPath)
+  state.evaluation.auditAttempts = state.evaluation.auditAttempts.filter((item) => {
+    if (item.cycle !== attempt.cycle) return true
+    if (item.reportPath === attempt.reportPath && item.evaluatorSessionID === attempt.evaluatorSessionID) return false
+    if (attempt.kept && item.kept) return false
+    return true
+  })
+  state.evaluation.auditAttempts.push(attempt)
+}
+function upsertEvaluationFixCheck(state: PluginState, fixCheck: ReturnType<typeof parseEvaluationFixCheck> & {}) {
+  if (!fixCheck) return
+  fixCheck.reportPath = canonicalEvaluationReportPath(state, fixCheck.reportPath)
+  state.evaluation.fixChecks = state.evaluation.fixChecks.filter((item) => {
+    if (item.cycle !== fixCheck.cycle) return true
+    return !(item.reportPath === fixCheck.reportPath && item.evaluatorSessionID === fixCheck.evaluatorSessionID)
+  })
+  state.evaluation.fixChecks.push(fixCheck)
+}
+function canonicalEvaluationReportPath(state: PluginState, rawPath: string): string {
+  const value = rawPath.trim().replace(/\\/g, "/").replace(/^\.\//, "")
+  if (value.startsWith(`${state.evaluationReportsRoot}/`)) return value
+  if (value.startsWith("task/.tmp/")) return `${state.evaluationReportsRoot}/${value.slice("task/.tmp/".length)}`
+  if (value.startsWith(".tmp/")) return `${state.evaluationReportsRoot}/${value.slice(".tmp/".length)}`
+  return value
+}
+async function nonEmptyWorkflowFile(state: PluginState, relativePath: string): Promise<boolean> {
+  try {
+    const text = await fs.readFile(path.join(state.workflowRoot, relativePath), "utf8")
+    return text.trim().length > 0
+  } catch {
+    return false
+  }
+}
+function openBlockerSignature(state: PluginState): string | null {
+  const open = state.blockers.filter((blocker) => !blocker.resolvedAt)
+  if (open.length === 0) return null
+  return open.map((blocker) => `${blocker.phase}:${blocker.status}:${blocker.reason}`).sort().join("|")
+}
 interface ClaudeLaunchIntent {
   command: string
   lane: string
@@ -1014,6 +1107,7 @@ export default {
         }
       }
       recordActivity(sessionID)
+      recordSessionPhase(state, sessionID, "owner", "owner session", "event")
       await log("owner session tracked", { sessionID, agent, source })
       return true
     }
@@ -1085,6 +1179,12 @@ export default {
           await log("auto-continue cap reached", { sessionID, count, max: MAX_AUTO_CONTINUES_PER_SESSION, phase: state.currentPhase })
           return
         }
+        const blockerSignature = openBlockerSignature(state)
+        if (blockerSignature && state.autoContinueBlockerSignature === blockerSignature) {
+          await log("auto-continue blocked by repeated open blocker", { sessionID, phase: state.currentPhase, blockerSignature })
+          return
+        }
+        state.autoContinueBlockerSignature = blockerSignature
         state.autoContinueCounts[sessionID] = count + 1
         state.lastStateChange = new Date().toISOString()
         await persist()
@@ -1275,6 +1375,7 @@ export default {
             evaluationFixCheck: stringArg,
             evaluationUnresolvedStrongIssues: z.union([z.number(), z.string()]).optional(),
             evaluationAcceptedLightHigh: stringArg,
+            environmentCapability: stringArg,
             readinessDocker: readinessStatusArg,
             readinessRunTests: readinessStatusArg,
             readinessBrowser: readinessStatusArg,
@@ -1284,6 +1385,7 @@ export default {
             readinessAcceptedLightHigh: stringArg,
             readinessD1D9: stringArg,
             readinessEvidencePath: stringArg,
+            readinessEvidence: stringArg,
           },
           async execute(i: Record<string, unknown>, context?: OpenCodeToolContext) {
             assertSlopmachineToolContext(context, slopmachineSessionIDs)
@@ -1349,14 +1451,22 @@ export default {
             }
             if (hasTextSetterValue(i.evaluationAuditAttempt)) {
               const attempt = parseEvaluationAuditAttempt(String(i.evaluationAuditAttempt))
-              if (!attempt) return badArg("evaluationAuditAttempt must be 'cycle|verdict|evaluatorSessionID|reportPath|kept|archived'")
-              state.evaluation.auditAttempts.push(attempt)
+              if (!attempt) return badArg("evaluationAuditAttempt must be 'cycle|verdict|evaluatorSessionID|reportPath|kept|archived' or add '|promptKind|preparedPacketPath'")
+              attempt.reportPath = canonicalEvaluationReportPath(state, attempt.reportPath)
+              if (attempt.kept && !(await nonEmptyWorkflowFile(state, attempt.reportPath))) {
+                return badArg(`evaluationAuditAttempt kept report does not exist or is empty: ${attempt.reportPath}`)
+              }
+              upsertEvaluationAuditAttempt(state, attempt)
               changed = true
             }
             if (hasTextSetterValue(i.evaluationFixCheck)) {
               const fixCheck = parseEvaluationFixCheck(String(i.evaluationFixCheck))
               if (!fixCheck) return badArg("evaluationFixCheck must be 'cycle|evaluatorSessionID|reportPath'")
-              state.evaluation.fixChecks.push(fixCheck)
+              fixCheck.reportPath = canonicalEvaluationReportPath(state, fixCheck.reportPath)
+              if (!(await nonEmptyWorkflowFile(state, fixCheck.reportPath))) {
+                return badArg(`evaluationFixCheck report does not exist or is empty: ${fixCheck.reportPath}`)
+              }
+              upsertEvaluationFixCheck(state, fixCheck)
               changed = true
             }
             if (hasNumberSetterValue(i.evaluationUnresolvedStrongIssues)) {
@@ -1371,6 +1481,12 @@ export default {
               state.evaluation.acceptedLightHigh.push(risk)
               changed = true
             }
+            if (hasTextSetterValue(i.environmentCapability)) {
+              const [name, status] = String(i.environmentCapability).split("=").map((part) => part.trim())
+              if (!name || !status) return badArg("environmentCapability must look like 'dotnet=missing' or 'docker=available'")
+              state.readiness.environment[name] = status
+              changed = true
+            }
             const readinessMap = [
               ["readinessDocker", "docker"],
               ["readinessRunTests", "runTests"],
@@ -1413,6 +1529,13 @@ export default {
               state.readiness.evidencePath = String(i.readinessEvidencePath)
               changed = true
             }
+            if (hasTextSetterValue(i.readinessEvidence)) {
+              const evidence = parseReadinessEvidence(String(i.readinessEvidence))
+              if (!evidence) return badArg("readinessEvidence must be 'key|status|command|artifact|note'")
+              state.readiness.evidence = state.readiness.evidence.filter((item) => item.key !== evidence.key)
+              state.readiness.evidence.push(evidence)
+              changed = true
+            }
             if (changed) {
               state.lastStateChange = new Date().toISOString()
               await persist()
@@ -1452,6 +1575,7 @@ export default {
               internalEvaluator: state.internalEvaluator,
               evaluation: state.evaluation,
               readiness: state.readiness,
+              phaseSessions: state.phaseSessions,
               handoff: state.nextHandoff || "none",
               counters: {
                 artifacts: Object.keys(state.artifacts).length,
@@ -1509,6 +1633,7 @@ export default {
               acceptWarnings: asBool(i.acceptWarnings),
             })
             if (result.ok) {
+              recordActiveSessionsForPhase(state)
               await persist()
             }
             return JSON.stringify(result, null, 2)
@@ -1535,6 +1660,7 @@ export default {
             const result = reopenPhase(state, i.phase)
             if (result.ok) {
               state.lastStateChange = new Date().toISOString()
+              recordActiveSessionsForPhase(state)
               await persist()
             }
             return JSON.stringify(result, null, 2)
@@ -1717,6 +1843,7 @@ export default {
               evaluators: state.evaluatorSessions.map(formatSession),
               claude: state.claudeSessions.map(formatSession),
               general: state.generalSessions.map(formatSession),
+              phaseSessions: state.phaseSessions,
               primary: state.primaryDevelopSessionId || "none",
               latest: state.latestDevelopSessionId || "none",
               next: `develop #${state.nextDevelopSessionNumber}, bugfix #${state.nextBugfixSessionNumber}`,

package/plugin/state.ts CHANGED Viewed

@@ -61,31 +61,31 @@ const CONTENT_CHECKS: ContentCheck[] = [
     noPlaceholders: true,
   },
   {
-    artifact: "task/.tmp/audit_report-1.md",
+    artifact: "__evaluationReportsRoot__/audit_report-1.md",
     headings: ["Verdict", "Scope and Static Verification Boundary", "Repository / Requirement Mapping Summary", "Section-by-section Review", "Issues / Suggestions", "Security Review"],
     noPlaceholders: false,
     substanceLines: 150,
   },
   {
-    artifact: "task/.tmp/audit_report-2.md",
+    artifact: "__evaluationReportsRoot__/audit_report-2.md",
     headings: ["Verdict", "Scope and Static Verification Boundary", "Repository / Requirement Mapping Summary", "Section-by-section Review", "Issues / Suggestions", "Security Review"],
     noPlaceholders: false,
     substanceLines: 150,
   },
   {
-    artifact: "task/.tmp/audit_report-1-fix_check.md",
+    artifact: "__evaluationReportsRoot__/audit_report-1-fix_check.md",
     headings: [],
     noPlaceholders: false,
     substanceLines: 20,
   },
   {
-    artifact: "task/.tmp/audit_report-2-fix_check.md",
+    artifact: "__evaluationReportsRoot__/audit_report-2-fix_check.md",
     headings: [],
     noPlaceholders: false,
     substanceLines: 20,
   },
   {
-    artifact: "task/.tmp/test_coverage_and_readme_audit_report.md",
+    artifact: "__evaluationReportsRoot__/test_coverage_and_readme_audit_report.md",
     headings: [],
     noPlaceholders: false,
     substanceLines: 15,
@@ -135,6 +135,7 @@ function emptyState(workflowRoot: string): PluginState {
     evaluatorSessions: [],
     claudeSessions: [],
     generalSessions: [],
+    phaseSessions: [],
     primaryDevelopSessionId: null,
     latestDevelopSessionId: null,
     nextDevelopSessionNumber: 1,
@@ -151,6 +152,7 @@ function emptyState(workflowRoot: string): PluginState {
     interruptions: [],
     ownerAbortRequestedAt: null,
     autoContinueCounts: {},
+    autoContinueBlockerSignature: null,
     internalEvaluator: emptyInternalEvaluatorState(),
     evaluation: emptyEvaluationState(),
     readiness: emptyReadinessState(),
@@ -243,6 +245,21 @@ function materializeTaskPath(state: Pick<PluginState, "workflowRoot" | "taskRoot
   return relativePath
 }
+function materializeWorkflowPath(state: Pick<PluginState, "workflowRoot" | "taskRoot" | "evaluationReportsRoot">, relativePath: string): string {
+  if (relativePath.startsWith("__evaluationReportsRoot__/")) {
+    return `${state.evaluationReportsRoot}/${relativePath.slice("__evaluationReportsRoot__/".length)}`
+  }
+  return materializeTaskPath(state, relativePath)
+}
+function canonicalEvaluationReportPath(state: Pick<PluginState, "evaluationReportsRoot">, rawPath: string): string {
+  const value = rawPath.trim().replace(/\\/g, "/").replace(/^\.\//, "")
+  if (value.startsWith(`${state.evaluationReportsRoot}/`)) return value
+  if (value.startsWith("task/.tmp/")) return `${state.evaluationReportsRoot}/${value.slice("task/.tmp/".length)}`
+  if (value.startsWith(".tmp/")) return `${state.evaluationReportsRoot}/${value.slice(".tmp/".length)}`
+  return value
+}
 function normalizePhaseStatus(raw: unknown): PhaseStatus {
   if (raw === "in_progress" || raw === "completed") return raw
   return "not_started"
@@ -323,6 +340,8 @@ function emptyReadinessState(): ReadinessState {
     acceptedLightHigh: [],
     d1d9: emptyReadinessD1D9(),
     evidencePath: null,
+    evidence: [],
+    environment: {},
   }
 }
@@ -470,6 +489,8 @@ function normalizeEvaluationState(raw: unknown): EvaluationState {
           kept: entry.kept === true,
           archived: entry.archived === true,
           at: typeof entry.at === "string" ? entry.at : new Date().toISOString(),
+          promptKind: typeof entry.promptKind === "string" ? entry.promptKind : null,
+          preparedPacketPath: typeof entry.preparedPacketPath === "string" ? entry.preparedPacketPath : null,
         }
       })
       .filter((item) => item.cycle > 0 || item.reportPath || item.evaluatorSessionID)
@@ -489,8 +510,8 @@ function normalizeEvaluationState(raw: unknown): EvaluationState {
       .filter((item) => item.cycle > 0 || item.reportPath || item.evaluatorSessionID)
     : []
   return {
-    auditAttempts,
-    fixChecks,
+    auditAttempts: dedupeAuditAttempts(auditAttempts),
+    fixChecks: dedupeFixChecks(fixChecks),
     unresolvedStrongIssues: typeof obj.unresolvedStrongIssues === "number" && Number.isInteger(obj.unresolvedStrongIssues) && obj.unresolvedStrongIssues >= 0
       ? obj.unresolvedStrongIssues
       : null,
@@ -498,6 +519,55 @@ function normalizeEvaluationState(raw: unknown): EvaluationState {
   }
 }
+function auditAttemptKey(item: { cycle: number; reportPath: string; evaluatorSessionID: string }): string {
+  return `${item.cycle}|${item.reportPath}|${item.evaluatorSessionID}`
+}
+function dedupeAuditAttempts<T extends { cycle: number; reportPath: string; evaluatorSessionID: string; kept: boolean }>(items: T[]): T[] {
+  const byExactKey = new Map<string, T>()
+  for (const item of items) byExactKey.set(auditAttemptKey(item), item)
+  const result: T[] = []
+  const keptByCycle = new Set<number>()
+  for (const item of Array.from(byExactKey.values()).reverse()) {
+    if (item.kept) {
+      if (keptByCycle.has(item.cycle)) continue
+      keptByCycle.add(item.cycle)
+    }
+    result.unshift(item)
+  }
+  return result
+}
+function fixCheckKey(item: { cycle: number; reportPath: string; evaluatorSessionID: string }): string {
+  return `${item.cycle}|${item.reportPath}|${item.evaluatorSessionID}`
+}
+function dedupeFixChecks<T extends { cycle: number; reportPath: string; evaluatorSessionID: string }>(items: T[]): T[] {
+  return Array.from(new Map(items.map((item) => [fixCheckKey(item), item])).values())
+}
+function normalizePhaseSessions(raw: unknown): PluginState["phaseSessions"] {
+  if (!Array.isArray(raw)) return []
+  const records = raw
+    .filter((item) => item && typeof item === "object")
+    .map((item) => {
+      const entry = item as Record<string, unknown>
+      return {
+        sessionID: typeof entry.sessionID === "string" ? entry.sessionID : "",
+        lane: typeof entry.lane === "string" ? entry.lane : "",
+        phase: typeof entry.phase === "string" ? entry.phase : "",
+        purpose: typeof entry.purpose === "string" ? entry.purpose : "",
+        source: entry.source === "event" ? "event" as const : "tool" as const,
+        firstSeenAt: typeof entry.firstSeenAt === "string" ? entry.firstSeenAt : new Date().toISOString(),
+        lastSeenAt: typeof entry.lastSeenAt === "string" ? entry.lastSeenAt : new Date().toISOString(),
+      }
+    })
+    .filter((item) => item.sessionID && item.lane && item.phase)
+  return Array.from(new Map(records.map((item) => [`${item.sessionID}|${item.lane}|${item.phase}`, item])).values())
+}
 function normalizeReadinessState(raw: unknown): ReadinessState {
   const defaults = emptyReadinessState()
   if (!raw || typeof raw !== "object") return defaults
@@ -515,9 +585,31 @@ function normalizeReadinessState(raw: unknown): ReadinessState {
     acceptedLightHigh: normalizeAcceptedLightHigh(obj.acceptedLightHigh),
     d1d9: normalizeReadinessD1D9(obj.d1d9),
     evidencePath: typeof obj.evidencePath === "string" ? obj.evidencePath : null,
+    evidence: normalizeReadinessEvidence(obj.evidence),
+    environment: obj.environment && typeof obj.environment === "object"
+      ? Object.fromEntries(Object.entries(obj.environment as Record<string, unknown>).filter(([, value]) => typeof value === "string")) as Record<string, string>
+      : {},
   }
 }
+function normalizeReadinessEvidence(raw: unknown): ReadinessState["evidence"] {
+  if (!Array.isArray(raw)) return []
+  return raw
+    .filter((item) => item && typeof item === "object")
+    .map((item) => {
+      const entry = item as Record<string, unknown>
+      return {
+        key: typeof entry.key === "string" ? entry.key : "",
+        status: typeof entry.status === "string" ? entry.status : "",
+        command: typeof entry.command === "string" ? entry.command : "",
+        artifact: typeof entry.artifact === "string" ? entry.artifact : "",
+        note: typeof entry.note === "string" ? entry.note : "",
+        at: typeof entry.at === "string" ? entry.at : new Date().toISOString(),
+      }
+    })
+    .filter((item) => item.key && item.status)
+}
 export async function loadState(
   workflowRoot: string,
 ): Promise<PluginState> {
@@ -591,6 +683,7 @@ export async function loadState(
     evaluatorSessions: Array.isArray(obj.evaluatorSessions) ? (obj.evaluatorSessions as any[]).filter((s: any) => s && typeof s === "object") : [],
     claudeSessions: Array.isArray(obj.claudeSessions) ? (obj.claudeSessions as any[]).filter((s: any) => s && typeof s === "object") : [],
     generalSessions: Array.isArray(obj.generalSessions) ? (obj.generalSessions as any[]).filter((s: any) => s && typeof s === "object") : [],
+    phaseSessions: normalizePhaseSessions(obj.phaseSessions),
     primaryDevelopSessionId: typeof obj.primaryDevelopSessionId === "string" ? obj.primaryDevelopSessionId : null,
     latestDevelopSessionId: typeof obj.latestDevelopSessionId === "string" ? obj.latestDevelopSessionId : null,
     nextDevelopSessionNumber: typeof obj.nextDevelopSessionNumber === "number" ? obj.nextDevelopSessionNumber as number : 1,
@@ -619,6 +712,7 @@ export async function loadState(
     autoContinueCounts: obj.autoContinueCounts && typeof obj.autoContinueCounts === "object"
       ? Object.fromEntries(Object.entries(obj.autoContinueCounts as Record<string, unknown>).filter(([, value]) => typeof value === "number" && Number.isFinite(value))) as Record<string, number>
       : {},
+    autoContinueBlockerSignature: typeof obj.autoContinueBlockerSignature === "string" ? obj.autoContinueBlockerSignature : null,
     internalEvaluator: normalizeInternalEvaluatorState(obj.internalEvaluator),
     evaluation: normalizeEvaluationState(obj.evaluation),
     readiness: normalizeReadinessState(obj.readiness),
@@ -787,7 +881,7 @@ export async function checkArtifacts(
   const missing: string[] = []
   for (const rel of toCheck) {
-    const materialized = materializeTaskPath(state, rel)
+    const materialized = materializeWorkflowPath(state, rel)
     const abs = path.join(workflowRoot, materialized)
     if (rel === "task/docs/api-spec.md" && ["web", "android", "ios"].includes(projectType)) {
       if (await fileExists(abs)) found.push(materialized)
@@ -857,7 +951,7 @@ async function hasInternalEarlyStopEvidence(state: PluginState): Promise<boolean
 }
 async function countSubstanceLines(state: PluginState, relativePath: string): Promise<number | null> {
-  const text = await readTextIfExists(path.join(state.workflowRoot, materializeTaskPath(state, relativePath)))
+  const text = await readTextIfExists(path.join(state.workflowRoot, materializeWorkflowPath(state, relativePath)))
   if (text === null) return null
   return text.trim().split(/\r?\n/).filter((line) => line.trim()).length
 }
@@ -865,7 +959,7 @@ async function countSubstanceLines(state: PluginState, relativePath: string): Pr
 async function validateMinimumLines(state: PluginState, relativePath: string, minLines: number, missing: string[]) {
   const lines = await countSubstanceLines(state, relativePath)
   if (lines === null) return
-  if (lines < minLines) missing.push(`${materializeTaskPath(state, relativePath)}: expected at least ${minLines} substance lines (found ${lines})`)
+  if (lines < minLines) missing.push(`${materializeWorkflowPath(state, relativePath)}: expected at least ${minLines} substance lines (found ${lines})`)
 }
 async function validatePhaseOneContractConsistency(state: PluginState, missing: string[]) {
@@ -920,7 +1014,7 @@ function extractCoverageScore(text: string): number | null {
 }
 async function fileExistsAt(state: PluginState, relativePath: string): Promise<boolean> {
-  return fileExists(path.join(state.workflowRoot, materializeTaskPath(state, relativePath)))
+  return fileExists(path.join(state.workflowRoot, materializeWorkflowPath(state, relativePath)))
 }
 export async function validateDeterministicPhaseRequirements(
@@ -967,13 +1061,13 @@ export async function validateDeterministicPhaseRequirements(
   }
   if (phase === "phase_5") {
-    await validateMinimumLines(state, "task/.tmp/audit_report-1.md", 150, missing)
-    await validateMinimumLines(state, "task/.tmp/audit_report-2.md", 150, missing)
-    await validateMinimumLines(state, "task/.tmp/audit_report-1-fix_check.md", 20, missing)
-    await validateMinimumLines(state, "task/.tmp/audit_report-2-fix_check.md", 20, missing)
-    await validateMinimumLines(state, "task/.tmp/test_coverage_and_readme_audit_report.md", 15, missing)
+    await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-1.md`, 150, missing)
+    await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-2.md`, 150, missing)
+    await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-1-fix_check.md`, 20, missing)
+    await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-2-fix_check.md`, 20, missing)
+    await validateMinimumLines(state, `${state.evaluationReportsRoot}/test_coverage_and_readme_audit_report.md`, 15, missing)
-    const coverageArtifact = materializeTaskPath(state, "task/.tmp/test_coverage_and_readme_audit_report.md")
+    const coverageArtifact = `${state.evaluationReportsRoot}/test_coverage_and_readme_audit_report.md`
     const coveragePath = path.join(state.workflowRoot, coverageArtifact)
     const coverageText = await readTextIfExists(coveragePath)
     if (coverageText) {
@@ -995,8 +1089,8 @@ export async function validateDeterministicPhaseRequirements(
     }
     for (const cycle of [1, 2]) {
-      const keptReportPath = materializeTaskPath(state, `task/.tmp/audit_report-${cycle}.md`)
-      const fixCheckPath = materializeTaskPath(state, `task/.tmp/audit_report-${cycle}-fix_check.md`)
+      const keptReportPath = `${state.evaluationReportsRoot}/audit_report-${cycle}.md`
+      const fixCheckPath = `${state.evaluationReportsRoot}/audit_report-${cycle}-fix_check.md`
       const attempts = state.evaluation.auditAttempts.filter((attempt) => attempt.cycle === cycle)
       const kept = attempts.filter((attempt) => attempt.kept)
       const fixChecks = state.evaluation.fixChecks.filter((fixCheck) => fixCheck.cycle === cycle)
@@ -1017,12 +1111,12 @@ export async function validateDeterministicPhaseRequirements(
       } else {
         const keptAttempt = kept[0]!
         const verdict = keptAttempt.verdict.toLowerCase().replace(/[\s-]+/g, "_")
-        if (keptAttempt.reportPath !== keptReportPath) missing.push(`evaluation.auditAttempts: cycle ${cycle} kept report path must be ${keptReportPath}`)
+        if (canonicalEvaluationReportPath(state, keptAttempt.reportPath) !== keptReportPath) missing.push(`evaluation.auditAttempts: cycle ${cycle} kept report path must be ${keptReportPath}`)
         if (verdict !== "pass" && verdict !== "partial_pass" && verdict !== "partial") missing.push(`evaluation.auditAttempts: cycle ${cycle} kept verdict must be Pass or Partial Pass`)
         if (fixChecks.length === 0) {
           missing.push(`evaluation.fixChecks: cycle ${cycle} has no recorded fix-check`)
         } else {
-          const matchingFixCheck = fixChecks.find((fixCheck) => fixCheck.reportPath === fixCheckPath)
+          const matchingFixCheck = fixChecks.find((fixCheck) => canonicalEvaluationReportPath(state, fixCheck.reportPath) === fixCheckPath)
           if (!matchingFixCheck) missing.push(`evaluation.fixChecks: cycle ${cycle} missing ${fixCheckPath}`)
           if (matchingFixCheck && matchingFixCheck.evaluatorSessionID !== keptAttempt.evaluatorSessionID) {
             missing.push(`evaluation.fixChecks: cycle ${cycle} fix-check must use the same evaluator session as the kept report`)
@@ -1216,7 +1310,7 @@ export async function validateArtifactContent(
   const scope = artifacts ? new Set(artifacts) : null
   for (const check of CONTENT_CHECKS) {
-    const artifact = materializeTaskPath(state, check.artifact)
+    const artifact = materializeWorkflowPath(state, check.artifact)
     if (scope && !scope.has(check.artifact) && !scope.has(artifact)) continue
     const p = path.join(state.workflowRoot, artifact)

package/plugin/types.ts CHANGED Viewed

@@ -27,6 +27,17 @@ export interface SessionRecord {
   at?: string
   closedAt?: string
   source?: "tool" | "event"
+  phases?: string[]
+}
+export interface SessionPhaseRecord {
+  sessionID: string
+  lane: string
+  phase: string
+  purpose: string
+  source: "tool" | "event"
+  firstSeenAt: string
+  lastSeenAt: string
 }
 export interface Blocker {
@@ -105,6 +116,8 @@ export interface AuditAttemptRecord {
   kept: boolean
   archived: boolean
   at: string
+  promptKind?: string | null
+  preparedPacketPath?: string | null
 }
 export interface AuditFixCheckRecord {
@@ -131,6 +144,17 @@ export interface ReadinessState {
   acceptedLightHigh: AcceptedLightHighRisk[]
   d1d9: Record<ReadinessD1D9Key, ReadinessD1D9Status>
   evidencePath: string | null
+  evidence: ReadinessEvidenceRecord[]
+  environment: Record<string, string>
+}
+export interface ReadinessEvidenceRecord {
+  key: string
+  status: string
+  command: string
+  artifact: string
+  note: string
+  at: string
 }
 export interface PluginState {
@@ -152,6 +176,7 @@ export interface PluginState {
   evaluatorSessions: SessionRecord[]
   claudeSessions: SessionRecord[]
   generalSessions: SessionRecord[]
+  phaseSessions: SessionPhaseRecord[]
   primaryDevelopSessionId: string | null
   latestDevelopSessionId: string | null
   nextDevelopSessionNumber: number
@@ -168,6 +193,7 @@ export interface PluginState {
   interruptions: Interruption[]
   ownerAbortRequestedAt: string | null
   autoContinueCounts: Record<string, number>
+  autoContinueBlockerSignature: string | null
   internalEvaluator: InternalEvaluatorState
   evaluation: EvaluationState
   readiness: ReadinessState
@@ -189,6 +215,7 @@ export interface StateSnapshot {
   evaluatorSessions: SessionRecord[]
   claudeSessions: SessionRecord[]
   generalSessions: SessionRecord[]
+  phaseSessions: SessionPhaseRecord[]
   primaryDevelopSessionId: string | null
   latestDevelopSessionId: string | null
   nextDevelopSessionNumber: number
@@ -204,6 +231,7 @@ export interface StateSnapshot {
   interruptions: Interruption[]
   ownerAbortRequestedAt: string | null
   autoContinueCounts: Record<string, number>
+  autoContinueBlockerSignature: string | null
   internalEvaluator: InternalEvaluatorState
   evaluation: EvaluationState
   readiness: ReadinessState
@@ -329,15 +357,9 @@ export const ARTIFACT_PATHS: Record<string, Record<string, string[]>> = {
     verification_done: [],
   },
   phase_5: {
-    cycle_1_complete: [
-      "task/.tmp/audit_report-1.md",
-      "task/.tmp/audit_report-1-fix_check.md",
-    ],
-    cycle_2_complete: [
-      "task/.tmp/audit_report-2.md",
-      "task/.tmp/audit_report-2-fix_check.md",
-    ],
-    coverage_audit: ["task/.tmp/test_coverage_and_readme_audit_report.md"],
+    cycle_1_complete: [],
+    cycle_2_complete: [],
+    coverage_audit: [],
   },
   phase_6: {
     readiness_done: [],