theslopmachine 1.0.26-beta.0 → 1.0.26-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: developer
3
3
  description: Senior implementation agent for software projects
4
- model: deepseek/deepseek-v4-flash
4
+ model: openai/gpt-5.5
5
5
  variant: high
6
6
  mode: subagent
7
7
  thinkingLevel: high
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: slopmachine-clarifier
3
3
  description: Product clarification agent for SlopMachine Phase 1
4
- model: deepseek/deepseek-v4-flash
4
+ model: openai/gpt-5.5
5
5
  variant: medium
6
6
  mode: subagent
7
7
  thinkingLevel: high
@@ -295,6 +295,9 @@ Use these sequential names as the canonical workflow model. Legacy `P*` names ar
295
295
  - Run two strict audit/remediation cycles using evaluator sessions and the active bugfix lane.
296
296
  - In each audit cycle, send the complete installed evaluation prompt asset through the exact saved send packet verbatim. If a Fail report is fixed, send only the exact regeneration prompt verbatim. Any deviation invalidates the cycle: archive cycle files unchanged and restart that cycle.
297
297
  - Each audit cycle must close with both a rich 150+ line `./.tmp/audit_report-<N>.md` and `./.tmp/audit_report-<N>-fix_check.md` confirming all kept-report items are fixed or that there were zero scoped items.
298
+ - After any evaluator claims a report was written, record it with `slopmachine_state`. The tool verifies that the report paths of kept audit records and fix-check records exist and are non-empty; if it rejects the record, make the evaluator write the exact missing file before continuing.
299
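+ - Record evaluator attempts with prompt metadata when available: `cycle|verdict|evaluatorSessionID|reportPath|kept|archived|promptKind|preparedPacketPath`. When packet metadata is not available, use the 6-field form `cycle|verdict|evaluatorSessionID|reportPath|kept|archived`.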
300
+ - Duplicate evaluator records and legacy report paths are normalized by plugin state writes/loads. If state still appears stale, stop and report the blocker instead of editing `../.ai/slopmachine-plugin-state.json` directly.
298
301
  - Preserve reports, extract complete issue sets, and route fixes in broad human language.
299
302
  - After both audit cycles, close the bugfix lane and start a test-coverage/final-reconciliation lane.
300
303
  - Exit only when both Audit Cycle 1 and Audit Cycle 2 are complete with kept audit reports and fix-check reports, the bugfix lane is closed, and the coverage/README audit passes with at least 90% test score.
@@ -262,6 +262,9 @@ Use these sequential names as the canonical workflow model. Legacy `P*` names ar
262
262
  - Run two strict audit/remediation cycles using evaluator sessions and the active bugfix lane.
263
263
  - In each audit cycle, send the complete installed evaluation prompt asset through the exact saved send packet verbatim. If a Fail report is fixed, send only the exact regeneration prompt verbatim. Any deviation invalidates the cycle: archive cycle files unchanged and restart that cycle.
264
264
  - Each audit cycle must close with both a rich 150+ line `./.tmp/audit_report-<N>.md` and `./.tmp/audit_report-<N>-fix_check.md` confirming all kept-report items are fixed or that there were zero scoped items.
265
+ - After any evaluator claims a report was written, record it with `slopmachine_state`. The tool verifies that the report paths of kept audit records and fix-check records exist and are non-empty; if it rejects the record, make the evaluator write the exact missing file before continuing.
266
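+ - Record evaluator attempts with prompt metadata when available: `cycle|verdict|evaluatorSessionID|reportPath|kept|archived|promptKind|preparedPacketPath`. When packet metadata is not available, use the 6-field form `cycle|verdict|evaluatorSessionID|reportPath|kept|archived`.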
267
+ - Duplicate evaluator records and legacy report paths are normalized by plugin state writes/loads. If state still appears stale, stop and report the blocker instead of editing `../.ai/slopmachine-plugin-state.json` directly.
265
268
  - Preserve reports, extract complete issue sets, and route fixes in broad human language.
266
269
  - After both audit cycles, close the bugfix lane and start a test-coverage/final-reconciliation lane.
267
270
  - Exit only when both Audit Cycle 1 and Audit Cycle 2 are complete with kept audit reports and fix-check reports, the bugfix lane is closed, and the coverage/README audit passes with at least 90% test score.
@@ -135,7 +135,7 @@ Required for each cycle:
135
135
  - the full saved send packet was read before send and sent word-for-word with no owner additions, omissions, summaries, path-only substitutions, or footers;
136
136
  - every failed, superseded, or invalid attempt report was archived unchanged;
137
137
  - no failed report was regenerated in the same evaluator session;
138
- - every full audit attempt was recorded with `slopmachine_state { evaluationAuditAttempt: "cycle|verdict|evaluatorSessionID|reportPath|kept|archived" }`;
138
+ - every full audit attempt was recorded with `slopmachine_state { evaluationAuditAttempt: "cycle|verdict|evaluatorSessionID|reportPath|kept|archived|promptKind|preparedPacketPath" }` when packet metadata is available, or the 6-field form when it is not;
139
139
  - the kept audit report exists at `./.tmp/audit_report-<N>.md`;
140
140
  - the kept audit report is rich and complete: at least 150 lines and not materially shallower than the installed prompt's required output structure;
141
141
  - the kept audit report includes the required verdict, scope/boundary, prompt/repository mapping, section review or blocker/high panel as applicable, issues/suggestions or explicit no-issue statement, security/data-risk review where applicable, and test/logging/coverage sections required by the installed prompt;
@@ -150,6 +150,8 @@ Required for each cycle:
150
150
 
151
151
  No audit cycle is complete without both `./.tmp/audit_report-<N>.md` and `./.tmp/audit_report-<N>-fix_check.md` passing this validation gate.
152
152
 
153
+ `slopmachine_state` rejects kept audit and fix-check records whose report path is missing or empty. If rejected, return to the evaluator and make it write the exact file before continuing. Do not repair this by editing workflow state JSON directly; duplicate evaluator records and legacy report paths are normalized by plugin state writes/loads.
154
+
153
155
  ## Fix-Check Prompt
154
156
 
155
157
  Use this exact fix-check instruction after a kept Pass or Partial Pass report's scoped fix-check items have been fixed. Send it verbatim to the same evaluator session after providing concise developer fix evidence, exact verification results when available, and the exact scoped fix-check issue list from the kept `audit_report-<N>.md`:
@@ -224,7 +224,7 @@ Block readiness if:
224
224
 
225
225
  Browser verification for web/fullstack must not give up. Find a working local startup/browser path, route blockers as module/issues only, retry after fixes, and keep going until the app is actually tested unless the user explicitly risk-accepts stopping.
226
226
 
227
- Plugin readiness validation should include Docker, `runTests`, browser, API/manual, README truth, unresolved strong issue count, accepted light High records, and readiness evidence path.
227
+ Plugin readiness validation should include Docker, `runTests`, browser, API/manual, README truth, unresolved strong issue count, accepted light High records, readiness evidence path, structured readiness evidence entries (recorded as `key|status|command|artifact|note`), and stack-specific environment capability notes such as `dotnet=missing` or `docker=available`.
228
228
 
229
229
  Pass with notes if:
230
230
  - only accepted light High or lower bounded risks remain;
@@ -114,6 +114,7 @@ Important:
114
114
  - The primary runtime contract is `docker compose up --build` only for container-supported projects
115
115
  - The legacy compatibility string is included for users who still use the old Compose command name
116
116
  - Do not present `docker-compose up` as the primary startup contract
117
+ - Do not include local non-container startup commands as an alternate primary path for backend/fullstack/container-supported projects unless the task explicitly requires them
117
118
 
118
119
  ### Additional startup notes
119
120
  - [example: first boot may take longer while containers build]
@@ -126,6 +127,7 @@ Important:
126
127
  - If env-file shape is required at runtime, it must be generated ephemerally by the controlled startup path and never committed
127
128
  - Startup and testing must not require the reviewer to create, copy, or edit any env file
128
129
  - Do **not** use manual primary setup steps such as `npm install`, `pnpm install`, `yarn install`, `pip install`, `apt-get`, or manual database setup
130
+ - If local tool commands are mentioned for developer context, clearly label them as non-required and not part of reviewer startup or verification
129
131
 
130
132
  ---
131
133
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "theslopmachine",
3
- "version": "1.0.26-beta.0",
3
+ "version": "1.0.26-beta.2",
4
4
  "description": "SlopMachine installer and project bootstrap CLI",
5
5
  "license": "MIT",
6
6
  "type": "module",
package/plugin/index.ts CHANGED
@@ -278,9 +278,9 @@ function parseBooleanToken(value: string): boolean | null {
278
278
  return null
279
279
  }
280
280
 
281
- function parseEvaluationAuditAttempt(value: string): { cycle: number; verdict: string; evaluatorSessionID: string; reportPath: string; kept: boolean; archived: boolean; at: string } | null {
281
+ function parseEvaluationAuditAttempt(value: string): { cycle: number; verdict: string; evaluatorSessionID: string; reportPath: string; kept: boolean; archived: boolean; at: string; promptKind?: string | null; preparedPacketPath?: string | null } | null {
282
282
  const parts = value.split("|").map((part) => part.trim())
283
- if (parts.length !== 6) return null
283
+ if (parts.length !== 6 && parts.length !== 8) return null
284
284
  const cycle = parseNonNegativeInteger(parts[0])
285
285
  const verdict = parts[1] || ""
286
286
  const evaluatorSessionID = parts[2] || ""
@@ -288,7 +288,15 @@ function parseEvaluationAuditAttempt(value: string): { cycle: number; verdict: s
288
288
  const kept = parseBooleanToken(parts[4] || "")
289
289
  const archived = parseBooleanToken(parts[5] || "")
290
290
  if (!cycle || !verdict || !evaluatorSessionID || !reportPath || kept === null || archived === null) return null
291
- return { cycle, verdict, evaluatorSessionID, reportPath, kept, archived, at: new Date().toISOString() }
291
+ return { cycle, verdict, evaluatorSessionID, reportPath, kept, archived, at: new Date().toISOString(), promptKind: parts[6] || null, preparedPacketPath: parts[7] || null }
292
+ }
293
+
294
+ function parseReadinessEvidence(value: string): { key: string; status: string; command: string; artifact: string; note: string; at: string } | null {
295
+ const parts = value.split("|").map((part) => part.trim())
296
+ if (parts.length !== 5) return null
297
+ const [key, status, command, artifact, note] = parts
298
+ if (!key || !status) return null
299
+ return { key, status, command: command || "", artifact: artifact || "", note: note || "", at: new Date().toISOString() }
292
300
  }
293
301
 
294
302
  function parseEvaluationFixCheck(value: string): { cycle: number; evaluatorSessionID: string; reportPath: string; at: string } | null {
@@ -313,6 +321,43 @@ function formatSession(record: { status: string; sessionID: string; purpose: str
313
321
  return `${record.status}: ${record.sessionID} (${record.purpose})${record.closedAt ? ` closed=${record.closedAt}` : ""}`
314
322
  }
315
323
 
324
+ function recordSessionPhase(
325
+ state: PluginState,
326
+ sessionID: string,
327
+ lane: string,
328
+ purpose: string,
329
+ source: "tool" | "event",
330
+ ) {
331
+ if (!sessionID || !lane || !state.currentPhase) return
332
+ const now = new Date().toISOString()
333
+ const existing = state.phaseSessions.find((item) => item.sessionID === sessionID && item.lane === lane && item.phase === state.currentPhase)
334
+ if (existing) {
335
+ existing.lastSeenAt = now
336
+ if (purpose) existing.purpose = purpose
337
+ return
338
+ }
339
+ state.phaseSessions.push({
340
+ sessionID,
341
+ lane,
342
+ phase: state.currentPhase,
343
+ purpose,
344
+ source,
345
+ firstSeenAt: now,
346
+ lastSeenAt: now,
347
+ })
348
+ }
349
+
350
+ function recordActiveSessionsForPhase(state: PluginState) {
351
+ for (const [lane, sessionID] of Object.entries(state.activeSessions)) {
352
+ if (!sessionID) continue
353
+ const record = [...sessionList(state, lane)].reverse().find((item) => item.sessionID === sessionID)
354
+ recordSessionPhase(state, sessionID, lane, record?.purpose || "active", record?.source || "tool")
355
+ if (record && !record.phases?.includes(state.currentPhase)) {
356
+ record.phases = [...(record.phases || []), state.currentPhase]
357
+ }
358
+ }
359
+ }
360
+
316
361
  function registerActiveSession(
317
362
  state: PluginState,
318
363
  lane: keyof PluginState["activeSessions"],
@@ -342,13 +387,61 @@ function registerActiveSession(
342
387
  existing.purpose = purpose || existing.purpose
343
388
  if (activate) existing.closedAt = undefined
344
389
  existing.source = existing.source || source
390
+ if (!existing.phases?.includes(state.currentPhase)) existing.phases = [...(existing.phases || []), state.currentPhase]
391
+ recordSessionPhase(state, sid, lane, existing.purpose, existing.source || source)
345
392
  return { created: false, replaced: Boolean(previousActive && previousActive !== sid) }
346
393
  }
347
394
 
348
- list.push({ sessionID: sid, lane, purpose, status: activate ? "active" : "closed", at: now, source })
395
+ list.push({ sessionID: sid, lane, purpose, status: activate ? "active" : "closed", at: now, source, phases: [state.currentPhase] })
396
+ recordSessionPhase(state, sid, lane, purpose, source)
349
397
  return { created: true, replaced: Boolean(previousActive && previousActive !== sid) }
350
398
  }
351
399
 
400
+ function upsertEvaluationAuditAttempt(state: PluginState, attempt: ReturnType<typeof parseEvaluationAuditAttempt> & {}) {
401
+ if (!attempt) return
402
+ attempt.reportPath = canonicalEvaluationReportPath(state, attempt.reportPath)
403
+ state.evaluation.auditAttempts = state.evaluation.auditAttempts.filter((item) => {
404
+ if (item.cycle !== attempt.cycle) return true
405
+ if (item.reportPath === attempt.reportPath && item.evaluatorSessionID === attempt.evaluatorSessionID) return false
406
+ if (attempt.kept && item.kept) return false
407
+ return true
408
+ })
409
+ state.evaluation.auditAttempts.push(attempt)
410
+ }
411
+
412
+ function upsertEvaluationFixCheck(state: PluginState, fixCheck: ReturnType<typeof parseEvaluationFixCheck> & {}) {
413
+ if (!fixCheck) return
414
+ fixCheck.reportPath = canonicalEvaluationReportPath(state, fixCheck.reportPath)
415
+ state.evaluation.fixChecks = state.evaluation.fixChecks.filter((item) => {
416
+ if (item.cycle !== fixCheck.cycle) return true
417
+ return !(item.reportPath === fixCheck.reportPath && item.evaluatorSessionID === fixCheck.evaluatorSessionID)
418
+ })
419
+ state.evaluation.fixChecks.push(fixCheck)
420
+ }
421
+
422
+ function canonicalEvaluationReportPath(state: PluginState, rawPath: string): string {
423
+ const value = rawPath.trim().replace(/\\/g, "/").replace(/^\.\//, "")
424
+ if (value.startsWith(`${state.evaluationReportsRoot}/`)) return value
425
+ if (value.startsWith("task/.tmp/")) return `${state.evaluationReportsRoot}/${value.slice("task/.tmp/".length)}`
426
+ if (value.startsWith(".tmp/")) return `${state.evaluationReportsRoot}/${value.slice(".tmp/".length)}`
427
+ return value
428
+ }
429
+
430
+ async function nonEmptyWorkflowFile(state: PluginState, relativePath: string): Promise<boolean> {
431
+ try {
432
+ const text = await fs.readFile(path.join(state.workflowRoot, relativePath), "utf8")
433
+ return text.trim().length > 0
434
+ } catch {
435
+ return false
436
+ }
437
+ }
438
+
439
+ function openBlockerSignature(state: PluginState): string | null {
440
+ const open = state.blockers.filter((blocker) => !blocker.resolvedAt)
441
+ if (open.length === 0) return null
442
+ return open.map((blocker) => `${blocker.phase}:${blocker.status}:${blocker.reason}`).sort().join("|")
443
+ }
444
+
352
445
  interface ClaudeLaunchIntent {
353
446
  command: string
354
447
  lane: string
@@ -1014,6 +1107,7 @@ export default {
1014
1107
  }
1015
1108
  }
1016
1109
  recordActivity(sessionID)
1110
+ recordSessionPhase(state, sessionID, "owner", "owner session", "event")
1017
1111
  await log("owner session tracked", { sessionID, agent, source })
1018
1112
  return true
1019
1113
  }
@@ -1085,6 +1179,12 @@ export default {
1085
1179
  await log("auto-continue cap reached", { sessionID, count, max: MAX_AUTO_CONTINUES_PER_SESSION, phase: state.currentPhase })
1086
1180
  return
1087
1181
  }
1182
+ const blockerSignature = openBlockerSignature(state)
1183
+ if (blockerSignature && state.autoContinueBlockerSignature === blockerSignature) {
1184
+ await log("auto-continue blocked by repeated open blocker", { sessionID, phase: state.currentPhase, blockerSignature })
1185
+ return
1186
+ }
1187
+ state.autoContinueBlockerSignature = blockerSignature
1088
1188
  state.autoContinueCounts[sessionID] = count + 1
1089
1189
  state.lastStateChange = new Date().toISOString()
1090
1190
  await persist()
@@ -1275,6 +1375,7 @@ export default {
1275
1375
  evaluationFixCheck: stringArg,
1276
1376
  evaluationUnresolvedStrongIssues: z.union([z.number(), z.string()]).optional(),
1277
1377
  evaluationAcceptedLightHigh: stringArg,
1378
+ environmentCapability: stringArg,
1278
1379
  readinessDocker: readinessStatusArg,
1279
1380
  readinessRunTests: readinessStatusArg,
1280
1381
  readinessBrowser: readinessStatusArg,
@@ -1284,6 +1385,7 @@ export default {
1284
1385
  readinessAcceptedLightHigh: stringArg,
1285
1386
  readinessD1D9: stringArg,
1286
1387
  readinessEvidencePath: stringArg,
1388
+ readinessEvidence: stringArg,
1287
1389
  },
1288
1390
  async execute(i: Record<string, unknown>, context?: OpenCodeToolContext) {
1289
1391
  assertSlopmachineToolContext(context, slopmachineSessionIDs)
@@ -1349,14 +1451,22 @@ export default {
1349
1451
  }
1350
1452
  if (hasTextSetterValue(i.evaluationAuditAttempt)) {
1351
1453
  const attempt = parseEvaluationAuditAttempt(String(i.evaluationAuditAttempt))
1352
- if (!attempt) return badArg("evaluationAuditAttempt must be 'cycle|verdict|evaluatorSessionID|reportPath|kept|archived'")
1353
- state.evaluation.auditAttempts.push(attempt)
1454
+ if (!attempt) return badArg("evaluationAuditAttempt must be 'cycle|verdict|evaluatorSessionID|reportPath|kept|archived' or add '|promptKind|preparedPacketPath'")
1455
+ attempt.reportPath = canonicalEvaluationReportPath(state, attempt.reportPath)
1456
+ if (attempt.kept && !(await nonEmptyWorkflowFile(state, attempt.reportPath))) {
1457
+ return badArg(`evaluationAuditAttempt kept report does not exist or is empty: ${attempt.reportPath}`)
1458
+ }
1459
+ upsertEvaluationAuditAttempt(state, attempt)
1354
1460
  changed = true
1355
1461
  }
1356
1462
  if (hasTextSetterValue(i.evaluationFixCheck)) {
1357
1463
  const fixCheck = parseEvaluationFixCheck(String(i.evaluationFixCheck))
1358
1464
  if (!fixCheck) return badArg("evaluationFixCheck must be 'cycle|evaluatorSessionID|reportPath'")
1359
- state.evaluation.fixChecks.push(fixCheck)
1465
+ fixCheck.reportPath = canonicalEvaluationReportPath(state, fixCheck.reportPath)
1466
+ if (!(await nonEmptyWorkflowFile(state, fixCheck.reportPath))) {
1467
+ return badArg(`evaluationFixCheck report does not exist or is empty: ${fixCheck.reportPath}`)
1468
+ }
1469
+ upsertEvaluationFixCheck(state, fixCheck)
1360
1470
  changed = true
1361
1471
  }
1362
1472
  if (hasNumberSetterValue(i.evaluationUnresolvedStrongIssues)) {
@@ -1371,6 +1481,12 @@ export default {
1371
1481
  state.evaluation.acceptedLightHigh.push(risk)
1372
1482
  changed = true
1373
1483
  }
1484
+ if (hasTextSetterValue(i.environmentCapability)) {
1485
+ const [name, status] = String(i.environmentCapability).split("=").map((part) => part.trim())
1486
+ if (!name || !status) return badArg("environmentCapability must look like 'dotnet=missing' or 'docker=available'")
1487
+ state.readiness.environment[name] = status
1488
+ changed = true
1489
+ }
1374
1490
  const readinessMap = [
1375
1491
  ["readinessDocker", "docker"],
1376
1492
  ["readinessRunTests", "runTests"],
@@ -1413,6 +1529,13 @@ export default {
1413
1529
  state.readiness.evidencePath = String(i.readinessEvidencePath)
1414
1530
  changed = true
1415
1531
  }
1532
+ if (hasTextSetterValue(i.readinessEvidence)) {
1533
+ const evidence = parseReadinessEvidence(String(i.readinessEvidence))
1534
+ if (!evidence) return badArg("readinessEvidence must be 'key|status|command|artifact|note'")
1535
+ state.readiness.evidence = state.readiness.evidence.filter((item) => item.key !== evidence.key)
1536
+ state.readiness.evidence.push(evidence)
1537
+ changed = true
1538
+ }
1416
1539
  if (changed) {
1417
1540
  state.lastStateChange = new Date().toISOString()
1418
1541
  await persist()
@@ -1452,6 +1575,7 @@ export default {
1452
1575
  internalEvaluator: state.internalEvaluator,
1453
1576
  evaluation: state.evaluation,
1454
1577
  readiness: state.readiness,
1578
+ phaseSessions: state.phaseSessions,
1455
1579
  handoff: state.nextHandoff || "none",
1456
1580
  counters: {
1457
1581
  artifacts: Object.keys(state.artifacts).length,
@@ -1509,6 +1633,7 @@ export default {
1509
1633
  acceptWarnings: asBool(i.acceptWarnings),
1510
1634
  })
1511
1635
  if (result.ok) {
1636
+ recordActiveSessionsForPhase(state)
1512
1637
  await persist()
1513
1638
  }
1514
1639
  return JSON.stringify(result, null, 2)
@@ -1535,6 +1660,7 @@ export default {
1535
1660
  const result = reopenPhase(state, i.phase)
1536
1661
  if (result.ok) {
1537
1662
  state.lastStateChange = new Date().toISOString()
1663
+ recordActiveSessionsForPhase(state)
1538
1664
  await persist()
1539
1665
  }
1540
1666
  return JSON.stringify(result, null, 2)
@@ -1717,6 +1843,7 @@ export default {
1717
1843
  evaluators: state.evaluatorSessions.map(formatSession),
1718
1844
  claude: state.claudeSessions.map(formatSession),
1719
1845
  general: state.generalSessions.map(formatSession),
1846
+ phaseSessions: state.phaseSessions,
1720
1847
  primary: state.primaryDevelopSessionId || "none",
1721
1848
  latest: state.latestDevelopSessionId || "none",
1722
1849
  next: `develop #${state.nextDevelopSessionNumber}, bugfix #${state.nextBugfixSessionNumber}`,
package/plugin/state.ts CHANGED
@@ -61,31 +61,31 @@ const CONTENT_CHECKS: ContentCheck[] = [
61
61
  noPlaceholders: true,
62
62
  },
63
63
  {
64
- artifact: "task/.tmp/audit_report-1.md",
64
+ artifact: "__evaluationReportsRoot__/audit_report-1.md",
65
65
  headings: ["Verdict", "Scope and Static Verification Boundary", "Repository / Requirement Mapping Summary", "Section-by-section Review", "Issues / Suggestions", "Security Review"],
66
66
  noPlaceholders: false,
67
67
  substanceLines: 150,
68
68
  },
69
69
  {
70
- artifact: "task/.tmp/audit_report-2.md",
70
+ artifact: "__evaluationReportsRoot__/audit_report-2.md",
71
71
  headings: ["Verdict", "Scope and Static Verification Boundary", "Repository / Requirement Mapping Summary", "Section-by-section Review", "Issues / Suggestions", "Security Review"],
72
72
  noPlaceholders: false,
73
73
  substanceLines: 150,
74
74
  },
75
75
  {
76
- artifact: "task/.tmp/audit_report-1-fix_check.md",
76
+ artifact: "__evaluationReportsRoot__/audit_report-1-fix_check.md",
77
77
  headings: [],
78
78
  noPlaceholders: false,
79
79
  substanceLines: 20,
80
80
  },
81
81
  {
82
- artifact: "task/.tmp/audit_report-2-fix_check.md",
82
+ artifact: "__evaluationReportsRoot__/audit_report-2-fix_check.md",
83
83
  headings: [],
84
84
  noPlaceholders: false,
85
85
  substanceLines: 20,
86
86
  },
87
87
  {
88
- artifact: "task/.tmp/test_coverage_and_readme_audit_report.md",
88
+ artifact: "__evaluationReportsRoot__/test_coverage_and_readme_audit_report.md",
89
89
  headings: [],
90
90
  noPlaceholders: false,
91
91
  substanceLines: 15,
@@ -135,6 +135,7 @@ function emptyState(workflowRoot: string): PluginState {
135
135
  evaluatorSessions: [],
136
136
  claudeSessions: [],
137
137
  generalSessions: [],
138
+ phaseSessions: [],
138
139
  primaryDevelopSessionId: null,
139
140
  latestDevelopSessionId: null,
140
141
  nextDevelopSessionNumber: 1,
@@ -151,6 +152,7 @@ function emptyState(workflowRoot: string): PluginState {
151
152
  interruptions: [],
152
153
  ownerAbortRequestedAt: null,
153
154
  autoContinueCounts: {},
155
+ autoContinueBlockerSignature: null,
154
156
  internalEvaluator: emptyInternalEvaluatorState(),
155
157
  evaluation: emptyEvaluationState(),
156
158
  readiness: emptyReadinessState(),
@@ -243,6 +245,21 @@ function materializeTaskPath(state: Pick<PluginState, "workflowRoot" | "taskRoot
243
245
  return relativePath
244
246
  }
245
247
 
248
+ function materializeWorkflowPath(state: Pick<PluginState, "workflowRoot" | "taskRoot" | "evaluationReportsRoot">, relativePath: string): string {
249
+ if (relativePath.startsWith("__evaluationReportsRoot__/")) {
250
+ return `${state.evaluationReportsRoot}/${relativePath.slice("__evaluationReportsRoot__/".length)}`
251
+ }
252
+ return materializeTaskPath(state, relativePath)
253
+ }
254
+
255
+ function canonicalEvaluationReportPath(state: Pick<PluginState, "evaluationReportsRoot">, rawPath: string): string {
256
+ const value = rawPath.trim().replace(/\\/g, "/").replace(/^\.\//, "")
257
+ if (value.startsWith(`${state.evaluationReportsRoot}/`)) return value
258
+ if (value.startsWith("task/.tmp/")) return `${state.evaluationReportsRoot}/${value.slice("task/.tmp/".length)}`
259
+ if (value.startsWith(".tmp/")) return `${state.evaluationReportsRoot}/${value.slice(".tmp/".length)}`
260
+ return value
261
+ }
262
+
246
263
  function normalizePhaseStatus(raw: unknown): PhaseStatus {
247
264
  if (raw === "in_progress" || raw === "completed") return raw
248
265
  return "not_started"
@@ -323,6 +340,8 @@ function emptyReadinessState(): ReadinessState {
323
340
  acceptedLightHigh: [],
324
341
  d1d9: emptyReadinessD1D9(),
325
342
  evidencePath: null,
343
+ evidence: [],
344
+ environment: {},
326
345
  }
327
346
  }
328
347
 
@@ -470,6 +489,8 @@ function normalizeEvaluationState(raw: unknown): EvaluationState {
470
489
  kept: entry.kept === true,
471
490
  archived: entry.archived === true,
472
491
  at: typeof entry.at === "string" ? entry.at : new Date().toISOString(),
492
+ promptKind: typeof entry.promptKind === "string" ? entry.promptKind : null,
493
+ preparedPacketPath: typeof entry.preparedPacketPath === "string" ? entry.preparedPacketPath : null,
473
494
  }
474
495
  })
475
496
  .filter((item) => item.cycle > 0 || item.reportPath || item.evaluatorSessionID)
@@ -489,8 +510,8 @@ function normalizeEvaluationState(raw: unknown): EvaluationState {
489
510
  .filter((item) => item.cycle > 0 || item.reportPath || item.evaluatorSessionID)
490
511
  : []
491
512
  return {
492
- auditAttempts,
493
- fixChecks,
513
+ auditAttempts: dedupeAuditAttempts(auditAttempts),
514
+ fixChecks: dedupeFixChecks(fixChecks),
494
515
  unresolvedStrongIssues: typeof obj.unresolvedStrongIssues === "number" && Number.isInteger(obj.unresolvedStrongIssues) && obj.unresolvedStrongIssues >= 0
495
516
  ? obj.unresolvedStrongIssues
496
517
  : null,
@@ -498,6 +519,55 @@ function normalizeEvaluationState(raw: unknown): EvaluationState {
498
519
  }
499
520
  }
500
521
 
522
+ function auditAttemptKey(item: { cycle: number; reportPath: string; evaluatorSessionID: string }): string {
523
+ return `${item.cycle}|${item.reportPath}|${item.evaluatorSessionID}`
524
+ }
525
+
526
+ function dedupeAuditAttempts<T extends { cycle: number; reportPath: string; evaluatorSessionID: string; kept: boolean }>(items: T[]): T[] {
527
+ const byExactKey = new Map<string, T>()
528
+ for (const item of items) byExactKey.set(auditAttemptKey(item), item)
529
+
530
+ const result: T[] = []
531
+ const keptByCycle = new Set<number>()
532
+ for (const item of Array.from(byExactKey.values()).reverse()) {
533
+ if (item.kept) {
534
+ if (keptByCycle.has(item.cycle)) continue
535
+ keptByCycle.add(item.cycle)
536
+ }
537
+ result.unshift(item)
538
+ }
539
+ return result
540
+ }
541
+
542
+ function fixCheckKey(item: { cycle: number; reportPath: string; evaluatorSessionID: string }): string {
543
+ return `${item.cycle}|${item.reportPath}|${item.evaluatorSessionID}`
544
+ }
545
+
546
+ function dedupeFixChecks<T extends { cycle: number; reportPath: string; evaluatorSessionID: string }>(items: T[]): T[] {
547
+ return Array.from(new Map(items.map((item) => [fixCheckKey(item), item])).values())
548
+ }
549
+
550
+ function normalizePhaseSessions(raw: unknown): PluginState["phaseSessions"] {
551
+ if (!Array.isArray(raw)) return []
552
+ const records = raw
553
+ .filter((item) => item && typeof item === "object")
554
+ .map((item) => {
555
+ const entry = item as Record<string, unknown>
556
+ return {
557
+ sessionID: typeof entry.sessionID === "string" ? entry.sessionID : "",
558
+ lane: typeof entry.lane === "string" ? entry.lane : "",
559
+ phase: typeof entry.phase === "string" ? entry.phase : "",
560
+ purpose: typeof entry.purpose === "string" ? entry.purpose : "",
561
+ source: entry.source === "event" ? "event" as const : "tool" as const,
562
+ firstSeenAt: typeof entry.firstSeenAt === "string" ? entry.firstSeenAt : new Date().toISOString(),
563
+ lastSeenAt: typeof entry.lastSeenAt === "string" ? entry.lastSeenAt : new Date().toISOString(),
564
+ }
565
+ })
566
+ .filter((item) => item.sessionID && item.lane && item.phase)
567
+
568
+ return Array.from(new Map(records.map((item) => [`${item.sessionID}|${item.lane}|${item.phase}`, item])).values())
569
+ }
570
+
501
571
  function normalizeReadinessState(raw: unknown): ReadinessState {
502
572
  const defaults = emptyReadinessState()
503
573
  if (!raw || typeof raw !== "object") return defaults
@@ -515,9 +585,31 @@ function normalizeReadinessState(raw: unknown): ReadinessState {
515
585
  acceptedLightHigh: normalizeAcceptedLightHigh(obj.acceptedLightHigh),
516
586
  d1d9: normalizeReadinessD1D9(obj.d1d9),
517
587
  evidencePath: typeof obj.evidencePath === "string" ? obj.evidencePath : null,
588
+ evidence: normalizeReadinessEvidence(obj.evidence),
589
+ environment: obj.environment && typeof obj.environment === "object"
590
+ ? Object.fromEntries(Object.entries(obj.environment as Record<string, unknown>).filter(([, value]) => typeof value === "string")) as Record<string, string>
591
+ : {},
518
592
  }
519
593
  }
520
594
 
595
+ function normalizeReadinessEvidence(raw: unknown): ReadinessState["evidence"] {
596
+ if (!Array.isArray(raw)) return []
597
+ return raw
598
+ .filter((item) => item && typeof item === "object")
599
+ .map((item) => {
600
+ const entry = item as Record<string, unknown>
601
+ return {
602
+ key: typeof entry.key === "string" ? entry.key : "",
603
+ status: typeof entry.status === "string" ? entry.status : "",
604
+ command: typeof entry.command === "string" ? entry.command : "",
605
+ artifact: typeof entry.artifact === "string" ? entry.artifact : "",
606
+ note: typeof entry.note === "string" ? entry.note : "",
607
+ at: typeof entry.at === "string" ? entry.at : new Date().toISOString(),
608
+ }
609
+ })
610
+ .filter((item) => item.key && item.status)
611
+ }
612
+
521
613
  export async function loadState(
522
614
  workflowRoot: string,
523
615
  ): Promise<PluginState> {
@@ -591,6 +683,7 @@ export async function loadState(
591
683
  evaluatorSessions: Array.isArray(obj.evaluatorSessions) ? (obj.evaluatorSessions as any[]).filter((s: any) => s && typeof s === "object") : [],
592
684
  claudeSessions: Array.isArray(obj.claudeSessions) ? (obj.claudeSessions as any[]).filter((s: any) => s && typeof s === "object") : [],
593
685
  generalSessions: Array.isArray(obj.generalSessions) ? (obj.generalSessions as any[]).filter((s: any) => s && typeof s === "object") : [],
686
+ phaseSessions: normalizePhaseSessions(obj.phaseSessions),
594
687
  primaryDevelopSessionId: typeof obj.primaryDevelopSessionId === "string" ? obj.primaryDevelopSessionId : null,
595
688
  latestDevelopSessionId: typeof obj.latestDevelopSessionId === "string" ? obj.latestDevelopSessionId : null,
596
689
  nextDevelopSessionNumber: typeof obj.nextDevelopSessionNumber === "number" ? obj.nextDevelopSessionNumber as number : 1,
@@ -619,6 +712,7 @@ export async function loadState(
619
712
  autoContinueCounts: obj.autoContinueCounts && typeof obj.autoContinueCounts === "object"
620
713
  ? Object.fromEntries(Object.entries(obj.autoContinueCounts as Record<string, unknown>).filter(([, value]) => typeof value === "number" && Number.isFinite(value))) as Record<string, number>
621
714
  : {},
715
+ autoContinueBlockerSignature: typeof obj.autoContinueBlockerSignature === "string" ? obj.autoContinueBlockerSignature : null,
622
716
  internalEvaluator: normalizeInternalEvaluatorState(obj.internalEvaluator),
623
717
  evaluation: normalizeEvaluationState(obj.evaluation),
624
718
  readiness: normalizeReadinessState(obj.readiness),
@@ -787,7 +881,7 @@ export async function checkArtifacts(
787
881
  const missing: string[] = []
788
882
 
789
883
  for (const rel of toCheck) {
790
- const materialized = materializeTaskPath(state, rel)
884
+ const materialized = materializeWorkflowPath(state, rel)
791
885
  const abs = path.join(workflowRoot, materialized)
792
886
  if (rel === "task/docs/api-spec.md" && ["web", "android", "ios"].includes(projectType)) {
793
887
  if (await fileExists(abs)) found.push(materialized)
@@ -857,7 +951,7 @@ async function hasInternalEarlyStopEvidence(state: PluginState): Promise<boolean
857
951
  }
858
952
 
859
953
  async function countSubstanceLines(state: PluginState, relativePath: string): Promise<number | null> {
860
- const text = await readTextIfExists(path.join(state.workflowRoot, materializeTaskPath(state, relativePath)))
954
+ const text = await readTextIfExists(path.join(state.workflowRoot, materializeWorkflowPath(state, relativePath)))
861
955
  if (text === null) return null
862
956
  return text.trim().split(/\r?\n/).filter((line) => line.trim()).length
863
957
  }
@@ -865,7 +959,7 @@ async function countSubstanceLines(state: PluginState, relativePath: string): Pr
865
959
  async function validateMinimumLines(state: PluginState, relativePath: string, minLines: number, missing: string[]) {
866
960
  const lines = await countSubstanceLines(state, relativePath)
867
961
  if (lines === null) return
868
- if (lines < minLines) missing.push(`${materializeTaskPath(state, relativePath)}: expected at least ${minLines} substance lines (found ${lines})`)
962
+ if (lines < minLines) missing.push(`${materializeWorkflowPath(state, relativePath)}: expected at least ${minLines} substance lines (found ${lines})`)
869
963
  }
870
964
 
871
965
  async function validatePhaseOneContractConsistency(state: PluginState, missing: string[]) {
@@ -920,7 +1014,7 @@ function extractCoverageScore(text: string): number | null {
920
1014
  }
921
1015
 
922
1016
  async function fileExistsAt(state: PluginState, relativePath: string): Promise<boolean> {
923
- return fileExists(path.join(state.workflowRoot, materializeTaskPath(state, relativePath)))
1017
+ return fileExists(path.join(state.workflowRoot, materializeWorkflowPath(state, relativePath)))
924
1018
  }
925
1019
 
926
1020
  export async function validateDeterministicPhaseRequirements(
@@ -967,13 +1061,13 @@ export async function validateDeterministicPhaseRequirements(
967
1061
  }
968
1062
 
969
1063
  if (phase === "phase_5") {
970
- await validateMinimumLines(state, "task/.tmp/audit_report-1.md", 150, missing)
971
- await validateMinimumLines(state, "task/.tmp/audit_report-2.md", 150, missing)
972
- await validateMinimumLines(state, "task/.tmp/audit_report-1-fix_check.md", 20, missing)
973
- await validateMinimumLines(state, "task/.tmp/audit_report-2-fix_check.md", 20, missing)
974
- await validateMinimumLines(state, "task/.tmp/test_coverage_and_readme_audit_report.md", 15, missing)
1064
+ await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-1.md`, 150, missing)
1065
+ await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-2.md`, 150, missing)
1066
+ await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-1-fix_check.md`, 20, missing)
1067
+ await validateMinimumLines(state, `${state.evaluationReportsRoot}/audit_report-2-fix_check.md`, 20, missing)
1068
+ await validateMinimumLines(state, `${state.evaluationReportsRoot}/test_coverage_and_readme_audit_report.md`, 15, missing)
975
1069
 
976
- const coverageArtifact = materializeTaskPath(state, "task/.tmp/test_coverage_and_readme_audit_report.md")
1070
+ const coverageArtifact = `${state.evaluationReportsRoot}/test_coverage_and_readme_audit_report.md`
977
1071
  const coveragePath = path.join(state.workflowRoot, coverageArtifact)
978
1072
  const coverageText = await readTextIfExists(coveragePath)
979
1073
  if (coverageText) {
@@ -995,8 +1089,8 @@ export async function validateDeterministicPhaseRequirements(
995
1089
  }
996
1090
 
997
1091
  for (const cycle of [1, 2]) {
998
- const keptReportPath = materializeTaskPath(state, `task/.tmp/audit_report-${cycle}.md`)
999
- const fixCheckPath = materializeTaskPath(state, `task/.tmp/audit_report-${cycle}-fix_check.md`)
1092
+ const keptReportPath = `${state.evaluationReportsRoot}/audit_report-${cycle}.md`
1093
+ const fixCheckPath = `${state.evaluationReportsRoot}/audit_report-${cycle}-fix_check.md`
1000
1094
  const attempts = state.evaluation.auditAttempts.filter((attempt) => attempt.cycle === cycle)
1001
1095
  const kept = attempts.filter((attempt) => attempt.kept)
1002
1096
  const fixChecks = state.evaluation.fixChecks.filter((fixCheck) => fixCheck.cycle === cycle)
@@ -1017,12 +1111,12 @@ export async function validateDeterministicPhaseRequirements(
1017
1111
  } else {
1018
1112
  const keptAttempt = kept[0]!
1019
1113
  const verdict = keptAttempt.verdict.toLowerCase().replace(/[\s-]+/g, "_")
1020
- if (keptAttempt.reportPath !== keptReportPath) missing.push(`evaluation.auditAttempts: cycle ${cycle} kept report path must be ${keptReportPath}`)
1114
+ if (canonicalEvaluationReportPath(state, keptAttempt.reportPath) !== keptReportPath) missing.push(`evaluation.auditAttempts: cycle ${cycle} kept report path must be ${keptReportPath}`)
1021
1115
  if (verdict !== "pass" && verdict !== "partial_pass" && verdict !== "partial") missing.push(`evaluation.auditAttempts: cycle ${cycle} kept verdict must be Pass or Partial Pass`)
1022
1116
  if (fixChecks.length === 0) {
1023
1117
  missing.push(`evaluation.fixChecks: cycle ${cycle} has no recorded fix-check`)
1024
1118
  } else {
1025
- const matchingFixCheck = fixChecks.find((fixCheck) => fixCheck.reportPath === fixCheckPath)
1119
+ const matchingFixCheck = fixChecks.find((fixCheck) => canonicalEvaluationReportPath(state, fixCheck.reportPath) === fixCheckPath)
1026
1120
  if (!matchingFixCheck) missing.push(`evaluation.fixChecks: cycle ${cycle} missing ${fixCheckPath}`)
1027
1121
  if (matchingFixCheck && matchingFixCheck.evaluatorSessionID !== keptAttempt.evaluatorSessionID) {
1028
1122
  missing.push(`evaluation.fixChecks: cycle ${cycle} fix-check must use the same evaluator session as the kept report`)
@@ -1216,7 +1310,7 @@ export async function validateArtifactContent(
1216
1310
  const scope = artifacts ? new Set(artifacts) : null
1217
1311
 
1218
1312
  for (const check of CONTENT_CHECKS) {
1219
- const artifact = materializeTaskPath(state, check.artifact)
1313
+ const artifact = materializeWorkflowPath(state, check.artifact)
1220
1314
  if (scope && !scope.has(check.artifact) && !scope.has(artifact)) continue
1221
1315
  const p = path.join(state.workflowRoot, artifact)
1222
1316
 
package/plugin/types.ts CHANGED
@@ -27,6 +27,17 @@ export interface SessionRecord {
27
27
  at?: string
28
28
  closedAt?: string
29
29
  source?: "tool" | "event"
30
+ phases?: string[]
31
+ }
32
+
33
+ export interface SessionPhaseRecord {
34
+ sessionID: string
35
+ lane: string
36
+ phase: string
37
+ purpose: string
38
+ source: "tool" | "event"
39
+ firstSeenAt: string
40
+ lastSeenAt: string
30
41
  }
31
42
 
32
43
  export interface Blocker {
@@ -105,6 +116,8 @@ export interface AuditAttemptRecord {
105
116
  kept: boolean
106
117
  archived: boolean
107
118
  at: string
119
+ promptKind?: string | null
120
+ preparedPacketPath?: string | null
108
121
  }
109
122
 
110
123
  export interface AuditFixCheckRecord {
@@ -131,6 +144,17 @@ export interface ReadinessState {
131
144
  acceptedLightHigh: AcceptedLightHighRisk[]
132
145
  d1d9: Record<ReadinessD1D9Key, ReadinessD1D9Status>
133
146
  evidencePath: string | null
147
+ evidence: ReadinessEvidenceRecord[]
148
+ environment: Record<string, string>
149
+ }
150
+
151
+ export interface ReadinessEvidenceRecord {
152
+ key: string
153
+ status: string
154
+ command: string
155
+ artifact: string
156
+ note: string
157
+ at: string
134
158
  }
135
159
 
136
160
  export interface PluginState {
@@ -152,6 +176,7 @@ export interface PluginState {
152
176
  evaluatorSessions: SessionRecord[]
153
177
  claudeSessions: SessionRecord[]
154
178
  generalSessions: SessionRecord[]
179
+ phaseSessions: SessionPhaseRecord[]
155
180
  primaryDevelopSessionId: string | null
156
181
  latestDevelopSessionId: string | null
157
182
  nextDevelopSessionNumber: number
@@ -168,6 +193,7 @@ export interface PluginState {
168
193
  interruptions: Interruption[]
169
194
  ownerAbortRequestedAt: string | null
170
195
  autoContinueCounts: Record<string, number>
196
+ autoContinueBlockerSignature: string | null
171
197
  internalEvaluator: InternalEvaluatorState
172
198
  evaluation: EvaluationState
173
199
  readiness: ReadinessState
@@ -189,6 +215,7 @@ export interface StateSnapshot {
189
215
  evaluatorSessions: SessionRecord[]
190
216
  claudeSessions: SessionRecord[]
191
217
  generalSessions: SessionRecord[]
218
+ phaseSessions: SessionPhaseRecord[]
192
219
  primaryDevelopSessionId: string | null
193
220
  latestDevelopSessionId: string | null
194
221
  nextDevelopSessionNumber: number
@@ -204,6 +231,7 @@ export interface StateSnapshot {
204
231
  interruptions: Interruption[]
205
232
  ownerAbortRequestedAt: string | null
206
233
  autoContinueCounts: Record<string, number>
234
+ autoContinueBlockerSignature: string | null
207
235
  internalEvaluator: InternalEvaluatorState
208
236
  evaluation: EvaluationState
209
237
  readiness: ReadinessState
@@ -329,15 +357,9 @@ export const ARTIFACT_PATHS: Record<string, Record<string, string[]>> = {
329
357
  verification_done: [],
330
358
  },
331
359
  phase_5: {
332
- cycle_1_complete: [
333
- "task/.tmp/audit_report-1.md",
334
- "task/.tmp/audit_report-1-fix_check.md",
335
- ],
336
- cycle_2_complete: [
337
- "task/.tmp/audit_report-2.md",
338
- "task/.tmp/audit_report-2-fix_check.md",
339
- ],
340
- coverage_audit: ["task/.tmp/test_coverage_and_readme_audit_report.md"],
360
+ cycle_1_complete: [],
361
+ cycle_2_complete: [],
362
+ coverage_audit: [],
341
363
  },
342
364
  phase_6: {
343
365
  readiness_done: [],