@smartmemory/compose 0.2.7-beta → 0.2.8-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/compose.js +38 -3
- package/contracts/gsd-stuck.json +141 -0
- package/lib/budget-ledger.js +84 -0
- package/lib/build-stream-schema.js +5 -3
- package/lib/build.js +87 -1
- package/lib/feature-validator.js +40 -8
- package/lib/gsd-budget.js +205 -0
- package/lib/gsd-stuck.js +275 -0
- package/lib/gsd.js +499 -8
- package/package.json +1 -1
package/bin/compose.js
CHANGED
|
@@ -1969,14 +1969,22 @@ if (cmd === 'build') {
|
|
|
1969
1969
|
// pipeline (pipelines/gsd.stratum.yaml). Hard-requires existing
|
|
1970
1970
|
// docs/features/<code>/blueprint.md with a parseable Boundary Map.
|
|
1971
1971
|
const gsdCode = args.find(a => !a.startsWith('-'))
|
|
1972
|
+
const gsdResume = args.includes('--resume')
|
|
1973
|
+
const gsdResetBudget = args.includes('--reset-budget')
|
|
1972
1974
|
if (!gsdCode) {
|
|
1973
|
-
console.error('Usage: compose gsd <feature-code>')
|
|
1975
|
+
console.error('Usage: compose gsd <feature-code> [--resume] [--reset-budget]')
|
|
1974
1976
|
console.error('')
|
|
1975
1977
|
console.error('Runs the per-task fresh-context dispatch pipeline (COMP-GSD-2).')
|
|
1976
1978
|
console.error('Hard-requires docs/features/<code>/blueprint.md with a valid Boundary Map.')
|
|
1979
|
+
console.error('Detects stuck tasks (COMP-GSD-5) and halts with a structured diagnostic.')
|
|
1980
|
+
console.error('Enforces budget ceilings (COMP-GSD-4) from .compose/compose.json gsd.budget.*')
|
|
1977
1981
|
console.error('')
|
|
1978
1982
|
console.error('Options:')
|
|
1979
|
-
console.error(' --cwd <path> Working directory (defaults to current)')
|
|
1983
|
+
console.error(' --resume Resume a halted run: re-dispatch the unfinished tasks')
|
|
1984
|
+
console.error(' from .compose/gsd/<code>/pause.json (skips completed tasks).')
|
|
1985
|
+
console.error(' --reset-budget Clear the feature\'s cumulative budget ledger before running')
|
|
1986
|
+
console.error(' (use after raising or removing a spent gsd.budget.cumulative cap).')
|
|
1987
|
+
console.error(' --cwd <path> Working directory (defaults to current)')
|
|
1980
1988
|
process.exit(1)
|
|
1981
1989
|
}
|
|
1982
1990
|
const { root: gsdCwd } = resolveCwdWithWorkspace(args)
|
|
@@ -1984,7 +1992,34 @@ if (cmd === 'build') {
|
|
|
1984
1992
|
const gsdAgentCwd = cwdIdx !== -1 ? resolve(args[cwdIdx + 1]) : gsdCwd
|
|
1985
1993
|
const { runGsd } = await import('../lib/gsd.js')
|
|
1986
1994
|
try {
|
|
1987
|
-
const result = await runGsd(gsdCode, { cwd: gsdAgentCwd })
|
|
1995
|
+
if (gsdResetBudget) {
|
|
1996
|
+
// COMP-GSD-4: clear the cumulative ledger so a spent ceiling no longer
|
|
1997
|
+
// refuses the run. Runs before dispatch; per-run windows reset anyway.
|
|
1998
|
+
const { resetGsdUsage } = await import('../lib/budget-ledger.js')
|
|
1999
|
+
resetGsdUsage(resolve(gsdAgentCwd, '.compose'), gsdCode)
|
|
2000
|
+
console.log(`gsd: cleared cumulative budget ledger for ${gsdCode}.`)
|
|
2001
|
+
}
|
|
2002
|
+
const result = await runGsd(gsdCode, { cwd: gsdAgentCwd, resume: gsdResume })
|
|
2003
|
+
if (result.status === 'stuck') {
|
|
2004
|
+
// COMP-GSD-5: a stuck halt is a clean, recoverable stop — not a crash.
|
|
2005
|
+
console.error(`gsd stuck: task ${result.stuckTaskId} tripped the ${result.signal} detector.`)
|
|
2006
|
+
console.error(`Diagnostic: .compose/gsd/${gsdCode}/stuck.md`)
|
|
2007
|
+
console.error(`Resume with: compose gsd ${gsdCode} --resume`)
|
|
2008
|
+
process.exit(2)
|
|
2009
|
+
}
|
|
2010
|
+
if (result.status === 'budget') {
|
|
2011
|
+
// COMP-GSD-4: a budget halt is a clean, recoverable stop — not a crash.
|
|
2012
|
+
if (result.axis === 'cumulative') {
|
|
2013
|
+
console.error(`gsd budget: cumulative ceiling for ${gsdCode} is already spent.`)
|
|
2014
|
+
console.error(`Diagnostic: .compose/gsd/${gsdCode}/budget.md`)
|
|
2015
|
+
console.error(`Raise gsd.budget.cumulative.* or clear it: compose gsd ${gsdCode} --reset-budget`)
|
|
2016
|
+
} else {
|
|
2017
|
+
console.error(`gsd budget: the ${result.axis} ceiling tripped mid-run.`)
|
|
2018
|
+
console.error(`Diagnostic: .compose/gsd/${gsdCode}/budget.md`)
|
|
2019
|
+
console.error(`Raise gsd.budget.* and resume: compose gsd ${gsdCode} --resume`)
|
|
2020
|
+
}
|
|
2021
|
+
process.exit(2)
|
|
2022
|
+
}
|
|
1988
2023
|
console.log(`gsd complete: ${result.blackboardEntries} task results captured.`)
|
|
1989
2024
|
} catch (err) {
|
|
1990
2025
|
console.error(`gsd failed: ${err.message}`)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "gsd-stuck.json",
|
|
4
|
+
"_source": "COMP-GSD-5",
|
|
5
|
+
"_also": "COMP-GSD-4",
|
|
6
|
+
"_roadmap": "COMP-GSD-5",
|
|
7
|
+
"title": "GsdStuckContracts",
|
|
8
|
+
"description": "Artifacts written when a `compose gsd` run halts. A STUCK halt (COMP-GSD-5) writes stuck.json/stuck.md; a BUDGET halt (COMP-GSD-4) writes budget.json/budget.md. Both persist `pause.json` (machine-readable resume state); `compose gsd <feature> --resume` reads it, validates ownership/mode, and re-dispatches decomposedTasks minus completedTaskIds (completed results already live in the blackboard). The pause `kind` field (optional; absent ⇒ 'stuck' for back-compat) discriminates: a 'stuck' pause carries the stuck-specific fields, a 'budget' pause carries the `budget` block. GSD-6 reuses the `pause` shape for automatic crash-recovery.",
|
|
9
|
+
"definitions": {
|
|
10
|
+
"stuck": {
|
|
11
|
+
"type": "object",
|
|
12
|
+
"title": "GsdStuckDiagnostic",
|
|
13
|
+
"description": "Structured diagnostic emitted on the first stuck verdict for a task.",
|
|
14
|
+
"required": ["feature", "taskId", "signal", "detail", "attemptCounts", "ts"],
|
|
15
|
+
"additionalProperties": false,
|
|
16
|
+
"properties": {
|
|
17
|
+
"feature": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "Feature code the gsd run is building (e.g. COMP-GSD-5)."
|
|
20
|
+
},
|
|
21
|
+
"taskId": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"description": "The decomposed task id that tripped the detector."
|
|
24
|
+
},
|
|
25
|
+
"signal": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"enum": ["same_file", "error_recurrence", "no_progress", "wall_clock"],
|
|
28
|
+
"description": "Which stuck pattern fired. same_file: one file edited >= sameFileEdits times. error_recurrence: a normalized error hash recurred >= errorRepeats. no_progress: >= noProgressCalls consecutive non-file-changing tool calls. wall_clock: task ran >= wallClockMs without finishing."
|
|
29
|
+
},
|
|
30
|
+
"detail": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"description": "Human-readable explanation of the offending file / error / stall."
|
|
33
|
+
},
|
|
34
|
+
"attemptCounts": {
|
|
35
|
+
"type": "object",
|
|
36
|
+
"description": "Snapshot of the per-task counters at the moment of the verdict.",
|
|
37
|
+
"properties": {
|
|
38
|
+
"sameFileEdits": { "type": "integer", "minimum": 0, "description": "Max edit count across files for this task." },
|
|
39
|
+
"errorRepeats": { "type": "integer", "minimum": 0, "description": "Max repeat count across normalized error hashes." },
|
|
40
|
+
"noProgressCalls": { "type": "integer", "minimum": 0, "description": "Current consecutive non-file-changing tool-call count." }
|
|
41
|
+
},
|
|
42
|
+
"additionalProperties": true
|
|
43
|
+
},
|
|
44
|
+
"partialDiff": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"description": "Optional unified diff of the stuck task's worktree at halt time, for triage."
|
|
47
|
+
},
|
|
48
|
+
"ts": {
|
|
49
|
+
"type": "string",
|
|
50
|
+
"format": "date-time",
|
|
51
|
+
"description": "ISO-8601 timestamp of the verdict."
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
"pause": {
|
|
56
|
+
"type": "object",
|
|
57
|
+
"title": "GsdPauseState",
|
|
58
|
+
"description": "Resume state persisted on a stuck OR budget halt. Drives blackboard-driven re-dispatch on --resume — NOT mid-task re-entry. Base required fields are kind-agnostic; the `kind` field (optional; absent ⇒ stuck) selects which extra fields are required via the if/then/else at the end.",
|
|
59
|
+
"required": ["flowId", "stepId", "decomposedTasks", "completedTaskIds", "pid", "mode", "ts"],
|
|
60
|
+
"additionalProperties": false,
|
|
61
|
+
"properties": {
|
|
62
|
+
"kind": {
|
|
63
|
+
"type": "string",
|
|
64
|
+
"enum": ["stuck", "budget"],
|
|
65
|
+
"description": "Halt kind. Optional for back-compat: an absent kind is interpreted as 'stuck'. Determines which extra fields are required (see if/then/else)."
|
|
66
|
+
},
|
|
67
|
+
"budget": {
|
|
68
|
+
"type": "object",
|
|
69
|
+
"description": "Present on a budget halt (kind='budget'). The enforced axis that tripped + the stratum budget_state snapshot.",
|
|
70
|
+
"required": ["axis", "caps", "consumed"],
|
|
71
|
+
"additionalProperties": true,
|
|
72
|
+
"properties": {
|
|
73
|
+
"axis": {
|
|
74
|
+
"type": ["string", "null"],
|
|
75
|
+
"enum": ["ms", "max_agent_dispatches", "max_tokens", "usd", null],
|
|
76
|
+
"description": "Which enforced stratum axis reached its cap (null if indeterminate)."
|
|
77
|
+
},
|
|
78
|
+
"caps": { "type": "object", "description": "The flow budget caps {ms?,max_agent_dispatches?,max_tokens?,usd?}." },
|
|
79
|
+
"consumed": { "type": "object", "description": "Consumed totals {tokens,dispatches,wall_s,dollars}." }
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
"flowId": {
|
|
83
|
+
"type": "string",
|
|
84
|
+
"description": "Stratum flow id of the halted run (informational; resume opens a fresh flow)."
|
|
85
|
+
},
|
|
86
|
+
"stepId": {
|
|
87
|
+
"type": "string",
|
|
88
|
+
"description": "The parallel_dispatch step id that was cancelled (e.g. execute)."
|
|
89
|
+
},
|
|
90
|
+
"stuckTaskId": {
|
|
91
|
+
"type": "string",
|
|
92
|
+
"description": "The task id that tripped the detector."
|
|
93
|
+
},
|
|
94
|
+
"signal": {
|
|
95
|
+
"type": "string",
|
|
96
|
+
"enum": ["same_file", "error_recurrence", "no_progress", "wall_clock"],
|
|
97
|
+
"description": "Which stuck pattern fired (mirrors stuck.signal)."
|
|
98
|
+
},
|
|
99
|
+
"detail": {
|
|
100
|
+
"type": "string",
|
|
101
|
+
"description": "Human-readable explanation (mirrors stuck.detail)."
|
|
102
|
+
},
|
|
103
|
+
"decomposedTasks": {
|
|
104
|
+
"type": "array",
|
|
105
|
+
"minItems": 1,
|
|
106
|
+
"description": "The full decomposed task list, persisted so --resume does NOT re-decompose (stable task ids).",
|
|
107
|
+
"items": { "type": "object" }
|
|
108
|
+
},
|
|
109
|
+
"completedTaskIds": {
|
|
110
|
+
"type": "array",
|
|
111
|
+
"description": "Task ids whose VALIDATED result is already in the blackboard. --resume skips these.",
|
|
112
|
+
"items": { "type": "string" }
|
|
113
|
+
},
|
|
114
|
+
"pid": {
|
|
115
|
+
"type": "integer",
|
|
116
|
+
"description": "OS pid of the process that wrote the pause file. --resume refuses if this pid is still alive (another owner)."
|
|
117
|
+
},
|
|
118
|
+
"mode": {
|
|
119
|
+
"type": "string",
|
|
120
|
+
"const": "gsd",
|
|
121
|
+
"description": "Run mode. --resume refuses if this is not 'gsd' (mirrors `compose fix --resume` mode guard)."
|
|
122
|
+
},
|
|
123
|
+
"ts": {
|
|
124
|
+
"type": "string",
|
|
125
|
+
"format": "date-time",
|
|
126
|
+
"description": "ISO-8601 timestamp the pause file was written."
|
|
127
|
+
}
|
|
128
|
+
},
|
|
129
|
+
"if": {
|
|
130
|
+
"properties": { "kind": { "const": "budget" } },
|
|
131
|
+
"required": ["kind"]
|
|
132
|
+
},
|
|
133
|
+
"then": {
|
|
134
|
+
"required": ["budget"]
|
|
135
|
+
},
|
|
136
|
+
"else": {
|
|
137
|
+
"required": ["stuckTaskId", "signal", "detail"]
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
package/lib/budget-ledger.js
CHANGED
|
@@ -85,6 +85,90 @@ export function checkCumulativeBudget(composeDir, featureCode, limits = {}) {
|
|
|
85
85
|
return { exceeded: false, reason: null, usage };
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
+
// ===========================================================================
|
|
89
|
+
// COMP-GSD-4: cumulative gsd-run usage (tokens + cost). Shares the per-feature
|
|
90
|
+
// ledger entry with COMP-BUDGET's iteration tracking, adding two
|
|
91
|
+
// back-compatible fields (totalTokens/totalCostUsd; absent on legacy entries
|
|
92
|
+
// reads as 0). Wall-clock/dispatch are per-RUN windows enforced by the stratum
|
|
93
|
+
// flow budget — NOT cumulative-checked here (design Decision 3).
|
|
94
|
+
// ===========================================================================
|
|
95
|
+
|
|
96
|
+
/**
 * Record a gsd run's token/cost (and informational dispatch/time) usage,
 * sourced from the stratum terminal envelope's budget_state.consumed.
 *
 * Adds tokens/costUsd/timeMs to the feature's running totals
 * (totalTokens/totalCostUsd/totalTimeMs), appends a `kind: 'gsd'` session
 * record, and rewrites the ledger file. A missing feature entry is created.
 *
 * @param {string} composeDir — path to .compose directory
 * @param {string} featureCode
 * @param {{ tokens?: number, costUsd?: number, dispatches?: number, timeMs?: number }} usage
 * @returns {object} the updated ledger entry for the feature
 */
export function recordGsdUsage(composeDir, featureCode, { tokens = 0, costUsd = 0, dispatches = 0, timeMs = 0 } = {}) {
  // Coerce to a finite number so a null/NaN/string value can never turn a
  // running total into NaN or a concatenated string.
  const toFinite = (value) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  };

  const filePath = ledgerPath(composeDir);
  const ledger = readLedger(composeDir);

  if (!ledger.features[featureCode]) {
    ledger.features[featureCode] = { totalIterations: 0, totalActions: 0, totalTimeMs: 0, sessions: [] };
  }
  const feat = ledger.features[featureCode];

  // An existing entry may lack `sessions` (resetGsdUsage guards the same
  // case with Array.isArray); without this the push below would throw.
  if (!Array.isArray(feat.sessions)) {
    feat.sessions = [];
  }

  const run = {
    tokens: toFinite(tokens),
    costUsd: toFinite(costUsd),
    dispatches: toFinite(dispatches),
    timeMs: toFinite(timeMs),
  };

  feat.totalTokens = toFinite(feat.totalTokens) + run.tokens;
  feat.totalCostUsd = toFinite(feat.totalCostUsd) + run.costUsd;
  feat.totalTimeMs = toFinite(feat.totalTimeMs) + run.timeMs;
  feat.sessions.push({
    recordedAt: new Date().toISOString(),
    kind: 'gsd',
    tokens: run.tokens,
    costUsd: run.costUsd,
    dispatches: run.dispatches,
    timeMs: run.timeMs,
  });

  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, JSON.stringify(ledger, null, 2), 'utf-8');
  return feat;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Check whether a feature has exceeded its cumulative gsd token/cost ceiling.
 * Cumulative tokens/cost persist across sessions (a hard ceiling that blocks
 * resume); per-run wall-clock/dispatch reset each run, so they are not checked.
 *
 * A ceiling counts as reached when usage is greater than OR EQUAL to the
 * limit. The token ceiling is evaluated before the cost ceiling, so when both
 * are reached the token reason is the one reported. A limit that is
 * null/undefined is not enforced.
 *
 * @param {string} composeDir
 * @param {string} featureCode
 * @param {{ maxTotalTokens?: number, maxTotalCostUsd?: number }} limits
 * @returns {{ exceeded: boolean, reason: string|null, usage: object }}
 */
export function checkGsdCumulativeBudget(composeDir, featureCode, limits = {}) {
  // Persisted totals are read from a JSON file on disk; coerce them so a
  // non-numeric value cannot make the `.toFixed` call below throw.
  const toFinite = (value) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  };
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const caps = limits ?? {};

  const ledger = readLedger(composeDir);
  const feat = ledger.features[featureCode] ?? {};
  const usage = {
    totalTokens: toFinite(feat.totalTokens),
    totalCostUsd: toFinite(feat.totalCostUsd),
  };

  if (caps.maxTotalTokens != null && usage.totalTokens >= caps.maxTotalTokens) {
    return { exceeded: true, reason: `Cumulative token ceiling reached (${usage.totalTokens}/${caps.maxTotalTokens})`, usage };
  }
  if (caps.maxTotalCostUsd != null && usage.totalCostUsd >= caps.maxTotalCostUsd) {
    return { exceeded: true, reason: `Cumulative cost ceiling reached ($${usage.totalCostUsd.toFixed(4)}/$${Number(caps.maxTotalCostUsd).toFixed(4)})`, usage };
  }

  return { exceeded: false, reason: null, usage };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Wipe a feature's cumulative gsd usage — the `--reset-budget` path.
 *
 * Sets totalTokens and totalCostUsd to 0 and removes every session whose
 * `kind` is 'gsd'; all other fields and sessions on the entry are left as
 * they are. The ledger file is then rewritten. Returns without touching the
 * file when the feature has no ledger entry.
 *
 * @param {string} composeDir
 * @param {string} featureCode
 */
export function resetGsdUsage(composeDir, featureCode) {
  const target = ledgerPath(composeDir);
  const ledger = readLedger(composeDir);
  const entry = ledger.features[featureCode];
  if (!entry) {
    return;
  }

  Object.assign(entry, { totalTokens: 0, totalCostUsd: 0 });

  if (Array.isArray(entry.sessions)) {
    const kept = [];
    for (const session of entry.sessions) {
      if (session.kind !== 'gsd') {
        kept.push(session);
      }
    }
    entry.sessions = kept;
  }

  fs.mkdirSync(path.dirname(target), { recursive: true });
  fs.writeFileSync(target, JSON.stringify(ledger, null, 2), 'utf-8');
}
|
|
171
|
+
|
|
88
172
|
/**
|
|
89
173
|
* Read a snapshot budget response for a feature without throwing on quota.
|
|
90
174
|
* Used by GET /api/lifecycle/budget.
|
|
@@ -7,15 +7,17 @@
|
|
|
7
7
|
* Design decisions:
|
|
8
8
|
* - Uses AJV (already in compose deps) compiled once at module load.
|
|
9
9
|
* - On validation failure the caller should warn+drop — never throw.
|
|
10
|
-
* - KNOWN_VERSIONS: set of accepted schema_version strings. v0.2.5 accepted
|
|
11
|
-
*
|
|
10
|
+
* - KNOWN_VERSIONS: set of accepted schema_version strings. v0.2.5/v0.2.6 accepted
|
|
11
|
+
* for backward compatibility; v0.2.7 is current (STRAT-PAR-STREAM-TOOLDETAIL:
|
|
12
|
+
* enriched tool_use_summary.input + tool_use_id, new tool_result kind — both
|
|
13
|
+
* ride the open catch-all, so no closed metadata schema is added here).
|
|
12
14
|
* - reply_required (Option A, STRAT-PAR-STREAM-CONSUMER-VALIDATE design):
|
|
13
15
|
* optional boolean reserved for future gate/permission/question kinds.
|
|
14
16
|
*/
|
|
15
17
|
|
|
16
18
|
import Ajv2020 from 'ajv/dist/2020.js';
|
|
17
19
|
|
|
18
|
-
export const KNOWN_VERSIONS = new Set(['0.2.5', '0.2.6']);
|
|
20
|
+
export const KNOWN_VERSIONS = new Set(['0.2.5', '0.2.6', '0.2.7']);
|
|
19
21
|
|
|
20
22
|
// ---------------------------------------------------------------------------
|
|
21
23
|
// Envelope schema (top-level fields only; metadata shape is kind-specific)
|
package/lib/build.js
CHANGED
|
@@ -2947,12 +2947,19 @@ export async function executeParallelDispatchServer(
|
|
|
2947
2947
|
progress,
|
|
2948
2948
|
streamWriter,
|
|
2949
2949
|
baseCwd,
|
|
2950
|
+
opts = {},
|
|
2950
2951
|
) {
|
|
2951
2952
|
const { flow_id: flowId, step_id: stepId,
|
|
2952
2953
|
step_number: stepNum, total_steps: totalSteps,
|
|
2953
2954
|
tasks } = dispatchResponse;
|
|
2954
2955
|
const emittedStates = new Map();
|
|
2955
2956
|
|
|
2957
|
+
// COMP-GSD-5: optional stuck detector. ONLY the gsd path passes one; build
|
|
2958
|
+
// mode invokes this fn with 6 args (opts={}) so every detector branch below
|
|
2959
|
+
// is skipped and build behavior stays byte-identical.
|
|
2960
|
+
const stuckDetector = opts.stuckDetector ?? null;
|
|
2961
|
+
const startedTasks = new Set();
|
|
2962
|
+
|
|
2956
2963
|
if (streamWriter) {
|
|
2957
2964
|
streamWriter.write({
|
|
2958
2965
|
type: 'build_step_start', stepId,
|
|
@@ -2982,11 +2989,18 @@ export async function executeParallelDispatchServer(
|
|
|
2982
2989
|
// state-machine driver. Forward valid events through streamWriter so the
|
|
2983
2990
|
// bridge rebroadcasts them via SSE under the buildStreamEvent wrapper.
|
|
2984
2991
|
let unsubscribePush = null;
|
|
2985
|
-
if (typeof stratum.onEvent === 'function' && streamWriter) {
|
|
2992
|
+
if (typeof stratum.onEvent === 'function' && (streamWriter || stuckDetector)) {
|
|
2986
2993
|
unsubscribePush = stratum.onEvent(flowId, stepId, (event) => {
|
|
2987
2994
|
// Accept all KNOWN_VERSIONS (producer emits 0.2.6); pinning '0.2.5' dropped
|
|
2988
2995
|
// every push event from the current producer. Client already validated.
|
|
2989
2996
|
if (!event || !KNOWN_VERSIONS.has(event.schema_version)) return;
|
|
2997
|
+
// COMP-GSD-5: feed the per-task stuck detector (no-op in build mode —
|
|
2998
|
+
// stuckDetector is null there). record() ignores all but
|
|
2999
|
+
// tool_use_summary/tool_result and keys by event.task_id.
|
|
3000
|
+
if (stuckDetector && (event.kind === 'tool_use_summary' || event.kind === 'tool_result')) {
|
|
3001
|
+
try { stuckDetector.record(event); } catch (err) { console.error('[build] stuck detector record failed:', err); }
|
|
3002
|
+
}
|
|
3003
|
+
if (!streamWriter) return;
|
|
2990
3004
|
try {
|
|
2991
3005
|
streamWriter.write({ type: 'build_stream_event', event });
|
|
2992
3006
|
} catch (err) {
|
|
@@ -2998,6 +3012,7 @@ export async function executeParallelDispatchServer(
|
|
|
2998
3012
|
try {
|
|
2999
3013
|
// Poll until outcome is present (NOT can_advance — see design §3)
|
|
3000
3014
|
let pollResult;
|
|
3015
|
+
let stuckVerdict = null;
|
|
3001
3016
|
const intervalMs = SERVER_DISPATCH_POLL_MS();
|
|
3002
3017
|
while (true) {
|
|
3003
3018
|
pollResult = await stratum.parallelPoll(flowId, stepId);
|
|
@@ -3007,10 +3022,81 @@ export async function executeParallelDispatchServer(
|
|
|
3007
3022
|
);
|
|
3008
3023
|
}
|
|
3009
3024
|
emitPerTaskProgress(streamWriter, pollResult, emittedStates);
|
|
3025
|
+
|
|
3026
|
+
// COMP-GSD-5: real-time stuck detection (gsd path only — null in build).
|
|
3027
|
+
// For each running task: establish a wall-clock baseline on first sight,
|
|
3028
|
+
// then ask the detector for a verdict. On the first stuck task, cancel the
|
|
3029
|
+
// step via the terminal cascade primitive (parallelAdvance 'conflict' —
|
|
3030
|
+
// the same path T2-F5 uses) and break with a stuck outcome.
|
|
3031
|
+
if (stuckDetector) {
|
|
3032
|
+
const now = Date.now();
|
|
3033
|
+
for (const [taskId, ts] of Object.entries(pollResult.tasks ?? {})) {
|
|
3034
|
+
if (ts.state === 'running') stuckDetector.startTask(taskId, now);
|
|
3035
|
+
if (ts.state !== 'running') continue;
|
|
3036
|
+
const v = stuckDetector.check(taskId, now);
|
|
3037
|
+
if (v?.stuck) {
|
|
3038
|
+
stuckVerdict = {
|
|
3039
|
+
...v,
|
|
3040
|
+
taskId,
|
|
3041
|
+
attemptCounts: stuckDetector.attemptCounts(taskId),
|
|
3042
|
+
};
|
|
3043
|
+
if (streamWriter) {
|
|
3044
|
+
streamWriter.write({
|
|
3045
|
+
type: 'system', subtype: 'gsd_stuck',
|
|
3046
|
+
stepId, taskId, signal: v.signal, detail: v.detail, parallel: true,
|
|
3047
|
+
});
|
|
3048
|
+
}
|
|
3049
|
+
const cancel = await stratum.parallelAdvance(flowId, stepId, 'conflict');
|
|
3050
|
+
if (cancel?.error) {
|
|
3051
|
+
throw new Error(
|
|
3052
|
+
`stratum_parallel_advance (stuck cancel) failed: ${cancel.error}: ${cancel.message || ''}`,
|
|
3053
|
+
);
|
|
3054
|
+
}
|
|
3055
|
+
pollResult = { ...pollResult, outcome: cancel };
|
|
3056
|
+
break;
|
|
3057
|
+
}
|
|
3058
|
+
}
|
|
3059
|
+
}
|
|
3060
|
+
if (stuckVerdict) break;
|
|
3061
|
+
|
|
3010
3062
|
if (pollResult.outcome != null) break;
|
|
3011
3063
|
await new Promise((resolve) => setTimeout(resolve, intervalMs));
|
|
3012
3064
|
}
|
|
3013
3065
|
|
|
3066
|
+
// COMP-GSD-5: short-circuit return on a stuck verdict. The cancel outcome is
|
|
3067
|
+
// returned verbatim with the verdict attached; the gsd run loop branches on
|
|
3068
|
+
// `.stuck`. This bypasses the merge/advance bookkeeping below — there is
|
|
3069
|
+
// nothing to merge for a cancelled task.
|
|
3070
|
+
if (stuckVerdict) {
|
|
3071
|
+
if (streamWriter) {
|
|
3072
|
+
streamWriter.write({
|
|
3073
|
+
type: 'build_step_done', stepId, parallel: true,
|
|
3074
|
+
summary: { ...(pollResult.summary ?? {}), stuck: stuckVerdict.signal }, flowId,
|
|
3075
|
+
});
|
|
3076
|
+
}
|
|
3077
|
+
// unsubscribePush is invoked by the enclosing finally.
|
|
3078
|
+
return { ...(pollResult.outcome ?? {}), stuck: stuckVerdict };
|
|
3079
|
+
}
|
|
3080
|
+
|
|
3081
|
+
// COMP-GSD-4: budget_exhausted is a stratum terminal status. When the run
|
|
3082
|
+
// budget trips mid-dispatch, the flow has already cascade-cancelled the
|
|
3083
|
+
// in-flight siblings (server-side) and the poll returns the terminal
|
|
3084
|
+
// envelope (carrying budget_state). There is nothing to merge — short-circuit
|
|
3085
|
+
// like stuck and hand the envelope back so the gsd run loop halts. No-op for
|
|
3086
|
+
// build mode: build flows declare no `budget:` block, so the status never
|
|
3087
|
+
// appears and this branch is never taken (byte-identical). The
|
|
3088
|
+
// advance-carried case (a parallelAdvance that returns budget_exhausted)
|
|
3089
|
+
// falls through to the final `return pollResult.outcome` below.
|
|
3090
|
+
if (pollResult.outcome?.status === 'budget_exhausted') {
|
|
3091
|
+
if (streamWriter) {
|
|
3092
|
+
streamWriter.write({
|
|
3093
|
+
type: 'build_step_done', stepId, parallel: true,
|
|
3094
|
+
summary: { ...(pollResult.summary ?? {}), budget_exhausted: true }, flowId,
|
|
3095
|
+
});
|
|
3096
|
+
}
|
|
3097
|
+
return pollResult.outcome;
|
|
3098
|
+
}
|
|
3099
|
+
|
|
3014
3100
|
if (pollResult.outcome.status === 'already_advanced') {
|
|
3015
3101
|
throw new Error(
|
|
3016
3102
|
`stratum_parallel_poll returned already_advanced for step ${stepId} — ` +
|
package/lib/feature-validator.js
CHANGED
|
@@ -385,6 +385,17 @@ function normalizeStatus(s) {
|
|
|
385
385
|
return String(s).toUpperCase();
|
|
386
386
|
}
|
|
387
387
|
|
|
388
|
+
// vision-state's status vocabulary (contracts/vision-state.schema.json) is the
|
|
389
|
+
// tracker's set MINUS `PARTIAL` — it cannot represent "partially shipped". A
|
|
390
|
+
// tracker status of PARTIAL is the same lifecycle reality as vision's
|
|
391
|
+
// IN_PROGRESS (partially shipped = still in progress), so project the tracker
|
|
392
|
+
// side onto the vision vocabulary before any *_VS_VISION_STATE comparison.
|
|
393
|
+
// Tracker↔tracker comparisons (ROADMAP_VS_FEATUREJSON) keep the full
|
|
394
|
+
// vocabulary — PARTIAL vs IN_PROGRESS there is a real distinction.
|
|
395
|
+
/**
 * Map a normalized status onto the vision-state vocabulary.
 *
 * @param {string|undefined|null} s — status, expected already upper-cased
 * @returns {string|undefined|null} 'IN_PROGRESS' when `s` is exactly
 *   'PARTIAL'; otherwise `s` unchanged.
 */
function projectToVisionStatus(s) {
  if (s === 'PARTIAL') {
    return 'IN_PROGRESS';
  }
  return s;
}
|
|
398
|
+
|
|
388
399
|
function runStateMismatchChecks(fctx, findings) {
|
|
389
400
|
const { code, roadmap, vision, featureJson } = fctx;
|
|
390
401
|
const rStatus = normalizeStatus(roadmap?.status);
|
|
@@ -401,22 +412,43 @@ function runStateMismatchChecks(fctx, findings) {
|
|
|
401
412
|
'STATUS_MISMATCH_ROADMAP_VS_FEATUREJSON', code,
|
|
402
413
|
`ROADMAP says ${rStatus}, feature.json says ${fStatus}`));
|
|
403
414
|
}
|
|
404
|
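-
if (rStatus && vStatus && rStatus !== vStatus) {
|
|
405
|
-
findings.push(finding(statusSeverity(rStatus, vStatus),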
|
|
415
|
+
// Project BOTH sides to the vision vocabulary (PARTIAL→IN_PROGRESS) before
|
|
416
|
+
// comparing. Projecting the tracker side stops a legitimately-PARTIAL feature
|
|
417
|
+
// false-firing against a vision item that can only say in_progress; projecting
|
|
418
|
+
// the vision side keeps it symmetric so a malformed/legacy vision status of
|
|
419
|
+
// "partial" (schema-invalid — reported as VISION_STATE_SCHEMA_VIOLATION) still
|
|
420
|
+
// aligns with tracker PARTIAL instead of double-reporting. Real drift (PARTIAL
|
|
421
|
+
// vs complete/planned) still differs and fires.
|
|
422
|
+
const rVis = projectToVisionStatus(rStatus);
|
|
423
|
+
const fVis = projectToVisionStatus(fStatus);
|
|
424
|
+
const vVis = projectToVisionStatus(vStatus);
|
|
425
|
+
if (rStatus && vStatus && rVis !== vVis) {
|
|
426
|
+
findings.push(finding(statusSeverity(rVis, vVis),
|
|
406
427
|
'STATUS_MISMATCH_ROADMAP_VS_VISION_STATE', code,
|
|
407
428
|
`ROADMAP says ${rStatus}, vision-state says ${vStatus}`));
|
|
408
429
|
}
|
|
409
|
-
if (fStatus && vStatus && fStatus !== vStatus) {
|
|
410
|
-
findings.push(finding(statusSeverity(fStatus, vStatus),
|
|
430
|
+
if (fStatus && vStatus && fVis !== vVis) {
|
|
431
|
+
findings.push(finding(statusSeverity(fVis, vVis),
|
|
411
432
|
'STATUS_MISMATCH_FEATUREJSON_VS_VISION_STATE', code,
|
|
412
433
|
`feature.json says ${fStatus}, vision-state says ${vStatus}`));
|
|
413
434
|
}
|
|
414
|
-
// CONTRADICTORY_PHASE_CLAIM
|
|
415
|
-
|
|
416
|
-
|
|
435
|
+
// CONTRADICTORY_PHASE_CLAIM — compare LIFECYCLE phase to LIFECYCLE phase.
|
|
436
|
+
// feature.json's top-level `phase` holds the ROADMAP heading (e.g. "Phase 7:
|
|
437
|
+
// MCP Writers"), NOT a lifecycle stage; comparing it to vision-state's
|
|
438
|
+
// lifecycle phase ("vision"/"explore_design"/…) is a category mismatch that
|
|
439
|
+
// false-fired on ~every feature with a vision item. Use only the lifecycle
|
|
440
|
+
// sources on both sides — which is what the "does not involve the roadmap"
|
|
441
|
+
// comment above always intended. (feature.json doesn't currently carry a
|
|
442
|
+
// lifecycle phase, so this correctly yields no finding until it does.)
|
|
443
|
+
const fPhase = featureJson?.lifecycle?.currentPhase;
|
|
444
|
+
// Lifecycle phase ONLY on both sides. Do NOT fall back to vision.phase — that
|
|
445
|
+
// is the legacy board-column taxonomy (planning|implementation|…), a different
|
|
446
|
+
// vocabulary from lifecycle.currentPhase (explore_design|blueprint|…), and
|
|
447
|
+
// mixing them reintroduces the category mismatch this fix removes (Codex).
|
|
448
|
+
const vPhase = vision?.lifecycle?.currentPhase;
|
|
417
449
|
if (fPhase && vPhase && fPhase !== vPhase) {
|
|
418
450
|
findings.push(finding('error', 'CONTRADICTORY_PHASE_CLAIM', code,
|
|
419
|
-
`feature.json phase '${fPhase}' vs vision-state phase '${vPhase}'`));
|
|
451
|
+
`feature.json lifecycle phase '${fPhase}' vs vision-state phase '${vPhase}'`));
|
|
420
452
|
}
|
|
421
453
|
// COMPLEXITY_OR_DESCRIPTION_DRIFT
|
|
422
454
|
if (roadmap && featureJson) {
|