@smartmemory/compose 0.2.7-beta → 0.2.8-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,205 @@
1
+ /**
2
+ * gsd-budget.js — COMP-GSD-4 budget-ceiling helpers for autonomous `compose gsd`.
3
+ *
4
+ * This module does NOT count tokens or enforce budgets — that is the stratum
5
+ * flow budget (STRAT-WORKFLOW-BUDGET): a `budget:` block on the flow makes
6
+ * stratum debit every server-dispatched agent and halt the run with a terminal
7
+ * `budget_exhausted` status that carries `budget_state = {caps, consumed}`.
8
+ *
9
+ * GSD-4's job is purely compose-side glue:
10
+ * - readGsdBudgetConfig: read `.compose/compose.json` `gsd.budget.*` (no defaults).
11
+ * - buildBudgetBlock: map that config → the stratum flow `budget` block
12
+ * (+ a per-task task_timeout in seconds).
13
+ * - injectBudget: inject the block into the gsd spec YAML — IDENTITY
14
+ * when nothing is configured (byte-identical guarantee).
15
+ * - composeBudgetDiagnostic: render budget.json + budget.md from budget_state.
16
+ *
17
+ * Enforced axes (stratum): ms (wall-clock), max_agent_dispatches, max_tokens, usd.
18
+ * See: docs/features/COMP-GSD-4/{design,blueprint}.md, stratum run_budget.py.
19
+ */
20
+
21
+ import { existsSync, readFileSync } from 'node:fs';
22
+ import { join } from 'node:path';
23
+ import YAML from 'yaml';
24
+
25
/**
 * Load the user's raw `gsd.budget` settings from `<cwd>/.compose/compose.json`.
 *
 * No defaults are applied (gate decision 7): a gsd run stays unbounded unless
 * the user explicitly configures a budget. A missing file, an unreadable file,
 * and malformed JSON are all treated as "no budget configured".
 * Per the original note, this mirrors readGsdStuckConfig in gsd.js.
 *
 * @param {string} cwd - directory that contains the `.compose` folder
 * @returns {object} the `gsd.budget` value as written, or `{}` when it is
 *   absent or the file cannot be read/parsed
 */
export function readGsdBudgetConfig(cwd) {
  const composeJson = join(cwd, '.compose', 'compose.json');

  if (existsSync(composeJson)) {
    try {
      const raw = readFileSync(composeJson, 'utf-8');
      const budget = JSON.parse(raw)?.gsd?.budget;
      return budget ?? {};
    } catch {
      // Best-effort read: fall through to the empty config below.
    }
  }

  return {};
}
40
+
41
/**
 * Map snake_case `gsd.budget.*` config → the stratum flow budget block and an
 * optional per-task timeout (seconds). Only keys the user set appear in the
 * result; an empty/absent config yields `{}`.
 *
 * Config keys:
 *   max_tokens, max_agent_dispatches, usd  → flow budget axes (copied verbatim)
 *   per_run_ms (alias: ms)                 → flow budget `ms` (wall-clock);
 *                                            `per_run_ms` wins when both are set
 *   per_task_ms                            → execute step `task_timeout` (sec)
 *   cumulative: { max_total_tokens, max_total_cost_usd } → cross-session ceiling,
 *                                            returned camelCased
 *
 * @param {object|null} [cfg] - raw `gsd.budget` config; `null`/`undefined`
 *   are treated as "nothing configured"
 * @returns {{ budget?: object, taskTimeoutSec?: number, cumulative?: object }}
 * @throws {TypeError} when `per_task_ms` is set but is not a finite number —
 *   a NaN/Infinity timeout must not be written into the flow spec
 */
export function buildBudgetBlock(cfg = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit null too.
  const conf = cfg ?? {};
  const out = {};

  const budget = {};
  if (conf.max_tokens != null) budget.max_tokens = conf.max_tokens;
  if (conf.max_agent_dispatches != null) budget.max_agent_dispatches = conf.max_agent_dispatches;
  if (conf.usd != null) budget.usd = conf.usd;
  const ms = conf.per_run_ms ?? conf.ms;
  if (ms != null) budget.ms = ms;
  if (Object.keys(budget).length > 0) out.budget = budget;

  if (conf.per_task_ms != null) {
    // stratum parallel_dispatch per-task timeout is `task_timeout` in SECONDS
    // (spec.py:145, schema minimum 1). Convert from ms, floor at 1s.
    const perTaskMs = Number(conf.per_task_ms);
    if (!Number.isFinite(perTaskMs)) {
      // Math.max(1, NaN) is NaN, so without this guard a typo such as "10m"
      // would silently become `task_timeout: NaN` in the generated spec.
      throw new TypeError(
        `buildBudgetBlock: gsd.budget.per_task_ms must be a finite number of milliseconds, got ${String(conf.per_task_ms)}`,
      );
    }
    out.taskTimeoutSec = Math.max(1, Math.ceil(perTaskMs / 1000));
  }

  if (conf.cumulative && typeof conf.cumulative === 'object') {
    const cum = {};
    if (conf.cumulative.max_total_tokens != null) cum.maxTotalTokens = conf.cumulative.max_total_tokens;
    if (conf.cumulative.max_total_cost_usd != null) cum.maxTotalCostUsd = conf.cumulative.max_total_cost_usd;
    if (Object.keys(cum).length > 0) out.cumulative = cum;
  }

  return out;
}
79
+
80
/**
 * Attach the configured budget to the gsd flow inside the spec YAML.
 *
 * BYTE-IDENTICAL GUARANTEE: if the config produces neither a flow budget nor a
 * per-task timeout, the incoming `specYaml` string is handed back untouched —
 * it is never parsed or re-serialized, so an un-budgeted `compose gsd` (and a
 * plain `compose build`) sees exactly the bytes it always did. Only when there
 * is something to inject does the spec go through YAML.parse/YAML.stringify.
 *
 * What gets written:
 *   - `flows.gsd.budget`                  ← the flow budget block, when present
 *   - `task_timeout` on the first step whose id is 'execute' ← the per-task
 *     timeout in seconds, when present and `flows.gsd.steps` is an array
 *
 * NOTE(review): when a per-task timeout is configured but no 'execute' step is
 * found, the timeout is skipped without any signal — confirm that is intended.
 *
 * @param {string} specYaml — the gsd.stratum.yaml contents
 * @param {object} cfg — raw gsd.budget config (from readGsdBudgetConfig)
 * @returns {string} the original string, or the re-serialized spec
 * @throws {Error} when something must be injected but the spec has no `flows.gsd`
 */
export function injectBudget(specYaml, cfg = {}) {
  const { budget, taskTimeoutSec } = buildBudgetBlock(cfg);
  const hasBudget = Boolean(budget);
  const hasTimeout = taskTimeoutSec != null;

  // Identity path — nothing to inject, so do not touch the text at all.
  if (!hasBudget && !hasTimeout) return specYaml;

  const doc = YAML.parse(specYaml);
  const gsdFlow = doc?.flows?.gsd;
  if (!gsdFlow) {
    // The spec shape changed underneath us; fail loudly rather than run unbudgeted.
    throw new Error('injectBudget: spec has no flows.gsd to attach a budget to');
  }

  if (hasBudget) gsdFlow.budget = budget;

  if (hasTimeout && Array.isArray(gsdFlow.steps)) {
    for (const step of gsdFlow.steps) {
      if (step && step.id === 'execute') {
        step.task_timeout = taskTimeoutSec;
        break; // only the first matching step is updated
      }
    }
  }

  return YAML.stringify(doc);
}
114
+
115
// Display names for the stratum budget axes, used when rendering the
// human-readable budget diagnostic. Keys are the axis ids as they appear in
// the flow `budget` block.
const AXIS_LABEL = {
  ms: 'wall-clock',
  usd: 'cost (USD)',
  max_tokens: 'tokens',
  max_agent_dispatches: 'agent dispatches',
};
122
+
123
/**
 * Work out which enforced budget axis has been reached.
 *
 * Each configured cap is compared against its consumed counterpart using
 * `consumed >= cap` (a missing consumed value counts as 0), and the first hit
 * wins in this order: ms, max_agent_dispatches, max_tokens, usd. The original
 * note states this mirrors stratum run_budget.budget_exhausted().
 *
 * The wall-clock cap is configured in milliseconds while `consumed.wall_s` is
 * in seconds, so the cap is divided by 1000 before comparing.
 *
 * @param {{caps?: object, consumed?: object}} [budgetState]
 * @returns {'ms'|'max_agent_dispatches'|'max_tokens'|'usd'|null} the tripped
 *   axis, or null when no configured cap has been reached
 */
export function trippedAxis(budgetState) {
  const limits = budgetState?.caps ?? {};
  const used = budgetState?.consumed ?? {};

  // Precedence-ordered table of (axis, cap, amount spent).
  const axes = [
    { axis: 'ms', cap: limits.ms == null ? null : limits.ms / 1000, spent: used.wall_s },
    { axis: 'max_agent_dispatches', cap: limits.max_agent_dispatches, spent: used.dispatches },
    { axis: 'max_tokens', cap: limits.max_tokens, spent: used.tokens },
    { axis: 'usd', cap: limits.usd, spent: used.dollars },
  ];

  const hit = axes.find(({ cap, spent }) => cap != null && (spent ?? 0) >= cap);
  return hit ? hit.axis : null;
}
138
+
139
/**
 * Build the budget.json + budget.md diagnostic from the stratum terminal
 * envelope's budget_state.
 *
 * The JSON part records the feature, the tripped axis, the raw caps/consumed
 * objects, the ids of decomposed tasks that are not in `completedTaskIds`, and
 * an ISO timestamp taken at call time. The Markdown part renders the same
 * information as a consumed-vs-cap table (one row per configured cap), the
 * remaining task list, and resume instructions.
 *
 * @param {object} budgetState — {caps, consumed:{tokens,dispatches,wall_s,dollars}}
 * @param {object} [meta]
 * @param {string} [meta.feature] — feature name used in the title and resume command
 * @param {string} [meta.axis] — overrides the axis otherwise derived via trippedAxis();
 *   when neither yields a value the Markdown labels the axis 'cumulative'
 * @param {Array<{id?: string}>} [meta.decomposedTasks] — all tasks of the run
 * @param {Array<string>} [meta.completedTaskIds] — ids already finished
 * @returns {{ json: object, md: string }}
 */
export function composeBudgetDiagnostic(budgetState, meta = {}) {
  const caps = budgetState?.caps ?? {};
  const consumed = budgetState?.consumed ?? {};
  const axis = meta.axis ?? trippedAxis(budgetState);
  const feature = meta.feature ?? '';

  const completed = new Set(meta.completedTaskIds ?? []);
  const remaining = (meta.decomposedTasks ?? [])
    // `t?.id` so a null/undefined entry is skipped instead of crashing the
    // diagnostic that is supposed to explain the halt.
    .map((t) => t?.id)
    .filter((id) => id && !completed.has(id));

  const json = {
    feature,
    kind: 'budget',
    axis,
    caps,
    consumed,
    remainingTaskIds: remaining,
    ts: new Date().toISOString(),
  };

  // One table row per cap the user actually configured.
  const rows = [];
  if (caps.max_tokens != null) rows.push(`| tokens | ${consumed.tokens ?? 0} | ${caps.max_tokens} |`);
  if (caps.max_agent_dispatches != null) rows.push(`| agent dispatches | ${consumed.dispatches ?? 0} | ${caps.max_agent_dispatches} |`);
  // Cap is in ms, consumed is in seconds; show both as whole seconds.
  if (caps.ms != null) rows.push(`| wall-clock (s) | ${Math.round(consumed.wall_s ?? 0)} | ${Math.round(caps.ms / 1000)} |`);
  // Coerce BOTH sides with Number(): `.toFixed` only exists on numbers, and the
  // consumed value is not guaranteed to be one.
  if (caps.usd != null) rows.push(`| cost (USD) | ${Number(consumed.dollars ?? 0).toFixed(4)} | ${Number(caps.usd).toFixed(4)} |`);

  const md = [
    `# GSD budget halt — ${feature}`,
    '',
    `**Tripped axis:** ${AXIS_LABEL[axis] ?? axis ?? 'cumulative'}`,
    `**When:** ${json.ts}`,
    '',
    '## Consumed vs cap',
    '',
    '| Axis | Consumed | Cap |',
    '|------|----------|-----|',
    ...rows,
    '',
    `## Remaining tasks (${remaining.length})`,
    '',
    remaining.length ? remaining.map((id) => `- ${id}`).join('\n') : '_none — all tasks completed before the halt._',
    '',
    '## Resume',
    '',
    'Raise the relevant `gsd.budget.*` cap in `.compose/compose.json` (or run with',
    '`--reset-budget` to clear the cumulative ledger), then:',
    '',
    '```',
    `compose gsd ${feature} --resume`,
    '```',
    '',
    'Completed task results are preserved in the blackboard; --resume re-dispatches',
    'only the remaining tasks.',
    '',
  ].join('\n');

  return { json, md };
}
@@ -0,0 +1,275 @@
1
+ /**
2
+ * gsd-stuck.js — GsdStuckDetector for COMP-GSD-5.
3
+ *
4
+ * Detects, in real time during per-task `compose gsd` dispatch, that an agent
5
+ * is spinning, and emits a structured verdict so the run loop can halt cleanly.
6
+ *
7
+ * Four signals (thresholds tunable via constructor opts; defaults 3/3/8/600000):
8
+ * - same_file: one file_path edited >= sameFileEdits times.
9
+ * - error_recurrence: a normalized error hash recurs >= errorRepeats.
10
+ * - no_progress: >= noProgressCalls consecutive non-file-changing tool calls.
11
+ * - wall_clock: nowMs - startedAt(taskId) >= wallClockMs.
12
+ *
13
+ * The same-file signal REUSES FixChainDetector (lib/debug-discipline.js) for its
14
+ * per-key file-hit counting — keyed here by taskId. Error-recurrence and
15
+ * no-progress are the only new bookkeeping.
16
+ *
17
+ * Consumes BuildStreamEvents from stratum.onEvent inside
18
+ * executeParallelDispatchServer, keyed by event.task_id. gsd runs the execute
19
+ * step max_concurrent:1, so per-task state is unambiguous. Telemetry contract
20
+ * (schema 0.2.7, STRAT-PAR-STREAM-TOOLDETAIL):
21
+ * tool_use_summary.metadata = { tool, summary, ok, duration_ms, input, tool_use_id }
22
+ * input.file_path present for Edit/Write/MultiEdit/Read
23
+ * tool_result.metadata = { tool_use_id, ok, output }
24
+ *
25
+ * See: docs/features/COMP-GSD-5/{design,blueprint,plan}.md
26
+ * contracts/gsd-stuck.json (`stuck` diagnostic shape)
27
+ */
28
+
29
+ import { createHash } from 'node:crypto';
30
+ import { FixChainDetector } from './debug-discipline.js';
31
+
32
// Out-of-the-box stuck-detection thresholds (Decision 4 in design.md).
// Frozen so the shared defaults cannot be mutated by a consumer.
export const DEFAULT_THRESHOLDS = Object.freeze({
  sameFileEdits: 3, // edits to one file
  errorRepeats: 3, // occurrences of one normalized error
  noProgressCalls: 8, // consecutive non-file-changing tool calls
  wallClockMs: 10 * 60 * 1000, // ten minutes per task
});
39
+
40
// Tool names treated as modifying files on disk. A call to one of these
// resets the no-progress counter and feeds the same-file counter.
// `Read` is intentionally absent: it carries a file_path but changes nothing.
const FILE_CHANGING_TOOLS = new Set([
  'Edit',
  'Write',
  'MultiEdit',
]);
43
+
44
// ---------------------------------------------------------------------------
// Error normalization — strip the cosmetic, run-to-run noise out of an error
// message (file paths, line:col positions, hex values, multi-digit numbers,
// whitespace, letter case) so that repeats of one logical failure produce the
// same text, and therefore the same hash.
// ---------------------------------------------------------------------------

/**
 * Reduce an error/tool output to a stable, comparable form.
 *
 * The rewrite rules run in a fixed order (later rules see the output of
 * earlier ones), then the text is trimmed and lower-cased.
 *
 * NOTE(review): the first rule matches from ANY slash or backslash, so for a
 * bare relative path such as `src/lib/foo.js` it rewrites `/lib/foo.js` and
 * leaves `src<path>`; the second rule then rarely has a slash left to match.
 *
 * @param {*} output - anything stringifiable; null/undefined yield ''
 * @returns {string} the normalized message
 */
export function normalizeError(output) {
  if (output == null) return '';

  const rules = [
    // A slash/backslash (optionally after a drive letter) followed by
    // everything up to the next whitespace or ':' -> one path token.
    [/(?:[A-Za-z]:)?(?:\/|\\)[^\s:]+(?:[/\\][^\s:]+)*/g, '<path>'],
    // Module-like relative paths (a/b/c.js) with no leading slash.
    [/\b[\w.-]+(?:\/[\w.-]+)+\.\w+\b/g, '<path>'],
    // Position suffixes: ":12" or ":12:5".
    [/:\d+(?::\d+)?\b/g, ':<n>'],
    // 0x-prefixed hex values.
    [/0x[0-9a-fA-F]+/g, '<hex>'],
    // Free-standing numbers of two or more digits (ids, ports, offsets).
    [/\b\d{2,}\b/g, '<n>'],
    // Any whitespace run becomes a single space.
    [/\s+/g, ' '],
  ];

  let text = String(output);
  for (const [pattern, token] of rules) {
    text = text.replace(pattern, token);
  }
  return text.trim().toLowerCase();
}
67
+
68
/**
 * Fingerprint an error output: SHA-1 of its normalized text, truncated to the
 * first 16 hex characters. Used as the key for error-recurrence counting.
 *
 * @param {*} output - raw error/tool output
 * @returns {string} 16-character lowercase hex digest prefix
 */
function hashError(output) {
  const normalized = normalizeError(output);
  const sha1 = createHash('sha1');
  sha1.update(normalized);
  return sha1.digest('hex').slice(0, 16);
}
71
+
72
// ---------------------------------------------------------------------------
// GsdStuckDetector
// ---------------------------------------------------------------------------

/**
 * Tracks per-task tool activity and reports when a task looks stuck.
 *
 * Usage: call startTask() when a task is dispatched, feed every stream event
 * to record(), and poll check() to get a verdict. All state is keyed by task
 * id, and the whole detector can be serialized with toJSON()/fromJSON().
 *
 * Signals evaluated by check(), in precedence order:
 *   same_file, error_recurrence, no_progress, wall_clock.
 */
export class GsdStuckDetector {
  /**
   * @param {object} [opts] - threshold overrides; any omitted (null/undefined)
   *   value falls back to DEFAULT_THRESHOLDS
   * @param {number} [opts.sameFileEdits]
   * @param {number} [opts.errorRepeats]
   * @param {number} [opts.noProgressCalls]
   * @param {number} [opts.wallClockMs]
   */
  constructor(opts = {}) {
    this.sameFileEdits = opts.sameFileEdits ?? DEFAULT_THRESHOLDS.sameFileEdits;
    this.errorRepeats = opts.errorRepeats ?? DEFAULT_THRESHOLDS.errorRepeats;
    this.noProgressCalls = opts.noProgressCalls ?? DEFAULT_THRESHOLDS.noProgressCalls;
    this.wallClockMs = opts.wallClockMs ?? DEFAULT_THRESHOLDS.wallClockMs;

    // same-file: reuse FixChainDetector's per-key file-hit counter, keyed by taskId.
    this._fixChain = new FixChainDetector();
    // error-recurrence: per-task Map<normalizedHash, count>.
    /** @type {Map<string, Map<string, number>>} */
    this._errorHits = new Map();
    // no-progress: per-task consecutive non-file-changing call count.
    /** @type {Map<string, number>} */
    this._noProgress = new Map();
    // wall-clock baseline per task.
    /** @type {Map<string, number>} */
    this._startedAt = new Map();
  }

  /**
   * Mark a task's dispatch start — establishes the wall-clock baseline.
   * The first call per task wins; later calls for the same task are no-ops,
   * as are calls with a falsy taskId.
   *
   * @param {string} taskId
   * @param {number} nowMs - current time in milliseconds
   */
  startTask(taskId, nowMs) {
    if (!taskId) return;
    if (!this._startedAt.has(taskId)) {
      this._startedAt.set(taskId, nowMs);
    }
  }

  /**
   * Route a BuildStreamEvent into per-task state. Only tool_use_summary and
   * tool_result are meaningful; everything else (and any event without a
   * task_id) is ignored. Keyed by event.task_id.
   *
   * @param {{kind?: string, task_id?: string, metadata?: object}} event
   */
  record(event) {
    if (!event || typeof event !== 'object') return;
    const taskId = event.task_id;
    if (!taskId) return; // gsd is max_concurrent:1 but be defensive about attribution.
    const md = event.metadata ?? {};

    if (event.kind === 'tool_use_summary') {
      const tool = md.tool;
      const filePath = md.input?.file_path;
      if (FILE_CHANGING_TOOLS.has(tool)) {
        // same-file: count the file hit (reuse FixChainDetector per-key counter).
        if (filePath) {
          this._fixChain.recordIterationForBug(taskId, [filePath]);
        }
        // no-progress: a file-changing tool resets the consecutive run.
        this._noProgress.set(taskId, 0);
      } else if ((this._fixChain.byBug.get(taskId)?.fileHits?.size ?? 0) > 0) {
        // "No progress" = non-file-changing calls (Bash, Grep, Read, Glob, ...)
        // AFTER the task has started editing. A task's initial read/grep/test
        // exploration is legitimate work, not a stall — counting it would
        // false-positive and abort productive TDD loops (COMP-GSD-5 Codex
        // review). A task that NEVER edits is caught by the wall_clock backstop.
        this._noProgress.set(taskId, (this._noProgress.get(taskId) ?? 0) + 1);
      }
      return;
    }

    if (event.kind === 'tool_result') {
      // Only explicit failures (ok === false) count toward error recurrence.
      if (md.ok === false) {
        const hash = hashError(md.output);
        let m = this._errorHits.get(taskId);
        if (!m) { m = new Map(); this._errorHits.set(taskId, m); }
        m.set(hash, (m.get(hash) ?? 0) + 1);
      }
      return;
    }
  }

  /**
   * Evaluate the stuck signals for a task. Returns the FIRST signal that has
   * tripped (precedence: same_file, error_recurrence, no_progress, wall_clock).
   *
   * @param {string} taskId
   * @param {number} nowMs - current time in milliseconds, compared against
   *   the baseline recorded by startTask()
   * @returns {{stuck:true, signal:string, detail:string} | {stuck:false}}
   */
  check(taskId, nowMs) {
    // --- same_file ---
    const fileHits = this._fixChain.byBug.get(taskId)?.fileHits;
    if (fileHits) {
      for (const [file, count] of fileHits.entries()) {
        if (count >= this.sameFileEdits) {
          return {
            stuck: true,
            signal: 'same_file',
            detail: `file ${file} edited ${count} times (>= ${this.sameFileEdits}) without converging`,
          };
        }
      }
    }

    // --- error_recurrence ---
    const errs = this._errorHits.get(taskId);
    if (errs) {
      for (const [hash, count] of errs.entries()) {
        if (count >= this.errorRepeats) {
          return {
            stuck: true,
            signal: 'error_recurrence',
            detail: `the same error recurred ${count} times (>= ${this.errorRepeats}); normalized hash ${hash}`,
          };
        }
      }
    }

    // --- no_progress ---
    const np = this._noProgress.get(taskId) ?? 0;
    if (np >= this.noProgressCalls) {
      return {
        stuck: true,
        signal: 'no_progress',
        detail: `${np} consecutive tool calls (>= ${this.noProgressCalls}) with no file-changing edit`,
      };
    }

    // --- wall_clock ---
    // Skipped entirely when startTask() was never called for this task.
    const startedAt = this._startedAt.get(taskId);
    if (startedAt != null && nowMs - startedAt >= this.wallClockMs) {
      return {
        stuck: true,
        signal: 'wall_clock',
        detail: `task ran ${nowMs - startedAt}ms (>= ${this.wallClockMs}ms) without finishing`,
      };
    }

    return { stuck: false };
  }

  /**
   * Build the `attemptCounts` snapshot for the stuck.json diagnostic
   * (contracts/gsd-stuck.json#/definitions/stuck/attemptCounts).
   *
   * @param {string} taskId
   * @returns {{sameFileEdits:number, errorRepeats:number, noProgressCalls:number}}
   *   the highest per-file edit count, the highest per-error repeat count, and
   *   the current no-progress run length (all 0 when nothing was recorded)
   */
  attemptCounts(taskId) {
    const fileHits = this._fixChain.byBug.get(taskId)?.fileHits;
    const maxFileEdits = fileHits ? Math.max(0, ...fileHits.values()) : 0;
    const errs = this._errorHits.get(taskId);
    const maxErrorRepeats = errs ? Math.max(0, ...errs.values()) : 0;
    return {
      sameFileEdits: maxFileEdits,
      errorRepeats: maxErrorRepeats,
      noProgressCalls: this._noProgress.get(taskId) ?? 0,
    };
  }

  /**
   * Clear all state for one task without touching others.
   *
   * @param {string} taskId
   */
  reset(taskId) {
    this._fixChain.resetForBug(taskId);
    this._errorHits.delete(taskId);
    this._noProgress.delete(taskId);
    this._startedAt.delete(taskId);
  }

  // --- Serialization (resume) ----------------------------------------------

  /**
   * Snapshot thresholds and all per-task state as plain JSON-safe data.
   *
   * @returns {{thresholds:object, fixChain:*, errorHits:object, noProgress:object, startedAt:object}}
   */
  toJSON() {
    return {
      thresholds: {
        sameFileEdits: this.sameFileEdits,
        errorRepeats: this.errorRepeats,
        noProgressCalls: this.noProgressCalls,
        wallClockMs: this.wallClockMs,
      },
      fixChain: this._fixChain.toJSON(),
      errorHits: Object.fromEntries(
        [...this._errorHits.entries()].map(([t, m]) => [t, Object.fromEntries(m)]),
      ),
      noProgress: Object.fromEntries(this._noProgress),
      startedAt: Object.fromEntries(this._startedAt),
    };
  }

  /**
   * Rebuild a detector from a toJSON() snapshot. Missing or malformed input
   * yields a fresh detector with default thresholds; missing sections are
   * simply left empty.
   *
   * @param {object} [json] - a value previously produced by toJSON()
   * @returns {GsdStuckDetector}
   */
  static fromJSON(json) {
    const t = (json && typeof json === 'object' && json.thresholds) || {};
    const d = new GsdStuckDetector({
      sameFileEdits: t.sameFileEdits,
      errorRepeats: t.errorRepeats,
      noProgressCalls: t.noProgressCalls,
      wallClockMs: t.wallClockMs,
    });
    if (!json || typeof json !== 'object') return d;

    d._fixChain = FixChainDetector.fromJSON(json.fixChain ?? {});
    if (json.errorHits && typeof json.errorHits === 'object') {
      for (const [taskId, hashes] of Object.entries(json.errorHits)) {
        const counts = new Map();
        if (hashes && typeof hashes === 'object') {
          for (const [hash, n] of Object.entries(hashes)) {
            // Coerce like noProgress/startedAt below: a string count would
            // otherwise turn the later `count + 1` into string concatenation.
            counts.set(hash, Number(n) || 0);
          }
        }
        d._errorHits.set(taskId, counts);
      }
    }
    if (json.noProgress && typeof json.noProgress === 'object') {
      for (const [taskId, n] of Object.entries(json.noProgress)) {
        d._noProgress.set(taskId, Number(n) || 0);
      }
    }
    if (json.startedAt && typeof json.startedAt === 'object') {
      for (const [taskId, ms] of Object.entries(json.startedAt)) {
        d._startedAt.set(taskId, Number(ms) || 0);
      }
    }
    return d;
  }
}