@taewooopark/agent-blackbox 0.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
ADDED
|
@@ -0,0 +1,2135 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// packages/core/src/events.ts
|
|
4
|
+
// Allowed values for a trace event's `host` field (checked by validateTraceEvent).
var traceHosts = ["opencode", "pi", "codex", "claude-code", "hermes", "custom"];
|
|
12
|
+
// Allowed values for a trace event's `kind` field (checked by validateTraceEvent).
// The order of the entries is kept exactly as originally declared.
var traceEventKinds = [
  // session lifecycle
  "session_created", "session_updated", "session_idle", "session_error",
  // agent / turn lifecycle
  "agent_start", "agent_end", "turn_start", "turn_end",
  // conversation and tools
  "message", "tool_call", "tool_result",
  // file activity
  "file_read", "file_edit", "file_created", "file_deleted",
  // search, shell, permissions, todos
  "search", "bash", "permission_asked", "permission_replied", "todo_updated",
  // derived / higher-level events
  "subagent_spawned", "decision_extracted", "blocker_detected", "handoff_generated",
  // git
  "git_status", "git_commit", "git_push",
  // miscellaneous
  "context_compacted", "command_run", "agent_switched", "model_switched", "host_event"
];
|
|
46
|
+
// Allowed values for a trace event's `sensitivity` field (checked by validateTraceEvent).
var dataSensitivities = ["public", "internal", "private", "secret", "student_sensitive"];
|
|
53
|
+
/**
 * Checks the shape of a trace event without throwing.
 *
 * @param {unknown} event - Candidate event.
 * @returns {{ ok: boolean, errors: string[] }} `ok` is true only when no
 *   error message was collected.
 */
function validateTraceEvent(event) {
  if (!isRecord(event)) {
    return { ok: false, errors: ["event must be an object"] };
  }

  const errors = [];

  // Scalar field rules, applied in this order so messages keep a stable order.
  const fieldRules = [
    ["string", "id"],
    ["string", "ts"],
    ["number", "seq"],
    ["enum", "host", traceHosts],
    ["string", "runId"],
    ["string", "sessionId"],
    ["enum", "kind", traceEventKinds],
    ["enum", "sensitivity", dataSensitivities]
  ];
  for (const [rule, key, allowed] of fieldRules) {
    if (rule === "string") {
      requireString(event, key, errors);
    } else if (rule === "number") {
      requireNumber(event, key, errors);
    } else {
      requireEnum(event, key, allowed, errors);
    }
  }

  // Nested containers must be plain (non-array, non-null) objects.
  for (const key of ["payload", "redaction", "evidence"]) {
    if (!isRecord(event[key])) {
      errors.push(`${key} must be an object`);
    }
  }

  // Value-level checks run only when the basic type check already passed.
  const { ts, seq } = event;
  if (typeof ts === "string" && Number.isNaN(Date.parse(ts))) {
    errors.push("ts must be an ISO-compatible timestamp");
  }
  if (typeof seq === "number" && !(Number.isInteger(seq) && seq >= 1)) {
    errors.push("seq must be a positive integer");
  }

  return { ok: errors.length === 0, errors };
}
|
|
83
|
+
/**
 * Throwing variant of validateTraceEvent.
 *
 * @param {unknown} event - Candidate event.
 * @throws {Error} When validation reports errors; the message lists them
 *   joined with "; ".
 */
function assertTraceEvent(event) {
  const { ok, errors } = validateTraceEvent(event);
  if (ok) {
    return;
  }
  throw new Error(`Invalid trace event: ${errors.join("; ")}`);
}
|
|
89
|
+
/**
 * Reports whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
92
|
+
/**
 * Appends an error to `errors` unless `value[key]` is a non-empty string.
 *
 * @param {object} value - Object holding the field.
 * @param {string} key - Field name, also used in the message.
 * @param {string[]} errors - Collector that is mutated in place.
 */
function requireString(value, key, errors) {
  const field = value[key];
  const valid = typeof field === "string" && field !== "";
  if (!valid) {
    errors.push(`${key} must be a non-empty string`);
  }
}
|
|
97
|
+
/**
 * Appends an error to `errors` unless `typeof value[key]` is "number".
 * NaN and Infinity pass this check because their typeof is "number".
 *
 * @param {object} value - Object holding the field.
 * @param {string} key - Field name, also used in the message.
 * @param {string[]} errors - Collector that is mutated in place.
 */
function requireNumber(value, key, errors) {
  const isNumber = typeof value[key] === "number";
  if (isNumber) {
    return;
  }
  errors.push(`${key} must be a number`);
}
|
|
102
|
+
/**
 * Appends an error to `errors` unless `value[key]` is a string contained in
 * `allowed`.
 *
 * @param {object} value - Object holding the field.
 * @param {string} key - Field name, also used in the message.
 * @param {string[]} allowed - Accepted values; listed in the error message.
 * @param {string[]} errors - Collector that is mutated in place.
 */
function requireEnum(value, key, allowed, errors) {
  const candidate = value[key];
  const accepted = typeof candidate === "string" && allowed.includes(candidate);
  if (!accepted) {
    errors.push(`${key} must be one of ${allowed.join(", ")}`);
  }
}
|
|
107
|
+
|
|
108
|
+
// packages/core/src/graph.ts
|
|
109
|
+
/**
 * Builds a workflow graph by sorting the events and applying them in order.
 *
 * The run id (and the root RUN node's timestamp / event id) come from the
 * first event after sorting. With no events the run is "empty-run", its
 * status is "UNKNOWN" and its timestamp is the Unix epoch.
 *
 * @param {object[]} events - Trace events; the input is not mutated here.
 * @returns {object} The graph as returned by freezeGraph.
 */
function materializeWorkflowGraph(events) {
  const ordered = sortEvents(events);
  const first = ordered[0];
  const runId = first?.runId ?? "empty-run";

  const graph = {
    runId,
    nodes: new Map(),
    edges: new Map(),
    appliedEventIds: []
  };

  const rootNode = {
    id: runNodeId(runId),
    type: "RUN",
    label: runId,
    status: ordered.length > 0 ? "ACTIVE" : "UNKNOWN",
    at: first?.ts ?? new Date(0).toISOString(),
    data: { runId }
  };
  if (first?.id) {
    rootNode.eventId = first.id;
  }
  ensureNode(graph, rootNode);

  ordered.forEach((event) => {
    applyTraceEvent(graph, event);
  });

  return freezeGraph(graph);
}
|
|
132
|
+
/**
 * Materializes the graph from only those events whose `seq` is <= `seq`.
 *
 * @param {object[]} events
 * @param {number} seq - Inclusive upper bound.
 * @returns {object} Result of materializeWorkflowGraph on the filtered events.
 */
function replayWorkflowGraphAtSeq(events, seq) {
  const upToSeq = events.filter(({ seq: eventSeq }) => eventSeq <= seq);
  return materializeWorkflowGraph(upToSeq);
}
|
|
135
|
+
/**
 * Materializes the graph from only those events whose `ts` is at or before `at`.
 *
 * Both sides are compared as epoch milliseconds via `new Date(...)`. An
 * unparsable `at` or `ts` yields NaN, which fails the `<=` comparison, so
 * such events are left out.
 *
 * @param {object[]} events
 * @param {string|number|Date} at - Inclusive cut-off, anything `new Date` accepts.
 * @returns {object} Result of materializeWorkflowGraph on the filtered events.
 */
function replayWorkflowGraphAtTime(events, at) {
  const cutoff = new Date(at).getTime();
  const happenedByCutoff = (event) => new Date(event.ts).getTime() <= cutoff;
  return materializeWorkflowGraph(events.filter(happenedByCutoff));
}
|
|
139
|
+
/**
 * Applies one trace event to the mutable graph.
 *
 * Always records the event id and ensures the session node; ensures the
 * agent / turn nodes when the event carries those ids. Then, depending on
 * `event.kind`, it updates a scope node's status and/or creates an event node
 * linked from the current scope (see connectScope).
 *
 * @param {object} graph - Mutable graph with `nodes`, `edges`, `appliedEventIds`.
 * @param {object} event - Trace event to apply.
 */
function applyTraceEvent(graph, event) {
  graph.appliedEventIds.push(event.id);
  ensureSession(graph, event);
  if (event.agentId) {
    ensureAgent(graph, event);
  }
  if (event.turnId) {
    ensureTurn(graph, event);
  }

  // Creates the event node, then links it from the current scope.
  const record = (type, label, status, edgeType) => {
    createEventNode(graph, event, type, label, status);
    connectScope(graph, event, eventNodeId(event), edgeType);
  };
  const setStatus = (nodeId, status) => {
    updateNodeStatus(graph, nodeId, status, event);
  };

  const { kind } = event;
  switch (kind) {
    case "session_created":
    case "session_updated":
      setStatus(sessionNodeId(event.sessionId), "ACTIVE");
      return;
    case "session_idle":
      setStatus(sessionNodeId(event.sessionId), "SUCCEEDED");
      return;
    case "session_error":
      setStatus(sessionNodeId(event.sessionId), "FAILED");
      record("ERROR", event.summary ?? "Session error", "FAILED", "PRODUCES");
      return;

    case "agent_start":
    case "agent_end":
      if (event.agentId) {
        setStatus(agentNodeId(event.agentId), kind === "agent_start" ? "ACTIVE" : "SUCCEEDED");
      }
      return;
    case "turn_start":
    case "turn_end":
      if (event.turnId) {
        setStatus(turnNodeId(event.turnId), kind === "turn_start" ? "ACTIVE" : "SUCCEEDED");
      }
      return;

    case "message":
      // Kept explicit: the edge type is read from event.evidence only after
      // the node has been created, exactly as before.
      createEventNode(graph, event, "MESSAGE", event.summary ?? messageLabel(event), "SUCCEEDED");
      connectScope(graph, event, eventNodeId(event), event.evidence.claimedByModel ? "CLAIMS" : "CONTAINS");
      return;
    case "tool_call":
      record("TOOL_CALL", event.summary ?? toolLabel(event), "ACTIVE", "CALLS");
      return;
    case "tool_result":
      record("TOOL_CALL", event.summary ?? "Tool result", resultStatus(event), "OBSERVED_AS");
      return;

    case "file_read":
    case "file_edit":
    case "file_created":
    case "file_deleted":
      createFileActivity(graph, event);
      return;

    case "bash":
      record("COMMAND", event.summary ?? commandLabel(event), resultStatus(event), "EXECUTES");
      return;
    case "search":
      record("SEARCH", event.summary ?? searchLabel(event), "SUCCEEDED", "SEARCHES");
      return;
    case "todo_updated":
      record("TODO", event.summary ?? "Todo update", "ACTIVE", "UPDATES");
      return;
    case "permission_asked":
    case "permission_replied":
      record("PERMISSION_GATE", event.summary ?? "Permission gate", "NEEDS_APPROVAL", "BLOCKS");
      return;

    case "decision_extracted":
      createDecision(graph, event);
      return;
    case "blocker_detected":
      record("BLOCKER", event.summary ?? "Blocker", "BLOCKED", "BLOCKS");
      return;
    case "handoff_generated":
      record("HANDOFF", event.summary ?? "Handoff", "SUCCEEDED", "PRODUCES");
      return;
    case "subagent_spawned":
      record("AGENT", event.summary ?? "Subagent", "ACTIVE", "SPAWNS");
      return;

    case "git_status":
    case "git_commit":
    case "git_push":
      record("ARTIFACT", event.summary ?? event.kind, resultStatus(event), "PRODUCES");
      return;

    default:
      // Every other kind becomes a generic artifact with unknown status.
      record("ARTIFACT", event.summary ?? event.kind, "UNKNOWN", "CONTAINS");
  }
}
|
|
242
|
+
/**
 * Ensures the SESSION node for `event.sessionId` exists (status "ACTIVE")
 * and that a CONTAINS edge links the event's run node to it.
 *
 * @param {object} graph - Mutable graph.
 * @param {object} event - Trace event supplying ids and timestamp.
 */
function ensureSession(graph, event) {
  const { id: eventId, ts: at, sessionId } = event;
  const nodeId = sessionNodeId(sessionId);

  ensureNode(graph, {
    id: nodeId,
    type: "SESSION",
    label: sessionId,
    status: "ACTIVE",
    at,
    eventId,
    data: { sessionId, parentSessionId: event.parentSessionId ?? null }
  });

  ensureEdge(graph, {
    from: runNodeId(event.runId),
    to: nodeId,
    type: "CONTAINS",
    at,
    eventId
  });
}
|
|
261
|
+
/**
 * Ensures the AGENT node for `event.agentId` exists (status "ACTIVE") and is
 * linked from the session node. The edge type is "SPAWNS" when
 * `event.agentRole` is "subagent", otherwise "CONTAINS". Does nothing when
 * the event has no agent id.
 *
 * @param {object} graph - Mutable graph.
 * @param {object} event - Trace event.
 */
function ensureAgent(graph, event) {
  const { agentId, agentRole } = event;
  if (!agentId) {
    return;
  }

  const nodeId = agentNodeId(agentId);
  ensureNode(graph, {
    id: nodeId,
    type: "AGENT",
    label: agentId,
    status: "ACTIVE",
    at: event.ts,
    eventId: event.id,
    data: { agentId, agentRole: agentRole ?? "unknown" }
  });

  const edgeType = agentRole === "subagent" ? "SPAWNS" : "CONTAINS";
  ensureEdge(graph, {
    from: sessionNodeId(event.sessionId),
    to: nodeId,
    type: edgeType,
    at: event.ts,
    eventId: event.id
  });
}
|
|
283
|
+
/**
 * Ensures the TURN node for `event.turnId` exists (status "ACTIVE") and is
 * contained by the agent node when the event has an agent id, otherwise by
 * the session node. Does nothing when the event has no turn id.
 *
 * @param {object} graph - Mutable graph.
 * @param {object} event - Trace event.
 */
function ensureTurn(graph, event) {
  const { turnId } = event;
  if (!turnId) {
    return;
  }

  const nodeId = turnNodeId(turnId);
  ensureNode(graph, {
    id: nodeId,
    type: "TURN",
    label: turnId,
    status: "ACTIVE",
    at: event.ts,
    eventId: event.id,
    data: { turnId }
  });

  let parentId;
  if (event.agentId) {
    parentId = agentNodeId(event.agentId);
  } else {
    parentId = sessionNodeId(event.sessionId);
  }
  ensureEdge(graph, {
    from: parentId,
    to: nodeId,
    type: "CONTAINS",
    at: event.ts,
    eventId: event.id
  });
}
|
|
305
|
+
/**
 * Ensures a node keyed by the event's own id, with the given type, label
 * and status.
 *
 * Node data is `eventKind` followed by the spread payload, so a payload key
 * named `eventKind` overrides the event's kind.
 *
 * @param {object} graph - Mutable graph.
 * @param {object} event - Source trace event.
 * @param {string} type - Node type.
 * @param {string} label - Node label.
 * @param {string} status - Node status.
 */
function createEventNode(graph, event, type, label, status) {
  const id = eventNodeId(event);
  const data = { eventKind: event.kind, ...event.payload };
  const node = { id, type, label, status, at: event.ts, eventId: event.id, data };
  ensureNode(graph, node);
}
|
|
316
|
+
/**
 * Ensures a FILE node for the path found in the event payload (falling back
 * to "unknown-file") and links it from the current scope with an edge type
 * derived from the event kind. A "file_deleted" event sets the node status
 * to "REVERTED"; every other kind sets it to "ACTIVE".
 *
 * @param {object} graph - Mutable graph.
 * @param {object} event - A file_* trace event.
 */
function createFileActivity(graph, event) {
  const path = filePathPayload(event) ?? "unknown-file";
  const fileId = fileNodeId(path);
  const wasDeleted = event.kind === "file_deleted";

  ensureNode(graph, {
    id: fileId,
    type: "FILE",
    label: path,
    status: wasDeleted ? "REVERTED" : "ACTIVE",
    at: event.ts,
    eventId: event.id,
    data: { path }
  });

  connectScope(graph, event, fileId, fileEdgeType(event.kind));
}
|
|
331
|
+
/**
 * Creates a DECISION node for the event, links it from the current scope
 * with a DECIDES edge, and adds one inferred SUPPORTS_DECISION edge per id in
 * the payload's `evidenceEventIds`. Edge confidence comes from the payload's
 * numeric `confidence`, defaulting to 0.5.
 *
 * @param {object} graph - Mutable graph.
 * @param {object} event - A decision_extracted trace event.
 */
function createDecision(graph, event) {
  const decisionId = eventNodeId(event);
  const label = stringPayload(event, "statement") ?? event.summary ?? "Decision";

  createEventNode(graph, event, "DECISION", label, "SUCCEEDED");
  connectScope(graph, event, decisionId, "DECIDES");

  arrayPayload(event, "evidenceEventIds").forEach((evidenceEventId) => {
    ensureEdge(graph, {
      from: eventNodeIdFromEventId(evidenceEventId),
      to: decisionId,
      type: "SUPPORTS_DECISION",
      at: event.ts,
      eventId: event.id,
      inferred: true,
      confidence: numberPayload(event, "confidence") ?? 0.5
    });
  });
}
|
|
349
|
+
/**
 * Ensures an edge from the event's innermost scope to `targetNodeId`.
 * The scope is the turn node if the event has a turn id, else the agent node
 * if it has an agent id, else the session node.
 *
 * @param {object} graph - Mutable graph.
 * @param {object} event - Trace event providing scope ids, ts and id.
 * @param {string} targetNodeId - Destination node id.
 * @param {string} edgeType - Edge type.
 */
function connectScope(graph, event, targetNodeId, edgeType) {
  let scopeId;
  if (event.turnId) {
    scopeId = turnNodeId(event.turnId);
  } else if (event.agentId) {
    scopeId = agentNodeId(event.agentId);
  } else {
    scopeId = sessionNodeId(event.sessionId);
  }

  ensureEdge(graph, {
    from: scopeId,
    to: targetNodeId,
    type: edgeType,
    at: event.ts,
    eventId: event.id
  });
}
|
|
359
|
+
/**
 * Inserts a node, or refreshes it when a node with `input.id` already exists.
 *
 * On refresh only `updatedAt`, `status`, `data` (shallow-merged) and
 * `eventIds` (de-duplicated append) change; `type`, `label` and `createdAt`
 * keep their first values.
 *
 * @param {object} graph - Mutable graph whose `nodes` is a Map keyed by id.
 * @param {object} input - `{ id, type, label, status, at, eventId?, data? }`.
 */
function ensureNode(graph, input) {
  const { id, status, at, eventId } = input;
  const node = graph.nodes.get(id);

  if (!node) {
    graph.nodes.set(id, {
      id,
      type: input.type,
      label: input.label,
      status,
      createdAt: at,
      updatedAt: at,
      eventIds: eventId ? [eventId] : [],
      data: input.data ?? {}
    });
    return;
  }

  node.updatedAt = at;
  node.status = status;
  node.data = { ...node.data, ...(input.data ?? {}) };
  if (eventId && !node.eventIds.includes(eventId)) {
    node.eventIds.push(eventId);
  }
}
|
|
381
|
+
/**
 * Sets the status of an existing node and stamps it with the event's
 * timestamp and id. Unknown node ids are ignored.
 *
 * @param {object} graph - Mutable graph whose `nodes` is a Map keyed by id.
 * @param {string} nodeId - Id of the node to update.
 * @param {string} status - New status.
 * @param {object} event - Event supplying `ts` and `id`.
 */
function updateNodeStatus(graph, nodeId, status, event) {
  const target = graph.nodes.get(nodeId);
  if (target) {
    target.status = status;
    target.updatedAt = event.ts;
    const alreadyLinked = target.eventIds.includes(event.id);
    if (!alreadyLinked) {
      target.eventIds.push(event.id);
    }
  }
}
|
|
392
|
+
/**
 * Inserts an edge, or refreshes it when an edge with the same
 * (type, from, to) id already exists.
 *
 * On refresh only `updatedAt` and `eventIds` (de-duplicated append) change.
 * New edges default to `inferred: false` and `confidence: 1`.
 *
 * @param {object} graph - Mutable graph whose `edges` is a Map keyed by id.
 * @param {object} input - `{ from, to, type, at, eventId, label?, inferred?, confidence? }`.
 */
function ensureEdge(graph, input) {
  const { from, to, type, at, eventId } = input;
  const id = workflowEdgeId(from, to, type);
  const edge = graph.edges.get(id);

  if (edge) {
    edge.updatedAt = at;
    if (!edge.eventIds.includes(eventId)) {
      edge.eventIds.push(eventId);
    }
    return;
  }

  // Properties are added in the original key order so serialized output
  // keeps the same shape.
  const created = { id, from, to, type };
  if (input.label) {
    created.label = input.label;
  }
  created.createdAt = at;
  created.updatedAt = at;
  created.eventIds = [eventId];
  created.inferred = input.inferred ?? false;
  created.confidence = input.confidence ?? 1;
  graph.edges.set(id, created);
}
|
|
415
|
+
/**
 * Converts the mutable Map-based graph into a plain snapshot: nodes and
 * edges become arrays sorted by id (localeCompare), and `appliedEventIds`
 * is copied.
 *
 * @param {object} graph - Mutable graph with Map `nodes` / `edges`.
 * @returns {{ runId: string, nodes: object[], edges: object[], appliedEventIds: string[] }}
 */
function freezeGraph(graph) {
  const byId = (left, right) => left.id.localeCompare(right.id);
  const nodes = Array.from(graph.nodes.values());
  const edges = Array.from(graph.edges.values());
  nodes.sort(byId);
  edges.sort(byId);
  return {
    runId: graph.runId,
    nodes,
    edges,
    appliedEventIds: Array.from(graph.appliedEventIds)
  };
}
|
|
423
|
+
/**
 * Returns a sorted copy of `events`: ascending `seq`, ties broken by
 * `ts.localeCompare`. The input is not mutated.
 *
 * @param {Iterable<object>} events
 * @returns {object[]}
 */
function sortEvents(events) {
  const ordered = [...events];
  ordered.sort((left, right) => {
    const bySeq = left.seq - right.seq;
    // A zero (or NaN) difference falls through to the timestamp comparison.
    return bySeq || left.ts.localeCompare(right.ts);
  });
  return ordered;
}
|
|
426
|
+
/**
 * Derives "SUCCEEDED" / "FAILED" from an event payload.
 *
 * A numeric `exitCode` decides on its own (0 means success). Otherwise the
 * result is "FAILED" when `payload.error` is `true` or any string, and
 * "SUCCEEDED" in every other case.
 *
 * @param {object} event - Trace event with a `payload` object.
 * @returns {"SUCCEEDED"|"FAILED"}
 */
function resultStatus(event) {
  const exitCode = numberPayload(event, "exitCode");
  if (exitCode !== undefined) {
    return exitCode === 0 ? "SUCCEEDED" : "FAILED";
  }
  const { error } = event.payload;
  const errored = error === true || typeof error === "string";
  return errored ? "FAILED" : "SUCCEEDED";
}
|
|
436
|
+
/**
 * Maps a file_* event kind to its edge type; any other kind maps to "UPDATES".
 *
 * @param {string} kind
 * @returns {"READS"|"EDITS"|"CREATES"|"DELETES"|"UPDATES"}
 */
function fileEdgeType(kind) {
  switch (kind) {
    case "file_read":
      return "READS";
    case "file_edit":
      return "EDITS";
    case "file_created":
      return "CREATES";
    case "file_deleted":
      return "DELETES";
    default:
      return "UPDATES";
  }
}
|
|
443
|
+
/**
 * Label for a message event: "Message: <role>" when the payload has a
 * non-empty string `role`, otherwise "Message".
 */
function messageLabel(event) {
  const role = stringPayload(event, "role");
  if (role) {
    return `Message: ${role}`;
  }
  return "Message";
}

/** Label for a tool call: payload `tool`, else payload `name`, else "Tool call". */
function toolLabel(event) {
  const tool = stringPayload(event, "tool");
  if (tool !== undefined) {
    return tool;
  }
  return stringPayload(event, "name") ?? "Tool call";
}

/** Label for a shell command: payload `command`, else "Command". */
function commandLabel(event) {
  const command = stringPayload(event, "command");
  return command ?? "Command";
}

/** Label for a search: payload `query`, else "Search". */
function searchLabel(event) {
  const query = stringPayload(event, "query");
  return query ?? "Search";
}
|
|
455
|
+
/**
 * Reads `event.payload[key]`, returning it only when it is a string.
 *
 * @param {object} event - Trace event with a `payload` object.
 * @param {string} key
 * @returns {string|undefined}
 */
function stringPayload(event, key) {
  const candidate = event.payload[key];
  if (typeof candidate !== "string") {
    return undefined;
  }
  return candidate;
}
|
|
459
|
+
/**
 * Returns the first non-blank string found in the payload at any of the
 * dotted `paths`, tried in order.
 *
 * @param {object} event - Trace event with a `payload`.
 * @param {string[]} paths - Dotted property paths.
 * @returns {string|undefined} The untrimmed value, or undefined if none match.
 */
function stringPayloadPath(event, paths) {
  for (const path of paths) {
    const candidate = payloadPath(event.payload, path);
    if (typeof candidate !== "string") {
      continue;
    }
    if (candidate.trim().length === 0) {
      continue;
    }
    return candidate;
  }
  return undefined;
}
|
|
468
|
+
/**
 * Finds the file path in an event payload by probing a fixed list of
 * payload locations in priority order.
 *
 * @param {object} event - Trace event with a `payload`.
 * @returns {string|undefined}
 */
function filePathPayload(event) {
  const candidatePaths = [
    "path",
    "file",
    "properties.file",
    "properties.path",
    "output.metadata.path",
    "input.args.filePath",
    "input.args.path"
  ];
  return stringPayloadPath(event, candidatePaths);
}
|
|
479
|
+
/**
 * Walks a dotted path through nested plain objects.
 *
 * @param {unknown} value - Root value.
 * @param {string} path - Dot-separated property names.
 * @returns {unknown} The value at the path, or undefined as soon as a step
 *   lands on something that is not a non-array object.
 */
function payloadPath(value, path) {
  const segments = path.split(".");
  let cursor = value;
  for (let index = 0; index < segments.length; index += 1) {
    if (!isRecord2(cursor)) {
      return undefined;
    }
    cursor = cursor[segments[index]];
  }
  return cursor;
}
|
|
487
|
+
/**
 * Reads `event.payload[key]`, returning it only when its typeof is "number".
 *
 * @param {object} event - Trace event with a `payload` object.
 * @param {string} key
 * @returns {number|undefined}
 */
function numberPayload(event, key) {
  const candidate = event.payload[key];
  if (typeof candidate !== "number") {
    return undefined;
  }
  return candidate;
}
|
|
491
|
+
/**
 * Reads `event.payload[key]` as an array of strings. Non-string items are
 * dropped; a non-array value yields an empty array.
 *
 * @param {object} event - Trace event with a `payload` object.
 * @param {string} key
 * @returns {string[]}
 */
function arrayPayload(event, key) {
  const candidate = event.payload[key];
  if (!Array.isArray(candidate)) {
    return [];
  }
  return candidate.filter((item) => typeof item === "string");
}
|
|
495
|
+
// Node / edge id builders. Each one prefixes the sanitized raw id
// (see stablePart) with the kind of node it identifies.

/** Node id for a run: "run:<sanitized runId>". */
function runNodeId(runId) {
  const safeRunId = stablePart(runId);
  return `run:${safeRunId}`;
}

/** Node id for a session: "session:<sanitized sessionId>". */
function sessionNodeId(sessionId) {
  const safeSessionId = stablePart(sessionId);
  return `session:${safeSessionId}`;
}

/** Node id for an agent: "agent:<sanitized agentId>". */
function agentNodeId(agentId) {
  const safeAgentId = stablePart(agentId);
  return `agent:${safeAgentId}`;
}

/** Node id for a turn: "turn:<sanitized turnId>". */
function turnNodeId(turnId) {
  const safeTurnId = stablePart(turnId);
  return `turn:${safeTurnId}`;
}

/** Node id for a file: "file:<sanitized path>". */
function fileNodeId(path) {
  const safePath = stablePart(path);
  return `file:${safePath}`;
}

/** Node id for the node that represents `event` itself. */
function eventNodeId(event) {
  const { id } = event;
  return eventNodeIdFromEventId(id);
}

/** Node id for an event given only its id: "event:<sanitized eventId>". */
function eventNodeIdFromEventId(eventId) {
  const safeEventId = stablePart(eventId);
  return `event:${safeEventId}`;
}

/** Edge id: "edge:<type>:<sanitized from>:<sanitized to>". */
function workflowEdgeId(from, to, type) {
  const safeFrom = stablePart(from);
  const safeTo = stablePart(to);
  return `edge:${type}:${safeFrom}:${safeTo}`;
}
|
|
519
|
+
/**
 * Sanitizes a raw id for use inside node / edge ids: every character other
 * than ASCII letters, digits, "_", ".", ":" and "-" becomes "_".
 *
 * @param {string} value
 * @returns {string}
 */
function stablePart(value) {
  const disallowedChars = /[^a-zA-Z0-9_.:-]/g;
  return value.replace(disallowedChars, "_");
}
|
|
522
|
+
/**
 * Reports whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord2(value) {
  const isObject = value !== null && typeof value === "object";
  return isObject && !Array.isArray(value);
}
|
|
525
|
+
|
|
526
|
+
// packages/core/src/audit.ts
|
|
527
|
+
// Built-in promise-check rules. Each rule has:
//   name     - short identifier used as the claim prefix,
//   pattern  - regex matched against the model's message text,
//   verifier - returns ids of events that count as evidence for the claim,
//   severity - reported when no evidence is found.
var claimRules = [
  {
    name: "tests-run",
    pattern: /\b(?:ran|run|running|executed)\s+(?:the\s+)?(?:tests?|test suite|checks?)\b/i,
    // Evidence: bash events whose command mentions a test / check runner.
    verifier: (events) => {
      const evidenceIds = [];
      for (const event of events) {
        if (event.kind !== "bash") {
          continue;
        }
        const command = stringPayload2(event, "command");
        if (command !== undefined && /\b(test|check|vitest|pytest|cargo test|npm test)\b/i.test(command)) {
          evidenceIds.push(event.id);
        }
      }
      return evidenceIds;
    },
    severity: "warning"
  },
  {
    name: "file-updated",
    pattern: /\b(?:updated|edited|patched|changed|modified)\s+(?:the\s+)?(?:file|code|implementation|readme|docs?)\b/i,
    // Evidence: any file_edit event.
    verifier: (events) => {
      const evidenceIds = [];
      for (const event of events) {
        if (event.kind === "file_edit") {
          evidenceIds.push(event.id);
        }
      }
      return evidenceIds;
    },
    severity: "warning"
  },
  {
    name: "committed",
    pattern: /\b(?:committed|created a commit|made a commit)\b/i,
    // Evidence: any git_commit event.
    verifier: (events) => {
      const evidenceIds = [];
      for (const event of events) {
        if (event.kind === "git_commit") {
          evidenceIds.push(event.id);
        }
      }
      return evidenceIds;
    },
    severity: "risk"
  }
];
|
|
547
|
+
/**
 * Checks model-claimed messages against the built-in claim rules.
 *
 * For every message event flagged `evidence.claimedByModel`, each rule whose
 * pattern matches the message text (payload `text`, else `content`) yields
 * one check. The check is "verified" (severity "info") when the rule's
 * verifier finds any evidence events in the whole event list, otherwise
 * "unverified" with the rule's own severity.
 *
 * @param {object[]} events - All trace events of the run.
 * @returns {object[]} Checks in message order, then rule order.
 */
function evaluatePromiseChecks(events) {
  const checks = [];
  const claimedMessages = events.filter(
    (event) => event.kind === "message" && event.evidence.claimedByModel
  );

  claimedMessages.forEach((message) => {
    const text = stringPayload2(message, "text") ?? stringPayload2(message, "content") ?? "";
    claimRules.forEach((rule) => {
      if (!rule.pattern.test(text)) {
        return;
      }
      const evidenceEventIds = rule.verifier(events);
      const hasEvidence = evidenceEventIds.length > 0;
      checks.push({
        claim: `${rule.name}: ${shorten(text)}`,
        status: hasEvidence ? "verified" : "unverified",
        evidenceEventIds,
        severity: hasEvidence ? "info" : rule.severity
      });
    });
  });

  return checks;
}
|
|
567
|
+
/**
 * Renders a Markdown handoff document from a frozen workflow graph.
 *
 * @param {object} graph - Frozen graph (`nodes` / `edges` are arrays).
 * @param {object[]} [checks=[]] - Promise checks to list.
 * @returns {string} Markdown with lines joined by "\n".
 */
function generateHandoffMarkdown(graph, checks = []) {
  const { nodes } = graph;
  const ofType = (type) => nodes.filter((node) => node.type === type);

  const files = ofType("FILE");
  const decisions = ofType("DECISION");
  const failures = nodes.filter((node) => node.status === "FAILED");
  const blockers = nodes.filter((node) => node.type === "BLOCKER" || node.status === "BLOCKED");
  const commands = ofType("COMMAND");

  let promiseChecks = `- No model claims matched the built-in promise-check rules.`;
  if (checks.length !== 0) {
    promiseChecks = checks
      .map((check) => `- ${check.status.toUpperCase()}: ${check.claim} (${check.evidenceEventIds.join(", ") || "no evidence"})`)
      .join("\n");
  }

  // Blockers take precedence over failures when choosing the advice.
  let nextAction;
  if (blockers.length > 0) {
    nextAction = `Resolve or approve the blocker before continuing.`;
  } else if (failures.length > 0) {
    nextAction = `Inspect the latest failed command or error node before editing again.`;
  } else {
    nextAction = `Continue from the latest decision or verification node.`;
  }

  const lines = [`# Agent-Blackbox Handoff`, ``];
  // Each section is a heading, its body, then a blank separator line.
  const addSection = (heading, ...body) => {
    lines.push(heading, ...body, ``);
  };

  addSection(`## Current Objective`, `Run: ${graph.runId}`);
  addSection(
    `## What Has Been Observed`,
    `- Events applied: ${graph.appliedEventIds.length}`,
    `- Nodes: ${nodes.length}`,
    `- Edges: ${graph.edges.length}`
  );
  addSection(`## Files In Play`, renderNodeList(files));
  addSection(`## Decisions`, renderNodeList(decisions));
  addSection(`## Commands / Verification`, renderNodeList(commands));
  addSection(`## Failed Attempts`, renderNodeList(failures));
  addSection(`## Blockers / Approval Needed`, renderNodeList(blockers));
  addSection(`## Promise Checks`, promiseChecks);
  // The last section has no trailing blank line.
  lines.push(`## Next Safe Action`, nextAction);

  return lines.join("\n");
}
|
|
606
|
+
/**
 * Renders nodes as a Markdown bullet list, one line per node:
 * "- <label> [<status>] events=<comma-separated ids or 'none'>".
 *
 * @param {object[]} nodes
 * @returns {string} "- None recorded." for an empty list.
 */
function renderNodeList(nodes) {
  if (nodes.length === 0) {
    return "- None recorded.";
  }
  const bullets = nodes.map((node) => {
    const events = node.eventIds.join(",") || "none";
    return `- ${node.label} [${node.status}] events=${events}`;
  });
  return bullets.join("\n");
}
|
|
612
|
+
/**
 * Reads `event.payload[key]`, returning it only when it is a string.
 *
 * @param {object} event - Trace event with a `payload` object.
 * @param {string} key
 * @returns {string|undefined}
 */
function stringPayload2(event, key) {
  const { payload } = event;
  const candidate = payload[key];
  return typeof candidate !== "string" ? undefined : candidate;
}
|
|
616
|
+
/**
 * Collapses whitespace runs to single spaces, trims, and caps the result at
 * 120 UTF-16 code units by keeping the first 117 and appending "...".
 *
 * If the cut would land between the two halves of a surrogate pair (e.g. in
 * the middle of an emoji), the dangling high surrogate is dropped so the
 * result never contains a lone surrogate.
 *
 * @param {string} value - Text to shorten.
 * @returns {string} The normalized, possibly truncated text.
 */
function shorten(value) {
  const normalized = value.replace(/\s+/g, " ").trim();
  if (normalized.length <= 120) {
    return normalized;
  }
  let head = normalized.slice(0, 117);
  const lastUnit = head.charCodeAt(head.length - 1);
  // 0xD800-0xDBFF is the high-surrogate range; ending on one means its
  // low-surrogate partner was cut off by the slice.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head}...`;
}
|
|
620
|
+
|
|
621
|
+
// packages/core/src/efficiency.ts
|
|
622
|
+
/**
 * Turns every non-"good" metric of an efficiency report into a suggestion,
 * skipping metrics for which no action text is produced.
 *
 * @param {{ metrics: Iterable<object> }} report
 * @returns {object[]} Suggestions in metric order, each with
 *   `source: "deterministic"`.
 */
function buildDeterministicSuggestions(report) {
  const suggestions = [];
  for (const metric of report.metrics) {
    const needsAttention = metric.status !== "good";
    const action = needsAttention ? deterministicActionFor(metric) : undefined;
    if (action) {
      const { id: metricId, status: severity, label: title } = metric;
      suggestions.push({ metricId, severity, title, action, source: "deterministic" });
    }
  }
  return suggestions;
}
|
|
638
|
+
/**
 * Builds the advice text for one efficiency metric.
 *
 * Known metric ids get a canned recommendation that interpolates the
 * metric's `display`, its `offenders` (joined with ", ") and, where relevant,
 * its reclaimable token count. Unknown ids fall back to `metric.detail`.
 *
 * @param {object} metric - Metric with `id` and optional `display`,
 *   `offenders`, `reclaimableTokens`, `detail`.
 * @returns {string|undefined}
 */
function deterministicActionFor(metric) {
  // Computed up front for every metric id, as before.
  let reclaim = "";
  if (metric.reclaimableTokens) {
    reclaim = ` (~${formatTokens(metric.reclaimableTokens)} reclaimable)`;
  }
  const hasOffenders = Boolean(metric.offenders) && metric.offenders.length > 0;
  const worst = hasOffenders ? metric.offenders.join(", ") : "";

  // One lazy text builder per known metric id.
  const builders = new Map([
    ["context-pressure", () => `Peak input hit ${metric.display}. Compact: summarise resolved turns into a short note of decisions + open bugs and start a fresh window, clear raw tool outputs you've already acted on, and push deep exploration into a sub-agent that returns only a ~1-2k-token summary.`],
    ["cache-hit", () => `Only ${metric.display} was cache-served, and cached tokens are ~10\xD7 cheaper. Keep the prompt prefix byte-stable (no timestamps or per-run data in the system prompt), append new turns instead of editing old ones, and mask unused tools rather than adding/removing them \u2014 any change voids the cache from that point on.`],
    ["redundant-reads", () => `${worst ? `${worst} were re-read` : "Files were re-read"}${reclaim}. Read each file once and keep it in working memory or a notes file; after an edit, re-read only the changed line range, not the whole file.`],
    ["read-amplification", () => `Read ${metric.display} more text than was edited${worst ? ` (${worst} dominated)` : ""}. Locate with grep/symbol search first, then read only the relevant line range; load a repo map instead of whole files up front.`],
    ["large-injections", () => `A single output added ${metric.display}${worst ? ` (${worst})` : ""}. Scope it \u2014 narrow paths, add a max-count/head limit, or pipe through a summary \u2014 or have a sub-agent absorb it and return just the distilled result.`],
    ["retry-waste", () => `${worst ? `${worst} was re-run after failing` : "Failing commands were re-run"}${reclaim}. Read the first failure's stderr and fix the root cause before retrying once; keep the failed attempt in context so the model doesn't repeat it.`],
    ["yield-density", () => `A lot of context produced few concrete changes (${metric.display}). Split into smaller verifiable steps, recite the current goal/todo each step to keep it in recent tokens (models under-use the middle of long contexts), and offload exploration to a sub-agent.`],
    ["tool-overhead", () => `Many tool calls per outcome (${metric.display}). Batch related edits into one change, drop exploratory calls that don't lead to an edit, and trim to a minimal non-overlapping tool set.`]
  ]);

  const build = builders.get(metric.id);
  return build ? build() : metric.detail;
}
|
|
662
|
+
// Rough conversion factor used to estimate token counts from character counts.
var CHARS_PER_TOKEN = 4;

/** Estimates a token count from a character count (rounded to nearest). */
var estimateTokens = (chars) => {
  const tokens = chars / CHARS_PER_TOKEN;
  return Math.round(tokens);
};

/**
 * Last non-empty segment of a path split on "/" or "\"; the path itself when
 * it has no non-empty segment.
 */
var baseName = (path) => {
  const segments = path.split(/[\\/]/).filter((segment) => segment !== "");
  return segments.length > 0 ? segments[segments.length - 1] : path;
};

/** First whitespace-delimited word of a command; the command itself when that word is empty. */
var commandVerb = (command) => {
  const [verb] = command.trim().split(/\s+/);
  return verb || command;
};
|
|
666
|
+
function computeEfficiencyReport(events) {
|
|
667
|
+
let finalSnapshot;
|
|
668
|
+
let peakInput = 0;
|
|
669
|
+
let hasRealTokens = false;
|
|
670
|
+
for (const event of events) {
|
|
671
|
+
const snap = readTokenSnapshot(event);
|
|
672
|
+
if (!snap) continue;
|
|
673
|
+
hasRealTokens = true;
|
|
674
|
+
finalSnapshot = snap;
|
|
675
|
+
peakInput = Math.max(peakInput, snap.input);
|
|
676
|
+
}
|
|
677
|
+
const reads = [];
|
|
678
|
+
const edits = [];
|
|
679
|
+
const injections = [];
|
|
680
|
+
const bashRuns = [];
|
|
681
|
+
for (const event of events) {
|
|
682
|
+
if (event.kind === "file_read") {
|
|
683
|
+
const chars = numberAt(event, "chars");
|
|
684
|
+
if (chars) reads.push({ path: stringAt(event, "path") ?? event.id, tokens: estimateTokens(chars), id: event.id });
|
|
685
|
+
} else if (event.kind === "file_edit" || event.kind === "file_created") {
|
|
686
|
+
const chars = numberAt(event, "chars");
|
|
687
|
+
if (chars) edits.push({ path: stringAt(event, "path") ?? event.id, tokens: estimateTokens(chars), id: event.id });
|
|
688
|
+
} else if (event.kind === "bash") {
|
|
689
|
+
const chars = numberAt(event, "outputChars") ?? 0;
|
|
690
|
+
const tokens = estimateTokens(chars);
|
|
691
|
+
bashRuns.push({ command: stringAt(event, "command") ?? "", exitCode: numberAt(event, "exitCode"), tokens, id: event.id });
|
|
692
|
+
if (tokens > 0) injections.push({ label: stringAt(event, "command") ?? "command", tokens, id: event.id });
|
|
693
|
+
} else if (event.kind === "tool_result") {
|
|
694
|
+
const chars = numberAt(event, "outputChars");
|
|
695
|
+
if (chars) {
|
|
696
|
+
const label = stringAt(event, "skill") ?? stringAt(event, "tool") ?? "tool";
|
|
697
|
+
injections.push({ label, tokens: estimateTokens(chars), id: event.id });
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
const totalReadTokens = reads.reduce((sum, r) => sum + r.tokens, 0);
|
|
702
|
+
const totalEditTokens = edits.reduce((sum, e) => sum + e.tokens, 0);
|
|
703
|
+
const editedPaths = new Set(edits.map((e) => e.path));
|
|
704
|
+
const okCommands = bashRuns.filter((b) => b.exitCode === 0).length;
|
|
705
|
+
const totalInputTokens = hasRealTokens ? finalSnapshot.input : estimateTokens(reads.reduce((s, r) => s + r.tokens * CHARS_PER_TOKEN, 0) + injections.reduce((s, i) => s + i.tokens * CHARS_PER_TOKEN, 0));
|
|
706
|
+
const peak = hasRealTokens ? peakInput : totalInputTokens;
|
|
707
|
+
const metrics = [];
|
|
708
|
+
{
|
|
709
|
+
const { score, status } = lowerIsBetter(peak, 1e5, 18e4);
|
|
710
|
+
metrics.push({
|
|
711
|
+
weight: 1.5,
|
|
712
|
+
metric: {
|
|
713
|
+
id: "context-pressure",
|
|
714
|
+
label: "Context pressure",
|
|
715
|
+
value: peak,
|
|
716
|
+
unit: "tokens",
|
|
717
|
+
display: formatTokens(peak),
|
|
718
|
+
score,
|
|
719
|
+
status,
|
|
720
|
+
detail: status === "good" ? "The context window stayed comfortably sized." : `Peak input reached ${formatTokens(peak)} \u2014 large prompts cost latency and money on every turn.`,
|
|
721
|
+
evidenceEventIds: []
|
|
722
|
+
}
|
|
723
|
+
});
|
|
724
|
+
}
|
|
725
|
+
{
|
|
726
|
+
const cacheRead = finalSnapshot?.cacheRead ?? 0;
|
|
727
|
+
const fresh = finalSnapshot?.input ?? 0;
|
|
728
|
+
const denom = cacheRead + fresh;
|
|
729
|
+
const hasCacheTelemetry = hasRealTokens && (cacheRead > 0 || (finalSnapshot?.cacheWrite ?? 0) > 0);
|
|
730
|
+
const ratio = denom > 0 ? cacheRead / denom : 0;
|
|
731
|
+
if (hasCacheTelemetry) {
|
|
732
|
+
const { score, status } = higherIsBetter(ratio, 0.6, 0.3);
|
|
733
|
+
metrics.push({
|
|
734
|
+
weight: 1,
|
|
735
|
+
metric: {
|
|
736
|
+
id: "cache-hit",
|
|
737
|
+
label: "Cache hit ratio",
|
|
738
|
+
value: ratio,
|
|
739
|
+
unit: "%",
|
|
740
|
+
display: `${Math.round(ratio * 100)}%`,
|
|
741
|
+
score,
|
|
742
|
+
status,
|
|
743
|
+
detail: status === "good" ? "Most of the prompt was served from cache." : "Low prompt-cache reuse \u2014 stabilise the prompt prefix so more context is cached.",
|
|
744
|
+
evidenceEventIds: []
|
|
745
|
+
}
|
|
746
|
+
});
|
|
747
|
+
} else {
|
|
748
|
+
metrics.push({
|
|
749
|
+
weight: 0,
|
|
750
|
+
metric: {
|
|
751
|
+
id: "cache-hit",
|
|
752
|
+
label: "Cache hit ratio",
|
|
753
|
+
value: 0,
|
|
754
|
+
unit: "%",
|
|
755
|
+
display: "n/a",
|
|
756
|
+
score: 100,
|
|
757
|
+
status: "good",
|
|
758
|
+
detail: "This model reported no cache telemetry.",
|
|
759
|
+
evidenceEventIds: []
|
|
760
|
+
}
|
|
761
|
+
});
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
{
|
|
765
|
+
const byPath = /* @__PURE__ */ new Map();
|
|
766
|
+
for (const r of reads) {
|
|
767
|
+
const list = byPath.get(r.path) ?? [];
|
|
768
|
+
list.push({ tokens: r.tokens, id: r.id });
|
|
769
|
+
byPath.set(r.path, list);
|
|
770
|
+
}
|
|
771
|
+
let reclaimable = 0;
|
|
772
|
+
const evidence = [];
|
|
773
|
+
let reReadPaths = 0;
|
|
774
|
+
const offenders = [];
|
|
775
|
+
for (const [path, list] of byPath.entries()) {
|
|
776
|
+
if (list.length > 1) {
|
|
777
|
+
reReadPaths += 1;
|
|
778
|
+
let extraTokens = 0;
|
|
779
|
+
for (const extra of list.slice(1)) {
|
|
780
|
+
reclaimable += extra.tokens;
|
|
781
|
+
extraTokens += extra.tokens;
|
|
782
|
+
evidence.push(extra.id);
|
|
783
|
+
}
|
|
784
|
+
offenders.push({ label: `${baseName(path)} \xD7${list.length}`, reclaim: extraTokens });
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
const reReadOffenders = offenders.sort((a, b) => b.reclaim - a.reclaim).slice(0, 3).map((o) => o.label);
|
|
788
|
+
let { score, status } = lowerIsBetter(reReadPaths, 0, 2);
|
|
789
|
+
if (reclaimable >= 1e4 && status !== "bad") {
|
|
790
|
+
status = "bad";
|
|
791
|
+
score = Math.min(score, 30);
|
|
792
|
+
}
|
|
793
|
+
metrics.push({
|
|
794
|
+
weight: 2,
|
|
795
|
+
metric: {
|
|
796
|
+
id: "redundant-reads",
|
|
797
|
+
label: "Redundant re-reads",
|
|
798
|
+
value: reReadPaths,
|
|
799
|
+
unit: "count",
|
|
800
|
+
display: reReadPaths === 0 ? "none" : `${reReadPaths} ${reReadPaths === 1 ? "file" : "files"}`,
|
|
801
|
+
score,
|
|
802
|
+
status,
|
|
803
|
+
detail: reReadPaths === 0 ? "No file was read more than once." : `${reReadPaths} file(s) were read again \u2014 about ${formatTokens(reclaimable)} of context was reloaded.`,
|
|
804
|
+
evidenceEventIds: evidence,
|
|
805
|
+
reclaimableTokens: reclaimable,
|
|
806
|
+
...reReadOffenders.length > 0 ? { offenders: reReadOffenders } : {}
|
|
807
|
+
}
|
|
808
|
+
});
|
|
809
|
+
}
|
|
810
|
+
if (edits.length > 0 && totalReadTokens > 0) {
|
|
811
|
+
const ratio = totalReadTokens / Math.max(totalEditTokens, 1);
|
|
812
|
+
const { score, status } = lowerIsBetter(ratio, 40, 120);
|
|
813
|
+
const readByPath = /* @__PURE__ */ new Map();
|
|
814
|
+
for (const r of reads) readByPath.set(r.path, (readByPath.get(r.path) ?? 0) + r.tokens);
|
|
815
|
+
const topReaders = [...readByPath.entries()].sort((a, b) => b[1] - a[1]).slice(0, 3).map(([path, tokens]) => `${baseName(path)} ~${formatTokens(tokens)}`);
|
|
816
|
+
metrics.push({
|
|
817
|
+
weight: 2,
|
|
818
|
+
metric: {
|
|
819
|
+
id: "read-amplification",
|
|
820
|
+
label: "Read amplification",
|
|
821
|
+
value: ratio,
|
|
822
|
+
unit: "x",
|
|
823
|
+
display: `${ratio.toFixed(ratio >= 10 ? 0 : 1)}\xD7`,
|
|
824
|
+
score,
|
|
825
|
+
status,
|
|
826
|
+
detail: status === "good" ? "Reads were roughly proportional to the edits made." : `Read ${formatTokens(totalReadTokens)} to write ${formatTokens(totalEditTokens)} \u2014 pull in less, use ranged reads.`,
|
|
827
|
+
evidenceEventIds: reads.slice(0, 5).map((r) => r.id),
|
|
828
|
+
...status !== "good" && topReaders.length > 0 ? { offenders: topReaders } : {}
|
|
829
|
+
}
|
|
830
|
+
});
|
|
831
|
+
}
|
|
832
|
+
{
|
|
833
|
+
const sorted = [...injections].sort((a, b) => b.tokens - a.tokens);
|
|
834
|
+
const biggest = sorted[0]?.tokens ?? 0;
|
|
835
|
+
const over5k = sorted.filter((i) => i.tokens >= 5e3);
|
|
836
|
+
const { score, status } = lowerIsBetter(biggest, 5e3, 15e3);
|
|
837
|
+
const injectionOffenders = over5k.slice(0, 3).map((i) => `${commandVerb(i.label)} ~${formatTokens(i.tokens)}`);
|
|
838
|
+
metrics.push({
|
|
839
|
+
weight: 1.5,
|
|
840
|
+
metric: {
|
|
841
|
+
id: "large-injections",
|
|
842
|
+
label: "Large context injections",
|
|
843
|
+
value: biggest,
|
|
844
|
+
unit: "tokens",
|
|
845
|
+
display: biggest >= 2e3 ? formatTokens(biggest) : "none",
|
|
846
|
+
score,
|
|
847
|
+
status,
|
|
848
|
+
detail: over5k.length === 0 ? "No single tool output flooded the context." : `${over5k.length} output(s) added 5k+ tokens (largest ${formatTokens(biggest)}) \u2014 scope greps/reads or summarise.`,
|
|
849
|
+
evidenceEventIds: over5k.map((i) => i.id),
|
|
850
|
+
...injectionOffenders.length > 0 ? { offenders: injectionOffenders } : {}
|
|
851
|
+
}
|
|
852
|
+
});
|
|
853
|
+
}
|
|
854
|
+
{
|
|
855
|
+
const byCommand = /* @__PURE__ */ new Map();
|
|
856
|
+
for (const b of bashRuns) {
|
|
857
|
+
if (!b.command) continue;
|
|
858
|
+
const list = byCommand.get(b.command) ?? [];
|
|
859
|
+
list.push({ exitCode: b.exitCode, tokens: b.tokens, id: b.id });
|
|
860
|
+
byCommand.set(b.command, list);
|
|
861
|
+
}
|
|
862
|
+
let wasted = 0;
|
|
863
|
+
let retries = 0;
|
|
864
|
+
const evidence = [];
|
|
865
|
+
const offenders = [];
|
|
866
|
+
for (const [command, list] of byCommand.entries()) {
|
|
867
|
+
if (list.length <= 1) continue;
|
|
868
|
+
retries += list.length - 1;
|
|
869
|
+
for (const attempt of list) {
|
|
870
|
+
if (attempt.exitCode !== 0) {
|
|
871
|
+
wasted += attempt.tokens;
|
|
872
|
+
evidence.push(attempt.id);
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
offenders.push({ label: `${commandVerb(command)} \xD7${list.length}`, runs: list.length });
|
|
876
|
+
}
|
|
877
|
+
const retryOffenders = offenders.sort((a, b) => b.runs - a.runs).slice(0, 3).map((o) => o.label);
|
|
878
|
+
const { score, status } = lowerIsBetter(retries, 0, 2);
|
|
879
|
+
metrics.push({
|
|
880
|
+
weight: 2,
|
|
881
|
+
metric: {
|
|
882
|
+
id: "retry-waste",
|
|
883
|
+
label: "Retry waste",
|
|
884
|
+
value: retries,
|
|
885
|
+
unit: "count",
|
|
886
|
+
display: retries === 0 ? "none" : `${retries}`,
|
|
887
|
+
score,
|
|
888
|
+
status,
|
|
889
|
+
detail: retries === 0 ? "No command was re-run after failing." : `${retries} re-run(s) of failing commands burned about ${formatTokens(wasted)}.`,
|
|
890
|
+
evidenceEventIds: evidence,
|
|
891
|
+
reclaimableTokens: wasted,
|
|
892
|
+
...retryOffenders.length > 0 ? { offenders: retryOffenders } : {}
|
|
893
|
+
}
|
|
894
|
+
});
|
|
895
|
+
}
|
|
896
|
+
if (totalInputTokens > 0) {
|
|
897
|
+
const outcomes = editedPaths.size + okCommands;
|
|
898
|
+
const density = outcomes / (totalInputTokens / 1e3);
|
|
899
|
+
const { score, status } = higherIsBetter(density, 0.05, 0.02);
|
|
900
|
+
metrics.push({
|
|
901
|
+
weight: 1,
|
|
902
|
+
metric: {
|
|
903
|
+
id: "yield-density",
|
|
904
|
+
label: "Yield density",
|
|
905
|
+
value: density,
|
|
906
|
+
unit: "ratio",
|
|
907
|
+
display: `${density.toFixed(3)}/k`,
|
|
908
|
+
score,
|
|
909
|
+
status,
|
|
910
|
+
detail: status === "good" ? "The run turned tokens into concrete changes efficiently." : `${outcomes} outcome(s) across ${formatTokens(totalInputTokens)} \u2014 a lot of context for little change.`,
|
|
911
|
+
evidenceEventIds: []
|
|
912
|
+
}
|
|
913
|
+
});
|
|
914
|
+
}
|
|
915
|
+
{
|
|
916
|
+
const toolCalls = events.filter((e) => e.kind === "tool_call").length;
|
|
917
|
+
const outcomes = Math.max(1, reads.length + edits.length + bashRuns.length);
|
|
918
|
+
const ratio = toolCalls / outcomes;
|
|
919
|
+
const { score, status } = lowerIsBetter(ratio, 2, 4);
|
|
920
|
+
metrics.push({
|
|
921
|
+
weight: 0.5,
|
|
922
|
+
metric: {
|
|
923
|
+
id: "tool-overhead",
|
|
924
|
+
label: "Tool overhead",
|
|
925
|
+
value: ratio,
|
|
926
|
+
unit: "ratio",
|
|
927
|
+
display: `${ratio.toFixed(1)}\xD7`,
|
|
928
|
+
score,
|
|
929
|
+
status,
|
|
930
|
+
detail: status === "good" ? "Tool calls translated into work without much churn." : "Many tool calls relative to concrete outcomes.",
|
|
931
|
+
evidenceEventIds: []
|
|
932
|
+
}
|
|
933
|
+
});
|
|
934
|
+
}
|
|
935
|
+
const weighted = metrics.filter((m) => m.weight > 0);
|
|
936
|
+
const overallScore = weighted.length > 0 ? Math.round(weighted.reduce((s, m) => s + m.metric.score * m.weight, 0) / weighted.reduce((s, m) => s + m.weight, 0)) : 100;
|
|
937
|
+
const reclaimableTokens = metrics.reduce((s, m) => s + (m.metric.reclaimableTokens ?? 0), 0);
|
|
938
|
+
return {
|
|
939
|
+
overallScore,
|
|
940
|
+
status: overallScore >= 75 ? "good" : overallScore >= 50 ? "warn" : "bad",
|
|
941
|
+
headline: buildHeadline(totalInputTokens, finalSnapshot, reclaimableTokens, hasRealTokens),
|
|
942
|
+
totalInputTokens,
|
|
943
|
+
reclaimableTokens,
|
|
944
|
+
estimated: !hasRealTokens,
|
|
945
|
+
metrics: metrics.map((m) => m.metric)
|
|
946
|
+
};
|
|
947
|
+
}
|
|
948
|
+
/**
 * Assemble the one-line summary shown at the top of an efficiency report.
 *
 * @param {number} totalInput - Input token total; omitted from the headline when not positive.
 * @param {{input: number, cacheRead: number, cacheWrite: number}|undefined} snap - Token snapshot, if any.
 * @param {number} reclaimable - Tokens judged reclaimable; omitted when not positive.
 * @param {boolean} hasRealTokens - When false the input figure is prefixed with "~".
 * @returns {string} Segments joined with a middle dot; empty string when nothing applies.
 */
function buildHeadline(totalInput, snap, reclaimable, hasRealTokens) {
  const segments = [];
  if (totalInput > 0) {
    const approx = hasRealTokens ? "" : "~";
    segments.push(`${approx}${formatTokens(totalInput)} input`);
  }
  // Cache share is only reported when the snapshot shows any cache activity.
  if (snap && (snap.cacheRead > 0 || snap.cacheWrite > 0)) {
    const total = snap.cacheRead + snap.input;
    if (total > 0) {
      const percent = Math.round(snap.cacheRead / total * 100);
      segments.push(`cache ${percent}%`);
    }
  }
  if (reclaimable > 0) {
    segments.push(`~${formatTokens(reclaimable)} reclaimable`);
  }
  return segments.join(" \xB7 ");
}
|
|
958
|
+
/**
 * Score a metric where smaller values are healthier.
 *
 * Values up to `warnAt` map onto 100..80 ("good"), values up to `badAt`
 * onto 80..40 ("warn"), and anything larger onto 40..0 ("bad"), reaching 0
 * at twice `badAt`.
 *
 * @param {number} value - Measured value.
 * @param {number} warnAt - Upper bound of the "good" band.
 * @param {number} badAt - Upper bound of the "warn" band.
 * @returns {{score: number, status: "good"|"warn"|"bad"}}
 */
function lowerIsBetter(value, warnAt, badAt) {
  let score;
  let status;
  if (value <= warnAt) {
    status = "good";
    score = Math.round(lerp(value, 0, warnAt, 100, 80));
  } else if (value <= badAt) {
    status = "warn";
    score = Math.round(lerp(value, warnAt, badAt, 80, 40));
  } else {
    status = "bad";
    score = Math.max(0, Math.round(lerp(value, badAt, badAt * 2, 40, 0)));
  }
  return { score, status };
}
|
|
963
|
+
/**
 * Score a metric where larger values are healthier.
 *
 * Values at or above `goodAt` map onto 80..100 ("good", capped at 100),
 * values at or above `badAt` onto 40..80 ("warn"), and anything lower onto
 * 0..40 ("bad").
 *
 * @param {number} value - Measured value.
 * @param {number} goodAt - Lower bound of the "good" band.
 * @param {number} badAt - Lower bound of the "warn" band.
 * @returns {{score: number, status: "good"|"warn"|"bad"}}
 */
function higherIsBetter(value, goodAt, badAt) {
  let score;
  let status;
  if (value >= goodAt) {
    status = "good";
    score = Math.min(100, Math.round(lerp(value, goodAt, goodAt * 1.5, 80, 100)));
  } else if (value >= badAt) {
    status = "warn";
    score = Math.round(lerp(value, badAt, goodAt, 40, 80));
  } else {
    status = "bad";
    score = Math.max(0, Math.round(lerp(value, 0, badAt, 0, 40)));
  }
  return { score, status };
}
|
|
968
|
+
/**
 * Clamped linear interpolation from the input range onto the output range.
 *
 * @param {number} value - Value to map.
 * @param {number} inMin - Input value that maps to `outMin`.
 * @param {number} inMax - Input value that maps to `outMax`.
 * @param {number} outMin - Output at (or below) `inMin`.
 * @param {number} outMax - Output at (or above) `inMax`.
 * @returns {number} The mapped value; `outMin` when the input range is empty.
 */
function lerp(value, inMin, inMax, outMin, outMax) {
  // A zero-width input range would divide by zero below.
  if (inMin === inMax) {
    return outMin;
  }
  const fraction = (value - inMin) / (inMax - inMin);
  const clamped = Math.max(0, Math.min(1, fraction));
  return outMin + clamped * (outMax - outMin);
}
|
|
973
|
+
/**
 * Read a finite numeric field from an event's payload.
 *
 * @param {{payload: object}} event - Event whose payload is inspected.
 * @param {string} key - Payload property name.
 * @returns {number|undefined} The value when it is a finite number, else undefined.
 */
function numberAt(event, key) {
  const candidate = event.payload[key];
  if (typeof candidate !== "number") {
    return void 0;
  }
  // Rejects NaN and +/-Infinity.
  return Number.isFinite(candidate) ? candidate : void 0;
}
|
|
977
|
+
/**
 * Read a non-empty string field from an event's payload.
 *
 * @param {{payload: object}} event - Event whose payload is inspected.
 * @param {string} key - Payload property name.
 * @returns {string|undefined} The value when it is a non-empty string, else undefined.
 */
function stringAt(event, key) {
  const candidate = event.payload[key];
  if (typeof candidate !== "string") {
    return void 0;
  }
  // Empty strings are treated the same as a missing field.
  return candidate.length > 0 ? candidate : void 0;
}
|
|
981
|
+
/**
 * Extract a token-usage snapshot from an event payload.
 *
 * Token data is looked up under three roots, in priority order:
 * `properties.info.tokens`, `properties.tokens`, and `tokens`. A snapshot is
 * produced only when at least one root holds a number or an object.
 *
 * Cache counters are accepted in nested (`cache.read`) or flat (`cacheRead`)
 * form. Both forms are now probed under every root; previously the flat form
 * was not checked under `properties.tokens`.
 *
 * @param {{payload: object}} event - Event whose payload is inspected.
 * @returns {{input: number, output: number, reasoning: number, cacheRead: number, cacheWrite: number}|undefined}
 *   Snapshot with missing counters defaulted to 0, or undefined when the
 *   payload carries no token data.
 */
function readTokenSnapshot(event) {
  const roots = ["properties.info.tokens", "properties.tokens", "tokens"];
  const present = roots.some(
    (root) => deepNumber(event.payload, root) !== void 0 || isRecordAtPath(event.payload, root)
  );
  if (!present) return void 0;
  // For each suffix (in order) try every root (in order); first finite number wins.
  const counter = (...suffixes) => {
    const paths = suffixes.flatMap((suffix) => roots.map((root) => `${root}.${suffix}`));
    return deepNumber(event.payload, paths) ?? 0;
  };
  return {
    input: counter("input"),
    output: counter("output"),
    reasoning: counter("reasoning"),
    cacheRead: counter("cache.read", "cacheRead"),
    cacheWrite: counter("cache.write", "cacheWrite")
  };
}
|
|
1004
|
+
/**
 * Return the first finite number found at any of the given dotted paths.
 *
 * @param {unknown} payload - Object to search.
 * @param {string|string[]} path - One dotted path, or several tried in order.
 * @returns {number|undefined} First finite number found, else undefined.
 */
function deepNumber(payload, path) {
  const candidates = Array.isArray(path) ? path : [path];
  for (let i = 0; i < candidates.length; i += 1) {
    const found = walk(payload, candidates[i]);
    const usable = typeof found === "number" && Number.isFinite(found);
    if (usable) {
      return found;
    }
  }
  return void 0;
}
|
|
1012
|
+
/**
 * Report whether the value at a dotted path is a non-null object
 * (arrays satisfy this check too).
 *
 * @param {unknown} payload - Object to search.
 * @param {string} path - Dotted property path.
 * @returns {boolean}
 */
function isRecordAtPath(payload, path) {
  const target = walk(payload, path);
  return target !== null && typeof target === "object";
}
|
|
1016
|
+
/**
 * Follow a dotted property path through nested objects.
 *
 * @param {unknown} payload - Starting value.
 * @param {string} path - Property names separated by ".".
 * @returns {unknown} The value at the path, or undefined as soon as a
 *   non-object (or null) is met before the path is exhausted.
 */
function walk(payload, path) {
  const segments = path.split(".");
  let node = payload;
  for (let i = 0; i < segments.length; i += 1) {
    if (node === null || typeof node !== "object") {
      return void 0;
    }
    node = node[segments[i]];
  }
  return node;
}
|
|
1024
|
+
/**
 * Format a token count for display: plain integer below 1000, one decimal
 * of "k" below 10k, whole "k" from 10k upward.
 *
 * The band is chosen from the rounded figure so the output never straddles a
 * boundary: 999.6 renders as "1.0k" (not "1000") and 9960 renders as "10k"
 * (not "10.0k").
 *
 * @param {number} value - Token count.
 * @returns {string} Human-readable count, e.g. "842", "1.5k", "12k".
 */
function formatTokens(value) {
  const rounded = Math.round(value);
  // Negated comparison keeps NaN on the plain-number path.
  if (!(rounded >= 1e3)) {
    return `${rounded}`;
  }
  const k = value / 1e3;
  const oneDecimal = k.toFixed(1);
  // Decide on the rounded text so 9.96 is shown as "10k" like 10.0 would be.
  return `${Number(oneDecimal) >= 10 ? Math.round(k) : oneDecimal}k`;
}
|
|
1031
|
+
|
|
1032
|
+
// packages/core/src/efficiencyMemory.ts
|
|
1033
|
+
// Opening HTML-comment marker of the auto-generated efficiency block; the managed-block helpers match on this exact text.
var EFFICIENCY_MEMORY_START = "<!-- agent-blackbox:efficiency:start -->";
|
|
1034
|
+
// Closing HTML-comment marker of the auto-generated efficiency block.
var EFFICIENCY_MEMORY_END = "<!-- agent-blackbox:efficiency:end -->";
|
|
1035
|
+
// Keep only the text before the first whitespace run of an offender entry (e.g. drops a trailing count or size).
var offenderLabel = (offender) => {
  const [head] = offender.split(/\s+/);
  return head ?? offender;
};
|
|
1036
|
+
// Drop falsy entries and duplicates, keeping first-seen order.
var dedupe = (xs) => {
  const seen = new Set();
  for (const x of xs) {
    if (x) seen.add(x);
  }
  return Array.from(seen);
};
|
|
1037
|
+
/**
 * Render the auto-generated "Context-efficiency notes" markdown block from an
 * efficiency report.
 *
 * One bullet is emitted per applicable topic: verified commands to reuse,
 * files that were re-read, large outputs, retried commands, and fixed advice
 * for flagged context-pressure / cache-hit / tool-overhead metrics. Only
 * metrics whose status is not "good" contribute.
 *
 * @param {{metrics: Array<{id: string, status: string, offenders?: string[]}>}} report - Efficiency report.
 * @param {{verifiedCommands?: string[]}} [options] - Optional extra inputs.
 * @returns {string|null} The block wrapped in its start/end markers, or null
 *   when there is nothing to say.
 */
function buildEfficiencyMemory(report, options = {}) {
  // Index flagged metrics by id; a later duplicate id replaces an earlier one.
  const flagged = new Map();
  for (const metric of report.metrics) {
    if (metric.status !== "good") {
      flagged.set(metric.id, metric);
    }
  }
  const offendersOf = (id) => {
    const listed = flagged.get(id)?.offenders ?? [];
    return listed.map((entry) => offenderLabel(entry));
  };
  const notes = [];

  const commands = dedupe(options.verifiedCommands ?? []).slice(0, 4);
  if (commands.length > 0) {
    const quoted = commands.map((c) => `\`${c}\``).join(", ");
    notes.push(`- **Reuse these verified commands** (don't rediscover): ${quoted}`);
  }

  const reread = dedupe(
    offendersOf("redundant-reads").concat(offendersOf("read-amplification"))
  ).slice(0, 6);
  if (reread.length > 0) {
    notes.push(
      `- **Read these once, then reuse** \u2014 re-read only the changed line range after an edit, never the whole file: ${reread.join(", ")}`
    );
  }

  const injections = dedupe(offendersOf("large-injections")).slice(0, 4);
  if (injections.length > 0) {
    notes.push(
      `- **Scope these large outputs** (narrow paths, \`max-count\`/\`head\`, or summarise): ${injections.join(", ")}`
    );
  }

  const retries = dedupe(offendersOf("retry-waste")).slice(0, 4);
  if (retries.length > 0) {
    notes.push(`- **Fix the root cause before re-running** (read the first failure's stderr): ${retries.join(", ")}`);
  }

  // Metrics without offender lists get fixed advice when flagged.
  const fixedAdvice = [
    [
      "context-pressure",
      "- **Keep the window lean**: compact resolved turns into a short decisions + open-bugs note, and delegate deep exploration to a sub-agent that returns a brief summary."
    ],
    [
      "cache-hit",
      "- **Protect the prompt cache**: keep the prefix byte-stable (no timestamps/volatile data) and append turns instead of editing earlier ones."
    ],
    [
      "tool-overhead",
      "- **Batch related edits** into one change; skip exploratory tool calls that don't lead to an edit."
    ]
  ];
  for (const [metricId, advice] of fixedAdvice) {
    if (flagged.has(metricId)) {
      notes.push(advice);
    }
  }

  if (notes.length === 0) {
    return null;
  }
  const header = [
    EFFICIENCY_MEMORY_START,
    "## Context-efficiency notes",
    "<!-- Auto-generated by Agent-Blackbox from the last run. Put your own notes ABOVE this block; everything between these markers is regenerated. -->",
    ""
  ];
  return header.concat(notes, [EFFICIENCY_MEMORY_END]).join("\n");
}
|
|
1084
|
+
// Backslash-escape every regular-expression metacharacter in `s` so it can be embedded literally in a pattern.
var escapeRegExp = (s) => {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metacharacters, "\\$&");
};
|
|
1085
|
+
// Build a fresh, non-global pattern matching from the start marker to the nearest end marker (lazy, spans newlines).
var managedBlockRegExp = () => {
  const open = escapeRegExp(EFFICIENCY_MEMORY_START);
  const close = escapeRegExp(EFFICIENCY_MEMORY_END);
  return new RegExp(`${open}[\\s\\S]*?${close}`);
};
|
|
1086
|
+
/**
 * Tell whether `content` already contains a complete managed efficiency block
 * (start marker followed later by an end marker).
 *
 * @param {string} content - Document text to inspect.
 * @returns {boolean}
 */
function hasManagedBlock(content) {
  const pattern = managedBlockRegExp();
  const found = pattern.test(content);
  return found;
}
|
|
1089
|
+
/**
 * Insert or replace the managed efficiency block inside a document.
 *
 * When a managed block already exists, its first occurrence is replaced in
 * place. Otherwise the block is appended after the existing content,
 * separated by one blank line, and the result ends with a single newline.
 *
 * @param {string} content - Existing document text (may be empty).
 * @param {string} block - Full managed block, including its markers.
 * @returns {string} The updated document text.
 */
function upsertManagedBlock(content, block) {
  if (hasManagedBlock(content)) {
    // A function replacer inserts `block` verbatim. A plain string would have
    // `$$`, `$&`, `` $` `` and `$'` sequences (which can occur in recorded
    // shell commands) expanded as replacement patterns.
    return content.replace(managedBlockRegExp(), () => block);
  }
  const base = content.trimEnd();
  return base.length === 0 ? `${block}\n` : `${base}\n\n${block}\n`;
}
|
|
1100
|
+
/**
 * Strip the managed efficiency block from a document.
 *
 * After removal, runs of three or more newlines anywhere in the text are
 * collapsed to two, trailing whitespace is trimmed, and a single trailing
 * newline is restored when anything remains.
 *
 * @param {string} content - Document text.
 * @returns {string} The text without the block; unchanged when no block exists.
 */
function removeManagedBlock(content) {
  if (!hasManagedBlock(content)) {
    return content;
  }
  const withoutBlock = content.replace(managedBlockRegExp(), "");
  const tidied = withoutBlock.replace(/\n{3,}/g, "\n\n").trimEnd();
  if (tidied.length === 0) {
    return "";
  }
  return `${tidied}\n`;
}
|
|
1106
|
+
|
|
1107
|
+
// apps/daemon/dist/cli.js
|
|
1108
|
+
import { spawn as spawn2 } from "node:child_process";
|
|
1109
|
+
import { existsSync } from "node:fs";
|
|
1110
|
+
import { fileURLToPath } from "node:url";
|
|
1111
|
+
import { dirname as dirname3, resolve } from "node:path";
|
|
1112
|
+
|
|
1113
|
+
// apps/daemon/dist/dashboardServer.js
|
|
1114
|
+
import { createReadStream } from "node:fs";
|
|
1115
|
+
import { readFile, stat } from "node:fs/promises";
|
|
1116
|
+
import { createServer } from "node:http";
|
|
1117
|
+
import { extname, join, normalize } from "node:path";
|
|
1118
|
+
// Content-Type header values keyed by lower-case file extension (including the dot).
var mimeTypes = {
  // Text formats, always served as UTF-8.
  ".html": "text/html; charset=utf-8",
  ".css": "text/css; charset=utf-8",
  ".js": "text/javascript; charset=utf-8",
  ".json": "application/json; charset=utf-8",
  ".map": "application/json; charset=utf-8",
  // Images.
  ".ico": "image/x-icon",
  ".png": "image/png",
  ".svg": "image/svg+xml",
  // Fonts.
  ".woff": "font/woff",
  ".woff2": "font/woff2"
};
|
|
1130
|
+
/**
 * Serve the built dashboard over HTTP on 127.0.0.1.
 *
 * `index.html` is read once at start-up and a script tag setting
 * `window.AGENT_BLACKBOX_DAEMON_URL` is injected before `</head>`. Requests for
 * "/" or "/index.html", and requests that do not resolve to a regular file,
 * receive that injected page; other requests stream the matching file from
 * the build directory.
 *
 * @param {{distDir: string, daemonUrl: string, port?: number}} options
 *   `distDir` is the dashboard build directory, `daemonUrl` is exposed to the
 *   page, `port` defaults to 5173.
 * @returns {Promise<{server: import("node:http").Server, port: number, close: () => Promise<void>}>}
 * @throws {Error} When `index.html` cannot be read, or (as a rejection) when
 *   the server cannot start listening, e.g. the port is already in use.
 */
async function startDashboardServer(options) {
  // Absolute, normalised root so the containment check below also works when
  // `distDir` is relative or contains "." / ".." segments.
  const rootDir = resolve(options.distDir);
  const indexPath = join(options.distDir, "index.html");
  let indexHtml;
  try {
    indexHtml = await readFile(indexPath, "utf8");
  } catch {
    throw new Error(`Dashboard build not found at ${options.distDir}. Run "npm run build" first.`);
  }
  const snippet = ` <script>window.AGENT_BLACKBOX_DAEMON_URL=${JSON.stringify(options.daemonUrl)};</script>
</head>`;
  // Function replacer: a "$" sequence in the URL must not be treated as a
  // replacement pattern.
  const injected = indexHtml.replace("</head>", () => snippet);
  const sendIndex = (response) => {
    response.writeHead(200, { "content-type": "text/html; charset=utf-8" });
    response.end(injected);
  };
  const server = createServer((request, response) => {
    let rawPath;
    try {
      rawPath = decodeURIComponent((request.url ?? "/").split("?")[0] ?? "/");
    } catch {
      // Malformed percent-encoding makes decodeURIComponent throw; answer 400
      // instead of letting the exception escape the request listener.
      response.writeHead(400);
      response.end("Bad Request");
      return;
    }
    if (rawPath === "/" || rawPath === "/index.html") {
      sendIndex(response);
      return;
    }
    const safePath = normalize(rawPath).replace(/^(\.\.[/\\])+/, "");
    const filePath = join(rootDir, safePath);
    if (!filePath.startsWith(rootDir)) {
      response.writeHead(403);
      response.end("Forbidden");
      return;
    }
    void stat(filePath).then((stats) => {
      if (!stats.isFile()) {
        throw new Error("not a file");
      }
      response.writeHead(200, { "content-type": mimeTypes[extname(filePath)] ?? "application/octet-stream" });
      const stream = createReadStream(filePath);
      // A read failure after the headers went out can only abort the response.
      stream.on("error", () => response.destroy());
      stream.pipe(response);
    }).catch(() => {
      if (response.headersSent) {
        response.end();
        return;
      }
      // Unknown paths fall back to the app shell.
      sendIndex(response);
    });
  });
  const port = options.port ?? 5173;
  await new Promise((resolve2, reject) => {
    // Surface listen failures (e.g. EADDRINUSE) as a rejection.
    server.once("error", reject);
    server.listen(port, "127.0.0.1", () => {
      server.off("error", reject);
      resolve2();
    });
  });
  const address = server.address();
  const actualPort = typeof address === "object" && address ? address.port : port;
  return {
    server,
    port: actualPort,
    close: () => new Promise((resolve2, reject) => {
      server.close((error) => error ? reject(error) : resolve2());
    })
  };
}
|
|
1179
|
+
|
|
1180
|
+
// packages/storage/src/ndjson.ts
|
|
1181
|
+
import { appendFile, mkdir, readFile as readFile2 } from "node:fs/promises";
|
|
1182
|
+
import { dirname } from "node:path";
|
|
1183
|
+
/**
 * Validate a trace event and encode it as one NDJSON line.
 *
 * @param {object} event - Trace event; checked with `assertTraceEvent` first.
 * @returns {string} The event as JSON followed by a newline.
 */
function serializeTraceEvent(event) {
  assertTraceEvent(event);
  const json = JSON.stringify(event);
  return `${json}\n`;
}
|
|
1188
|
+
/**
 * Decode one NDJSON line into a validated trace event.
 *
 * @param {string} line - A single JSON document.
 * @returns {object} The parsed event, after passing `assertTraceEvent`.
 * @throws {SyntaxError} When the line is not valid JSON.
 */
function parseTraceEventLine(line) {
  const event = JSON.parse(line);
  assertTraceEvent(event);
  return event;
}
|
|
1193
|
+
/**
 * Decode NDJSON text into trace events, skipping blank lines.
 *
 * @param {string} input - Newline-separated JSON documents (LF or CRLF).
 * @returns {object[]} Parsed events in input order.
 */
function parseTraceEvents(input) {
  const events = [];
  for (const line of input.split(/\r?\n/)) {
    if (line.trim().length === 0) {
      continue;
    }
    events.push(parseTraceEventLine(line));
  }
  return events;
}
|
|
1196
|
+
/**
 * Append one trace event to an NDJSON file, creating parent directories
 * as needed.
 *
 * @param {string} filePath - Destination file.
 * @param {object} event - Trace event to serialize and append.
 * @returns {Promise<void>}
 */
async function appendTraceEvent(filePath, event) {
  const parentDir = dirname(filePath);
  await mkdir(parentDir, { recursive: true });
  const line = serializeTraceEvent(event);
  await appendFile(filePath, line, "utf8");
}
|
|
1200
|
+
/**
 * Load and decode every trace event stored in an NDJSON file.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {Promise<object[]>} Parsed events in file order.
 */
async function readTraceEvents(filePath) {
  const text = await readFile2(filePath, "utf8");
  const events = parseTraceEvents(text);
  return events;
}
|
|
1204
|
+
|
|
1205
|
+
// apps/daemon/dist/server.js
|
|
1206
|
+
import { createServer as createServer2 } from "node:http";
|
|
1207
|
+
import { join as join2 } from "node:path";
|
|
1208
|
+
import { WebSocket, WebSocketServer } from "ws";
|
|
1209
|
+
|
|
1210
|
+
// apps/daemon/dist/suggestionProvider.js
|
|
1211
|
+
import { spawn } from "node:child_process";
|
|
1212
|
+
// 45 seconds in milliseconds. Presumably the time budget for a suggestion-provider call; its use sites are outside this view (TODO confirm).
var TIMEOUT_MS = 45e3;
|
|
1213
|
+
var SYSTEM_PROMPT = `You optimize the context-window economy of AI coding-agent runs. The agent has tools: file read/edit, bash, grep/glob, sub-agents, and prompt caching. You receive a JSON digest of the run's FLAGGED metrics (each: id, value, display, status, detail, reclaimableTokens, offenders). Return ONE concrete fix per flagged metric that the operator can apply on the next run.
|
|
1214
|
+
|
|
1215
|
+
# Every action MUST
|
|
1216
|
+
- Ground in this run's numbers: cite the metric's display/reclaimable, and name the offenders verbatim when present (e.g. "config.json \xD75").
|
|
1217
|
+
- Name a concrete mechanism or tool \u2014 not a goal. "Reduce context" is banned; "after an edit, re-read only the changed line range instead of the whole file" is right.
|
|
1218
|
+
- State the expected effect (fewer tokens / cache hits / fewer steps).
|
|
1219
|
+
- Be one or two sentences. Do not restate the metric or give generic advice.
|
|
1220
|
+
|
|
1221
|
+
# Fix playbook (match the flagged id)
|
|
1222
|
+
- context-pressure: compact resolved turns into a short decisions+open-bugs note and start a fresh window; clear raw tool outputs already acted on; move exploration into a sub-agent that returns a ~1-2k-token summary; keep file paths as references, not full contents.
|
|
1223
|
+
- cache-hit: cached tokens are ~10x cheaper \u2014 keep the prompt prefix byte-stable (no timestamps/per-run data in the system prompt), append-only (never edit earlier turns), deterministic JSON key order, and mask unused tools instead of adding/removing them mid-run (any change voids the cache downstream).
|
|
1224
|
+
- redundant-reads: read each file once and hold it in working memory or a notes file; after an edit re-read only the changed line range, never the whole file again.
|
|
1225
|
+
- read-amplification: locate with grep/symbol search, then read only the relevant line range; pre-load a repo map/metadata and fetch on demand instead of whole files.
|
|
1226
|
+
- large-injections: scope the command (narrow paths, max-count/head) or pipe it through a summary; or have a sub-agent absorb the big output and return only the distilled result.
|
|
1227
|
+
- retry-waste: don't re-run blindly \u2014 read the first failure's stderr, fix the root cause, retry once; keep the failed attempt in context so the model doesn't repeat it.
|
|
1228
|
+
- yield-density: split into smaller verifiable steps; recite the goal/todo each step to keep it in recent tokens (models under-use the middle of long contexts); offload exploration to a sub-agent to keep the main thread lean.
|
|
1229
|
+
- tool-overhead: batch related edits into one change, drop exploratory calls that don't lead to an edit, and trim to a minimal non-overlapping tool set.
|
|
1230
|
+
|
|
1231
|
+
# Contrast (do this)
|
|
1232
|
+
metricId "redundant-reads", reclaimableTokens ~12000, offenders ["calculator.js \xD74"]:
|
|
1233
|
+
- BAD: "Avoid reading files multiple times to save context."
|
|
1234
|
+
- GOOD: "calculator.js was read 4\xD7 (~12k reclaimable) \u2014 read it once and cache it, then after each edit re-read only the changed line range instead of the whole file."
|
|
1235
|
+
|
|
1236
|
+
# Output
|
|
1237
|
+
Respond with ONLY this JSON, one entry per flagged metric, nothing else:
|
|
1238
|
+
{"suggestions":[{"metricId":"<id>","title":"<=6 words","action":"<specific fix grounded in this run's numbers/offenders>"}]}`;
|
|
1239
|
+
/**
 * Reduce an efficiency report to the compact digest sent to a suggestion
 * provider: report-level totals plus only the metrics that are not "good".
 *
 * @param {object} report - Efficiency report with `overallScore`, `headline`,
 *   `totalInputTokens`, `estimated` and `metrics`.
 * @returns {object} Digest; each metric's `value` is rounded to 3 decimals,
 *   and `reclaimableTokens` / `offenders` appear only when non-zero / non-empty.
 */
function buildDigest(report) {
  const flaggedMetrics = [];
  for (const m of report.metrics) {
    if (m.status === "good") {
      continue;
    }
    const entry = {
      id: m.id,
      label: m.label,
      status: m.status,
      value: Number(m.value.toFixed(3)),
      display: m.display,
      detail: m.detail
    };
    if (m.reclaimableTokens) {
      entry.reclaimableTokens = m.reclaimableTokens;
    }
    if (m.offenders && m.offenders.length > 0) {
      entry.offenders = m.offenders;
    }
    flaggedMetrics.push(entry);
  }
  const { overallScore, headline, totalInputTokens, estimated } = report;
  return { overallScore, headline, totalInputTokens, estimated, metrics: flaggedMetrics };
}
|
|
1257
|
+
// Provider/model pairs tried, in rotation, when no explicit model is configured.
var FREE_POOL = [
  ["opencode", "opencode/deepseek-v4-flash-free"],
  ["opencode", "opencode/north-mini-code-free"],
  ["ollama", "qwen3-coder:480b-cloud"],
  ["opencode", "opencode/mimo-v2.5-free"],
  ["ollama", "gpt-oss:120b-cloud"],
  ["ollama", "llama3.1:8b"]
].map(([provider, model]) => ({ provider, model }));
// How long a model is skipped after a quota-style failure: ten minutes.
var FREE_COOLDOWN_MS = 10 * 60 * 1e3;
// Rotation offset into the pool; advanced once per suggestion request.
var freeCursor = 0;
// model name -> epoch-ms timestamp until which the model is on cooldown.
var freeCooldownUntil = /* @__PURE__ */ new Map();
|
|
1268
|
+
/**
 * Produce the order in which pool entries should be tried.
 *
 * Entries still on cooldown are left out unless every entry is cooling down,
 * in which case the whole pool is used. The remaining list is rotated so that
 * it starts at `cursor` (wrapped into range; negative cursors are accepted).
 *
 * @param {Array<{model: string}>} pool - Candidate entries.
 * @param {Map<string, number>} cooldownUntil - model -> timestamp until which it is skipped.
 * @param {number} cursor - Rotation offset.
 * @param {number} now - Current timestamp, compared against cooldown values.
 * @returns {Array<{model: string}>} A new array in try-order.
 */
function orderFreePool(pool, cooldownUntil, cursor, now) {
  const available = [];
  for (const entry of pool) {
    const until = cooldownUntil.get(entry.model) ?? 0;
    if (until <= now) {
      available.push(entry);
    }
  }
  const candidates = available.length > 0 ? available : pool;
  const size = candidates.length;
  // Double modulo keeps the offset non-negative for negative cursors.
  const offset = (cursor % size + size) % size;
  const tail = candidates.slice(offset);
  const head = candidates.slice(0, offset);
  return tail.concat(head);
}
|
|
1274
|
+
/**
 * Heuristically decide whether a provider failure was caused by a quota or
 * rate limit.
 *
 * The status code 429 is only recognised as a standalone number, so longer
 * digit runs that merely contain "429" (a port such as 14290, a request id)
 * no longer count as quota errors.
 *
 * @param {unknown} error - Thrown value; non-Error values are stringified.
 * @returns {boolean} True when the message mentions 429, a usage/rate limit,
 *   "too many requests" or a quota.
 */
function isQuotaError(error) {
  const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
  // 429 not immediately preceded or followed by another digit.
  if (/(?<!\d)429(?!\d)/.test(message)) {
    return true;
  }
  const phrases = ["usage limit", "rate limit", "too many requests", "quota"];
  return phrases.some((phrase) => message.includes(phrase));
}
|
|
1278
|
+
/**
 * Produce improvement suggestions for a report, preferring LLM-written text
 * and falling back to the deterministic suggestions.
 *
 * - Mode "off", or no deterministic suggestions: return the deterministic set.
 * - Mode "auto"/"free" without an explicit model: rotate through the free
 *   pool, putting a model on cooldown when it fails with a quota-style error.
 * - Otherwise: call a single provider ("ollama" for "auto"/"free", else the
 *   configured mode).
 *
 * Provider failures are deliberately swallowed so the caller always gets a
 * usable result.
 *
 * @param {object} report - Efficiency report.
 * @param {{mode: string, model?: string, baseUrl?: string}} config - Suggestion settings.
 * @returns {Promise<{suggestions: object[], provider: string}>} `provider` is
 *   "deterministic", the free-pool model name, or the provider id.
 */
async function generateSuggestions(report, config) {
  const deterministic = buildDeterministicSuggestions(report);
  const fallback = () => ({ suggestions: deterministic, provider: "deterministic" });
  if (config.mode === "off" || deterministic.length === 0) {
    return fallback();
  }
  const digest = buildDigest(report);
  const wantsFreeTier = config.mode === "auto" || config.mode === "free";

  // One provider round-trip; resolves to the merged list, or undefined when
  // the provider returned nothing that validates.
  const attempt = async (provider, providerConfig) => {
    const raw = await callProvider(provider, digest, providerConfig);
    const accepted = validateSuggestions(raw, report);
    return accepted.length > 0 ? mergeSuggestions(deterministic, accepted) : void 0;
  };

  if (wantsFreeTier && !config.model) {
    const rotation = orderFreePool(FREE_POOL, freeCooldownUntil, freeCursor, Date.now());
    freeCursor += 1;
    for (const { provider, model } of rotation) {
      try {
        const merged = await attempt(provider, { ...config, model });
        if (merged) {
          return { suggestions: merged, provider: model };
        }
      } catch (error) {
        if (isQuotaError(error)) {
          freeCooldownUntil.set(model, Date.now() + FREE_COOLDOWN_MS);
        }
      }
    }
    return fallback();
  }

  const provider = wantsFreeTier ? "ollama" : config.mode;
  try {
    const merged = await attempt(provider, config);
    if (merged) {
      return { suggestions: merged, provider };
    }
  } catch {
    // Best effort: any provider failure falls through to the deterministic set.
  }
  return fallback();
}
|
|
1314
|
+
/**
 * Dispatch a digest to the named suggestion provider.
 *
 * @param {string} provider - "ollama", "openai-compat", or anything else (treated as OpenCode).
 * @param {object} digest - Digest built from the report.
 * @param {object} config - Provider settings, passed through unchanged.
 * @returns {Promise<unknown>} Whatever the selected provider call resolves to.
 */
async function callProvider(provider, digest, config) {
  switch (provider) {
    case "ollama":
      return callOllama(digest, config);
    case "openai-compat":
      return callOpenAICompat(digest, config);
    default:
      return callOpenCode(digest, config);
  }
}
|
|
1321
|
+
/**
 * Ask an Ollama server for suggestions via its `/api/chat` endpoint.
 *
 * A single trailing slash on the configured base URL is removed before the
 * endpoint path is appended, matching `callOpenAICompat`, so a base URL such
 * as "http://host:11434/" no longer produces "//api/chat".
 *
 * @param {object} digest - Digest sent as the user message (JSON-encoded).
 * @param {{baseUrl?: string, model?: string}} config - Defaults:
 *   base URL "http://127.0.0.1:11434", model "llama3.1".
 * @returns {Promise<unknown>} Parsed JSON from the reply content, or undefined
 *   when the reply has no content.
 * @throws {Error} When the response carries an `error` field.
 * @throws {SyntaxError} When the reply content is not valid JSON.
 */
async function callOllama(digest, config) {
  const baseUrl = (config.baseUrl ?? "http://127.0.0.1:11434").replace(/\/$/, "");
  const model = config.model ?? "llama3.1";
  const response = await fetchJson(`${baseUrl}/api/chat`, {
    model,
    stream: false,
    format: "json",
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: JSON.stringify(digest) }
    ]
  });
  const err = response?.error;
  if (err) {
    throw new Error(typeof err === "string" ? err : JSON.stringify(err));
  }
  const content = response?.message?.content;
  return content ? JSON.parse(content) : void 0;
}
|
|
1339
|
+
/**
 * Requests suggestions from an OpenAI-compatible server via
 * `POST {baseUrl}/v1/chat/completions`.
 *
 * A bearer token is sent only when the AGENT_BLACKBOX_SUGGEST_KEY environment
 * variable is set.
 *
 * @param {unknown} digest - Serialized with JSON.stringify as the user message.
 * @param {{ baseUrl?: string, model?: string }} config - `baseUrl` is required;
 *   `model` defaults to "local-model".
 * @returns {Promise<unknown>} Parsed JSON content of the first choice, or
 *   undefined when there is none.
 * @throws {Error} When `config.baseUrl` is missing, the request fails, or the
 *   content is not valid JSON.
 */
async function callOpenAICompat(digest, config) {
  if (!config.baseUrl) {
    throw new Error("openai-compat needs --suggest-base-url");
  }
  const model = config.model ?? "local-model";
  const apiKey = process.env.AGENT_BLACKBOX_SUGGEST_KEY;
  const endpoint = `${config.baseUrl.replace(/\/$/, "")}/v1/chat/completions`;
  const headers = apiKey ? { authorization: `Bearer ${apiKey}` } : {};
  const payload = {
    model,
    stream: false,
    response_format: { type: "json_object" },
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: JSON.stringify(digest) }
    ]
  };
  const response = await fetchJson(endpoint, payload, headers);
  const content = response?.choices?.[0]?.message?.content;
  return content ? JSON.parse(content) : void 0;
}
|
|
1356
|
+
/**
 * Requests suggestions by running `opencode run --model <model> <prompt>` and
 * extracting the first JSON object from its standard output.
 *
 * @param {unknown} digest - Embedded in the prompt via JSON.stringify.
 * @param {{ model?: string }} config - `model` defaults to
 *   "opencode/deepseek-v4-flash-free".
 * @returns {Promise<unknown>} The extracted object, or undefined when the
 *   output contains no parseable JSON object.
 */
async function callOpenCode(digest, config) {
  const model = config.model ?? "opencode/deepseek-v4-flash-free";
  const prompt = `${SYSTEM_PROMPT}

Metrics:
${JSON.stringify(digest)}

Output ONLY the JSON object, nothing else.`;
  const stdout = await runCommand("opencode", ["run", "--model", model, prompt]);
  return extractJsonObject(stdout);
}
|
|
1367
|
+
/**
 * Overlays LLM suggestions onto the deterministic list.
 *
 * The result has exactly one entry per deterministic suggestion, in the same
 * order. An entry is replaced when an LLM suggestion shares its `metricId`
 * (the last LLM entry wins on duplicates). LLM suggestions for metrics that
 * have no deterministic counterpart are not included.
 *
 * @param {Array<{ metricId: string }>} deterministic
 * @param {Array<{ metricId: string }>} llm
 * @returns {Array<object>}
 */
function mergeSuggestions(deterministic, llm) {
  const overrides = new Map();
  for (const suggestion of llm) {
    overrides.set(suggestion.metricId, suggestion);
  }
  return deterministic.map((baseline) => overrides.get(baseline.metricId) ?? baseline);
}
|
|
1371
|
+
/**
 * Filters raw LLM output down to well-formed suggestions for flagged metrics.
 *
 * Accepts either an array or an object with a `suggestions` array. An item is
 * kept only when it is an object with a string `metricId`, a string `action`
 * of at least 8 characters after trimming, and the metric's status in
 * `report` is "warn" or "bad". Severity is taken from the report, not from
 * the LLM output; `action` is trimmed and capped at 400 characters; a missing
 * or empty `title` falls back to the metric id.
 *
 * @param {unknown} raw - Parsed LLM response.
 * @param {{ metrics?: Array<{ id: string, status: string }> }} report
 * @returns {Array<{ metricId: string, severity: string, title: string, action: string, source: "llm" }>}
 */
function validateSuggestions(raw, report) {
  const candidates = Array.isArray(raw) ? raw : Array.isArray(raw?.suggestions) ? raw.suggestions : [];
  // The report may come from a request body, so do not assume every entry is an object.
  const metrics = Array.isArray(report?.metrics) ? report.metrics : [];
  const statusByMetric = new Map();
  for (const metric of metrics) {
    if (metric !== null && typeof metric === "object") {
      statusByMetric.set(metric.id, metric.status);
    }
  }
  const accepted = [];
  for (const item of candidates) {
    if (item === null || typeof item !== "object") {
      continue;
    }
    const { metricId, action, title } = item;
    if (typeof metricId !== "string" || typeof action !== "string") {
      continue;
    }
    const trimmedAction = action.trim();
    if (trimmedAction.length < 8) {
      continue;
    }
    const status = statusByMetric.get(metricId);
    if (status !== "warn" && status !== "bad") {
      continue;
    }
    accepted.push({
      metricId,
      severity: status,
      title: typeof title === "string" && title.length > 0 ? title : metricId,
      action: trimmedAction.slice(0, 400),
      source: "llm"
    });
  }
  return accepted;
}
|
|
1396
|
+
/**
 * POSTs `body` as JSON to `url` and returns the parsed JSON response.
 *
 * The request is aborted after TIMEOUT_MS milliseconds; the timer is always
 * cleared, whether the request succeeds or fails.
 *
 * @param {string} url
 * @param {unknown} body - Serialized with JSON.stringify.
 * @param {Record<string, string>} [extraHeaders] - Merged over the default
 *   content-type header.
 * @returns {Promise<unknown>}
 * @throws {Error} `"<url> -> <status>"` when the response status is not OK.
 */
async function fetchJson(url, body, extraHeaders = {}) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
  try {
    const response = await fetch(url, {
      method: "POST",
      headers: { "content-type": "application/json", ...extraHeaders },
      body: JSON.stringify(body),
      signal: controller.signal
    });
    if (!response.ok) {
      throw new Error(`${url} -> ${response.status}`);
    }
    return await response.json();
  } finally {
    clearTimeout(timer);
  }
}
|
|
1413
|
+
/**
 * Spawns `command` with `args2` and resolves with everything it wrote to
 * standard output. stdin and stderr are ignored.
 *
 * The child is killed with SIGKILL and the promise rejected if it has not
 * closed within TIMEOUT_MS milliseconds.
 *
 * @param {string} command
 * @param {string[]} args2
 * @returns {Promise<string>} Collected stdout when the exit code is 0.
 */
function runCommand(command, args2) {
  return new Promise((resolve2, reject) => {
    const child = spawn(command, args2, { stdio: ["ignore", "pipe", "ignore"] });
    let out = "";
    let settled = false;
    // Settle once: after a timeout kill, the later "close" event must not settle again.
    const settle = (fn, value) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      fn(value);
    };
    const timer = setTimeout(() => {
      child.kill("SIGKILL");
      settle(reject, new Error("opencode timed out"));
    }, TIMEOUT_MS);
    // Decode as a stream so a multi-byte UTF-8 character split across two chunks is not corrupted.
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      out += chunk;
    });
    child.on("error", (error) => {
      settle(reject, error);
    });
    child.on("close", (code) => {
      if (code === 0) {
        settle(resolve2, out);
      } else {
        settle(reject, new Error(`opencode exited ${code}`));
      }
    });
  });
}
|
|
1437
|
+
/**
 * Extracts the first parseable JSON object embedded in free-form text.
 *
 * Each `{` is tried in turn as a candidate start. The matching `}` is found
 * by counting braces while skipping over double-quoted strings (including
 * escaped quotes), so braces inside string values do not end the object
 * early. A candidate that is unbalanced or fails JSON.parse is skipped and
 * the next `{` is tried.
 *
 * @param {string} text
 * @returns {unknown} The parsed object, or undefined when none is found.
 */
function extractJsonObject(text) {
  // Returns the index of the brace closing the object opened at `start`, or -1.
  const findObjectEnd = (start) => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i += 1) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === "\\") {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth += 1;
      } else if (ch === "}") {
        depth -= 1;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  };

  for (let start = text.indexOf("{"); start !== -1; start = text.indexOf("{", start + 1)) {
    const end = findObjectEnd(start);
    if (end === -1) {
      continue;
    }
    try {
      return JSON.parse(text.slice(start, end + 1));
    } catch {
      // Balanced braces but not JSON (e.g. prose like "{this}"): try the next candidate.
    }
  }
  return void 0;
}
|
|
1458
|
+
|
|
1459
|
+
// apps/daemon/dist/server.js
|
|
1460
|
+
/**
 * Starts the local trace daemon: an HTTP server bound to 127.0.0.1 plus a
 * WebSocket endpoint at `/stream` that sends a snapshot to each new client.
 *
 * @param {{ projectDir: string, eventsFile?: string, port?: number, suggest?: object }} options
 *   `eventsFile` defaults to `<projectDir>/.agent-blackbox/events.ndjson`,
 *   `port` to 47831 and `suggest` to `{ mode: "auto" }`.
 * @returns {Promise<{ server: object, port: number, eventsFile: string, close: () => Promise<void> }>}
 *   `port` is the port actually bound when the server address reports one.
 * @throws Rejects when the server cannot start listening (e.g. port in use).
 */
async function startTraceDaemon(options) {
  const eventsFile = options.eventsFile ?? join2(options.projectDir, ".agent-blackbox", "events.ndjson");
  const suggestConfig = options.suggest ?? { mode: "auto" };
  const clients = new Set();
  const server = createServer2((request, response) => {
    void handleRequest(request, response, eventsFile, clients, suggestConfig);
  });
  const streamServer = new WebSocketServer({ noServer: true });

  server.on("upgrade", (request, socket, head) => {
    let pathname;
    try {
      pathname = new URL(request.url ?? "/", "http://127.0.0.1").pathname;
    } catch {
      // A request target that does not parse as a URL must not throw out of the event handler.
      socket.destroy();
      return;
    }
    if (pathname !== "/stream") {
      socket.destroy();
      return;
    }
    streamServer.handleUpgrade(request, socket, head, (client) => {
      clients.add(client);
      client.on("close", () => {
        clients.delete(client);
      });
      void sendSnapshot(client, eventsFile);
    });
  });

  const port = options.port ?? 47831;
  await new Promise((resolve2, reject) => {
    // Without this listener a failed listen() is an unhandled "error" event and the promise never settles.
    const onListenError = (error) => reject(error);
    server.once("error", onListenError);
    server.listen(port, "127.0.0.1", () => {
      server.off("error", onListenError);
      resolve2();
    });
  });

  const address = server.address();
  const actualPort = typeof address === "object" && address ? address.port : port;

  const close = () => new Promise((resolve2, reject) => {
    // server.close() waits for open connections; drop upgraded sockets first so it can complete.
    for (const client of clients) {
      client.terminate();
    }
    server.close((error) => {
      streamServer.close();
      if (error) {
        reject(error);
      } else {
        resolve2();
      }
    });
  });

  return { server, port: actualPort, eventsFile, close };
}
|
|
1504
|
+
/**
 * Reads trace events from `eventsFile`, treating a missing file as an empty
 * trace. Any other read error is rethrown.
 *
 * @param {string} eventsFile
 * @returns {Promise<Array<object>>}
 */
async function loadTraceEvents(eventsFile) {
  try {
    return await readTraceEvents(eventsFile);
  } catch (error) {
    const missing = isNodeError(error) && error.code === "ENOENT";
    if (!missing) {
      throw error;
    }
    return [];
  }
}
|
|
1514
|
+
/**
 * Loads a trace file and reports its size: event count, the node and edge
 * counts of the materialized workflow graph, and the graph's run id.
 *
 * @param {string} eventsFile
 * @returns {Promise<{ events: number, nodes: number, edges: number, runId: unknown }>}
 */
async function buildReplaySummary(eventsFile) {
  const events = await loadTraceEvents(eventsFile);
  const { nodes, edges, runId } = materializeWorkflowGraph(events);
  return {
    events: events.length,
    nodes: nodes.length,
    edges: edges.length,
    runId
  };
}
|
|
1524
|
+
/**
 * Builds the snapshot served by the daemon: all events, the workflow graph,
 * promise checks, the efficiency report and the handoff markdown.
 *
 * When `replay.seq` is set the graph is replayed up to that sequence number;
 * otherwise, when `replay.at` is set, up to that timestamp; otherwise the
 * full graph is materialized. Checks and efficiency are computed only from
 * events whose ids appear in the graph's `appliedEventIds`.
 *
 * @param {string} eventsFile
 * @param {{ seq?: number, at?: string }} [replay]
 * @returns {Promise<object>}
 */
async function buildTraceSnapshot(eventsFile, replay = {}) {
  const events = await loadTraceEvents(eventsFile);
  const hasSeq = replay.seq !== void 0;
  const hasAt = replay.at !== void 0;

  let graph;
  if (hasSeq) {
    graph = replayWorkflowGraphAtSeq(events, replay.seq);
  } else if (hasAt) {
    graph = replayWorkflowGraphAtTime(events, replay.at);
  } else {
    graph = materializeWorkflowGraph(events);
  }

  const appliedIds = new Set(graph.appliedEventIds);
  const visibleEvents = events.filter((event) => appliedIds.has(event.id));
  const checks = evaluatePromiseChecks(visibleEvents);
  const efficiency = computeEfficiencyReport(visibleEvents);
  const handoffMarkdown = generateHandoffMarkdown(graph, checks);

  // seq takes precedence over at when reporting the mode, matching the graph selection above.
  const replayInfo = { mode: hasSeq ? "seq" : hasAt ? "time" : "live" };
  if (hasSeq) {
    replayInfo.seq = replay.seq;
  }
  if (hasAt) {
    replayInfo.at = replay.at;
  }

  return {
    events,
    graph,
    checks,
    efficiency,
    handoffMarkdown,
    replay: replayInfo
  };
}
|
|
1545
|
+
/**
 * HTTP request router for the daemon.
 *
 * Routes: OPTIONS (any path) -> 204; GET /health, /events, /graph, /snapshot,
 * /efficiency, /audit, /handoff; GET or POST /suggest; POST /events (validate,
 * append, broadcast, reply 202). Anything else gets a 404. Replay query
 * parameters are parsed before routing, so an invalid `seq`/`at` yields a 400
 * on every route. Thrown BadRequestError maps to 400, any other error to 500.
 *
 * @param {object} request - Incoming HTTP request.
 * @param {object} response - HTTP response to write to.
 * @param {string} eventsFile - Trace file backing all routes.
 * @param {Set<object>} clients - Connected stream clients to notify on new events.
 * @param {object} suggestConfig - Passed to generateSuggestions.
 * @returns {Promise<void>}
 */
async function handleRequest(request, response, eventsFile, clients, suggestConfig) {
  try {
    const url = new URL(request.url ?? "/", "http://127.0.0.1");
    const replay = parseReplayQuery(url);
    const { method } = request;
    const { pathname } = url;
    const reply = (data, statusCode = 200) => sendJson(response, statusCode, { ok: true, data });

    if (method === "OPTIONS") {
      sendEmpty(response, 204);
      return;
    }

    if (method === "GET") {
      switch (pathname) {
        case "/health":
          reply({ status: "ok", eventsFile });
          return;
        case "/events":
          reply(await loadTraceEvents(eventsFile));
          return;
        case "/graph":
          reply((await buildTraceSnapshot(eventsFile, replay)).graph);
          return;
        case "/snapshot":
          reply(await buildTraceSnapshot(eventsFile, replay));
          return;
        case "/efficiency":
          reply((await buildTraceSnapshot(eventsFile, replay)).efficiency);
          return;
        case "/audit":
          reply((await buildTraceSnapshot(eventsFile, replay)).checks);
          return;
        case "/handoff":
          reply({ markdown: (await buildTraceSnapshot(eventsFile, replay)).handoffMarkdown });
          return;
        default:
          break;
      }
    }

    if (pathname === "/suggest" && (method === "POST" || method === "GET")) {
      const body = method === "POST" ? await readJsonBody(request) : {};
      // `body` is arbitrary JSON and may be null, so read `report` null-safely.
      const posted = body?.report;
      const report = posted && Array.isArray(posted.metrics)
        ? posted
        : (await buildTraceSnapshot(eventsFile, replay)).efficiency;
      reply(await generateSuggestions(report, suggestConfig));
      return;
    }

    if (method === "POST" && pathname === "/events") {
      const body = await readJsonBody(request);
      const validation = validateTraceEvent(body);
      if (!validation.ok) {
        sendJson(response, 400, {
          ok: false,
          error: { message: "Invalid TraceEvent", details: validation.errors }
        });
        return;
      }
      await appendTraceEvent(eventsFile, body);
      // Fire-and-forget: the HTTP reply does not wait for stream clients.
      void broadcastSnapshot(clients, eventsFile);
      reply({ accepted: true, id: body.id }, 202);
      return;
    }

    sendJson(response, 404, { ok: false, error: { message: "Not found" } });
  } catch (error) {
    sendJson(response, error instanceof BadRequestError ? 400 : 500, {
      ok: false,
      error: { message: error instanceof Error ? error.message : String(error) }
    });
  }
}
|
|
1614
|
+
/**
 * Sends the current live snapshot to every open stream client.
 *
 * The snapshot is built once and the same serialized message is sent to all
 * clients. If building it fails, an `{ type: "error" }` message is sent
 * instead. Send failures on individual clients are ignored so that this
 * function never rejects (callers invoke it without awaiting).
 *
 * @param {Set<object>} clients
 * @param {string} eventsFile
 * @returns {Promise<void>}
 */
async function broadcastSnapshot(clients, eventsFile) {
  if (clients.size === 0) {
    return;
  }
  let message;
  try {
    message = JSON.stringify({ type: "snapshot", data: await buildTraceSnapshot(eventsFile) });
  } catch (error) {
    message = JSON.stringify({
      type: "error",
      error: { message: error instanceof Error ? error.message : String(error) }
    });
  }
  for (const client of clients) {
    if (client.readyState !== WebSocket.OPEN) {
      continue;
    }
    try {
      client.send(message);
    } catch {
      // Best effort: one failing socket must not stop delivery to the others.
    }
  }
}
|
|
1620
|
+
/**
 * Sends the current live snapshot to one stream client.
 *
 * Does nothing unless the client is open. If building or sending the snapshot
 * fails and the client is still open, an `{ type: "error" }` message is
 * attempted instead; a failure of that second send is ignored so the returned
 * promise never rejects (callers invoke this without awaiting).
 *
 * @param {object} client - WebSocket connection.
 * @param {string} eventsFile
 * @returns {Promise<void>}
 */
async function sendSnapshot(client, eventsFile) {
  if (client.readyState !== WebSocket.OPEN) {
    return;
  }
  try {
    const snapshot = await buildTraceSnapshot(eventsFile);
    client.send(JSON.stringify({ type: "snapshot", data: snapshot }));
  } catch (error) {
    if (client.readyState !== WebSocket.OPEN) {
      return;
    }
    const message = error instanceof Error ? error.message : String(error);
    try {
      client.send(JSON.stringify({ type: "error", error: { message } }));
    } catch {
      // The socket failed while reporting the failure; nothing more can be delivered.
    }
  }
}
|
|
1635
|
+
/**
 * Reads the whole request body and parses it as UTF-8 JSON.
 *
 * @param {AsyncIterable<Buffer|string>} request - Incoming request stream.
 * @returns {Promise<unknown>} The parsed value, or `{}` for an empty body.
 * @throws {BadRequestError} When the body is not valid JSON, so the caller
 *   answers 400 instead of 500.
 */
async function readJsonBody(request) {
  const chunks = [];
  for await (const chunk of request) {
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  }
  // Concatenate before decoding so multi-byte characters split across chunks survive.
  const raw = Buffer.concat(chunks).toString("utf8");
  if (raw.length === 0) {
    return {};
  }
  try {
    return JSON.parse(raw);
  } catch (error) {
    throw new BadRequestError("Request body must be valid JSON", { cause: error });
  }
}
|
|
1643
|
+
/**
 * Writes a JSON response with permissive CORS headers.
 *
 * The payload is serialized before any header is written, so a payload that
 * cannot be serialized throws while the response is still untouched. If the
 * headers were already sent, the response is only ended (when still open)
 * instead of calling writeHead a second time.
 *
 * @param {object} response - HTTP response.
 * @param {number} statusCode
 * @param {unknown} payload
 * @returns {void}
 */
function sendJson(response, statusCode, payload) {
  const body = JSON.stringify(payload);
  if (response.headersSent) {
    if (!response.writableEnded) {
      response.end();
    }
    return;
  }
  response.writeHead(statusCode, {
    "access-control-allow-headers": "content-type",
    "access-control-allow-methods": "GET,POST,OPTIONS",
    "access-control-allow-origin": "*",
    "content-type": "application/json; charset=utf-8"
  });
  response.end(body);
}
|
|
1652
|
+
/**
 * Writes a body-less response with permissive CORS headers (used for
 * OPTIONS). If the headers were already sent, the response is only ended
 * (when still open) instead of calling writeHead a second time.
 *
 * @param {object} response - HTTP response.
 * @param {number} statusCode
 * @returns {void}
 */
function sendEmpty(response, statusCode) {
  if (response.headersSent) {
    if (!response.writableEnded) {
      response.end();
    }
    return;
  }
  response.writeHead(statusCode, {
    "access-control-allow-headers": "content-type",
    "access-control-allow-methods": "GET,POST,OPTIONS",
    "access-control-allow-origin": "*"
  });
  response.end();
}
|
|
1660
|
+
/**
 * Extracts replay parameters from a request URL.
 *
 * `seq` takes precedence over `at`; empty values are treated as absent.
 * `seq` must be written as plain decimal digits and fit in a safe integer.
 * Coercing with Number() alone would also accept whitespace (as 0), hex and
 * exponent notation. `at` must be parseable by Date.parse and is returned as
 * the original string.
 *
 * @param {URL} url
 * @returns {{ seq: number } | { at: string } | {}}
 * @throws {BadRequestError} On an invalid `seq` or `at`.
 */
function parseReplayQuery(url) {
  const seq = url.searchParams.get("seq");
  if (seq !== null && seq !== "") {
    const parsed = /^\d+$/.test(seq) ? Number(seq) : Number.NaN;
    if (!Number.isSafeInteger(parsed)) {
      throw new BadRequestError("seq must be a non-negative integer");
    }
    return { seq: parsed };
  }
  const at = url.searchParams.get("at");
  if (at !== null && at !== "") {
    if (Number.isNaN(Date.parse(at))) {
      throw new BadRequestError("at must be an ISO-compatible timestamp");
    }
    return { at };
  }
  return {};
}
|
|
1678
|
+
/**
 * Error type the request handler maps to HTTP 400 instead of 500.
 * Sets `name` so logs and stack traces identify it.
 */
var BadRequestError = class extends Error {
  constructor(message, options) {
    super(message, options);
    this.name = "BadRequestError";
  }
};
|
|
1680
|
+
/**
 * Tells whether `error` is an Error instance that carries a `code` property
 * (as Node.js system errors do).
 *
 * @param {unknown} error
 * @returns {boolean}
 */
function isNodeError(error) {
  if (!(error instanceof Error)) {
    return false;
  }
  return "code" in error;
}
|
|
1683
|
+
|
|
1684
|
+
// apps/daemon/dist/initOpenCode.js
|
|
1685
|
+
import { mkdir as mkdir2, readFile as readFile3, writeFile } from "node:fs/promises";
|
|
1686
|
+
import { join as join3 } from "node:path";
|
|
1687
|
+
// Defaults used when no adapter package or daemon URL is supplied.
// `var` is kept because this is bundler output that relies on hoisted module-level bindings.
var defaultAdapterPackage = "@agent-blackbox/opencode-adapter";
var defaultDaemonUrl = "http://127.0.0.1:47831";
|
|
1689
|
+
/**
 * Installs the recorder plugin into `<projectDir>/.opencode/plugins/agent-blackbox.ts`.
 *
 * If `options.pluginBundlePath` is given and exists, that bundle is copied
 * with every `__ABB_DAEMON_URL__` placeholder replaced by the daemon URL, and
 * package.json is left untouched. Otherwise a small plugin importing the
 * adapter is rendered and the adapter is added to
 * `<projectDir>/.opencode/package.json`.
 *
 * @param {{ projectDir: string, adapterPackage?: string, daemonUrl?: string,
 *   force?: boolean, optimize?: boolean, pluginBundlePath?: string }} options
 * @returns {Promise<{ pluginPath: string, packageJsonPath: string, adapterPackage: string, adapterImport: string }>}
 * @throws {Error} When the plugin file already exists and `force` is not set.
 */
async function initOpenCodeProject(options) {
  const adapterPackage = options.adapterPackage ?? defaultAdapterPackage;
  const adapterImport = inferAdapterImport(adapterPackage);
  const daemonUrl = options.daemonUrl ?? defaultDaemonUrl;
  const opencodeDir = join3(options.projectDir, ".opencode");
  const pluginsDir = join3(opencodeDir, "plugins");
  const pluginPath = join3(pluginsDir, "agent-blackbox.ts");
  const packageJsonPath = join3(opencodeDir, "package.json");
  const result = { pluginPath, packageJsonPath, adapterPackage, adapterImport };

  await mkdir2(pluginsDir, { recursive: true });
  if (!options.force && await pathExists(pluginPath)) {
    throw new Error(`${pluginPath} already exists. Re-run with --force to overwrite it.`);
  }

  if (options.pluginBundlePath && await pathExists(options.pluginBundlePath)) {
    const bundle = await readFile3(options.pluginBundlePath, "utf8");
    // A function replacer inserts the URL verbatim; a string replacement would interpret "$&", "$1", etc.
    const inlined = bundle.replaceAll("__ABB_DAEMON_URL__", () => daemonUrl);
    await writeFile(pluginPath, inlined, "utf8");
    return result;
  }

  const pluginSource = renderOpenCodePlugin({ adapterImport, daemonUrl, optimize: options.optimize ?? false });
  await writeFile(pluginPath, pluginSource, "utf8");
  await writePackageJson(packageJsonPath, adapterPackage, adapterImport);
  return result;
}
|
|
1716
|
+
/**
 * Renders the source text of the generated plugin file.
 *
 * `adapterImport` and `daemonUrl` are emitted through JSON.stringify so that
 * quotes, backslashes or newlines in them become escaped string literals
 * rather than breaking (or injecting into) the generated code. For ordinary
 * values the output is the same double-quoted text as plain interpolation.
 *
 * @param {{ adapterImport: string, daemonUrl: string, optimize?: boolean }} options
 * @returns {string} Module source ending with a newline.
 */
function renderOpenCodePlugin(options) {
  const optimizeEntry = options.optimize ? ",\n optimize: true" : "";
  const lines = [
    `import { createOpenCodePlugin } from ${JSON.stringify(options.adapterImport)};`,
    "",
    "export const AgentBlackbox = createOpenCodePlugin({",
    `daemonUrl: process.env.AGENT_BLACKBOX_DAEMON_URL ?? ${JSON.stringify(options.daemonUrl)}${optimizeEntry}`,
    "});",
    ""
  ];
  return lines.join("\n");
}
|
|
1724
|
+
/**
 * Adds the adapter to the `dependencies` of the package.json at
 * `packageJsonPath`, keeping every other existing field, and writes the file
 * back with 2-space indentation and a trailing newline. A missing file is
 * treated as an empty manifest.
 *
 * @param {string} packageJsonPath
 * @param {string} adapterPackage - Stored as the dependency value.
 * @param {string} adapterImport - Used as the dependency key.
 * @returns {Promise<void>}
 */
async function writePackageJson(packageJsonPath, adapterPackage, adapterImport) {
  const manifest = await readPackageJson(packageJsonPath);
  const dependencies = { ...(manifest.dependencies ?? {}), [adapterImport]: adapterPackage };
  const serialized = JSON.stringify({ ...manifest, dependencies }, null, 2);
  await writeFile(packageJsonPath, `${serialized}\n`, "utf8");
}
|
|
1733
|
+
/**
 * Chooses the module specifier the generated plugin imports from.
 *
 * When `adapterPackage` is a local reference (`file:` URL, absolute path, or
 * `./` / `../` relative path) the default adapter package name is used;
 * otherwise `adapterPackage` itself is returned.
 *
 * @param {string} adapterPackage
 * @returns {string}
 */
function inferAdapterImport(adapterPackage) {
  const localPrefixes = ["file:", "/", "./", "../"];
  const isLocalReference = localPrefixes.some((prefix) => adapterPackage.startsWith(prefix));
  return isLocalReference ? defaultAdapterPackage : adapterPackage;
}
|
|
1739
|
+
/**
 * Reads and parses a package.json file, returning `{}` when the file does not
 * exist. Other read errors and JSON syntax errors are rethrown.
 *
 * @param {string} packageJsonPath
 * @returns {Promise<object>}
 */
async function readPackageJson(packageJsonPath) {
  let text;
  try {
    text = await readFile3(packageJsonPath, "utf8");
  } catch (error) {
    const missing = error instanceof Error && "code" in error && error.code === "ENOENT";
    if (missing) {
      return {};
    }
    throw error;
  }
  return JSON.parse(text);
}
|
|
1749
|
+
/**
 * Reports whether something exists at `path`.
 *
 * Existence is probed by reading the path. ENOENT means "does not exist";
 * EISDIR means a directory is there, which counts as existing rather than
 * being surfaced as an error. Any other error is rethrown.
 *
 * @param {string} path
 * @returns {Promise<boolean>}
 */
async function pathExists(path) {
  try {
    await readFile3(path, "utf8");
    return true;
  } catch (error) {
    const code = error instanceof Error && "code" in error ? error.code : void 0;
    if (code === "ENOENT") {
      return false;
    }
    if (code === "EISDIR") {
      return true;
    }
    throw error;
  }
}
|
|
1760
|
+
/**
 * Tells whether `error` is an Error instance that carries a `code` property
 * (as Node.js system errors do).
 *
 * @param {unknown} error
 * @returns {boolean}
 */
function isNodeError2(error) {
  if (!(error instanceof Error)) {
    return false;
  }
  return "code" in error;
}
|
|
1763
|
+
|
|
1764
|
+
// apps/daemon/dist/index.js
|
|
1765
|
+
// Version string printed by the CLI for --version / -v.
var AGENT_BLACKBOX_DAEMON_VERSION = "0.1.0";

/**
 * Returns the one-line description of the daemon.
 *
 * @returns {string}
 */
function describeDaemon() {
  const description = "Agent-Blackbox daemon: local ingest, replay, and dashboard bridge.";
  return description;
}
|
|
1769
|
+
|
|
1770
|
+
// apps/daemon/dist/optimize.js
|
|
1771
|
+
import { mkdir as mkdir3, readFile as readFile4, rm, writeFile as writeFile2 } from "node:fs/promises";
|
|
1772
|
+
import { dirname as dirname2, join as join4 } from "node:path";
|
|
1773
|
+
// Ids of every metric in the report whose status is anything other than "good".
var flaggedIds = (report) => {
  const ids = [];
  for (const metric of report.metrics) {
    if (metric.status !== "good") {
      ids.push(metric.id);
    }
  }
  return ids;
};

// Formats metric ids for display in result messages.
var joinIds = (ids) => ids.join(", ");

// `optimize --check` rolls the memory back only when the score fell by more than this many points.
var REVERT_MARGIN = 3;
|
|
1776
|
+
/**
 * Implements the `optimize` command for the latest recorded run.
 *
 * Modes:
 * - "revert": remove the managed block from AGENTS.md and drop the state file.
 * - "preview": compute the efficiency memory block without writing anything.
 * - "apply": upsert the block into AGENTS.md and record a baseline
 *   (score, latest timestamp, flagged metrics) in the state file.
 * - any other value ("check"): compare the latest run with the recorded
 *   baseline and roll the block back when the score dropped by more than
 *   REVERT_MARGIN.
 *
 * @param {{ projectDir: string, mode: string }} options
 * @returns {Promise<object>} Result with `mode`, `action` (human-readable
 *   message), `score`, `baselineScore`, `block`, `agentsMdPath`, `changed`,
 *   and for preview/apply `reclaimableTokens`.
 */
async function runOptimize(options) {
  const eventsFile = join4(options.projectDir, ".agent-blackbox", "events.ndjson");
  const agentsMdPath = join4(options.projectDir, "AGENTS.md");
  const statePath = join4(options.projectDir, ".agent-blackbox", "optimization.json");
  const { mode } = options;

  const events = await loadTraceEvents(eventsFile);
  const { runId, events: runEvents } = latestRun(events);
  // Timestamps are compared as strings throughout this function.
  let latestTs = "";
  for (const event of runEvents) {
    if (event.ts > latestTs) {
      latestTs = event.ts;
    }
  }
  const report = runEvents.length > 0 ? computeEfficiencyReport(runEvents) : null;
  const score = report ? report.overallScore : null;

  if (mode === "revert") {
    return revert(agentsMdPath, statePath, score);
  }

  const block = report ? buildEfficiencyMemory(report, { verifiedCommands: verifiedCommands(runEvents) }) : null;

  if (mode === "preview") {
    return {
      mode: "preview",
      action: block ? "Preview only \u2014 re-run with --apply to write this to AGENTS.md." : "This run is clean \u2014 nothing worth pinning.",
      score,
      baselineScore: null,
      reclaimableTokens: report?.reclaimableTokens,
      block,
      agentsMdPath,
      changed: false
    };
  }

  if (mode === "apply") {
    if (!block || !report || score === null || runId === null) {
      return { mode: "apply", action: "This run is clean \u2014 nothing to apply.", score, baselineScore: null, block: null, agentsMdPath, changed: false };
    }
    const prior = await readMaybe(agentsMdPath);
    const next = upsertManagedBlock(prior ?? "", block);
    await writeFile2(agentsMdPath, next, "utf8");
    await writeState(statePath, {
      runId: runId ?? "",
      baselineScore: score,
      baselineLatestTs: latestTs,
      baselineFlagged: flaggedIds(report),
      // Remembered so a later rollback can delete AGENTS.md if this command created it.
      fileExisted: prior !== null,
      appliedAt: new Date().toISOString()
    });
    return {
      mode: "apply",
      action: `Wrote efficiency memory to AGENTS.md \u2014 targets ~${report.reclaimableTokens} reclaimable tokens on similar future runs (no re-run needed). Optional: re-run the same task + \`optimize --check\` to benchmark the gain.`,
      score,
      baselineScore: score,
      reclaimableTokens: report.reclaimableTokens,
      block,
      agentsMdPath,
      changed: prior !== next
    };
  }

  // Check mode.
  const state = await readState(statePath);
  if (!state) {
    return { mode: "check", action: "Nothing applied yet \u2014 run `optimize --apply` first.", score, baselineScore: null, block: null, agentsMdPath, changed: false };
  }
  if (runId === null) {
    return { mode: "check", action: "No runs recorded yet.", score, baselineScore: state.baselineScore, block: null, agentsMdPath, changed: false };
  }
  if (latestTs <= state.baselineLatestTs) {
    return {
      mode: "check",
      action: "No new run since apply. Run your agent with the memory in place, then re-check.",
      score,
      baselineScore: state.baselineScore,
      block: null,
      agentsMdPath,
      changed: false
    };
  }

  const delta = (score ?? 0) - state.baselineScore;
  const nowFlagged = report ? flaggedIds(report) : [];
  const baseFlagged = state.baselineFlagged ?? [];
  const nowSet = new Set(nowFlagged);
  const baseSet = new Set(baseFlagged);
  const cleared = baseFlagged.filter((id) => !nowSet.has(id));
  const appeared = nowFlagged.filter((id) => !baseSet.has(id));
  const diffParts = [];
  if (cleared.length) {
    diffParts.push(`cleared ${joinIds(cleared)}`);
  }
  if (appeared.length) {
    diffParts.push(`new ${joinIds(appeared)}`);
  }
  const metricDiff = diffParts.join("; ");
  const diffSuffix = metricDiff ? ` [${metricDiff}]` : "";

  if (delta < -REVERT_MARGIN) {
    const changed = await restore(agentsMdPath, state.fileExisted);
    await rm(statePath, { force: true });
    return {
      mode: "check",
      action: `Score dropped ${state.baselineScore} \u2192 ${score ?? "?"} (\u0394${delta})${diffSuffix} on the new run \u2014 rolled the memory back.`,
      score,
      baselineScore: state.baselineScore,
      block: null,
      agentsMdPath,
      changed
    };
  }

  return {
    mode: "check",
    action: `Score ${state.baselineScore} \u2192 ${score ?? "?"} (\u0394${delta >= 0 ? "+" : ""}${delta})${diffSuffix} \u2014 kept the memory.`,
    score,
    baselineScore: state.baselineScore,
    block: null,
    agentsMdPath,
    changed: false
  };
}
|
|
1875
|
+
/**
 * Removes the managed block from AGENTS.md and deletes the optimization state
 * file when one exists.
 *
 * Without a state file, AGENTS.md is assumed to have existed before apply, so
 * the file itself is never deleted in that case.
 *
 * @param {string} agentsMdPath
 * @param {string} statePath
 * @param {number|null} score - Score of the latest run, echoed in the result.
 * @returns {Promise<object>} Result object with `mode: "revert"`.
 */
async function revert(agentsMdPath, statePath, score) {
  const state = await readState(statePath);
  const fileExisted = state ? state.fileExisted : true;
  const changed = await restore(agentsMdPath, fileExisted);
  if (state) {
    await rm(statePath, { force: true });
  }
  const action = changed ? "Removed the managed efficiency block from AGENTS.md." : "Nothing to revert.";
  return {
    mode: "revert",
    action,
    score,
    baselineScore: state ? state.baselineScore : null,
    block: null,
    agentsMdPath,
    changed
  };
}
|
|
1890
|
+
/**
 * Strips the managed block from AGENTS.md.
 *
 * If the file is missing or contains no managed block nothing is written.
 * If removing the block leaves only whitespace and the file did not exist
 * before apply (`fileExisted` false), the file is deleted; otherwise the
 * stripped content is written back.
 *
 * @param {string} agentsMdPath
 * @param {boolean} fileExisted - Whether AGENTS.md existed before apply.
 * @returns {Promise<boolean>} True when the file was modified or deleted.
 */
async function restore(agentsMdPath, fileExisted) {
  const current = await readMaybe(agentsMdPath);
  if (current === null) {
    return false;
  }
  const stripped = removeManagedBlock(current);
  if (stripped === current) {
    return false;
  }
  const createdByApply = !fileExisted;
  if (createdByApply && stripped.trim() === "") {
    await rm(agentsMdPath, { force: true });
  } else {
    await writeFile2(agentsMdPath, stripped, "utf8");
  }
  return true;
}
|
|
1904
|
+
/**
 * Selects the run that contains the most recent event.
 *
 * The most recent event is the one with the greatest `ts` (the first such
 * event wins on ties); all events sharing its `runId` are returned in their
 * original order.
 *
 * @param {Array<{ ts: string, runId: unknown }>} events
 * @returns {{ runId: unknown, events: Array<object> }} `runId` is null and
 *   `events` empty when there are no events.
 */
function latestRun(events) {
  const newest = events.reduce(
    (best, event) => (!best || event.ts > best.ts ? event : best),
    void 0
  );
  if (!newest) {
    return { runId: null, events: [] };
  }
  const { runId } = newest;
  return { runId, events: events.filter((event) => event.runId === runId) };
}
|
|
1914
|
+
// First words of shell commands that verifiedCommands() leaves out of its result.
var NAV_VERBS = new Set([
  "ls", "pwd", "cat", "find", "grep", "rg", "fd", "head", "tail", "echo", "which",
  "env", "cd", "tree", "stat", "wc", "sort", "uniq", "clear", "sleep", "true", "false"
]);

/**
 * Collects the distinct shell commands that succeeded during a run.
 *
 * Only "bash" events whose payload has `exitCode === 0` and a non-empty
 * string `command` are considered. Commands are trimmed, de-duplicated in
 * first-seen order, and skipped when their first word is in NAV_VERBS.
 *
 * @param {Array<{ kind: string, payload?: { exitCode?: number, command?: unknown } }>} events
 * @returns {string[]}
 */
function verifiedCommands(events) {
  const unique = new Set();
  for (const event of events) {
    if (event.kind !== "bash") {
      continue;
    }
    const { payload } = event;
    if (!payload || payload.exitCode !== 0) {
      continue;
    }
    const command = typeof payload.command === "string" ? payload.command.trim() : "";
    if (!command || unique.has(command)) {
      continue;
    }
    const [verb = ""] = command.split(/\s+/);
    if (!NAV_VERBS.has(verb)) {
      // A Set iterates in insertion order, so first-seen order is preserved.
      unique.add(command);
    }
  }
  return [...unique];
}
|
|
1958
|
+
/**
 * Reads a UTF-8 text file, returning null when it does not exist. Any other
 * failure is rethrown unchanged.
 *
 * @param {string} path
 * @returns {Promise<string|null>}
 */
async function readMaybe(path) {
  try {
    return await readFile4(path, "utf8");
  } catch (error) {
    // Null-safe: a thrown value is not guaranteed to be an object.
    if (error?.code === "ENOENT") {
      return null;
    }
    throw error;
  }
}
|
|
1967
|
+
/**
 * Loads the optimization state file.
 *
 * @param {string} path
 * @returns {Promise<object|null>} The parsed state, or null when the file is
 *   missing, empty, not valid JSON, or does not hold a JSON object (callers
 *   read properties from the result, so primitives and arrays are rejected).
 */
async function readState(path) {
  const raw = await readMaybe(path);
  if (!raw) {
    return null;
  }
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  const isPlainRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isPlainRecord ? parsed : null;
}
|
|
1977
|
+
/**
 * Writes the optimization state as pretty-printed JSON (2-space indent,
 * trailing newline), creating the parent directory when needed.
 *
 * @param {string} path
 * @param {object} state
 * @returns {Promise<void>}
 */
async function writeState(path, state) {
  await mkdir3(dirname2(path), { recursive: true });
  const serialized = JSON.stringify(state, null, 2);
  await writeFile2(path, `${serialized}\n`, "utf8");
}
|
|
1982
|
+
|
|
1983
|
+
// apps/daemon/dist/cli.js
|
|
1984
|
+
// CLI bootstrap. `var` is kept because this is bundler output that relies on hoisted module-level bindings.
var args = process.argv.slice(2);
var cliDir = dirname3(fileURLToPath(import.meta.url));
var repoRoot = resolve(cliDir, "../../..");

// Returns the first path in the list that exists on disk, or undefined.
var firstExisting = (paths) => paths.find((p) => existsSync(p));

// Prefer assets located next to this file; fall back to the repository layout.
var dashboardDistDir = firstExisting([resolve(cliDir, "dashboard"), resolve(repoRoot, "apps/dashboard/dist")]) ?? resolve(repoRoot, "apps/dashboard/dist");
var pluginBundlePath = firstExisting([resolve(cliDir, "agent-blackbox.plugin.mjs")]);

// Handle a rejected main() explicitly instead of leaving the promise floating:
// print the message and exit non-zero, even if a server is already listening.
main(args).catch((error) => {
  console.error(error instanceof Error ? error.message : String(error));
  process.exit(1);
});
|
|
1991
|
+
async function main(argv) {
|
|
1992
|
+
if (argv.includes("--version") || argv.includes("-v")) {
|
|
1993
|
+
console.log(AGENT_BLACKBOX_DAEMON_VERSION);
|
|
1994
|
+
return;
|
|
1995
|
+
}
|
|
1996
|
+
const command = argv[0] ?? "help";
|
|
1997
|
+
if (command === "daemon") {
|
|
1998
|
+
const projectDir = readFlag(argv, "--project") ?? process.cwd();
|
|
1999
|
+
const port = Number(readFlag(argv, "--port") ?? "47831");
|
|
2000
|
+
const daemon = await startTraceDaemon({ projectDir, port });
|
|
2001
|
+
console.log(`Agent-Blackbox daemon listening on http://127.0.0.1:${daemon.port}`);
|
|
2002
|
+
console.log(`Trace file: ${daemon.eventsFile}`);
|
|
2003
|
+
return;
|
|
2004
|
+
}
|
|
2005
|
+
if (command === "up") {
|
|
2006
|
+
const projectDir = resolve(readFlag(argv, "--project") ?? process.cwd());
|
|
2007
|
+
const port = Number(readFlag(argv, "--port") ?? "47831");
|
|
2008
|
+
const uiPort = Number(readFlag(argv, "--ui-port") ?? "5173");
|
|
2009
|
+
const daemonUrl = `http://127.0.0.1:${port}`;
|
|
2010
|
+
const adapterPackage = readFlag(argv, "--adapter-package") ?? `file:${resolve(repoRoot, "packages/opencode-adapter")}`;
|
|
2011
|
+
const suggest = readSuggestConfig(argv);
|
|
2012
|
+
try {
|
|
2013
|
+
const result = await initOpenCodeProject({
|
|
2014
|
+
projectDir,
|
|
2015
|
+
daemonUrl,
|
|
2016
|
+
adapterPackage,
|
|
2017
|
+
force: false,
|
|
2018
|
+
optimize: argv.includes("--optimize"),
|
|
2019
|
+
...pluginBundlePath ? { pluginBundlePath } : {}
|
|
2020
|
+
});
|
|
2021
|
+
console.log(`\u2713 OpenCode recorder plugin installed: ${result.pluginPath}`);
|
|
2022
|
+
} catch (error) {
|
|
2023
|
+
if (error instanceof Error && error.message.includes("already exists")) {
|
|
2024
|
+
console.log("\u2713 OpenCode recorder plugin already present");
|
|
2025
|
+
} else {
|
|
2026
|
+
throw error;
|
|
2027
|
+
}
|
|
2028
|
+
}
|
|
2029
|
+
const daemon = await startTraceDaemon({ projectDir, port, suggest });
|
|
2030
|
+
const ui = await startDashboardServer({ distDir: dashboardDistDir, port: uiPort, daemonUrl });
|
|
2031
|
+
const dashboardUrl = `http://127.0.0.1:${ui.port}`;
|
|
2032
|
+
console.log("");
|
|
2033
|
+
console.log(`\u2713 Agent-Blackbox is up for ${projectDir}`);
|
|
2034
|
+
console.log(` Dashboard: ${dashboardUrl}`);
|
|
2035
|
+
console.log(` Daemon API: ${daemonUrl} (trace: ${daemon.eventsFile})`);
|
|
2036
|
+
console.log(` Suggestions: ${suggest.mode}${suggest.model ? ` (${suggest.model})` : ""}`);
|
|
2037
|
+
console.log("");
|
|
2038
|
+
if (!argv.includes("--no-open"))
|
|
2039
|
+
openInBrowser(dashboardUrl);
|
|
2040
|
+
console.log("Now run your agent in that project, e.g.:");
|
|
2041
|
+
console.log(` AGENT_BLACKBOX_DAEMON_URL=${daemonUrl} opencode run --dir ${projectDir} "Read the code, run tests, summarize."`);
|
|
2042
|
+
console.log("");
|
|
2043
|
+
console.log("Press Ctrl+C to stop.");
|
|
2044
|
+
return;
|
|
2045
|
+
}
|
|
2046
|
+
if (command === "replay") {
|
|
2047
|
+
const eventsFile = argv[1];
|
|
2048
|
+
if (!eventsFile) {
|
|
2049
|
+
throw new Error("Usage: agent-blackbox replay <events.ndjson>");
|
|
2050
|
+
}
|
|
2051
|
+
console.log(JSON.stringify(await buildReplaySummary(eventsFile), null, 2));
|
|
2052
|
+
return;
|
|
2053
|
+
}
|
|
2054
|
+
if (command === "handoff") {
|
|
2055
|
+
const eventsFile = argv[1];
|
|
2056
|
+
if (!eventsFile) {
|
|
2057
|
+
throw new Error("Usage: agent-blackbox handoff <events.ndjson>");
|
|
2058
|
+
}
|
|
2059
|
+
const events = await loadTraceEvents(eventsFile);
|
|
2060
|
+
console.log(generateHandoffMarkdown(materializeWorkflowGraph(events), evaluatePromiseChecks(events)));
|
|
2061
|
+
return;
|
|
2062
|
+
}
|
|
2063
|
+
if (command === "init-opencode") {
|
|
2064
|
+
const projectDir = readFlag(argv, "--project") ?? process.cwd();
|
|
2065
|
+
const daemonUrl = readFlag(argv, "--daemon-url");
|
|
2066
|
+
const adapterPackage = readFlag(argv, "--adapter-package");
|
|
2067
|
+
const result = await initOpenCodeProject({
|
|
2068
|
+
projectDir,
|
|
2069
|
+
...daemonUrl ? { daemonUrl } : {},
|
|
2070
|
+
...adapterPackage ? { adapterPackage } : {},
|
|
2071
|
+
force: argv.includes("--force"),
|
|
2072
|
+
optimize: argv.includes("--optimize"),
|
|
2073
|
+
...pluginBundlePath ? { pluginBundlePath } : {}
|
|
2074
|
+
});
|
|
2075
|
+
console.log(`OpenCode plugin written: ${result.pluginPath}`);
|
|
2076
|
+
console.log(`OpenCode package config written: ${result.packageJsonPath}`);
|
|
2077
|
+
return;
|
|
2078
|
+
}
|
|
2079
|
+
if (command === "optimize") {
|
|
2080
|
+
const projectDir = resolve(readFlag(argv, "--project") ?? process.cwd());
|
|
2081
|
+
const mode = argv.includes("--apply") ? "apply" : argv.includes("--check") ? "check" : argv.includes("--revert") ? "revert" : "preview";
|
|
2082
|
+
const result = await runOptimize({ projectDir, mode });
|
|
2083
|
+
console.log(`Agent-Blackbox optimize (${result.mode}) \u2014 ${result.agentsMdPath}`);
|
|
2084
|
+
if (result.score !== null)
|
|
2085
|
+
console.log(` Latest run score: ${result.score}${result.baselineScore !== null ? ` (baseline ${result.baselineScore})` : ""}`);
|
|
2086
|
+
if (result.reclaimableTokens && result.reclaimableTokens > 0)
|
|
2087
|
+
console.log(` Reclaimable waste this run: ~${result.reclaimableTokens} tokens`);
|
|
2088
|
+
console.log(` ${result.action}`);
|
|
2089
|
+
if (result.block) {
|
|
2090
|
+
console.log("");
|
|
2091
|
+
console.log(result.block);
|
|
2092
|
+
}
|
|
2093
|
+
if (mode === "check")
|
|
2094
|
+
console.log("\nNote: scores compare different runs, so this is a heuristic \u2014 auto-revert only fires on a clear drop.");
|
|
2095
|
+
return;
|
|
2096
|
+
}
|
|
2097
|
+
printHelp();
|
|
2098
|
+
}
|
|
2099
|
+
/**
 * Print the CLI banner followed by the usage summary for every subcommand.
 *
 * The banner text comes from `describeDaemon()`; everything else is a fixed
 * list of lines, each written to stdout with its own `console.log` call.
 */
function printHelp() {
  const helpLines = [
    describeDaemon(),
    "",
    "Usage:",
    " agent-blackbox up [--project <dir>] [--port <port>] [--ui-port <port>] # plugin + daemon + dashboard, one command",
    " [--suggest auto|free|off|ollama|opencode|openai-compat] [--suggest-model <id>] [--suggest-base-url <url>] [--optimize] [--no-open]",
    " agent-blackbox daemon [--project <dir>] [--port <port>]",
    " agent-blackbox init-opencode [--project <dir>] [--daemon-url <url>] [--adapter-package <specifier>] [--force] [--optimize]",
    " agent-blackbox optimize [--project <dir>] [--apply | --check | --revert] # write/measure/rollback AGENTS.md efficiency memory",
    " agent-blackbox handoff <events.ndjson>",
    " agent-blackbox replay <events.ndjson>",
    " agent-blackbox --version"
  ];
  for (const helpLine of helpLines) {
    console.log(helpLine);
  }
}
|
|
2112
|
+
/**
 * Best-effort: ask the operating system to open `url` in the default browser.
 *
 * Uses `open` on macOS, `cmd /c start "" <url>` on Windows, and `xdg-open`
 * everywhere else. The launcher is started detached with stdio ignored and is
 * unref'd, so the CLI neither waits for it nor is kept alive by it.
 *
 * Failing to open a browser must never take the CLI down, so both synchronous
 * and asynchronous launch failures are deliberately ignored.
 *
 * @param {string} url - Address to open.
 * @returns {void}
 */
function openInBrowser(url) {
  const platform = process.platform;
  const command = platform === "darwin" ? "open" : platform === "win32" ? "cmd" : "xdg-open";
  // The empty string is the window-title argument that `start` expects before the target.
  const args2 = platform === "win32" ? ["/c", "start", "", url] : [url];
  try {
    const child = spawn2(command, args2, { stdio: "ignore", detached: true });
    // A launcher that cannot be started (e.g. `xdg-open` missing on a headless
    // machine) is reported asynchronously through an 'error' event, which the
    // surrounding try/catch cannot see. Without a listener that event is thrown
    // as an uncaught exception and would kill the running daemon/dashboard.
    // NOTE(review): assumes spawn2 is node:child_process `spawn` - confirm at the import site.
    child.on("error", () => {
    });
    child.unref();
  } catch {
    // Synchronous spawn failures (e.g. invalid arguments) are ignored on purpose.
  }
}
|
|
2121
|
+
/**
 * Read the value of a command-line option.
 *
 * Two spellings are accepted, and the first occurrence in `argv` wins:
 *   - `--flag value`  (value is the following argument)
 *   - `--flag=value`  (value is everything after the first `=`)
 *
 * For the spaced form, a following argument that itself starts with `--` is
 * treated as the next option rather than as this option's value, so
 * `--suggest-model --no-open` yields no value instead of `"--no-open"`.
 *
 * @param {string[]} argv - Arguments to scan.
 * @param {string} flag - Option name including its leading dashes, e.g. `"--port"`.
 * @returns {string | undefined} The option's value, or `undefined` when the
 *   option is absent or was given without a value.
 */
function readFlag(argv, flag) {
  const inlinePrefix = `${flag}=`;
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (arg === flag) {
      const value = argv[index + 1];
      if (typeof value !== "string" || value.startsWith("--")) {
        return void 0;
      }
      return value;
    }
    if (typeof arg === "string" && arg.startsWith(inlinePrefix)) {
      return arg.slice(inlinePrefix.length);
    }
  }
  return void 0;
}
|
|
2128
|
+
/**
 * Resolve the suggestion settings from CLI flags, falling back to environment
 * variables.
 *
 * - mode:    `--suggest` or `AGENT_BLACKBOX_SUGGEST`; anything outside the
 *            known list (or nothing at all) resolves to `"auto"`.
 * - model:   `--suggest-model` or `AGENT_BLACKBOX_SUGGEST_MODEL`.
 * - baseUrl: `--suggest-base-url` or `AGENT_BLACKBOX_SUGGEST_BASE_URL`.
 *
 * `model` and `baseUrl` are only present on the result when they are truthy.
 *
 * @param {string[]} argv - Command-line arguments to inspect.
 * @returns {{ mode: string, model?: string, baseUrl?: string }}
 */
function readSuggestConfig(argv) {
  const knownModes = ["auto", "off", "free", "ollama", "opencode", "openai-compat"];
  const requestedMode = readFlag(argv, "--suggest") ?? process.env.AGENT_BLACKBOX_SUGGEST ?? "auto";
  const config = { mode: knownModes.includes(requestedMode) ? requestedMode : "auto" };

  const requestedModel = readFlag(argv, "--suggest-model") ?? process.env.AGENT_BLACKBOX_SUGGEST_MODEL;
  if (requestedModel) {
    config.model = requestedModel;
  }

  const requestedBaseUrl = readFlag(argv, "--suggest-base-url") ?? process.env.AGENT_BLACKBOX_SUGGEST_BASE_URL;
  if (requestedBaseUrl) {
    config.baseUrl = requestedBaseUrl;
  }

  return config;
}
|