safeloop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +451 -0
- package/dist/index.d.ts +240 -0
- package/dist/index.js +834 -0
- package/package.json +46 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,834 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.BREAKER_PRESETS = exports.DEFAULTS = void 0;
|
|
4
|
+
exports.createCodingAgentBreaker = createCodingAgentBreaker;
|
|
5
|
+
exports.createPolicyGate = createPolicyGate;
|
|
6
|
+
exports.createAgentRunLedger = createAgentRunLedger;
|
|
7
|
+
exports.toMarkdownReport = toMarkdownReport;
|
|
8
|
+
exports.createBreaker = createBreaker;
|
|
9
|
+
exports.DEFAULTS = {
|
|
10
|
+
maxRetries: 3,
|
|
11
|
+
maxRepeatedErrors: 2,
|
|
12
|
+
tokenBudget: { perStep: Infinity, perTask: Infinity },
|
|
13
|
+
scopeFreeze: true,
|
|
14
|
+
};
|
|
15
|
+
exports.BREAKER_PRESETS = {
|
|
16
|
+
conservativeCodingAgent: {
|
|
17
|
+
maxRetries: 1,
|
|
18
|
+
maxRepeatedErrors: 1,
|
|
19
|
+
tokenBudget: { perStep: 4000, perTask: 12000 },
|
|
20
|
+
scopeFreeze: true,
|
|
21
|
+
},
|
|
22
|
+
standardCodingAgent: {
|
|
23
|
+
maxRetries: 2,
|
|
24
|
+
maxRepeatedErrors: 2,
|
|
25
|
+
tokenBudget: { perStep: 8000, perTask: 30000 },
|
|
26
|
+
scopeFreeze: true,
|
|
27
|
+
},
|
|
28
|
+
exploratoryResearchAgent: {
|
|
29
|
+
maxRetries: 3,
|
|
30
|
+
maxRepeatedErrors: 2,
|
|
31
|
+
tokenBudget: { perStep: 12000, perTask: 60000 },
|
|
32
|
+
scopeFreeze: false,
|
|
33
|
+
},
|
|
34
|
+
};
|
|
35
|
+
/**
 * Layers a partial breaker configuration on top of a complete base configuration.
 *
 * Top-level options and the nested `tokenBudget` are merged separately, so an
 * override can change one budget field without restating the other.
 *
 * Override entries whose value is `undefined` or `null` are ignored. Without
 * that, an explicitly-undefined option would erase the base value, and
 * createBreaker() would then fall back to the global DEFAULTS (for example an
 * unlimited token budget) instead of the preset's value.
 *
 * @param {object} base - Complete configuration; must contain a `tokenBudget` object.
 * @param {object} [override] - Partial configuration; may be omitted.
 * @returns {object} A new configuration object; neither argument is mutated.
 */
function mergeBreakerConfig(base, override) {
    // Keep only the entries that actually carry a value.
    const definedEntries = (source) => Object.fromEntries(Object.entries(source ?? {})
        .filter(([, value]) => value !== undefined && value !== null));
    return {
        ...base,
        ...definedEntries(override),
        tokenBudget: {
            ...base.tokenBudget,
            ...definedEntries(override?.tokenBudget),
        },
    };
}
|
|
45
|
+
/**
 * Builds a breaker that starts from the `standardCodingAgent` preset and
 * applies the caller's overrides on top of it.
 *
 * @param {object} [config] - Partial breaker configuration.
 * @returns {object} The breaker returned by createBreaker().
 */
function createCodingAgentBreaker(config) {
    const preset = exports.BREAKER_PRESETS.standardCodingAgent;
    const effectiveConfig = mergeBreakerConfig(preset, config);
    return createBreaker(effectiveConfig);
}
|
|
48
|
+
/**
 * Copies ledger metadata so callers cannot mutate the ledger's own record.
 *
 * The copy is shallow except for `allowedFiles`, which is duplicated, or set
 * to an empty array when the source has none.
 *
 * @param {object} metadata - Ledger metadata to copy.
 * @returns {object} A new metadata object with its own `allowedFiles` array.
 */
function cloneAgentRunLedgerMetadata(metadata) {
    const copy = { ...metadata };
    const sourceFiles = metadata.allowedFiles;
    copy.allowedFiles = sourceFiles ? [...sourceFiles] : [];
    return copy;
}
|
|
54
|
+
/**
 * Normalises a file path or allow-list pattern for comparison.
 *
 * Steps: backslashes become forward slashes, surrounding whitespace is
 * trimmed, runs of slashes collapse to one, and `.` / `..` segments are
 * resolved lexically. Resolving dot segments matters because allow-list
 * checks are prefix-based: without it `src/../secrets.txt` would still start
 * with `src/` and be accepted by a `src/**` rule.
 *
 * A leading `..` that would climb above a relative path is kept, so such
 * paths never match an in-tree pattern; `..` at the root of an absolute path
 * is dropped. A trailing slash is preserved.
 *
 * @param {string} value - Path or pattern to normalise.
 * @returns {string} The normalised path.
 */
function normalizePath(value) {
    const collapsed = value
        .replace(/\\/g, '/')
        .trim()
        .replace(/\/{2,}/g, '/');
    const resolved = [];
    for (const segment of collapsed.split('/')) {
        if (segment === '.') {
            continue;
        }
        if (segment !== '..') {
            resolved.push(segment);
            continue;
        }
        const previous = resolved[resolved.length - 1];
        if (previous !== undefined && previous !== '' && previous !== '..') {
            // An ordinary directory name: step back out of it.
            resolved.pop();
        }
        else if (!(previous === '' && resolved.length === 1)) {
            // Relative path climbing above its start: keep the marker.
            // (At the root of an absolute path there is nothing above, so the
            // segment is simply dropped.)
            resolved.push('..');
        }
    }
    const normalized = resolved.join('/');
    if (normalized === '' && collapsed !== '') {
        // Everything cancelled out: report the root or the current directory.
        return collapsed.startsWith('/') ? '/' : '.';
    }
    return normalized;
}
|
|
61
|
+
/**
 * Canonicalises a shell command for comparison: surrounding whitespace is
 * removed, every internal whitespace run becomes a single space, and the
 * text is lower-cased.
 *
 * @param {string} value - Raw command text.
 * @returns {string} The canonical form.
 */
function normalizeCommand(value) {
    const words = value.trim().split(/\s+/);
    return words.join(' ').toLowerCase();
}
|
|
64
|
+
/**
 * Maps a risk label to a number that can be compared.
 *
 * Unrecognised labels rank above every known level. The policy gate compares
 * `riskRank(risk) > riskRank(maxRisk)`; if an unknown label produced
 * `undefined`, that comparison would always be false and a request carrying
 * an unexpected risk value (for example `'critical'`) would pass the
 * `maxRisk` check.
 *
 * @param {string} risk - `'low'`, `'medium'` or `'high'`.
 * @returns {number} 1, 2 or 3 for known labels; `Infinity` otherwise.
 */
function riskRank(risk) {
    switch (risk) {
        case 'low':
            return 1;
        case 'medium':
            return 2;
        case 'high':
            return 3;
        default:
            // Fail closed: an unknown risk level is treated as the most severe.
            return Number.POSITIVE_INFINITY;
    }
}
|
|
74
|
+
/**
 * Tests one file path against one allow-list pattern.
 *
 * Supported pattern forms, after both sides are passed through normalizePath():
 *  - `dir/**` matches `dir` itself and anything beneath it;
 *  - `dir/*`  matches direct children of `dir` only;
 *  - anything else must equal the file path exactly.
 *
 * @param {string} pattern - Allow-list pattern.
 * @param {string} file - Requested file path.
 * @returns {boolean} True when the file is covered by the pattern.
 */
function matchesAllowedFilePattern(pattern, file) {
    const RECURSIVE_SUFFIX = '/**';
    const CHILDREN_SUFFIX = '/*';
    const wanted = normalizePath(pattern);
    const candidate = normalizePath(file);
    if (wanted.endsWith(RECURSIVE_SUFFIX)) {
        const directory = wanted.slice(0, -RECURSIVE_SUFFIX.length);
        return candidate === directory || candidate.startsWith(`${directory}/`);
    }
    if (wanted.endsWith(CHILDREN_SUFFIX)) {
        const prefix = `${wanted.slice(0, -CHILDREN_SUFFIX.length)}/`;
        // A further slash after the prefix means the file lives in a subdirectory.
        return candidate.startsWith(prefix) && !candidate.slice(prefix.length).includes('/');
    }
    return wanted === candidate;
}
|
|
91
|
+
/**
 * Reports whether a requested file is covered by at least one allow-list pattern.
 *
 * @param {string[]} allowedFiles - Allow-list patterns.
 * @param {string} requestedFile - File path being requested.
 * @returns {boolean} True when any pattern matches; false for an empty list.
 */
function matchesAnyAllowedFile(allowedFiles, requestedFile) {
    const rejectedByPattern = (pattern) => !matchesAllowedFilePattern(pattern, requestedFile);
    // "Some pattern matches" is the negation of "every pattern rejects".
    return !allowedFiles.every(rejectedByPattern);
}
|
|
94
|
+
/**
 * Reports whether a command contains a blocked pattern.
 *
 * Both sides are canonicalised with normalizeCommand(), so the comparison
 * ignores case and whitespace differences. The match is a substring test.
 *
 * A pattern that is empty after normalisation is ignored. Every string
 * contains the empty string, so without this guard a blank entry in the
 * block list would reject every command. The approval-trigger matching in
 * createPolicyGate() skips empty triggers in the same way.
 *
 * @param {string} blockedPattern - Entry from the policy's block list.
 * @param {string} command - Command being requested.
 * @returns {boolean} True when the command contains the (non-empty) pattern.
 */
function commandMatchesBlockedPattern(blockedPattern, command) {
    const needle = normalizeCommand(blockedPattern);
    if (needle === '') {
        return false;
    }
    return normalizeCommand(command).includes(needle);
}
|
|
97
|
+
/**
 * Reports whether a command equals one of the allow-listed commands once both
 * are canonicalised with normalizeCommand().
 *
 * @param {string[]} allowedCommands - Allow-listed commands.
 * @param {string} command - Command being requested.
 * @returns {boolean} True on an exact (normalised) match.
 */
function commandIsAllowed(allowedCommands, command) {
    return allowedCommands.some((candidate) => {
        const allowedForm = normalizeCommand(candidate);
        const requestedForm = normalizeCommand(command);
        return allowedForm === requestedForm;
    });
}
|
|
100
|
+
/**
 * Builds the human-readable summary of a policy decision.
 *
 * @param {boolean} allowed - True when the run may proceed.
 * @param {boolean} requiresApproval - True when the run only needs human sign-off.
 * @param {string[]} reasons - Explanatory reasons gathered during evaluation.
 * @param {string[]} violations - Hard policy violations.
 * @returns {string} "approved", "requires human approval", or "blocked" text.
 */
function createDecisionMessage(allowed, requiresApproval, reasons, violations) {
    if (allowed) {
        return 'Policy Gate approved this run.';
    }
    if (requiresApproval) {
        if (reasons.length > 0) {
            return `Policy Gate requires human approval before execution: ${reasons.join('; ')}.`;
        }
        return 'Policy Gate requires human approval before execution.';
    }
    // Blocked: append each non-empty detail section, violations first.
    const sections = [
        ['Violations', violations],
        ['Reasons', reasons],
    ];
    const details = sections
        .filter(([, items]) => items.length > 0)
        .map(([label, items]) => `${label}: ${items.join('; ')}.`);
    return ['Policy Gate blocked this run.', ...details].join(' ');
}
|
|
118
|
+
/**
 * Creates a policy gate that decides whether an agent run may proceed.
 *
 * The policy lists are normalised once, up front. `evaluate(request)` then:
 *  1. records a violation when the request's risk outranks `maxRisk`
 *     (request risk defaults to 'low', maxRisk to 'high');
 *  2. records a violation for each requested file outside a non-empty allow-list;
 *  3. records a violation for each command containing a blocked pattern, and
 *     for each command missing from a non-empty allow-list;
 *  4. asks for human approval when an approval trigger occurs in the task or
 *     command text, or when the mode is 'HITL' and the risk is 'high', unless
 *     the request already carries `hasHumanApproval`.
 *
 * Any violation blocks the run; otherwise pending approval reasons hold it.
 *
 * @param {object} policy - Policy definition (oversightMode, maxRisk,
 *   allowedFiles, allowedCommands, blockedCommands, requireApprovalFor).
 * @returns {{evaluate: function(object): object}} Gate exposing `evaluate`.
 */
function createPolicyGate(policy) {
    const { oversightMode } = policy;
    const maxRisk = policy.maxRisk ?? 'high';
    const allowedFiles = policy.allowedFiles?.map(normalizePath) ?? [];
    const allowedCommands = policy.allowedCommands?.map(normalizeCommand) ?? [];
    const blockedCommands = policy.blockedCommands?.map(normalizeCommand) ?? [];
    const requireApprovalFor = policy.requireApprovalFor?.map((value) => value.toLowerCase().trim()) ?? [];

    function evaluate(request) {
        const risk = request.risk ?? 'low';
        const files = request.requestedFiles ?? [];
        const commands = request.requestedCommands ?? [];
        const violations = [];
        const reasons = [];

        if (riskRank(risk) > riskRank(maxRisk)) {
            violations.push(`risk ${risk} exceeds maxRisk ${maxRisk}`);
            reasons.push(`request risk ${risk} exceeds policy maxRisk ${maxRisk}`);
        }

        for (const file of files) {
            // An empty allow-list places no restriction on files.
            const permitted = allowedFiles.length === 0 || matchesAnyAllowedFile(allowedFiles, file);
            if (!permitted) {
                violations.push(`file not allowed: ${normalizePath(file)}`);
            }
        }

        for (const command of commands) {
            const isBlocked = blockedCommands.some((pattern) => commandMatchesBlockedPattern(pattern, command));
            if (isBlocked) {
                violations.push(`blocked command: ${command.trim()}`);
            }
            const outsideAllowList = allowedCommands.length > 0 && !commandIsAllowed(allowedCommands, command);
            if (outsideAllowList) {
                violations.push(`command not allowed: ${command.trim()}`);
            }
        }

        // Approval triggers are matched against the task text plus all commands.
        const searchableText = [request.task, ...commands].join(' ').toLowerCase();
        const triggered = requireApprovalFor.filter((trigger) => trigger && searchableText.includes(trigger));
        const approvalReasons = [];
        if (!request.hasHumanApproval) {
            if (triggered.length > 0) {
                approvalReasons.push(`requires approval for: ${triggered.join(', ')}`);
            }
            if (oversightMode === 'HITL' && risk === 'high') {
                approvalReasons.push('high-risk run requires human approval in HITL mode');
            }
        }
        reasons.push(...approvalReasons);

        const blocked = violations.length > 0;
        const requiresApproval = !blocked && approvalReasons.length > 0;
        const allowed = !(blocked || requiresApproval);
        return {
            allowed,
            requiresApproval,
            reasons,
            violations,
            oversightMode,
            risk,
            message: createDecisionMessage(allowed, requiresApproval, reasons, violations),
        };
    }

    return { evaluate };
}
|
|
179
|
+
/**
 * Renders values as a Markdown bullet list, one `- item` per line, or the
 * word `None` when there is nothing to list.
 *
 * @param {Array} values - Items to render.
 * @returns {string} Bullet list text or 'None'.
 */
function formatMaybeList(values) {
    const hasItems = values.length > 0;
    if (!hasItems) {
        return 'None';
    }
    const toBullet = (value) => `- ${value}`;
    return values.map(toBullet).join('\n');
}
|
|
182
|
+
/**
 * Returns the text unchanged when it has visible content, otherwise `None`.
 *
 * @param {string} [value] - Text that may be missing or blank.
 * @returns {string} The original text, or 'None' when empty or whitespace-only.
 */
function formatMaybeText(value) {
    const isBlank = !value || !value.trim();
    return isBlank ? 'None' : value;
}
|
|
185
|
+
/**
 * Copies the `allowed` field of a scope-check result, which is either a
 * boolean or a list.
 *
 * @param {boolean|Array|*} allowed - Value to copy.
 * @returns {boolean|Array|undefined} The boolean as-is, a fresh array copy,
 *   or undefined for any other kind of value.
 */
function cloneScopeCheckAllowed(allowed) {
    if (typeof allowed === 'boolean') {
        return allowed;
    }
    return Array.isArray(allowed) ? Array.from(allowed) : undefined;
}
|
|
194
|
+
/**
 * Copies an optional array.
 *
 * @param {Array|*} values - Array to copy, or anything else.
 * @returns {Array|undefined} A fresh copy for arrays; undefined otherwise.
 */
function cloneMaybeStringArray(values) {
    if (!Array.isArray(values)) {
        return undefined;
    }
    return Array.from(values);
}
|
|
197
|
+
/**
 * Renders the `allowed` field of a scope check for the Markdown report.
 *
 * @param {boolean|Array|*} value - Boolean verdict or list of allowed items.
 * @returns {string} 'Yes'/'No' for booleans, a bullet list (via
 *   formatMaybeList) for arrays, and 'None' for anything else.
 */
function formatMaybeScopeAllowed(value) {
    if (typeof value === 'boolean') {
        return value ? 'Yes' : 'No';
    }
    return Array.isArray(value) ? formatMaybeList(value) : 'None';
}
|
|
206
|
+
/**
 * Renders a number for the report, or `n/a` when the value is not a number.
 *
 * @param {number|*} value - Value to render.
 * @returns {string} Decimal text, or 'n/a'.
 */
function formatMaybeNumber(value) {
    if (typeof value !== 'number') {
        return 'n/a';
    }
    return String(value);
}
|
|
209
|
+
/**
 * Creates an in-memory ledger that records what happened during an agent run:
 * prompts, commands, changed files, validations, scope checks and approvals.
 * Every record is also appended to a chronological event list.
 *
 * Metadata defaults (generated `runId`, task 'untitled', `startedAt` = now,
 * empty `allowedFiles`) are overridden by `initialMetadata`.
 *
 * Returned API:
 *  - recordPrompt / recordCommand / recordChangedFiles / recordValidation /
 *    recordScopeCheck / recordApproval: append one record plus one event;
 *  - close(status): set the final status and closing time;
 *  - toJSON(): a snapshot whose arrays and entries are copies;
 *  - toMarkdown(): the same information as a Markdown document.
 *
 * Markdown rendering notes:
 *  - optional metadata (agent, executor, repo) is printed as 'None' when
 *    absent rather than the literal text "undefined";
 *  - scope-check lists are printed inline ("a; b") so each stays on its own
 *    bullet line instead of spilling a nested list after the label;
 *  - changed-file bullets are indented under their numbered item, matching
 *    the Commands section.
 *
 * @param {object} [initialMetadata] - Overrides for the default metadata.
 * @returns {object} The ledger API described above.
 */
function createAgentRunLedger(initialMetadata) {
    const metadata = cloneAgentRunLedgerMetadata({
        runId: `run-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
        task: 'untitled',
        startedAt: new Date().toISOString(),
        allowedFiles: [],
        ...initialMetadata,
    });
    const events = [];
    const prompts = [];
    const commands = [];
    const changedFiles = [];
    const validations = [];
    const scopeChecks = [];
    const approvals = [];
    let status = 'open';
    let closedAt = null;

    // Appends one timestamped event (epoch milliseconds) to the timeline.
    function addEvent(type, data) {
        events.push({ timestamp: Date.now(), type, data });
    }

    // Copies a command record, including its optional result object.
    function cloneCommand(entry) {
        return {
            command: entry.command,
            result: entry.result ? { ...entry.result } : undefined,
        };
    }

    // Copies a scope-check record field by field so list fields are not shared.
    function cloneScopeCheck(entry) {
        return {
            ok: entry.ok,
            allowed: cloneScopeCheckAllowed(entry.allowed),
            requiresApproval: entry.requiresApproval,
            reasons: cloneMaybeStringArray(entry.reasons),
            violations: cloneMaybeStringArray(entry.violations),
            message: entry.message,
            oversightMode: entry.oversightMode,
            risk: entry.risk,
        };
    }

    // One payload copier per event type produced by this ledger.
    const eventDataCloners = {
        prompt: (data) => ({ prompt: data.prompt }),
        command: (data) => cloneCommand(data),
        changed_files: (data) => ({ files: [...data.files] }),
        validation: (data) => ({ ...data }),
        scope_check: (data) => cloneScopeCheck(data),
        approval: (data) => ({ ...data }),
        close: (data) => ({ status: data.status }),
    };

    function cloneLedgerEvent(entry) {
        return {
            timestamp: entry.timestamp,
            type: entry.type,
            data: eventDataCloners[entry.type](entry.data),
        };
    }

    // Snapshot of the whole ledger; nothing in it aliases internal state.
    function cloneJSON() {
        return {
            metadata: cloneAgentRunLedgerMetadata(metadata),
            status,
            closedAt,
            prompts: [...prompts],
            commands: commands.map((entry) => cloneCommand(entry)),
            changedFiles: changedFiles.map((entry) => [...entry]),
            validations: validations.map((entry) => ({ ...entry })),
            scopeChecks: scopeChecks.map((entry) => cloneScopeCheck(entry)),
            approvals: approvals.map((entry) => ({ ...entry })),
            events: events.map((entry) => cloneLedgerEvent(entry)),
        };
    }

    function recordPrompt(prompt) {
        prompts.push(prompt);
        addEvent('prompt', { prompt });
    }

    function recordCommand(command, result) {
        const entry = cloneCommand({ command, result });
        commands.push(entry);
        addEvent('command', cloneCommand(entry));
    }

    function recordChangedFiles(files) {
        const entry = [...files];
        changedFiles.push(entry);
        addEvent('changed_files', { files: [...entry] });
    }

    function recordValidation(name, validationStatus, details) {
        const entry = { name, status: validationStatus, details };
        validations.push(entry);
        addEvent('validation', { ...entry });
    }

    function recordScopeCheck(result) {
        // Derive `ok`: an explicit boolean `ok` wins, then a boolean `allowed`,
        // otherwise the check is ok unless it lists at least one violation.
        let ok;
        if (typeof result.ok === 'boolean') {
            ok = result.ok;
        }
        else if (typeof result.allowed === 'boolean') {
            ok = result.allowed;
        }
        else {
            ok = !(Array.isArray(result.violations) && result.violations.length > 0);
        }
        const entry = { ...cloneScopeCheck(result), ok };
        scopeChecks.push(entry);
        addEvent('scope_check', cloneScopeCheck(entry));
    }

    function recordApproval(approver, decision, note) {
        const entry = { approver, decision, note };
        approvals.push(entry);
        addEvent('approval', { ...entry });
    }

    function close(nextStatus) {
        status = nextStatus;
        closedAt = new Date().toISOString();
        addEvent('close', { status: nextStatus });
    }

    // One timeline line per event; `when` is the ISO form of the timestamp.
    function describeEvent(entry) {
        const when = new Date(entry.timestamp).toISOString();
        switch (entry.type) {
            case 'prompt':
                return `- [${when}] prompt: ${entry.data.prompt}`;
            case 'command':
                return `- [${when}] command: ${entry.data.command}`;
            case 'changed_files':
                return `- [${when}] changed files: ${entry.data.files.join(', ')}`;
            case 'validation':
                return `- [${when}] validation: ${entry.data.name} (${entry.data.status})`;
            case 'scope_check':
                return `- [${when}] scope check: ${entry.data.ok ? 'ok' : 'violations'}`;
            case 'approval':
                return `- [${when}] approval: ${entry.data.approver} (${entry.data.decision})`;
            case 'close':
                return `- [${when}] close: ${entry.data.status}`;
        }
    }

    function toMarkdown() {
        const json = cloneJSON();
        // Optional metadata must not be printed as the text "undefined".
        const describeOptional = (value) => (value === undefined || value === null || String(value).trim() === ''
            ? 'None'
            : String(value));
        // Single-line list rendering for values that follow a "- label:" prefix.
        const inlineList = (values) => (values.length > 0 ? values.join('; ') : 'None');

        const lines = ['# Agent Run Ledger', ''];
        lines.push(`Run ID: ${json.metadata.runId}`);
        lines.push(`Agent: ${describeOptional(json.metadata.agent)}`);
        lines.push(`Executor: ${describeOptional(json.metadata.executor)}`);
        lines.push(`Repo: ${describeOptional(json.metadata.repo)}`);
        lines.push(`Task: ${json.metadata.task}`);
        lines.push(`Status: ${json.status}`);
        lines.push(`Started at: ${json.metadata.startedAt}`);
        lines.push(`Closed at: ${json.closedAt ?? 'open'}`);

        lines.push('', '## Allowed Files', '', formatMaybeList(json.metadata.allowedFiles ?? []));

        lines.push('', '## Prompt History', '');
        lines.push(formatMaybeList(json.prompts));

        lines.push('', '## Commands', '');
        if (json.commands.length === 0) {
            lines.push('None');
        }
        else {
            json.commands.forEach((entry, index) => {
                lines.push(`${index + 1}. ${entry.command}`);
                lines.push(`   - exit code: ${formatMaybeNumber(entry.result?.exitCode)}`);
                lines.push(`   - summary: ${formatMaybeText(entry.result?.summary)}`);
            });
        }

        lines.push('', '## Changed Files', '');
        if (json.changedFiles.length === 0) {
            lines.push('None');
        }
        else {
            json.changedFiles.forEach((group, index) => {
                lines.push(`${index + 1}.`);
                // Indent so the bullets belong to the numbered item above.
                if (group.length === 0) {
                    lines.push('   None');
                }
                else {
                    group.forEach((file) => lines.push(`   - ${file}`));
                }
            });
        }

        lines.push('', '## Validations', '');
        lines.push(json.validations.length > 0
            ? json.validations
                .map((entry) => `- ${entry.name}: ${entry.status}${entry.details ? ` — ${entry.details}` : ''}`)
                .join('\n')
            : 'None');

        lines.push('', '## Scope Checks', '');
        if (json.scopeChecks.length === 0) {
            lines.push('None');
        }
        else {
            json.scopeChecks.forEach((entry) => {
                const allowedText = Array.isArray(entry.allowed)
                    ? inlineList(entry.allowed)
                    : formatMaybeScopeAllowed(entry.allowed);
                lines.push(`- ok: ${entry.ok ? 'yes' : 'no'}`);
                lines.push(`  - allowed: ${allowedText}`);
                lines.push(`  - reasons: ${inlineList(entry.reasons ?? [])}`);
                lines.push(`  - violations: ${inlineList(entry.violations ?? [])}`);
                lines.push(`  - message: ${formatMaybeText(entry.message)}`);
                if (typeof entry.requiresApproval === 'boolean') {
                    lines.push(`  - requires approval: ${entry.requiresApproval ? 'yes' : 'no'}`);
                }
                if (entry.oversightMode) {
                    lines.push(`  - oversight mode: ${entry.oversightMode}`);
                }
                if (entry.risk) {
                    lines.push(`  - risk: ${entry.risk}`);
                }
            });
        }

        lines.push('', '## Human Approval', '');
        lines.push(json.approvals.length > 0
            ? json.approvals
                .map((entry) => `- ${entry.approver}: ${entry.decision}${entry.note ? ` — ${entry.note}` : ''}`)
                .join('\n')
            : 'None');

        lines.push('', '## Events', '');
        lines.push(json.events.length > 0
            ? json.events.map((entry) => describeEvent(entry)).join('\n')
            : 'None');

        return lines.join('\n').trim();
    }

    return {
        recordPrompt,
        recordCommand,
        recordChangedFiles,
        recordValidation,
        recordScopeCheck,
        recordApproval,
        close,
        toJSON: cloneJSON,
        toMarkdown,
    };
}
|
|
489
|
+
/**
 * Summarises audit entries as one `* type` line per distinct entry type,
 * sorted by type name, with an ` xN` suffix when a type occurs more than once.
 *
 * @param {Array<{type: string}>} auditEntries - Audit log entries.
 * @returns {string|null} The summary text, or null when there are no entries.
 */
function formatAuditSummary(auditEntries) {
    if (auditEntries.length === 0) {
        return null;
    }
    const tally = new Map();
    for (const { type } of auditEntries) {
        const seenSoFar = tally.get(type) ?? 0;
        tally.set(type, seenSoFar + 1);
    }
    const describe = (type) => {
        const count = tally.get(type);
        return count > 1 ? `* ${type} x${count}` : `* ${type}`;
    };
    return [...tally.keys()]
        .sort((left, right) => left.localeCompare(right))
        .map(describe)
        .join('\n');
}
|
|
501
|
+
/**
 * Pulls the recommendation out of an escalation message: everything after the
 * first "What a human should decide next:" marker, trimmed, with each run of
 * newlines folded into a single space.
 *
 * @param {string} escalationMessage - Full escalation text.
 * @returns {string|null} The recommendation, or null when the marker is absent.
 */
function extractRecommendedAction(escalationMessage) {
    const MARKER = 'What a human should decide next:';
    const markerStart = escalationMessage.indexOf(MARKER);
    if (markerStart === -1) {
        return null;
    }
    const remainder = escalationMessage.slice(markerStart + MARKER.length);
    return remainder.trim().replace(/\n+/g, ' ');
}
|
|
507
|
+
/**
 * Renders a breaker run result as a Markdown report.
 *
 * The report always carries the status and attempt count. The trip reason
 * (failed runs only), token usage (numeric estimates only), last error,
 * escalation text with its extracted recommendation, and audit summary are
 * each added only when present.
 *
 * @param {object} result - Result object returned by a breaker's run().
 * @returns {string} The Markdown document.
 */
function toMarkdownReport(result) {
    const failed = !result.success;
    const report = ['# Safeloop Report', '', `Status: ${failed ? 'Failed' : 'Succeeded'}`];
    if (failed && result.stoppedBy) {
        report.push(`Trip reason: ${result.stoppedBy}`);
    }
    report.push(`Attempts: ${result.attempts}`);
    if (typeof result.tokenEstimate === 'number') {
        report.push(`Token usage: ${result.tokenEstimate}`);
    }
    if (result.lastError) {
        report.push(`Last error: ${result.lastError}`);
    }
    const escalation = result.escalationMessage;
    if (escalation) {
        report.push('', '## Escalation', '', escalation);
        const nextStep = extractRecommendedAction(escalation);
        if (nextStep) {
            report.push('', `Recommended human action: ${nextStep}`);
        }
    }
    const summary = formatAuditSummary(result.auditEntries ?? []);
    if (summary) {
        report.push('', '## Audit Summary', '', summary);
    }
    return report.join('\n').trim();
}
|
|
534
|
+
/**
 * Reads a token cost from a task result or a thrown value.
 *
 * The fields are consulted in priority order: `_stepTokenCost`,
 * `_tokenEstimate`, then `tokensUsed`; the first one holding a positive
 * number wins.
 *
 * Only positive numbers are accepted. NaN is rejected because adding it to
 * the running total would turn the total into NaN, after which no
 * `total > budget` comparison can ever be true. Negative values are rejected
 * because they would reduce the recorded usage. `Infinity` is still accepted
 * and will exceed any finite budget.
 *
 * @param {*} value - Task result, thrown error, or anything else.
 * @returns {number} The token cost, or 0 when none is reported.
 */
function extractTokenCost(value) {
    if (!value || typeof value !== 'object') {
        return 0;
    }
    for (const field of ['_stepTokenCost', '_tokenEstimate', 'tokensUsed']) {
        const cost = value[field];
        // `cost > 0` is false for NaN, so this also filters NaN out.
        if (typeof cost === 'number' && cost > 0) {
            return cost;
        }
    }
    return 0;
}
|
|
544
|
+
/**
 * Reduces a thrown value to a short, comparable message: the first line of
 * its text, trimmed. The breaker compares these strings to detect a repeated
 * error, so different failures must not collapse to the same text.
 *
 * Text source, in order:
 *  - Error instances: their `message`;
 *  - other objects with a string `message` (for example a thrown plain
 *    object): that message, instead of the generic "[object Object]";
 *  - anything else: its string conversion, falling back to
 *    Object.prototype.toString when conversion itself throws (for example an
 *    object created with a null prototype).
 *
 * @param {*} err - The thrown value.
 * @returns {string} First line of the error text, trimmed.
 */
function normalizeError(err) {
    let text;
    if (err instanceof Error) {
        // String() guards against a message that was overwritten with a non-string.
        text = String(err.message);
    }
    else if (err !== null && typeof err === 'object' && typeof err.message === 'string') {
        text = err.message;
    }
    else {
        try {
            text = String(err);
        }
        catch {
            text = Object.prototype.toString.call(err);
        }
    }
    return text.split('\n')[0].trim();
}
|
|
550
|
+
/**
 * Creates a circuit breaker that runs an agent task in a retry loop and stops
 * it when a limit is hit.
 *
 * Options left undefined or null fall back to `exports.DEFAULTS`.
 *
 * Stop conditions reported through `stoppedBy`:
 *  - 'kill_switch'       trip() was called;
 *  - 'max_retries'       the attempt count already exceeds `maxRetries`;
 *  - 'repeated_error'    the same normalised error occurred
 *                        `maxRepeatedErrors` times in a row (0 disables this);
 *  - 'token_budget_task' accumulated tokens exceed `tokenBudget.perTask`;
 *  - 'token_budget_step' the cost read from one result exceeds `tokenBudget.perStep`;
 *  - 'scope_freeze'      the task proposed or returned new goals while
 *                        `scopeFreeze` is on.
 *
 * Budgets and scope are checked only after an attempt returns normally; a
 * failing attempt adds its token cost but is not budget-checked.
 *
 * `status().tripReason` holds the reason passed to trip(), or the
 * `stoppedBy` code when the breaker tripped automatically.
 *
 * All state lives in this closure and is reset at the start of every run(),
 * so overlapping run() calls on one breaker would share and clobber it.
 *
 * @param {object} [config] - maxRetries, maxRepeatedErrors,
 *   tokenBudget { perStep, perTask }, scopeFreeze.
 * @returns {{run: Function, trip: Function, reset: Function, status: Function, log: Function}}
 */
function createBreaker(config) {
    const maxRetries = config?.maxRetries ?? exports.DEFAULTS.maxRetries;
    const maxRepeatedErrors = config?.maxRepeatedErrors ?? exports.DEFAULTS.maxRepeatedErrors;
    const perStepBudget = config?.tokenBudget?.perStep ?? exports.DEFAULTS.tokenBudget.perStep;
    const perTaskBudget = config?.tokenBudget?.perTask ?? exports.DEFAULTS.tokenBudget.perTask;
    const scopeFreeze = config?.scopeFreeze ?? exports.DEFAULTS.scopeFreeze;

    let killSwitchEngaged = false;
    let killReason = '';
    let attempts = 0;
    let tokenUsed = 0;
    let lastError = null;
    let normalizedErrors = [];
    let isTripped = false;
    let tripReason = null;
    const auditEntries = [];
    let abortController = null;
    let scopeProposed = false;
    let scopeProposalDescription = '';

    // Returns every piece of run state to its initial value.
    function resetState(nextController) {
        killSwitchEngaged = false;
        killReason = '';
        attempts = 0;
        tokenUsed = 0;
        lastError = null;
        normalizedErrors = [];
        isTripped = false;
        tripReason = null;
        auditEntries.length = 0;
        abortController = nextController;
        scopeProposed = false;
        scopeProposalDescription = '';
    }

    function record(type, message, metadata) {
        auditEntries.push({ timestamp: Date.now(), type, message, metadata });
    }

    // True when the most recent errors form a run of `errorMessage` at least
    // `maxRepeatedErrors` long.
    function detectRepeatedError(errorMessage) {
        if (maxRepeatedErrors === 0) {
            return false;
        }
        let streak = 0;
        for (let i = normalizedErrors.length - 1; i >= 0 && normalizedErrors[i] === errorMessage; i--) {
            streak++;
        }
        return streak >= maxRepeatedErrors;
    }

    // True when scope freeze is on and the result carries a non-empty list of
    // new goals or tasks under any of the recognised keys.
    function checkScopeInResult(taskResult) {
        if (!scopeFreeze || !taskResult || typeof taskResult !== 'object') {
            return false;
        }
        const newGoals = taskResult._newGoals || taskResult.newGoals || taskResult._newTasks || taskResult.newTasks;
        return Array.isArray(newGoals) && newGoals.length > 0;
    }

    function buildEscalation(context, detail) {
        return [
            `The agent loop was stopped.`,
            ``,
            `What failed: ${context}`,
            `What was tried: ${attempts} attempt(s) were made.`,
            `Why it stopped: ${detail}`,
            `What a human should decide next: Review the task and error above. ` +
                `You may retry with a different approach, adjust configuration ` +
                `(maxRetries, tokenBudget, maxRepeatedErrors), or abandon the task.`,
        ].join('\n');
    }

    // Marks the breaker as tripped and builds the failure result. A reason
    // already supplied through trip() is kept; otherwise the stop code is used
    // so status() can say why an automatic trip happened.
    function stop(stoppedBy, reportedError, escalationMessage) {
        isTripped = true;
        if (tripReason === null || tripReason === undefined) {
            tripReason = stoppedBy;
        }
        return {
            success: false,
            stoppedBy,
            attempts,
            tokenEstimate: tokenUsed,
            lastError: reportedError,
            escalationMessage,
            auditEntries: [...auditEntries],
        };
    }

    function stopForKillSwitch() {
        const reason = killReason || 'kill switch engaged';
        record('breaker_trip', `Kill switch engaged: ${reason}`);
        return stop('kill_switch', lastError, buildEscalation(`Task was manually stopped.`, `The kill switch was engaged: ${reason}`));
    }

    async function run(taskFn) {
        // Keep a local handle: reset() may null the shared controller while an
        // attempt is still awaiting, and the loop must not dereference null.
        const controller = new AbortController();
        resetState(controller);
        while (true) {
            if (killSwitchEngaged || controller.signal.aborted) {
                return stopForKillSwitch();
            }
            if (attempts > maxRetries) {
                record('breaker_trip', `Exceeded max retries (${maxRetries})`);
                return stop('max_retries', lastError, buildEscalation(`The task failed on every attempt. Last error: ${lastError ?? 'unknown'}`, `The maximum number of retries (${maxRetries}) was exceeded after ${attempts} attempt(s).`));
            }
            attempts++;
            record('attempt', `Attempt ${attempts}`, {
                attempt: attempts,
                tokenUsed,
            });
            const context = {
                attempt: attempts,
                tokenUsed,
                signal: controller.signal,
                log: (entry) => {
                    record(entry.type, entry.message, entry.metadata);
                },
                recordTokenUsage: (cost) => {
                    tokenUsed += cost;
                    record('token_usage', `Recorded ${cost} tokens`, { cost });
                },
                proposeScopeChange: (description, newGoals) => {
                    if (!scopeFreeze) {
                        return true;
                    }
                    scopeProposed = true;
                    scopeProposalDescription = `${description}: ${newGoals.join(', ')}`;
                    record('scope_proposed', `Scope change requested: ${scopeProposalDescription}`, { description, newGoals });
                    record('scope_denied', `Scope change denied: ${scopeProposalDescription}`);
                    return false;
                },
            };
            try {
                const result = await taskFn(context);
                if (killSwitchEngaged) {
                    return stopForKillSwitch();
                }
                const tokenCost = extractTokenCost(result);
                tokenUsed += tokenCost;
                if (scopeProposed) {
                    record('breaker_trip', `Scope change denied: ${scopeProposalDescription}`);
                    return stop('scope_freeze', null, buildEscalation(`The agent attempted to expand scope: ${scopeProposalDescription}`, `Scope freeze is enabled. The agent requested to add new goals ` +
                        `without explicit human approval.`));
                }
                if (tokenUsed > perTaskBudget) {
                    record('budget_check', `Task token budget exceeded: ${tokenUsed} > ${perTaskBudget}`);
                    return stop('token_budget_task', null, buildEscalation(`The task consumed ${tokenUsed} tokens (budget: ${perTaskBudget}).`, `The task-level token budget was exceeded.`));
                }
                if (tokenCost > perStepBudget) {
                    record('budget_check', `Step token budget exceeded: ${tokenCost} > ${perStepBudget}`);
                    return stop('token_budget_step', null, buildEscalation(`A single step consumed ${tokenCost} tokens (per-step budget: ${perStepBudget}).`, `The per-step token budget was exceeded.`));
                }
                if (checkScopeInResult(result)) {
                    record('scope_denied', 'Task returned new goals without using proposeScopeChange()');
                    return stop('scope_freeze', null, buildEscalation(`The agent returned new goals or tasks in its result without ` +
                        `explicitly requesting approval via proposeScopeChange().`, `Scope freeze is enabled. The task may not silently add new goals.`));
                }
                record('retry', `Attempt ${attempts} succeeded`);
                return {
                    success: true,
                    stoppedBy: '',
                    attempts,
                    tokenEstimate: tokenUsed,
                    lastError: null,
                    escalationMessage: null,
                    auditEntries: [...auditEntries],
                    data: result,
                };
            }
            catch (err) {
                const errMsg = normalizeError(err);
                const errTokenCost = extractTokenCost(err);
                tokenUsed += errTokenCost;
                lastError = errMsg;
                normalizedErrors.push(errMsg);
                record('failure', `Attempt ${attempts} failed: ${errMsg}`, errTokenCost ? { tokenCost: errTokenCost } : undefined);
                if (detectRepeatedError(errMsg)) {
                    const occurrences = normalizedErrors.filter((e) => e === errMsg).length;
                    record('breaker_trip', `Repeated error detected: "${errMsg}" appeared ` +
                        `${occurrences} times`);
                    return stop('repeated_error', errMsg, buildEscalation(`The task failed with: "${errMsg}".`, `The same error repeated ${occurrences} times ` +
                        `(threshold: ${maxRepeatedErrors}). The agent appears to be stuck.`));
                }
                record('retry', `Retrying after attempt ${attempts}`);
            }
        }
    }

    return {
        run,
        trip(reason) {
            killSwitchEngaged = true;
            killReason = reason;
            isTripped = true;
            tripReason = reason;
            record('kill_switch', `Manual kill switch: ${reason}`);
            if (abortController) {
                abortController.abort(reason);
            }
        },
        reset() {
            resetState(null);
        },
        status() {
            return {
                isTripped,
                isKilled: killSwitchEngaged,
                attempts,
                tripReason,
            };
        },
        log() {
            return [...auditEntries];
        },
    };
}
|