clementine-agent 1.18.199 → 1.18.201
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/run-agent.d.ts +2 -0
- package/dist/agent/run-agent.js +34 -0
- package/dist/agent/run-summary.d.ts +48 -0
- package/dist/agent/run-summary.js +287 -0
- package/dist/agent/side-effect-classifier.d.ts +32 -0
- package/dist/agent/side-effect-classifier.js +232 -0
- package/dist/agent/side-effect-idempotency.d.ts +73 -0
- package/dist/agent/side-effect-idempotency.js +505 -0
- package/dist/channels/discord-utils.d.ts +1 -0
- package/dist/channels/discord-utils.js +14 -0
- package/dist/channels/discord.js +26 -0
- package/dist/channels/slack-utils.d.ts +1 -0
- package/dist/channels/slack-utils.js +14 -0
- package/dist/channels/slack.js +5 -0
- package/dist/channels/telegram.js +19 -0
- package/dist/gateway/router.d.ts +2 -0
- package/dist/gateway/router.js +133 -31
- package/package.json +1 -1
|
@@ -73,6 +73,8 @@ export interface RunAgentOptions {
|
|
|
73
73
|
tool: string;
|
|
74
74
|
input: Record<string, unknown>;
|
|
75
75
|
}) => void | Promise<void>;
|
|
76
|
+
/** Fires after the runId is generated, before SDK execution starts. */
|
|
77
|
+
onRunStart?: (runId: string) => void;
|
|
76
78
|
/** Abort signal — when triggered, the SDK stream is cancelled. */
|
|
77
79
|
abortSignal?: AbortSignal;
|
|
78
80
|
/** Optional override of the AgentDefinition map. Mostly for tests. */
|
package/dist/agent/run-agent.js
CHANGED
|
@@ -97,6 +97,7 @@ export function invalidateMcpStatusEntry(name) {
|
|
|
97
97
|
import { BASE_DIR, PKG_DIR, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY as CONFIG_ANTHROPIC_API_KEY, normalizeClaudeSdkOptionsForOneMillionContext, TOOL_OUTPUT_GUARD, } from '../config.js';
|
|
98
98
|
import { buildGuardHooks } from './tool-output-guard.js';
|
|
99
99
|
import { buildDedupHook } from './tool-call-dedup.js';
|
|
100
|
+
import { buildSideEffectIdempotencyHook } from './side-effect-idempotency.js';
|
|
100
101
|
import { buildChatStopHook } from './chat-stop-hook.js';
|
|
101
102
|
import { buildAgentMap } from './agent-definitions.js';
|
|
102
103
|
import { buildExecutionToolPolicy, } from './execution-policy.js';
|
|
@@ -351,6 +352,10 @@ export async function runAgent(prompt, opts) {
|
|
|
351
352
|
// tool-output guard (1.18.169) can namespace its on-disk archive by
|
|
352
353
|
// runId. EventLog writers below also reference this id.
|
|
353
354
|
const runId = randomUUID();
|
|
355
|
+
try {
|
|
356
|
+
opts.onRunStart?.(runId);
|
|
357
|
+
}
|
|
358
|
+
catch { /* telemetry callback only */ }
|
|
354
359
|
const eventLog = new EventLog();
|
|
355
360
|
let eventSeq = 0;
|
|
356
361
|
const writeEvent = (e) => {
|
|
@@ -445,6 +450,25 @@ export async function runAgent(prompt, opts) {
|
|
|
445
450
|
});
|
|
446
451
|
},
|
|
447
452
|
});
|
|
453
|
+
// ── Side-effect idempotency hook (1.18.201) ────────────────────────
|
|
454
|
+
// Prevents exact duplicate high-confidence external mutations across
|
|
455
|
+
// context-overflow resumes/retries. This is intentionally narrow:
|
|
456
|
+
// confident email sends and CRM mutations only, keyed by stable identity
|
|
457
|
+
// fields. Unknown side effects remain event-log/audit data, not blocks.
|
|
458
|
+
const idempotency = buildSideEffectIdempotencyHook({
|
|
459
|
+
runId,
|
|
460
|
+
sessionKey: opts.sessionKey,
|
|
461
|
+
onDecision: (info) => {
|
|
462
|
+
if (info.decision !== 'block')
|
|
463
|
+
return;
|
|
464
|
+
writeEvent({
|
|
465
|
+
kind: 'error',
|
|
466
|
+
ts: new Date().toISOString(),
|
|
467
|
+
sessionId,
|
|
468
|
+
toolError: `_clementine_idempotency:block ${info.toolName} ${info.summary ?? ''}`.trim(),
|
|
469
|
+
});
|
|
470
|
+
},
|
|
471
|
+
});
|
|
448
472
|
// ── Chat persistence Stop hook (1.18.184, source='chat' only) ─────
|
|
449
473
|
// Keeps chat-initiated multi-step jobs running until they finish.
|
|
450
474
|
// Inspects the model's last assistant message for continuation
|
|
@@ -476,6 +500,10 @@ export async function runAgent(prompt, opts) {
|
|
|
476
500
|
// Merge hook maps from the modules. SDK accepts arrays of
|
|
477
501
|
// HookCallbackMatcher per event; we concatenate.
|
|
478
502
|
const mergedHooks = { ...guard.hooks };
|
|
503
|
+
for (const [evt, matchers] of Object.entries(idempotency.hooks)) {
|
|
504
|
+
const existing = mergedHooks[evt] ?? [];
|
|
505
|
+
mergedHooks[evt] = [...existing, ...matchers];
|
|
506
|
+
}
|
|
479
507
|
for (const [evt, matchers] of Object.entries(dedup.hooks)) {
|
|
480
508
|
const existing = mergedHooks[evt] ?? [];
|
|
481
509
|
mergedHooks[evt] = [...existing, ...matchers];
|
|
@@ -827,6 +855,12 @@ export async function runAgent(prompt, opts) {
|
|
|
827
855
|
warned: dedup.stats.warned,
|
|
828
856
|
blocked: dedup.stats.blocked,
|
|
829
857
|
} : undefined,
|
|
858
|
+
idempotency: idempotency.stats.guarded > 0 ? {
|
|
859
|
+
guarded: idempotency.stats.guarded,
|
|
860
|
+
blocked: idempotency.stats.blocked,
|
|
861
|
+
recorded: idempotency.stats.recorded,
|
|
862
|
+
failedNotRecorded: idempotency.stats.failedNotRecorded,
|
|
863
|
+
} : undefined,
|
|
830
864
|
}, 'runAgent: query complete');
|
|
831
865
|
// PRD §6 Phase 4e: subagent transcript backfill (Path C). The SDK persists
|
|
832
866
|
// every subagent's full message stream to ~/.claude/projects/<encoded-cwd>/
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run summaries derived from the durable Clementine event log.
|
|
3
|
+
*
|
|
4
|
+
* The event log is the source of truth for "what actually fired" during an
|
|
5
|
+
* SDK run. This module turns raw tool_call/tool_result rows into a compact,
|
|
6
|
+
* provider-neutral summary that chat overflow recovery can show to the owner
|
|
7
|
+
* and feed back into a fresh continuation prompt.
|
|
8
|
+
*/
|
|
9
|
+
import { EventLog } from '../gateway/event-log.js';
|
|
10
|
+
import { type SideEffectVerdict } from './side-effect-classifier.js';
|
|
11
|
+
/** One tool_call row from the event log, classified and paired with its tool_result row when one was found. */
export interface SideEffectCall {
|
|
12
|
+
/** Run id copied from the tool_call event. */
runId: string;
|
|
13
|
+
/** Tool name copied from the tool_call event. */
toolName: string;
|
|
14
|
+
/** Tool-use id; also the key used to locate the matching tool_result event. */
toolUseId: string;
|
|
15
|
+
/** The call's tool input; an empty object when the logged input was not a plain object. */
input: Record<string, unknown>;
|
|
16
|
+
/** The `ts` value of the tool_call event. */
timestamp: string;
|
|
17
|
+
/** Classification of the call (side_effect / read_only / unknown). */
verdict: SideEffectVerdict;
|
|
18
|
+
/** Outcome details; absent when no tool_result event was found for this call. */
result?: {
|
|
19
|
+
/** Whether the result was judged successful. */
successful: boolean;
|
|
20
|
+
/** The normalized tool_result payload. */
raw: unknown;
|
|
21
|
+
/** Short machine-readable tag explaining the success verdict. */
reason: string;
|
|
22
|
+
/** HTTP-style status code, when one was found in the payload. */
statusCode?: number;
|
|
23
|
+
/** Error text, when one was found in the payload. */
error?: string;
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
/** Compact summary of what one or more runs did, derived from their event-log rows. */
export interface RunSummary {
|
|
27
|
+
/** The distinct, non-empty run ids that were summarized, in first-seen order. */
runIds: string[];
|
|
28
|
+
/** Distinct non-empty session ids seen on the summarized events. */
sessionIds: string[];
|
|
29
|
+
/** Total number of events read across all summarized runs. */
totalEvents: number;
|
|
30
|
+
/** Side-effect calls whose matching result was judged successful. */
successfulSideEffects: SideEffectCall[];
|
|
31
|
+
/** Side-effect calls whose matching result was judged unsuccessful. */
failedSideEffects: SideEffectCall[];
|
|
32
|
+
/** Side-effect calls with no matching tool_result event. */
pendingSideEffects: SideEffectCall[];
|
|
33
|
+
/** Calls the classifier could not place as read-only or side-effect. */
unknownEffectCalls: SideEffectCall[];
|
|
34
|
+
/** Number of calls classified as read-only (not itemized). */
readOnlyCount: number;
|
|
35
|
+
/** Events of kind 'error' or carrying a toolError; messages are capped at 500 characters. */
errors: Array<{
|
|
36
|
+
runId: string;
|
|
37
|
+
|
|
38
|
+
ts: string;
|
|
39
|
+
message: string;
|
|
39
|
+
}>;
|
|
40
|
+
/** Text of the last non-blank llm_text event, when there is one. */
lastAssistantText?: string;
|
|
41
|
+
/** 'session_end' if such an event exists; otherwise 'error' when any error was collected; otherwise 'in_progress'. */
ended: 'session_end' | 'error' | 'in_progress';
|
|
42
|
+
}
|
|
43
|
+
/** Reads the events of the given run id(s) from the event log (a new EventLog by default) and buckets their tool calls into a RunSummary. */
export declare function summarizeRunSideEffects(runIds: string | string[], eventLog?: EventLog): RunSummary;
|
|
44
|
+
/** True when the summary holds at least one successful, failed, pending, or unknown-effect call; read-only calls and errors alone do not count. */
export declare function hasOperationalActivity(summary: RunSummary): boolean;
|
|
45
|
+
/** Collects distinct recipients from the array-valued `to`/`toRecipients`/`recipients`/`cc`/`bcc` inputs plus the first non-blank string among `to`/`recipient`/`email`/`to_email`/`toEmail`. */
export declare function extractRecipients(input: Record<string, unknown>): string[];
|
|
46
|
+
/** Renders the owner-facing overflow message (grouped counts plus a continue/done instruction), limited to 1900 characters. */
export declare function formatOverflowRecoveryMessage(summary: RunSummary): string;
|
|
47
|
+
/** Builds the "[Resume context]" prompt that lists prior side effects followed by the original request. */
export declare function buildContinuationPrompt(summary: RunSummary, originalRequest: string): string;
|
|
48
|
+
//# sourceMappingURL=run-summary.d.ts.map
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run summaries derived from the durable Clementine event log.
|
|
3
|
+
*
|
|
4
|
+
* The event log is the source of truth for "what actually fired" during an
|
|
5
|
+
* SDK run. This module turns raw tool_call/tool_result rows into a compact,
|
|
6
|
+
* provider-neutral summary that chat overflow recovery can show to the owner
|
|
7
|
+
* and feed back into a fresh continuation prompt.
|
|
8
|
+
*/
|
|
9
|
+
import { EventLog } from '../gateway/event-log.js';
|
|
10
|
+
import { classifyToolCall, isToolResultSuccessful, normalizedToolResultPayload, } from './side-effect-classifier.js';
|
|
11
|
+
/**
 * Normalizes a run-id argument into an ordered list of distinct, non-empty ids.
 *
 * @param runIds A single run id or an array of run ids.
 * @returns The ids in first-seen order, with falsy entries and repeats removed.
 */
function uniqueRunIds(runIds) {
    const candidates = Array.isArray(runIds) ? runIds : [runIds];
    const seen = new Set();
    return candidates.filter((id) => {
        if (!id || seen.has(id))
            return false;
        seen.add(id);
        return true;
    });
}
|
|
21
|
+
/**
 * Coerces a logged tool input into a plain object.
 *
 * @param value Whatever was stored as the tool input.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a fresh empty object.
 */
function asInput(value) {
    const isRecord = typeof value === 'object' && value !== null && !Array.isArray(value);
    if (!isRecord)
        return {};
    return value;
}
|
|
26
|
+
/**
 * Tests whether an event-log row is a usable tool_call record.
 *
 * @param event An event-log row.
 * @returns true only for `kind === 'tool_call'` rows whose `toolName` and
 *          `toolUseId` are both non-empty strings.
 */
function isToolCall(event) {
    if (event.kind !== 'tool_call')
        return false;
    const nonEmptyString = (candidate) => typeof candidate === 'string' && candidate.length > 0;
    return nonEmptyString(event.toolName) && nonEmptyString(event.toolUseId);
}
|
|
33
|
+
/**
 * Locates the tool_result row that answers a given tool call.
 *
 * @param events Event-log rows to scan, in order.
 * @param toolUseId The tool-use id of the call.
 * @returns The first `tool_result` event carrying that id, or undefined.
 */
function resultForToolUse(events, toolUseId) {
    for (const candidate of events) {
        if (candidate.kind === 'tool_result' && candidate.toolUseId === toolUseId)
            return candidate;
    }
    return undefined;
}
|
|
36
|
+
/**
 * Evaluates a tool_result event for success.
 *
 * @param result The tool_result event, or a falsy value when none was found.
 * @returns undefined when there is no result; otherwise the verdict from
 *          isToolResultSuccessful, with a present `toolError` treated as an SDK error.
 */
function summarizeResult(result) {
    return result
        ? isToolResultSuccessful(result.toolResult, Boolean(result.toolError))
        : undefined;
}
|
|
41
|
+
/**
 * Assembles the summary record for one tool call.
 *
 * @param call The tool_call event.
 * @param result The matching tool_result event, if any.
 * @param verdict The classification of the call.
 * @returns A record with the call's identity fields and verdict; a `result`
 *          object is attached only when a tool_result event was supplied.
 */
function makeCall(call, result, verdict) {
    const outcome = summarizeResult(result);
    const base = {
        runId: call.runId,
        toolName: call.toolName,
        toolUseId: call.toolUseId,
        input: asInput(call.toolInput),
        timestamp: call.ts,
        verdict,
    };
    if (!outcome)
        return base;
    // statusCode and error are only present on the record when they carry a value.
    const statusPart = outcome.statusCode !== undefined ? { statusCode: outcome.statusCode } : {};
    const errorPart = outcome.error ? { error: outcome.error } : {};
    return {
        ...base,
        result: {
            successful: outcome.successful,
            raw: normalizedToolResultPayload(result?.toolResult),
            reason: outcome.reason,
            ...statusPart,
            ...errorPart,
        },
    };
}
|
|
61
|
+
/**
 * Summarizes what one or more runs did, based on their event-log rows.
 *
 * Every tool_call event is classified and placed in one bucket:
 * read-only calls are only counted; unknown-effect calls are listed; side-effect
 * calls are listed as pending (no tool_result found), successful, or failed.
 *
 * @param runIds A run id or list of run ids; blanks and repeats are ignored.
 * @param eventLog Source of events; a new EventLog is created when omitted.
 * @returns The assembled RunSummary.
 */
export function summarizeRunSideEffects(runIds, eventLog = new EventLog()) {
    const ids = uniqueRunIds(runIds);
    const events = ids.flatMap((runId) => eventLog.readByRun(runId));
    const sessionIds = Array.from(new Set(events.map((event) => event.sessionId).filter((id) => !!id)));
    // Index tool_result rows once instead of scanning the whole event list per
    // call. The first result seen for an id wins, matching a front-to-back search.
    const resultsByToolUse = new Map();
    for (const event of events) {
        if (event.kind === 'tool_result' && !resultsByToolUse.has(event.toolUseId)) {
            resultsByToolUse.set(event.toolUseId, event);
        }
    }
    const successfulSideEffects = [];
    const failedSideEffects = [];
    const pendingSideEffects = [];
    const unknownEffectCalls = [];
    let readOnlyCount = 0;
    for (const call of events.filter(isToolCall)) {
        const verdict = classifyToolCall(call.toolName, asInput(call.toolInput));
        if (verdict.kind === 'read_only') {
            // Read-only calls are counted, never itemized, so no record is built.
            readOnlyCount += 1;
            continue;
        }
        const result = resultsByToolUse.get(call.toolUseId);
        const item = makeCall(call, result, verdict);
        if (verdict.kind === 'unknown') {
            unknownEffectCalls.push(item);
            continue;
        }
        if (!result) {
            pendingSideEffects.push(item);
            continue;
        }
        if (item.result?.successful)
            successfulSideEffects.push(item);
        else
            failedSideEffects.push(item);
    }
    const errors = events
        .filter((event) => event.kind === 'error' || event.toolError)
        .map((event) => ({
        runId: event.runId,
        ts: event.ts,
        message: String(event.toolError ?? event.text ?? 'error').slice(0, 500),
    }));
    const lastAssistantText = [...events]
        .reverse()
        .find((event) => event.kind === 'llm_text' && typeof event.text === 'string' && event.text.trim())
        ?.text;
    const ended = events.some((event) => event.kind === 'session_end')
        ? 'session_end'
        : errors.length > 0 ? 'error' : 'in_progress';
    return {
        runIds: ids,
        sessionIds,
        totalEvents: events.length,
        successfulSideEffects,
        failedSideEffects,
        pendingSideEffects,
        unknownEffectCalls,
        readOnlyCount,
        errors,
        ...(lastAssistantText ? { lastAssistantText } : {}),
        ended,
    };
}
|
|
119
|
+
/**
 * Reports whether a summary contains anything beyond read-only activity.
 *
 * @param summary The run summary to inspect.
 * @returns true when any of the successful, failed, pending, or unknown-effect
 *          lists is non-empty.
 */
export function hasOperationalActivity(summary) {
    const buckets = [
        summary.successfulSideEffects,
        summary.failedSideEffects,
        summary.pendingSideEffects,
        summary.unknownEffectCalls,
    ];
    return buckets.some((bucket) => bucket.length > 0);
}
|
|
125
|
+
/**
 * Maps a tool name to the human-readable group label used in summaries.
 *
 * Matching is by case-insensitive substring, except for the exact name 'Bash'.
 *
 * @param toolName The tool name from the event log.
 * @returns 'email sends', 'CRM mutations', 'Bash commands', or the tool name
 *          unchanged when no group applies.
 */
function toolKindLabel(toolName) {
    const lowered = toolName.toLowerCase();
    const containsAny = (needles) => needles.some((needle) => lowered.includes(needle));
    if (containsAny(['email', 'gmail', 'outlook', 'send_email']))
        return 'email sends';
    if (containsAny(['salesforce', '__sf_', 'sfdc']))
        return 'CRM mutations';
    return toolName === 'Bash' ? 'Bash commands' : toolName;
}
|
|
137
|
+
/**
 * Groups calls by their tool-kind label.
 *
 * @param calls Summary call records.
 * @returns One `{ label, count, calls }` entry per label, in the order each
 *          label was first encountered.
 */
function groupCounts(calls) {
    const buckets = new Map();
    for (const call of calls) {
        const label = toolKindLabel(call.toolName);
        const bucket = buckets.get(label);
        if (bucket)
            bucket.push(call);
        else
            buckets.set(label, [call]);
    }
    return [...buckets].map(([label, grouped]) => ({ label, count: grouped.length, calls: grouped }));
}
|
|
145
|
+
/**
 * Picks the first argument that is a non-blank string.
 *
 * @param values Candidate values of any type.
 * @returns That string with surrounding whitespace trimmed, or undefined when
 *          no argument qualifies.
 */
function firstString(...values) {
    const hit = values.find((candidate) => typeof candidate === 'string' && candidate.trim() !== '');
    return hit === undefined ? undefined : hit.trim();
}
|
|
152
|
+
/**
 * Extracts display strings from an array-valued recipient field.
 *
 * String items are kept as-is (untrimmed); object items contribute the first
 * non-blank string among their `email`, `address`, `mail`, and `name`
 * properties; everything else is ignored.
 *
 * @param value The raw field value.
 * @returns The collected non-empty strings, or an empty array when `value` is
 *          not an array.
 */
function arrayStrings(value) {
    if (!Array.isArray(value))
        return [];
    const collected = [];
    for (const item of value) {
        if (typeof item === 'string') {
            if (item)
                collected.push(item);
        }
        else if (item && typeof item === 'object') {
            const label = firstString(item.email, item.address, item.mail, item.name);
            if (label)
                collected.push(label);
        }
    }
    return collected;
}
|
|
167
|
+
/**
 * Collects the distinct recipients named in a tool input.
 *
 * Array-valued `to`, `toRecipients`, `recipients`, `cc`, and `bcc` fields are
 * read first, followed by the first non-blank string among `to`, `recipient`,
 * `email`, `to_email`, and `toEmail`.
 *
 * @param input The tool input object.
 * @returns Recipients in first-seen order without duplicates.
 */
export function extractRecipients(input) {
    const listFields = [input.to, input.toRecipients, input.recipients, input.cc, input.bcc];
    const unique = new Set();
    for (const field of listFields) {
        for (const recipient of arrayStrings(field))
            unique.add(recipient);
    }
    const single = firstString(input.to, input.recipient, input.email, input.to_email, input.toEmail);
    if (single)
        unique.add(single);
    return Array.from(unique);
}
|
|
180
|
+
/**
 * Reads a subject line from a tool input.
 *
 * @param input The tool input object.
 * @returns The first non-blank string among `subject` and `title`, or undefined.
 */
function extractSubject(input) {
    const { subject, title } = input;
    return firstString(subject, title);
}
|
|
183
|
+
/**
 * Finds an identifier in a normalized tool result.
 *
 * Looks for a non-blank string under `logId`, `log_id`, or `id`, descending
 * into a nested `data` object when the current level has none.
 *
 * @param raw The normalized tool-result payload.
 * @returns The identifier, or undefined when `raw` is not an object or no
 *          identifier is found.
 */
function extractProviderLogId(raw) {
    if (raw === null || typeof raw !== 'object')
        return undefined;
    const direct = firstString(raw.logId, raw.log_id, raw.id);
    if (direct != null)
        return direct;
    const nested = raw.data;
    return nested && typeof nested === 'object' ? extractProviderLogId(nested) : undefined;
}
|
|
190
|
+
/**
 * Describes the outcome of a call that has a recorded result.
 *
 * A result explicitly marked unsuccessful is reported as "failed". Without this
 * branch, calls listed under "Failed side effects" in the continuation prompt
 * would be described as "succeeded"/"accepted", contradicting their heading.
 *
 * @param call A summary call record.
 * @returns "failed", "failed (<status>)", optionally followed by ": <error>",
 *          for unsuccessful results; otherwise "accepted (<status>)" for email
 *          sends with a status code, "succeeded (<status>)" for other calls
 *          with a status code, or plain "succeeded".
 */
function statusPhrase(call) {
    const status = call.result?.statusCode;
    if (call.result?.successful === false) {
        const base = status ? `failed (${status})` : 'failed';
        // Keep the error on one line and bounded so each entry stays a single bullet.
        const error = call.result.error
            ? String(call.result.error).replace(/\s+/g, ' ').trim().slice(0, 200)
            : '';
        return error ? `${base}: ${error}` : base;
    }
    if (status && toolKindLabel(call.toolName) === 'email sends')
        return `accepted (${status})`;
    if (status)
        return `succeeded (${status})`;
    return 'succeeded';
}
|
|
198
|
+
/**
 * Builds a short parenthesized recipient list for a group of calls.
 *
 * @param calls Summary call records.
 * @param max Maximum number of recipients to spell out (default 3).
 * @returns '' when no call has recipients; otherwise ' (a, b, c, +N more)'
 *          with the '+N more' part only when recipients were left out.
 */
function recipientPreview(calls, max = 3) {
    const everyone = [];
    for (const call of calls)
        everyone.push(...extractRecipients(call.input));
    if (everyone.length === 0)
        return '';
    const visible = everyone.slice(0, max);
    const hidden = everyone.length - visible.length;
    const overflow = hidden > 0 ? `, +${hidden} more` : '';
    return ` (${visible.join(', ')}${overflow})`;
}
|
|
206
|
+
/**
 * Renders one bullet line per tool-kind group.
 *
 * @param prefix Status wording placed after the group label (e.g. 'completed').
 * @param calls Summary call records to group.
 * @returns Lines of the form '- <count> <label> <prefix><recipient preview>'.
 */
function formatGroupedLines(prefix, calls) {
    const rendered = [];
    for (const { count, label, calls: members } of groupCounts(calls)) {
        rendered.push(`- ${count} ${label} ${prefix}${recipientPreview(members)}`);
    }
    return rendered;
}
|
|
209
|
+
/**
 * Renders the owner-facing message shown after a run hits the context limit.
 *
 * The message lists completed work, items needing attention, and the read-only
 * call count, and always ends with the `continue` / `done` instruction. The
 * total length never exceeds 1900 characters. When the listing is too long it
 * is cut at a line boundary and a truncation notice is added, so the closing
 * instruction is never lost (a plain tail slice could remove it).
 *
 * @param summary The run summary to describe.
 * @returns The formatted message.
 */
export function formatOverflowRecoveryMessage(summary) {
    const maxLength = 1900;
    const footer = 'Reply `continue` within 30 minutes to resume from this state, or `done` to stop here.';
    const lines = [
        'That run hit the context limit after some work had already happened.',
        '',
    ];
    if (summary.successfulSideEffects.length > 0) {
        lines.push('Completed before overflow:');
        lines.push(...formatGroupedLines('completed', summary.successfulSideEffects));
        lines.push('');
    }
    if (summary.failedSideEffects.length > 0 || summary.pendingSideEffects.length > 0 || summary.unknownEffectCalls.length > 0) {
        lines.push('Needs attention:');
        if (summary.failedSideEffects.length > 0)
            lines.push(...formatGroupedLines('failed', summary.failedSideEffects));
        if (summary.pendingSideEffects.length > 0)
            lines.push(...formatGroupedLines('started, no confirmation', summary.pendingSideEffects));
        if (summary.unknownEffectCalls.length > 0)
            lines.push(`- ${summary.unknownEffectCalls.length} tool call(s) had unknown external effect`);
        lines.push('');
    }
    if (summary.readOnlyCount > 0) {
        lines.push(`Read-only tool calls before overflow: ${summary.readOnlyCount}`);
        lines.push('');
    }
    // `lines` always ends with a blank entry, so `body` ends with a newline.
    const body = lines.join('\n');
    const full = `${body}\n${footer}`;
    if (full.length <= maxLength)
        return full;
    // Too long: keep whole lines from the top and reserve room for the footer.
    const tail = `\n- …(truncated; full details are in the event log)\n\n${footer}`;
    const clipped = body.slice(0, maxLength - tail.length);
    const lastBreak = clipped.lastIndexOf('\n');
    const kept = lastBreak > 0 ? clipped.slice(0, lastBreak) : clipped;
    return `${kept}${tail}`;
}
|
|
236
|
+
/**
 * Renders one call as a detailed bullet for the continuation prompt.
 *
 * Segments, joined by ' · ': tool-kind label, recipients, subject, outcome
 * (or 'started, no confirmation' when there is no result), provider log id,
 * and run id. Segments with no value are omitted.
 *
 * @param call A summary call record.
 * @returns The bullet line, starting with '- '.
 */
function formatDetailedCall(call) {
    const recipients = extractRecipients(call.input);
    const subject = extractSubject(call.input);
    const logId = call.result ? extractProviderLogId(call.result.raw) : undefined;
    const parts = [toolKindLabel(call.toolName)];
    if (recipients.length)
        parts.push(`to ${recipients.join(', ')}`);
    if (subject)
        parts.push(`subject "${subject}"`);
    parts.push(call.result ? statusPhrase(call) : 'started, no confirmation');
    if (logId)
        parts.push(`logId ${logId}`);
    parts.push(`run ${call.runId}`);
    // An empty label (empty tool name) is dropped rather than left as a blank segment.
    return `- ${parts.filter(Boolean).join(' · ')}`;
}
|
|
250
|
+
/**
 * Appends a headed, capped list of detailed call bullets to `lines`.
 * Does nothing for an empty list. When more calls exist than `cap`, an
 * "...and N more" bullet records how many were left out.
 */
function appendCappedCallSection(lines, heading, calls, cap, overflowNoun) {
    if (calls.length === 0)
        return;
    lines.push(heading);
    for (const call of calls.slice(0, cap))
        lines.push(formatDetailedCall(call));
    if (calls.length > cap)
        lines.push(`- ...and ${calls.length - cap} more ${overflowNoun} side effects in the event log.`);
    lines.push('');
}
/**
 * Builds the prompt used to resume work in a fresh run after a context overflow.
 *
 * The prompt is wrapped in "[Resume context]" markers and lists completed
 * (up to 80), failed (up to 30), and pending (up to 30) side effects, the
 * count of unknown-effect calls, and the original request. Every capped list
 * states how many entries were left out, so the resumed run is not misled into
 * thinking the visible entries are the complete set.
 *
 * @param summary Summary of the previous run(s).
 * @param originalRequest The owner's original request text, included verbatim.
 * @returns The continuation prompt.
 */
export function buildContinuationPrompt(summary, originalRequest) {
    const lines = [];
    lines.push('[Resume context — read this before taking any action]');
    lines.push(`The previous SDK run(s) hit a context limit: ${summary.runIds.join(', ')}`);
    lines.push('Some tool calls may already have changed external state. DO NOT re-run completed side effects.');
    lines.push('');
    appendCappedCallSection(lines, 'Completed side effects:', summary.successfulSideEffects, 80, 'completed');
    appendCappedCallSection(lines, 'Failed side effects that may need retry or reconciliation:', summary.failedSideEffects, 30, 'failed');
    appendCappedCallSection(lines, 'Side-effect calls that started but had no confirmation. Check before retrying:', summary.pendingSideEffects, 30, 'unconfirmed');
    if (summary.unknownEffectCalls.length > 0) {
        lines.push(`Unknown-effect tool calls: ${summary.unknownEffectCalls.length}. Treat these cautiously and inspect if relevant.`);
        lines.push('');
    }
    lines.push('Original owner request:');
    lines.push(originalRequest);
    lines.push('');
    lines.push('Continue from where the previous run stopped. Focus on remaining follow-up, cleanup, reconciliation, or status reporting. DO NOT re-find inputs or re-execute completed sends/updates/deletes.');
    lines.push('[/Resume context]');
    return lines.join('\n');
}
|
|
287
|
+
//# sourceMappingURL=run-summary.js.map
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-neutral side-effect classification for SDK tool calls.
|
|
3
|
+
*
|
|
4
|
+
* This module answers two intentionally small questions:
|
|
5
|
+
* - Did this tool likely mutate external or durable state?
|
|
6
|
+
* - Did the matching tool_result represent a successful execution?
|
|
7
|
+
*
|
|
8
|
+
* Provider-specific details belong in extractors/summaries. The classifier
|
|
9
|
+
* keeps the global npm behavior conservative: confident read-only calls stay
|
|
10
|
+
* read-only, confident mutating calls are side effects, and unclear calls are
|
|
11
|
+
* surfaced as unknown rather than blocked or retried automatically.
|
|
12
|
+
*/
|
|
13
|
+
/** Classification of a tool call; `reason` is a short tag naming the rule that produced the verdict. */
export type SideEffectVerdict = {
|
|
14
|
+
/** The call was classified as changing external or durable state. */
kind: 'side_effect';
|
|
15
|
+
reason: string;
|
|
16
|
+
} | {
|
|
17
|
+
/** The call was classified as not changing state. */
kind: 'read_only';
|
|
18
|
+
reason: string;
|
|
19
|
+
} | {
|
|
20
|
+
/** No rule matched (or the tool name was missing). */
kind: 'unknown';
|
|
21
|
+
reason: string;
|
|
22
|
+
};
|
|
23
|
+
/** Classifies a tool call from its name; `input` is consulted only for the `Bash` tool, whose `command` string is pattern-matched. */
export declare function classifyToolCall(toolName: string, input?: Record<string, unknown>): SideEffectVerdict;
|
|
24
|
+
/** Outcome of evaluating a tool result for success. */
export interface ToolResultSuccess {
|
|
25
|
+
/** Whether the result is considered a successful execution. */
successful: boolean;
|
|
26
|
+
/** Short tag naming the signal that decided the verdict (e.g. 'status-2xx', 'no-error-signal'). */
reason: string;
|
|
27
|
+
/** Status code found in the payload (100-599), when the verdict was decided by it. */
statusCode?: number;
|
|
28
|
+
/** Error text found in the payload, when any. */
error?: string;
|
|
29
|
+
}
|
|
30
|
+
/** Judges a raw tool result; `sdkIsError` (default false) forces a failed verdict, and a payload with no error signal counts as successful. */
export declare function isToolResultSuccessful(rawResult: unknown, sdkIsError?: boolean): ToolResultSuccess;
|
|
31
|
+
/** Returns the result payload after JSON-string parsing and unwrapping of single-element `text`/`content` arrays. */
export declare function normalizedToolResultPayload(value: unknown): unknown;
|
|
32
|
+
//# sourceMappingURL=side-effect-classifier.d.ts.map
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-neutral side-effect classification for SDK tool calls.
|
|
3
|
+
*
|
|
4
|
+
* This module answers two intentionally small questions:
|
|
5
|
+
* - Did this tool likely mutate external or durable state?
|
|
6
|
+
* - Did the matching tool_result represent a successful execution?
|
|
7
|
+
*
|
|
8
|
+
* Provider-specific details belong in extractors/summaries. The classifier
|
|
9
|
+
* keeps the global npm behavior conservative: confident read-only calls stay
|
|
10
|
+
* read-only, confident mutating calls are side effects, and unclear calls are
|
|
11
|
+
* surfaced as unknown rather than blocked or retried automatically.
|
|
12
|
+
*/
|
|
13
|
+
// Built-in tool names that are always classified as read-only (exact match).
const READ_ONLY_BUILTINS = new Set([
    'Agent', 'Glob', 'Grep', 'LS', 'Read',
    'Task', 'TodoRead', 'TodoWrite', 'WebFetch', 'WebSearch',
]);
// Built-in tool names that are always classified as side effects (exact match).
const MUTATING_BUILTINS = new Set(['Edit', 'MultiEdit', 'NotebookEdit', 'Write']);
// Name tokens that mark a tool as read-only when no side-effect token is present.
const READ_ONLY_TOOL_VERBS = new Set([
    'describe', 'fetch', 'find', 'get', 'inbox', 'list',
    'lookup', 'query', 'read', 'retrieve', 'search', 'select',
]);
// Name tokens that mark a tool as a side effect.
const SIDE_EFFECT_TOOL_VERBS = new Set([
    'add', 'apply', 'approve', 'archive', 'assign',
    'cancel', 'compose', 'create', 'delete', 'deploy',
    'disable', 'enable', 'forward', 'insert', 'merge',
    'move', 'post', 'publish', 'push', 'remove',
    'rename', 'reply', 'send', 'set', 'subscribe',
    'unsubscribe', 'update', 'upload', 'upsert',
]);
// Full-name patterns that classify a tool as read-only before any token check.
const READ_ONLY_MCP_PATTERNS = [
    // Any tool under these two server prefixes.
    /^mcp__dataforseo__/i,
    /^mcp__bright[_-]?data__/i,
    // A segment that starts with a read verb ...
    /^mcp__.*__(?:get|list|search|find|fetch|read|query|describe|retrieve|lookup|inbox|select)(?:_|$)/i,
    // ... or a name that ends with one.
    /^mcp__.*__(?:.*_)?(?:get|list|search|find|fetch|read|query|describe|retrieve|lookup|inbox|select)$/i,
];
// Bash command patterns treated as mutations; checked before the read-only list.
const BASH_SIDE_EFFECT_PATTERNS = [
    /\b(rm|mv|cp|mkdir|touch|chmod|chown)\b/i, // file-system commands
    /(^|[^>])>{1,2}[^>]/, // output redirection (> or >>)
    /\btee\b/i,
    /\bgit\s+(commit|push|merge|rebase|tag)\b/i,
    /\bnpm\s+(install|publish|update)\b/i,
    /\b(?:sf|sfdx)\s+data\s+(update|delete|create|upsert)\b/i,
    /\b(?:sf|sfdx)\s+org\s+(create|delete)\b/i,
    /\bcurl\b.*(?:-X|--request)\s*(POST|PUT|DELETE|PATCH)\b/i, // explicit non-GET curl
    /\bpython3?\s+\S*(send|sender|publish|deploy|migrate|push|upload)/i, // script names that suggest a mutation
];
// Bash commands treated as read-only; matched against the start of the command only.
const BASH_READ_ONLY_PATTERNS = [
    /^\s*(?:pwd|ls|find|rg|grep|sed|awk|cat|head|tail|wc|jq|git\s+(?:status|diff|show|log|branch|rev-parse)|npm\s+(?:view|ls|outdated))\b/i,
];
|
|
96
|
+
/**
 * Splits a tool name into lowercase alphanumeric tokens.
 *
 * A break is inserted wherever a lowercase letter or digit is followed by an
 * uppercase letter, so camelCase names yield one token per word.
 *
 * @param toolName The tool name to tokenize.
 * @returns The tokens in order; empty when the name has no alphanumerics.
 */
function tokensForToolName(toolName) {
    const snakeLike = toolName.replace(/([a-z0-9])([A-Z])/g, '$1_$2').toLowerCase();
    return snakeLike.match(/[a-z0-9]+/g) ?? [];
}
|
|
103
|
+
/**
 * Reads the shell command from a Bash tool input.
 *
 * @param input The tool input; may be null or undefined.
 * @returns `input.command` when it is a string, otherwise ''.
 */
function extractBashCommand(input) {
    const command = input?.command;
    if (typeof command !== 'string')
        return '';
    return command;
}
|
|
106
|
+
/**
 * Classifies a tool call as a side effect, read-only, or unknown.
 *
 * Rules are applied in order and the first match wins: missing name, exact
 * built-in names, Bash command patterns (mutation patterns before read-only
 * ones), read-only full-name patterns, side-effect name tokens, then read-only
 * name tokens.
 *
 * NOTE(review): the read-only name patterns are checked before the side-effect
 * tokens, so a name containing a mutating verb but ending in a read-style word
 * (for example "..._add_to_list") is reported as read-only. Confirm this is intended.
 *
 * @param toolName The tool name.
 * @param input The tool input; only its `command` string is used, and only for Bash.
 * @returns The verdict with a `reason` tag naming the rule that matched.
 */
export function classifyToolCall(toolName, input) {
    const verdict = (kind, reason) => ({ kind, reason });
    if (!toolName)
        return verdict('unknown', 'missing-tool-name');
    if (READ_ONLY_BUILTINS.has(toolName))
        return verdict('read_only', 'known-readonly-builtin');
    if (MUTATING_BUILTINS.has(toolName))
        return verdict('side_effect', 'known-mutating-builtin');
    if (toolName === 'Bash') {
        const command = extractBashCommand(input);
        const matchesCommand = (re) => re.test(command);
        if (BASH_SIDE_EFFECT_PATTERNS.some(matchesCommand))
            return verdict('side_effect', 'bash-mutation-pattern');
        if (BASH_READ_ONLY_PATTERNS.some(matchesCommand))
            return verdict('read_only', 'bash-readonly-pattern');
        return verdict('unknown', 'bash-uncategorized');
    }
    if (READ_ONLY_MCP_PATTERNS.some((re) => re.test(toolName)))
        return verdict('read_only', 'known-readonly-tool-pattern');
    const tokens = tokensForToolName(toolName);
    if (tokens.some((token) => SIDE_EFFECT_TOOL_VERBS.has(token)))
        return verdict('side_effect', 'side-effect-verb-match');
    if (tokens.some((token) => READ_ONLY_TOOL_VERBS.has(token)))
        return verdict('read_only', 'read-only-verb-match');
    return verdict('unknown', 'unclassified-tool-name');
}
|
|
140
|
+
/**
 * Parses a value as JSON when it is a string that looks like JSON.
 *
 * Only strings whose trimmed form starts with `[`, `{`, or `"` are parsed.
 *
 * @param value Any value.
 * @returns The parsed JSON on success; otherwise the original value unchanged
 *          (non-strings, other strings, and strings that fail to parse).
 */
function parseMaybeJsonString(value) {
    if (typeof value !== 'string')
        return value;
    const text = value.trim();
    const opener = text.charAt(0);
    if (opener !== '[' && opener !== '{' && opener !== '"')
        return value;
    try {
        return JSON.parse(text);
    }
    catch {
        return value;
    }
}
|
|
153
|
+
/**
 * Reduces a raw tool result to its innermost payload.
 *
 * JSON-looking strings are parsed. A one-element array whose element is an
 * object is unwrapped through that element's string `text` (preferred) or its
 * `content` property, repeating until neither applies.
 *
 * @param value The raw tool result.
 * @returns The unwrapped payload.
 */
function normalizeResultPayload(value) {
    const parsed = parseMaybeJsonString(value);
    const sole = Array.isArray(parsed) && parsed.length === 1 ? parsed[0] : undefined;
    if (sole && typeof sole === 'object') {
        if (typeof sole.text === 'string')
            return normalizeResultPayload(sole.text);
        if ('content' in sole)
            return normalizeResultPayload(sole.content);
    }
    return parsed;
}
|
|
167
|
+
/**
 * Searches a payload for an HTTP-style status code.
 *
 * At each level the keys `status_code`, `statusCode`, `status`, `httpStatus`,
 * and `code` are tried in that order; a numeric or numeric-string value
 * between 100 and 599 is accepted. If none qualifies, the nested `data`,
 * `response`, and `result` values are searched in that order.
 *
 * @param value The payload to search.
 * @returns The status code, or undefined when none is found.
 */
function findStatusCode(value) {
    if (value === null || typeof value !== 'object')
        return undefined;
    const toNumber = (raw) => (typeof raw === 'number' ? raw : typeof raw === 'string' ? Number(raw) : NaN);
    const isStatus = (n) => Number.isFinite(n) && n >= 100 && n <= 599;
    const direct = ['status_code', 'statusCode', 'status', 'httpStatus', 'code']
        .map((key) => toNumber(value[key]))
        .find(isStatus);
    if (direct !== undefined)
        return direct;
    for (const container of ['data', 'response', 'result']) {
        const nested = findStatusCode(value[container]);
        if (nested !== undefined)
            return nested;
    }
    return undefined;
}
|
|
184
|
+
/**
 * Extracts an error description from a result payload.
 *
 * The keys `error`, `errors`, and `message` are checked in that order. Values
 * that carry no error information are skipped: null/undefined, false, '', 0,
 * empty arrays, and empty objects. Skipping empty containers matters because
 * success payloads commonly include an empty `errors: []` list, which would
 * otherwise be stringified to "[]" and reported as a failure.
 *
 * NOTE(review): any non-empty `message` is still treated as an error, even
 * though some APIs use `message` for success text — confirm against the
 * providers in use.
 *
 * @param value The payload to inspect.
 * @returns The error string (non-string values are JSON-encoded and capped at
 *          500 characters), or undefined when no error is present.
 */
function findError(value) {
    if (!value || typeof value !== 'object')
        return undefined;
    const obj = value;
    for (const key of ['error', 'errors', 'message']) {
        const raw = obj[key];
        if (raw == null || raw === false || raw === '' || raw === 0)
            continue;
        if (typeof raw === 'string')
            return raw;
        if (Array.isArray(raw) && raw.length === 0)
            continue;
        if (typeof raw === 'object' && !Array.isArray(raw) && Object.keys(raw).length === 0)
            continue;
        try {
            return JSON.stringify(raw).slice(0, 500);
        }
        catch {
            // Unserializable values (cycles, or ones JSON.stringify maps to undefined).
            return String(raw);
        }
    }
    return undefined;
}
|
|
203
|
+
/**
 * Decides whether a tool result represents a successful execution.
 *
 * Signals are checked in order: the SDK error flag; `is_error` / `isError`
 * being true; `successful` / `success` / `ok` being false; an error field;
 * and finally a status code (2xx is success). A payload with none of these
 * signals counts as successful.
 *
 * @param rawResult The raw tool result (normalized before inspection).
 * @param sdkIsError Whether the SDK already flagged the result as an error.
 * @returns The verdict, with `reason` naming the deciding signal.
 */
export function isToolResultSuccessful(rawResult, sdkIsError = false) {
    if (sdkIsError)
        return { successful: false, reason: 'sdk-is-error' };
    const payload = normalizeResultPayload(rawResult);
    if (!payload || typeof payload !== 'object')
        return { successful: true, reason: 'no-error-signal' };
    if (payload.is_error === true || payload.isError === true)
        return { successful: false, reason: 'tool-result-is-error' };
    const explicitFailure = [payload.successful, payload.success, payload.ok].includes(false);
    if (explicitFailure)
        return { successful: false, reason: 'tool-result-success-false', error: findError(payload) };
    const error = findError(payload);
    if (error)
        return { successful: false, reason: 'tool-result-error-field', error };
    const statusCode = findStatusCode(payload);
    if (statusCode === undefined)
        return { successful: true, reason: 'no-error-signal' };
    const isTwoHundred = statusCode >= 200 && statusCode < 300;
    return {
        successful: isTwoHundred,
        reason: isTwoHundred ? 'status-2xx' : 'status-non-2xx',
        statusCode,
    };
}
|
|
229
|
+
/**
 * Public entry point for tool-result normalization.
 *
 * @param value The raw tool result.
 * @returns The payload after JSON-string parsing and single-element
 *          `text`/`content` unwrapping.
 */
export function normalizedToolResultPayload(value) {
    const payload = normalizeResultPayload(value);
    return payload;
}
|
|
232
|
+
//# sourceMappingURL=side-effect-classifier.js.map
|