clementine-agent 1.18.201 → 1.18.203
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -40,12 +40,16 @@
|
|
|
40
40
|
* `StopHookInput` shape including the `stop_hook_active` guard.
|
|
41
41
|
*/
|
|
42
42
|
import type { HookCallbackMatcher, HookEvent } from '@anthropic-ai/claude-agent-sdk';
|
|
43
|
+
import type { LiveRunState } from './run-state.js';
|
|
43
44
|
export interface StopHookOptions {
|
|
44
45
|
/** Stable run identifier for telemetry. */
|
|
45
46
|
runId: string;
|
|
46
47
|
/** Optional abort signal to honor — if it fires, the hook will
|
|
47
48
|
* never re-block. User-initiated stops always win. */
|
|
48
49
|
abortSignal?: AbortSignal;
|
|
50
|
+
/** Live hook-fed run state. When present, Stop can require a final
|
|
51
|
+
* Completed/Pending manifest after external side effects. */
|
|
52
|
+
runState?: LiveRunState;
|
|
49
53
|
/** Optional callback fired on every decision. Useful for the
|
|
50
54
|
* dashboard "What Clementine sees this turn" panel. */
|
|
51
55
|
onDecision?: (info: {
|
|
@@ -62,6 +66,10 @@ export interface StopHookStats {
|
|
|
62
66
|
passed: number;
|
|
63
67
|
/** Stop events where we re-prompted the model to continue. */
|
|
64
68
|
continued: number;
|
|
69
|
+
/** Stop events blocked because live RunState showed unfinished todos. */
|
|
70
|
+
todoContinued: number;
|
|
71
|
+
/** Stop events blocked because side effects were not acknowledged. */
|
|
72
|
+
manifestRequired: number;
|
|
65
73
|
}
|
|
66
74
|
export interface StopHookHandles {
|
|
67
75
|
/** Hook map suitable for SDK `query({ options: { hooks } })`. */
|
|
@@ -40,6 +40,7 @@
|
|
|
40
40
|
* `StopHookInput` shape including the `stop_hook_active` guard.
|
|
41
41
|
*/
|
|
42
42
|
import pino from 'pino';
|
|
43
|
+
import { hasCompletedManifest, summarizeRunStateForManifest } from './run-state.js';
|
|
43
44
|
const logger = pino({ name: 'clementine.chat-stop-hook' });
|
|
44
45
|
/**
|
|
45
46
|
* Phrases in the last assistant message that signal "more work to do."
|
|
@@ -67,7 +68,7 @@ const CONTINUATION_SIGNALS = [
|
|
|
67
68
|
* Build a Stop hook for a chat-initiated agentic run.
|
|
68
69
|
*/
|
|
69
70
|
export function buildChatStopHook(opts) {
|
|
70
|
-
const stats = { inspected: 0, passed: 0, continued: 0 };
|
|
71
|
+
const stats = { inspected: 0, passed: 0, continued: 0, todoContinued: 0, manifestRequired: 0 };
|
|
71
72
|
const stopHook = async (input) => {
|
|
72
73
|
if (input.hook_event_name !== 'Stop')
|
|
73
74
|
return {};
|
|
@@ -112,6 +113,51 @@ export function buildChatStopHook(opts) {
|
|
|
112
113
|
});
|
|
113
114
|
return {};
|
|
114
115
|
}
|
|
116
|
+
const unfinishedTodos = opts.runState?.todo
|
|
117
|
+
? opts.runState.todo.pending + opts.runState.todo.inProgress
|
|
118
|
+
: 0;
|
|
119
|
+
if (unfinishedTodos > 0) {
|
|
120
|
+
stats.todoContinued += 1;
|
|
121
|
+
const reason = `TodoWrite still shows ${unfinishedTodos} unfinished item(s). ` +
|
|
122
|
+
'Keep working until the todo list is complete, or explain the blocker and include a concise Completed/Pending manifest before ending.';
|
|
123
|
+
logger.info({
|
|
124
|
+
runId: opts.runId,
|
|
125
|
+
unfinishedTodos,
|
|
126
|
+
lastMessagePreview,
|
|
127
|
+
}, 'Stop hook re-prompting model because live RunState has unfinished todos');
|
|
128
|
+
opts.onDecision?.({
|
|
129
|
+
decision: 'continue',
|
|
130
|
+
reason,
|
|
131
|
+
lastMessagePreview,
|
|
132
|
+
stopHookActive: false,
|
|
133
|
+
});
|
|
134
|
+
return {
|
|
135
|
+
decision: 'block',
|
|
136
|
+
reason,
|
|
137
|
+
};
|
|
138
|
+
}
|
|
139
|
+
const successfulSideEffects = opts.runState?.successfulSideEffects.length ?? 0;
|
|
140
|
+
if (successfulSideEffects > 0 && !hasCompletedManifest(lastMsg)) {
|
|
141
|
+
stats.manifestRequired += 1;
|
|
142
|
+
const reason = `You completed ${successfulSideEffects} external side effect(s), but your final message does not include the required ` +
|
|
143
|
+
'`✅ **Completed**` manifest. Confirm what was done before ending.\n\n' +
|
|
144
|
+
summarizeRunStateForManifest(opts.runState);
|
|
145
|
+
logger.info({
|
|
146
|
+
runId: opts.runId,
|
|
147
|
+
successfulSideEffects,
|
|
148
|
+
lastMessagePreview,
|
|
149
|
+
}, 'Stop hook requiring completion manifest for successful side effects');
|
|
150
|
+
opts.onDecision?.({
|
|
151
|
+
decision: 'continue',
|
|
152
|
+
reason,
|
|
153
|
+
lastMessagePreview,
|
|
154
|
+
stopHookActive: false,
|
|
155
|
+
});
|
|
156
|
+
return {
|
|
157
|
+
decision: 'block',
|
|
158
|
+
reason,
|
|
159
|
+
};
|
|
160
|
+
}
|
|
115
161
|
// ── Detection: did the model say it would continue? ──────────
|
|
116
162
|
const continuationMatched = CONTINUATION_SIGNALS.some((rx) => rx.test(lastMsg));
|
|
117
163
|
if (!continuationMatched) {
|
package/dist/agent/run-agent.js
CHANGED
|
@@ -99,6 +99,7 @@ import { buildGuardHooks } from './tool-output-guard.js';
|
|
|
99
99
|
import { buildDedupHook } from './tool-call-dedup.js';
|
|
100
100
|
import { buildSideEffectIdempotencyHook } from './side-effect-idempotency.js';
|
|
101
101
|
import { buildChatStopHook } from './chat-stop-hook.js';
|
|
102
|
+
import { buildRunStateHooks } from './run-state.js';
|
|
102
103
|
import { buildAgentMap } from './agent-definitions.js';
|
|
103
104
|
import { buildExecutionToolPolicy, } from './execution-policy.js';
|
|
104
105
|
const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
|
|
@@ -469,6 +470,13 @@ export async function runAgent(prompt, opts) {
|
|
|
469
470
|
});
|
|
470
471
|
},
|
|
471
472
|
});
|
|
473
|
+
// ── Live RunState hook (1.18.202) ─────────────────────────────────
|
|
474
|
+
// Hook-fed active-run state for Stop decisions. Durable history remains
|
|
475
|
+
// EventLog; this cache exists only while a run is active.
|
|
476
|
+
const runState = buildRunStateHooks({
|
|
477
|
+
runId,
|
|
478
|
+
sessionKey: opts.sessionKey,
|
|
479
|
+
});
|
|
472
480
|
// ── Chat persistence Stop hook (1.18.184, source='chat' only) ─────
|
|
473
481
|
// Keeps chat-initiated multi-step jobs running until they finish.
|
|
474
482
|
// Inspects the model's last assistant message for continuation
|
|
@@ -484,6 +492,7 @@ export async function runAgent(prompt, opts) {
|
|
|
484
492
|
? buildChatStopHook({
|
|
485
493
|
runId,
|
|
486
494
|
...(opts.abortSignal ? { abortSignal: opts.abortSignal } : {}),
|
|
495
|
+
runState: runState.state,
|
|
487
496
|
onDecision: (info) => {
|
|
488
497
|
if (info.decision !== 'continue')
|
|
489
498
|
return;
|
|
@@ -504,6 +513,10 @@ export async function runAgent(prompt, opts) {
|
|
|
504
513
|
const existing = mergedHooks[evt] ?? [];
|
|
505
514
|
mergedHooks[evt] = [...existing, ...matchers];
|
|
506
515
|
}
|
|
516
|
+
for (const [evt, matchers] of Object.entries(runState.hooks)) {
|
|
517
|
+
const existing = mergedHooks[evt] ?? [];
|
|
518
|
+
mergedHooks[evt] = [...existing, ...matchers];
|
|
519
|
+
}
|
|
507
520
|
for (const [evt, matchers] of Object.entries(dedup.hooks)) {
|
|
508
521
|
const existing = mergedHooks[evt] ?? [];
|
|
509
522
|
mergedHooks[evt] = [...existing, ...matchers];
|
|
@@ -861,6 +874,13 @@ export async function runAgent(prompt, opts) {
|
|
|
861
874
|
recorded: idempotency.stats.recorded,
|
|
862
875
|
failedNotRecorded: idempotency.stats.failedNotRecorded,
|
|
863
876
|
} : undefined,
|
|
877
|
+
runState: runState.stats.inspected > 0 ? {
|
|
878
|
+
inspected: runState.stats.inspected,
|
|
879
|
+
sideEffects: runState.stats.sideEffects,
|
|
880
|
+
todosUpdated: runState.stats.todosUpdated,
|
|
881
|
+
successfulSideEffects: runState.state.successfulSideEffects.length,
|
|
882
|
+
failedSideEffects: runState.state.failedSideEffects.length,
|
|
883
|
+
} : undefined,
|
|
864
884
|
}, 'runAgent: query complete');
|
|
865
885
|
// PRD §6 Phase 4e: subagent transcript backfill (Path C). The SDK persists
|
|
866
886
|
// every subagent's full message stream to ~/.claude/projects/<encoded-cwd>/
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live per-run state fed by SDK hooks.
|
|
3
|
+
*
|
|
4
|
+
* Event logs are the durable source of truth. RunState is the hot-path cache:
|
|
5
|
+
* enough structured state for Stop hooks to make one good decision before the
|
|
6
|
+
* run ends, without reading JSONL from disk or expanding prompts.
|
|
7
|
+
*/
|
|
8
|
+
import type { HookCallbackMatcher, HookEvent } from '@anthropic-ai/claude-agent-sdk';
|
|
9
|
+
/**
 * One tool call that the run-state PostToolUse hook recorded as a side effect.
 * Records are appended to either `successfulSideEffects` or
 * `failedSideEffects` on the live run state, depending on `successful`.
 */
export interface RunStateSideEffect {
    /** Name of the tool that was invoked. */
    toolName: string;
    /** Tool-use id reported by the hook event, when one was provided. */
    toolUseId?: string;
    /** Short human-readable description of the call, used to group manifest lines. */
    summary: string;
    /** Classification of the call. */
    kind: 'side_effect' | 'unknown';
    /** Whether the tool response was judged successful. */
    successful: boolean;
    /** Reason the response was judged successful; only set on successful records. */
    successReason?: string;
    /** Status code extracted from the tool response, when available. */
    statusCode?: number;
    /** ISO-8601 timestamp taken when the record was created. */
    ts: string;
}
|
|
19
|
+
/**
 * Counts derived from the most recent `TodoWrite` tool input seen during the
 * run. Each new snapshot replaces the previous one.
 */
export interface RunStateTodoSnapshot {
    /** Number of entries in the todo list. */
    total: number;
    /** Entries whose status is neither `completed` nor `in_progress`. */
    pending: number;
    /** Entries whose status is `in_progress`. */
    inProgress: number;
    /** Entries whose status is `completed`. */
    completed: number;
}
|
|
25
|
+
/**
 * In-memory state for a single active run, updated by the run-state hooks.
 * It is a cache for Stop-hook decisions, not a durable record.
 */
export interface LiveRunState {
    /** Identifier of the run this state belongs to. */
    runId: string;
    /** Session key supplied when the hooks were built, if any. */
    sessionKey?: string;
    /** Clock reading (epoch milliseconds) taken when the state was created. */
    startedAt: number;
    /** Clock reading taken at the most recent PostToolUse event. */
    lastUpdatedAt: number;
    /** Tool calls classified as read-only. */
    readOnlyToolCalls: number;
    /** Tool calls whose classification was `unknown`. */
    unknownToolCalls: number;
    /** Every PostToolUse event observed, including TodoWrite. */
    totalToolCalls: number;
    /** Side-effect records whose tool response was judged successful. */
    successfulSideEffects: RunStateSideEffect[];
    /** Side-effect records whose tool response was judged unsuccessful. */
    failedSideEffects: RunStateSideEffect[];
    /** Latest todo snapshot; absent until a TodoWrite input with a `todos` array is seen. */
    todo?: RunStateTodoSnapshot;
    /** Set once a SessionEnd event has been observed. */
    ended?: {
        /** Reason reported by the SessionEnd event, stringified. */
        reason?: string;
        /** Clock reading taken when SessionEnd was handled. */
        endedAt: number;
    };
}
|
|
41
|
+
/** Running counters maintained by the run-state PostToolUse hook. */
export interface RunStateStats {
    /** PostToolUse events the hook has processed. */
    inspected: number;
    /** Side-effect records created, successful or not. */
    sideEffects: number;
    /** Times a TodoWrite input produced a new todo snapshot. */
    todosUpdated: number;
}
|
|
46
|
+
/** Inputs accepted by `buildRunStateHooks`. */
export interface RunStateHookOptions {
    /** Identifier under which the live state is registered. */
    runId: string;
    /** Optional session key copied onto the state when non-empty. */
    sessionKey?: string;
    /** Clock returning epoch milliseconds; defaults to `Date.now`. */
    now?: () => number;
}
|
|
51
|
+
/** Everything `buildRunStateHooks` hands back to the caller. */
export interface RunStateHookHandles {
    /** The live state object; the hooks mutate it in place. */
    state: LiveRunState;
    /** Hook matchers to merge into the SDK hook map. */
    hooks: Partial<Record<HookEvent, HookCallbackMatcher[]>>;
    /** Counters updated alongside the state. */
    stats: RunStateStats;
}
|
|
56
|
+
export declare function getRunState(runId: string): LiveRunState | undefined;
|
|
57
|
+
export declare function clearRunState(runId: string): void;
|
|
58
|
+
export declare function buildRunStateHooks(opts: RunStateHookOptions): RunStateHookHandles;
|
|
59
|
+
export declare function hasCompletedManifest(text: string): boolean;
|
|
60
|
+
export declare function summarizeRunStateForManifest(state: LiveRunState): string;
|
|
61
|
+
//# sourceMappingURL=run-state.d.ts.map
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live per-run state fed by SDK hooks.
|
|
3
|
+
*
|
|
4
|
+
* Event logs are the durable source of truth. RunState is the hot-path cache:
|
|
5
|
+
* enough structured state for Stop hooks to make one good decision before the
|
|
6
|
+
* run ends, without reading JSONL from disk or expanding prompts.
|
|
7
|
+
*/
|
|
8
|
+
import { classifyToolCall, isToolResultSuccessful } from './side-effect-classifier.js';
|
|
9
|
+
import { buildSideEffectFingerprint } from './side-effect-idempotency.js';
|
|
10
|
+
// Module-level index of live run states keyed by runId. Entries are added by
// buildRunStateHooks and removed by clearRunState or the SessionEnd hook.
const LIVE_RUNS = new Map();
|
|
11
|
+
/**
 * Format the current reading of an injected clock as an ISO-8601 string.
 *
 * @param now Function returning epoch milliseconds.
 * @returns The clock reading rendered via `Date.prototype.toISOString`.
 */
function nowIso(now) {
    const epochMs = now();
    const stamp = new Date(epochMs);
    return stamp.toISOString();
}
|
|
14
|
+
/**
 * Narrow an arbitrary value to a plain (non-array) object.
 *
 * @param value Anything.
 * @returns `value` itself when it is a truthy, non-array object; otherwise `undefined`.
 */
function asRecord(value) {
    // Falsy values (null, undefined, 0, '') are never records.
    if (!value) {
        return undefined;
    }
    if (typeof value !== 'object') {
        return undefined;
    }
    if (Array.isArray(value)) {
        return undefined;
    }
    return value;
}
|
|
17
|
+
/**
 * Produce a short label for a tool call.
 *
 * Resolution order:
 *   1. the `summary` of the side-effect fingerprint, when one can be built;
 *   2. for `Bash` with a non-blank string `command`, a `Bash mutation: …`
 *      label holding the whitespace-collapsed command cut to 96 characters;
 *   3. the bare tool name.
 *
 * @param toolName Name of the invoked tool.
 * @param input Raw tool input as delivered by the hook event.
 * @returns The label string.
 */
function summarizeToolCall(toolName, input) {
    const fingerprint = buildSideEffectFingerprint(toolName, input);
    if (fingerprint) {
        return fingerprint.summary;
    }
    if (toolName !== 'Bash') {
        return toolName;
    }
    const rawCommand = asRecord(input)?.command;
    const hasCommand = typeof rawCommand === 'string' && rawCommand.trim();
    if (!hasCommand) {
        return toolName;
    }
    // Collapse runs of whitespace so multi-line commands stay on one line.
    const collapsed = rawCommand.trim().replace(/\s+/g, ' ');
    return `Bash mutation: ${collapsed.slice(0, 96)}`;
}
|
|
30
|
+
/**
 * Tally todo statuses from a `TodoWrite` tool input.
 *
 * @param input Raw tool input; expected to be an object with a `todos` array.
 * @returns Counts by status, or `undefined` when `input.todos` is not an array.
 *   Any entry whose status is neither `completed` nor `in_progress`
 *   (including malformed entries) is counted as pending.
 */
function readTodoSnapshot(input) {
    const items = asRecord(input)?.todos;
    if (!Array.isArray(items)) {
        return undefined;
    }
    const tally = { pending: 0, inProgress: 0, completed: 0 };
    for (const entry of items) {
        switch (asRecord(entry)?.status) {
            case 'completed':
                tally.completed += 1;
                break;
            case 'in_progress':
                tally.inProgress += 1;
                break;
            default:
                tally.pending += 1;
                break;
        }
    }
    return {
        total: items.length,
        pending: tally.pending,
        inProgress: tally.inProgress,
        completed: tally.completed,
    };
}
|
|
54
|
+
/**
 * Look up the live state registered for a run.
 *
 * @param runId Run identifier used when the hooks were built.
 * @returns The registered state, or `undefined` when none is indexed.
 */
export function getRunState(runId) {
    const liveState = LIVE_RUNS.get(runId);
    return liveState;
}
|
|
57
|
+
/**
 * Drop a run from the live index. A no-op when the run is not indexed.
 *
 * @param runId Run identifier to remove.
 */
export function clearRunState(runId) {
    if (LIVE_RUNS.has(runId)) {
        LIVE_RUNS.delete(runId);
    }
}
|
|
60
|
+
/**
 * Create the live state for one run and the SDK hooks that keep it current.
 *
 * The state is registered in the module-level index under `opts.runId`
 * immediately. Two hooks are returned:
 *   - PostToolUse: counts every tool call; a `TodoWrite` call replaces the
 *     todo snapshot; calls classified `read_only` or `unknown` only bump
 *     their counters; every other call is recorded as a side effect and
 *     filed under successful or failed according to the tool response.
 *   - SessionEnd: stamps `state.ended` and removes the run from the index.
 * Both hooks always resolve to an empty object, so they never alter the
 * SDK's control flow.
 *
 * @param opts `runId` (required), optional `sessionKey`, optional `now` clock
 *   returning epoch milliseconds (defaults to `Date.now`).
 * @returns `{ state, hooks, stats }`; `state` and `stats` are mutated in
 *   place by the hooks.
 */
export function buildRunStateHooks(opts) {
    const now = opts.now ?? (() => Date.now());
    const state = {
        runId: opts.runId,
        ...(opts.sessionKey ? { sessionKey: opts.sessionKey } : {}),
        startedAt: now(),
        lastUpdatedAt: now(),
        readOnlyToolCalls: 0,
        unknownToolCalls: 0,
        totalToolCalls: 0,
        successfulSideEffects: [],
        failedSideEffects: [],
    };
    const stats = { inspected: 0, sideEffects: 0, todosUpdated: 0 };
    LIVE_RUNS.set(opts.runId, state);
    const postToolUse = async (input) => {
        if (input.hook_event_name !== 'PostToolUse')
            return {};
        const evt = input;
        const toolName = String(evt.tool_name ?? 'unknown');
        stats.inspected += 1;
        state.totalToolCalls += 1;
        state.lastUpdatedAt = now();
        // TodoWrite is bookkeeping: it refreshes the snapshot and is never
        // classified or recorded as a side effect.
        if (toolName === 'TodoWrite') {
            const snapshot = readTodoSnapshot(evt.tool_input);
            if (snapshot) {
                state.todo = snapshot;
                stats.todosUpdated += 1;
            }
            return {};
        }
        const inputRecord = asRecord(evt.tool_input);
        const verdict = classifyToolCall(toolName, inputRecord);
        if (verdict.kind === 'read_only') {
            state.readOnlyToolCalls += 1;
            return {};
        }
        if (verdict.kind === 'unknown') {
            state.unknownToolCalls += 1;
            return {};
        }
        // NOTE(review): the meaning of the second argument (`false`) is defined
        // by side-effect-classifier.js and is not visible here — confirm.
        const result = isToolResultSuccessful(evt.tool_response, false);
        const sideEffect = {
            toolName,
            toolUseId: evt.tool_use_id,
            summary: summarizeToolCall(toolName, evt.tool_input),
            kind: 'side_effect',
            successful: result.successful,
            ...(result.successful ? { successReason: result.reason } : {}),
            ...(result.statusCode !== undefined ? { statusCode: result.statusCode } : {}),
            ts: nowIso(now),
        };
        stats.sideEffects += 1;
        if (result.successful)
            state.successfulSideEffects.push(sideEffect);
        else
            state.failedSideEffects.push(sideEffect);
        return {};
    };
    const sessionEnd = async (input) => {
        if (input.hook_event_name !== 'SessionEnd')
            return {};
        const evt = input;
        state.ended = { reason: String(evt.reason ?? ''), endedAt: now() };
        // Keep the state object alive for closures that already hold it, but drop
        // the global index to prevent stale cross-run reads. Only remove the
        // entry if it still points at this state: if the same runId was
        // registered again in the meantime, that newer entry must survive.
        if (LIVE_RUNS.get(opts.runId) === state) {
            LIVE_RUNS.delete(opts.runId);
        }
        return {};
    };
    return {
        state,
        hooks: {
            PostToolUse: [{ hooks: [postToolUse] }],
            SessionEnd: [{ hooks: [sessionEnd] }],
        },
        stats,
    };
}
|
|
138
|
+
/**
 * Report whether a message contains the completion-manifest heading: a check
 * mark, optional whitespace, then the bold Markdown word `Completed`.
 *
 * @param text Message text to inspect.
 * @returns `true` when the heading appears anywhere in `text`.
 */
export function hasCompletedManifest(text) {
    const manifestHeading = /✅\s*\*\*Completed\*\*/;
    return manifestHeading.test(text);
}
|
|
141
|
+
/**
 * Render a Completed/Pending manifest template from the live run state.
 *
 * Successful side effects are grouped by their `summary` text in first-seen
 * order; repeated summaries are shown once with an `Nx ` prefix. At most five
 * groups are listed. When more exist, a trailing line states how many side
 * effects were left out, so the reader is never led to believe the list is
 * complete.
 *
 * @param state Live run state; reads `successfulSideEffects` and `todo`.
 * @returns Multi-line manifest text.
 */
export function summarizeRunStateForManifest(state) {
    const maxListedGroups = 5;
    const groups = new Map();
    for (const effect of state.successfulSideEffects) {
        const key = effect.summary;
        groups.set(key, (groups.get(key) ?? 0) + 1);
    }
    const entries = Array.from(groups.entries());
    const completedLines = entries
        .slice(0, maxListedGroups)
        .map(([summary, count]) => `- ${count > 1 ? `${count}x ` : ''}${summary}`);
    // Account for everything beyond the listed groups instead of dropping it silently.
    const omittedEffects = entries
        .slice(maxListedGroups)
        .reduce((sum, [, count]) => sum + count, 0);
    if (omittedEffects > 0) {
        completedLines.push(`- (+${omittedEffects} more side effect(s) not listed)`);
    }
    const completed = completedLines.join('\n') || '- No side effects recorded';
    const unfinished = state.todo ? state.todo.pending + state.todo.inProgress : 0;
    const pending = unfinished > 0
        ? `- TodoWrite: ${unfinished} unfinished item(s)`
        : '- None known';
    return [
        'Use this concise manifest before ending:',
        '',
        '✅ **Completed**',
        completed,
        '',
        '⚠️ **Pending**',
        pending,
    ].join('\n');
}
|
|
164
|
+
//# sourceMappingURL=run-state.js.map
|
package/dist/gateway/router.js
CHANGED
|
@@ -98,10 +98,21 @@ function contextOverflowFallbackMessage() {
|
|
|
98
98
|
].join('\n');
|
|
99
99
|
}
|
|
100
100
|
function detectOverflowResumeReply(message) {
|
|
101
|
-
|
|
101
|
+
let text = message.trim().toLowerCase().replace(/[.!?]+$/g, '').replace(/\s+/g, ' ').trim();
|
|
102
102
|
if (!text)
|
|
103
103
|
return 'other';
|
|
104
|
-
|
|
104
|
+
// Humans naturally add politeness around the control word. Keep this
|
|
105
|
+
// parser narrow, but do not drop a pending resume just because the owner
|
|
106
|
+
// replied "continue please" instead of the exact token "continue".
|
|
107
|
+
text = text
|
|
108
|
+
.replace(/^(?:please|pls)\s+/, '')
|
|
109
|
+
.replace(/\s+(?:please|pls)$/, '')
|
|
110
|
+
.replace(/^(?:ok|okay|yes|yep|yeah|sure),?\s+/, '')
|
|
111
|
+
.trim();
|
|
112
|
+
if (/^(?:continue|resume|proceed|keep going|carry on|yes|yep|yeah|sure|ok|okay|go|go ahead|do it|run it)$/.test(text)) {
|
|
113
|
+
return 'continue';
|
|
114
|
+
}
|
|
115
|
+
if (/^(?:continue|resume|proceed|keep going|carry on)\s+(?:from\s+(?:there|here|that state|this state)|where you left off|with\s+(?:that|it)|the work)$/.test(text)) {
|
|
105
116
|
return 'continue';
|
|
106
117
|
}
|
|
107
118
|
if (/^(?:done|stop|cancel|abort|no|nope|that's all|that is all|leave it|do not continue|don't continue)\b/.test(text)) {
|