clementine-agent 1.18.183 → 1.18.184
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +15 -2
- package/dist/agent/chat-stop-hook.d.ts +76 -0
- package/dist/agent/chat-stop-hook.js +155 -0
- package/dist/agent/clementine-turn-context.d.ts +102 -0
- package/dist/agent/clementine-turn-context.js +210 -0
- package/dist/agent/run-agent-context.js +35 -0
- package/dist/agent/run-agent.js +50 -1
- package/dist/agent/tool-call-dedup.js +25 -7
- package/dist/gateway/router.d.ts +11 -0
- package/dist/gateway/router.js +114 -2
- package/package.json +1 -1
package/dist/agent/assistant.js
CHANGED
|
@@ -1025,6 +1025,21 @@ export class PersonalAssistant {
|
|
|
1025
1025
|
}
|
|
1026
1026
|
}
|
|
1027
1027
|
// ── System Prompt Builder ─────────────────────────────────────────
|
|
1028
|
+
//
|
|
1029
|
+
// 1.18.184 caveat: the canonical CHAT path no longer reaches this
|
|
1030
|
+
// builder. Chat goes through runAgent (src/agent/run-agent.ts:679)
|
|
1031
|
+
// with `systemPrompt: { type: 'preset', preset: 'claude_code',
|
|
1032
|
+
// append: <buildChatSystemAppend(...)> }`. The recall + trust posture
|
|
1033
|
+
// directives live in `run-agent-context.ts:BEHAVIORAL_POSTURE`.
|
|
1034
|
+
//
|
|
1035
|
+
// This buildSystemPrompt is now invoked only from:
|
|
1036
|
+
// - plan-step path (`processPlanStep` ~line 3396)
|
|
1037
|
+
// - auto-memory extraction Haiku passes (~line 3187)
|
|
1038
|
+
// - cron-reflection Haiku passes
|
|
1039
|
+
//
|
|
1040
|
+
// If you're adding chat-time behavioral guidance, add it to
|
|
1041
|
+
// BEHAVIORAL_POSTURE in run-agent-context.ts. Adding it here will
|
|
1042
|
+
// NOT affect real chat — only the legacy Haiku/plan-step paths.
|
|
1028
1043
|
buildSystemPrompt(opts = {}) {
|
|
1029
1044
|
const { isHeartbeat = false, cronTier = null, retrievalContext = '', profile = null, sessionKey = null, model = null, verboseLevel, intentClassification = null, contextTier = 'full', toolsAvailable = true, composioConnectedSlugs = [] } = opts;
|
|
1030
1045
|
const isAutonomous = isHeartbeat || cronTier !== null;
|
|
@@ -1208,8 +1223,6 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
|
|
|
1208
1223
|
**Remembering:** Durable facts → memory_write(action="update_memory"). Daily context → note_take / memory_write(action="append_daily"). New person → note_create. New task → task_add.
|
|
1209
1224
|
Save important facts immediately; a background agent also extracts after each exchange.
|
|
1210
1225
|
|
|
1211
|
-
**Recalling — REQUIRED behavior:** When the user references past work you don't have in immediate context — a URL, a deployment, a file you created, a task or background job you ran, a person/project/domain name you don't have inline — call \`memory_search\` (or \`transcript_search\` for chat history) BEFORE asking the user to provide it and BEFORE replying that you have no record. Saying "I don't see any record of that" without having searched is a memory failure, not an honest answer. Background tasks, cron runs, deployments, and prior chat turns are all in the SQLite memory store with dense embeddings — semantic search will surface them even when the wording doesn't match exactly.
|
|
1212
|
-
|
|
1213
1226
|
## Self-Configuration (never tell ${owner} to edit a config file)
|
|
1214
1227
|
|
|
1215
1228
|
Clementine is self-configuring. Every credential, every integration, every tool permission can be set by calling a tool — no hand-editing.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* chat-stop-hook — Stop hook that keeps chat-initiated multi-step jobs
|
|
3
|
+
* running until they finish OR the user explicitly stops them.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.184)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* Before this hook, the SDK loop ended whenever the model produced
|
|
8
|
+
* a final assistant message — even when the model had clearly stated
|
|
9
|
+
* "next, I'll do X" but then stopped. From the user's POV: "I asked her
|
|
10
|
+
* to draft 3 emails and she only drafted 1, no explanation." The model
|
|
11
|
+
* was being prematurely terminated by the SDK's default Stop behavior.
|
|
12
|
+
*
|
|
13
|
+
* The canonical SDK pattern for long-running agentic loops is a Stop
|
|
14
|
+
* hook that:
|
|
15
|
+
* 1. Detects when the model said it would continue but didn't.
|
|
16
|
+
* 2. Returns `decision: 'block'` with a `reason` that re-prompts
|
|
17
|
+
* the model to keep going.
|
|
18
|
+
* 3. NEVER blocks if Stop hooks have already fired this run
|
|
19
|
+
* (`input.stop_hook_active === true`) — that's the SDK's
|
|
20
|
+
* anti-infinite-loop guardrail and we honor it.
|
|
21
|
+
* 4. NEVER blocks if the user has aborted (abortSignal fired) —
|
|
22
|
+
* user intent always wins.
|
|
23
|
+
*
|
|
24
|
+
* What this hook does NOT do
|
|
25
|
+
* ──────────────────────────
|
|
26
|
+
* It does NOT force every chat turn to keep going. The default path
|
|
27
|
+
* is to LET THE MODEL FINISH. The hook only intervenes when:
|
|
28
|
+
* (a) the last assistant message contains a clear "more work to do"
|
|
29
|
+
* signal (e.g., "next, I'll", "step 2:", "I'll continue with"), AND
|
|
30
|
+
* (b) the user has NOT issued a stop / cancel, AND
|
|
31
|
+
* (c) we haven't already re-blocked this run.
|
|
32
|
+
*
|
|
33
|
+
* Conservative by design: better to let one job finish slightly short
|
|
34
|
+
* than to spin forever. If a job needs to run long, the user can
|
|
35
|
+
* always re-ask.
|
|
36
|
+
*
|
|
37
|
+
* Aligned with Anthropic SDK best practices: Stop hooks fire even
|
|
38
|
+
* under `bypassPermissions`, which is the canonical lever for
|
|
39
|
+
* "agentic loop that keeps going." See `sdk.d.ts:5483-5492` for the
|
|
40
|
+
* `StopHookInput` shape including the `stop_hook_active` guard.
|
|
41
|
+
*/
|
|
42
|
+
import type { HookCallbackMatcher, HookEvent } from '@anthropic-ai/claude-agent-sdk';
|
|
43
|
+
export interface StopHookOptions {
|
|
44
|
+
/** Stable run identifier for telemetry. */
|
|
45
|
+
runId: string;
|
|
46
|
+
/** Optional abort signal to honor — if it fires, the hook will
|
|
47
|
+
* never re-block. User-initiated stops always win. */
|
|
48
|
+
abortSignal?: AbortSignal;
|
|
49
|
+
/** Optional callback fired on every decision. Useful for the
|
|
50
|
+
* dashboard "What Clementine sees this turn" panel. */
|
|
51
|
+
onDecision?: (info: {
|
|
52
|
+
decision: 'pass' | 'continue';
|
|
53
|
+
reason?: string;
|
|
54
|
+
lastMessagePreview: string;
|
|
55
|
+
stopHookActive: boolean;
|
|
56
|
+
}) => void;
|
|
57
|
+
}
|
|
58
|
+
export interface StopHookStats {
|
|
59
|
+
/** Total Stop events inspected. */
|
|
60
|
+
inspected: number;
|
|
61
|
+
/** Stop events that passed through (model finished cleanly). */
|
|
62
|
+
passed: number;
|
|
63
|
+
/** Stop events where we re-prompted the model to continue. */
|
|
64
|
+
continued: number;
|
|
65
|
+
}
|
|
66
|
+
export interface StopHookHandles {
|
|
67
|
+
/** Hook map suitable for SDK `query({ options: { hooks } })`. */
|
|
68
|
+
hooks: Partial<Record<HookEvent, HookCallbackMatcher[]>>;
|
|
69
|
+
/** Aggregated telemetry — read after the run completes. */
|
|
70
|
+
stats: StopHookStats;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Build a Stop hook for a chat-initiated agentic run.
|
|
74
|
+
*/
|
|
75
|
+
export declare function buildChatStopHook(opts: StopHookOptions): StopHookHandles;
|
|
76
|
+
//# sourceMappingURL=chat-stop-hook.d.ts.map
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* chat-stop-hook — Stop hook that keeps chat-initiated multi-step jobs
|
|
3
|
+
* running until they finish OR the user explicitly stops them.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.184)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* Before this hook, the SDK loop ended whenever the model produced
|
|
8
|
+
* a final assistant message — even when the model had clearly stated
|
|
9
|
+
* "next, I'll do X" but then stopped. From the user's POV: "I asked her
|
|
10
|
+
* to draft 3 emails and she only drafted 1, no explanation." The model
|
|
11
|
+
* was being prematurely terminated by the SDK's default Stop behavior.
|
|
12
|
+
*
|
|
13
|
+
* The canonical SDK pattern for long-running agentic loops is a Stop
|
|
14
|
+
* hook that:
|
|
15
|
+
* 1. Detects when the model said it would continue but didn't.
|
|
16
|
+
* 2. Returns `decision: 'block'` with a `reason` that re-prompts
|
|
17
|
+
* the model to keep going.
|
|
18
|
+
* 3. NEVER blocks if Stop hooks have already fired this run
|
|
19
|
+
* (`input.stop_hook_active === true`) — that's the SDK's
|
|
20
|
+
* anti-infinite-loop guardrail and we honor it.
|
|
21
|
+
* 4. NEVER blocks if the user has aborted (abortSignal fired) —
|
|
22
|
+
* user intent always wins.
|
|
23
|
+
*
|
|
24
|
+
* What this hook does NOT do
|
|
25
|
+
* ──────────────────────────
|
|
26
|
+
* It does NOT force every chat turn to keep going. The default path
|
|
27
|
+
* is to LET THE MODEL FINISH. The hook only intervenes when:
|
|
28
|
+
* (a) the last assistant message contains a clear "more work to do"
|
|
29
|
+
* signal (e.g., "next, I'll", "step 2:", "I'll continue with"), AND
|
|
30
|
+
* (b) the user has NOT issued a stop / cancel, AND
|
|
31
|
+
* (c) we haven't already re-blocked this run.
|
|
32
|
+
*
|
|
33
|
+
* Conservative by design: better to let one job finish slightly short
|
|
34
|
+
* than to spin forever. If a job needs to run long, the user can
|
|
35
|
+
* always re-ask.
|
|
36
|
+
*
|
|
37
|
+
* Aligned with Anthropic SDK best practices: Stop hooks fire even
|
|
38
|
+
* under `bypassPermissions`, which is the canonical lever for
|
|
39
|
+
* "agentic loop that keeps going." See `sdk.d.ts:5483-5492` for the
|
|
40
|
+
* `StopHookInput` shape including the `stop_hook_active` guard.
|
|
41
|
+
*/
|
|
42
|
+
import pino from 'pino';
|
|
43
|
+
const logger = pino({ name: 'clementine.chat-stop-hook' });
|
|
44
|
+
/**
|
|
45
|
+
* Phrases in the last assistant message that signal "more work to do."
|
|
46
|
+
* Conservative — we only continue when the model EXPLICITLY said it
|
|
47
|
+
* would. Vague endings ("Let me know if you need anything else.") do
|
|
48
|
+
* NOT trigger; those are clean completions.
|
|
49
|
+
*/
|
|
50
|
+
const CONTINUATION_SIGNALS = [
    // Explicit "next step" / sequencing
    /\bnext,?\s+(?:i'?ll|i\s+will|let'?s|i'?m\s+going\s+to)\b/i,
    /\bstep\s+\d+:/i,
    /\bphase\s+\d+:/i,
    /\bi'?ll\s+(?:now|then|next)\b/i,
    /\bi\s+will\s+(?:now|then|next)\b/i,
    // "Continuing with" / "moving on"
    /\bcontinuing\s+(?:with|to)\b/i,
    /\bmoving\s+on\s+to\b/i,
    /\bi'?ll\s+continue\s+(?:by|with)\b/i,
    // Promised remainder of a list. No trailing \b, so plurals
    // ("remaining emails") match as well.
    /\b(?:second|third|fourth|fifth|remaining|rest)\s+(?:email|draft|item|step)/i,
    // "After this, I'll"
    /\bafter\s+(?:this|that),?\s+i'?ll\b/i,
];
/**
 * Build a Stop hook for a chat-initiated agentic run.
 *
 * The returned hook inspects each `Stop` event and decides, in order:
 *   1. `stop_hook_active` is set      → pass (reason `stop_hook_active`).
 *   2. `opts.abortSignal` has aborted → pass (reason `user_aborted`).
 *   3. the last assistant message matches none of CONTINUATION_SIGNALS
 *                                     → pass (reason `clean_completion`).
 *   4. otherwise                      → `{ decision: 'block', reason }`.
 *
 * Events whose `hook_event_name` is not `'Stop'` are ignored and not counted.
 *
 * @param {object} opts
 * @param {string} opts.runId - Identifier attached to every log line.
 * @param {AbortSignal} [opts.abortSignal] - When aborted, the hook never blocks.
 * @param {(info: object) => void} [opts.onDecision] - Called once per inspected
 *   Stop event with `{ decision, reason, lastMessagePreview, stopHookActive }`.
 *   An exception thrown by the callback is logged and ignored so telemetry
 *   cannot change the hook's decision.
 * @returns {{ hooks: object, stats: { inspected: number, passed: number, continued: number } }}
 *   `hooks` holds the single Stop matcher; `stats` is mutated live as events
 *   are inspected.
 */
export function buildChatStopHook(opts) {
    const stats = { inspected: 0, passed: 0, continued: 0 };

    // Report a decision to the optional observer without letting it break the hook.
    const notify = (info) => {
        if (!opts.onDecision) {
            return;
        }
        try {
            opts.onDecision(info);
        }
        catch (err) {
            logger.warn({ err, runId: opts.runId }, 'Stop hook onDecision callback threw (ignored)');
        }
    };

    const stopHook = async (input) => {
        if (input.hook_event_name !== 'Stop') {
            return {};
        }
        stats.inspected += 1;

        // Anything other than a string (missing, null, unexpected shape) is
        // treated as an empty message rather than crashing on `.slice`.
        const lastMsg = typeof input.last_assistant_message === 'string'
            ? input.last_assistant_message
            : '';
        const lastMessagePreview = lastMsg.slice(0, 160).replace(/\s+/g, ' ').trim();

        // Shared pass-through path: count it, optionally log it, report it.
        const pass = (reason, stopHookActive, logMessage) => {
            stats.passed += 1;
            if (logMessage) {
                logger.debug({ runId: opts.runId, reason, lastMessagePreview }, logMessage);
            }
            notify({ decision: 'pass', reason, lastMessagePreview, stopHookActive });
            return {};
        };

        // ── Guard 1: anti-infinite-loop ───────────────────────────────
        // Never block again once the input reports stop_hook_active.
        if (input.stop_hook_active) {
            return pass('stop_hook_active', true, 'Stop hook passing — already active');
        }

        // ── Guard 2: user-initiated stop ──────────────────────────────
        // An aborted signal always wins over continuation.
        if (opts.abortSignal?.aborted) {
            return pass('user_aborted', false, 'Stop hook passing — user aborted');
        }

        // ── Detection: did the model say it would continue? ──────────
        const announcedMoreWork = CONTINUATION_SIGNALS.some((rx) => rx.test(lastMsg));
        if (!announcedMoreWork) {
            // No continuation signal — let the model finish (not logged).
            return pass('clean_completion', false);
        }

        // ── Re-prompt: keep going ──────────────────────────────────────
        stats.continued += 1;
        const reason = 'You said you would continue with more work but the loop is about to end. ' +
            'Keep going — finish the remaining steps you outlined. ' +
            'If you genuinely cannot continue (waiting on external input, hit a hard error, etc.), say so explicitly in your next message so the owner knows where you stopped.';
        logger.info({
            runId: opts.runId,
            lastMessagePreview,
        }, 'Stop hook re-prompting model to continue work it announced');
        notify({
            decision: 'continue',
            reason,
            lastMessagePreview,
            stopHookActive: false,
        });
        return {
            decision: 'block',
            reason,
        };
    };

    return {
        hooks: {
            Stop: [{ hooks: [stopHook] }],
        },
        stats,
    };
}
|
|
155
|
+
//# sourceMappingURL=chat-stop-hook.js.map
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* clementine-turn-context — the volatile per-turn context block that
|
|
3
|
+
* reconstitutes Clementine's live awareness on every chat turn.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.184)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* The modern chat path's system prompt is the SDK's `claude_code`
|
|
8
|
+
* preset + `buildChatSystemAppend()` (run-agent-context.ts). That gives
|
|
9
|
+
* Clementine her identity (SOUL) and her hand-curated long-term memory
|
|
10
|
+
* (MEMORY.md), but it is STATIC across turns by design — anything that
|
|
11
|
+
* varies per turn must NOT live there or it would invalidate the
|
|
12
|
+
* Anthropic prompt cache.
|
|
13
|
+
*
|
|
14
|
+
* Anything volatile — what's true right now, what just happened, what
|
|
15
|
+
* the SQLite memory store has relevant to the current message — lives
|
|
16
|
+
* here, in a block prepended to the user's message. The SDK treats
|
|
17
|
+
* that as turn input, not as system prompt, so it doesn't break cache
|
|
18
|
+
* and it gives the model fresh context every turn.
|
|
19
|
+
*
|
|
20
|
+
* What we put in the block
|
|
21
|
+
* ────────────────────────
|
|
22
|
+
* 1. Retrieved memory hits from the SQLite store (semantic + FTS),
|
|
23
|
+
* scored against the user's current message. The single highest-
|
|
24
|
+
* leverage section — this is how persistent memory of EVERYTHING
|
|
25
|
+
* actually reaches the model.
|
|
26
|
+
* 2. Recent background-task headlines (last 24h, terminal status only)
|
|
27
|
+
* so the model knows what work just completed without re-asking.
|
|
28
|
+
* 3. Live state — current date/time + channel/identity framing.
|
|
29
|
+
* 4. Extension points for the deeper learning subsystems (decision-
|
|
30
|
+
* reflection, skill-quality, insight-engine, seed-user-model,
|
|
31
|
+
* goal-evaluator) — each one is a labeled section that returns
|
|
32
|
+
* empty today and can be wired in a follow-up ship without
|
|
33
|
+
* re-architecting.
|
|
34
|
+
*
|
|
35
|
+
* Hard cap on total block size — see MAX_BLOCK_CHARS. Anthropic's prompt
|
|
36
|
+
* cache benefit dies if the volatile block is larger than the cacheable
|
|
37
|
+
* prefix, so keep this tight.
|
|
38
|
+
*
|
|
39
|
+
* Aligned with Anthropic SDK best practices: per-turn dynamic context
|
|
40
|
+
* in the USER message, NOT in the system prompt. See the SDK reference
|
|
41
|
+
* note on prompt caching boundaries.
|
|
42
|
+
*/
|
|
43
|
+
import type { BackgroundTask } from '../types.js';
|
|
44
|
+
export interface BuildTurnContextOptions {
|
|
45
|
+
/** The user's current message — used as the query for retrieved memory. */
|
|
46
|
+
userMessage: string;
|
|
47
|
+
/** Session key for the active chat. Used for log breadcrumbs and to
|
|
48
|
+
* scope future per-session reads (currently unused; searchContext
|
|
49
|
+
* is intentionally cross-session for single-owner installs). */
|
|
50
|
+
sessionKey: string;
|
|
51
|
+
/** Where the user is reaching Clementine from. Surfaces in the
|
|
52
|
+
* identity framing block. Examples: "discord-dm", "dashboard",
|
|
53
|
+
* "slack-channel", "chat". */
|
|
54
|
+
channel?: string;
|
|
55
|
+
/** Owner-facing name (display name, not slug). When set, used in
|
|
56
|
+
* the identity framing block. */
|
|
57
|
+
ownerName?: string | null;
|
|
58
|
+
/** Active hired-agent profile if running as one. Affects the
|
|
59
|
+
* identity framing — "you are talking to Sasha right now," not
|
|
60
|
+
* "you are Clementine". */
|
|
61
|
+
profileName?: string | null;
|
|
62
|
+
/** Read-only memory store handle. When absent, retrieved-memory
|
|
63
|
+
* section is skipped — the rest still renders. */
|
|
64
|
+
memoryStore?: {
|
|
65
|
+
searchContext?: (query: string, opts?: {
|
|
66
|
+
limit?: number;
|
|
67
|
+
}) => Array<{
|
|
68
|
+
source_file?: string;
|
|
69
|
+
section?: string;
|
|
70
|
+
content: string;
|
|
71
|
+
score?: number;
|
|
72
|
+
}>;
|
|
73
|
+
} | null;
|
|
74
|
+
/** Optional: synchronous read of background tasks filtered by status,
|
|
75
|
+
* used to list recent terminal-state tasks. When absent, the
|
|
76
|
+
* background-task section is skipped — the rest still renders. */
|
|
77
|
+
listBackgroundTasks?: (filter: {
|
|
78
|
+
status?: BackgroundTask['status'];
|
|
79
|
+
}) => BackgroundTask[];
|
|
80
|
+
/** Clock injection for tests. Defaults to Date.now(). */
|
|
81
|
+
now?: () => number;
|
|
82
|
+
}
|
|
83
|
+
export interface BuildTurnContextResult {
|
|
84
|
+
/** The full ready-to-prepend context block, INCLUDING outer
|
|
85
|
+
* `[Context...]\n...\n[/Context]\n\n` fence. Empty string when no
|
|
86
|
+
* sections produced output (e.g., builder sessions or completely
|
|
87
|
+
* empty stores) — caller can treat empty as "no prefix needed". */
|
|
88
|
+
block: string;
|
|
89
|
+
/** Telemetry — which sections contributed, for the dashboard
|
|
90
|
+
* "what Clementine sees this turn" panel. */
|
|
91
|
+
sections: {
|
|
92
|
+
retrievedMemory: number;
|
|
93
|
+
recentBgTasks: number;
|
|
94
|
+
liveState: boolean;
|
|
95
|
+
identityFrame: boolean;
|
|
96
|
+
};
|
|
97
|
+
/** Final character count of the block. Useful for logging + the
|
|
98
|
+
* Anthropic prompt-cache-health analysis. */
|
|
99
|
+
totalChars: number;
|
|
100
|
+
}
|
|
101
|
+
export declare function buildClementineTurnContext(opts: BuildTurnContextOptions): BuildTurnContextResult;
|
|
102
|
+
//# sourceMappingURL=clementine-turn-context.d.ts.map
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* clementine-turn-context — the volatile per-turn context block that
|
|
3
|
+
* reconstitutes Clementine's live awareness on every chat turn.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists (1.18.184)
|
|
6
|
+
* ──────────────────────────
|
|
7
|
+
* The modern chat path's system prompt is the SDK's `claude_code`
|
|
8
|
+
* preset + `buildChatSystemAppend()` (run-agent-context.ts). That gives
|
|
9
|
+
* Clementine her identity (SOUL) and her hand-curated long-term memory
|
|
10
|
+
* (MEMORY.md), but it is STATIC across turns by design — anything that
|
|
11
|
+
* varies per turn must NOT live there or it would invalidate the
|
|
12
|
+
* Anthropic prompt cache.
|
|
13
|
+
*
|
|
14
|
+
* Anything volatile — what's true right now, what just happened, what
|
|
15
|
+
* the SQLite memory store has relevant to the current message — lives
|
|
16
|
+
* here, in a block prepended to the user's message. The SDK treats
|
|
17
|
+
* that as turn input, not as system prompt, so it doesn't break cache
|
|
18
|
+
* and it gives the model fresh context every turn.
|
|
19
|
+
*
|
|
20
|
+
* What we put in the block
|
|
21
|
+
* ────────────────────────
|
|
22
|
+
* 1. Retrieved memory hits from the SQLite store (semantic + FTS),
|
|
23
|
+
* scored against the user's current message. The single highest-
|
|
24
|
+
* leverage section — this is how persistent memory of EVERYTHING
|
|
25
|
+
* actually reaches the model.
|
|
26
|
+
* 2. Recent background-task headlines (last 24h, terminal status only)
|
|
27
|
+
* so the model knows what work just completed without re-asking.
|
|
28
|
+
* 3. Live state — current date/time + channel/identity framing.
|
|
29
|
+
* 4. Extension points for the deeper learning subsystems (decision-
|
|
30
|
+
* reflection, skill-quality, insight-engine, seed-user-model,
|
|
31
|
+
* goal-evaluator) — each one is a labeled section that returns
|
|
32
|
+
* empty today and can be wired in a follow-up ship without
|
|
33
|
+
* re-architecting.
|
|
34
|
+
*
|
|
35
|
+
* Hard cap on total block size — see MAX_BLOCK_CHARS. Anthropic's prompt
|
|
36
|
+
* cache benefit dies if the volatile block is larger than the cacheable
|
|
37
|
+
* prefix, so keep this tight.
|
|
38
|
+
*
|
|
39
|
+
* Aligned with Anthropic SDK best practices: per-turn dynamic context
|
|
40
|
+
* in the USER message, NOT in the system prompt. See the SDK reference
|
|
41
|
+
* note on prompt caching boundaries.
|
|
42
|
+
*/
|
|
43
|
+
import pino from 'pino';
|
|
44
|
+
const logger = pino({ name: 'clementine.turn-context' });
|
|
45
|
+
// ── Tunables ──────────────────────────────────────────────────────────
|
|
46
|
+
/** Hard cap on the entire block. Keep volatile content small so the
|
|
47
|
+
* cacheable prefix stays larger than the dynamic delta. */
|
|
48
|
+
const MAX_BLOCK_CHARS = 4_000;
/** Per-section caps so any one section can't crowd out the others. */
const MAX_MEMORY_HITS = 6;
const MAX_MEMORY_HIT_CHARS = 320;
const MAX_BG_TASKS = 3;
const MAX_BG_TASK_LINE_CHARS = 200;
const RECENT_BG_WINDOW_MS = 24 * 60 * 60 * 1000;
/** Background-task statuses queried for the "recently completed" section. */
const TERMINAL_BG_STATUSES = ['done', 'failed', 'interrupted', 'aborted'];
/** Shared "section produced nothing" result. */
const EMPTY_SECTION = Object.freeze({ text: '', count: 0 });

// ── The builder ───────────────────────────────────────────────────────
/**
 * Assemble the per-turn context block that is prepended to the user message.
 *
 * Sections are emitted in this order: retrieved memory hits, recent
 * background-task headlines, then a "Right now" section holding the
 * identity line (when one can be built) and the current UTC time. The
 * "Right now" section is always present, so `block` is never empty.
 *
 * @param {object} opts
 * @param {string} opts.userMessage - Query for the memory search; a blank or
 *   non-string value skips the memory section.
 * @param {string} opts.sessionKey - Only used as a log breadcrumb here.
 * @param {string} [opts.channel] - Rendered as "via <channel>".
 * @param {string|null} [opts.ownerName] - Rendered as "You're talking to <name>".
 * @param {string|null} [opts.profileName] - Rendered as "as <profile>".
 * @param {object|null} [opts.memoryStore] - Object exposing `searchContext(query, { limit })`.
 * @param {Function} [opts.listBackgroundTasks] - Called once per terminal
 *   status with `{ status }`; the section is skipped when absent.
 * @param {() => number} [opts.now] - Clock override; defaults to `Date.now`.
 * @returns {{ block: string, sections: object, totalChars: number }}
 *   `block` includes the `[Context …]` / `[/Context]` fence; `totalChars`
 *   is `block.length`.
 */
export function buildClementineTurnContext(opts) {
    const sections = {
        retrievedMemory: 0,
        recentBgTasks: 0,
        liveState: false,
        identityFrame: false,
    };
    const parts = [];
    const nowMs = (opts.now ?? Date.now)();

    // ── 1. Retrieved memory hits ──────────────────────────────────────
    const memory = renderMemorySection(opts);
    if (memory.count > 0) {
        parts.push(memory.text);
        sections.retrievedMemory = memory.count;
    }

    // ── 2. Recent background task headlines ───────────────────────────
    const bgTasks = renderBgTaskSection(opts, nowMs);
    if (bgTasks.count > 0) {
        parts.push(bgTasks.text);
        sections.recentBgTasks = bgTasks.count;
    }

    // ── 3 + 4. Identity framing and live state ────────────────────────
    // Both share a single "Right now" header; the identity line, when
    // present, comes before the timestamp.
    const rightNow = ['### Right now'];
    const identityLine = buildIdentityLine(opts);
    if (identityLine) {
        rightNow.push(identityLine);
        sections.identityFrame = true;
    }
    rightNow.push(`Current time: ${new Date(nowMs).toISOString()} (UTC)`);
    sections.liveState = true;
    parts.push(rightNow.join('\n'));

    // ── 5. Extension points for deeper learning subsystems ───────────
    // These are intentionally empty today. Adding a new subsystem means
    // adding a new render helper and calling it above.
    //
    // TODO(1.18.185+): wire these in:
    // - decision-reflection: latest formatReflectionSummary() if <24h old
    // - skill-quality: skills flagged 'underperforming' or 'stale'
    // - insight-engine: most recent generated insights not yet ack'd
    // - seed-user-model: latest persisted snapshot of the owner profile
    // - goal-evaluator: active goals and last 3 goal-check results
    //
    // For each, the pattern is: read fast, cap output, log non-fatally on error.

    const body = parts.join('\n\n');
    // The cap applies to the body; the fence below is added on top of it.
    const truncated = body.length > MAX_BLOCK_CHARS
        ? body.slice(0, MAX_BLOCK_CHARS - 3) + '...'
        : body;
    const block = `[Context — read this for continuity, then respond to the user message below]\n${truncated}\n[/Context]\n\n`;
    return {
        block,
        sections,
        totalChars: block.length,
    };
}
// ── Helpers ───────────────────────────────────────────────────────────
/** Collapse every whitespace run to a single space and trim the ends. */
function squashWhitespace(text) {
    return text.replace(/\s+/g, ' ').trim();
}
/** Pick the bullet label for a memory hit: section, else file name, else "memory". */
function memoryHitLabel(hit) {
    if (hit.section) {
        return hit.section;
    }
    if (!hit.source_file) {
        return 'memory';
    }
    // `pop()` yields '' for a path ending in '/', so fall back with `||`.
    return hit.source_file.split('/').pop() || hit.source_file;
}
/** Render the memory-hit section; any failure is logged and yields an empty section. */
function renderMemorySection(opts) {
    const query = typeof opts.userMessage === 'string' ? opts.userMessage : '';
    if (!opts.memoryStore?.searchContext || query.trim().length === 0) {
        return EMPTY_SECTION;
    }
    try {
        const hits = opts.memoryStore.searchContext(query, {
            limit: MAX_MEMORY_HITS,
        });
        if (!hits || hits.length === 0) {
            return EMPTY_SECTION;
        }
        const lines = ['### Possibly relevant from persistent memory'];
        // Re-apply the cap locally in case the store returns more than asked.
        for (const hit of hits.slice(0, MAX_MEMORY_HITS)) {
            const content = (hit.content ?? '').slice(0, MAX_MEMORY_HIT_CHARS).trim();
            if (!content) {
                continue;
            }
            lines.push(`- **${memoryHitLabel(hit)}**: ${content}`);
        }
        const count = lines.length - 1;
        return count > 0 ? { text: lines.join('\n'), count } : EMPTY_SECTION;
    }
    catch (err) {
        // Never block on memory failure — log and continue.
        logger.debug({ err, sessionKey: opts.sessionKey }, 'turn-context: searchContext failed (non-fatal)');
        return EMPTY_SECTION;
    }
}
/** Render headlines for terminal-state tasks stamped within the last 24h, newest first. */
function renderBgTaskSection(opts, nowMs) {
    if (!opts.listBackgroundTasks) {
        return EMPTY_SECTION;
    }
    try {
        const recent = [];
        for (const status of TERMINAL_BG_STATUSES) {
            for (const task of opts.listBackgroundTasks({ status })) {
                const stamp = task.completedAt ?? task.interruptedAt ?? task.startedAt ?? task.createdAt;
                if (!stamp) {
                    continue;
                }
                const stampMs = Date.parse(stamp);
                // An unparseable stamp gives NaN, which would slip past the
                // window comparison below — drop it explicitly.
                if (!Number.isFinite(stampMs)) {
                    continue;
                }
                if (nowMs - stampMs > RECENT_BG_WINDOW_MS) {
                    continue;
                }
                recent.push({ task, stampMs });
            }
        }
        if (recent.length === 0) {
            return EMPTY_SECTION;
        }
        // Newest first, ordered by the same timestamp the window filter used.
        recent.sort((a, b) => b.stampMs - a.stampMs);
        const lines = ['### Recently completed background work (last 24h)'];
        for (const { task } of recent.slice(0, MAX_BG_TASKS)) {
            const promptPreview = squashWhitespace((task.prompt ?? '').slice(0, 80));
            const outcome = task.status === 'done'
                ? (task.result ?? task.deliverableNote ?? 'done')
                : (task.error ?? task.status);
            const tail = squashWhitespace(outcome.slice(0, 100));
            const line = `- **${task.status}**: ${promptPreview} → ${tail}`;
            lines.push(line.slice(0, MAX_BG_TASK_LINE_CHARS));
        }
        return { text: lines.join('\n'), count: lines.length - 1 };
    }
    catch (err) {
        logger.debug({ err, sessionKey: opts.sessionKey }, 'turn-context: listBackgroundTasks failed (non-fatal)');
        return EMPTY_SECTION;
    }
}
/**
 * Build the one-line "who / where / as whom" sentence.
 * Returns '' when none of ownerName, channel or profileName is set;
 * "as Clementine" is appended only when another fragment is present and
 * no profile name was given.
 */
function buildIdentityLine(opts) {
    const fragments = [];
    if (opts.ownerName) {
        fragments.push(`You're talking to ${opts.ownerName}`);
    }
    if (opts.channel) {
        fragments.push(`via ${opts.channel}`);
    }
    if (opts.profileName) {
        fragments.push(`as ${opts.profileName}`);
    }
    else if (fragments.length > 0) {
        fragments.push('as Clementine');
    }
    return fragments.length > 0 ? `${fragments.join(' ')}.` : '';
}
|
|
210
|
+
//# sourceMappingURL=clementine-turn-context.js.map
|
|
@@ -81,8 +81,43 @@ export function buildChatSystemAppend(opts = {}) {
|
|
|
81
81
|
if (opts.profileAppend?.trim()) {
|
|
82
82
|
blocks.push(opts.profileAppend);
|
|
83
83
|
}
|
|
84
|
+
// 5. Behavioral posture (1.18.184) — re-anchored from the legacy
|
|
85
|
+
// assistant.ts:buildSystemPrompt path which the modern chat no
|
|
86
|
+
// longer goes through. Kept in the cacheable system-prompt append
|
|
87
|
+
// (NOT in the per-turn user-message context) so Anthropic's
|
|
88
|
+
// prompt cache holds these directives across turns. They are
|
|
89
|
+
// short, stable, and describe HOW Clementine should behave —
|
|
90
|
+
// not WHAT is true right now (that's the volatile turn-context
|
|
91
|
+
// block; see clementine-turn-context.ts).
|
|
92
|
+
blocks.push(BEHAVIORAL_POSTURE);
|
|
84
93
|
return blocks.join('\n\n');
|
|
85
94
|
}
|
|
95
|
+
/**
|
|
96
|
+
* Stable behavioral directives appended to every chat-path system prompt.
|
|
97
|
+
*
|
|
98
|
+
* Trust posture: Clementine runs on the owner's machine; she is the
|
|
99
|
+
* owner's agent, not a faceless assistant. She acts decisively because
|
|
100
|
+
* the trust relationship has already been established by virtue of the
|
|
101
|
+
* owner running her on their own hardware. Permission-prompt friction
|
|
102
|
+
* is a violation of the trust model.
|
|
103
|
+
*
|
|
104
|
+
* Recall posture (re-anchored from 1.18.181, originally at
|
|
105
|
+
* assistant.ts:1382 in the legacy buildSystemPrompt path): durable
|
|
106
|
+
* memory across all sessions / channels / bg tasks lives in SQLite with
|
|
107
|
+
* dense embeddings. The model has `memory_search` and `transcript_search`
|
|
108
|
+
* tools available. "I have no record" without first searching is a
|
|
109
|
+
* memory failure, not an honest answer.
|
|
110
|
+
*
|
|
111
|
+
* Kept short (a few brief paragraphs) so it doesn't crowd the cacheable
|
|
112
|
+
* prefix; each directive is a short paragraph of guidance, not a manual.
|
|
113
|
+
*/
|
|
114
|
+
const BEHAVIORAL_POSTURE = `## How you operate
|
|
115
|
+
|
|
116
|
+
**Trust posture.** You are running on your owner's trusted local machine as their Clementine. The owner has already authenticated every integration you have access to; you act as them, with their judgment. Don't ask permission for things you have tools to do — just do them. Save permission-asking for genuinely consequential or ambiguous actions (sending the email vs drafting it, deleting vs archiving). Friction-by-default is a violation of the relationship.
|
|
117
|
+
|
|
118
|
+
**Recall posture.** Your durable memory across every session, every channel, every background task, and every workflow lives in SQLite with dense embeddings. When the owner references past work you don't have in immediate context — a URL, a deployment, a person, a project, a file you created, a job you ran — call \`memory_search\` (or \`transcript_search\` for chat-history specifics) BEFORE asking the owner to provide it and BEFORE replying that you have no record. "I don't see any record of that" without having searched is a memory failure, not an honest answer.
|
|
119
|
+
|
|
120
|
+
**Persistence posture.** When the owner gives you a multi-step job in chat, run it to completion. If you hit a real constraint (budget, cap, missing input, validation needed), say so explicitly — never trail off silently. The owner can always stop you via cancel or by typing \`stop\`; that's their lever. Yours is to keep going until the work is done.`;
|
|
86
121
|
/**
|
|
87
122
|
* Read the long-term memory block for an autonomous run (cron, team-task).
|
|
88
123
|
* Returns the agent-specific MEMORY.md when a hired agent is active, the
|
package/dist/agent/run-agent.js
CHANGED
|
@@ -97,6 +97,7 @@ export function invalidateMcpStatusEntry(name) {
|
|
|
97
97
|
import { BASE_DIR, PKG_DIR, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY as CONFIG_ANTHROPIC_API_KEY, normalizeClaudeSdkOptionsForOneMillionContext, TOOL_OUTPUT_GUARD, } from '../config.js';
|
|
98
98
|
import { buildGuardHooks } from './tool-output-guard.js';
|
|
99
99
|
import { buildDedupHook } from './tool-call-dedup.js';
|
|
100
|
+
import { buildChatStopHook } from './chat-stop-hook.js';
|
|
100
101
|
import { buildAgentMap } from './agent-definitions.js';
|
|
101
102
|
import { buildExecutionToolPolicy, } from './execution-policy.js';
|
|
102
103
|
const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
|
|
@@ -189,6 +190,20 @@ const CORE_TOOLS_FOR_AGENT_PARENT = [
|
|
|
189
190
|
'WebSearch',
|
|
190
191
|
'WebFetch',
|
|
191
192
|
'TodoWrite',
|
|
193
|
+
// 1.18.184 — Clementine's identity tools. Memory + capture are not
|
|
194
|
+
// optional skills; they are part of who she is. Surfacing them in
|
|
195
|
+
// the canonical SDK tools channel (NOT in the system prompt) means
|
|
196
|
+
// the model always sees them as available — no dependency on
|
|
197
|
+
// skill-match score thresholds widening the surface mid-turn.
|
|
198
|
+
// The MCP wildcard at execution-policy.ts:233 also exposes them, but
|
|
199
|
+
// when the MCP server hiccups during init the wildcard goes empty;
|
|
200
|
+
// explicit listing here guarantees the surface.
|
|
201
|
+
'mcp__clementine-tools__memory_search',
|
|
202
|
+
'mcp__clementine-tools__transcript_search',
|
|
203
|
+
'mcp__clementine-tools__memory_write',
|
|
204
|
+
'mcp__clementine-tools__note_take',
|
|
205
|
+
'mcp__clementine-tools__note_create',
|
|
206
|
+
'mcp__clementine-tools__task_add',
|
|
192
207
|
];
|
|
193
208
|
/**
|
|
194
209
|
* Run a single agent invocation via the canonical SDK pattern.
|
|
@@ -416,13 +431,47 @@ export async function runAgent(prompt, opts) {
|
|
|
416
431
|
});
|
|
417
432
|
},
|
|
418
433
|
});
|
|
419
|
-
//
|
|
434
|
+
// ── Chat persistence Stop hook (1.18.184, source='chat' only) ─────
|
|
435
|
+
// Keeps chat-initiated multi-step jobs running until they finish.
|
|
436
|
+
// Inspects the model's last assistant message for continuation
|
|
437
|
+
// signals ("next, I'll...", "step 2:", etc.) and re-prompts the
|
|
438
|
+
// model when it would otherwise stop mid-job. Honors
|
|
439
|
+
// stop_hook_active (anti-infinite-loop) and abortSignal
|
|
440
|
+
// (user-initiated stop always wins). Autonomous paths
|
|
441
|
+
// (cron / scheduled-skill / heartbeat / team-task) intentionally
|
|
442
|
+
// skip this — they have their own completion semantics and a
|
|
443
|
+
// continue-on-stop hook would fight them.
|
|
444
|
+
const isChatPath = source === 'chat';
|
|
445
|
+
const stopHook = isChatPath
|
|
446
|
+
? buildChatStopHook({
|
|
447
|
+
runId,
|
|
448
|
+
...(opts.abortSignal ? { abortSignal: opts.abortSignal } : {}),
|
|
449
|
+
onDecision: (info) => {
|
|
450
|
+
if (info.decision !== 'continue')
|
|
451
|
+
return;
|
|
452
|
+
writeEvent({
|
|
453
|
+
kind: 'hook',
|
|
454
|
+
ts: new Date().toISOString(),
|
|
455
|
+
sessionId,
|
|
456
|
+
hookEventName: 'Stop',
|
|
457
|
+
text: `clementine_stop_hook:continue last="${info.lastMessagePreview}"`,
|
|
458
|
+
});
|
|
459
|
+
},
|
|
460
|
+
})
|
|
461
|
+
: null;
|
|
462
|
+
// Merge hook maps from the modules. SDK accepts arrays of
|
|
420
463
|
// HookCallbackMatcher per event; we concatenate.
|
|
421
464
|
const mergedHooks = { ...guard.hooks };
|
|
422
465
|
for (const [evt, matchers] of Object.entries(dedup.hooks)) {
|
|
423
466
|
const existing = mergedHooks[evt] ?? [];
|
|
424
467
|
mergedHooks[evt] = [...existing, ...matchers];
|
|
425
468
|
}
|
|
469
|
+
if (stopHook) {
|
|
470
|
+
for (const [evt, matchers] of Object.entries(stopHook.hooks)) {
|
|
471
|
+
const existing = mergedHooks[evt] ?? [];
|
|
472
|
+
mergedHooks[evt] = [...existing, ...matchers];
|
|
473
|
+
}
|
|
474
|
+
}
|
|
426
475
|
// Apply 1M-context env normalization (existing infra)
|
|
427
476
|
const sdkOptionsRaw = {
|
|
428
477
|
systemPrompt: profileAppend
|
|
@@ -47,11 +47,24 @@ import { createHash } from 'node:crypto';
|
|
|
47
47
|
import pino from 'pino';
|
|
48
48
|
const logger = pino({ name: 'clementine.tool-call-dedup' });
|
|
49
49
|
// ── Tunables ──────────────────────────────────────────────────────────
|
|
50
|
-
/**
|
|
50
|
+
/** Entry lifetime — how long we remember a call to compare against. */
|
|
51
51
|
const DEFAULT_TTL_MS = 60_000;
|
|
52
|
+
/**
|
|
53
|
+
* Tight-burst window for HARD blocks (1.18.184 refinement). Hard-block
|
|
54
|
+
* fires only when ≥ HARD_BLOCK_AT identical calls happen within this
|
|
55
|
+
* window of the FIRST call. The classic refetch-after-compact failure
|
|
56
|
+
* pattern (which is what this hook exists to prevent — see the
|
|
57
|
+
* imessage-triage diagnosis comment up top) reliably completes 4
|
|
58
|
+
* identical calls in <2 minutes; the tight-burst signature is more
|
|
59
|
+
* like ~3 calls in <10 seconds. Polling-with-delay (e.g., "wait 30s
|
|
60
|
+
* and check again") legitimately produces identical calls spread out
|
|
61
|
+
* over the entry lifetime; the wider TTL still warns the model in
|
|
62
|
+
* that case but does not block. User intent for retry > our caution.
|
|
63
|
+
*/
|
|
64
|
+
const HARD_BLOCK_BURST_WINDOW_MS = 8_000;
|
|
52
65
|
/** Second identical call within TTL → soft warn (let it through with a hint). */
|
|
53
66
|
const SOFT_WARN_AT = 2;
|
|
54
|
-
/** Third+ identical call within
|
|
67
|
+
/** Third+ identical call within HARD_BLOCK_BURST_WINDOW_MS → hard block (deny). */
|
|
55
68
|
const HARD_BLOCK_AT = 3;
|
|
56
69
|
// ── Hashing ───────────────────────────────────────────────────────────
|
|
57
70
|
/**
|
|
@@ -115,24 +128,29 @@ export function buildDedupHook(opts) {
|
|
|
115
128
|
entry.count += 1;
|
|
116
129
|
entry.lastSeen = now;
|
|
117
130
|
const sinceFirstMs = now - entry.firstSeen;
|
|
118
|
-
|
|
131
|
+
// 1.18.184: hard-block ONLY on tight bursts (≤ HARD_BLOCK_BURST_WINDOW_MS
|
|
132
|
+
// from first call). This is the refetch-after-compact failure signature.
|
|
133
|
+
// Calls spread out across the wider TTL get a warning but not a block —
|
|
134
|
+
// legitimate polling ("wait 30s, retry") is preserved.
|
|
135
|
+
if (entry.count >= hardAt && sinceFirstMs <= HARD_BLOCK_BURST_WINDOW_MS) {
|
|
119
136
|
stats.blocked += 1;
|
|
120
137
|
logger.warn({
|
|
121
138
|
toolName,
|
|
122
139
|
inputHash,
|
|
123
140
|
callCount: entry.count,
|
|
124
141
|
sinceFirstMs,
|
|
142
|
+
burstWindowMs: HARD_BLOCK_BURST_WINDOW_MS,
|
|
125
143
|
runId: opts.runId,
|
|
126
|
-
}, 'tool-call-dedup: hard-blocking identical call');
|
|
144
|
+
}, 'tool-call-dedup: hard-blocking tight-burst identical call');
|
|
127
145
|
opts.onDecision?.({ toolName, inputHash, callCount: entry.count, decision: 'block', sinceFirstMs });
|
|
128
146
|
return {
|
|
129
147
|
hookSpecificOutput: {
|
|
130
148
|
hookEventName: 'PreToolUse',
|
|
131
149
|
permissionDecision: 'deny',
|
|
132
|
-
permissionDecisionReason: `Tool \`${toolName}\` was
|
|
133
|
-
`
|
|
150
|
+
permissionDecisionReason: `Tool \`${toolName}\` was just called with these exact arguments ${entry.count - 1} time(s) within the last ${Math.floor(sinceFirstMs / 1000)}s — a tight-burst refetch pattern that almost always indicates a refetch-after-compaction loop. ` +
|
|
151
|
+
`STOP re-calling — use the result from your earlier context, ` +
|
|
134
152
|
`change the arguments to fetch different data, or finish the task with what you already know. ` +
|
|
135
|
-
`If you genuinely need fresh data, wait at least ${Math.ceil(
|
|
153
|
+
`If you genuinely need fresh data (polling), wait at least ${Math.ceil(HARD_BLOCK_BURST_WINDOW_MS / 1000)}s before the next identical call.`,
|
|
136
154
|
},
|
|
137
155
|
};
|
|
138
156
|
}
|
package/dist/gateway/router.d.ts
CHANGED
|
@@ -18,6 +18,17 @@ export declare function buildContextOverflowRetryPrompt(opts: {
|
|
|
18
18
|
turnContextPrefix?: string;
|
|
19
19
|
project?: ProjectMeta | null;
|
|
20
20
|
}): string;
|
|
21
|
+
/**
|
|
22
|
+
* Map an SDK TerminalReason to a brief, honest note for the user when
|
|
23
|
+
* a chat-initiated job stopped due to a cap rather than clean
|
|
24
|
+
* completion. Returns null for reasons that don't need user
|
|
25
|
+
* messaging (clean completion, user-initiated abort, etc.).
|
|
26
|
+
*
|
|
27
|
+
* 1.18.184 — silent trail-off is the bug class we're killing. When
|
|
28
|
+
* Clementine stops mid-job because of maxTurns / budget / etc., the
|
|
29
|
+
* owner needs to know so they can choose to continue.
|
|
30
|
+
*/
|
|
31
|
+
export declare function buildCapHitNote(terminalReason: string | undefined): string | null;
|
|
21
32
|
export declare function runAgentResultIndicatesContextOverflow(result: {
|
|
22
33
|
subtype?: string;
|
|
23
34
|
terminalReason?: string;
|
package/dist/gateway/router.js
CHANGED
|
@@ -96,6 +96,45 @@ export function buildContextOverflowRetryPrompt(opts) {
|
|
|
96
96
|
parts.push(opts.chatPrompt);
|
|
97
97
|
return parts.filter(Boolean).join('\n\n');
|
|
98
98
|
}
|
|
99
|
+
/**
 * User-facing notes keyed by the terminal reason that warrants one.
 *
 * A Map (rather than a plain object) keeps lookups of arbitrary strings
 * such as "constructor" from resolving to inherited properties.
 */
const CAP_HIT_NOTES = new Map([
    ['max_turns', '_(Note: I hit my turn cap before finishing. Say "continue" if you want me to keep going from where I left off.)_'],
    ['blocking_limit', '_(Note: I hit a budget cap before finishing. Say "continue" if you want me to keep going — or raise the per-chat budget in the dashboard.)_'],
    // Context-overflow has its own recovery flow earlier; seeing this
    // reason on the success path means the retry didn't fully recover,
    // so it is surfaced honestly.
    ['rapid_refill_breaker', '_(Note: my context got refilled too aggressively mid-task. Some work above may be partial. Say "continue" and I\'ll pick up with a fresh context.)_'],
    ['prompt_too_long', '_(Note: the working context grew too large mid-task. Some work above may be partial. Say "continue" with a fresh focus and I\'ll keep going.)_'],
]);
/**
 * Map an SDK TerminalReason to a brief, honest note for the user when
 * a chat-initiated job stopped due to a cap rather than clean
 * completion.
 *
 * 1.18.184 — silent trail-off is the bug class we're killing. When
 * Clementine stops mid-job because of maxTurns / budget / etc., the
 * owner needs to know so they can choose to continue.
 *
 * Reasons that intentionally produce no note:
 *   - missing / empty reason and 'completed' (nothing to report);
 *   - 'hook_stopped' / 'stop_hook_prevented' (treated as a pause the
 *     owner asked for, so no extra noise is added);
 *   - any unrecognized reason (don't second-guess the SDK; let the
 *     message text speak for itself).
 *
 * @param {string|undefined} terminalReason Terminal reason reported for the run.
 *   Surrounding whitespace is ignored, matching how
 *   runAgentResultIndicatesContextOverflow normalizes the same field.
 * @returns {string|null} Markdown-italic note to append to the reply, or null.
 */
export function buildCapHitNote(terminalReason) {
    if (typeof terminalReason !== 'string') {
        return null;
    }
    return CAP_HIT_NOTES.get(terminalReason.trim()) ?? null;
}
|
|
99
138
|
export function runAgentResultIndicatesContextOverflow(result) {
|
|
100
139
|
const terminalReason = (result.terminalReason ?? '').trim();
|
|
101
140
|
if (terminalReason && classifyChatError(terminalReason) === 'context_overflow')
|
|
@@ -2118,6 +2157,8 @@ export class Gateway {
|
|
|
2118
2157
|
const { buildExtraMcpForRunAgent } = await import('../agent/run-agent-mcp.js');
|
|
2119
2158
|
const { buildChatSystemAppend } = await import('../agent/run-agent-context.js');
|
|
2120
2159
|
const { resolveSkillsForChat } = await import('../agent/chat-skill-resolver.js');
|
|
2160
|
+
const { buildClementineTurnContext } = await import('../agent/clementine-turn-context.js');
|
|
2161
|
+
const { listBackgroundTasks } = await import('../agent/background-tasks.js');
|
|
2121
2162
|
// Builder sessions (dashboard trick/skill/cron/agent builder)
|
|
2122
2163
|
// are conversational JSON-drafting flows, not real chat. They
|
|
2123
2164
|
// don't need vault context, MCP tools, recall, or auto-memory
|
|
@@ -2173,9 +2214,51 @@ export class Gateway {
|
|
|
2173
2214
|
// Per-turn context (recall + persistent learnings + silent
|
|
2174
2215
|
// blocks + security/toolset directives) — real chat only.
|
|
2175
2216
|
// Builder doesn't need recall of unrelated transcripts.
|
|
2176
|
-
|
|
2217
|
+
//
|
|
2218
|
+
// 1.18.184: the volatile turn-context block is now the
|
|
2219
|
+
// single integration point for everything dynamic about
|
|
2220
|
+
// Clementine — retrieved SQLite memory, recent bg-task
|
|
2221
|
+
// headlines, live state, and (soon) outputs from the
|
|
2222
|
+
// self-improvement subsystems. Prepended ahead of the
|
|
2223
|
+
// existing securityAnnotation envelope.
|
|
2224
|
+
// See `src/agent/clementine-turn-context.ts` for the
|
|
2225
|
+
// architecture rationale and the labeled extension points.
|
|
2226
|
+
let clementineContextBlock = '';
|
|
2227
|
+
if (!isBuilderSession) {
|
|
2228
|
+
try {
|
|
2229
|
+
const memStore = this.assistant.getMemoryStore?.() ?? null;
|
|
2230
|
+
const turnCtx = buildClementineTurnContext({
|
|
2231
|
+
userMessage: originalText,
|
|
2232
|
+
sessionKey: effectiveSessionKey,
|
|
2233
|
+
channel: effectiveSessionKey.split(':')[0] ?? 'chat',
|
|
2234
|
+
ownerName: resolvedProfile?.name ?? null,
|
|
2235
|
+
profileName: resolvedProfile && resolvedProfile.slug !== 'clementine'
|
|
2236
|
+
? (resolvedProfile.name ?? resolvedProfile.slug)
|
|
2237
|
+
: null,
|
|
2238
|
+
memoryStore: memStore,
|
|
2239
|
+
listBackgroundTasks,
|
|
2240
|
+
});
|
|
2241
|
+
clementineContextBlock = turnCtx.block;
|
|
2242
|
+
logger.debug({
|
|
2243
|
+
sessionKey: effectiveSessionKey,
|
|
2244
|
+
turnContextChars: turnCtx.totalChars,
|
|
2245
|
+
sections: turnCtx.sections,
|
|
2246
|
+
}, 'Built Clementine turn-context block');
|
|
2247
|
+
}
|
|
2248
|
+
catch (err) {
|
|
2249
|
+
// Never block chat on context-builder failure — log and skip.
|
|
2250
|
+
logger.warn({ err, sessionKey: effectiveSessionKey }, 'Clementine turn-context builder failed (non-fatal)');
|
|
2251
|
+
}
|
|
2252
|
+
}
|
|
2253
|
+
const securityContextPrefix = !isBuilderSession && securityAnnotation.trim()
|
|
2177
2254
|
? `[Context — read this for continuity, then respond to the user message below]\n${securityAnnotation}\n[/Context]\n\n`
|
|
2178
2255
|
: '';
|
|
2256
|
+
// Order: Clementine context first (durable memory + live
|
|
2257
|
+
// state), then security annotation (per-turn signal), then
|
|
2258
|
+
// the user's actual chat prompt. The model sees memory and
|
|
2259
|
+
// identity framing BEFORE per-turn warnings, which matches
|
|
2260
|
+
// how a human assistant would orient themselves.
|
|
2261
|
+
const turnContextPrefix = clementineContextBlock + securityContextPrefix;
|
|
2179
2262
|
const finalPrompt = turnContextPrefix + chatPrompt;
|
|
2180
2263
|
// Resume the prior SDK session when one exists for this
|
|
2181
2264
|
// sessionKey. The SDK persists session JSONLs to disk, so
|
|
@@ -2215,7 +2298,28 @@ export class Gateway {
|
|
|
2215
2298
|
profile: resolvedProfile,
|
|
2216
2299
|
agentManager: this.getAgentManager(),
|
|
2217
2300
|
memoryStore: this.assistant.getMemoryStore?.() ?? null,
|
|
2301
|
+
// 1.18.184 — Chat runs on a trusted local machine for the
|
|
2302
|
+
// owner. The canonical SDK posture for that case is
|
|
2303
|
+
// `bypassPermissions` (requires allowDangerouslySkipPermissions,
|
|
2304
|
+
// which execution-policy.ts:266 wires automatically when this
|
|
2305
|
+
// mode is selected). Builder sessions still inherit the
|
|
2306
|
+
// default 'dontAsk' since they have no tools and run on
|
|
2307
|
+
// Haiku — bypass would be a no-op there anyway. Autonomous
|
|
2308
|
+
// paths (cron, scheduled-skill, heartbeat) intentionally
|
|
2309
|
+
// stay on 'dontAsk' so they remain strict-allowlist for
|
|
2310
|
+
// safety; only the owner's direct chat gets full bypass.
|
|
2311
|
+
...(isBuilderSession ? {} : { permissionMode: 'bypassPermissions' }),
|
|
2218
2312
|
...(builderModel ? { model: builderModel } : {}),
|
|
2313
|
+
// 1.18.184 — right-size maxTurns for chat-initiated work.
|
|
2314
|
+
// Chat jobs are often multi-step ("draft 3 emails and send
|
|
2315
|
+
// them") and the SDK's default (low single digits) was
|
|
2316
|
+
// forcing premature trail-off. We give chat a generous
|
|
2317
|
+
// 60-turn ceiling; the real cost stopper is `BUDGET.chat`
|
|
2318
|
+
// (default $5.00 / invocation, see config/effective-config.ts).
|
|
2319
|
+
// Caller-supplied maxTurns still wins. Builder sessions
|
|
2320
|
+
// skip this — they're tight Haiku JSON drafting and don't
|
|
2321
|
+
// need the runway.
|
|
2322
|
+
...((!isBuilderSession && !maxTurns) ? { maxTurns: 60 } : {}),
|
|
2219
2323
|
...(maxTurns ? { maxTurns } : {}),
|
|
2220
2324
|
...(chatBudget !== undefined ? { maxBudgetUsd: chatBudget } : {}),
|
|
2221
2325
|
...(builderAllowedTools ? { allowedTools: builderAllowedTools } : {}),
|
|
@@ -2350,8 +2454,16 @@ export class Gateway {
|
|
|
2350
2454
|
numTurns: runAgentResult.numTurns,
|
|
2351
2455
|
cost: Number(runAgentResult.totalCostUsd.toFixed(4)),
|
|
2352
2456
|
responseLen: runAgentResult.text.length,
|
|
2457
|
+
terminalReason: runAgentResult.terminalReason,
|
|
2353
2458
|
}, 'chat:latency');
|
|
2354
|
-
|
|
2459
|
+
// 1.18.184 — Honest cap-hit messaging. If the run stopped
|
|
2460
|
+
// because of a cap (not a clean completion or a user abort),
|
|
2461
|
+
// append a brief, factual note so the owner knows where the
|
|
2462
|
+
// job actually stopped. Silent trail-off is the bug class
|
|
2463
|
+
// we're killing.
|
|
2464
|
+
const baseText = runAgentResult.text || '*(no response)*';
|
|
2465
|
+
const capNote = buildCapHitNote(runAgentResult.terminalReason);
|
|
2466
|
+
return capNote ? `${baseText}\n\n${capNote}` : baseText;
|
|
2355
2467
|
}
|
|
2356
2468
|
catch (err) {
|
|
2357
2469
|
clearTimeout(chatTimer);
|