@blockrun/franklin 3.7.9 → 3.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/llm.d.ts +25 -0
- package/dist/agent/llm.js +25 -11
- package/package.json +1 -1
package/dist/agent/llm.d.ts
CHANGED
|
@@ -27,6 +27,31 @@ export interface LLMClientOptions {
|
|
|
27
27
|
chain: Chain;
|
|
28
28
|
debug?: boolean;
|
|
29
29
|
}
|
|
30
|
+
/**
|
|
31
|
+
* Apply Anthropic prompt caching using the `system_and_3` strategy.
|
|
32
|
+
* Pattern from nousresearch/hermes-agent `agent/prompt_caching.py`.
|
|
33
|
+
*
|
|
34
|
+
* Places 4 cache_control breakpoints (Anthropic's max):
|
|
35
|
+
* 1. System prompt (stable across all turns)
|
|
36
|
+
* 2-4. Last 3 non-system messages (rolling window)
|
|
37
|
+
*
|
|
38
|
+
* Also caches the last tool definition (tools are stable across turns).
|
|
39
|
+
*
|
|
40
|
+
* This keeps the cache warm: each new turn extends the cached prefix rather
|
|
41
|
+
* than invalidating it. Multi-turn conversations see ~75% input token savings
|
|
42
|
+
* on Anthropic models.
|
|
43
|
+
*/
|
|
44
|
+
/**
|
|
45
|
+
* True if the given Anthropic model accepts the `thinking: { type: 'enabled' }`
|
|
46
|
+
* API flag (so-called *extended thinking*). Models using *adaptive thinking*
|
|
47
|
+
* (Opus 4.7 and later) reject that flag — the behavior is built in and not
|
|
48
|
+
* opt-in via API. Keeping the allowlist explicit, not derived from a regex,
|
|
49
|
+
* so a future model that happens to include "opus" in its name doesn't
|
|
50
|
+
* silently re-enable extended thinking on a model that can't handle it.
|
|
51
|
+
*
|
|
52
|
+
* Exported so tests can pin this decision without a live API.
|
|
53
|
+
*/
|
|
54
|
+
export declare function modelHasExtendedThinking(model: string): boolean;
|
|
30
55
|
export declare class ModelClient {
|
|
31
56
|
private apiUrl;
|
|
32
57
|
private chain;
|
package/dist/agent/llm.js
CHANGED
|
@@ -20,6 +20,26 @@ import { USER_AGENT } from '../config.js';
|
|
|
20
20
|
* than invalidating it. Multi-turn conversations see ~75% input token savings
|
|
21
21
|
* on Anthropic models.
|
|
22
22
|
*/
|
|
23
|
+
/**
 * True if the given Anthropic model accepts the `thinking: { type: 'enabled' }`
 * API flag (so-called *extended thinking*). Models using *adaptive thinking*
 * (Opus 4.7 and later) reject that flag — the behavior is built in and not
 * opt-in via API. The allowlist is kept explicit, not derived from a regex,
 * so a future model that happens to include "opus" in its name doesn't
 * silently re-enable extended thinking on a model that can't handle it.
 *
 * Matching is a case-insensitive substring test, and every versioned entry is
 * listed with both `.` and `-` separators because model ids appear in both
 * spellings (e.g. `opus-4.6` and `opus-4-6`). Claude 3.7 Sonnet is listed in
 * both word orders (`sonnet-3.7` and `3-7-sonnet`) for the same reason.
 *
 * Exported so tests can pin this decision without a live API.
 *
 * @param model Model identifier as sent in the request (any casing).
 * @returns `true` when the `thinking` block may be attached to the payload.
 */
export function modelHasExtendedThinking(model) {
    const m = model.toLowerCase();
    // Deny adaptive-thinking models first so no allowlist entry can ever
    // re-enable the flag for them.
    if (m.includes('opus-4.7') || m.includes('opus-4-7'))
        return false;
    // NOTE(review): base Opus 4 ids such as `claude-opus-4-20250514` match none
    // of these markers and therefore return false — confirm that is intended.
    const extendedThinkingMarkers = [
        'opus-4.6', 'opus-4-6',
        'opus-4.5', 'opus-4-5',
        'opus-4.1', 'opus-4-1',
        'sonnet-4',
        // Claude 3.7 Sonnet: dotted and dashed separators, both word orders.
        'sonnet-3.7', 'sonnet-3-7',
        '3.7-sonnet', '3-7-sonnet',
    ];
    return extendedThinkingMarkers.some((marker) => m.includes(marker));
}
|
|
23
43
|
function applyAnthropicPromptCaching(payload, request) {
|
|
24
44
|
const out = { ...payload };
|
|
25
45
|
const cacheMarker = { type: 'ephemeral' };
|
|
@@ -140,17 +160,11 @@ export class ModelClient {
|
|
|
140
160
|
if (isAnthropic) {
|
|
141
161
|
// ─ Anthropic extended thinking ──────────────────────────────────────
|
|
142
162
|
// Enable thinking for Claude models that support it (Opus 4.6, Sonnet 4.6).
|
|
143
|
-
//
|
|
144
|
-
//
|
|
145
|
-
//
|
|
146
|
-
//
|
|
147
|
-
|
|
148
|
-
// waste tokens on simple tasks. Set to 80% of max_tokens to leave room
|
|
149
|
-
// for the actual response.
|
|
150
|
-
const supportsThinking = request.model.includes('opus') ||
|
|
151
|
-
request.model.includes('sonnet-4') ||
|
|
152
|
-
request.model.includes('sonnet-3.7');
|
|
153
|
-
if (supportsThinking) {
|
|
163
|
+
// Enable the `thinking` API block only for models that accept it.
|
|
164
|
+
// Claude Opus 4.7 and newer use *adaptive* thinking (built-in, no API
|
|
165
|
+
// flag); passing the extended-thinking flag to them makes Anthropic
|
|
166
|
+
// reject the request. See `modelHasExtendedThinking` for the allowlist.
|
|
167
|
+
if (modelHasExtendedThinking(request.model)) {
|
|
154
168
|
const maxOut = (request.max_tokens ?? 16_384);
|
|
155
169
|
requestPayload['thinking'] = {
|
|
156
170
|
type: 'enabled',
|
package/package.json
CHANGED