@blockrun/franklin 3.7.9 → 3.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,6 +27,31 @@ export interface LLMClientOptions {
27
27
  chain: Chain;
28
28
  debug?: boolean;
29
29
  }
30
+ /**
31
+ * Apply Anthropic prompt caching using the `system_and_3` strategy.
32
+ * Pattern from nousresearch/hermes-agent `agent/prompt_caching.py`.
33
+ *
34
+ * Places 4 cache_control breakpoints (Anthropic's max):
35
+ * 1. System prompt (stable across all turns)
36
+ * 2-4. Last 3 non-system messages (rolling window)
37
+ *
38
+ * Also caches the last tool definition (tools are stable across turns).
39
+ *
40
+ * This keeps the cache warm: each new turn extends the cached prefix rather
41
+ * than invalidating it. Multi-turn conversations see ~75% input token savings
42
+ * on Anthropic models.
43
+ */
44
+ /**
45
+ * True if the given Anthropic model accepts the `thinking: { type: 'enabled' }`
46
+ * API flag (so-called *extended thinking*). Models using *adaptive thinking*
47
+ * (Opus 4.7 and later) reject that flag — the behavior is built in and not
48
+ * opt-in via API. Keeping the allowlist explicit, not derived from a regex,
49
+ * so a future model that happens to include "opus" in its name doesn't
50
+ * silently re-enable extended thinking on a model that can't handle it.
51
+ *
52
+ * Exported so tests can pin this decision without a live API.
53
+ */
54
+ export declare function modelHasExtendedThinking(model: string): boolean;
30
55
  export declare class ModelClient {
31
56
  private apiUrl;
32
57
  private chain;
package/dist/agent/llm.js CHANGED
@@ -20,6 +20,26 @@ import { USER_AGENT } from '../config.js';
20
20
  * than invalidating it. Multi-turn conversations see ~75% input token savings
21
21
  * on Anthropic models.
22
22
  */
23
/**
 * Report whether an Anthropic model accepts the `thinking: { type: 'enabled' }`
 * request flag (so-called *extended thinking*).
 *
 * Models using *adaptive thinking* (Opus 4.7 and later) reject that flag — the
 * behavior is built in and not opt-in via API — so they are checked first and
 * always yield `false`. Everything else must appear on the explicit allowlist
 * below; a model whose name merely contains "opus" is NOT enough, so a future
 * model cannot silently re-enable a flag it can't handle.
 *
 * Matching is a case-insensitive substring test, so provider prefixes
 * (`anthropic/…`) and date suffixes (`…-20250219`) are tolerated. Both the
 * dotted (`4.6`) and dashed (`4-6`) version spellings are listed, and Claude
 * 3.7 Sonnet is listed in both word orders (`sonnet-3.7` and `3-7-sonnet`)
 * because model identifiers put the version number before the family name
 * for that generation.
 *
 * Exported so tests can pin this decision without a live API.
 *
 * @param model Model identifier as passed in the request.
 * @returns `true` if the `thinking` block may be sent for this model.
 */
export function modelHasExtendedThinking(model) {
    const id = model.toLowerCase();
    const mentions = (fragment) => id.includes(fragment);
    // Deny list wins over the allowlist: adaptive-thinking models reject the flag.
    const adaptiveOnly = ['opus-4.7', 'opus-4-7'];
    if (adaptiveOnly.some(mentions)) {
        return false;
    }
    const extendedThinking = [
        'opus-4.6', 'opus-4-6',
        'opus-4.5', 'opus-4-5',
        'opus-4.1', 'opus-4-1',
        'sonnet-4',
        // Claude 3.7 Sonnet: family-first and number-first spellings.
        'sonnet-3.7', 'sonnet-3-7',
        '3.7-sonnet', '3-7-sonnet',
    ];
    return extendedThinking.some(mentions);
}
23
43
  function applyAnthropicPromptCaching(payload, request) {
24
44
  const out = { ...payload };
25
45
  const cacheMarker = { type: 'ephemeral' };
@@ -140,17 +160,11 @@ export class ModelClient {
140
160
  if (isAnthropic) {
141
161
  // ─ Anthropic extended thinking ──────────────────────────────────────
142
162
  // Enable thinking for Claude models that support it (Opus 4.6, Sonnet 4.6).
143
- // This is the single biggest quality lever — Claude with thinking enabled
144
- // is dramatically better at complex multi-step tasks, reasoning, and code.
145
- //
146
- // Uses adaptive thinking: the model decides how much to think per request.
147
- // budget_tokens is the MAX it can use (not a minimum), so the model won't
148
- // waste tokens on simple tasks. Set to 80% of max_tokens to leave room
149
- // for the actual response.
150
- const supportsThinking = request.model.includes('opus') ||
151
- request.model.includes('sonnet-4') ||
152
- request.model.includes('sonnet-3.7');
153
- if (supportsThinking) {
163
+ // Enable the `thinking` API block only for models that accept it.
164
+ // Claude Opus 4.7 and newer use *adaptive* thinking (built-in, no API
165
+ // flag); passing the extended-thinking flag to them makes Anthropic
166
+ // reject the request. See `modelHasExtendedThinking` for the allowlist.
167
+ if (modelHasExtendedThinking(request.model)) {
154
168
  const maxOut = (request.max_tokens ?? 16_384);
155
169
  requestPayload['thinking'] = {
156
170
  type: 'enabled',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.7.9",
3
+ "version": "3.7.10",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {