npm - @blockrun/franklin - Versions diffs - 3.7.9 → 3.7.10 - Mend

@blockrun/franklin 3.7.9 → 3.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/agent/llm.d.ts CHANGED

@@ -27,6 +27,31 @@ export interface LLMClientOptions {
     chain: Chain;
     debug?: boolean;
 }
+/**
+ * Apply Anthropic prompt caching using the `system_and_3` strategy.
+ * Pattern from nousresearch/hermes-agent `agent/prompt_caching.py`.
+ *
+ * Places 4 cache_control breakpoints (Anthropic's max):
+ *   1. System prompt (stable across all turns)
+ *   2-4. Last 3 non-system messages (rolling window)
+ *
+ * Also caches the last tool definition (tools are stable across turns).
+ *
+ * This keeps the cache warm: each new turn extends the cached prefix rather
+ * than invalidating it. Multi-turn conversations see ~75% input token savings
+ * on Anthropic models.
+ */
+/**
+ * True if the given Anthropic model accepts the `thinking: { type: 'enabled' }`
+ * API flag (so-called *extended thinking*). Models using *adaptive thinking*
+ * (Opus 4.7 and later) reject that flag — the behavior is built in and not
+ * opt-in via API. Keeping the allowlist explicit, not derived from a regex,
+ * so a future model that happens to include "opus" in its name doesn't
+ * silently re-enable extended thinking on a model that can't handle it.
+ *
+ * Exported so tests can pin this decision without a live API.
+ */
+export declare function modelHasExtendedThinking(model: string): boolean;
 export declare class ModelClient {
     private apiUrl;
     private chain;

package/dist/agent/llm.js CHANGED

@@ -20,6 +20,26 @@ import { USER_AGENT } from '../config.js';
  * than invalidating it. Multi-turn conversations see ~75% input token savings
  * on Anthropic models.
  */
+/**
+ * True if the given Anthropic model accepts the `thinking: { type: 'enabled' }`
+ * API flag (so-called *extended thinking*). Models using *adaptive thinking*
+ * (Opus 4.7 and later) reject that flag — the behavior is built in and not
+ * opt-in via API. Keeping the allowlist explicit, not derived from a regex,
+ * so a future model that happens to include "opus" in its name doesn't
+ * silently re-enable extended thinking on a model that can't handle it.
+ *
+ * Exported so tests can pin this decision without a live API.
+ */
+export function modelHasExtendedThinking(model) {
+    const m = model.toLowerCase();
+    if (m.includes('opus-4.7') || m.includes('opus-4-7'))
+        return false;
+    return (m.includes('opus-4.6') || m.includes('opus-4-6') ||
+        m.includes('opus-4.5') || m.includes('opus-4-5') ||
+        m.includes('opus-4.1') || m.includes('opus-4-1') ||
+        m.includes('sonnet-4') ||
+        m.includes('sonnet-3.7'));
+}
 function applyAnthropicPromptCaching(payload, request) {
     const out = { ...payload };
     const cacheMarker = { type: 'ephemeral' };
@@ -140,17 +160,11 @@ export class ModelClient {
         if (isAnthropic) {
             // ─ Anthropic extended thinking ──────────────────────────────────────
             // Enable thinking for Claude models that support it (Opus 4.6, Sonnet 4.6).
-            // This is the single biggest quality lever — Claude with thinking enabled
-            // is dramatically better at complex multi-step tasks, reasoning, and code.
-            //
-            // Uses adaptive thinking: the model decides how much to think per request.
-            // budget_tokens is the MAX it can use (not a minimum), so the model won't
-            // waste tokens on simple tasks. Set to 80% of max_tokens to leave room
-            // for the actual response.
-            const supportsThinking = request.model.includes('opus') ||
-                request.model.includes('sonnet-4') ||
-                request.model.includes('sonnet-3.7');
-            if (supportsThinking) {
+            // Enable the `thinking` API block only for models that accept it.
+            // Claude Opus 4.7 and newer use *adaptive* thinking (built-in, no API
+            // flag); passing the extended-thinking flag to them makes Anthropic
+            // reject the request. See `modelHasExtendedThinking` for the allowlist.
+            if (modelHasExtendedThinking(request.model)) {
                 const maxOut = (request.max_tokens ?? 16_384);
                 requestPayload['thinking'] = {
                     type: 'enabled',

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.7.9",
+  "version": "3.7.10",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {