npm - @blockrun/franklin - Versions diffs - 3.9.5 → 3.9.6 - Mend

@blockrun/franklin 3.9.5 → 3.9.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4)

package/dist/agent/context.js CHANGED Viewed

@@ -174,6 +174,7 @@ function getToolPatternsSection() {
 - **Research**: WebSearch for discovery → WebFetch for specific URLs from search results. Don't WebFetch URLs you invented.
 - **Complex tasks**: Use Agent to spawn sub-agents for 2+ independent research or implementation tasks. Don't do sequentially what can be done in parallel.
 - **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.
+- **Long-running iteration (>20 items)**: Do NOT loop in the agent (one tool call per item burns turns and trips timeouts on the 21st item). Instead: Write a script (Node/Bash/Python), have it iterate with a checkpoint file (\`./.franklin/<task>.checkpoint.json\` storing cursor + processedCount), then Bash it once. The agent re-engages only on errors or completion. Pattern fits paginated APIs, batch enrichment, large CSV emit, anything where the loop body is deterministic. The agent's job is to design and orchestrate, not to be the for-loop.
 # Grounding Before Answering
 Your training data is frozen in the past. Live-world questions MUST be answered from tool results, not memory.

package/dist/agent/llm.js CHANGED Viewed

@@ -14,9 +14,18 @@ function parseTimeoutEnv(name) {
     return Number.isFinite(parsed) && parsed >= 0 ? parsed : null;
 }
 function getModelRequestTimeoutMs() {
+    // 180s budget for *time-to-headers* (the gateway flushes SSE headers only
+    // once the upstream model emits its first token). Reasoning-class models
+    // (zai/glm-*, nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic
+    // extended-thinking) routinely take 60–120s to first token on cache-cold
+    // prompts or when the gateway is under load — the old 45s default cut
+    // those off and wasted USDC on retries that hit the same wall. 180s is
+    // generous enough for any realistic first-token latency, still bounded
+    // enough that genuinely dead requests surface within ~6 min after the
+    // single timeout retry.
     return (parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS') ??
         parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
-        45_000);
+        180_000);
 }
 function getModelStreamIdleTimeoutMs() {
     return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??

package/dist/proxy/server.js CHANGED Viewed

@@ -41,7 +41,13 @@ function log(...args) {
     catch { /* ignore */ }
 }
 const DEFAULT_MAX_TOKENS = 4096;
-const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 45_000;
+// 180s budget for *time-to-headers* — reasoning-class models (zai/glm-*,
+// nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic extended-thinking)
+// routinely take 60–120s to first token on cache-cold prompts or busy
+// gateways. The old 45s default cut those off and the proxy returned a
+// failed response that downstream agents (Cline, Claude Desktop, etc.) had
+// to retry blindly.
+const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 180_000;
 const DEFAULT_PROXY_STREAM_TIMEOUT_MS = 5 * 60 * 1000;
 function parseTimeoutEnv(name, fallback) {
     const raw = process.env[name];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.9.5",
+  "version": "3.9.6",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {