@blockrun/franklin 3.9.5 → 3.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -174,6 +174,7 @@ function getToolPatternsSection() {
174
174
  - **Research**: WebSearch for discovery → WebFetch for specific URLs from search results. Don't WebFetch URLs you invented.
175
175
  - **Complex tasks**: Use Agent to spawn sub-agents for 2+ independent research or implementation tasks. Don't do sequentially what can be done in parallel.
176
176
  - **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.
177
+ - **Long-running iteration (>20 items)**: Do NOT loop in the agent (one tool call per item burns turns and trips timeouts on the 21st item). Instead: Write a script (Node/Bash/Python), have it iterate with a checkpoint file (\`./.franklin/<task>.checkpoint.json\` storing cursor + processedCount), then Bash it once. The agent re-engages only on errors or completion. Pattern fits paginated APIs, batch enrichment, large CSV emit, anything where the loop body is deterministic. The agent's job is to design and orchestrate, not to be the for-loop.
177
178
 
178
179
  # Grounding Before Answering
179
180
  Your training data is frozen in the past. Live-world questions MUST be answered from tool results, not memory.
package/dist/agent/llm.js CHANGED
@@ -14,9 +14,18 @@ function parseTimeoutEnv(name) {
14
14
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : null;
15
15
  }
16
16
  function getModelRequestTimeoutMs() {
17
+ // 180s budget for *time-to-headers* (the gateway flushes SSE headers only
18
+ // once the upstream model emits its first token). Reasoning-class models
19
+ // (zai/glm-*, nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic
20
+ // extended-thinking) routinely take 60–120s to first token on cache-cold
21
+ // prompts or when the gateway is under load — the old 45s default cut
22
+ // those off and wasted USDC on retries that hit the same wall. 180s is
23
+ // generous enough for any realistic first-token latency, still bounded
24
+ // enough that genuinely dead requests surface within ~6 min total (the
25
+ // initial 180s attempt plus the single 180s timeout retry).
17
26
  return (parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS') ??
18
27
  parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
19
- 45_000);
28
+ 180_000);
20
29
  }
21
30
  function getModelStreamIdleTimeoutMs() {
22
31
  return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
@@ -41,7 +41,13 @@ function log(...args) {
41
41
  catch { /* ignore */ }
42
42
  }
43
43
  const DEFAULT_MAX_TOKENS = 4096;
44
- const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 45_000;
44
+ // 180s budget for *time-to-headers* — reasoning-class models (zai/glm-*,
45
+ // nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic extended-thinking)
46
+ // routinely take 60–120s to first token on cache-cold prompts or busy
47
+ // gateways. The old 45s default cut those off and the proxy returned a
48
+ // failed response that downstream agents (Cline, Claude Desktop, etc.) had
49
+ // to retry blindly.
50
+ const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 180_000;
45
51
  const DEFAULT_PROXY_STREAM_TIMEOUT_MS = 5 * 60 * 1000;
46
52
  function parseTimeoutEnv(name, fallback) {
47
53
  const raw = process.env[name];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.9.5",
3
+ "version": "3.9.6",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {