@blockrun/franklin 3.9.5 → 3.9.6
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
- package/dist/agent/context.js +1 -0
- package/dist/agent/llm.js +10 -1
- package/dist/proxy/server.js +7 -1
- package/package.json +1 -1
package/dist/agent/context.js
CHANGED
|
@@ -174,6 +174,7 @@ function getToolPatternsSection() {
|
|
|
174
174
|
- **Research**: WebSearch for discovery → WebFetch for specific URLs from search results. Don't WebFetch URLs you invented.
|
|
175
175
|
- **Complex tasks**: Use Agent to spawn sub-agents for 2+ independent research or implementation tasks. Don't do sequentially what can be done in parallel.
|
|
176
176
|
- **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.
|
|
177
|
+
- **Long-running iteration (>20 items)**: Do NOT loop in the agent (one tool call per item burns turns and trips timeouts on the 21st item). Instead: Write a script (Node/Bash/Python), have it iterate with a checkpoint file (\`./.franklin/<task>.checkpoint.json\` storing cursor + processedCount), then Bash it once. The agent re-engages only on errors or completion. Pattern fits paginated APIs, batch enrichment, large CSV emit, anything where the loop body is deterministic. The agent's job is to design and orchestrate, not to be the for-loop.
|
|
177
178
|
|
|
178
179
|
# Grounding Before Answering
|
|
179
180
|
Your training data is frozen in the past. Live-world questions MUST be answered from tool results, not memory.
|
package/dist/agent/llm.js
CHANGED
|
@@ -14,9 +14,18 @@ function parseTimeoutEnv(name) {
|
|
|
14
14
|
return Number.isFinite(parsed) && parsed >= 0 ? parsed : null;
|
|
15
15
|
}
|
|
16
16
|
function getModelRequestTimeoutMs() {
|
|
17
|
+
// 180s budget for *time-to-headers* (the gateway flushes SSE headers only
|
|
18
|
+
// once the upstream model emits its first token). Reasoning-class models
|
|
19
|
+
// (zai/glm-*, nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic
|
|
20
|
+
// extended-thinking) routinely take 60–120s to first token on cache-cold
|
|
21
|
+
// prompts or when the gateway is under load — the old 45s default cut
|
|
22
|
+
// those off and wasted USDC on retries that hit the same wall. 180s is
|
|
23
|
+
// generous enough for any realistic first-token latency, still bounded
|
|
24
|
+
// enough that genuinely dead requests surface within ~6 min after the
|
|
25
|
+
// single timeout retry.
|
|
17
26
|
return (parseTimeoutEnv('FRANKLIN_MODEL_REQUEST_TIMEOUT_MS') ??
|
|
18
27
|
parseTimeoutEnv('FRANKLIN_MODEL_IDLE_TIMEOUT_MS') ??
|
|
19
|
-
|
|
28
|
+
180_000);
|
|
20
29
|
}
|
|
21
30
|
function getModelStreamIdleTimeoutMs() {
|
|
22
31
|
return (parseTimeoutEnv('FRANKLIN_MODEL_STREAM_IDLE_TIMEOUT_MS') ??
|
package/dist/proxy/server.js
CHANGED
|
@@ -41,7 +41,13 @@ function log(...args) {
|
|
|
41
41
|
catch { /* ignore */ }
|
|
42
42
|
}
|
|
43
43
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
44
|
-
|
|
44
|
+
// 180s budget for *time-to-headers* — reasoning-class models (zai/glm-*,
|
|
45
|
+
// nemotron *-reasoning, deepseek-r*, gpt-5-codex, anthropic extended-thinking)
|
|
46
|
+
// routinely take 60–120s to first token on cache-cold prompts or busy
|
|
47
|
+
// gateways. The old 45s default cut those off and the proxy returned a
|
|
48
|
+
// failed response that downstream agents (Cline, Claude Desktop, etc.) had
|
|
49
|
+
// to retry blindly.
|
|
50
|
+
const DEFAULT_PROXY_REQUEST_TIMEOUT_MS = 180_000;
|
|
45
51
|
const DEFAULT_PROXY_STREAM_TIMEOUT_MS = 5 * 60 * 1000;
|
|
46
52
|
function parseTimeoutEnv(name, fallback) {
|
|
47
53
|
const raw = process.env[name];
|
package/package.json
CHANGED