@blockrun/franklin 3.7.10 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/agent/bash-guard.js +8 -2
  2. package/dist/agent/compact.d.ts +14 -0
  3. package/dist/agent/compact.js +57 -1
  4. package/dist/agent/context.js +6 -4
  5. package/dist/agent/llm.js +2 -1
  6. package/dist/agent/loop.js +88 -18
  7. package/dist/agent/optimize.js +4 -0
  8. package/dist/agent/tokens.d.ts +7 -3
  9. package/dist/agent/tokens.js +14 -7
  10. package/dist/agent/tool-guard.js +64 -26
  11. package/dist/content/image-pricing.d.ts +14 -0
  12. package/dist/content/image-pricing.js +32 -0
  13. package/dist/content/library.d.ts +63 -0
  14. package/dist/content/library.js +75 -0
  15. package/dist/content/record-image.d.ts +43 -0
  16. package/dist/content/record-image.js +50 -0
  17. package/dist/content/store.d.ts +15 -0
  18. package/dist/content/store.js +55 -0
  19. package/dist/pricing.d.ts +1 -1
  20. package/dist/pricing.js +2 -2
  21. package/dist/router/index.js +17 -6
  22. package/dist/tools/bash.d.ts +8 -0
  23. package/dist/tools/bash.js +13 -0
  24. package/dist/tools/content-execute.d.ts +26 -0
  25. package/dist/tools/content-execute.js +212 -0
  26. package/dist/tools/imagegen.d.ts +14 -0
  27. package/dist/tools/imagegen.js +164 -101
  28. package/dist/tools/index.d.ts +6 -0
  29. package/dist/tools/index.js +91 -5
  30. package/dist/tools/read.d.ts +13 -0
  31. package/dist/tools/read.js +17 -0
  32. package/dist/tools/trading-execute.d.ts +35 -0
  33. package/dist/tools/trading-execute.js +297 -0
  34. package/dist/tools/webfetch.d.ts +6 -0
  35. package/dist/tools/webfetch.js +8 -0
  36. package/dist/trading/engine.d.ts +51 -0
  37. package/dist/trading/engine.js +75 -0
  38. package/dist/trading/live-exchange.d.ts +43 -0
  39. package/dist/trading/live-exchange.js +48 -0
  40. package/dist/trading/mock-exchange.d.ts +40 -0
  41. package/dist/trading/mock-exchange.js +41 -0
  42. package/dist/trading/portfolio.d.ts +67 -0
  43. package/dist/trading/portfolio.js +106 -0
  44. package/dist/trading/risk.d.ts +34 -0
  45. package/dist/trading/risk.js +64 -0
  46. package/dist/trading/store.d.ts +9 -0
  47. package/dist/trading/store.js +32 -0
  48. package/dist/trading/trade-log.d.ts +39 -0
  49. package/dist/trading/trade-log.js +81 -0
  50. package/package.json +1 -1
@@ -30,8 +30,14 @@ const DANGEROUS_PATTERNS = [
30
30
  [/\bTRUNCATE\s+TABLE\b/i, 'truncate table'],
31
31
  // System-level danger
32
32
  [/\bchmod\s+(-R\s+)?777\b/, 'world-writable permissions'],
33
- [/\bcurl\s+.*\|\s*(sudo\s+)?(ba)?sh\b/, 'pipe URL to shell'],
34
- [/\bwget\s+.*\|\s*(sudo\s+)?(ba)?sh\b/, 'pipe URL to shell'],
33
+ // Pipe-to-shell: catch sudo/env prefixes and common shell variants (bash/sh/zsh/ksh/dash/fish).
34
+ // The optional `-e`/`-x` flags after the shell binary are intentionally allowed by \b;
35
+ // what we block is the routing of downloaded content into an interpreter.
36
+ [/\bcurl\s+.*\|\s*(sudo\s+)?(env\s+\S*\s*)?(ba|z|k|da|fi)?sh\b/, 'pipe URL to shell'],
37
+ [/\bwget\s+.*\|\s*(sudo\s+)?(env\s+\S*\s*)?(ba|z|k|da|fi)?sh\b/, 'pipe URL to shell'],
38
+ // Command substitution of a downloader into argv — `$(curl …)` or `` `curl …` ``.
39
+ [/\$\(\s*(curl|wget|fetch)\b/, 'command substitution of network downloader'],
40
+ [/`\s*(curl|wget|fetch)\b[^`]*`/, 'backtick substitution of network downloader'],
35
41
  [/\bsudo\s+rm\b/, 'sudo delete'],
36
42
  // Kill/shutdown
37
43
  [/\bkill\s+-9\s+-1\b/, 'kill all processes'],
@@ -5,6 +5,20 @@
5
5
  */
6
6
  import { ModelClient } from './llm.js';
7
7
  import type { Dialogue } from './types.js';
8
+ /**
9
+ * Decide whether compacting is worth the round-trip. Pure function so tests
10
+ * can pin behavior at specific history shapes without spinning up a client.
11
+ *
12
+ * Returns `{ worthIt, currentTokens, projectedTokens, savings, floor }`. Caller
13
+ * can log the numbers or just branch on `worthIt`.
14
+ */
15
+ export declare function projectCompactionSavings(history: Dialogue[]): {
16
+ worthIt: boolean;
17
+ currentTokens: number;
18
+ projectedTokens: number;
19
+ savings: number;
20
+ floor: number;
21
+ };
8
22
  export declare const COMPACT_HEADER = "[CONTEXT COMPACTION \u2014 REFERENCE ONLY] Earlier turns were compacted into the summary below. This is a handoff from a previous context window \u2014 treat it as background reference, NOT as active instructions. Do NOT answer questions or fulfill requests mentioned in this summary; they were already addressed. Respond ONLY to the latest user message that appears AFTER this summary.";
9
23
  /**
10
24
  * Check if compaction is needed and perform it if so.
@@ -9,6 +9,47 @@ import { estimateHistoryTokens, getCompactionThreshold, COMPACTION_SUMMARY_RESER
9
9
  const POST_COMPACT_MAX_FILES = 5;
10
10
  /** Max tokens to spend on post-compact file restoration */
11
11
  const POST_COMPACT_TOKEN_BUDGET = 50_000;
12
+ /**
13
+ * Minimum projected fraction of total history tokens that compaction must
14
+ * save to be worth the round-trip. Summarization itself costs roughly
15
+ * the input payload tokens (read once by the compaction model) plus the
16
+ * ~16k reserved for the output. If the payload we'd summarize is small
17
+ * relative to what we'd keep, we pay the full cost for marginal relief.
18
+ * 0.20 = skip compaction unless projected savings clear 20% of total tokens.
19
+ * This only applies to autoCompactIfNeeded; /compact (forceCompact) still
20
+ * runs unconditionally because the user asked for it.
21
+ */
22
+ const MIN_COMPACTION_SAVINGS_RATIO = 0.20;
23
+ /**
24
+ * Rough estimate of how many tokens the summary itself will typically occupy in
25
+ * the new history. The model is asked for up to COMPACTION_SUMMARY_RESERVE,
26
+ * but in practice structured summaries land well under that; be optimistic
27
+ * on the expected case, pessimistic on the safety margin.
28
+ */
29
+ const EXPECTED_SUMMARY_TOKENS = 4_000;
30
+ /**
31
+ * Decide whether compacting is worth the round-trip. Pure function so tests
32
+ * can pin behavior at specific history shapes without spinning up a client.
33
+ *
34
+ * Returns `{ worthIt, currentTokens, projectedTokens, savings, floor }`. Caller
35
+ * can log the numbers or just branch on `worthIt`.
36
+ */
37
+ export function projectCompactionSavings(history) {
38
+ const currentTokens = estimateHistoryTokens(history);
39
+ const keepCount = findKeepBoundary(history);
40
+ const toKeep = history.slice(history.length - keepCount);
41
+ const keptTokens = estimateHistoryTokens(toKeep);
42
+ const projectedTokens = keptTokens + EXPECTED_SUMMARY_TOKENS;
43
+ const savings = currentTokens - projectedTokens;
44
+ const floor = Math.ceil(currentTokens * MIN_COMPACTION_SAVINGS_RATIO);
45
+ return {
46
+ worthIt: savings >= floor,
47
+ currentTokens,
48
+ projectedTokens,
49
+ savings,
50
+ floor,
51
+ };
52
+ }
12
53
  // Structured compaction prompt (pattern from nousresearch/hermes-agent
13
54
  // `agent/context_compressor.py`). The structured sections preserve more
14
55
  // signal than free-form summaries and make it easier for the model to
@@ -71,8 +112,23 @@ export async function autoCompactIfNeeded(history, model, client, debug) {
71
112
  if (currentTokens < threshold) {
72
113
  return { history, compacted: false };
73
114
  }
115
+ // ROI gate: project how much the summarization would actually save. The
116
+ // portion that survives compaction (`toKeep`) doesn't shrink, and the
117
+ // summary replaces `toSummarize` with ~EXPECTED_SUMMARY_TOKENS. If the
118
+ // resulting history is within MIN_COMPACTION_SAVINGS_RATIO of the current
119
+ // size, skip — the round-trip would cost more than the headroom is worth.
120
+ // The caller then falls back to per-turn emergency handling (413 recovery,
121
+ // output-tokens clamp) which is much cheaper on the margin.
122
+ const roi = projectCompactionSavings(history);
123
+ if (!roi.worthIt) {
124
+ if (debug) {
125
+ console.error(`[franklin] Compaction skipped (ROI): current=${roi.currentTokens}, projected=${roi.projectedTokens}, ` +
126
+ `savings=${roi.savings} < ${roi.floor} floor`);
127
+ }
128
+ return { history, compacted: false };
129
+ }
74
130
  if (debug) {
75
- console.error(`[franklin] Auto-compacting: ~${currentTokens} tokens, threshold=${threshold}`);
131
+ console.error(`[franklin] Auto-compacting: ~${currentTokens} tokens, threshold=${threshold}, projected savings=${roi.savings}`);
76
132
  }
77
133
  const beforeTokens = estimateHistoryTokens(history);
78
134
  try {
@@ -463,8 +463,10 @@ function readRuntimeWallet() {
463
463
  }
464
464
  // ─── Git Context ───────────────────────────────────────────────────────────
465
465
  const GIT_TIMEOUT_MS = 5_000;
466
- // Max chars for git log output — long commit messages can bloat the system prompt
467
- const MAX_GIT_LOG_CHARS = 2_000;
466
+ // Max chars for git log output — long commit messages can bloat the system prompt.
467
+ // Tightened from 2000: at typical 60-80 chars/commit, 800 comfortably fits
468
+ // the 3 commits we request below with headroom for long subjects.
469
+ const MAX_GIT_LOG_CHARS = 800;
468
470
  function getGitContext(workingDir) {
469
471
  const gitCmd = (cmd) => execSync(cmd, {
470
472
  cwd: workingDir,
@@ -516,9 +518,9 @@ function getGitContext(workingDir) {
516
518
  }
517
519
  }
518
520
  catch { /* ignore */ }
519
- // Recent commits
521
+ // Recent commits — 3 is enough for style/context matching; more just bloats every turn.
520
522
  try {
521
- let log = gitCmd('git log --oneline -5');
523
+ let log = gitCmd('git log --oneline -3');
522
524
  if (log) {
523
525
  if (log.length > MAX_GIT_LOG_CHARS) {
524
526
  log = log.slice(0, MAX_GIT_LOG_CHARS) + '\n... (truncated)';
package/dist/agent/llm.js CHANGED
@@ -32,6 +32,8 @@ import { USER_AGENT } from '../config.js';
32
32
  */
33
33
  export function modelHasExtendedThinking(model) {
34
34
  const m = model.toLowerCase();
35
+ // Excluded: Opus 4.7+ uses adaptive thinking; sending `thinking: enabled`
36
+ // causes the API to 400.
35
37
  if (m.includes('opus-4.7') || m.includes('opus-4-7'))
36
38
  return false;
37
39
  return (m.includes('opus-4.6') || m.includes('opus-4-6') ||
@@ -159,7 +161,6 @@ export class ModelClient {
159
161
  }
160
162
  if (isAnthropic) {
161
163
  // ─ Anthropic extended thinking ──────────────────────────────────────
162
- // Enable thinking for Claude models that support it (Opus 4.6, Sonnet 4.6).
163
164
  // Enable the `thinking` API block only for models that accept it.
164
165
  // Claude Opus 4.7 and newer use *adaptive* thinking (built-in, no API
165
166
  // flag); passing the extended-thinking flag to them makes Anthropic
@@ -12,6 +12,7 @@ import { StreamingExecutor } from './streaming-executor.js';
12
12
  import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
13
13
  import { classifyAgentError } from './error-classifier.js';
14
14
  import { SessionToolGuard } from './tool-guard.js';
15
+ import { resetToolSessionState } from '../tools/index.js';
15
16
  import { recordUsage } from '../stats/tracker.js';
16
17
  import { recordSessionUsage } from '../stats/session-tracker.js';
17
18
  import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
@@ -34,25 +35,74 @@ function replaceHistory(target, replacement) {
34
35
  // ─── Pushback detection ───────────────────────────────────────────────────
35
36
  // Cheap models plough forward when users correct them. This detects common
36
37
  // correction patterns so the agent can explicitly reset its approach.
37
- const PUSHBACK_PATTERNS = [
38
- /^(but|however|actually|wait|no+\b|hmm)\b/i,
38
+ //
39
+ // Precision-biased: we'd rather miss a real pushback than falsely trigger on
40
+ // casual disagreement ("But how do I deploy?"). False positives pollute the
41
+ // conversation and make the agent abandon working approaches unnecessarily.
42
+ // STRONG patterns: high-precision correction language. Fires even on short input.
43
+ const PUSHBACK_STRONG = [
39
44
  /\b(that'?s?\s+(wrong|incorrect|not\s+right)|you'?re?\s+wrong)\b/i,
40
- /\b(i\s+(said|told\s+you)|not\s+(what|that))\b/i,
45
+ /\b(i\s+(said|told\s+you)|not\s+what\s+i)\b/i,
46
+ /^(stop|wrong|incorrect|try\s+again)\b/i,
47
+ /^(不对|不是|错了|再试|重来)/,
48
+ ];
49
+ // WEAK patterns: common correction starters that also appear in casual speech.
50
+ // Require a corroborating signal (see detectPushback) to count as pushback.
51
+ const PUSHBACK_WEAK = [
52
+ /^(but|however|actually|wait|no+\b|hmm)\b/i,
41
53
  /\b(we\s+are\s+using|the\s+correct|the\s+actual)\b/i,
42
- /^(stop|no,|wrong|incorrect|try\s+again)\b/i,
43
- /^(不对|不是|错了|再试|但是|其实|等等|停|重来)/,
54
+ /^(但是|其实|等等|停)/,
44
55
  ];
56
+ /**
57
+ * True if the last assistant turn made a concrete claim worth pushing back
58
+ * against: issued a tool call, or produced a non-trivial answer (at least 40 characters of text after trimming).
59
+ * Casual assistant chatter doesn't warrant treating a "but" as a correction.
60
+ */
61
+ function lastAssistantHasClaim(history) {
62
+ for (let i = history.length - 1; i >= 0; i--) {
63
+ const msg = history[i];
64
+ if (msg.role !== 'assistant')
65
+ continue;
66
+ if (Array.isArray(msg.content)) {
67
+ for (const part of msg.content) {
68
+ const p = part;
69
+ if (p.type === 'tool_use')
70
+ return true;
71
+ if (p.type === 'text' && typeof p.text === 'string' && p.text.trim().length >= 40) {
72
+ return true;
73
+ }
74
+ }
75
+ return false;
76
+ }
77
+ if (typeof msg.content === 'string' && msg.content.trim().length >= 40)
78
+ return true;
79
+ return false;
80
+ }
81
+ return false;
82
+ }
45
83
  function detectPushback(input, history) {
46
84
  // Only count as pushback if there's a prior assistant turn to push back against.
47
85
  if (history.length === 0)
48
86
  return false;
49
- const hasPriorAssistant = history.some((m) => m.role === 'assistant');
50
- if (!hasPriorAssistant)
87
+ if (!lastAssistantHasClaim(history))
51
88
  return false;
52
89
  const trimmed = input.trim();
53
90
  if (trimmed.length === 0 || trimmed.length > 500)
54
91
  return false;
55
- return PUSHBACK_PATTERNS.some((re) => re.test(trimmed));
92
+ // Strong patterns: direct correction language — fire immediately.
93
+ if (PUSHBACK_STRONG.some((re) => re.test(trimmed)))
94
+ return true;
95
+ // Weak patterns: only count if the message is short (at most 120 chars) AND doesn't
96
+ // also contain a fresh request. A weak starter followed by "can you also X"
97
+ // or "please do Y" is scope addition, not correction.
98
+ if (PUSHBACK_WEAK.some((re) => re.test(trimmed))) {
99
+ if (trimmed.length > 120)
100
+ return false;
101
+ if (/\b(can you|could you|please|also|add|include)\b/i.test(trimmed))
102
+ return false;
103
+ return true;
104
+ }
105
+ return false;
56
106
  }
57
107
  /**
58
108
  * Sanitize history: fix orphaned tool results AND inject missing results.
@@ -227,6 +277,13 @@ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
227
277
  * Returns the accumulated conversation history.
228
278
  */
229
279
  export async function interactiveSession(config, getUserInput, onEvent, onAbortReady) {
280
+ // Clear module-level tool caches left over from a prior session in the same
281
+ // process. Matters when Franklin is used as a library or driven by tests
282
+ // that call interactiveSession() more than once — stale fileReadTracker /
283
+ // fetchCache / backgroundTasks entries from the previous run would otherwise
284
+ // fool Edit/Write into skipping the read-before-edit check or serve cached
285
+ // webfetch content fetched under the previous session's intent.
286
+ resetToolSessionState();
230
287
  const client = new ModelClient({
231
288
  apiUrl: config.apiUrl,
232
289
  chain: config.chain,
@@ -345,7 +402,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
345
402
  history.push({ role: 'user', content: effectiveInput });
346
403
  turnCount++;
347
404
  toolGuard.startTurn();
348
- persistSessionMessage({ role: 'user', content: effectiveInput });
405
+ // Persist the user's original message, not the injected SYSTEM NOTE scaffold.
406
+ // Resumed sessions should show what the user typed, not our internal prompt engineering.
407
+ persistSessionMessage({ role: 'user', content: input });
349
408
  // ── Model recovery: try original model at the start of each new turn ──
350
409
  // If we fell back to a free model last turn due to a transient error, try original again.
351
410
  // But DON'T reset if the original model had a payment failure — it will just fail again.
@@ -446,16 +505,27 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
446
505
  // ── Context awareness injection ──
447
506
  // Tell the model how full its context window is so it can self-regulate.
448
507
  // At high usage, nudge it to be concise and avoid unnecessary tool calls.
508
+ //
509
+ // IMPORTANT: this text is appended to the system prompt, which carries a
510
+ // prompt-cache breakpoint on Anthropic. Including the exact percentage
511
+ // invalidated the cache on every turn (the string differed by a digit).
512
+ // Bucketing the signal to coarse bands (>50 / >65 / >80) keeps the text
513
+ // byte-identical across many consecutive turns, so the cache actually
514
+ // holds. The model doesn't need 3% precision to self-regulate.
449
515
  const { contextUsagePct: preCallPct } = getAnchoredTokenCount(history);
450
- if (preCallPct > 50) {
451
- let contextNote = `# Context Window Status\nYou have used approximately ${Math.round(preCallPct)}% of your context window.`;
452
- if (preCallPct > 80) {
453
- contextNote += ' Context is critically full. Be extremely concise. Avoid re-reading files already in context. Prioritize completing the current task over exploring new questions.';
454
- }
455
- else if (preCallPct > 65) {
456
- contextNote += ' Be concise in responses. Avoid unnecessary tool calls. Do not re-read files you already have in context.';
457
- }
458
- systemParts.push(contextNote);
516
+ if (preCallPct > 80) {
517
+ systemParts.push('# Context Window Status\nContext window is critically full (>80%). ' +
518
+ 'Be extremely concise. Avoid re-reading files already in context. ' +
519
+ 'Prioritize completing the current task over exploring new questions.');
520
+ }
521
+ else if (preCallPct > 65) {
522
+ systemParts.push('# Context Window Status\nContext window is more than two-thirds full (>65%). ' +
523
+ 'Be concise in responses. Avoid unnecessary tool calls. ' +
524
+ 'Do not re-read files you already have in context.');
525
+ }
526
+ else if (preCallPct > 50) {
527
+ systemParts.push('# Context Window Status\nContext window has crossed the halfway mark (>50%). ' +
528
+ 'Prefer concise responses and batch tool calls when possible.');
459
529
  }
460
530
  const systemPrompt = systemParts.join('\n\n');
461
531
  const modelMaxOut = getMaxOutputTokens(config.model);
@@ -21,6 +21,10 @@ export const CAPPED_MAX_TOKENS = 16_384;
21
21
  export const ESCALATED_MAX_TOKENS = 65_536;
22
22
  /** Per-model max output tokens — prevents requesting more than the model supports */
23
23
  const MODEL_MAX_OUTPUT = {
24
+ // Opus 4.7 supports 128k output per the BlockRun gateway model entry
25
+ // (anthropic/claude-opus-4.7 maxOutput: 128000). Bumping from 32k to
26
+ // 128k unlocks the full headroom — runaway generations are gated
27
+ // separately by CAPPED_MAX_TOKENS / ESCALATED_MAX_TOKENS budgets.
24
28
  'anthropic/claude-opus-4.7': 128_000,
25
29
  'anthropic/claude-opus-4.6': 32_000,
26
30
  'anthropic/claude-sonnet-4.6': 64_000,
@@ -31,9 +31,13 @@ export declare function setEstimationModel(model: string): void;
31
31
  * Estimate token count for a string using byte-length heuristic.
32
32
  * JSON-heavy content uses 2 bytes/token; general text uses model-specific ratio.
33
33
  *
34
- * Padding reduced from 1.33x to 1.15x to prevent premature compaction.
35
- * The old 1.33x + ceil() combo caused ~36% overestimation, triggering
36
- * auto-compact when context was still 15-20% below the actual limit.
34
+ * Padding history:
35
+ * 1.33x ~36% overestimate, auto-compact fired 15-20% below real limit.
36
+ * 1.15x still triggered compaction around 60% of real context.
37
+ * 1.05x (current) — combined with Math.ceil() this still leaves a small
38
+ * safety margin, and the LLM surfaces a hard 413/context error long before
39
+ * the real limit that recovery code can handle. Net effect: fewer
40
+ * unnecessary (and expensive) compaction round-trips on mid-sized sessions.
37
41
  */
38
42
  export declare function estimateTokens(text: string, bytesPerToken?: number): number;
39
43
  /**
@@ -91,14 +91,17 @@ export function setEstimationModel(model) {
91
91
  * Estimate token count for a string using byte-length heuristic.
92
92
  * JSON-heavy content uses 2 bytes/token; general text uses model-specific ratio.
93
93
  *
94
- * Padding reduced from 1.33x to 1.15x to prevent premature compaction.
95
- * The old 1.33x + ceil() combo caused ~36% overestimation, triggering
96
- * auto-compact when context was still 15-20% below the actual limit.
94
+ * Padding history:
95
+ * 1.33x ~36% overestimate, auto-compact fired 15-20% below real limit.
96
+ * 1.15x still triggered compaction around 60% of real context.
97
+ * 1.05x (current) — combined with Math.ceil() this still leaves a small
98
+ * safety margin, and the LLM surfaces a hard 413/context error long before
99
+ * the real limit that recovery code can handle. Net effect: fewer
100
+ * unnecessary (and expensive) compaction round-trips on mid-sized sessions.
97
101
  */
98
102
  export function estimateTokens(text, bytesPerToken) {
99
103
  const effectiveBPT = bytesPerToken ?? getModelBytesPerToken(_currentModel);
100
- // Pad by 15% for safety margin — still conservative but not premature
101
- return Math.ceil(Buffer.byteLength(text, 'utf-8') / effectiveBPT * 1.15);
104
+ return Math.ceil(Buffer.byteLength(text, 'utf-8') / effectiveBPT * 1.05);
102
105
  }
103
106
  /**
104
107
  * Estimate tokens for a content part.
@@ -150,8 +153,12 @@ export function estimateHistoryTokens(history) {
150
153
  * Context window sizes for known models.
151
154
  */
152
155
  const MODEL_CONTEXT_WINDOWS = {
153
- // Anthropic
154
- 'anthropic/claude-opus-4.7': 1_000_000,
156
+ // Anthropic. The BlockRun gateway model entry advertises 1M context for
157
+ // Opus 4.7, but the 1M beta header may not be enabled at the gateway
158
+ // edge yet — sending more than 200k without it 413s. Keep 200k as the
159
+ // safe Franklin baseline; bump to 1_000_000 in a separate commit once
160
+ // a real >200k call has been verified end-to-end.
161
+ 'anthropic/claude-opus-4.7': 200_000,
155
162
  'anthropic/claude-opus-4.6': 200_000,
156
163
  'anthropic/claude-sonnet-4.6': 200_000,
157
164
  'anthropic/claude-sonnet-4': 200_000,
@@ -7,6 +7,55 @@ const SEARCH_FAMILY_SIMILARITY = 0.58;
7
7
  const DUPLICATE_READ_TURN_WINDOW = 1;
8
8
  const DUPLICATE_FETCH_TURN_WINDOW = 1;
9
9
  const MAX_PREVIEW_CHARS = 320;
10
+ // WRITE_KEYWORDS (defined below): commands that mutate state or have side effects — never dedup these.
11
+ // Covers: filesystem writes, network downloads, package managers, container/orchestration,
12
+ // git mutations, privileged escalation, archive ops, and output redirection.
13
+ // Hoisted to module scope so beforeBash/afterBash don't recompile on every call.
14
+ // Normalize a filesystem path for cache-key use: collapse whitespace and strip
15
+ // a single trailing slash (so `/foo` and `/foo/` share a cache entry).
16
+ function normalizePath(p) {
17
+ const trimmed = p.trim().replace(/\s+/g, ' ');
18
+ if (trimmed.length > 1 && trimmed.endsWith('/'))
19
+ return trimmed.slice(0, -1);
20
+ return trimmed;
21
+ }
22
+ // Build a stable Grep cache key — or return '' if the call isn't dedupable.
23
+ // Pattern is case-sensitive by design (grep semantics), but path/glob/type
24
+ // are normalized so cosmetic variation doesn't bypass dedup.
25
+ function grepKey(invocation) {
26
+ const pattern = String(invocation.input.pattern ?? '').trim();
27
+ if (!pattern)
28
+ return '';
29
+ const path = normalizePath(String(invocation.input.path ?? ''));
30
+ const glob = String(invocation.input.glob ?? '').trim().replace(/\s+/g, ' ');
31
+ const type = String(invocation.input.type ?? '').trim();
32
+ return `${pattern}::${path}::${glob}::${type}`;
33
+ }
34
+ function globKey(invocation) {
35
+ const pattern = String(invocation.input.pattern ?? '').trim().replace(/\s+/g, ' ');
36
+ if (!pattern)
37
+ return '';
38
+ const path = normalizePath(String(invocation.input.path ?? ''));
39
+ return `${pattern}::${path}`;
40
+ }
41
+ const WRITE_KEYWORDS = (() => {
42
+ const words = [
43
+ 'rm', 'mv', 'cp', 'mkdir', 'touch', 'chmod', 'chown', 'ln',
44
+ 'write', 'install', 'uninstall', 'build', 'publish',
45
+ 'push', 'pull', 'fetch', 'clone',
46
+ 'curl', 'wget', 'scp', 'rsync',
47
+ 'npm', 'pnpm', 'yarn', 'bun', 'pip', 'pipx', 'poetry', 'cargo', 'gem',
48
+ 'apt', 'apt-get', 'brew', 'port', 'dnf', 'yum', 'pacman',
49
+ 'make', 'cmake', 'gradle', 'mvn',
50
+ 'go\\s+(?:build|run|test|install|mod)',
51
+ 'git\\s+(?:push|pull|commit|merge|rebase|reset|clean|stash|checkout|add|rm|mv|fetch|clone|revert|cherry-pick)',
52
+ 'docker', 'podman', 'kubectl', 'helm',
53
+ 'tar', 'zip', 'unzip', 'gzip', 'bzip2',
54
+ 'tee', 'sudo', 'doas',
55
+ ];
56
+ // Redirect operators are not word chars — match separately, not under \b.
57
+ return new RegExp(`(?:\\b(?:${words.join('|')})\\b|>>?\\s)`);
58
+ })();
10
59
  const SEARCH_STOPWORDS = new Set([
11
60
  'a', 'an', 'and', 'april', 'at', 'builder', 'builders', 'com', 'developer',
12
61
  'developers', 'for', 'from', 'in', 'latest', 'live', 'may', 'of', 'on', 'or',
@@ -135,10 +184,10 @@ export class SessionToolGuard {
135
184
  if (!cmd)
136
185
  return null;
137
186
  // Only dedup deterministic read-only commands. Skip anything writing/network/long-running.
138
- const writeKeywords = /\b(rm|mv|cp|mkdir|touch|chmod|chown|write|install|build|publish|push|pull|curl|wget|fetch|npm|pnpm|yarn|pip|cargo|go\s+(build|run|test)|docker|kubectl|tar|zip|unzip|tee|>\s|>>\s)\b/;
139
- if (writeKeywords.test(cmd))
187
+ if (WRITE_KEYWORDS.test(cmd))
140
188
  return null;
141
- const key = cmd;
189
+ // Normalize whitespace runs so that, e.g., "ls   -la" (extra spaces) and "ls -la" share a cache entry.
190
+ const key = cmd.replace(/\s+/g, ' ');
142
191
  const cached = this.recentBash.get(key);
143
192
  if (cached) {
144
193
  const lead = cached.isError
@@ -152,13 +201,9 @@ export class SessionToolGuard {
152
201
  return null;
153
202
  }
154
203
  beforeGrep(invocation) {
155
- const pattern = String(invocation.input.pattern ?? '').trim();
156
- const path = String(invocation.input.path ?? '').trim();
157
- const glob = String(invocation.input.glob ?? '').trim();
158
- const type = String(invocation.input.type ?? '').trim();
159
- if (!pattern)
204
+ const key = grepKey(invocation);
205
+ if (!key)
160
206
  return null;
161
- const key = `${pattern}::${path}::${glob}::${type}`;
162
207
  const cached = this.recentGreps.get(key);
163
208
  if (cached) {
164
209
  return {
@@ -169,11 +214,9 @@ export class SessionToolGuard {
169
214
  return null;
170
215
  }
171
216
  beforeGlob(invocation) {
172
- const pattern = String(invocation.input.pattern ?? '').trim();
173
- const path = String(invocation.input.path ?? '').trim();
174
- if (!pattern)
217
+ const key = globKey(invocation);
218
+ if (!key)
175
219
  return null;
176
- const key = `${pattern}::${path}`;
177
220
  const cached = this.recentGlobs.get(key);
178
221
  if (cached) {
179
222
  return {
@@ -216,23 +259,20 @@ export class SessionToolGuard {
216
259
  const cmd = String(invocation.input.command ?? '').trim();
217
260
  if (!cmd)
218
261
  return;
219
- const writeKeywords = /\b(rm|mv|cp|mkdir|touch|chmod|chown|write|install|build|publish|push|pull|curl|wget|fetch|npm|pnpm|yarn|pip|cargo|go\s+(build|run|test)|docker|kubectl|tar|zip|unzip|tee|>\s|>>\s)\b/;
220
- if (writeKeywords.test(cmd))
262
+ if (WRITE_KEYWORDS.test(cmd))
221
263
  return;
222
264
  const output = String(result.output ?? '');
223
265
  const preview = output.length > MAX_PREVIEW_CHARS
224
266
  ? output.slice(0, MAX_PREVIEW_CHARS) + '…'
225
267
  : output;
226
- this.recentBash.set(cmd, { preview, turn: this.turn, isError: !!result.isError });
268
+ // Match the normalization used in beforeBash so reads/writes share keys.
269
+ const key = cmd.replace(/\s+/g, ' ');
270
+ this.recentBash.set(key, { preview, turn: this.turn, isError: !!result.isError });
227
271
  }
228
272
  afterGrep(invocation, result) {
229
- const pattern = String(invocation.input.pattern ?? '').trim();
230
- const path = String(invocation.input.path ?? '').trim();
231
- const glob = String(invocation.input.glob ?? '').trim();
232
- const type = String(invocation.input.type ?? '').trim();
233
- if (!pattern)
273
+ const key = grepKey(invocation);
274
+ if (!key)
234
275
  return;
235
- const key = `${pattern}::${path}::${glob}::${type}`;
236
276
  const output = String(result.output ?? '');
237
277
  const preview = output.length > MAX_PREVIEW_CHARS
238
278
  ? output.slice(0, MAX_PREVIEW_CHARS) + '…'
@@ -240,11 +280,9 @@ export class SessionToolGuard {
240
280
  this.recentGreps.set(key, { preview, turn: this.turn });
241
281
  }
242
282
  afterGlob(invocation, result) {
243
- const pattern = String(invocation.input.pattern ?? '').trim();
244
- const path = String(invocation.input.path ?? '').trim();
245
- if (!pattern)
283
+ const key = globKey(invocation);
284
+ if (!key)
246
285
  return;
247
- const key = `${pattern}::${path}`;
248
286
  const output = String(result.output ?? '');
249
287
  const preview = output.length > MAX_PREVIEW_CHARS
250
288
  ? output.slice(0, MAX_PREVIEW_CHARS) + '…'
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Best-effort pricing estimate for image generation models Franklin routes
3
+ * through the BlockRun gateway. Numbers are drawn from published model
4
+ * pricing and should be treated as *estimates* — the x402 micropayment is
5
+ * what actually debits the wallet. The purpose of this table is to attach a
6
+ * USD cost to a generated asset so budget tracking on a Content piece has
7
+ * something to count against, not to promise an exact price.
8
+ *
9
+ * Kept in `content/` (not `tools/`) because the table is content-budget
10
+ * business logic, not an image-generation implementation detail. If the
11
+ * gateway ever exposes the realized payment amount on the response, that
12
+ * should be preferred — fall back to this estimate when it's missing.
13
+ */
14
+ export declare function estimateImageCostUsd(model: string, size: string): number;
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Best-effort pricing estimate for image generation models Franklin routes
3
+ * through the BlockRun gateway. Numbers are drawn from published model
4
+ * pricing and should be treated as *estimates* — the x402 micropayment is
5
+ * what actually debits the wallet. The purpose of this table is to attach a
6
+ * USD cost to a generated asset so budget tracking on a Content piece has
7
+ * something to count against, not to promise an exact price.
8
+ *
9
+ * Kept in `content/` (not `tools/`) because the table is content-budget
10
+ * business logic, not an image-generation implementation detail. If the
11
+ * gateway ever exposes the realized payment amount on the response, that
12
+ * should be preferred — fall back to this estimate when it's missing.
13
+ */
14
+ export function estimateImageCostUsd(model, size) {
15
+ const m = model.toLowerCase();
16
+ const s = size.replace(/\s+/g, '');
17
+ if (m === 'openai/dall-e-3') {
18
+ if (s === '1792x1024' || s === '1024x1792')
19
+ return 0.08;
20
+ // All other sizes fall back to the standard 1024x1024 tier.
21
+ return 0.04;
22
+ }
23
+ if (m === 'openai/gpt-image-1') {
24
+ // gpt-image-1 standard tier; larger sizes would tier up but Franklin
25
+ // sends 1024x1024 as default.
26
+ return 0.042;
27
+ }
28
+ // Unknown model: return 0 rather than a guess. A free/custom model should
29
+ // not have a phantom charge against the Content budget, and surprise
30
+ // overcharging from a wrong guess is worse than under-counting.
31
+ return 0;
32
+ }
@@ -0,0 +1,63 @@
1
+ export type ContentType = 'x-thread' | 'blog' | 'podcast' | 'video' | 'ad-copy' | 'image';
2
+ export type ContentStatus = 'outline' | 'drafting' | 'assets' | 'review' | 'published';
3
+ export type AssetKind = 'image' | 'audio' | 'video' | 'text';
4
+ export interface ContentAsset {
5
+ kind: AssetKind;
6
+ /** Producer of the asset: model ID like "openai/dall-e-3", or "manual". */
7
+ source: string;
8
+ /** USD actually spent producing this asset. 0 is valid (free models). */
9
+ costUsd: number;
10
+ /** Optional payload reference — URL, file path, or short inline text. */
11
+ data?: string;
12
+ createdAt: number;
13
+ }
14
+ export interface ContentDraft {
15
+ text: string;
16
+ createdAt: number;
17
+ }
18
+ export interface DistributionEntry {
19
+ channel: string;
20
+ url?: string;
21
+ at: number;
22
+ }
23
+ export interface Content {
24
+ id: string;
25
+ type: ContentType;
26
+ title: string;
27
+ status: ContentStatus;
28
+ outline?: string;
29
+ drafts: ContentDraft[];
30
+ assets: ContentAsset[];
31
+ spentUsd: number;
32
+ budgetUsd: number;
33
+ createdAt: number;
34
+ publishedAt?: number;
35
+ distribution: DistributionEntry[];
36
+ }
37
+ export interface CreateContentOptions {
38
+ type: ContentType;
39
+ title: string;
40
+ budgetUsd: number;
41
+ }
42
+ export declare class ContentLibrary {
43
+ private byId;
44
+ create(opts: CreateContentOptions): Content;
45
+ get(id: string): Content | undefined;
46
+ list(): Content[];
47
+ /** Replace a content record wholesale — used by the persistence layer. */
48
+ restore(content: Content): void;
49
+ /**
50
+ * Record a generated asset against a content, enforcing the budget cap.
51
+ * Returns `{ ok: false, reason }` on rejection so callers (including the
52
+ * agent-facing capability) can surface the reason instead of catching an
53
+ * exception. On the happy path mutates the Content in place and returns
54
+ * the updated spentUsd.
55
+ */
56
+ addAsset(id: string, asset: Omit<ContentAsset, 'createdAt'>): {
57
+ ok: true;
58
+ spentUsd: number;
59
+ } | {
60
+ ok: false;
61
+ reason: string;
62
+ };
63
+ }