@blockrun/franklin 3.7.10 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/bash-guard.js +8 -2
- package/dist/agent/compact.d.ts +14 -0
- package/dist/agent/compact.js +57 -1
- package/dist/agent/context.js +6 -4
- package/dist/agent/llm.js +2 -1
- package/dist/agent/loop.js +88 -18
- package/dist/agent/optimize.js +4 -0
- package/dist/agent/tokens.d.ts +7 -3
- package/dist/agent/tokens.js +14 -7
- package/dist/agent/tool-guard.js +64 -26
- package/dist/content/image-pricing.d.ts +14 -0
- package/dist/content/image-pricing.js +32 -0
- package/dist/content/library.d.ts +63 -0
- package/dist/content/library.js +75 -0
- package/dist/content/record-image.d.ts +43 -0
- package/dist/content/record-image.js +50 -0
- package/dist/content/store.d.ts +15 -0
- package/dist/content/store.js +55 -0
- package/dist/index.js +0 -0
- package/dist/pricing.d.ts +1 -1
- package/dist/pricing.js +2 -2
- package/dist/router/index.js +17 -6
- package/dist/tools/bash.d.ts +8 -0
- package/dist/tools/bash.js +13 -0
- package/dist/tools/content-execute.d.ts +26 -0
- package/dist/tools/content-execute.js +212 -0
- package/dist/tools/imagegen.d.ts +14 -0
- package/dist/tools/imagegen.js +164 -101
- package/dist/tools/index.d.ts +6 -0
- package/dist/tools/index.js +91 -5
- package/dist/tools/read.d.ts +13 -0
- package/dist/tools/read.js +17 -0
- package/dist/tools/trading-execute.d.ts +35 -0
- package/dist/tools/trading-execute.js +297 -0
- package/dist/tools/webfetch.d.ts +6 -0
- package/dist/tools/webfetch.js +8 -0
- package/dist/trading/engine.d.ts +51 -0
- package/dist/trading/engine.js +75 -0
- package/dist/trading/live-exchange.d.ts +43 -0
- package/dist/trading/live-exchange.js +48 -0
- package/dist/trading/mock-exchange.d.ts +40 -0
- package/dist/trading/mock-exchange.js +41 -0
- package/dist/trading/portfolio.d.ts +67 -0
- package/dist/trading/portfolio.js +106 -0
- package/dist/trading/risk.d.ts +34 -0
- package/dist/trading/risk.js +64 -0
- package/dist/trading/store.d.ts +9 -0
- package/dist/trading/store.js +32 -0
- package/dist/trading/trade-log.d.ts +39 -0
- package/dist/trading/trade-log.js +81 -0
- package/package.json +1 -1
- package/dist/commands/history.d.ts +0 -5
- package/dist/commands/history.js +0 -31
- package/dist/plugins-bundled/social/index.d.ts +0 -10
- package/dist/plugins-bundled/social/index.js +0 -363
- package/dist/plugins-bundled/social/plugin.json +0 -14
- package/dist/plugins-bundled/social/prompts.d.ts +0 -19
- package/dist/plugins-bundled/social/prompts.js +0 -67
- package/dist/plugins-bundled/social/types.d.ts +0 -58
- package/dist/plugins-bundled/social/types.js +0 -16
package/dist/agent/bash-guard.js
CHANGED
|
@@ -30,8 +30,14 @@ const DANGEROUS_PATTERNS = [
|
|
|
30
30
|
[/\bTRUNCATE\s+TABLE\b/i, 'truncate table'],
|
|
31
31
|
// System-level danger
|
|
32
32
|
[/\bchmod\s+(-R\s+)?777\b/, 'world-writable permissions'],
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
// Pipe-to-shell: catch sudo/env prefixes and common shell variants (bash/sh/zsh/ksh/dash/fish).
|
|
34
|
+
// The optional `-e`/`-x` flags after the shell binary are intentionally allowed by \b;
|
|
35
|
+
// what we block is the routing of downloaded content into an interpreter.
|
|
36
|
+
[/\bcurl\s+.*\|\s*(sudo\s+)?(env\s+\S*\s*)?(ba|z|k|da|fi)?sh\b/, 'pipe URL to shell'],
|
|
37
|
+
[/\bwget\s+.*\|\s*(sudo\s+)?(env\s+\S*\s*)?(ba|z|k|da|fi)?sh\b/, 'pipe URL to shell'],
|
|
38
|
+
// Command substitution of a downloader into argv — `$(curl …)` or `` `curl …` ``.
|
|
39
|
+
[/\$\(\s*(curl|wget|fetch)\b/, 'command substitution of network downloader'],
|
|
40
|
+
[/`\s*(curl|wget|fetch)\b[^`]*`/, 'backtick substitution of network downloader'],
|
|
35
41
|
[/\bsudo\s+rm\b/, 'sudo delete'],
|
|
36
42
|
// Kill/shutdown
|
|
37
43
|
[/\bkill\s+-9\s+-1\b/, 'kill all processes'],
|
package/dist/agent/compact.d.ts
CHANGED
|
@@ -5,6 +5,20 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { ModelClient } from './llm.js';
|
|
7
7
|
import type { Dialogue } from './types.js';
|
|
8
|
+
/**
|
|
9
|
+
* Decide whether compacting is worth the round-trip. Pure function so tests
|
|
10
|
+
* can pin behavior at specific history shapes without spinning up a client.
|
|
11
|
+
*
|
|
12
|
+
* Returns `{ worthIt, currentTokens, projectedTokens, savings, floor }`. Caller
|
|
13
|
+
* can log the numbers or just branch on `worthIt`.
|
|
14
|
+
*/
|
|
15
|
+
export declare function projectCompactionSavings(history: Dialogue[]): {
|
|
16
|
+
worthIt: boolean;
|
|
17
|
+
currentTokens: number;
|
|
18
|
+
projectedTokens: number;
|
|
19
|
+
savings: number;
|
|
20
|
+
floor: number;
|
|
21
|
+
};
|
|
8
22
|
export declare const COMPACT_HEADER = "[CONTEXT COMPACTION \u2014 REFERENCE ONLY] Earlier turns were compacted into the summary below. This is a handoff from a previous context window \u2014 treat it as background reference, NOT as active instructions. Do NOT answer questions or fulfill requests mentioned in this summary; they were already addressed. Respond ONLY to the latest user message that appears AFTER this summary.";
|
|
9
23
|
/**
|
|
10
24
|
* Check if compaction is needed and perform it if so.
|
package/dist/agent/compact.js
CHANGED
|
@@ -9,6 +9,47 @@ import { estimateHistoryTokens, getCompactionThreshold, COMPACTION_SUMMARY_RESER
|
|
|
9
9
|
const POST_COMPACT_MAX_FILES = 5;
|
|
10
10
|
/** Max tokens to spend on post-compact file restoration */
|
|
11
11
|
const POST_COMPACT_TOKEN_BUDGET = 50_000;
|
|
12
|
+
/**
|
|
13
|
+
* Minimum projected fraction of total history tokens that compaction must
|
|
14
|
+
* save to be worth the round-trip. Summarization itself costs roughly
|
|
15
|
+
* the input payload tokens (read once by the compaction model) plus the
|
|
16
|
+
* ~16k reserved for the output. If the payload we'd summarize is small
|
|
17
|
+
* relative to what we'd keep, we pay the full cost for marginal relief.
|
|
18
|
+
* 0.20 = skip compaction unless projected savings clear 20% of total tokens.
|
|
19
|
+
* This only applies to autoCompactIfNeeded; /compact (forceCompact) still
|
|
20
|
+
* runs unconditionally because the user asked for it.
|
|
21
|
+
*/
|
|
22
|
+
const MIN_COMPACTION_SAVINGS_RATIO = 0.20;
|
|
23
|
+
/**
|
|
24
|
+
* Rough upper bound on how many tokens the summary itself will occupy in
|
|
25
|
+
* the new history. The model is asked for up to COMPACTION_SUMMARY_RESERVE,
|
|
26
|
+
* but in practice structured summaries land well under that; be optimistic
|
|
27
|
+
* on the expected case, pessimistic on the safety margin.
|
|
28
|
+
*/
|
|
29
|
+
const EXPECTED_SUMMARY_TOKENS = 4_000;
|
|
30
|
+
/**
 * Project whether an automatic compaction pass would free enough tokens to
 * justify the summarization round-trip. Works only from `history` plus the
 * token-estimator and keep-boundary helpers, so no model client is needed.
 *
 * @param history Conversation history to evaluate.
 * @returns An object with:
 *   - `currentTokens`: estimated tokens of the full history;
 *   - `projectedTokens`: estimated tokens of the kept tail plus
 *     EXPECTED_SUMMARY_TOKENS for the summary that replaces the rest;
 *   - `savings`: `currentTokens - projectedTokens` (may be negative);
 *   - `floor`: minimum savings required, i.e. `currentTokens` times
 *     MIN_COMPACTION_SAVINGS_RATIO, rounded up;
 *   - `worthIt`: true when `savings` reaches `floor`.
 */
export function projectCompactionSavings(history) {
    const currentTokens = estimateHistoryTokens(history);
    // findKeepBoundary's result is used as the number of trailing messages
    // that survive compaction unchanged.
    const tailLength = findKeepBoundary(history);
    const survivors = history.slice(history.length - tailLength);
    const projectedTokens = estimateHistoryTokens(survivors) + EXPECTED_SUMMARY_TOKENS;
    const savings = currentTokens - projectedTokens;
    const floor = Math.ceil(currentTokens * MIN_COMPACTION_SAVINGS_RATIO);
    const worthIt = savings >= floor;
    return { worthIt, currentTokens, projectedTokens, savings, floor };
}
|
|
12
53
|
// Structured compaction prompt (pattern from nousresearch/hermes-agent
|
|
13
54
|
// `agent/context_compressor.py`). The structured sections preserve more
|
|
14
55
|
// signal than free-form summaries and make it easier for the model to
|
|
@@ -71,8 +112,23 @@ export async function autoCompactIfNeeded(history, model, client, debug) {
|
|
|
71
112
|
if (currentTokens < threshold) {
|
|
72
113
|
return { history, compacted: false };
|
|
73
114
|
}
|
|
115
|
+
// ROI gate: project how much the summarization would actually save. The
|
|
116
|
+
// portion that survives compaction (`toKeep`) doesn't shrink, and the
|
|
117
|
+
// summary replaces `toSummarize` with ~EXPECTED_SUMMARY_TOKENS. If the
|
|
118
|
+
// resulting history is within MIN_COMPACTION_SAVINGS_RATIO of the current
|
|
119
|
+
// size, skip — the round-trip would cost more than the headroom is worth.
|
|
120
|
+
// The caller then falls back to per-turn emergency handling (413 recovery,
|
|
121
|
+
// output-tokens clamp) which is much cheaper on the margin.
|
|
122
|
+
const roi = projectCompactionSavings(history);
|
|
123
|
+
if (!roi.worthIt) {
|
|
124
|
+
if (debug) {
|
|
125
|
+
console.error(`[franklin] Compaction skipped (ROI): current=${roi.currentTokens}, projected=${roi.projectedTokens}, ` +
|
|
126
|
+
`savings=${roi.savings} < ${roi.floor} floor`);
|
|
127
|
+
}
|
|
128
|
+
return { history, compacted: false };
|
|
129
|
+
}
|
|
74
130
|
if (debug) {
|
|
75
|
-
console.error(`[franklin] Auto-compacting: ~${currentTokens} tokens, threshold=${threshold}`);
|
|
131
|
+
console.error(`[franklin] Auto-compacting: ~${currentTokens} tokens, threshold=${threshold}, projected savings=${roi.savings}`);
|
|
76
132
|
}
|
|
77
133
|
const beforeTokens = estimateHistoryTokens(history);
|
|
78
134
|
try {
|
package/dist/agent/context.js
CHANGED
|
@@ -463,8 +463,10 @@ function readRuntimeWallet() {
|
|
|
463
463
|
}
|
|
464
464
|
// ─── Git Context ───────────────────────────────────────────────────────────
|
|
465
465
|
const GIT_TIMEOUT_MS = 5_000;
|
|
466
|
-
// Max chars for git log output — long commit messages can bloat the system prompt
|
|
467
|
-
|
|
466
|
+
// Max chars for git log output — long commit messages can bloat the system prompt.
|
|
467
|
+
// Tightened from 2000: at typical 60-80 chars/commit, 800 comfortably fits
|
|
468
|
+
// the 3 commits we request below with headroom for long subjects.
|
|
469
|
+
const MAX_GIT_LOG_CHARS = 800;
|
|
468
470
|
function getGitContext(workingDir) {
|
|
469
471
|
const gitCmd = (cmd) => execSync(cmd, {
|
|
470
472
|
cwd: workingDir,
|
|
@@ -516,9 +518,9 @@ function getGitContext(workingDir) {
|
|
|
516
518
|
}
|
|
517
519
|
}
|
|
518
520
|
catch { /* ignore */ }
|
|
519
|
-
// Recent commits
|
|
521
|
+
// Recent commits — 3 is enough for style/context matching; more just bloats every turn.
|
|
520
522
|
try {
|
|
521
|
-
let log = gitCmd('git log --oneline -
|
|
523
|
+
let log = gitCmd('git log --oneline -3');
|
|
522
524
|
if (log) {
|
|
523
525
|
if (log.length > MAX_GIT_LOG_CHARS) {
|
|
524
526
|
log = log.slice(0, MAX_GIT_LOG_CHARS) + '\n... (truncated)';
|
package/dist/agent/llm.js
CHANGED
|
@@ -32,6 +32,8 @@ import { USER_AGENT } from '../config.js';
|
|
|
32
32
|
*/
|
|
33
33
|
export function modelHasExtendedThinking(model) {
|
|
34
34
|
const m = model.toLowerCase();
|
|
35
|
+
// Excluded: Opus 4.7+ uses adaptive thinking; sending `thinking: enabled`
|
|
36
|
+
// causes the API to 400.
|
|
35
37
|
if (m.includes('opus-4.7') || m.includes('opus-4-7'))
|
|
36
38
|
return false;
|
|
37
39
|
return (m.includes('opus-4.6') || m.includes('opus-4-6') ||
|
|
@@ -159,7 +161,6 @@ export class ModelClient {
|
|
|
159
161
|
}
|
|
160
162
|
if (isAnthropic) {
|
|
161
163
|
// ─ Anthropic extended thinking ──────────────────────────────────────
|
|
162
|
-
// Enable thinking for Claude models that support it (Opus 4.6, Sonnet 4.6).
|
|
163
164
|
// Enable the `thinking` API block only for models that accept it.
|
|
164
165
|
// Claude Opus 4.7 and newer use *adaptive* thinking (built-in, no API
|
|
165
166
|
// flag); passing the extended-thinking flag to them makes Anthropic
|
package/dist/agent/loop.js
CHANGED
|
@@ -12,6 +12,7 @@ import { StreamingExecutor } from './streaming-executor.js';
|
|
|
12
12
|
import { optimizeHistory, CAPPED_MAX_TOKENS, ESCALATED_MAX_TOKENS, getMaxOutputTokens } from './optimize.js';
|
|
13
13
|
import { classifyAgentError } from './error-classifier.js';
|
|
14
14
|
import { SessionToolGuard } from './tool-guard.js';
|
|
15
|
+
import { resetToolSessionState } from '../tools/index.js';
|
|
15
16
|
import { recordUsage } from '../stats/tracker.js';
|
|
16
17
|
import { recordSessionUsage } from '../stats/session-tracker.js';
|
|
17
18
|
import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
@@ -34,25 +35,74 @@ function replaceHistory(target, replacement) {
|
|
|
34
35
|
// ─── Pushback detection ───────────────────────────────────────────────────
|
|
35
36
|
// Cheap models plough forward when users correct them. This detects common
|
|
36
37
|
// correction patterns so the agent can explicitly reset its approach.
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
//
|
|
39
|
+
// Precision-biased: we'd rather miss a real pushback than falsely trigger on
|
|
40
|
+
// casual disagreement ("But how do I deploy?"). False positives pollute the
|
|
41
|
+
// conversation and make the agent abandon working approaches unnecessarily.
|
|
42
|
+
// STRONG patterns: high-precision correction language. Fires even on short input.
|
|
43
|
+
const PUSHBACK_STRONG = [
|
|
39
44
|
/\b(that'?s?\s+(wrong|incorrect|not\s+right)|you'?re?\s+wrong)\b/i,
|
|
40
|
-
/\b(i\s+(said|told\s+you)|not\s+
|
|
45
|
+
/\b(i\s+(said|told\s+you)|not\s+what\s+i)\b/i,
|
|
46
|
+
/^(stop|wrong|incorrect|try\s+again)\b/i,
|
|
47
|
+
/^(不对|不是|错了|再试|重来)/,
|
|
48
|
+
];
|
|
49
|
+
// WEAK patterns: common correction starters that also appear in casual speech.
|
|
50
|
+
// Require a corroborating signal (see detectPushback) to count as pushback.
|
|
51
|
+
const PUSHBACK_WEAK = [
|
|
52
|
+
/^(but|however|actually|wait|no+\b|hmm)\b/i,
|
|
41
53
|
/\b(we\s+are\s+using|the\s+correct|the\s+actual)\b/i,
|
|
42
|
-
/^(
|
|
43
|
-
/^(不对|不是|错了|再试|但是|其实|等等|停|重来)/,
|
|
54
|
+
/^(但是|其实|等等|停)/,
|
|
44
55
|
];
|
|
56
|
+
/**
 * Report whether the most recent assistant message contains something
 * concrete enough to push back against.
 *
 * Only the LAST assistant message in `history` is inspected; earlier ones are
 * ignored. It counts as a claim when:
 *   - its content is an array holding a `tool_use` part, or a `text` part
 *     whose trimmed text is at least 40 characters; or
 *   - its content is a string whose trimmed length is at least 40 characters.
 *
 * Malformed entries (null/undefined messages, null or non-object content
 * parts) are skipped instead of throwing, so a damaged history cannot crash
 * pushback detection.
 *
 * @param history Conversation messages, oldest first.
 * @returns true if the latest assistant message carries a claim, else false
 *   (including when there is no assistant message at all).
 */
function lastAssistantHasClaim(history) {
    // Shorter replies are treated as casual chatter rather than a claim.
    const MIN_CLAIM_CHARS = 40;
    const isSubstantial = (text) => typeof text === 'string' && text.trim().length >= MIN_CLAIM_CHARS;
    for (let i = history.length - 1; i >= 0; i--) {
        const msg = history[i];
        if (!msg || msg.role !== 'assistant')
            continue;
        // First assistant message found scanning backwards decides the answer.
        if (Array.isArray(msg.content)) {
            return msg.content.some((part) => part !== null && typeof part === 'object' &&
                (part.type === 'tool_use' || (part.type === 'text' && isSubstantial(part.text))));
        }
        return isSubstantial(msg.content);
    }
    return false;
}
|
|
45
83
|
function detectPushback(input, history) {
|
|
46
84
|
// Only count as pushback if there's a prior assistant turn to push back against.
|
|
47
85
|
if (history.length === 0)
|
|
48
86
|
return false;
|
|
49
|
-
|
|
50
|
-
if (!hasPriorAssistant)
|
|
87
|
+
if (!lastAssistantHasClaim(history))
|
|
51
88
|
return false;
|
|
52
89
|
const trimmed = input.trim();
|
|
53
90
|
if (trimmed.length === 0 || trimmed.length > 500)
|
|
54
91
|
return false;
|
|
55
|
-
|
|
92
|
+
// Strong patterns: direct correction language — fire immediately.
|
|
93
|
+
if (PUSHBACK_STRONG.some((re) => re.test(trimmed)))
|
|
94
|
+
return true;
|
|
95
|
+
// Weak patterns: only count if the message is short (<= 120 chars) AND doesn't
|
|
96
|
+
// also contain a fresh request. A weak starter followed by "can you also X"
|
|
97
|
+
// or "please do Y" is scope addition, not correction.
|
|
98
|
+
if (PUSHBACK_WEAK.some((re) => re.test(trimmed))) {
|
|
99
|
+
if (trimmed.length > 120)
|
|
100
|
+
return false;
|
|
101
|
+
if (/\b(can you|could you|please|also|add|include)\b/i.test(trimmed))
|
|
102
|
+
return false;
|
|
103
|
+
return true;
|
|
104
|
+
}
|
|
105
|
+
return false;
|
|
56
106
|
}
|
|
57
107
|
/**
|
|
58
108
|
* Sanitize history: fix orphaned tool results AND inject missing results.
|
|
@@ -227,6 +277,13 @@ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
|
|
|
227
277
|
* Returns the accumulated conversation history.
|
|
228
278
|
*/
|
|
229
279
|
export async function interactiveSession(config, getUserInput, onEvent, onAbortReady) {
|
|
280
|
+
// Clear module-level tool caches left over from a prior session in the same
|
|
281
|
+
// process. Matters when Franklin is used as a library or driven by tests
|
|
282
|
+
// that call interactiveSession() more than once — stale fileReadTracker /
|
|
283
|
+
// fetchCache / backgroundTasks entries from the previous run would otherwise
|
|
284
|
+
// fool Edit/Write into skipping the read-before-edit check or serve cached
|
|
285
|
+
// webfetch content fetched under the previous session's intent.
|
|
286
|
+
resetToolSessionState();
|
|
230
287
|
const client = new ModelClient({
|
|
231
288
|
apiUrl: config.apiUrl,
|
|
232
289
|
chain: config.chain,
|
|
@@ -345,7 +402,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
345
402
|
history.push({ role: 'user', content: effectiveInput });
|
|
346
403
|
turnCount++;
|
|
347
404
|
toolGuard.startTurn();
|
|
348
|
-
|
|
405
|
+
// Persist the user's original message, not the injected SYSTEM NOTE scaffold.
|
|
406
|
+
// Resumed sessions should show what the user typed, not our internal prompt engineering.
|
|
407
|
+
persistSessionMessage({ role: 'user', content: input });
|
|
349
408
|
// ── Model recovery: try original model at the start of each new turn ──
|
|
350
409
|
// If we fell back to a free model last turn due to a transient error, try original again.
|
|
351
410
|
// But DON'T reset if the original model had a payment failure — it will just fail again.
|
|
@@ -446,16 +505,27 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
446
505
|
// ── Context awareness injection ──
|
|
447
506
|
// Tell the model how full its context window is so it can self-regulate.
|
|
448
507
|
// At high usage, nudge it to be concise and avoid unnecessary tool calls.
|
|
508
|
+
//
|
|
509
|
+
// IMPORTANT: this text is appended to the system prompt, which carries a
|
|
510
|
+
// prompt-cache breakpoint on Anthropic. Including the exact percentage
|
|
511
|
+
// invalidated the cache on every turn (the string differed by a digit).
|
|
512
|
+
// Bucketing the signal to coarse bands (>50 / >65 / >80) keeps the text
|
|
513
|
+
// byte-identical across many consecutive turns, so the cache actually
|
|
514
|
+
// holds. The model doesn't need 3% precision to self-regulate.
|
|
449
515
|
const { contextUsagePct: preCallPct } = getAnchoredTokenCount(history);
|
|
450
|
-
if (preCallPct >
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
516
|
+
if (preCallPct > 80) {
|
|
517
|
+
systemParts.push('# Context Window Status\nContext window is critically full (>80%). ' +
|
|
518
|
+
'Be extremely concise. Avoid re-reading files already in context. ' +
|
|
519
|
+
'Prioritize completing the current task over exploring new questions.');
|
|
520
|
+
}
|
|
521
|
+
else if (preCallPct > 65) {
|
|
522
|
+
systemParts.push('# Context Window Status\nContext window is more than two-thirds full (>65%). ' +
|
|
523
|
+
'Be concise in responses. Avoid unnecessary tool calls. ' +
|
|
524
|
+
'Do not re-read files you already have in context.');
|
|
525
|
+
}
|
|
526
|
+
else if (preCallPct > 50) {
|
|
527
|
+
systemParts.push('# Context Window Status\nContext window has crossed the halfway mark (>50%). ' +
|
|
528
|
+
'Prefer concise responses and batch tool calls when possible.');
|
|
459
529
|
}
|
|
460
530
|
const systemPrompt = systemParts.join('\n\n');
|
|
461
531
|
const modelMaxOut = getMaxOutputTokens(config.model);
|
package/dist/agent/optimize.js
CHANGED
|
@@ -21,6 +21,10 @@ export const CAPPED_MAX_TOKENS = 16_384;
|
|
|
21
21
|
export const ESCALATED_MAX_TOKENS = 65_536;
|
|
22
22
|
/** Per-model max output tokens — prevents requesting more than the model supports */
|
|
23
23
|
const MODEL_MAX_OUTPUT = {
|
|
24
|
+
// Opus 4.7 supports 128k output per the BlockRun gateway model entry
|
|
25
|
+
// (anthropic/claude-opus-4.7 maxOutput: 128000). Bumping from 32k to
|
|
26
|
+
// 128k unlocks the full headroom — runaway generations are gated
|
|
27
|
+
// separately by CAPPED_MAX_TOKENS / ESCALATED_MAX_TOKENS budgets.
|
|
24
28
|
'anthropic/claude-opus-4.7': 128_000,
|
|
25
29
|
'anthropic/claude-opus-4.6': 32_000,
|
|
26
30
|
'anthropic/claude-sonnet-4.6': 64_000,
|
package/dist/agent/tokens.d.ts
CHANGED
|
@@ -31,9 +31,13 @@ export declare function setEstimationModel(model: string): void;
|
|
|
31
31
|
* Estimate token count for a string using byte-length heuristic.
|
|
32
32
|
* JSON-heavy content uses 2 bytes/token; general text uses model-specific ratio.
|
|
33
33
|
*
|
|
34
|
-
* Padding
|
|
35
|
-
*
|
|
36
|
-
*
|
|
34
|
+
* Padding history:
|
|
35
|
+
* 1.33x → ~36% overestimate, auto-compact fired 15-20% below real limit.
|
|
36
|
+
* 1.15x → still triggered compaction around 60% of real context.
|
|
37
|
+
* 1.05x (current) — combined with Math.ceil() this still leaves a small
|
|
38
|
+
* safety margin, and the LLM surfaces a hard 413/context error long before
|
|
39
|
+
* the real limit that recovery code can handle. Net effect: fewer
|
|
40
|
+
* unnecessary (and expensive) compaction round-trips on mid-sized sessions.
|
|
37
41
|
*/
|
|
38
42
|
export declare function estimateTokens(text: string, bytesPerToken?: number): number;
|
|
39
43
|
/**
|
package/dist/agent/tokens.js
CHANGED
|
@@ -91,14 +91,17 @@ export function setEstimationModel(model) {
|
|
|
91
91
|
* Estimate token count for a string using byte-length heuristic.
|
|
92
92
|
* JSON-heavy content uses 2 bytes/token; general text uses model-specific ratio.
|
|
93
93
|
*
|
|
94
|
-
* Padding
|
|
95
|
-
*
|
|
96
|
-
*
|
|
94
|
+
* Padding history:
|
|
95
|
+
* 1.33x → ~36% overestimate, auto-compact fired 15-20% below real limit.
|
|
96
|
+
* 1.15x → still triggered compaction around 60% of real context.
|
|
97
|
+
* 1.05x (current) — combined with Math.ceil() this still leaves a small
|
|
98
|
+
* safety margin, and the LLM surfaces a hard 413/context error long before
|
|
99
|
+
* the real limit that recovery code can handle. Net effect: fewer
|
|
100
|
+
* unnecessary (and expensive) compaction round-trips on mid-sized sessions.
|
|
97
101
|
*/
|
|
98
102
|
export function estimateTokens(text, bytesPerToken) {
|
|
99
103
|
const effectiveBPT = bytesPerToken ?? getModelBytesPerToken(_currentModel);
|
|
100
|
-
|
|
101
|
-
return Math.ceil(Buffer.byteLength(text, 'utf-8') / effectiveBPT * 1.15);
|
|
104
|
+
return Math.ceil(Buffer.byteLength(text, 'utf-8') / effectiveBPT * 1.05);
|
|
102
105
|
}
|
|
103
106
|
/**
|
|
104
107
|
* Estimate tokens for a content part.
|
|
@@ -150,8 +153,12 @@ export function estimateHistoryTokens(history) {
|
|
|
150
153
|
* Context window sizes for known models.
|
|
151
154
|
*/
|
|
152
155
|
const MODEL_CONTEXT_WINDOWS = {
|
|
153
|
-
// Anthropic
|
|
154
|
-
|
|
156
|
+
// Anthropic. The BlockRun gateway model entry advertises 1M context for
|
|
157
|
+
// Opus 4.7, but the 1M beta header may not be enabled at the gateway
|
|
158
|
+
// edge yet — sending more than 200k without it 413s. Keep 200k as the
|
|
159
|
+
// safe Franklin baseline; bump to 1_000_000 in a separate commit once
|
|
160
|
+
// a real >200k call has been verified end-to-end.
|
|
161
|
+
'anthropic/claude-opus-4.7': 200_000,
|
|
155
162
|
'anthropic/claude-opus-4.6': 200_000,
|
|
156
163
|
'anthropic/claude-sonnet-4.6': 200_000,
|
|
157
164
|
'anthropic/claude-sonnet-4': 200_000,
|
package/dist/agent/tool-guard.js
CHANGED
|
@@ -7,6 +7,55 @@ const SEARCH_FAMILY_SIMILARITY = 0.58;
|
|
|
7
7
|
const DUPLICATE_READ_TURN_WINDOW = 1;
|
|
8
8
|
const DUPLICATE_FETCH_TURN_WINDOW = 1;
|
|
9
9
|
const MAX_PREVIEW_CHARS = 320;
|
|
10
|
+
// WRITE_KEYWORDS (defined further down, after the cache-key helpers): commands that mutate state or have side effects — never dedup these.
|
|
11
|
+
// Covers: filesystem writes, network downloads, package managers, container/orchestration,
|
|
12
|
+
// git mutations, privileged escalation, archive ops, and output redirection.
|
|
13
|
+
// Hoisted to module scope so beforeBash/afterBash don't recompile on every call.
|
|
14
|
+
// Normalize a filesystem path for cache-key use: trim surrounding whitespace
// and strip trailing slashes (so `/foo`, `/foo/` and `/foo//` share a cache
// entry). Whitespace INSIDE the path is preserved: `my  dir` and `my dir` are
// different directories and must not collide on the same key. A path made
// only of slashes normalizes to the root `/`; an empty path stays empty.
function normalizePath(p) {
    const trimmed = p.trim();
    const stripped = trimmed.replace(/\/+$/, '');
    // Stripping removed everything: the input was the root (one or more slashes).
    if (stripped === '' && trimmed.startsWith('/'))
        return '/';
    return stripped;
}
|
|
22
|
+
// Compute the dedup cache key for a Grep invocation, or '' when the call has
// no usable pattern and therefore must not be deduplicated.
// The pattern is only trimmed (it stays case-sensitive, as grep is); the
// path goes through normalizePath, the glob has whitespace runs collapsed,
// and the type is trimmed, so cosmetic differences map to the same key.
// Key layout: `pattern::path::glob::type`.
function grepKey(invocation) {
    // Read one input field as a string, treating null/undefined as ''.
    const field = (name) => String(invocation.input[name] ?? '');
    const pattern = field('pattern').trim();
    if (pattern === '') {
        return '';
    }
    const parts = [
        pattern,
        normalizePath(field('path')),
        field('glob').trim().replace(/\s+/g, ' '),
        field('type').trim(),
    ];
    return parts.join('::');
}
|
|
34
|
+
// Compute the dedup cache key for a Glob invocation, or '' when the call has
// no usable pattern. The pattern is trimmed with whitespace runs collapsed;
// the path goes through normalizePath. Key layout: `pattern::path`.
function globKey(invocation) {
    const rawPattern = String(invocation.input.pattern ?? '');
    const pattern = rawPattern.trim().replace(/\s+/g, ' ');
    if (pattern.length === 0) {
        return '';
    }
    const location = normalizePath(String(invocation.input.path ?? ''));
    return [pattern, location].join('::');
}
|
|
41
|
+
/**
 * Matches shell commands that mutate state or have side effects. Callers
 * test a command against this and skip Bash dedup caching when it matches.
 *
 * Covers filesystem writes, network downloads, package managers, build
 * tools, container/orchestration CLIs, mutating git/go subcommands, archive
 * tools, privilege escalation, and output redirection.
 *
 * Built once at module load so callers do not recompile it per command.
 */
const WRITE_KEYWORDS = (() => {
    const words = [
        'rm', 'mv', 'cp', 'mkdir', 'touch', 'chmod', 'chown', 'ln',
        'write', 'install', 'uninstall', 'build', 'publish',
        'push', 'pull', 'fetch', 'clone',
        'curl', 'wget', 'scp', 'rsync',
        'npm', 'pnpm', 'yarn', 'bun', 'pip', 'pipx', 'poetry', 'cargo', 'gem',
        'apt', 'apt-get', 'brew', 'port', 'dnf', 'yum', 'pacman',
        'make', 'cmake', 'gradle', 'mvn',
        'go\\s+(?:build|run|test|install|mod)',
        'git\\s+(?:push|pull|commit|merge|rebase|reset|clean|stash|checkout|add|rm|mv|fetch|clone|revert|cherry-pick)',
        'docker', 'podman', 'kubectl', 'helm',
        'tar', 'zip', 'unzip', 'gzip', 'bzip2',
        'tee', 'sudo', 'doas',
    ];
    // Redirect operators are not word chars — match separately, not under \b.
    // Any `>` or `>>` counts, with or without a following space, so that
    // `echo x >out.txt` and `date>>log` are recognized as writes. The only
    // exclusion is `>&` (as in `2>&1`), which is not followed by a space and
    // was never matched by the previous space-requiring form either.
    return new RegExp(`(?:\\b(?:${words.join('|')})\\b|>>?(?!&))`);
})();
|
|
10
59
|
const SEARCH_STOPWORDS = new Set([
|
|
11
60
|
'a', 'an', 'and', 'april', 'at', 'builder', 'builders', 'com', 'developer',
|
|
12
61
|
'developers', 'for', 'from', 'in', 'latest', 'live', 'may', 'of', 'on', 'or',
|
|
@@ -135,10 +184,10 @@ export class SessionToolGuard {
|
|
|
135
184
|
if (!cmd)
|
|
136
185
|
return null;
|
|
137
186
|
// Only dedup deterministic read-only commands. Skip anything writing/network/long-running.
|
|
138
|
-
|
|
139
|
-
if (writeKeywords.test(cmd))
|
|
187
|
+
if (WRITE_KEYWORDS.test(cmd))
|
|
140
188
|
return null;
|
|
141
|
-
|
|
189
|
+
// Normalize whitespace so "ls  -la" (two spaces) and "ls -la" share a cache entry.
|
|
190
|
+
const key = cmd.replace(/\s+/g, ' ');
|
|
142
191
|
const cached = this.recentBash.get(key);
|
|
143
192
|
if (cached) {
|
|
144
193
|
const lead = cached.isError
|
|
@@ -152,13 +201,9 @@ export class SessionToolGuard {
|
|
|
152
201
|
return null;
|
|
153
202
|
}
|
|
154
203
|
beforeGrep(invocation) {
|
|
155
|
-
const
|
|
156
|
-
|
|
157
|
-
const glob = String(invocation.input.glob ?? '').trim();
|
|
158
|
-
const type = String(invocation.input.type ?? '').trim();
|
|
159
|
-
if (!pattern)
|
|
204
|
+
const key = grepKey(invocation);
|
|
205
|
+
if (!key)
|
|
160
206
|
return null;
|
|
161
|
-
const key = `${pattern}::${path}::${glob}::${type}`;
|
|
162
207
|
const cached = this.recentGreps.get(key);
|
|
163
208
|
if (cached) {
|
|
164
209
|
return {
|
|
@@ -169,11 +214,9 @@ export class SessionToolGuard {
|
|
|
169
214
|
return null;
|
|
170
215
|
}
|
|
171
216
|
beforeGlob(invocation) {
|
|
172
|
-
const
|
|
173
|
-
|
|
174
|
-
if (!pattern)
|
|
217
|
+
const key = globKey(invocation);
|
|
218
|
+
if (!key)
|
|
175
219
|
return null;
|
|
176
|
-
const key = `${pattern}::${path}`;
|
|
177
220
|
const cached = this.recentGlobs.get(key);
|
|
178
221
|
if (cached) {
|
|
179
222
|
return {
|
|
@@ -216,23 +259,20 @@ export class SessionToolGuard {
|
|
|
216
259
|
const cmd = String(invocation.input.command ?? '').trim();
|
|
217
260
|
if (!cmd)
|
|
218
261
|
return;
|
|
219
|
-
|
|
220
|
-
if (writeKeywords.test(cmd))
|
|
262
|
+
if (WRITE_KEYWORDS.test(cmd))
|
|
221
263
|
return;
|
|
222
264
|
const output = String(result.output ?? '');
|
|
223
265
|
const preview = output.length > MAX_PREVIEW_CHARS
|
|
224
266
|
? output.slice(0, MAX_PREVIEW_CHARS) + '…'
|
|
225
267
|
: output;
|
|
226
|
-
|
|
268
|
+
// Match the normalization used in beforeBash so reads/writes share keys.
|
|
269
|
+
const key = cmd.replace(/\s+/g, ' ');
|
|
270
|
+
this.recentBash.set(key, { preview, turn: this.turn, isError: !!result.isError });
|
|
227
271
|
}
|
|
228
272
|
afterGrep(invocation, result) {
|
|
229
|
-
const
|
|
230
|
-
|
|
231
|
-
const glob = String(invocation.input.glob ?? '').trim();
|
|
232
|
-
const type = String(invocation.input.type ?? '').trim();
|
|
233
|
-
if (!pattern)
|
|
273
|
+
const key = grepKey(invocation);
|
|
274
|
+
if (!key)
|
|
234
275
|
return;
|
|
235
|
-
const key = `${pattern}::${path}::${glob}::${type}`;
|
|
236
276
|
const output = String(result.output ?? '');
|
|
237
277
|
const preview = output.length > MAX_PREVIEW_CHARS
|
|
238
278
|
? output.slice(0, MAX_PREVIEW_CHARS) + '…'
|
|
@@ -240,11 +280,9 @@ export class SessionToolGuard {
|
|
|
240
280
|
this.recentGreps.set(key, { preview, turn: this.turn });
|
|
241
281
|
}
|
|
242
282
|
afterGlob(invocation, result) {
|
|
243
|
-
const
|
|
244
|
-
|
|
245
|
-
if (!pattern)
|
|
283
|
+
const key = globKey(invocation);
|
|
284
|
+
if (!key)
|
|
246
285
|
return;
|
|
247
|
-
const key = `${pattern}::${path}`;
|
|
248
286
|
const output = String(result.output ?? '');
|
|
249
287
|
const preview = output.length > MAX_PREVIEW_CHARS
|
|
250
288
|
? output.slice(0, MAX_PREVIEW_CHARS) + '…'
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Best-effort pricing estimate for image generation models Franklin routes
|
|
3
|
+
* through the BlockRun gateway. Numbers are drawn from published model
|
|
4
|
+
* pricing and should be treated as *estimates* — the x402 micropayment is
|
|
5
|
+
* what actually debits the wallet. The purpose of this table is to attach a
|
|
6
|
+
* USD cost to a generated asset so budget tracking on a Content piece has
|
|
7
|
+
* something to count against, not to promise an exact price.
|
|
8
|
+
*
|
|
9
|
+
* Kept in `content/` (not `tools/`) because the table is content-budget
|
|
10
|
+
* business logic, not an image-generation implementation detail. If the
|
|
11
|
+
* gateway ever exposes the realized payment amount on the response, that
|
|
12
|
+
* should be preferred — fall back to this estimate when it's missing.
|
|
13
|
+
*/
|
|
14
|
+
export declare function estimateImageCostUsd(model: string, size: string): number;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Best-effort pricing estimate for image generation models Franklin routes
 * through the BlockRun gateway. Numbers are drawn from published model
 * pricing and should be treated as *estimates* — the x402 micropayment is
 * what actually debits the wallet. The purpose of this table is to attach a
 * USD cost to a generated asset so budget tracking on a Content piece has
 * something to count against, not to promise an exact price.
 *
 * Kept in `content/` (not `tools/`) because the table is content-budget
 * business logic, not an image-generation implementation detail. If the
 * gateway ever exposes the realized payment amount on the response, that
 * should be preferred — fall back to this estimate when it's missing.
 */
/**
 * Estimate the USD cost of generating a single image.
 *
 * Both arguments are normalized before matching so that cosmetic differences
 * (letter case, stray whitespace) never move a request into the wrong tier.
 *
 * @param model Model ID such as "openai/dall-e-3". Matched case-insensitively,
 *   ignoring surrounding whitespace.
 * @param size Requested dimensions such as "1024x1024". Whitespace and letter
 *   case are ignored, so "1792 X 1024" is treated as "1792x1024". A missing
 *   value is treated as the default (standard) tier.
 * @returns The estimated cost in USD, or 0 when the model is not in the table.
 */
export function estimateImageCostUsd(model, size) {
    // Coerce defensively: this is a best-effort estimate used for budget
    // bookkeeping, so a missing argument must not throw and abort the caller.
    const m = String(model ?? '').trim().toLowerCase();
    // Lower-case the size as well — the model ID is already case-insensitive,
    // and "1792X1024" must not silently fall into the cheaper standard tier.
    const s = String(size ?? '').replace(/\s+/g, '').toLowerCase();
    if (m === 'openai/dall-e-3') {
        if (s === '1792x1024' || s === '1024x1792')
            return 0.08;
        // All other sizes fall back to the standard 1024x1024 tier.
        return 0.04;
    }
    if (m === 'openai/gpt-image-1') {
        // gpt-image-1 standard tier; larger sizes would tier up but Franklin
        // sends 1024x1024 as default.
        return 0.042;
    }
    // Unknown model: return 0 rather than a guess. A free/custom model should
    // not have a phantom charge against the Content budget, and surprise
    // overcharging from a wrong guess is worse than under-counting.
    return 0;
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
export type ContentType = 'x-thread' | 'blog' | 'podcast' | 'video' | 'ad-copy' | 'image';
|
|
2
|
+
export type ContentStatus = 'outline' | 'drafting' | 'assets' | 'review' | 'published';
|
|
3
|
+
export type AssetKind = 'image' | 'audio' | 'video' | 'text';
|
|
4
|
+
export interface ContentAsset {
|
|
5
|
+
kind: AssetKind;
|
|
6
|
+
/** Producer of the asset: model ID like "openai/dall-e-3", or "manual". */
|
|
7
|
+
source: string;
|
|
8
|
+
/** USD actually spent producing this asset. 0 is valid (free models). */
|
|
9
|
+
costUsd: number;
|
|
10
|
+
/** Optional payload reference — URL, file path, or short inline text. */
|
|
11
|
+
data?: string;
|
|
12
|
+
createdAt: number;
|
|
13
|
+
}
|
|
14
|
+
export interface ContentDraft {
|
|
15
|
+
text: string;
|
|
16
|
+
createdAt: number;
|
|
17
|
+
}
|
|
18
|
+
export interface DistributionEntry {
|
|
19
|
+
channel: string;
|
|
20
|
+
url?: string;
|
|
21
|
+
at: number;
|
|
22
|
+
}
|
|
23
|
+
export interface Content {
|
|
24
|
+
id: string;
|
|
25
|
+
type: ContentType;
|
|
26
|
+
title: string;
|
|
27
|
+
status: ContentStatus;
|
|
28
|
+
outline?: string;
|
|
29
|
+
drafts: ContentDraft[];
|
|
30
|
+
assets: ContentAsset[];
|
|
31
|
+
spentUsd: number;
|
|
32
|
+
budgetUsd: number;
|
|
33
|
+
createdAt: number;
|
|
34
|
+
publishedAt?: number;
|
|
35
|
+
distribution: DistributionEntry[];
|
|
36
|
+
}
|
|
37
|
+
export interface CreateContentOptions {
|
|
38
|
+
type: ContentType;
|
|
39
|
+
title: string;
|
|
40
|
+
budgetUsd: number;
|
|
41
|
+
}
|
|
42
|
+
export declare class ContentLibrary {
|
|
43
|
+
private byId;
|
|
44
|
+
create(opts: CreateContentOptions): Content;
|
|
45
|
+
get(id: string): Content | undefined;
|
|
46
|
+
list(): Content[];
|
|
47
|
+
/** Replace a content record wholesale — used by the persistence layer. */
|
|
48
|
+
restore(content: Content): void;
|
|
49
|
+
/**
|
|
50
|
+
* Record a generated asset against a content, enforcing the budget cap.
|
|
51
|
+
* Returns `{ ok: false, reason }` on rejection so callers (including the
|
|
52
|
+
* agent-facing capability) can surface the reason instead of catching an
|
|
53
|
+
* exception. On the happy path mutates the Content in place and returns
|
|
54
|
+
* the updated spentUsd.
|
|
55
|
+
*/
|
|
56
|
+
addAsset(id: string, asset: Omit<ContentAsset, 'createdAt'>): {
|
|
57
|
+
ok: true;
|
|
58
|
+
spentUsd: number;
|
|
59
|
+
} | {
|
|
60
|
+
ok: false;
|
|
61
|
+
reason: string;
|
|
62
|
+
};
|
|
63
|
+
}
|