@blockrun/franklin 3.7.3 → 3.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +33 -2
- package/dist/agent/tool-guard.d.ts +6 -0
- package/dist/agent/tool-guard.js +72 -0
- package/dist/router/index.js +16 -7
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -31,6 +31,29 @@ import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions,
|
|
|
31
31
|
function replaceHistory(target, replacement) {
|
|
32
32
|
target.splice(0, target.length, ...replacement);
|
|
33
33
|
}
|
|
34
|
+
// ─── Pushback detection ───────────────────────────────────────────────────
|
|
35
|
+
// Cheap models plough forward when users correct them. This detects common
|
|
36
|
+
// correction patterns so the agent can explicitly reset its approach.
|
|
37
|
+
const PUSHBACK_PATTERNS = [
|
|
38
|
+
/^(but|however|actually|wait|no+\b|hmm)\b/i,
|
|
39
|
+
/\b(that'?s?\s+(wrong|incorrect|not\s+right)|you'?re?\s+wrong)\b/i,
|
|
40
|
+
/\b(i\s+(said|told\s+you)|not\s+(what|that))\b/i,
|
|
41
|
+
/\b(we\s+are\s+using|the\s+correct|the\s+actual)\b/i,
|
|
42
|
+
/^(stop|no,|wrong|incorrect|try\s+again)\b/i,
|
|
43
|
+
/^(不对|不是|错了|再试|但是|其实|等等|停|重来)/,
|
|
44
|
+
];
|
|
45
|
+
function detectPushback(input, history) {
|
|
46
|
+
// Only count as pushback if there's a prior assistant turn to push back against.
|
|
47
|
+
if (history.length === 0)
|
|
48
|
+
return false;
|
|
49
|
+
const hasPriorAssistant = history.some((m) => m.role === 'assistant');
|
|
50
|
+
if (!hasPriorAssistant)
|
|
51
|
+
return false;
|
|
52
|
+
const trimmed = input.trim();
|
|
53
|
+
if (trimmed.length === 0 || trimmed.length > 500)
|
|
54
|
+
return false;
|
|
55
|
+
return PUSHBACK_PATTERNS.some((re) => re.test(trimmed));
|
|
56
|
+
}
|
|
34
57
|
/**
|
|
35
58
|
* Sanitize history: fix orphaned tool results AND inject missing results.
|
|
36
59
|
*
|
|
@@ -310,11 +333,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
310
333
|
input = cmdResult.rewritten;
|
|
311
334
|
}
|
|
312
335
|
}
|
|
336
|
+
// ── Pushback detection ──
|
|
337
|
+
// When the user corrects us ("no", "but", "actually", "wrong"), we must throw
|
|
338
|
+
// away the previous plan and reconsider — not continue the failing approach.
|
|
339
|
+
// Without this signal, cheap models tend to plough forward with the same bad idea.
|
|
340
|
+
const pushbackSignal = detectPushback(input, history);
|
|
341
|
+
const effectiveInput = pushbackSignal
|
|
342
|
+
? `${input}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`
|
|
343
|
+
: input;
|
|
313
344
|
lastUserInput = input;
|
|
314
|
-
history.push({ role: 'user', content:
|
|
345
|
+
history.push({ role: 'user', content: effectiveInput });
|
|
315
346
|
turnCount++;
|
|
316
347
|
toolGuard.startTurn();
|
|
317
|
-
persistSessionMessage({ role: 'user', content:
|
|
348
|
+
persistSessionMessage({ role: 'user', content: effectiveInput });
|
|
318
349
|
// ── Model recovery: try original model at the start of each new turn ──
|
|
319
350
|
// If we fell back to a free model last turn due to a transient error, try original again.
|
|
320
351
|
// But DON'T reset if the original model had a payment failure — it will just fail again.
|
|
@@ -14,9 +14,15 @@ export declare class SessionToolGuard {
|
|
|
14
14
|
private recentFetches;
|
|
15
15
|
private pendingFetches;
|
|
16
16
|
private toolErrorCounts;
|
|
17
|
+
private recentGreps;
|
|
18
|
+
private recentGlobs;
|
|
17
19
|
startTurn(): void;
|
|
18
20
|
beforeExecute(invocation: CapabilityInvocation, scope: ExecutionScope): Promise<CapabilityResult | null>;
|
|
21
|
+
private beforeGrep;
|
|
22
|
+
private beforeGlob;
|
|
19
23
|
afterExecute(invocation: CapabilityInvocation, result: CapabilityResult): void;
|
|
24
|
+
private afterGrep;
|
|
25
|
+
private afterGlob;
|
|
20
26
|
cancelInvocation(invocationId: string): void;
|
|
21
27
|
private beforeWebSearch;
|
|
22
28
|
private beforeRead;
|
package/dist/agent/tool-guard.js
CHANGED
|
@@ -90,6 +90,10 @@ export class SessionToolGuard {
|
|
|
90
90
|
recentFetches = new Map();
|
|
91
91
|
pendingFetches = new Map();
|
|
92
92
|
toolErrorCounts = new Map();
|
|
93
|
+
// Session-level dedup for code-search tools — agents love grep'ing the same pattern
|
|
94
|
+
// five times in a row when they're confused. Tell them once that it already ran.
|
|
95
|
+
recentGreps = new Map();
|
|
96
|
+
recentGlobs = new Map();
|
|
93
97
|
startTurn() {
|
|
94
98
|
this.turn++;
|
|
95
99
|
this.webSearchesThisTurn = 0;
|
|
@@ -115,10 +119,46 @@ export class SessionToolGuard {
|
|
|
115
119
|
return this.beforeRead(invocation, scope);
|
|
116
120
|
case 'WebFetch':
|
|
117
121
|
return this.beforeWebFetch(invocation);
|
|
122
|
+
case 'Grep':
|
|
123
|
+
return this.beforeGrep(invocation);
|
|
124
|
+
case 'Glob':
|
|
125
|
+
return this.beforeGlob(invocation);
|
|
118
126
|
default:
|
|
119
127
|
return null;
|
|
120
128
|
}
|
|
121
129
|
}
|
|
130
|
+
beforeGrep(invocation) {
|
|
131
|
+
const pattern = String(invocation.input.pattern ?? '').trim();
|
|
132
|
+
const path = String(invocation.input.path ?? '').trim();
|
|
133
|
+
const glob = String(invocation.input.glob ?? '').trim();
|
|
134
|
+
const type = String(invocation.input.type ?? '').trim();
|
|
135
|
+
if (!pattern)
|
|
136
|
+
return null;
|
|
137
|
+
const key = `${pattern}::${path}::${glob}::${type}`;
|
|
138
|
+
const cached = this.recentGreps.get(key);
|
|
139
|
+
if (cached) {
|
|
140
|
+
return {
|
|
141
|
+
output: `That exact Grep was already run this session and returned:\n${cached.preview}\n\n` +
|
|
142
|
+
'Do not re-run the same pattern. If you need different information, change the pattern, path, or try a different tool (Glob to list files, Read to see full content).',
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
return null;
|
|
146
|
+
}
|
|
147
|
+
beforeGlob(invocation) {
|
|
148
|
+
const pattern = String(invocation.input.pattern ?? '').trim();
|
|
149
|
+
const path = String(invocation.input.path ?? '').trim();
|
|
150
|
+
if (!pattern)
|
|
151
|
+
return null;
|
|
152
|
+
const key = `${pattern}::${path}`;
|
|
153
|
+
const cached = this.recentGlobs.get(key);
|
|
154
|
+
if (cached) {
|
|
155
|
+
return {
|
|
156
|
+
output: `That exact Glob was already run this session and returned:\n${cached.preview}\n\n` +
|
|
157
|
+
'Do not re-run the same pattern. Use Grep to search within those files, or Read them directly.',
|
|
158
|
+
};
|
|
159
|
+
}
|
|
160
|
+
return null;
|
|
161
|
+
}
|
|
122
162
|
afterExecute(invocation, result) {
|
|
123
163
|
// Track per-tool error counts across the session
|
|
124
164
|
if (result.isError) {
|
|
@@ -135,10 +175,42 @@ export class SessionToolGuard {
|
|
|
135
175
|
case 'WebFetch':
|
|
136
176
|
this.afterWebFetch(invocation, result);
|
|
137
177
|
break;
|
|
178
|
+
case 'Grep':
|
|
179
|
+
this.afterGrep(invocation, result);
|
|
180
|
+
break;
|
|
181
|
+
case 'Glob':
|
|
182
|
+
this.afterGlob(invocation, result);
|
|
183
|
+
break;
|
|
138
184
|
default:
|
|
139
185
|
break;
|
|
140
186
|
}
|
|
141
187
|
}
|
|
188
|
+
afterGrep(invocation, result) {
|
|
189
|
+
const pattern = String(invocation.input.pattern ?? '').trim();
|
|
190
|
+
const path = String(invocation.input.path ?? '').trim();
|
|
191
|
+
const glob = String(invocation.input.glob ?? '').trim();
|
|
192
|
+
const type = String(invocation.input.type ?? '').trim();
|
|
193
|
+
if (!pattern)
|
|
194
|
+
return;
|
|
195
|
+
const key = `${pattern}::${path}::${glob}::${type}`;
|
|
196
|
+
const output = String(result.output ?? '');
|
|
197
|
+
const preview = output.length > MAX_PREVIEW_CHARS
|
|
198
|
+
? output.slice(0, MAX_PREVIEW_CHARS) + '…'
|
|
199
|
+
: output;
|
|
200
|
+
this.recentGreps.set(key, { preview, turn: this.turn });
|
|
201
|
+
}
|
|
202
|
+
afterGlob(invocation, result) {
|
|
203
|
+
const pattern = String(invocation.input.pattern ?? '').trim();
|
|
204
|
+
const path = String(invocation.input.path ?? '').trim();
|
|
205
|
+
if (!pattern)
|
|
206
|
+
return;
|
|
207
|
+
const key = `${pattern}::${path}`;
|
|
208
|
+
const output = String(result.output ?? '');
|
|
209
|
+
const preview = output.length > MAX_PREVIEW_CHARS
|
|
210
|
+
? output.slice(0, MAX_PREVIEW_CHARS) + '…'
|
|
211
|
+
: output;
|
|
212
|
+
this.recentGlobs.set(key, { preview, turn: this.turn });
|
|
213
|
+
}
|
|
142
214
|
cancelInvocation(invocationId) {
|
|
143
215
|
this.pendingSearches.delete(invocationId);
|
|
144
216
|
this.pendingReads.delete(invocationId);
|
package/dist/router/index.js
CHANGED
|
@@ -33,22 +33,24 @@ function loadLearnedWeights() {
|
|
|
33
33
|
return null;
|
|
34
34
|
}
|
|
35
35
|
// ─── Tier Model Configs ───
|
|
36
|
+
// Agent-first defaults. Claude Sonnet 4.6 is the industry standard for multi-step
|
|
37
|
+
// tool-use agent work; cheap models keep derailing on simple agent loops.
|
|
36
38
|
const AUTO_TIERS = {
|
|
37
39
|
SIMPLE: {
|
|
38
40
|
primary: 'google/gemini-2.5-flash',
|
|
39
|
-
fallback: ['
|
|
41
|
+
fallback: ['moonshot/kimi-k2.5', 'deepseek/deepseek-chat'],
|
|
40
42
|
},
|
|
41
43
|
MEDIUM: {
|
|
42
|
-
primary: '
|
|
43
|
-
fallback: ['
|
|
44
|
+
primary: 'anthropic/claude-sonnet-4.6',
|
|
45
|
+
fallback: ['openai/gpt-5.4', 'google/gemini-3.1-pro'],
|
|
44
46
|
},
|
|
45
47
|
COMPLEX: {
|
|
46
|
-
primary: '
|
|
47
|
-
fallback: ['
|
|
48
|
+
primary: 'anthropic/claude-sonnet-4.6',
|
|
49
|
+
fallback: ['openai/gpt-5.4', 'anthropic/claude-opus-4.6'],
|
|
48
50
|
},
|
|
49
51
|
REASONING: {
|
|
50
|
-
primary: '
|
|
51
|
-
fallback: ['
|
|
52
|
+
primary: 'anthropic/claude-opus-4.6',
|
|
53
|
+
fallback: ['openai/o3', 'xai/grok-4-1-fast-reasoning'],
|
|
52
54
|
},
|
|
53
55
|
};
|
|
54
56
|
const ECO_TIERS = {
|
|
@@ -266,6 +268,13 @@ export function routeRequest(prompt, profile = 'auto') {
|
|
|
266
268
|
savings: 1.0,
|
|
267
269
|
};
|
|
268
270
|
}
|
|
271
|
+
// Auto profile bypasses learned routing. The learned Elo scores grow with
|
|
272
|
+
// usage volume rather than pure quality, which biased the router toward
|
|
273
|
+
// cheap/weak models on agentic work. Classic AUTO_TIERS defaults are
|
|
274
|
+
// agent-tuned (Claude Sonnet as backbone) and more predictable for users.
|
|
275
|
+
if (profile === 'auto') {
|
|
276
|
+
return classicRouteRequest(prompt, profile);
|
|
277
|
+
}
|
|
269
278
|
// ── Learned routing (if weights available) ──
|
|
270
279
|
const weights = loadLearnedWeights();
|
|
271
280
|
if (weights) {
|
package/package.json
CHANGED