@blockrun/franklin 3.7.4 → 3.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,29 @@ import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions,
31
31
  function replaceHistory(target, replacement) {
32
32
  target.splice(0, target.length, ...replacement);
33
33
  }
34
+ // ─── Pushback detection ───────────────────────────────────────────────────
35
+ // Cheap models plough forward when users correct them. This detects common
36
+ // correction patterns so the agent can explicitly reset its approach.
37
+ const PUSHBACK_PATTERNS = [
38
+ /^(but|however|actually|wait|no+\b|hmm)\b/i,
39
+ /\b(that'?s?\s+(wrong|incorrect|not\s+right)|you'?re?\s+wrong)\b/i,
40
+ /\b(i\s+(said|told\s+you)|not\s+(what|that))\b/i,
41
+ /\b(we\s+are\s+using|the\s+correct|the\s+actual)\b/i,
42
+ /^(stop|no,|wrong|incorrect|try\s+again)\b/i,
43
+ /^(不对|不是|错了|再试|但是|其实|等等|停|重来)/,
44
+ ];
45
+ function detectPushback(input, history) {
46
+ // Only count as pushback if there's a prior assistant turn to push back against.
47
+ if (history.length === 0)
48
+ return false;
49
+ const hasPriorAssistant = history.some((m) => m.role === 'assistant');
50
+ if (!hasPriorAssistant)
51
+ return false;
52
+ const trimmed = input.trim();
53
+ if (trimmed.length === 0 || trimmed.length > 500)
54
+ return false;
55
+ return PUSHBACK_PATTERNS.some((re) => re.test(trimmed));
56
+ }
34
57
  /**
35
58
  * Sanitize history: fix orphaned tool results AND inject missing results.
36
59
  *
@@ -310,11 +333,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
310
333
  input = cmdResult.rewritten;
311
334
  }
312
335
  }
336
+ // ── Pushback detection ──
337
+ // When the user corrects us ("no", "but", "actually", "wrong"), we must throw
338
+ // away the previous plan and reconsider — not continue the failing approach.
339
+ // Without this signal, cheap models tend to plough forward with the same bad idea.
340
+ const pushbackSignal = detectPushback(input, history);
341
+ const effectiveInput = pushbackSignal
342
+ ? `${input}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`
343
+ : input;
313
344
  lastUserInput = input;
314
- history.push({ role: 'user', content: input });
345
+ history.push({ role: 'user', content: effectiveInput });
315
346
  turnCount++;
316
347
  toolGuard.startTurn();
317
- persistSessionMessage({ role: 'user', content: input });
348
+ persistSessionMessage({ role: 'user', content: effectiveInput });
318
349
  // ── Model recovery: try original model at the start of each new turn ──
319
350
  // If we fell back to a free model last turn due to a transient error, try original again.
320
351
  // But DON'T reset if the original model had a payment failure — it will just fail again.
@@ -17,25 +17,29 @@ const MULTI_STEP_PATTERN = /first.*then|step\s+\d|\d+\.\s|and\s+then|after\s+tha
17
17
  * the overhead of an extra planning call.
18
18
  */
19
19
  export function shouldPlan(tier, profile, userText, ultrathink, planDisabled) {
20
- // Gate 1: only COMPLEX or REASONING tiers benefit from planning
21
- if (tier !== 'COMPLEX' && tier !== 'REASONING')
22
- return false;
23
- // Gate 2: only auto or premium profiles (eco/free already cost-optimized)
24
- if (profile !== 'auto' && profile !== 'premium')
25
- return false;
26
- // Gate 3: skip short queries — planning overhead not worth it
27
- if (userText.length < 80)
20
+ // User disabled planning for this session
21
+ if (planDisabled)
28
22
  return false;
29
- // Gate 4: ultrathink already provides deep reasoning
23
+ // Ultrathink already provides deep reasoning
30
24
  if (ultrathink)
31
25
  return false;
32
- // Gate 5: user disabled planning for this session
33
- if (planDisabled)
26
+ // Only auto or premium profiles (eco/free are cost-constrained)
27
+ if (profile !== 'auto' && profile !== 'premium')
34
28
  return false;
35
- // Gate 6: must have agentic or multi-step signals
36
- const hasAgenticKeyword = AGENTIC_KEYWORDS.test(userText);
37
- const hasMultiStep = MULTI_STEP_PATTERN.test(userText);
38
- return hasAgenticKeyword || hasMultiStep;
29
+ // Explicit multi-step language always plans, regardless of tier / length
30
+ // ("first ... then ...", "step 1 ... step 2 ...", numbered lists, etc.)
31
+ if (MULTI_STEP_PATTERN.test(userText))
32
+ return true;
33
+ // Planning is high-ROI on COMPLEX / REASONING tiers for agentic verbs,
34
+ // even when the prompt is short ("refactor the wallet module", "migrate to TS")
35
+ if (tier === 'COMPLEX' || tier === 'REASONING') {
36
+ return AGENTIC_KEYWORDS.test(userText) || userText.length >= 60;
37
+ }
38
+ // On MEDIUM tier: plan only if long AND agentic
39
+ if (tier === 'MEDIUM' && userText.length >= 120 && AGENTIC_KEYWORDS.test(userText)) {
40
+ return true;
41
+ }
42
+ return false;
39
43
  }
40
44
  // ─── Planning Prompt ─────────────────────────────────────────────────────
41
45
  /**
@@ -14,9 +14,18 @@ export declare class SessionToolGuard {
14
14
  private recentFetches;
15
15
  private pendingFetches;
16
16
  private toolErrorCounts;
17
+ private recentGreps;
18
+ private recentGlobs;
19
+ private recentBash;
17
20
  startTurn(): void;
18
21
  beforeExecute(invocation: CapabilityInvocation, scope: ExecutionScope): Promise<CapabilityResult | null>;
22
+ private beforeBash;
23
+ private beforeGrep;
24
+ private beforeGlob;
19
25
  afterExecute(invocation: CapabilityInvocation, result: CapabilityResult): void;
26
+ private afterBash;
27
+ private afterGrep;
28
+ private afterGlob;
20
29
  cancelInvocation(invocationId: string): void;
21
30
  private beforeWebSearch;
22
31
  private beforeRead;
@@ -90,6 +90,11 @@ export class SessionToolGuard {
90
90
  recentFetches = new Map();
91
91
  pendingFetches = new Map();
92
92
  toolErrorCounts = new Map();
93
+ // Session-level dedup for code-search tools — agents love grep'ing the same pattern
94
+ // five times in a row when they're confused. Tell them once that it already failed.
95
+ recentGreps = new Map();
96
+ recentGlobs = new Map();
97
+ recentBash = new Map();
93
98
  startTurn() {
94
99
  this.turn++;
95
100
  this.webSearchesThisTurn = 0;
@@ -115,10 +120,69 @@ export class SessionToolGuard {
115
120
  return this.beforeRead(invocation, scope);
116
121
  case 'WebFetch':
117
122
  return this.beforeWebFetch(invocation);
123
+ case 'Grep':
124
+ return this.beforeGrep(invocation);
125
+ case 'Glob':
126
+ return this.beforeGlob(invocation);
127
+ case 'Bash':
128
+ return this.beforeBash(invocation);
118
129
  default:
119
130
  return null;
120
131
  }
121
132
  }
133
+ beforeBash(invocation) {
134
+ const cmd = String(invocation.input.command ?? '').trim();
135
+ if (!cmd)
136
+ return null;
137
+ // Only dedup deterministic read-only commands. Skip anything writing/network/long-running.
138
+ const writeKeywords = /\b(rm|mv|cp|mkdir|touch|chmod|chown|write|install|build|publish|push|pull|curl|wget|fetch|npm|pnpm|yarn|pip|cargo|go\s+(build|run|test)|docker|kubectl|tar|zip|unzip|tee|>\s|>>\s)\b/;
139
+ if (writeKeywords.test(cmd))
140
+ return null;
141
+ const key = cmd;
142
+ const cached = this.recentBash.get(key);
143
+ if (cached) {
144
+ const lead = cached.isError
145
+ ? 'That exact Bash command was already run this session and FAILED:'
146
+ : 'That exact Bash command was already run this session and returned:';
147
+ return {
148
+ output: `${lead}\n${cached.preview}\n\n` +
149
+ 'Do not re-run the same command. If the output was insufficient, run a different command or use a dedicated tool (Read for files, Grep/Glob for searching).',
150
+ };
151
+ }
152
+ return null;
153
+ }
154
+ beforeGrep(invocation) {
155
+ const pattern = String(invocation.input.pattern ?? '').trim();
156
+ const path = String(invocation.input.path ?? '').trim();
157
+ const glob = String(invocation.input.glob ?? '').trim();
158
+ const type = String(invocation.input.type ?? '').trim();
159
+ if (!pattern)
160
+ return null;
161
+ const key = `${pattern}::${path}::${glob}::${type}`;
162
+ const cached = this.recentGreps.get(key);
163
+ if (cached) {
164
+ return {
165
+ output: `That exact Grep was already run this session and returned:\n${cached.preview}\n\n` +
166
+ 'Do not re-run the same pattern. If you need different information, change the pattern, path, or try a different tool (Glob to list files, Read to see full content).',
167
+ };
168
+ }
169
+ return null;
170
+ }
171
+ beforeGlob(invocation) {
172
+ const pattern = String(invocation.input.pattern ?? '').trim();
173
+ const path = String(invocation.input.path ?? '').trim();
174
+ if (!pattern)
175
+ return null;
176
+ const key = `${pattern}::${path}`;
177
+ const cached = this.recentGlobs.get(key);
178
+ if (cached) {
179
+ return {
180
+ output: `That exact Glob was already run this session and returned:\n${cached.preview}\n\n` +
181
+ 'Do not re-run the same pattern. Use Grep to search within those files, or Read them directly.',
182
+ };
183
+ }
184
+ return null;
185
+ }
122
186
  afterExecute(invocation, result) {
123
187
  // Track per-tool error counts across the session
124
188
  if (result.isError) {
@@ -135,10 +199,58 @@ export class SessionToolGuard {
135
199
  case 'WebFetch':
136
200
  this.afterWebFetch(invocation, result);
137
201
  break;
202
+ case 'Grep':
203
+ this.afterGrep(invocation, result);
204
+ break;
205
+ case 'Glob':
206
+ this.afterGlob(invocation, result);
207
+ break;
208
+ case 'Bash':
209
+ this.afterBash(invocation, result);
210
+ break;
138
211
  default:
139
212
  break;
140
213
  }
141
214
  }
215
+ afterBash(invocation, result) {
216
+ const cmd = String(invocation.input.command ?? '').trim();
217
+ if (!cmd)
218
+ return;
219
+ const writeKeywords = /\b(rm|mv|cp|mkdir|touch|chmod|chown|write|install|build|publish|push|pull|curl|wget|fetch|npm|pnpm|yarn|pip|cargo|go\s+(build|run|test)|docker|kubectl|tar|zip|unzip|tee|>\s|>>\s)\b/;
220
+ if (writeKeywords.test(cmd))
221
+ return;
222
+ const output = String(result.output ?? '');
223
+ const preview = output.length > MAX_PREVIEW_CHARS
224
+ ? output.slice(0, MAX_PREVIEW_CHARS) + '…'
225
+ : output;
226
+ this.recentBash.set(cmd, { preview, turn: this.turn, isError: !!result.isError });
227
+ }
228
+ afterGrep(invocation, result) {
229
+ const pattern = String(invocation.input.pattern ?? '').trim();
230
+ const path = String(invocation.input.path ?? '').trim();
231
+ const glob = String(invocation.input.glob ?? '').trim();
232
+ const type = String(invocation.input.type ?? '').trim();
233
+ if (!pattern)
234
+ return;
235
+ const key = `${pattern}::${path}::${glob}::${type}`;
236
+ const output = String(result.output ?? '');
237
+ const preview = output.length > MAX_PREVIEW_CHARS
238
+ ? output.slice(0, MAX_PREVIEW_CHARS) + '…'
239
+ : output;
240
+ this.recentGreps.set(key, { preview, turn: this.turn });
241
+ }
242
+ afterGlob(invocation, result) {
243
+ const pattern = String(invocation.input.pattern ?? '').trim();
244
+ const path = String(invocation.input.path ?? '').trim();
245
+ if (!pattern)
246
+ return;
247
+ const key = `${pattern}::${path}`;
248
+ const output = String(result.output ?? '');
249
+ const preview = output.length > MAX_PREVIEW_CHARS
250
+ ? output.slice(0, MAX_PREVIEW_CHARS) + '…'
251
+ : output;
252
+ this.recentGlobs.set(key, { preview, turn: this.turn });
253
+ }
142
254
  cancelInvocation(invocationId) {
143
255
  this.pendingSearches.delete(invocationId);
144
256
  this.pendingReads.delete(invocationId);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.7.4",
3
+ "version": "3.7.6",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {