@blockrun/franklin 3.8.27 → 3.8.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,77 +39,12 @@ function replaceHistory(target, replacement) {
39
39
  target.splice(0, target.length, ...replacement);
40
40
  }
41
41
  // ─── Pushback detection ───────────────────────────────────────────────────
42
- // Cheap models plough forward when users correct them. This detects common
43
- // correction patterns so the agent can explicitly reset its approach.
44
- //
45
- // Precision-biased: we'd rather miss a real pushback than falsely trigger on
46
- // casual disagreement ("But how do I deploy?"). False positives pollute the
47
- // conversation and make the agent abandon working approaches unnecessarily.
48
- // STRONG patterns: high-precision correction language. Fires even on short input.
49
- const PUSHBACK_STRONG = [
50
- /\b(that'?s?\s+(wrong|incorrect|not\s+right)|you'?re?\s+wrong)\b/i,
51
- /\b(i\s+(said|told\s+you)|not\s+what\s+i)\b/i,
52
- /^(stop|wrong|incorrect|try\s+again)\b/i,
53
- /^(不对|不是|错了|再试|重来)/,
54
- ];
55
- // WEAK patterns: common correction starters that also appear in casual speech.
56
- // Require a corroborating signal (see detectPushback) to count as pushback.
57
- const PUSHBACK_WEAK = [
58
- /^(but|however|actually|wait|no+\b|hmm)\b/i,
59
- /\b(we\s+are\s+using|the\s+correct|the\s+actual)\b/i,
60
- /^(但是|其实|等等|停)/,
61
- ];
62
- /**
63
- * True if the last assistant turn made a concrete claim worth pushing back
64
- * against: executed a tool, wrote code, or produced a non-trivial answer.
65
- * Casual assistant chatter doesn't warrant treating a "but" as a correction.
66
- */
67
- function lastAssistantHasClaim(history) {
68
- for (let i = history.length - 1; i >= 0; i--) {
69
- const msg = history[i];
70
- if (msg.role !== 'assistant')
71
- continue;
72
- if (Array.isArray(msg.content)) {
73
- for (const part of msg.content) {
74
- const p = part;
75
- if (p.type === 'tool_use')
76
- return true;
77
- if (p.type === 'text' && typeof p.text === 'string' && p.text.trim().length >= 40) {
78
- return true;
79
- }
80
- }
81
- return false;
82
- }
83
- if (typeof msg.content === 'string' && msg.content.trim().length >= 40)
84
- return true;
85
- return false;
86
- }
87
- return false;
88
- }
89
- function detectPushback(input, history) {
90
- // Only count as pushback if there's a prior assistant turn to push back against.
91
- if (history.length === 0)
92
- return false;
93
- if (!lastAssistantHasClaim(history))
94
- return false;
95
- const trimmed = input.trim();
96
- if (trimmed.length === 0 || trimmed.length > 500)
97
- return false;
98
- // Strong patterns: direct correction language — fire immediately.
99
- if (PUSHBACK_STRONG.some((re) => re.test(trimmed)))
100
- return true;
101
- // Weak patterns: only count if the message is short (< 120 chars) AND doesn't
102
- // also contain a fresh request. A weak starter followed by "can you also X"
103
- // or "please do Y" is scope addition, not correction.
104
- if (PUSHBACK_WEAK.some((re) => re.test(trimmed))) {
105
- if (trimmed.length > 120)
106
- return false;
107
- if (/\b(can you|could you|please|also|add|include)\b/i.test(trimmed))
108
- return false;
109
- return true;
110
- }
111
- return false;
112
- }
42
+ // Formerly a pair of regex lists (PUSHBACK_STRONG / PUSHBACK_WEAK) plus a
43
+ // claim-on-prior-turn check (~70 lines of keyword heuristics). Replaced by
44
+ // `turnAnalysis.isPushback` from `turn-analyzer.ts` (v3.8.27): the free
45
+ // classifier reads the user's actual phrasing AND the prior assistant
46
+ // reply and decides whether this turn is a correction. Zero keyword
47
+ // allowlist, works across languages and phrasings the regex never covered.
113
48
  /**
114
49
  * Sanitize history: fix orphaned tool results AND inject missing results.
115
50
  *
@@ -456,20 +391,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
456
391
  input = cmdResult.rewritten;
457
392
  }
458
393
  }
459
- // ── Pushback detection ──
460
- // When the user corrects us ("no", "but", "actually", "wrong"), we must throw
461
- // away the previous plan and reconsider — not continue the failing approach.
462
- // Without this signal, cheap models tend to plough forward with the same bad idea.
463
- const pushbackSignal = detectPushback(input, history);
464
- const effectiveInput = pushbackSignal
465
- ? `${input}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`
466
- : input;
467
394
  lastUserInput = input;
468
- history.push({ role: 'user', content: effectiveInput });
395
+ // Push the user's clean message; any harness-injected annotations
396
+ // (pushback SYSTEM NOTE, prefetch context block) are applied AFTER
397
+ // the turn analyzer runs so they get driven by model-decided flags
398
+ // instead of keyword regex.
399
+ history.push({ role: 'user', content: input });
469
400
  turnCount++;
470
401
  toolGuard.startTurn();
471
- // Persist the user's original message, not the injected SYSTEM NOTE scaffold.
472
- // Resumed sessions should show what the user typed, not our internal prompt engineering.
473
402
  persistSessionMessage({ role: 'user', content: input });
474
403
  // ── Model recovery: try original model at the start of each new turn ──
475
404
  // If we fell back to a free model last turn due to a transient error, try original again.
@@ -595,6 +524,22 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
595
524
  catch {
596
525
  // Analyzer is best-effort; ignore.
597
526
  }
527
+ // ── Pushback annotation ─────────────────────────────────────────
528
+ // If the analyzer judged this turn as a user correction of the
529
+ // previous answer, inject a SYSTEM NOTE into the user message so the
530
+ // model resets its approach rather than doubling down. Replaces the
531
+ // former PUSHBACK_STRONG / PUSHBACK_WEAK regex lists — model-decided,
532
+ // no keyword allowlist to rot.
533
+ if (turnAnalysis?.isPushback) {
534
+ const lastIdx = history.length - 1;
535
+ const last = history[lastIdx];
536
+ if (last && last.role === 'user' && typeof last.content === 'string') {
537
+ history[lastIdx] = {
538
+ role: 'user',
539
+ content: `${last.content}\n\n[SYSTEM NOTE] The user is correcting you. Your previous response was wrong or off-target. Do NOT continue the previous approach. Re-read the conversation, identify what specifically the user is correcting, and change your strategy. If the user pointed out a fact (e.g. "we are using X"), treat that fact as ground truth and rebuild your answer around it.`,
540
+ };
541
+ }
542
+ }
598
543
  // ── Proactive prefetch ────────────────────────────────────────────
599
544
  // Uses the intent the analyzer already extracted. Skips the separate
600
545
  // prefetch-classifier call that previously ran here.
@@ -773,8 +718,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
773
718
  // Update token estimation model for more accurate byte-per-token ratio
774
719
  setEstimationModel(resolvedModel);
775
720
  // ── Plan-then-execute: detect and activate ──
721
+ // `needsPlanning` flag comes from turn-analyzer (one-word LLM decision
722
+ // on the user's original prompt). shouldPlan still guards env / profile /
723
+ // ultrathink / per-session overrides — those are operator policy, not
724
+ // model decisions.
776
725
  if (loopCount === 1 && !planActive && routingProfile &&
777
- shouldPlan(routingTier, routingProfile, lastUserInput, !!config.ultrathink, !!config.planDisabled)) {
726
+ shouldPlan(routingProfile, !!config.ultrathink, !!config.planDisabled, turnAnalysis?.needsPlanning ?? false)) {
778
727
  planActive = true;
779
728
  planPlannerModel = resolvedModel;
780
729
  planExecutorModel = getExecutorModel(routingProfile);
@@ -7,13 +7,19 @@
7
7
  * Flow: detect complexity → plan with strong model → execute with cheap model
8
8
  * → escalate back to strong model if executor gets stuck
9
9
  */
10
- import type { Tier, RoutingProfile } from '../router/index.js';
10
+ import type { RoutingProfile } from '../router/index.js';
11
11
  /**
12
12
  * Should this task use plan-then-execute?
13
- * Returns true only for complex, multi-step tasks where the savings justify
14
- * the overhead of an extra planning call.
13
+ *
14
+ * Replaces the former AGENTIC_KEYWORDS / MULTI_STEP_PATTERN regex heuristics
15
+ * with a single read of `turnAnalysis.needsPlanning`. The free model judged
16
+ * whether the task is substantive-multi-step from the user's actual phrasing,
17
+ * with no keyword allowlist to maintain.
18
+ *
19
+ * Environment gates (opt-in / opt-out / profile / ultrathink / session
20
+ * override) remain — those are operator decisions, not model decisions.
15
21
  */
16
- export declare function shouldPlan(tier: Tier | undefined, profile: RoutingProfile | undefined, userText: string, ultrathink: boolean, planDisabled: boolean): boolean;
22
+ export declare function shouldPlan(profile: RoutingProfile | undefined, ultrathink: boolean, planDisabled: boolean, analyzerSaysNeedsPlanning: boolean): boolean;
17
23
  /**
18
24
  * Returns the planning system prompt section.
19
25
  * Injected alongside the normal system prompt during the planning call.
@@ -7,53 +7,38 @@
7
7
  * Flow: detect complexity → plan with strong model → execute with cheap model
8
8
  * → escalate back to strong model if executor gets stuck
9
9
  */
10
- // ─── Agentic keywords that suggest multi-step work ───────────────────────
11
- const AGENTIC_KEYWORDS = /\b(implement|refactor|build|fix|debug|migrate|deploy|create|add|remove|update|restructure|extract|rewrite|optimize|convert|integrate|setup|configure)\b/i;
12
- const MULTI_STEP_PATTERN = /first.*then|step\s+\d|\d+\.\s|and\s+then|after\s+that|next\s*,|finally\b/i;
13
10
  // ─── Detection ───────────────────────────────────────────────────────────
14
11
  /**
15
12
  * Should this task use plan-then-execute?
16
- * Returns true only for complex, multi-step tasks where the savings justify
17
- * the overhead of an extra planning call.
13
+ *
14
+ * Replaces the former AGENTIC_KEYWORDS / MULTI_STEP_PATTERN regex heuristics
15
+ * with a single read of `turnAnalysis.needsPlanning`. The free model judged
16
+ * whether the task is substantive-multi-step from the user's actual phrasing,
17
+ * no keyword allowlist to maintain.
18
+ *
19
+ * Environment gates (opt-in / opt-out / profile / ultrathink / session
20
+ * override) remain — those are operator decisions, not model decisions.
18
21
  */
19
- export function shouldPlan(tier, profile, userText, ultrathink, planDisabled) {
20
- // Default: plan-then-execute is OFF (v3.8.18). Observed failure: router
21
- // correctly picks Sonnet for a "should I sell CRCL" prompt, but the
22
- // executor swap downgrades actual execution to gemini-2.5-flash, which
23
- // then answers from memory instead of calling TradingMarket / ExaAnswer.
24
- // The cheap-executor pattern was load-bearing for Sonnet 4.0-era models;
25
- // Opus 4.7 / Sonnet 4.6 handle multi-step tool use coherently in a
26
- // single pass, so the two-call path is pure overhead — and it actively
27
- // hurts when the executor is weaker than the planner.
28
- // Opt back in with FRANKLIN_PLAN=1 (for experiments / ablation).
22
+ export function shouldPlan(profile, ultrathink, planDisabled, analyzerSaysNeedsPlanning) {
23
+ // Default: plan-then-execute is OFF (since v3.8.18). The cheap-executor
24
+ // pattern was load-bearing for Sonnet-4.0-era models but Opus 4.7 /
25
+ // Sonnet 4.6 handle multi-step tool use in a single pass. Opt in with
26
+ // FRANKLIN_PLAN=1 for ablation / experiments.
29
27
  if (process.env.FRANKLIN_PLAN !== '1')
30
28
  return false;
31
- // Legacy env opt-out still honored for users who set it previously.
29
+ // Legacy env opt-out still honored for users who set it previously.
32
30
  if (process.env.FRANKLIN_NOPLAN === '1')
33
31
  return false;
34
- // User disabled planning for this session
32
+ // Per-session / per-turn overrides from the agent surface.
35
33
  if (planDisabled)
36
34
  return false;
37
- // Ultrathink already provides deep reasoning
38
35
  if (ultrathink)
39
- return false;
40
- // Only auto or premium profiles (eco/free are cost-constrained)
36
+ return false; // ultrathink already provides deep reasoning
37
+ // Only auto / premium profiles — eco / free are cost-constrained.
41
38
  if (profile !== 'auto' && profile !== 'premium')
42
39
  return false;
43
- // Explicit multi-step language always plans, regardless of tier / length
44
- // ("first ... then ...", "step 1 ... step 2 ...", numbered lists, etc.)
45
- if (MULTI_STEP_PATTERN.test(userText))
46
- return true;
47
- // Planning is high-ROI on COMPLEX / REASONING tiers for agentic verbs,
48
- // even when the prompt is short ("refactor the wallet module", "migrate to TS")
49
- if (tier === 'COMPLEX' || tier === 'REASONING') {
50
- return AGENTIC_KEYWORDS.test(userText) || userText.length >= 60;
51
- }
52
- // On MEDIUM tier: plan only if long AND agentic
53
- if (tier === 'MEDIUM' && userText.length >= 120 && AGENTIC_KEYWORDS.test(userText)) {
54
- return true;
55
- }
56
- return false;
40
+ // Final decision comes from the turn analyzer's boolean flag.
41
+ return analyzerSaysNeedsPlanning;
57
42
  }
58
43
  // ─── Planning Prompt ─────────────────────────────────────────────────────
59
44
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.8.27",
3
+ "version": "3.8.28",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {