npm - @blockrun/franklin - Versions diffs - 3.8.2 → 3.8.3 - Mend

@blockrun/franklin 3.8.2 → 3.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/README.md +23 -36
package/dist/agent/commands.js +1 -1
package/dist/agent/llm.d.ts +6 -0
package/dist/agent/llm.js +103 -14
package/dist/agent/loop.d.ts +9 -0
package/dist/agent/loop.js +85 -0
package/dist/agent/think-tag-stripper.d.ts +27 -0
package/dist/agent/think-tag-stripper.js +75 -0
package/dist/agent/tokens.js +2 -1
package/dist/agent/types.d.ts +7 -0
package/dist/brain/index.d.ts +1 -1
package/dist/brain/index.js +1 -1
package/dist/brain/store.d.ts +13 -1
package/dist/brain/store.js +74 -5
package/dist/channel/telegram.d.ts +46 -0
package/dist/channel/telegram.js +367 -0
package/dist/commands/migrate.d.ts +5 -3
package/dist/commands/migrate.js +17 -15
package/dist/commands/stats.js +1 -1
package/dist/commands/telegram.d.ts +15 -0
package/dist/commands/telegram.js +95 -0
package/dist/content/library.js +2 -2
package/dist/index.js +9 -0
package/dist/panel/html.js +1 -1
package/dist/router/index.js +5 -5
package/dist/session/storage.d.ts +12 -0
package/dist/session/storage.js +11 -0
package/dist/social/ai.d.ts +3 -2
package/dist/social/ai.js +3 -2
package/dist/stats/insights.d.ts +1 -1
package/dist/stats/tracker.js +1 -1
package/dist/tools/content-execute.d.ts +1 -1
package/dist/tools/content-execute.js +1 -1
package/dist/tools/index.js +11 -3
package/dist/tools/memory.d.ts +16 -0
package/dist/tools/memory.js +86 -0
package/dist/tools/trading-execute.d.ts +2 -2
package/dist/tools/trading-execute.js +2 -2
package/dist/tools/videogen.d.ts +17 -0
package/dist/tools/videogen.js +237 -0
package/dist/trading/trade-log.d.ts +2 -2
package/dist/trading/trade-log.js +2 -2
package/dist/ui/app.js +38 -3
package/dist/ui/markdown.d.ts +16 -0
package/dist/ui/markdown.js +26 -2
package/package.json +5 -2

package/README.md CHANGED Viewed

@@ -16,9 +16,9 @@
 <p>
   <a href="https://npmjs.com/package/@blockrun/franklin"><img src="https://img.shields.io/npm/v/@blockrun/franklin.svg?style=flat-square&color=FFD700&label=npm" alt="npm"></a>
   <a href="https://npmjs.com/package/@blockrun/franklin"><img src="https://img.shields.io/npm/dm/@blockrun/franklin.svg?style=flat-square&color=10B981&label=downloads" alt="downloads"></a>
-  <a href="https://github.com/BlockRunAI/franklin/stargazers"><img src="https://img.shields.io/github/stars/BlockRunAI/franklin?style=flat-square&color=FFD700&label=stars" alt="stars"></a>
+  <a href="https://gitlab.com/blockrunai/franklin"><img src="https://img.shields.io/gitlab/stars/blockrunai/franklin?style=flat-square&color=FFD700&label=stars" alt="stars"></a>
   <a href="LICENSE"><img src="https://img.shields.io/badge/license-Apache_2.0-blue?style=flat-square" alt="license"></a>
-  <a href="https://github.com/BlockRunAI/franklin/actions"><img src="https://img.shields.io/github/actions/workflow/status/BlockRunAI/franklin/ci.yml?style=flat-square&label=ci" alt="ci"></a>
+  <a href="https://gitlab.com/blockrunai/franklin/-/pipelines"><img src="https://img.shields.io/gitlab/pipeline-status/blockrunai%2Ffranklin?branch=main&style=flat-square&label=ci" alt="ci"></a>
   <a href="https://www.typescriptlang.org/"><img src="https://img.shields.io/badge/TypeScript-strict-3178C6?style=flat-square&logo=typescript&logoColor=white" alt="TypeScript"></a>
   <a href="https://nodejs.org/"><img src="https://img.shields.io/badge/Node-%E2%89%A520-339933?style=flat-square&logo=node.js&logoColor=white" alt="Node"></a>
   <a href="https://x402.org"><img src="https://img.shields.io/badge/x402-native-10B981?style=flat-square" alt="x402"></a>
@@ -31,7 +31,7 @@
   <a href="#a-new-category">Category</a> ·
   <a href="#what-franklin-can-execute">What&nbsp;it&nbsp;does</a> ·
   <a href="#smart-router">Smart&nbsp;Router</a> ·
-  <a href="#the-comparison">vs.&nbsp;Claude&nbsp;Code</a> ·
+  <a href="#the-comparison">Comparison</a> ·
   <a href="#how-it-works">Architecture</a> ·
   <a href="#community">Community</a>
 </p>
@@ -42,7 +42,7 @@
 ## The pitch in one paragraph
-Claude Code writes code. Cursor writes code. Franklin writes code **and spends money to get the job done**. It holds a USDC wallet, picks the best model per task from 55+ providers, purchases trading data, generates images, pays for web search — all autonomously. You state an outcome and set a budget. Franklin decides what to call, what to pay for, and when to stop. Every paid action routes through the [x402](https://x402.org) micropayment protocol and settles against your own wallet. No subscriptions. No API keys. No account. The wallet is the identity.
+Most coding agents write code. Franklin writes code **and spends money to get the job done**. It holds a USDC wallet, picks the best model per task from 55+ providers, purchases trading data, generates images, pays for web search — all autonomously. You state an outcome and set a budget. Franklin decides what to call, what to pay for, and when to stop. Every paid action routes through the [x402](https://x402.org) micropayment protocol and settles against your own wallet. No subscriptions. No API keys. No account. The wallet is the identity.
 Built by the [BlockRun](https://blockrun.ai) team. Apache-2.0. TypeScript. Ships as one npm package.
@@ -63,7 +63,7 @@ npm install -g @blockrun/franklin
 # 2. Run (free — uses NVIDIA Nemotron & Qwen3 Coder out of the box)
 franklin
-# 3. (optional) Fund a wallet to unlock Claude, GPT, Gemini, Grok, + paid APIs
+# 3. (optional) Fund a wallet to unlock Sonnet, Opus, GPT, Gemini, Grok, + paid APIs
 franklin setup base        # or: franklin setup solana
 franklin balance           # show address + USDC balance
 ```
@@ -78,7 +78,7 @@ That's it. Zero signup, zero credit card, zero phone verification. Send **$5 of
 |                         | You pay for...                               | Result                               |
 | ----------------------- | -------------------------------------------- | ------------------------------------ |
-| Subscription (ChatGPT Plus, Claude Max, Cursor Pro) | Access. Paid whether you use it or not. | $20–200/month, rate-limited.         |
+| AI subscription       | Access. Paid whether you use it or not. | $20–200/month, rate-limited.         |
 | Pay-per-call (OpenAI API, etc.) | Every attempt — even failed ones.    | Hidden cost from retries, dead ends. |
 | **Franklin (YOPO)**     | **The outcome.** Each signed micropayment.  | **Provider cost + 5%. No more.**     |
@@ -253,7 +253,7 @@ You don't subscribe to electricity, you pay for what you use. Franklin brings th
 ### 🧠 &nbsp;Multi-model is the future
-No single model is best at everything. Claude writes better code, Gemini handles longer context, DeepSeek costs 20x less for simple tasks. The Smart Router routes every request to the optimal model in <1ms — up to 89% savings vs. always using Opus.
+No single model is best at everything. Sonnet writes better code, Gemini handles longer context, DeepSeek costs 20x less for simple tasks. The Smart Router routes every request to the optimal model in <1ms — up to 89% savings vs. always using Opus.
 </td>
 <td width="33%" valign="top">
@@ -270,19 +270,19 @@ No email. No phone. No KYC. Your Base or Solana address is your account — port
 ## The comparison
-|                                        | Claude Code     | Cursor           | Chatbots         | **Franklin**                    |
-| -------------------------------------- | --------------- | ---------------- | ---------------- | ------------------------------- |
-| Writes code                            | ✅              | ✅               | ⚠️                | ✅                              |
-| **Spends money for you**               | ❌              | ❌               | ❌               | ✅ **USDC wallet, x402**        |
-| **Buys data + APIs + images + search** | ❌              | ❌               | ❌               | ✅ **55+ APIs, one wallet**     |
-| Picks best model per task              | ❌ Anthropic only | ❌ plan-tied    | ❌               | ✅ **Smart Router, 55+ models** |
-| Pricing model                          | Subscription    | Subscription     | Subscription     | **YOPO** — per outcome, USDC    |
-| Monthly fee                            | $20–$200        | $20–$40          | $20+             | **$0**                          |
-| Rate-limited                           | Yes             | Yes              | Yes              | No — limited only by wallet     |
-| Works when provider goes down          | ❌              | ❌               | ❌               | ✅ **routes to another**        |
-| Identity                               | Anthropic account | Cursor account | Account / email | ✅ **wallet, no signup**        |
-| Start free, no KYC                     | ❌              | ❌               | ❌               | ✅                              |
-| Source                                 | Closed          | Closed           | Closed           | **Apache 2.0, local-first**     |
+|                                        | Coding agents    | Editor IDEs      | Chatbots         | **Franklin**                    |
+| -------------------------------------- | ---------------- | ---------------- | ---------------- | ------------------------------- |
+| Writes code                            | ✅               | ✅               | ⚠️                | ✅                              |
+| **Spends money for you**               | ❌               | ❌               | ❌               | ✅ **USDC wallet, x402**        |
+| **Buys data + APIs + images + search** | ❌               | ❌               | ❌               | ✅ **55+ APIs, one wallet**     |
+| Picks best model per task              | ❌ single-vendor | ❌ plan-tied    | ❌               | ✅ **Smart Router, 55+ models** |
+| Pricing model                          | Subscription     | Subscription     | Subscription     | **YOPO** — per outcome, USDC    |
+| Monthly fee                            | $20–$200         | $20–$40          | $20+             | **$0**                          |
+| Rate-limited                           | Yes              | Yes              | Yes              | No — limited only by wallet     |
+| Works when provider goes down          | ❌               | ❌               | ❌               | ✅ **routes to another**        |
+| Identity                               | Vendor account   | Vendor account   | Account / email  | ✅ **wallet, no signup**        |
+| Start free, no KYC                     | ❌               | ❌               | ❌               | ✅                              |
+| Source                                 | Closed           | Closed           | Closed           | **Apache 2.0, local-first**     |
 **Franklin is the economic agent category in one sentence:** software with a wallet that can spend toward a result.
@@ -430,7 +430,7 @@ Start with **zero dollars**. Franklin defaults to free NVIDIA models that need n
 franklin --model nvidia/nemotron-ultra-253b
 ```
-When you fund the wallet, Franklin gets more purchasing power: Claude, GPT, Gemini, Grok, and paid tools like Exa, DALL-E, and CoinGecko Pro.
+When you fund the wallet, Franklin gets more purchasing power: Sonnet, Opus, GPT, Gemini, Grok, and paid tools like Exa, DALL-E, and CoinGecko Pro.
 ---
@@ -464,15 +464,14 @@ The chat-based social tools (`SearchX`, `PostToX`) and the batch CLI (`franklin
 - [Telegram](https://t.me/blockrunAI) — realtime help, bug reports, feature requests
 - [@BlockRunAI](https://x.com/BlockRunAI) — release notes, demos
-- [Issues](https://github.com/BlockRunAI/franklin/issues) — bugs and feature requests
-- [Discussions](https://github.com/BlockRunAI/franklin/discussions) — ideas, Q&A, show & tell
+- [Issues](https://gitlab.com/blockrunai/franklin/-/issues) — bugs and feature requests
 ---
 ## Development
 ```bash
-git clone https://github.com/BlockRunAI/franklin.git
+git clone https://gitlab.com/blockrunai/franklin.git
 cd franklin
 npm install
 npm run build
@@ -485,18 +484,6 @@ node dist/index.js --help
 ---
-## Star history
-<a href="https://star-history.com/#BlockRunAI/franklin&Date">
-  <picture>
-    <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date&theme=dark">
-    <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date">
-    <img alt="Star history" src="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date">
-  </picture>
-</a>
----
 ## License
 Apache-2.0. See [LICENSE](LICENSE).

package/dist/agent/commands.js CHANGED Viewed

@@ -383,7 +383,7 @@ const DIRECT_COMMANDS = {
         let text = `**Session Cost**\n` +
             `  Requests: ${stats.totalRequests}\n` +
             `  Cost:     $${stats.totalCostUsd.toFixed(4)} USDC\n` +
-            `  Saved:    $${saved.toFixed(2)} vs Claude Opus\n` +
+            `  Saved:    $${saved.toFixed(2)} vs Opus tier\n` +
             `  Tokens:   ${formatTokens(stats.totalInputTokens)} in / ${formatTokens(stats.totalOutputTokens)} out\n`;
         if (breakdown.length > 0) {
             text += `\n  **By model:**\n`;

package/dist/agent/llm.d.ts CHANGED Viewed

@@ -52,6 +52,12 @@ export interface LLMClientOptions {
  * Exported so tests can pin this decision without a live API.
  */
 export declare function modelHasExtendedThinking(model: string): boolean;
+/**
+ * Classify an unparseable tool-call JSON failure so the user and the model
+ * get an actionable message instead of a single generic line. Exported for
+ * direct unit testing — the happy path hits it only on stream error.
+ */
+export declare function classifyToolCallFailure(toolName: string, rawInput: string, signal: AbortSignal | undefined, model: string): string;
 export declare class ModelClient {
     private apiUrl;
     private chain;

package/dist/agent/llm.js CHANGED Viewed

@@ -5,6 +5,7 @@
  */
 import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
 import { USER_AGENT } from '../config.js';
+import { ThinkTagStripper } from './think-tag-stripper.js';
 // ─── Anthropic Prompt Caching ─────────────────────────────────────────────
 /**
  * Apply Anthropic prompt caching using the `system_and_3` strategy.
@@ -42,6 +43,33 @@ export function modelHasExtendedThinking(model) {
         m.includes('sonnet-4') ||
         m.includes('sonnet-3.7'));
 }
+/**
+ * Classify an unparseable tool-call JSON failure so the user and the model
+ * get an actionable message instead of a single generic line. Exported for
+ * direct unit testing — the happy path hits it only on stream error.
+ */
+export function classifyToolCallFailure(toolName, rawInput, signal, model) {
+    if (signal?.aborted) {
+        return `[Tool call to ${toolName} was canceled before the input finished streaming. ` +
+            `Previous response kept. Resubmit the last message to retry.]`;
+    }
+    const charsReceived = rawInput.length;
+    // If we have almost nothing, the stream stopped early (timeout / model cut off).
+    // If we have a lot but it's still invalid, the model produced malformed JSON.
+    if (charsReceived < 8) {
+        return `[Tool call to ${toolName} was interrupted mid-stream (only ${charsReceived} chars received) — ` +
+            `likely a model timeout or rate limit on ${model}. Try \`/model <other>\` or resubmit.]`;
+    }
+    const looksTruncated = !rawInput.trimEnd().endsWith('}');
+    if (looksTruncated) {
+        return `[Model ${model} cut off mid tool call (${charsReceived} chars received, JSON not closed). ` +
+            `Try \`/model <stronger>\` or shorten the prompt.]`;
+    }
+    const preview = rawInput.slice(0, 120).replace(/\s+/g, ' ');
+    return `[Tool call to ${toolName} had malformed JSON input (${charsReceived} chars). ` +
+        `Preview: ${preview}${rawInput.length > 120 ? '…' : ''} — ` +
+        `this is usually a model output bug; try \`/model <other>\` or retry.]`;
+}
 function applyAnthropicPromptCaching(payload, request) {
     const out = { ...payload };
     const cacheMarker = { type: 'ephemeral' };
@@ -267,6 +295,17 @@ export class ModelClient {
         let currentToolId = '';
         let currentToolName = '';
         let currentToolInput = '';
+        // Split inline <think>…</think> emitted by reasoning models (nemotron,
+        // deepseek-r1, qwq, etc.) that use the text field instead of the native
+        // thinking block. Thinking emitted this way is display-only — we don't
+        // store it in history (Anthropic thinking blocks require signatures).
+        // Reset per text block.
+        let textStripper = new ThinkTagStripper();
+        // One-shot observability: log when a weak model starts role-playing tool
+        // calls as literal text tokens. We don't rewrite the stream — the
+        // system-prompt guard in loop.ts is responsible for preventing this.
+        // Debug-only because the user already sees the literal text in the UI.
+        let toolCallRoleplayWarned = false;
         for await (const chunk of this.streamCompletion(request, signal)) {
             switch (chunk.kind) {
                 case 'content_block_start': {
@@ -283,6 +322,7 @@ export class ModelClient {
                     }
                     else if (cblock?.type === 'text') {
                         currentText = '';
+                        textStripper = new ThinkTagStripper();
                     }
                     break;
                 }
@@ -291,10 +331,34 @@ export class ModelClient {
                     if (!delta)
                         break;
                     if (delta.type === 'text_delta') {
-                        const text = delta.text || '';
-                        currentText += text;
-                        if (text)
-                            onStreamDelta?.({ type: 'text', text });
+                        const raw = delta.text || '';
+                        if (!toolCallRoleplayWarned) {
+                            // Only scan the last ~15 chars of already-emitted text plus the
+                            // new delta — enough to catch a token straddling the chunk
+                            // boundary (`[TOOLCALL]`=10, `<tool_calls>`=12) without the
+                            // O(N²) blowup of re-scanning the whole accumulated text on
+                            // every delta.
+                            const window = currentText.slice(-15) + raw;
+                            if (/\[TOOLCALL\]|<tool_calls?>/i.test(window)) {
+                                toolCallRoleplayWarned = true;
+                                if (this.debug) {
+                                    console.error(`[franklin] Model ${request.model} emitted a tool-call ` +
+                                        'roleplay token ([TOOLCALL] / <tool_call>) in its text. ' +
+                                        'This is a model hallucination; real tool calls arrive ' +
+                                        'as tool_use blocks, not text.');
+                                }
+                            }
+                        }
+                        for (const seg of textStripper.push(raw)) {
+                            if (seg.type === 'text') {
+                                currentText += seg.text;
+                                if (seg.text)
+                                    onStreamDelta?.({ type: 'text', text: seg.text });
+                            }
+                            else if (seg.text) {
+                                onStreamDelta?.({ type: 'thinking', text: seg.text });
+                            }
+                        }
                     }
                     else if (delta.type === 'thinking_delta') {
                         const text = delta.thinking || '';
@@ -329,11 +393,13 @@ export class ModelClient {
                             }
                         }
                         if (inputParseError) {
-                            // Don't invoke the tool — add a text block explaining the error
-                            // and skip the tool_use entirely. The model will see the error and retry.
+                            // Don't invoke the tool — add a classified text block so the
+                            // user (and the model) can see the specific cause. Prior streamed
+                            // text is already in `collected` from earlier content_block_stop
+                            // events, so partial work survives.
                             collected.push({
                                 type: 'text',
-                                text: `[Tool call to ${currentToolName} failed: incomplete JSON input from stream. The request may have been interrupted.]`,
+                                text: classifyToolCallFailure(currentToolName, currentToolInput, signal, request.model),
                             });
                         }
                         else {
@@ -360,12 +426,25 @@ export class ModelClient {
                         currentThinking = '';
                         currentThinkingSignature = '';
                     }
-                    else if (currentText) {
-                        collected.push({
-                            type: 'text',
-                            text: currentText,
-                        });
-                        currentText = '';
+                    else {
+                        // Flush any partial tag held in the stripper
+                        for (const seg of textStripper.flush()) {
+                            if (seg.type === 'text') {
+                                currentText += seg.text;
+                                if (seg.text)
+                                    onStreamDelta?.({ type: 'text', text: seg.text });
+                            }
+                            else if (seg.text) {
+                                onStreamDelta?.({ type: 'thinking', text: seg.text });
+                            }
+                        }
+                        if (currentText) {
+                            collected.push({
+                                type: 'text',
+                                text: currentText,
+                            });
+                            currentText = '';
+                        }
                     }
                     break;
                 }
@@ -399,7 +478,17 @@ export class ModelClient {
                 }
             }
         }
-        // Flush any remaining text
+        // Flush any remaining text (stream ended without content_block_stop)
+        for (const seg of textStripper.flush()) {
+            if (seg.type === 'text') {
+                currentText += seg.text;
+                if (seg.text)
+                    onStreamDelta?.({ type: 'text', text: seg.text });
+            }
+            else if (seg.text) {
+                onStreamDelta?.({ type: 'thinking', text: seg.text });
+            }
+        }
         if (currentText) {
             collected.push({ type: 'text', text: currentText });
         }

package/dist/agent/loop.d.ts CHANGED Viewed

@@ -3,6 +3,15 @@
  * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
  */
 import type { AgentConfig, Dialogue, StreamEvent } from './types.js';
+/**
+ * Identify models known to hallucinate tool calls (invented names, literal
+ * `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit
+ * "Available tools" inventory appended to the system prompt. Strong frontier
+ * models skip the nag so their prompt cache doesn't turn over.
+ *
+ * Exported so tests can pin the classification without a live API.
+ */
+export declare function isWeakModel(model: string): boolean;
 /**
  * Run a multi-turn interactive session.
  * Each user message triggers a full agent loop.

package/dist/agent/loop.js CHANGED Viewed

@@ -18,6 +18,7 @@ import { recordSessionUsage } from '../stats/session-tracker.js';
 import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
 import { estimateCost, OPUS_PRICING } from '../pricing.js';
 import { maybeMidSessionExtract } from '../learnings/extractor.js';
+import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
 import { routeRequest, parseRoutingProfile } from '../router/index.js';
 import { recordOutcome } from '../router/local-elo.js';
 import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
@@ -270,6 +271,33 @@ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
     const jitter = base * 0.25 * (Math.random() * 2 - 1); // ±25%
     return Math.max(500, Math.round(base + jitter));
 }
+/**
+ * Identify models known to hallucinate tool calls (invented names, literal
+ * `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit
+ * "Available tools" inventory appended to the system prompt. Strong frontier
+ * models skip the nag so their prompt cache doesn't turn over.
+ *
+ * Exported so tests can pin the classification without a live API.
+ */
+export function isWeakModel(model) {
+    const m = model.toLowerCase();
+    // NVIDIA-hosted open models have been observed confabulating tool calls.
+    // `blockrun/free` and `blockrun/eco` resolve to nvidia/nemotron-ultra in
+    // llm.ts, so catching the `nvidia/` prefix also catches those paths.
+    if (m.startsWith('nvidia/'))
+        return true;
+    if (m.includes('nemotron-ultra'))
+        return true;
+    if (m.includes('qwen3-coder'))
+        return true;
+    // GLM-4* is weak; GLM-5+ is capable enough to skip the nag.
+    if (/^zai\/glm-4/.test(m))
+        return true;
+    // DeepSeek's smaller / quantized SKUs tend to role-play tools too.
+    if (/deepseek[-_/](r1|v3|chat)-?(lite|mini|tiny)/.test(m))
+        return true;
+    return false;
+}
 // ─── Interactive Session ───────────────────────────────────────────────────
 /**
  * Run a multi-turn interactive session.
@@ -341,6 +369,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             outputTokens: sessionOutputTokens,
             costUsd: sessionCostUsd,
             savedVsOpusUsd: sessionSavedVsOpus,
+            ...(config.sessionChannel !== undefined ? { channel: config.sessionChannel } : {}),
         });
     };
     const persistSessionMessage = (message) => {
@@ -414,6 +443,44 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             config.onModelChange?.(baseModel, 'system');
         }
         turnFailedModels = new Set(); // Fresh slate for transient failures this turn
+        // ── Brain auto-recall (computed once per user turn) ──
+        // Scan the new user message plus the previous assistant reply (so
+        // cross-turn references like "that company we discussed" still resolve)
+        // for entity mentions, and build the context string. The inner agent
+        // loop can iterate many times (planner + executor steps); the user's
+        // input doesn't change between those iterations, so caching here saves
+        // loadEntities + loadObservations + loadRelations on every re-entry.
+        let turnBrainContext = '';
+        try {
+            const lastAssistantBeforeThisTurn = [...history.slice(0, -1)]
+                .reverse()
+                .find((m) => m.role === 'assistant');
+            const flatten = (d) => {
+                if (!d)
+                    return '';
+                if (typeof d.content === 'string')
+                    return d.content;
+                if (!Array.isArray(d.content))
+                    return '';
+                return d.content
+                    .filter(p => p.type === 'text')
+                    .map(p => p.text ?? '')
+                    .join(' ');
+            };
+            const scanText = input + '\n' + flatten(lastAssistantBeforeThisTurn);
+            if (scanText.trim().length > 0) {
+                const entities = loadEntities();
+                if (entities.length > 0) {
+                    const mentioned = extractMentions(scanText, entities);
+                    if (mentioned.length > 0) {
+                        turnBrainContext = buildEntityContext(mentioned, entities) ?? '';
+                    }
+                }
+            }
+        }
+        catch {
+            /* brain is optional — never block a turn on recall */
+        }
         const abort = new AbortController();
         onAbortReady?.(() => abort.abort());
         let loopCount = 0;
@@ -527,6 +594,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 systemParts.push('# Context Window Status\nContext window has crossed the halfway mark (>50%). ' +
                     'Prefer concise responses and batch tool calls when possible.');
             }
+            // ── Brain auto-recall (computed once per user turn above) ──
+            if (turnBrainContext)
+                systemParts.push(turnBrainContext);
             const systemPrompt = systemParts.join('\n\n');
             const modelMaxOut = getMaxOutputTokens(config.model);
             let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
@@ -600,6 +670,21 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 callMaxTokens = 2048; // Short plan output
                 callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
             }
+            // ── Hallucination guard for weak models ──
+            // Weak / free models (nemotron-ultra, GLM-4, qwen coder, free-profile
+            // resolves) have been observed inventing tool names (e.g. MixtureOfAgents)
+            // and emitting literal `[TOOLCALL]` / `<tool_call>` text pretending to
+            // call tools. Give them an explicit inventory + an anti-roleplay hint.
+            // Skipped for strong models to keep their prompt cache warm.
+            if (isWeakModel(resolvedModel) && callToolDefs.length > 0) {
+                const names = callToolDefs.map(t => t.name).join(', ');
+                callSystemPrompt = callSystemPrompt +
+                    '\n\n# Available tools\n' +
+                    `You have exactly these tools: ${names}.\n` +
+                    'Do not invent other tool names. Do not emit literal "[TOOLCALL]", ' +
+                    '"<tool_call>", or similar tokens in your text — call tools via the ' +
+                    'proper API only. If no tool fits, explain plainly in prose.';
+            }
             // Safety net: handled in llm.ts resolveVirtualModel()
             // Sanitize: remove orphaned tool results that could confuse the API
             const sanitized = sanitizeHistory(history);

package/dist/agent/think-tag-stripper.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Streaming parser that splits `<think>…</think>` (and `<thinking>…</thinking>`)
+ * tags embedded in a model's text output into separate text / thinking segments.
+ *
+ * Problem: reasoning models like nemotron, deepseek-r1, qwq emit their chain of
+ * thought inline in the text content field — not via the Anthropic `thinking`
+ * block nor the OpenAI `reasoning_content` field. If we don't split these,
+ * the literal `<think>` tags and the full reasoning leak into the answer UI
+ * and into conversation history (wasting context on future turns).
+ *
+ * Usage:
+ *   const s = new ThinkTagStripper();
+ *   for (const seg of s.push(chunk)) emit(seg);
+ *   for (const seg of s.flush()) emit(seg);
+ *
+ * Handles tags split across chunk boundaries by holding a small suffix.
+ */
+export type Segment = { // one contiguous piece of model output produced by ThinkTagStripper
+    type: 'text' | 'thinking'; // 'text' = content outside think tags, 'thinking' = content between them
+    text: string; // the piece's content; the think tags themselves are not included
+};
+export declare class ThinkTagStripper { // streaming splitter for inline <think>/<thinking> blocks
+    private mode; // which segment type is currently being read ('text' or 'thinking')
+    private pending; // possible partial tag held back from the end of the previous chunk
+    push(chunk: string): Segment[]; // feed the next streamed chunk; returns the segments that are now complete
+    flush(): Segment[]; // call once at end of stream; returns any held-back text as a final segment
+}

package/dist/agent/think-tag-stripper.js ADDED Viewed

@@ -0,0 +1,75 @@
+/**
+ * Streaming parser that splits `<think>…</think>` (and `<thinking>…</thinking>`)
+ * tags embedded in a model's text output into separate text / thinking segments.
+ *
+ * Problem: reasoning models like nemotron, deepseek-r1, qwq emit their chain of
+ * thought inline in the text content field — not via the Anthropic `thinking`
+ * block nor the OpenAI `reasoning_content` field. If we don't split these,
+ * the literal `<think>` tags and the full reasoning leak into the answer UI
+ * and into conversation history (wasting context on future turns).
+ *
+ * Usage:
+ *   const s = new ThinkTagStripper();
+ *   for (const seg of s.push(chunk)) emit(seg);
+ *   for (const seg of s.flush()) emit(seg);
+ *
+ * Handles tags split across chunk boundaries by holding a small suffix.
+ */
+const OPEN_TAGS = ['<think>', '<thinking>']; // tags that switch the stripper from 'text' to 'thinking' (matched case-sensitively)
+const CLOSE_TAGS = ['</think>', '</thinking>']; // tags that switch it back; either one ends a thinking block regardless of which tag opened it
+export class ThinkTagStripper { // stateful: create one instance per streamed response
+    mode = 'text'; // segment type currently being read: 'text' or 'thinking'
+    pending = ''; // tail of the previous chunk that might be the start of a tag
+    push(chunk) { // consume the next chunk and return the { type, text } segments that can be emitted now
+        const input = this.pending + chunk; // re-scan the held-back tail together with the new data
+        this.pending = '';
+        const out = [];
+        let emitStart = 0; // index in `input` where the not-yet-emitted segment begins
+        let i = 0; // scan cursor
+        const emit = (end) => { // push input[emitStart, end) as a segment of the current mode; empty ranges are skipped
+            if (end > emitStart) {
+                out.push({ type: this.mode, text: input.slice(emitStart, end) });
+            }
+        };
+        while (i < input.length) {
+            if (input[i] !== '<') { // only '<' can begin a tag; everything else is plain content
+                i++;
+                continue;
+            }
+            const tags = this.mode === 'text' ? OPEN_TAGS : CLOSE_TAGS; // in text mode look only for openers, in thinking mode only for closers
+            // Does a complete tag start at position i?
+            let matched = null;
+            for (const t of tags) {
+                if (input.startsWith(t, i)) {
+                    matched = t;
+                    break;
+                }
+            }
+            if (matched) {
+                emit(i); // emit what precedes the tag while `mode` still describes it
+                i += matched.length; // skip the tag so it never appears in any segment
+                emitStart = i;
+                this.mode = this.mode === 'text' ? 'thinking' : 'text'; // toggle between the two modes
+                continue;
+            }
+            // No full tag here. If everything from '<' to the end of input is a proper prefix of a tag, the tag may be split across chunks: hold it back.
+            const rest = input.slice(i);
+            const couldStillMatch = tags.some(t => t.length > rest.length && t.startsWith(rest));
+            if (couldStillMatch) {
+                emit(i); // everything before the possible tag is safe to emit now
+                this.pending = rest; // decided on the next push(), or released by flush()
+                return out;
+            }
+            i++; // this '<' cannot begin a relevant tag; treat it as ordinary content
+        }
+        emit(input.length); // emit whatever remains after the last tag
+        return out;
+    }
+    flush() { // end of stream: release any held-back partial tag as content of the current mode
+        if (!this.pending)
+            return [];
+        const segments = [{ type: this.mode, text: this.pending }]; // the partial tag never completed, so it is emitted verbatim
+        this.pending = '';
+        return segments;
+    }
+}

package/dist/agent/tokens.js CHANGED Viewed

@@ -6,7 +6,8 @@
 const DEFAULT_BYTES_PER_TOKEN = 4;
 /**
  * Model-specific bytes-per-token ratios for more accurate estimation.
- * Claude tokenizes more efficiently (~3.5 bytes/token), GPT at ~4, Gemini at ~3.
+ * Anthropic-family models tokenize at ~3.5 bytes/token, GPT-family at ~4,
+ * Gemini-family at ~3.
  */
 const MODEL_BYTES_PER_TOKEN = {
     'anthropic': 3.5,

package/dist/agent/types.d.ts CHANGED Viewed

@@ -148,4 +148,11 @@ export interface AgentConfig {
     baseModel?: string;
     /** Resume an existing session by ID — loads prior history and keeps appending to the same JSONL */
     resumeSessionId?: string;
+    /**
+     * Optional channel tag persisted to SessionMeta. Lets non-CLI drivers
+     * (Telegram bot, Discord bot, future ingresses) find their own sessions
+     * later via findLatestSessionByChannel. Regular CLI sessions leave this
+     * unset. Format: "<driver>:<owner-or-chat-id>", e.g. "telegram:12345".
+     */
+    sessionChannel?: string;
 }

package/dist/brain/index.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
 export type { Entity, EntityType, Observation, Relation, BrainExtraction } from './types.js';
-export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, } from './store.js';
+export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, extractMentions, } from './store.js';
 export { extractBrainEntities } from './extract.js';

package/dist/brain/index.js CHANGED Viewed

@@ -1,2 +1,2 @@
-export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, } from './store.js';
+export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, extractMentions, } from './store.js';
 export { extractBrainEntities } from './extract.js';

package/dist/brain/store.d.ts CHANGED Viewed

@@ -34,7 +34,19 @@ export declare function searchEntities(query: string, limit?: number): Entity[];
  * Build context string for entities mentioned in the conversation.
  * Returns empty string if no relevant entities found.
  */
-export declare function buildEntityContext(mentionedNames: string[]): string;
+export declare function buildEntityContext(mentionedNames: string[], entitiesCache?: Entity[]): string;
+/**
+ * Scan `text` for occurrences of any known entity's canonical name or alias
+ * and return the matched canonical names (deduped, case-preserving).
+ * Word-boundary match so "Base" in "Baseline" doesn't match entity "Base".
+ *
+ * This is the read half of the brain — the agent loop calls this on each
+ * user turn to decide which entities to auto-inject into the system prompt.
+ *
+ * Pass `entities` if the caller already has them loaded to avoid re-reading
+ * the JSONL; otherwise we load it ourselves.
+ */
+export declare function extractMentions(text: string, entities?: Entity[]): string[];
 export declare function getBrainStats(): {
     entities: number;
     observations: number;