@blockrun/franklin 3.8.2 → 3.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +23 -36
  2. package/dist/agent/commands.js +1 -1
  3. package/dist/agent/llm.d.ts +6 -0
  4. package/dist/agent/llm.js +103 -14
  5. package/dist/agent/loop.d.ts +9 -0
  6. package/dist/agent/loop.js +85 -0
  7. package/dist/agent/think-tag-stripper.d.ts +27 -0
  8. package/dist/agent/think-tag-stripper.js +75 -0
  9. package/dist/agent/tokens.js +2 -1
  10. package/dist/agent/types.d.ts +7 -0
  11. package/dist/brain/index.d.ts +1 -1
  12. package/dist/brain/index.js +1 -1
  13. package/dist/brain/store.d.ts +13 -1
  14. package/dist/brain/store.js +74 -5
  15. package/dist/channel/telegram.d.ts +46 -0
  16. package/dist/channel/telegram.js +367 -0
  17. package/dist/commands/migrate.d.ts +5 -3
  18. package/dist/commands/migrate.js +17 -15
  19. package/dist/commands/stats.js +1 -1
  20. package/dist/commands/telegram.d.ts +15 -0
  21. package/dist/commands/telegram.js +95 -0
  22. package/dist/content/library.js +2 -2
  23. package/dist/index.js +9 -0
  24. package/dist/panel/html.js +1 -1
  25. package/dist/router/index.js +5 -5
  26. package/dist/session/storage.d.ts +12 -0
  27. package/dist/session/storage.js +11 -0
  28. package/dist/social/ai.d.ts +3 -2
  29. package/dist/social/ai.js +3 -2
  30. package/dist/stats/insights.d.ts +1 -1
  31. package/dist/stats/tracker.js +1 -1
  32. package/dist/tools/content-execute.d.ts +1 -1
  33. package/dist/tools/content-execute.js +1 -1
  34. package/dist/tools/index.js +11 -3
  35. package/dist/tools/memory.d.ts +16 -0
  36. package/dist/tools/memory.js +86 -0
  37. package/dist/tools/trading-execute.d.ts +2 -2
  38. package/dist/tools/trading-execute.js +2 -2
  39. package/dist/tools/videogen.d.ts +17 -0
  40. package/dist/tools/videogen.js +237 -0
  41. package/dist/trading/trade-log.d.ts +2 -2
  42. package/dist/trading/trade-log.js +2 -2
  43. package/dist/ui/app.js +38 -3
  44. package/dist/ui/markdown.d.ts +16 -0
  45. package/dist/ui/markdown.js +26 -2
  46. package/package.json +5 -2
package/README.md CHANGED
@@ -16,9 +16,9 @@
16
16
  <p>
17
17
  <a href="https://npmjs.com/package/@blockrun/franklin"><img src="https://img.shields.io/npm/v/@blockrun/franklin.svg?style=flat-square&color=FFD700&label=npm" alt="npm"></a>
18
18
  <a href="https://npmjs.com/package/@blockrun/franklin"><img src="https://img.shields.io/npm/dm/@blockrun/franklin.svg?style=flat-square&color=10B981&label=downloads" alt="downloads"></a>
19
- <a href="https://github.com/BlockRunAI/franklin/stargazers"><img src="https://img.shields.io/github/stars/BlockRunAI/franklin?style=flat-square&color=FFD700&label=stars" alt="stars"></a>
19
+ <a href="https://gitlab.com/blockrunai/franklin"><img src="https://img.shields.io/gitlab/stars/blockrunai/franklin?style=flat-square&color=FFD700&label=stars" alt="stars"></a>
20
20
  <a href="LICENSE"><img src="https://img.shields.io/badge/license-Apache_2.0-blue?style=flat-square" alt="license"></a>
21
- <a href="https://github.com/BlockRunAI/franklin/actions"><img src="https://img.shields.io/github/actions/workflow/status/BlockRunAI/franklin/ci.yml?style=flat-square&label=ci" alt="ci"></a>
21
+ <a href="https://gitlab.com/blockrunai/franklin/-/pipelines"><img src="https://img.shields.io/gitlab/pipeline-status/blockrunai%2Ffranklin?branch=main&style=flat-square&label=ci" alt="ci"></a>
22
22
  <a href="https://www.typescriptlang.org/"><img src="https://img.shields.io/badge/TypeScript-strict-3178C6?style=flat-square&logo=typescript&logoColor=white" alt="TypeScript"></a>
23
23
  <a href="https://nodejs.org/"><img src="https://img.shields.io/badge/Node-%E2%89%A520-339933?style=flat-square&logo=node.js&logoColor=white" alt="Node"></a>
24
24
  <a href="https://x402.org"><img src="https://img.shields.io/badge/x402-native-10B981?style=flat-square" alt="x402"></a>
@@ -31,7 +31,7 @@
31
31
  <a href="#a-new-category">Category</a> ·
32
32
  <a href="#what-franklin-can-execute">What&nbsp;it&nbsp;does</a> ·
33
33
  <a href="#smart-router">Smart&nbsp;Router</a> ·
34
- <a href="#the-comparison">vs.&nbsp;Claude&nbsp;Code</a> ·
34
+ <a href="#the-comparison">Comparison</a> ·
35
35
  <a href="#how-it-works">Architecture</a> ·
36
36
  <a href="#community">Community</a>
37
37
  </p>
@@ -42,7 +42,7 @@
42
42
 
43
43
  ## The pitch in one paragraph
44
44
 
45
- Claude Code writes code. Cursor writes code. Franklin writes code **and spends money to get the job done**. It holds a USDC wallet, picks the best model per task from 55+ providers, purchases trading data, generates images, pays for web search — all autonomously. You state an outcome and set a budget. Franklin decides what to call, what to pay for, and when to stop. Every paid action routes through the [x402](https://x402.org) micropayment protocol and settles against your own wallet. No subscriptions. No API keys. No account. The wallet is the identity.
45
+ Most coding agents write code. Franklin writes code **and spends money to get the job done**. It holds a USDC wallet, picks the best model per task from 55+ providers, purchases trading data, generates images, pays for web search — all autonomously. You state an outcome and set a budget. Franklin decides what to call, what to pay for, and when to stop. Every paid action routes through the [x402](https://x402.org) micropayment protocol and settles against your own wallet. No subscriptions. No API keys. No account. The wallet is the identity.
46
46
 
47
47
  Built by the [BlockRun](https://blockrun.ai) team. Apache-2.0. TypeScript. Ships as one npm package.
48
48
 
@@ -63,7 +63,7 @@ npm install -g @blockrun/franklin
63
63
  # 2. Run (free — uses NVIDIA Nemotron & Qwen3 Coder out of the box)
64
64
  franklin
65
65
 
66
- # 3. (optional) Fund a wallet to unlock Claude, GPT, Gemini, Grok, + paid APIs
66
+ # 3. (optional) Fund a wallet to unlock Sonnet, Opus, GPT, Gemini, Grok, + paid APIs
67
67
  franklin setup base # or: franklin setup solana
68
68
  franklin balance # show address + USDC balance
69
69
  ```
@@ -78,7 +78,7 @@ That's it. Zero signup, zero credit card, zero phone verification. Send **$5 of
78
78
 
79
79
  | | You pay for... | Result |
80
80
  | ----------------------- | -------------------------------------------- | ------------------------------------ |
81
- | Subscription (ChatGPT Plus, Claude Max, Cursor Pro) | Access. Paid whether you use it or not. | $20–200/month, rate-limited. |
81
+ | AI subscription | Access. Paid whether you use it or not. | $20–200/month, rate-limited. |
82
82
  | Pay-per-call (OpenAI API, etc.) | Every attempt — even failed ones. | Hidden cost from retries, dead ends. |
83
83
  | **Franklin (YOPO)** | **The outcome.** Each signed micropayment. | **Provider cost + 5%. No more.** |
84
84
 
@@ -253,7 +253,7 @@ You don't subscribe to electricity, you pay for what you use. Franklin brings th
253
253
 
254
254
  ### 🧠 &nbsp;Multi-model is the future
255
255
 
256
- No single model is best at everything. Claude writes better code, Gemini handles longer context, DeepSeek costs 20x less for simple tasks. The Smart Router routes every request to the optimal model in <1ms — up to 89% savings vs. always using Opus.
256
+ No single model is best at everything. Sonnet writes better code, Gemini handles longer context, DeepSeek costs 20x less for simple tasks. The Smart Router routes every request to the optimal model in <1ms — up to 89% savings vs. always using Opus.
257
257
 
258
258
  </td>
259
259
  <td width="33%" valign="top">
@@ -270,19 +270,19 @@ No email. No phone. No KYC. Your Base or Solana address is your account — port
270
270
 
271
271
  ## The comparison
272
272
 
273
- | | Claude Code | Cursor | Chatbots | **Franklin** |
274
- | -------------------------------------- | --------------- | ---------------- | ---------------- | ------------------------------- |
275
- | Writes code | ✅ | ✅ | ⚠️ | ✅ |
276
- | **Spends money for you** | ❌ | ❌ | ❌ | ✅ **USDC wallet, x402** |
277
- | **Buys data + APIs + images + search** | ❌ | ❌ | ❌ | ✅ **55+ APIs, one wallet** |
278
- | Picks best model per task | ❌ Anthropic only | ❌ plan-tied | ❌ | ✅ **Smart Router, 55+ models** |
279
- | Pricing model | Subscription | Subscription | Subscription | **YOPO** — per outcome, USDC |
280
- | Monthly fee | $20–$200 | $20–$40 | $20+ | **$0** |
281
- | Rate-limited | Yes | Yes | Yes | No — limited only by wallet |
282
- | Works when provider goes down | ❌ | ❌ | ❌ | ✅ **routes to another** |
283
- | Identity | Anthropic account | Cursor account | Account / email | ✅ **wallet, no signup** |
284
- | Start free, no KYC | ❌ | ❌ | ❌ | ✅ |
285
- | Source | Closed | Closed | Closed | **Apache 2.0, local-first** |
273
+ | | Coding agents | Editor IDEs | Chatbots | **Franklin** |
274
+ | -------------------------------------- | ---------------- | ---------------- | ---------------- | ------------------------------- |
275
+ | Writes code | ✅ | ✅ | ⚠️ | ✅ |
276
+ | **Spends money for you** | ❌ | ❌ | ❌ | ✅ **USDC wallet, x402** |
277
+ | **Buys data + APIs + images + search** | ❌ | ❌ | ❌ | ✅ **55+ APIs, one wallet** |
278
+ | Picks best model per task | ❌ single-vendor | ❌ plan-tied | ❌ | ✅ **Smart Router, 55+ models** |
279
+ | Pricing model | Subscription | Subscription | Subscription | **YOPO** — per outcome, USDC |
280
+ | Monthly fee | $20–$200 | $20–$40 | $20+ | **$0** |
281
+ | Rate-limited | Yes | Yes | Yes | No — limited only by wallet |
282
+ | Works when provider goes down | ❌ | ❌ | ❌ | ✅ **routes to another** |
283
+ | Identity | Vendor account | Vendor account | Account / email | ✅ **wallet, no signup** |
284
+ | Start free, no KYC | ❌ | ❌ | ❌ | ✅ |
285
+ | Source | Closed | Closed | Closed | **Apache 2.0, local-first** |
286
286
 
287
287
  **Franklin is the economic agent category in one sentence:** software with a wallet that can spend toward a result.
288
288
 
@@ -430,7 +430,7 @@ Start with **zero dollars**. Franklin defaults to free NVIDIA models that need n
430
430
  franklin --model nvidia/nemotron-ultra-253b
431
431
  ```
432
432
 
433
- When you fund the wallet, Franklin gets more purchasing power: Claude, GPT, Gemini, Grok, and paid tools like Exa, DALL-E, and CoinGecko Pro.
433
+ When you fund the wallet, Franklin gets more purchasing power: Sonnet, Opus, GPT, Gemini, Grok, and paid tools like Exa, DALL-E, and CoinGecko Pro.
434
434
 
435
435
  ---
436
436
 
@@ -464,15 +464,14 @@ The chat-based social tools (`SearchX`, `PostToX`) and the batch CLI (`franklin
464
464
 
465
465
  - [Telegram](https://t.me/blockrunAI) — realtime help, bug reports, feature requests
466
466
  - [@BlockRunAI](https://x.com/BlockRunAI) — release notes, demos
467
- - [Issues](https://github.com/BlockRunAI/franklin/issues) — bugs and feature requests
468
- - [Discussions](https://github.com/BlockRunAI/franklin/discussions) — ideas, Q&A, show & tell
467
+ - [Issues](https://gitlab.com/blockrunai/franklin/-/issues) — bugs and feature requests
469
468
 
470
469
  ---
471
470
 
472
471
  ## Development
473
472
 
474
473
  ```bash
475
- git clone https://github.com/BlockRunAI/franklin.git
474
+ git clone https://gitlab.com/blockrunai/franklin.git
476
475
  cd franklin
477
476
  npm install
478
477
  npm run build
@@ -485,18 +484,6 @@ node dist/index.js --help
485
484
 
486
485
  ---
487
486
 
488
- ## Star history
489
-
490
- <a href="https://star-history.com/#BlockRunAI/franklin&Date">
491
- <picture>
492
- <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date&theme=dark">
493
- <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date">
494
- <img alt="Star history" src="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date">
495
- </picture>
496
- </a>
497
-
498
- ---
499
-
500
487
  ## License
501
488
 
502
489
  Apache-2.0. See [LICENSE](LICENSE).
@@ -383,7 +383,7 @@ const DIRECT_COMMANDS = {
383
383
  let text = `**Session Cost**\n` +
384
384
  ` Requests: ${stats.totalRequests}\n` +
385
385
  ` Cost: $${stats.totalCostUsd.toFixed(4)} USDC\n` +
386
- ` Saved: $${saved.toFixed(2)} vs Claude Opus\n` +
386
+ ` Saved: $${saved.toFixed(2)} vs Opus tier\n` +
387
387
  ` Tokens: ${formatTokens(stats.totalInputTokens)} in / ${formatTokens(stats.totalOutputTokens)} out\n`;
388
388
  if (breakdown.length > 0) {
389
389
  text += `\n **By model:**\n`;
@@ -52,6 +52,12 @@ export interface LLMClientOptions {
52
52
  * Exported so tests can pin this decision without a live API.
53
53
  */
54
54
  export declare function modelHasExtendedThinking(model: string): boolean;
55
+ /**
56
+ * Classify an unparseable tool-call JSON failure so the user and the model
57
+ * get an actionable message instead of a single generic line. Exported for
58
+ * direct unit testing — the happy path hits it only on stream error.
59
+ */
60
+ export declare function classifyToolCallFailure(toolName: string, rawInput: string, signal: AbortSignal | undefined, model: string): string;
55
61
  export declare class ModelClient {
56
62
  private apiUrl;
57
63
  private chain;
package/dist/agent/llm.js CHANGED
@@ -5,6 +5,7 @@
5
5
  */
6
6
  import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
7
7
  import { USER_AGENT } from '../config.js';
8
+ import { ThinkTagStripper } from './think-tag-stripper.js';
8
9
  // ─── Anthropic Prompt Caching ─────────────────────────────────────────────
9
10
  /**
10
11
  * Apply Anthropic prompt caching using the `system_and_3` strategy.
@@ -42,6 +43,33 @@ export function modelHasExtendedThinking(model) {
42
43
  m.includes('sonnet-4') ||
43
44
  m.includes('sonnet-3.7'));
44
45
  }
46
/**
 * Report whether `raw` ends while a JSON object, array, or string is still
 * open. String contents (including escaped quotes) are skipped so braces
 * inside strings are not counted. A stray or mismatched closer means the
 * text is malformed rather than truncated, so that case returns false.
 */
function jsonStructureIsOpen(raw) {
    const expectedClosers = [];
    let inString = false;
    let escaped = false;
    for (const ch of raw) {
        if (inString) {
            if (escaped)
                escaped = false;
            else if (ch === '\\')
                escaped = true;
            else if (ch === '"')
                inString = false;
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === '{' || ch === '[') {
            expectedClosers.push(ch === '{' ? '}' : ']');
        }
        else if (ch === '}' || ch === ']') {
            if (expectedClosers.pop() !== ch)
                return false;
        }
    }
    return inString || expectedClosers.length > 0;
}
/**
 * Classify an unparseable tool-call JSON failure so the user and the model
 * get an actionable message instead of a single generic line. Exported for
 * direct unit testing.
 *
 * Classification order:
 *   1. the abort signal is already aborted            → "canceled"
 *   2. fewer than 8 characters were received          → "interrupted mid-stream"
 *   3. the input does not end with `}` OR still has an
 *      open object / array / string at its end        → "cut off mid tool call"
 *   4. anything else                                  → "malformed JSON" + preview
 *
 * @param toolName Name of the tool whose input failed to parse.
 * @param rawInput Raw (unparsed) JSON text accumulated for the tool input.
 * @param signal   Optional abort signal; only its `aborted` flag is read.
 * @param model    Model identifier, interpolated into the message.
 * @returns A single bracketed, human-readable explanation string.
 */
export function classifyToolCallFailure(toolName, rawInput, signal, model) {
    if (signal?.aborted) {
        return `[Tool call to ${toolName} was canceled before the input finished streaming. ` +
            `Previous response kept. Resubmit the last message to retry.]`;
    }
    const charsReceived = rawInput.length;
    // If we have almost nothing, the stream stopped early (timeout / model cut off).
    // If we have a lot but it's still invalid, the model produced malformed JSON.
    if (charsReceived < 8) {
        return `[Tool call to ${toolName} was interrupted mid-stream (only ${charsReceived} chars received) — ` +
            `likely a model timeout or rate limit on ${model}. Try \`/model <other>\` or resubmit.]`;
    }
    // A trailing `}` alone does not prove the JSON was closed: the stream can stop
    // right after a nested object closes, or inside a string that happens to end
    // with `}`. Keep the cheap suffix check and add a structural one on top.
    const looksTruncated = !rawInput.trimEnd().endsWith('}') || jsonStructureIsOpen(rawInput);
    if (looksTruncated) {
        return `[Model ${model} cut off mid tool call (${charsReceived} chars received, JSON not closed). ` +
            `Try \`/model <stronger>\` or shorten the prompt.]`;
    }
    // Collapse whitespace so the preview stays on one line in the UI.
    const preview = rawInput.slice(0, 120).replace(/\s+/g, ' ');
    return `[Tool call to ${toolName} had malformed JSON input (${charsReceived} chars). ` +
        `Preview: ${preview}${rawInput.length > 120 ? '…' : ''} — ` +
        `this is usually a model output bug; try \`/model <other>\` or retry.]`;
}
45
73
  function applyAnthropicPromptCaching(payload, request) {
46
74
  const out = { ...payload };
47
75
  const cacheMarker = { type: 'ephemeral' };
@@ -267,6 +295,17 @@ export class ModelClient {
267
295
  let currentToolId = '';
268
296
  let currentToolName = '';
269
297
  let currentToolInput = '';
298
+ // Split inline <think>…</think> emitted by reasoning models (nemotron,
299
+ // deepseek-r1, qwq, etc.) that use the text field instead of the native
300
+ // thinking block. Thinking emitted this way is display-only — we don't
301
+ // store it in history (Anthropic thinking blocks require signatures).
302
+ // Reset per text block.
303
+ let textStripper = new ThinkTagStripper();
304
+ // One-shot observability: log when a weak model starts role-playing tool
305
+ // calls as literal text tokens. We don't rewrite the stream — the
306
+ // system-prompt guard in loop.ts is responsible for preventing this.
307
+ // Debug-only because the user already sees the literal text in the UI.
308
+ let toolCallRoleplayWarned = false;
270
309
  for await (const chunk of this.streamCompletion(request, signal)) {
271
310
  switch (chunk.kind) {
272
311
  case 'content_block_start': {
@@ -283,6 +322,7 @@ export class ModelClient {
283
322
  }
284
323
  else if (cblock?.type === 'text') {
285
324
  currentText = '';
325
+ textStripper = new ThinkTagStripper();
286
326
  }
287
327
  break;
288
328
  }
@@ -291,10 +331,34 @@ export class ModelClient {
291
331
  if (!delta)
292
332
  break;
293
333
  if (delta.type === 'text_delta') {
294
- const text = delta.text || '';
295
- currentText += text;
296
- if (text)
297
- onStreamDelta?.({ type: 'text', text });
334
+ const raw = delta.text || '';
335
+ if (!toolCallRoleplayWarned) {
336
+ // Only scan the last ~15 chars of already-emitted text plus the
337
+ // new delta enough to catch a token straddling the chunk
338
+ // boundary (`[TOOLCALL]`=10, `<tool_calls>`=12) without the
339
+ // O(N²) blowup of re-scanning the whole accumulated text on
340
+ // every delta.
341
+ const window = currentText.slice(-15) + raw;
342
+ if (/\[TOOLCALL\]|<tool_calls?>/i.test(window)) {
343
+ toolCallRoleplayWarned = true;
344
+ if (this.debug) {
345
+ console.error(`[franklin] Model ${request.model} emitted a tool-call ` +
346
+ 'roleplay token ([TOOLCALL] / <tool_call>) in its text. ' +
347
+ 'This is a model hallucination; real tool calls arrive ' +
348
+ 'as tool_use blocks, not text.');
349
+ }
350
+ }
351
+ }
352
+ for (const seg of textStripper.push(raw)) {
353
+ if (seg.type === 'text') {
354
+ currentText += seg.text;
355
+ if (seg.text)
356
+ onStreamDelta?.({ type: 'text', text: seg.text });
357
+ }
358
+ else if (seg.text) {
359
+ onStreamDelta?.({ type: 'thinking', text: seg.text });
360
+ }
361
+ }
298
362
  }
299
363
  else if (delta.type === 'thinking_delta') {
300
364
  const text = delta.thinking || '';
@@ -329,11 +393,13 @@ export class ModelClient {
329
393
  }
330
394
  }
331
395
  if (inputParseError) {
332
- // Don't invoke the tool — add a text block explaining the error
333
- // and skip the tool_use entirely. The model will see the error and retry.
396
+ // Don't invoke the tool — add a classified text block so the
397
+ // user (and the model) can see the specific cause. Prior streamed
398
+ // text is already in `collected` from earlier content_block_stop
399
+ // events, so partial work survives.
334
400
  collected.push({
335
401
  type: 'text',
336
- text: `[Tool call to ${currentToolName} failed: incomplete JSON input from stream. The request may have been interrupted.]`,
402
+ text: classifyToolCallFailure(currentToolName, currentToolInput, signal, request.model),
337
403
  });
338
404
  }
339
405
  else {
@@ -360,12 +426,25 @@ export class ModelClient {
360
426
  currentThinking = '';
361
427
  currentThinkingSignature = '';
362
428
  }
363
- else if (currentText) {
364
- collected.push({
365
- type: 'text',
366
- text: currentText,
367
- });
368
- currentText = '';
429
+ else {
430
+ // Flush any partial tag held in the stripper
431
+ for (const seg of textStripper.flush()) {
432
+ if (seg.type === 'text') {
433
+ currentText += seg.text;
434
+ if (seg.text)
435
+ onStreamDelta?.({ type: 'text', text: seg.text });
436
+ }
437
+ else if (seg.text) {
438
+ onStreamDelta?.({ type: 'thinking', text: seg.text });
439
+ }
440
+ }
441
+ if (currentText) {
442
+ collected.push({
443
+ type: 'text',
444
+ text: currentText,
445
+ });
446
+ currentText = '';
447
+ }
369
448
  }
370
449
  break;
371
450
  }
@@ -399,7 +478,17 @@ export class ModelClient {
399
478
  }
400
479
  }
401
480
  }
402
- // Flush any remaining text
481
+ // Flush any remaining text (stream ended without content_block_stop)
482
+ for (const seg of textStripper.flush()) {
483
+ if (seg.type === 'text') {
484
+ currentText += seg.text;
485
+ if (seg.text)
486
+ onStreamDelta?.({ type: 'text', text: seg.text });
487
+ }
488
+ else if (seg.text) {
489
+ onStreamDelta?.({ type: 'thinking', text: seg.text });
490
+ }
491
+ }
403
492
  if (currentText) {
404
493
  collected.push({ type: 'text', text: currentText });
405
494
  }
@@ -3,6 +3,15 @@
3
3
  * The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
4
4
  */
5
5
  import type { AgentConfig, Dialogue, StreamEvent } from './types.js';
6
+ /**
7
+ * Identify models known to hallucinate tool calls (invented names, literal
8
+ * `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit
9
+ * "Available tools" inventory appended to the system prompt. Strong frontier
10
+ * models skip the nag so their prompt cache doesn't turn over.
11
+ *
12
+ * Exported so tests can pin the classification without a live API.
13
+ */
14
+ export declare function isWeakModel(model: string): boolean;
6
15
  /**
7
16
  * Run a multi-turn interactive session.
8
17
  * Each user message triggers a full agent loop.
@@ -18,6 +18,7 @@ import { recordSessionUsage } from '../stats/session-tracker.js';
18
18
  import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
19
19
  import { estimateCost, OPUS_PRICING } from '../pricing.js';
20
20
  import { maybeMidSessionExtract } from '../learnings/extractor.js';
21
+ import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
21
22
  import { routeRequest, parseRoutingProfile } from '../router/index.js';
22
23
  import { recordOutcome } from '../router/local-elo.js';
23
24
  import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
@@ -270,6 +271,33 @@ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
270
271
  const jitter = base * 0.25 * (Math.random() * 2 - 1); // ±25%
271
272
  return Math.max(500, Math.round(base + jitter));
272
273
  }
274
/**
 * Identify models known to hallucinate tool calls (invented names, literal
 * `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit
 * "Available tools" inventory appended to the system prompt. Strong frontier
 * models skip the nag so their prompt cache doesn't turn over.
 *
 * Matching is case-insensitive. A missing, empty, or non-string model id is
 * treated as "not weak" instead of throwing.
 *
 * Exported so tests can pin the classification without a live API.
 *
 * @param model Model identifier, e.g. "nvidia/nemotron-ultra-253b".
 * @returns true when the id matches one of the known-weak patterns below.
 */
export function isWeakModel(model) {
    // Never let a missing model id crash the caller; no id means no match.
    if (typeof model !== 'string' || model.length === 0)
        return false;
    const m = model.toLowerCase();
    // NVIDIA-hosted open models have been observed confabulating tool calls.
    // NOTE(review): the original comment states `blockrun/free` and
    // `blockrun/eco` resolve to nvidia/nemotron-ultra in llm.ts, so the
    // `nvidia/` prefix is expected to cover those paths — that resolution is
    // not visible here; confirm against llm.ts.
    if (m.startsWith('nvidia/'))
        return true;
    if (m.includes('nemotron-ultra'))
        return true;
    if (m.includes('qwen3-coder'))
        return true;
    // GLM-4* is weak; GLM-5+ is capable enough to skip the nag.
    if (/^zai\/glm-4/.test(m))
        return true;
    // DeepSeek's smaller / quantized SKUs tend to role-play tools too.
    if (/deepseek[-_/](r1|v3|chat)-?(lite|mini|tiny)/.test(m))
        return true;
    return false;
}
273
301
  // ─── Interactive Session ───────────────────────────────────────────────────
274
302
  /**
275
303
  * Run a multi-turn interactive session.
@@ -341,6 +369,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
341
369
  outputTokens: sessionOutputTokens,
342
370
  costUsd: sessionCostUsd,
343
371
  savedVsOpusUsd: sessionSavedVsOpus,
372
+ ...(config.sessionChannel !== undefined ? { channel: config.sessionChannel } : {}),
344
373
  });
345
374
  };
346
375
  const persistSessionMessage = (message) => {
@@ -414,6 +443,44 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
414
443
  config.onModelChange?.(baseModel, 'system');
415
444
  }
416
445
  turnFailedModels = new Set(); // Fresh slate for transient failures this turn
446
+ // ── Brain auto-recall (computed once per user turn) ──
447
+ // Scan the new user message plus the previous assistant reply (so
448
+ // cross-turn references like "that company we discussed" still resolve)
449
+ // for entity mentions, and build the context string. The inner agent
450
+ // loop can iterate many times (planner + executor steps); the user's
451
+ // input doesn't change between those iterations, so caching here saves
452
+ // loadEntities + loadObservations + loadRelations on every re-entry.
453
+ let turnBrainContext = '';
454
+ try {
455
+ const lastAssistantBeforeThisTurn = [...history.slice(0, -1)]
456
+ .reverse()
457
+ .find((m) => m.role === 'assistant');
458
+ const flatten = (d) => {
459
+ if (!d)
460
+ return '';
461
+ if (typeof d.content === 'string')
462
+ return d.content;
463
+ if (!Array.isArray(d.content))
464
+ return '';
465
+ return d.content
466
+ .filter(p => p.type === 'text')
467
+ .map(p => p.text ?? '')
468
+ .join(' ');
469
+ };
470
+ const scanText = input + '\n' + flatten(lastAssistantBeforeThisTurn);
471
+ if (scanText.trim().length > 0) {
472
+ const entities = loadEntities();
473
+ if (entities.length > 0) {
474
+ const mentioned = extractMentions(scanText, entities);
475
+ if (mentioned.length > 0) {
476
+ turnBrainContext = buildEntityContext(mentioned, entities) ?? '';
477
+ }
478
+ }
479
+ }
480
+ }
481
+ catch {
482
+ /* brain is optional — never block a turn on recall */
483
+ }
417
484
  const abort = new AbortController();
418
485
  onAbortReady?.(() => abort.abort());
419
486
  let loopCount = 0;
@@ -527,6 +594,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
527
594
  systemParts.push('# Context Window Status\nContext window has crossed the halfway mark (>50%). ' +
528
595
  'Prefer concise responses and batch tool calls when possible.');
529
596
  }
597
+ // ── Brain auto-recall (computed once per user turn above) ──
598
+ if (turnBrainContext)
599
+ systemParts.push(turnBrainContext);
530
600
  const systemPrompt = systemParts.join('\n\n');
531
601
  const modelMaxOut = getMaxOutputTokens(config.model);
532
602
  let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
@@ -600,6 +670,21 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
600
670
  callMaxTokens = 2048; // Short plan output
601
671
  callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
602
672
  }
673
+ // ── Hallucination guard for weak models ──
674
+ // Weak / free models (nemotron-ultra, GLM-4, qwen coder, free-profile
675
+ // resolves) have been observed inventing tool names (e.g. MixtureOfAgents)
676
+ // and emitting literal `[TOOLCALL]` / `<tool_call>` text pretending to
677
+ // call tools. Give them an explicit inventory + an anti-roleplay hint.
678
+ // Skipped for strong models to keep their prompt cache warm.
679
+ if (isWeakModel(resolvedModel) && callToolDefs.length > 0) {
680
+ const names = callToolDefs.map(t => t.name).join(', ');
681
+ callSystemPrompt = callSystemPrompt +
682
+ '\n\n# Available tools\n' +
683
+ `You have exactly these tools: ${names}.\n` +
684
+ 'Do not invent other tool names. Do not emit literal "[TOOLCALL]", ' +
685
+ '"<tool_call>", or similar tokens in your text — call tools via the ' +
686
+ 'proper API only. If no tool fits, explain plainly in prose.';
687
+ }
603
688
  // Safety net: handled in llm.ts resolveVirtualModel()
604
689
  // Sanitize: remove orphaned tool results that could confuse the API
605
690
  const sanitized = sanitizeHistory(history);
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Streaming parser that splits `<think>…</think>` (and `<thinking>…</thinking>`)
3
+ * tags embedded in a model's text output into separate text / thinking segments.
4
+ *
5
+ * Problem: reasoning models like nemotron, deepseek-r1, qwq emit their chain of
6
+ * thought inline in the text content field — not via the Anthropic `thinking`
7
+ * block nor the OpenAI `reasoning_content` field. If we don't split these,
8
+ * the literal `<think>` tags and the full reasoning leak into the answer UI
9
+ * and into conversation history (wasting context on future turns).
10
+ *
11
+ * Usage:
12
+ * const s = new ThinkTagStripper();
13
+ * for (const seg of s.push(chunk)) emit(seg);
14
+ * for (const seg of s.flush()) emit(seg);
15
+ *
16
+ * Handles tags split across chunk boundaries by holding a small suffix.
17
+ */
18
+ export type Segment = {
19
+ type: 'text' | 'thinking';
20
+ text: string;
21
+ };
22
+ export declare class ThinkTagStripper {
23
+ private mode;
24
+ private pending;
25
+ push(chunk: string): Segment[];
26
+ flush(): Segment[];
27
+ }
@@ -0,0 +1,75 @@
1
/**
 * Streaming parser that splits `<think>…</think>` (and `<thinking>…</thinking>`)
 * tags embedded in a model's text output into separate text / thinking segments.
 *
 * Problem: reasoning models like nemotron, deepseek-r1, qwq emit their chain of
 * thought inline in the text content field — not via the Anthropic `thinking`
 * block nor the OpenAI `reasoning_content` field. If we don't split these,
 * the literal `<think>` tags and the full reasoning leak into the answer UI
 * and into conversation history (wasting context on future turns).
 *
 * Usage:
 *   const s = new ThinkTagStripper();
 *   for (const seg of s.push(chunk)) emit(seg);
 *   for (const seg of s.flush()) emit(seg);
 *
 * Handles tags split across chunk boundaries by holding a small suffix.
 * Tag matching is exact (lowercase, no attributes).
 */
const OPEN_TAGS = ['<think>', '<thinking>'];
const CLOSE_TAGS = ['</think>', '</thinking>'];
// A `<` that sits at least this many characters before the end of the buffer
// cannot be the start of a tag that is still incomplete, so the partial-tag
// check (and its substring copy) is skipped for it.
const MAX_TAG_LENGTH = Math.max(...OPEN_TAGS.map(t => t.length), ...CLOSE_TAGS.map(t => t.length));
export class ThinkTagStripper {
    // Which kind of segment is currently being emitted: 'text' or 'thinking'.
    mode = 'text';
    // Trailing characters held back because they may be the start of a tag.
    pending = '';
    /**
     * Feed the next streamed chunk.
     *
     * @param chunk Newly received text (may be empty).
     * @returns Segments that are safe to emit now, in order. A possible partial
     *          tag at the end of the chunk is held back until the next `push`
     *          or `flush`.
     */
    push(chunk) {
        const input = this.pending + chunk;
        this.pending = '';
        const out = [];
        let emitStart = 0;
        // Emit input[emitStart, end) under the current mode; empty spans are dropped.
        const emit = (end) => {
            if (end > emitStart) {
                out.push({ type: this.mode, text: input.slice(emitStart, end) });
            }
        };
        // Only `<` can start a tag, so jump straight from one `<` to the next.
        let i = input.indexOf('<');
        while (i !== -1) {
            // In text mode only an opening tag matters; in thinking mode only a closing one.
            const tags = this.mode === 'text' ? OPEN_TAGS : CLOSE_TAGS;
            const matched = tags.find(t => input.startsWith(t, i));
            if (matched !== undefined) {
                emit(i);
                emitStart = i + matched.length;
                this.mode = this.mode === 'text' ? 'thinking' : 'text';
                i = input.indexOf('<', emitStart);
                continue;
            }
            // Partial match at boundary? Hold back the remainder. Only possible when
            // the tail is shorter than the longest tag, which bounds the copy below.
            const remaining = input.length - i;
            if (remaining < MAX_TAG_LENGTH) {
                const rest = input.slice(i);
                if (tags.some(t => t.length > remaining && t.startsWith(rest))) {
                    emit(i);
                    this.pending = rest;
                    return out;
                }
            }
            i = input.indexOf('<', i + 1);
        }
        emit(input.length);
        return out;
    }
    /**
     * Release any held-back partial tag as literal content of the current mode.
     * Call once at the end of a text block / stream.
     *
     * @returns Zero or one segment.
     */
    flush() {
        if (!this.pending)
            return [];
        const segments = [{ type: this.mode, text: this.pending }];
        this.pending = '';
        return segments;
    }
}
@@ -6,7 +6,8 @@
6
6
  const DEFAULT_BYTES_PER_TOKEN = 4;
7
7
  /**
8
8
  * Model-specific bytes-per-token ratios for more accurate estimation.
9
- * Claude tokenizes more efficiently (~3.5 bytes/token), GPT at ~4, Gemini at ~3.
9
+ * Anthropic-family models tokenize at ~3.5 bytes/token, GPT-family at ~4,
10
+ * Gemini-family at ~3.
10
11
  */
11
12
  const MODEL_BYTES_PER_TOKEN = {
12
13
  'anthropic': 3.5,
@@ -148,4 +148,11 @@ export interface AgentConfig {
148
148
  baseModel?: string;
149
149
  /** Resume an existing session by ID — loads prior history and keeps appending to the same JSONL */
150
150
  resumeSessionId?: string;
151
+ /**
152
+ * Optional channel tag persisted to SessionMeta. Lets non-CLI drivers
153
+ * (Telegram bot, Discord bot, future ingresses) find their own sessions
154
+ * later via findLatestSessionByChannel. Regular CLI sessions leave this
155
+ * unset. Format: "<driver>:<owner-or-chat-id>", e.g. "telegram:12345".
156
+ */
157
+ sessionChannel?: string;
151
158
  }
@@ -1,3 +1,3 @@
1
1
  export type { Entity, EntityType, Observation, Relation, BrainExtraction } from './types.js';
2
- export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, } from './store.js';
2
+ export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, extractMentions, } from './store.js';
3
3
  export { extractBrainEntities } from './extract.js';
@@ -1,2 +1,2 @@
1
- export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, } from './store.js';
1
+ export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, extractMentions, } from './store.js';
2
2
  export { extractBrainEntities } from './extract.js';
@@ -34,7 +34,19 @@ export declare function searchEntities(query: string, limit?: number): Entity[];
34
34
  * Build context string for entities mentioned in the conversation.
35
35
  * Returns empty string if no relevant entities found.
36
36
  */
37
- export declare function buildEntityContext(mentionedNames: string[]): string;
37
+ export declare function buildEntityContext(mentionedNames: string[], entitiesCache?: Entity[]): string;
38
+ /**
39
+ * Scan `text` for occurrences of any known entity's canonical name or alias
40
+ * and return the matched canonical names (deduped, case-preserving).
41
+ * Word-boundary match so "Base" in "Baseline" doesn't match entity "Base".
42
+ *
43
+ * This is the read half of the brain — the agent loop calls this on each
44
+ * user turn to decide which entities to auto-inject into the system prompt.
45
+ *
46
+ * Pass `entities` if the caller already has them loaded to avoid re-reading
47
+ * the JSONL; otherwise we load it ourselves.
48
+ */
49
+ export declare function extractMentions(text: string, entities?: Entity[]): string[];
38
50
  export declare function getBrainStats(): {
39
51
  entities: number;
40
52
  observations: number;