@blockrun/franklin 3.8.2 → 3.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -36
- package/dist/agent/commands.js +1 -1
- package/dist/agent/llm.d.ts +6 -0
- package/dist/agent/llm.js +103 -14
- package/dist/agent/loop.d.ts +9 -0
- package/dist/agent/loop.js +85 -0
- package/dist/agent/think-tag-stripper.d.ts +27 -0
- package/dist/agent/think-tag-stripper.js +75 -0
- package/dist/agent/tokens.js +2 -1
- package/dist/agent/types.d.ts +7 -0
- package/dist/brain/index.d.ts +1 -1
- package/dist/brain/index.js +1 -1
- package/dist/brain/store.d.ts +13 -1
- package/dist/brain/store.js +74 -5
- package/dist/channel/telegram.d.ts +46 -0
- package/dist/channel/telegram.js +367 -0
- package/dist/commands/migrate.d.ts +5 -3
- package/dist/commands/migrate.js +17 -15
- package/dist/commands/stats.js +1 -1
- package/dist/commands/telegram.d.ts +15 -0
- package/dist/commands/telegram.js +95 -0
- package/dist/content/library.js +2 -2
- package/dist/index.js +9 -0
- package/dist/panel/html.js +1 -1
- package/dist/router/index.js +5 -5
- package/dist/session/storage.d.ts +12 -0
- package/dist/session/storage.js +11 -0
- package/dist/social/ai.d.ts +3 -2
- package/dist/social/ai.js +3 -2
- package/dist/stats/insights.d.ts +1 -1
- package/dist/stats/tracker.js +1 -1
- package/dist/tools/content-execute.d.ts +1 -1
- package/dist/tools/content-execute.js +1 -1
- package/dist/tools/index.js +11 -3
- package/dist/tools/memory.d.ts +16 -0
- package/dist/tools/memory.js +86 -0
- package/dist/tools/trading-execute.d.ts +2 -2
- package/dist/tools/trading-execute.js +2 -2
- package/dist/tools/videogen.d.ts +17 -0
- package/dist/tools/videogen.js +237 -0
- package/dist/trading/trade-log.d.ts +2 -2
- package/dist/trading/trade-log.js +2 -2
- package/dist/ui/app.js +38 -3
- package/dist/ui/markdown.d.ts +16 -0
- package/dist/ui/markdown.js +26 -2
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -16,9 +16,9 @@
|
|
|
16
16
|
<p>
|
|
17
17
|
<a href="https://npmjs.com/package/@blockrun/franklin"><img src="https://img.shields.io/npm/v/@blockrun/franklin.svg?style=flat-square&color=FFD700&label=npm" alt="npm"></a>
|
|
18
18
|
<a href="https://npmjs.com/package/@blockrun/franklin"><img src="https://img.shields.io/npm/dm/@blockrun/franklin.svg?style=flat-square&color=10B981&label=downloads" alt="downloads"></a>
|
|
19
|
-
<a href="https://
|
|
19
|
+
<a href="https://gitlab.com/blockrunai/franklin"><img src="https://img.shields.io/gitlab/stars/blockrunai/franklin?style=flat-square&color=FFD700&label=stars" alt="stars"></a>
|
|
20
20
|
<a href="LICENSE"><img src="https://img.shields.io/badge/license-Apache_2.0-blue?style=flat-square" alt="license"></a>
|
|
21
|
-
<a href="https://
|
|
21
|
+
<a href="https://gitlab.com/blockrunai/franklin/-/pipelines"><img src="https://img.shields.io/gitlab/pipeline-status/blockrunai%2Ffranklin?branch=main&style=flat-square&label=ci" alt="ci"></a>
|
|
22
22
|
<a href="https://www.typescriptlang.org/"><img src="https://img.shields.io/badge/TypeScript-strict-3178C6?style=flat-square&logo=typescript&logoColor=white" alt="TypeScript"></a>
|
|
23
23
|
<a href="https://nodejs.org/"><img src="https://img.shields.io/badge/Node-%E2%89%A520-339933?style=flat-square&logo=node.js&logoColor=white" alt="Node"></a>
|
|
24
24
|
<a href="https://x402.org"><img src="https://img.shields.io/badge/x402-native-10B981?style=flat-square" alt="x402"></a>
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
<a href="#a-new-category">Category</a> ·
|
|
32
32
|
<a href="#what-franklin-can-execute">What it does</a> ·
|
|
33
33
|
<a href="#smart-router">Smart Router</a> ·
|
|
34
|
-
<a href="#the-comparison">
|
|
34
|
+
<a href="#the-comparison">Comparison</a> ·
|
|
35
35
|
<a href="#how-it-works">Architecture</a> ·
|
|
36
36
|
<a href="#community">Community</a>
|
|
37
37
|
</p>
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
|
|
43
43
|
## The pitch in one paragraph
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
Most coding agents write code. Franklin writes code **and spends money to get the job done**. It holds a USDC wallet, picks the best model per task from 55+ providers, purchases trading data, generates images, pays for web search — all autonomously. You state an outcome and set a budget. Franklin decides what to call, what to pay for, and when to stop. Every paid action routes through the [x402](https://x402.org) micropayment protocol and settles against your own wallet. No subscriptions. No API keys. No account. The wallet is the identity.
|
|
46
46
|
|
|
47
47
|
Built by the [BlockRun](https://blockrun.ai) team. Apache-2.0. TypeScript. Ships as one npm package.
|
|
48
48
|
|
|
@@ -63,7 +63,7 @@ npm install -g @blockrun/franklin
|
|
|
63
63
|
# 2. Run (free — uses NVIDIA Nemotron & Qwen3 Coder out of the box)
|
|
64
64
|
franklin
|
|
65
65
|
|
|
66
|
-
# 3. (optional) Fund a wallet to unlock
|
|
66
|
+
# 3. (optional) Fund a wallet to unlock Sonnet, Opus, GPT, Gemini, Grok, + paid APIs
|
|
67
67
|
franklin setup base # or: franklin setup solana
|
|
68
68
|
franklin balance # show address + USDC balance
|
|
69
69
|
```
|
|
@@ -78,7 +78,7 @@ That's it. Zero signup, zero credit card, zero phone verification. Send **$5 of
|
|
|
78
78
|
|
|
79
79
|
| | You pay for... | Result |
|
|
80
80
|
| ----------------------- | -------------------------------------------- | ------------------------------------ |
|
|
81
|
-
|
|
|
81
|
+
| AI subscription | Access. Paid whether you use it or not. | $20–200/month, rate-limited. |
|
|
82
82
|
| Pay-per-call (OpenAI API, etc.) | Every attempt — even failed ones. | Hidden cost from retries, dead ends. |
|
|
83
83
|
| **Franklin (YOPO)** | **The outcome.** Each signed micropayment. | **Provider cost + 5%. No more.** |
|
|
84
84
|
|
|
@@ -253,7 +253,7 @@ You don't subscribe to electricity, you pay for what you use. Franklin brings th
|
|
|
253
253
|
|
|
254
254
|
### 🧠 Multi-model is the future
|
|
255
255
|
|
|
256
|
-
No single model is best at everything.
|
|
256
|
+
No single model is best at everything. Sonnet writes better code, Gemini handles longer context, DeepSeek costs 20x less for simple tasks. The Smart Router routes every request to the optimal model in <1ms — up to 89% savings vs. always using Opus.
|
|
257
257
|
|
|
258
258
|
</td>
|
|
259
259
|
<td width="33%" valign="top">
|
|
@@ -270,19 +270,19 @@ No email. No phone. No KYC. Your Base or Solana address is your account — port
|
|
|
270
270
|
|
|
271
271
|
## The comparison
|
|
272
272
|
|
|
273
|
-
| |
|
|
274
|
-
| -------------------------------------- |
|
|
275
|
-
| Writes code | ✅
|
|
276
|
-
| **Spends money for you** | ❌
|
|
277
|
-
| **Buys data + APIs + images + search** | ❌
|
|
278
|
-
| Picks best model per task | ❌
|
|
279
|
-
| Pricing model | Subscription
|
|
280
|
-
| Monthly fee | $20–$200
|
|
281
|
-
| Rate-limited | Yes
|
|
282
|
-
| Works when provider goes down | ❌
|
|
283
|
-
| Identity |
|
|
284
|
-
| Start free, no KYC | ❌
|
|
285
|
-
| Source | Closed
|
|
273
|
+
| | Coding agents | Editor IDEs | Chatbots | **Franklin** |
|
|
274
|
+
| -------------------------------------- | ---------------- | ---------------- | ---------------- | ------------------------------- |
|
|
275
|
+
| Writes code | ✅ | ✅ | ⚠️ | ✅ |
|
|
276
|
+
| **Spends money for you** | ❌ | ❌ | ❌ | ✅ **USDC wallet, x402** |
|
|
277
|
+
| **Buys data + APIs + images + search** | ❌ | ❌ | ❌ | ✅ **55+ APIs, one wallet** |
|
|
278
|
+
| Picks best model per task | ❌ single-vendor | ❌ plan-tied | ❌ | ✅ **Smart Router, 55+ models** |
|
|
279
|
+
| Pricing model | Subscription | Subscription | Subscription | **YOPO** — per outcome, USDC |
|
|
280
|
+
| Monthly fee | $20–$200 | $20–$40 | $20+ | **$0** |
|
|
281
|
+
| Rate-limited | Yes | Yes | Yes | No — limited only by wallet |
|
|
282
|
+
| Works when provider goes down | ❌ | ❌ | ❌ | ✅ **routes to another** |
|
|
283
|
+
| Identity | Vendor account | Vendor account | Account / email | ✅ **wallet, no signup** |
|
|
284
|
+
| Start free, no KYC | ❌ | ❌ | ❌ | ✅ |
|
|
285
|
+
| Source | Closed | Closed | Closed | **Apache 2.0, local-first** |
|
|
286
286
|
|
|
287
287
|
**Franklin is the economic agent category in one sentence:** software with a wallet that can spend toward a result.
|
|
288
288
|
|
|
@@ -430,7 +430,7 @@ Start with **zero dollars**. Franklin defaults to free NVIDIA models that need n
|
|
|
430
430
|
franklin --model nvidia/nemotron-ultra-253b
|
|
431
431
|
```
|
|
432
432
|
|
|
433
|
-
When you fund the wallet, Franklin gets more purchasing power:
|
|
433
|
+
When you fund the wallet, Franklin gets more purchasing power: Sonnet, Opus, GPT, Gemini, Grok, and paid tools like Exa, DALL-E, and CoinGecko Pro.
|
|
434
434
|
|
|
435
435
|
---
|
|
436
436
|
|
|
@@ -464,15 +464,14 @@ The chat-based social tools (`SearchX`, `PostToX`) and the batch CLI (`franklin
|
|
|
464
464
|
|
|
465
465
|
- [Telegram](https://t.me/blockrunAI) — realtime help, bug reports, feature requests
|
|
466
466
|
- [@BlockRunAI](https://x.com/BlockRunAI) — release notes, demos
|
|
467
|
-
- [Issues](https://
|
|
468
|
-
- [Discussions](https://github.com/BlockRunAI/franklin/discussions) — ideas, Q&A, show & tell
|
|
467
|
+
- [Issues](https://gitlab.com/blockrunai/franklin/-/issues) — bugs and feature requests
|
|
469
468
|
|
|
470
469
|
---
|
|
471
470
|
|
|
472
471
|
## Development
|
|
473
472
|
|
|
474
473
|
```bash
|
|
475
|
-
git clone https://
|
|
474
|
+
git clone https://gitlab.com/blockrunai/franklin.git
|
|
476
475
|
cd franklin
|
|
477
476
|
npm install
|
|
478
477
|
npm run build
|
|
@@ -485,18 +484,6 @@ node dist/index.js --help
|
|
|
485
484
|
|
|
486
485
|
---
|
|
487
486
|
|
|
488
|
-
## Star history
|
|
489
|
-
|
|
490
|
-
<a href="https://star-history.com/#BlockRunAI/franklin&Date">
|
|
491
|
-
<picture>
|
|
492
|
-
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date&theme=dark">
|
|
493
|
-
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date">
|
|
494
|
-
<img alt="Star history" src="https://api.star-history.com/svg?repos=BlockRunAI/franklin&type=Date">
|
|
495
|
-
</picture>
|
|
496
|
-
</a>
|
|
497
|
-
|
|
498
|
-
---
|
|
499
|
-
|
|
500
487
|
## License
|
|
501
488
|
|
|
502
489
|
Apache-2.0. See [LICENSE](LICENSE).
|
package/dist/agent/commands.js
CHANGED
|
@@ -383,7 +383,7 @@ const DIRECT_COMMANDS = {
|
|
|
383
383
|
let text = `**Session Cost**\n` +
|
|
384
384
|
` Requests: ${stats.totalRequests}\n` +
|
|
385
385
|
` Cost: $${stats.totalCostUsd.toFixed(4)} USDC\n` +
|
|
386
|
-
` Saved: $${saved.toFixed(2)} vs
|
|
386
|
+
` Saved: $${saved.toFixed(2)} vs Opus tier\n` +
|
|
387
387
|
` Tokens: ${formatTokens(stats.totalInputTokens)} in / ${formatTokens(stats.totalOutputTokens)} out\n`;
|
|
388
388
|
if (breakdown.length > 0) {
|
|
389
389
|
text += `\n **By model:**\n`;
|
package/dist/agent/llm.d.ts
CHANGED
|
@@ -52,6 +52,12 @@ export interface LLMClientOptions {
|
|
|
52
52
|
* Exported so tests can pin this decision without a live API.
|
|
53
53
|
*/
|
|
54
54
|
export declare function modelHasExtendedThinking(model: string): boolean;
|
|
55
|
+
/**
|
|
56
|
+
* Classify an unparseable tool-call JSON failure so the user and the model
|
|
57
|
+
* get an actionable message instead of a single generic line. Exported for
|
|
58
|
+
* direct unit testing — at runtime it is reached only when a streamed tool call's JSON input fails to parse.
|
|
59
|
+
*/
|
|
60
|
+
export declare function classifyToolCallFailure(toolName: string, rawInput: string, signal: AbortSignal | undefined, model: string): string;
|
|
55
61
|
export declare class ModelClient {
|
|
56
62
|
private apiUrl;
|
|
57
63
|
private chain;
|
package/dist/agent/llm.js
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
|
|
7
7
|
import { USER_AGENT } from '../config.js';
|
|
8
|
+
import { ThinkTagStripper } from './think-tag-stripper.js';
|
|
8
9
|
// ─── Anthropic Prompt Caching ─────────────────────────────────────────────
|
|
9
10
|
/**
|
|
10
11
|
* Apply Anthropic prompt caching using the `system_and_3` strategy.
|
|
@@ -42,6 +43,33 @@ export function modelHasExtendedThinking(model) {
|
|
|
42
43
|
m.includes('sonnet-4') ||
|
|
43
44
|
m.includes('sonnet-3.7'));
|
|
44
45
|
}
|
|
46
|
+
/**
 * Classify an unparseable tool-call JSON failure so the user and the model
 * get an actionable message instead of a single generic line. Exported for
 * direct unit testing — at runtime it is reached only when a streamed tool
 * call's JSON input fails to parse.
 *
 * The checks run in priority order and the first one that applies wins:
 *   1. the abort signal has fired        → "canceled" message
 *   2. fewer than 8 chars were received  → "interrupted mid-stream" message
 *   3. the input does not end with `}`   → "cut off mid tool call" message
 *   4. anything else                     → "malformed JSON" message + preview
 *
 * @param toolName Name of the tool whose input could not be parsed.
 * @param rawInput Raw JSON text accumulated for the tool call so far.
 * @param signal   Optional abort signal; only its `aborted` flag is read.
 * @param model    Model id, interpolated into the message as-is.
 * @returns A bracketed, human-readable description of the failure.
 */
export function classifyToolCallFailure(toolName, rawInput, signal, model) {
    if (signal?.aborted) {
        return `[Tool call to ${toolName} was canceled before the input finished streaming. ` +
            `Previous response kept. Resubmit the last message to retry.]`;
    }
    const charsReceived = rawInput.length;
    // If we have almost nothing, the stream stopped early (timeout / model cut off).
    // If we have a lot but it's still invalid, the model produced malformed JSON.
    if (charsReceived < 8) {
        return `[Tool call to ${toolName} was interrupted mid-stream (only ${charsReceived} chars received) — ` +
            `likely a model timeout or rate limit on ${model}. Try \`/model <other>\` or resubmit.]`;
    }
    const looksTruncated = !rawInput.trimEnd().endsWith('}');
    if (looksTruncated) {
        return `[Model ${model} cut off mid tool call (${charsReceived} chars received, JSON not closed). ` +
            `Try \`/model <stronger>\` or shorten the prompt.]`;
    }
    // Preview is capped at 120 UTF-16 units, but the cut must never land
    // between the two halves of a surrogate pair: a lone surrogate would end
    // up in the returned message text.
    let previewEnd = Math.min(rawInput.length, 120);
    if (previewEnd < rawInput.length) {
        const lastKept = rawInput.charCodeAt(previewEnd - 1);
        const firstDropped = rawInput.charCodeAt(previewEnd);
        const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff &&
            firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
        if (splitsPair)
            previewEnd -= 1;
    }
    // Collapse whitespace runs so the preview stays on one line.
    const preview = rawInput.slice(0, previewEnd).replace(/\s+/g, ' ');
    return `[Tool call to ${toolName} had malformed JSON input (${charsReceived} chars). ` +
        `Preview: ${preview}${rawInput.length > 120 ? '…' : ''} — ` +
        `this is usually a model output bug; try \`/model <other>\` or retry.]`;
}
|
|
45
73
|
function applyAnthropicPromptCaching(payload, request) {
|
|
46
74
|
const out = { ...payload };
|
|
47
75
|
const cacheMarker = { type: 'ephemeral' };
|
|
@@ -267,6 +295,17 @@ export class ModelClient {
|
|
|
267
295
|
let currentToolId = '';
|
|
268
296
|
let currentToolName = '';
|
|
269
297
|
let currentToolInput = '';
|
|
298
|
+
// Split inline <think>…</think> emitted by reasoning models (nemotron,
|
|
299
|
+
// deepseek-r1, qwq, etc.) that use the text field instead of the native
|
|
300
|
+
// thinking block. Thinking emitted this way is display-only — we don't
|
|
301
|
+
// store it in history (Anthropic thinking blocks require signatures).
|
|
302
|
+
// Reset per text block.
|
|
303
|
+
let textStripper = new ThinkTagStripper();
|
|
304
|
+
// One-shot observability: log when a weak model starts role-playing tool
|
|
305
|
+
// calls as literal text tokens. We don't rewrite the stream — the
|
|
306
|
+
// system-prompt guard in loop.ts is responsible for preventing this.
|
|
307
|
+
// Debug-only because the user already sees the literal text in the UI.
|
|
308
|
+
let toolCallRoleplayWarned = false;
|
|
270
309
|
for await (const chunk of this.streamCompletion(request, signal)) {
|
|
271
310
|
switch (chunk.kind) {
|
|
272
311
|
case 'content_block_start': {
|
|
@@ -283,6 +322,7 @@ export class ModelClient {
|
|
|
283
322
|
}
|
|
284
323
|
else if (cblock?.type === 'text') {
|
|
285
324
|
currentText = '';
|
|
325
|
+
textStripper = new ThinkTagStripper();
|
|
286
326
|
}
|
|
287
327
|
break;
|
|
288
328
|
}
|
|
@@ -291,10 +331,34 @@ export class ModelClient {
|
|
|
291
331
|
if (!delta)
|
|
292
332
|
break;
|
|
293
333
|
if (delta.type === 'text_delta') {
|
|
294
|
-
const
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
334
|
+
const raw = delta.text || '';
|
|
335
|
+
if (!toolCallRoleplayWarned) {
|
|
336
|
+
// Only scan the last ~15 chars of already-emitted text plus the
|
|
337
|
+
// new delta — enough to catch a token straddling the chunk
|
|
338
|
+
// boundary (`[TOOLCALL]`=10, `<tool_calls>`=12) without the
|
|
339
|
+
// O(N²) blowup of re-scanning the whole accumulated text on
|
|
340
|
+
// every delta.
|
|
341
|
+
const window = currentText.slice(-15) + raw;
|
|
342
|
+
if (/\[TOOLCALL\]|<tool_calls?>/i.test(window)) {
|
|
343
|
+
toolCallRoleplayWarned = true;
|
|
344
|
+
if (this.debug) {
|
|
345
|
+
console.error(`[franklin] Model ${request.model} emitted a tool-call ` +
|
|
346
|
+
'roleplay token ([TOOLCALL] / <tool_call>) in its text. ' +
|
|
347
|
+
'This is a model hallucination; real tool calls arrive ' +
|
|
348
|
+
'as tool_use blocks, not text.');
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
for (const seg of textStripper.push(raw)) {
|
|
353
|
+
if (seg.type === 'text') {
|
|
354
|
+
currentText += seg.text;
|
|
355
|
+
if (seg.text)
|
|
356
|
+
onStreamDelta?.({ type: 'text', text: seg.text });
|
|
357
|
+
}
|
|
358
|
+
else if (seg.text) {
|
|
359
|
+
onStreamDelta?.({ type: 'thinking', text: seg.text });
|
|
360
|
+
}
|
|
361
|
+
}
|
|
298
362
|
}
|
|
299
363
|
else if (delta.type === 'thinking_delta') {
|
|
300
364
|
const text = delta.thinking || '';
|
|
@@ -329,11 +393,13 @@ export class ModelClient {
|
|
|
329
393
|
}
|
|
330
394
|
}
|
|
331
395
|
if (inputParseError) {
|
|
332
|
-
// Don't invoke the tool — add a text block
|
|
333
|
-
// and
|
|
396
|
+
// Don't invoke the tool — add a classified text block so the
|
|
397
|
+
// user (and the model) can see the specific cause. Prior streamed
|
|
398
|
+
// text is already in `collected` from earlier content_block_stop
|
|
399
|
+
// events, so partial work survives.
|
|
334
400
|
collected.push({
|
|
335
401
|
type: 'text',
|
|
336
|
-
text:
|
|
402
|
+
text: classifyToolCallFailure(currentToolName, currentToolInput, signal, request.model),
|
|
337
403
|
});
|
|
338
404
|
}
|
|
339
405
|
else {
|
|
@@ -360,12 +426,25 @@ export class ModelClient {
|
|
|
360
426
|
currentThinking = '';
|
|
361
427
|
currentThinkingSignature = '';
|
|
362
428
|
}
|
|
363
|
-
else
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
text
|
|
367
|
-
|
|
368
|
-
|
|
429
|
+
else {
|
|
430
|
+
// Flush any partial tag held in the stripper
|
|
431
|
+
for (const seg of textStripper.flush()) {
|
|
432
|
+
if (seg.type === 'text') {
|
|
433
|
+
currentText += seg.text;
|
|
434
|
+
if (seg.text)
|
|
435
|
+
onStreamDelta?.({ type: 'text', text: seg.text });
|
|
436
|
+
}
|
|
437
|
+
else if (seg.text) {
|
|
438
|
+
onStreamDelta?.({ type: 'thinking', text: seg.text });
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
if (currentText) {
|
|
442
|
+
collected.push({
|
|
443
|
+
type: 'text',
|
|
444
|
+
text: currentText,
|
|
445
|
+
});
|
|
446
|
+
currentText = '';
|
|
447
|
+
}
|
|
369
448
|
}
|
|
370
449
|
break;
|
|
371
450
|
}
|
|
@@ -399,7 +478,17 @@ export class ModelClient {
|
|
|
399
478
|
}
|
|
400
479
|
}
|
|
401
480
|
}
|
|
402
|
-
// Flush any remaining text
|
|
481
|
+
// Flush any remaining text (stream ended without content_block_stop)
|
|
482
|
+
for (const seg of textStripper.flush()) {
|
|
483
|
+
if (seg.type === 'text') {
|
|
484
|
+
currentText += seg.text;
|
|
485
|
+
if (seg.text)
|
|
486
|
+
onStreamDelta?.({ type: 'text', text: seg.text });
|
|
487
|
+
}
|
|
488
|
+
else if (seg.text) {
|
|
489
|
+
onStreamDelta?.({ type: 'thinking', text: seg.text });
|
|
490
|
+
}
|
|
491
|
+
}
|
|
403
492
|
if (currentText) {
|
|
404
493
|
collected.push({ type: 'text', text: currentText });
|
|
405
494
|
}
|
package/dist/agent/loop.d.ts
CHANGED
|
@@ -3,6 +3,15 @@
|
|
|
3
3
|
* The core reasoning-action cycle: prompt → model → extract capabilities → execute → repeat.
|
|
4
4
|
*/
|
|
5
5
|
import type { AgentConfig, Dialogue, StreamEvent } from './types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Identify models known to hallucinate tool calls (invented names, literal
|
|
8
|
+
* `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit
|
|
9
|
+
* "Available tools" inventory appended to the system prompt. Strong frontier
|
|
10
|
+
* models skip the nag so their prompt cache doesn't turn over.
|
|
11
|
+
*
|
|
12
|
+
* Exported so tests can pin the classification without a live API.
|
|
13
|
+
*/
|
|
14
|
+
export declare function isWeakModel(model: string): boolean;
|
|
6
15
|
/**
|
|
7
16
|
* Run a multi-turn interactive session.
|
|
8
17
|
* Each user message triggers a full agent loop.
|
package/dist/agent/loop.js
CHANGED
|
@@ -18,6 +18,7 @@ import { recordSessionUsage } from '../stats/session-tracker.js';
|
|
|
18
18
|
import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
19
19
|
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
20
20
|
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
21
|
+
import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
|
|
21
22
|
import { routeRequest, parseRoutingProfile } from '../router/index.js';
|
|
22
23
|
import { recordOutcome } from '../router/local-elo.js';
|
|
23
24
|
import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
|
|
@@ -270,6 +271,33 @@ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
|
|
|
270
271
|
const jitter = base * 0.25 * (Math.random() * 2 - 1); // ±25%
|
|
271
272
|
return Math.max(500, Math.round(base + jitter));
|
|
272
273
|
}
|
|
274
|
+
/**
 * Identify models known to hallucinate tool calls (invented names, literal
 * `[TOOLCALL]` / `<tool_call>` text in answers) — they need the explicit
 * "Available tools" inventory appended to the system prompt. Strong frontier
 * models skip the nag so their prompt cache doesn't turn over.
 *
 * Matching is case-insensitive and purely name-based. The virtual ids
 * `blockrun/free` and `blockrun/eco` do not match any rule themselves, so
 * callers must pass the already-resolved model id for those profiles to be
 * caught by the `nvidia/` prefix rule.
 *
 * Exported so tests can pin the classification without a live API.
 *
 * @param model Model id, e.g. `nvidia/nemotron-ultra-253b`.
 * @returns `true` when the id matches a known-weak family; `false` otherwise,
 *          including for a missing, empty or non-string id.
 */
export function isWeakModel(model) {
    // An unset model cannot be classified; treat it as "not weak" rather than
    // letting `toLowerCase()` throw inside the agent loop.
    if (typeof model !== 'string' || model.length === 0)
        return false;
    const m = model.toLowerCase();
    // NVIDIA-hosted open models have been observed confabulating tool calls.
    if (m.startsWith('nvidia/'))
        return true;
    // Same family when served under a different provider prefix.
    if (m.includes('nemotron-ultra'))
        return true;
    if (m.includes('qwen3-coder'))
        return true;
    // GLM-4* is weak; GLM-5+ is capable enough to skip the nag.
    if (/^zai\/glm-4/.test(m))
        return true;
    // DeepSeek's smaller / quantized SKUs tend to role-play tools too.
    if (/deepseek[-_/](r1|v3|chat)-?(lite|mini|tiny)/.test(m))
        return true;
    return false;
}
|
|
273
301
|
// ─── Interactive Session ───────────────────────────────────────────────────
|
|
274
302
|
/**
|
|
275
303
|
* Run a multi-turn interactive session.
|
|
@@ -341,6 +369,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
341
369
|
outputTokens: sessionOutputTokens,
|
|
342
370
|
costUsd: sessionCostUsd,
|
|
343
371
|
savedVsOpusUsd: sessionSavedVsOpus,
|
|
372
|
+
...(config.sessionChannel !== undefined ? { channel: config.sessionChannel } : {}),
|
|
344
373
|
});
|
|
345
374
|
};
|
|
346
375
|
const persistSessionMessage = (message) => {
|
|
@@ -414,6 +443,44 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
414
443
|
config.onModelChange?.(baseModel, 'system');
|
|
415
444
|
}
|
|
416
445
|
turnFailedModels = new Set(); // Fresh slate for transient failures this turn
|
|
446
|
+
// ── Brain auto-recall (computed once per user turn) ──
|
|
447
|
+
// Scan the new user message plus the previous assistant reply (so
|
|
448
|
+
// cross-turn references like "that company we discussed" still resolve)
|
|
449
|
+
// for entity mentions, and build the context string. The inner agent
|
|
450
|
+
// loop can iterate many times (planner + executor steps); the user's
|
|
451
|
+
// input doesn't change between those iterations, so caching here saves
|
|
452
|
+
// loadEntities + loadObservations + loadRelations on every re-entry.
|
|
453
|
+
let turnBrainContext = '';
|
|
454
|
+
try {
|
|
455
|
+
const lastAssistantBeforeThisTurn = [...history.slice(0, -1)]
|
|
456
|
+
.reverse()
|
|
457
|
+
.find((m) => m.role === 'assistant');
|
|
458
|
+
const flatten = (d) => {
|
|
459
|
+
if (!d)
|
|
460
|
+
return '';
|
|
461
|
+
if (typeof d.content === 'string')
|
|
462
|
+
return d.content;
|
|
463
|
+
if (!Array.isArray(d.content))
|
|
464
|
+
return '';
|
|
465
|
+
return d.content
|
|
466
|
+
.filter(p => p.type === 'text')
|
|
467
|
+
.map(p => p.text ?? '')
|
|
468
|
+
.join(' ');
|
|
469
|
+
};
|
|
470
|
+
const scanText = input + '\n' + flatten(lastAssistantBeforeThisTurn);
|
|
471
|
+
if (scanText.trim().length > 0) {
|
|
472
|
+
const entities = loadEntities();
|
|
473
|
+
if (entities.length > 0) {
|
|
474
|
+
const mentioned = extractMentions(scanText, entities);
|
|
475
|
+
if (mentioned.length > 0) {
|
|
476
|
+
turnBrainContext = buildEntityContext(mentioned, entities) ?? '';
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
catch {
|
|
482
|
+
/* brain is optional — never block a turn on recall */
|
|
483
|
+
}
|
|
417
484
|
const abort = new AbortController();
|
|
418
485
|
onAbortReady?.(() => abort.abort());
|
|
419
486
|
let loopCount = 0;
|
|
@@ -527,6 +594,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
527
594
|
systemParts.push('# Context Window Status\nContext window has crossed the halfway mark (>50%). ' +
|
|
528
595
|
'Prefer concise responses and batch tool calls when possible.');
|
|
529
596
|
}
|
|
597
|
+
// ── Brain auto-recall (computed once per user turn above) ──
|
|
598
|
+
if (turnBrainContext)
|
|
599
|
+
systemParts.push(turnBrainContext);
|
|
530
600
|
const systemPrompt = systemParts.join('\n\n');
|
|
531
601
|
const modelMaxOut = getMaxOutputTokens(config.model);
|
|
532
602
|
let maxTokens = Math.min(maxTokensOverride ?? CAPPED_MAX_TOKENS, modelMaxOut);
|
|
@@ -600,6 +670,21 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
600
670
|
callMaxTokens = 2048; // Short plan output
|
|
601
671
|
callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
|
|
602
672
|
}
|
|
673
|
+
// ── Hallucination guard for weak models ──
|
|
674
|
+
// Weak / free models (nemotron-ultra, GLM-4, qwen coder, free-profile
|
|
675
|
+
// resolves) have been observed inventing tool names (e.g. MixtureOfAgents)
|
|
676
|
+
// and emitting literal `[TOOLCALL]` / `<tool_call>` text pretending to
|
|
677
|
+
// call tools. Give them an explicit inventory + an anti-roleplay hint.
|
|
678
|
+
// Skipped for strong models to keep their prompt cache warm.
|
|
679
|
+
if (isWeakModel(resolvedModel) && callToolDefs.length > 0) {
|
|
680
|
+
const names = callToolDefs.map(t => t.name).join(', ');
|
|
681
|
+
callSystemPrompt = callSystemPrompt +
|
|
682
|
+
'\n\n# Available tools\n' +
|
|
683
|
+
`You have exactly these tools: ${names}.\n` +
|
|
684
|
+
'Do not invent other tool names. Do not emit literal "[TOOLCALL]", ' +
|
|
685
|
+
'"<tool_call>", or similar tokens in your text — call tools via the ' +
|
|
686
|
+
'proper API only. If no tool fits, explain plainly in prose.';
|
|
687
|
+
}
|
|
603
688
|
// Safety net: handled in llm.ts resolveVirtualModel()
|
|
604
689
|
// Sanitize: remove orphaned tool results that could confuse the API
|
|
605
690
|
const sanitized = sanitizeHistory(history);
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Streaming parser that splits `<think>…</think>` (and `<thinking>…</thinking>`)
 * tags embedded in a model's text output into separate text / thinking segments.
 *
 * Problem: reasoning models like nemotron, deepseek-r1, qwq emit their chain of
 * thought inline in the text content field — not via the Anthropic `thinking`
 * block nor the OpenAI `reasoning_content` field. If we don't split these,
 * the literal `<think>` tags and the full reasoning leak into the answer UI
 * and into conversation history (wasting context on future turns).
 *
 * Usage:
 *   const s = new ThinkTagStripper();
 *   for (const seg of s.push(chunk)) emit(seg);
 *   for (const seg of s.flush()) emit(seg);
 *
 * Handles tags split across chunk boundaries by holding a small suffix.
 */
/** One contiguous run of output, labelled with the kind of content it carries. */
export type Segment = {
    /** `'text'` for content outside think tags, `'thinking'` for content inside them. */
    type: 'text' | 'thinking';
    /** The characters of the run; the think tags themselves are not included. */
    text: string;
};
export declare class ThinkTagStripper {
    /** Which side of a think tag the parser is currently on. */
    private mode;
    /** Trailing characters held back because they could be the start of a tag. */
    private pending;
    /**
     * Feed the next streamed chunk and get back the segments that can be
     * emitted now. A trailing partial tag is withheld until the next call.
     */
    push(chunk: string): Segment[];
    /**
     * Emit whatever is still held back, labelled with the current mode.
     * Returns an empty array when nothing is pending.
     */
    flush(): Segment[];
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
 * Streaming parser that splits `<think>…</think>` (and `<thinking>…</thinking>`)
 * tags embedded in a model's text output into separate text / thinking segments.
 *
 * Problem: reasoning models like nemotron, deepseek-r1, qwq emit their chain of
 * thought inline in the text content field — not via the Anthropic `thinking`
 * block nor the OpenAI `reasoning_content` field. If we don't split these,
 * the literal `<think>` tags and the full reasoning leak into the answer UI
 * and into conversation history (wasting context on future turns).
 *
 * Usage:
 *   const s = new ThinkTagStripper();
 *   for (const seg of s.push(chunk)) emit(seg);
 *   for (const seg of s.flush()) emit(seg);
 *
 * Handles tags split across chunk boundaries by holding a small suffix.
 * Matching is exact (case-sensitive); in text mode only opening tags are
 * recognised and in thinking mode only closing tags.
 */
const OPEN_TAGS = ['<think>', '<thinking>'];
const CLOSE_TAGS = ['</think>', '</thinking>'];
// A partial tag can only live in the last (longest tag − 1) characters of the
// buffer, so the hold-back check never needs to look further from the end.
const LONGEST_TAG_LENGTH = Math.max(...OPEN_TAGS.concat(CLOSE_TAGS).map(t => t.length));
export class ThinkTagStripper {
    /** Which side of a think tag the parser is on: 'text' or 'thinking'. */
    mode = 'text';
    /** Trailing characters withheld because they may be the start of a tag. */
    pending = '';
    /**
     * Feed the next streamed chunk.
     *
     * @param chunk Newly received text. A non-string value is treated as an
     *              empty chunk so it cannot leak into the output as the
     *              literal text "undefined" / "null".
     * @returns Segments that are safe to emit now, in stream order. Tags are
     *          consumed and never appear in a segment's text.
     */
    push(chunk) {
        const input = this.pending + (typeof chunk === 'string' ? chunk : '');
        this.pending = '';
        const out = [];
        let emitStart = 0;
        let i = 0;
        // Emit input[emitStart, end) under the current mode, skipping empty runs.
        const emit = (end) => {
            if (end > emitStart) {
                out.push({ type: this.mode, text: input.slice(emitStart, end) });
            }
        };
        while (i < input.length) {
            if (input[i] !== '<') {
                i++;
                continue;
            }
            const tags = this.mode === 'text' ? OPEN_TAGS : CLOSE_TAGS;
            // Full-tag match?
            let matched = null;
            for (const t of tags) {
                if (input.startsWith(t, i)) {
                    matched = t;
                    break;
                }
            }
            if (matched) {
                emit(i);
                i += matched.length;
                emitStart = i;
                this.mode = this.mode === 'text' ? 'thinking' : 'text';
                continue;
            }
            // Partial match at boundary? Hold back the remainder. Only worth
            // checking when the tail is shorter than the longest tag.
            if (input.length - i < LONGEST_TAG_LENGTH) {
                const rest = input.slice(i);
                const couldStillMatch = tags.some(t => t.length > rest.length && t.startsWith(rest));
                if (couldStillMatch) {
                    emit(i);
                    this.pending = rest;
                    return out;
                }
            }
            i++;
        }
        emit(input.length);
        return out;
    }
    /**
     * Emit whatever is still held back (a partial tag that never completed),
     * labelled with the current mode.
     *
     * @returns A single-segment array, or an empty array when nothing is pending.
     */
    flush() {
        if (!this.pending)
            return [];
        const segments = [{ type: this.mode, text: this.pending }];
        this.pending = '';
        return segments;
    }
}
|
package/dist/agent/tokens.js
CHANGED
|
@@ -6,7 +6,8 @@
|
|
|
6
6
|
const DEFAULT_BYTES_PER_TOKEN = 4;
|
|
7
7
|
/**
|
|
8
8
|
* Model-specific bytes-per-token ratios for more accurate estimation.
|
|
9
|
-
*
|
|
9
|
+
* Anthropic-family models tokenize at ~3.5 bytes/token, GPT-family at ~4,
|
|
10
|
+
* Gemini-family at ~3.
|
|
10
11
|
*/
|
|
11
12
|
const MODEL_BYTES_PER_TOKEN = {
|
|
12
13
|
'anthropic': 3.5,
|
package/dist/agent/types.d.ts
CHANGED
|
@@ -148,4 +148,11 @@ export interface AgentConfig {
|
|
|
148
148
|
baseModel?: string;
|
|
149
149
|
/** Resume an existing session by ID — loads prior history and keeps appending to the same JSONL */
|
|
150
150
|
resumeSessionId?: string;
|
|
151
|
+
/**
|
|
152
|
+
* Optional channel tag persisted to SessionMeta. Lets non-CLI drivers
|
|
153
|
+
* (Telegram bot, Discord bot, future ingresses) find their own sessions
|
|
154
|
+
* later via findLatestSessionByChannel. Regular CLI sessions leave this
|
|
155
|
+
* unset. Format: "<driver>:<owner-or-chat-id>", e.g. "telegram:12345".
|
|
156
|
+
*/
|
|
157
|
+
sessionChannel?: string;
|
|
151
158
|
}
|
package/dist/brain/index.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
export type { Entity, EntityType, Observation, Relation, BrainExtraction } from './types.js';
|
|
2
|
-
export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, } from './store.js';
|
|
2
|
+
export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, extractMentions, } from './store.js';
|
|
3
3
|
export { extractBrainEntities } from './extract.js';
|
package/dist/brain/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, } from './store.js';
|
|
1
|
+
export { loadEntities, saveEntities, findEntity, upsertEntity, loadObservations, getEntityObservations, addObservation, loadRelations, getEntityRelations, upsertRelation, searchEntities, buildEntityContext, getBrainStats, extractMentions, } from './store.js';
|
|
2
2
|
export { extractBrainEntities } from './extract.js';
|
package/dist/brain/store.d.ts
CHANGED
|
@@ -34,7 +34,19 @@ export declare function searchEntities(query: string, limit?: number): Entity[];
|
|
|
34
34
|
* Build context string for entities mentioned in the conversation.
|
|
35
35
|
* Returns empty string if no relevant entities found.
|
|
36
36
|
*/
|
|
37
|
-
export declare function buildEntityContext(mentionedNames: string[]): string;
|
|
37
|
+
export declare function buildEntityContext(mentionedNames: string[], entitiesCache?: Entity[]): string;
|
|
38
|
+
/**
|
|
39
|
+
* Scan `text` for occurrences of any known entity's canonical name or alias
|
|
40
|
+
* and return the matched canonical names (deduped, case-preserving).
|
|
41
|
+
* Word-boundary match so "Base" in "Baseline" doesn't match entity "Base".
|
|
42
|
+
*
|
|
43
|
+
* This is the read half of the brain — the agent loop calls this on each
|
|
44
|
+
* user turn to decide which entities to auto-inject into the system prompt.
|
|
45
|
+
*
|
|
46
|
+
* Pass `entities` if the caller already has them loaded to avoid re-reading
|
|
47
|
+
* the JSONL; otherwise we load it ourselves.
|
|
48
|
+
*/
|
|
49
|
+
export declare function extractMentions(text: string, entities?: Entity[]): string[];
|
|
38
50
|
export declare function getBrainStats(): {
|
|
39
51
|
entities: number;
|
|
40
52
|
observations: number;
|