@kaleidorg/mind 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/autonomy/index.d.ts +21 -0
- package/dist/autonomy/index.d.ts.map +1 -0
- package/dist/autonomy/index.js +16 -0
- package/dist/autonomy/index.js.map +1 -0
- package/dist/autonomy/prompt.d.ts +21 -0
- package/dist/autonomy/prompt.d.ts.map +1 -0
- package/dist/autonomy/prompt.js +37 -0
- package/dist/autonomy/prompt.js.map +1 -0
- package/dist/autonomy/risk.d.ts +53 -0
- package/dist/autonomy/risk.d.ts.map +1 -0
- package/dist/autonomy/risk.js +74 -0
- package/dist/autonomy/risk.js.map +1 -0
- package/dist/autonomy/run-state.d.ts +39 -0
- package/dist/autonomy/run-state.d.ts.map +1 -0
- package/dist/autonomy/run-state.js +118 -0
- package/dist/autonomy/run-state.js.map +1 -0
- package/dist/autonomy/scheduler.d.ts +18 -0
- package/dist/autonomy/scheduler.d.ts.map +1 -0
- package/dist/autonomy/scheduler.js +113 -0
- package/dist/autonomy/scheduler.js.map +1 -0
- package/dist/autonomy/task-store.d.ts +44 -0
- package/dist/autonomy/task-store.d.ts.map +1 -0
- package/dist/autonomy/task-store.js +139 -0
- package/dist/autonomy/task-store.js.map +1 -0
- package/dist/autonomy/types.d.ts +164 -0
- package/dist/autonomy/types.d.ts.map +1 -0
- package/dist/autonomy/types.js +20 -0
- package/dist/autonomy/types.js.map +1 -0
- package/dist/funnel.d.ts.map +1 -1
- package/dist/funnel.js +12 -0
- package/dist/funnel.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/knowledge/bitcoin-copilot.js +2 -2
- package/dist/knowledge/bitcoin-copilot.js.map +1 -1
- package/dist/qvac/index.d.ts +1 -1
- package/dist/qvac/index.d.ts.map +1 -1
- package/dist/qvac/index.js.map +1 -1
- package/dist/qvac/parse.d.ts +18 -0
- package/dist/qvac/parse.d.ts.map +1 -1
- package/dist/qvac/parse.js +1 -0
- package/dist/qvac/parse.js.map +1 -1
- package/dist/qvac/provider.d.ts +16 -0
- package/dist/qvac/provider.d.ts.map +1 -1
- package/dist/qvac/provider.js +17 -1
- package/dist/qvac/provider.js.map +1 -1
- package/dist/qvac/stream.d.ts +16 -0
- package/dist/qvac/stream.d.ts.map +1 -1
- package/dist/qvac/stream.js +21 -1
- package/dist/qvac/stream.js.map +1 -1
- package/dist/recipe/buy-asset-channel.d.ts +1 -1
- package/dist/recipe/buy-asset-channel.d.ts.map +1 -1
- package/dist/recipe/buy-asset-channel.js +4 -3
- package/dist/recipe/buy-asset-channel.js.map +1 -1
- package/dist/recipe/kaleidoswap-atomic.d.ts +1 -1
- package/dist/recipe/kaleidoswap-atomic.d.ts.map +1 -1
- package/dist/recipe/kaleidoswap-atomic.js +5 -4
- package/dist/recipe/kaleidoswap-atomic.js.map +1 -1
- package/dist/recipe/runner.d.ts.map +1 -1
- package/dist/recipe/runner.js +38 -0
- package/dist/recipe/runner.js.map +1 -1
- package/dist/tools/mcp.d.ts +19 -0
- package/dist/tools/mcp.d.ts.map +1 -1
- package/dist/tools/mcp.js +51 -9
- package/dist/tools/mcp.js.map +1 -1
- package/package.json +2 -1
- package/skills/channel-manager/SKILL.md +59 -0
- package/skills/dca/SKILL.md +48 -0
- package/skills/kaleido-lsps/SKILL.md +12 -12
- package/skills/kaleido-trading/SKILL.md +1 -1
- package/skills/liquidity-optimizer/SKILL.md +91 -0
- package/skills/merchant-finder/SKILL.md +1 -1
- package/skills/portfolio-manager/SKILL.md +67 -0
- package/skills/rgb-lightning-node/SKILL.md +3 -3
- package/skills/wallet-assistant/SKILL.md +1 -1
- package/src/autonomy/autonomy.test.ts +348 -0
- package/src/autonomy/index.ts +50 -0
- package/src/autonomy/prompt.ts +48 -0
- package/src/autonomy/risk.ts +139 -0
- package/src/autonomy/run-state.ts +144 -0
- package/src/autonomy/scheduler.ts +120 -0
- package/src/autonomy/task-store.ts +167 -0
- package/src/autonomy/types.ts +186 -0
- package/src/funnel.mind.test.ts +390 -0
- package/src/funnel.ts +14 -0
- package/src/index.ts +41 -0
- package/src/knowledge/bitcoin-copilot.ts +2 -2
- package/src/qvac/index.ts +1 -0
- package/src/qvac/parse.ts +20 -0
- package/src/qvac/provider.test.ts +17 -0
- package/src/qvac/provider.ts +37 -1
- package/src/qvac/stream.test.ts +25 -0
- package/src/qvac/stream.ts +38 -1
- package/src/recipe/buy-asset-channel.test.ts +5 -0
- package/src/recipe/buy-asset-channel.ts +6 -3
- package/src/recipe/kaleidoswap-atomic.test.ts +3 -3
- package/src/recipe/kaleidoswap-atomic.ts +5 -4
- package/src/recipe/recipe.test.ts +16 -0
- package/src/recipe/runner.ts +41 -0
- package/src/tools/mcp.live.test.ts +116 -0
- package/src/tools/mcp.parse.test.ts +37 -0
- package/src/tools/mcp.ts +55 -9
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Desktop "mind" smoke tests — drive the SAME Funnel the desktop sidecar builds
|
|
3
|
+
* (apps/provider/src/index.ts: recipes [kaleidoswapAtomic, buyAssetChannel,
|
|
4
|
+
* assetSend, payments, receive] over the MCP tool surface) through each
|
|
5
|
+
* user-facing intent, end to end, with a SCRIPTED provider standing in for the
|
|
6
|
+
* on-device QVAC model.
|
|
7
|
+
*
|
|
8
|
+
* Why mind-level (not just MCP-level, which mcp.live.test.ts covers): the
|
|
9
|
+
* desktop "tool-less" bugs live in the wiring BETWEEN the brain and the tools —
|
|
10
|
+
* tier routing (fast/recipe/agentic), recipe orchestration, and agentic tool
|
|
11
|
+
* selection. These assert that, given a real tool surface, the mind:
|
|
12
|
+
* - balance → agentic → calls rln_get_balances, surfaces the balance
|
|
13
|
+
* - list channels → agentic → calls rln_list_channels
|
|
14
|
+
* - buy via swap → recipe → quote → init → node → whitelist → execute (1 confirm)
|
|
15
|
+
* - merchant near city → agentic → search_knowledge over the merchant corpus
|
|
16
|
+
*
|
|
17
|
+
* Fully deterministic (no node/model/maker), so it runs in CI. Live tool
|
|
18
|
+
* execution against a real node is the separate mcp.live.test.ts.
|
|
19
|
+
*/
|
|
20
|
+
import { describe, expect, it } from 'vitest';
|
|
21
|
+
import { Funnel } from './funnel.js';
|
|
22
|
+
import { ToolRegistry } from './tools/registry.js';
|
|
23
|
+
import { InProcessToolSource } from './tools/in-process.js';
|
|
24
|
+
import { merchantsToDocuments } from './knowledge/merchants.js';
|
|
25
|
+
import { buyAssetChannelRecipe } from './recipe/buy-asset-channel.js';
|
|
26
|
+
import { kaleidoswapAtomicRecipe } from './recipe/kaleidoswap-atomic.js';
|
|
27
|
+
import { assetSendRecipe } from './recipe/asset-send.js';
|
|
28
|
+
import { paymentsRecipe } from './recipe/payments.js';
|
|
29
|
+
import { receiveRecipe } from './recipe/receive.js';
|
|
30
|
+
import { loadSkillsDir, packagedSkillsDir } from './skills/loader.js';
|
|
31
|
+
import type { Skill } from './skills/types.js';
|
|
32
|
+
import type { LLMProvider, TurnInput, TurnOutput } from './providers/types.js';
|
|
33
|
+
import type { ConfirmDecision, ToolCall } from './types.js';
|
|
34
|
+
|
|
35
|
+
// The exact recipe set the desktop provider registers, in order. Order matters:
|
|
36
|
+
// kaleidoswapAtomicRecipe is FIRST, so a plain "buy 1 USDT" on a funded node
|
|
37
|
+
// routes to the atomic SWAP (BTC→USDT over existing liquidity). The
|
|
38
|
+
// channel-onboarding recipe wins only for explicit channel/inbound/liquidity
|
|
39
|
+
// phrasing, which the atomic matcher excludes. (See the routing tests below.)
|
|
40
|
+
const DESKTOP_RECIPES = [
|
|
41
|
+
kaleidoswapAtomicRecipe,
|
|
42
|
+
buyAssetChannelRecipe,
|
|
43
|
+
assetSendRecipe,
|
|
44
|
+
paymentsRecipe,
|
|
45
|
+
receiveRecipe,
|
|
46
|
+
];
|
|
47
|
+
|
|
48
|
+
// ── A scripted provider: each script entry is one model turn. Returning tool
|
|
49
|
+
// calls makes the agentic engine execute them and ask for the next turn. ──
|
|
50
|
+
/**
 * Build a scripted stand-in for the model provider.
 *
 * Each `script` entry is one model turn, replayed in order. Once the script is
 * exhausted, the LAST entry is repeated for every further turn, so a script
 * that ends on a plain-text step lets the caller's loop terminate.
 *
 * @param script - Turns to replay; must contain at least one entry.
 * @returns A provider named `'scripted'`. Each `runTurn` forwards the step's
 *   text to `input.onToken` (when set) and returns it as both `text` and
 *   `rawContent`, with `toolCalls` defaulting to `[]` and `requestId` set to
 *   `req-<n>` where n is the 1-based turn counter.
 * @throws Error when `script` is empty — otherwise the first turn would fail
 *   with an opaque "cannot read properties of undefined" TypeError.
 */
function scripted(script: Array<{ text: string; toolCalls?: ToolCall[] }>): LLMProvider {
  const last = script[script.length - 1];
  if (last === undefined) {
    throw new Error('scripted(): script must contain at least one turn');
  }
  let turn = 0;
  return {
    name: 'scripted',
    async runTurn(input: TurnInput): Promise<TurnOutput> {
      // Past the end of the script, keep replaying the final step.
      const step = script[turn] ?? last;
      turn += 1;
      input.onToken?.(step.text);
      return { text: step.text, rawContent: step.text, toolCalls: step.toolCalls ?? [], requestId: `req-${turn}` };
    },
  };
}
|
|
62
|
+
|
|
63
|
+
// ── Merchant corpus: real merchantsToDocuments transform over a small fixture,
|
|
64
|
+
// queried by city so "near Rome" surfaces the Rome places (not Milan). ──
|
|
65
|
+
const MERCHANTS = [
|
|
66
|
+
{ id: 'm1', name: 'Bitcoin Caffè', category: 'cafe', city: 'Rome', address: 'Via Roma 1', acceptedAssets: ['lightning', 'onchain'] },
|
|
67
|
+
{ id: 'm2', name: 'Satoshi Pizzeria', category: 'restaurant', city: 'Milan', acceptedAssets: ['lightning'] },
|
|
68
|
+
{ id: 'm3', name: 'Nakamoto Books', category: 'shop', city: 'Rome', address: 'Via Veneto 9', acceptedAssets: ['onchain'] },
|
|
69
|
+
];
|
|
70
|
+
const MERCHANT_DOCS = merchantsToDocuments(MERCHANTS);
|
|
71
|
+
function searchMerchants(query: string): string {
|
|
72
|
+
const q = query.toLowerCase();
|
|
73
|
+
const hits = MERCHANT_DOCS.filter((d) => {
|
|
74
|
+
const city = String((d.metadata as { city?: string })?.city ?? '').toLowerCase();
|
|
75
|
+
return city.length > 0 && q.includes(city);
|
|
76
|
+
});
|
|
77
|
+
return hits.length ? hits.map((h, i) => `[${i + 1}] ${h.text}`).join('\n\n') : 'No relevant passages found.';
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Build the desktop mind with canned MCP-named tools. Every call is recorded in
|
|
82
|
+
* `calls` (name + args, in execution order) so we can assert routing + sequence.
|
|
83
|
+
*/
|
|
84
|
+
function buildMind(
|
|
85
|
+
provider: LLMProvider,
|
|
86
|
+
opts: { skills?: Skill[]; log?: (m: string) => void } = {},
|
|
87
|
+
): { funnel: Funnel; calls: Array<{ name: string; args: any }> } {
|
|
88
|
+
const calls: Array<{ name: string; args: any }> = [];
|
|
89
|
+
const tool = (name: string, response: any, spend = false) => ({
|
|
90
|
+
name,
|
|
91
|
+
description: '',
|
|
92
|
+
parameters: { type: 'object' as const, properties: {} },
|
|
93
|
+
requiresConfirmation: spend,
|
|
94
|
+
handler: async (a: Record<string, unknown>) => {
|
|
95
|
+
calls.push({ name, args: a });
|
|
96
|
+
return typeof response === 'function' ? response(a) : response;
|
|
97
|
+
},
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
const tools = new ToolRegistry([
|
|
101
|
+
new InProcessToolSource('wallet', [
|
|
102
|
+
// reads
|
|
103
|
+
tool('rln_get_balances', { lightning_balance_sat: 1_949_753, btc_onchain: { vanilla_spendable_sats: 100_000 } }),
|
|
104
|
+
tool('rln_list_channels', {
|
|
105
|
+
channels: [
|
|
106
|
+
{ channel_id: '5d4487c8', capacity_sat: 1_000_000, outbound_balance_msat: 987_240_000, ready: true },
|
|
107
|
+
{ channel_id: 'a1b2c3d4', capacity_sat: 1_000_000, outbound_balance_msat: 500_000_000, ready: true },
|
|
108
|
+
],
|
|
109
|
+
}),
|
|
110
|
+
// atomic-swap chain (quote read; init/whitelist/execute are spends)
|
|
111
|
+
tool('kaleidoswap_get_quote', {
|
|
112
|
+
rfq_id: 'rfq-1',
|
|
113
|
+
from_asset: { asset_id: 'BTC', ticker: 'BTC', amount: 100_000 },
|
|
114
|
+
to_asset: { asset_id: 'rgb:USDT', ticker: 'USDT', amount: 1_000_000 },
|
|
115
|
+
from_amount_display: '100,000 sats',
|
|
116
|
+
to_amount_display: '1 USDT',
|
|
117
|
+
fee_display: '154 sats',
|
|
118
|
+
}),
|
|
119
|
+
tool('kaleidoswap_atomic_init', { swapstring: 'SWAP/abc/def', payment_hash: 'ph-1' }, /* spend */ true),
|
|
120
|
+
tool('rln_get_node_info', { pubkey: '030637ec' }),
|
|
121
|
+
tool('rln_atomic_taker', { ok: true }, /* spend */ true),
|
|
122
|
+
tool('kaleidoswap_atomic_execute', { status: 200, message: 'Swap executed successfully.' }, /* spend */ true),
|
|
123
|
+
// LSPS1 asset-channel onboarding (the rail explicit channel/inbound-liquidity phrasing routes to)
|
|
124
|
+
tool('kaleidoswap_lsp_quote_asset_channel', {
|
|
125
|
+
total_sat: 29_946,
|
|
126
|
+
btc_amount_sat: 13_807,
|
|
127
|
+
channel_fee_sat: 16_139,
|
|
128
|
+
expires_at: 0,
|
|
129
|
+
}),
|
|
130
|
+
tool('kaleidoswap_lsp_create_asset_channel', { order_id: 'cf2981c4', order_state: 'CREATED' }, /* spend */ true),
|
|
131
|
+
// knowledge (merchant discovery)
|
|
132
|
+
{
|
|
133
|
+
name: 'search_knowledge',
|
|
134
|
+
description: 'Search the knowledge base (merchants, docs) for relevant passages.',
|
|
135
|
+
parameters: { type: 'object' as const, properties: { query: { type: 'string' } }, required: ['query'] },
|
|
136
|
+
handler: async (a: Record<string, unknown>) => {
|
|
137
|
+
calls.push({ name: 'search_knowledge', args: a });
|
|
138
|
+
return searchMerchants(String(a.query ?? ''));
|
|
139
|
+
},
|
|
140
|
+
},
|
|
141
|
+
]),
|
|
142
|
+
]);
|
|
143
|
+
|
|
144
|
+
return {
|
|
145
|
+
funnel: new Funnel({ provider, tools, recipes: DESKTOP_RECIPES, maxTurns: 8, skills: opts.skills, log: opts.log }),
|
|
146
|
+
calls,
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
describe('desktop mind — balance', () => {
|
|
151
|
+
it('routes "what\'s my balance?" to the agentic tier and calls rln_get_balances', async () => {
|
|
152
|
+
const { funnel, calls } = buildMind(
|
|
153
|
+
scripted([
|
|
154
|
+
{ text: '', toolCalls: [{ name: 'rln_get_balances', arguments: {} }] },
|
|
155
|
+
{ text: 'You have 1,949,753 sats in Lightning.' },
|
|
156
|
+
]),
|
|
157
|
+
);
|
|
158
|
+
|
|
159
|
+
const res = await funnel.runTurn("what's my balance?");
|
|
160
|
+
|
|
161
|
+
expect(res.tier).toBe('agentic');
|
|
162
|
+
expect(calls.map((c) => c.name)).toContain('rln_get_balances');
|
|
163
|
+
const exec = res.toolCalls?.find((c) => c.name === 'rln_get_balances');
|
|
164
|
+
expect((exec?.result as { lightning_balance_sat?: number })?.lightning_balance_sat).toBe(1_949_753);
|
|
165
|
+
expect(res.text).toBeTruthy();
|
|
166
|
+
});
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
describe('desktop mind — list channels', () => {
|
|
170
|
+
it('routes "list my channels" to the agentic tier and calls rln_list_channels', async () => {
|
|
171
|
+
const { funnel, calls } = buildMind(
|
|
172
|
+
scripted([
|
|
173
|
+
{ text: '', toolCalls: [{ name: 'rln_list_channels', arguments: {} }] },
|
|
174
|
+
{ text: 'You have 2 open channels.' },
|
|
175
|
+
]),
|
|
176
|
+
);
|
|
177
|
+
|
|
178
|
+
const res = await funnel.runTurn('list my channels');
|
|
179
|
+
|
|
180
|
+
expect(res.tier).toBe('agentic');
|
|
181
|
+
expect(calls.map((c) => c.name)).toContain('rln_list_channels');
|
|
182
|
+
const exec = res.toolCalls?.find((c) => c.name === 'rln_list_channels');
|
|
183
|
+
expect((exec?.result as { channels?: unknown[] })?.channels).toHaveLength(2);
|
|
184
|
+
});
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
describe('desktop mind — buy assets via atomic swap', () => {
|
|
188
|
+
it('routes "swap … for usdt" to the atomic recipe and runs quote→init→node→whitelist→execute with ONE confirm', async () => {
|
|
189
|
+
// The recipe forces a model inference for slot extraction (forceModelExtract):
|
|
190
|
+
// the runner injects a synthetic `extract_request` tool; the model fills slots.
|
|
191
|
+
const provider: LLMProvider = {
|
|
192
|
+
name: 'extract',
|
|
193
|
+
async runTurn(input) {
|
|
194
|
+
if (input.tools?.some((t) => t.name === 'extract_request')) {
|
|
195
|
+
return {
|
|
196
|
+
text: '',
|
|
197
|
+
rawContent: '',
|
|
198
|
+
toolCalls: [
|
|
199
|
+
{ id: 'ex1', name: 'extract_request', arguments: { from_asset: 'BTC', to_asset: 'USDT', amount: 100_000, amount_side: 'from' } },
|
|
200
|
+
],
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
return { text: '', rawContent: '', toolCalls: [] };
|
|
204
|
+
},
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
const { funnel, calls } = buildMind(provider);
|
|
208
|
+
const confirms: Array<{ name: string; summary?: string }> = [];
|
|
209
|
+
|
|
210
|
+
const res = await funnel.runTurn('swap 100000 sats for usdt', {
|
|
211
|
+
onConfirm: async (call): Promise<ConfirmDecision> => {
|
|
212
|
+
confirms.push({ name: call.name, summary: call.summary });
|
|
213
|
+
return { approved: true };
|
|
214
|
+
},
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
expect(res.tier).toBe('recipe');
|
|
218
|
+
expect(res.route).toBe('kaleidoswap-atomic');
|
|
219
|
+
// The full deterministic chain, in order.
|
|
220
|
+
expect(calls.map((c) => c.name)).toEqual([
|
|
221
|
+
'kaleidoswap_get_quote',
|
|
222
|
+
'kaleidoswap_atomic_init',
|
|
223
|
+
'rln_get_node_info',
|
|
224
|
+
'rln_atomic_taker',
|
|
225
|
+
'kaleidoswap_atomic_execute',
|
|
226
|
+
]);
|
|
227
|
+
// init sources the asset ids + maker-unit amounts straight from the quote.
|
|
228
|
+
const init = calls.find((c) => c.name === 'kaleidoswap_atomic_init')!;
|
|
229
|
+
expect(init.args).toMatchObject({ rfq_id: 'rfq-1', from_asset: 'BTC', to_asset: 'rgb:USDT' });
|
|
230
|
+
// execute carries the node pubkey as taker_pubkey + the maker's payment_hash.
|
|
231
|
+
const exec = calls.find((c) => c.name === 'kaleidoswap_atomic_execute')!;
|
|
232
|
+
expect(exec.args).toMatchObject({ swapstring: 'SWAP/abc/def', taker_pubkey: '030637ec', payment_hash: 'ph-1' });
|
|
233
|
+
// EXACTLY ONE confirmation gate, fired before the first spend, with real numbers.
|
|
234
|
+
expect(confirms).toHaveLength(1);
|
|
235
|
+
expect(confirms[0]!.name).toBe('kaleidoswap_atomic_init');
|
|
236
|
+
expect(confirms[0]!.summary).toMatch(/swap/i);
|
|
237
|
+
expect(res.text).toMatch(/submitted|settling/i);
|
|
238
|
+
});
|
|
239
|
+
|
|
240
|
+
it('routes a plain "buy 1 usdt" to the ATOMIC swap (funded node), not channel onboarding', async () => {
|
|
241
|
+
// On a node with existing BTC liquidity, "buy 1 usdt" = swap BTC→USDT, NOT
|
|
242
|
+
// open a new channel. The model fills the implicit source (BTC) + buy leg.
|
|
243
|
+
const buyExtract: LLMProvider = {
|
|
244
|
+
name: 'extract',
|
|
245
|
+
async runTurn(input) {
|
|
246
|
+
if (input.tools?.some((t) => t.name === 'extract_request')) {
|
|
247
|
+
return {
|
|
248
|
+
text: '',
|
|
249
|
+
rawContent: '',
|
|
250
|
+
toolCalls: [
|
|
251
|
+
{ id: 'ex1', name: 'extract_request', arguments: { from_asset: 'BTC', to_asset: 'USDT', amount: 1, amount_side: 'to' } },
|
|
252
|
+
],
|
|
253
|
+
};
|
|
254
|
+
}
|
|
255
|
+
return { text: '', rawContent: '', toolCalls: [] };
|
|
256
|
+
},
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
const { funnel, calls } = buildMind(buyExtract);
|
|
260
|
+
const res = await funnel.runTurn('buy 1 usdt', { onConfirm: async () => ({ approved: true }) });
|
|
261
|
+
|
|
262
|
+
expect(res.tier).toBe('recipe');
|
|
263
|
+
expect(res.route).toBe('kaleidoswap-atomic');
|
|
264
|
+
expect(calls.map((c) => c.name)).toEqual([
|
|
265
|
+
'kaleidoswap_get_quote',
|
|
266
|
+
'kaleidoswap_atomic_init',
|
|
267
|
+
'rln_get_node_info',
|
|
268
|
+
'rln_atomic_taker',
|
|
269
|
+
'kaleidoswap_atomic_execute',
|
|
270
|
+
]);
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
it('routes explicit inbound-liquidity phrasing to channel onboarding', async () => {
|
|
274
|
+
// The channel-onboarding rail still wins for explicit channel/inbound
|
|
275
|
+
// phrasing (the atomic matcher excludes channel/inbound/liquidity).
|
|
276
|
+
const { funnel } = buildMind(scripted([{ text: '' }]));
|
|
277
|
+
const res = await funnel.runTurn('get 100 usdt inbound liquidity', {
|
|
278
|
+
onConfirm: async () => ({ approved: false }),
|
|
279
|
+
});
|
|
280
|
+
expect(res.tier).toBe('recipe');
|
|
281
|
+
expect(res.route).toBe(buyAssetChannelRecipe.name);
|
|
282
|
+
});
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
describe('desktop mind — find a merchant near a city', () => {
|
|
286
|
+
it('routes "where can I spend bitcoin near Rome" to agentic search_knowledge and surfaces the Rome merchants', async () => {
|
|
287
|
+
const { funnel, calls } = buildMind(
|
|
288
|
+
scripted([
|
|
289
|
+
{ text: '', toolCalls: [{ name: 'search_knowledge', arguments: { query: 'bitcoin merchants in Rome' } }] },
|
|
290
|
+
{ text: 'Near Rome you can spend at Bitcoin Caffè and Nakamoto Books.' },
|
|
291
|
+
]),
|
|
292
|
+
);
|
|
293
|
+
|
|
294
|
+
const res = await funnel.runTurn('where can I spend bitcoin near Rome?');
|
|
295
|
+
|
|
296
|
+
expect(res.tier).toBe('agentic');
|
|
297
|
+
const sk = calls.find((c) => c.name === 'search_knowledge');
|
|
298
|
+
expect(sk).toBeTruthy();
|
|
299
|
+
expect(String(sk!.args.query)).toMatch(/rome/i);
|
|
300
|
+
// Real retrieval over merchantsToDocuments: Rome places in, Milan out.
|
|
301
|
+
const result = String(res.toolCalls?.find((c) => c.name === 'search_knowledge')?.result ?? '');
|
|
302
|
+
expect(result).toMatch(/Bitcoin Caffè/);
|
|
303
|
+
expect(result).toMatch(/Nakamoto Books/);
|
|
304
|
+
expect(result).not.toMatch(/Satoshi Pizzeria|Milan/);
|
|
305
|
+
});
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
309
|
+
// Skill scoping — the layer that actually caused the desktop "I cannot check
|
|
310
|
+
// your balance, the tool is not available" bug. The agentic tier filters the
|
|
311
|
+
// model's tools to the SELECTED SKILL's `tools:` allowlist (engine.ts honours
|
|
312
|
+
// allowedTools). If a skill's allowlist names tools that don't exist on the
|
|
313
|
+
// host (e.g. `get_balances` while the desktop MCP exposes `rln_get_balances`),
|
|
314
|
+
// the real tool is filtered out and the model goes tool-less. These load the
|
|
315
|
+
// REAL desktop skills and assert the needed tool survives scoping.
|
|
316
|
+
// (The scenario tests above ran skill-LESS, which is exactly why they missed it.)
|
|
317
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
318
|
+
describe('desktop mind — skill scoping (real skills)', () => {
|
|
319
|
+
const SKILLS = loadSkillsDir(packagedSkillsDir());
|
|
320
|
+
|
|
321
|
+
it('loads the real desktop skills', () => {
|
|
322
|
+
expect(SKILLS.length).toBeGreaterThan(0);
|
|
323
|
+
expect(SKILLS.map((s) => s.name)).toEqual(
|
|
324
|
+
expect.arrayContaining(['wallet-assistant', 'rgb-lightning-node', 'kaleido-trading']),
|
|
325
|
+
);
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
it('wallet-assistant (triggers on "balance") exposes the real rln_*/wdk_* tool names', () => {
|
|
329
|
+
const wallet = SKILLS.find((s) => s.name === 'wallet-assistant')!;
|
|
330
|
+
expect(wallet.tools).toEqual(expect.arrayContaining(['rln_get_balances', 'wdk_get_balances']));
|
|
331
|
+
expect(wallet.tools).toEqual(expect.arrayContaining(['rln_get_address', 'rln_send_btc', 'rln_create_ln_invoice']));
|
|
332
|
+
});
|
|
333
|
+
|
|
334
|
+
it('rgb-lightning-node (triggers on "channels") exposes only canonical rln_* tools', () => {
|
|
335
|
+
const node = SKILLS.find((s) => s.name === 'rgb-lightning-node')!;
|
|
336
|
+
expect(node.tools).toContain('rln_list_channels');
|
|
337
|
+
expect(node.tools?.every((tool) => tool.startsWith('rln_'))).toBe(true);
|
|
338
|
+
});
|
|
339
|
+
|
|
340
|
+
it('kaleido-trading drops the phantom kaleidoswap_get_nodeinfo / get_order_history names', () => {
  // Checks the `tools:` allowlist of the packaged `kaleido-trading` skill:
  // names it must not list, and the two names it must list.
  const trading = SKILLS.find((s) => s.name === 'kaleido-trading')!;
  expect(trading.tools).not.toContain('kaleidoswap_get_nodeinfo');
  expect(trading.tools).not.toContain('kaleidoswap_get_order_history');
  expect(trading.tools).toEqual(expect.arrayContaining(['kaleidoswap_get_quote', 'kaleidoswap_place_order']));
  // Assert each name's absence on its own. The previous
  // `.not.toEqual(expect.arrayContaining([...]))` form only fails when ALL of
  // the listed names are present, so a subset could slip through unnoticed.
  // NOTE(review): assumes none of these four should be listed — confirm against
  // the packaged kaleido-trading SKILL.md.
  const absent = [
    'kaleidoswap_get_spreads',
    'kaleidoswap_get_open_orders',
    'kaleidoswap_cancel_order',
    'kaleidoswap_get_position',
  ];
  for (const name of absent) {
    expect(trading.tools).not.toContain(name);
  }
});
|
|
354
|
+
|
|
355
|
+
it('balance through the FULL mind WITH skills loaded still reaches rln_get_balances', async () => {
|
|
356
|
+
const logs: string[] = [];
|
|
357
|
+
const { funnel, calls } = buildMind(
|
|
358
|
+
scripted([
|
|
359
|
+
{ text: '', toolCalls: [{ name: 'rln_get_balances', arguments: {} }] },
|
|
360
|
+
{ text: 'You have 1,949,753 sats.' },
|
|
361
|
+
]),
|
|
362
|
+
{ skills: SKILLS, log: (m) => logs.push(m) },
|
|
363
|
+
);
|
|
364
|
+
|
|
365
|
+
const res = await funnel.runTurn("what's my balance?");
|
|
366
|
+
|
|
367
|
+
expect(res.tier).toBe('agentic');
|
|
368
|
+
// wallet-assistant is selected AND rln_get_balances survives its scoping…
|
|
369
|
+
const agenticLine = logs.find((l) => l.startsWith('tier=agentic'));
|
|
370
|
+
expect(agenticLine).toMatch(/skill=wallet-assistant/);
|
|
371
|
+
expect(agenticLine).toMatch(/rln_get_balances/);
|
|
372
|
+
// …and the tool actually executes (not narrated).
|
|
373
|
+
expect(calls.map((c) => c.name)).toContain('rln_get_balances');
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
it('list channels through the FULL mind WITH skills loaded reaches rln_list_channels', async () => {
|
|
377
|
+
const { funnel, calls } = buildMind(
|
|
378
|
+
scripted([
|
|
379
|
+
{ text: '', toolCalls: [{ name: 'rln_list_channels', arguments: {} }] },
|
|
380
|
+
{ text: 'You have 2 channels.' },
|
|
381
|
+
]),
|
|
382
|
+
{ skills: SKILLS },
|
|
383
|
+
);
|
|
384
|
+
|
|
385
|
+
const res = await funnel.runTurn('list my channels');
|
|
386
|
+
|
|
387
|
+
expect(res.tier).toBe('agentic');
|
|
388
|
+
expect(calls.map((c) => c.name)).toContain('rln_list_channels');
|
|
389
|
+
});
|
|
390
|
+
});
|
package/src/funnel.ts
CHANGED
|
@@ -302,6 +302,20 @@ export class Funnel {
|
|
|
302
302
|
let scoped: string[] | undefined;
|
|
303
303
|
if (allowedTools) {
|
|
304
304
|
scoped = [...new Set([...allowedTools, ...ambient])];
|
|
305
|
+
// Resilience against host tool-name drift: a skill's allowlist may name
|
|
306
|
+
// tools that don't exist on this host (e.g. the skill says `get_balances`
|
|
307
|
+
// but the desktop MCP exposes `rln_get_balances`). engine.runAgentic
|
|
308
|
+
// filters the model's tools to this list, so a fully-mismatched skill
|
|
309
|
+
// leaves the model TOOL-LESS — it then narrates "the tool isn't available"
|
|
310
|
+
// instead of acting. If NONE of the scoped tools resolve against the live
|
|
311
|
+
// registry, widen to the full surface so the agent can still work.
|
|
312
|
+
const present = new Set((await this.registry.listTools()).map((t) => t.name));
|
|
313
|
+
if (!scoped.some((n) => present.has(n))) {
|
|
314
|
+
this.log(
|
|
315
|
+
`tier=agentic: skill '${skill?.name ?? '?'}' tools resolved to 0 live tools — using full tool surface`,
|
|
316
|
+
);
|
|
317
|
+
scoped = undefined;
|
|
318
|
+
}
|
|
305
319
|
} else if (disabledAmbient.length) {
|
|
306
320
|
// No skill matched but a toggle is off: expose everything except the
|
|
307
321
|
// disabled ambient tools (the sources stay mounted — no rebuild).
|
package/src/index.ts
CHANGED
|
@@ -186,3 +186,44 @@ export type { Skill, SkillReference, SkillSelector } from './skills/types.js';
|
|
|
186
186
|
|
|
187
187
|
export { TurnLogger, defaultMask } from './logger.js';
|
|
188
188
|
export type { TurnLog, Device, LoggerIO, LoggerOptions } from './logger.js';
|
|
189
|
+
|
|
190
|
+
// ── Autonomy (the task brain: scheduled tasks + run history + spend guardrails)
|
|
191
|
+
// The operational half of the agent's memory — the state nanobot kept in
|
|
192
|
+
// tasks.json + cron + run history, lifted into core (storage/timers injected).
|
|
193
|
+
export {
|
|
194
|
+
InMemoryTaskStore,
|
|
195
|
+
defaultTaskSeeds,
|
|
196
|
+
TaskRunLog,
|
|
197
|
+
createTaskScheduler,
|
|
198
|
+
evaluateSpend,
|
|
199
|
+
DEFAULT_RISK_LIMITS,
|
|
200
|
+
buildTaskPrompt,
|
|
201
|
+
ZERO_ALLOCATION,
|
|
202
|
+
} from './autonomy/index.js';
|
|
203
|
+
export type {
|
|
204
|
+
TaskAllocation,
|
|
205
|
+
AgentTask,
|
|
206
|
+
NewTask,
|
|
207
|
+
TaskSeed,
|
|
208
|
+
TaskStore,
|
|
209
|
+
TaskStoreIO,
|
|
210
|
+
TaskStoreOptions,
|
|
211
|
+
TaskRunCost,
|
|
212
|
+
TaskStats,
|
|
213
|
+
TaskRunRecord,
|
|
214
|
+
RunLogSnapshot,
|
|
215
|
+
RunLogIO,
|
|
216
|
+
RunLogOptions,
|
|
217
|
+
TaskRunOutcome,
|
|
218
|
+
RunTask,
|
|
219
|
+
TimerHandle,
|
|
220
|
+
SchedulerOptions,
|
|
221
|
+
TaskScheduler,
|
|
222
|
+
SpendKind,
|
|
223
|
+
RiskLimits,
|
|
224
|
+
SpendAction,
|
|
225
|
+
RiskContext,
|
|
226
|
+
RiskOutcome,
|
|
227
|
+
RiskVerdict,
|
|
228
|
+
TaskPromptOptions,
|
|
229
|
+
} from './autonomy/index.js';
|
|
@@ -218,8 +218,8 @@ export const BITCOIN_COPILOT_DOCS: RagDocument[] = [
|
|
|
218
218
|
'channel size you can buy, fees, accepted payment options). It is NOT ' +
|
|
219
219
|
'your current inbound capacity — it describes what the LSP is willing ' +
|
|
220
220
|
'to sell you. To learn your CURRENT receive capacity, sum the remote ' +
|
|
221
|
-
'balance of your existing channels; to BUY MORE, use
|
|
222
|
-
'
|
|
221
|
+
'balance of your existing channels; to BUY MORE, use kaleidoswap_lsp_get_info and ' +
|
|
222
|
+
'kaleidoswap_lsp_create_order.',
|
|
223
223
|
metadata: { topic: 'channels' },
|
|
224
224
|
},
|
|
225
225
|
{
|
package/src/qvac/index.ts
CHANGED
package/src/qvac/parse.ts
CHANGED
|
@@ -6,6 +6,21 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { cleanAssistantVisibleText } from './text.js';
|
|
8
8
|
|
|
9
|
+
/**
|
|
10
|
+
* Per-turn inference stats from a QVAC `completion().final.stats` frame. The
|
|
11
|
+
* authoritative source for which backend actually ran (`backendDevice`) and the
|
|
12
|
+
* real throughput — hosts surface these instead of guessing from load config.
|
|
13
|
+
*/
|
|
14
|
+
export interface QvacTurnStats {
|
|
15
|
+
/** The backend that actually executed this turn — the real "is GPU active". */
|
|
16
|
+
backendDevice?: 'cpu' | 'gpu';
|
|
17
|
+
tokensPerSecond?: number;
|
|
18
|
+
totalTokens?: number;
|
|
19
|
+
promptTokens?: number;
|
|
20
|
+
contextSize?: number;
|
|
21
|
+
totalTime?: number;
|
|
22
|
+
}
|
|
23
|
+
|
|
9
24
|
/** Structural subset of a QVAC `completion().final` we depend on. */
|
|
10
25
|
export interface QvacFinalLike {
|
|
11
26
|
/** Visible assistant text (excludes `<think>` reasoning). */
|
|
@@ -20,6 +35,8 @@ export interface QvacFinalLike {
|
|
|
20
35
|
* it so the funnel can tell a truncated tool-call from a complete one.
|
|
21
36
|
*/
|
|
22
37
|
stopReason?: 'length' | 'cancelled' | string;
|
|
38
|
+
/** Inference stats (backend device, throughput). Present on a natural finish. */
|
|
39
|
+
stats?: QvacTurnStats;
|
|
23
40
|
}
|
|
24
41
|
|
|
25
42
|
export interface ParsedTurn {
|
|
@@ -33,6 +50,8 @@ export interface ParsedTurn {
|
|
|
33
50
|
truncated: boolean;
|
|
34
51
|
/** Raw stop reason from the SDK, when provided. */
|
|
35
52
|
stopReason?: string;
|
|
53
|
+
/** Inference stats for this turn (backend device, throughput), when provided. */
|
|
54
|
+
stats?: QvacTurnStats;
|
|
36
55
|
}
|
|
37
56
|
|
|
38
57
|
/** Parse the first balanced `{…}` from a string as a `{name, arguments}` call. */
|
|
@@ -119,5 +138,6 @@ export function finalToTurn(final: QvacFinalLike, streamed = ''): ParsedTurn {
|
|
|
119
138
|
toolCalls,
|
|
120
139
|
truncated: final.stopReason === 'length',
|
|
121
140
|
stopReason: final.stopReason,
|
|
141
|
+
stats: final.stats,
|
|
122
142
|
};
|
|
123
143
|
}
|
|
@@ -84,6 +84,23 @@ describe('createQvacProvider.runTurn', () => {
|
|
|
84
84
|
expect(calls[0].generationParams).toBeUndefined();
|
|
85
85
|
});
|
|
86
86
|
|
|
87
|
+
it('caps thinking by tokens — cancels the run and returns a fallback', async () => {
|
|
88
|
+
const cancel = vi.fn(async () => {});
|
|
89
|
+
const { fn } = fakeCompletion(
|
|
90
|
+
{ contentText: '', toolCalls: [], raw: { fullText: '' }, stopReason: 'cancelled' },
|
|
91
|
+
[{ type: 'thinkingDelta', text: 'z'.repeat(40) }], // ~10 tokens, budget 4
|
|
92
|
+
);
|
|
93
|
+
const p = createQvacProvider({
|
|
94
|
+
completion: fn as any,
|
|
95
|
+
cancel: cancel as any,
|
|
96
|
+
getModelId: () => 'm1',
|
|
97
|
+
maxThinkingTokens: 4,
|
|
98
|
+
});
|
|
99
|
+
const out = await p.runTurn({ messages: [{ role: 'user', content: 'think hard' }], tools: [] });
|
|
100
|
+
expect(cancel).toHaveBeenCalledWith({ requestId: 'req-1' });
|
|
101
|
+
expect(out.text).toMatch(/thinking budget/i);
|
|
102
|
+
});
|
|
103
|
+
|
|
87
104
|
it('streams visible content tokens to onToken', async () => {
|
|
88
105
|
const { fn } = fakeCompletion(
|
|
89
106
|
{ contentText: 'Hi there', toolCalls: [], raw: { fullText: 'Hi there' } },
|
package/src/qvac/provider.ts
CHANGED
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
*/
|
|
19
19
|
import type * as QvacSdk from '@qvac/sdk';
|
|
20
20
|
import type { LLMProvider, TurnInput, TurnOutput } from '../providers/types.js';
|
|
21
|
+
import type { QvacTurnStats } from './parse.js';
|
|
21
22
|
import { consumeRun } from './stream.js';
|
|
22
23
|
|
|
23
24
|
type CompletionFn = typeof QvacSdk.completion;
|
|
@@ -38,17 +39,37 @@ export interface QvacProviderOptions {
|
|
|
38
39
|
defaultTemperature?: number;
|
|
39
40
|
/** Default max output tokens — caps a turn so it can't ramble. Omit for uncapped. */
|
|
40
41
|
defaultMaxTokens?: number;
|
|
42
|
+
/**
|
|
43
|
+
* Cap `<think>` reasoning at this many TOKENS (not seconds — tok/s varies, and
|
|
44
|
+
* the SDK has no numeric reasoning budget). When a turn's thinking exceeds it,
|
|
45
|
+
* the run is cancelled and a short fallback is returned instead of hanging on
|
|
46
|
+
* "Thinking…". Omit for unlimited reasoning.
|
|
47
|
+
*/
|
|
48
|
+
maxThinkingTokens?: number;
|
|
41
49
|
/** Stream the model's `<think>` reasoning, when a host wants to surface it. */
|
|
42
50
|
onThinking?: (token: string) => void;
|
|
51
|
+
/**
|
|
52
|
+
* Per-turn inference stats (real backend device + throughput), when a host
|
|
53
|
+
* wants to surface them. Fires once per turn after the `final` frame resolves.
|
|
54
|
+
*/
|
|
55
|
+
onStats?: (stats: QvacTurnStats) => void;
|
|
43
56
|
}
|
|
44
57
|
|
|
45
58
|
/** TurnInput plus the per-call knobs the funnel/voice paths pass through. */
|
|
46
59
|
export interface QvacTurnInput extends TurnInput {
|
|
47
60
|
temperature?: number;
|
|
48
61
|
maxTokens?: number;
|
|
62
|
+
/** Per-turn override of the thinking-token cap (see QvacProviderOptions). */
|
|
63
|
+
maxThinkingTokens?: number;
|
|
49
64
|
onThinking?: (token: string) => void;
|
|
65
|
+
onStats?: (stats: QvacTurnStats) => void;
|
|
50
66
|
}
|
|
51
67
|
|
|
68
|
+
/** Shown when a turn is cut off because it blew its thinking-token budget. */
|
|
69
|
+
const THINKING_BUDGET_FALLBACK =
|
|
70
|
+
'I spent my whole thinking budget on that one without landing an answer. ' +
|
|
71
|
+
'Try asking again, more specifically.';
|
|
72
|
+
|
|
52
73
|
export function createQvacProvider(options: QvacProviderOptions): LLMProvider {
|
|
53
74
|
return {
|
|
54
75
|
name: 'qvac',
|
|
@@ -98,13 +119,28 @@ export function createQvacProvider(options: QvacProviderOptions): LLMProvider {
|
|
|
98
119
|
...(tools ? { tools } : {}),
|
|
99
120
|
} as unknown as Parameters<CompletionFn>[0]);
|
|
100
121
|
|
|
122
|
+
const maxThinkingTokens = input.maxThinkingTokens ?? options.maxThinkingTokens;
|
|
101
123
|
const result = await consumeRun(run, {
|
|
102
124
|
onToken: input.onToken,
|
|
103
125
|
onThinking: input.onThinking ?? options.onThinking,
|
|
126
|
+
maxThinkingTokens,
|
|
127
|
+
// Cancel the in-flight run the moment the thinking budget is blown — the
|
|
128
|
+
// SDK keeps generating otherwise. Fire-and-forget; `final` then resolves.
|
|
129
|
+
onThinkingBudgetExceeded: () => {
|
|
130
|
+
void options.cancel({ requestId: run.requestId }).catch(() => {});
|
|
131
|
+
},
|
|
104
132
|
});
|
|
105
133
|
|
|
134
|
+
// Surface the real per-turn inference stats (backend device + throughput).
|
|
135
|
+
if (result.stats) (input.onStats ?? options.onStats)?.(result.stats);
|
|
136
|
+
|
|
137
|
+
// A turn cut off mid-reasoning has no visible answer — return a short note
|
|
138
|
+
// instead of an empty bubble so the agentic loop ends cleanly.
|
|
139
|
+
const text =
|
|
140
|
+
result.text || (result.thinkingBudgetExceeded ? THINKING_BUDGET_FALLBACK : result.text);
|
|
141
|
+
|
|
106
142
|
return {
|
|
107
|
-
text
|
|
143
|
+
text,
|
|
108
144
|
rawContent: result.rawContent,
|
|
109
145
|
toolCalls: result.toolCalls,
|
|
110
146
|
requestId: result.requestId,
|
package/src/qvac/stream.test.ts
CHANGED
|
@@ -67,6 +67,31 @@ describe('consumeRun', () => {
|
|
|
67
67
|
expect(out.truncated).toBe(true);
|
|
68
68
|
});
|
|
69
69
|
|
|
70
|
+
it('stops forwarding and flags when thinking exceeds maxThinkingTokens', async () => {
|
|
71
|
+
const thinking: string[] = [];
|
|
72
|
+
let exceeded = 0;
|
|
73
|
+
// 8-char deltas ≈ 2 tokens each; budget 4 tokens trips after the 2nd.
|
|
74
|
+
const run = fakeRun(
|
|
75
|
+
[
|
|
76
|
+
{ type: 'thinkingDelta', text: 'aaaaaaaa' },
|
|
77
|
+
{ type: 'thinkingDelta', text: 'bbbbbbbb' },
|
|
78
|
+
{ type: 'thinkingDelta', text: 'cccccccc' },
|
|
79
|
+
{ type: 'contentDelta', text: 'should-not-arrive' },
|
|
80
|
+
],
|
|
81
|
+
{ contentText: '', toolCalls: [], raw: { fullText: '' }, stopReason: 'cancelled' },
|
|
82
|
+
);
|
|
83
|
+
const out = await consumeRun(run, {
|
|
84
|
+
onThinking: (t) => thinking.push(t),
|
|
85
|
+
maxThinkingTokens: 4,
|
|
86
|
+
onThinkingBudgetExceeded: () => {
|
|
87
|
+
exceeded += 1;
|
|
88
|
+
},
|
|
89
|
+
});
|
|
90
|
+
expect(exceeded).toBe(1);
|
|
91
|
+
expect(out.thinkingBudgetExceeded).toBe(true);
|
|
92
|
+
expect(thinking).toEqual(['aaaaaaaa', 'bbbbbbbb']); // stopped at the trip
|
|
93
|
+
});
|
|
94
|
+
|
|
70
95
|
it('ignores delta events with no text', async () => {
|
|
71
96
|
const tokens: string[] = [];
|
|
72
97
|
const run = fakeRun(
|