@kaleidorg/mind 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/dist/autonomy/index.d.ts +21 -0
  2. package/dist/autonomy/index.d.ts.map +1 -0
  3. package/dist/autonomy/index.js +16 -0
  4. package/dist/autonomy/index.js.map +1 -0
  5. package/dist/autonomy/prompt.d.ts +21 -0
  6. package/dist/autonomy/prompt.d.ts.map +1 -0
  7. package/dist/autonomy/prompt.js +37 -0
  8. package/dist/autonomy/prompt.js.map +1 -0
  9. package/dist/autonomy/risk.d.ts +53 -0
  10. package/dist/autonomy/risk.d.ts.map +1 -0
  11. package/dist/autonomy/risk.js +74 -0
  12. package/dist/autonomy/risk.js.map +1 -0
  13. package/dist/autonomy/run-state.d.ts +39 -0
  14. package/dist/autonomy/run-state.d.ts.map +1 -0
  15. package/dist/autonomy/run-state.js +118 -0
  16. package/dist/autonomy/run-state.js.map +1 -0
  17. package/dist/autonomy/scheduler.d.ts +18 -0
  18. package/dist/autonomy/scheduler.d.ts.map +1 -0
  19. package/dist/autonomy/scheduler.js +113 -0
  20. package/dist/autonomy/scheduler.js.map +1 -0
  21. package/dist/autonomy/task-store.d.ts +44 -0
  22. package/dist/autonomy/task-store.d.ts.map +1 -0
  23. package/dist/autonomy/task-store.js +139 -0
  24. package/dist/autonomy/task-store.js.map +1 -0
  25. package/dist/autonomy/types.d.ts +164 -0
  26. package/dist/autonomy/types.d.ts.map +1 -0
  27. package/dist/autonomy/types.js +20 -0
  28. package/dist/autonomy/types.js.map +1 -0
  29. package/dist/funnel.d.ts.map +1 -1
  30. package/dist/funnel.js +12 -0
  31. package/dist/funnel.js.map +1 -1
  32. package/dist/index.d.ts +2 -0
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +4 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/knowledge/bitcoin-copilot.js +2 -2
  37. package/dist/knowledge/bitcoin-copilot.js.map +1 -1
  38. package/dist/qvac/index.d.ts +1 -1
  39. package/dist/qvac/index.d.ts.map +1 -1
  40. package/dist/qvac/index.js.map +1 -1
  41. package/dist/qvac/parse.d.ts +33 -0
  42. package/dist/qvac/parse.d.ts.map +1 -1
  43. package/dist/qvac/parse.js +69 -5
  44. package/dist/qvac/parse.js.map +1 -1
  45. package/dist/qvac/provider.d.ts +16 -0
  46. package/dist/qvac/provider.d.ts.map +1 -1
  47. package/dist/qvac/provider.js +17 -1
  48. package/dist/qvac/provider.js.map +1 -1
  49. package/dist/qvac/stream.d.ts +16 -0
  50. package/dist/qvac/stream.d.ts.map +1 -1
  51. package/dist/qvac/stream.js +21 -1
  52. package/dist/qvac/stream.js.map +1 -1
  53. package/dist/qvac/text.d.ts.map +1 -1
  54. package/dist/qvac/text.js +4 -0
  55. package/dist/qvac/text.js.map +1 -1
  56. package/dist/recipe/buy-asset-channel.d.ts +1 -1
  57. package/dist/recipe/buy-asset-channel.d.ts.map +1 -1
  58. package/dist/recipe/buy-asset-channel.js +4 -3
  59. package/dist/recipe/buy-asset-channel.js.map +1 -1
  60. package/dist/recipe/kaleidoswap-atomic.d.ts +1 -1
  61. package/dist/recipe/kaleidoswap-atomic.d.ts.map +1 -1
  62. package/dist/recipe/kaleidoswap-atomic.js +5 -4
  63. package/dist/recipe/kaleidoswap-atomic.js.map +1 -1
  64. package/dist/recipe/runner.d.ts.map +1 -1
  65. package/dist/recipe/runner.js +38 -0
  66. package/dist/recipe/runner.js.map +1 -1
  67. package/dist/tools/mcp.d.ts +19 -0
  68. package/dist/tools/mcp.d.ts.map +1 -1
  69. package/dist/tools/mcp.js +51 -9
  70. package/dist/tools/mcp.js.map +1 -1
  71. package/package.json +2 -1
  72. package/skills/channel-manager/SKILL.md +59 -0
  73. package/skills/dca/SKILL.md +48 -0
  74. package/skills/kaleido-lsps/SKILL.md +12 -12
  75. package/skills/kaleido-trading/SKILL.md +1 -1
  76. package/skills/liquidity-optimizer/SKILL.md +91 -0
  77. package/skills/merchant-finder/SKILL.md +1 -1
  78. package/skills/portfolio-manager/SKILL.md +67 -0
  79. package/skills/rgb-lightning-node/SKILL.md +3 -3
  80. package/skills/wallet-assistant/SKILL.md +1 -1
  81. package/src/autonomy/autonomy.test.ts +348 -0
  82. package/src/autonomy/index.ts +50 -0
  83. package/src/autonomy/prompt.ts +48 -0
  84. package/src/autonomy/risk.ts +139 -0
  85. package/src/autonomy/run-state.ts +144 -0
  86. package/src/autonomy/scheduler.ts +120 -0
  87. package/src/autonomy/task-store.ts +167 -0
  88. package/src/autonomy/types.ts +186 -0
  89. package/src/funnel.mind.test.ts +390 -0
  90. package/src/funnel.ts +14 -0
  91. package/src/index.ts +41 -0
  92. package/src/knowledge/bitcoin-copilot.ts +2 -2
  93. package/src/qvac/index.ts +1 -0
  94. package/src/qvac/parse.test.ts +70 -1
  95. package/src/qvac/parse.ts +91 -5
  96. package/src/qvac/provider.test.ts +17 -0
  97. package/src/qvac/provider.ts +37 -1
  98. package/src/qvac/stream.test.ts +25 -0
  99. package/src/qvac/stream.ts +38 -1
  100. package/src/qvac/text.ts +4 -0
  101. package/src/recipe/buy-asset-channel.test.ts +5 -0
  102. package/src/recipe/buy-asset-channel.ts +6 -3
  103. package/src/recipe/kaleidoswap-atomic.test.ts +3 -3
  104. package/src/recipe/kaleidoswap-atomic.ts +5 -4
  105. package/src/recipe/recipe.test.ts +16 -0
  106. package/src/recipe/runner.ts +41 -0
  107. package/src/tools/mcp.live.test.ts +116 -0
  108. package/src/tools/mcp.parse.test.ts +37 -0
  109. package/src/tools/mcp.ts +55 -9
package/src/qvac/parse.ts CHANGED
@@ -6,6 +6,21 @@
6
6
  */
7
7
  import { cleanAssistantVisibleText } from './text.js';
8
8
 
9
+ /**
10
+ * Per-turn inference stats from a QVAC `completion().final.stats` frame. The
11
+ * authoritative source for which backend actually ran (`backendDevice`) and the
12
+ * real throughput — hosts surface these instead of guessing from load config.
13
+ */
14
+ export interface QvacTurnStats {
15
+ /** The backend that actually executed this turn — the real "is GPU active". */
16
+ backendDevice?: 'cpu' | 'gpu';
17
+ tokensPerSecond?: number;
18
+ totalTokens?: number;
19
+ promptTokens?: number;
20
+ contextSize?: number;
21
+ totalTime?: number;
22
+ }
23
+
9
24
  /** Structural subset of a QVAC `completion().final` we depend on. */
10
25
  export interface QvacFinalLike {
11
26
  /** Visible assistant text (excludes `<think>` reasoning). */
@@ -20,6 +35,8 @@ export interface QvacFinalLike {
20
35
  * it so the funnel can tell a truncated tool-call from a complete one.
21
36
  */
22
37
  stopReason?: 'length' | 'cancelled' | string;
38
+ /** Inference stats (backend device, throughput). Present on a natural finish. */
39
+ stats?: QvacTurnStats;
23
40
  }
24
41
 
25
42
  export interface ParsedTurn {
@@ -33,25 +50,94 @@ export interface ParsedTurn {
33
50
  truncated: boolean;
34
51
  /** Raw stop reason from the SDK, when provided. */
35
52
  stopReason?: string;
53
+ /** Inference stats for this turn (backend device, throughput), when provided. */
54
+ stats?: QvacTurnStats;
55
+ }
56
+
57
/**
 * Parse the first balanced `{…}` in `s` as a `{name, arguments}` tool call.
 *
 * The scan is string-aware: braces that appear inside JSON string literals
 * (including after escaped quotes) do not change the nesting depth, so an
 * argument value such as `"}"` does not cut the object short.
 *
 * @param s Text that may contain a JSON tool-call object.
 * @returns The call when the first balanced object is valid JSON carrying a
 *   string `name`; otherwise `null`. Only the first balanced object is tried.
 *   `arguments` falls back to `{}` unless it is a plain (non-array) object.
 */
function parseCallObject(
  s: string,
): { name: string; arguments: Record<string, unknown> } | null {
  const start = s.indexOf('{');
  if (start < 0) return null;

  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < s.length; i++) {
    const ch = s[i];

    if (inString) {
      // Inside a string literal only an unescaped quote is significant.
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
      continue;
    }
    if (ch === '{') {
      depth++;
      continue;
    }
    if (ch !== '}') continue;
    depth--;
    if (depth !== 0) continue;

    // First balanced object found: it either parses as a call or we give up.
    try {
      const parsed: unknown = JSON.parse(s.slice(start, i + 1));
      if (parsed === null || typeof parsed !== 'object') return null;
      const { name, arguments: rawArgs } = parsed as {
        name?: unknown;
        arguments?: unknown;
      };
      if (typeof name !== 'string') return null;
      const isPlainObject =
        rawArgs !== null && typeof rawArgs === 'object' && !Array.isArray(rawArgs);
      return {
        name,
        arguments: isPlainObject ? (rawArgs as Record<string, unknown>) : {},
      };
    } catch {
      /* malformed JSON — give up on this fragment */
    }
    return null;
  }
  return null;
}
88
+
89
/**
 * Recover tool calls a model emitted as PLAIN TEXT instead of structured frames
 * — `<tool_call>{"name":…,"arguments":…}</tool_call>` (Qwen/Hermes) or a bare
 * leading `{"name":…,"arguments":…}`. Without recovery the call leaks into the
 * visible answer and never runs.
 *
 * Tagged calls take precedence: if any `<tool_call>` block yields a call, only
 * the tagged calls are returned. Otherwise a bare object is accepted only when
 * the text (after leading whitespace) begins with `{` followed by `"name":`.
 * The opening brace is required so that brace-less text such as
 * `"name":"x","arguments":{"name":"y"}` is not misread as a call to `y` via
 * its nested arguments object.
 *
 * @param text Raw model output to scan.
 * @returns Recovered calls in order of appearance; empty when none are found.
 */
export function extractTextToolCalls(
  text: string,
): Array<{ name: string; arguments: Record<string, unknown> }> {
  const calls: Array<{ name: string; arguments: Record<string, unknown> }> = [];
  for (const m of text.matchAll(/<tool_call\b[^>]*>([\s\S]*?)<\/tool_call>/gi)) {
    const c = parseCallObject(m[1] ?? '');
    if (c) calls.push(c);
  }
  if (calls.length) return calls;
  // No tags — accept a bare tool-call object only at the very start of the
  // text (so we don't misread JSON the model is merely talking about).
  if (/^\s*\{\s*"name"\s*:/i.test(text)) {
    const c = parseCallObject(text);
    if (c) calls.push(c);
  }
  return calls;
}
37
113
 
38
114
  /**
39
115
  * Map a completion `final` (plus the streamed fallback text) into a ParsedTurn.
40
116
  * `rawContent` prefers the SDK's framed `raw.fullText` so the Engine can anchor
41
117
  * the next turn; falls back to the visible text when a provider has no raw form.
118
+ *
119
+ * When the SDK reports no structured tool calls, we re-scan the raw text for
120
+ * tool calls the model emitted inline (see `extractTextToolCalls`) so they still
121
+ * execute instead of leaking into the chat.
42
122
  */
43
123
  export function finalToTurn(final: QvacFinalLike, streamed = ''): ParsedTurn {
44
124
  const rawText = final.contentText || streamed;
45
125
  const text = cleanAssistantVisibleText(rawText);
126
+ let toolCalls = (final.toolCalls ?? []).map((c) => ({
127
+ id: c.id,
128
+ name: c.name,
129
+ arguments: c.arguments ?? {},
130
+ }));
131
+ if (toolCalls.length === 0) {
132
+ const recovered = extractTextToolCalls(final.raw?.fullText ?? rawText);
133
+ if (recovered.length) toolCalls = recovered.map((c) => ({ id: undefined, ...c }));
134
+ }
46
135
  return {
47
136
  text,
48
137
  rawContent: final.raw?.fullText ?? rawText,
49
- toolCalls: (final.toolCalls ?? []).map((c) => ({
50
- id: c.id,
51
- name: c.name,
52
- arguments: c.arguments ?? {},
53
- })),
138
+ toolCalls,
54
139
  truncated: final.stopReason === 'length',
55
140
  stopReason: final.stopReason,
141
+ stats: final.stats,
56
142
  };
57
143
  }
@@ -84,6 +84,23 @@ describe('createQvacProvider.runTurn', () => {
84
84
  expect(calls[0].generationParams).toBeUndefined();
85
85
  });
86
86
 
87
+ it('caps thinking by tokens — cancels the run and returns a fallback', async () => {
88
+ const cancel = vi.fn(async () => {});
89
+ const { fn } = fakeCompletion(
90
+ { contentText: '', toolCalls: [], raw: { fullText: '' }, stopReason: 'cancelled' },
91
+ [{ type: 'thinkingDelta', text: 'z'.repeat(40) }], // ~10 tokens, budget 4
92
+ );
93
+ const p = createQvacProvider({
94
+ completion: fn as any,
95
+ cancel: cancel as any,
96
+ getModelId: () => 'm1',
97
+ maxThinkingTokens: 4,
98
+ });
99
+ const out = await p.runTurn({ messages: [{ role: 'user', content: 'think hard' }], tools: [] });
100
+ expect(cancel).toHaveBeenCalledWith({ requestId: 'req-1' });
101
+ expect(out.text).toMatch(/thinking budget/i);
102
+ });
103
+
87
104
  it('streams visible content tokens to onToken', async () => {
88
105
  const { fn } = fakeCompletion(
89
106
  { contentText: 'Hi there', toolCalls: [], raw: { fullText: 'Hi there' } },
@@ -18,6 +18,7 @@
18
18
  */
19
19
  import type * as QvacSdk from '@qvac/sdk';
20
20
  import type { LLMProvider, TurnInput, TurnOutput } from '../providers/types.js';
21
+ import type { QvacTurnStats } from './parse.js';
21
22
  import { consumeRun } from './stream.js';
22
23
 
23
24
  type CompletionFn = typeof QvacSdk.completion;
@@ -38,17 +39,37 @@ export interface QvacProviderOptions {
38
39
  defaultTemperature?: number;
39
40
  /** Default max output tokens — caps a turn so it can't ramble. Omit for uncapped. */
40
41
  defaultMaxTokens?: number;
42
+ /**
43
+ * Cap `<think>` reasoning at this many TOKENS (not seconds — tok/s varies, and
44
+ * the SDK has no numeric reasoning budget). When a turn's thinking exceeds it,
45
+ * the run is cancelled and a short fallback is returned instead of hanging on
46
+ * "Thinking…". Omit for unlimited reasoning.
47
+ */
48
+ maxThinkingTokens?: number;
41
49
  /** Stream the model's `<think>` reasoning, when a host wants to surface it. */
42
50
  onThinking?: (token: string) => void;
51
+ /**
52
+ * Per-turn inference stats (real backend device + throughput), when a host
53
+ * wants to surface them. Fires once per turn after the `final` frame resolves.
54
+ */
55
+ onStats?: (stats: QvacTurnStats) => void;
43
56
  }
44
57
 
45
58
  /** TurnInput plus the per-call knobs the funnel/voice paths pass through. */
46
59
  export interface QvacTurnInput extends TurnInput {
47
60
  temperature?: number;
48
61
  maxTokens?: number;
62
+ /** Per-turn override of the thinking-token cap (see QvacProviderOptions). */
63
+ maxThinkingTokens?: number;
49
64
  onThinking?: (token: string) => void;
65
+ onStats?: (stats: QvacTurnStats) => void;
50
66
  }
51
67
 
68
+ /** Shown when a turn is cut off because it blew its thinking-token budget. */
69
+ const THINKING_BUDGET_FALLBACK =
70
+ 'I spent my whole thinking budget on that one without landing an answer. ' +
71
+ 'Try asking again, more specifically.';
72
+
52
73
  export function createQvacProvider(options: QvacProviderOptions): LLMProvider {
53
74
  return {
54
75
  name: 'qvac',
@@ -98,13 +119,28 @@ export function createQvacProvider(options: QvacProviderOptions): LLMProvider {
98
119
  ...(tools ? { tools } : {}),
99
120
  } as unknown as Parameters<CompletionFn>[0]);
100
121
 
122
+ const maxThinkingTokens = input.maxThinkingTokens ?? options.maxThinkingTokens;
101
123
  const result = await consumeRun(run, {
102
124
  onToken: input.onToken,
103
125
  onThinking: input.onThinking ?? options.onThinking,
126
+ maxThinkingTokens,
127
+ // Cancel the in-flight run the moment the thinking budget is blown — the
128
+ // SDK keeps generating otherwise. Fire-and-forget; `final` then resolves.
129
+ onThinkingBudgetExceeded: () => {
130
+ void options.cancel({ requestId: run.requestId }).catch(() => {});
131
+ },
104
132
  });
105
133
 
134
+ // Surface the real per-turn inference stats (backend device + throughput).
135
+ if (result.stats) (input.onStats ?? options.onStats)?.(result.stats);
136
+
137
+ // A turn cut off mid-reasoning has no visible answer — return a short note
138
+ // instead of an empty bubble so the agentic loop ends cleanly.
139
+ const text =
140
+ result.text || (result.thinkingBudgetExceeded ? THINKING_BUDGET_FALLBACK : result.text);
141
+
106
142
  return {
107
- text: result.text,
143
+ text,
108
144
  rawContent: result.rawContent,
109
145
  toolCalls: result.toolCalls,
110
146
  requestId: result.requestId,
@@ -67,6 +67,31 @@ describe('consumeRun', () => {
67
67
  expect(out.truncated).toBe(true);
68
68
  });
69
69
 
70
+ it('stops forwarding and flags when thinking exceeds maxThinkingTokens', async () => {
71
+ const thinking: string[] = [];
72
+ let exceeded = 0;
73
+ // 8-char deltas ≈ 2 tokens each; budget 4 tokens trips after the 2nd.
74
+ const run = fakeRun(
75
+ [
76
+ { type: 'thinkingDelta', text: 'aaaaaaaa' },
77
+ { type: 'thinkingDelta', text: 'bbbbbbbb' },
78
+ { type: 'thinkingDelta', text: 'cccccccc' },
79
+ { type: 'contentDelta', text: 'should-not-arrive' },
80
+ ],
81
+ { contentText: '', toolCalls: [], raw: { fullText: '' }, stopReason: 'cancelled' },
82
+ );
83
+ const out = await consumeRun(run, {
84
+ onThinking: (t) => thinking.push(t),
85
+ maxThinkingTokens: 4,
86
+ onThinkingBudgetExceeded: () => {
87
+ exceeded += 1;
88
+ },
89
+ });
90
+ expect(exceeded).toBe(1);
91
+ expect(out.thinkingBudgetExceeded).toBe(true);
92
+ expect(thinking).toEqual(['aaaaaaaa', 'bbbbbbbb']); // stopped at the trip
93
+ });
94
+
70
95
  it('ignores delta events with no text', async () => {
71
96
  const tokens: string[] = [];
72
97
  const run = fakeRun(
@@ -27,10 +27,31 @@ export interface StreamHandlers {
27
27
  onToken?: (token: string) => void;
28
28
  /** The model's `<think>` reasoning, streamed separately. */
29
29
  onThinking?: (token: string) => void;
30
+ /**
31
+ * Cap the `<think>` reasoning at this many tokens. The cap is on TOKENS, not
32
+ * wall-clock seconds — tok/s varies by model and hardware, so a time budget is
33
+ * unreliable; the SDK has no numeric reasoning budget (`reasoning_budget` is
34
+ * only on/off), so we estimate thinking tokens (~4 chars each) and stop the run once they reach
35
+ * this. Omit for unlimited reasoning.
36
+ */
37
+ maxThinkingTokens?: number;
38
+ /**
39
+ * Fires once, the moment the thinking budget is exceeded, so the host can
40
+ * cancel the in-flight run (the SDK keeps generating otherwise). consumeRun
41
+ * stops forwarding deltas after this.
42
+ */
43
+ onThinkingBudgetExceeded?: () => void;
30
44
  }
31
45
 
32
46
  export interface ConsumedTurn extends ParsedTurn {
33
47
  requestId: string;
48
+ /** True when the run was stopped because `<think>` hit `maxThinkingTokens`. */
49
+ thinkingBudgetExceeded?: boolean;
50
+ }
51
+
52
/**
 * Estimate a token count from a character count, assuming roughly four
 * characters per token. Partial tokens round up.
 */
function approxTokens(chars: number): number {
  const charsPerToken = 4;
  const estimate = chars / charsPerToken;
  return Math.ceil(estimate);
}
35
56
 
36
57
  /**
@@ -43,14 +64,30 @@ export async function consumeRun(
43
64
  handlers: StreamHandlers = {},
44
65
  ): Promise<ConsumedTurn> {
45
66
  let streamed = '';
67
+ let thinkingChars = 0;
68
+ let budgetExceeded = false;
46
69
  for await (const event of run.events) {
47
70
  if (event.type === 'contentDelta' && typeof event.text === 'string') {
48
71
  streamed += event.text;
49
72
  handlers.onToken?.(event.text);
50
73
  } else if (event.type === 'thinkingDelta' && typeof event.text === 'string') {
51
74
  handlers.onThinking?.(event.text);
75
+ if (handlers.maxThinkingTokens !== undefined && !budgetExceeded) {
76
+ thinkingChars += event.text.length;
77
+ if (approxTokens(thinkingChars) >= handlers.maxThinkingTokens) {
78
+ budgetExceeded = true;
79
+ handlers.onThinkingBudgetExceeded?.();
80
+ // Stop forwarding; the host cancels the run, so `final` resolves
81
+ // (stopReason 'cancelled') with whatever was produced so far.
82
+ break;
83
+ }
84
+ }
52
85
  }
53
86
  }
54
87
  const final = await run.final;
55
- return { ...finalToTurn(final, streamed), requestId: run.requestId };
88
+ return {
89
+ ...finalToTurn(final, streamed),
90
+ requestId: run.requestId,
91
+ thinkingBudgetExceeded: budgetExceeded,
92
+ };
56
93
  }
package/src/qvac/text.ts CHANGED
@@ -15,6 +15,10 @@ export function cleanAssistantVisibleText(text: string): string {
15
15
  // Qwen-style reasoning sometimes arrives in contentText. Never show/speak it.
16
16
  .replace(/<think\b[\s\S]*?<\/think>/gi, ' ')
17
17
  .replace(/<think\b[\s\S]*$/gi, ' ')
18
+ // Tool calls some models emit as text (<tool_call>{…}</tool_call>) are
19
+ // extracted + executed by the Engine (see parse.ts); never show the tags.
20
+ .replace(/<tool_call\b[^>]*>[\s\S]*?<\/tool_call>/gi, ' ')
21
+ .replace(/<tool_call\b[^>]*>[\s\S]*$/gi, ' ')
18
22
  .replace(/\s+/g, ' ')
19
23
  .trim();
20
24
 
@@ -54,6 +54,11 @@ describe('extractBuyAsset (deterministic Tier-0)', () => {
54
54
  it('handles comma grouping in the amount', () => {
55
55
  expect(extractBuyAsset('buy 1,000 usdt')).toEqual({ asset: 'USDT', asset_amount: 1000 });
56
56
  });
57
+ it('parses an article/filler between the verb and amount ("buy a 100 usdt channel")', () => {
58
+ expect(extractBuyAsset('buy a 100 usdt channel')).toEqual({ asset: 'USDT', asset_amount: 100 });
59
+ expect(extractBuyAsset('get a 100 usdt inbound channel')).toEqual({ asset: 'USDT', asset_amount: 100 });
60
+ expect(extractBuyAsset('buy and sell 100 usdt')).toBeNull(); // "and" is not filler
61
+ });
57
62
  it('null for a swap (a named source asset ⇒ swap owns it)', () => {
58
63
  expect(extractBuyAsset('buy 0.001 btc with usdt')).toBeNull();
59
64
  expect(extractBuyAsset('swap 10 usdt for btc')).toBeNull();
@@ -48,13 +48,16 @@ const num = (s?: string): number | undefined => {
48
48
  /** Thousands separators, locale-independent (deterministic for tests). */
49
49
  const commas = (n: number): string => String(n).replace(/\B(?=(\d{3})+(?!\d))/g, ',');
50
50
 
51
- /** "buy 100 usdt" / "get me 50 xaut" / "i want 200 usdt" / "purchase 10 xaut". */
51
+ /** "buy 100 usdt" / "get me 50 xaut" / "buy a 100 usdt channel" / "purchase 10 xaut". */
52
52
  export function extractBuyAsset(text: string): Record<string, unknown> | null {
53
53
  const t = text.trim();
54
54
  if (NOT_BUY.test(t) || HAS_SOURCE.test(t)) return null;
55
55
  if (!RGB_ASSET.test(t)) return null;
56
- // buy/get/want/acquire/purchase [me] <amount> <asset>
57
- const m = t.match(/\b(?:buy|get|acquire|want|purchase|onboard|need)\b(?:\s+me)?\s+([\d.,]+)\s*([a-z]+)/i);
56
+ // buy/get/want/acquire/purchase [me|a|an|some|new]* <amount> <asset>
57
+ // Filler words (the article in "buy A 100 usdt channel") must not break extraction.
58
+ const m = t.match(
59
+ /\b(?:buy|get|acquire|want|purchase|onboard|need)\b(?:\s+(?:me|a|an|some|new)\b)*\s+([\d.,]+)\s*([a-z]+)/i,
60
+ );
58
61
  if (!m) return null;
59
62
  const asset = normAsset(m[2]);
60
63
  const amount = num(m[1]);
@@ -44,7 +44,7 @@ function buildStubs(captured: { name: string; args: any }[]) {
44
44
  ]),
45
45
  new InProcessToolSource('rln', [
46
46
  tool('rln_get_node_info', { pubkey: '03c31dae' }),
47
- tool('rln_whitelist_swap', { ok: true }, /* spend */ true),
47
+ tool('rln_atomic_taker', { ok: true }, /* spend */ true),
48
48
  ]),
49
49
  ]);
50
50
  }
@@ -130,7 +130,7 @@ describe('kaleidoswapAtomicRecipe — full chain', () => {
130
130
  'kaleidoswap_get_quote',
131
131
  'kaleidoswap_atomic_init',
132
132
  'rln_get_node_info',
133
- 'rln_whitelist_swap',
133
+ 'rln_atomic_taker',
134
134
  'kaleidoswap_atomic_execute',
135
135
  ]);
136
136
  });
@@ -157,7 +157,7 @@ describe('kaleidoswapAtomicRecipe — full chain', () => {
157
157
  provider: refusingProvider, tools, onConfirm: async () => ({ approved: true }),
158
158
  slots: { from_asset: 'USDT', to_asset: 'BTC', amount: 10, amount_side: 'from' },
159
159
  });
160
- const whitelist = captured.find((c) => c.name === 'rln_whitelist_swap')!;
160
+ const whitelist = captured.find((c) => c.name === 'rln_atomic_taker')!;
161
161
  expect(whitelist.args).toEqual({ swapstring: 'SWAP/abc/def' });
162
162
  const exe = captured.find((c) => c.name === 'kaleidoswap_atomic_execute')!;
163
163
  expect(exe.args).toEqual({
@@ -12,7 +12,7 @@
12
12
  * ↓ [ONE confirmation gate — shows the real quote numbers]
13
13
  * kaleidoswap_atomic_init ← MAKER locks the swap → swapstring, payment_hash
14
14
  * rln_get_node_info ← NODE read pubkey (= taker_pubkey)
15
- * rln_whitelist_swap ← NODE accept the swapstring
15
+ * rln_atomic_taker ← NODE whitelist the swapstring (taker accepts)
16
16
  * kaleidoswap_atomic_execute ← MAKER settle (final)
17
17
  *
18
18
  * `forceModelExtract` ensures the model is always consulted for slot parsing
@@ -122,10 +122,11 @@ export const kaleidoswapAtomicRecipe: Recipe = {
122
122
  as: 'node',
123
123
  args: () => ({}),
124
124
  },
125
- // 4. NODE: whitelist the maker's swapstring (accept the swap). Ungated —
126
- // covered by the single confirm above.
125
+ // 4. NODE: the taker whitelists the maker's swapstring (accept the swap).
126
+ // Exposed by kaleido-mcp as `rln_atomic_taker` (calls rln.whitelistSwap).
127
+ // Ungated — covered by the single confirm above.
127
128
  {
128
- tool: 'rln_whitelist_swap',
129
+ tool: 'rln_atomic_taker',
129
130
  as: 'whitelist',
130
131
  args: (ctx) => {
131
132
  const init = ctx.results.init as InitResult | undefined;
@@ -72,6 +72,22 @@ describe('runRecipe — pay a contact', () => {
72
72
  expect(sent).toHaveLength(0);
73
73
  });
74
74
 
75
+ it('never reports a failed wallet result as sent', async () => {
76
+ const tools = new ToolRegistry([new InProcessToolSource('wallet', [
77
+ { name: 'resolve_contact', description: '', parameters: { type: 'object', properties: {} }, handler: async ({ name }) => ({ name, ln_address: `${name}@kaleidoswap.com` }) },
78
+ { name: 'fiat_to_sats', description: '', parameters: { type: 'object', properties: {} }, handler: async ({ amount }) => ({ sats: Math.round(Number(amount) * 1000) }) },
79
+ { name: 'send_payment', description: '', parameters: { type: 'object', properties: {} }, requiresConfirmation: true, handler: async () => ({ success: false, message: 'insufficient balance' }) },
80
+ ])]);
81
+ const res = await runRecipe(paymentsRecipe, 'pay bob 3 eur', {
82
+ provider: approve,
83
+ tools,
84
+ onConfirm: async () => ({ approved: true }),
85
+ });
86
+ expect(res.status).toBe('error');
87
+ expect(res.text).toContain('insufficient balance');
88
+ expect(res.text).not.toContain('Sent');
89
+ });
90
+
75
91
  it('falls back to ONE LLM extraction when regex misses', async () => {
76
92
  const sent: any[] = [];
77
93
  const tools = stubTools({ send: (a) => sent.push(a) });
@@ -29,6 +29,43 @@ export interface RunRecipeOptions {
29
29
  signal?: AbortSignal;
30
30
  }
31
31
 
32
/**
 * Decide whether a tool result represents a failure and, if so, return a
 * non-empty human-readable reason. Callers test the return value for
 * truthiness, so a failure must never be reported as an empty string.
 *
 * Recognised failure shapes:
 *  - a string starting with `error`, `failed`, `failure` or `exception`;
 *  - an object with a non-blank string `error`, or an `error` object carrying
 *    a non-blank string `message`;
 *  - an object with `success === false` or `ok === false`;
 *  - an object whose `status`/`state` is `error`, `failed`, `failure` or
 *    `rejected` (case-insensitive).
 *
 * @param result Whatever the tool returned.
 * @returns The failure reason, or `null` when the result does not look failed.
 */
function toolFailure(result: unknown): string | null {
  // A plain-string result (non-JSON MCP text, or a tool that returns prose):
  // flag obvious error text so a failed action isn't reported as success.
  if (typeof result === 'string') {
    const s = result.trim();
    return /^(error|failed|failure|exception)\b\s*[:\-]?/i.test(s) ? s : null;
  }
  if (!result || typeof result !== 'object') return null;
  const r = result as Record<string, unknown>;

  // Render a message-like value as text; null when it carries nothing usable.
  const readable = (value: unknown): string | null => {
    if (value === null || value === undefined) return null;
    if (typeof value === 'string') return value.trim() ? value : null;
    if (typeof value !== 'object') return String(value);
    try {
      // Objects would otherwise stringify as "[object Object]".
      return JSON.stringify(value) ?? null;
    } catch {
      return null; // e.g. circular structure
    }
  };
  // Prefer `message`, then `reason`; blank values fall through to the default.
  const reason = (fallback: string): string =>
    readable(r.message) ?? readable(r.reason) ?? fallback;

  if (typeof r.error === 'string' && r.error.trim()) return r.error;
  if (r.error && typeof r.error === 'object') {
    const nested = (r.error as Record<string, unknown>).message;
    if (typeof nested === 'string' && nested.trim()) return nested;
  }
  if (r.success === false || r.ok === false) {
    return reason('The wallet action failed.');
  }
  const status = String(r.status ?? r.state ?? '').toLowerCase();
  if (['error', 'failed', 'failure', 'rejected'].includes(status)) {
    return reason(`The wallet returned status "${status}".`);
  }
  return null;
}
51
+
52
/**
 * Build the `status: 'error'` result returned when a recipe step fails.
 * The slots and results gathered so far are carried over from `ctx`, and
 * `message` is both stored as `error` and embedded in the user-facing text.
 */
function failedResult(
  recipe: Recipe,
  ctx: RecipeContext,
  inferences: number,
  message: string,
): RecipeResult {
  const { slots, results } = ctx;
  const text = `Couldn't complete that: ${message}`;
  return {
    recipe: recipe.name,
    slots,
    results,
    text,
    status: 'error',
    error: message,
    inferences,
  };
}
68
+
32
69
  /** Extract the recipe's slots — deterministic regex first, else ONE LLM call. */
33
70
  export async function extractSlots(
34
71
  provider: LLMProvider,
@@ -187,6 +224,8 @@ export async function runRecipe(recipe: Recipe, text: string, opts: RunRecipeOpt
187
224
  const result = await opts.tools.execute(step.tool, args);
188
225
  ctx.results[step.as ?? step.tool] = result;
189
226
  opts.onStep?.(step.tool, args, result);
227
+ const failure = toolFailure(result);
228
+ if (failure) return failedResult(recipe, ctx, inferences, failure);
190
229
  }
191
230
 
192
231
  // Final action.
@@ -195,6 +234,8 @@ export async function runRecipe(recipe: Recipe, text: string, opts: RunRecipeOpt
195
234
  const finalResult = await opts.tools.execute(recipe.final.tool, finalArgs);
196
235
  ctx.results[recipe.final.as ?? recipe.final.tool] = finalResult;
197
236
  opts.onStep?.(recipe.final.tool, finalArgs, finalResult);
237
+ const failure = toolFailure(finalResult);
238
+ if (failure) return failedResult(recipe, ctx, inferences, failure);
198
239
 
199
240
  const out = recipe.summary?.(ctx, finalResult) ?? 'Done.';
200
241
  return { recipe: recipe.name, slots: ctx.slots, results: ctx.results, final: finalResult, text: out, status: 'done', inferences };
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Live MCP integration — regression guard for the "tool-less desktop chat" bug.
3
+ *
4
+ * The desktop agent (desktop-app/src-tauri/src/mind.rs → apps/provider
5
+ * connectMcpIfConfigured) wires tools EXACTLY the way this test does: spawn
6
+ * `node <kaleido-mcp>/dist/index.js` over stdio with RLN_NODE_URL pointing at
7
+ * the user's RGB-Lightning node, then listTools()/execute(). When that wiring
8
+ * breaks, the registry is empty, the model goes "tool-less", and it NARRATES
9
+ * tool calls it can never run ("Could you use the kaleidoswap_get_quote tool?")
10
+ * instead of returning real data — the exact 2026-06 symptom.
11
+ *
12
+ * This drives that chain end-to-end against a REAL running node and asserts the
13
+ * tools both EXIST (not tool-less) and EXECUTE (return live node data). A unit
14
+ * test can't catch this: the bug is in process/env wiring, not pure logic.
15
+ *
16
+ * Auto-skips unless (a) kaleido-mcp/dist is built and (b) an RLN node answers,
17
+ * so it's a no-op in CI and a real check on a dev box with a node up. Run it
18
+ * explicitly against a node with:
19
+ * RLN_NODE_URL=http://localhost:3001 pnpm --filter @kaleidorg/mind test:live
20
+ */
21
+ import { afterAll, beforeAll, describe, expect, it } from 'vitest';
22
+ import { existsSync } from 'node:fs';
23
+ import { dirname, resolve } from 'node:path';
24
+ import { fileURLToPath } from 'node:url';
25
+ import { McpToolSource } from './mcp.js';
26
+
27
+ const here = dirname(fileURLToPath(import.meta.url));
28
+ // $KALEIDO_MCP_PATH override (what mind.rs sets), else the sibling repo's build.
29
+ const MCP_ENTRY =
30
+ process.env.KALEIDO_MCP_PATH ??
31
+ resolve(here, '../../../../../kaleido-mcp/dist/index.js');
32
+ const NODE_URL = (process.env.RLN_NODE_URL ?? 'http://localhost:3001').replace(/\/+$/, '');
33
+
34
/**
 * Ask the RLN node at `NODE_URL` for its `/nodeinfo` and return its pubkey.
 * Used both to decide whether the live suite can run and as ground truth to
 * compare the MCP tool's output against.
 *
 * @returns The non-empty `pubkey` string, or `null` when the request fails,
 *   times out (4 s), returns a non-OK status, or carries no pubkey.
 */
async function fetchNodePubkey(): Promise<string | null> {
  let body: { pubkey?: string };
  try {
    const response = await fetch(`${NODE_URL}/nodeinfo`, {
      signal: AbortSignal.timeout(4000),
    });
    if (!response.ok) return null;
    body = (await response.json()) as { pubkey?: string };
  } catch {
    return null;
  }
  const { pubkey } = body;
  if (typeof pubkey !== 'string' || pubkey.length === 0) return null;
  return pubkey;
}
46
+
47
+ const hasDist = existsSync(MCP_ENTRY);
48
+ const livePubkey = hasDist ? await fetchNodePubkey() : null;
49
+ const RUN = hasDist && !!livePubkey;
50
+
51
+ if (!RUN) {
52
+ const why = !hasDist ? `no built MCP at ${MCP_ENTRY}` : `no RLN node at ${NODE_URL}`;
53
+ // eslint-disable-next-line no-console
54
+ console.warn(`[mcp.live] skipping live MCP integration — ${why}`);
55
+ }
56
+
57
+ describe.skipIf(!RUN)('MCP live integration (real RLN node)', () => {
58
+ let src: McpToolSource;
59
+
60
+ beforeAll(async () => {
61
+ src = new McpToolSource({
62
+ id: 'kaleido-test',
63
+ transport: {
64
+ kind: 'stdio',
65
+ command: 'node',
66
+ args: [MCP_ENTRY],
67
+ // Mirror the provider: inherit env, force the node URL, allow no WDK seed
68
+ // (rln_*/kaleidoswap_* register regardless; only spark_*/wdk_* need it).
69
+ env: {
70
+ ...process.env,
71
+ RLN_NODE_URL: NODE_URL,
72
+ WDK_SEED: process.env.WDK_SEED ?? '',
73
+ } as Record<string, string>,
74
+ },
75
+ timeoutMs: 30_000,
76
+ });
77
+ await src.connect();
78
+ }, 45_000);
79
+
80
+ afterAll(async () => {
81
+ await src?.close();
82
+ });
83
+
84
+ it('exposes a non-empty tool registry (the model is NOT tool-less)', () => {
85
+ const tools = src.listTools();
86
+ expect(tools.length).toBeGreaterThan(0);
87
+ // The exact tools the agent narrated when it couldn't call them.
88
+ expect(src.has('rln_get_node_info')).toBe(true);
89
+ expect(src.has('rln_get_balances')).toBe(true);
90
+ expect(src.has('kaleidoswap_get_quote')).toBe(true);
91
+ });
92
+
93
+ it('preserves the confirmation gate on known spend tools', () => {
94
+ const spend = src.listTools().find((tool) => tool.name === 'rln_pay_invoice');
95
+ if (spend) expect(spend.requiresConfirmation).toBe(true);
96
+ });
97
+
98
+ it('rln_get_node_info EXECUTES against the node (returns the live pubkey)', async () => {
99
+ const out = await src.execute('rln_get_node_info', {});
100
+ const text = typeof out === 'string' ? out : JSON.stringify(out);
101
+ // Real execution returns the node's actual identity — not a narrated promise.
102
+ expect(text).toContain(livePubkey!);
103
+ }, 30_000);
104
+
105
+ it('rln_get_balances EXECUTES against the node (returns live balance fields)', async () => {
106
+ const out = await src.execute('rln_get_balances', {});
107
+ const text = typeof out === 'string' ? out : JSON.stringify(out);
108
+ const parsed = JSON.parse(text) as {
109
+ lightning_balance_sat?: number;
110
+ btc_onchain?: Record<string, number>;
111
+ };
112
+ expect(parsed).toHaveProperty('lightning_balance_sat');
113
+ expect(typeof parsed.lightning_balance_sat).toBe('number');
114
+ expect(parsed).toHaveProperty('btc_onchain');
115
+ }, 30_000);
116
+ });
@@ -0,0 +1,37 @@
1
+ /** parseMcpResult — JSON parsing + isError handling for MCP tool results. */
2
+
3
+ import { describe, it, expect } from 'vitest';
4
+ import { parseMcpResult } from './mcp.js';
5
+
6
+ describe('parseMcpResult', () => {
7
+ it('parses JSON text content into an object (so recipes thread real fields)', () => {
8
+ const res = { content: [{ type: 'text', text: '{"rfq_id":"abc","total_sat":1500}' }] };
9
+ expect(parseMcpResult(res)).toEqual({ rfq_id: 'abc', total_sat: 1500 });
10
+ });
11
+
12
+ it('surfaces isError as an {error} object (so a failed spend is not "success")', () => {
13
+ const res = { isError: true, content: [{ type: 'text', text: 'insufficient funds' }] };
14
+ expect(parseMcpResult(res)).toEqual({ error: 'insufficient funds' });
15
+ });
16
+
17
+ it('errors with no text still produce an {error} object', () => {
18
+ expect(parseMcpResult({ isError: true, content: [] })).toEqual({
19
+ error: 'The tool reported an error.',
20
+ });
21
+ });
22
+
23
+ it('passes non-JSON prose through unchanged', () => {
24
+ const res = { content: [{ type: 'text', text: 'Bitcoin is digital cash.' }] };
25
+ expect(parseMcpResult(res)).toBe('Bitcoin is digital cash.');
26
+ });
27
+
28
+ it('returns the content array when there is no text block', () => {
29
+ const res = { content: [{ type: 'image', data: 'x' }] };
30
+ expect(parseMcpResult(res)).toEqual([{ type: 'image', data: 'x' }]);
31
+ });
32
+
33
+ it('joins multiple text blocks before parsing', () => {
34
+ const res = { content: [{ type: 'text', text: '{"a":1,' }, { type: 'text', text: '"b":2}' }] };
35
+ expect(parseMcpResult(res)).toEqual({ a: 1, b: 2 });
36
+ });
37
+ });