npm - @kaleidorg/mind - Versions diffs - 0.5.0 → 0.6.0 - Mend

@kaleidorg/mind 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

package/dist/autonomy/index.d.ts +21 -0
package/dist/autonomy/index.d.ts.map +1 -0
package/dist/autonomy/index.js +16 -0
package/dist/autonomy/index.js.map +1 -0
package/dist/autonomy/prompt.d.ts +21 -0
package/dist/autonomy/prompt.d.ts.map +1 -0
package/dist/autonomy/prompt.js +37 -0
package/dist/autonomy/prompt.js.map +1 -0
package/dist/autonomy/risk.d.ts +53 -0
package/dist/autonomy/risk.d.ts.map +1 -0
package/dist/autonomy/risk.js +74 -0
package/dist/autonomy/risk.js.map +1 -0
package/dist/autonomy/run-state.d.ts +39 -0
package/dist/autonomy/run-state.d.ts.map +1 -0
package/dist/autonomy/run-state.js +118 -0
package/dist/autonomy/run-state.js.map +1 -0
package/dist/autonomy/scheduler.d.ts +18 -0
package/dist/autonomy/scheduler.d.ts.map +1 -0
package/dist/autonomy/scheduler.js +113 -0
package/dist/autonomy/scheduler.js.map +1 -0
package/dist/autonomy/task-store.d.ts +44 -0
package/dist/autonomy/task-store.d.ts.map +1 -0
package/dist/autonomy/task-store.js +139 -0
package/dist/autonomy/task-store.js.map +1 -0
package/dist/autonomy/types.d.ts +164 -0
package/dist/autonomy/types.d.ts.map +1 -0
package/dist/autonomy/types.js +20 -0
package/dist/autonomy/types.js.map +1 -0
package/dist/funnel.d.ts.map +1 -1
package/dist/funnel.js +12 -0
package/dist/funnel.js.map +1 -1
package/dist/index.d.ts +2 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +4 -0
package/dist/index.js.map +1 -1
package/dist/knowledge/bitcoin-copilot.js +2 -2
package/dist/knowledge/bitcoin-copilot.js.map +1 -1
package/dist/qvac/index.d.ts +1 -1
package/dist/qvac/index.d.ts.map +1 -1
package/dist/qvac/index.js.map +1 -1
package/dist/qvac/parse.d.ts +33 -0
package/dist/qvac/parse.d.ts.map +1 -1
package/dist/qvac/parse.js +69 -5
package/dist/qvac/parse.js.map +1 -1
package/dist/qvac/provider.d.ts +16 -0
package/dist/qvac/provider.d.ts.map +1 -1
package/dist/qvac/provider.js +17 -1
package/dist/qvac/provider.js.map +1 -1
package/dist/qvac/stream.d.ts +16 -0
package/dist/qvac/stream.d.ts.map +1 -1
package/dist/qvac/stream.js +21 -1
package/dist/qvac/stream.js.map +1 -1
package/dist/qvac/text.d.ts.map +1 -1
package/dist/qvac/text.js +4 -0
package/dist/qvac/text.js.map +1 -1
package/dist/recipe/buy-asset-channel.d.ts +1 -1
package/dist/recipe/buy-asset-channel.d.ts.map +1 -1
package/dist/recipe/buy-asset-channel.js +4 -3
package/dist/recipe/buy-asset-channel.js.map +1 -1
package/dist/recipe/kaleidoswap-atomic.d.ts +1 -1
package/dist/recipe/kaleidoswap-atomic.d.ts.map +1 -1
package/dist/recipe/kaleidoswap-atomic.js +5 -4
package/dist/recipe/kaleidoswap-atomic.js.map +1 -1
package/dist/recipe/runner.d.ts.map +1 -1
package/dist/recipe/runner.js +38 -0
package/dist/recipe/runner.js.map +1 -1
package/dist/tools/mcp.d.ts +19 -0
package/dist/tools/mcp.d.ts.map +1 -1
package/dist/tools/mcp.js +51 -9
package/dist/tools/mcp.js.map +1 -1
package/package.json +2 -1
package/skills/channel-manager/SKILL.md +59 -0
package/skills/dca/SKILL.md +48 -0
package/skills/kaleido-lsps/SKILL.md +12 -12
package/skills/kaleido-trading/SKILL.md +1 -1
package/skills/liquidity-optimizer/SKILL.md +91 -0
package/skills/merchant-finder/SKILL.md +1 -1
package/skills/portfolio-manager/SKILL.md +67 -0
package/skills/rgb-lightning-node/SKILL.md +3 -3
package/skills/wallet-assistant/SKILL.md +1 -1
package/src/autonomy/autonomy.test.ts +348 -0
package/src/autonomy/index.ts +50 -0
package/src/autonomy/prompt.ts +48 -0
package/src/autonomy/risk.ts +139 -0
package/src/autonomy/run-state.ts +144 -0
package/src/autonomy/scheduler.ts +120 -0
package/src/autonomy/task-store.ts +167 -0
package/src/autonomy/types.ts +186 -0
package/src/funnel.mind.test.ts +390 -0
package/src/funnel.ts +14 -0
package/src/index.ts +41 -0
package/src/knowledge/bitcoin-copilot.ts +2 -2
package/src/qvac/index.ts +1 -0
package/src/qvac/parse.test.ts +70 -1
package/src/qvac/parse.ts +91 -5
package/src/qvac/provider.test.ts +17 -0
package/src/qvac/provider.ts +37 -1
package/src/qvac/stream.test.ts +25 -0
package/src/qvac/stream.ts +38 -1
package/src/qvac/text.ts +4 -0
package/src/recipe/buy-asset-channel.test.ts +5 -0
package/src/recipe/buy-asset-channel.ts +6 -3
package/src/recipe/kaleidoswap-atomic.test.ts +3 -3
package/src/recipe/kaleidoswap-atomic.ts +5 -4
package/src/recipe/recipe.test.ts +16 -0
package/src/recipe/runner.ts +41 -0
package/src/tools/mcp.live.test.ts +116 -0
package/src/tools/mcp.parse.test.ts +37 -0
package/src/tools/mcp.ts +55 -9

package/src/qvac/parse.ts CHANGED Viewed

@@ -6,6 +6,21 @@
  */
 import { cleanAssistantVisibleText } from './text.js';
+/**
+ * Per-turn inference stats from a QVAC `completion().final.stats` frame. The
+ * authoritative source for which backend actually ran (`backendDevice`) and the
+ * real throughput — hosts surface these instead of guessing from load config.
+ *
+ * Every field is optional, so consumers must tolerate a stats object carrying
+ * any subset of them.
+ * NOTE(review): units and exact semantics of the numeric fields (e.g. whether
+ * `totalTime` is seconds or milliseconds, whether `totalTokens` includes the
+ * prompt) are not established in this file — confirm against the QVAC SDK.
+ */
+export interface QvacTurnStats {
+  /** The backend that actually executed this turn — the real "is GPU active". */
+  backendDevice?: 'cpu' | 'gpu';
+  tokensPerSecond?: number; // the "real throughput" referred to in the interface doc
+  totalTokens?: number;
+  promptTokens?: number;
+  contextSize?: number;
+  totalTime?: number; // unit unconfirmed — see NOTE(review) in the interface doc
+}
 /** Structural subset of a QVAC `completion().final` we depend on. */
 export interface QvacFinalLike {
   /** Visible assistant text (excludes `<think>` reasoning). */
@@ -20,6 +35,8 @@ export interface QvacFinalLike {
    * it so the funnel can tell a truncated tool-call from a complete one.
    */
   stopReason?: 'length' | 'cancelled' | string;
+  /** Inference stats (backend device, throughput). Present on a natural finish. */
+  stats?: QvacTurnStats;
 }
 export interface ParsedTurn {
@@ -33,25 +50,94 @@ export interface ParsedTurn {
   truncated: boolean;
   /** Raw stop reason from the SDK, when provided. */
   stopReason?: string;
+  /** Inference stats for this turn (backend device, throughput), when provided. */
+  stats?: QvacTurnStats;
+}
+/**
+ * Parse the first balanced `{…}` in `s` as a `{name, arguments}` tool call.
+ *
+ * Scanning starts at the first `{` and tracks brace depth to find its matching
+ * `}`. Braces inside JSON string literals are skipped (backslash escapes are
+ * honoured), so an argument value such as `"a}b"` or `"{x"` cannot close or
+ * deepen the object early and cause an otherwise valid call to be dropped.
+ *
+ * @param s Text that may contain a JSON tool-call object.
+ * @returns The call's `name` and `arguments` (`{}` when `arguments` is missing
+ *   or not an object), or `null` when there is no `{`, the braces never
+ *   balance, the candidate is not valid JSON, or `name` is not a string.
+ */
+function parseCallObject(
+  s: string,
+): { name: string; arguments: Record<string, unknown> } | null {
+  const start = s.indexOf('{');
+  if (start < 0) return null;
+  let depth = 0;
+  let inString = false; // currently inside a "…" JSON string literal
+  let escaped = false; // previous character was a backslash inside a string
+  for (let i = start; i < s.length; i++) {
+    const ch = s[i];
+    if (inString) {
+      // Only an unescaped quote ends the string; everything else is payload.
+      if (escaped) escaped = false;
+      else if (ch === '\\') escaped = true;
+      else if (ch === '"') inString = false;
+      continue;
+    }
+    if (ch === '"') inString = true;
+    else if (ch === '{') depth++;
+    else if (ch === '}' && --depth === 0) {
+      try {
+        const obj = JSON.parse(s.slice(start, i + 1)) as {
+          name?: unknown;
+          arguments?: unknown;
+        };
+        if (obj && typeof obj.name === 'string') {
+          const args =
+            obj.arguments && typeof obj.arguments === 'object'
+              ? (obj.arguments as Record<string, unknown>)
+              : {};
+          return { name: obj.name, arguments: args };
+        }
+      } catch {
+        /* malformed JSON — give up on this fragment */
+      }
+      // Only the first balanced object is considered, whether or not it was a call.
+      return null;
+    }
+  }
+  return null;
+}
+/**
+ * Recover tool calls a model emitted as PLAIN TEXT instead of structured frames
+ * — `<tool_call>{"name":…,"arguments":…}</tool_call>` (Qwen/Hermes) or a bare
+ * leading `{"name":…,"arguments":…}`. Small local models (and SDK builds that
+ * don't apply the tool grammar) do this; without recovery the call leaks into
+ * the visible answer and never runs.
+ *
+ * Tagged calls take priority: if at least one `<tool_call>` body parses, the
+ * bare-object fallback is not attempted. Tagged bodies that fail to parse are
+ * skipped silently. Returns the recovered calls in document order, or an empty
+ * array when nothing could be recovered.
+ *
+ * NOTE(review): the fallback gate makes the opening brace optional (`\{?`), so
+ * text that starts with `"name":` also passes the gate; `parseCallObject` then
+ * begins at the first `{` found anywhere in the text.
+ */
+export function extractTextToolCalls(
+  text: string,
+): Array<{ name: string; arguments: Record<string, unknown> }> {
+  const calls: Array<{ name: string; arguments: Record<string, unknown> }> = [];
+  for (const m of text.matchAll(/<tool_call\b[^>]*>([\s\S]*?)<\/tool_call>/gi)) { // only tags with a closing </tool_call> match
+    const c = parseCallObject(m[1] ?? '');
+    if (c) calls.push(c);
+  }
+  if (calls.length) return calls;
+  // No tags — accept a bare tool-call object only at the very start of the
+  // text (so we don't misread JSON the model is merely talking about).
+  if (/^\s*\{?\s*"name"\s*:/i.test(text)) {
+    const c = parseCallObject(text);
+    if (c) calls.push(c);
+  }
+  return calls;
 }
 /**
  * Map a completion `final` (plus the streamed fallback text) into a ParsedTurn.
  * `rawContent` prefers the SDK's framed `raw.fullText` so the Engine can anchor
  * the next turn; falls back to the visible text when a provider has no raw form.
+ *
+ * When the SDK reports no structured tool calls, we re-scan the raw text for
+ * tool calls the model emitted inline (see `extractTextToolCalls`) so they still
+ * execute instead of leaking into the chat.
+ *
+ * Fallback rules differ by field: `contentText` falls back to `streamed` when
+ * it is any falsy value (including the empty string), whereas `raw.fullText`
+ * is only replaced when it is null/undefined — an empty `fullText` is kept.
+ * Recovered calls have no SDK id, so they are returned with `id: undefined`.
+ * `truncated` is true only when the stop reason is exactly 'length'.
  */
 export function finalToTurn(final: QvacFinalLike, streamed = ''): ParsedTurn {
   const rawText = final.contentText || streamed;
   const text = cleanAssistantVisibleText(rawText);
+  let toolCalls = (final.toolCalls ?? []).map((c) => ({
+    id: c.id,
+    name: c.name,
+    arguments: c.arguments ?? {},
+  }));
+  if (toolCalls.length === 0) { // structured calls always win; text recovery is a fallback only
+    const recovered = extractTextToolCalls(final.raw?.fullText ?? rawText);
+    if (recovered.length) toolCalls = recovered.map((c) => ({ id: undefined, ...c }));
+  }
   return {
     text,
     rawContent: final.raw?.fullText ?? rawText,
-    toolCalls: (final.toolCalls ?? []).map((c) => ({
-      id: c.id,
-      name: c.name,
-      arguments: c.arguments ?? {},
-    })),
+    toolCalls,
     truncated: final.stopReason === 'length',
     stopReason: final.stopReason,
+    stats: final.stats, // passed through untouched; undefined when the frame carried none
   };
 }

package/src/qvac/provider.test.ts CHANGED Viewed

@@ -84,6 +84,23 @@ describe('createQvacProvider.runTurn', () => {
     expect(calls[0].generationParams).toBeUndefined();
   });
+  it('caps thinking by tokens — cancels the run and returns a fallback', async () => {
+    const cancel = vi.fn(async () => {});
+    const { fn } = fakeCompletion(
+      { contentText: '', toolCalls: [], raw: { fullText: '' }, stopReason: 'cancelled' },
+      [{ type: 'thinkingDelta', text: 'z'.repeat(40) }], // ~10 tokens, budget 4
+    );
+    const p = createQvacProvider({
+      completion: fn as any,
+      cancel: cancel as any,
+      getModelId: () => 'm1',
+      maxThinkingTokens: 4,
+    });
+    const out = await p.runTurn({ messages: [{ role: 'user', content: 'think hard' }], tools: [] });
+    expect(cancel).toHaveBeenCalledWith({ requestId: 'req-1' });
+    expect(out.text).toMatch(/thinking budget/i);
+  });
   it('streams visible content tokens to onToken', async () => {
     const { fn } = fakeCompletion(
       { contentText: 'Hi there', toolCalls: [], raw: { fullText: 'Hi there' } },

package/src/qvac/provider.ts CHANGED Viewed

@@ -18,6 +18,7 @@
  */
 import type * as QvacSdk from '@qvac/sdk';
 import type { LLMProvider, TurnInput, TurnOutput } from '../providers/types.js';
+import type { QvacTurnStats } from './parse.js';
 import { consumeRun } from './stream.js';
 type CompletionFn = typeof QvacSdk.completion;
@@ -38,17 +39,37 @@ export interface QvacProviderOptions {
   defaultTemperature?: number;
   /** Default max output tokens — caps a turn so it can't ramble. Omit for uncapped. */
   defaultMaxTokens?: number;
+  /**
+   * Cap `<think>` reasoning at this many TOKENS (not seconds — tok/s varies, and
+   * the SDK has no numeric reasoning budget). When a turn's thinking exceeds it,
+   * the run is cancelled and a short fallback is returned instead of hanging on
+   * "Thinking…". Omit for unlimited reasoning.
+   */
+  maxThinkingTokens?: number;
   /** Stream the model's `<think>` reasoning, when a host wants to surface it. */
   onThinking?: (token: string) => void;
+  /**
+   * Per-turn inference stats (real backend device + throughput), when a host
+   * wants to surface them. Fires once per turn after the `final` frame resolves.
+   */
+  onStats?: (stats: QvacTurnStats) => void;
 }
 /** TurnInput plus the per-call knobs the funnel/voice paths pass through. */
 export interface QvacTurnInput extends TurnInput {
   temperature?: number;
   maxTokens?: number;
+  /** Per-turn override of the thinking-token cap (see QvacProviderOptions). */
+  maxThinkingTokens?: number;
   onThinking?: (token: string) => void;
+  onStats?: (stats: QvacTurnStats) => void;
 }
+/** Shown when a turn is cut off because it blew its thinking-token budget. */
+const THINKING_BUDGET_FALLBACK =
+  'I spent my whole thinking budget on that one without landing an answer. ' +
+  'Try asking again, more specifically.';
 export function createQvacProvider(options: QvacProviderOptions): LLMProvider {
   return {
     name: 'qvac',
@@ -98,13 +119,28 @@ export function createQvacProvider(options: QvacProviderOptions): LLMProvider {
         ...(tools ? { tools } : {}),
       } as unknown as Parameters<CompletionFn>[0]);
+      const maxThinkingTokens = input.maxThinkingTokens ?? options.maxThinkingTokens;
       const result = await consumeRun(run, {
         onToken: input.onToken,
         onThinking: input.onThinking ?? options.onThinking,
+        maxThinkingTokens,
+        // Cancel the in-flight run the moment the thinking budget is blown — the
+        // SDK keeps generating otherwise. Fire-and-forget; `final` then resolves.
+        onThinkingBudgetExceeded: () => {
+          void options.cancel({ requestId: run.requestId }).catch(() => {});
+        },
       });
+      // Surface the real per-turn inference stats (backend device + throughput).
+      if (result.stats) (input.onStats ?? options.onStats)?.(result.stats);
+      // A turn cut off mid-reasoning has no visible answer — return a short note
+      // instead of an empty bubble so the agentic loop ends cleanly.
+      const text =
+        result.text || (result.thinkingBudgetExceeded ? THINKING_BUDGET_FALLBACK : result.text);
       return {
-        text: result.text,
+        text,
         rawContent: result.rawContent,
         toolCalls: result.toolCalls,
         requestId: result.requestId,

package/src/qvac/stream.test.ts CHANGED Viewed

@@ -67,6 +67,31 @@ describe('consumeRun', () => {
     expect(out.truncated).toBe(true);
   });
+  it('stops forwarding and flags when thinking exceeds maxThinkingTokens', async () => {
+    const thinking: string[] = [];
+    let exceeded = 0;
+    // 8-char deltas ≈ 2 tokens each; budget 4 tokens trips after the 2nd.
+    const run = fakeRun(
+      [
+        { type: 'thinkingDelta', text: 'aaaaaaaa' },
+        { type: 'thinkingDelta', text: 'bbbbbbbb' },
+        { type: 'thinkingDelta', text: 'cccccccc' },
+        { type: 'contentDelta', text: 'should-not-arrive' },
+      ],
+      { contentText: '', toolCalls: [], raw: { fullText: '' }, stopReason: 'cancelled' },
+    );
+    const out = await consumeRun(run, {
+      onThinking: (t) => thinking.push(t),
+      maxThinkingTokens: 4,
+      onThinkingBudgetExceeded: () => {
+        exceeded += 1;
+      },
+    });
+    expect(exceeded).toBe(1);
+    expect(out.thinkingBudgetExceeded).toBe(true);
+    expect(thinking).toEqual(['aaaaaaaa', 'bbbbbbbb']); // stopped at the trip
+  });
   it('ignores delta events with no text', async () => {
     const tokens: string[] = [];
     const run = fakeRun(

package/src/qvac/stream.ts CHANGED Viewed

@@ -27,10 +27,31 @@ export interface StreamHandlers {
   onToken?: (token: string) => void;
   /** The model's `<think>` reasoning, streamed separately. */
   onThinking?: (token: string) => void;
+  /**
+   * Cap the `<think>` reasoning at this many tokens. The cap is on TOKENS, not
+   * wall-clock seconds — tok/s varies by model and hardware, so a time budget is
+   * unreliable; the SDK has no numeric reasoning budget (`reasoning_budget` is
+   * only on/off), so we count thinking tokens and stop the run once they exceed
+   * this. Omit for unlimited reasoning.
+   */
+  maxThinkingTokens?: number;
+  /**
+   * Fires once, the moment the thinking budget is exceeded, so the host can
+   * cancel the in-flight run (the SDK keeps generating otherwise). consumeRun
+   * stops forwarding deltas after this.
+   */
+  onThinkingBudgetExceeded?: () => void;
 }
 export interface ConsumedTurn extends ParsedTurn {
   requestId: string;
+  /** True when the run was stopped because `<think>` hit `maxThinkingTokens`. */
+  thinkingBudgetExceeded?: boolean;
+}
+/**
+ * Rough token estimate (~4 chars/token) — same heuristic the context budget uses.
+ * Takes a character count and rounds up, so any count of 1 or more yields at
+ * least one token; a count of 0 yields 0.
+ */
+function approxTokens(chars: number): number {
+  return Math.ceil(chars / 4);
 }
 /**
@@ -43,14 +64,30 @@ export async function consumeRun(
   handlers: StreamHandlers = {},
 ): Promise<ConsumedTurn> {
   let streamed = '';
+  let thinkingChars = 0;
+  let budgetExceeded = false;
   for await (const event of run.events) {
     if (event.type === 'contentDelta' && typeof event.text === 'string') {
       streamed += event.text;
       handlers.onToken?.(event.text);
     } else if (event.type === 'thinkingDelta' && typeof event.text === 'string') {
       handlers.onThinking?.(event.text);
+      if (handlers.maxThinkingTokens !== undefined && !budgetExceeded) {
+        thinkingChars += event.text.length;
+        if (approxTokens(thinkingChars) >= handlers.maxThinkingTokens) {
+          budgetExceeded = true;
+          handlers.onThinkingBudgetExceeded?.();
+          // Stop forwarding; the host cancels the run, so `final` resolves
+          // (stopReason 'cancelled') with whatever was produced so far.
+          break;
+        }
+      }
     }
   }
   const final = await run.final;
-  return { ...finalToTurn(final, streamed), requestId: run.requestId };
+  return {
+    ...finalToTurn(final, streamed),
+    requestId: run.requestId,
+    thinkingBudgetExceeded: budgetExceeded,
+  };
 }

package/src/qvac/text.ts CHANGED Viewed

@@ -15,6 +15,10 @@ export function cleanAssistantVisibleText(text: string): string {
     // Qwen-style reasoning sometimes arrives in contentText. Never show/speak it.
     .replace(/<think\b[\s\S]*?<\/think>/gi, ' ')
     .replace(/<think\b[\s\S]*$/gi, ' ')
+    // Tool calls some models emit as text (<tool_call>{…}</tool_call>) are
+    // extracted + executed by the Engine (see parse.ts); never show the tags.
+    .replace(/<tool_call\b[^>]*>[\s\S]*?<\/tool_call>/gi, ' ')
+    .replace(/<tool_call\b[^>]*>[\s\S]*$/gi, ' ')
     .replace(/\s+/g, ' ')
     .trim();

package/src/recipe/buy-asset-channel.test.ts CHANGED Viewed

@@ -54,6 +54,11 @@ describe('extractBuyAsset (deterministic Tier-0)', () => {
   it('handles comma grouping in the amount', () => {
     expect(extractBuyAsset('buy 1,000 usdt')).toEqual({ asset: 'USDT', asset_amount: 1000 });
   });
+  it('parses an article/filler between the verb and amount ("buy a 100 usdt channel")', () => {
+    expect(extractBuyAsset('buy a 100 usdt channel')).toEqual({ asset: 'USDT', asset_amount: 100 });
+    expect(extractBuyAsset('get a 100 usdt inbound channel')).toEqual({ asset: 'USDT', asset_amount: 100 });
+    expect(extractBuyAsset('buy and sell 100 usdt')).toBeNull(); // "and" is not filler
+  });
   it('null for a swap (a named source asset ⇒ swap owns it)', () => {
     expect(extractBuyAsset('buy 0.001 btc with usdt')).toBeNull();
     expect(extractBuyAsset('swap 10 usdt for btc')).toBeNull();

package/src/recipe/buy-asset-channel.ts CHANGED Viewed

@@ -48,13 +48,16 @@ const num = (s?: string): number | undefined => {
 /** Thousands separators, locale-independent (deterministic for tests). */
 const commas = (n: number): string => String(n).replace(/\B(?=(\d{3})+(?!\d))/g, ',');
-/** "buy 100 usdt" / "get me 50 xaut" / "i want 200 usdt" / "purchase 10 xaut". */
+/** "buy 100 usdt" / "get me 50 xaut" / "buy a 100 usdt channel" / "purchase 10 xaut". */
 export function extractBuyAsset(text: string): Record<string, unknown> | null {
   const t = text.trim();
   if (NOT_BUY.test(t) || HAS_SOURCE.test(t)) return null;
   if (!RGB_ASSET.test(t)) return null;
-  // buy/get/want/acquire/purchase [me] <amount> <asset>
-  const m = t.match(/\b(?:buy|get|acquire|want|purchase|onboard|need)\b(?:\s+me)?\s+([\d.,]+)\s*([a-z]+)/i);
+  // buy/get/want/acquire/purchase [me|a|an|some|new]* <amount> <asset>
+  // Filler words (the article in "buy A 100 usdt channel") must not break extraction.
+  const m = t.match(
+    /\b(?:buy|get|acquire|want|purchase|onboard|need)\b(?:\s+(?:me|a|an|some|new)\b)*\s+([\d.,]+)\s*([a-z]+)/i,
+  );
   if (!m) return null;
   const asset = normAsset(m[2]);
   const amount = num(m[1]);

package/src/recipe/kaleidoswap-atomic.test.ts CHANGED Viewed

@@ -44,7 +44,7 @@ function buildStubs(captured: { name: string; args: any }[]) {
     ]),
     new InProcessToolSource('rln', [
       tool('rln_get_node_info', { pubkey: '03c31dae' }),
-      tool('rln_whitelist_swap', { ok: true }, /* spend */ true),
+      tool('rln_atomic_taker', { ok: true }, /* spend */ true),
     ]),
   ]);
 }
@@ -130,7 +130,7 @@ describe('kaleidoswapAtomicRecipe — full chain', () => {
       'kaleidoswap_get_quote',
       'kaleidoswap_atomic_init',
       'rln_get_node_info',
-      'rln_whitelist_swap',
+      'rln_atomic_taker',
       'kaleidoswap_atomic_execute',
     ]);
   });
@@ -157,7 +157,7 @@ describe('kaleidoswapAtomicRecipe — full chain', () => {
       provider: refusingProvider, tools, onConfirm: async () => ({ approved: true }),
       slots: { from_asset: 'USDT', to_asset: 'BTC', amount: 10, amount_side: 'from' },
     });
-    const whitelist = captured.find((c) => c.name === 'rln_whitelist_swap')!;
+    const whitelist = captured.find((c) => c.name === 'rln_atomic_taker')!;
     expect(whitelist.args).toEqual({ swapstring: 'SWAP/abc/def' });
     const exe = captured.find((c) => c.name === 'kaleidoswap_atomic_execute')!;
     expect(exe.args).toEqual({

package/src/recipe/kaleidoswap-atomic.ts CHANGED Viewed

@@ -12,7 +12,7 @@
  *     ↓ [ONE confirmation gate — shows the real quote numbers]
  *   kaleidoswap_atomic_init      ← MAKER  locks the swap → swapstring, payment_hash
  *   rln_get_node_info            ← NODE   read pubkey (= taker_pubkey)
- *   rln_whitelist_swap           ← NODE   accept the swapstring
+ *   rln_atomic_taker             ← NODE   whitelist the swapstring (taker accepts)
  *   kaleidoswap_atomic_execute   ← MAKER  settle (final)
  *
  * `forceModelExtract` ensures the model is always consulted for slot parsing
@@ -122,10 +122,11 @@ export const kaleidoswapAtomicRecipe: Recipe = {
       as: 'node',
       args: () => ({}),
     },
-    // 4. NODE: whitelist the maker's swapstring (accept the swap). Ungated —
-    //    covered by the single confirm above.
+    // 4. NODE: the taker whitelists the maker's swapstring (accept the swap).
+    //    Exposed by kaleido-mcp as `rln_atomic_taker` (calls rln.whitelistSwap).
+    //    Ungated — covered by the single confirm above.
     {
-      tool: 'rln_whitelist_swap',
+      tool: 'rln_atomic_taker',
       as: 'whitelist',
       args: (ctx) => {
         const init = ctx.results.init as InitResult | undefined;

package/src/recipe/recipe.test.ts CHANGED Viewed

@@ -72,6 +72,22 @@ describe('runRecipe — pay a contact', () => {
     expect(sent).toHaveLength(0);
   });
+  it('never reports a failed wallet result as sent', async () => {
+    const tools = new ToolRegistry([new InProcessToolSource('wallet', [
+      { name: 'resolve_contact', description: '', parameters: { type: 'object', properties: {} }, handler: async ({ name }) => ({ name, ln_address: `${name}@kaleidoswap.com` }) },
+      { name: 'fiat_to_sats', description: '', parameters: { type: 'object', properties: {} }, handler: async ({ amount }) => ({ sats: Math.round(Number(amount) * 1000) }) },
+      { name: 'send_payment', description: '', parameters: { type: 'object', properties: {} }, requiresConfirmation: true, handler: async () => ({ success: false, message: 'insufficient balance' }) },
+    ])]);
+    const res = await runRecipe(paymentsRecipe, 'pay bob 3 eur', {
+      provider: approve,
+      tools,
+      onConfirm: async () => ({ approved: true }),
+    });
+    expect(res.status).toBe('error');
+    expect(res.text).toContain('insufficient balance');
+    expect(res.text).not.toContain('Sent');
+  });
   it('falls back to ONE LLM extraction when regex misses', async () => {
     const sent: any[] = [];
     const tools = stubTools({ send: (a) => sent.push(a) });

package/src/recipe/runner.ts CHANGED Viewed

@@ -29,6 +29,43 @@ export interface RunRecipeOptions {
   signal?: AbortSignal;
 }
+function toolFailure(result: unknown): string | null { // returns a failure message, or null when nothing marks the result as failed
+  // A plain-string result (non-JSON MCP text, or a tool that returns prose):
+  // flag obvious error text so a failed action isn't reported as success.
+  if (typeof result === 'string') {
+    const s = result.trim();
+    return /^(error|failed|failure|exception)\b\s*[:\-]?/i.test(s) ? s : null; // NOTE(review): the `\s*[:\-]?` tail is fully optional, so a leading keyword alone is enough to match
+  }
+  if (!result || typeof result !== 'object') return null; // numbers, booleans, null, undefined are never failures
+  const r = result as Record<string, unknown>;
+  if (typeof r.error === 'string' && r.error.trim()) return r.error; // non-string or blank `error` values are ignored
+  if (r.success === false || r.ok === false) { // explicit false only; an absent flag is not a failure
+    return String(r.message ?? r.reason ?? 'The wallet action failed.');
+  }
+  const status = String(r.status ?? r.state ?? '').toLowerCase(); // `status` takes precedence over `state`
+  if (['error', 'failed', 'failure', 'rejected'].includes(status)) {
+    return String(r.message ?? r.reason ?? `The wallet returned status "${status}".`);
+  }
+  return null;
+}
+function failedResult( // builds the RecipeResult returned when a tool result is classified as a failure
+  recipe: Recipe,
+  ctx: RecipeContext, // its slots and results are passed through as-is
+  inferences: number,
+  message: string, // failure text; the call sites in runRecipe pass toolFailure's return value
+): RecipeResult {
+  return {
+    recipe: recipe.name,
+    slots: ctx.slots,
+    results: ctx.results,
+    text: `Couldn't complete that: ${message}`, // the message behind a fixed prefix
+    status: 'error',
+    error: message, // the same message without the prefix
+    inferences,
+  }; // no `final` field is set, unlike the 'done' result built in runRecipe
+}
 /** Extract the recipe's slots — deterministic regex first, else ONE LLM call. */
 export async function extractSlots(
   provider: LLMProvider,
@@ -187,6 +224,8 @@ export async function runRecipe(recipe: Recipe, text: string, opts: RunRecipeOpt
       const result = await opts.tools.execute(step.tool, args);
       ctx.results[step.as ?? step.tool] = result;
       opts.onStep?.(step.tool, args, result);
+      const failure = toolFailure(result);
+      if (failure) return failedResult(recipe, ctx, inferences, failure);
     }
     // Final action.
@@ -195,6 +234,8 @@ export async function runRecipe(recipe: Recipe, text: string, opts: RunRecipeOpt
     const finalResult = await opts.tools.execute(recipe.final.tool, finalArgs);
     ctx.results[recipe.final.as ?? recipe.final.tool] = finalResult;
     opts.onStep?.(recipe.final.tool, finalArgs, finalResult);
+    const failure = toolFailure(finalResult);
+    if (failure) return failedResult(recipe, ctx, inferences, failure);
     const out = recipe.summary?.(ctx, finalResult) ?? 'Done.';
     return { recipe: recipe.name, slots: ctx.slots, results: ctx.results, final: finalResult, text: out, status: 'done', inferences };

package/src/tools/mcp.live.test.ts ADDED Viewed

@@ -0,0 +1,116 @@
+/**
+ * Live MCP integration — regression guard for the "tool-less desktop chat" bug.
+ *
+ * The desktop agent (desktop-app/src-tauri/src/mind.rs → apps/provider
+ * connectMcpIfConfigured) wires tools EXACTLY the way this test does: spawn
+ * `node <kaleido-mcp>/dist/index.js` over stdio with RLN_NODE_URL pointing at
+ * the user's RGB-Lightning node, then listTools()/execute(). When that wiring
+ * breaks, the registry is empty, the model goes "tool-less", and it NARRATES
+ * tool calls it can never run ("Could you use the kaleidoswap_get_quote tool?")
+ * instead of returning real data — the exact 2026-06 symptom.
+ *
+ * This drives that chain end-to-end against a REAL running node and asserts the
+ * tools both EXIST (not tool-less) and EXECUTE (return live node data). A unit
+ * test can't catch this: the bug is in process/env wiring, not pure logic.
+ *
+ * Auto-skips unless (a) kaleido-mcp/dist is built and (b) an RLN node answers,
+ * so it's a no-op in CI and a real check on a dev box with a node up. Run it
+ * explicitly against a node with:
+ *   RLN_NODE_URL=http://localhost:3001 pnpm --filter @kaleidorg/mind test:live
+ */
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+import { existsSync } from 'node:fs';
+import { dirname, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { McpToolSource } from './mcp.js';
+const here = dirname(fileURLToPath(import.meta.url));
+// $KALEIDO_MCP_PATH override (what mind.rs sets), else the sibling repo's build.
+const MCP_ENTRY =
+  process.env.KALEIDO_MCP_PATH ??
+  resolve(here, '../../../../../kaleido-mcp/dist/index.js');
+const NODE_URL = (process.env.RLN_NODE_URL ?? 'http://localhost:3001').replace(/\/+$/, '');
+/** Probe the RLN node directly so we can (a) gate the suite and (b) compare the
+ *  MCP tool's output to ground truth pulled straight from the node.
+ *
+ *  Resolves to the `pubkey` string from `${NODE_URL}/nodeinfo`, or to null when
+ *  the response is not ok, the pubkey is missing or empty, or anything throws
+ *  (including the 4-second abort timeout). It never rejects. */
+async function fetchNodePubkey(): Promise<string | null> {
+  try {
+    const r = await fetch(`${NODE_URL}/nodeinfo`, { signal: AbortSignal.timeout(4000) });
+    if (!r.ok) return null;
+    const j = (await r.json()) as { pubkey?: string };
+    return typeof j.pubkey === 'string' && j.pubkey.length > 0 ? j.pubkey : null;
+  } catch {
+    return null;
+  }
+}
+const hasDist = existsSync(MCP_ENTRY);
+const livePubkey = hasDist ? await fetchNodePubkey() : null;
+const RUN = hasDist && !!livePubkey;
+if (!RUN) {
+  const why = !hasDist ? `no built MCP at ${MCP_ENTRY}` : `no RLN node at ${NODE_URL}`;
+  // eslint-disable-next-line no-console
+  console.warn(`[mcp.live] skipping live MCP integration — ${why}`);
+}
+describe.skipIf(!RUN)('MCP live integration (real RLN node)', () => { // RUN requires both a built MCP entry and a reachable node
+  let src: McpToolSource;
+  beforeAll(async () => {
+    src = new McpToolSource({
+      id: 'kaleido-test',
+      transport: {
+        kind: 'stdio',
+        command: 'node',
+        args: [MCP_ENTRY],
+        // Mirror the provider: inherit env, force the node URL, allow no WDK seed
+        // (rln_*/kaleidoswap_* register regardless; only spark_*/wdk_* need it).
+        env: {
+          ...process.env,
+          RLN_NODE_URL: NODE_URL,
+          WDK_SEED: process.env.WDK_SEED ?? '',
+        } as Record<string, string>,
+      },
+      timeoutMs: 30_000,
+    });
+    await src.connect();
+  }, 45_000);
+  afterAll(async () => {
+    await src?.close(); // optional chaining: src is unassigned if construction in beforeAll failed
+  });
+  it('exposes a non-empty tool registry (the model is NOT tool-less)', () => {
+    const tools = src.listTools();
+    expect(tools.length).toBeGreaterThan(0);
+    // The exact tools the agent narrated when it couldn't call them.
+    expect(src.has('rln_get_node_info')).toBe(true);
+    expect(src.has('rln_get_balances')).toBe(true);
+    expect(src.has('kaleidoswap_get_quote')).toBe(true);
+  });
+  it('preserves the confirmation gate on known spend tools', () => {
+    const spend = src.listTools().find((tool) => tool.name === 'rln_pay_invoice');
+    if (spend) expect(spend.requiresConfirmation).toBe(true); // NOTE(review): passes vacuously when rln_pay_invoice is not exposed
+  });
+  it('rln_get_node_info EXECUTES against the node (returns the live pubkey)', async () => {
+    const out = await src.execute('rln_get_node_info', {});
+    const text = typeof out === 'string' ? out : JSON.stringify(out);
+    // Real execution returns the node's actual identity — not a narrated promise.
+    expect(text).toContain(livePubkey!); // non-null is safe: RUN is only true when livePubkey was fetched
+  }, 30_000);
+  it('rln_get_balances EXECUTES against the node (returns live balance fields)', async () => {
+    const out = await src.execute('rln_get_balances', {});
+    const text = typeof out === 'string' ? out : JSON.stringify(out);
+    const parsed = JSON.parse(text) as { // throws (failing the test) if the tool returned non-JSON prose
+      lightning_balance_sat?: number;
+      btc_onchain?: Record<string, number>;
+    };
+    expect(parsed).toHaveProperty('lightning_balance_sat');
+    expect(typeof parsed.lightning_balance_sat).toBe('number');
+    expect(parsed).toHaveProperty('btc_onchain');
+  }, 30_000);
+});

package/src/tools/mcp.parse.test.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/** parseMcpResult — JSON parsing + isError handling for MCP tool results. */
+import { describe, it, expect } from 'vitest';
+import { parseMcpResult } from './mcp.js';
+describe('parseMcpResult', () => {
+  it('parses JSON text content into an object (so recipes thread real fields)', () => {
+    const res = { content: [{ type: 'text', text: '{"rfq_id":"abc","total_sat":1500}' }] };
+    expect(parseMcpResult(res)).toEqual({ rfq_id: 'abc', total_sat: 1500 });
+  });
+  it('surfaces isError as an {error} object (so a failed spend is not "success")', () => {
+    const res = { isError: true, content: [{ type: 'text', text: 'insufficient funds' }] };
+    expect(parseMcpResult(res)).toEqual({ error: 'insufficient funds' });
+  });
+  it('errors with no text still produce an {error} object', () => {
+    expect(parseMcpResult({ isError: true, content: [] })).toEqual({
+      error: 'The tool reported an error.',
+    });
+  });
+  it('passes non-JSON prose through unchanged', () => {
+    const res = { content: [{ type: 'text', text: 'Bitcoin is digital cash.' }] };
+    expect(parseMcpResult(res)).toBe('Bitcoin is digital cash.');
+  });
+  it('returns the content array when there is no text block', () => {
+    const res = { content: [{ type: 'image', data: 'x' }] };
+    expect(parseMcpResult(res)).toEqual([{ type: 'image', data: 'x' }]);
+  });
+  it('joins multiple text blocks before parsing', () => {
+    const res = { content: [{ type: 'text', text: '{"a":1,' }, { type: 'text', text: '"b":2}' }] };
+    expect(parseMcpResult(res)).toEqual({ a: 1, b: 2 });
+  });
+});