@kaleidorg/mind 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/dist/bitrefill/contract.d.ts +60 -0
  2. package/dist/bitrefill/contract.d.ts.map +1 -0
  3. package/dist/bitrefill/contract.js +119 -0
  4. package/dist/bitrefill/contract.js.map +1 -0
  5. package/dist/context/compress.d.ts +65 -0
  6. package/dist/context/compress.d.ts.map +1 -0
  7. package/dist/context/compress.js +181 -0
  8. package/dist/context/compress.js.map +1 -0
  9. package/dist/engine.d.ts +20 -0
  10. package/dist/engine.d.ts.map +1 -1
  11. package/dist/engine.js +23 -4
  12. package/dist/engine.js.map +1 -1
  13. package/dist/evidence.d.ts +62 -0
  14. package/dist/evidence.d.ts.map +1 -0
  15. package/dist/evidence.js +47 -0
  16. package/dist/evidence.js.map +1 -0
  17. package/dist/flashnet/contract.d.ts +56 -0
  18. package/dist/flashnet/contract.d.ts.map +1 -0
  19. package/dist/flashnet/contract.js +100 -0
  20. package/dist/flashnet/contract.js.map +1 -0
  21. package/dist/funnel.d.ts +11 -0
  22. package/dist/funnel.d.ts.map +1 -1
  23. package/dist/funnel.js +50 -7
  24. package/dist/funnel.js.map +1 -1
  25. package/dist/index.d.ts +10 -1
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +7 -0
  28. package/dist/index.js.map +1 -1
  29. package/dist/kaleidoswap/contract.js +1 -1
  30. package/dist/kaleidoswap/contract.js.map +1 -1
  31. package/dist/knowledge/bitcoin-copilot.d.ts.map +1 -1
  32. package/dist/knowledge/bitcoin-copilot.js +83 -0
  33. package/dist/knowledge/bitcoin-copilot.js.map +1 -1
  34. package/dist/providers/types.d.ts +17 -0
  35. package/dist/providers/types.d.ts.map +1 -1
  36. package/dist/qvac/provider.d.ts.map +1 -1
  37. package/dist/qvac/provider.js +23 -0
  38. package/dist/qvac/provider.js.map +1 -1
  39. package/dist/qvac/stream.d.ts +6 -0
  40. package/dist/qvac/stream.d.ts.map +1 -1
  41. package/dist/qvac/stream.js +12 -0
  42. package/dist/qvac/stream.js.map +1 -1
  43. package/dist/recipe/flashnet-swap.d.ts +35 -0
  44. package/dist/recipe/flashnet-swap.d.ts.map +1 -0
  45. package/dist/recipe/flashnet-swap.js +239 -0
  46. package/dist/recipe/flashnet-swap.js.map +1 -0
  47. package/dist/recipe/kaleidoswap-atomic.d.ts.map +1 -1
  48. package/dist/recipe/kaleidoswap-atomic.js +37 -16
  49. package/dist/recipe/kaleidoswap-atomic.js.map +1 -1
  50. package/dist/recipe/kaleidoswap-channel-order.d.ts.map +1 -1
  51. package/dist/recipe/kaleidoswap-channel-order.js +31 -10
  52. package/dist/recipe/kaleidoswap-channel-order.js.map +1 -1
  53. package/dist/recipe/kaleidoswap-price.d.ts.map +1 -1
  54. package/dist/recipe/kaleidoswap-price.js +7 -1
  55. package/dist/recipe/kaleidoswap-price.js.map +1 -1
  56. package/dist/recipe/runner.d.ts.map +1 -1
  57. package/dist/recipe/runner.js +5 -3
  58. package/dist/recipe/runner.js.map +1 -1
  59. package/dist/recipe/swap.d.ts.map +1 -1
  60. package/dist/recipe/swap.js +14 -1
  61. package/dist/recipe/swap.js.map +1 -1
  62. package/dist/wallet/confirm.d.ts.map +1 -1
  63. package/dist/wallet/confirm.js +1 -0
  64. package/dist/wallet/confirm.js.map +1 -1
  65. package/dist/wallet/contract.d.ts.map +1 -1
  66. package/dist/wallet/contract.js +20 -4
  67. package/dist/wallet/contract.js.map +1 -1
  68. package/package.json +4 -4
  69. package/skills/bitrefill/SKILL.md +152 -52
  70. package/skills/flashnet-swaps/SKILL.md +158 -0
  71. package/skills/kaleido-lsps/SKILL.md +25 -8
  72. package/skills/kaleido-trading/SKILL.md +36 -12
  73. package/skills/merchant-finder/SKILL.md +1 -1
  74. package/skills/rgb-lightning-node/SKILL.md +35 -8
  75. package/skills/spark-wallet/SKILL.md +235 -0
  76. package/skills/wallet-assistant/SKILL.md +2 -2
  77. package/src/bitrefill/contract.test.ts +89 -0
  78. package/src/bitrefill/contract.ts +190 -0
  79. package/src/context/compress.test.ts +120 -0
  80. package/src/context/compress.ts +230 -0
  81. package/src/engine.test.ts +34 -0
  82. package/src/engine.ts +35 -4
  83. package/src/evidence.test.ts +80 -0
  84. package/src/evidence.ts +114 -0
  85. package/src/flashnet/contract.test.ts +101 -0
  86. package/src/flashnet/contract.ts +164 -0
  87. package/src/funnel.ts +59 -8
  88. package/src/index.ts +51 -1
  89. package/src/kaleidoswap/contract.ts +1 -1
  90. package/src/knowledge/bitcoin-copilot.ts +94 -0
  91. package/src/providers/types.ts +18 -0
  92. package/src/qvac/provider.ts +25 -1
  93. package/src/qvac/stream.test.ts +11 -0
  94. package/src/qvac/stream.ts +16 -0
  95. package/src/recipe/flashnet-swap.test.ts +114 -0
  96. package/src/recipe/flashnet-swap.ts +266 -0
  97. package/src/recipe/kaleidoswap-atomic.test.ts +21 -0
  98. package/src/recipe/kaleidoswap-atomic.ts +34 -16
  99. package/src/recipe/kaleidoswap-channel-order.test.ts +38 -0
  100. package/src/recipe/kaleidoswap-channel-order.ts +27 -9
  101. package/src/recipe/kaleidoswap-price.ts +7 -1
  102. package/src/recipe/recipe.test.ts +5 -0
  103. package/src/recipe/runner.ts +5 -3
  104. package/src/recipe/swap.ts +16 -1
  105. package/src/wallet/confirm.test.ts +8 -0
  106. package/src/wallet/confirm.ts +1 -0
  107. package/src/wallet/contract.test.ts +10 -0
  108. package/src/wallet/contract.ts +26 -4
@@ -0,0 +1,230 @@
1
+ /**
2
+ * Tool-output compression — the "fit more into a tiny window" part.
3
+ *
4
+ * Tool results are the single biggest, most repetitive thing the engine pushes
5
+ * into a small on-device model's context. A merchant search returns 40 near
6
+ * identical rows; a tx history returns hundreds; a swap quote nests config the
7
+ * model never reads. Every round, the *raw* `JSON.stringify(result)` is fed back
8
+ * into history — so on a 2k-window 0.6B model the conversation drowns in JSON
9
+ * the model didn't need, crowding out the system prompt, the skill, and the
10
+ * actual question.
11
+ *
12
+ * `compressToolResult` is a structural crusher (the idea behind Headroom's
13
+ * SmartCrusher/ToolCrusher, reimplemented natively — no dependency, no network,
14
+ * no proxy, so it stays on-device and private). It walks the JSON and:
15
+ *
16
+ * • dedupes identical array items, then keeps the first/last few and replaces
17
+ * the middle with an honest `{ "__elided__": N }` marker,
18
+ * • caps nesting depth (deep config → a one-line summary),
19
+ * • truncates long *prose* strings (logs, descriptions),
20
+ *
21
+ * and never regresses: if crushing doesn't actually save tokens, the original
22
+ * is returned untouched.
23
+ *
24
+ * SAFETY: it never touches numbers, never elides whole objects, never truncates
25
+ * whitespace-free strings (addresses, BOLT11 invoices, txids, pubkeys), and
26
+ * never touches a value under a money/identity key (see DEFAULT_PRESERVE_KEYS).
27
+ * Amounts and recipients reach the model intact — the confirm readback is built
28
+ * deterministically from the resolved call anyway, but this keeps the model's
29
+ * own view honest too. Small results (below `minTokens`) are passed through
30
+ * verbatim so nothing changes for the common case.
31
+ */
32
+
33
+ import { estimateTokens } from './budget.js';
34
+
/**
 * Keys whose values are never elided or truncated — amounts, ids, recipients.
 *
 * This list is the fallback when `ToolCrushOptions.preserveKeys` is not given.
 * `compressToolResult` lower-cases these entries and lower-cases each object
 * key before the lookup, so matching is case-insensitive.
 */
export const DEFAULT_PRESERVE_KEYS: readonly string[] = [
  'amount',
  'amount_sat',
  'amount_sats',
  'amount_msat',
  'sat',
  'sats',
  'msat',
  'value',
  'fee',
  'fee_sat',
  'fee_sats',
  'total',
  'total_sats',
  'balance',
  'balance_sat',
  'address',
  'invoice',
  'bolt11',
  'payment_request',
  'payment_hash',
  'preimage',
  'txid',
  'tx_id',
  'pubkey',
  'node_id',
  'recipient',
  'destination',
  'asset_id',
  'contract_id',
  'rate',
  'price',
  'price_usd',
];
70
+
/**
 * Tuning knobs for `compressToolResult`. Every field is optional; an omitted
 * field falls back to the default noted on that member.
 */
export interface ToolCrushOptions {
  /** Don't compress results estimated below this many tokens. Default 200. */
  minTokens?: number;
  /** Max items kept in any array before the middle is elided. Default 8; values below 2 are raised to 2. */
  maxArrayItems?: number;
  /** Max nesting depth kept verbatim; deeper is summarized. Default 6; values below 1 are raised to 1. */
  maxDepth?: number;
  /** Max chars for a single prose string before truncation (0 disables). Default 600. */
  maxStringLength?: number;
  /** Dedupe identical array items before eliding (only arrays longer than `maxArrayItems` are deduped). Default true. */
  dedupe?: boolean;
  /** Keys whose values are never elided/truncated (defaults to DEFAULT_PRESERVE_KEYS). Compared case-insensitively. */
  preserveKeys?: readonly string[];
}
85
+
/** Outcome of `compressToolResult`: the text to use plus before/after size estimates. */
export interface CrushResult {
  /** Serialized, compressed content — ready to push into history. Equals the original serialization when `changed` is false. */
  content: string;
  /** Estimated tokens of the original serialization. */
  originalTokens: number;
  /** Estimated tokens after crushing. Equals `originalTokens` when `changed` is false. */
  compressedTokens: number;
  /**
   * Total array items dropped across the whole structure. Counts only items
   * replaced by an elision marker; duplicates removed by dedupe are not
   * counted. Reported as 0 when `changed` is false.
   */
  elided: number;
  /** False when the original was returned untouched (too small, or no win). */
  changed: boolean;
}
98
+
/**
 * `ToolCrushOptions` after defaults and clamps have been applied, as built at
 * the top of `compressToolResult`. `preserve` holds the preserve keys
 * lower-cased for case-insensitive lookup.
 */
interface Resolved {
  minTokens: number;
  maxArrayItems: number;
  maxDepth: number;
  maxStringLength: number;
  dedupe: boolean;
  preserve: Set<string>;
}
107
+
/**
 * Reports whether `s` contains no whitespace at all. Such strings are treated
 * as identifiers (address/invoice/hash) and are never truncated. The empty
 * string counts as identifier-like.
 */
function isIdentifierLike(s: string): boolean {
  const firstWhitespace = s.search(/\s/);
  return firstWhitespace === -1;
}
112
+
/** Strings are returned as-is; every other value is serialized with `safeStringify`. */
function serialize(value: unknown): string {
  if (typeof value === 'string') {
    return value;
  }
  return safeStringify(value);
}
116
+
/**
 * `JSON.stringify` with a `String(value)` fallback, used both when
 * serialization throws and when it yields no text (e.g. for `undefined`).
 */
function safeStringify(value: unknown): string {
  try {
    const json = JSON.stringify(value);
    return json == null ? String(value) : json;
  } catch {
    return String(value);
  }
}
123
+ }
124
+
/**
 * Produce the text of a tool result to place in model context.
 *
 * The value is serialized, its token count estimated, and — only when that
 * estimate reaches `minTokens` — structurally crushed. The crushed form is
 * used only if its estimate is strictly smaller; otherwise the original
 * serialization is returned with `changed: false`.
 *
 * @param value the raw tool result (any JSON-ish value, or a string).
 * @param opts  optional tuning; see `ToolCrushOptions` for defaults.
 * @returns the content to use plus before/after token estimates.
 */
export function compressToolResult(value: unknown, opts: ToolCrushOptions = {}): CrushResult {
  const cfg: Resolved = {
    minTokens: opts.minTokens ?? 200,
    maxArrayItems: Math.max(2, opts.maxArrayItems ?? 8),
    maxDepth: Math.max(1, opts.maxDepth ?? 6),
    maxStringLength: opts.maxStringLength ?? 600,
    dedupe: opts.dedupe ?? true,
    preserve: new Set((opts.preserveKeys ?? DEFAULT_PRESERVE_KEYS).map((name) => name.toLowerCase())),
  };

  const original = serialize(value);
  const originalTokens = estimateTokens(original);

  // Shared "hand back the input unchanged" result for both bail-out paths.
  const untouched = (): CrushResult => ({
    content: original,
    originalTokens,
    compressedTokens: originalTokens,
    elided: 0,
    changed: false,
  });

  // Below the floor, never touch it — correctness over savings for small results.
  if (originalTokens < cfg.minTokens) {
    return untouched();
  }

  let dropped = 0;
  const countDrop = (): void => {
    dropped += 1;
  };
  const content = serialize(crush(value, cfg, 0, false, countDrop));
  const compressedTokens = estimateTokens(content);

  // Never regress: if crushing didn't actually shrink it, keep the original.
  if (compressedTokens >= originalTokens) {
    return untouched();
  }

  return { content, originalTokens, compressedTokens, elided: dropped, changed: true };
}
161
+
/**
 * Recursively crush a JSON-ish value.
 *
 * Rules, applied in this order:
 *  - `null`/`undefined`, numbers and booleans are returned unchanged;
 *  - a value under a preserve key is returned verbatim, whatever its type —
 *    a preserved array is not deduped or elided and a preserved object is not
 *    depth-collapsed;
 *  - whitespace-free strings are kept; other strings longer than
 *    `cfg.maxStringLength` are cut and suffixed with the omitted char count;
 *  - objects/arrays at or beyond `cfg.maxDepth` become a one-line summary;
 *  - arrays longer than `cfg.maxArrayItems` are optionally deduped, then reduced
 *    to head + elision marker + tail;
 *  - objects are rebuilt key by key, flagging children under a preserve key.
 *
 * @param value     the value to crush.
 * @param cfg       resolved options.
 * @param depth     nesting depth of `value` (0 at the root).
 * @param preserved when true, the value sits under a preserve key — kept verbatim.
 * @param onElide   called once per array item dropped (for stats).
 * @returns the crushed value; the input itself is never mutated.
 */
function crush(value: unknown, cfg: Resolved, depth: number, preserved: boolean, onElide: () => void): unknown {
  if (value === null || value === undefined) return value;

  // Money/identity values must reach the model intact. Returning here (rather
  // than only in the string branch) also protects preserved arrays and objects
  // from dedupe, middle-elision and depth collapsing.
  if (preserved) return value;

  if (typeof value === 'string') {
    if (isIdentifierLike(value)) return value;
    if (cfg.maxStringLength > 0 && value.length > cfg.maxStringLength) {
      const omitted = value.length - cfg.maxStringLength;
      return `${value.slice(0, cfg.maxStringLength)}… (+${omitted} chars)`;
    }
    return value;
  }

  if (typeof value !== 'object') return value; // number, boolean — never touched

  // Beyond max depth, collapse to a one-line shape summary instead of the subtree.
  if (depth >= cfg.maxDepth) {
    if (Array.isArray(value)) return `[array: ${value.length} items]`;
    return `[object: ${Object.keys(value as object).length} keys]`;
  }

  if (Array.isArray(value)) {
    let items: unknown[] = value;

    // Dedupe only when the array is over the limit; items are compared by
    // their serialized form and the first occurrence wins.
    if (cfg.dedupe && items.length > cfg.maxArrayItems) {
      const seen = new Set<string>();
      const unique: unknown[] = [];
      for (const item of items) {
        const key = safeStringify(item);
        if (seen.has(key)) continue;
        seen.add(key);
        unique.push(item);
      }
      items = unique;
    }

    if (items.length <= cfg.maxArrayItems) {
      return items.map((v) => crush(v, cfg, depth + 1, false, onElide));
    }

    // Keep the front and a little of the tail (Headroom's "anchors"); elide the
    // middle with an honest marker so the model knows data was omitted and can
    // ask a more specific question / call a narrower tool.
    const keepFirst = Math.max(1, Math.ceil(cfg.maxArrayItems * 0.6));
    const keepLast = Math.max(0, cfg.maxArrayItems - keepFirst);
    const head = items.slice(0, keepFirst);
    const tail = keepLast > 0 ? items.slice(items.length - keepLast) : [];
    const droppedCount = items.length - head.length - tail.length;
    for (let i = 0; i < droppedCount; i++) onElide();

    const out: unknown[] = head.map((v) => crush(v, cfg, depth + 1, false, onElide));
    out.push({ __elided__: droppedCount, note: 'items omitted to fit context' });
    for (const v of tail) out.push(crush(v, cfg, depth + 1, false, onElide));
    return out;
  }

  const obj = value as Record<string, unknown>;
  const result: Record<string, unknown> = {};
  for (const [key, v] of Object.entries(obj)) {
    // Keys are compared lower-cased; cfg.preserve stores lower-cased names.
    const keep = cfg.preserve.has(key.toLowerCase());
    result[key] = crush(v, cfg, depth + 1, keep, onElide);
  }
  return result;
}
@@ -150,6 +150,40 @@ describe('Engine agentic loop', () => {
150
150
  expect(balanceTool.handler).toHaveBeenCalledTimes(3);
151
151
  });
152
152
 
// Checks that `compressToolOutput` only affects the tool frame pushed into
// history: the `onToolResult` callback and `res.toolCalls` still see all rows.
it('crushes verbose tool output in history but keeps the raw result for callbacks', async () => {
  // A tool returning 50 distinct rows, each with prose and an amount field.
  const bulky = {
    name: 'list_merchants',
    description: 'returns many rows',
    parameters: {},
    handler: vi.fn(async () => ({
      results: Array.from({ length: 50 }, (_, i) => ({
        name: `Shop ${i}`,
        blurb: 'Accepts Bitcoin and Lightning, open daily, friendly staff and good wifi.',
        amount_sats: 1000 + i,
      })),
    })),
  };
  const onToolResult = vi.fn();
  const engine = new Engine({
    // Scripted model: first turn calls the tool, second turn answers.
    provider: scriptedProvider([
      { text: '', toolCalls: [{ name: 'list_merchants', arguments: {} }] },
      { text: 'Found some merchants.' },
    ]),
    tools: new ToolRegistry([new InProcessToolSource('m', [bulky])]),
    // 50 rows against a limit of 6 forces the middle of the array to be elided.
    compressToolOutput: { maxArrayItems: 6 },
  });

  const res = await engine.runAgentic([{ role: 'user', content: 'find cafes' }], { onToolResult });

  // The history frame the model sees is crushed (elision marker present)...
  const toolFrame = res.messages.find((m) => m.role === 'tool');
  expect(toolFrame?.content).toContain('__elided__');
  // ...but amounts survive and the callback/result still carry the full data.
  expect(toolFrame?.content).toContain('amount_sats');
  expect(onToolResult.mock.calls[0][0].result).toEqual(await bulky.handler.mock.results[0].value);
  expect((res.toolCalls[0].result as { results: unknown[] }).results).toHaveLength(50);
});
186
+
153
187
  it('surfaces a tool error as a result instead of throwing', async () => {
154
188
  const boom = {
155
189
  name: 'boom',
package/src/engine.ts CHANGED
@@ -16,7 +16,9 @@
16
16
 
17
17
  import type { ConfirmDecision, Message, ToolResult } from './types.js';
18
18
  import type { LLMProvider } from './providers/types.js';
19
+ import type { InferenceMetrics } from './providers/types.js';
19
20
  import type { ToolRegistry } from './tools/registry.js';
21
+ import { compressToolResult, type ToolCrushOptions } from './context/compress.js';
20
22
 
21
23
  export interface EngineOptions {
22
24
  provider: LLMProvider;
@@ -25,6 +27,15 @@ export interface EngineOptions {
25
27
  defaultSystem?: string;
26
28
  /** Max reasoning↔tool rounds before forcing a stop. Default 5. */
27
29
  defaultMaxTurns?: number;
30
+ /**
31
+ * Crush verbose tool results before they're fed back into history, so a
32
+ * tiny on-device model's context window isn't drowned in repetitive JSON
33
+ * (merchant lists, tx history, nested quotes). `true` uses safe defaults;
34
+ * pass options to tune. Off by default — small results are never touched and
35
+ * amounts/addresses/invoices are always preserved (see compressToolResult).
36
+ * The `onToolResult` callback and `toolCalls` still carry the raw result.
37
+ */
38
+ compressToolOutput?: boolean | ToolCrushOptions;
28
39
  }
29
40
 
30
41
  export interface AgenticOptions {
@@ -59,6 +70,8 @@ export interface AgenticResult {
59
70
  messages: Message[];
60
71
  /** Wall-clock duration of the whole agentic run, ms. */
61
72
  latencyMs: number;
73
+ /** One receipt per model call in this agentic run. */
74
+ inference: InferenceMetrics[];
62
75
  }
63
76
 
64
77
  export class Engine {
@@ -66,12 +79,18 @@ export class Engine {
66
79
  private readonly registry: ToolRegistry;
67
80
  private readonly defaultSystem?: string;
68
81
  private readonly defaultMaxTurns: number;
82
+ private readonly compressOpts?: ToolCrushOptions;
69
83
 
70
84
  constructor(opts: EngineOptions) {
71
85
  this.provider = opts.provider;
72
86
  this.registry = opts.tools;
73
87
  this.defaultSystem = opts.defaultSystem;
74
88
  this.defaultMaxTurns = opts.defaultMaxTurns ?? 5;
89
+ this.compressOpts = opts.compressToolOutput
90
+ ? opts.compressToolOutput === true
91
+ ? {}
92
+ : opts.compressToolOutput
93
+ : undefined;
75
94
  }
76
95
 
77
96
  async runAgentic(messages: Message[], opts: AgenticOptions = {}): Promise<AgenticResult> {
@@ -90,6 +109,7 @@ export class Engine {
90
109
  let lastRequestId: string | undefined;
91
110
  let finalText = '';
92
111
  let turns = 0;
112
+ const inference: InferenceMetrics[] = [];
93
113
 
94
114
  for (let turn = 1; turn <= maxTurns; turn++) {
95
115
  turns = turn;
@@ -104,6 +124,7 @@ export class Engine {
104
124
  });
105
125
 
106
126
  lastRequestId = out.requestId;
127
+ if (out.inference) inference.push(out.inference);
107
128
  if (out.requestId) opts.onStart?.(out.requestId, turn);
108
129
  finalText = (out.text || '').trim();
109
130
 
@@ -133,10 +154,7 @@ export class Engine {
133
154
 
134
155
  executed.push({ name: call.name, arguments: call.arguments, result });
135
156
  opts.onToolResult?.({ name: call.name, arguments: call.arguments, result }, turn);
136
- history.push({
137
- role: 'tool',
138
- content: typeof result === 'string' ? result : JSON.stringify(result),
139
- });
157
+ history.push({ role: 'tool', content: this.toHistoryContent(result) });
140
158
  }
141
159
 
142
160
  if (turn === maxTurns && !finalText) {
@@ -155,6 +173,7 @@ export class Engine {
155
173
  requestId: lastRequestId,
156
174
  messages: history,
157
175
  latencyMs: Date.now() - startedAt,
176
+ inference,
158
177
  };
159
178
  }
160
179
 
@@ -162,6 +181,18 @@ export class Engine {
162
181
  await this.provider.cancel?.(requestId);
163
182
  }
164
183
 
/**
 * Serialize a tool result for history, optionally crushing verbose JSON so
 * it doesn't swamp a small context window. The raw result is unchanged for
 * callbacks/logs — only the model-facing history copy is compressed.
 *
 * Always returns a string. When compression is off, a result that
 * `JSON.stringify` cannot turn into text (it returns `undefined` for a void
 * result, or throws on BigInt/cyclic data) falls back to `String(result)`,
 * matching what the compressed path already does.
 *
 * @param result the raw value returned by the tool.
 * @returns the text to store as the `tool` message content.
 */
private toHistoryContent(result: unknown): string {
  if (this.compressOpts) {
    return compressToolResult(result, this.compressOpts).content;
  }
  if (typeof result === 'string') return result;
  try {
    // JSON.stringify yields undefined for undefined/functions/symbols.
    return JSON.stringify(result) ?? String(result);
  } catch {
    return String(result);
  }
}
195
+
165
196
  private async safeExecute(name: string, args: Record<string, unknown>): Promise<unknown> {
166
197
  try {
167
198
  return await this.registry.execute(name, args);
@@ -0,0 +1,80 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import {
3
+ EVIDENCE_SCHEMA,
4
+ EvidenceRecorder,
5
+ sanitizeEvidenceEvent,
6
+ type EvidenceEvent,
7
+ } from './evidence.js';
8
+
/**
 * Test helper: an `EvidenceRecorder` whose IO appends each written line to
 * `lines` and whose clock always reports 2026-06-20T12:00:00.000Z.
 */
function memoryRecorder(lines: string[]) {
  const frozenNow = () => new Date('2026-06-20T12:00:00.000Z');
  const appendLine = async (line: string) => {
    lines.push(line);
  };
  return new EvidenceRecorder({ io: { appendLine, now: frozenNow } });
}
19
+
// Exercises EvidenceRecorder.record() against an in-memory sink, plus the
// default sanitizer called directly.
describe('EvidenceRecorder', () => {
  it('writes a completed inference receipt', async () => {
    const lines: string[] = [];
    const event = await memoryRecorder(lines).record({
      event: 'inference',
      runId: 'desktop-demo',
      surface: 'desktop',
      prompt: 'show my balance',
      response: 'You have 42 sats.',
      inference: [{ durationMs: 220, ttftMs: 40, totalTokens: 18, status: 'completed' }],
    });
    // The recorder stamps the schema itself; the written line round-trips as JSON.
    expect(event.schema).toBe(EVIDENCE_SCHEMA);
    expect(JSON.parse(lines[0]).inference[0].ttftMs).toBe(40);
  });

  it('records a failed inference without inventing token metrics', async () => {
    const lines: string[] = [];
    await memoryRecorder(lines).record({
      event: 'error',
      runId: 'failed-demo',
      surface: 'test',
      error: { name: 'ModelError', message: 'model failed to load' },
    });
    expect(JSON.parse(lines[0]).error.name).toBe('ModelError');
  });

  it('records tool calls and confirmation decisions', async () => {
    const lines: string[] = [];
    const recorder = memoryRecorder(lines);
    await recorder.record({
      event: 'tool_call',
      runId: 'tools-demo',
      surface: 'mobile',
      tool: { name: 'rln_send_btc', arguments: { amount_sat: 100 } },
    });
    await recorder.record({
      event: 'confirmation',
      runId: 'tools-demo',
      surface: 'mobile',
      confirmation: { tool: 'rln_send_btc', approved: false, reason: 'demo stop' },
    });
    // One line per record() call, in call order.
    expect(lines).toHaveLength(2);
    expect(JSON.parse(lines[1]).confirmation.approved).toBe(false);
  });

  it('sanitizes payment material in interrupted runs', () => {
    const event: EvidenceEvent = {
      schema: EVIDENCE_SCHEMA,
      event: 'inference',
      ts: '2026-06-20T12:00:00.000Z',
      runId: 'cancelled-demo',
      surface: 'mobile',
      prompt: 'pay lnbc123456789012345678901234567890',
      tool: { name: 'rln_pay_invoice', arguments: { invoice: 'lnbc-secret' } },
      inference: { durationMs: 50, status: 'cancelled' },
    };
    const clean = sanitizeEvidenceEvent(event);
    // Free text has invoice-looking tokens replaced; values under a sensitive key are masked whole.
    expect(clean.prompt).toContain('[payment-data-redacted]');
    expect(clean.tool?.arguments?.invoice).toBe('[redacted]');
  });
});
@@ -0,0 +1,114 @@
1
+ /**
2
+ * Hackathon evidence JSONL.
3
+ *
4
+ * This is deliberately transport-neutral: Node writes it to disk, React Native
5
+ * writes it to the app documents directory, and tests keep it in memory.
6
+ */
7
+ import type { InferenceMetrics } from './providers/types.js';
8
+
/** Schema tag stamped on every recorded event by `EvidenceRecorder.record`. */
export const EVIDENCE_SCHEMA = 'kaleidomind.evidence.v1' as const;

/** Where the event was produced. */
export type EvidenceSurface = 'desktop' | 'mobile' | 'cli' | 'test';
/** The kinds of event an evidence line can describe. */
export type EvidenceEventType =
  | 'model_load'
  | 'model_unload'
  | 'inference'
  | 'tool_call'
  | 'tool_result'
  | 'confirmation'
  | 'error';
20
+
/**
 * One evidence record; `EvidenceRecorder` writes each event as a single JSON
 * line. Only `schema`, `event`, `ts`, `runId` and `surface` are required; the
 * remaining sections are optional and independent of one another.
 */
export interface EvidenceEvent {
  /** Always `EVIDENCE_SCHEMA`. */
  schema: typeof EVIDENCE_SCHEMA;
  event: EvidenceEventType;
  /** Timestamp string; the recorder fills it with `io.now().toISOString()`. */
  ts: string;
  runId: string;
  surface: EvidenceSurface;
  model?: {
    name: string;
    version?: string;
    source?: 'local' | 'delegated';
  };
  hardware?: {
    device: string;
    os?: string;
    memoryGb?: number;
  };
  prompt?: string;
  response?: string;
  /** A single receipt or a list of receipts. */
  inference?: InferenceMetrics | InferenceMetrics[];
  tool?: {
    name: string;
    arguments?: Record<string, unknown>;
    result?: unknown;
  };
  confirmation?: {
    tool: string;
    approved: boolean;
    reason?: string;
  };
  error?: {
    name: string;
    message: string;
  };
  meta?: Record<string, unknown>;
}

/** What callers pass to `record`: an event without the fields the recorder stamps itself. */
export type EvidenceInput = Omit<EvidenceEvent, 'schema' | 'ts'>;
58
+
/** The two side effects the recorder needs, supplied by the host. */
export interface EvidenceIO {
  /** Receives one JSON-serialized event per call. */
  appendLine(line: string): Promise<void>;
  /** Clock used to stamp each event's `ts`. */
  now(): Date;
}
63
+
/** Construction options for `EvidenceRecorder`. */
export interface EvidenceRecorderOptions {
  io: EvidenceIO;
  /** Replaces the default `sanitizeEvidenceEvent`; applied to every event before it is written. */
  sanitize?: (event: EvidenceEvent) => EvidenceEvent;
}
68
+
/** Stamps, sanitizes and appends evidence events through the injected IO. */
export class EvidenceRecorder {
  private readonly opts: EvidenceRecorderOptions;

  constructor(opts: EvidenceRecorderOptions) {
    this.opts = opts;
  }

  /**
   * Complete `input` with the schema tag and a timestamp, run it through the
   * sanitizer (the configured one, else `sanitizeEvidenceEvent`), append it as
   * one JSON line, and return the sanitized event that was written.
   */
  async record(input: EvidenceInput): Promise<EvidenceEvent> {
    const stamped: EvidenceEvent = {
      ...input,
      schema: EVIDENCE_SCHEMA,
      ts: this.opts.io.now().toISOString(),
    };
    const scrub = this.opts.sanitize ?? sanitizeEvidenceEvent;
    const event = scrub(stamped);
    const line = JSON.stringify(event);
    await this.opts.io.appendLine(line);
    return event;
  }
}
83
+
/**
 * Object keys whose values are always replaced with '[redacted]'.
 *
 * Entries are stored in normalized form (lower-case, no '_' or '-') and keys
 * are normalized the same way before lookup, so `private_key`, `privateKey`,
 * `PRIVATE-KEY` and `PrivateKey` all match the single entry `privatekey`.
 */
const SENSITIVE_KEYS = new Set([
  'address',
  'invoice',
  'bolt11',
  'seed',
  'mnemonic',
  'privatekey',
  'accesstoken',
  'preimage',
]);

/** Fold case and drop '_' / '-' so spelling variants of a key compare equal. */
function normalizeEvidenceKey(key: string): string {
  return key.toLowerCase().replace(/[_-]/g, '');
}

/**
 * Matches payment material embedded in free text: a token starting with
 * lnbc/lntb/lnbcrt or bc1/tb1/bcrt1 followed by at least 20 letters or digits,
 * case-insensitively.
 */
const PAYMENT_TOKEN =
  /\b(?:ln(?:bc|tb|bcrt)[0-9a-z]{20,}|(?:bc1|tb1|bcrt1)[0-9a-z]{20,})\b/gi;

/**
 * Mask wallet secrets while preserving prompts, model output and benchmark value.
 *
 * Walks the whole event and returns a new structure (the input is not
 * mutated):
 *  - any value stored under a sensitive key becomes '[redacted]', whatever its
 *    type; key matching ignores case and '_' / '-' separators;
 *  - every other string has payment-looking tokens replaced with
 *    '[payment-data-redacted]';
 *  - arrays and plain objects are rebuilt recursively; other values pass through.
 *
 * @param event the event to sanitize.
 * @returns a sanitized copy of `event`.
 */
export function sanitizeEvidenceEvent(event: EvidenceEvent): EvidenceEvent {
  const walk = (value: unknown, key?: string): unknown => {
    if (key && SENSITIVE_KEYS.has(normalizeEvidenceKey(key))) return '[redacted]';
    if (typeof value === 'string') return value.replace(PAYMENT_TOKEN, '[payment-data-redacted]');
    // Array items carry no key of their own; a sensitive parent key has
    // already redacted the whole array before this point.
    if (Array.isArray(value)) return value.map((item) => walk(item));
    if (value && typeof value === 'object') {
      return Object.fromEntries(
        Object.entries(value as Record<string, unknown>).map(([k, v]) => [k, walk(v, k)]),
      );
    }
    return value;
  };
  return walk(event) as EvidenceEvent;
}
@@ -0,0 +1,101 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import {
3
+ FLASHNET_TOOLS,
4
+ FLASHNET_SPEND_TOOLS,
5
+ isFlashnetSpendTool,
6
+ getFlashnetTool,
7
+ bindFlashnetTools,
8
+ type FlashnetHandler,
9
+ } from './contract.js';
10
+
11
+ describe('FLASHNET_TOOLS — shape invariants', () => {
12
+ it('exposes the expected tool names in order', () => {
13
+ expect(FLASHNET_TOOLS.map((t) => t.name)).toEqual([
14
+ 'flashnet_list_pools',
15
+ 'flashnet_get_pool',
16
+ 'flashnet_simulate_swap',
17
+ 'flashnet_execute_swap',
18
+ 'flashnet_get_balance',
19
+ ]);
20
+ });
21
+
22
+ it('every tool has an object parameters schema', () => {
23
+ for (const t of FLASHNET_TOOLS) {
24
+ expect((t.parameters as any)?.type).toBe('object');
25
+ }
26
+ });
27
+
28
+ it('aligns spend ↔ requiresConfirmation', () => {
29
+ for (const t of FLASHNET_TOOLS) {
30
+ expect(!!t.spend).toBe(!!t.requiresConfirmation);
31
+ }
32
+ });
33
+
34
+ it('marks only flashnet_execute_swap as spend', () => {
35
+ expect([...FLASHNET_SPEND_TOOLS]).toEqual(['flashnet_execute_swap']);
36
+ expect(isFlashnetSpendTool('flashnet_execute_swap')).toBe(true);
37
+ expect(isFlashnetSpendTool('flashnet_simulate_swap')).toBe(false);
38
+ expect(isFlashnetSpendTool('flashnet_list_pools')).toBe(false);
39
+ });
40
+
41
+ it('getFlashnetTool returns by name', () => {
42
+ expect(getFlashnetTool('flashnet_simulate_swap')?.name).toBe('flashnet_simulate_swap');
43
+ expect(getFlashnetTool('nope')).toBeUndefined();
44
+ });
45
+
46
+ it('execute_swap requires the canonical 5 fields', () => {
47
+ const def = getFlashnetTool('flashnet_execute_swap')!;
48
+ expect((def.parameters as any).required).toEqual([
49
+ 'pool_id', 'asset_in_address', 'asset_out_address', 'amount_in', 'min_amount_out',
50
+ ]);
51
+ });
52
+
53
+ it('simulate_swap requires pool + assets + amount but no slippage', () => {
54
+ const def = getFlashnetTool('flashnet_simulate_swap')!;
55
+ expect((def.parameters as any).required).toEqual([
56
+ 'pool_id', 'asset_in_address', 'asset_out_address', 'amount_in',
57
+ ]);
58
+ });
59
+ });
60
+
61
+ describe('bindFlashnetTools', () => {
62
+ const echoHandlers = (): Record<string, FlashnetHandler> => ({
63
+ flashnet_list_pools: async (a) => ({ ok: true, t: 'list_pools', args: a }),
64
+ flashnet_get_pool: async (a) => ({ ok: true, t: 'get_pool', args: a }),
65
+ flashnet_simulate_swap: async (a) => ({ ok: true, t: 'simulate_swap', args: a }),
66
+ flashnet_execute_swap: async (a) => ({ ok: true, t: 'execute_swap', args: a }),
67
+ flashnet_get_balance: async () => ({ btc_sats: 100000, tokens: [] }),
68
+ });
69
+
70
+ it('binds every tool and preserves the spend gate', () => {
71
+ const src = bindFlashnetTools(echoHandlers());
72
+ expect(src.listTools().length).toBe(5);
73
+ const exec = src.listTools().find((t) => t.name === 'flashnet_execute_swap');
74
+ expect(exec?.requiresConfirmation).toBe(true);
75
+ const sim = src.listTools().find((t) => t.name === 'flashnet_simulate_swap');
76
+ expect(sim?.requiresConfirmation).toBeFalsy();
77
+ });
78
+
79
+ it('dispatches with args', async () => {
80
+ const src = bindFlashnetTools(echoHandlers());
81
+ const r = await src.execute('flashnet_simulate_swap', {
82
+ pool_id: 'p1',
83
+ asset_in_address: 'btc',
84
+ asset_out_address: 'usdb',
85
+ amount_in: '100000',
86
+ });
87
+ expect(r).toMatchObject({ ok: true, t: 'simulate_swap' });
88
+ });
89
+
90
+ it('throws on a missing handler unless allowMissing', () => {
91
+ const partial = { flashnet_list_pools: echoHandlers().flashnet_list_pools };
92
+ expect(() => bindFlashnetTools(partial)).toThrow(/no handler/);
93
+ const src = bindFlashnetTools(partial, { allowMissing: true });
94
+ expect(src.listTools().map((t) => t.name)).toEqual(['flashnet_list_pools']);
95
+ });
96
+
97
+ it('uses opts.id for the ToolSource id', () => {
98
+ const src = bindFlashnetTools(echoHandlers(), { id: 'flashnet-regtest' });
99
+ expect(src.id).toBe('flashnet-regtest');
100
+ });
101
+ });