@kaleidorg/mind 0.5.1 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/autonomy/index.d.ts +21 -0
- package/dist/autonomy/index.d.ts.map +1 -0
- package/dist/autonomy/index.js +16 -0
- package/dist/autonomy/index.js.map +1 -0
- package/dist/autonomy/prompt.d.ts +21 -0
- package/dist/autonomy/prompt.d.ts.map +1 -0
- package/dist/autonomy/prompt.js +37 -0
- package/dist/autonomy/prompt.js.map +1 -0
- package/dist/autonomy/risk.d.ts +53 -0
- package/dist/autonomy/risk.d.ts.map +1 -0
- package/dist/autonomy/risk.js +74 -0
- package/dist/autonomy/risk.js.map +1 -0
- package/dist/autonomy/run-state.d.ts +39 -0
- package/dist/autonomy/run-state.d.ts.map +1 -0
- package/dist/autonomy/run-state.js +118 -0
- package/dist/autonomy/run-state.js.map +1 -0
- package/dist/autonomy/scheduler.d.ts +18 -0
- package/dist/autonomy/scheduler.d.ts.map +1 -0
- package/dist/autonomy/scheduler.js +113 -0
- package/dist/autonomy/scheduler.js.map +1 -0
- package/dist/autonomy/task-store.d.ts +44 -0
- package/dist/autonomy/task-store.d.ts.map +1 -0
- package/dist/autonomy/task-store.js +139 -0
- package/dist/autonomy/task-store.js.map +1 -0
- package/dist/autonomy/types.d.ts +164 -0
- package/dist/autonomy/types.d.ts.map +1 -0
- package/dist/autonomy/types.js +20 -0
- package/dist/autonomy/types.js.map +1 -0
- package/dist/bitrefill/contract.d.ts +60 -0
- package/dist/bitrefill/contract.d.ts.map +1 -0
- package/dist/bitrefill/contract.js +119 -0
- package/dist/bitrefill/contract.js.map +1 -0
- package/dist/context/compress.d.ts +65 -0
- package/dist/context/compress.d.ts.map +1 -0
- package/dist/context/compress.js +181 -0
- package/dist/context/compress.js.map +1 -0
- package/dist/engine.d.ts +20 -0
- package/dist/engine.d.ts.map +1 -1
- package/dist/engine.js +23 -4
- package/dist/engine.js.map +1 -1
- package/dist/evidence.d.ts +62 -0
- package/dist/evidence.d.ts.map +1 -0
- package/dist/evidence.js +47 -0
- package/dist/evidence.js.map +1 -0
- package/dist/flashnet/contract.d.ts +56 -0
- package/dist/flashnet/contract.d.ts.map +1 -0
- package/dist/flashnet/contract.js +100 -0
- package/dist/flashnet/contract.js.map +1 -0
- package/dist/funnel.d.ts +11 -0
- package/dist/funnel.d.ts.map +1 -1
- package/dist/funnel.js +62 -7
- package/dist/funnel.js.map +1 -1
- package/dist/index.d.ts +12 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -0
- package/dist/index.js.map +1 -1
- package/dist/kaleidoswap/contract.js +1 -1
- package/dist/kaleidoswap/contract.js.map +1 -1
- package/dist/knowledge/bitcoin-copilot.d.ts.map +1 -1
- package/dist/knowledge/bitcoin-copilot.js +85 -2
- package/dist/knowledge/bitcoin-copilot.js.map +1 -1
- package/dist/providers/types.d.ts +17 -0
- package/dist/providers/types.d.ts.map +1 -1
- package/dist/qvac/index.d.ts +1 -1
- package/dist/qvac/index.d.ts.map +1 -1
- package/dist/qvac/index.js.map +1 -1
- package/dist/qvac/parse.d.ts +18 -0
- package/dist/qvac/parse.d.ts.map +1 -1
- package/dist/qvac/parse.js +1 -0
- package/dist/qvac/parse.js.map +1 -1
- package/dist/qvac/provider.d.ts +16 -0
- package/dist/qvac/provider.d.ts.map +1 -1
- package/dist/qvac/provider.js +40 -1
- package/dist/qvac/provider.js.map +1 -1
- package/dist/qvac/stream.d.ts +22 -0
- package/dist/qvac/stream.d.ts.map +1 -1
- package/dist/qvac/stream.js +33 -1
- package/dist/qvac/stream.js.map +1 -1
- package/dist/recipe/buy-asset-channel.d.ts +1 -1
- package/dist/recipe/buy-asset-channel.d.ts.map +1 -1
- package/dist/recipe/buy-asset-channel.js +4 -3
- package/dist/recipe/buy-asset-channel.js.map +1 -1
- package/dist/recipe/flashnet-swap.d.ts +35 -0
- package/dist/recipe/flashnet-swap.d.ts.map +1 -0
- package/dist/recipe/flashnet-swap.js +239 -0
- package/dist/recipe/flashnet-swap.js.map +1 -0
- package/dist/recipe/kaleidoswap-atomic.d.ts +1 -1
- package/dist/recipe/kaleidoswap-atomic.d.ts.map +1 -1
- package/dist/recipe/kaleidoswap-atomic.js +42 -20
- package/dist/recipe/kaleidoswap-atomic.js.map +1 -1
- package/dist/recipe/kaleidoswap-channel-order.d.ts.map +1 -1
- package/dist/recipe/kaleidoswap-channel-order.js +31 -10
- package/dist/recipe/kaleidoswap-channel-order.js.map +1 -1
- package/dist/recipe/kaleidoswap-price.d.ts.map +1 -1
- package/dist/recipe/kaleidoswap-price.js +7 -1
- package/dist/recipe/kaleidoswap-price.js.map +1 -1
- package/dist/recipe/runner.d.ts.map +1 -1
- package/dist/recipe/runner.js +43 -3
- package/dist/recipe/runner.js.map +1 -1
- package/dist/recipe/swap.d.ts.map +1 -1
- package/dist/recipe/swap.js +14 -1
- package/dist/recipe/swap.js.map +1 -1
- package/dist/tools/mcp.d.ts +19 -0
- package/dist/tools/mcp.d.ts.map +1 -1
- package/dist/tools/mcp.js +51 -9
- package/dist/tools/mcp.js.map +1 -1
- package/dist/wallet/confirm.d.ts.map +1 -1
- package/dist/wallet/confirm.js +1 -0
- package/dist/wallet/confirm.js.map +1 -1
- package/dist/wallet/contract.d.ts.map +1 -1
- package/dist/wallet/contract.js +20 -4
- package/dist/wallet/contract.js.map +1 -1
- package/package.json +5 -4
- package/skills/bitrefill/SKILL.md +152 -52
- package/skills/channel-manager/SKILL.md +59 -0
- package/skills/dca/SKILL.md +48 -0
- package/skills/flashnet-swaps/SKILL.md +158 -0
- package/skills/kaleido-lsps/SKILL.md +34 -17
- package/skills/kaleido-trading/SKILL.md +37 -13
- package/skills/liquidity-optimizer/SKILL.md +91 -0
- package/skills/merchant-finder/SKILL.md +2 -2
- package/skills/portfolio-manager/SKILL.md +67 -0
- package/skills/rgb-lightning-node/SKILL.md +38 -11
- package/skills/spark-wallet/SKILL.md +235 -0
- package/skills/wallet-assistant/SKILL.md +2 -2
- package/src/autonomy/autonomy.test.ts +348 -0
- package/src/autonomy/index.ts +50 -0
- package/src/autonomy/prompt.ts +48 -0
- package/src/autonomy/risk.ts +139 -0
- package/src/autonomy/run-state.ts +144 -0
- package/src/autonomy/scheduler.ts +120 -0
- package/src/autonomy/task-store.ts +167 -0
- package/src/autonomy/types.ts +186 -0
- package/src/bitrefill/contract.test.ts +89 -0
- package/src/bitrefill/contract.ts +190 -0
- package/src/context/compress.test.ts +120 -0
- package/src/context/compress.ts +230 -0
- package/src/engine.test.ts +34 -0
- package/src/engine.ts +35 -4
- package/src/evidence.test.ts +80 -0
- package/src/evidence.ts +114 -0
- package/src/flashnet/contract.test.ts +101 -0
- package/src/flashnet/contract.ts +164 -0
- package/src/funnel.mind.test.ts +390 -0
- package/src/funnel.ts +73 -8
- package/src/index.ts +92 -1
- package/src/kaleidoswap/contract.ts +1 -1
- package/src/knowledge/bitcoin-copilot.ts +96 -2
- package/src/providers/types.ts +18 -0
- package/src/qvac/index.ts +1 -0
- package/src/qvac/parse.ts +20 -0
- package/src/qvac/provider.test.ts +17 -0
- package/src/qvac/provider.ts +62 -2
- package/src/qvac/stream.test.ts +36 -0
- package/src/qvac/stream.ts +54 -1
- package/src/recipe/buy-asset-channel.test.ts +5 -0
- package/src/recipe/buy-asset-channel.ts +6 -3
- package/src/recipe/flashnet-swap.test.ts +114 -0
- package/src/recipe/flashnet-swap.ts +266 -0
- package/src/recipe/kaleidoswap-atomic.test.ts +24 -3
- package/src/recipe/kaleidoswap-atomic.ts +39 -20
- package/src/recipe/kaleidoswap-channel-order.test.ts +38 -0
- package/src/recipe/kaleidoswap-channel-order.ts +27 -9
- package/src/recipe/kaleidoswap-price.ts +7 -1
- package/src/recipe/recipe.test.ts +21 -0
- package/src/recipe/runner.ts +46 -3
- package/src/recipe/swap.ts +16 -1
- package/src/tools/mcp.live.test.ts +116 -0
- package/src/tools/mcp.parse.test.ts +37 -0
- package/src/tools/mcp.ts +55 -9
- package/src/wallet/confirm.test.ts +8 -0
- package/src/wallet/confirm.ts +1 -0
- package/src/wallet/contract.test.ts +10 -0
- package/src/wallet/contract.ts +26 -4
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-output compression — the "fit more into a tiny window" part.
|
|
3
|
+
*
|
|
4
|
+
* Tool results are the single biggest, most repetitive thing the engine pushes
|
|
5
|
+
* into a small on-device model's context. A merchant search returns 40 near
|
|
6
|
+
* identical rows; a tx history returns hundreds; a swap quote nests config the
|
|
7
|
+
* model never reads. Every round, the *raw* `JSON.stringify(result)` is fed back
|
|
8
|
+
* into history — so on a 2k-window 0.6B model the conversation drowns in JSON
|
|
9
|
+
* the model didn't need, crowding out the system prompt, the skill, and the
|
|
10
|
+
* actual question.
|
|
11
|
+
*
|
|
12
|
+
* `compressToolResult` is a structural crusher (the idea behind Headroom's
|
|
13
|
+
* SmartCrusher/ToolCrusher, reimplemented natively — no dependency, no network,
|
|
14
|
+
* no proxy, so it stays on-device and private). It walks the JSON and:
|
|
15
|
+
*
|
|
16
|
+
* • dedupes identical array items, then keeps the first/last few and replaces
|
|
17
|
+
* the middle with an honest `{ "__elided__": N }` marker,
|
|
18
|
+
* • caps nesting depth (deep config → a one-line summary),
|
|
19
|
+
* • truncates long *prose* strings (logs, descriptions),
|
|
20
|
+
*
|
|
21
|
+
* and never regresses: if crushing doesn't actually save tokens, the original
|
|
22
|
+
* is returned untouched.
|
|
23
|
+
*
|
|
24
|
+
* SAFETY: it never touches numbers, never elides whole objects, never truncates
|
|
25
|
+
* whitespace-free strings (addresses, BOLT11 invoices, txids, pubkeys), and
|
|
26
|
+
* never touches a value under a money/identity key (see DEFAULT_PRESERVE_KEYS).
|
|
27
|
+
* Amounts and recipients reach the model intact — the confirm readback is built
|
|
28
|
+
* deterministically from the resolved call anyway, but this keeps the model's
|
|
29
|
+
* own view honest too. Small results (below `minTokens`) are passed through
|
|
30
|
+
* verbatim so nothing changes for the common case.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { estimateTokens } from './budget.js';
|
|
34
|
+
|
|
35
|
+
/** Keys whose values are never elided or truncated — amounts, ids, recipients. */
|
|
36
|
+
export const DEFAULT_PRESERVE_KEYS: readonly string[] = [
|
|
37
|
+
'amount',
|
|
38
|
+
'amount_sat',
|
|
39
|
+
'amount_sats',
|
|
40
|
+
'amount_msat',
|
|
41
|
+
'sat',
|
|
42
|
+
'sats',
|
|
43
|
+
'msat',
|
|
44
|
+
'value',
|
|
45
|
+
'fee',
|
|
46
|
+
'fee_sat',
|
|
47
|
+
'fee_sats',
|
|
48
|
+
'total',
|
|
49
|
+
'total_sats',
|
|
50
|
+
'balance',
|
|
51
|
+
'balance_sat',
|
|
52
|
+
'address',
|
|
53
|
+
'invoice',
|
|
54
|
+
'bolt11',
|
|
55
|
+
'payment_request',
|
|
56
|
+
'payment_hash',
|
|
57
|
+
'preimage',
|
|
58
|
+
'txid',
|
|
59
|
+
'tx_id',
|
|
60
|
+
'pubkey',
|
|
61
|
+
'node_id',
|
|
62
|
+
'recipient',
|
|
63
|
+
'destination',
|
|
64
|
+
'asset_id',
|
|
65
|
+
'contract_id',
|
|
66
|
+
'rate',
|
|
67
|
+
'price',
|
|
68
|
+
'price_usd',
|
|
69
|
+
];
|
|
70
|
+
|
|
71
|
+
export interface ToolCrushOptions {
|
|
72
|
+
/** Don't compress results estimated below this many tokens. Default 200. */
|
|
73
|
+
minTokens?: number;
|
|
74
|
+
/** Max items kept in any array before the middle is elided. Default 8. */
|
|
75
|
+
maxArrayItems?: number;
|
|
76
|
+
/** Max nesting depth kept verbatim; deeper is summarized. Default 6. */
|
|
77
|
+
maxDepth?: number;
|
|
78
|
+
/** Max chars for a single prose string before truncation (0 disables). Default 600. */
|
|
79
|
+
maxStringLength?: number;
|
|
80
|
+
/** Dedupe identical array items before eliding. Default true. */
|
|
81
|
+
dedupe?: boolean;
|
|
82
|
+
/** Keys whose values are never elided/truncated (defaults to DEFAULT_PRESERVE_KEYS). */
|
|
83
|
+
preserveKeys?: readonly string[];
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
export interface CrushResult {
|
|
87
|
+
/** Serialized, compressed content — ready to push into history. */
|
|
88
|
+
content: string;
|
|
89
|
+
/** Estimated tokens of the original serialization. */
|
|
90
|
+
originalTokens: number;
|
|
91
|
+
/** Estimated tokens after crushing. */
|
|
92
|
+
compressedTokens: number;
|
|
93
|
+
/** Total array items dropped across the whole structure. */
|
|
94
|
+
elided: number;
|
|
95
|
+
/** False when the original was returned untouched (too small, or no win). */
|
|
96
|
+
changed: boolean;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
interface Resolved {
|
|
100
|
+
minTokens: number;
|
|
101
|
+
maxArrayItems: number;
|
|
102
|
+
maxDepth: number;
|
|
103
|
+
maxStringLength: number;
|
|
104
|
+
dedupe: boolean;
|
|
105
|
+
preserve: Set<string>;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
 * Decide whether a string should be treated as an opaque identifier
 * (address / invoice / hash) rather than prose.
 *
 * The rule is purely lexical: a string containing no whitespace character at
 * all — including the empty string — is identifier-like. `crush` uses this to
 * exempt such strings from truncation.
 */
function isIdentifierLike(s: string): boolean {
  return s.search(/\s/) === -1;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Produce the serialized form of a value: strings pass through unchanged
 * (no extra JSON quoting), everything else is handed to `safeStringify`.
 */
function serialize(value: unknown): string {
  if (typeof value === 'string') return value;
  return safeStringify(value);
}
|
|
116
|
+
|
|
117
|
+
/**
 * JSON-serialize a value without ever throwing.
 *
 * Resolution order:
 *  1. Plain `JSON.stringify`. When it yields `undefined` (e.g. for `undefined`
 *     or a function) the result is `String(value)`.
 *  2. If that throws and the value is an object, retry with a replacer that
 *     renders BigInt values as decimal strings. `JSON.stringify` throws a
 *     TypeError on BigInt, and without this retry a whole result containing a
 *     single bigint amount would collapse to "[object Object]".
 *  3. Otherwise (top-level BigInt, circular structure, …) fall back to
 *     `String(value)`, as before.
 *
 * @param value any value
 * @returns a string representation; JSON whenever the value is serializable.
 */
function safeStringify(value: unknown): string {
  try {
    return JSON.stringify(value) ?? String(value);
  } catch {
    // Not directly serializable — try the BigInt-tolerant pass below.
  }

  if (typeof value === 'object' && value !== null) {
    try {
      const bigintAsString = JSON.stringify(value, (_key, v) => (typeof v === 'bigint' ? v.toString() : v));
      return bigintAsString ?? String(value);
    } catch {
      // Still not serializable (e.g. a circular reference) — use the last resort.
    }
  }

  return String(value);
}
|
|
124
|
+
|
|
125
|
+
/**
 * Compress a tool result before it is placed in model context.
 *
 * The value is serialized and its token count estimated. If that estimate is
 * below `minTokens`, or if the crushed form is not strictly smaller, the
 * original serialization is returned with `changed: false` and `elided: 0`.
 * Otherwise the crushed serialization is returned together with the number of
 * array items that were dropped.
 *
 * @param value raw tool result (string or JSON-ish value)
 * @param opts  tuning knobs; `maxArrayItems` is clamped to at least 2 and
 *              `maxDepth` to at least 1
 */
export function compressToolResult(value: unknown, opts: ToolCrushOptions = {}): CrushResult {
  const preserveKeys = opts.preserveKeys ?? DEFAULT_PRESERVE_KEYS;
  const cfg: Resolved = {
    minTokens: opts.minTokens ?? 200,
    maxArrayItems: Math.max(2, opts.maxArrayItems ?? 8),
    maxDepth: Math.max(1, opts.maxDepth ?? 6),
    maxStringLength: opts.maxStringLength ?? 600,
    dedupe: opts.dedupe ?? true,
    // Preserve-key matching is case-insensitive: keys are stored lower-cased.
    preserve: new Set(preserveKeys.map((k) => k.toLowerCase())),
  };

  const original = serialize(value);
  const originalTokens = estimateTokens(original);

  // Shared "leave it alone" answer for both the too-small and the no-win case.
  const untouched: CrushResult = {
    content: original,
    originalTokens,
    compressedTokens: originalTokens,
    elided: 0,
    changed: false,
  };

  // Small results are passed through verbatim — correctness over savings.
  if (originalTokens < cfg.minTokens) return untouched;

  let dropped = 0;
  const countDrop = (): void => {
    dropped += 1;
  };
  const content = serialize(crush(value, cfg, 0, false, countDrop));
  const compressedTokens = estimateTokens(content);

  // Never regress: only accept the crushed form when it is strictly smaller.
  if (compressedTokens >= originalTokens) return untouched;

  return { content, originalTokens, compressedTokens, elided: dropped, changed: true };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Recursively crush a JSON-ish value.
 *
 * Rules, in order:
 *  - `null` / `undefined` are returned as-is.
 *  - A value sitting under a preserve key is returned untouched, whatever its
 *    type: no truncation, no dedupe, no elision, no depth collapse.
 *  - Strings without whitespace are kept; other strings longer than
 *    `cfg.maxStringLength` (when > 0) are cut and suffixed with the number of
 *    omitted characters.
 *  - Numbers, booleans and other non-object primitives are never touched.
 *  - Containers at `depth >= cfg.maxDepth` collapse to a one-line summary string.
 *  - Arrays longer than `cfg.maxArrayItems` are optionally deduped, then reduced
 *    to a head, an `{ __elided__: N }` marker and a tail.
 *  - Objects are rebuilt key by key; a key found (case-insensitively) in
 *    `cfg.preserve` marks its value as preserved.
 *
 * @param value     the value to crush
 * @param cfg       resolved options
 * @param depth     current nesting depth (0 at the root)
 * @param preserved when true, the value sits under a preserve key — kept verbatim.
 * @param onElide   called once per array item dropped by elision (for stats).
 * @returns the crushed value; preserved subtrees are returned by reference.
 */
function crush(value: unknown, cfg: Resolved, depth: number, preserved: boolean, onElide: () => void): unknown {
  if (value === null || value === undefined) return value;

  // Honour the preserve contract for every value type. Previously only strings
  // were protected, so an array or object under e.g. `address` / `txid` could
  // still be deduped, elided or collapsed by the depth cap.
  if (preserved) return value;

  if (typeof value === 'string') {
    if (isIdentifierLike(value)) return value;
    if (cfg.maxStringLength > 0 && value.length > cfg.maxStringLength) {
      const omitted = value.length - cfg.maxStringLength;
      return `${value.slice(0, cfg.maxStringLength)}… (+${omitted} chars)`;
    }
    return value;
  }

  if (typeof value !== 'object') return value; // number, boolean — never touched

  // Beyond max depth, collapse to a one-line shape summary instead of the subtree.
  if (depth >= cfg.maxDepth) {
    if (Array.isArray(value)) return `[array: ${value.length} items]`;
    return `[object: ${Object.keys(value as object).length} keys]`;
  }

  if (Array.isArray(value)) {
    let items: unknown[] = value;

    // Dedupe only when the array is over the cap, keyed on serialized form.
    // NOTE(review): duplicates removed here are not reported through `onElide`
    // nor reflected in the `__elided__` marker — confirm that is intended.
    if (cfg.dedupe && items.length > cfg.maxArrayItems) {
      const seen = new Set<string>();
      const unique: unknown[] = [];
      for (const item of items) {
        const key = safeStringify(item);
        if (seen.has(key)) continue;
        seen.add(key);
        unique.push(item);
      }
      items = unique;
    }

    if (items.length <= cfg.maxArrayItems) {
      return items.map((v) => crush(v, cfg, depth + 1, false, onElide));
    }

    // Keep roughly 60% of the budget at the front and the rest at the tail;
    // replace the middle with an explicit marker so the omission is visible.
    const keepFirst = Math.max(1, Math.ceil(cfg.maxArrayItems * 0.6));
    const keepLast = Math.max(0, cfg.maxArrayItems - keepFirst);
    const head = items.slice(0, keepFirst);
    const tail = keepLast > 0 ? items.slice(items.length - keepLast) : [];
    const droppedCount = items.length - head.length - tail.length;
    for (let i = 0; i < droppedCount; i++) onElide();

    const out: unknown[] = head.map((v) => crush(v, cfg, depth + 1, false, onElide));
    out.push({ __elided__: droppedCount, note: 'items omitted to fit context' });
    for (const v of tail) out.push(crush(v, cfg, depth + 1, false, onElide));
    return out;
  }

  const obj = value as Record<string, unknown>;
  const result: Record<string, unknown> = {};
  for (const [key, v] of Object.entries(obj)) {
    const keep = cfg.preserve.has(key.toLowerCase());
    result[key] = crush(v, cfg, depth + 1, keep, onElide);
  }
  return result;
}
|
package/src/engine.test.ts
CHANGED
|
@@ -150,6 +150,40 @@ describe('Engine agentic loop', () => {
|
|
|
150
150
|
expect(balanceTool.handler).toHaveBeenCalledTimes(3);
|
|
151
151
|
});
|
|
152
152
|
|
|
153
|
+
it('crushes verbose tool output in history but keeps the raw result for callbacks', async () => {
|
|
154
|
+
const bulky = {
|
|
155
|
+
name: 'list_merchants',
|
|
156
|
+
description: 'returns many rows',
|
|
157
|
+
parameters: {},
|
|
158
|
+
handler: vi.fn(async () => ({
|
|
159
|
+
results: Array.from({ length: 50 }, (_, i) => ({
|
|
160
|
+
name: `Shop ${i}`,
|
|
161
|
+
blurb: 'Accepts Bitcoin and Lightning, open daily, friendly staff and good wifi.',
|
|
162
|
+
amount_sats: 1000 + i,
|
|
163
|
+
})),
|
|
164
|
+
})),
|
|
165
|
+
};
|
|
166
|
+
const onToolResult = vi.fn();
|
|
167
|
+
const engine = new Engine({
|
|
168
|
+
provider: scriptedProvider([
|
|
169
|
+
{ text: '', toolCalls: [{ name: 'list_merchants', arguments: {} }] },
|
|
170
|
+
{ text: 'Found some merchants.' },
|
|
171
|
+
]),
|
|
172
|
+
tools: new ToolRegistry([new InProcessToolSource('m', [bulky])]),
|
|
173
|
+
compressToolOutput: { maxArrayItems: 6 },
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
const res = await engine.runAgentic([{ role: 'user', content: 'find cafes' }], { onToolResult });
|
|
177
|
+
|
|
178
|
+
// The history frame the model sees is crushed (elision marker present)...
|
|
179
|
+
const toolFrame = res.messages.find((m) => m.role === 'tool');
|
|
180
|
+
expect(toolFrame?.content).toContain('__elided__');
|
|
181
|
+
// ...but amounts survive and the callback/result still carry the full data.
|
|
182
|
+
expect(toolFrame?.content).toContain('amount_sats');
|
|
183
|
+
expect(onToolResult.mock.calls[0][0].result).toEqual(await bulky.handler.mock.results[0].value);
|
|
184
|
+
expect((res.toolCalls[0].result as { results: unknown[] }).results).toHaveLength(50);
|
|
185
|
+
});
|
|
186
|
+
|
|
153
187
|
it('surfaces a tool error as a result instead of throwing', async () => {
|
|
154
188
|
const boom = {
|
|
155
189
|
name: 'boom',
|
package/src/engine.ts
CHANGED
|
@@ -16,7 +16,9 @@
|
|
|
16
16
|
|
|
17
17
|
import type { ConfirmDecision, Message, ToolResult } from './types.js';
|
|
18
18
|
import type { LLMProvider } from './providers/types.js';
|
|
19
|
+
import type { InferenceMetrics } from './providers/types.js';
|
|
19
20
|
import type { ToolRegistry } from './tools/registry.js';
|
|
21
|
+
import { compressToolResult, type ToolCrushOptions } from './context/compress.js';
|
|
20
22
|
|
|
21
23
|
export interface EngineOptions {
|
|
22
24
|
provider: LLMProvider;
|
|
@@ -25,6 +27,15 @@ export interface EngineOptions {
|
|
|
25
27
|
defaultSystem?: string;
|
|
26
28
|
/** Max reasoning↔tool rounds before forcing a stop. Default 5. */
|
|
27
29
|
defaultMaxTurns?: number;
|
|
30
|
+
/**
|
|
31
|
+
* Crush verbose tool results before they're fed back into history, so a
|
|
32
|
+
* tiny on-device model's context window isn't drowned in repetitive JSON
|
|
33
|
+
* (merchant lists, tx history, nested quotes). `true` uses safe defaults;
|
|
34
|
+
* pass options to tune. Off by default — small results are never touched and
|
|
35
|
+
* amounts/addresses/invoices are always preserved (see compressToolResult).
|
|
36
|
+
* The `onToolResult` callback and `toolCalls` still carry the raw result.
|
|
37
|
+
*/
|
|
38
|
+
compressToolOutput?: boolean | ToolCrushOptions;
|
|
28
39
|
}
|
|
29
40
|
|
|
30
41
|
export interface AgenticOptions {
|
|
@@ -59,6 +70,8 @@ export interface AgenticResult {
|
|
|
59
70
|
messages: Message[];
|
|
60
71
|
/** Wall-clock duration of the whole agentic run, ms. */
|
|
61
72
|
latencyMs: number;
|
|
73
|
+
/** One receipt per model call in this agentic run. */
|
|
74
|
+
inference: InferenceMetrics[];
|
|
62
75
|
}
|
|
63
76
|
|
|
64
77
|
export class Engine {
|
|
@@ -66,12 +79,18 @@ export class Engine {
|
|
|
66
79
|
private readonly registry: ToolRegistry;
|
|
67
80
|
private readonly defaultSystem?: string;
|
|
68
81
|
private readonly defaultMaxTurns: number;
|
|
82
|
+
private readonly compressOpts?: ToolCrushOptions;
|
|
69
83
|
|
|
70
84
|
/**
 * Capture the provider, tool registry and defaults for this engine.
 *
 * `compressToolOutput` is normalized here: `true` becomes an empty options
 * object, an options object is stored as given, and any falsy value leaves
 * compression disabled (`undefined`). `defaultMaxTurns` falls back to 5.
 */
constructor(opts: EngineOptions) {
  const { provider, tools, defaultSystem, defaultMaxTurns, compressToolOutput } = opts;

  this.provider = provider;
  this.registry = tools;
  this.defaultSystem = defaultSystem;
  this.defaultMaxTurns = defaultMaxTurns ?? 5;

  let compressOpts: ToolCrushOptions | undefined;
  if (compressToolOutput === true) {
    compressOpts = {};
  } else if (compressToolOutput) {
    compressOpts = compressToolOutput;
  }
  this.compressOpts = compressOpts;
}
|
|
76
95
|
|
|
77
96
|
async runAgentic(messages: Message[], opts: AgenticOptions = {}): Promise<AgenticResult> {
|
|
@@ -90,6 +109,7 @@ export class Engine {
|
|
|
90
109
|
let lastRequestId: string | undefined;
|
|
91
110
|
let finalText = '';
|
|
92
111
|
let turns = 0;
|
|
112
|
+
const inference: InferenceMetrics[] = [];
|
|
93
113
|
|
|
94
114
|
for (let turn = 1; turn <= maxTurns; turn++) {
|
|
95
115
|
turns = turn;
|
|
@@ -104,6 +124,7 @@ export class Engine {
|
|
|
104
124
|
});
|
|
105
125
|
|
|
106
126
|
lastRequestId = out.requestId;
|
|
127
|
+
if (out.inference) inference.push(out.inference);
|
|
107
128
|
if (out.requestId) opts.onStart?.(out.requestId, turn);
|
|
108
129
|
finalText = (out.text || '').trim();
|
|
109
130
|
|
|
@@ -133,10 +154,7 @@ export class Engine {
|
|
|
133
154
|
|
|
134
155
|
executed.push({ name: call.name, arguments: call.arguments, result });
|
|
135
156
|
opts.onToolResult?.({ name: call.name, arguments: call.arguments, result }, turn);
|
|
136
|
-
history.push({
|
|
137
|
-
role: 'tool',
|
|
138
|
-
content: typeof result === 'string' ? result : JSON.stringify(result),
|
|
139
|
-
});
|
|
157
|
+
history.push({ role: 'tool', content: this.toHistoryContent(result) });
|
|
140
158
|
}
|
|
141
159
|
|
|
142
160
|
if (turn === maxTurns && !finalText) {
|
|
@@ -155,6 +173,7 @@ export class Engine {
|
|
|
155
173
|
requestId: lastRequestId,
|
|
156
174
|
messages: history,
|
|
157
175
|
latencyMs: Date.now() - startedAt,
|
|
176
|
+
inference,
|
|
158
177
|
};
|
|
159
178
|
}
|
|
160
179
|
|
|
@@ -162,6 +181,18 @@ export class Engine {
|
|
|
162
181
|
await this.provider.cancel?.(requestId);
|
|
163
182
|
}
|
|
164
183
|
|
|
184
|
+
/**
 * Serialize a tool result for the model-facing history frame.
 *
 * With compression configured, the result goes through `compressToolResult`
 * and its `content` is used. Without it, strings pass through unchanged and
 * everything else is JSON-serialized.
 *
 * `JSON.stringify` returns `undefined` for values such as `undefined` or a
 * function; in that case `String(result)` is used so the frame content is
 * always a real string, as the return type promises.
 *
 * @param result raw tool result; it is not modified here
 * @returns the text to store as the `tool` message content
 */
private toHistoryContent(result: unknown): string {
  if (this.compressOpts) {
    return compressToolResult(result, this.compressOpts).content;
  }
  if (typeof result === 'string') return result;
  return JSON.stringify(result) ?? String(result);
}
|
|
195
|
+
|
|
165
196
|
private async safeExecute(name: string, args: Record<string, unknown>): Promise<unknown> {
|
|
166
197
|
try {
|
|
167
198
|
return await this.registry.execute(name, args);
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
EVIDENCE_SCHEMA,
|
|
4
|
+
EvidenceRecorder,
|
|
5
|
+
sanitizeEvidenceEvent,
|
|
6
|
+
type EvidenceEvent,
|
|
7
|
+
} from './evidence.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Build an EvidenceRecorder whose output lines are appended to `lines` and
 * whose clock is pinned to 2026-06-20T12:00:00.000Z.
 */
function memoryRecorder(lines: string[]) {
  const io = {
    appendLine: async (line: string) => {
      lines.push(line);
    },
    now: () => new Date('2026-06-20T12:00:00.000Z'),
  };
  return new EvidenceRecorder({ io });
}
|
|
19
|
+
|
|
20
|
+
describe('EvidenceRecorder', () => {
  it('writes a completed inference receipt', async () => {
    const sink: string[] = [];
    const recorder = memoryRecorder(sink);

    const written = await recorder.record({
      event: 'inference',
      runId: 'desktop-demo',
      surface: 'desktop',
      prompt: 'show my balance',
      response: 'You have 42 sats.',
      inference: [{ durationMs: 220, ttftMs: 40, totalTokens: 18, status: 'completed' }],
    });

    // The recorder stamps the schema and the metrics survive the round trip.
    expect(written.schema).toBe(EVIDENCE_SCHEMA);
    const persisted = JSON.parse(sink[0]);
    expect(persisted.inference[0].ttftMs).toBe(40);
  });

  it('records a failed inference without inventing token metrics', async () => {
    const sink: string[] = [];

    await memoryRecorder(sink).record({
      event: 'error',
      runId: 'failed-demo',
      surface: 'test',
      error: { name: 'ModelError', message: 'model failed to load' },
    });

    const persisted = JSON.parse(sink[0]);
    expect(persisted.error.name).toBe('ModelError');
  });

  it('records tool calls and confirmation decisions', async () => {
    const sink: string[] = [];
    const recorder = memoryRecorder(sink);

    await recorder.record({
      event: 'tool_call',
      runId: 'tools-demo',
      surface: 'mobile',
      tool: { name: 'rln_send_btc', arguments: { amount_sat: 100 } },
    });
    await recorder.record({
      event: 'confirmation',
      runId: 'tools-demo',
      surface: 'mobile',
      confirmation: { tool: 'rln_send_btc', approved: false, reason: 'demo stop' },
    });

    // One JSONL line per recorded event, in order.
    expect(sink).toHaveLength(2);
    const second = JSON.parse(sink[1]);
    expect(second.confirmation.approved).toBe(false);
  });

  it('sanitizes payment material in interrupted runs', () => {
    const raw: EvidenceEvent = {
      schema: EVIDENCE_SCHEMA,
      event: 'inference',
      ts: '2026-06-20T12:00:00.000Z',
      runId: 'cancelled-demo',
      surface: 'mobile',
      prompt: 'pay lnbc123456789012345678901234567890',
      tool: { name: 'rln_pay_invoice', arguments: { invoice: 'lnbc-secret' } },
      inference: { durationMs: 50, status: 'cancelled' },
    };

    const scrubbed = sanitizeEvidenceEvent(raw);

    expect(scrubbed.prompt).toContain('[payment-data-redacted]');
    expect(scrubbed.tool?.arguments?.invoice).toBe('[redacted]');
  });
});
|
package/src/evidence.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hackathon evidence JSONL.
|
|
3
|
+
*
|
|
4
|
+
* This is deliberately transport-neutral: Node writes it to disk, React Native
|
|
5
|
+
* writes it to the app documents directory, and tests keep it in memory.
|
|
6
|
+
*/
|
|
7
|
+
import type { InferenceMetrics } from './providers/types.js';
|
|
8
|
+
|
|
9
|
+
export const EVIDENCE_SCHEMA = 'kaleidomind.evidence.v1' as const;
|
|
10
|
+
|
|
11
|
+
export type EvidenceSurface = 'desktop' | 'mobile' | 'cli' | 'test';
|
|
12
|
+
export type EvidenceEventType =
|
|
13
|
+
| 'model_load'
|
|
14
|
+
| 'model_unload'
|
|
15
|
+
| 'inference'
|
|
16
|
+
| 'tool_call'
|
|
17
|
+
| 'tool_result'
|
|
18
|
+
| 'confirmation'
|
|
19
|
+
| 'error';
|
|
20
|
+
|
|
21
|
+
export interface EvidenceEvent {
|
|
22
|
+
schema: typeof EVIDENCE_SCHEMA;
|
|
23
|
+
event: EvidenceEventType;
|
|
24
|
+
ts: string;
|
|
25
|
+
runId: string;
|
|
26
|
+
surface: EvidenceSurface;
|
|
27
|
+
model?: {
|
|
28
|
+
name: string;
|
|
29
|
+
version?: string;
|
|
30
|
+
source?: 'local' | 'delegated';
|
|
31
|
+
};
|
|
32
|
+
hardware?: {
|
|
33
|
+
device: string;
|
|
34
|
+
os?: string;
|
|
35
|
+
memoryGb?: number;
|
|
36
|
+
};
|
|
37
|
+
prompt?: string;
|
|
38
|
+
response?: string;
|
|
39
|
+
inference?: InferenceMetrics | InferenceMetrics[];
|
|
40
|
+
tool?: {
|
|
41
|
+
name: string;
|
|
42
|
+
arguments?: Record<string, unknown>;
|
|
43
|
+
result?: unknown;
|
|
44
|
+
};
|
|
45
|
+
confirmation?: {
|
|
46
|
+
tool: string;
|
|
47
|
+
approved: boolean;
|
|
48
|
+
reason?: string;
|
|
49
|
+
};
|
|
50
|
+
error?: {
|
|
51
|
+
name: string;
|
|
52
|
+
message: string;
|
|
53
|
+
};
|
|
54
|
+
meta?: Record<string, unknown>;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export type EvidenceInput = Omit<EvidenceEvent, 'schema' | 'ts'>;
|
|
58
|
+
|
|
59
|
+
export interface EvidenceIO {
|
|
60
|
+
appendLine(line: string): Promise<void>;
|
|
61
|
+
now(): Date;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
export interface EvidenceRecorderOptions {
|
|
65
|
+
io: EvidenceIO;
|
|
66
|
+
sanitize?: (event: EvidenceEvent) => EvidenceEvent;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
 * Writes evidence events as JSON lines through an injected `EvidenceIO`.
 */
export class EvidenceRecorder {
  constructor(private readonly opts: EvidenceRecorderOptions) {}

  /**
   * Stamp, sanitize and append one event.
   *
   * The input is completed with the schema id and an ISO timestamp taken from
   * `io.now()`, passed through the configured sanitizer (default:
   * `sanitizeEvidenceEvent`), serialized with `JSON.stringify` and handed to
   * `io.appendLine`.
   *
   * @returns the sanitized event that was written.
   */
  async record(input: EvidenceInput): Promise<EvidenceEvent> {
    const { io } = this.opts;
    const stamped: EvidenceEvent = {
      ...input,
      schema: EVIDENCE_SCHEMA,
      ts: io.now().toISOString(),
    };

    const sanitize = this.opts.sanitize ?? sanitizeEvidenceEvent;
    const event = sanitize(stamped);

    await io.appendLine(JSON.stringify(event));
    return event;
  }
}
|
|
83
|
+
|
|
84
|
+
const SENSITIVE_KEYS = new Set([
|
|
85
|
+
'address',
|
|
86
|
+
'invoice',
|
|
87
|
+
'bolt11',
|
|
88
|
+
'seed',
|
|
89
|
+
'mnemonic',
|
|
90
|
+
'private_key',
|
|
91
|
+
'privateKey',
|
|
92
|
+
'access_token',
|
|
93
|
+
'accessToken',
|
|
94
|
+
'preimage',
|
|
95
|
+
]);
|
|
96
|
+
|
|
97
|
+
const PAYMENT_TOKEN =
|
|
98
|
+
/\b(?:ln(?:bc|tb|bcrt)[0-9a-z]{20,}|(?:bc1|tb1|bcrt1)[0-9a-z]{20,})\b/gi;
|
|
99
|
+
|
|
100
|
+
/**
 * Return a sanitized copy of an evidence event.
 *
 * The event is walked recursively:
 *  - any value stored under a key listed in `SENSITIVE_KEYS` (exact match) is
 *    replaced by the string `[redacted]`, whatever its type;
 *  - every other string has `PAYMENT_TOKEN` matches replaced by
 *    `[payment-data-redacted]`;
 *  - arrays and objects are rebuilt, so the input event is not mutated;
 *  - all remaining values are returned unchanged.
 */
export function sanitizeEvidenceEvent(event: EvidenceEvent): EvidenceEvent {
  function scrub(node: unknown, parentKey?: string): unknown {
    // Key-based redaction wins over everything else, including containers.
    if (parentKey && SENSITIVE_KEYS.has(parentKey)) return '[redacted]';

    if (typeof node === 'string') {
      return node.replace(PAYMENT_TOKEN, '[payment-data-redacted]');
    }

    // Array items carry no key of their own.
    if (Array.isArray(node)) return node.map((entry) => scrub(entry));

    if (!node || typeof node !== 'object') return node;

    const pairs = Object.entries(node as Record<string, unknown>).map(
      ([k, v]): [string, unknown] => [k, scrub(v, k)],
    );
    return Object.fromEntries(pairs);
  }

  return scrub(event) as EvidenceEvent;
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
FLASHNET_TOOLS,
|
|
4
|
+
FLASHNET_SPEND_TOOLS,
|
|
5
|
+
isFlashnetSpendTool,
|
|
6
|
+
getFlashnetTool,
|
|
7
|
+
bindFlashnetTools,
|
|
8
|
+
type FlashnetHandler,
|
|
9
|
+
} from './contract.js';
|
|
10
|
+
|
|
11
|
+
describe('FLASHNET_TOOLS — shape invariants', () => {
  // Reads the `required` list from the parameters schema of a tool that must exist.
  const requiredFieldsOf = (toolName: string): unknown =>
    (getFlashnetTool(toolName)!.parameters as any).required;

  it('exposes the expected tool names in order', () => {
    const names = FLASHNET_TOOLS.map((tool) => tool.name);
    expect(names).toEqual([
      'flashnet_list_pools',
      'flashnet_get_pool',
      'flashnet_simulate_swap',
      'flashnet_execute_swap',
      'flashnet_get_balance',
    ]);
  });

  it('every tool has an object parameters schema', () => {
    FLASHNET_TOOLS.forEach((tool) => {
      expect((tool.parameters as any)?.type).toBe('object');
    });
  });

  it('aligns spend ↔ requiresConfirmation', () => {
    FLASHNET_TOOLS.forEach((tool) => {
      expect(Boolean(tool.spend)).toBe(Boolean(tool.requiresConfirmation));
    });
  });

  it('marks only flashnet_execute_swap as spend', () => {
    expect(Array.from(FLASHNET_SPEND_TOOLS)).toEqual(['flashnet_execute_swap']);

    const expectations = [
      ['flashnet_execute_swap', true],
      ['flashnet_simulate_swap', false],
      ['flashnet_list_pools', false],
    ] as const;
    for (const [toolName, isSpend] of expectations) {
      expect(isFlashnetSpendTool(toolName)).toBe(isSpend);
    }
  });

  it('getFlashnetTool returns by name', () => {
    const found = getFlashnetTool('flashnet_simulate_swap');
    expect(found?.name).toBe('flashnet_simulate_swap');

    const missing = getFlashnetTool('nope');
    expect(missing).toBeUndefined();
  });

  it('execute_swap requires the canonical 5 fields', () => {
    expect(requiredFieldsOf('flashnet_execute_swap')).toEqual([
      'pool_id',
      'asset_in_address',
      'asset_out_address',
      'amount_in',
      'min_amount_out',
    ]);
  });

  it('simulate_swap requires pool + assets + amount but no slippage', () => {
    expect(requiredFieldsOf('flashnet_simulate_swap')).toEqual([
      'pool_id',
      'asset_in_address',
      'asset_out_address',
      'amount_in',
    ]);
  });
});
|
|
60
|
+
|
|
61
|
+
describe('bindFlashnetTools', () => {
  // Builds a handler that echoes its arguments back under the tag `t`.
  const echo = (t: string): FlashnetHandler => async (a) => ({ ok: true, t, args: a });

  // Fresh handler map covering all five tools; the balance handler returns a fixed payload.
  const echoHandlers = (): Record<string, FlashnetHandler> => ({
    flashnet_list_pools: echo('list_pools'),
    flashnet_get_pool: echo('get_pool'),
    flashnet_simulate_swap: echo('simulate_swap'),
    flashnet_execute_swap: echo('execute_swap'),
    flashnet_get_balance: async () => ({ btc_sats: 100000, tokens: [] }),
  });

  it('binds every tool and preserves the spend gate', () => {
    const src = bindFlashnetTools(echoHandlers());
    const toolNamed = (wanted: string) => src.listTools().find((tool) => tool.name === wanted);

    expect(src.listTools()).toHaveLength(5);
    expect(toolNamed('flashnet_execute_swap')?.requiresConfirmation).toBe(true);
    expect(toolNamed('flashnet_simulate_swap')?.requiresConfirmation).toBeFalsy();
  });

  it('dispatches with args', async () => {
    const src = bindFlashnetTools(echoHandlers());
    const swapArgs = {
      pool_id: 'p1',
      asset_in_address: 'btc',
      asset_out_address: 'usdb',
      amount_in: '100000',
    };

    const outcome = await src.execute('flashnet_simulate_swap', swapArgs);

    expect(outcome).toMatchObject({ ok: true, t: 'simulate_swap' });
  });

  it('throws on a missing handler unless allowMissing', () => {
    const { flashnet_list_pools } = echoHandlers();
    const partial = { flashnet_list_pools };

    expect(() => bindFlashnetTools(partial)).toThrow(/no handler/);

    const lenient = bindFlashnetTools(partial, { allowMissing: true });
    const boundNames = lenient.listTools().map((tool) => tool.name);
    expect(boundNames).toEqual(['flashnet_list_pools']);
  });

  it('uses opts.id for the ToolSource id', () => {
    const { id } = bindFlashnetTools(echoHandlers(), { id: 'flashnet-regtest' });
    expect(id).toBe('flashnet-regtest');
  });
});
|