lazyclaw 3.88.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/logger.mjs ADDED
@@ -0,0 +1,55 @@
1
+ // Tiny structured logger — JSON-line output, level-gated, no transitive deps.
2
+ //
3
+ // Why not pino/winston: a single dep would dwarf the entire CLI. JSON-line
4
+ // is the de-facto observability format (jq-friendly, ingestible by every
5
+ // log shipper) and a module of a few dozen lines covers our needs.
6
+ //
7
+ // Levels: debug < info < warn < error. Setting LAZYCLAW_LOG_LEVEL=warn
8
+ // silences info+debug. The default (info) keeps user-meaningful events
9
+ // without per-request noise; the daemon's access log lives at info so
10
+ // it's on by default once the logger is wired.
11
+
12
// Numeric severity for each level name; a larger number is more severe.
// Insertion order runs from least to most severe.
const LEVELS = {
  debug: 10,
  info: 20,
  warn: 30,
  error: 40,
};
13
+
14
/**
 * Resolve a level name to its numeric severity from LEVELS.
 *
 * The lookup is case-insensitive and ignores surrounding whitespace, so a
 * value such as "WARN " (easy to produce from an environment variable)
 * still resolves. Anything that does not map to a finite number — missing,
 * empty, or unknown names — falls back to the `info` severity.
 *
 * @param {unknown} name  Level name, e.g. "debug", "Warn", undefined.
 * @returns {number} Severity number for the level.
 */
function levelToNum(name) {
  const key = String(name || '').trim().toLowerCase();
  const n = LEVELS[key];
  // Number.isFinite also rejects inherited members such as `constructor`.
  return Number.isFinite(n) ? n : LEVELS.info;
}
18
+
19
/**
 * Build a logger that emits one JSON string per record.
 *
 * The `sink` callback receives the JSON string per record so tests can
 * capture output without monkey-patching process.stderr. Each record has
 * `ts` (ISO-8601, derived from `now()`), `level` and `msg`, followed by the
 * `base` fields and then the per-call `fields`; later keys override earlier
 * ones.
 *
 * Serialization never throws into the caller: BigInt values are written as
 * decimal strings, Error values as `{ name, message, stack, ...own props }`,
 * and when the fields contain a reference cycle the record is re-serialized
 * with already-visited objects replaced by the string "[Circular]".
 *
 * @param {{ level?: string, sink?: (line: string) => void, base?: object, now?: () => number }} [opts]
 * @returns {{
 *   minLevel: number,
 *   debug: (msg: any, fields?: object) => void,
 *   info: (msg: any, fields?: object) => void,
 *   warn: (msg: any, fields?: object) => void,
 *   error: (msg: any, fields?: object) => void,
 *   child: (extraBase: object) => any,
 * }}
 */
export function createLogger(opts = {}) {
  const minLevel = levelToNum(opts.level);
  const sink = opts.sink || ((line) => { process.stderr.write(line + '\n'); });
  const now = opts.now || (() => Date.now());
  const base = opts.base || {};

  // Map values JSON cannot represent usefully onto something it can.
  const describe = (value) => {
    if (typeof value === 'bigint') return value.toString();
    if (value instanceof Error) {
      // Error's message/stack are non-enumerable, so a plain stringify
      // would produce "{}" and lose the diagnosis.
      return { ...value, name: value.name, message: value.message, stack: value.stack };
    }
    return value;
  };

  const serialize = (record) => {
    try {
      return JSON.stringify(record, (_key, value) => describe(value));
    } catch {
      // Most likely a reference cycle; retry below with cycle tracking.
    }
    try {
      const seen = new WeakSet();
      return JSON.stringify(record, (_key, value) => {
        if (value !== null && typeof value === 'object') {
          // Any object met a second time is collapsed, which also covers
          // harmless repeated references — acceptable on this rescue path.
          if (seen.has(value)) return '[Circular]';
          seen.add(value);
        }
        return describe(value);
      });
    } catch {
      // Last resort (e.g. a throwing toJSON): keep the envelope, drop fields.
      return JSON.stringify({
        ts: record.ts,
        level: record.level,
        msg: String(record.msg),
        logError: 'unserializable fields',
      });
    }
  };

  const log = (level, msg, fields) => {
    if (LEVELS[level] < minLevel) return;
    const record = {
      ts: new Date(now()).toISOString(),
      level,
      msg,
      ...base,
      ...(fields || {}),
    };
    sink(serialize(record));
  };

  return {
    minLevel,
    debug: (msg, fields) => log('debug', msg, fields),
    info: (msg, fields) => log('info', msg, fields),
    warn: (msg, fields) => log('warn', msg, fields),
    error: (msg, fields) => log('error', msg, fields),
    // A child shares level, sink and clock; its base fields extend the parent's.
    child(extraBase) {
      return createLogger({ ...opts, base: { ...base, ...extraBase } });
    },
  };
}
54
+
55
+ export { LEVELS, levelToNum };
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "lazyclaw",
3
+ "version": "3.88.0",
4
+ "description": "Lazy, elegant terminal CLI for chatting with Claude / OpenAI / Gemini / Ollama and orchestrating multi-step LLM workflows. Banner-on-launch, slash-command ghost autocomplete, persistent sessions, local HTTP gateway.",
5
+ "keywords": [
6
+ "claude",
7
+ "anthropic",
8
+ "openai",
9
+ "gemini",
10
+ "ollama",
11
+ "llm",
12
+ "cli",
13
+ "repl",
14
+ "chatbot",
15
+ "workflow",
16
+ "agent"
17
+ ],
18
+ "homepage": "https://github.com/cmblir/LazyClaude",
19
+ "bugs": {
20
+ "url": "https://github.com/cmblir/LazyClaude/issues"
21
+ },
22
+ "license": "MIT",
23
+ "author": "cmblir",
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "git+https://github.com/cmblir/LazyClaude.git",
27
+ "directory": "src/lazyclaw"
28
+ },
29
+ "type": "module",
30
+ "main": "cli.mjs",
31
+ "bin": {
32
+ "lazyclaw": "cli.mjs"
33
+ },
34
+ "files": [
35
+ "cli.mjs",
36
+ "daemon.mjs",
37
+ "sessions.mjs",
38
+ "skills.mjs",
39
+ "logger.mjs",
40
+ "ratelimit.mjs",
41
+ "config-validate.mjs",
42
+ "rates-validate.mjs",
43
+ "providers/",
44
+ "workflow/",
45
+ "web/",
46
+ "README.md",
47
+ "LICENSE"
48
+ ],
49
+ "engines": {
50
+ "node": ">=18"
51
+ },
52
+ "publishConfig": {
53
+ "access": "public"
54
+ }
55
+ }
@@ -0,0 +1,313 @@
1
+ // Real Anthropic Messages API streaming provider for LazyClaw chat.
2
+ //
3
+ // Why a separate file from registry.mjs:
4
+ // - registry.mjs hosts the *interface* and the offline mock used by the
5
+ // phase 3 acceptance tests. Real network code belongs next to its own
6
+ // unit tests so the mock surface in registry stays trivial.
7
+ //
8
+ // SSE parsing strategy:
9
+ // - The Messages API streams `event: ... \n data: ... \n\n` blocks. We
10
+ // read the body as Uint8Array chunks, accumulate into a buffer, split
11
+ // on the blank-line boundary, and yield the `text_delta` payloads.
12
+ // - We tolerate both a Web ReadableStream body and a Node Readable body
13
+ // (so this works in Node 22+ fetch and in Playwright's injected fetch).
14
+ //
15
+ // Test seam:
16
+ // - opts.fetch overrides globalThis.fetch. The phase 6 test injects a
17
+ // fake fetch returning a hand-rolled SSE ReadableStream. Real code
18
+ // defaults to globalThis.fetch.
19
+
20
// Value sent in the `anthropic-version` request header.
const ANTHROPIC_VERSION = '2023-06-01';

// `max_tokens` used when the caller does not pass opts.maxTokens.
const DEFAULT_MAX_TOKENS = 4096;
22
+
23
/**
 * Raised when no API key is supplied or the API answers 401/403.
 * Carries `code: 'INVALID_KEY'` so callers can branch without instanceof.
 */
class InvalidApiKeyError extends Error {
  name = 'InvalidApiKeyError';
  code = 'INVALID_KEY';

  /** @param {string} [message] */
  constructor(message = 'invalid x-api-key') {
    super(message);
  }
}
30
+
31
/**
 * Raised when the caller's AbortSignal is found aborted.
 * Carries `code: 'ABORT'`.
 */
class AbortError extends Error {
  name = 'AbortError';
  code = 'ABORT';

  /** @param {string} [message] */
  constructor(message = 'aborted') {
    super(message);
  }
}
38
+
39
/**
 * Raised on HTTP 429. Exposes the suggested wait in milliseconds
 * (`retryAfterMs`) and the raw response text (`body`).
 */
class RateLimitError extends Error {
  name = 'RateLimitError';
  code = 'RATE_LIMIT';
  status = 429;

  /**
   * @param {number} retryAfterMs  Suggested delay before retrying, in ms.
   * @param {string} [body]        Raw response body text.
   */
  constructor(retryAfterMs, body = '') {
    super(`anthropic api 429: rate limited (retry-after ${retryAfterMs}ms)`);
    this.retryAfterMs = retryAfterMs;
    this.body = body;
  }
}
49
+
50
/**
 * Generic non-2xx / stream-level API failure.
 *
 * `status` is the HTTP status (or the synthetic status chosen by the
 * thrower); `body` is the response text. The message embeds at most the
 * first 200 characters of the body.
 *
 * A missing or non-string body is converted to text instead of throwing a
 * TypeError from `.slice`, so the original API failure is never masked by
 * a secondary error raised while building this one.
 */
class ApiError extends Error {
  /**
   * @param {number} status
   * @param {unknown} body  Response text; other values are coerced to text.
   */
  constructor(status, body) {
    let text;
    if (typeof body === 'string') {
      text = body;
    } else if (body === undefined || body === null) {
      text = '';
    } else {
      try {
        text = JSON.stringify(body) ?? String(body);
      } catch {
        text = String(body);
      }
    }
    super(`anthropic api ${status}: ${text.slice(0, 200)}`);
    // The historical name is kept: callers may match on it.
    this.name = 'AnthropicApiError';
    this.status = status;
    this.body = text;
  }
}
58
+
59
/**
 * Convert a Retry-After header into a delay in milliseconds.
 *
 * `headers` may be anything with a `get(name)` method (e.g. a Headers
 * instance) or a plain object; both the lower-case and the canonical
 * spelling are tried. The value is either a number of seconds ("30") or an
 * HTTP-date ("Wed, 21 Oct 2026 07:28:00 GMT").
 *
 * A numeric `0` is a real value ("retry immediately") and yields 0; only a
 * missing or empty header — or one that cannot be parsed — yields the
 * 1000 ms default. The result is never negative.
 *
 * @param {any} headers
 * @returns {number} Delay in milliseconds.
 */
function parseRetryAfterMs(headers) {
  const FALLBACK_MS = 1000;
  const isAbsent = (v) => v === undefined || v === null || v === '';
  const firstPresent = (a, b) => (isAbsent(a) ? b : a);

  let raw = null;
  if (headers && typeof headers.get === 'function') {
    raw = firstPresent(headers.get('retry-after'), headers.get('Retry-After'));
  } else if (headers) {
    raw = firstPresent(headers['retry-after'], headers['Retry-After']);
  }
  if (isAbsent(raw)) return FALLBACK_MS;

  const text = String(raw);
  // Delta-seconds form.
  const asInt = parseInt(text, 10);
  if (!Number.isNaN(asInt)) return Math.max(0, asInt * 1000);
  // HTTP-date form: wait until that instant (already-past dates give 0).
  const date = Date.parse(text);
  if (!Number.isNaN(date)) return Math.max(0, date - Date.now());
  return FALLBACK_MS;
}
72
+
73
/**
 * Iterate a response body as a sequence of chunks, whatever its shape.
 *
 * Accepted shapes, checked in this order:
 *   1. anything with `getReader()` (Web ReadableStream),
 *   2. anything async-iterable (e.g. a Node Readable),
 *   3. a string (yielded once, UTF-8 encoded),
 *   4. a Uint8Array (yielded once, as-is).
 *
 * For the reader case the reader is always cleaned up: if the consumer
 * stops before the stream is drained (early `return`, `break`, or an
 * error), the stream is cancelled so the underlying source can stop
 * producing, and the lock is released in every case. Both calls are
 * best-effort and skipped when a hand-rolled reader does not provide them.
 *
 * @param {any} body
 * @throws {Error} when the body matches none of the accepted shapes.
 */
async function* iterateBody(body) {
  // Web ReadableStream
  if (body && typeof body.getReader === 'function') {
    const reader = body.getReader();
    let drained = false;
    try {
      while (true) {
        const { value, done } = await reader.read();
        if (done) {
          drained = true;
          break;
        }
        if (value) yield value;
      }
    } finally {
      if (!drained && typeof reader.cancel === 'function') {
        try { await reader.cancel(); } catch { /* best-effort cleanup */ }
      }
      if (typeof reader.releaseLock === 'function') {
        try { reader.releaseLock(); } catch { /* best-effort cleanup */ }
      }
    }
    return;
  }
  // Node Readable (async iterator)
  if (body && typeof body[Symbol.asyncIterator] === 'function') {
    for await (const chunk of body) yield chunk;
    return;
  }
  // Already a string / buffer (test convenience)
  if (typeof body === 'string') {
    yield new TextEncoder().encode(body);
    return;
  }
  if (body instanceof Uint8Array) {
    yield body;
    return;
  }
  throw new Error('anthropic: response body is not iterable');
}
100
+
101
/**
 * Split a text buffer into complete Server-Sent-Events frames.
 *
 * A frame ends at a blank line; both LF ("\n\n") and CRLF ("\r\n\r\n")
 * line endings are recognised, so a server or proxy that emits CRLF does
 * not stall the stream. Text after the last blank line is an incomplete
 * frame and is left for the caller to keep buffering.
 *
 * Yields one `{ event, data, nextCursor }` per complete frame:
 *   - `event` is the last `event:` field, or "message" when absent;
 *   - `data` is all `data:` fields joined with "\n" ("" when none);
 *   - `nextCursor` is the index in `buffer` just past the frame, i.e. how
 *     much the caller may drop from the front of its buffer.
 * The generator's return value is the final cursor.
 *
 * @param {string} buffer
 */
function* parseSseFrames(buffer) {
  // Global flag: each exec() resumes right after the previous boundary.
  const boundary = /\r?\n\r?\n/g;
  let cursor = 0;
  let match;
  while ((match = boundary.exec(buffer)) !== null) {
    const frame = buffer.slice(cursor, match.index);
    cursor = match.index + match[0].length;

    let event = 'message';
    const dataLines = [];
    for (const line of frame.split('\n')) {
      // trim() also removes the trailing "\r" of CRLF-terminated lines.
      if (line.startsWith('event:')) event = line.slice(6).trim();
      else if (line.startsWith('data:')) dataLines.push(line.slice(5).trim());
    }
    yield { event, data: dataLines.join('\n'), nextCursor: cursor };
  }
  return cursor;
}
125
+
126
/**
 * Assemble the JSON payload for the Messages API from chat messages and
 * caller options. Pure: performs no I/O.
 *
 * - Only `user` / `assistant` messages go into `messages`; content is
 *   coerced to a string.
 * - The system prompt is `opts.system`, else the first `system` message.
 *   With `opts.cache` it is lifted into a single text block marked
 *   `cache_control: { type: 'ephemeral' }`; otherwise it is sent as-is.
 * - `opts.thinking` enables extended thinking when `enabled` or
 *   `budgetTokens` is set (budget defaults to 1024).
 * - `opts.tools` (non-empty array) is forwarded untouched, together with
 *   `opts.toolChoice` when present.
 */
function buildAnthropicRequestBody(messages, opts) {
  const body = {
    model: opts.model || 'claude-opus-4-7',
    max_tokens: opts.maxTokens || DEFAULT_MAX_TOKENS,
    stream: true,
    messages: messages
      .filter((m) => m.role === 'user' || m.role === 'assistant')
      .map((m) => ({ role: m.role, content: String(m.content ?? '') })),
  };

  const sys = opts.system || messages.find((m) => m.role === 'system')?.content;
  if (sys) {
    body.system = opts.cache
      ? [{ type: 'text', text: String(sys), cache_control: { type: 'ephemeral' } }]
      : sys;
  }

  // A budget without an explicit `enabled` still switches thinking on.
  if (opts.thinking && (opts.thinking.enabled || opts.thinking.budgetTokens)) {
    body.thinking = {
      type: 'enabled',
      budget_tokens: opts.thinking.budgetTokens || 1024,
    };
  }

  // Tool-use is passthrough only; executing tools is the caller's job.
  if (Array.isArray(opts.tools) && opts.tools.length > 0) {
    body.tools = opts.tools;
    if (opts.toolChoice) body.tool_choice = opts.toolChoice;
  }
  return body;
}

export const anthropicProvider = {
  name: 'anthropic',
  /**
   * Stream a reply from the Anthropic Messages API.
   *
   * The async iterator yields text deltas only. Everything else is
   * delivered through optional callbacks, none of which can break the
   * stream (their exceptions are swallowed):
   *   - `opts.onThinking(text)`  for each thinking delta,
   *   - `opts.onToolUse({ id, name, input, raw })` once per completed
   *     tool_use block (`input` is `{}` when the accumulated JSON is
   *     malformed; `raw` carries the unparsed text),
   *   - `opts.onUsage(usage)` once, when the stream finishes — on
   *     `message_stop`, or when the body ends without one.
   *
   * Cancellation: `opts.signal` is checked before the request and before
   * each chunk is processed; an abort that interrupts the fetch or a body
   * read is also reported as this module's AbortError (code 'ABORT'), with
   * the platform error attached as `cause`.
   *
   * @param {Array<{role:string,content:string}>} messages
   * @param {{apiKey?:string, model?:string, fetch?:typeof fetch, maxTokens?:number, system?:string,
   *          cache?:boolean, thinking?:{enabled?:boolean,budgetTokens?:number}, tools?:Array<object>,
   *          toolChoice?:object, signal?:AbortSignal, onThinking?:(text:string)=>void,
   *          onToolUse?:(call:{id:string,name:string,input:object,raw:string})=>void,
   *          onUsage?:(usage:object)=>void}} opts
   * @throws {InvalidApiKeyError} missing key, or HTTP 401/403.
   * @throws {RateLimitError} HTTP 429.
   * @throws {ApiError} any other non-2xx status, or an `error` stream event (status 500).
   * @throws {AbortError} when `opts.signal` is aborted.
   */
  async *sendMessage(messages, opts = {}) {
    if (!opts.apiKey) throw new InvalidApiKeyError('missing api key');
    const fetchFn = opts.fetch || globalThis.fetch;
    if (!fetchFn) throw new Error('anthropic: no fetch implementation available');

    const body = buildAnthropicRequestBody(messages, opts);

    const signal = opts.signal;
    if (signal?.aborted) throw new AbortError('aborted before request');

    // Wrap a platform abort failure in this module's AbortError so callers
    // can rely on `err.code === 'ABORT'` for every cancellation path.
    const toAbortError = (cause, message) => {
      const abortErr = new AbortError(message);
      abortErr.cause = cause;
      return abortErr;
    };

    let res;
    try {
      res = await fetchFn('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          'x-api-key': opts.apiKey,
          'anthropic-version': ANTHROPIC_VERSION,
        },
        body: JSON.stringify(body),
        signal,
      });
    } catch (err) {
      if (signal?.aborted && !(err instanceof AbortError)) throw toAbortError(err, 'aborted during request');
      throw err;
    }

    if (!res.ok) {
      const text = typeof res.text === 'function' ? await res.text() : '';
      if (res.status === 401 || res.status === 403) throw new InvalidApiKeyError(text || 'unauthorized');
      if (res.status === 429) throw new RateLimitError(parseRetryAfterMs(res.headers), text || '');
      throw new ApiError(res.status, text || '');
    }

    // decode(..., { stream: true }) keeps a multi-byte UTF-8 sequence that
    // is split across two network chunks from turning into U+FFFD.
    const decoder = new TextDecoder('utf-8', { fatal: false });
    let buffer = '';

    // tool_use blocks arrive as content_block_start (id + name), then N
    // input_json_delta partials, then content_block_stop. Keyed by index.
    const openToolBlocks = new Map();

    // Usage totals are split across message_start (input side) and
    // message_delta (final output_tokens); both are merged into one object.
    let usage = null;
    const reportUsage = () => {
      if (usage && typeof opts.onUsage === 'function') {
        try { opts.onUsage(usage); } catch { /* never let a callback abort */ }
      }
    };

    try {
      for await (const chunk of iterateBody(res.body)) {
        if (signal?.aborted) throw new AbortError('aborted mid-stream');
        buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });

        let consumed = 0;
        for (const frame of parseSseFrames(buffer)) {
          consumed = frame.nextCursor;

          if (frame.event === 'message_stop') {
            reportUsage();
            return;
          }
          // Every other event we act on needs a payload.
          if (!frame.data) continue;

          let obj = null;
          let parsedOk = true;
          try { obj = JSON.parse(frame.data); } catch { parsedOk = false; }

          if (frame.event === 'error') {
            // Fall back to the raw payload when there is no usable message,
            // so ApiError always receives a string.
            const reported = obj?.error?.message;
            const message = typeof reported === 'string' && reported ? reported : frame.data;
            throw new ApiError(500, message);
          }
          // A malformed frame is skipped; later frames may still be valid.
          if (!parsedOk) continue;

          switch (frame.event) {
            case 'message_start': {
              const u = obj?.message?.usage;
              if (u) {
                usage = {
                  inputTokens: u.input_tokens ?? null,
                  outputTokens: u.output_tokens ?? null,
                  cacheCreationInputTokens: u.cache_creation_input_tokens ?? null,
                  cacheReadInputTokens: u.cache_read_input_tokens ?? null,
                };
              }
              break;
            }
            case 'message_delta': {
              const u = obj?.usage;
              if (!u) break;
              if (usage) {
                // The final output_tokens replaces the initial value.
                if (Number.isFinite(u.output_tokens)) usage.outputTokens = u.output_tokens;
              } else {
                usage = {
                  inputTokens: null,
                  outputTokens: u.output_tokens ?? null,
                  cacheCreationInputTokens: null,
                  cacheReadInputTokens: null,
                };
              }
              break;
            }
            case 'content_block_start': {
              if (obj?.content_block?.type === 'tool_use') {
                openToolBlocks.set(obj.index, {
                  id: obj.content_block.id,
                  name: obj.content_block.name,
                  inputJson: '',
                });
              }
              break;
            }
            case 'content_block_delta': {
              const delta = obj?.delta || {};
              if (delta.type === 'text_delta' && delta.text) {
                yield delta.text;
              } else if (delta.type === 'thinking_delta' && delta.thinking && typeof opts.onThinking === 'function') {
                try { opts.onThinking(delta.thinking); } catch { /* never let a callback abort the stream */ }
              } else if (delta.type === 'input_json_delta' && typeof delta.partial_json === 'string') {
                const open = openToolBlocks.get(obj.index);
                if (open) open.inputJson += delta.partial_json;
              } else if (delta.text) {
                // Untyped delta that still carries text.
                yield delta.text;
              }
              break;
            }
            case 'content_block_stop': {
              const finished = openToolBlocks.get(obj?.index);
              if (!finished) break;
              openToolBlocks.delete(obj.index);
              if (typeof opts.onToolUse === 'function') {
                let input = {};
                try { input = finished.inputJson ? JSON.parse(finished.inputJson) : {}; }
                catch { /* malformed input → pass empty + raw for caller to inspect */ }
                try { opts.onToolUse({ id: finished.id, name: finished.name, input, raw: finished.inputJson }); }
                catch { /* never let a callback abort the stream */ }
              }
              break;
            }
            default:
              // ping and other events carry nothing we surface.
              break;
          }
        }
        if (consumed > 0) buffer = buffer.slice(consumed);
      }
    } catch (err) {
      // A read that fails because the caller aborted is a cancellation,
      // not a transport error. Our own error types pass through untouched.
      if (signal?.aborted && !(err instanceof AbortError) && !(err instanceof ApiError)) {
        throw toAbortError(err, 'aborted mid-stream');
      }
      throw err;
    }

    // The body ended without message_stop. Whatever is left in `buffer` is
    // an unterminated frame and is dropped; usage gathered so far is still
    // reported so the caller's accounting does not silently lose the call.
    reportUsage();
  },
};
312
+
313
+ export { InvalidApiKeyError, ApiError, AbortError, RateLimitError };
@@ -0,0 +1,132 @@
1
+ // Response cache decorator for providers — opt-in, in-memory, LRU.
2
+ //
3
+ // Why a decorator and not a per-provider option:
4
+ // - Same reasoning as withRateLimitRetry / withFallback: caching is
5
+ // *policy* (caller decides how aggressive), not transport. The
6
+ // providers themselves stay pure async iterators over a single call.
7
+ // - A single decorator works across every concrete provider — anthropic,
8
+ // openai, ollama, gemini, mock — without each having to grow its own
9
+ // cache machinery.
10
+ //
11
+ // Hashing strategy:
12
+ // - JSON-stringify the messages + model + cache-relevant opts and SHA-256
13
+ // it. We use a stable property order (Object.keys.sort) so that
14
+ // `{a:1,b:2}` and `{b:2,a:1}` hash identically — JSON.stringify alone
15
+ // respects insertion order, which would cause spurious misses.
16
+ // - opts that don't affect the response (signal, fetch, onThinking,
17
+ // onToolUse) are excluded from the hash on purpose.
18
+ //
19
+ // LRU + TTL:
20
+ // - On every hit we refresh the entry's recency. Eviction at
21
+ // maxEntries removes the oldest. TTL wins over LRU — an entry
22
+ // past its ttlMs is dropped before being treated as a hit.
23
+ //
24
+ // Streaming semantics:
25
+ // - On a hit, the wrapper replays the cached chunks via the same
26
+ // async-iterable shape callers expect. We don't re-introduce
27
+ // the original delays — the cache is a perf feature.
28
+ // - On a miss, we stream-through (yield each chunk as it arrives)
29
+ // and accumulate into a buffer. The buffer lands in the cache
30
+ // only when the source iterator completes successfully. Errors
31
+ // and aborts mid-stream do not poison the cache.
32
+
33
+ import crypto from 'node:crypto';
34
+
35
// Entry cap used when the caller does not pass `maxEntries`.
const DEFAULT_MAX_ENTRIES = 256;

// Entry lifetime used when the caller does not pass `ttlMs`: one hour.
const DEFAULT_TTL_MS = 3600 * 1000;
37
+
38
/**
 * JSON-serialize a value with object keys in sorted order, so two objects
 * with the same content but different insertion order produce the same
 * string.
 *
 * Follows JSON.stringify for the cases that matter to a cache key:
 *   - a value's `toJSON()` is honoured (so two different Dates no longer
 *     both collapse to "{}");
 *   - object members whose value is undefined, a function or a symbol are
 *     omitted, so `{ a: 1, b: undefined }` equals `{ a: 1 }`;
 *   - such values inside an array become `null`.
 * Like JSON.stringify, a top-level undefined yields undefined. Cyclic
 * structures are not supported.
 *
 * @param {unknown} value
 * @returns {string | undefined}
 */
function stableStringify(value) {
  let v = value;
  // Called once per value, mirroring JSON.stringify; the result is not
  // re-checked for its own toJSON.
  if (v !== null && typeof v === 'object' && typeof v.toJSON === 'function') {
    v = v.toJSON();
  }
  if (v === null || typeof v !== 'object') return JSON.stringify(v);
  if (Array.isArray(v)) {
    return '[' + v.map((item) => stableStringify(item) ?? 'null').join(',') + ']';
  }
  const members = [];
  for (const key of Object.keys(v).sort()) {
    const encoded = stableStringify(v[key]);
    if (encoded !== undefined) members.push(JSON.stringify(key) + ':' + encoded);
  }
  return '{' + members.join(',') + '}';
}
44
+
45
/**
 * Compute the cache key for a call: the SHA-256 hex digest of the
 * order-independent serialization of everything that shapes the reply.
 *
 * Included: messages, model, thinking, system, tools, toolChoice and
 * maxTokens. maxTokens matters because a reply produced under a small
 * token cap may be truncated and must not be served to a call that allows
 * more. Transport and callback options (signal, fetch, onThinking,
 * onToolUse, ...) are deliberately left out.
 *
 * Missing or falsy options are normalized to null so "absent" and
 * "explicitly empty" hash identically.
 *
 * @param {Array<object>} messages
 * @param {string | null | undefined} model
 * @param {object | null | undefined} opts
 * @returns {string} 64-character lowercase hex digest.
 */
function hashKey(messages, model, opts) {
  const cacheable = {
    messages,
    model: model || null,
    thinking: opts?.thinking || null,
    system: opts?.system || null,
    tools: opts?.tools || null,
    toolChoice: opts?.toolChoice || null,
    maxTokens: opts?.maxTokens || null,
  };
  return crypto.createHash('sha256').update(stableStringify(cacheable)).digest('hex');
}
56
+
57
+ /**
58
+ * @typedef {{ chunks: string[], expiresAt: number }} CacheEntry
59
+ */
60
+
61
/**
 * Wrap a provider so identical calls are served from an in-memory cache.
 *
 * Behaviour:
 *   - Hit (entry present and not expired): the stored chunks are replayed
 *     through the same async-iterable shape and the entry becomes the most
 *     recently used one.
 *   - Miss: chunks are streamed through as they arrive and stored only if
 *     the source completes normally. An error, or a consumer that stops
 *     early, stores nothing.
 *   - Eviction drops the least recently used entries once `maxEntries` is
 *     reached. A `maxEntries` that is not greater than zero disables
 *     storing altogether (every call is a miss).
 *   - Only yielded chunks are cached; provider callbacks are not replayed
 *     on a hit.
 *
 * The returned object also exposes `cacheStats()` and `cacheClear()`.
 *
 * @param {{ name: string, sendMessage: Function }} provider
 * @param {{
 *   maxEntries?: number,
 *   ttlMs?: number,
 *   now?: () => number,
 *   onHit?: (info: { keyHash: string, size: number }) => void,
 *   onMiss?: (info: { keyHash: string }) => void,
 * }} [opts]
 */
export function withResponseCache(provider, opts = {}) {
  const maxEntries = opts.maxEntries ?? DEFAULT_MAX_ENTRIES;
  const ttlMs = opts.ttlMs ?? DEFAULT_TTL_MS;
  const now = opts.now ?? (() => Date.now());
  const onHit = typeof opts.onHit === 'function' ? opts.onHit : null;
  const onMiss = typeof opts.onMiss === 'function' ? opts.onMiss : null;
  // Map iteration order is insertion order, so the first key is the LRU one.
  /** @type {Map<string, { chunks: string[], expiresAt: number }>} */
  const cache = new Map();
  let hits = 0;
  let misses = 0;

  // Move an entry to the most-recently-used end.
  const touch = (key, entry) => {
    cache.delete(key);
    cache.set(key, entry);
  };

  // Store a completed reply, evicting least-recently-used entries first.
  const store = (key, chunks) => {
    if (!(maxEntries > 0)) return; // caching disabled
    // If a concurrent miss already stored this key, replace it in place
    // rather than evicting an unrelated entry to make room for it.
    cache.delete(key);
    while (cache.size >= maxEntries) {
      const oldestKey = cache.keys().next().value;
      if (oldestKey === undefined) break;
      cache.delete(oldestKey);
    }
    cache.set(key, { chunks, expiresAt: now() + ttlMs });
  };

  return {
    name: `${provider.name}+cache`,
    /** Inspectable counters — useful for benchmarks and dashboards. */
    cacheStats() { return { hits, misses, size: cache.size, maxEntries }; },
    cacheClear() { cache.clear(); hits = 0; misses = 0; },
    async *sendMessage(messages, sendOpts = {}) {
      const key = hashKey(messages, sendOpts.model, sendOpts);
      const cached = cache.get(key);
      if (cached && cached.expiresAt > now()) {
        hits += 1;
        touch(key, cached);
        if (onHit) { try { onHit({ keyHash: key, size: cache.size }); } catch { /* swallow */ } }
        for (const chunk of cached.chunks) yield chunk;
        return;
      }
      misses += 1;
      if (onMiss) { try { onMiss({ keyHash: key }); } catch { /* swallow */ } }
      // Drop expired entry if we found one
      if (cached) cache.delete(key);

      // An exception from the provider propagates from this loop, so the
      // store() below is skipped and a partial reply is never cached.
      const captured = [];
      for await (const chunk of provider.sendMessage(messages, sendOpts)) {
        captured.push(chunk);
        yield chunk;
      }
      store(key, captured);
    },
  };
}
131
+
132
+ export { stableStringify, hashKey };
@@ -0,0 +1,90 @@
1
+ // Auto-fallback wrapper for provider chains.
2
+ //
3
+ // Why a wrapper, not a registry feature:
4
+ // - Fallback policy is caller-specific. The daemon may want
5
+ // "anthropic → openai" while a CLI script wants "ollama only" with
6
+ // no remote fallback at all.
7
+ // - Wrapping keeps each provider self-contained and testable.
8
+ //
9
+ // Strategy:
10
+ // - Try providers in order. The first one that yields any chunk
11
+ // "wins" — we forward its stream and never touch the next one.
12
+ // - Fall through happens when the active provider throws BEFORE
13
+ // yielding any chunk. Once a provider has yielded text, we cannot
14
+ // retry without producing duplicate output, so post-yield errors
15
+ // bubble unchanged. This mirrors `withRateLimitRetry`.
16
+ // - Which errors are recoverable is configurable via `opts.shouldFallback`.
17
+ // Default: rate limits, refused connections, 5xx responses and code-less
19
+ // network errors fall back; 'INVALID_KEY' and 'ABORT' never do (auth errors
20
+ // are structural — falling back just delays the diagnosis). Callers can pass their own predicate.
20
+
21
// Error codes the default predicate always treats as worth a fallback.
// A generic 5xx has no code of its own (it surfaces as an ApiError carrying
// only a status), so the predicate recognises it through err.status instead.
const DEFAULT_RECOVERABLE = new Set(['RATE_LIMIT', 'CONNECTION_REFUSED']);
27
+
28
/**
 * Default policy for "is this error worth trying the next provider?".
 *
 * Returns false for a missing error and for the codes 'INVALID_KEY' and
 * 'ABORT'. Returns true for a code listed in DEFAULT_RECOVERABLE, for a
 * finite status in the 5xx range, and for errors that carry neither a code
 * nor a status (typically a network-layer fetch failure). Everything else
 * — e.g. a 4xx status — is not recoverable.
 *
 * @param {any} err
 * @returns {boolean}
 */
function defaultShouldFallback(err) {
  if (!err) return false;

  const { code, status } = err;
  if (code === 'INVALID_KEY' || code === 'ABORT') return false;
  if (DEFAULT_RECOVERABLE.has(code)) return true;

  const isServerError = Number.isFinite(status) && status >= 500 && status < 600;
  const isBareFailure = !code && !status;
  return isServerError || isBareFailure;
}
38
+
39
/**
 * Combine several providers into one that tries them in order.
 *
 * A provider is abandoned for the next one only when it throws before
 * yielding anything, it is not the last in the chain, and the predicate
 * accepts the error. Once a chunk has been forwarded, any later error is
 * rethrown as-is, because retrying would duplicate output.
 *
 * @param {Array<{ name: string, sendMessage: Function }>} chain
 *   Ordered list — the first provider is the primary. Must have at least one entry.
 * @param {{
 *   shouldFallback?: (err: Error) => boolean,
 *   onFallback?: (info: { from: string, to: string, err: Error }) => void,
 * }} fallbackOpts
 *   `shouldFallback` defaults to defaultShouldFallback. `onFallback` is
 *   notified on each hand-over; exceptions it throws are ignored.
 * @throws {Error} when `chain` is not a non-empty array.
 */
export function withFallback(chain, fallbackOpts = {}) {
  const isUsableChain = Array.isArray(chain) && chain.length > 0;
  if (!isUsableChain) {
    throw new Error('withFallback: chain must contain at least one provider');
  }
  const { onFallback } = fallbackOpts;
  const shouldFallback = fallbackOpts.shouldFallback || defaultShouldFallback;

  // Tell the observer (if any) that position `index` is handing over.
  const announce = (index, err) => {
    if (typeof onFallback !== 'function') return;
    try {
      onFallback({ from: chain[index].name, to: chain[index + 1].name, err });
    } catch {
      /* swallow */
    }
  };

  const label = chain.map((p) => p.name).join('→');

  return {
    name: `fallback(${label})`,
    async *sendMessage(messages, opts = {}) {
      let lastErr = null;
      for (const [position, candidate] of chain.entries()) {
        let streamed = false;
        try {
          for await (const chunk of candidate.sendMessage(messages, opts)) {
            streamed = true;
            yield chunk;
          }
          return;
        } catch (err) {
          lastErr = err;
          const isLast = position === chain.length - 1;
          // Rethrow when output already started, when nothing is left to
          // try, or when the predicate rejects the error — in that order,
          // so the predicate only sees errors that could be recovered.
          if (streamed || isLast || !shouldFallback(err)) throw err;
          announce(position, err);
        }
      }
      // Only reached if the loop body never ran or never returned/threw.
      throw lastErr;
    },
  };
}
89
+
90
+ export { defaultShouldFallback };