lazyclaw 3.88.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +186 -0
- package/cli.mjs +2648 -0
- package/config-validate.mjs +61 -0
- package/daemon.mjs +1451 -0
- package/logger.mjs +55 -0
- package/package.json +55 -0
- package/providers/anthropic.mjs +313 -0
- package/providers/cache.mjs +132 -0
- package/providers/fallback.mjs +90 -0
- package/providers/gemini.mjs +187 -0
- package/providers/ollama.mjs +148 -0
- package/providers/openai.mjs +243 -0
- package/providers/rates.mjs +85 -0
- package/providers/registry.mjs +144 -0
- package/providers/retry.mjs +103 -0
- package/ratelimit.mjs +65 -0
- package/rates-validate.mjs +58 -0
- package/sessions.mjs +177 -0
- package/skills.mjs +97 -0
- package/web/server.mjs +33 -0
- package/workflow/executor.mjs +358 -0
- package/workflow/persistent.mjs +369 -0
- package/workflow/summary.mjs +318 -0
package/logger.mjs
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// Tiny structured logger — JSON-line output, level-gated, no transitive deps.
|
|
2
|
+
//
|
|
3
|
+
// Why not pino/winston: a single dep would dwarf the entire CLI. JSON-line
|
|
4
|
+
// is the de-facto observability format (jq-friendly, ingestible by every
|
|
5
|
+
// log shipper) and a module of well under 100 lines covers our needs.
|
|
6
|
+
//
|
|
7
|
+
// Levels: debug < info < warn < error. Setting LAZYCLAW_LOG_LEVEL=warn
|
|
8
|
+
// silences info+debug. The default (info) keeps user-meaningful events
|
|
9
|
+
// without per-request noise; the daemon's access log lives at info so
|
|
10
|
+
// it's on by default once the logger is wired.
|
|
11
|
+
|
|
12
|
+
// Numeric severity per level name. A record is emitted only when its level's
// number is not below the logger's configured minimum.
const LEVELS = { debug: 10, info: 20, warn: 30, error: 40 };

/**
 * Resolve a level name to its numeric severity.
 *
 * Matching is case-insensitive and ignores surrounding whitespace, so a value
 * such as "warn " (a trailing space is easy to pick up from a shell `set` or a
 * .env file) still resolves instead of silently degrading to `info`.
 * Unknown, empty or missing names fall back to `info`.
 *
 * @param {unknown} name Level name, e.g. "debug", "WARN".
 * @returns {number} One of the values of LEVELS.
 */
function levelToNum(name) {
  const key = String(name || '').trim().toLowerCase();
  // Own-property lookup so inherited names ("constructor", "tostring", ...)
  // can never be mistaken for a level.
  const n = Object.prototype.hasOwnProperty.call(LEVELS, key) ? LEVELS[key] : undefined;
  return Number.isFinite(n) ? n : LEVELS.info;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Build a logger that emits one JSON string per record.
 *
 * Each record is `{ ts, level, msg, ...base, ...fields }`; because `base` and
 * `fields` are spread last, they can override `ts`, `level` and `msg`.
 * Records whose level is below `opts.level` are dropped before any work is
 * done. The `sink` callback receives the JSON string per record so tests can
 * capture output without monkey-patching process.stderr; the default sink
 * writes the line plus a newline to process.stderr.
 *
 * Serialization never throws: if the record cannot be stringified (circular
 * reference, BigInt, throwing getter/toJSON) a reduced record carrying
 * `ts`, `level`, the stringified `msg` and a `logError` note is emitted instead.
 *
 * @param {{ level?: string, sink?: (line: string) => void, base?: object, now?: () => number }} [opts]
 *   level — minimum level name (default "info"); sink — line consumer;
 *   base — fields merged into every record; now — clock returning epoch ms.
 * @returns {{
 *   minLevel: number,
 *   debug: Function, info: Function, warn: Function, error: Function,
 *   child: (extraBase: object) => object
 * }} Logger; `child` returns a new logger with the same options and
 *   `extraBase` merged over this logger's base fields.
 */
export function createLogger(opts = {}) {
  const minLevel = levelToNum(opts.level);
  const sink = opts.sink || ((line) => { process.stderr.write(line + '\n'); });
  const now = opts.now || (() => Date.now());
  const base = opts.base || {};

  const log = (level, msg, fields) => {
    if (LEVELS[level] < minLevel) return;
    const ts = new Date(now()).toISOString();
    let line;
    try {
      line = JSON.stringify({ ts, level, msg, ...base, ...(fields || {}) });
    } catch (err) {
      // A logging call must not take down its caller just because a field is
      // not JSON-serializable; keep the event, drop the offending payload.
      const reason = err && err.message ? err.message : String(err);
      line = JSON.stringify({ ts, level, msg: String(msg), logError: `unserializable record: ${reason}` });
    }
    sink(line);
  };

  return {
    minLevel,
    debug: (msg, fields) => log('debug', msg, fields),
    info: (msg, fields) => log('info', msg, fields),
    warn: (msg, fields) => log('warn', msg, fields),
    error: (msg, fields) => log('error', msg, fields),
    child(extraBase) {
      return createLogger({ ...opts, base: { ...base, ...extraBase } });
    },
  };
}
|
|
54
|
+
|
|
55
|
+
export { LEVELS, levelToNum };
|
package/package.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "lazyclaw",
|
|
3
|
+
"version": "3.88.0",
|
|
4
|
+
"description": "Lazy, elegant terminal CLI for chatting with Claude / OpenAI / Gemini / Ollama and orchestrating multi-step LLM workflows. Banner-on-launch, slash-command ghost autocomplete, persistent sessions, local HTTP gateway.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"claude",
|
|
7
|
+
"anthropic",
|
|
8
|
+
"openai",
|
|
9
|
+
"gemini",
|
|
10
|
+
"ollama",
|
|
11
|
+
"llm",
|
|
12
|
+
"cli",
|
|
13
|
+
"repl",
|
|
14
|
+
"chatbot",
|
|
15
|
+
"workflow",
|
|
16
|
+
"agent"
|
|
17
|
+
],
|
|
18
|
+
"homepage": "https://github.com/cmblir/LazyClaude",
|
|
19
|
+
"bugs": {
|
|
20
|
+
"url": "https://github.com/cmblir/LazyClaude/issues"
|
|
21
|
+
},
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"author": "cmblir",
|
|
24
|
+
"repository": {
|
|
25
|
+
"type": "git",
|
|
26
|
+
"url": "git+https://github.com/cmblir/LazyClaude.git",
|
|
27
|
+
"directory": "src/lazyclaw"
|
|
28
|
+
},
|
|
29
|
+
"type": "module",
|
|
30
|
+
"main": "cli.mjs",
|
|
31
|
+
"bin": {
|
|
32
|
+
"lazyclaw": "cli.mjs"
|
|
33
|
+
},
|
|
34
|
+
"files": [
|
|
35
|
+
"cli.mjs",
|
|
36
|
+
"daemon.mjs",
|
|
37
|
+
"sessions.mjs",
|
|
38
|
+
"skills.mjs",
|
|
39
|
+
"logger.mjs",
|
|
40
|
+
"ratelimit.mjs",
|
|
41
|
+
"config-validate.mjs",
|
|
42
|
+
"rates-validate.mjs",
|
|
43
|
+
"providers/",
|
|
44
|
+
"workflow/",
|
|
45
|
+
"web/",
|
|
46
|
+
"README.md",
|
|
47
|
+
"LICENSE"
|
|
48
|
+
],
|
|
49
|
+
"engines": {
|
|
50
|
+
"node": ">=18"
|
|
51
|
+
},
|
|
52
|
+
"publishConfig": {
|
|
53
|
+
"access": "public"
|
|
54
|
+
}
|
|
55
|
+
}
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
// Real Anthropic Messages API streaming provider for LazyClaw chat.
|
|
2
|
+
//
|
|
3
|
+
// Why a separate file from registry.mjs:
|
|
4
|
+
// - registry.mjs hosts the *interface* and the offline mock used by the
|
|
5
|
+
// phase 3 acceptance tests. Real network code belongs next to its own
|
|
6
|
+
// unit tests so the mock surface in registry stays trivial.
|
|
7
|
+
//
|
|
8
|
+
// SSE parsing strategy:
|
|
9
|
+
// - The Messages API streams `event: ... \n data: ... \n\n` blocks. We
|
|
10
|
+
// read the body as Uint8Array chunks, accumulate into a buffer, split
|
|
11
|
+
// on the blank-line boundary, and yield the `text_delta` payloads.
|
|
12
|
+
// - We tolerate both a Web ReadableStream body and a Node Readable body
|
|
13
|
+
// (so this works in Node 22+ fetch and in Playwright's injected fetch).
|
|
14
|
+
//
|
|
15
|
+
// Test seam:
|
|
16
|
+
// - opts.fetch overrides globalThis.fetch. The phase 6 test injects a
|
|
17
|
+
// fake fetch returning a hand-rolled SSE ReadableStream. Real code
|
|
18
|
+
// defaults to globalThis.fetch.
|
|
19
|
+
|
|
20
|
+
// Sent as the `anthropic-version` request header on every Messages API call.
const ANTHROPIC_VERSION = '2023-06-01';
// `max_tokens` used when the caller does not pass opts.maxTokens.
const DEFAULT_MAX_TOKENS = 4096;
|
|
22
|
+
|
|
23
|
+
/**
 * Authentication failure: thrown when no API key is supplied or the API
 * answers 401/403. Carries `code: 'INVALID_KEY'` for programmatic checks.
 */
class InvalidApiKeyError extends Error {
  constructor(message = 'invalid x-api-key') {
    super(message);
    Object.assign(this, { name: 'InvalidApiKeyError', code: 'INVALID_KEY' });
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Cancellation marker: thrown when the caller's AbortSignal is already
 * aborted before the request or is found aborted between stream chunks.
 * Carries `code: 'ABORT'`.
 */
class AbortError extends Error {
  constructor(message = 'aborted') {
    super(message);
    Object.assign(this, { name: 'AbortError', code: 'ABORT' });
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * HTTP 429 from the API. Exposes the suggested wait (`retryAfterMs`) and the
 * raw response text (`body`) alongside `code: 'RATE_LIMIT'` and `status: 429`.
 */
class RateLimitError extends Error {
  constructor(retryAfterMs, body = '') {
    super(`anthropic api 429: rate limited (retry-after ${retryAfterMs}ms)`);
    Object.assign(this, {
      name: 'RateLimitError',
      code: 'RATE_LIMIT',
      status: 429,
      retryAfterMs,
      body,
    });
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Non-2xx response (other than auth / rate-limit) or an in-stream `error`
 * event. `status` is the HTTP status; `body` is the response text.
 *
 * The message embeds at most the first 200 characters of the body. A body
 * that is not a string (undefined, null, a parsed JSON object, ...) is
 * converted to text first, so constructing the error can never itself throw
 * and hide the underlying API failure.
 */
class ApiError extends Error {
  /**
   * @param {number} status HTTP status code.
   * @param {unknown} body Response text; non-strings are stringified.
   */
  constructor(status, body) {
    let text;
    if (typeof body === 'string') {
      text = body;
    } else if (body === undefined || body === null) {
      text = '';
    } else {
      // JSON keeps structured error payloads readable; fall back to String()
      // for values JSON cannot represent (functions, circular objects, BigInt).
      try { text = JSON.stringify(body) ?? String(body); } catch { text = String(body); }
    }
    super(`anthropic api ${status}: ${text.slice(0, 200)}`);
    this.name = 'AnthropicApiError';
    this.status = status;
    this.body = text;
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Convert a `Retry-After` header into a wait time in milliseconds.
 *
 * Accepts either a Headers-like object (anything with a `get` method) or a
 * plain object keyed by header name. The value may be delay-seconds ("30",
 * fractional values such as "1.5" are honoured) or a date; a date in the
 * past yields 0. A missing, empty or unparseable header yields 1000 ms.
 *
 * Only an all-numeric value is treated as seconds, so a date that happens to
 * start with digits is not misread as a huge delay.
 *
 * @param {{ get?: (name: string) => unknown } | Record<string, unknown> | null | undefined} headers
 * @returns {number} Non-negative wait in milliseconds.
 */
function parseRetryAfterMs(headers) {
  const FALLBACK_MS = 1000;
  const isMissing = (v) => v === undefined || v === null || v === '';

  // Headers may be a Headers instance or a plain object. Accept both, and
  // both spellings, since a plain object lookup is case-sensitive.
  let raw = null;
  if (headers && typeof headers.get === 'function') {
    raw = headers.get('retry-after');
    if (isMissing(raw)) raw = headers.get('Retry-After');
  } else if (headers) {
    raw = headers['retry-after'];
    if (isMissing(raw)) raw = headers['Retry-After'];
  }
  if (isMissing(raw)) return FALLBACK_MS;

  const text = String(raw).trim();
  if (text === '') return FALLBACK_MS;

  // Delay in seconds. Negative values are clamped to "retry now".
  if (/^-?\d+(\.\d+)?$/.test(text)) {
    return Math.max(0, Math.round(Number(text) * 1000));
  }

  // Otherwise an absolute date (e.g. "Wed, 21 Oct 2026 07:28:00 GMT").
  const date = Date.parse(text);
  if (!Number.isNaN(date)) return Math.max(0, date - Date.now());
  return FALLBACK_MS;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Normalize a response body into an async stream of chunks.
 *
 * Supported inputs, checked in this order:
 *   1. Web ReadableStream (has `getReader`) — read chunk by chunk.
 *   2. Any async iterable (e.g. a Node Readable) — forwarded as-is.
 *   3. A string — yielded once, UTF-8 encoded (test convenience).
 *   4. A Uint8Array — yielded once.
 *
 * When the consumer stops early (break / return / throw) or a read fails,
 * the Web stream is cancelled and the reader lock released, so the
 * underlying source is told to stop and the body is not left locked.
 *
 * @param {unknown} body
 * @yields {Uint8Array | unknown} Chunks exactly as produced by the source.
 * @throws {Error} 'anthropic: response body is not iterable' for any other input.
 */
async function* iterateBody(body) {
  // Web ReadableStream
  if (body && typeof body.getReader === 'function') {
    const reader = body.getReader();
    let drained = false;
    try {
      while (true) {
        const { value, done } = await reader.read();
        if (done) {
          drained = true;
          break;
        }
        if (value) yield value;
      }
    } finally {
      // Not drained means the consumer bailed out or read() threw: cancel so
      // the source can release its resources. Best effort — a failure here
      // must not replace the error (if any) that is already propagating.
      if (!drained && typeof reader.cancel === 'function') {
        try { await reader.cancel(); } catch { /* best effort */ }
      }
      if (typeof reader.releaseLock === 'function') {
        try { reader.releaseLock(); } catch { /* best effort */ }
      }
    }
    return;
  }
  // Node Readable (async iterator)
  if (body && typeof body[Symbol.asyncIterator] === 'function') {
    for await (const chunk of body) yield chunk;
    return;
  }
  // Already a string / buffer (test convenience)
  if (typeof body === 'string') {
    yield new TextEncoder().encode(body);
    return;
  }
  if (body instanceof Uint8Array) {
    yield body;
    return;
  }
  throw new Error('anthropic: response body is not iterable');
}
|
|
100
|
+
|
|
101
|
+
/**
 * Split a text buffer into complete Server-Sent-Events frames.
 *
 * A frame ends at a blank line; both LF ("\n\n") and CRLF ("\r\n\r\n")
 * line endings are recognised, so a stream relayed with CRLF endings is
 * parsed instead of accumulating forever. Text after the last blank line is
 * an incomplete frame and is left for the caller to keep buffering.
 *
 * Within a frame, `event:` sets the event name (default 'message') and every
 * `data:` line contributes one line of payload; values are whitespace-trimmed
 * and multiple data lines are joined with "\n". Other lines (comments such as
 * ": ping", unknown fields) are ignored, so such a frame is yielded with
 * empty data.
 *
 * @param {string} buffer Accumulated, already-decoded stream text.
 * @yields {{ event: string, data: string, nextCursor: number }} One object per
 *   complete frame; `nextCursor` is the buffer offset just past that frame.
 * @returns {number} Offset just past the last complete frame (0 if none).
 */
function* parseSseFrames(buffer) {
  // Sticky-style scan: lastIndex is reset to the cursor before each search.
  const boundary = /\r\n\r\n|\n\n/g;
  let cursor = 0;
  while (true) {
    boundary.lastIndex = cursor;
    const hit = boundary.exec(buffer);
    if (hit === null) break;
    const frame = buffer.slice(cursor, hit.index);
    cursor = hit.index + hit[0].length;

    let event = 'message';
    const dataLines = [];
    for (const line of frame.split(/\r?\n/)) {
      if (line.startsWith('event:')) event = line.slice(6).trim();
      else if (line.startsWith('data:')) dataLines.push(line.slice(5).trim());
    }
    // join() of an empty list is '', which is exactly the "no data" case.
    yield { event, data: dataLines.join('\n'), nextCursor: cursor };
  }
  return cursor;
}
|
|
125
|
+
|
|
126
|
+
export const anthropicProvider = {
  name: 'anthropic',
  /**
   * Stream one reply from the Anthropic Messages API as an async iterator of
   * text deltas.
   *
   * Only `user` / `assistant` messages are forwarded as conversation turns.
   * The system prompt is `opts.system`, else the content of the first
   * `system` message. Thinking deltas, assembled tool_use blocks and token
   * usage are reported through callbacks; an exception thrown by a callback
   * is swallowed so it cannot abort the stream. Frames whose JSON payload is
   * malformed are skipped.
   *
   * Usage is reported once: at `message_stop`, or — if the stream ends
   * without that event — when the body is exhausted, so token accounting is
   * not lost on a truncated stream.
   *
   * @param {Array<{role:string,content:string}>} messages
   * @param {{
   *   apiKey?: string,
   *   model?: string,
   *   fetch?: typeof fetch,
   *   maxTokens?: number,
   *   system?: string,
   *   cache?: boolean,
   *   thinking?: { enabled?: boolean, budgetTokens?: number },
   *   tools?: Array<object>,
   *   toolChoice?: object,
   *   signal?: AbortSignal,
   *   onThinking?: (text: string) => void,
   *   onToolUse?: (call: { id: string, name: string, input: object, raw: string }) => void,
   *   onUsage?: (usage: { inputTokens: number|null, outputTokens: number|null, cacheCreationInputTokens: number|null, cacheReadInputTokens: number|null }) => void,
   * }} opts
   *   apiKey — required; fetch — overrides globalThis.fetch (test seam);
   *   maxTokens — defaults to DEFAULT_MAX_TOKENS; cache — send the system
   *   prompt as an ephemeral-cacheable text block; thinking — enables extended
   *   thinking (budget defaults to 1024); tools/toolChoice — forwarded as-is,
   *   execution is the caller's job; signal — checked before the request and
   *   between chunks, and passed to fetch.
   * @yields {string} Text deltas in arrival order.
   * @throws {InvalidApiKeyError} Missing key, or HTTP 401/403.
   * @throws {RateLimitError} HTTP 429.
   * @throws {ApiError} Any other non-OK status, or an in-stream `error` event (reported as status 500).
   * @throws {AbortError} `opts.signal` aborted before the request or mid-stream.
   */
  async *sendMessage(messages, opts = {}) {
    if (!opts.apiKey) throw new InvalidApiKeyError('missing api key');
    const fetchFn = opts.fetch || globalThis.fetch;
    if (!fetchFn) throw new Error('anthropic: no fetch implementation available');

    const model = opts.model || 'claude-opus-4-7';
    const apiMessages = messages
      .filter(m => m.role === 'user' || m.role === 'assistant')
      .map(m => ({ role: m.role, content: String(m.content ?? '') }));

    const body = {
      model,
      max_tokens: opts.maxTokens || DEFAULT_MAX_TOKENS,
      stream: true,
      messages: apiMessages,
    };
    const sys = opts.system || messages.find(m => m.role === 'system')?.content;
    if (sys) {
      // Prompt caching: when opts.cache is truthy, mark the system prompt
      // as ephemeral-cacheable so repeated calls with the same system
      // prefix only pay full input cost once. The Messages API expects
      // an array of text blocks here, so we lift the string into one.
      if (opts.cache) {
        body.system = [{ type: 'text', text: String(sys), cache_control: { type: 'ephemeral' } }];
      } else {
        body.system = sys;
      }
    }
    // Extended thinking. opts.thinking: { enabled?: boolean, budgetTokens?: number }.
    // The thinking field is opt-in; when budget is set we always treat it as enabled
    // because the API rejects budget without a corresponding type.
    if (opts.thinking && (opts.thinking.enabled || opts.thinking.budgetTokens)) {
      body.thinking = {
        type: 'enabled',
        budget_tokens: opts.thinking.budgetTokens || 1024,
      };
    }
    // Tool-use is passthrough only: opts.tools forwards to the request
    // body, but execution is the caller's responsibility. We surface
    // assembled tool_use blocks via opts.onToolUse — the iterator itself
    // continues to yield only text deltas so existing callers don't
    // break.
    if (Array.isArray(opts.tools) && opts.tools.length > 0) {
      body.tools = opts.tools;
      if (opts.toolChoice) body.tool_choice = opts.toolChoice;
    }

    // Honor opts.signal (AbortSignal) so callers can cancel mid-stream.
    // Both the fetch itself and the body iterator check the signal — fetch
    // for in-flight aborts, the iterator so a cancel between bytes also
    // surfaces immediately rather than waiting for the next chunk.
    if (opts.signal?.aborted) throw new AbortError('aborted before request');

    const res = await fetchFn('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'x-api-key': opts.apiKey,
        'anthropic-version': ANTHROPIC_VERSION,
      },
      body: JSON.stringify(body),
      signal: opts.signal,
    });

    if (!res.ok) {
      const text = typeof res.text === 'function' ? await res.text() : '';
      if (res.status === 401 || res.status === 403) throw new InvalidApiKeyError(text || 'unauthorized');
      if (res.status === 429) throw new RateLimitError(parseRetryAfterMs(res.headers), text || '');
      throw new ApiError(res.status, text || '');
    }

    // Stream-mode TextDecoder so UTF-8 sequences split across network
    // chunk boundaries decode correctly. Without {stream:true} a multi-byte
    // codepoint that lands across two reads would surface as U+FFFD.
    const decoder = new TextDecoder('utf-8', { fatal: false });
    let buffer = '';
    // Tool-use blocks are emitted via content_block_start (with the name
    // + tool_use_id) followed by N content_block_delta frames carrying
    // input_json_delta partials. We accumulate per index; at content_block_stop
    // we hand the assembled object to opts.onToolUse.
    const openToolBlocks = new Map();
    // Usage accumulator. The Messages API splits totals across two events:
    //   message_start → message.usage.{input_tokens, cache_creation_input_tokens, cache_read_input_tokens}
    //   message_delta → usage.output_tokens (final)
    // We collect both and emit a single opts.onUsage call — normally right
    // before we return on message_stop, otherwise at end of stream.
    let usage = null;
    let usageReported = false;
    const reportUsage = () => {
      if (usageReported || !usage || typeof opts.onUsage !== 'function') return;
      usageReported = true;
      try { opts.onUsage(usage); } catch { /* never let a callback abort */ }
    };

    for await (const chunk of iterateBody(res.body)) {
      if (opts.signal?.aborted) throw new AbortError('aborted mid-stream');
      buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      let consumed = 0;
      for (const frame of parseSseFrames(buffer)) {
        consumed = frame.nextCursor;
        if (frame.event === 'message_start' && frame.data) {
          try {
            const obj = JSON.parse(frame.data);
            const u = obj?.message?.usage;
            if (u) {
              usage = {
                inputTokens: u.input_tokens ?? null,
                outputTokens: u.output_tokens ?? null,
                cacheCreationInputTokens: u.cache_creation_input_tokens ?? null,
                cacheReadInputTokens: u.cache_read_input_tokens ?? null,
              };
            }
          } catch { /* skip malformed */ }
        } else if (frame.event === 'message_delta' && frame.data) {
          try {
            const obj = JSON.parse(frame.data);
            const u = obj?.usage;
            if (u && usage) {
              // message_delta carries the final output_tokens — overwrite
              // the input-side initial value with the canonical total.
              if (Number.isFinite(u.output_tokens)) usage.outputTokens = u.output_tokens;
            } else if (u) {
              usage = { inputTokens: null, outputTokens: u.output_tokens ?? null, cacheCreationInputTokens: null, cacheReadInputTokens: null };
            }
          } catch { /* skip malformed */ }
        } else if (frame.event === 'content_block_start' && frame.data) {
          try {
            const obj = JSON.parse(frame.data);
            if (obj?.content_block?.type === 'tool_use') {
              openToolBlocks.set(obj.index, {
                id: obj.content_block.id,
                name: obj.content_block.name,
                inputJson: '',
              });
            }
          } catch { /* skip malformed */ }
        } else if (frame.event === 'content_block_delta' && frame.data) {
          try {
            const obj = JSON.parse(frame.data);
            const delta = obj?.delta || {};
            if (delta.type === 'text_delta' && delta.text) {
              yield delta.text;
            } else if (delta.type === 'thinking_delta' && delta.thinking && typeof opts.onThinking === 'function') {
              try { opts.onThinking(delta.thinking); } catch { /* never let a callback abort the stream */ }
            } else if (delta.type === 'input_json_delta' && typeof delta.partial_json === 'string') {
              const t = openToolBlocks.get(obj.index);
              if (t) t.inputJson += delta.partial_json;
            } else if (delta.text) {
              yield delta.text;
            }
          } catch {
            // Ignore malformed frame; the buffer may still contain valid frames.
          }
        } else if (frame.event === 'content_block_stop' && frame.data) {
          try {
            const obj = JSON.parse(frame.data);
            const t = openToolBlocks.get(obj.index);
            if (t) {
              openToolBlocks.delete(obj.index);
              if (typeof opts.onToolUse === 'function') {
                let input = {};
                try { input = t.inputJson ? JSON.parse(t.inputJson) : {}; }
                catch { /* malformed input → pass empty + raw for caller to inspect */ }
                try { opts.onToolUse({ id: t.id, name: t.name, input, raw: t.inputJson }); }
                catch { /* never let a callback abort the stream */ }
              }
            }
          } catch { /* skip malformed */ }
        } else if (frame.event === 'message_stop') {
          reportUsage();
          return;
        } else if (frame.event === 'error' && frame.data) {
          let parsed = null;
          try { parsed = JSON.parse(frame.data); } catch { /* keep raw */ }
          const message = parsed?.error?.message || frame.data;
          throw new ApiError(500, message);
        }
      }
      if (consumed > 0) buffer = buffer.slice(consumed);
    }
    // Body exhausted without message_stop. Whatever is left in `buffer` is an
    // unterminated frame and is discarded, but usage gathered so far is still
    // worth reporting.
    reportUsage();
  },
};
|
|
312
|
+
|
|
313
|
+
export { InvalidApiKeyError, ApiError, AbortError, RateLimitError };
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// Response cache decorator for providers — opt-in, in-memory, LRU.
|
|
2
|
+
//
|
|
3
|
+
// Why a decorator and not a per-provider option:
|
|
4
|
+
// - Same reasoning as withRateLimitRetry / withFallback: caching is
|
|
5
|
+
// *policy* (caller decides how aggressive), not transport. The
|
|
6
|
+
// providers themselves stay pure async iterators over a single call.
|
|
7
|
+
// - A single decorator works across every concrete provider — anthropic,
|
|
8
|
+
// openai, ollama, gemini, mock — without each having to grow its own
|
|
9
|
+
// cache machinery.
|
|
10
|
+
//
|
|
11
|
+
// Hashing strategy:
|
|
12
|
+
// - JSON-stringify the messages + model + cache-relevant opts and SHA-256
|
|
13
|
+
// it. We use a stable property order (Object.keys.sort) so that
|
|
14
|
+
// `{a:1,b:2}` and `{b:2,a:1}` hash identically — JSON.stringify alone
|
|
15
|
+
// respects insertion order, which would cause spurious misses.
|
|
16
|
+
// - opts that don't affect the response (signal, fetch, onThinking,
|
|
17
|
+
// onToolUse) are excluded from the hash on purpose.
|
|
18
|
+
//
|
|
19
|
+
// LRU + TTL:
|
|
20
|
+
// - On every hit/miss we touch the entry's recency. Eviction at
|
|
21
|
+
// maxEntries removes the oldest. TTL wins over LRU — an entry
|
|
22
|
+
// past its ttlMs is dropped before being treated as a hit.
|
|
23
|
+
//
|
|
24
|
+
// Streaming semantics:
|
|
25
|
+
// - On a hit, the wrapper replays the cached chunks via the same
|
|
26
|
+
// async-iterable shape callers expect. We don't re-introduce
|
|
27
|
+
// the original delays — the cache is a perf feature.
|
|
28
|
+
// - On a miss, we stream-through (yield each chunk as it arrives)
|
|
29
|
+
// and accumulate into a buffer. The buffer lands in the cache
|
|
30
|
+
// only when the source iterator completes successfully. Errors
|
|
31
|
+
// and aborts mid-stream do not poison the cache.
|
|
32
|
+
|
|
33
|
+
import crypto from 'node:crypto';
|
|
34
|
+
|
|
35
|
+
// Cache capacity used when withResponseCache is called without opts.maxEntries.
const DEFAULT_MAX_ENTRIES = 256;
// Entry lifetime in milliseconds used when opts.ttlMs is not given.
const DEFAULT_TTL_MS = 60 * 60 * 1000; // 1 hour
|
|
37
|
+
|
|
38
|
+
/**
 * JSON-encode a value with object keys in sorted order, so structurally
 * equal values always produce the same string regardless of the order in
 * which their properties were inserted.
 *
 * Follows JSON.stringify for the cases that matter to a cache key:
 *   - object properties whose value is undefined, a function or a symbol are
 *     omitted, so `{a: 1, b: undefined}` and `{a: 1}` encode identically;
 *   - the same values inside an array encode as `null`;
 *   - objects with a `toJSON` method (e.g. Date) are encoded via that method,
 *     so distinct dates do not all collapse to `{}`.
 *
 * @param {unknown} value
 * @returns {string | undefined} The encoding; undefined only when the
 *   top-level value itself has no JSON representation.
 */
function stableStringify(value) {
  const encode = (input) => {
    let v = input;
    if (v !== null && typeof v === 'object' && typeof v.toJSON === 'function') v = v.toJSON();
    // Primitives (and null): defer to JSON.stringify, which returns
    // undefined for undefined / function / symbol.
    if (v === null || typeof v !== 'object') return JSON.stringify(v);
    if (Array.isArray(v)) {
      return '[' + v.map((item) => encode(item) ?? 'null').join(',') + ']';
    }
    const parts = [];
    for (const k of Object.keys(v).sort()) {
      const encoded = encode(v[k]);
      if (encoded !== undefined) parts.push(JSON.stringify(k) + ':' + encoded);
    }
    return '{' + parts.join(',') + '}';
  };
  return encode(value);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Compute the cache key for a provider call: the SHA-256 hex digest of the
 * stably-stringified request fields that influence the response.
 *
 * Included: messages, model, thinking, system, tools, toolChoice and — when
 * set — maxTokens, since a reply generated under a small token cap must not
 * be served to a caller asking for a larger one. maxTokens is only added to
 * the hashed object when truthy, so keys for calls that do not set it are
 * unchanged. Everything else on `opts` (signal, fetch, callbacks, ...) is
 * deliberately left out.
 *
 * @param {Array<object>} messages
 * @param {string | undefined | null} model
 * @param {object | undefined | null} opts Send options; may be omitted.
 * @returns {string} 64-character lowercase hex digest.
 */
function hashKey(messages, model, opts) {
  const cacheable = {
    messages,
    model: model || null,
    thinking: opts?.thinking || null,
    system: opts?.system || null,
    tools: opts?.tools || null,
    toolChoice: opts?.toolChoice || null,
  };
  if (opts?.maxTokens) cacheable.maxTokens = opts.maxTokens;
  return crypto.createHash('sha256').update(stableStringify(cacheable)).digest('hex');
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* @typedef {{ chunks: string[], expiresAt: number }} CacheEntry
|
|
59
|
+
*/
|
|
60
|
+
|
|
61
|
+
/**
 * Wrap a provider so identical calls are served from an in-memory cache.
 *
 * Lookup is by hashKey(messages, model, opts). A live entry is replayed
 * chunk by chunk without contacting the provider; note that only the yielded
 * chunks are cached, so provider callbacks are not invoked on a hit. On a
 * miss the provider's stream is passed through and stored only after it
 * completes — if the provider throws, or the consumer stops iterating early,
 * nothing is stored. Entries expire after `ttlMs`; when the cache is full the
 * least recently used entry is evicted. A `maxEntries` of 0 or less disables
 * storing altogether.
 *
 * Exceptions thrown by `onHit` / `onMiss` are swallowed.
 *
 * @param {{ name: string, sendMessage: Function }} provider
 * @param {{
 *   maxEntries?: number,
 *   ttlMs?: number,
 *   now?: () => number,
 *   onHit?: (info: { keyHash: string, size: number }) => void,
 *   onMiss?: (info: { keyHash: string }) => void,
 * }} [opts]
 * @returns {{
 *   name: string,
 *   cacheStats: () => { hits: number, misses: number, size: number, maxEntries: number },
 *   cacheClear: () => void,
 *   sendMessage: Function,
 * }} Provider named `<name>+cache` with the same sendMessage contract.
 */
export function withResponseCache(provider, opts = {}) {
  const maxEntries = opts.maxEntries ?? DEFAULT_MAX_ENTRIES;
  const ttlMs = opts.ttlMs ?? DEFAULT_TTL_MS;
  const now = opts.now ?? (() => Date.now());
  const onHit = typeof opts.onHit === 'function' ? opts.onHit : null;
  const onMiss = typeof opts.onMiss === 'function' ? opts.onMiss : null;
  /** @type {Map<string, CacheEntry>} */
  const cache = new Map();
  let hits = 0;
  let misses = 0;

  // A Map iterates in insertion order, so re-inserting moves the entry to
  // the "most recently used" end.
  const touch = (key, entry) => {
    cache.delete(key);
    cache.set(key, entry);
  };

  return {
    name: `${provider.name}+cache`,
    /** Inspectable counters — useful for benchmarks and dashboards. */
    cacheStats() { return { hits, misses, size: cache.size, maxEntries }; },
    cacheClear() { cache.clear(); hits = 0; misses = 0; },
    async *sendMessage(messages, sendOpts = {}) {
      const key = hashKey(messages, sendOpts.model, sendOpts);
      const cached = cache.get(key);
      if (cached && cached.expiresAt > now()) {
        hits += 1;
        touch(key, cached);
        if (onHit) { try { onHit({ keyHash: key, size: cache.size }); } catch { /* swallow */ } }
        for (const chunk of cached.chunks) yield chunk;
        return;
      }
      misses += 1;
      if (onMiss) { try { onMiss({ keyHash: key }); } catch { /* swallow */ } }
      // Drop expired entry if we found one
      if (cached) cache.delete(key);

      // Stream through while recording. If the provider throws or the
      // consumer stops early, control never reaches the insert below, so a
      // partial reply cannot poison the cache.
      const captured = [];
      for await (const chunk of provider.sendMessage(messages, sendOpts)) {
        captured.push(chunk);
        yield chunk;
      }

      // A non-positive capacity means "never store".
      if (maxEntries <= 0) return;

      // Insert. LRU eviction if we're at the cap.
      while (cache.size >= maxEntries) {
        const oldestKey = cache.keys().next().value;
        if (oldestKey === undefined) break;
        cache.delete(oldestKey);
      }
      cache.set(key, { chunks: captured, expiresAt: now() + ttlMs });
    },
  };
}
|
|
131
|
+
|
|
132
|
+
export { stableStringify, hashKey };
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
// Auto-fallback wrapper for provider chains.
|
|
2
|
+
//
|
|
3
|
+
// Why a wrapper, not a registry feature:
|
|
4
|
+
// - Fallback policy is caller-specific. The daemon may want
|
|
5
|
+
// "anthropic → openai" while a CLI script wants "ollama only" with
|
|
6
|
+
// no remote fallback at all.
|
|
7
|
+
// - Wrapping keeps each provider self-contained and testable.
|
|
8
|
+
//
|
|
9
|
+
// Strategy:
|
|
10
|
+
// - Try providers in order. The first one that yields any chunk
|
|
11
|
+
// "wins" — we forward its stream and never touch the next one.
|
|
12
|
+
// - Fall through happens when the active provider throws BEFORE
|
|
13
|
+
// yielding any chunk. Once a provider has yielded text, we cannot
|
|
14
|
+
// retry without producing duplicate output, so post-yield errors
|
|
15
|
+
// bubble unchanged. This mirrors `withRateLimitRetry`.
|
|
16
|
+
// - Which errors are recoverable is configurable via `opts.shouldFallback`.
|
|
17
|
+
// Default: RATE_LIMIT, CONNECTION_REFUSED, 5xx and code-less network errors fall back; 'INVALID_KEY' and 'ABORT' never do (auth errors are
|
|
18
|
+
// usually structural — falling back doesn't fix them, it just delays
|
|
19
|
+
// the diagnosis). Callers can pass their own predicate.
|
|
20
|
+
|
|
21
|
+
// Error codes that always justify trying the next provider.
const DEFAULT_RECOVERABLE = new Set([
  'RATE_LIMIT',
  'CONNECTION_REFUSED',
  // Network-layer failures that arrive WITH a Node-style code. Code-less
  // network errors are already treated as recoverable by the predicate, so
  // coded ones are listed here to get the same treatment.
  'ECONNREFUSED',
  'ECONNRESET',
  'ETIMEDOUT',
  'ENOTFOUND',
  'EAI_AGAIN',
  'EPIPE',
  // Generic 5xx surfaces as ApiError without a code; we let the default
  // predicate fall back on them based on err.status >= 500.
]);

/**
 * Default policy for "is this error worth trying the next provider for?".
 *
 * Falls back on: a code listed in DEFAULT_RECOVERABLE, any 5xx `status`, or
 * an error with neither `code` nor `status` (typical of a failed fetch).
 * Never falls back on: a missing error, `INVALID_KEY` (auth problems are not
 * fixed by switching provider), or a cancellation — identified by code
 * `ABORT` or by the name `AbortError`, so a caller's abort is never answered
 * by silently starting a request against another provider.
 * Anything else (e.g. a 4xx status, an unknown code) does not fall back.
 *
 * @param {unknown} err
 * @returns {boolean}
 */
function defaultShouldFallback(err) {
  if (!err) return false;
  if (err.code === 'INVALID_KEY' || err.code === 'ABORT' || err.name === 'AbortError') return false;
  if (DEFAULT_RECOVERABLE.has(err.code)) return true;
  // Provider-side ApiError with a 5xx status → fall back.
  if (Number.isFinite(err.status) && err.status >= 500 && err.status < 600) return true;
  // Network-layer fetch failures (no status, no code) → fall back.
  if (!err.code && !err.status) return true;
  return false;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Combine several providers into one that tries them in order.
 *
 * The returned provider streams from the first entry of `chain`. If that
 * provider fails before producing any chunk, is not the last in the chain,
 * and `shouldFallback(err)` approves, the next provider is tried (after
 * notifying `onFallback`, whose own exceptions are ignored). An error raised
 * after output has started, from the last provider, or rejected by the
 * predicate is re-thrown unchanged.
 *
 * @param {Array<{ name: string, sendMessage: Function }>} chain
 *   Ordered list — the first provider is the primary. Must have at least one entry.
 * @param {{
 *   shouldFallback?: (err: Error) => boolean,
 *   onFallback?: (info: { from: string, to: string, err: Error }) => void,
 * }} fallbackOpts
 * @returns {{ name: string, sendMessage: Function }}
 * @throws {Error} When `chain` is not a non-empty array.
 */
export function withFallback(chain, fallbackOpts = {}) {
  const chainIsUsable = Array.isArray(chain) && chain.length > 0;
  if (!chainIsUsable) {
    throw new Error('withFallback: chain must contain at least one provider');
  }
  const shouldFallback = fallbackOpts.shouldFallback || defaultShouldFallback;
  const onFallback = fallbackOpts.onFallback;
  const label = chain.map((entry) => entry.name).join('→');

  async function* sendMessage(messages, opts = {}) {
    let failure = null;
    for (const [position, current] of chain.entries()) {
      let started = false;
      try {
        for await (const piece of current.sendMessage(messages, opts)) {
          started = true;
          yield piece;
        }
        return;
      } catch (err) {
        failure = err;
        // Give up when output already began (a retry would duplicate text),
        // when nobody is left to try, or when the predicate says no. The
        // predicate is consulted only if the first two checks pass.
        const isLast = position === chain.length - 1;
        if (started || isLast || !shouldFallback(err)) throw err;
        if (typeof onFallback === 'function') {
          try {
            onFallback({ from: current.name, to: chain[position + 1].name, err });
          } catch {
            /* swallow */
          }
        }
      }
    }
    // Only reachable if the loop body never ran to a return/throw.
    throw failure;
  }

  return { name: `fallback(${label})`, sendMessage };
}
|
|
89
|
+
|
|
90
|
+
export { defaultShouldFallback };
|