lazyclaw 3.88.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +186 -0
- package/cli.mjs +2648 -0
- package/config-validate.mjs +61 -0
- package/daemon.mjs +1451 -0
- package/logger.mjs +55 -0
- package/package.json +55 -0
- package/providers/anthropic.mjs +313 -0
- package/providers/cache.mjs +132 -0
- package/providers/fallback.mjs +90 -0
- package/providers/gemini.mjs +187 -0
- package/providers/ollama.mjs +148 -0
- package/providers/openai.mjs +243 -0
- package/providers/rates.mjs +85 -0
- package/providers/registry.mjs +144 -0
- package/providers/retry.mjs +103 -0
- package/ratelimit.mjs +65 -0
- package/rates-validate.mjs +58 -0
- package/sessions.mjs +177 -0
- package/skills.mjs +97 -0
- package/web/server.mjs +33 -0
- package/workflow/executor.mjs +358 -0
- package/workflow/persistent.mjs +369 -0
- package/workflow/summary.mjs +318 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
// Google Gemini (Generative Language API) streaming provider.
|
|
2
|
+
//
|
|
3
|
+
// Wire format:
|
|
4
|
+
// POST https://generativelanguage.googleapis.com/v1/models/{model}:streamGenerateContent?alt=sse&key={apiKey}
|
|
5
|
+
// { "contents": [...], "systemInstruction"?: {...} }
|
|
6
|
+
//
|
|
7
|
+
// Auth quirk: Gemini takes `?key=...` in the query string rather than a
|
|
8
|
+
// header. We still accept opts.apiKey via the same shape as the other
|
|
9
|
+
// providers and append it to the URL ourselves so callers don't have to
|
|
10
|
+
// remember the difference.
|
|
11
|
+
//
|
|
12
|
+
// Streaming format with `alt=sse`: standard SSE — `data: <json>\n\n` per
|
|
13
|
+
// chunk. Each JSON payload contains
|
|
14
|
+
// `candidates[0].content.parts[0].text` for text deltas. There's no
|
|
15
|
+
// terminator like `[DONE]`; the stream simply ends.
|
|
16
|
+
//
|
|
17
|
+
// Test seam: opts.fetch overrides globalThis.fetch.
|
|
18
|
+
|
|
19
|
+
const DEFAULT_BASE = 'https://generativelanguage.googleapis.com/v1';
|
|
20
|
+
|
|
21
|
+
/**
 * Error raised by the Gemini provider when no API key was supplied or the
 * API rejected the request with 401/403. Carries `code: 'INVALID_KEY'`.
 */
class InvalidApiKeyError extends Error {
  constructor(message = 'invalid api key') {
    super(message);
    Object.assign(this, { name: 'InvalidApiKeyError', code: 'INVALID_KEY' });
  }
}
|
|
28
|
+
/**
 * Error raised when the caller's AbortSignal is already aborted before the
 * request starts or becomes aborted while streaming. Carries `code: 'ABORT'`.
 */
class AbortError extends Error {
  constructor(message = 'aborted') {
    super(message);
    Object.assign(this, { name: 'AbortError', code: 'ABORT' });
  }
}
|
|
35
|
+
/**
 * Error raised for HTTP 429 responses from the Gemini API.
 *
 * @param {number} retryAfterMs - suggested wait before retrying, in milliseconds
 * @param {string} [body] - raw response body, kept for diagnostics
 */
class RateLimitError extends Error {
  constructor(retryAfterMs, body = '') {
    super(`gemini api 429: rate limited (retry-after ${retryAfterMs}ms)`);
    Object.assign(this, {
      name: 'RateLimitError',
      code: 'RATE_LIMIT',
      status: 429,
      retryAfterMs,
      body,
    });
  }
}
|
|
45
|
+
/**
 * Generic Gemini API failure. The message embeds the status and at most the
 * first 200 characters of the body; the full body stays available on `.body`.
 *
 * @param {number} status - HTTP status (or error code reported mid-stream)
 * @param {*} body - response body or error message
 */
class ApiError extends Error {
  constructor(status, body) {
    const preview = String(body).slice(0, 200);
    super(`gemini api ${status}: ${preview}`);
    Object.assign(this, { name: 'GeminiApiError', status, body });
  }
}
|
|
53
|
+
|
|
54
|
+
/**
 * Convert a `Retry-After` header into a delay in milliseconds.
 *
 * Accepts either an object exposing `.get(name)` (e.g. `Headers`, `Map`) or a
 * plain object of header fields. The header may be a number of seconds or a
 * date string; anything missing or unparseable falls back to 1000 ms.
 *
 * @param {*} headers - response headers in either shape, or null/undefined
 * @returns {number} non-negative delay in milliseconds
 */
function parseRetryAfterMs(headers) {
  const FALLBACK_MS = 1000;

  let raw = null;
  if (headers && typeof headers.get === 'function') {
    // Both casings are tried because Map-based test doubles are case-sensitive.
    raw = headers.get('retry-after') || headers.get('Retry-After');
  } else if (headers && typeof headers === 'object') {
    // Plain objects: match the field name case-insensitively.
    for (const [field, value] of Object.entries(headers)) {
      if (field.toLowerCase() === 'retry-after') {
        raw = value;
        break;
      }
    }
  }
  if (raw == null) return FALLBACK_MS;

  const text = String(raw).trim();
  if (text === '') return FALLBACK_MS;

  // Delta-seconds form. Only a complete numeric token counts (optionally
  // followed by a comma from a duplicated header), so a date such as
  // "2030-01-01T00:00:00Z" is not mistaken for 2030 seconds.
  const seconds = /^([+-]?\d+(?:\.\d+)?)\s*(?:,|$)/.exec(text);
  if (seconds) return Math.max(0, Math.round(Number(seconds[1]) * 1000));

  // Date form: wait until that instant, never a negative duration.
  const when = Date.parse(text);
  if (!Number.isNaN(when)) return Math.max(0, when - Date.now());

  return FALLBACK_MS;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Normalise a fetch response body into an async stream of chunks.
 *
 * Supported shapes, checked in this order: objects with `getReader()`
 * (WHATWG ReadableStream), async iterables, a plain string (yielded once as
 * UTF-8 bytes) and a single Uint8Array.
 *
 * When the consumer stops early (break/return/throw) or reading fails, the
 * reader is cancelled and its lock released so the underlying stream is not
 * left locked and open.
 *
 * @param {*} body - response body in one of the supported shapes
 * @yields {Uint8Array|string} chunks as produced by the underlying body
 * @throws {Error} when the body is none of the supported shapes
 */
async function* iterateBody(body) {
  if (body && typeof body.getReader === 'function') {
    const reader = body.getReader();
    let drained = false;
    try {
      while (true) {
        const { value, done } = await reader.read();
        if (done) {
          drained = true;
          break;
        }
        if (value) yield value;
      }
    } finally {
      if (!drained) {
        // Best effort: tell the source we no longer want data. Optional calls
        // keep minimal test doubles (objects with only `read`) working.
        try { await reader.cancel?.(); } catch { /* stream already errored or closed */ }
      }
      try { reader.releaseLock?.(); } catch { /* nothing left to release */ }
    }
    return;
  }
  if (body && typeof body[Symbol.asyncIterator] === 'function') {
    for await (const chunk of body) yield chunk;
    return;
  }
  if (typeof body === 'string') { yield new TextEncoder().encode(body); return; }
  if (body instanceof Uint8Array) { yield body; return; }
  throw new Error('gemini: response body is not iterable');
}
|
|
84
|
+
|
|
85
|
+
/**
 * Split buffered server-sent-event text into complete frames.
 *
 * A frame ends at a blank line. LF, CRLF and CR line endings are all
 * recognised, so a CRLF-delimited stream is parsed instead of being
 * buffered forever. Only `data:` lines are kept; multiple data lines in one
 * frame are joined with `\n`. Text after the last blank line is an incomplete
 * frame and is not yielded.
 *
 * @param {string} buffer - accumulated stream text
 * @yields {{data: string, nextCursor: number}} the frame payload (empty when
 *   the frame had no data lines) and the offset in `buffer` just past the
 *   frame's terminator, so the caller can drop consumed text
 */
function* parseSseFrames(buffer) {
  const boundary = /\r\n\r\n|\n\n|\r\r/g;
  let cursor = 0;
  let hit;
  while ((hit = boundary.exec(buffer)) !== null) {
    const frame = buffer.slice(cursor, hit.index);
    cursor = hit.index + hit[0].length;
    const dataLines = [];
    for (const line of frame.split(/\r\n|\n|\r/)) {
      if (line.startsWith('data:')) dataLines.push(line.slice(5).trim());
    }
    yield { data: dataLines.join('\n'), nextCursor: cursor };
  }
}
|
|
99
|
+
|
|
100
|
+
// Translate the canonical {role:user|assistant|system, content:string}
|
|
101
|
+
// message shape to Gemini's contents+systemInstruction shape.
|
|
102
|
+
// - assistant → "model" (Gemini's name for it)
|
|
103
|
+
// - system → bubbled up to systemInstruction (most recent wins on conflict)
|
|
104
|
+
/**
 * Build the Gemini request payload from canonical chat messages.
 *
 * `user` messages keep their role, `assistant` becomes `model`, and any other
 * role except `system` is dropped. The system instruction starts as
 * `opts.system` and is replaced by each `system` message encountered, so the
 * last one wins. `systemInstruction` is omitted when the text is empty.
 *
 * @param {Array<{role:string, content:*}>} messages
 * @param {{system?: string}} opts
 * @returns {{contents: Array<object>, systemInstruction?: object}}
 */
function toGeminiBody(messages, opts) {
  let systemText = opts.system || null;
  const contents = [];

  for (const m of messages) {
    switch (m.role) {
      case 'system':
        systemText = String(m.content ?? '');
        break;
      case 'user':
        contents.push({ role: 'user', parts: [{ text: String(m.content ?? '') }] });
        break;
      case 'assistant':
        contents.push({ role: 'model', parts: [{ text: String(m.content ?? '') }] });
        break;
      default:
        // Unknown roles are not representable in Gemini's contents array.
        break;
    }
  }

  return systemText
    ? { contents, systemInstruction: { parts: [{ text: systemText }] } }
    : { contents };
}
|
|
122
|
+
|
|
123
|
+
export const geminiProvider = {
  name: 'gemini',
  /**
   * Stream a chat completion from the Gemini API, yielding text deltas.
   *
   * The API key is sent as the `key` query parameter. Non-2xx responses map
   * to InvalidApiKeyError (401/403), RateLimitError (429) or ApiError. An
   * `{error}` payload arriving mid-stream is raised as ApiError; frames that
   * fail to parse are skipped.
   *
   * When `opts.onUsage` is a function it is called once after the stream
   * ends with the last `usageMetadata` seen, normalised to
   * `{inputTokens, outputTokens, totalTokens}`. Exceptions thrown by the
   * callback are ignored.
   *
   * @param {Array<{role:string,content:string}>} messages
   * @param {{ apiKey?:string, model?:string, baseUrl?:string, fetch?:typeof fetch, signal?:AbortSignal, system?:string, onUsage?:(usage:{inputTokens:number|null,outputTokens:number|null,totalTokens:number|null})=>void }} opts
   * @yields {string} text fragments in arrival order
   * @throws {InvalidApiKeyError|RateLimitError|ApiError|AbortError}
   */
  async *sendMessage(messages, opts = {}) {
    if (!opts.apiKey) throw new InvalidApiKeyError('missing api key');
    const fetchFn = opts.fetch || globalThis.fetch;
    if (!fetchFn) throw new Error('gemini: no fetch implementation available');
    const baseUrl = (opts.baseUrl || DEFAULT_BASE).replace(/\/$/, '');
    const model = opts.model || 'gemini-1.5-pro';

    if (opts.signal?.aborted) throw new AbortError('aborted before request');

    const url = `${baseUrl}/models/${encodeURIComponent(model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(opts.apiKey)}`;
    const res = await fetchFn(url, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(toGeminiBody(messages, opts)),
      signal: opts.signal,
    });

    if (!res.ok) {
      const text = typeof res.text === 'function' ? await res.text() : '';
      if (res.status === 401 || res.status === 403) throw new InvalidApiKeyError(text || 'unauthorized');
      if (res.status === 429) throw new RateLimitError(parseRetryAfterMs(res.headers), text || '');
      throw new ApiError(res.status, text || '');
    }

    const decoder = new TextDecoder('utf-8', { fatal: false });
    let buffer = '';
    // Last usageMetadata seen; reported once the stream has ended.
    let usage = null;
    for await (const chunk of iterateBody(res.body)) {
      if (opts.signal?.aborted) throw new AbortError('aborted mid-stream');
      buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      let consumed = 0;
      for (const frame of parseSseFrames(buffer)) {
        consumed = frame.nextCursor;
        if (!frame.data) continue;
        try {
          const obj = JSON.parse(frame.data);
          const meta = obj?.usageMetadata;
          if (meta && typeof meta === 'object') {
            usage = {
              inputTokens: meta.promptTokenCount ?? null,
              outputTokens: meta.candidatesTokenCount ?? null,
              totalTokens: meta.totalTokenCount ?? null,
            };
          }
          // Gemini may stream multiple parts; concatenate any text fields
          // we recognize. The path is candidates[0].content.parts[*].text.
          const parts = obj?.candidates?.[0]?.content?.parts;
          if (Array.isArray(parts)) {
            for (const p of parts) {
              if (typeof p?.text === 'string' && p.text) yield p.text;
            }
          }
          // Some error responses surface mid-stream as {error: {...}}.
          if (obj?.error) {
            const message = obj.error.message || JSON.stringify(obj.error);
            throw new ApiError(obj.error.code || 500, message);
          }
        } catch (err) {
          if (err instanceof ApiError) throw err;
          // malformed frame — skip and keep streaming
        }
      }
      // Keep only the unterminated remainder for the next chunk.
      if (consumed > 0) buffer = buffer.slice(consumed);
    }

    if (usage && typeof opts.onUsage === 'function') {
      try { opts.onUsage(usage); } catch { /* never let a callback abort the stream */ }
    }
  },
};
|
|
186
|
+
|
|
187
|
+
export { InvalidApiKeyError, ApiError, AbortError, RateLimitError };
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// Ollama local-model streaming provider.
|
|
2
|
+
//
|
|
3
|
+
// Endpoint: POST {baseUrl}/api/chat (default baseUrl http://127.0.0.1:11434)
|
|
4
|
+
// Body: { model, messages: [{role, content}], stream: true }
|
|
5
|
+
// Response: newline-delimited JSON, one object per chunk:
|
|
6
|
+
// {"message":{"role":"assistant","content":"hi"},"done":false}
|
|
7
|
+
// {"done":true,"prompt_eval_count":N,"eval_count":N,...}
|
|
8
|
+
//
|
|
9
|
+
// Differences from anthropic/openai:
|
|
10
|
+
// - No auth; opts.apiKey is ignored. We still accept it so the registry
|
|
11
|
+
// `providers list` shape stays uniform.
|
|
12
|
+
// - opts.baseUrl overrides the default endpoint (env OLLAMA_HOST also
|
|
13
|
+
// respected so a single env var aligns this with the dashboard).
|
|
14
|
+
// - opts.fetch test seam mirrors the other providers.
|
|
15
|
+
//
|
|
16
|
+
// Tools / thinking are not part of Ollama's chat API; we silently drop
|
|
17
|
+
// those opts rather than 400-ing so callers can swap providers without
|
|
18
|
+
// changing surrounding code.
|
|
19
|
+
|
|
20
|
+
const DEFAULT_BASE = 'http://127.0.0.1:11434';
|
|
21
|
+
|
|
22
|
+
/**
 * Error raised when the caller's AbortSignal is already aborted before the
 * request starts or becomes aborted while streaming. Carries `code: 'ABORT'`.
 */
class AbortError extends Error {
  constructor(message = 'aborted') {
    super(message);
    Object.assign(this, { name: 'AbortError', code: 'ABORT' });
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Generic Ollama API failure. The message embeds the status and at most the
 * first 200 characters of the body; the full body stays available on `.body`.
 *
 * @param {number} status - HTTP status (500 is used for in-stream errors)
 * @param {*} body - response body or error payload
 */
class ApiError extends Error {
  constructor(status, body) {
    const preview = String(body).slice(0, 200);
    super(`ollama api ${status}: ${preview}`);
    Object.assign(this, { name: 'OllamaApiError', status, body });
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Error raised when the Ollama endpoint cannot be reached, so callers can
 * show a targeted hint instead of a raw fetch failure.
 *
 * @param {string} baseUrl - endpoint that was tried
 * @param {*} cause - the underlying error, exposed as `.cause`
 */
class ConnectionError extends Error {
  constructor(baseUrl, cause) {
    super(`ollama: cannot reach ${baseUrl} (is the daemon running?)`);
    Object.assign(this, {
      name: 'OllamaConnectionError',
      code: 'CONNECTION_REFUSED',
      cause,
    });
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Normalise a fetch response body into an async stream of chunks.
 *
 * Supported shapes, checked in this order: objects with `getReader()`
 * (WHATWG ReadableStream), async iterables, a plain string (yielded once as
 * UTF-8 bytes) and a single Uint8Array.
 *
 * When the consumer stops early (break/return/throw) or reading fails, the
 * reader is cancelled and its lock released so the underlying stream is not
 * left locked and open.
 *
 * @param {*} body - response body in one of the supported shapes
 * @yields {Uint8Array|string} chunks as produced by the underlying body
 * @throws {Error} when the body is none of the supported shapes
 */
async function* iterateBody(body) {
  if (body && typeof body.getReader === 'function') {
    const reader = body.getReader();
    let drained = false;
    try {
      while (true) {
        const { value, done } = await reader.read();
        if (done) {
          drained = true;
          break;
        }
        if (value) yield value;
      }
    } finally {
      if (!drained) {
        // Best effort: tell the source we no longer want data. Optional calls
        // keep minimal test doubles (objects with only `read`) working.
        try { await reader.cancel?.(); } catch { /* stream already errored or closed */ }
      }
      try { reader.releaseLock?.(); } catch { /* nothing left to release */ }
    }
    return;
  }
  if (body && typeof body[Symbol.asyncIterator] === 'function') {
    for await (const chunk of body) yield chunk;
    return;
  }
  if (typeof body === 'string') { yield new TextEncoder().encode(body); return; }
  if (body instanceof Uint8Array) { yield body; return; }
  throw new Error('ollama: response body is not iterable');
}
|
|
66
|
+
|
|
67
|
+
export const ollamaProvider = {
  name: 'ollama',
  /**
   * Stream a chat completion from an Ollama server, yielding text deltas.
   *
   * The endpoint is `opts.baseUrl`, else `OLLAMA_HOST`, else the module
   * default. A value without a URL scheme (the `host` / `host:port` form
   * commonly used for `OLLAMA_HOST`) is expanded to `http://host:port`, with
   * port 11434 when none is given. The response is newline-delimited JSON;
   * lines that fail to parse are skipped, an `error` field raises ApiError,
   * and `done: true` ends the stream.
   *
   * @param {Array<{role:string,content:string}>} messages
   * @param {{ model?:string, baseUrl?:string, fetch?:typeof fetch, signal?:AbortSignal, system?:string }} opts
   * @yields {string} text fragments in arrival order
   * @throws {ConnectionError|ApiError|AbortError}
   */
  async *sendMessage(messages, opts = {}) {
    const fetchFn = opts.fetch || globalThis.fetch;
    if (!fetchFn) throw new Error('ollama: no fetch implementation available');

    let baseUrl = (opts.baseUrl || process.env.OLLAMA_HOST || DEFAULT_BASE).trim();
    if (!/^[a-z][a-z0-9+.-]*:\/\//i.test(baseUrl)) {
      // Bare host[:port][/path] — fetch needs an absolute URL.
      const [authority, ...pathParts] = baseUrl.split('/');
      const hostPort = /:\d+$/.test(authority) ? authority : `${authority}:11434`;
      baseUrl = [`http://${hostPort}`, ...pathParts].join('/');
    }
    baseUrl = baseUrl.replace(/\/$/, '');
    const model = opts.model || 'llama3.1';

    const apiMessages = [];
    const sys = opts.system || messages.find(m => m.role === 'system')?.content;
    if (sys) apiMessages.push({ role: 'system', content: String(sys) });
    for (const m of messages) {
      if (m.role === 'user' || m.role === 'assistant') {
        apiMessages.push({ role: m.role, content: String(m.content ?? '') });
      }
    }

    if (opts.signal?.aborted) throw new AbortError('aborted before request');

    let res;
    try {
      res = await fetchFn(`${baseUrl}/api/chat`, {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify({ model, messages: apiMessages, stream: true }),
        signal: opts.signal,
      });
    } catch (err) {
      // Distinguish "ollama isn't running" from generic network errors so
      // callers can show a useful prompt ("brew services start ollama")
      // instead of a confusing fetch trace.
      if (err && (err.cause?.code === 'ECONNREFUSED' || err.code === 'ECONNREFUSED' || /ECONNREFUSED|fetch failed/i.test(String(err.message)))) {
        throw new ConnectionError(baseUrl, err);
      }
      throw err;
    }

    if (!res.ok) {
      const text = typeof res.text === 'function' ? await res.text() : '';
      throw new ApiError(res.status, text || '');
    }

    const decoder = new TextDecoder('utf-8', { fatal: false });
    let buffer = '';
    for await (const chunk of iterateBody(res.body)) {
      if (opts.signal?.aborted) throw new AbortError('aborted mid-stream');
      buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      // Ollama is newline-delimited JSON, not SSE — split on '\n', parse
      // each non-empty line, leave trailing partial behind for the next read.
      let nl;
      while ((nl = buffer.indexOf('\n')) >= 0) {
        const line = buffer.slice(0, nl);
        buffer = buffer.slice(nl + 1);
        if (!line.trim()) continue;
        try {
          const obj = JSON.parse(line);
          if (obj?.message?.content) yield obj.message.content;
          if (obj?.done) return;
          if (obj?.error) throw new ApiError(500, obj.error);
        } catch (err) {
          if (err instanceof ApiError) throw err;
          // malformed line — skip and keep streaming
        }
      }
    }
    // The stream may end without a final newline: whatever is still in
    // `buffer`, plus any bytes the decoder was holding, is the last line.
    const tail = (buffer + decoder.decode()).trim();
    if (tail) {
      let obj = null;
      try { obj = JSON.parse(tail); } catch { /* malformed tail — drop */ }
      if (obj?.message?.content) yield obj.message.content;
      if (obj?.error) throw new ApiError(500, obj.error);
    }
  },
};
|
|
147
|
+
|
|
148
|
+
export { ApiError, AbortError, ConnectionError };
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
// OpenAI Chat Completions API streaming provider.
|
|
2
|
+
//
|
|
3
|
+
// Format reference (matches OpenClaw and the OpenAI SDK shape):
|
|
4
|
+
// POST https://api.openai.com/v1/chat/completions
|
|
5
|
+
// Authorization: Bearer <key>
|
|
6
|
+
// {"model": "...", "stream": true, "messages": [...]}
|
|
7
|
+
//
|
|
8
|
+
// SSE body: each frame is `data: <json>\n\n`. Terminator is the literal
|
|
9
|
+
// `data: [DONE]\n\n`. Token text lives at `choices[0].delta.content`.
|
|
10
|
+
//
|
|
11
|
+
// Test seam mirrors the Anthropic provider: opts.fetch overrides
|
|
12
|
+
// globalThis.fetch, opts.maxTokens caps `max_tokens`, opts.system seeds
|
|
13
|
+
// a system message.
|
|
14
|
+
|
|
15
|
+
const DEFAULT_MAX_TOKENS = 4096;
|
|
16
|
+
|
|
17
|
+
/**
 * Error raised by the OpenAI provider when no API key was supplied or the
 * API rejected the request with 401/403. Carries `code: 'INVALID_KEY'`.
 */
class InvalidApiKeyError extends Error {
  constructor(message = 'invalid OpenAI api key') {
    super(message);
    Object.assign(this, { name: 'InvalidApiKeyError', code: 'INVALID_KEY' });
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Error raised when the caller's AbortSignal is already aborted before the
 * request starts or becomes aborted while streaming. Carries `code: 'ABORT'`.
 */
class AbortError extends Error {
  constructor(message = 'aborted') {
    super(message);
    Object.assign(this, { name: 'AbortError', code: 'ABORT' });
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Error raised for HTTP 429 responses from the OpenAI API.
 *
 * @param {number} retryAfterMs - suggested wait before retrying, in milliseconds
 * @param {string} [body] - raw response body, kept for diagnostics
 */
class RateLimitError extends Error {
  constructor(retryAfterMs, body = '') {
    super(`openai api 429: rate limited (retry-after ${retryAfterMs}ms)`);
    Object.assign(this, {
      name: 'RateLimitError',
      code: 'RATE_LIMIT',
      status: 429,
      retryAfterMs,
      body,
    });
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Generic OpenAI API failure. The message embeds the status and at most the
 * first 200 characters of the body; the full body stays available on `.body`.
 *
 * @param {number} status - HTTP status of the failed response
 * @param {*} body - response body
 */
class ApiError extends Error {
  constructor(status, body) {
    const preview = String(body).slice(0, 200);
    super(`openai api ${status}: ${preview}`);
    Object.assign(this, { name: 'OpenAiApiError', status, body });
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Convert a `Retry-After` header into a delay in milliseconds.
 *
 * Accepts either an object exposing `.get(name)` (e.g. `Headers`, `Map`) or a
 * plain object of header fields. The header may be a number of seconds or a
 * date string; anything missing or unparseable falls back to 1000 ms.
 *
 * @param {*} headers - response headers in either shape, or null/undefined
 * @returns {number} non-negative delay in milliseconds
 */
function parseRetryAfterMs(headers) {
  const FALLBACK_MS = 1000;

  let raw = null;
  if (headers && typeof headers.get === 'function') {
    // Both casings are tried because Map-based test doubles are case-sensitive.
    raw = headers.get('retry-after') || headers.get('Retry-After');
  } else if (headers && typeof headers === 'object') {
    // Plain objects: match the field name case-insensitively.
    for (const [field, value] of Object.entries(headers)) {
      if (field.toLowerCase() === 'retry-after') {
        raw = value;
        break;
      }
    }
  }
  if (raw == null) return FALLBACK_MS;

  const text = String(raw).trim();
  if (text === '') return FALLBACK_MS;

  // Delta-seconds form. Only a complete numeric token counts (optionally
  // followed by a comma from a duplicated header), so a date such as
  // "2030-01-01T00:00:00Z" is not mistaken for 2030 seconds.
  const seconds = /^([+-]?\d+(?:\.\d+)?)\s*(?:,|$)/.exec(text);
  if (seconds) return Math.max(0, Math.round(Number(seconds[1]) * 1000));

  // Date form: wait until that instant, never a negative duration.
  const when = Date.parse(text);
  if (!Number.isNaN(when)) return Math.max(0, when - Date.now());

  return FALLBACK_MS;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Normalise a fetch response body into an async stream of chunks.
 *
 * Supported shapes, checked in this order: objects with `getReader()`
 * (WHATWG ReadableStream), async iterables, a plain string (yielded once as
 * UTF-8 bytes) and a single Uint8Array.
 *
 * When the consumer stops early (break/return/throw) or reading fails, the
 * reader is cancelled and its lock released so the underlying stream is not
 * left locked and open.
 *
 * @param {*} body - response body in one of the supported shapes
 * @yields {Uint8Array|string} chunks as produced by the underlying body
 * @throws {Error} when the body is none of the supported shapes
 */
async function* iterateBody(body) {
  if (body && typeof body.getReader === 'function') {
    const reader = body.getReader();
    let drained = false;
    try {
      while (true) {
        const { value, done } = await reader.read();
        if (done) {
          drained = true;
          break;
        }
        if (value) yield value;
      }
    } finally {
      if (!drained) {
        // Best effort: tell the source we no longer want data. Optional calls
        // keep minimal test doubles (objects with only `read`) working.
        try { await reader.cancel?.(); } catch { /* stream already errored or closed */ }
      }
      try { reader.releaseLock?.(); } catch { /* nothing left to release */ }
    }
    return;
  }
  if (body && typeof body[Symbol.asyncIterator] === 'function') {
    for await (const chunk of body) yield chunk;
    return;
  }
  if (typeof body === 'string') { yield new TextEncoder().encode(body); return; }
  if (body instanceof Uint8Array) { yield body; return; }
  throw new Error('openai: response body is not iterable');
}
|
|
83
|
+
|
|
84
|
+
/**
 * Split buffered server-sent-event text into complete frames.
 *
 * A frame ends at a blank line. LF, CRLF and CR line endings are all
 * recognised, so OpenAI-compatible servers that emit CRLF are parsed instead
 * of being buffered forever. Only `data:` lines are kept; multiple data lines
 * in one frame are joined with `\n`. Text after the last blank line is an
 * incomplete frame and is not yielded.
 *
 * @param {string} buffer - accumulated stream text
 * @yields {{data: string, nextCursor: number}} the frame payload (empty when
 *   the frame had no data lines) and the offset in `buffer` just past the
 *   frame's terminator, so the caller can drop consumed text
 */
function* parseSseFrames(buffer) {
  const boundary = /\r\n\r\n|\n\n|\r\r/g;
  let cursor = 0;
  let hit;
  while ((hit = boundary.exec(buffer)) !== null) {
    const frame = buffer.slice(cursor, hit.index);
    cursor = hit.index + hit[0].length;
    const dataLines = [];
    for (const line of frame.split(/\r\n|\n|\r/)) {
      if (line.startsWith('data:')) dataLines.push(line.slice(5).trim());
    }
    yield { data: dataLines.join('\n'), nextCursor: cursor };
  }
}
|
|
99
|
+
|
|
100
|
+
export const openaiProvider = {
  name: 'openai',
  /**
   * Stream a chat completion from the OpenAI Chat Completions API, yielding
   * text deltas.
   *
   * Non-2xx responses map to InvalidApiKeyError (401/403), RateLimitError
   * (429) or ApiError. An `{error}` payload arriving mid-stream is raised as
   * ApiError; frames that fail to parse are skipped.
   *
   * Tool calls are assembled from streamed deltas and delivered through
   * `opts.onToolUse({id, name, input, raw})`. Usage is requested only when
   * `opts.onUsage` is a function and is delivered as
   * `{inputTokens, outputTokens, totalTokens}`. Pending tool calls and usage
   * are delivered at `[DONE]` and also when the stream ends without it.
   * Exceptions thrown by either callback are ignored.
   *
   * @param {Array<{role:string,content:string}>} messages
   * @param {{apiKey?:string, model?:string, baseUrl?:string, fetch?:typeof fetch, maxTokens?:number, system?:string, signal?:AbortSignal, tools?:Array<object>, toolChoice?:*, onUsage?:Function, onToolUse?:Function}} opts
   *   `baseUrl` defaults to `https://api.openai.com/v1`.
   * @yields {string} text fragments in arrival order
   * @throws {InvalidApiKeyError|RateLimitError|ApiError|AbortError}
   */
  async *sendMessage(messages, opts = {}) {
    if (!opts.apiKey) throw new InvalidApiKeyError('missing api key');
    const fetchFn = opts.fetch || globalThis.fetch;
    if (!fetchFn) throw new Error('openai: no fetch implementation available');
    const baseUrl = (opts.baseUrl || 'https://api.openai.com/v1').replace(/\/$/, '');

    const model = opts.model || 'gpt-4.1';
    const apiMessages = [];
    const sys = opts.system || messages.find(m => m.role === 'system')?.content;
    if (sys) apiMessages.push({ role: 'system', content: String(sys) });
    for (const m of messages) {
      if (m.role === 'user' || m.role === 'assistant') {
        apiMessages.push({ role: m.role, content: String(m.content ?? '') });
      }
    }

    const body = {
      model,
      max_tokens: opts.maxTokens || DEFAULT_MAX_TOKENS,
      stream: true,
      messages: apiMessages,
    };
    // Tool-use passthrough mirrors the anthropic provider: opts.tools is an
    // array of OpenAI-shaped tools. opts.toolChoice maps to tool_choice.
    if (Array.isArray(opts.tools) && opts.tools.length > 0) {
      body.tools = opts.tools;
      if (opts.toolChoice) body.tool_choice = opts.toolChoice;
    }
    // Usage capture is opt-in via stream_options. We only request it when
    // the caller provided an onUsage callback. The shape comes back as a
    // top-level `usage` field on a final chunk that has empty choices,
    // right before `[DONE]`.
    if (typeof opts.onUsage === 'function') {
      body.stream_options = { include_usage: true };
    }

    if (opts.signal?.aborted) throw new AbortError('aborted before request');
    const res = await fetchFn(`${baseUrl}/chat/completions`, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'authorization': `Bearer ${opts.apiKey}`,
      },
      body: JSON.stringify(body),
      signal: opts.signal,
    });

    if (!res.ok) {
      const text = typeof res.text === 'function' ? await res.text() : '';
      if (res.status === 401 || res.status === 403) throw new InvalidApiKeyError(text || 'unauthorized');
      if (res.status === 429) throw new RateLimitError(parseRetryAfterMs(res.headers), text || '');
      throw new ApiError(res.status, text || '');
    }

    const decoder = new TextDecoder('utf-8', { fatal: false });
    let buffer = '';
    // Normalized usage ({inputTokens, outputTokens, totalTokens}) captured
    // from the chunk that carries a top-level `usage` field.
    let usage = null;
    // OpenAI streams tool_calls as deltas with an `index` we use as the
    // accumulation key. Each delta may carry a partial id, name, and/or
    // arguments string.
    const toolCallsByIndex = new Map();
    const flushToolCall = (idx) => {
      const tc = toolCallsByIndex.get(idx);
      if (!tc || !tc.function?.name) return;
      toolCallsByIndex.delete(idx);
      if (typeof opts.onToolUse !== 'function') return;
      let input = {};
      try { input = tc.function.arguments ? JSON.parse(tc.function.arguments) : {}; }
      catch { /* malformed → empty + raw */ }
      try {
        opts.onToolUse({
          id: tc.id || null,
          name: tc.function.name,
          input,
          raw: tc.function.arguments || '',
        });
      } catch { /* never let a callback abort the stream */ }
    };
    // Runs exactly once per stream: at [DONE], or at end-of-stream when the
    // server never sends [DONE].
    const finish = () => {
      for (const idx of Array.from(toolCallsByIndex.keys())) flushToolCall(idx);
      if (usage && typeof opts.onUsage === 'function') {
        try { opts.onUsage(usage); } catch { /* never let a callback abort */ }
      }
    };

    for await (const chunk of iterateBody(res.body)) {
      if (opts.signal?.aborted) throw new AbortError('aborted mid-stream');
      buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      let consumed = 0;
      for (const frame of parseSseFrames(buffer)) {
        consumed = frame.nextCursor;
        if (!frame.data) continue;
        if (frame.data === '[DONE]') {
          finish();
          return;
        }
        try {
          const obj = JSON.parse(frame.data);
          // Errors can surface mid-stream as {error: ...}; raise them rather
          // than ending the stream as if it had completed normally.
          if (obj?.error) {
            const message = typeof obj.error === 'string'
              ? obj.error
              : (obj.error.message || JSON.stringify(obj.error));
            throw new ApiError(typeof obj.error.code === 'number' ? obj.error.code : 500, message);
          }
          if (obj?.usage && typeof obj.usage === 'object') {
            usage = {
              inputTokens: obj.usage.prompt_tokens ?? null,
              outputTokens: obj.usage.completion_tokens ?? null,
              totalTokens: obj.usage.total_tokens ?? null,
            };
          }
          const choice = obj?.choices?.[0];
          const delta = choice?.delta || {};
          if (delta.content) yield delta.content;
          if (Array.isArray(delta.tool_calls)) {
            for (const td of delta.tool_calls) {
              const idx = td.index ?? 0;
              const cur = toolCallsByIndex.get(idx) || { id: null, function: { name: '', arguments: '' } };
              if (td.id) cur.id = td.id;
              if (td.function?.name) cur.function.name = td.function.name;
              if (typeof td.function?.arguments === 'string') cur.function.arguments += td.function.arguments;
              toolCallsByIndex.set(idx, cur);
            }
          }
          if (choice?.finish_reason === 'tool_calls') {
            for (const idx of Array.from(toolCallsByIndex.keys())) flushToolCall(idx);
          }
        } catch (err) {
          if (err instanceof ApiError) throw err;
          // Ignore malformed frames; keep scanning the rest of the buffer.
        }
      }
      // Keep only the unterminated remainder for the next chunk.
      if (consumed > 0) buffer = buffer.slice(consumed);
    }
    // Stream ended without [DONE]: an unterminated trailing frame is
    // discarded, but callbacks still receive what was fully received.
    finish();
  },
};
|
|
242
|
+
|
|
243
|
+
export { InvalidApiKeyError, ApiError, AbortError, RateLimitError };
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// Token → currency conversion helper.
|
|
2
|
+
//
|
|
3
|
+
// Why no shipped rate card:
|
|
4
|
+
// - Provider prices change. Hardcoding them sets up the library to
|
|
5
|
+
// silently lie about cost the moment a price moves.
|
|
6
|
+
// - Different teams negotiate different deals (volume contracts,
|
|
7
|
+
// regional pricing, provider-managed proxies). A single global
|
|
8
|
+
// default would be wrong for most users.
|
|
9
|
+
//
|
|
10
|
+
// Shape:
|
|
11
|
+
// const rates = {
|
|
12
|
+
// 'anthropic/claude-opus-4-7': {
|
|
13
|
+
// inputPer1M: 15.00,
|
|
14
|
+
// outputPer1M: 75.00,
|
|
15
|
+
// cacheReadPer1M: 1.50,
|
|
16
|
+
// cacheCreatePer1M: 18.75,
|
|
17
|
+
// currency: 'USD',
|
|
18
|
+
// },
|
|
19
|
+
// 'openai/gpt-4.1': { inputPer1M: 2.00, outputPer1M: 8.00, currency: 'USD' },
|
|
20
|
+
// };
|
|
21
|
+
//
|
|
22
|
+
// `costFromUsage({provider, model, usage}, rates)` returns
|
|
23
|
+
// { cost, currency, breakdown }
|
|
24
|
+
// where breakdown shows the per-bucket charge so callers can audit.
|
|
25
|
+
//
|
|
26
|
+
// Rates are per *million* tokens because that's how every provider
|
|
27
|
+
// publishes them — multiplying tokens/1_000_000 keeps the arithmetic
|
|
28
|
+
// readable in tests.
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* @param {{ provider: string, model: string, usage: object }} call
|
|
32
|
+
* @param {Record<string, { inputPer1M: number, outputPer1M: number,
|
|
33
|
+
* cacheReadPer1M?: number, cacheCreatePer1M?: number,
|
|
34
|
+
* currency?: string }>} rates
|
|
35
|
+
* @returns {{ cost: number, currency: string, breakdown: object } | null}
|
|
36
|
+
*/
|
|
37
|
+
/**
 * Price one call's token usage against a caller-supplied rate card.
 *
 * The rate is looked up under the key `"<provider>/<model>"`. Input and
 * output buckets treat missing or non-numeric values as zero; the cache
 * buckets contribute only when both the token count and the rate are
 * positive. All amounts are rounded with `round6`.
 *
 * @param {{ provider: string, model: string, usage: object }} call
 * @param {Record<string, object>} rates - per-million-token prices by key
 * @returns {{ cost: number, currency: string, breakdown: object } | null}
 *   null when `call` or `rates` is missing or no rate matches the key
 */
export function costFromUsage(call, rates) {
  if (!call || !rates) return null;

  const rate = rates[`${call.provider}/${call.model}`];
  if (!rate) return null;

  const usage = call.usage || {};
  const PER_MILLION = 1_000_000;

  // Always-on buckets: anything non-numeric counts as zero.
  const plain = (tokens, price) =>
    ((Number(tokens) || 0) / PER_MILLION) * (Number(price) || 0);
  // Cache buckets: charged only when usage and rate are both positive.
  const cached = (tokens, price) => {
    if (Number(tokens) > 0 && Number(price) > 0) return (tokens / PER_MILLION) * price;
    return 0;
  };

  const input = plain(usage.inputTokens, rate.inputPer1M);
  const output = plain(usage.outputTokens, rate.outputPer1M);
  const cacheRead = cached(usage.cacheReadInputTokens, rate.cacheReadPer1M);
  const cacheCreate = cached(usage.cacheCreationInputTokens, rate.cacheCreatePer1M);

  return {
    cost: round6(input + output + cacheRead + cacheCreate),
    currency: rate.currency || 'USD',
    breakdown: {
      input: round6(input),
      output: round6(output),
      cacheRead: round6(cacheRead),
      cacheCreate: round6(cacheCreate),
    },
  };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Round to six decimal places: enough for fractions of a cent while removing
 * floating-point noise from the per-million division.
 *
 * @param {number} n
 * @returns {number}
 */
function round6(n) {
  const SCALE = 1_000_000;
  const scaled = Math.round(n * SCALE);
  return scaled / SCALE;
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Reference shape so callers can copy-paste a starting point and edit
|
|
71
|
+
* with their own current rates. The numbers here are deliberately
|
|
72
|
+
* placeholders (zeros) — see this module's header for why we don't
|
|
73
|
+
* ship real prices.
|
|
74
|
+
*/
|
|
75
|
+
/**
 * Template rate card showing the expected keys (`"<provider>/<model>"`) and
 * fields. Every price is a zero placeholder; replace them with current rates
 * before using the card for cost calculation.
 */
export const RATE_CARD_SHAPE = {
  'anthropic/claude-opus-4-7': {
    inputPer1M: 0,
    outputPer1M: 0,
    cacheReadPer1M: 0,
    cacheCreatePer1M: 0,
    currency: 'USD',
  },
  'openai/gpt-4.1': {
    inputPer1M: 0,
    outputPer1M: 0,
    currency: 'USD',
  },
};
|