@loreai/gateway 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +3548 -0
- package/dist/index.js.map +7 -0
- package/package.json +53 -0
- package/src/auth.ts +133 -0
- package/src/batch-queue.ts +555 -0
- package/src/compaction.ts +195 -0
- package/src/config.ts +199 -0
- package/src/idle.ts +246 -0
- package/src/index.ts +41 -0
- package/src/llm-adapter.ts +110 -0
- package/src/pipeline.ts +1604 -0
- package/src/recall.ts +301 -0
- package/src/recorder.ts +192 -0
- package/src/server.ts +250 -0
- package/src/session.ts +207 -0
- package/src/stream/anthropic.ts +708 -0
- package/src/temporal-adapter.ts +307 -0
- package/src/translate/anthropic.ts +425 -0
- package/src/translate/openai.ts +536 -0
- package/src/translate/types.ts +177 -0
- package/src/worker-model.ts +408 -0
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Batch queue for Anthropic Message Batches API.
|
|
3
|
+
*
|
|
4
|
+
* Wraps a synchronous LLMClient and intercepts non-urgent `prompt()` calls,
|
|
5
|
+
* accumulating them in a queue. A flush timer periodically sends the queue
|
|
6
|
+
* to Anthropic's `/v1/messages/batches` endpoint for 50% cost savings.
|
|
7
|
+
* A poll timer checks for results and resolves the pending promises.
|
|
8
|
+
*
|
|
9
|
+
* Urgent calls (compaction, overflow recovery, query expansion) bypass
|
|
10
|
+
* the queue entirely and delegate to the inner synchronous client.
|
|
11
|
+
*
|
|
12
|
+
* Auth credentials are snapshotted per-item at enqueue time and grouped
|
|
13
|
+
* by credential at flush time — this ensures multi-session isolation when
|
|
14
|
+
* multiple clients with different API keys are connected simultaneously.
|
|
15
|
+
*
|
|
16
|
+
* This is a gateway-only enhancement — the OpenCode and Pi adapters
|
|
17
|
+
* always process immediately regardless of the `urgent` flag.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import type { LLMClient } from "@loreai/core";
|
|
21
|
+
import { log } from "@loreai/core";
|
|
22
|
+
import type { AuthCredential } from "./auth";
|
|
23
|
+
import { authHeaders } from "./auth";
|
|
24
|
+
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// Types
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
/** One system-prompt text block, optionally tagged with a prompt-cache directive. */
type SystemTextBlock = {
  type: string;
  text: string;
  cache_control?: { type: string; ttl?: string };
};

/**
 * A queued `prompt()` call that has not been answered yet.
 *
 * The entry carries everything needed to submit the call inside a batch and,
 * later, to settle the promise handed back to the caller.
 */
interface PendingRequest {
  /** Correlation key echoed back in batch results (alphanumeric + hyphens). */
  customId: string;
  /** Messages API request body for this call. */
  params: {
    model: string;
    max_tokens: number;
    /** Either a plain system string or a list of text blocks. */
    system: string | SystemTextBlock[];
    messages: { role: string; content: string }[];
  };
  /** Settles the caller's promise with the response text (or `null`). */
  resolve: (value: string | null) => void;
  /** Rejects the caller's promise with an error. */
  reject: (error: Error) => void;
  /** Epoch-millisecond timestamp taken when the call entered the queue. */
  enqueuedAt: number;
  /** Credential captured at enqueue time so sessions stay isolated. */
  auth: AuthCredential;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Bookkeeping for a batch that was accepted upstream and is now being polled.
 */
interface InflightBatch {
  /** Batch identifier returned by the create endpoint. */
  batchId: string;
  /** Credential the batch was created with; reused for poll/retrieve calls. */
  auth: AuthCredential;
  /** Epoch-millisecond timestamp recorded when the batch was tracked. */
  submittedAt: number;
  /** Handle of the interval that polls this batch. */
  pollTimer: ReturnType<typeof setInterval>;
  /** Unanswered requests of this batch, keyed by their `custom_id`. */
  requests: Map<string, PendingRequest>;
}
|
|
65
|
+
|
|
66
|
+
/** Optional tuning knobs for the batch queue; every field has a default. */
export interface BatchQueueConfig {
  /**
   * Number of queued items that triggers an immediate flush.
   * @default 50
   */
  maxQueueSize?: number;
  /**
   * Interval between timer-driven queue flushes, in milliseconds.
   * @default 30000 (30s)
   */
  flushIntervalMs?: number;
  /**
   * Interval between status polls of a submitted batch, in milliseconds.
   * @default 60000 (60s)
   */
  pollIntervalMs?: number;
  /**
   * Age after which a batch is given up on and its items fall back, in milliseconds.
   * @default 3600000 (1h)
   */
  maxBatchAgeMs?: number;
}
|
|
76
|
+
|
|
77
|
+
// Defaults applied when the corresponding BatchQueueConfig field is omitted.
const DEFAULT_FLUSH_INTERVAL_MS = 30 * 1000; // 30 seconds
const DEFAULT_MAX_QUEUE_SIZE = 50;
const DEFAULT_POLL_INTERVAL_MS = 60 * 1000; // 60 seconds
const DEFAULT_MAX_BATCH_AGE_MS = 60 * 60 * 1000; // 1 hour
|
|
81
|
+
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
// ID generation
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
/** Per-process sequence number; makes IDs generated in the same millisecond distinct. */
let idCounter = 0;

/**
 * Build a batch-API-compatible custom_id (alphanumeric + hyphens, 1-64 chars).
 *
 * Layout: `lore-<timestamp>-<sequence>-<random>`, each part in base 36.
 *
 * @returns A fresh correlation ID.
 */
function generateCustomId(): string {
  const segments = [
    "lore",
    Date.now().toString(36),
    (idCounter++).toString(36),
    // Drop the leading "0." and keep at most six base-36 digits
    Math.random().toString(36).substring(2, 8),
  ];
  return segments.join("-");
}
|
|
95
|
+
|
|
96
|
+
/**
 * Derive the key used to group queued requests by credential.
 *
 * @param cred Credential to fingerprint.
 * @returns The credential's scheme and value joined by a colon.
 */
function authKey(cred: AuthCredential): string {
  const { scheme, value } = cred;
  return scheme + ":" + value;
}
|
|
100
|
+
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
// BatchLLMClient
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
/**
 * Create a batch-aware LLMClient that wraps a synchronous inner client.
 *
 * - `urgent: true` calls → immediate delegation to `inner.prompt()`
 * - `urgent: false/undefined` calls → queued for batch processing
 * - On flush timer or queue full → POST /v1/messages/batches
 * - On poll timer → GET /v1/messages/batches/{id}, resolve promises
 * - On error → fallback to synchronous calls for the failed batch, replayed
 *   with the options (session, model) the caller originally passed
 *
 * Promises returned for queued calls are never rejected by this wrapper:
 * every failure path resolves them with `null`.
 *
 * @param inner        The synchronous LLMClient (gateway's direct adapter)
 * @param upstreamUrl  Base Anthropic API URL (e.g. "https://api.anthropic.com")
 * @param getAuth      Callback to resolve auth credentials (per-session → global fallback)
 * @param defaultModel Default model for requests without explicit model
 * @param batchConfig  Optional tuning parameters
 * @returns The wrapped client, extended with `shutdown()` and `stats()`.
 */
export function createBatchLLMClient(
  inner: LLMClient,
  upstreamUrl: string,
  getAuth: (sessionID?: string) => AuthCredential | null,
  defaultModel: { providerID: string; modelID: string },
  batchConfig?: BatchQueueConfig,
): LLMClient & { shutdown: () => Promise<void>; stats: () => BatchStats } {
  /** Options object accepted as the third argument of `inner.prompt()`. */
  type PromptOptions = Parameters<typeof inner.prompt>[2];

  const flushIntervalMs = batchConfig?.flushIntervalMs ?? DEFAULT_FLUSH_INTERVAL_MS;
  const maxQueueSize = batchConfig?.maxQueueSize ?? DEFAULT_MAX_QUEUE_SIZE;
  const pollIntervalMs = batchConfig?.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
  const maxBatchAgeMs = batchConfig?.maxBatchAgeMs ?? DEFAULT_MAX_BATCH_AGE_MS;

  // Upstream base URL with one trailing slash removed, so paths can be appended.
  const baseUrl = upstreamUrl.replace(/\/$/, "");

  // State
  const queue: PendingRequest[] = [];
  const inflight = new Map<string, InflightBatch>();
  // Options each caller passed to prompt(), kept so a synchronous fallback can
  // replay the call with the same session and model. Weakly keyed: entries
  // disappear together with the request object.
  const callerOptions = new WeakMap<PendingRequest, PromptOptions>();
  // Batch IDs whose status/results request is currently running.
  const pollsInProgress = new Set<string>();
  let flushTimer: ReturnType<typeof setInterval> | null = null;
  let shuttingDown = false;

  // Stats
  let totalQueued = 0;
  let totalBatched = 0;
  let totalUrgent = 0;
  let totalFallback = 0;
  let totalResolved = 0;
  let totalFailed = 0;

  // -------------------------------------------------------------------------
  // Submit a single batch for one credential group
  // -------------------------------------------------------------------------

  /**
   * POST one batch for a single credential and start polling it.
   * Any failure (HTTP error, network error, unusable response) sends the
   * items through the synchronous fallback instead; this never throws.
   */
  async function submitBatch(auth: AuthCredential, items: PendingRequest[]): Promise<void> {
    const requests = items.map((item) => ({
      custom_id: item.customId,
      params: item.params,
    }));

    log.info(`batch flush: submitting ${items.length} requests`);

    try {
      const url = `${baseUrl}/v1/messages/batches`;
      const response = await fetch(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "anthropic-version": "2023-06-01",
          ...authHeaders(auth),
        },
        body: JSON.stringify({ requests }),
      });

      if (!response.ok) {
        const text = await response.text().catch(() => "(no body)");
        log.error(`batch create failed: ${response.status} ${response.statusText} — ${text}`);
        // Fall back to synchronous for all items
        await fallbackAll(items);
        return;
      }

      const data = (await response.json()) as {
        id?: unknown;
        processing_status?: unknown;
      };

      // Without an ID the batch can never be polled — treat as a failed create
      // (handled by the catch below, which falls back to synchronous calls).
      const batchId = data.id;
      if (typeof batchId !== "string" || batchId === "") {
        throw new Error("batch create response did not include a batch id");
      }

      totalBatched += items.length;

      // Track inflight batch
      const requestMap = new Map<string, PendingRequest>();
      for (const item of items) {
        requestMap.set(item.customId, item);
      }

      const pollTimer = setInterval(
        () => pollBatch(batchId).catch((e) => log.error("batch poll error:", e)),
        pollIntervalMs,
      );

      inflight.set(batchId, {
        batchId,
        requests: requestMap,
        submittedAt: Date.now(),
        pollTimer,
        auth,
      });

      log.info(`batch created: ${batchId} with ${items.length} requests`);
    } catch (e) {
      log.error("batch create error:", e);
      await fallbackAll(items);
    }
  }

  // -------------------------------------------------------------------------
  // Flush: group queued items by credential, submit one batch per group
  // -------------------------------------------------------------------------

  /** Drain the queue and submit one batch per distinct credential. */
  async function flush(): Promise<void> {
    if (queue.length === 0) return;

    // Take all items from the queue
    const batch = queue.splice(0);

    // Group by auth credential — each credential gets its own batch
    const byAuth = new Map<string, { auth: AuthCredential; items: PendingRequest[] }>();
    for (const item of batch) {
      const key = authKey(item.auth);
      let group = byAuth.get(key);
      if (!group) {
        group = { auth: item.auth, items: [] };
        byAuth.set(key, group);
      }
      group.items.push(item);
    }

    for (const { auth, items } of byAuth.values()) {
      await submitBatch(auth, items);
    }
  }

  // -------------------------------------------------------------------------
  // Poll: check batch status and resolve promises
  // -------------------------------------------------------------------------

  /**
   * One poll tick for a batch: give up if it is too old, otherwise check its
   * status and, once it has ended, download and dispatch the results.
   */
  async function pollBatch(batchId: string): Promise<void> {
    const batch = inflight.get(batchId);
    if (!batch) return;

    // Check max age — give up and fallback if too old. This runs before the
    // overlap guard so a hung status request cannot block the timeout.
    if (Date.now() - batch.submittedAt > maxBatchAgeMs) {
      log.warn(`batch ${batchId} exceeded max age — falling back to synchronous`);
      clearInterval(batch.pollTimer);
      inflight.delete(batchId);
      pollsInProgress.delete(batchId);
      await fallbackAll([...batch.requests.values()]);
      return;
    }

    // A slow status/results request can outlive the poll interval; skip this
    // tick rather than fetching and counting the same results twice.
    if (pollsInProgress.has(batchId)) return;
    pollsInProgress.add(batchId);

    try {
      const url = `${baseUrl}/v1/messages/batches/${batchId}`;
      const response = await fetch(url, {
        headers: {
          "anthropic-version": "2023-06-01",
          ...authHeaders(batch.auth),
        },
      });

      if (!response.ok) {
        log.error(`batch poll failed for ${batchId}: ${response.status}`);
        return; // Retry on next poll
      }

      const data = (await response.json()) as {
        processing_status: string;
        results_url: string | null;
      };

      if (data.processing_status !== "ended") return;

      // Batch is done — stream results
      log.info(`batch ${batchId} ended — retrieving results`);

      if (data.results_url) {
        await retrieveResults(batchId, data.results_url);
      } else {
        // No results URL — try the standard endpoint
        await retrieveResults(batchId, `${baseUrl}/v1/messages/batches/${batchId}/results`);
      }
    } catch (e) {
      log.error(`batch poll error for ${batchId}:`, e);
    } finally {
      pollsInProgress.delete(batchId);
    }
  }

  /**
   * Download the JSONL results of an ended batch and settle every pending
   * promise of that batch. On fetch failure the batch stays inflight and is
   * retried by the next poll tick.
   */
  async function retrieveResults(batchId: string, resultsUrl: string): Promise<void> {
    const batch = inflight.get(batchId);
    if (!batch) return;

    try {
      const response = await fetch(resultsUrl, {
        headers: {
          "anthropic-version": "2023-06-01",
          ...authHeaders(batch.auth),
        },
      });

      if (!response.ok) {
        log.error(`batch results fetch failed for ${batchId}: ${response.status}`);
        return;
      }

      const text = await response.text();

      // The batch may have been abandoned (max age, shutdown) while the
      // download was running; its promises are already settled elsewhere.
      if (inflight.get(batchId) !== batch) return;

      // Results are JSONL — one JSON object per line
      const lines = text.split("\n").filter((l) => l.trim());

      for (const line of lines) {
        try {
          const result = JSON.parse(line) as {
            custom_id: string;
            result: {
              type: "succeeded" | "errored" | "canceled" | "expired";
              message?: {
                content?: Array<{ type: string; text?: string }>;
              };
              error?: { type: string; message: string };
            };
          };

          const pending = batch.requests.get(result.custom_id);
          if (!pending) continue;

          switch (result.result.type) {
            case "succeeded": {
              const textBlock = result.result.message?.content?.find(
                (b) => b.type === "text" && typeof b.text === "string",
              );
              pending.resolve(textBlock?.text ?? null);
              totalResolved++;
              break;
            }
            case "errored":
              pending.resolve(null); // Match inner client behavior (null on error)
              totalFailed++;
              log.error(
                `batch item ${result.custom_id} errored: ${result.result.error?.type ?? "unknown"} — ${result.result.error?.message ?? JSON.stringify(result.result.error)}`,
              );
              break;
            case "canceled":
            case "expired":
              pending.resolve(null);
              totalFailed++;
              log.warn(`batch item ${result.custom_id} ${result.result.type}`);
              break;
            default:
              // The payload is untyped JSON at runtime: an unrecognised result
              // type must still settle the promise, or the caller waits forever.
              pending.resolve(null);
              totalFailed++;
              log.warn(
                `batch item ${result.custom_id} has unknown result type: ${String(result.result.type)}`,
              );
              break;
          }

          batch.requests.delete(result.custom_id);
        } catch {
          log.error(`failed to parse batch result line: ${line.slice(0, 200)}`);
        }
      }

      // Resolve any remaining items that weren't in the results (shouldn't happen)
      for (const [, pending] of batch.requests) {
        pending.resolve(null);
        totalFailed++;
      }

      // Clean up
      clearInterval(batch.pollTimer);
      inflight.delete(batchId);
      log.info(
        `batch ${batchId} fully resolved (${totalResolved} ok, ${totalFailed} failed total)`,
      );
    } catch (e) {
      log.error(`batch results retrieval error for ${batchId}:`, e);
    }
  }

  // -------------------------------------------------------------------------
  // Fallback: process items synchronously via inner client
  // -------------------------------------------------------------------------

  /**
   * Answer the given items through the inner client, at most five at a time.
   * Each item is resolved with the inner result, or `null` if the call throws.
   */
  async function fallbackAll(items: PendingRequest[]): Promise<void> {
    totalFallback += items.length;
    log.info(`batch fallback: processing ${items.length} items synchronously`);

    // Process in parallel with concurrency limit of 5
    const CONCURRENCY = 5;
    for (let i = 0; i < items.length; i += CONCURRENCY) {
      const chunk = items.slice(i, i + CONCURRENCY);
      await Promise.all(
        chunk.map(async (item) => {
          try {
            // Flatten block-form system prompts back into one string
            const system =
              typeof item.params.system === "string"
                ? item.params.system
                : item.params.system.map((b) => b.text).join("\n");
            const user = item.params.messages[0]?.content ?? "";
            // Replay with the caller's own options (session, model, …) so the
            // inner client sees the same request the caller made; only
            // `urgent` is forced so the call is handled immediately.
            const opts = callerOptions.get(item);
            const result = await inner.prompt(system, user, { ...opts, urgent: true });
            item.resolve(result);
          } catch (e) {
            log.error(`batch fallback error for ${item.customId}:`, e);
            item.resolve(null);
          }
        }),
      );
    }
  }

  // -------------------------------------------------------------------------
  // Start flush timer
  // -------------------------------------------------------------------------

  flushTimer = setInterval(() => {
    flush().catch((e) => log.error("batch flush timer error:", e));
  }, flushIntervalMs);

  // -------------------------------------------------------------------------
  // LLMClient implementation
  // -------------------------------------------------------------------------

  return {
    /**
     * Answer a prompt, either immediately through the inner client (urgent
     * calls, calls during shutdown, calls without a credential) or via the
     * batch queue.
     */
    async prompt(system, user, opts) {
      // Urgent calls bypass the queue entirely
      if (opts?.urgent || shuttingDown) {
        totalUrgent++;
        return inner.prompt(system, user, opts);
      }

      // Snapshot auth credential at enqueue time for session isolation.
      // If no credential is available, fall back to synchronous processing
      // (which will also attempt to resolve auth — matches prior behavior).
      const cred = getAuth(opts?.sessionID);
      if (!cred) {
        totalUrgent++;
        return inner.prompt(system, user, opts);
      }

      totalQueued++;

      const model = opts?.model ?? defaultModel;

      // Build system payload with 1h cache (same as direct adapter).
      // An empty system prompt is passed through unchanged.
      const systemPayload = system
        ? [
            {
              type: "text" as const,
              text: system,
              cache_control: { type: "ephemeral" as const, ttl: "1h" },
            },
          ]
        : system;

      const customId = generateCustomId();

      const promise = new Promise<string | null>((resolve, reject) => {
        const request: PendingRequest = {
          customId,
          params: {
            model: model.modelID,
            max_tokens: 8192,
            system: systemPayload,
            messages: [{ role: "user", content: user }],
          },
          resolve,
          reject,
          enqueuedAt: Date.now(),
          auth: cred,
        };
        callerOptions.set(request, opts);
        queue.push(request);
      });

      // Auto-flush if queue is full
      if (queue.length >= maxQueueSize) {
        flush().catch((e) => log.error("batch auto-flush error:", e));
      }

      return promise;
    },

    /**
     * Gracefully shut down the batch queue:
     *   1. Stop the flush timer and switch to synchronous mode for future calls
     *   2. Answer any still-queued items synchronously through the inner client
     *      (a new batch might not finish before the process exits)
     *   3. Stop polling inflight batches and resolve their pending promises
     *      with `null` — the batches themselves are left to expire upstream
     */
    async shutdown(): Promise<void> {
      shuttingDown = true;
      if (flushTimer) {
        clearInterval(flushTimer);
        flushTimer = null;
      }

      // Flush remaining items synchronously (batch API might not finish before process exits)
      if (queue.length > 0) {
        log.info(`batch shutdown: processing ${queue.length} remaining items synchronously`);
        await fallbackAll(queue.splice(0));
      }

      // Clean up inflight poll timers (batches will expire naturally)
      for (const [batchId, batch] of inflight) {
        clearInterval(batch.pollTimer);
        // Resolve all pending promises with null (callers handle null gracefully)
        for (const [, pending] of batch.requests) {
          pending.resolve(null);
        }
        log.warn(`batch shutdown: abandoned inflight batch ${batchId}`);
      }
      inflight.clear();
      pollsInProgress.clear();
    },

    /** Return current batch queue statistics. */
    stats(): BatchStats {
      return {
        queued: queue.length,
        inflightBatches: inflight.size,
        inflightRequests: [...inflight.values()].reduce(
          (sum, b) => sum + b.requests.size,
          0,
        ),
        totalQueued,
        totalBatched,
        totalUrgent,
        totalFallback,
        totalResolved,
        totalFailed,
      };
    },
  };
}
|
|
531
|
+
|
|
532
|
+
// ---------------------------------------------------------------------------
|
|
533
|
+
// Stats type
|
|
534
|
+
// ---------------------------------------------------------------------------
|
|
535
|
+
|
|
536
|
+
/** Snapshot of the batch queue's gauges and lifetime counters. */
export interface BatchStats {
  // --- current state ---

  /** Items sitting in the queue, waiting for the next flush. */
  queued: number;
  /** Batches that have been submitted and are still being polled. */
  inflightBatches: number;
  /** Unanswered requests summed over all inflight batches. */
  inflightRequests: number;

  // --- lifetime counters ---

  /** Calls that were placed on the queue. */
  totalQueued: number;
  /** Calls handed straight to the inner client instead of being queued. */
  totalUrgent: number;
  /** Requests contained in batches the Batch API accepted. */
  totalBatched: number;
  /** Requests that were processed synchronously as a fallback. */
  totalFallback: number;
  /** Batch results that came back as succeeded. */
  totalResolved: number;
  /** Batch results that failed, expired, were canceled, or never arrived. */
  totalFailed: number;
}
|