@loreai/gateway 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +27 -0
- package/dist/index.cjs +1042 -0
- package/dist/index.d.cts +21 -0
- package/package.json +10 -10
- package/dist/index.js +0 -50087
- package/src/auth.ts +0 -133
- package/src/batch-queue.ts +0 -575
- package/src/cache-analytics.ts +0 -344
- package/src/cli/agents.ts +0 -107
- package/src/cli/bin.ts +0 -11
- package/src/cli/help.ts +0 -55
- package/src/cli/lib/binary.ts +0 -353
- package/src/cli/lib/bspatch.ts +0 -306
- package/src/cli/lib/delta-upgrade.ts +0 -790
- package/src/cli/lib/errors.ts +0 -48
- package/src/cli/lib/ghcr.ts +0 -389
- package/src/cli/lib/patch-cache.ts +0 -342
- package/src/cli/lib/upgrade.ts +0 -454
- package/src/cli/lib/version-check.ts +0 -385
- package/src/cli/main.ts +0 -152
- package/src/cli/run.ts +0 -181
- package/src/cli/start.ts +0 -82
- package/src/cli/upgrade.ts +0 -311
- package/src/cli/version.ts +0 -22
- package/src/compaction.ts +0 -195
- package/src/config.ts +0 -199
- package/src/idle.ts +0 -240
- package/src/index.ts +0 -41
- package/src/llm-adapter.ts +0 -182
- package/src/pipeline.ts +0 -1681
- package/src/recall.ts +0 -433
- package/src/recorder.ts +0 -192
- package/src/server.ts +0 -250
- package/src/session.ts +0 -207
- package/src/stream/anthropic.ts +0 -708
- package/src/temporal-adapter.ts +0 -310
- package/src/translate/anthropic.ts +0 -469
- package/src/translate/openai.ts +0 -536
- package/src/translate/types.ts +0 -222
- package/src/worker-model.ts +0 -408
package/src/auth.ts
DELETED
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Gateway authentication: typed credentials, per-session registry, and
|
|
3
|
-
* two-level lookup for background workers.
|
|
4
|
-
*
|
|
5
|
-
* Replaces the bare `lastSeenApiKey` string with a typed `AuthCredential`
|
|
6
|
-
* that supports both API-key (`x-api-key`) and OAuth Bearer token
|
|
7
|
-
* (`Authorization: Bearer`) authentication schemes.
|
|
8
|
-
*
|
|
9
|
-
* The per-session registry ensures background workers (distillation,
|
|
10
|
-
* curation, batch queue) use the correct credential for their session
|
|
11
|
-
* even when multiple clients are connected simultaneously.
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
// ---------------------------------------------------------------------------
|
|
15
|
-
// AuthCredential type
|
|
16
|
-
// ---------------------------------------------------------------------------
|
|
17
|
-
|
|
18
|
-
/**
 * Auth credential — either an API key or an OAuth bearer token.
 *
 * `scheme` is the discriminant that selects the outgoing header format
 * (`x-api-key` for `"api-key"`, `Authorization: Bearer …` for `"bearer"`).
 * `value` holds the raw secret without any header prefix.
 */
export type AuthCredential =
  | { scheme: "api-key"; value: string }
  | { scheme: "bearer"; value: string };
|
|
22
|
-
|
|
23
|
-
// ---------------------------------------------------------------------------
|
|
24
|
-
// Header extraction / formatting
|
|
25
|
-
// ---------------------------------------------------------------------------
|
|
26
|
-
|
|
27
|
-
/**
 * Extract an auth credential from request headers.
 *
 * Prefers `x-api-key` (Anthropic SDK default) and falls back to
 * `Authorization: Bearer <token>` (OAuth / Claude Code subscriptions).
 * Header names are matched case-insensitively because HTTP field names
 * carry no case; an exact lower-case key still wins when present.
 * Headers with an empty value are treated as absent.
 *
 * @param headers Plain header map as received from the client.
 * @returns The credential, or `null` when neither header yields one
 *          (including an `Authorization` header that is not a Bearer token).
 */
export function extractAuth(
  headers: Record<string, string>,
): AuthCredential | null {
  // Look up a header by its lower-case name: exact key first (fast path and
  // original precedence), then any key that differs only by case.
  const lookup = (lowerName: string): string | undefined => {
    const exact = headers[lowerName];
    if (exact) return exact;
    for (const [key, value] of Object.entries(headers)) {
      if (value && key.toLowerCase() === lowerName) return value;
    }
    return undefined;
  };

  const apiKey = lookup("x-api-key");
  if (apiKey) return { scheme: "api-key", value: apiKey };

  const authHeader = lookup("authorization");
  if (authHeader) {
    // Trim so surrounding whitespace does not defeat the anchored pattern.
    const token = /^Bearer\s+(\S+)$/i.exec(authHeader.trim())?.[1];
    if (token) return { scheme: "bearer", value: token };
  }

  return null;
}
|
|
49
|
-
|
|
50
|
-
/**
 * Build the HTTP header(s) that carry a credential upstream.
 *
 * - `bearer`  → `{ "Authorization": "Bearer <value>" }`
 * - `api-key` → `{ "x-api-key": value }`
 *
 * @param cred Credential to serialise.
 * @returns A single-entry header map for the credential's scheme.
 */
export function authHeaders(cred: AuthCredential): Record<string, string> {
  const { value } = cred;
  switch (cred.scheme) {
    case "bearer":
      return { Authorization: `Bearer ${value}` };
    case "api-key":
      return { "x-api-key": value };
  }
}
|
|
64
|
-
|
|
65
|
-
/**
 * Short suffix of a credential, used for fingerprinting.
 *
 * Returns the trailing eight characters of the credential value so that
 * sessions sharing the same first message but using different API keys or
 * OAuth tokens can be told apart.
 *
 * NOTE(review): a value of eight characters or fewer is returned in full, so
 * the suffix is only non-sensitive for credentials longer than that.
 *
 * @param cred Credential to fingerprint.
 * @returns Up to the last eight characters of `cred.value`.
 */
export function authFingerprint(cred: AuthCredential): string {
  const secret = cred.value;
  const start = Math.max(0, secret.length - 8);
  return secret.substring(start);
}
|
|
75
|
-
|
|
76
|
-
// ---------------------------------------------------------------------------
|
|
77
|
-
// Per-session registry
|
|
78
|
-
// ---------------------------------------------------------------------------
|
|
79
|
-
|
|
80
|
-
const sessionAuth = new Map<string, AuthCredential>();
|
|
81
|
-
|
|
82
|
-
/**
 * Remember the credential for a session, replacing any earlier entry
 * stored under the same session ID.
 *
 * @param sessionID Registry key.
 * @param cred Credential to associate with the session.
 */
export function setSessionAuth(sessionID: string, cred: AuthCredential): void {
  sessionAuth.set(sessionID, cred);
  return;
}
|
|
88
|
-
|
|
89
|
-
/**
 * Look up the credential registered for a session.
 *
 * @param sessionID Registry key.
 * @returns The stored credential, or `null` when the session has none.
 */
export function getSessionAuth(sessionID: string): AuthCredential | null {
  const stored = sessionAuth.get(sessionID);
  if (stored === undefined || stored === null) return null;
  return stored;
}
|
|
94
|
-
|
|
95
|
-
/**
 * Forget a session's credential (intended for future eviction).
 * Calling it for an unknown session is a no-op.
 *
 * @param sessionID Registry key to remove.
 */
export function deleteSessionAuth(sessionID: string): void {
  if (sessionAuth.has(sessionID)) {
    sessionAuth.delete(sessionID);
  }
}
|
|
99
|
-
|
|
100
|
-
// ---------------------------------------------------------------------------
|
|
101
|
-
// Global fallback (replaces lastSeenApiKey)
|
|
102
|
-
// ---------------------------------------------------------------------------
|
|
103
|
-
|
|
104
|
-
let lastSeenAuth: AuthCredential | null = null;
|
|
105
|
-
|
|
106
|
-
/**
 * Record the most recently seen credential as the global fallback,
 * overwriting whatever was stored before.
 *
 * @param cred Credential to keep as the fallback.
 */
export function setLastSeenAuth(cred: AuthCredential): void {
  lastSeenAuth = cred;
  return;
}
|
|
109
|
-
|
|
110
|
-
/**
 * Read the global fallback credential.
 *
 * @returns The last credential passed to `setLastSeenAuth`, or `null` if
 *          none has been recorded yet.
 */
export function getLastSeenAuth(): AuthCredential | null {
  const current = lastSeenAuth;
  return current;
}
|
|
113
|
-
|
|
114
|
-
// ---------------------------------------------------------------------------
|
|
115
|
-
// Two-level lookup
|
|
116
|
-
// ---------------------------------------------------------------------------
|
|
117
|
-
|
|
118
|
-
/**
 * Two-level credential lookup.
 *
 * A non-empty `sessionID` is tried against the per-session registry first.
 * When it is omitted, empty, or has no entry, the global last-seen
 * credential is returned instead (cold start, or callers without a session).
 *
 * @param sessionID Optional session to resolve for.
 * @returns The session credential, else the global fallback, else `null`.
 */
export function resolveAuth(sessionID?: string): AuthCredential | null {
  const fromSession = sessionID ? getSessionAuth(sessionID) : null;
  return fromSession ? fromSession : getLastSeenAuth();
}
|
package/src/batch-queue.ts
DELETED
|
@@ -1,575 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Batch queue for Anthropic Message Batches API.
|
|
3
|
-
*
|
|
4
|
-
* Wraps a synchronous LLMClient and intercepts non-urgent `prompt()` calls,
|
|
5
|
-
* accumulating them in a queue. A flush timer periodically sends the queue
|
|
6
|
-
* to Anthropic's `/v1/messages/batches` endpoint for 50% cost savings.
|
|
7
|
-
* A poll timer checks for results and resolves the pending promises.
|
|
8
|
-
*
|
|
9
|
-
* Urgent calls (compaction, overflow recovery, query expansion) bypass
|
|
10
|
-
* the queue entirely and delegate to the inner synchronous client.
|
|
11
|
-
*
|
|
12
|
-
* Auth credentials are snapshotted per-item at enqueue time and grouped
|
|
13
|
-
* by credential at flush time — this ensures multi-session isolation when
|
|
14
|
-
* multiple clients with different API keys are connected simultaneously.
|
|
15
|
-
*
|
|
16
|
-
* This is a gateway-only enhancement — the OpenCode and Pi adapters
|
|
17
|
-
* always process immediately regardless of the `urgent` flag.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
import type { LLMClient } from "@loreai/core";
|
|
21
|
-
import { log } from "@loreai/core";
|
|
22
|
-
import type { AuthCredential } from "./auth";
|
|
23
|
-
import { authHeaders } from "./auth";
|
|
24
|
-
|
|
25
|
-
// ---------------------------------------------------------------------------
|
|
26
|
-
// Types
|
|
27
|
-
// ---------------------------------------------------------------------------
|
|
28
|
-
|
|
29
|
-
/**
 * A single pending request waiting to be batched.
 *
 * Created by the client's `prompt()` when a non-urgent call is queued; it
 * carries both the payload sent upstream and the resolvers of the promise
 * handed back to the caller.
 */
interface PendingRequest {
  /** Unique ID for correlating batch results (alphanumeric + hyphens). */
  customId: string;
  /** Standard Messages API params, forwarded verbatim as the batch entry's `params`. */
  params: {
    model: string;
    max_tokens: number;
    // Either a plain string or a list of text blocks (optionally cache-tagged).
    system:
      | string
      | Array<{ type: string; text: string; cache_control?: { type: string; ttl?: string } }>;
    messages: Array<{ role: string; content: string }>;
  };
  /** Resolve the caller's promise with the text response (`null` when no text is available). */
  resolve: (value: string | null) => void;
  /** Reject the caller's promise on error. */
  reject: (error: Error) => void;
  /** Timestamp (`Date.now()`) when the request was enqueued. */
  enqueuedAt: number;
  /** Auth credential snapshotted at enqueue time for per-session isolation. */
  auth: AuthCredential;
}
|
|
51
|
-
|
|
52
|
-
/**
 * A batch that has been submitted and is being polled for results.
 *
 * Entries are removed from `requests` as their results arrive, so the map
 * always holds the requests that are still unresolved.
 */
interface InflightBatch {
  /** Anthropic batch ID returned by the create endpoint. */
  batchId: string;
  /** Map from custom_id → pending request (for resolving on completion). */
  requests: Map<string, PendingRequest>;
  /** Timestamp (`Date.now()`) when the batch was submitted; used for the max-age check. */
  submittedAt: number;
  /** Poll timer handle; cleared when the batch is resolved, times out, or is abandoned. */
  pollTimer: ReturnType<typeof setInterval>;
  /** Auth credential for this batch (used for poll/retrieve calls). */
  auth: AuthCredential;
}
|
|
65
|
-
|
|
66
|
-
/** Optional tuning knobs for the batch queue; omitted fields use the defaults noted below. */
export interface BatchQueueConfig {
  /** How often to flush the queue (ms). Default: 30000 (30s). */
  flushIntervalMs?: number;
  /** Max items before auto-flush. Default: 50. */
  maxQueueSize?: number;
  /** How often to poll for batch results (ms). Default: 60000 (60s). */
  pollIntervalMs?: number;
  /** Max age of a batch before giving up and falling back (ms). Default: 3600000 (1h). */
  maxBatchAgeMs?: number;
}
|
|
76
|
-
|
|
77
|
-
// Defaults used when the matching BatchQueueConfig field is omitted.
// All values are milliseconds except the queue size, which is an item count.
const DEFAULT_FLUSH_INTERVAL_MS = 30_000;
const DEFAULT_MAX_QUEUE_SIZE = 50;
const DEFAULT_POLL_INTERVAL_MS = 60_000;
const DEFAULT_MAX_BATCH_AGE_MS = 3_600_000; // 1 hour
|
|
81
|
-
|
|
82
|
-
// ---------------------------------------------------------------------------
|
|
83
|
-
// ID generation
|
|
84
|
-
// ---------------------------------------------------------------------------
|
|
85
|
-
|
|
86
|
-
let idCounter = 0;
|
|
87
|
-
|
|
88
|
-
/**
 * Generate a batch-API-compatible custom_id (alphanumeric + hyphens).
 *
 * The ID is `lore-<time>-<seq>-<rand>`, each part in base 36: the current
 * timestamp, the module-level counter (incremented on every call), and up to
 * six random characters.
 *
 * @returns A new correlation ID.
 */
function generateCustomId(): string {
  const timestamp = Date.now().toString(36);
  const sequence = idCounter.toString(36);
  idCounter += 1;
  const entropy = Math.random().toString(36).slice(2, 8);
  return ["lore", timestamp, sequence, entropy].join("-");
}
|
|
95
|
-
|
|
96
|
-
/**
 * Produce the grouping key for a credential: `<scheme>:<value>`.
 * Two credentials share a key only when both scheme and value match.
 *
 * @param cred Credential to key.
 * @returns The composite key string.
 */
function authKey(cred: AuthCredential): string {
  const { scheme, value } = cred;
  return scheme + ":" + value;
}
|
|
100
|
-
|
|
101
|
-
// ---------------------------------------------------------------------------
|
|
102
|
-
// BatchLLMClient
|
|
103
|
-
// ---------------------------------------------------------------------------
|
|
104
|
-
|
|
105
|
-
/**
|
|
106
|
-
* Create a batch-aware LLMClient that wraps a synchronous inner client.
|
|
107
|
-
*
|
|
108
|
-
* - `urgent: true` calls → immediate delegation to `inner.prompt()`
|
|
109
|
-
* - `urgent: false/undefined` calls → queued for batch processing
|
|
110
|
-
* - On flush timer or queue full → POST /v1/messages/batches
|
|
111
|
-
* - On poll timer → GET /v1/messages/batches/{id}, resolve promises
|
|
112
|
-
* - On error → fallback to synchronous calls for the failed batch
|
|
113
|
-
*
|
|
114
|
-
* @param inner The synchronous LLMClient (gateway's direct adapter)
|
|
115
|
-
* @param upstreamUrl Base Anthropic API URL (e.g. "https://api.anthropic.com")
|
|
116
|
-
* @param getAuth Callback to resolve auth credentials (per-session → global fallback)
|
|
117
|
-
* @param defaultModel Default model for requests without explicit model
|
|
118
|
-
* @param batchConfig Optional tuning parameters
|
|
119
|
-
*/
|
|
120
|
-
export function createBatchLLMClient(
|
|
121
|
-
inner: LLMClient,
|
|
122
|
-
upstreamUrl: string,
|
|
123
|
-
getAuth: (sessionID?: string) => AuthCredential | null,
|
|
124
|
-
defaultModel: { providerID: string; modelID: string },
|
|
125
|
-
batchConfig?: BatchQueueConfig,
|
|
126
|
-
): LLMClient & { shutdown: () => Promise<void>; stats: () => BatchStats } {
|
|
127
|
-
const flushIntervalMs = batchConfig?.flushIntervalMs ?? DEFAULT_FLUSH_INTERVAL_MS;
|
|
128
|
-
const maxQueueSize = batchConfig?.maxQueueSize ?? DEFAULT_MAX_QUEUE_SIZE;
|
|
129
|
-
const pollIntervalMs = batchConfig?.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
|
|
130
|
-
const maxBatchAgeMs = batchConfig?.maxBatchAgeMs ?? DEFAULT_MAX_BATCH_AGE_MS;
|
|
131
|
-
|
|
132
|
-
// State
|
|
133
|
-
const queue: PendingRequest[] = [];
|
|
134
|
-
const inflight = new Map<string, InflightBatch>();
|
|
135
|
-
let flushTimer: ReturnType<typeof setInterval> | null = null;
|
|
136
|
-
let shuttingDown = false;
|
|
137
|
-
|
|
138
|
-
/** Credentials whose batch API access has been permanently disabled (401/403). */
|
|
139
|
-
const disabledBatchAuth = new Set<string>();
|
|
140
|
-
|
|
141
|
-
// Stats
|
|
142
|
-
let totalQueued = 0;
|
|
143
|
-
let totalBatched = 0;
|
|
144
|
-
let totalUrgent = 0;
|
|
145
|
-
let totalFallback = 0;
|
|
146
|
-
let totalResolved = 0;
|
|
147
|
-
let totalFailed = 0;
|
|
148
|
-
|
|
149
|
-
// -------------------------------------------------------------------------
|
|
150
|
-
// Submit a single batch for one credential group
|
|
151
|
-
// -------------------------------------------------------------------------
|
|
152
|
-
|
|
153
|
-
/**
 * Submit one batch for a single credential group.
 *
 * POSTs the items to `<upstream>/v1/messages/batches`. On success the batch
 * is registered in `inflight` and a poll timer is started. Every failure
 * path hands the items to `fallbackAll` so no caller promise is left
 * pending:
 *  - 401/403 additionally disables the batch API for this credential;
 *  - any other non-2xx status, a thrown error, or a response without a
 *    string `id` simply falls back.
 *
 * If `shutdown()` ran while the create call was in flight, the requests are
 * resolved with `null` (as `shutdown()` does for inflight batches) and no
 * poll timer is started, so nothing is left running after shutdown.
 *
 * @param auth Credential shared by every item in `items`.
 * @param items Requests to submit together.
 */
async function submitBatch(auth: AuthCredential, items: PendingRequest[]): Promise<void> {
  const requests = items.map((item) => ({
    custom_id: item.customId,
    params: item.params,
  }));

  log.info(`batch flush: submitting ${items.length} requests`);

  let batchId: string;
  try {
    const url = `${upstreamUrl.replace(/\/$/, "")}/v1/messages/batches`;
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "anthropic-version": "2023-06-01",
        ...authHeaders(auth),
      },
      body: JSON.stringify({ requests }),
    });

    if (!response.ok) {
      const text = await response.text().catch(() => "(no body)");
      // Permanent auth errors — disable batch API for this credential
      if (response.status === 401 || response.status === 403) {
        const key = authKey(auth);
        if (!disabledBatchAuth.has(key)) {
          disabledBatchAuth.add(key);
          log.warn(
            `batch API disabled for this credential (${response.status}): ${text}. ` +
              `Future worker calls will use individual requests.`,
          );
        }
      } else {
        log.error(`batch create failed: ${response.status} ${response.statusText} — ${text}`);
      }
      // Fall back to synchronous for all items
      await fallbackAll(items);
      return;
    }

    const data = (await response.json()) as { id?: unknown };
    // Without an ID the batch can never be polled; registering it would keep
    // the callers waiting until the max-age timeout.
    if (typeof data.id !== "string" || data.id.length === 0) {
      log.error("batch create returned no batch id — falling back to synchronous");
      await fallbackAll(items);
      return;
    }
    batchId = data.id;
  } catch (e) {
    log.error("batch create error:", e);
    await fallbackAll(items);
    return;
  }

  totalBatched += items.length;

  if (shuttingDown) {
    // shutdown() has already cleared `inflight`; starting a poll timer now
    // would leak it. Treat the batch like any other abandoned inflight batch.
    for (const item of items) {
      item.resolve(null);
    }
    log.warn(`batch shutdown: abandoned inflight batch ${batchId}`);
    return;
  }

  // Track inflight batch
  const requestMap = new Map<string, PendingRequest>();
  for (const item of items) {
    requestMap.set(item.customId, item);
  }

  const pollTimer = setInterval(
    () => pollBatch(batchId).catch((e) => log.error("batch poll error:", e)),
    pollIntervalMs,
  );

  inflight.set(batchId, {
    batchId,
    requests: requestMap,
    submittedAt: Date.now(),
    pollTimer,
    auth,
  });

  log.info(`batch created: ${batchId} with ${items.length} requests`);
}
|
|
225
|
-
|
|
226
|
-
// -------------------------------------------------------------------------
|
|
227
|
-
// Flush: group queued items by credential, submit one batch per group
|
|
228
|
-
// -------------------------------------------------------------------------
|
|
229
|
-
|
|
230
|
-
/**
 * Drain the queue and dispatch its contents, one group per credential.
 *
 * Items are bucketed by `authKey` in arrival order. Groups are then handled
 * one after another: credentials whose batch access was disabled go straight
 * to `fallbackAll`, all others are submitted via `submitBatch`.
 */
async function flush(): Promise<void> {
  // Take everything currently queued; nothing to do when the queue is empty.
  const drained = queue.splice(0);
  if (drained.length === 0) return;

  // Bucket by credential — each credential gets its own batch.
  const groups = new Map<string, { auth: AuthCredential; items: PendingRequest[] }>();
  for (const request of drained) {
    const groupKey = authKey(request.auth);
    const existing = groups.get(groupKey);
    if (existing) {
      existing.items.push(request);
    } else {
      groups.set(groupKey, { auth: request.auth, items: [request] });
    }
  }

  for (const [groupKey, group] of groups) {
    if (disabledBatchAuth.has(groupKey)) {
      // Permanent auth failure earlier — skip the batch API for this credential.
      await fallbackAll(group.items);
    } else {
      await submitBatch(group.auth, group.items);
    }
  }
}
|
|
257
|
-
|
|
258
|
-
// -------------------------------------------------------------------------
|
|
259
|
-
// Poll: check batch status and resolve promises
|
|
260
|
-
// -------------------------------------------------------------------------
|
|
261
|
-
|
|
262
|
-
/**
 * Poll one inflight batch and, once it has ended, fetch its results.
 *
 * - Unknown batch IDs are ignored.
 * - A batch older than `maxBatchAgeMs` is dropped from `inflight`, its timer
 *   cleared, and its remaining requests are re-run via `fallbackAll`.
 * - A non-2xx status response or a thrown error is only logged; the next
 *   timer tick retries.
 * - When `processing_status` is `"ended"`, results are retrieved from
 *   `results_url`, or from the standard `/results` endpoint when the
 *   response carries no URL.
 *
 * @param batchId ID of the batch to poll.
 */
async function pollBatch(batchId: string): Promise<void> {
  const tracked = inflight.get(batchId);
  if (!tracked) return;

  // Give up on batches that have been pending for too long.
  const ageMs = Date.now() - tracked.submittedAt;
  if (ageMs > maxBatchAgeMs) {
    log.warn(`batch ${batchId} exceeded max age — falling back to synchronous`);
    clearInterval(tracked.pollTimer);
    inflight.delete(batchId);
    await fallbackAll([...tracked.requests.values()]);
    return;
  }

  try {
    const base = upstreamUrl.replace(/\/$/, "");
    const statusResponse = await fetch(`${base}/v1/messages/batches/${batchId}`, {
      headers: {
        "anthropic-version": "2023-06-01",
        ...authHeaders(tracked.auth),
      },
    });

    if (!statusResponse.ok) {
      log.error(`batch poll failed for ${batchId}: ${statusResponse.status}`);
      return; // Retry on next poll
    }

    const status = (await statusResponse.json()) as {
      processing_status: string;
      results_url: string | null;
    };
    if (status.processing_status !== "ended") return;

    log.info(`batch ${batchId} ended — retrieving results`);

    // Prefer the URL the API handed back; otherwise use the standard endpoint.
    const resultsUrl = status.results_url || `${base}/v1/messages/batches/${batchId}/results`;
    await retrieveResults(batchId, resultsUrl);
  } catch (e) {
    log.error(`batch poll error for ${batchId}:`, e);
  }
}
|
|
312
|
-
|
|
313
|
-
/**
 * Download a finished batch's JSONL results and settle its pending requests.
 *
 * Each line is matched to a pending request by `custom_id`:
 *  - `succeeded` resolves with the first text block (or `null` if none);
 *  - `errored`, `canceled`, `expired`, and any unrecognised result type
 *    resolve with `null` and count as failed.
 * Lines that cannot be processed are logged and skipped. Requests that never
 * appear in the results are resolved with `null` at the end, after which the
 * poll timer is cleared and the batch is removed from `inflight`.
 *
 * If the results request itself fails (non-2xx or thrown error) nothing is
 * settled and the batch stays inflight, so a later poll can try again.
 *
 * @param batchId ID of the batch being resolved.
 * @param resultsUrl URL to fetch the JSONL results from.
 */
async function retrieveResults(batchId: string, resultsUrl: string): Promise<void> {
  const batch = inflight.get(batchId);
  if (!batch) return;

  try {
    const response = await fetch(resultsUrl, {
      headers: {
        "anthropic-version": "2023-06-01",
        ...authHeaders(batch.auth),
      },
    });

    if (!response.ok) {
      log.error(`batch results fetch failed for ${batchId}: ${response.status}`);
      return;
    }

    const text = await response.text();
    // Results are JSONL — one JSON object per line
    const lines = text.split("\n").filter((l) => l.trim());

    for (const line of lines) {
      try {
        const result = JSON.parse(line) as {
          custom_id: string;
          result: {
            // Documented values are handled explicitly; anything else hits `default`.
            type: string;
            message?: {
              content?: Array<{ type: string; text?: string }>;
            };
            error?: { type: string; message: string };
          };
        };

        const pending = batch.requests.get(result.custom_id);
        if (!pending) continue;

        switch (result.result.type) {
          case "succeeded": {
            const textBlock = result.result.message?.content?.find(
              (b) => b.type === "text" && typeof b.text === "string",
            );
            pending.resolve(textBlock?.text ?? null);
            totalResolved++;
            break;
          }
          case "errored":
            pending.resolve(null); // Match inner client behavior (null on error)
            totalFailed++;
            log.error(
              `batch item ${result.custom_id} errored: ${result.result.error?.type ?? "unknown"} — ${result.result.error?.message ?? JSON.stringify(result.result.error)}`,
            );
            break;
          case "canceled":
          case "expired":
            pending.resolve(null);
            totalFailed++;
            log.warn(`batch item ${result.custom_id} ${result.result.type}`);
            break;
          default:
            // The entry is deleted below, so it must be settled here —
            // otherwise the caller's promise would never resolve.
            pending.resolve(null);
            totalFailed++;
            log.warn(
              `batch item ${result.custom_id} has unrecognized result type: ${String(result.result.type)}`,
            );
            break;
        }

        batch.requests.delete(result.custom_id);
      } catch {
        log.error(`failed to parse batch result line: ${line.slice(0, 200)}`);
      }
    }

    // Resolve any remaining items that weren't in the results (shouldn't happen)
    for (const [, pending] of batch.requests) {
      pending.resolve(null);
      totalFailed++;
    }

    // Clean up
    clearInterval(batch.pollTimer);
    inflight.delete(batchId);
    log.info(
      `batch ${batchId} fully resolved (${totalResolved} ok, ${totalFailed} failed total)`,
    );
  } catch (e) {
    log.error(`batch results retrieval error for ${batchId}:`, e);
  }
}
|
|
396
|
-
|
|
397
|
-
// -------------------------------------------------------------------------
|
|
398
|
-
// Fallback: process items synchronously via inner client
|
|
399
|
-
// -------------------------------------------------------------------------
|
|
400
|
-
|
|
401
|
-
/**
 * Process requests through the inner client instead of the batch API.
 *
 * Items run in consecutive waves of at most five; each wave is awaited in
 * full before the next starts. The system prompt is flattened to a string
 * (block texts joined with newlines) and only the first message's content is
 * used as the user prompt. An item whose call throws is logged and resolved
 * with `null`, so this function itself does not reject because of a failed
 * item.
 *
 * NOTE(review): the inner call is made with `{ urgent: true }` only — the
 * model and session the caller originally asked for are not forwarded.
 * Confirm that the inner client's defaults are acceptable here.
 *
 * @param items Requests to run synchronously.
 */
async function fallbackAll(items: PendingRequest[]): Promise<void> {
  const WAVE_SIZE = 5;

  totalFallback += items.length;
  log.info(`batch fallback: processing ${items.length} items synchronously`);

  // Run one request through the inner client and settle its promise.
  const runOne = async (request: PendingRequest): Promise<void> => {
    try {
      const rawSystem = request.params.system;
      const systemText =
        typeof rawSystem !== "string"
          ? rawSystem.map((block) => block.text).join("\n")
          : rawSystem;
      const [firstMessage] = request.params.messages;
      const userText = firstMessage?.content ?? "";
      const answer = await inner.prompt(systemText, userText, { urgent: true });
      request.resolve(answer);
    } catch (e) {
      log.error(`batch fallback error for ${request.customId}:`, e);
      request.resolve(null);
    }
  };

  let offset = 0;
  while (offset < items.length) {
    const wave = items.slice(offset, offset + WAVE_SIZE);
    await Promise.all(wave.map(runOne));
    offset += WAVE_SIZE;
  }
}
|
|
429
|
-
|
|
430
|
-
// -------------------------------------------------------------------------
|
|
431
|
-
// Start flush timer
|
|
432
|
-
// -------------------------------------------------------------------------
|
|
433
|
-
|
|
434
|
-
flushTimer = setInterval(() => {
|
|
435
|
-
flush().catch((e) => log.error("batch flush timer error:", e));
|
|
436
|
-
}, flushIntervalMs);
|
|
437
|
-
|
|
438
|
-
// -------------------------------------------------------------------------
|
|
439
|
-
// LLMClient implementation
|
|
440
|
-
// -------------------------------------------------------------------------
|
|
441
|
-
|
|
442
|
-
return {
|
|
443
|
-
/**
 * Run a prompt, batching it when possible.
 *
 * The call is delegated straight to the inner client (with the caller's
 * original `opts`) when any of these hold:
 *  - `opts.urgent` is set or the queue is shutting down;
 *  - no credential can be resolved for the session;
 *  - the batch API was already disabled for the resolved credential —
 *    queueing would only delay the request until the next flush before it
 *    fell back anyway.
 * Otherwise the request is queued with a credential snapshot and the
 * returned promise settles once the batch result (or a fallback) arrives.
 *
 * @returns The response text, or `null` when no text could be produced.
 */
async prompt(system, user, opts) {
  // Urgent calls bypass the queue entirely
  if (opts?.urgent || shuttingDown) {
    totalUrgent++;
    return inner.prompt(system, user, opts);
  }

  // Snapshot auth credential at enqueue time for session isolation.
  // If no credential is available, fall back to synchronous processing
  // (which will also attempt to resolve auth — matches prior behavior).
  const cred = getAuth(opts?.sessionID);
  if (!cred) {
    totalUrgent++;
    return inner.prompt(system, user, opts);
  }

  // Batch access is permanently off for this credential: skip the queue so
  // the caller does not wait a flush interval for a guaranteed fallback.
  if (disabledBatchAuth.has(authKey(cred))) {
    totalFallback++;
    return inner.prompt(system, user, opts);
  }

  totalQueued++;

  const model = opts?.model ?? defaultModel;

  // Build system payload with 1h cache (same as direct adapter); a falsy
  // system prompt is passed through unchanged.
  const systemPayload = system
    ? [
        {
          type: "text" as const,
          text: system,
          cache_control: { type: "ephemeral" as const, ttl: "1h" },
        },
      ]
    : system;

  const customId = generateCustomId();

  const promise = new Promise<string | null>((resolve, reject) => {
    queue.push({
      customId,
      params: {
        model: model.modelID,
        max_tokens: 8192,
        system: systemPayload,
        messages: [{ role: "user", content: user }],
      },
      resolve,
      reject,
      enqueuedAt: Date.now(),
      auth: cred,
    });
  });

  // Auto-flush if queue is full
  if (queue.length >= maxQueueSize) {
    flush().catch((e) => log.error("batch auto-flush error:", e));
  }

  return promise;
},
|
|
499
|
-
|
|
500
|
-
/**
 * Shut the batch queue down:
 * 1. Mark the client as shutting down, so later `prompt()` calls go
 *    straight to the inner client.
 * 2. Stop the flush timer.
 * 3. Run any still-queued items synchronously via `fallbackAll` (the batch
 *    API might not finish before the process exits).
 * 4. Abandon inflight batches without waiting: their poll timers are
 *    cleared and every pending request is resolved with `null`.
 */
async shutdown(): Promise<void> {
  shuttingDown = true;

  if (flushTimer) {
    clearInterval(flushTimer);
    flushTimer = null;
  }

  // Drain whatever is still queued through the synchronous path.
  if (queue.length > 0) {
    log.info(`batch shutdown: processing ${queue.length} remaining items synchronously`);
    const leftovers = queue.splice(0);
    await fallbackAll(leftovers);
  }

  // Stop polling and release callers of batches that are still upstream.
  inflight.forEach((batch, batchId) => {
    clearInterval(batch.pollTimer);
    // Callers handle null gracefully.
    batch.requests.forEach((pending) => {
      pending.resolve(null);
    });
    log.warn(`batch shutdown: abandoned inflight batch ${batchId}`);
  });
  inflight.clear();
},
|
|
531
|
-
|
|
532
|
-
/**
 * Snapshot the queue's current state and lifetime counters.
 * `inflightRequests` is the sum of unresolved requests across all inflight
 * batches at the time of the call.
 */
stats(): BatchStats {
  let inflightRequests = 0;
  for (const batch of inflight.values()) {
    inflightRequests += batch.requests.size;
  }

  return {
    queued: queue.length,
    inflightBatches: inflight.size,
    inflightRequests,
    totalQueued,
    totalBatched,
    totalUrgent,
    totalFallback,
    totalResolved,
    totalFailed,
  };
},
|
|
549
|
-
};
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
// ---------------------------------------------------------------------------
|
|
553
|
-
// Stats type
|
|
554
|
-
// ---------------------------------------------------------------------------
|
|
555
|
-
|
|
556
|
-
/**
 * Point-in-time statistics returned by the batch client's `stats()`.
 * The first three fields describe the current state; the `total*` fields are
 * counters accumulated since the client was created.
 */
export interface BatchStats {
  /** Items currently in the queue waiting for next flush. */
  queued: number;
  /** Number of batches currently being polled. */
  inflightBatches: number;
  /** Total requests across all inflight batches. */
  inflightRequests: number;
  /** Total requests that entered the queue. */
  totalQueued: number;
  /** Total requests successfully submitted to the Batch API. */
  totalBatched: number;
  /** Total requests that bypassed the queue (urgent). */
  totalUrgent: number;
  /** Total requests that fell back to synchronous processing. */
  totalFallback: number;
  /** Total batch results successfully resolved. */
  totalResolved: number;
  /** Total batch results that failed/expired/canceled. */
  totalFailed: number;
}
|