@victor-software-house/pi-openai-proxy 4.6.0 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +119 -22
- package/dist/sync-zed.mjs +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -179,6 +179,17 @@ function logDisconnect(ctx) {
|
|
|
179
179
|
};
|
|
180
180
|
console.error(JSON.stringify(entry));
|
|
181
181
|
}
|
|
182
|
+
function logUpstreamOverload(ctx, status, code) {
|
|
183
|
+
const entry = {
|
|
184
|
+
ts: timestamp(),
|
|
185
|
+
level: "warn",
|
|
186
|
+
event: "upstream_overload",
|
|
187
|
+
requestId: ctx.requestId,
|
|
188
|
+
upstreamStatus: status,
|
|
189
|
+
code
|
|
190
|
+
};
|
|
191
|
+
console.error(JSON.stringify(entry));
|
|
192
|
+
}
|
|
182
193
|
function logStartup(host, port, modelCount) {
|
|
183
194
|
const entry = {
|
|
184
195
|
ts: timestamp(),
|
|
@@ -1001,14 +1012,16 @@ function convertTools(openaiTools) {
|
|
|
1001
1012
|
/**
|
|
1002
1013
|
* Zod schemas for the OpenAI chat-completions request subset.
|
|
1003
1014
|
*
|
|
1004
|
-
* Phase
|
|
1005
|
-
* -
|
|
1006
|
-
*
|
|
1007
|
-
* -
|
|
1008
|
-
*
|
|
1015
|
+
* Supported fields (cumulative through Phase 3D):
|
|
1016
|
+
* - Core: model, messages, stream, temperature, max_tokens,
|
|
1017
|
+
* max_completion_tokens, stop, user, stream_options
|
|
1018
|
+
* - Tools: tools, tool_choice, parallel_tool_calls
|
|
1019
|
+
* - Model control: reasoning_effort, top_p, frequency_penalty,
|
|
1020
|
+
* presence_penalty, seed, response_format
|
|
1021
|
+
* - Client interop: metadata, prediction
|
|
1009
1022
|
* - Unknown top-level fields are rejected with 422
|
|
1010
|
-
* -
|
|
1011
|
-
*
|
|
1023
|
+
* - Explicitly rejected: n, logprobs, top_logprobs, logit_bias,
|
|
1024
|
+
* functions (deprecated), function_call (deprecated)
|
|
1012
1025
|
*/
|
|
1013
1026
|
const textContentPartSchema = z.object({
|
|
1014
1027
|
type: z.literal("text"),
|
|
@@ -1125,6 +1138,7 @@ const chatCompletionRequestSchema = z.object({
|
|
|
1125
1138
|
stream_options: streamOptionsSchema.nullable().optional(),
|
|
1126
1139
|
tools: z.array(functionToolSchema).optional(),
|
|
1127
1140
|
tool_choice: toolChoiceSchema.optional(),
|
|
1141
|
+
parallel_tool_calls: z.boolean().optional(),
|
|
1128
1142
|
reasoning_effort: z.enum([
|
|
1129
1143
|
"none",
|
|
1130
1144
|
"minimal",
|
|
@@ -1137,11 +1151,25 @@ const chatCompletionRequestSchema = z.object({
|
|
|
1137
1151
|
frequency_penalty: z.number().min(-2).max(2).optional(),
|
|
1138
1152
|
presence_penalty: z.number().min(-2).max(2).optional(),
|
|
1139
1153
|
seed: z.int().optional(),
|
|
1140
|
-
response_format: responseFormatSchema.optional()
|
|
1154
|
+
response_format: responseFormatSchema.optional(),
|
|
1155
|
+
metadata: z.record(z.string().trim(), z.unknown()).optional(),
|
|
1156
|
+
prediction: z.object({
|
|
1157
|
+
type: z.literal("content"),
|
|
1158
|
+
content: z.union([z.string().trim(), z.array(z.object({
|
|
1159
|
+
type: z.literal("text"),
|
|
1160
|
+
text: z.string().trim()
|
|
1161
|
+
}))])
|
|
1162
|
+
}).optional()
|
|
1141
1163
|
}).strict();
|
|
1142
1164
|
/**
|
|
1143
1165
|
* Fields that are explicitly rejected with a helpful error.
|
|
1144
|
-
*
|
|
1166
|
+
*
|
|
1167
|
+
* `n`, `logprobs`, `top_logprobs`, `logit_bias`: not supported by the pi SDK's
|
|
1168
|
+
* simple completion interface. The proxy returns a single choice with no token
|
|
1169
|
+
* probability data. Promoting these would require response-side changes.
|
|
1170
|
+
*
|
|
1171
|
+
* `functions`, `function_call`: deprecated OpenAI fields, superseded by `tools`
|
|
1172
|
+
* and `tool_choice`. Clients should migrate to the current API.
|
|
1145
1173
|
*/
|
|
1146
1174
|
const rejectedFields = [
|
|
1147
1175
|
"n",
|
|
@@ -1149,8 +1177,7 @@ const rejectedFields = [
|
|
|
1149
1177
|
"top_logprobs",
|
|
1150
1178
|
"logit_bias",
|
|
1151
1179
|
"functions",
|
|
1152
|
-
"function_call"
|
|
1153
|
-
"parallel_tool_calls"
|
|
1180
|
+
"function_call"
|
|
1154
1181
|
];
|
|
1155
1182
|
//#endregion
|
|
1156
1183
|
//#region src/openai/validate.ts
|
|
@@ -1227,6 +1254,8 @@ const SKIP_PAYLOAD_PASSTHROUGH_APIS = new Set(["openai-codex-responses"]);
|
|
|
1227
1254
|
/**
|
|
1228
1255
|
* Collect fields that need to be injected via onPayload.
|
|
1229
1256
|
* Skips passthrough for APIs that use non-standard request formats.
|
|
1257
|
+
*
|
|
1258
|
+
* @internal Exported for unit testing only.
|
|
1230
1259
|
*/
|
|
1231
1260
|
function collectPayloadFields(request, api) {
|
|
1232
1261
|
if (SKIP_PAYLOAD_PASSTHROUGH_APIS.has(api)) return;
|
|
@@ -1260,9 +1289,67 @@ function collectPayloadFields(request, api) {
|
|
|
1260
1289
|
fields["response_format"] = request.response_format;
|
|
1261
1290
|
hasFields = true;
|
|
1262
1291
|
}
|
|
1292
|
+
if (request.tool_choice !== void 0) {
|
|
1293
|
+
fields["tool_choice"] = request.tool_choice;
|
|
1294
|
+
hasFields = true;
|
|
1295
|
+
}
|
|
1296
|
+
if (request.parallel_tool_calls !== void 0) {
|
|
1297
|
+
fields["parallel_tool_calls"] = request.parallel_tool_calls;
|
|
1298
|
+
hasFields = true;
|
|
1299
|
+
}
|
|
1300
|
+
if (request.metadata !== void 0) {
|
|
1301
|
+
fields["metadata"] = request.metadata;
|
|
1302
|
+
hasFields = true;
|
|
1303
|
+
}
|
|
1304
|
+
if (request.prediction !== void 0) {
|
|
1305
|
+
fields["prediction"] = request.prediction;
|
|
1306
|
+
hasFields = true;
|
|
1307
|
+
}
|
|
1263
1308
|
return hasFields ? fields : void 0;
|
|
1264
1309
|
}
|
|
1265
1310
|
/**
|
|
1311
|
+
* Collect tool strict flags from the original OpenAI request.
|
|
1312
|
+
*
|
|
1313
|
+
* The pi SDK's `Tool` interface has no `strict` field, so the SDK always sets
|
|
1314
|
+
* `strict: false` when building the upstream payload. This function extracts
|
|
1315
|
+
* the per-tool strict flags from the original request so they can be restored
|
|
1316
|
+
* via `onPayload` after the SDK builds the payload.
|
|
1317
|
+
*
|
|
1318
|
+
* Returns a map of tool index -> true for tools that requested strict mode,
|
|
1319
|
+
* or undefined if no tools use strict mode.
|
|
1320
|
+
*
|
|
1321
|
+
* @internal Exported for unit testing only.
|
|
1322
|
+
*/
|
|
1323
|
+
function collectToolStrictFlags(tools) {
|
|
1324
|
+
if (tools === void 0 || tools.length === 0) return;
|
|
1325
|
+
let flags;
|
|
1326
|
+
for (let i = 0; i < tools.length; i++) if (tools[i]?.function.strict === true) {
|
|
1327
|
+
flags ??= /* @__PURE__ */ new Map();
|
|
1328
|
+
flags.set(i, true);
|
|
1329
|
+
}
|
|
1330
|
+
return flags;
|
|
1331
|
+
}
|
|
1332
|
+
/**
|
|
1333
|
+
* Apply strict flags to tool definitions in the upstream payload.
|
|
1334
|
+
*
|
|
1335
|
+
* The pi SDK always sets `strict: false` on tool definitions. This function
|
|
1336
|
+
* patches the payload's `tools` array to restore the client's requested
|
|
1337
|
+
* `strict: true` flags on the matching tool definitions.
|
|
1338
|
+
*
|
|
1339
|
+
* @internal Exported for unit testing only.
|
|
1340
|
+
*/
|
|
1341
|
+
function applyToolStrictFlags(payload, strictFlags) {
|
|
1342
|
+
const tools = payload["tools"];
|
|
1343
|
+
if (!Array.isArray(tools)) return;
|
|
1344
|
+
for (const [index, _flag] of strictFlags) {
|
|
1345
|
+
const tool = tools[index];
|
|
1346
|
+
if (isRecord(tool)) {
|
|
1347
|
+
const fn = tool["function"];
|
|
1348
|
+
if (isRecord(fn)) fn["strict"] = true;
|
|
1349
|
+
}
|
|
1350
|
+
}
|
|
1351
|
+
}
|
|
1352
|
+
/**
|
|
1266
1353
|
* Combine a client disconnect signal with an upstream timeout into a single signal.
|
|
1267
1354
|
* Returns the combined signal, or undefined if neither is provided.
|
|
1268
1355
|
*/
|
|
@@ -1293,8 +1380,12 @@ async function buildStreamOptions(model, request, options) {
|
|
|
1293
1380
|
if (apiKey !== void 0) opts.apiKey = apiKey;
|
|
1294
1381
|
}
|
|
1295
1382
|
const payloadFields = collectPayloadFields(request, model.api);
|
|
1296
|
-
|
|
1297
|
-
|
|
1383
|
+
const strictFlags = collectToolStrictFlags(request.tools);
|
|
1384
|
+
if (payloadFields !== void 0 || strictFlags !== void 0) opts.onPayload = (payload) => {
|
|
1385
|
+
if (isRecord(payload)) {
|
|
1386
|
+
if (payloadFields !== void 0) for (const [key, value] of Object.entries(payloadFields)) payload[key] = value;
|
|
1387
|
+
if (strictFlags !== void 0) applyToolStrictFlags(payload, strictFlags);
|
|
1388
|
+
}
|
|
1298
1389
|
return payload;
|
|
1299
1390
|
};
|
|
1300
1391
|
return opts;
|
|
@@ -1329,6 +1420,15 @@ function createRoutes(config, configReader = fileConfigReader) {
|
|
|
1329
1420
|
if (!outcome.ok) throw new Error(`Model exposure configuration error: ${outcome.message}`);
|
|
1330
1421
|
return outcome;
|
|
1331
1422
|
}
|
|
1423
|
+
/**
|
|
1424
|
+
* Map upstream error, log it, and emit a structured warn for rate limit / overload.
|
|
1425
|
+
*/
|
|
1426
|
+
function handleUpstreamError(ctx, err) {
|
|
1427
|
+
const mapped = mapUpstreamError(err);
|
|
1428
|
+
logError(ctx, mapped.body.error.message, err instanceof Error ? err.message : void 0);
|
|
1429
|
+
if (mapped.status === 429 || mapped.status === 503) logUpstreamOverload(ctx, mapped.status, mapped.body.error.code ?? "unknown");
|
|
1430
|
+
return mapped;
|
|
1431
|
+
}
|
|
1332
1432
|
const routes = new Hono();
|
|
1333
1433
|
routes.get("/v1/models", (c) => {
|
|
1334
1434
|
const exposure = getExposure();
|
|
@@ -1390,12 +1490,11 @@ function createRoutes(config, configReader = fileConfigReader) {
|
|
|
1390
1490
|
const eventStream = await piStream(model, context, request, completionOptions);
|
|
1391
1491
|
for await (const frame of streamToSSE(eventStream, requestId, canonicalModelId, includeUsage)) await stream.write(frame);
|
|
1392
1492
|
} catch (err) {
|
|
1393
|
-
const mapped =
|
|
1394
|
-
logError({
|
|
1493
|
+
const mapped = handleUpstreamError({
|
|
1395
1494
|
requestId,
|
|
1396
1495
|
method: "POST",
|
|
1397
1496
|
path: "/v1/chat/completions"
|
|
1398
|
-
},
|
|
1497
|
+
}, err);
|
|
1399
1498
|
const errorChunk = JSON.stringify({ error: mapped.body.error });
|
|
1400
1499
|
await stream.write(`data: ${errorChunk}\n\n`);
|
|
1401
1500
|
await stream.write("data: [DONE]\n\n");
|
|
@@ -1406,22 +1505,20 @@ function createRoutes(config, configReader = fileConfigReader) {
|
|
|
1406
1505
|
const message = await piComplete(model, context, request, completionOptions);
|
|
1407
1506
|
if (message.stopReason === "error" || message.stopReason === "aborted") {
|
|
1408
1507
|
const errorMessage = message.errorMessage ?? "Upstream provider error";
|
|
1409
|
-
const mapped =
|
|
1410
|
-
logError({
|
|
1508
|
+
const mapped = handleUpstreamError({
|
|
1411
1509
|
requestId,
|
|
1412
1510
|
method: "POST",
|
|
1413
1511
|
path: "/v1/chat/completions"
|
|
1414
|
-
}, errorMessage);
|
|
1512
|
+
}, new Error(errorMessage));
|
|
1415
1513
|
return c.json(mapped.body, mapped.status);
|
|
1416
1514
|
}
|
|
1417
1515
|
return c.json(buildChatCompletion(requestId, canonicalModelId, message));
|
|
1418
1516
|
} catch (err) {
|
|
1419
|
-
const mapped =
|
|
1420
|
-
logError({
|
|
1517
|
+
const mapped = handleUpstreamError({
|
|
1421
1518
|
requestId,
|
|
1422
1519
|
method: "POST",
|
|
1423
1520
|
path: "/v1/chat/completions"
|
|
1424
|
-
},
|
|
1521
|
+
}, err);
|
|
1425
1522
|
return c.json(mapped.body, mapped.status);
|
|
1426
1523
|
}
|
|
1427
1524
|
});
|
package/dist/sync-zed.mjs
CHANGED
package/package.json
CHANGED