@victor-software-house/pi-openai-proxy 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -179,6 +179,17 @@ function logDisconnect(ctx) {
179
179
  };
180
180
  console.error(JSON.stringify(entry));
181
181
  }
182
/**
 * Emit a structured warning for an overloaded or rate-limited upstream.
 *
 * Serializes a single JSON object and hands it to `console.error`.
 *
 * @param ctx - Request context; only `requestId` is read from it.
 * @param status - Upstream status, recorded under `upstreamStatus`.
 * @param code - Error code, recorded verbatim under `code`.
 */
function logUpstreamOverload(ctx, status, code) {
	// Key order is kept stable so the emitted log line stays byte-identical.
	const line = JSON.stringify({
		ts: timestamp(),
		level: "warn",
		event: "upstream_overload",
		requestId: ctx.requestId,
		upstreamStatus: status,
		code
	});
	console.error(line);
}
182
193
  function logStartup(host, port, modelCount) {
183
194
  const entry = {
184
195
  ts: timestamp(),
@@ -1001,14 +1012,16 @@ function convertTools(openaiTools) {
1001
1012
  /**
1002
1013
  * Zod schemas for the OpenAI chat-completions request subset.
1003
1014
  *
1004
- * Phase 2 contract:
1005
- * - Phase 1 supported fields: model, messages, stream, temperature,
1006
- * max_tokens, max_completion_tokens, stop, user, stream_options
1007
- * - Phase 2 additions: tools, tool_choice, reasoning_effort,
1008
- * top_p, frequency_penalty, presence_penalty, seed, response_format
1015
+ * Supported fields (cumulative through Phase 3D):
1016
+ * - Core: model, messages, stream, temperature, max_tokens,
1017
+ * max_completion_tokens, stop, user, stream_options
1018
+ * - Tools: tools, tool_choice, parallel_tool_calls
1019
+ * - Model control: reasoning_effort, top_p, frequency_penalty,
1020
+ * presence_penalty, seed, response_format
1021
+ * - Client interop: metadata, prediction
1009
1022
  * - Unknown top-level fields are rejected with 422
1010
- * - `n > 1` is rejected
1011
- * - `logprobs` is rejected
1023
+ * - Explicitly rejected: n, logprobs, top_logprobs, logit_bias,
1024
+ * functions (deprecated), function_call (deprecated)
1012
1025
  */
1013
1026
  const textContentPartSchema = z.object({
1014
1027
  type: z.literal("text"),
@@ -1125,6 +1138,7 @@ const chatCompletionRequestSchema = z.object({
1125
1138
  stream_options: streamOptionsSchema.nullable().optional(),
1126
1139
  tools: z.array(functionToolSchema).optional(),
1127
1140
  tool_choice: toolChoiceSchema.optional(),
1141
+ parallel_tool_calls: z.boolean().optional(),
1128
1142
  reasoning_effort: z.enum([
1129
1143
  "none",
1130
1144
  "minimal",
@@ -1137,11 +1151,25 @@ const chatCompletionRequestSchema = z.object({
1137
1151
  frequency_penalty: z.number().min(-2).max(2).optional(),
1138
1152
  presence_penalty: z.number().min(-2).max(2).optional(),
1139
1153
  seed: z.int().optional(),
1140
- response_format: responseFormatSchema.optional()
1154
+ response_format: responseFormatSchema.optional(),
1155
+ metadata: z.record(z.string().trim(), z.unknown()).optional(),
1156
+ prediction: z.object({
1157
+ type: z.literal("content"),
1158
+ content: z.union([z.string().trim(), z.array(z.object({
1159
+ type: z.literal("text"),
1160
+ text: z.string().trim()
1161
+ }))])
1162
+ }).optional()
1141
1163
  }).strict();
1142
1164
  /**
1143
1165
  * Fields that are explicitly rejected with a helpful error.
1144
- * These are not supported and won't be promoted.
1166
+ *
1167
+ * `n`, `logprobs`, `top_logprobs`, `logit_bias`: not supported by the pi SDK's
1168
+ * simple completion interface. The proxy returns a single choice with no token
1169
+ * probability data. Promoting these would require response-side changes.
1170
+ *
1171
+ * `functions`, `function_call`: deprecated OpenAI fields, superseded by `tools`
1172
+ * and `tool_choice`. Clients should migrate to the current API.
1145
1173
  */
1146
1174
  const rejectedFields = [
1147
1175
  "n",
@@ -1149,8 +1177,7 @@ const rejectedFields = [
1149
1177
  "top_logprobs",
1150
1178
  "logit_bias",
1151
1179
  "functions",
1152
- "function_call",
1153
- "parallel_tool_calls"
1180
+ "function_call"
1154
1181
  ];
1155
1182
  //#endregion
1156
1183
  //#region src/openai/validate.ts
@@ -1227,6 +1254,8 @@ const SKIP_PAYLOAD_PASSTHROUGH_APIS = new Set(["openai-codex-responses"]);
1227
1254
  /**
1228
1255
  * Collect fields that need to be injected via onPayload.
1229
1256
  * Skips passthrough for APIs that use non-standard request formats.
1257
+ *
1258
+ * @internal Exported for unit testing only.
1230
1259
  */
1231
1260
  function collectPayloadFields(request, api) {
1232
1261
  if (SKIP_PAYLOAD_PASSTHROUGH_APIS.has(api)) return;
@@ -1260,9 +1289,67 @@ function collectPayloadFields(request, api) {
1260
1289
  fields["response_format"] = request.response_format;
1261
1290
  hasFields = true;
1262
1291
  }
1292
+ if (request.tool_choice !== void 0) {
1293
+ fields["tool_choice"] = request.tool_choice;
1294
+ hasFields = true;
1295
+ }
1296
+ if (request.parallel_tool_calls !== void 0) {
1297
+ fields["parallel_tool_calls"] = request.parallel_tool_calls;
1298
+ hasFields = true;
1299
+ }
1300
+ if (request.metadata !== void 0) {
1301
+ fields["metadata"] = request.metadata;
1302
+ hasFields = true;
1303
+ }
1304
+ if (request.prediction !== void 0) {
1305
+ fields["prediction"] = request.prediction;
1306
+ hasFields = true;
1307
+ }
1263
1308
  return hasFields ? fields : void 0;
1264
1309
  }
1265
1310
  /**
1311
+ * Collect tool strict flags from the original OpenAI request.
1312
+ *
1313
+ * The pi SDK's `Tool` interface has no `strict` field, so the SDK always sets
1314
+ * `strict: false` when building the upstream payload. This function extracts
1315
+ * the per-tool strict flags from the original request so they can be restored
1316
+ * via `onPayload` after the SDK builds the payload.
1317
+ *
1318
+ * Returns a map of tool index -> true for tools that requested strict mode,
1319
+ * or undefined if no tools use strict mode.
1320
+ *
1321
+ * @internal Exported for unit testing only.
1322
+ */
1323
function collectToolStrictFlags(tools) {
	// Absent or empty tool list: there is nothing to record.
	if (tools === undefined || tools.length === 0) return undefined;
	// Gather [index, true] pairs for every tool whose function asks for strict mode.
	const strictEntries = [];
	for (const [position, tool] of tools.entries()) {
		// Only the literal boolean `true` counts; truthy values such as "true" do not.
		if (tool?.function.strict === true) strictEntries.push([position, true]);
	}
	// A Map is only materialized when at least one tool opted in.
	return strictEntries.length > 0 ? new Map(strictEntries) : undefined;
}
1332
/**
 * Restore `strict: true` on selected tool definitions of an upstream payload.
 *
 * Mutates `payload` in place. For each index listed in `strictFlags`, the
 * entry at that position of `payload.tools` gets `function.strict = true`,
 * provided both the entry and its `function` member pass `isRecord`.
 * The call is a no-op when `payload.tools` is not an array.
 *
 * @internal Exported for unit testing only.
 */
function applyToolStrictFlags(payload, strictFlags) {
	const definitions = payload["tools"];
	if (!Array.isArray(definitions)) return;
	// Only the index matters; the stored flag value is not consulted.
	for (const [position] of strictFlags) {
		const definition = definitions[position];
		if (!isRecord(definition)) continue;
		const fnSpec = definition["function"];
		if (!isRecord(fnSpec)) continue;
		fnSpec["strict"] = true;
	}
}
1352
+ /**
1266
1353
  * Combine a client disconnect signal with an upstream timeout into a single signal.
1267
1354
  * Returns the combined signal, or undefined if neither is provided.
1268
1355
  */
@@ -1293,8 +1380,12 @@ async function buildStreamOptions(model, request, options) {
1293
1380
  if (apiKey !== void 0) opts.apiKey = apiKey;
1294
1381
  }
1295
1382
  const payloadFields = collectPayloadFields(request, model.api);
1296
- if (payloadFields !== void 0) opts.onPayload = (payload) => {
1297
- if (isRecord(payload)) for (const [key, value] of Object.entries(payloadFields)) payload[key] = value;
1383
+ const strictFlags = collectToolStrictFlags(request.tools);
1384
+ if (payloadFields !== void 0 || strictFlags !== void 0) opts.onPayload = (payload) => {
1385
+ if (isRecord(payload)) {
1386
+ if (payloadFields !== void 0) for (const [key, value] of Object.entries(payloadFields)) payload[key] = value;
1387
+ if (strictFlags !== void 0) applyToolStrictFlags(payload, strictFlags);
1388
+ }
1298
1389
  return payload;
1299
1390
  };
1300
1391
  return opts;
@@ -1329,6 +1420,15 @@ function createRoutes(config, configReader = fileConfigReader) {
1329
1420
  if (!outcome.ok) throw new Error(`Model exposure configuration error: ${outcome.message}`);
1330
1421
  return outcome;
1331
1422
  }
1423
/**
 * Translate an upstream failure into the proxy's error shape and log it.
 *
 * The mapped message always goes through `logError`, with the raw
 * `err.message` as detail when `err` is an `Error`. A mapped status of
 * 429 or 503 additionally produces an `upstream_overload` warning whose
 * code falls back to "unknown" when the mapped error carries none.
 *
 * @returns The value produced by `mapUpstreamError(err)`, unchanged.
 */
function handleUpstreamError(ctx, err) {
	const mapped = mapUpstreamError(err);
	const upstreamMessage = mapped.body.error.message;
	const detail = err instanceof Error ? err.message : undefined;
	logError(ctx, upstreamMessage, detail);
	switch (mapped.status) {
		case 429:
		case 503:
			logUpstreamOverload(ctx, mapped.status, mapped.body.error.code ?? "unknown");
			break;
		default:
			break;
	}
	return mapped;
}
1332
1432
  const routes = new Hono();
1333
1433
  routes.get("/v1/models", (c) => {
1334
1434
  const exposure = getExposure();
@@ -1390,12 +1490,11 @@ function createRoutes(config, configReader = fileConfigReader) {
1390
1490
  const eventStream = await piStream(model, context, request, completionOptions);
1391
1491
  for await (const frame of streamToSSE(eventStream, requestId, canonicalModelId, includeUsage)) await stream.write(frame);
1392
1492
  } catch (err) {
1393
- const mapped = mapUpstreamError(err);
1394
- logError({
1493
+ const mapped = handleUpstreamError({
1395
1494
  requestId,
1396
1495
  method: "POST",
1397
1496
  path: "/v1/chat/completions"
1398
- }, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1497
+ }, err);
1399
1498
  const errorChunk = JSON.stringify({ error: mapped.body.error });
1400
1499
  await stream.write(`data: ${errorChunk}\n\n`);
1401
1500
  await stream.write("data: [DONE]\n\n");
@@ -1406,22 +1505,20 @@ function createRoutes(config, configReader = fileConfigReader) {
1406
1505
  const message = await piComplete(model, context, request, completionOptions);
1407
1506
  if (message.stopReason === "error" || message.stopReason === "aborted") {
1408
1507
  const errorMessage = message.errorMessage ?? "Upstream provider error";
1409
- const mapped = mapUpstreamError(new Error(errorMessage));
1410
- logError({
1508
+ const mapped = handleUpstreamError({
1411
1509
  requestId,
1412
1510
  method: "POST",
1413
1511
  path: "/v1/chat/completions"
1414
- }, errorMessage);
1512
+ }, new Error(errorMessage));
1415
1513
  return c.json(mapped.body, mapped.status);
1416
1514
  }
1417
1515
  return c.json(buildChatCompletion(requestId, canonicalModelId, message));
1418
1516
  } catch (err) {
1419
- const mapped = mapUpstreamError(err);
1420
- logError({
1517
+ const mapped = handleUpstreamError({
1421
1518
  requestId,
1422
1519
  method: "POST",
1423
1520
  path: "/v1/chat/completions"
1424
- }, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1521
+ }, err);
1425
1522
  return c.json(mapped.body, mapped.status);
1426
1523
  }
1427
1524
  });
package/dist/sync-zed.mjs CHANGED
@@ -48,7 +48,7 @@ function toZedModel(exposed) {
48
48
  capabilities: {
49
49
  tools: true,
50
50
  images: model.input.includes("image"),
51
- parallel_tool_calls: false,
51
+ parallel_tool_calls: true,
52
52
  prompt_cache_key: false,
53
53
  chat_completions: true
54
54
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@victor-software-house/pi-openai-proxy",
3
- "version": "4.6.0",
3
+ "version": "4.7.0",
4
4
  "description": "OpenAI-compatible HTTP proxy for pi's multi-provider model registry",
5
5
  "license": "MIT",
6
6
  "author": "Victor Software House",