@victor-software-house/pi-openai-proxy 4.6.1 → 4.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -179,6 +179,17 @@ function logDisconnect(ctx) {
179
179
  };
180
180
  console.error(JSON.stringify(entry));
181
181
  }
182
+ function logUpstreamOverload(ctx, status, code) {
183
+ const entry = {
184
+ ts: timestamp(),
185
+ level: "warn",
186
+ event: "upstream_overload",
187
+ requestId: ctx.requestId,
188
+ upstreamStatus: status,
189
+ code
190
+ };
191
+ console.error(JSON.stringify(entry));
192
+ }
182
193
  function logStartup(host, port, modelCount) {
183
194
  const entry = {
184
195
  ts: timestamp(),
@@ -1001,14 +1012,16 @@ function convertTools(openaiTools) {
1001
1012
  /**
1002
1013
  * Zod schemas for the OpenAI chat-completions request subset.
1003
1014
  *
1004
- * Phase 2 contract:
1005
- * - Phase 1 supported fields: model, messages, stream, temperature,
1006
- * max_tokens, max_completion_tokens, stop, user, stream_options
1007
- * - Phase 2 additions: tools, tool_choice, reasoning_effort,
1008
- * top_p, frequency_penalty, presence_penalty, seed, response_format
1015
+ * Supported fields (cumulative through Phase 3D):
1016
+ * - Core: model, messages, stream, temperature, max_tokens,
1017
+ * max_completion_tokens, stop, user, stream_options
1018
+ * - Tools: tools, tool_choice, parallel_tool_calls
1019
+ * - Model control: reasoning_effort, top_p, frequency_penalty,
1020
+ * presence_penalty, seed, response_format
1021
+ * - Client interop: metadata, prediction
1009
1022
  * - Unknown top-level fields are rejected with 422
1010
- * - `n > 1` is rejected
1011
- * - `logprobs` is rejected
1023
+ * - Explicitly rejected: n, logprobs, top_logprobs, logit_bias,
1024
+ * functions (deprecated), function_call (deprecated)
1012
1025
  */
1013
1026
  const textContentPartSchema = z.object({
1014
1027
  type: z.literal("text"),
@@ -1125,6 +1138,7 @@ const chatCompletionRequestSchema = z.object({
1125
1138
  stream_options: streamOptionsSchema.nullable().optional(),
1126
1139
  tools: z.array(functionToolSchema).optional(),
1127
1140
  tool_choice: toolChoiceSchema.optional(),
1141
+ parallel_tool_calls: z.boolean().optional(),
1128
1142
  reasoning_effort: z.enum([
1129
1143
  "none",
1130
1144
  "minimal",
@@ -1137,21 +1151,25 @@ const chatCompletionRequestSchema = z.object({
1137
1151
  frequency_penalty: z.number().min(-2).max(2).optional(),
1138
1152
  presence_penalty: z.number().min(-2).max(2).optional(),
1139
1153
  seed: z.int().optional(),
1140
- response_format: responseFormatSchema.optional()
1154
+ response_format: responseFormatSchema.optional(),
1155
+ metadata: z.record(z.string().trim(), z.unknown()).optional(),
1156
+ prediction: z.object({
1157
+ type: z.literal("content"),
1158
+ content: z.union([z.string().trim(), z.array(z.object({
1159
+ type: z.literal("text"),
1160
+ text: z.string().trim()
1161
+ }))])
1162
+ }).optional()
1141
1163
  }).strict();
1142
1164
  /**
1143
1165
  * Fields that are explicitly rejected with a helpful error.
1144
1166
  *
1145
1167
  * `n`, `logprobs`, `top_logprobs`, `logit_bias`: not supported by the pi SDK's
1146
- * simple completion interface and unlikely to be promoted.
1168
+ * simple completion interface. The proxy returns a single choice with no token
1169
+ * probability data. Promoting these would require response-side changes.
1147
1170
  *
1148
1171
  * `functions`, `function_call`: deprecated OpenAI fields, superseded by `tools`
1149
- * and `tool_choice`.
1150
- *
1151
- * `parallel_tool_calls`: the pi SDK does not expose parallel tool call control.
1152
- * The SSE streaming code handles multiple tool calls per response, so the response
1153
- * side is capable, but the proxy cannot guarantee the flag reaches the provider.
1154
- * Needs deeper analysis — see Phase 3D in TODO.md.
1172
+ * and `tool_choice`. Clients should migrate to the current API.
1155
1173
  */
1156
1174
  const rejectedFields = [
1157
1175
  "n",
@@ -1159,8 +1177,7 @@ const rejectedFields = [
1159
1177
  "top_logprobs",
1160
1178
  "logit_bias",
1161
1179
  "functions",
1162
- "function_call",
1163
- "parallel_tool_calls"
1180
+ "function_call"
1164
1181
  ];
1165
1182
  //#endregion
1166
1183
  //#region src/openai/validate.ts
@@ -1230,18 +1247,28 @@ const REASONING_EFFORT_MAP = {
1230
1247
  xhigh: "xhigh"
1231
1248
  };
1232
1249
  /**
1233
- * APIs where onPayload passthrough fields are not supported.
1234
- * These APIs use non-standard request formats that reject standard OpenAI fields.
1250
+ * APIs that use the OpenAI chat completions wire format and accept standard
1251
+ * passthrough fields (stop, seed, top_p, tool_choice, etc.) in the payload.
1252
+ *
1253
+ * Only these APIs receive injected fields via onPayload. All other APIs
1254
+ * (anthropic-messages, google-*, bedrock-*, openai-codex-responses) use
1255
+ * different payload schemas that reject unknown fields.
1235
1256
  */
1236
- const SKIP_PAYLOAD_PASSTHROUGH_APIS = new Set(["openai-codex-responses"]);
1257
+ const OPENAI_COMPLETIONS_COMPATIBLE_APIS = new Set([
1258
+ "openai-completions",
1259
+ "openai-responses",
1260
+ "azure-openai-responses",
1261
+ "mistral-conversations"
1262
+ ]);
1237
1263
  /**
1238
1264
  * Collect fields that need to be injected via onPayload.
1239
- * Skips passthrough for APIs that use non-standard request formats.
1265
+ * Only injects for APIs that use the OpenAI chat completions wire format.
1266
+ * Non-compatible APIs (Anthropic, Google, Bedrock, Codex) reject unknown fields.
1240
1267
  *
1241
1268
  * @internal Exported for unit testing only.
1242
1269
  */
1243
1270
  function collectPayloadFields(request, api) {
1244
- if (SKIP_PAYLOAD_PASSTHROUGH_APIS.has(api)) return;
1271
+ if (!OPENAI_COMPLETIONS_COMPATIBLE_APIS.has(api)) return;
1245
1272
  const fields = {};
1246
1273
  let hasFields = false;
1247
1274
  if (request.stop !== void 0) {
@@ -1276,6 +1303,18 @@ function collectPayloadFields(request, api) {
1276
1303
  fields["tool_choice"] = request.tool_choice;
1277
1304
  hasFields = true;
1278
1305
  }
1306
+ if (request.parallel_tool_calls !== void 0) {
1307
+ fields["parallel_tool_calls"] = request.parallel_tool_calls;
1308
+ hasFields = true;
1309
+ }
1310
+ if (request.metadata !== void 0) {
1311
+ fields["metadata"] = request.metadata;
1312
+ hasFields = true;
1313
+ }
1314
+ if (request.prediction !== void 0) {
1315
+ fields["prediction"] = request.prediction;
1316
+ hasFields = true;
1317
+ }
1279
1318
  return hasFields ? fields : void 0;
1280
1319
  }
1281
1320
  /**
@@ -1391,6 +1430,15 @@ function createRoutes(config, configReader = fileConfigReader) {
1391
1430
  if (!outcome.ok) throw new Error(`Model exposure configuration error: ${outcome.message}`);
1392
1431
  return outcome;
1393
1432
  }
1433
+ /**
1434
+ * Map upstream error, log it, and emit a structured warn for rate limit / overload.
1435
+ */
1436
+ function handleUpstreamError(ctx, err) {
1437
+ const mapped = mapUpstreamError(err);
1438
+ logError(ctx, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1439
+ if (mapped.status === 429 || mapped.status === 503) logUpstreamOverload(ctx, mapped.status, mapped.body.error.code ?? "unknown");
1440
+ return mapped;
1441
+ }
1394
1442
  const routes = new Hono();
1395
1443
  routes.get("/v1/models", (c) => {
1396
1444
  const exposure = getExposure();
@@ -1452,12 +1500,11 @@ function createRoutes(config, configReader = fileConfigReader) {
1452
1500
  const eventStream = await piStream(model, context, request, completionOptions);
1453
1501
  for await (const frame of streamToSSE(eventStream, requestId, canonicalModelId, includeUsage)) await stream.write(frame);
1454
1502
  } catch (err) {
1455
- const mapped = mapUpstreamError(err);
1456
- logError({
1503
+ const mapped = handleUpstreamError({
1457
1504
  requestId,
1458
1505
  method: "POST",
1459
1506
  path: "/v1/chat/completions"
1460
- }, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1507
+ }, err);
1461
1508
  const errorChunk = JSON.stringify({ error: mapped.body.error });
1462
1509
  await stream.write(`data: ${errorChunk}\n\n`);
1463
1510
  await stream.write("data: [DONE]\n\n");
@@ -1468,22 +1515,20 @@ function createRoutes(config, configReader = fileConfigReader) {
1468
1515
  const message = await piComplete(model, context, request, completionOptions);
1469
1516
  if (message.stopReason === "error" || message.stopReason === "aborted") {
1470
1517
  const errorMessage = message.errorMessage ?? "Upstream provider error";
1471
- const mapped = mapUpstreamError(new Error(errorMessage));
1472
- logError({
1518
+ const mapped = handleUpstreamError({
1473
1519
  requestId,
1474
1520
  method: "POST",
1475
1521
  path: "/v1/chat/completions"
1476
- }, errorMessage);
1522
+ }, new Error(errorMessage));
1477
1523
  return c.json(mapped.body, mapped.status);
1478
1524
  }
1479
1525
  return c.json(buildChatCompletion(requestId, canonicalModelId, message));
1480
1526
  } catch (err) {
1481
- const mapped = mapUpstreamError(err);
1482
- logError({
1527
+ const mapped = handleUpstreamError({
1483
1528
  requestId,
1484
1529
  method: "POST",
1485
1530
  path: "/v1/chat/completions"
1486
- }, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1531
+ }, err);
1487
1532
  return c.json(mapped.body, mapped.status);
1488
1533
  }
1489
1534
  });
package/dist/sync-zed.mjs CHANGED
@@ -48,7 +48,7 @@ function toZedModel(exposed) {
48
48
  capabilities: {
49
49
  tools: true,
50
50
  images: model.input.includes("image"),
51
- parallel_tool_calls: false,
51
+ parallel_tool_calls: true,
52
52
  prompt_cache_key: false,
53
53
  chat_completions: true
54
54
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@victor-software-house/pi-openai-proxy",
3
- "version": "4.6.1",
3
+ "version": "4.7.1",
4
4
  "description": "OpenAI-compatible HTTP proxy for pi's multi-provider model registry",
5
5
  "license": "MIT",
6
6
  "author": "Victor Software House",