@victor-software-house/pi-openai-proxy 4.6.1 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -179,6 +179,17 @@ function logDisconnect(ctx) {
179
179
  };
180
180
  console.error(JSON.stringify(entry));
181
181
  }
182
+ function logUpstreamOverload(ctx, status, code) {
183
+ const entry = {
184
+ ts: timestamp(),
185
+ level: "warn",
186
+ event: "upstream_overload",
187
+ requestId: ctx.requestId,
188
+ upstreamStatus: status,
189
+ code
190
+ };
191
+ console.error(JSON.stringify(entry));
192
+ }
182
193
  function logStartup(host, port, modelCount) {
183
194
  const entry = {
184
195
  ts: timestamp(),
@@ -1001,14 +1012,16 @@ function convertTools(openaiTools) {
1001
1012
  /**
1002
1013
  * Zod schemas for the OpenAI chat-completions request subset.
1003
1014
  *
1004
- * Phase 2 contract:
1005
- * - Phase 1 supported fields: model, messages, stream, temperature,
1006
- * max_tokens, max_completion_tokens, stop, user, stream_options
1007
- * - Phase 2 additions: tools, tool_choice, reasoning_effort,
1008
- * top_p, frequency_penalty, presence_penalty, seed, response_format
1015
+ * Supported fields (cumulative through Phase 3D):
1016
+ * - Core: model, messages, stream, temperature, max_tokens,
1017
+ * max_completion_tokens, stop, user, stream_options
1018
+ * - Tools: tools, tool_choice, parallel_tool_calls
1019
+ * - Model control: reasoning_effort, top_p, frequency_penalty,
1020
+ * presence_penalty, seed, response_format
1021
+ * - Client interop: metadata, prediction
1009
1022
  * - Unknown top-level fields are rejected with 422
1010
- * - `n > 1` is rejected
1011
- * - `logprobs` is rejected
1023
+ * - Explicitly rejected: n, logprobs, top_logprobs, logit_bias,
1024
+ * functions (deprecated), function_call (deprecated)
1012
1025
  */
1013
1026
  const textContentPartSchema = z.object({
1014
1027
  type: z.literal("text"),
@@ -1125,6 +1138,7 @@ const chatCompletionRequestSchema = z.object({
1125
1138
  stream_options: streamOptionsSchema.nullable().optional(),
1126
1139
  tools: z.array(functionToolSchema).optional(),
1127
1140
  tool_choice: toolChoiceSchema.optional(),
1141
+ parallel_tool_calls: z.boolean().optional(),
1128
1142
  reasoning_effort: z.enum([
1129
1143
  "none",
1130
1144
  "minimal",
@@ -1137,21 +1151,25 @@ const chatCompletionRequestSchema = z.object({
1137
1151
  frequency_penalty: z.number().min(-2).max(2).optional(),
1138
1152
  presence_penalty: z.number().min(-2).max(2).optional(),
1139
1153
  seed: z.int().optional(),
1140
- response_format: responseFormatSchema.optional()
1154
+ response_format: responseFormatSchema.optional(),
1155
+ metadata: z.record(z.string().trim(), z.unknown()).optional(),
1156
+ prediction: z.object({
1157
+ type: z.literal("content"),
1158
+ content: z.union([z.string().trim(), z.array(z.object({
1159
+ type: z.literal("text"),
1160
+ text: z.string().trim()
1161
+ }))])
1162
+ }).optional()
1141
1163
  }).strict();
1142
1164
  /**
1143
1165
  * Fields that are explicitly rejected with a helpful error.
1144
1166
  *
1145
1167
  * `n`, `logprobs`, `top_logprobs`, `logit_bias`: not supported by the pi SDK's
1146
- * simple completion interface and unlikely to be promoted.
1168
+ * simple completion interface. The proxy returns a single choice with no token
1169
+ * probability data. Promoting these would require response-side changes.
1147
1170
  *
1148
1171
  * `functions`, `function_call`: deprecated OpenAI fields, superseded by `tools`
1149
- * and `tool_choice`.
1150
- *
1151
- * `parallel_tool_calls`: the pi SDK does not expose parallel tool call control.
1152
- * The SSE streaming code handles multiple tool calls per response, so the response
1153
- * side is capable, but the proxy cannot guarantee the flag reaches the provider.
1154
- * Needs deeper analysis — see Phase 3D in TODO.md.
1172
+ * and `tool_choice`. Clients should migrate to the current API.
1155
1173
  */
1156
1174
  const rejectedFields = [
1157
1175
  "n",
@@ -1159,8 +1177,7 @@ const rejectedFields = [
1159
1177
  "top_logprobs",
1160
1178
  "logit_bias",
1161
1179
  "functions",
1162
- "function_call",
1163
- "parallel_tool_calls"
1180
+ "function_call"
1164
1181
  ];
1165
1182
  //#endregion
1166
1183
  //#region src/openai/validate.ts
@@ -1276,6 +1293,18 @@ function collectPayloadFields(request, api) {
1276
1293
  fields["tool_choice"] = request.tool_choice;
1277
1294
  hasFields = true;
1278
1295
  }
1296
+ if (request.parallel_tool_calls !== void 0) {
1297
+ fields["parallel_tool_calls"] = request.parallel_tool_calls;
1298
+ hasFields = true;
1299
+ }
1300
+ if (request.metadata !== void 0) {
1301
+ fields["metadata"] = request.metadata;
1302
+ hasFields = true;
1303
+ }
1304
+ if (request.prediction !== void 0) {
1305
+ fields["prediction"] = request.prediction;
1306
+ hasFields = true;
1307
+ }
1279
1308
  return hasFields ? fields : void 0;
1280
1309
  }
1281
1310
  /**
@@ -1391,6 +1420,15 @@ function createRoutes(config, configReader = fileConfigReader) {
1391
1420
  if (!outcome.ok) throw new Error(`Model exposure configuration error: ${outcome.message}`);
1392
1421
  return outcome;
1393
1422
  }
1423
+ /**
1424
+ * Map upstream error, log it, and emit a structured warn for rate limit / overload.
1425
+ */
1426
+ function handleUpstreamError(ctx, err) {
1427
+ const mapped = mapUpstreamError(err);
1428
+ logError(ctx, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1429
+ if (mapped.status === 429 || mapped.status === 503) logUpstreamOverload(ctx, mapped.status, mapped.body.error.code ?? "unknown");
1430
+ return mapped;
1431
+ }
1394
1432
  const routes = new Hono();
1395
1433
  routes.get("/v1/models", (c) => {
1396
1434
  const exposure = getExposure();
@@ -1452,12 +1490,11 @@ function createRoutes(config, configReader = fileConfigReader) {
1452
1490
  const eventStream = await piStream(model, context, request, completionOptions);
1453
1491
  for await (const frame of streamToSSE(eventStream, requestId, canonicalModelId, includeUsage)) await stream.write(frame);
1454
1492
  } catch (err) {
1455
- const mapped = mapUpstreamError(err);
1456
- logError({
1493
+ const mapped = handleUpstreamError({
1457
1494
  requestId,
1458
1495
  method: "POST",
1459
1496
  path: "/v1/chat/completions"
1460
- }, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1497
+ }, err);
1461
1498
  const errorChunk = JSON.stringify({ error: mapped.body.error });
1462
1499
  await stream.write(`data: ${errorChunk}\n\n`);
1463
1500
  await stream.write("data: [DONE]\n\n");
@@ -1468,22 +1505,20 @@ function createRoutes(config, configReader = fileConfigReader) {
1468
1505
  const message = await piComplete(model, context, request, completionOptions);
1469
1506
  if (message.stopReason === "error" || message.stopReason === "aborted") {
1470
1507
  const errorMessage = message.errorMessage ?? "Upstream provider error";
1471
- const mapped = mapUpstreamError(new Error(errorMessage));
1472
- logError({
1508
+ const mapped = handleUpstreamError({
1473
1509
  requestId,
1474
1510
  method: "POST",
1475
1511
  path: "/v1/chat/completions"
1476
- }, errorMessage);
1512
+ }, new Error(errorMessage));
1477
1513
  return c.json(mapped.body, mapped.status);
1478
1514
  }
1479
1515
  return c.json(buildChatCompletion(requestId, canonicalModelId, message));
1480
1516
  } catch (err) {
1481
- const mapped = mapUpstreamError(err);
1482
- logError({
1517
+ const mapped = handleUpstreamError({
1483
1518
  requestId,
1484
1519
  method: "POST",
1485
1520
  path: "/v1/chat/completions"
1486
- }, mapped.body.error.message, err instanceof Error ? err.message : void 0);
1521
+ }, err);
1487
1522
  return c.json(mapped.body, mapped.status);
1488
1523
  }
1489
1524
  });
package/dist/sync-zed.mjs CHANGED
@@ -48,7 +48,7 @@ function toZedModel(exposed) {
48
48
  capabilities: {
49
49
  tools: true,
50
50
  images: model.input.includes("image"),
51
- parallel_tool_calls: false,
51
+ parallel_tool_calls: true,
52
52
  prompt_cache_key: false,
53
53
  chat_completions: true
54
54
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@victor-software-house/pi-openai-proxy",
3
- "version": "4.6.1",
3
+ "version": "4.7.0",
4
4
  "description": "OpenAI-compatible HTTP proxy for pi's multi-provider model registry",
5
5
  "license": "MIT",
6
6
  "author": "Victor Software House",