@elsium-ai/gateway 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -394,6 +394,94 @@ function createLogger(options = {}) {
394
394
  }
395
395
  };
396
396
  }
397
+ // ../core/src/schema.ts
398
+ var log = createLogger();
399
+ function zodDefKind(def) {
400
+ return typeof def.type === "string" ? def.type : def.typeName;
401
+ }
402
+ function zodObjectToJsonSchema(schema, convert) {
403
+ const shape = typeof schema.shape === "function" ? schema.shape() : schema.shape;
404
+ const properties = {};
405
+ const required = [];
406
+ for (const [key, value] of Object.entries(shape)) {
407
+ const fieldSchema = value;
408
+ properties[key] = convert(fieldSchema);
409
+ const fieldDef = fieldSchema._def;
410
+ const fieldKind = zodDefKind(fieldDef);
411
+ if (fieldKind !== "optional" && fieldKind !== "ZodOptional" && fieldKind !== "default" && fieldKind !== "ZodDefault") {
412
+ required.push(key);
413
+ }
414
+ if (fieldDef.description) {
415
+ properties[key].description = fieldDef.description;
416
+ }
417
+ }
418
+ return { type: "object", properties, required };
419
+ }
420
+ function zodToJsonSchema(schema) {
421
+ if (!("_def" in schema))
422
+ return { type: "object" };
423
+ const def = schema._def;
424
+ const kind = zodDefKind(def);
425
+ switch (kind) {
426
+ case "object":
427
+ case "ZodObject":
428
+ return zodObjectToJsonSchema(def, zodToJsonSchema);
429
+ case "string":
430
+ case "ZodString":
431
+ return { type: "string" };
432
+ case "number":
433
+ case "ZodNumber":
434
+ return { type: "number" };
435
+ case "boolean":
436
+ case "ZodBoolean":
437
+ return { type: "boolean" };
438
+ case "array":
439
+ case "ZodArray":
440
+ return {
441
+ type: "array",
442
+ items: zodToJsonSchema(def.element ?? def.type)
443
+ };
444
+ case "enum":
445
+ case "ZodEnum": {
446
+ const values = def.values ?? (def.entries ? Object.values(def.entries) : []);
447
+ return { type: "string", enum: values };
448
+ }
449
+ case "optional":
450
+ case "ZodOptional":
451
+ return zodToJsonSchema(def.innerType);
452
+ case "default":
453
+ case "ZodDefault":
454
+ return zodToJsonSchema(def.innerType);
455
+ case "nullable":
456
+ case "ZodNullable": {
457
+ const inner = zodToJsonSchema(def.innerType);
458
+ return { ...inner, nullable: true };
459
+ }
460
+ case "ZodLiteral":
461
+ return { type: typeof def.value, const: def.value };
462
+ case "ZodUnion": {
463
+ const options = def.options.map(zodToJsonSchema);
464
+ return { anyOf: options };
465
+ }
466
+ case "ZodRecord":
467
+ return {
468
+ type: "object",
469
+ additionalProperties: def.valueType ? zodToJsonSchema(def.valueType) : { type: "string" }
470
+ };
471
+ case "ZodTuple": {
472
+ const items = (def.items ?? []).map(zodToJsonSchema);
473
+ return { type: "array", prefixItems: items, minItems: items.length, maxItems: items.length };
474
+ }
475
+ case "ZodDate":
476
+ return { type: "string", format: "date-time" };
477
+ default:
478
+ log.warn(`zodToJsonSchema: unsupported type ${kind}, defaulting to string`);
479
+ return { type: "string" };
480
+ }
481
+ }
482
+ // ../core/src/registry.ts
483
+ var log2 = createLogger();
484
+ var BLOCKED_KEYS = new Set(["__proto__", "constructor", "prototype"]);
397
485
  // ../core/src/circuit-breaker.ts
398
486
  function defaultShouldCount(error) {
399
487
  if (error && typeof error === "object" && "retryable" in error) {
@@ -559,17 +647,28 @@ function composeMiddleware(middlewares) {
559
647
  return dispatch(0);
560
648
  };
561
649
  }
650
+ function composeStreamMiddleware(middlewares) {
651
+ return (ctx, source, finalNext) => {
652
+ function dispatch(i, currentCtx, currentSource) {
653
+ if (i >= middlewares.length) {
654
+ return finalNext(currentCtx, currentSource);
655
+ }
656
+ return middlewares[i](currentCtx, currentSource, (c, s) => dispatch(i + 1, c, s));
657
+ }
658
+ return dispatch(0, ctx, source);
659
+ };
660
+ }
562
661
  function loggingMiddleware(logger) {
563
- const log = logger ?? createLogger({ level: "info" });
662
+ const log3 = logger ?? createLogger({ level: "info" });
564
663
  return async (ctx, next) => {
565
- log.info("LLM request", {
664
+ log3.info("LLM request", {
566
665
  provider: ctx.provider,
567
666
  model: ctx.model,
568
667
  traceId: ctx.traceId,
569
668
  messageCount: ctx.request.messages.length
570
669
  });
571
670
  const response = await next(ctx);
572
- log.info("LLM response", {
671
+ log3.info("LLM response", {
573
672
  provider: ctx.provider,
574
673
  model: ctx.model,
575
674
  traceId: ctx.traceId,
@@ -705,7 +804,7 @@ function xrayMiddleware(options = {}) {
705
804
  }
706
805
 
707
806
  // src/pricing.ts
708
- var log = createLogger();
807
+ var log3 = createLogger();
709
808
  var PRICING = {
710
809
  "claude-opus-4-6": { inputPerMillion: 15, outputPerMillion: 75 },
711
810
  "claude-sonnet-4-6": { inputPerMillion: 3, outputPerMillion: 15 },
@@ -743,7 +842,7 @@ function resolveModelName(model) {
743
842
  function calculateCost(model, usage) {
744
843
  const pricing = PRICING[resolveModelName(model)];
745
844
  if (!pricing) {
746
- log.warn(`Unknown model "${model}" — cost will be reported as $0. Register pricing with registerPricing().`);
845
+ log3.warn(`Unknown model "${model}" — cost will be reported as $0. Register pricing with registerPricing().`);
747
846
  return {
748
847
  inputCost: 0,
749
848
  outputCost: 0,
@@ -763,6 +862,12 @@ function calculateCost(model, usage) {
763
862
  function registerPricing(model, pricing) {
764
863
  PRICING[model] = pricing;
765
864
  }
865
+ function estimateCost(model, tokenCount) {
866
+ const pricing = PRICING[resolveModelName(model)];
867
+ if (!pricing)
868
+ return 0;
869
+ return tokenCount / 1e6 * pricing.inputPerMillion;
870
+ }
766
871
 
767
872
  // src/providers/anthropic.ts
768
873
  var DEFAULT_BASE_URL = "https://api.anthropic.com";
@@ -837,15 +942,33 @@ function createAnthropicProvider(config) {
837
942
  if (part.type === "text")
838
943
  return { type: "text", text: part.text };
839
944
  if (part.type === "image" && part.source?.type === "base64") {
945
+ const src = part.source;
840
946
  return {
841
947
  type: "image",
842
948
  source: {
843
949
  type: "base64",
844
- media_type: part.source.mediaType,
845
- data: part.source.data
950
+ media_type: src.mediaType,
951
+ data: src.data
846
952
  }
847
953
  };
848
954
  }
955
+ if (part.type === "document" && part.source) {
956
+ if (part.source.type === "base64") {
957
+ const src = part.source;
958
+ return {
959
+ type: "document",
960
+ source: {
961
+ type: "base64",
962
+ media_type: src.mediaType,
963
+ data: src.data
964
+ }
965
+ };
966
+ }
967
+ return { type: "text", text: "[document: url source not supported by Anthropic]" };
968
+ }
969
+ if (part.type === "audio") {
970
+ return { type: "text", text: "[audio content not supported by this provider]" };
971
+ }
849
972
  return { type: "text", text: "[unsupported content]" };
850
973
  }
851
974
  function formatMultipartContent(msg, role) {
@@ -888,6 +1011,52 @@ function createAnthropicProvider(config) {
888
1011
  input_schema: t.inputSchema
889
1012
  }));
890
1013
  }
1014
+ function buildOptionalParams(req) {
1015
+ const params = {};
1016
+ if (req.temperature !== undefined)
1017
+ params.temperature = req.temperature;
1018
+ if (req.topP !== undefined)
1019
+ params.top_p = req.topP;
1020
+ if (req.stopSequences?.length)
1021
+ params.stop_sequences = req.stopSequences;
1022
+ return params;
1023
+ }
1024
+ function applyStructuredOutput(body, req, tools) {
1025
+ if (!req.schema)
1026
+ return;
1027
+ const jsonSchema = zodToJsonSchema(req.schema);
1028
+ const structuredTool = {
1029
+ name: "_structured_output",
1030
+ description: "Return structured output matching the required schema",
1031
+ input_schema: jsonSchema
1032
+ };
1033
+ body.tools = [...tools ?? [], structuredTool];
1034
+ body.tool_choice = { type: "tool", name: "_structured_output" };
1035
+ }
1036
+ function buildRequestBody(req) {
1037
+ const { system, messages } = formatMessages(req.messages);
1038
+ const model = req.model ?? "claude-sonnet-4-6";
1039
+ const body = {
1040
+ model,
1041
+ messages,
1042
+ max_tokens: req.maxTokens ?? DEFAULT_MAX_TOKENS,
1043
+ ...system || req.system ? { system: req.system ?? system } : {},
1044
+ ...buildOptionalParams(req),
1045
+ ...buildSeedMetadata(req)
1046
+ };
1047
+ const tools = formatTools(req.tools);
1048
+ if (tools)
1049
+ body.tools = tools;
1050
+ applyStructuredOutput(body, req, tools);
1051
+ return body;
1052
+ }
1053
+ function executeWithTimeout(fn, reqSignal) {
1054
+ const controller = new AbortController;
1055
+ const timer = setTimeout(() => controller.abort(), timeout);
1056
+ const signals = [controller.signal, reqSignal].filter(Boolean);
1057
+ const mergedSignal = signals.length > 1 ? AbortSignal.any(signals) : signals[0];
1058
+ return fn(mergedSignal).finally(() => clearTimeout(timer));
1059
+ }
891
1060
  function extractContentBlocks(content) {
892
1061
  const toolCalls = [];
893
1062
  const textParts = [];
@@ -939,34 +1108,12 @@ function createAnthropicProvider(config) {
939
1108
  authStyle: "x-api-key"
940
1109
  },
941
1110
  async complete(req) {
942
- const { system, messages } = formatMessages(req.messages);
943
- const model = req.model ?? "claude-sonnet-4-6";
944
- const body = {
945
- model,
946
- messages,
947
- max_tokens: req.maxTokens ?? DEFAULT_MAX_TOKENS,
948
- ...system || req.system ? { system: req.system ?? system } : {},
949
- ...req.temperature !== undefined ? { temperature: req.temperature } : {},
950
- ...req.topP !== undefined ? { top_p: req.topP } : {},
951
- ...req.stopSequences?.length ? { stop_sequences: req.stopSequences } : {},
952
- ...buildSeedMetadata(req)
953
- };
954
- const tools = formatTools(req.tools);
955
- if (tools)
956
- body.tools = tools;
1111
+ const body = buildRequestBody(req);
957
1112
  const startTime = performance.now();
958
- const raw = await retry(async () => {
959
- const controller = new AbortController;
960
- const timer = setTimeout(() => controller.abort(), timeout);
961
- try {
962
- const signals = [controller.signal, req.signal].filter(Boolean);
963
- const mergedSignal = signals.length > 1 ? AbortSignal.any(signals) : signals[0];
964
- const resp = await request("/messages", body, mergedSignal);
965
- return await resp.json();
966
- } finally {
967
- clearTimeout(timer);
968
- }
969
- }, {
1113
+ const raw = await retry(() => executeWithTimeout(async (signal) => {
1114
+ const resp = await request("/messages", body, signal);
1115
+ return await resp.json();
1116
+ }, req.signal), {
970
1117
  maxRetries,
971
1118
  baseDelayMs: 1000,
972
1119
  shouldRetry: (e) => e instanceof ElsiumError && e.retryable
@@ -975,29 +1122,12 @@ function createAnthropicProvider(config) {
975
1122
  return parseResponse(raw, latencyMs);
976
1123
  },
977
1124
  stream(req) {
978
- const { system, messages } = formatMessages(req.messages);
979
- const model = req.model ?? "claude-sonnet-4-6";
980
- const body = {
981
- model,
982
- messages,
983
- max_tokens: req.maxTokens ?? DEFAULT_MAX_TOKENS,
984
- stream: true,
985
- ...system || req.system ? { system: req.system ?? system } : {},
986
- ...req.temperature !== undefined ? { temperature: req.temperature } : {},
987
- ...req.topP !== undefined ? { top_p: req.topP } : {},
988
- ...req.stopSequences?.length ? { stop_sequences: req.stopSequences } : {},
989
- ...buildSeedMetadata(req)
990
- };
991
- const tools = formatTools(req.tools);
992
- if (tools)
993
- body.tools = tools;
1125
+ const body = buildRequestBody(req);
1126
+ body.stream = true;
1127
+ const model = body.model ?? "claude-sonnet-4-6";
994
1128
  return createStream(async (emit) => {
995
- const controller = new AbortController;
996
- const timer = setTimeout(() => controller.abort(), timeout);
997
- try {
998
- const signals = [controller.signal, req.signal].filter(Boolean);
999
- const mergedSignal = signals.length > 1 ? AbortSignal.any(signals) : signals[0];
1000
- const resp = await request("/messages", body, mergedSignal);
1129
+ await executeWithTimeout(async (signal) => {
1130
+ const resp = await request("/messages", body, signal);
1001
1131
  if (!resp.body)
1002
1132
  throw new ElsiumError({
1003
1133
  code: "STREAM_ERROR",
@@ -1006,9 +1136,7 @@ function createAnthropicProvider(config) {
1006
1136
  retryable: false
1007
1137
  });
1008
1138
  await processAnthropicSSEStream(resp.body, model, emit);
1009
- } finally {
1010
- clearTimeout(timer);
1011
- }
1139
+ }, req.signal);
1012
1140
  });
1013
1141
  },
1014
1142
  async listModels() {
@@ -1162,19 +1290,38 @@ function createGoogleProvider(config) {
1162
1290
  }
1163
1291
  return { role, parts };
1164
1292
  }
1293
+ function convertGeminiImagePart(p) {
1294
+ if (p.source.type === "base64") {
1295
+ return { inlineData: { mimeType: p.source.mediaType, data: p.source.data } };
1296
+ }
1297
+ return { fileData: { mimeType: "image/jpeg", fileUri: p.source.url } };
1298
+ }
1299
+ function convertGeminiMediaPart(p) {
1300
+ if (p.source.type === "base64") {
1301
+ return { inlineData: { mimeType: p.source.mediaType, data: p.source.data } };
1302
+ }
1303
+ const urlSource = p.source;
1304
+ return { fileData: { mimeType: "application/octet-stream", fileUri: urlSource.url } };
1305
+ }
1306
+ function convertGeminiContentPart(p) {
1307
+ if (p.type === "text") {
1308
+ return { text: p.text };
1309
+ }
1310
+ if (p.type === "image") {
1311
+ return convertGeminiImagePart(p);
1312
+ }
1313
+ if (p.type === "audio" || p.type === "document") {
1314
+ return convertGeminiMediaPart(p);
1315
+ }
1316
+ return null;
1317
+ }
1165
1318
  function formatGeminiMultipartContent(msg, role) {
1319
+ const content = msg.content;
1166
1320
  const parts = [];
1167
- for (const p of msg.content) {
1168
- if (p.type === "text") {
1169
- parts.push({ text: p.text });
1170
- } else if (p.type === "image") {
1171
- const img = p;
1172
- if (img.source.type === "base64") {
1173
- parts.push({ inlineData: { mimeType: img.source.mediaType, data: img.source.data } });
1174
- } else {
1175
- parts.push({ fileData: { mimeType: "image/jpeg", fileUri: img.source.url } });
1176
- }
1177
- }
1321
+ for (const p of content) {
1322
+ const converted = convertGeminiContentPart(p);
1323
+ if (converted)
1324
+ parts.push(converted);
1178
1325
  }
1179
1326
  return { role, parts };
1180
1327
  }
@@ -1272,6 +1419,10 @@ function createGoogleProvider(config) {
1272
1419
  config2.topP = req.topP;
1273
1420
  if (req.stopSequences?.length)
1274
1421
  config2.stopSequences = req.stopSequences;
1422
+ if (req.schema) {
1423
+ config2.responseMimeType = "application/json";
1424
+ config2.responseSchema = zodToJsonSchema(req.schema);
1425
+ }
1275
1426
  return config2;
1276
1427
  }
1277
1428
  function buildRequestBody(req) {
@@ -1537,21 +1688,48 @@ function createOpenAIProvider(config) {
1537
1688
  }
1538
1689
  return openaiMsg;
1539
1690
  }
1691
+ function convertImagePart(part) {
1692
+ if (part.source.type === "base64") {
1693
+ const url = `data:${part.source.mediaType};base64,${part.source.data}`;
1694
+ return { type: "image_url", image_url: { url } };
1695
+ }
1696
+ return { type: "image_url", image_url: { url: part.source.url } };
1697
+ }
1698
+ function convertAudioPart(part) {
1699
+ if (part.source.type === "base64") {
1700
+ const format = part.source.mediaType.split("/")[1] ?? "wav";
1701
+ return { type: "input_audio", input_audio: { data: part.source.data, format } };
1702
+ }
1703
+ return { type: "text", text: "[audio: url source requires file upload]" };
1704
+ }
1705
+ function convertDocumentPart(part) {
1706
+ if (part.source.type === "base64") {
1707
+ return {
1708
+ type: "text",
1709
+ text: `[document: ${part.source.mediaType} content attached as base64]`
1710
+ };
1711
+ }
1712
+ return { type: "text", text: `[document: ${part.source.url}]` };
1713
+ }
1714
+ function convertContentPart(part) {
1715
+ if (part.type === "text")
1716
+ return { type: "text", text: part.text };
1717
+ if (part.type === "image")
1718
+ return convertImagePart(part);
1719
+ if (part.type === "audio")
1720
+ return convertAudioPart(part);
1721
+ if (part.type === "document")
1722
+ return convertDocumentPart(part);
1723
+ return null;
1724
+ }
1540
1725
  function formatUserContent(msg) {
1541
1726
  if (typeof msg.content === "string")
1542
1727
  return msg.content;
1543
1728
  const parts = [];
1544
1729
  for (const part of msg.content) {
1545
- if (part.type === "text") {
1546
- parts.push({ type: "text", text: part.text });
1547
- } else if (part.type === "image") {
1548
- if (part.source.type === "base64") {
1549
- const url = `data:${part.source.mediaType};base64,${part.source.data}`;
1550
- parts.push({ type: "image_url", image_url: { url } });
1551
- } else {
1552
- parts.push({ type: "image_url", image_url: { url: part.source.url } });
1553
- }
1554
- }
1730
+ const converted = convertContentPart(part);
1731
+ if (converted)
1732
+ parts.push(converted);
1555
1733
  }
1556
1734
  return parts;
1557
1735
  }
@@ -1586,6 +1764,49 @@ function createOpenAIProvider(config) {
1586
1764
  }
1587
1765
  }));
1588
1766
  }
1767
+ function buildOptionalParams(req) {
1768
+ const params = {};
1769
+ if (req.temperature !== undefined)
1770
+ params.temperature = req.temperature;
1771
+ if (req.seed !== undefined)
1772
+ params.seed = req.seed;
1773
+ if (req.topP !== undefined)
1774
+ params.top_p = req.topP;
1775
+ if (req.stopSequences?.length)
1776
+ params.stop = req.stopSequences;
1777
+ return params;
1778
+ }
1779
+ function applyResponseFormat(body, req) {
1780
+ if (!req.schema)
1781
+ return;
1782
+ const jsonSchema = zodToJsonSchema(req.schema);
1783
+ body.response_format = {
1784
+ type: "json_schema",
1785
+ json_schema: {
1786
+ name: "structured_output",
1787
+ strict: true,
1788
+ schema: jsonSchema
1789
+ }
1790
+ };
1791
+ }
1792
+ function buildRequestBody(req) {
1793
+ const messages = formatMessages(req.messages);
1794
+ const model = req.model ?? "gpt-4o";
1795
+ if (req.system) {
1796
+ messages.unshift({ role: "system", content: req.system });
1797
+ }
1798
+ const body = {
1799
+ model,
1800
+ messages,
1801
+ max_tokens: req.maxTokens ?? DEFAULT_MAX_TOKENS2,
1802
+ ...buildOptionalParams(req)
1803
+ };
1804
+ const tools = formatTools(req.tools);
1805
+ if (tools)
1806
+ body.tools = tools;
1807
+ applyResponseFormat(body, req);
1808
+ return body;
1809
+ }
1589
1810
  function parseResponse(raw, latencyMs) {
1590
1811
  const traceId = generateTraceId();
1591
1812
  const choice = raw.choices[0];
@@ -1630,23 +1851,7 @@ function createOpenAIProvider(config) {
1630
1851
  authStyle: "bearer"
1631
1852
  },
1632
1853
  async complete(req) {
1633
- const messages = formatMessages(req.messages);
1634
- const model = req.model ?? "gpt-4o";
1635
- if (req.system) {
1636
- messages.unshift({ role: "system", content: req.system });
1637
- }
1638
- const body = {
1639
- model,
1640
- messages,
1641
- max_tokens: req.maxTokens ?? DEFAULT_MAX_TOKENS2,
1642
- ...req.temperature !== undefined ? { temperature: req.temperature } : {},
1643
- ...req.seed !== undefined ? { seed: req.seed } : {},
1644
- ...req.topP !== undefined ? { top_p: req.topP } : {},
1645
- ...req.stopSequences?.length ? { stop: req.stopSequences } : {}
1646
- };
1647
- const tools = formatTools(req.tools);
1648
- if (tools)
1649
- body.tools = tools;
1854
+ const body = buildRequestBody(req);
1650
1855
  const startTime = performance.now();
1651
1856
  const raw = await retry(async () => {
1652
1857
  const controller = new AbortController;
@@ -1668,25 +1873,10 @@ function createOpenAIProvider(config) {
1668
1873
  return parseResponse(raw, latencyMs);
1669
1874
  },
1670
1875
  stream(req) {
1671
- const messages = formatMessages(req.messages);
1672
- const model = req.model ?? "gpt-4o";
1673
- if (req.system) {
1674
- messages.unshift({ role: "system", content: req.system });
1675
- }
1676
- const body = {
1677
- model,
1678
- messages,
1679
- max_tokens: req.maxTokens ?? DEFAULT_MAX_TOKENS2,
1680
- stream: true,
1681
- stream_options: { include_usage: true },
1682
- ...req.temperature !== undefined ? { temperature: req.temperature } : {},
1683
- ...req.seed !== undefined ? { seed: req.seed } : {},
1684
- ...req.topP !== undefined ? { top_p: req.topP } : {},
1685
- ...req.stopSequences?.length ? { stop: req.stopSequences } : {}
1686
- };
1687
- const tools = formatTools(req.tools);
1688
- if (tools)
1689
- body.tools = tools;
1876
+ const body = buildRequestBody(req);
1877
+ body.stream = true;
1878
+ body.stream_options = { include_usage: true };
1879
+ const model = body.model ?? "gpt-4o";
1690
1880
  return createStream(async (emit) => {
1691
1881
  const controller = new AbortController;
1692
1882
  const timer = setTimeout(() => controller.abort(), timeout);
@@ -1806,15 +1996,32 @@ var PROVIDER_FACTORIES = {
1806
1996
  openai: createOpenAIProvider,
1807
1997
  google: createGoogleProvider
1808
1998
  };
1999
+ registerProviderMetadata("anthropic", {
2000
+ baseUrl: "https://api.anthropic.com/v1/messages",
2001
+ capabilities: ["tools", "vision", "streaming", "system"],
2002
+ authStyle: "x-api-key"
2003
+ });
2004
+ registerProviderMetadata("openai", {
2005
+ baseUrl: "https://api.openai.com/v1/chat/completions",
2006
+ capabilities: ["tools", "vision", "streaming", "system", "json_mode"],
2007
+ authStyle: "bearer"
2008
+ });
2009
+ registerProviderMetadata("google", {
2010
+ baseUrl: "https://generativelanguage.googleapis.com/v1beta/models",
2011
+ capabilities: ["tools", "vision", "streaming", "system"],
2012
+ authStyle: "bearer"
2013
+ });
1809
2014
  function registerProviderFactory(name, factory) {
1810
2015
  PROVIDER_FACTORIES[name] = factory;
1811
2016
  }
1812
2017
  function validateGatewayConfig(config) {
1813
- const factory = PROVIDER_FACTORIES[config.provider];
2018
+ const factory = PROVIDER_FACTORIES[config.provider] ?? getProviderFactory(config.provider);
1814
2019
  if (!factory) {
2020
+ const available = [...Object.keys(PROVIDER_FACTORIES), ...listProviders()];
2021
+ const unique = [...new Set(available)];
1815
2022
  throw new ElsiumError({
1816
2023
  code: "CONFIG_ERROR",
1817
- message: `Unknown provider: ${config.provider}. Available: ${Object.keys(PROVIDER_FACTORIES).join(", ")}`,
2024
+ message: `Unknown provider: ${config.provider}. Available: ${unique.join(", ")}`,
1818
2025
  retryable: false
1819
2026
  });
1820
2027
  }
@@ -1892,6 +2099,24 @@ async function accumulateStreamEvents(stream, emit) {
1892
2099
  }
1893
2100
  return { textContent, usage, stopReason, id };
1894
2101
  }
2102
+ function extractFromToolCalls(response) {
2103
+ if (response.stopReason !== "tool_use" || !response.message.toolCalls?.length) {
2104
+ return;
2105
+ }
2106
+ const structuredCall = response.message.toolCalls.find((tc) => tc.name === "_structured_output");
2107
+ return structuredCall?.arguments;
2108
+ }
2109
+ function extractJsonFromText(response) {
2110
+ let text = typeof response.message.content === "string" ? response.message.content : "";
2111
+ text = text.replace(/^```(?:json)?\s*\n?([\s\S]*?)\n?\s*```$/gm, "$1").trim();
2112
+ const jsonMatch = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
2113
+ if (!jsonMatch) {
2114
+ throw ElsiumError.validation("LLM response did not contain valid JSON", {
2115
+ response: text
2116
+ });
2117
+ }
2118
+ return JSON.parse(jsonMatch[0]);
2119
+ }
1895
2120
  function gateway(config) {
1896
2121
  const factory = validateGatewayConfig(config);
1897
2122
  const provider = factory({
@@ -1913,6 +2138,7 @@ function gateway(config) {
1913
2138
  allMiddleware.push(xm);
1914
2139
  }
1915
2140
  const composedMiddleware = allMiddleware.length ? composeMiddleware(allMiddleware) : null;
2141
+ const composedStreamMiddleware = config.streamMiddleware?.length ? composeStreamMiddleware(config.streamMiddleware) : null;
1916
2142
  async function executeWithMiddleware(request) {
1917
2143
  const req = { ...request, model: request.model ?? defaultModel };
1918
2144
  if (!composedMiddleware) {
@@ -1937,11 +2163,11 @@ function gateway(config) {
1937
2163
  validateRequestLimits(request, maxMessages, maxInputTokens);
1938
2164
  const req = { ...request, model: request.model ?? defaultModel };
1939
2165
  if (composedMiddleware) {
1940
- const ctx = buildMiddlewareContext(req, provider.name, defaultModel, request.metadata ?? {});
2166
+ const ctx2 = buildMiddlewareContext(req, provider.name, defaultModel, request.metadata ?? {});
1941
2167
  return createStream(async (emit) => {
1942
- await composedMiddleware(ctx, async (c) => {
2168
+ await composedMiddleware(ctx2, async (c) => {
1943
2169
  const result = await accumulateStreamEvents(provider.stream(c.request), emit);
1944
- const latencyMs = Math.round(performance.now() - ctx.startTime);
2170
+ const latencyMs = Math.round(performance.now() - ctx2.startTime);
1945
2171
  return {
1946
2172
  id: result.id,
1947
2173
  message: { role: "assistant", content: result.textContent },
@@ -1951,116 +2177,48 @@ function gateway(config) {
1951
2177
  provider: provider.name,
1952
2178
  stopReason: result.stopReason,
1953
2179
  latencyMs,
1954
- traceId: ctx.traceId
2180
+ traceId: ctx2.traceId
1955
2181
  };
1956
2182
  });
1957
2183
  });
1958
2184
  }
1959
- return provider.stream(req);
2185
+ const rawStream = provider.stream(req);
2186
+ if (!composedStreamMiddleware)
2187
+ return rawStream;
2188
+ const ctx = buildMiddlewareContext(req, provider.name, defaultModel, request.metadata ?? {});
2189
+ return createStream(async (emit) => {
2190
+ const processed = composedStreamMiddleware(ctx, rawStream, (_c, s) => s);
2191
+ for await (const event of processed) {
2192
+ emit(event);
2193
+ }
2194
+ });
1960
2195
  },
1961
2196
  async generate(request) {
1962
2197
  const { schema, ...rest } = request;
1963
- const jsonSchema = schemaToJsonSchema(schema);
1964
- const systemPrompt = [
1965
- rest.system ?? "",
1966
- "You MUST respond with valid JSON matching this schema:",
1967
- JSON.stringify(jsonSchema, null, 2),
1968
- "Respond ONLY with the JSON object, no markdown or explanation."
1969
- ].filter(Boolean).join(`
1970
-
1971
- `);
2198
+ const jsonSchema = zodToJsonSchema(schema);
1972
2199
  const response = await executeWithMiddleware({
1973
2200
  ...rest,
1974
- system: systemPrompt
2201
+ schema,
2202
+ system: [
2203
+ rest.system ?? "",
2204
+ "You MUST respond with valid JSON matching this schema:",
2205
+ JSON.stringify(jsonSchema, null, 2),
2206
+ "Respond ONLY with the JSON object, no markdown or explanation."
2207
+ ].filter(Boolean).join(`
2208
+
2209
+ `)
1975
2210
  });
1976
- const text = typeof response.message.content === "string" ? response.message.content : "";
1977
- const jsonMatch = text.match(/\{[\s\S]*\}/);
1978
- if (!jsonMatch) {
1979
- throw ElsiumError.validation("LLM response did not contain valid JSON", {
1980
- response: text
1981
- });
1982
- }
1983
- const parsed = JSON.parse(jsonMatch[0]);
2211
+ const parsed = extractFromToolCalls(response) ?? extractJsonFromText(response);
1984
2212
  const result = schema.safeParse(parsed);
1985
2213
  if (!result.success) {
1986
2214
  throw ElsiumError.validation("LLM response did not match schema", {
1987
- errors: result.error.issues,
1988
- response: text
2215
+ errors: result.error.issues
1989
2216
  });
1990
2217
  }
1991
2218
  return { data: result.data, response };
1992
2219
  }
1993
2220
  };
1994
2221
  }
1995
- function schemaToJsonSchema(schema) {
1996
- try {
1997
- if ("_def" in schema) {
1998
- const def = schema._def;
1999
- const result = convertZodDef(def);
2000
- if (result)
2001
- return result;
2002
- }
2003
- } catch {}
2004
- return { type: "string" };
2005
- }
2006
- function zodDefKind(def) {
2007
- return typeof def.type === "string" ? def.type : def.typeName;
2008
- }
2009
- function convertZodDef(def) {
2010
- const kind = zodDefKind(def);
2011
- switch (kind) {
2012
- case "object":
2013
- case "ZodObject":
2014
- return convertZodObject(def);
2015
- case "string":
2016
- case "ZodString":
2017
- return { type: "string" };
2018
- case "number":
2019
- case "ZodNumber":
2020
- return { type: "number" };
2021
- case "boolean":
2022
- case "ZodBoolean":
2023
- return { type: "boolean" };
2024
- case "array":
2025
- case "ZodArray":
2026
- return convertZodArray(def);
2027
- case "enum":
2028
- case "ZodEnum": {
2029
- const values = def.values ?? (def.entries ? Object.values(def.entries) : []);
2030
- return { type: "string", enum: values };
2031
- }
2032
- case "optional":
2033
- case "ZodOptional":
2034
- return convertZodOptional(def);
2035
- default:
2036
- return null;
2037
- }
2038
- }
2039
- function convertZodObject(def) {
2040
- if (!def.shape)
2041
- return null;
2042
- const shape = typeof def.shape === "function" ? def.shape() : def.shape;
2043
- const properties = {};
2044
- const required = [];
2045
- for (const [key, value] of Object.entries(shape)) {
2046
- properties[key] = schemaToJsonSchema(value);
2047
- const valDef = value._def;
2048
- const valKind = zodDefKind(valDef);
2049
- if (valKind !== "optional" && valKind !== "ZodOptional") {
2050
- required.push(key);
2051
- }
2052
- }
2053
- return { type: "object", properties, required };
2054
- }
2055
- function convertZodArray(def) {
2056
- return {
2057
- type: "array",
2058
- items: schemaToJsonSchema(def.element ?? def.type)
2059
- };
2060
- }
2061
- function convertZodOptional(def) {
2062
- return schemaToJsonSchema(def.innerType ?? def.innerType);
2063
- }
2064
2222
  // src/security.ts
2065
2223
  var INJECTION_PATTERNS = [
2066
2224
  {
@@ -2118,6 +2276,21 @@ var SECRET_PATTERNS = [
2118
2276
  detail: "API public key detected",
2119
2277
  replacement: "[REDACTED_API_KEY]"
2120
2278
  },
2279
+ {
2280
+ pattern: /\bghp_[a-zA-Z0-9]{36,}\b/g,
2281
+ detail: "GitHub personal access token detected",
2282
+ replacement: "[REDACTED_GITHUB_TOKEN]"
2283
+ },
2284
+ {
2285
+ pattern: /\bgho_[a-zA-Z0-9]{36,}\b/g,
2286
+ detail: "GitHub OAuth token detected",
2287
+ replacement: "[REDACTED_GITHUB_TOKEN]"
2288
+ },
2289
+ {
2290
+ pattern: /\bgithub_pat_[a-zA-Z0-9_]{20,}\b/g,
2291
+ detail: "GitHub fine-grained token detected",
2292
+ replacement: "[REDACTED_GITHUB_TOKEN]"
2293
+ },
2121
2294
  {
2122
2295
  pattern: /\bapi_key[=:]\s*["']?[a-zA-Z0-9_-]{16,}["']?/gi,
2123
2296
  detail: "API key assignment detected",
@@ -2423,6 +2596,334 @@ function bulkheadMiddleware(config) {
2423
2596
  return bulkhead.execute(() => next(ctx));
2424
2597
  };
2425
2598
  }
2599
+ // src/cache.ts
2600
+ import { createHash } from "node:crypto";
2601
+ var log4 = createLogger();
2602
/**
 * Creates a bounded in-process cache adapter with per-entry TTL and
 * least-recently-used eviction.
 *
 * @param maxSize Maximum number of entries kept. When a write pushes the
 *   cache past this limit the least recently used entry is dropped.
 * @returns An adapter with async `get` / `set` / `delete` / `clear` and a
 *   synchronous `size()` reporting the number of stored entries (entries that
 *   have expired but have not been read yet are still counted).
 */
function createInMemoryCache(maxSize = 1000) {
  // A Map iterates in insertion order, so its first key is always the least
  // recently used one as long as every touch re-inserts the entry.
  const cache = new Map();
  function evict() {
    if (cache.size <= maxSize)
      return;
    const firstKey = cache.keys().next().value;
    if (firstKey !== undefined)
      cache.delete(firstKey);
  }
  return {
    async get(key) {
      const entry = cache.get(key);
      if (!entry)
        return null;
      if (Date.now() > entry.expiresAt) {
        cache.delete(key);
        return null;
      }
      // Re-insert to mark the entry as most recently used.
      cache.delete(key);
      cache.set(key, entry);
      return entry.value;
    },
    async set(key, value, ttlMs) {
      // Map.set() on an existing key keeps its old position, which would leave
      // a freshly rewritten entry first in line for eviction. Delete first so
      // the write also counts as a "use".
      cache.delete(key);
      cache.set(key, { value, expiresAt: Date.now() + ttlMs });
      evict();
    },
    async delete(key) {
      cache.delete(key);
    },
    async clear() {
      cache.clear();
    },
    size() {
      return cache.size;
    }
  };
}
/**
 * Builds the default cache key: a SHA-256 hex digest over the provider, model,
 * messages, system prompt and temperature of the request.
 *
 * Only these five fields take part in the key, so two requests that differ
 * in any other field map to the same entry; supply `keyFn` to change that.
 */
function defaultCacheKey(ctx) {
  const { messages, system, temperature } = ctx.request;
  const data = JSON.stringify({
    provider: ctx.provider,
    model: ctx.model,
    messages,
    system,
    temperature
  });
  return createHash("sha256").update(data).digest("hex");
}
/**
 * Default caching policy: store a response only when the request temperature
 * is unset or exactly 0 and the response stopped with "end_turn".
 */
function defaultShouldCache(_ctx, response) {
  const { temperature } = _ctx.request;
  const deterministic = temperature === undefined || temperature === 0;
  return deterministic && response.stopReason === "end_turn";
}
/**
 * Creates a response-caching middleware.
 *
 * Streaming requests bypass the cache entirely. For other requests a truthy
 * cached value is returned without calling `next`; otherwise the downstream
 * response is stored when `shouldCache` approves it.
 *
 * @param config Optional settings: `ttlMs` (default 3600000), `adapter`
 *   (default: in-memory cache sized by `maxSize`, default 1000), `keyFn` and
 *   `shouldCache`.
 * @returns The middleware function, extended with `adapter` and `stats()`.
 *   `stats().size` comes from the adapter's synchronous `size()` when it has
 *   one and is 0 otherwise.
 */
function cacheMiddleware(config) {
  const ttlMs = config?.ttlMs ?? 3600000;
  const adapter = config?.adapter ?? createInMemoryCache(config?.maxSize ?? 1000);
  const keyFn = config?.keyFn ?? defaultCacheKey;
  const shouldCache = config?.shouldCache ?? defaultShouldCache;
  let hits = 0;
  let misses = 0;
  const middleware = async (ctx, next) => {
    if (ctx.request.stream) {
      return next(ctx);
    }
    const key = keyFn(ctx);
    const cached = await adapter.get(key);
    if (cached) {
      hits++;
      log4.debug("Cache hit", { key: key.slice(0, 8), provider: ctx.provider });
      return cached;
    }
    misses++;
    const response = await next(ctx);
    if (shouldCache(ctx, response)) {
      await adapter.set(key, response, ttlMs);
    }
    return response;
  };
  return Object.assign(middleware, {
    adapter,
    stats() {
      const total = hits + misses;
      // Custom adapters are not required to expose size(); fall back to 0 for
      // those (and for adapters whose size() is not a plain number).
      const reported = typeof adapter.size === "function" ? adapter.size() : undefined;
      return {
        hits,
        misses,
        size: typeof reported === "number" ? reported : 0,
        hitRate: total > 0 ? hits / total : 0
      };
    }
  });
}
2690
+ // src/output-guardrails.ts
2691
+ var log5 = createLogger();
2692
// PII detectors applied to model output, in order. Each entry carries the
// global regex to search for, a short label used in violation reports and the
// placeholder substituted when redacting.
var PII_PATTERNS2 = [
  {
    pattern: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    label: "email",
    replacement: "[REDACTED_EMAIL]"
  },
  {
    // The leading (?<!\d) stops a match from starting in the middle of a
    // longer digit run. Without it the tail of a contiguous 16-digit card
    // number is taken for a phone number, and because this entry runs before
    // "credit_card" the card's leading digits survive redaction.
    pattern: /(?<!\d)(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
    label: "phone",
    replacement: "[REDACTED_PHONE]"
  },
  {
    pattern: /\b\d{3}-\d{2}-\d{4}\b/g,
    label: "ssn",
    replacement: "[REDACTED_SSN]"
  },
  {
    pattern: /\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g,
    label: "credit_card",
    replacement: "[REDACTED_CC]"
  }
];
2714
// Credential detectors applied to model output. Same entry shape as the PII
// list: global regex, report label, redaction placeholder.
var SECRET_PATTERNS2 = [
  {
    // "-" and "_" are allowed in the key body so that segmented keys such as
    // "sk-proj-..." are matched as well as plain alphanumeric ones.
    pattern: /\bsk-[A-Za-z0-9_-]{20,}\b/g,
    label: "api_key",
    replacement: "[REDACTED_API_KEY]"
  },
  {
    pattern: /\bpk-[A-Za-z0-9]{20,}\b/g,
    label: "api_key",
    replacement: "[REDACTED_API_KEY]"
  },
  {
    pattern: /\bAKIA[A-Z0-9]{16}\b/g,
    label: "aws_key",
    replacement: "[REDACTED_AWS_KEY]"
  },
  // GitHub tokens: the same expressions and placeholder used by the
  // request-side SECRET_PATTERNS list, so output scanning does not lag behind.
  {
    pattern: /\bghp_[a-zA-Z0-9]{36,}\b/g,
    label: "github_token",
    replacement: "[REDACTED_GITHUB_TOKEN]"
  },
  {
    pattern: /\bgho_[a-zA-Z0-9]{36,}\b/g,
    label: "github_token",
    replacement: "[REDACTED_GITHUB_TOKEN]"
  },
  {
    pattern: /\bgithub_pat_[a-zA-Z0-9_]{20,}\b/g,
    label: "github_token",
    replacement: "[REDACTED_GITHUB_TOKEN]"
  }
];
2731
/**
 * Scans text for PII and credential patterns.
 *
 * The text is NFKC-normalized before matching. Every pattern is cloned before
 * use so the shared global regexes never carry `lastIndex` between calls.
 *
 * @param text Text to inspect.
 * @returns One `{ type: "pii", detail, pattern }` violation per matching
 *   pattern, in list order (PII patterns first, then secret patterns).
 */
function detectPII(text) {
  const haystack = text.normalize("NFKC");
  return [...PII_PATTERNS2, ...SECRET_PATTERNS2]
    .filter(({ pattern }) => new RegExp(pattern.source, pattern.flags).test(haystack))
    .map(({ label }) => ({
      type: "pii",
      detail: `Detected ${label} in output`,
      pattern: label
    }));
}
2746
/**
 * Replaces every PII / credential match in `text` with its placeholder.
 *
 * detectPII() matches against the NFKC-normalized text, so values written
 * with compatibility characters (for example fullwidth digits) are reported
 * but would not be removed by a pass over the raw text alone. After the raw
 * pass, the result is therefore normalized and scanned again; the normalized
 * form is returned only when that second pass actually redacts something, so
 * text without such hidden matches keeps its original characters.
 *
 * @param text Text to redact.
 * @returns The redacted text.
 */
function redactPII(text) {
  const patterns = [...PII_PATTERNS2, ...SECRET_PATTERNS2];
  // Clone each regex so the shared global patterns stay stateless.
  const scrub = (input) => patterns.reduce(
    (acc, { pattern, replacement }) => acc.replace(new RegExp(pattern.source, pattern.flags), replacement),
    input
  );
  const redacted = scrub(text);
  const normalized = redacted.normalize("NFKC");
  if (normalized === redacted)
    return redacted;
  const redactedNormalized = scrub(normalized);
  return redactedNormalized === normalized ? redacted : redactedNormalized;
}
2754
/**
 * Checks text against a content policy.
 *
 * @param text Text to inspect.
 * @param policy Policy with optional `maxResponseLength` (a falsy value means
 *   no limit) and optional `blockedPatterns` (array of RegExp).
 * @returns A list of `content_policy` violations; empty when the text passes.
 */
function checkContentPolicy(text, policy) {
  const violations = [];
  if (policy.maxResponseLength && text.length > policy.maxResponseLength) {
    violations.push({
      type: "content_policy",
      detail: `Response length ${text.length} exceeds max ${policy.maxResponseLength}`
    });
  }
  if (policy.blockedPatterns) {
    for (const pattern of policy.blockedPatterns) {
      // test() on a caller-owned regex with the g or y flag advances its
      // lastIndex, so a later call would start mid-string and miss matches.
      // Test a fresh clone instead and leave the caller's regex untouched.
      const matcher = new RegExp(pattern.source, pattern.flags);
      if (matcher.test(text)) {
        violations.push({
          type: "content_policy",
          detail: `Response matches blocked pattern: ${pattern.source}`,
          pattern: pattern.source
        });
      }
    }
  }
  return violations;
}
2775
/**
 * Evaluates caller-defined rules against the text.
 *
 * @param text Text to inspect.
 * @param rules Rules of the form `{ name, pattern, message? }`.
 * @returns One `custom_rule` violation per matching rule, in rule order. The
 *   detail is the rule's `message` when set, otherwise a default naming the rule.
 */
function checkCustomRules(text, rules) {
  const violations = [];
  for (const rule of rules) {
    // Rule regexes belong to the caller and may carry the g or y flag; calling
    // test() on them directly would leave lastIndex advanced and make the next
    // check miss. Match with a fresh clone instead.
    const matcher = new RegExp(rule.pattern.source, rule.pattern.flags);
    if (matcher.test(text)) {
      violations.push({
        type: "custom_rule",
        detail: rule.message ?? `Output matched custom rule: ${rule.name}`,
        pattern: rule.pattern.source
      });
    }
  }
  return violations;
}
2788
/**
 * Creates a middleware that inspects the text of each response and reacts to
 * guardrail violations.
 *
 * Checks run in this order when enabled: PII detection, content policy,
 * custom rules. `config.onViolationCallback` is invoked once per violation
 * before the configured action is taken.
 *
 * Actions (`config.onViolation`, default "block"):
 * - "warn":   log the violations and return the response unchanged.
 * - "redact": replace the message content with the extracted text, PII-redacted
 *             when `piiDetection` is on. Violations that are not PII cannot be
 *             redacted and are logged as a warning.
 * - "block":  throw an ElsiumError validation error. Any unrecognised value is
 *             treated the same way, so a mistyped mode fails closed instead of
 *             resolving to `undefined`.
 *
 * @param config Guardrail configuration.
 * @returns An async `(ctx, next)` middleware.
 */
function outputGuardrailMiddleware(config) {
  const mode = config.onViolation ?? "block";
  return async (ctx, next) => {
    const response = await next(ctx);
    const text = extractText(response.message.content);
    const violations = [];
    if (config.piiDetection) {
      violations.push(...detectPII(text));
    }
    if (config.contentPolicy) {
      violations.push(...checkContentPolicy(text, config.contentPolicy));
    }
    if (config.customRules?.length) {
      violations.push(...checkCustomRules(text, config.customRules));
    }
    if (violations.length === 0)
      return response;
    for (const v of violations) {
      config.onViolationCallback?.(v);
    }
    if (mode === "warn") {
      log5.warn("Output guardrail violations detected", { violations });
      return response;
    }
    if (mode === "redact") {
      // Only PII has a redaction; surface everything else rather than letting
      // it pass silently.
      const unredactable = violations.filter((v) => v.type !== "pii");
      if (unredactable.length > 0) {
        log5.warn("Output guardrail violations could not be redacted", { violations: unredactable });
      }
      const redacted = config.piiDetection ? redactPII(text) : text;
      return {
        ...response,
        message: { ...response.message, content: redacted }
      };
    }
    // "block" and any unknown mode.
    throw ElsiumError.validation(`Output guardrail violation: ${violations.map((v) => v.detail).join("; ")}`, { violations });
  };
}
2827
+ // src/batch.ts
2828
+ var log6 = createLogger();
2829
/**
 * Builds the result record for a batch item that was never attempted because
 * the batch was cancelled.
 *
 * @param index Position of the item in the request list.
 */
function makeCancelledItem(index) {
  const cancelled = {
    index,
    success: false,
    error: "Batch cancelled"
  };
  return cancelled;
}
2832
/**
 * Builds the result record for a batch item whose request failed.
 *
 * @param index Position of the item in the request list.
 * @param error Failure description to store on the record.
 */
function makeFailedItem(index, error) {
  const failed = {
    index,
    success: false,
    error
  };
  return failed;
}
2835
/**
 * Sends one request through the gateway, retrying retryable failures.
 *
 * Up to `retryPerItem + 1` attempts are made. Another attempt happens only
 * while attempts remain and the failure is an ElsiumError flagged `retryable`;
 * any other failure ends the loop at once.
 *
 * @param gateway2 Object exposing `complete(request)`.
 * @param request Request passed to `complete`.
 * @param retryPerItem Number of extra attempts allowed after the first.
 * @returns `{ response }` on success, otherwise `{ error }` holding the
 *   message (or string form) of the last failure.
 */
async function attemptRequest(gateway2, request, retryPerItem) {
  let failure;
  let attempt = 0;
  while (attempt <= retryPerItem) {
    try {
      return { response: await gateway2.complete(request) };
    } catch (caught) {
      failure = caught instanceof Error ? caught.message : String(caught);
      const attemptsLeft = attempt < retryPerItem;
      const mayRetry = attemptsLeft && caught instanceof ElsiumError && caught.retryable;
      if (!mayRetry) {
        break;
      }
    }
    attempt += 1;
  }
  return { error: failure };
}
2850
/**
 * Marks every slot from `fromIndex` up to (but excluding) `total` as cancelled.
 *
 * @param results Result array, filled in place.
 * @param fromIndex First slot to mark.
 * @param total Exclusive upper bound of the slots to mark.
 * @returns How many slots were marked.
 */
function cancelRemaining(results, fromIndex, total) {
  let slot = fromIndex;
  while (slot < total) {
    results[slot] = makeCancelledItem(slot);
    slot += 1;
  }
  return slot - fromIndex;
}
2858
/**
 * Creates a batch executor that runs many gateway requests with bounded
 * concurrency.
 *
 * @param gateway2 Gateway whose `complete(request)` performs one request.
 * @param config Optional settings:
 *   - `concurrency`: maximum requests in flight (default 5; values below 1,
 *     or NaN, are treated as 1 so the batch always makes progress).
 *   - `retryPerItem`: extra attempts per item for retryable errors (default 0).
 *   - `signal`: AbortSignal; once aborted no further items are started and all
 *     items not yet started are recorded as cancelled failures.
 *   - `onProgress(completed, total)`: called after each item finishes and once
 *     when remaining items are cancelled.
 * @returns An object with `execute(requests)`, which resolves to
 *   `{ results, totalSucceeded, totalFailed, totalDurationMs }`. `results` is
 *   index-aligned with `requests`. `execute` rejects only if `onProgress` throws.
 */
function createBatch(gateway2, config) {
  const requestedConcurrency = config?.concurrency ?? 5;
  const concurrency = requestedConcurrency >= 1 ? requestedConcurrency : 1;
  const retryPerItem = config?.retryPerItem ?? 0;
  return {
    async execute(requests) {
      const total = requests.length;
      if (total === 0) {
        return {
          results: [],
          totalSucceeded: 0,
          totalFailed: 0,
          totalDurationMs: 0
        };
      }
      const startTime = performance.now();
      const results = new Array(total);
      const signal = config?.signal;
      let completed = 0;
      let totalSucceeded = 0;
      let totalFailed = 0;
      let nextIndex = 0;
      // Runs one item (with retries) and records its outcome.
      async function processItem(index) {
        const result = await attemptRequest(gateway2, requests[index], retryPerItem);
        if (result.response) {
          results[index] = { index, success: true, response: result.response };
          totalSucceeded++;
        } else {
          results[index] = makeFailedItem(index, result.error);
          totalFailed++;
        }
      }
      // Each worker pulls the next unstarted index until none are left. The
      // check-and-increment has no await in between, so two workers can never
      // claim the same index.
      async function worker() {
        while (nextIndex < total) {
          if (signal?.aborted) {
            // Cancelled items count as completed. Previously they did not, so
            // the completion condition was never met after an abort and
            // execute() never settled.
            const cancelled = cancelRemaining(results, nextIndex, total);
            nextIndex = total;
            totalFailed += cancelled;
            completed += cancelled;
            config?.onProgress?.(completed, total);
            return;
          }
          const index = nextIndex++;
          await processItem(index);
          completed++;
          config?.onProgress?.(completed, total);
        }
      }
      // Math.ceil keeps the original "running < concurrency" bound for
      // fractional values; Math.min avoids spawning idle workers.
      const workerCount = Math.min(Math.ceil(concurrency), total);
      await Promise.all(Array.from({ length: workerCount }, () => worker()));
      return {
        results,
        totalSucceeded,
        totalFailed,
        totalDurationMs: Math.round(performance.now() - startTime)
      };
    }
  };
}
2426
2927
  // src/router.ts
2427
2928
  var REASONING_KEYWORDS = /\b(prove|explain why|analyze|compare|contrast|evaluate|critique|debate|reason|deduce|infer|justify|argue|synthesize|hypothesize|derive)\b/i;
2428
2929
  var CODE_KEYWORDS = /\b(implement|refactor|debug|optimize|architect|design pattern|algorithm|data structure|write code|code review|fix the bug|type system)\b/i;
@@ -2678,22 +3179,28 @@ export {
2678
3179
  registerProvider,
2679
3180
  registerPricing,
2680
3181
  redactSecrets,
3182
+ outputGuardrailMiddleware,
2681
3183
  loggingMiddleware,
2682
3184
  listProviders,
2683
3185
  getProviderMetadata,
2684
3186
  getProviderFactory,
2685
3187
  gateway,
3188
+ estimateCost,
2686
3189
  detectPromptInjection,
2687
3190
  detectJailbreak,
2688
3191
  createProviderMesh,
2689
3192
  createOpenAIProvider,
3193
+ createInMemoryCache,
2690
3194
  createGoogleProvider,
2691
3195
  createBulkhead,
3196
+ createBatch,
2692
3197
  createAnthropicProvider,
2693
3198
  costTrackingMiddleware,
3199
+ composeStreamMiddleware,
2694
3200
  composeMiddleware,
2695
3201
  classifyContent,
2696
3202
  checkBlockedPatterns,
2697
3203
  calculateCost,
3204
+ cacheMiddleware,
2698
3205
  bulkheadMiddleware
2699
3206
  };