@infersec/conduit 1.34.0 → 1.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6,7 +6,7 @@ const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import { parseArgs } from 'node:util';
8
8
  import 'node:crypto';
9
- import { a as asError, s as startInferenceAgent } from './start-BXwggMaM.js';
9
+ import { a as asError, s as startInferenceAgent } from './start-CdILFvRO.js';
10
10
  import 'argon2';
11
11
  import 'node:child_process';
12
12
  import 'node:stream';
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ const __filename = __fileURLToPath(import.meta.url);
5
5
  const __dirname = __pathDirname(__filename);
6
6
 
7
7
  import 'node:crypto';
8
- import { s as startInferenceAgent, a as asError } from './start-BXwggMaM.js';
8
+ import { s as startInferenceAgent, a as asError } from './start-CdILFvRO.js';
9
9
  import 'argon2';
10
10
  import 'node:child_process';
11
11
  import 'node:stream';
@@ -0,0 +1,119 @@
1
+ import { API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE } from "@infersec/definitions";
2
+ import { implementAPIReference } from "@infersec/fetch";
3
+ import { Logger } from "@infersec/logger";
4
+ import { APIClient } from "../apiClient/index.js";
5
+ import { Configuration } from "../configuration.js";
6
+ import { ModelManager } from "../modelManagement/ModelManager.js";
7
/** Handler map that `implementAPIReference` expects for the Conduit Anthropic API reference. */
type ConduitAnthropicAPIReferenceHandlers = Parameters<typeof implementAPIReference<typeof API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE>>[0]["api"];
/** Dependencies taken by both handler factories declared below. */
type ConduitAnthropicHandlerOptions = {
    apiClient: APIClient;
    configuration: Configuration;
    getModelID: () => string;
    getModelManager: () => ModelManager;
    logger: Logger;
};
/** `{ type: "text" }` content block. */
type AnthropicTextBlock = {
    text: string;
    type: "text";
};
/** `{ type: "image" }` content block with either a base64 or a URL source. */
type AnthropicImageBlock = {
    source: {
        data: string;
        media_type: "image/gif" | "image/jpeg" | "image/png" | "image/webp";
        type: "base64";
    } | {
        type: "url";
        url: string;
    };
    type: "image";
};
/** `{ type: "tool_result" }` content block. */
type AnthropicToolResultBlock = {
    tool_use_id: string;
    content: string | AnthropicTextBlock[];
    type: "tool_result";
    is_error?: boolean | undefined;
};
/** `{ type: "tool_use" }` content block. */
type AnthropicToolUseBlock = {
    id: string;
    input: Record<string, unknown>;
    name: string;
    type: "tool_use";
};
/** `{ type: "thinking" }` content block; additional keys are allowed. */
type AnthropicThinkingBlock = {
    [x: string]: unknown;
    thinking: string;
    type: "thinking";
};
/** `{ type: "redacted_thinking" }` content block; additional keys are allowed. */
type AnthropicRedactedThinkingBlock = {
    [x: string]: unknown;
    data: string;
    type: "redacted_thinking";
};
/** Message with `role: "user"`. */
type AnthropicUserMessage = {
    content: string | (AnthropicTextBlock | AnthropicImageBlock | AnthropicToolResultBlock)[];
    role: "user";
};
/** Message with `role: "assistant"`. */
type AnthropicAssistantMessage = {
    content: string | (AnthropicTextBlock | AnthropicToolUseBlock | AnthropicThinkingBlock | AnthropicRedactedThinkingBlock)[];
    role: "assistant";
};
/** Accepted `tool_choice` values: a bare mode string or an object form. */
type AnthropicToolChoice = "none" | "any" | "auto" | {
    type: "auto";
    disable_parallel_tool_use?: boolean | undefined;
} | {
    type: "any";
    disable_parallel_tool_use?: boolean | undefined;
} | {
    type: "none";
    disable_parallel_tool_use?: boolean | undefined;
} | {
    name: string;
    type: "tool";
    disable_parallel_tool_use?: boolean | undefined;
};
/** Request body of `POST /v1/messages` as seen by the handler. */
type AnthropicMessagesRequestBody = {
    max_tokens: number;
    messages: (AnthropicUserMessage | AnthropicAssistantMessage)[];
    model: string;
    metadata?: {
        user_id?: string | undefined;
    } | undefined;
    stop_sequences?: string[] | undefined;
    stream?: boolean | undefined;
    system?: string | AnthropicTextBlock[] | undefined;
    temperature?: number | undefined;
    thinking?: {
        budget_tokens: number;
        type: "enabled";
    } | undefined;
    tool_choice?: AnthropicToolChoice | undefined;
    tools?: {
        input_schema: Record<string, unknown>;
        name: string;
        description?: string | undefined;
    }[] | undefined;
    top_k?: number | undefined;
    top_p?: number | undefined;
};
/**
 * Builds the handler map for the Conduit Anthropic API reference.
 */
export declare function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }: ConduitAnthropicHandlerOptions): ConduitAnthropicAPIReferenceHandlers;
/**
 * Builds the `POST /v1/messages` handler on its own.
 *
 * @returns A handler resolving either to a streamed response (`body`, optional
 * `headers`, `status`) or to a body-less result (`status`, `statusText`,
 * optional `headers`).
 */
export declare function createPostMessagesHandler(options: ConduitAnthropicHandlerOptions): (params: {
    req: import("@infersec/fetch").APIRequest;
    res: import("@infersec/fetch").APIResponse;
    parameters: Record<string, never>;
    query: Record<string, never>;
    body: AnthropicMessagesRequestBody;
    responseSchema: undefined;
}) => Promise<{
    body: import("stream").Readable;
    headers?: Record<string, string>;
    status: number;
} | {
    headers?: Record<string, string>;
    status: number;
    statusText: string;
}>;
119
+ export {};
@@ -460,7 +460,7 @@ const allowsEval = cached(() => {
460
460
  return false;
461
461
  }
462
462
  });
463
- function isPlainObject$2(o) {
463
+ function isPlainObject$3(o) {
464
464
  if (isObject$1(o) === false)
465
465
  return false;
466
466
  // modified constructor
@@ -480,7 +480,7 @@ function isPlainObject$2(o) {
480
480
  return true;
481
481
  }
482
482
  function shallowClone(o) {
483
- if (isPlainObject$2(o))
483
+ if (isPlainObject$3(o))
484
484
  return { ...o };
485
485
  if (Array.isArray(o))
486
486
  return [...o];
@@ -665,7 +665,7 @@ function omit(schema, mask) {
665
665
  return clone(schema, def);
666
666
  }
667
667
  function extend(schema, shape) {
668
- if (!isPlainObject$2(shape)) {
668
+ if (!isPlainObject$3(shape)) {
669
669
  throw new Error("Invalid input to extend: expected a plain object");
670
670
  }
671
671
  const checks = schema._zod.def.checks;
@@ -684,7 +684,7 @@ function extend(schema, shape) {
684
684
  return clone(schema, def);
685
685
  }
686
686
  function safeExtend(schema, shape) {
687
- if (!isPlainObject$2(shape)) {
687
+ if (!isPlainObject$3(shape)) {
688
688
  throw new Error("Invalid input to safeExtend: expected a plain object");
689
689
  }
690
690
  const def = {
@@ -944,7 +944,7 @@ var util$6 = /*#__PURE__*/Object.freeze({
944
944
  getSizableOrigin: getSizableOrigin,
945
945
  hexToUint8Array: hexToUint8Array,
946
946
  isObject: isObject$1,
947
- isPlainObject: isPlainObject$2,
947
+ isPlainObject: isPlainObject$3,
948
948
  issue: issue,
949
949
  joinValues: joinValues,
950
950
  jsonStringifyReplacer: jsonStringifyReplacer,
@@ -3154,7 +3154,7 @@ function mergeValues(a, b) {
3154
3154
  if (a instanceof Date && b instanceof Date && +a === +b) {
3155
3155
  return { valid: true, data: a };
3156
3156
  }
3157
- if (isPlainObject$2(a) && isPlainObject$2(b)) {
3157
+ if (isPlainObject$3(a) && isPlainObject$3(b)) {
3158
3158
  const bKeys = Object.keys(b);
3159
3159
  const sharedKeys = Object.keys(a).filter((key) => bKeys.indexOf(key) !== -1);
3160
3160
  const newObj = { ...a, ...b };
@@ -3286,7 +3286,7 @@ const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
3286
3286
  $ZodType.init(inst, def);
3287
3287
  inst._zod.parse = (payload, ctx) => {
3288
3288
  const input = payload.value;
3289
- if (!isPlainObject$2(input)) {
3289
+ if (!isPlainObject$3(input)) {
3290
3290
  payload.issues.push({
3291
3291
  expected: "record",
3292
3292
  code: "invalid_type",
@@ -15004,6 +15004,167 @@ const API_SERVICE_CONDUIT_API_REFERENCE = {
15004
15004
  }
15005
15005
  };
15006
15006
 
15007
// ==================== CONTENT BLOCKS ====================
/** `{ type: "text", text }` block; reused wherever a list of text blocks is accepted. */
const AnthropicTextContentBlockSchema = object({
    text: string$1(),
    type: literal("text")
});
/** `{ type: "tool_use" }` block with a free-form `input` record. */
const AnthropicToolUseContentBlockSchema = object({
    id: string$1(),
    input: record(string$1(), unknown()),
    name: string$1(),
    type: literal("tool_use")
});
/** `{ type: "tool_result" }` block; `content` is a string or a list of text blocks. */
const AnthropicToolResultContentBlockSchema = object({
    tool_use_id: string$1(),
    content: union([string$1(), array(AnthropicTextContentBlockSchema)]),
    is_error: boolean$1().optional(),
    type: literal("tool_result")
});
/** Image source given inline as base64 data. */
const AnthropicBase64ImageSourceSchema = object({
    data: string$1(),
    media_type: _enum(["image/gif", "image/jpeg", "image/png", "image/webp"]),
    type: literal("base64")
});
/** Image source given by URL. */
const AnthropicUrlImageSourceSchema = object({
    type: literal("url"),
    url: string$1()
});
/** `{ type: "image" }` block; `source` is discriminated on its `type`. */
const AnthropicImageContentBlockSchema = object({
    source: discriminatedUnion("type", [
        AnthropicBase64ImageSourceSchema,
        AnthropicUrlImageSourceSchema
    ]),
    type: literal("image")
});
/** `{ type: "thinking" }` block; unknown keys are passed through. */
const AnthropicThinkingContentBlockSchema = object({
    thinking: string$1(),
    type: literal("thinking")
}).passthrough();
/** `{ type: "redacted_thinking" }` block; unknown keys are passed through. */
const AnthropicRedactedThinkingContentBlockSchema = object({
    data: string$1(),
    type: literal("redacted_thinking")
}).passthrough();
/** Blocks allowed in assistant content; shared by the request and response shapes. */
const AnthropicAssistantContentBlockUnionSchema = union([
    AnthropicTextContentBlockSchema,
    AnthropicToolUseContentBlockSchema,
    AnthropicThinkingContentBlockSchema,
    AnthropicRedactedThinkingContentBlockSchema
]);
// ==================== INPUT CONTENT (for messages) ====================
/** Blocks allowed in user content. */
const AnthropicInputContentSchema = union([
    AnthropicTextContentBlockSchema,
    AnthropicImageContentBlockSchema,
    AnthropicToolResultContentBlockSchema
]);
// ==================== MESSAGE PARAMS ====================
const AnthropicUserMessageParamSchema = object({
    content: union([string$1(), array(AnthropicInputContentSchema)]),
    role: literal("user")
});
const AnthropicAssistantMessageParamSchema = object({
    content: union([string$1(), array(AnthropicAssistantContentBlockUnionSchema)]),
    role: literal("assistant")
});
/** A single conversation message, discriminated on `role`. */
const AnthropicMessageParamSchema = discriminatedUnion("role", [
    AnthropicUserMessageParamSchema,
    AnthropicAssistantMessageParamSchema
]);
// ==================== TOOL DEFINITIONS ====================
const AnthropicToolSchema = object({
    description: string$1().optional(),
    input_schema: record(string$1(), unknown()),
    name: string$1()
});
// ==================== MESSAGES CREATE PARAMS ====================
/** Optional request metadata. */
const AnthropicRequestMetadataSchema = object({
    user_id: string$1().optional()
});
/** Thinking configuration; only the `enabled` form with a budget of at least 1024 is accepted. */
const AnthropicThinkingConfigSchema = object({
    budget_tokens: number$1().int().min(1024),
    type: literal("enabled")
});
/** `tool_choice`: a bare mode string or one of the object forms. */
const AnthropicToolChoiceSchema = union([
    literal("auto"),
    literal("any"),
    literal("none"),
    object({
        disable_parallel_tool_use: boolean$1().optional(),
        type: literal("auto")
    }),
    object({
        disable_parallel_tool_use: boolean$1().optional(),
        type: literal("any")
    }),
    object({
        disable_parallel_tool_use: boolean$1().optional(),
        type: literal("none")
    }),
    object({
        disable_parallel_tool_use: boolean$1().optional(),
        name: string$1(),
        type: literal("tool")
    })
]);
/** Body schema of `POST /v1/messages`. */
const AnthropicMessagesCreateParamsSchema = object({
    max_tokens: number$1().int().positive(),
    messages: array(AnthropicMessageParamSchema),
    model: string$1(),
    metadata: AnthropicRequestMetadataSchema.optional(),
    stop_sequences: array(string$1()).optional(),
    stream: boolean$1().optional(),
    system: union([string$1(), array(AnthropicTextContentBlockSchema)]).optional(),
    temperature: number$1().min(0).max(1).optional(),
    thinking: AnthropicThinkingConfigSchema.optional(),
    tool_choice: AnthropicToolChoiceSchema.optional(),
    tools: array(AnthropicToolSchema).optional(),
    top_k: number$1().int().positive().optional(),
    top_p: number$1().min(0).max(1).optional()
});
// ==================== RESPONSE SCHEMAS ====================
const AnthropicUsageSchema = object({
    cache_creation_input_tokens: number$1().optional(),
    cache_read_input_tokens: number$1().optional(),
    input_tokens: number$1(),
    output_tokens: number$1()
});
// Message response shape. The schema is constructed but not bound to any name here.
object({
    content: array(AnthropicAssistantContentBlockUnionSchema),
    id: string$1(),
    model: string$1(),
    role: literal("assistant"),
    stop_reason: _enum(["end_turn", "max_tokens", "pause_turn", "refusal", "stop_sequence", "tool_use"]).nullable(),
    stop_sequence: string$1().nullable(),
    type: literal("message"),
    usage: AnthropicUsageSchema
});

/** API reference for the Anthropic-style surface: one shared-secret `POST /v1/messages` route returning a text stream. */
const API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE = {
    "/v1/messages": {
        POST: {
            auth: { type: "shared-secret" },
            body: AnthropicMessagesCreateParamsSchema,
            response: { type: "text-stream" }
        }
    }
};
15167
+
15007
15168
  /**
15008
15169
  * Coerce non-string values to JSON strings. Some LLM backends (e.g. llama.cpp)
15009
15170
  * return tool_calls arguments as parsed objects instead of JSON strings, which
@@ -15323,6 +15484,15 @@ const API_CLIENT_CONDUIT_OPENAI_REFERENCE = {
15323
15484
  }
15324
15485
  };
15325
15486
 
15487
+ ({
15488
+ "/api/inferencing/:endpointID/anthropic/v1/messages": {
15489
+ POST: {
15490
+ parameters: {
15491
+ endpointID: ULIDSchema.describe("Endpoint identifier")
15492
+ }}
15493
+ }
15494
+ });
15495
+
15326
15496
  ({
15327
15497
  "/api/inferencing/:endpointID/oai/v1/chat/completions": {
15328
15498
  POST: {
@@ -109253,11 +109423,11 @@ function logEngineMetrics({ agentEngineType, error, level, logger, requestBodyBy
109253
109423
  logger[level](metricsMessage, attributes);
109254
109424
  }
109255
109425
 
109256
- function isPlainObject$1(value) {
109426
+ function isPlainObject$2(value) {
109257
109427
  return typeof value === "object" && value !== null && !Array.isArray(value);
109258
109428
  }
109259
- function serializeRequestBody(body) {
109260
- if (!isPlainObject$1(body)) {
109429
+ function serializeRequestBody$1(body) {
109430
+ if (!isPlainObject$2(body)) {
109261
109431
  const payload = typeof body === "string" ? body : JSON.stringify(body);
109262
109432
  return {
109263
109433
  bytes: Buffer.byteLength(payload, "utf8"),
@@ -109266,7 +109436,7 @@ function serializeRequestBody(body) {
109266
109436
  }
109267
109437
  const requestPayload = { ...body };
109268
109438
  const streamOptions = requestPayload.stream_options;
109269
- const normalizedStreamOptions = isPlainObject$1(streamOptions)
109439
+ const normalizedStreamOptions = isPlainObject$2(streamOptions)
109270
109440
  ? { ...streamOptions }
109271
109441
  : {};
109272
109442
  normalizedStreamOptions.include_usage = true;
@@ -109277,7 +109447,7 @@ function serializeRequestBody(body) {
109277
109447
  payload
109278
109448
  };
109279
109449
  }
109280
- function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
109450
+ function calculateTokensPerSecond$2({ durationMs, totalTokens }) {
109281
109451
  if (durationMs <= 0) {
109282
109452
  return 0;
109283
109453
  }
@@ -109302,7 +109472,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109302
109472
  });
109303
109473
  });
109304
109474
  }
109305
- const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
109475
+ const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody$1(body);
109306
109476
  const requestStartedAt = Date.now();
109307
109477
  const requestBody = JSON.parse(serializedBody);
109308
109478
  const streamRequested = requestBody.stream === true;
@@ -109326,7 +109496,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109326
109496
  responseBytes,
109327
109497
  successful: !error,
109328
109498
  timeToFirstTokenMs,
109329
- tokensPerSecond: calculateTokensPerSecond$1({
109499
+ tokensPerSecond: calculateTokensPerSecond$2({
109330
109500
  durationMs: latencyMs,
109331
109501
  totalTokens
109332
109502
  }),
@@ -109383,8 +109553,7 @@ async function proxyOpenAIStreamingRoute({ body, configuration, logger, modelID,
109383
109553
  error: responseError,
109384
109554
  requestUrl: path,
109385
109555
  statusCode: response.status,
109386
- statusText: responseStatusText,
109387
- responseBody: responseBody ?? undefined
109556
+ statusText: responseStatusText
109388
109557
  });
109389
109558
  if (!response.body) {
109390
109559
  return {
@@ -109531,6 +109700,385 @@ function createPostCompletionsHandler(options) {
109531
109700
  return createConduitOpenAIAPIReferenceHandlers(options)["/v1/completions"].POST;
109532
109701
  }
109533
109702
 
109703
+ function isPlainObject$1(value) {
109704
+ return typeof value === "object" && value !== null && !Array.isArray(value);
109705
+ }
109706
+ function serializeRequestBody(body) {
109707
+ const payload = typeof body === "string" ? body : JSON.stringify(body);
109708
+ return {
109709
+ bytes: Buffer.byteLength(payload, "utf8"),
109710
+ payload
109711
+ };
109712
+ }
109713
+ function calculateTokensPerSecond$1({ durationMs, totalTokens }) {
109714
+ if (durationMs <= 0)
109715
+ return 0;
109716
+ const tokensPerSecond = totalTokens / (durationMs / 1000);
109717
+ if (!Number.isFinite(tokensPerSecond) || tokensPerSecond <= 0)
109718
+ return 0;
109719
+ return Math.round(tokensPerSecond);
109720
+ }
109721
+ function normalizeTokenCount(value) {
109722
+ if (typeof value === "number" && Number.isFinite(value) && value >= 0)
109723
+ return value;
109724
+ return 0;
109725
+ }
109726
+ function extractAnthropicStreamUsage(line) {
109727
+ if (!line.startsWith("data:"))
109728
+ return null;
109729
+ const payload = line.slice(5).trim();
109730
+ if (!payload)
109731
+ return null;
109732
+ try {
109733
+ const parsed = JSON.parse(payload);
109734
+ if (!isPlainObject$1(parsed))
109735
+ return null;
109736
+ if (parsed.type === "message_start" && isPlainObject$1(parsed.message)) {
109737
+ const msgObj = parsed.message;
109738
+ const usage = msgObj.usage;
109739
+ if (isPlainObject$1(usage)) {
109740
+ return {
109741
+ inputTokens: typeof usage.input_tokens === "number" ? usage.input_tokens : null
109742
+ };
109743
+ }
109744
+ }
109745
+ if (parsed.type === "message_delta" && isPlainObject$1(parsed.usage)) {
109746
+ return {
109747
+ outputTokens: typeof parsed.usage.output_tokens === "number"
109748
+ ? parsed.usage.output_tokens
109749
+ : null
109750
+ };
109751
+ }
109752
+ }
109753
+ catch {
109754
+ // ignore
109755
+ }
109756
+ return null;
109757
+ }
109758
+ function extractAnthropicNonStreamUsage(body) {
109759
+ try {
109760
+ const parsed = JSON.parse(body);
109761
+ if (!isPlainObject$1(parsed) || !isPlainObject$1(parsed.usage))
109762
+ return null;
109763
+ const usage = parsed.usage;
109764
+ return {
109765
+ inputTokens: typeof usage.input_tokens === "number" ? usage.input_tokens : null,
109766
+ outputTokens: typeof usage.output_tokens === "number" ? usage.output_tokens : null
109767
+ };
109768
+ }
109769
+ catch {
109770
+ return null;
109771
+ }
109772
+ }
109773
+ async function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }) {
109774
+ function reportMetricsSafe(payload) {
109775
+ reportMetrics(payload).catch(error => {
109776
+ logger.warn("Failed to upload LLM prompt metrics", {
109777
+ error: asError(error),
109778
+ requestUrl: "/v1/messages"
109779
+ });
109780
+ });
109781
+ }
109782
+ const { bytes: requestBodyBytes, payload: serializedBody } = serializeRequestBody(body);
109783
+ const requestStartedAt = Date.now();
109784
+ const requestBody = JSON.parse(serializedBody);
109785
+ const streamRequested = requestBody.stream === true;
109786
+ const onMonitoringComplete = ({ durationMs, error, responseBytes, usage }) => {
109787
+ const promptTokens = normalizeTokenCount(usage?.inputTokens);
109788
+ const completionTokens = normalizeTokenCount(usage?.outputTokens);
109789
+ const totalTokens = promptTokens + completionTokens;
109790
+ const latencyMs = Math.max(0, durationMs);
109791
+ reportMetricsSafe({
109792
+ bytes: requestBodyBytes + responseBytes,
109793
+ completionTokens,
109794
+ engine: configuration.agentEngineType,
109795
+ endpointId: null,
109796
+ latencyMs,
109797
+ modelId: modelID,
109798
+ promptTokens,
109799
+ requestBytes: requestBodyBytes,
109800
+ requestId: null,
109801
+ requestMethod: "POST",
109802
+ requestPath: "/v1/messages",
109803
+ responseBytes,
109804
+ successful: !error,
109805
+ timeToFirstTokenMs: null,
109806
+ tokensPerSecond: calculateTokensPerSecond$1({
109807
+ durationMs: latencyMs,
109808
+ totalTokens
109809
+ }),
109810
+ totalTokens
109811
+ });
109812
+ };
109813
+ const response = await modelManager
109814
+ .fetchOpenAI("/v1/messages", {
109815
+ body: serializedBody,
109816
+ headers: {
109817
+ "Content-Type": "application/json"
109818
+ },
109819
+ method: "POST"
109820
+ })
109821
+ .catch(error => {
109822
+ logEngineMetrics({
109823
+ agentEngineType: configuration.agentEngineType,
109824
+ error: asError(error),
109825
+ level: "error",
109826
+ logger,
109827
+ requestBodyBytes,
109828
+ requestPath: "/v1/messages",
109829
+ responseBytes: 0,
109830
+ usage: null
109831
+ });
109832
+ const latencyMs = Math.max(0, Date.now() - requestStartedAt);
109833
+ reportMetricsSafe({
109834
+ bytes: requestBodyBytes,
109835
+ completionTokens: 0,
109836
+ engine: configuration.agentEngineType,
109837
+ endpointId: null,
109838
+ latencyMs,
109839
+ modelId: modelID,
109840
+ promptTokens: 0,
109841
+ requestBytes: requestBodyBytes,
109842
+ requestId: null,
109843
+ requestMethod: "POST",
109844
+ requestPath: "/v1/messages",
109845
+ responseBytes: 0,
109846
+ successful: false,
109847
+ timeToFirstTokenMs: null,
109848
+ tokensPerSecond: 0,
109849
+ totalTokens: 0
109850
+ });
109851
+ throw error;
109852
+ });
109853
+ const responseStatusText = response.statusText ?? "Upstream request failed";
109854
+ if (!response.ok) {
109855
+ const responseClone = response.clone();
109856
+ const responseBody = await responseClone.text().catch(() => null);
109857
+ const responseError = new Error(responseBody
109858
+ ? `Upstream error response: ${responseBody}`
109859
+ : "Upstream error response: empty body");
109860
+ logger.error("LLM engine request failed", {
109861
+ error: responseError,
109862
+ requestUrl: "/v1/messages",
109863
+ statusCode: response.status,
109864
+ statusText: responseStatusText
109865
+ });
109866
+ }
109867
+ if (!response.body) {
109868
+ logEngineMetrics({
109869
+ agentEngineType: configuration.agentEngineType,
109870
+ level: response.ok ? "info" : "error",
109871
+ logger,
109872
+ requestBodyBytes,
109873
+ requestPath: "/v1/messages",
109874
+ responseBytes: 0,
109875
+ usage: null
109876
+ });
109877
+ const latencyMs = Math.max(0, Date.now() - requestStartedAt);
109878
+ reportMetricsSafe({
109879
+ bytes: requestBodyBytes,
109880
+ completionTokens: 0,
109881
+ engine: configuration.agentEngineType,
109882
+ endpointId: null,
109883
+ latencyMs,
109884
+ modelId: modelID,
109885
+ promptTokens: 0,
109886
+ requestBytes: requestBodyBytes,
109887
+ requestId: null,
109888
+ requestMethod: "POST",
109889
+ requestPath: "/v1/messages",
109890
+ responseBytes: 0,
109891
+ successful: false,
109892
+ timeToFirstTokenMs: null,
109893
+ tokensPerSecond: 0,
109894
+ totalTokens: 0
109895
+ });
109896
+ return {
109897
+ status: response.status,
109898
+ statusText: responseStatusText
109899
+ };
109900
+ }
109901
+ const passThrough = new PassThrough();
109902
+ let responseBytes = 0;
109903
+ let completed = false;
109904
+ const usage = { inputTokens: null, outputTokens: null };
109905
+ const upstreamError = response.ok
109906
+ ? null
109907
+ : new Error(`Upstream error: ${response.status} ${responseStatusText}`);
109908
+ function finalize(error) {
109909
+ if (completed)
109910
+ return;
109911
+ completed = true;
109912
+ onMonitoringComplete({
109913
+ durationMs: Math.max(0, Date.now() - requestStartedAt),
109914
+ error,
109915
+ responseBytes,
109916
+ usage
109917
+ });
109918
+ }
109919
+ const rawBody = Readable.fromWeb(response.body);
109920
+ if (streamRequested) {
109921
+ let buffer = "";
109922
+ rawBody.on("data", (chunk) => {
109923
+ const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
109924
+ responseBytes += chunkBuffer.length;
109925
+ buffer += chunkBuffer.toString("utf8");
109926
+ const lines = buffer.split("\n");
109927
+ buffer = lines.pop() ?? "";
109928
+ for (const line of lines) {
109929
+ const extracted = extractAnthropicStreamUsage(line.trim());
109930
+ if (extracted?.inputTokens !== undefined && extracted.inputTokens !== null) {
109931
+ usage.inputTokens = extracted.inputTokens;
109932
+ }
109933
+ if (extracted?.outputTokens !== undefined && extracted.outputTokens !== null) {
109934
+ usage.outputTokens = extracted.outputTokens;
109935
+ }
109936
+ }
109937
+ passThrough.write(chunkBuffer);
109938
+ });
109939
+ rawBody.once("error", err => {
109940
+ const normalizedError = asError(err);
109941
+ logEngineMetrics({
109942
+ agentEngineType: configuration.agentEngineType,
109943
+ error: normalizedError,
109944
+ level: "error",
109945
+ logger,
109946
+ requestBodyBytes,
109947
+ requestPath: "/v1/messages",
109948
+ responseBytes,
109949
+ usage: null
109950
+ });
109951
+ finalize(normalizedError);
109952
+ passThrough.destroy(normalizedError);
109953
+ });
109954
+ rawBody.once("end", () => {
109955
+ logEngineMetrics({
109956
+ agentEngineType: configuration.agentEngineType,
109957
+ level: upstreamError ? "error" : "info",
109958
+ logger,
109959
+ requestBodyBytes,
109960
+ requestPath: "/v1/messages",
109961
+ responseBytes,
109962
+ usage: null
109963
+ });
109964
+ finalize(upstreamError);
109965
+ passThrough.end();
109966
+ });
109967
+ rawBody.once("close", () => {
109968
+ if (completed) {
109969
+ if (!passThrough.writableEnded)
109970
+ passThrough.end();
109971
+ return;
109972
+ }
109973
+ const closeError = new Error("Engine response stream closed before completion");
109974
+ logEngineMetrics({
109975
+ agentEngineType: configuration.agentEngineType,
109976
+ error: closeError,
109977
+ level: "error",
109978
+ logger,
109979
+ requestBodyBytes,
109980
+ requestPath: "/v1/messages",
109981
+ responseBytes,
109982
+ usage: null
109983
+ });
109984
+ finalize(closeError);
109985
+ if (!passThrough.writableEnded)
109986
+ passThrough.end();
109987
+ });
109988
+ }
109989
+ else {
109990
+ const chunks = [];
109991
+ rawBody.on("data", (chunk) => {
109992
+ const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
109993
+ responseBytes += chunkBuffer.length;
109994
+ chunks.push(chunkBuffer);
109995
+ passThrough.write(chunkBuffer);
109996
+ });
109997
+ rawBody.once("error", err => {
109998
+ const normalizedError = asError(err);
109999
+ logEngineMetrics({
110000
+ agentEngineType: configuration.agentEngineType,
110001
+ error: normalizedError,
110002
+ level: "error",
110003
+ logger,
110004
+ requestBodyBytes,
110005
+ requestPath: "/v1/messages",
110006
+ responseBytes,
110007
+ usage: null
110008
+ });
110009
+ finalize(normalizedError);
110010
+ passThrough.destroy(normalizedError);
110011
+ });
110012
+ rawBody.once("end", () => {
110013
+ const fullBody = Buffer.concat(chunks).toString("utf8");
110014
+ const extractedUsage = extractAnthropicNonStreamUsage(fullBody);
110015
+ if (extractedUsage) {
110016
+ usage.inputTokens = extractedUsage.inputTokens;
110017
+ usage.outputTokens = extractedUsage.outputTokens;
110018
+ }
110019
+ logEngineMetrics({
110020
+ agentEngineType: configuration.agentEngineType,
110021
+ level: upstreamError ? "error" : "info",
110022
+ logger,
110023
+ requestBodyBytes,
110024
+ requestPath: "/v1/messages",
110025
+ responseBytes,
110026
+ usage: null
110027
+ });
110028
+ finalize(upstreamError);
110029
+ passThrough.end();
110030
+ });
110031
+ rawBody.once("close", () => {
110032
+ if (completed) {
110033
+ if (!passThrough.writableEnded)
110034
+ passThrough.end();
110035
+ return;
110036
+ }
110037
+ const closeError = new Error("Engine response stream closed before completion");
110038
+ logEngineMetrics({
110039
+ agentEngineType: configuration.agentEngineType,
110040
+ error: closeError,
110041
+ level: "error",
110042
+ logger,
110043
+ requestBodyBytes,
110044
+ requestPath: "/v1/messages",
110045
+ responseBytes,
110046
+ usage: null
110047
+ });
110048
+ finalize(closeError);
110049
+ if (!passThrough.writableEnded)
110050
+ passThrough.end();
110051
+ });
110052
+ }
110053
+ return {
110054
+ body: passThrough,
110055
+ headers: Object.fromEntries(response.headers.entries()),
110056
+ status: response.status
110057
+ };
110058
+ }
110059
+
110060
+ function createConduitAnthropicAPIReferenceHandlers({ apiClient, configuration, getModelID, getModelManager, logger }) {
110061
+ return {
110062
+ "/v1/messages": {
110063
+ POST: async ({ body }) => {
110064
+ const modelID = getModelID();
110065
+ const modelManager = getModelManager();
110066
+ return proxyAnthropicStreamingRoute({
110067
+ body,
110068
+ configuration,
110069
+ logger,
110070
+ modelID,
110071
+ modelManager,
110072
+ reportMetrics: apiClient.reportPromptMetrics
110073
+ });
110074
+ }
110075
+ }
110076
+ };
110077
+ }
110078
+ function createPostMessagesHandler(options) {
110079
+ return createConduitAnthropicAPIReferenceHandlers(options)["/v1/messages"].POST;
110080
+ }
110081
+
109534
110082
  function createHealthHandler() {
109535
110083
  return (_req, res) => {
109536
110084
  res.status(200).send("OK");
@@ -119544,6 +120092,22 @@ async function createApplication({ abortController, apiClient, configuration, lo
119544
120092
  mount: publicRouter,
119545
120093
  reference: API_CLIENT_CONDUIT_OPENAI_REFERENCE
119546
120094
  });
120095
+ implementAPIReference({
120096
+ api: {
120097
+ "/v1/messages": {
120098
+ POST: createPostMessagesHandler({
120099
+ apiClient,
120100
+ configuration,
120101
+ getModelID: () => conduitConfiguration.targetModel.id,
120102
+ getModelManager: () => modelManager,
120103
+ logger
120104
+ })
120105
+ }
120106
+ },
120107
+ logger,
120108
+ mount: publicRouter,
120109
+ reference: API_CLIENT_CONDUIT_ANTHROPIC_REFERENCE
120110
+ });
119547
120111
  handleSSERequests({
119548
120112
  apiURL: configuration.apiURL,
119549
120113
  configuration,
@@ -0,0 +1,20 @@
1
+ import { Readable } from "node:stream";
2
+ import { InferenceAgentLLMMetricsPayload, type ULID } from "@infersec/definitions";
3
+ import { Logger } from "@infersec/logger";
4
+ import { Configuration } from "../configuration.js";
5
+ import { ModelManager } from "../modelManagement/ModelManager.js";
6
/** Dependencies consumed by `proxyAnthropicStreamingRoute`. */
type ProxyAnthropicStreamingRouteOptions = {
    body: unknown;
    configuration: Configuration;
    logger: Logger;
    modelID: ULID;
    modelManager: ModelManager;
    reportMetrics: (payload: InferenceAgentLLMMetricsPayload) => Promise<void>;
};
/** Result carrying a readable response body. */
type ProxyAnthropicStreamedResult = {
    body: Readable;
    headers: Record<string, string>;
    status: number;
};
/** Result without a body: status information only. */
type ProxyAnthropicStatusResult = {
    status: number;
    statusText: string;
};
/**
 * Proxies a `/v1/messages` request and reports metrics through `reportMetrics`.
 *
 * @returns Either a streamed result (`body`, `headers`, `status`) or a
 * status-only result (`status`, `statusText`).
 */
export declare function proxyAnthropicStreamingRoute({ body, configuration, logger, modelID, modelManager, reportMetrics }: ProxyAnthropicStreamingRouteOptions): Promise<ProxyAnthropicStreamedResult | ProxyAnthropicStatusResult>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.34.0",
4
+ "version": "1.35.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },