@ax-llm/ax 12.0.19 → 12.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.cjs +668 -10
- package/index.cjs.map +1 -1
- package/index.d.cts +49 -2
- package/index.d.ts +49 -2
- package/index.js +668 -10
- package/index.js.map +1 -1
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -586,6 +586,16 @@ var apiCall = async (api, json) => {
|
|
|
586
586
|
import crypto2 from "crypto";
|
|
587
587
|
import { context, SpanKind } from "@opentelemetry/api";
|
|
588
588
|
|
|
589
|
+
// dsp/globals.ts
|
|
590
|
+
/**
 * Process-wide defaults shared by the library.
 * AxBaseAI falls back to `tracer` / `meter` from here when the corresponding
 * option is not supplied.
 */
var axGlobals = {
  // Controls reservedNames enforcement in signature parsing/validation
  signatureStrict: true,
  // Global OpenTelemetry tracer for all AI operations
  tracer: undefined,
  // Global OpenTelemetry meter for metrics collection
  meter: undefined
};
|
|
598
|
+
|
|
589
599
|
// trace/trace.ts
|
|
590
600
|
var axSpanAttributes = {
|
|
591
601
|
// LLM
|
|
@@ -1021,6 +1031,291 @@ var logResponseDelta = (delta, logger = defaultLogger) => {
|
|
|
1021
1031
|
logger(delta, { tags: ["responseContent"] });
|
|
1022
1032
|
};
|
|
1023
1033
|
|
|
1034
|
+
// ai/metrics.ts
|
|
1035
|
+
/**
 * Creates every metrics instrument used by the AI layer on the given meter.
 *
 * @param meter Object exposing `createHistogram`, `createCounter` and
 *   `createGauge` (presumably an OpenTelemetry Meter — confirm with caller).
 * @returns An object mapping instrument keys (e.g. `latencyHistogram`) to the
 *   instruments returned by the meter, created in the order listed below.
 */
var createMetricsInstruments = (meter) => {
  // [result key, meter factory method, metric name, description, optional unit]
  const specs = [
    ["latencyHistogram", "createHistogram", "ax_llm_request_duration_ms", "Duration of LLM requests in milliseconds", "ms"],
    ["errorCounter", "createCounter", "ax_llm_errors_total", "Total number of LLM request errors"],
    ["requestCounter", "createCounter", "ax_llm_requests_total", "Total number of LLM requests"],
    ["tokenCounter", "createCounter", "ax_llm_tokens_total", "Total number of LLM tokens consumed"],
    ["inputTokenCounter", "createCounter", "ax_llm_input_tokens_total", "Total number of input/prompt tokens consumed"],
    ["outputTokenCounter", "createCounter", "ax_llm_output_tokens_total", "Total number of output/completion tokens generated"],
    ["errorRateGauge", "createGauge", "ax_llm_error_rate", "Current error rate as a percentage (0-100)"],
    ["meanLatencyGauge", "createGauge", "ax_llm_mean_latency_ms", "Mean latency of LLM requests in milliseconds", "ms"],
    ["p95LatencyGauge", "createGauge", "ax_llm_p95_latency_ms", "95th percentile latency of LLM requests in milliseconds", "ms"],
    ["p99LatencyGauge", "createGauge", "ax_llm_p99_latency_ms", "99th percentile latency of LLM requests in milliseconds", "ms"],
    ["streamingRequestsCounter", "createCounter", "ax_llm_streaming_requests_total", "Total number of streaming LLM requests"],
    ["functionCallsCounter", "createCounter", "ax_llm_function_calls_total", "Total number of function/tool calls made"],
    ["functionCallLatencyHistogram", "createHistogram", "ax_llm_function_call_latency_ms", "Latency of function calls in milliseconds", "ms"],
    ["requestSizeHistogram", "createHistogram", "ax_llm_request_size_bytes", "Size of LLM request payloads in bytes", "By"],
    ["responseSizeHistogram", "createHistogram", "ax_llm_response_size_bytes", "Size of LLM response payloads in bytes", "By"],
    ["temperatureGauge", "createGauge", "ax_llm_temperature_gauge", "Temperature setting used for LLM requests"],
    ["maxTokensGauge", "createGauge", "ax_llm_max_tokens_gauge", "Maximum tokens setting used for LLM requests"],
    ["estimatedCostCounter", "createCounter", "ax_llm_estimated_cost_total", "Estimated cost of LLM requests in USD", "$"],
    ["promptLengthHistogram", "createHistogram", "ax_llm_prompt_length_chars", "Length of prompts in characters"],
    ["contextWindowUsageGauge", "createGauge", "ax_llm_context_window_usage_ratio", "Context window utilization ratio (0-1)"],
    ["timeoutsCounter", "createCounter", "ax_llm_timeouts_total", "Total number of timed out LLM requests"],
    ["abortsCounter", "createCounter", "ax_llm_aborts_total", "Total number of aborted LLM requests"],
    ["thinkingBudgetUsageCounter", "createCounter", "ax_llm_thinking_budget_usage_total", "Total thinking budget tokens used"],
    ["multimodalRequestsCounter", "createCounter", "ax_llm_multimodal_requests_total", "Total number of multimodal requests (with images/audio)"]
  ];
  const instruments = {};
  for (const [key, factory, metricName, description, unit] of specs) {
    // Only pass `unit` when one is specified so the options object has the
    // same keys as a hand-written literal would.
    const options = unit === undefined ? { description } : { description, unit };
    instruments[key] = meter[factory](metricName, options);
  }
  return instruments;
};
|
|
1134
|
+
/**
 * Records one request duration on `instruments.latencyHistogram`, if present.
 *
 * @param instruments Instruments object; a missing histogram makes this a no-op.
 * @param type Value for the `operation` label.
 * @param duration Value passed to the histogram.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordLatencyMetric = (instruments, type, duration, aiService, model) => {
  const histogram = instruments.latencyHistogram;
  if (!histogram) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  histogram.record(duration, labels);
};
|
|
1143
|
+
/**
 * Publishes mean / p95 / p99 latency values on their respective gauges.
 * Each gauge is optional; absent gauges are skipped. All three recordings
 * share one label object.
 *
 * @param instruments Instruments object holding the latency gauges.
 * @param type Value for the `operation` label.
 * @param meanLatency Value for `meanLatencyGauge`.
 * @param p95Latency Value for `p95LatencyGauge`.
 * @param p99Latency Value for `p99LatencyGauge`.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordLatencyStatsMetrics = (instruments, type, meanLatency, p95Latency, p99Latency, aiService, model) => {
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  const readings = [
    ["meanLatencyGauge", meanLatency],
    ["p95LatencyGauge", p95Latency],
    ["p99LatencyGauge", p99Latency]
  ];
  for (const [gaugeKey, value] of readings) {
    const gauge = instruments[gaugeKey];
    if (gauge) {
      gauge.record(value, labels);
    }
  }
};
|
|
1159
|
+
/**
 * Adds 1 to `instruments.errorCounter`, if present.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param type Value for the `operation` label.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordErrorMetric = (instruments, type, aiService, model) => {
  const counter = instruments.errorCounter;
  if (!counter) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  counter.add(1, labels);
};
|
|
1168
|
+
/**
 * Publishes the error rate on `instruments.errorRateGauge`, if present.
 *
 * @param instruments Instruments object; a missing gauge makes this a no-op.
 * @param type Value for the `operation` label.
 * @param errorRate Rate that is multiplied by 100 before recording.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordErrorRateMetric = (instruments, type, errorRate, aiService, model) => {
  const gauge = instruments.errorRateGauge;
  if (!gauge) return;
  // Convert to percentage
  const percentage = errorRate * 100;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  gauge.record(percentage, labels);
};
|
|
1178
|
+
/**
 * Adds 1 to `instruments.requestCounter`, if present.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param type Value for the `operation` label.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordRequestMetric = (instruments, type, aiService, model) => {
  const counter = instruments.requestCounter;
  if (!counter) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  counter.add(1, labels);
};
|
|
1187
|
+
/**
 * Records a token count on the generic token counter and, for the "input"
 * and "output" types, on the matching dedicated counter as well.
 *
 * @param instruments Instruments object; each counter is optional.
 * @param type Token category; sent as the `token_type` label on the generic
 *   counter and used to pick the dedicated counter.
 * @param tokens Amount to add.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordTokenMetric = (instruments, type, tokens, aiService, model) => {
  const labels = model ? { ai_service: aiService, model } : { ai_service: aiService };
  const generic = instruments.tokenCounter;
  if (generic) {
    generic.add(tokens, Object.assign({ token_type: type }, labels));
  }
  // The dedicated counters get the labels without `token_type`.
  let dedicated;
  if (type === "input") {
    dedicated = instruments.inputTokenCounter;
  } else if (type === "output") {
    dedicated = instruments.outputTokenCounter;
  }
  if (dedicated) {
    dedicated.add(tokens, labels);
  }
};
|
|
1205
|
+
/**
 * Adds 1 to `instruments.streamingRequestsCounter` when the request was a
 * streaming one and the counter exists.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param type Value for the `operation` label.
 * @param isStreaming Nothing is recorded when this is falsy.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordStreamingRequestMetric = (instruments, type, isStreaming, aiService, model) => {
  if (!isStreaming) return;
  const counter = instruments.streamingRequestsCounter;
  if (!counter) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  counter.add(1, labels);
};
|
|
1214
|
+
/**
 * Counts one function/tool call and, when a latency is supplied, records it.
 *
 * @param instruments Instruments object; counter and histogram are optional.
 * @param functionName Value for the `function_name` label.
 * @param latency Optional latency value. Any real number — including 0 — is
 *   recorded; `undefined`, `null` and `NaN` are treated as "not measured".
 * @param aiService Optional; added as the `ai_service` label only when truthy.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordFunctionCallMetric = (instruments, functionName, latency, aiService, model) => {
  const labels = {
    function_name: functionName,
    ...aiService ? { ai_service: aiService } : {},
    ...model ? { model } : {}
  };
  if (instruments.functionCallsCounter) {
    instruments.functionCallsCounter.add(1, labels);
  }
  // A plain truthiness test would silently drop a legitimate 0 ms sample,
  // so check explicitly for "a number was provided".
  const hasLatency = typeof latency === "number" && !Number.isNaN(latency);
  if (hasLatency && instruments.functionCallLatencyHistogram) {
    instruments.functionCallLatencyHistogram.record(latency, labels);
  }
};
|
|
1227
|
+
/**
 * Records a request payload size on `instruments.requestSizeHistogram`, if present.
 *
 * @param instruments Instruments object; a missing histogram makes this a no-op.
 * @param type Value for the `operation` label.
 * @param sizeBytes Value passed to the histogram.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordRequestSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
  const histogram = instruments.requestSizeHistogram;
  if (!histogram) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  histogram.record(sizeBytes, labels);
};
|
|
1236
|
+
/**
 * Records a response payload size on `instruments.responseSizeHistogram`, if present.
 *
 * @param instruments Instruments object; a missing histogram makes this a no-op.
 * @param type Value for the `operation` label.
 * @param sizeBytes Value passed to the histogram.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordResponseSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
  const histogram = instruments.responseSizeHistogram;
  if (!histogram) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  histogram.record(sizeBytes, labels);
};
|
|
1245
|
+
/**
 * Publishes the temperature and max-tokens settings on their gauges.
 * A setting is recorded only when it is not `undefined` and its gauge exists.
 *
 * @param instruments Instruments object; both gauges are optional.
 * @param temperature Value for `temperatureGauge`, or `undefined` to skip.
 * @param maxTokens Value for `maxTokensGauge`, or `undefined` to skip.
 * @param aiService Optional; added as the `ai_service` label only when truthy.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordModelConfigMetrics = (instruments, temperature, maxTokens, aiService, model) => {
  const labels = {};
  if (aiService) labels.ai_service = aiService;
  if (model) labels.model = model;
  const publish = (value, gauge) => {
    if (value !== undefined && gauge) {
      gauge.record(value, labels);
    }
  };
  publish(temperature, instruments.temperatureGauge);
  publish(maxTokens, instruments.maxTokensGauge);
};
|
|
1257
|
+
/**
 * Adds an estimated cost to `instruments.estimatedCostCounter`, if present.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param type Value for the `operation` label.
 * @param costUSD Amount added to the counter.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordEstimatedCostMetric = (instruments, type, costUSD, aiService, model) => {
  const counter = instruments.estimatedCostCounter;
  if (!counter) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  counter.add(costUSD, labels);
};
|
|
1266
|
+
/**
 * Records a prompt length on `instruments.promptLengthHistogram`, if present.
 *
 * @param instruments Instruments object; a missing histogram makes this a no-op.
 * @param lengthChars Value passed to the histogram.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordPromptLengthMetric = (instruments, lengthChars, aiService, model) => {
  const histogram = instruments.promptLengthHistogram;
  if (!histogram) return;
  const labels = { ai_service: aiService };
  if (model) labels.model = model;
  histogram.record(lengthChars, labels);
};
|
|
1274
|
+
/**
 * Publishes a context-window usage ratio on
 * `instruments.contextWindowUsageGauge`, if present.
 *
 * @param instruments Instruments object; a missing gauge makes this a no-op.
 * @param usageRatio Value passed to the gauge.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordContextWindowUsageMetric = (instruments, usageRatio, aiService, model) => {
  const gauge = instruments.contextWindowUsageGauge;
  if (!gauge) return;
  const labels = { ai_service: aiService };
  if (model) labels.model = model;
  gauge.record(usageRatio, labels);
};
|
|
1282
|
+
/**
 * Adds 1 to `instruments.timeoutsCounter`, if present.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param type Value for the `operation` label.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordTimeoutMetric = (instruments, type, aiService, model) => {
  const counter = instruments.timeoutsCounter;
  if (!counter) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  counter.add(1, labels);
};
|
|
1291
|
+
/**
 * Adds 1 to `instruments.abortsCounter`, if present.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param type Value for the `operation` label.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordAbortMetric = (instruments, type, aiService, model) => {
  const counter = instruments.abortsCounter;
  if (!counter) return;
  const labels = { operation: type, ai_service: aiService };
  if (model) labels.model = model;
  counter.add(1, labels);
};
|
|
1300
|
+
/**
 * Adds a thinking-token amount to `instruments.thinkingBudgetUsageCounter`,
 * if present.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param tokensUsed Amount added to the counter.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordThinkingBudgetUsageMetric = (instruments, tokensUsed, aiService, model) => {
  const counter = instruments.thinkingBudgetUsageCounter;
  if (!counter) return;
  const labels = { ai_service: aiService };
  if (model) labels.model = model;
  counter.add(tokensUsed, labels);
};
|
|
1308
|
+
/**
 * Counts a request as multimodal when it carries images and/or audio.
 *
 * @param instruments Instruments object; a missing counter makes this a no-op.
 * @param hasImages Whether the request contained image parts.
 * @param hasAudio Whether the request contained audio parts.
 * @param aiService Value for the `ai_service` label.
 * @param model Optional; added as the `model` label only when truthy.
 */
var recordMultimodalRequestMetric = (instruments, hasImages, hasAudio, aiService, model) => {
  if ((hasImages || hasAudio) && instruments.multimodalRequestsCounter) {
    instruments.multimodalRequestsCounter.add(1, {
      ai_service: aiService,
      // Normalize through Boolean so an undefined/null flag (with the other
      // flag set) yields "false" instead of throwing on `.toString()`.
      has_images: String(Boolean(hasImages)),
      has_audio: String(Boolean(hasAudio)),
      ...model ? { model } : {}
    });
  }
};
|
|
1318
|
+
|
|
1024
1319
|
// ai/base.ts
|
|
1025
1320
|
var axBaseAIDefaultConfig = () => structuredClone({
|
|
1026
1321
|
temperature: 0,
|
|
@@ -1051,7 +1346,8 @@ var AxBaseAI = class {
|
|
|
1051
1346
|
this.apiURL = apiURL;
|
|
1052
1347
|
this.headers = headers;
|
|
1053
1348
|
this.supportFor = supportFor;
|
|
1054
|
-
this.tracer = options.tracer;
|
|
1349
|
+
this.tracer = options.tracer ?? axGlobals.tracer;
|
|
1350
|
+
this.meter = options.meter ?? axGlobals.meter;
|
|
1055
1351
|
this.modelInfo = modelInfo;
|
|
1056
1352
|
this.models = models;
|
|
1057
1353
|
this.id = crypto2.randomUUID();
|
|
@@ -1062,6 +1358,7 @@ var AxBaseAI = class {
|
|
|
1062
1358
|
throw new Error("No model defined");
|
|
1063
1359
|
}
|
|
1064
1360
|
this.setOptions(options);
|
|
1361
|
+
this.initializeMetricsInstruments();
|
|
1065
1362
|
if (models) {
|
|
1066
1363
|
validateModels(models);
|
|
1067
1364
|
}
|
|
@@ -1070,11 +1367,14 @@ var AxBaseAI = class {
|
|
|
1070
1367
|
rt;
|
|
1071
1368
|
fetch;
|
|
1072
1369
|
tracer;
|
|
1370
|
+
meter;
|
|
1073
1371
|
timeout;
|
|
1074
1372
|
excludeContentFromTrace;
|
|
1075
1373
|
models;
|
|
1076
1374
|
abortSignal;
|
|
1077
1375
|
logger = defaultLogger2;
|
|
1376
|
+
// OpenTelemetry metrics instruments
|
|
1377
|
+
metricsInstruments;
|
|
1078
1378
|
modelInfo;
|
|
1079
1379
|
modelUsage;
|
|
1080
1380
|
embedModelUsage;
|
|
@@ -1116,6 +1416,11 @@ var AxBaseAI = class {
|
|
|
1116
1416
|
}
|
|
1117
1417
|
}
|
|
1118
1418
|
};
|
|
1419
|
+
initializeMetricsInstruments() {
|
|
1420
|
+
if (this.meter) {
|
|
1421
|
+
this.metricsInstruments = createMetricsInstruments(this.meter);
|
|
1422
|
+
}
|
|
1423
|
+
}
|
|
1119
1424
|
setName(name) {
|
|
1120
1425
|
this.name = name;
|
|
1121
1426
|
}
|
|
@@ -1133,10 +1438,12 @@ var AxBaseAI = class {
|
|
|
1133
1438
|
this.rt = options.rateLimiter;
|
|
1134
1439
|
this.fetch = options.fetch;
|
|
1135
1440
|
this.timeout = options.timeout;
|
|
1136
|
-
this.tracer = options.tracer;
|
|
1441
|
+
this.tracer = options.tracer ?? axGlobals.tracer;
|
|
1442
|
+
this.meter = options.meter ?? axGlobals.meter;
|
|
1137
1443
|
this.excludeContentFromTrace = options.excludeContentFromTrace;
|
|
1138
1444
|
this.abortSignal = options.abortSignal;
|
|
1139
1445
|
this.logger = options.logger ?? defaultLogger2;
|
|
1446
|
+
this.initializeMetricsInstruments();
|
|
1140
1447
|
}
|
|
1141
1448
|
getOptions() {
|
|
1142
1449
|
return {
|
|
@@ -1144,6 +1451,7 @@ var AxBaseAI = class {
|
|
|
1144
1451
|
rateLimiter: this.rt,
|
|
1145
1452
|
fetch: this.fetch,
|
|
1146
1453
|
tracer: this.tracer,
|
|
1454
|
+
meter: this.meter,
|
|
1147
1455
|
timeout: this.timeout,
|
|
1148
1456
|
excludeContentFromTrace: this.excludeContentFromTrace,
|
|
1149
1457
|
abortSignal: this.abortSignal,
|
|
@@ -1208,6 +1516,25 @@ var AxBaseAI = class {
|
|
|
1208
1516
|
metrics.mean = metrics.samples.reduce((a, b) => a + b, 0) / metrics.samples.length;
|
|
1209
1517
|
metrics.p95 = this.calculatePercentile(metrics.samples, 95);
|
|
1210
1518
|
metrics.p99 = this.calculatePercentile(metrics.samples, 99);
|
|
1519
|
+
if (this.metricsInstruments) {
|
|
1520
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1521
|
+
recordLatencyMetric(
|
|
1522
|
+
this.metricsInstruments,
|
|
1523
|
+
type,
|
|
1524
|
+
duration,
|
|
1525
|
+
this.name,
|
|
1526
|
+
model
|
|
1527
|
+
);
|
|
1528
|
+
recordLatencyStatsMetrics(
|
|
1529
|
+
this.metricsInstruments,
|
|
1530
|
+
type,
|
|
1531
|
+
metrics.mean,
|
|
1532
|
+
metrics.p95,
|
|
1533
|
+
metrics.p99,
|
|
1534
|
+
this.name,
|
|
1535
|
+
model
|
|
1536
|
+
);
|
|
1537
|
+
}
|
|
1211
1538
|
}
|
|
1212
1539
|
// Method to update error metrics
|
|
1213
1540
|
updateErrorMetrics(type, isError) {
|
|
@@ -1217,6 +1544,317 @@ var AxBaseAI = class {
|
|
|
1217
1544
|
metrics.count++;
|
|
1218
1545
|
}
|
|
1219
1546
|
metrics.rate = metrics.count / metrics.total;
|
|
1547
|
+
if (this.metricsInstruments) {
|
|
1548
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1549
|
+
recordRequestMetric(this.metricsInstruments, type, this.name, model);
|
|
1550
|
+
if (isError) {
|
|
1551
|
+
recordErrorMetric(this.metricsInstruments, type, this.name, model);
|
|
1552
|
+
}
|
|
1553
|
+
recordErrorRateMetric(
|
|
1554
|
+
this.metricsInstruments,
|
|
1555
|
+
type,
|
|
1556
|
+
metrics.rate,
|
|
1557
|
+
this.name,
|
|
1558
|
+
model
|
|
1559
|
+
);
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
// Method to record token usage metrics
|
|
1563
|
+
recordTokenUsage(modelUsage) {
|
|
1564
|
+
if (this.metricsInstruments && modelUsage?.tokens) {
|
|
1565
|
+
const { promptTokens, completionTokens, totalTokens, thoughtsTokens } = modelUsage.tokens;
|
|
1566
|
+
if (promptTokens) {
|
|
1567
|
+
recordTokenMetric(
|
|
1568
|
+
this.metricsInstruments,
|
|
1569
|
+
"input",
|
|
1570
|
+
promptTokens,
|
|
1571
|
+
this.name,
|
|
1572
|
+
modelUsage.model
|
|
1573
|
+
);
|
|
1574
|
+
}
|
|
1575
|
+
if (completionTokens) {
|
|
1576
|
+
recordTokenMetric(
|
|
1577
|
+
this.metricsInstruments,
|
|
1578
|
+
"output",
|
|
1579
|
+
completionTokens,
|
|
1580
|
+
this.name,
|
|
1581
|
+
modelUsage.model
|
|
1582
|
+
);
|
|
1583
|
+
}
|
|
1584
|
+
if (totalTokens) {
|
|
1585
|
+
recordTokenMetric(
|
|
1586
|
+
this.metricsInstruments,
|
|
1587
|
+
"total",
|
|
1588
|
+
totalTokens,
|
|
1589
|
+
this.name,
|
|
1590
|
+
modelUsage.model
|
|
1591
|
+
);
|
|
1592
|
+
}
|
|
1593
|
+
if (thoughtsTokens) {
|
|
1594
|
+
recordTokenMetric(
|
|
1595
|
+
this.metricsInstruments,
|
|
1596
|
+
"thoughts",
|
|
1597
|
+
thoughtsTokens,
|
|
1598
|
+
this.name,
|
|
1599
|
+
modelUsage.model
|
|
1600
|
+
);
|
|
1601
|
+
}
|
|
1602
|
+
}
|
|
1603
|
+
}
|
|
1604
|
+
// Helper method to calculate request size in bytes
|
|
1605
|
+
calculateRequestSize(req) {
|
|
1606
|
+
try {
|
|
1607
|
+
return new TextEncoder().encode(JSON.stringify(req)).length;
|
|
1608
|
+
} catch {
|
|
1609
|
+
return 0;
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1612
|
+
// Helper method to calculate response size in bytes
|
|
1613
|
+
calculateResponseSize(response) {
|
|
1614
|
+
try {
|
|
1615
|
+
return new TextEncoder().encode(JSON.stringify(response)).length;
|
|
1616
|
+
} catch {
|
|
1617
|
+
return 0;
|
|
1618
|
+
}
|
|
1619
|
+
}
|
|
1620
|
+
// Helper method to detect multimodal content
|
|
1621
|
+
detectMultimodalContent(req) {
|
|
1622
|
+
let hasImages = false;
|
|
1623
|
+
let hasAudio = false;
|
|
1624
|
+
if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
|
|
1625
|
+
for (const message of req.chatPrompt) {
|
|
1626
|
+
if (message.role === "user" && Array.isArray(message.content)) {
|
|
1627
|
+
for (const part of message.content) {
|
|
1628
|
+
if (part.type === "image") {
|
|
1629
|
+
hasImages = true;
|
|
1630
|
+
} else if (part.type === "audio") {
|
|
1631
|
+
hasAudio = true;
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1635
|
+
}
|
|
1636
|
+
}
|
|
1637
|
+
return { hasImages, hasAudio };
|
|
1638
|
+
}
|
|
1639
|
+
// Helper method to calculate prompt length
|
|
1640
|
+
calculatePromptLength(req) {
|
|
1641
|
+
let totalLength = 0;
|
|
1642
|
+
if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
|
|
1643
|
+
for (const message of req.chatPrompt) {
|
|
1644
|
+
if (message.role === "system" || message.role === "assistant") {
|
|
1645
|
+
if (message.content) {
|
|
1646
|
+
totalLength += message.content.length;
|
|
1647
|
+
}
|
|
1648
|
+
} else if (message.role === "user") {
|
|
1649
|
+
if (typeof message.content === "string") {
|
|
1650
|
+
totalLength += message.content.length;
|
|
1651
|
+
} else if (Array.isArray(message.content)) {
|
|
1652
|
+
for (const part of message.content) {
|
|
1653
|
+
if (part.type === "text") {
|
|
1654
|
+
totalLength += part.text.length;
|
|
1655
|
+
}
|
|
1656
|
+
}
|
|
1657
|
+
}
|
|
1658
|
+
} else if (message.role === "function") {
|
|
1659
|
+
if (message.result) {
|
|
1660
|
+
totalLength += message.result.length;
|
|
1661
|
+
}
|
|
1662
|
+
}
|
|
1663
|
+
}
|
|
1664
|
+
}
|
|
1665
|
+
return totalLength;
|
|
1666
|
+
}
|
|
1667
|
+
// Helper method to calculate context window usage
|
|
1668
|
+
calculateContextWindowUsage(model, modelUsage) {
|
|
1669
|
+
if (!modelUsage?.tokens?.promptTokens) return 0;
|
|
1670
|
+
const modelInfo = this.modelInfo.find(
|
|
1671
|
+
(info) => info.name === model
|
|
1672
|
+
);
|
|
1673
|
+
if (!modelInfo?.contextWindow) return 0;
|
|
1674
|
+
return modelUsage.tokens.promptTokens / modelInfo.contextWindow;
|
|
1675
|
+
}
|
|
1676
|
+
// Helper method to estimate cost
|
|
1677
|
+
estimateCost(model, modelUsage) {
|
|
1678
|
+
if (!modelUsage?.tokens) return 0;
|
|
1679
|
+
const modelInfo = this.modelInfo.find(
|
|
1680
|
+
(info) => info.name === model
|
|
1681
|
+
);
|
|
1682
|
+
if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
|
|
1683
|
+
return 0;
|
|
1684
|
+
const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
|
|
1685
|
+
const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
|
|
1686
|
+
const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
|
|
1687
|
+
return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
|
|
1688
|
+
}
|
|
1689
|
+
// Helper method to estimate cost by model name
|
|
1690
|
+
estimateCostByName(modelName, modelUsage) {
|
|
1691
|
+
if (!modelUsage?.tokens) return 0;
|
|
1692
|
+
const modelInfo = this.modelInfo.find((info) => info.name === modelName);
|
|
1693
|
+
if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
|
|
1694
|
+
return 0;
|
|
1695
|
+
const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
|
|
1696
|
+
const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
|
|
1697
|
+
const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
|
|
1698
|
+
return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
|
|
1699
|
+
}
|
|
1700
|
+
// Helper method to record function call metrics
|
|
1701
|
+
recordFunctionCallMetrics(functionCalls, model) {
|
|
1702
|
+
if (!this.metricsInstruments || !functionCalls) return;
|
|
1703
|
+
for (const call of functionCalls) {
|
|
1704
|
+
if (call && typeof call === "object" && "function" in call && call.function && typeof call.function === "object" && "name" in call.function) {
|
|
1705
|
+
recordFunctionCallMetric(
|
|
1706
|
+
this.metricsInstruments,
|
|
1707
|
+
call.function.name,
|
|
1708
|
+
void 0,
|
|
1709
|
+
// latency would need to be tracked separately
|
|
1710
|
+
this.name,
|
|
1711
|
+
model
|
|
1712
|
+
);
|
|
1713
|
+
}
|
|
1714
|
+
}
|
|
1715
|
+
}
|
|
1716
|
+
// Helper method to record timeout metrics
|
|
1717
|
+
recordTimeoutMetric(type) {
|
|
1718
|
+
if (this.metricsInstruments) {
|
|
1719
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1720
|
+
recordTimeoutMetric(this.metricsInstruments, type, this.name, model);
|
|
1721
|
+
}
|
|
1722
|
+
}
|
|
1723
|
+
// Helper method to record abort metrics
|
|
1724
|
+
recordAbortMetric(type) {
|
|
1725
|
+
if (this.metricsInstruments) {
|
|
1726
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1727
|
+
recordAbortMetric(this.metricsInstruments, type, this.name, model);
|
|
1728
|
+
}
|
|
1729
|
+
}
|
|
1730
|
+
// Comprehensive method to record all chat-related metrics
|
|
1731
|
+
recordChatMetrics(req, options, result) {
|
|
1732
|
+
if (!this.metricsInstruments) return;
|
|
1733
|
+
const model = this.lastUsedChatModel;
|
|
1734
|
+
const modelConfig = this.lastUsedModelConfig;
|
|
1735
|
+
const isStreaming = modelConfig?.stream ?? false;
|
|
1736
|
+
recordStreamingRequestMetric(
|
|
1737
|
+
this.metricsInstruments,
|
|
1738
|
+
"chat",
|
|
1739
|
+
isStreaming,
|
|
1740
|
+
this.name,
|
|
1741
|
+
model
|
|
1742
|
+
);
|
|
1743
|
+
const { hasImages, hasAudio } = this.detectMultimodalContent(req);
|
|
1744
|
+
recordMultimodalRequestMetric(
|
|
1745
|
+
this.metricsInstruments,
|
|
1746
|
+
hasImages,
|
|
1747
|
+
hasAudio,
|
|
1748
|
+
this.name,
|
|
1749
|
+
model
|
|
1750
|
+
);
|
|
1751
|
+
const promptLength = this.calculatePromptLength(req);
|
|
1752
|
+
recordPromptLengthMetric(
|
|
1753
|
+
this.metricsInstruments,
|
|
1754
|
+
promptLength,
|
|
1755
|
+
this.name,
|
|
1756
|
+
model
|
|
1757
|
+
);
|
|
1758
|
+
recordModelConfigMetrics(
|
|
1759
|
+
this.metricsInstruments,
|
|
1760
|
+
modelConfig?.temperature,
|
|
1761
|
+
modelConfig?.maxTokens,
|
|
1762
|
+
this.name,
|
|
1763
|
+
model
|
|
1764
|
+
);
|
|
1765
|
+
if (options?.thinkingTokenBudget && this.modelUsage?.tokens?.thoughtsTokens) {
|
|
1766
|
+
recordThinkingBudgetUsageMetric(
|
|
1767
|
+
this.metricsInstruments,
|
|
1768
|
+
this.modelUsage.tokens.thoughtsTokens,
|
|
1769
|
+
this.name,
|
|
1770
|
+
model
|
|
1771
|
+
);
|
|
1772
|
+
}
|
|
1773
|
+
const requestSize = this.calculateRequestSize(req);
|
|
1774
|
+
recordRequestSizeMetric(
|
|
1775
|
+
this.metricsInstruments,
|
|
1776
|
+
"chat",
|
|
1777
|
+
requestSize,
|
|
1778
|
+
this.name,
|
|
1779
|
+
model
|
|
1780
|
+
);
|
|
1781
|
+
if (result && !isStreaming) {
|
|
1782
|
+
const chatResponse = result;
|
|
1783
|
+
const responseSize = this.calculateResponseSize(chatResponse);
|
|
1784
|
+
recordResponseSizeMetric(
|
|
1785
|
+
this.metricsInstruments,
|
|
1786
|
+
"chat",
|
|
1787
|
+
responseSize,
|
|
1788
|
+
this.name,
|
|
1789
|
+
model
|
|
1790
|
+
);
|
|
1791
|
+
if (chatResponse.results) {
|
|
1792
|
+
for (const chatResult of chatResponse.results) {
|
|
1793
|
+
if (chatResult.functionCalls) {
|
|
1794
|
+
this.recordFunctionCallMetrics(
|
|
1795
|
+
chatResult.functionCalls,
|
|
1796
|
+
this.lastUsedChatModel
|
|
1797
|
+
);
|
|
1798
|
+
}
|
|
1799
|
+
}
|
|
1800
|
+
}
|
|
1801
|
+
const contextUsage = this.calculateContextWindowUsage(
|
|
1802
|
+
this.lastUsedChatModel,
|
|
1803
|
+
chatResponse.modelUsage
|
|
1804
|
+
);
|
|
1805
|
+
if (contextUsage > 0) {
|
|
1806
|
+
recordContextWindowUsageMetric(
|
|
1807
|
+
this.metricsInstruments,
|
|
1808
|
+
contextUsage,
|
|
1809
|
+
this.name,
|
|
1810
|
+
model
|
|
1811
|
+
);
|
|
1812
|
+
}
|
|
1813
|
+
const estimatedCost = this.estimateCost(
|
|
1814
|
+
this.lastUsedChatModel,
|
|
1815
|
+
chatResponse.modelUsage
|
|
1816
|
+
);
|
|
1817
|
+
if (estimatedCost > 0) {
|
|
1818
|
+
recordEstimatedCostMetric(
|
|
1819
|
+
this.metricsInstruments,
|
|
1820
|
+
"chat",
|
|
1821
|
+
estimatedCost,
|
|
1822
|
+
this.name,
|
|
1823
|
+
model
|
|
1824
|
+
);
|
|
1825
|
+
}
|
|
1826
|
+
}
|
|
1827
|
+
}
|
|
1828
|
+
// Comprehensive method to record all embed-related metrics
|
|
1829
|
+
recordEmbedMetrics(req, result) {
|
|
1830
|
+
if (!this.metricsInstruments) return;
|
|
1831
|
+
const model = this.lastUsedEmbedModel;
|
|
1832
|
+
const requestSize = this.calculateRequestSize(req);
|
|
1833
|
+
recordRequestSizeMetric(
|
|
1834
|
+
this.metricsInstruments,
|
|
1835
|
+
"embed",
|
|
1836
|
+
requestSize,
|
|
1837
|
+
this.name,
|
|
1838
|
+
model
|
|
1839
|
+
);
|
|
1840
|
+
const responseSize = this.calculateResponseSize(result);
|
|
1841
|
+
recordResponseSizeMetric(
|
|
1842
|
+
this.metricsInstruments,
|
|
1843
|
+
"embed",
|
|
1844
|
+
responseSize,
|
|
1845
|
+
this.name,
|
|
1846
|
+
model
|
|
1847
|
+
);
|
|
1848
|
+
const estimatedCost = this.estimateCostByName(model, result.modelUsage);
|
|
1849
|
+
if (estimatedCost > 0) {
|
|
1850
|
+
recordEstimatedCostMetric(
|
|
1851
|
+
this.metricsInstruments,
|
|
1852
|
+
"embed",
|
|
1853
|
+
estimatedCost,
|
|
1854
|
+
this.name,
|
|
1855
|
+
model
|
|
1856
|
+
);
|
|
1857
|
+
}
|
|
1220
1858
|
}
|
|
1221
1859
|
// Public method to get metrics
|
|
1222
1860
|
getMetrics() {
|
|
@@ -1225,16 +1863,27 @@ var AxBaseAI = class {
|
|
|
1225
1863
|
async chat(req, options) {
|
|
1226
1864
|
const startTime = performance.now();
|
|
1227
1865
|
let isError = false;
|
|
1866
|
+
let result;
|
|
1228
1867
|
try {
|
|
1229
|
-
|
|
1868
|
+
result = await this._chat1(req, options);
|
|
1230
1869
|
return result;
|
|
1231
1870
|
} catch (error) {
|
|
1232
1871
|
isError = true;
|
|
1872
|
+
if (error instanceof Error) {
|
|
1873
|
+
if (error.message.includes("timeout") || error.name === "TimeoutError") {
|
|
1874
|
+
this.recordTimeoutMetric("chat");
|
|
1875
|
+
} else if (error.message.includes("abort") || error.name === "AbortError") {
|
|
1876
|
+
this.recordAbortMetric("chat");
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1233
1879
|
throw error;
|
|
1234
1880
|
} finally {
|
|
1235
1881
|
const duration = performance.now() - startTime;
|
|
1236
1882
|
this.updateLatencyMetrics("chat", duration);
|
|
1237
1883
|
this.updateErrorMetrics("chat", isError);
|
|
1884
|
+
if (!isError) {
|
|
1885
|
+
this.recordChatMetrics(req, options, result);
|
|
1886
|
+
}
|
|
1238
1887
|
}
|
|
1239
1888
|
}
|
|
1240
1889
|
async _chat1(req, options) {
|
|
@@ -1381,6 +2030,7 @@ var AxBaseAI = class {
|
|
|
1381
2030
|
}
|
|
1382
2031
|
}
|
|
1383
2032
|
this.modelUsage = res2.modelUsage;
|
|
2033
|
+
this.recordTokenUsage(res2.modelUsage);
|
|
1384
2034
|
if (span?.isRecording()) {
|
|
1385
2035
|
setChatResponseEvents(res2, span, this.excludeContentFromTrace);
|
|
1386
2036
|
}
|
|
@@ -1423,6 +2073,7 @@ var AxBaseAI = class {
|
|
|
1423
2073
|
}
|
|
1424
2074
|
if (res.modelUsage) {
|
|
1425
2075
|
this.modelUsage = res.modelUsage;
|
|
2076
|
+
this.recordTokenUsage(res.modelUsage);
|
|
1426
2077
|
}
|
|
1427
2078
|
if (span?.isRecording()) {
|
|
1428
2079
|
setChatResponseEvents(res, span, this.excludeContentFromTrace);
|
|
@@ -1439,15 +2090,27 @@ var AxBaseAI = class {
|
|
|
1439
2090
|
async embed(req, options) {
|
|
1440
2091
|
const startTime = performance.now();
|
|
1441
2092
|
let isError = false;
|
|
2093
|
+
let result;
|
|
1442
2094
|
try {
|
|
1443
|
-
|
|
2095
|
+
result = await this._embed1(req, options);
|
|
2096
|
+
return result;
|
|
1444
2097
|
} catch (error) {
|
|
1445
2098
|
isError = true;
|
|
2099
|
+
if (error instanceof Error) {
|
|
2100
|
+
if (error.message.includes("timeout") || error.name === "TimeoutError") {
|
|
2101
|
+
this.recordTimeoutMetric("embed");
|
|
2102
|
+
} else if (error.message.includes("abort") || error.name === "AbortError") {
|
|
2103
|
+
this.recordAbortMetric("embed");
|
|
2104
|
+
}
|
|
2105
|
+
}
|
|
1446
2106
|
throw error;
|
|
1447
2107
|
} finally {
|
|
1448
2108
|
const duration = performance.now() - startTime;
|
|
1449
2109
|
this.updateLatencyMetrics("embed", duration);
|
|
1450
2110
|
this.updateErrorMetrics("embed", isError);
|
|
2111
|
+
if (!isError) {
|
|
2112
|
+
this.recordEmbedMetrics(req, result);
|
|
2113
|
+
}
|
|
1451
2114
|
}
|
|
1452
2115
|
}
|
|
1453
2116
|
async _embed1(req, options) {
|
|
@@ -1522,6 +2185,7 @@ var AxBaseAI = class {
|
|
|
1522
2185
|
}
|
|
1523
2186
|
}
|
|
1524
2187
|
this.embedModelUsage = res.modelUsage;
|
|
2188
|
+
this.recordTokenUsage(res.modelUsage);
|
|
1525
2189
|
if (span?.isRecording() && res.modelUsage?.tokens) {
|
|
1526
2190
|
span.addEvent(axSpanEvents.GEN_AI_USAGE, {
|
|
1527
2191
|
[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: res.modelUsage.tokens.promptTokens,
|
|
@@ -7884,12 +8548,6 @@ function mergeFunctionCalls(functionCalls, functionCallDeltas) {
|
|
|
7884
8548
|
// dsp/sig.ts
|
|
7885
8549
|
import { createHash } from "crypto";
|
|
7886
8550
|
|
|
7887
|
-
// dsp/globals.ts
|
|
7888
|
-
var axGlobals = {
|
|
7889
|
-
signatureStrict: true
|
|
7890
|
-
// Controls reservedNames enforcement in signature parsing/validation
|
|
7891
|
-
};
|
|
7892
|
-
|
|
7893
8551
|
// dsp/parser.ts
|
|
7894
8552
|
var SignatureValidationError = class extends Error {
|
|
7895
8553
|
constructor(message, position, context3, suggestion) {
|