@ax-llm/ax 12.0.19 → 12.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.cjs +668 -10
- package/index.cjs.map +1 -1
- package/index.d.cts +49 -2
- package/index.d.ts +49 -2
- package/index.js +668 -10
- package/index.js.map +1 -1
- package/package.json +1 -1
package/index.cjs
CHANGED
|
@@ -770,6 +770,16 @@ var apiCall = async (api, json) => {
|
|
|
770
770
|
var import_crypto2 = __toESM(require("crypto"), 1);
|
|
771
771
|
var import_api2 = require("@opentelemetry/api");
|
|
772
772
|
|
|
773
|
+
// dsp/globals.ts
|
|
774
|
+
/**
 * Process-wide defaults shared by the library.
 * `AxBaseAI` falls back to `tracer` and `meter` from here when the
 * corresponding option is not supplied.
 */
var axGlobals = {
  // Controls reservedNames enforcement in signature parsing/validation
  signatureStrict: true,
  // Global OpenTelemetry tracer for all AI operations
  tracer: undefined,
  // Global OpenTelemetry meter for metrics collection
  meter: undefined
};
|
|
782
|
+
|
|
773
783
|
// trace/trace.ts
|
|
774
784
|
var axSpanAttributes = {
|
|
775
785
|
// LLM
|
|
@@ -1203,6 +1213,291 @@ var logResponseDelta = (delta, logger = defaultLogger) => {
|
|
|
1203
1213
|
logger(delta, { tags: ["responseContent"] });
|
|
1204
1214
|
};
|
|
1205
1215
|
|
|
1216
|
+
// ai/metrics.ts
|
|
1217
|
+
/**
 * Creates every OpenTelemetry instrument used by the `record*Metric` helpers.
 *
 * Synchronous gauges are a later addition to the OpenTelemetry JS API, so a
 * meter may not expose `createGauge`. Calling it unconditionally would throw
 * while an AI service is being constructed. Gauge slots are therefore left
 * `undefined` when the meter cannot create them; the record helpers already
 * skip instruments that are missing.
 *
 * @param {object} meter - Meter exposing `createHistogram`, `createCounter`
 *   and, optionally, `createGauge`.
 * @returns {object} Instruments keyed by the names the record helpers read.
 */
var createMetricsInstruments = (meter) => {
  // Create a gauge only when the meter supports it; otherwise leave the slot empty.
  const gauge = (name, options) =>
    typeof meter.createGauge === "function" ? meter.createGauge(name, options) : void 0;
  return {
    latencyHistogram: meter.createHistogram("ax_llm_request_duration_ms", {
      description: "Duration of LLM requests in milliseconds",
      unit: "ms"
    }),
    errorCounter: meter.createCounter("ax_llm_errors_total", {
      description: "Total number of LLM request errors"
    }),
    requestCounter: meter.createCounter("ax_llm_requests_total", {
      description: "Total number of LLM requests"
    }),
    tokenCounter: meter.createCounter("ax_llm_tokens_total", {
      description: "Total number of LLM tokens consumed"
    }),
    inputTokenCounter: meter.createCounter("ax_llm_input_tokens_total", {
      description: "Total number of input/prompt tokens consumed"
    }),
    outputTokenCounter: meter.createCounter("ax_llm_output_tokens_total", {
      description: "Total number of output/completion tokens generated"
    }),
    errorRateGauge: gauge("ax_llm_error_rate", {
      description: "Current error rate as a percentage (0-100)"
    }),
    meanLatencyGauge: gauge("ax_llm_mean_latency_ms", {
      description: "Mean latency of LLM requests in milliseconds",
      unit: "ms"
    }),
    p95LatencyGauge: gauge("ax_llm_p95_latency_ms", {
      description: "95th percentile latency of LLM requests in milliseconds",
      unit: "ms"
    }),
    p99LatencyGauge: gauge("ax_llm_p99_latency_ms", {
      description: "99th percentile latency of LLM requests in milliseconds",
      unit: "ms"
    }),
    streamingRequestsCounter: meter.createCounter(
      "ax_llm_streaming_requests_total",
      {
        description: "Total number of streaming LLM requests"
      }
    ),
    functionCallsCounter: meter.createCounter("ax_llm_function_calls_total", {
      description: "Total number of function/tool calls made"
    }),
    functionCallLatencyHistogram: meter.createHistogram(
      "ax_llm_function_call_latency_ms",
      {
        description: "Latency of function calls in milliseconds",
        unit: "ms"
      }
    ),
    requestSizeHistogram: meter.createHistogram("ax_llm_request_size_bytes", {
      description: "Size of LLM request payloads in bytes",
      unit: "By"
    }),
    responseSizeHistogram: meter.createHistogram("ax_llm_response_size_bytes", {
      description: "Size of LLM response payloads in bytes",
      unit: "By"
    }),
    temperatureGauge: gauge("ax_llm_temperature_gauge", {
      description: "Temperature setting used for LLM requests"
    }),
    maxTokensGauge: gauge("ax_llm_max_tokens_gauge", {
      description: "Maximum tokens setting used for LLM requests"
    }),
    estimatedCostCounter: meter.createCounter("ax_llm_estimated_cost_total", {
      description: "Estimated cost of LLM requests in USD",
      unit: "$"
    }),
    promptLengthHistogram: meter.createHistogram("ax_llm_prompt_length_chars", {
      description: "Length of prompts in characters"
    }),
    contextWindowUsageGauge: gauge(
      "ax_llm_context_window_usage_ratio",
      {
        description: "Context window utilization ratio (0-1)"
      }
    ),
    timeoutsCounter: meter.createCounter("ax_llm_timeouts_total", {
      description: "Total number of timed out LLM requests"
    }),
    abortsCounter: meter.createCounter("ax_llm_aborts_total", {
      description: "Total number of aborted LLM requests"
    }),
    thinkingBudgetUsageCounter: meter.createCounter(
      "ax_llm_thinking_budget_usage_total",
      {
        description: "Total thinking budget tokens used"
      }
    ),
    multimodalRequestsCounter: meter.createCounter(
      "ax_llm_multimodal_requests_total",
      {
        description: "Total number of multimodal requests (with images/audio)"
      }
    )
  };
};
|
|
1316
|
+
/**
 * Records one request-duration sample on the latency histogram.
 * Does nothing when the histogram instrument is absent.
 *
 * @param {object} instruments - Instruments object; reads `latencyHistogram`.
 * @param {string} type - Operation label value.
 * @param {number} duration - Sample passed to `record`.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordLatencyMetric = (instruments, type, duration, aiService, model) => {
  const histogram = instruments.latencyHistogram;
  if (!histogram) {
    return;
  }
  const attributes = { operation: type, ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  histogram.record(duration, attributes);
};
|
|
1325
|
+
/**
 * Publishes the aggregated latency statistics (mean, p95, p99) to their gauges.
 * Each gauge is written only if it exists; all three share one label object.
 *
 * @param {object} instruments - Reads `meanLatencyGauge`, `p95LatencyGauge`, `p99LatencyGauge`.
 * @param {string} type - Operation label value.
 * @param {number} meanLatency - Value for the mean gauge.
 * @param {number} p95Latency - Value for the p95 gauge.
 * @param {number} p99Latency - Value for the p99 gauge.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordLatencyStatsMetrics = (instruments, type, meanLatency, p95Latency, p99Latency, aiService, model) => {
  const labels = { operation: type, ai_service: aiService };
  if (model) {
    labels.model = model;
  }
  const readings = [
    [instruments.meanLatencyGauge, meanLatency],
    [instruments.p95LatencyGauge, p95Latency],
    [instruments.p99LatencyGauge, p99Latency]
  ];
  for (const [gauge, value] of readings) {
    if (gauge) {
      gauge.record(value, labels);
    }
  }
};
|
|
1341
|
+
/**
 * Increments the error counter by one for the given operation.
 * Does nothing when the counter instrument is absent.
 *
 * @param {object} instruments - Reads `errorCounter`.
 * @param {string} type - Operation label value.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordErrorMetric = (instruments, type, aiService, model) => {
  const counter = instruments.errorCounter;
  if (!counter) {
    return;
  }
  const modelLabel = model ? { model } : {};
  counter.add(1, { operation: type, ai_service: aiService, ...modelLabel });
};
|
|
1350
|
+
/**
 * Publishes the current error rate to the error-rate gauge.
 * The incoming rate is multiplied by 100 before recording, so the gauge
 * holds a percentage.
 *
 * @param {object} instruments - Reads `errorRateGauge`.
 * @param {string} type - Operation label value.
 * @param {number} errorRate - Rate that is scaled by 100.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordErrorRateMetric = (instruments, type, errorRate, aiService, model) => {
  const gauge = instruments.errorRateGauge;
  if (!gauge) {
    return;
  }
  const percentage = errorRate * 100;
  const attributes = { operation: type, ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  gauge.record(percentage, attributes);
};
|
|
1360
|
+
/**
 * Increments the request counter by one for the given operation.
 * Does nothing when the counter instrument is absent.
 *
 * @param {object} instruments - Reads `requestCounter`.
 * @param {string} type - Operation label value.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordRequestMetric = (instruments, type, aiService, model) => {
  const counter = instruments.requestCounter;
  if (!counter) {
    return;
  }
  const attributes = { operation: type, ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  counter.add(1, attributes);
};
|
|
1369
|
+
/**
 * Adds a token count to the general token counter (labelled with
 * `token_type`) and, for the "input" and "output" types, to the matching
 * dedicated counter as well. Missing instruments are skipped.
 *
 * @param {object} instruments - Reads `tokenCounter`, `inputTokenCounter`, `outputTokenCounter`.
 * @param {string} type - Token type; becomes the `token_type` label.
 * @param {number} tokens - Amount added to the counters.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordTokenMetric = (instruments, type, tokens, aiService, model) => {
  const labels = { ai_service: aiService };
  if (model) {
    labels.model = model;
  }
  const { tokenCounter } = instruments;
  if (tokenCounter) {
    tokenCounter.add(tokens, { token_type: type, ...labels });
  }
  // Only input and output have dedicated counters; other types stop here.
  switch (type) {
    case "input":
      if (instruments.inputTokenCounter) {
        instruments.inputTokenCounter.add(tokens, labels);
      }
      break;
    case "output":
      if (instruments.outputTokenCounter) {
        instruments.outputTokenCounter.add(tokens, labels);
      }
      break;
    default:
      break;
  }
};
|
|
1387
|
+
/**
 * Increments the streaming-requests counter by one, but only for requests
 * flagged as streaming and only when the counter exists.
 *
 * @param {object} instruments - Reads `streamingRequestsCounter`.
 * @param {string} type - Operation label value.
 * @param {boolean} isStreaming - Non-streaming requests are ignored.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordStreamingRequestMetric = (instruments, type, isStreaming, aiService, model) => {
  if (!isStreaming) {
    return;
  }
  const counter = instruments.streamingRequestsCounter;
  if (!counter) {
    return;
  }
  const attributes = { operation: type, ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  counter.add(1, attributes);
};
|
|
1396
|
+
/**
 * Counts one function/tool call and, when a latency is supplied, records it
 * on the function-call latency histogram.
 *
 * The latency check is a finite-number test rather than a truthiness test, so
 * a measured latency of 0 ms is recorded instead of being silently dropped.
 * `undefined`, `null` and `NaN` are still skipped.
 *
 * @param {object} instruments - Reads `functionCallsCounter`, `functionCallLatencyHistogram`.
 * @param {string} functionName - Value for the `function_name` label.
 * @param {number} [latency] - Latency sample; omitted when not measured.
 * @param {string} [aiService] - Added as the `ai_service` label only when truthy.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordFunctionCallMetric = (instruments, functionName, latency, aiService, model) => {
  const labels = {
    function_name: functionName,
    ...aiService ? { ai_service: aiService } : {},
    ...model ? { model } : {}
  };
  if (instruments.functionCallsCounter) {
    instruments.functionCallsCounter.add(1, labels);
  }
  if (Number.isFinite(latency) && instruments.functionCallLatencyHistogram) {
    instruments.functionCallLatencyHistogram.record(latency, labels);
  }
};
|
|
1409
|
+
/**
 * Records a request payload size on the request-size histogram.
 * Does nothing when the histogram instrument is absent.
 *
 * @param {object} instruments - Reads `requestSizeHistogram`.
 * @param {string} type - Operation label value.
 * @param {number} sizeBytes - Sample passed to `record`.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordRequestSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
  const histogram = instruments.requestSizeHistogram;
  if (!histogram) {
    return;
  }
  const attributes = { operation: type, ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  histogram.record(sizeBytes, attributes);
};
|
|
1418
|
+
/**
 * Records a response payload size on the response-size histogram.
 * Does nothing when the histogram instrument is absent.
 *
 * @param {object} instruments - Reads `responseSizeHistogram`.
 * @param {string} type - Operation label value.
 * @param {number} sizeBytes - Sample passed to `record`.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordResponseSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
  const histogram = instruments.responseSizeHistogram;
  if (!histogram) {
    return;
  }
  const modelLabel = model ? { model } : {};
  histogram.record(sizeBytes, { operation: type, ai_service: aiService, ...modelLabel });
};
|
|
1427
|
+
/**
 * Publishes the model configuration values to their gauges.
 * A value is recorded only when it is not `undefined` and its gauge exists,
 * so a temperature of 0 is still recorded.
 *
 * @param {object} instruments - Reads `temperatureGauge`, `maxTokensGauge`.
 * @param {number} [temperature] - Value for the temperature gauge.
 * @param {number} [maxTokens] - Value for the max-tokens gauge.
 * @param {string} [aiService] - Added as the `ai_service` label only when truthy.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordModelConfigMetrics = (instruments, temperature, maxTokens, aiService, model) => {
  const labels = {};
  if (aiService) {
    labels.ai_service = aiService;
  }
  if (model) {
    labels.model = model;
  }
  const settings = [
    [temperature, instruments.temperatureGauge],
    [maxTokens, instruments.maxTokensGauge]
  ];
  for (const [value, gauge] of settings) {
    if (value !== undefined && gauge) {
      gauge.record(value, labels);
    }
  }
};
|
|
1439
|
+
/**
 * Adds an estimated cost amount to the estimated-cost counter.
 * Does nothing when the counter instrument is absent.
 *
 * @param {object} instruments - Reads `estimatedCostCounter`.
 * @param {string} type - Operation label value.
 * @param {number} costUSD - Amount added to the counter.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordEstimatedCostMetric = (instruments, type, costUSD, aiService, model) => {
  const counter = instruments.estimatedCostCounter;
  if (!counter) {
    return;
  }
  const attributes = { operation: type, ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  counter.add(costUSD, attributes);
};
|
|
1448
|
+
/**
 * Records a prompt length on the prompt-length histogram.
 * Does nothing when the histogram instrument is absent.
 *
 * @param {object} instruments - Reads `promptLengthHistogram`.
 * @param {number} lengthChars - Sample passed to `record`.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordPromptLengthMetric = (instruments, lengthChars, aiService, model) => {
  const histogram = instruments.promptLengthHistogram;
  if (!histogram) {
    return;
  }
  const attributes = { ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  histogram.record(lengthChars, attributes);
};
|
|
1456
|
+
/**
 * Publishes a context-window usage ratio to its gauge.
 * Does nothing when the gauge instrument is absent.
 *
 * @param {object} instruments - Reads `contextWindowUsageGauge`.
 * @param {number} usageRatio - Value passed to `record`.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordContextWindowUsageMetric = (instruments, usageRatio, aiService, model) => {
  const gauge = instruments.contextWindowUsageGauge;
  if (!gauge) {
    return;
  }
  const modelLabel = model ? { model } : {};
  gauge.record(usageRatio, { ai_service: aiService, ...modelLabel });
};
|
|
1464
|
+
/**
 * Increments the timeouts counter by one for the given operation.
 * Does nothing when the counter instrument is absent.
 *
 * @param {object} instruments - Reads `timeoutsCounter`.
 * @param {string} type - Operation label value.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordTimeoutMetric = (instruments, type, aiService, model) => {
  const counter = instruments.timeoutsCounter;
  if (!counter) {
    return;
  }
  const attributes = { operation: type, ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  counter.add(1, attributes);
};
|
|
1473
|
+
/**
 * Increments the aborts counter by one for the given operation.
 * Does nothing when the counter instrument is absent.
 *
 * @param {object} instruments - Reads `abortsCounter`.
 * @param {string} type - Operation label value.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordAbortMetric = (instruments, type, aiService, model) => {
  const counter = instruments.abortsCounter;
  if (!counter) {
    return;
  }
  const modelLabel = model ? { model } : {};
  counter.add(1, { operation: type, ai_service: aiService, ...modelLabel });
};
|
|
1482
|
+
/**
 * Adds the number of thinking tokens used to the thinking-budget counter.
 * Does nothing when the counter instrument is absent.
 *
 * @param {object} instruments - Reads `thinkingBudgetUsageCounter`.
 * @param {number} tokensUsed - Amount added to the counter.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordThinkingBudgetUsageMetric = (instruments, tokensUsed, aiService, model) => {
  const counter = instruments.thinkingBudgetUsageCounter;
  if (!counter) {
    return;
  }
  const attributes = { ai_service: aiService };
  if (model) {
    attributes.model = model;
  }
  counter.add(tokensUsed, attributes);
};
|
|
1490
|
+
/**
 * Increments the multimodal-requests counter by one when the request
 * contains images or audio, labelling which of the two were present.
 *
 * The flags are converted with `String(Boolean(...))` rather than
 * `.toString()`, so a missing flag (`undefined`/`null`) is labelled "false"
 * instead of throwing when only the other flag is set. Boolean inputs produce
 * the same "true"/"false" labels as before.
 *
 * @param {object} instruments - Reads `multimodalRequestsCounter`.
 * @param {boolean} hasImages - Whether the request contains image parts.
 * @param {boolean} hasAudio - Whether the request contains audio parts.
 * @param {string} aiService - Value for the `ai_service` label.
 * @param {string} [model] - Added as the `model` label only when truthy.
 */
var recordMultimodalRequestMetric = (instruments, hasImages, hasAudio, aiService, model) => {
  if ((hasImages || hasAudio) && instruments.multimodalRequestsCounter) {
    instruments.multimodalRequestsCounter.add(1, {
      ai_service: aiService,
      has_images: String(Boolean(hasImages)),
      has_audio: String(Boolean(hasAudio)),
      ...model ? { model } : {}
    });
  }
};
|
|
1500
|
+
|
|
1206
1501
|
// ai/base.ts
|
|
1207
1502
|
var axBaseAIDefaultConfig = () => structuredClone({
|
|
1208
1503
|
temperature: 0,
|
|
@@ -1233,7 +1528,8 @@ var AxBaseAI = class {
|
|
|
1233
1528
|
this.apiURL = apiURL;
|
|
1234
1529
|
this.headers = headers;
|
|
1235
1530
|
this.supportFor = supportFor;
|
|
1236
|
-
this.tracer = options.tracer;
|
|
1531
|
+
this.tracer = options.tracer ?? axGlobals.tracer;
|
|
1532
|
+
this.meter = options.meter ?? axGlobals.meter;
|
|
1237
1533
|
this.modelInfo = modelInfo;
|
|
1238
1534
|
this.models = models;
|
|
1239
1535
|
this.id = import_crypto2.default.randomUUID();
|
|
@@ -1244,6 +1540,7 @@ var AxBaseAI = class {
|
|
|
1244
1540
|
throw new Error("No model defined");
|
|
1245
1541
|
}
|
|
1246
1542
|
this.setOptions(options);
|
|
1543
|
+
this.initializeMetricsInstruments();
|
|
1247
1544
|
if (models) {
|
|
1248
1545
|
validateModels(models);
|
|
1249
1546
|
}
|
|
@@ -1252,11 +1549,14 @@ var AxBaseAI = class {
|
|
|
1252
1549
|
rt;
|
|
1253
1550
|
fetch;
|
|
1254
1551
|
tracer;
|
|
1552
|
+
meter;
|
|
1255
1553
|
timeout;
|
|
1256
1554
|
excludeContentFromTrace;
|
|
1257
1555
|
models;
|
|
1258
1556
|
abortSignal;
|
|
1259
1557
|
logger = defaultLogger2;
|
|
1558
|
+
// OpenTelemetry metrics instruments
|
|
1559
|
+
metricsInstruments;
|
|
1260
1560
|
modelInfo;
|
|
1261
1561
|
modelUsage;
|
|
1262
1562
|
embedModelUsage;
|
|
@@ -1298,6 +1598,11 @@ var AxBaseAI = class {
|
|
|
1298
1598
|
}
|
|
1299
1599
|
}
|
|
1300
1600
|
};
|
|
1601
|
+
initializeMetricsInstruments() {
|
|
1602
|
+
if (this.meter) {
|
|
1603
|
+
this.metricsInstruments = createMetricsInstruments(this.meter);
|
|
1604
|
+
}
|
|
1605
|
+
}
|
|
1301
1606
|
setName(name) {
|
|
1302
1607
|
this.name = name;
|
|
1303
1608
|
}
|
|
@@ -1315,10 +1620,12 @@ var AxBaseAI = class {
|
|
|
1315
1620
|
this.rt = options.rateLimiter;
|
|
1316
1621
|
this.fetch = options.fetch;
|
|
1317
1622
|
this.timeout = options.timeout;
|
|
1318
|
-
this.tracer = options.tracer;
|
|
1623
|
+
this.tracer = options.tracer ?? axGlobals.tracer;
|
|
1624
|
+
this.meter = options.meter ?? axGlobals.meter;
|
|
1319
1625
|
this.excludeContentFromTrace = options.excludeContentFromTrace;
|
|
1320
1626
|
this.abortSignal = options.abortSignal;
|
|
1321
1627
|
this.logger = options.logger ?? defaultLogger2;
|
|
1628
|
+
this.initializeMetricsInstruments();
|
|
1322
1629
|
}
|
|
1323
1630
|
getOptions() {
|
|
1324
1631
|
return {
|
|
@@ -1326,6 +1633,7 @@ var AxBaseAI = class {
|
|
|
1326
1633
|
rateLimiter: this.rt,
|
|
1327
1634
|
fetch: this.fetch,
|
|
1328
1635
|
tracer: this.tracer,
|
|
1636
|
+
meter: this.meter,
|
|
1329
1637
|
timeout: this.timeout,
|
|
1330
1638
|
excludeContentFromTrace: this.excludeContentFromTrace,
|
|
1331
1639
|
abortSignal: this.abortSignal,
|
|
@@ -1390,6 +1698,25 @@ var AxBaseAI = class {
|
|
|
1390
1698
|
metrics.mean = metrics.samples.reduce((a, b) => a + b, 0) / metrics.samples.length;
|
|
1391
1699
|
metrics.p95 = this.calculatePercentile(metrics.samples, 95);
|
|
1392
1700
|
metrics.p99 = this.calculatePercentile(metrics.samples, 99);
|
|
1701
|
+
if (this.metricsInstruments) {
|
|
1702
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1703
|
+
recordLatencyMetric(
|
|
1704
|
+
this.metricsInstruments,
|
|
1705
|
+
type,
|
|
1706
|
+
duration,
|
|
1707
|
+
this.name,
|
|
1708
|
+
model
|
|
1709
|
+
);
|
|
1710
|
+
recordLatencyStatsMetrics(
|
|
1711
|
+
this.metricsInstruments,
|
|
1712
|
+
type,
|
|
1713
|
+
metrics.mean,
|
|
1714
|
+
metrics.p95,
|
|
1715
|
+
metrics.p99,
|
|
1716
|
+
this.name,
|
|
1717
|
+
model
|
|
1718
|
+
);
|
|
1719
|
+
}
|
|
1393
1720
|
}
|
|
1394
1721
|
// Method to update error metrics
|
|
1395
1722
|
updateErrorMetrics(type, isError) {
|
|
@@ -1399,6 +1726,317 @@ var AxBaseAI = class {
|
|
|
1399
1726
|
metrics.count++;
|
|
1400
1727
|
}
|
|
1401
1728
|
metrics.rate = metrics.count / metrics.total;
|
|
1729
|
+
if (this.metricsInstruments) {
|
|
1730
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1731
|
+
recordRequestMetric(this.metricsInstruments, type, this.name, model);
|
|
1732
|
+
if (isError) {
|
|
1733
|
+
recordErrorMetric(this.metricsInstruments, type, this.name, model);
|
|
1734
|
+
}
|
|
1735
|
+
recordErrorRateMetric(
|
|
1736
|
+
this.metricsInstruments,
|
|
1737
|
+
type,
|
|
1738
|
+
metrics.rate,
|
|
1739
|
+
this.name,
|
|
1740
|
+
model
|
|
1741
|
+
);
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
// Method to record token usage metrics
|
|
1745
|
+
recordTokenUsage(modelUsage) {
|
|
1746
|
+
if (this.metricsInstruments && modelUsage?.tokens) {
|
|
1747
|
+
const { promptTokens, completionTokens, totalTokens, thoughtsTokens } = modelUsage.tokens;
|
|
1748
|
+
if (promptTokens) {
|
|
1749
|
+
recordTokenMetric(
|
|
1750
|
+
this.metricsInstruments,
|
|
1751
|
+
"input",
|
|
1752
|
+
promptTokens,
|
|
1753
|
+
this.name,
|
|
1754
|
+
modelUsage.model
|
|
1755
|
+
);
|
|
1756
|
+
}
|
|
1757
|
+
if (completionTokens) {
|
|
1758
|
+
recordTokenMetric(
|
|
1759
|
+
this.metricsInstruments,
|
|
1760
|
+
"output",
|
|
1761
|
+
completionTokens,
|
|
1762
|
+
this.name,
|
|
1763
|
+
modelUsage.model
|
|
1764
|
+
);
|
|
1765
|
+
}
|
|
1766
|
+
if (totalTokens) {
|
|
1767
|
+
recordTokenMetric(
|
|
1768
|
+
this.metricsInstruments,
|
|
1769
|
+
"total",
|
|
1770
|
+
totalTokens,
|
|
1771
|
+
this.name,
|
|
1772
|
+
modelUsage.model
|
|
1773
|
+
);
|
|
1774
|
+
}
|
|
1775
|
+
if (thoughtsTokens) {
|
|
1776
|
+
recordTokenMetric(
|
|
1777
|
+
this.metricsInstruments,
|
|
1778
|
+
"thoughts",
|
|
1779
|
+
thoughtsTokens,
|
|
1780
|
+
this.name,
|
|
1781
|
+
modelUsage.model
|
|
1782
|
+
);
|
|
1783
|
+
}
|
|
1784
|
+
}
|
|
1785
|
+
}
|
|
1786
|
+
// Helper method to calculate request size in bytes
|
|
1787
|
+
calculateRequestSize(req) {
|
|
1788
|
+
try {
|
|
1789
|
+
return new TextEncoder().encode(JSON.stringify(req)).length;
|
|
1790
|
+
} catch {
|
|
1791
|
+
return 0;
|
|
1792
|
+
}
|
|
1793
|
+
}
|
|
1794
|
+
// Helper method to calculate response size in bytes
|
|
1795
|
+
calculateResponseSize(response) {
|
|
1796
|
+
try {
|
|
1797
|
+
return new TextEncoder().encode(JSON.stringify(response)).length;
|
|
1798
|
+
} catch {
|
|
1799
|
+
return 0;
|
|
1800
|
+
}
|
|
1801
|
+
}
|
|
1802
|
+
// Helper method to detect multimodal content
|
|
1803
|
+
detectMultimodalContent(req) {
|
|
1804
|
+
let hasImages = false;
|
|
1805
|
+
let hasAudio = false;
|
|
1806
|
+
if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
|
|
1807
|
+
for (const message of req.chatPrompt) {
|
|
1808
|
+
if (message.role === "user" && Array.isArray(message.content)) {
|
|
1809
|
+
for (const part of message.content) {
|
|
1810
|
+
if (part.type === "image") {
|
|
1811
|
+
hasImages = true;
|
|
1812
|
+
} else if (part.type === "audio") {
|
|
1813
|
+
hasAudio = true;
|
|
1814
|
+
}
|
|
1815
|
+
}
|
|
1816
|
+
}
|
|
1817
|
+
}
|
|
1818
|
+
}
|
|
1819
|
+
return { hasImages, hasAudio };
|
|
1820
|
+
}
|
|
1821
|
+
// Helper method to calculate prompt length
|
|
1822
|
+
calculatePromptLength(req) {
|
|
1823
|
+
let totalLength = 0;
|
|
1824
|
+
if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
|
|
1825
|
+
for (const message of req.chatPrompt) {
|
|
1826
|
+
if (message.role === "system" || message.role === "assistant") {
|
|
1827
|
+
if (message.content) {
|
|
1828
|
+
totalLength += message.content.length;
|
|
1829
|
+
}
|
|
1830
|
+
} else if (message.role === "user") {
|
|
1831
|
+
if (typeof message.content === "string") {
|
|
1832
|
+
totalLength += message.content.length;
|
|
1833
|
+
} else if (Array.isArray(message.content)) {
|
|
1834
|
+
for (const part of message.content) {
|
|
1835
|
+
if (part.type === "text") {
|
|
1836
|
+
totalLength += part.text.length;
|
|
1837
|
+
}
|
|
1838
|
+
}
|
|
1839
|
+
}
|
|
1840
|
+
} else if (message.role === "function") {
|
|
1841
|
+
if (message.result) {
|
|
1842
|
+
totalLength += message.result.length;
|
|
1843
|
+
}
|
|
1844
|
+
}
|
|
1845
|
+
}
|
|
1846
|
+
}
|
|
1847
|
+
return totalLength;
|
|
1848
|
+
}
|
|
1849
|
+
// Helper method to calculate context window usage
|
|
1850
|
+
calculateContextWindowUsage(model, modelUsage) {
|
|
1851
|
+
if (!modelUsage?.tokens?.promptTokens) return 0;
|
|
1852
|
+
const modelInfo = this.modelInfo.find(
|
|
1853
|
+
(info) => info.name === model
|
|
1854
|
+
);
|
|
1855
|
+
if (!modelInfo?.contextWindow) return 0;
|
|
1856
|
+
return modelUsage.tokens.promptTokens / modelInfo.contextWindow;
|
|
1857
|
+
}
|
|
1858
|
+
// Helper method to estimate cost
|
|
1859
|
+
estimateCost(model, modelUsage) {
|
|
1860
|
+
if (!modelUsage?.tokens) return 0;
|
|
1861
|
+
const modelInfo = this.modelInfo.find(
|
|
1862
|
+
(info) => info.name === model
|
|
1863
|
+
);
|
|
1864
|
+
if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
|
|
1865
|
+
return 0;
|
|
1866
|
+
const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
|
|
1867
|
+
const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
|
|
1868
|
+
const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
|
|
1869
|
+
return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
|
|
1870
|
+
}
|
|
1871
|
+
// Helper method to estimate cost by model name
|
|
1872
|
+
estimateCostByName(modelName, modelUsage) {
|
|
1873
|
+
if (!modelUsage?.tokens) return 0;
|
|
1874
|
+
const modelInfo = this.modelInfo.find((info) => info.name === modelName);
|
|
1875
|
+
if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
|
|
1876
|
+
return 0;
|
|
1877
|
+
const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
|
|
1878
|
+
const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
|
|
1879
|
+
const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
|
|
1880
|
+
return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
|
|
1881
|
+
}
|
|
1882
|
+
// Helper method to record function call metrics
|
|
1883
|
+
recordFunctionCallMetrics(functionCalls, model) {
|
|
1884
|
+
if (!this.metricsInstruments || !functionCalls) return;
|
|
1885
|
+
for (const call of functionCalls) {
|
|
1886
|
+
if (call && typeof call === "object" && "function" in call && call.function && typeof call.function === "object" && "name" in call.function) {
|
|
1887
|
+
recordFunctionCallMetric(
|
|
1888
|
+
this.metricsInstruments,
|
|
1889
|
+
call.function.name,
|
|
1890
|
+
void 0,
|
|
1891
|
+
// latency would need to be tracked separately
|
|
1892
|
+
this.name,
|
|
1893
|
+
model
|
|
1894
|
+
);
|
|
1895
|
+
}
|
|
1896
|
+
}
|
|
1897
|
+
}
|
|
1898
|
+
// Helper method to record timeout metrics
|
|
1899
|
+
recordTimeoutMetric(type) {
|
|
1900
|
+
if (this.metricsInstruments) {
|
|
1901
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1902
|
+
recordTimeoutMetric(this.metricsInstruments, type, this.name, model);
|
|
1903
|
+
}
|
|
1904
|
+
}
|
|
1905
|
+
// Helper method to record abort metrics
|
|
1906
|
+
recordAbortMetric(type) {
|
|
1907
|
+
if (this.metricsInstruments) {
|
|
1908
|
+
const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
|
|
1909
|
+
recordAbortMetric(this.metricsInstruments, type, this.name, model);
|
|
1910
|
+
}
|
|
1911
|
+
}
|
|
1912
|
+
// Comprehensive method to record all chat-related metrics
|
|
1913
|
+
recordChatMetrics(req, options, result) {
|
|
1914
|
+
if (!this.metricsInstruments) return;
|
|
1915
|
+
const model = this.lastUsedChatModel;
|
|
1916
|
+
const modelConfig = this.lastUsedModelConfig;
|
|
1917
|
+
const isStreaming = modelConfig?.stream ?? false;
|
|
1918
|
+
recordStreamingRequestMetric(
|
|
1919
|
+
this.metricsInstruments,
|
|
1920
|
+
"chat",
|
|
1921
|
+
isStreaming,
|
|
1922
|
+
this.name,
|
|
1923
|
+
model
|
|
1924
|
+
);
|
|
1925
|
+
const { hasImages, hasAudio } = this.detectMultimodalContent(req);
|
|
1926
|
+
recordMultimodalRequestMetric(
|
|
1927
|
+
this.metricsInstruments,
|
|
1928
|
+
hasImages,
|
|
1929
|
+
hasAudio,
|
|
1930
|
+
this.name,
|
|
1931
|
+
model
|
|
1932
|
+
);
|
|
1933
|
+
const promptLength = this.calculatePromptLength(req);
|
|
1934
|
+
recordPromptLengthMetric(
|
|
1935
|
+
this.metricsInstruments,
|
|
1936
|
+
promptLength,
|
|
1937
|
+
this.name,
|
|
1938
|
+
model
|
|
1939
|
+
);
|
|
1940
|
+
recordModelConfigMetrics(
|
|
1941
|
+
this.metricsInstruments,
|
|
1942
|
+
modelConfig?.temperature,
|
|
1943
|
+
modelConfig?.maxTokens,
|
|
1944
|
+
this.name,
|
|
1945
|
+
model
|
|
1946
|
+
);
|
|
1947
|
+
if (options?.thinkingTokenBudget && this.modelUsage?.tokens?.thoughtsTokens) {
|
|
1948
|
+
recordThinkingBudgetUsageMetric(
|
|
1949
|
+
this.metricsInstruments,
|
|
1950
|
+
this.modelUsage.tokens.thoughtsTokens,
|
|
1951
|
+
this.name,
|
|
1952
|
+
model
|
|
1953
|
+
);
|
|
1954
|
+
}
|
|
1955
|
+
const requestSize = this.calculateRequestSize(req);
|
|
1956
|
+
recordRequestSizeMetric(
|
|
1957
|
+
this.metricsInstruments,
|
|
1958
|
+
"chat",
|
|
1959
|
+
requestSize,
|
|
1960
|
+
this.name,
|
|
1961
|
+
model
|
|
1962
|
+
);
|
|
1963
|
+
if (result && !isStreaming) {
|
|
1964
|
+
const chatResponse = result;
|
|
1965
|
+
const responseSize = this.calculateResponseSize(chatResponse);
|
|
1966
|
+
recordResponseSizeMetric(
|
|
1967
|
+
this.metricsInstruments,
|
|
1968
|
+
"chat",
|
|
1969
|
+
responseSize,
|
|
1970
|
+
this.name,
|
|
1971
|
+
model
|
|
1972
|
+
);
|
|
1973
|
+
if (chatResponse.results) {
|
|
1974
|
+
for (const chatResult of chatResponse.results) {
|
|
1975
|
+
if (chatResult.functionCalls) {
|
|
1976
|
+
this.recordFunctionCallMetrics(
|
|
1977
|
+
chatResult.functionCalls,
|
|
1978
|
+
this.lastUsedChatModel
|
|
1979
|
+
);
|
|
1980
|
+
}
|
|
1981
|
+
}
|
|
1982
|
+
}
|
|
1983
|
+
const contextUsage = this.calculateContextWindowUsage(
|
|
1984
|
+
this.lastUsedChatModel,
|
|
1985
|
+
chatResponse.modelUsage
|
|
1986
|
+
);
|
|
1987
|
+
if (contextUsage > 0) {
|
|
1988
|
+
recordContextWindowUsageMetric(
|
|
1989
|
+
this.metricsInstruments,
|
|
1990
|
+
contextUsage,
|
|
1991
|
+
this.name,
|
|
1992
|
+
model
|
|
1993
|
+
);
|
|
1994
|
+
}
|
|
1995
|
+
const estimatedCost = this.estimateCost(
|
|
1996
|
+
this.lastUsedChatModel,
|
|
1997
|
+
chatResponse.modelUsage
|
|
1998
|
+
);
|
|
1999
|
+
if (estimatedCost > 0) {
|
|
2000
|
+
recordEstimatedCostMetric(
|
|
2001
|
+
this.metricsInstruments,
|
|
2002
|
+
"chat",
|
|
2003
|
+
estimatedCost,
|
|
2004
|
+
this.name,
|
|
2005
|
+
model
|
|
2006
|
+
);
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
2009
|
+
}
|
|
2010
|
+
// Comprehensive method to record all embed-related metrics
|
|
2011
|
+
recordEmbedMetrics(req, result) {
|
|
2012
|
+
if (!this.metricsInstruments) return;
|
|
2013
|
+
const model = this.lastUsedEmbedModel;
|
|
2014
|
+
const requestSize = this.calculateRequestSize(req);
|
|
2015
|
+
recordRequestSizeMetric(
|
|
2016
|
+
this.metricsInstruments,
|
|
2017
|
+
"embed",
|
|
2018
|
+
requestSize,
|
|
2019
|
+
this.name,
|
|
2020
|
+
model
|
|
2021
|
+
);
|
|
2022
|
+
const responseSize = this.calculateResponseSize(result);
|
|
2023
|
+
recordResponseSizeMetric(
|
|
2024
|
+
this.metricsInstruments,
|
|
2025
|
+
"embed",
|
|
2026
|
+
responseSize,
|
|
2027
|
+
this.name,
|
|
2028
|
+
model
|
|
2029
|
+
);
|
|
2030
|
+
const estimatedCost = this.estimateCostByName(model, result.modelUsage);
|
|
2031
|
+
if (estimatedCost > 0) {
|
|
2032
|
+
recordEstimatedCostMetric(
|
|
2033
|
+
this.metricsInstruments,
|
|
2034
|
+
"embed",
|
|
2035
|
+
estimatedCost,
|
|
2036
|
+
this.name,
|
|
2037
|
+
model
|
|
2038
|
+
);
|
|
2039
|
+
}
|
|
1402
2040
|
}
|
|
1403
2041
|
// Public method to get metrics
|
|
1404
2042
|
getMetrics() {
|
|
@@ -1407,16 +2045,27 @@ var AxBaseAI = class {
|
|
|
1407
2045
|
async chat(req, options) {
|
|
1408
2046
|
const startTime = performance.now();
|
|
1409
2047
|
let isError = false;
|
|
2048
|
+
let result;
|
|
1410
2049
|
try {
|
|
1411
|
-
|
|
2050
|
+
result = await this._chat1(req, options);
|
|
1412
2051
|
return result;
|
|
1413
2052
|
} catch (error) {
|
|
1414
2053
|
isError = true;
|
|
2054
|
+
if (error instanceof Error) {
|
|
2055
|
+
if (error.message.includes("timeout") || error.name === "TimeoutError") {
|
|
2056
|
+
this.recordTimeoutMetric("chat");
|
|
2057
|
+
} else if (error.message.includes("abort") || error.name === "AbortError") {
|
|
2058
|
+
this.recordAbortMetric("chat");
|
|
2059
|
+
}
|
|
2060
|
+
}
|
|
1415
2061
|
throw error;
|
|
1416
2062
|
} finally {
|
|
1417
2063
|
const duration = performance.now() - startTime;
|
|
1418
2064
|
this.updateLatencyMetrics("chat", duration);
|
|
1419
2065
|
this.updateErrorMetrics("chat", isError);
|
|
2066
|
+
if (!isError) {
|
|
2067
|
+
this.recordChatMetrics(req, options, result);
|
|
2068
|
+
}
|
|
1420
2069
|
}
|
|
1421
2070
|
}
|
|
1422
2071
|
async _chat1(req, options) {
|
|
@@ -1563,6 +2212,7 @@ var AxBaseAI = class {
|
|
|
1563
2212
|
}
|
|
1564
2213
|
}
|
|
1565
2214
|
this.modelUsage = res2.modelUsage;
|
|
2215
|
+
this.recordTokenUsage(res2.modelUsage);
|
|
1566
2216
|
if (span?.isRecording()) {
|
|
1567
2217
|
setChatResponseEvents(res2, span, this.excludeContentFromTrace);
|
|
1568
2218
|
}
|
|
@@ -1605,6 +2255,7 @@ var AxBaseAI = class {
|
|
|
1605
2255
|
}
|
|
1606
2256
|
if (res.modelUsage) {
|
|
1607
2257
|
this.modelUsage = res.modelUsage;
|
|
2258
|
+
this.recordTokenUsage(res.modelUsage);
|
|
1608
2259
|
}
|
|
1609
2260
|
if (span?.isRecording()) {
|
|
1610
2261
|
setChatResponseEvents(res, span, this.excludeContentFromTrace);
|
|
@@ -1621,15 +2272,27 @@ var AxBaseAI = class {
|
|
|
1621
2272
|
async embed(req, options) {
|
|
1622
2273
|
const startTime = performance.now();
|
|
1623
2274
|
let isError = false;
|
|
2275
|
+
let result;
|
|
1624
2276
|
try {
|
|
1625
|
-
|
|
2277
|
+
result = await this._embed1(req, options);
|
|
2278
|
+
return result;
|
|
1626
2279
|
} catch (error) {
|
|
1627
2280
|
isError = true;
|
|
2281
|
+
if (error instanceof Error) {
|
|
2282
|
+
if (error.message.includes("timeout") || error.name === "TimeoutError") {
|
|
2283
|
+
this.recordTimeoutMetric("embed");
|
|
2284
|
+
} else if (error.message.includes("abort") || error.name === "AbortError") {
|
|
2285
|
+
this.recordAbortMetric("embed");
|
|
2286
|
+
}
|
|
2287
|
+
}
|
|
1628
2288
|
throw error;
|
|
1629
2289
|
} finally {
|
|
1630
2290
|
const duration = performance.now() - startTime;
|
|
1631
2291
|
this.updateLatencyMetrics("embed", duration);
|
|
1632
2292
|
this.updateErrorMetrics("embed", isError);
|
|
2293
|
+
if (!isError) {
|
|
2294
|
+
this.recordEmbedMetrics(req, result);
|
|
2295
|
+
}
|
|
1633
2296
|
}
|
|
1634
2297
|
}
|
|
1635
2298
|
async _embed1(req, options) {
|
|
@@ -1704,6 +2367,7 @@ var AxBaseAI = class {
|
|
|
1704
2367
|
}
|
|
1705
2368
|
}
|
|
1706
2369
|
this.embedModelUsage = res.modelUsage;
|
|
2370
|
+
this.recordTokenUsage(res.modelUsage);
|
|
1707
2371
|
if (span?.isRecording() && res.modelUsage?.tokens) {
|
|
1708
2372
|
span.addEvent(axSpanEvents.GEN_AI_USAGE, {
|
|
1709
2373
|
[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: res.modelUsage.tokens.promptTokens,
|
|
@@ -8062,12 +8726,6 @@ function mergeFunctionCalls(functionCalls, functionCallDeltas) {
|
|
|
8062
8726
|
// dsp/sig.ts
|
|
8063
8727
|
var import_crypto3 = require("crypto");
|
|
8064
8728
|
|
|
8065
|
-
// dsp/globals.ts
|
|
8066
|
-
var axGlobals = {
|
|
8067
|
-
signatureStrict: true
|
|
8068
|
-
// Controls reservedNames enforcement in signature parsing/validation
|
|
8069
|
-
};
|
|
8070
|
-
|
|
8071
8729
|
// dsp/parser.ts
|
|
8072
8730
|
var SignatureValidationError = class extends Error {
|
|
8073
8731
|
constructor(message, position, context3, suggestion) {
|