@ax-llm/ax 12.0.19 → 12.0.20

This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
package/index.cjs CHANGED
@@ -770,6 +770,16 @@ var apiCall = async (api, json) => {
 var import_crypto2 = __toESM(require("crypto"), 1);
 var import_api2 = require("@opentelemetry/api");
 
+// dsp/globals.ts
+var axGlobals = {
+  signatureStrict: true,
+  // Controls reservedNames enforcement in signature parsing/validation
+  tracer: void 0,
+  // Global OpenTelemetry tracer for all AI operations
+  meter: void 0
+  // Global OpenTelemetry meter for metrics collection
+};
+
 // trace/trace.ts
 var axSpanAttributes = {
   // LLM
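The new `axGlobals` object gives every AI instance a process-wide default tracer and meter. A minimal wiring sketch, assuming `axGlobals` is re-exported from the package root (the diff only shows its definition in `dsp/globals.ts`, so the import path is an assumption):

```ts
import { metrics, trace } from "@opentelemetry/api";
import { axGlobals } from "@ax-llm/ax"; // assumed export path

// A registered SDK (e.g. NodeSDK) backs these API handles; without one they
// are no-ops, which is safe.
axGlobals.tracer = trace.getTracer("my-app");
axGlobals.meter = metrics.getMeter("my-app");
// Any AxBaseAI constructed afterwards falls back to these via
// `options.tracer ?? axGlobals.tracer` and `options.meter ?? axGlobals.meter`.
```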
@@ -1203,6 +1213,291 @@ var logResponseDelta = (delta, logger = defaultLogger) => {
   logger(delta, { tags: ["responseContent"] });
 };
 
+// ai/metrics.ts
+var createMetricsInstruments = (meter) => {
+  return {
+    latencyHistogram: meter.createHistogram("ax_llm_request_duration_ms", {
+      description: "Duration of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    errorCounter: meter.createCounter("ax_llm_errors_total", {
+      description: "Total number of LLM request errors"
+    }),
+    requestCounter: meter.createCounter("ax_llm_requests_total", {
+      description: "Total number of LLM requests"
+    }),
+    tokenCounter: meter.createCounter("ax_llm_tokens_total", {
+      description: "Total number of LLM tokens consumed"
+    }),
+    inputTokenCounter: meter.createCounter("ax_llm_input_tokens_total", {
+      description: "Total number of input/prompt tokens consumed"
+    }),
+    outputTokenCounter: meter.createCounter("ax_llm_output_tokens_total", {
+      description: "Total number of output/completion tokens generated"
+    }),
+    errorRateGauge: meter.createGauge("ax_llm_error_rate", {
+      description: "Current error rate as a percentage (0-100)"
+    }),
+    meanLatencyGauge: meter.createGauge("ax_llm_mean_latency_ms", {
+      description: "Mean latency of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    p95LatencyGauge: meter.createGauge("ax_llm_p95_latency_ms", {
+      description: "95th percentile latency of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    p99LatencyGauge: meter.createGauge("ax_llm_p99_latency_ms", {
+      description: "99th percentile latency of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    streamingRequestsCounter: meter.createCounter(
+      "ax_llm_streaming_requests_total",
+      {
+        description: "Total number of streaming LLM requests"
+      }
+    ),
+    functionCallsCounter: meter.createCounter("ax_llm_function_calls_total", {
+      description: "Total number of function/tool calls made"
+    }),
+    functionCallLatencyHistogram: meter.createHistogram(
+      "ax_llm_function_call_latency_ms",
+      {
+        description: "Latency of function calls in milliseconds",
+        unit: "ms"
+      }
+    ),
+    requestSizeHistogram: meter.createHistogram("ax_llm_request_size_bytes", {
+      description: "Size of LLM request payloads in bytes",
+      unit: "By"
+    }),
+    responseSizeHistogram: meter.createHistogram("ax_llm_response_size_bytes", {
+      description: "Size of LLM response payloads in bytes",
+      unit: "By"
+    }),
+    temperatureGauge: meter.createGauge("ax_llm_temperature_gauge", {
+      description: "Temperature setting used for LLM requests"
+    }),
+    maxTokensGauge: meter.createGauge("ax_llm_max_tokens_gauge", {
+      description: "Maximum tokens setting used for LLM requests"
+    }),
+    estimatedCostCounter: meter.createCounter("ax_llm_estimated_cost_total", {
+      description: "Estimated cost of LLM requests in USD",
+      unit: "$"
+    }),
+    promptLengthHistogram: meter.createHistogram("ax_llm_prompt_length_chars", {
+      description: "Length of prompts in characters"
+    }),
+    contextWindowUsageGauge: meter.createGauge(
+      "ax_llm_context_window_usage_ratio",
+      {
+        description: "Context window utilization ratio (0-1)"
+      }
+    ),
+    timeoutsCounter: meter.createCounter("ax_llm_timeouts_total", {
+      description: "Total number of timed out LLM requests"
+    }),
+    abortsCounter: meter.createCounter("ax_llm_aborts_total", {
+      description: "Total number of aborted LLM requests"
+    }),
+    thinkingBudgetUsageCounter: meter.createCounter(
+      "ax_llm_thinking_budget_usage_total",
+      {
+        description: "Total thinking budget tokens used"
+      }
+    ),
+    multimodalRequestsCounter: meter.createCounter(
+      "ax_llm_multimodal_requests_total",
+      {
+        description: "Total number of multimodal requests (with images/audio)"
+      }
+    )
+  };
+};
+var recordLatencyMetric = (instruments, type, duration, aiService, model) => {
+  if (instruments.latencyHistogram) {
+    instruments.latencyHistogram.record(duration, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordLatencyStatsMetrics = (instruments, type, meanLatency, p95Latency, p99Latency, aiService, model) => {
+  const labels = {
+    operation: type,
+    ai_service: aiService,
+    ...model ? { model } : {}
+  };
+  if (instruments.meanLatencyGauge) {
+    instruments.meanLatencyGauge.record(meanLatency, labels);
+  }
+  if (instruments.p95LatencyGauge) {
+    instruments.p95LatencyGauge.record(p95Latency, labels);
+  }
+  if (instruments.p99LatencyGauge) {
+    instruments.p99LatencyGauge.record(p99Latency, labels);
+  }
+};
+var recordErrorMetric = (instruments, type, aiService, model) => {
+  if (instruments.errorCounter) {
+    instruments.errorCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordErrorRateMetric = (instruments, type, errorRate, aiService, model) => {
+  if (instruments.errorRateGauge) {
+    instruments.errorRateGauge.record(errorRate * 100, {
+      // Convert to percentage
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordRequestMetric = (instruments, type, aiService, model) => {
+  if (instruments.requestCounter) {
+    instruments.requestCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordTokenMetric = (instruments, type, tokens, aiService, model) => {
+  const labels = {
+    ai_service: aiService,
+    ...model ? { model } : {}
+  };
+  if (instruments.tokenCounter) {
+    instruments.tokenCounter.add(tokens, {
+      token_type: type,
+      ...labels
+    });
+  }
+  if (type === "input" && instruments.inputTokenCounter) {
+    instruments.inputTokenCounter.add(tokens, labels);
+  }
+  if (type === "output" && instruments.outputTokenCounter) {
+    instruments.outputTokenCounter.add(tokens, labels);
+  }
+};
+var recordStreamingRequestMetric = (instruments, type, isStreaming, aiService, model) => {
+  if (isStreaming && instruments.streamingRequestsCounter) {
+    instruments.streamingRequestsCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordFunctionCallMetric = (instruments, functionName, latency, aiService, model) => {
+  const labels = {
+    function_name: functionName,
+    ...aiService ? { ai_service: aiService } : {},
+    ...model ? { model } : {}
+  };
+  if (instruments.functionCallsCounter) {
+    instruments.functionCallsCounter.add(1, labels);
+  }
+  if (latency && instruments.functionCallLatencyHistogram) {
+    instruments.functionCallLatencyHistogram.record(latency, labels);
+  }
+};
+var recordRequestSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
+  if (instruments.requestSizeHistogram) {
+    instruments.requestSizeHistogram.record(sizeBytes, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordResponseSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
+  if (instruments.responseSizeHistogram) {
+    instruments.responseSizeHistogram.record(sizeBytes, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordModelConfigMetrics = (instruments, temperature, maxTokens, aiService, model) => {
+  const labels = {
+    ...aiService ? { ai_service: aiService } : {},
+    ...model ? { model } : {}
+  };
+  if (temperature !== void 0 && instruments.temperatureGauge) {
+    instruments.temperatureGauge.record(temperature, labels);
+  }
+  if (maxTokens !== void 0 && instruments.maxTokensGauge) {
+    instruments.maxTokensGauge.record(maxTokens, labels);
+  }
+};
+var recordEstimatedCostMetric = (instruments, type, costUSD, aiService, model) => {
+  if (instruments.estimatedCostCounter) {
+    instruments.estimatedCostCounter.add(costUSD, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordPromptLengthMetric = (instruments, lengthChars, aiService, model) => {
+  if (instruments.promptLengthHistogram) {
+    instruments.promptLengthHistogram.record(lengthChars, {
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordContextWindowUsageMetric = (instruments, usageRatio, aiService, model) => {
+  if (instruments.contextWindowUsageGauge) {
+    instruments.contextWindowUsageGauge.record(usageRatio, {
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordTimeoutMetric = (instruments, type, aiService, model) => {
+  if (instruments.timeoutsCounter) {
+    instruments.timeoutsCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordAbortMetric = (instruments, type, aiService, model) => {
+  if (instruments.abortsCounter) {
+    instruments.abortsCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordThinkingBudgetUsageMetric = (instruments, tokensUsed, aiService, model) => {
+  if (instruments.thinkingBudgetUsageCounter) {
+    instruments.thinkingBudgetUsageCounter.add(tokensUsed, {
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordMultimodalRequestMetric = (instruments, hasImages, hasAudio, aiService, model) => {
+  if ((hasImages || hasAudio) && instruments.multimodalRequestsCounter) {
+    instruments.multimodalRequestsCounter.add(1, {
+      ai_service: aiService,
+      has_images: hasImages.toString(),
+      has_audio: hasAudio.toString(),
+      ...model ? { model } : {}
+    });
+  }
+};
+
 // ai/base.ts
 var axBaseAIDefaultConfig = () => structuredClone({
   temperature: 0,
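Every instrument above is a plain OpenTelemetry histogram, counter, or gauge under the `ax_llm_` prefix, with `operation`, `ai_service`, and optional `model` attributes attached at record time. A standalone sketch of what `recordLatencyMetric` ends up emitting, using only `@opentelemetry/api`:

```ts
import { metrics } from "@opentelemetry/api";

const meter = metrics.getMeter("ax-demo"); // no-op unless a MeterProvider is registered
const latencyHistogram = meter.createHistogram("ax_llm_request_duration_ms", {
  description: "Duration of LLM requests in milliseconds",
  unit: "ms",
});
// Equivalent to recordLatencyMetric(instruments, "chat", 412.5, "openai", "gpt-4o");
// the service and model names here are illustrative values:
latencyHistogram.record(412.5, {
  operation: "chat",
  ai_service: "openai",
  model: "gpt-4o",
});
```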
@@ -1233,7 +1528,8 @@ var AxBaseAI = class {
     this.apiURL = apiURL;
     this.headers = headers;
     this.supportFor = supportFor;
-    this.tracer = options.tracer;
+    this.tracer = options.tracer ?? axGlobals.tracer;
+    this.meter = options.meter ?? axGlobals.meter;
     this.modelInfo = modelInfo;
     this.models = models;
     this.id = import_crypto2.default.randomUUID();
@@ -1244,6 +1540,7 @@ var AxBaseAI = class {
       throw new Error("No model defined");
     }
     this.setOptions(options);
+    this.initializeMetricsInstruments();
     if (models) {
       validateModels(models);
     }
@@ -1252,11 +1549,14 @@ var AxBaseAI = class {
   rt;
   fetch;
   tracer;
+  meter;
   timeout;
   excludeContentFromTrace;
   models;
   abortSignal;
   logger = defaultLogger2;
+  // OpenTelemetry metrics instruments
+  metricsInstruments;
   modelInfo;
   modelUsage;
   embedModelUsage;
@@ -1298,6 +1598,11 @@ var AxBaseAI = class {
       }
     }
   };
+  initializeMetricsInstruments() {
+    if (this.meter) {
+      this.metricsInstruments = createMetricsInstruments(this.meter);
+    }
+  }
   setName(name) {
     this.name = name;
   }
@@ -1315,10 +1620,12 @@ var AxBaseAI = class {
     this.rt = options.rateLimiter;
     this.fetch = options.fetch;
     this.timeout = options.timeout;
-    this.tracer = options.tracer;
+    this.tracer = options.tracer ?? axGlobals.tracer;
+    this.meter = options.meter ?? axGlobals.meter;
     this.excludeContentFromTrace = options.excludeContentFromTrace;
     this.abortSignal = options.abortSignal;
    this.logger = options.logger ?? defaultLogger2;
+    this.initializeMetricsInstruments();
   }
   getOptions() {
     return {
@@ -1326,6 +1633,7 @@ var AxBaseAI = class {
       rateLimiter: this.rt,
       fetch: this.fetch,
       tracer: this.tracer,
+      meter: this.meter,
       timeout: this.timeout,
       excludeContentFromTrace: this.excludeContentFromTrace,
       abortSignal: this.abortSignal,
@@ -1390,6 +1698,25 @@ var AxBaseAI = class {
     metrics.mean = metrics.samples.reduce((a, b) => a + b, 0) / metrics.samples.length;
     metrics.p95 = this.calculatePercentile(metrics.samples, 95);
     metrics.p99 = this.calculatePercentile(metrics.samples, 99);
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordLatencyMetric(
+        this.metricsInstruments,
+        type,
+        duration,
+        this.name,
+        model
+      );
+      recordLatencyStatsMetrics(
+        this.metricsInstruments,
+        type,
+        metrics.mean,
+        metrics.p95,
+        metrics.p99,
+        this.name,
+        model
+      );
+    }
   }
   // Method to update error metrics
   updateErrorMetrics(type, isError) {
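`updateLatencyMetrics` now mirrors the in-memory mean/p95/p99 stats to OpenTelemetry gauges on every request. `calculatePercentile` itself is outside this hunk; a typical nearest-rank implementation consistent with how it is called (an assumption, not the package's verbatim code):

```ts
// Nearest-rank percentile over raw latency samples (assumed implementation).
function calculatePercentile(samples: number[], percentile: number): number {
  if (samples.length === 0) return 0;
  const sorted = [...samples].sort((a, b) => a - b);
  const rank = Math.ceil((percentile / 100) * sorted.length) - 1;
  return sorted[Math.min(sorted.length - 1, Math.max(0, rank))];
}

calculatePercentile([100, 120, 130, 150, 900], 95); // 900 (the slowest sample)
```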
@@ -1399,6 +1726,317 @@ var AxBaseAI = class {
       metrics.count++;
     }
     metrics.rate = metrics.count / metrics.total;
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordRequestMetric(this.metricsInstruments, type, this.name, model);
+      if (isError) {
+        recordErrorMetric(this.metricsInstruments, type, this.name, model);
+      }
+      recordErrorRateMetric(
+        this.metricsInstruments,
+        type,
+        metrics.rate,
+        this.name,
+        model
+      );
+    }
+  }
+  // Method to record token usage metrics
+  recordTokenUsage(modelUsage) {
+    if (this.metricsInstruments && modelUsage?.tokens) {
+      const { promptTokens, completionTokens, totalTokens, thoughtsTokens } = modelUsage.tokens;
+      if (promptTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "input",
+          promptTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+      if (completionTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "output",
+          completionTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+      if (totalTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "total",
+          totalTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+      if (thoughtsTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "thoughts",
+          thoughtsTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+    }
+  }
+  // Helper method to calculate request size in bytes
+  calculateRequestSize(req) {
+    try {
+      return new TextEncoder().encode(JSON.stringify(req)).length;
+    } catch {
+      return 0;
+    }
+  }
+  // Helper method to calculate response size in bytes
+  calculateResponseSize(response) {
+    try {
+      return new TextEncoder().encode(JSON.stringify(response)).length;
+    } catch {
+      return 0;
+    }
+  }
+  // Helper method to detect multimodal content
+  detectMultimodalContent(req) {
+    let hasImages = false;
+    let hasAudio = false;
+    if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
+      for (const message of req.chatPrompt) {
+        if (message.role === "user" && Array.isArray(message.content)) {
+          for (const part of message.content) {
+            if (part.type === "image") {
+              hasImages = true;
+            } else if (part.type === "audio") {
+              hasAudio = true;
+            }
+          }
+        }
+      }
+    }
+    return { hasImages, hasAudio };
+  }
+  // Helper method to calculate prompt length
+  calculatePromptLength(req) {
+    let totalLength = 0;
+    if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
+      for (const message of req.chatPrompt) {
+        if (message.role === "system" || message.role === "assistant") {
+          if (message.content) {
+            totalLength += message.content.length;
+          }
+        } else if (message.role === "user") {
+          if (typeof message.content === "string") {
+            totalLength += message.content.length;
+          } else if (Array.isArray(message.content)) {
+            for (const part of message.content) {
+              if (part.type === "text") {
+                totalLength += part.text.length;
+              }
+            }
+          }
+        } else if (message.role === "function") {
+          if (message.result) {
+            totalLength += message.result.length;
+          }
+        }
+      }
+    }
+    return totalLength;
+  }
+  // Helper method to calculate context window usage
+  calculateContextWindowUsage(model, modelUsage) {
+    if (!modelUsage?.tokens?.promptTokens) return 0;
+    const modelInfo = this.modelInfo.find(
+      (info) => info.name === model
+    );
+    if (!modelInfo?.contextWindow) return 0;
+    return modelUsage.tokens.promptTokens / modelInfo.contextWindow;
+  }
+  // Helper method to estimate cost
+  estimateCost(model, modelUsage) {
+    if (!modelUsage?.tokens) return 0;
+    const modelInfo = this.modelInfo.find(
+      (info) => info.name === model
+    );
+    if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
+      return 0;
+    const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
+    const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
+    const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
+    return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
+  }
+  // Helper method to estimate cost by model name
+  estimateCostByName(modelName, modelUsage) {
+    if (!modelUsage?.tokens) return 0;
+    const modelInfo = this.modelInfo.find((info) => info.name === modelName);
+    if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
+      return 0;
+    const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
+    const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
+    const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
+    return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
+  }
+  // Helper method to record function call metrics
+  recordFunctionCallMetrics(functionCalls, model) {
+    if (!this.metricsInstruments || !functionCalls) return;
+    for (const call of functionCalls) {
+      if (call && typeof call === "object" && "function" in call && call.function && typeof call.function === "object" && "name" in call.function) {
+        recordFunctionCallMetric(
+          this.metricsInstruments,
+          call.function.name,
+          void 0,
+          // latency would need to be tracked separately
+          this.name,
+          model
+        );
+      }
+    }
+  }
+  // Helper method to record timeout metrics
+  recordTimeoutMetric(type) {
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordTimeoutMetric(this.metricsInstruments, type, this.name, model);
+    }
+  }
+  // Helper method to record abort metrics
+  recordAbortMetric(type) {
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordAbortMetric(this.metricsInstruments, type, this.name, model);
+    }
+  }
+  // Comprehensive method to record all chat-related metrics
+  recordChatMetrics(req, options, result) {
+    if (!this.metricsInstruments) return;
+    const model = this.lastUsedChatModel;
+    const modelConfig = this.lastUsedModelConfig;
+    const isStreaming = modelConfig?.stream ?? false;
+    recordStreamingRequestMetric(
+      this.metricsInstruments,
+      "chat",
+      isStreaming,
+      this.name,
+      model
+    );
+    const { hasImages, hasAudio } = this.detectMultimodalContent(req);
+    recordMultimodalRequestMetric(
+      this.metricsInstruments,
+      hasImages,
+      hasAudio,
+      this.name,
+      model
+    );
+    const promptLength = this.calculatePromptLength(req);
+    recordPromptLengthMetric(
+      this.metricsInstruments,
+      promptLength,
+      this.name,
+      model
+    );
+    recordModelConfigMetrics(
+      this.metricsInstruments,
+      modelConfig?.temperature,
+      modelConfig?.maxTokens,
+      this.name,
+      model
+    );
+    if (options?.thinkingTokenBudget && this.modelUsage?.tokens?.thoughtsTokens) {
+      recordThinkingBudgetUsageMetric(
+        this.metricsInstruments,
+        this.modelUsage.tokens.thoughtsTokens,
+        this.name,
+        model
+      );
+    }
+    const requestSize = this.calculateRequestSize(req);
+    recordRequestSizeMetric(
+      this.metricsInstruments,
+      "chat",
+      requestSize,
+      this.name,
+      model
+    );
+    if (result && !isStreaming) {
+      const chatResponse = result;
+      const responseSize = this.calculateResponseSize(chatResponse);
+      recordResponseSizeMetric(
+        this.metricsInstruments,
+        "chat",
+        responseSize,
+        this.name,
+        model
+      );
+      if (chatResponse.results) {
+        for (const chatResult of chatResponse.results) {
+          if (chatResult.functionCalls) {
+            this.recordFunctionCallMetrics(
+              chatResult.functionCalls,
+              this.lastUsedChatModel
+            );
+          }
+        }
+      }
+      const contextUsage = this.calculateContextWindowUsage(
+        this.lastUsedChatModel,
+        chatResponse.modelUsage
+      );
+      if (contextUsage > 0) {
+        recordContextWindowUsageMetric(
+          this.metricsInstruments,
+          contextUsage,
+          this.name,
+          model
+        );
+      }
+      const estimatedCost = this.estimateCost(
+        this.lastUsedChatModel,
+        chatResponse.modelUsage
+      );
+      if (estimatedCost > 0) {
+        recordEstimatedCostMetric(
+          this.metricsInstruments,
+          "chat",
+          estimatedCost,
+          this.name,
+          model
+        );
+      }
+    }
+  }
+  // Comprehensive method to record all embed-related metrics
+  recordEmbedMetrics(req, result) {
+    if (!this.metricsInstruments) return;
+    const model = this.lastUsedEmbedModel;
+    const requestSize = this.calculateRequestSize(req);
+    recordRequestSizeMetric(
+      this.metricsInstruments,
+      "embed",
+      requestSize,
+      this.name,
+      model
+    );
+    const responseSize = this.calculateResponseSize(result);
+    recordResponseSizeMetric(
+      this.metricsInstruments,
+      "embed",
+      responseSize,
+      this.name,
+      model
+    );
+    const estimatedCost = this.estimateCostByName(model, result.modelUsage);
+    if (estimatedCost > 0) {
+      recordEstimatedCostMetric(
+        this.metricsInstruments,
+        "embed",
+        estimatedCost,
+        this.name,
+        model
+      );
+    }
   }
   // Public method to get metrics
   getMetrics() {
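`estimateCost` and `estimateCostByName` share one formula: tokens times the per-million-token rate, divided by 1e6, summed over prompt and completion. A worked example with hypothetical rates:

```ts
// Hypothetical pricing: $3 per 1M prompt tokens, $15 per 1M completion tokens.
const promptTokens = 1000;
const completionTokens = 500;
const costUSD = (promptTokens * 3) / 1e6 + (completionTokens * 15) / 1e6;
// = 0.003 + 0.0075 = 0.0105 USD, added to ax_llm_estimated_cost_total
```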
@@ -1407,16 +2045,27 @@ var AxBaseAI = class {
   async chat(req, options) {
     const startTime = performance.now();
     let isError = false;
+    let result;
     try {
-      const result = await this._chat1(req, options);
+      result = await this._chat1(req, options);
       return result;
     } catch (error) {
       isError = true;
+      if (error instanceof Error) {
+        if (error.message.includes("timeout") || error.name === "TimeoutError") {
+          this.recordTimeoutMetric("chat");
+        } else if (error.message.includes("abort") || error.name === "AbortError") {
+          this.recordAbortMetric("chat");
+        }
+      }
       throw error;
     } finally {
       const duration = performance.now() - startTime;
       this.updateLatencyMetrics("chat", duration);
       this.updateErrorMetrics("chat", isError);
+      if (!isError) {
+        this.recordChatMetrics(req, options, result);
+      }
     }
   }
   async _chat1(req, options) {
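`chat()` classifies failures by inspecting `error.name` and `error.message`, so a standard `AbortController` cancellation lands in `ax_llm_aborts_total`. A quick check of what the matching branch sees (Node 17.3+):

```ts
const controller = new AbortController();
controller.abort();
try {
  controller.signal.throwIfAborted(); // throws a DOMException named "AbortError"
} catch (err) {
  console.log((err as Error).name); // "AbortError" -> recordAbortMetric("chat")
}
```

Note the classification is heuristic: any provider error whose message merely contains the substring "timeout" is also counted as a timeout.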
@@ -1563,6 +2212,7 @@ var AxBaseAI = class {
       }
     }
     this.modelUsage = res2.modelUsage;
+    this.recordTokenUsage(res2.modelUsage);
     if (span?.isRecording()) {
       setChatResponseEvents(res2, span, this.excludeContentFromTrace);
     }
@@ -1605,6 +2255,7 @@ var AxBaseAI = class {
     }
     if (res.modelUsage) {
       this.modelUsage = res.modelUsage;
+      this.recordTokenUsage(res.modelUsage);
     }
     if (span?.isRecording()) {
       setChatResponseEvents(res, span, this.excludeContentFromTrace);
@@ -1621,15 +2272,27 @@ var AxBaseAI = class {
   async embed(req, options) {
     const startTime = performance.now();
     let isError = false;
+    let result;
     try {
-      return this._embed1(req, options);
+      result = await this._embed1(req, options);
+      return result;
     } catch (error) {
       isError = true;
+      if (error instanceof Error) {
+        if (error.message.includes("timeout") || error.name === "TimeoutError") {
+          this.recordTimeoutMetric("embed");
+        } else if (error.message.includes("abort") || error.name === "AbortError") {
+          this.recordAbortMetric("embed");
+        }
+      }
       throw error;
     } finally {
       const duration = performance.now() - startTime;
       this.updateLatencyMetrics("embed", duration);
       this.updateErrorMetrics("embed", isError);
+      if (!isError) {
+        this.recordEmbedMetrics(req, result);
+      }
     }
   }
   async _embed1(req, options) {
@@ -1704,6 +2367,7 @@ var AxBaseAI = class {
       }
     }
     this.embedModelUsage = res.modelUsage;
+    this.recordTokenUsage(res.modelUsage);
    if (span?.isRecording() && res.modelUsage?.tokens) {
       span.addEvent(axSpanEvents.GEN_AI_USAGE, {
         [axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: res.modelUsage.tokens.promptTokens,
@@ -8062,12 +8726,6 @@ function mergeFunctionCalls(functionCalls, functionCallDeltas) {
 // dsp/sig.ts
 var import_crypto3 = require("crypto");
 
-// dsp/globals.ts
-var axGlobals = {
-  signatureStrict: true
-  // Controls reservedNames enforcement in signature parsing/validation
-};
-
 // dsp/parser.ts
 var SignatureValidationError = class extends Error {
   constructor(message, position, context3, suggestion) {