npm - @ax-llm/ax - Versions diffs - 12.0.19 → 12.0.20 - Mend

@ax-llm/ax 12.0.19 → 12.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/index.js (changed)

@@ -586,6 +586,16 @@ var apiCall = async (api, json) => {
 import crypto2 from "crypto";
 import { context, SpanKind } from "@opentelemetry/api";
+// dsp/globals.ts
+var axGlobals = {
+  signatureStrict: true,
+  // Controls reservedNames enforcement in signature parsing/validation
+  tracer: void 0,
+  // Global OpenTelemetry tracer for all AI operations
+  meter: void 0
+  // Global OpenTelemetry meter for metrics collection
+};
 // trace/trace.ts
 var axSpanAttributes = {
   // LLM
@@ -1021,6 +1031,291 @@ var logResponseDelta = (delta, logger = defaultLogger) => {
   logger(delta, { tags: ["responseContent"] });
 };
+// ai/metrics.ts
+var createMetricsInstruments = (meter) => {
+  return {
+    latencyHistogram: meter.createHistogram("ax_llm_request_duration_ms", {
+      description: "Duration of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    errorCounter: meter.createCounter("ax_llm_errors_total", {
+      description: "Total number of LLM request errors"
+    }),
+    requestCounter: meter.createCounter("ax_llm_requests_total", {
+      description: "Total number of LLM requests"
+    }),
+    tokenCounter: meter.createCounter("ax_llm_tokens_total", {
+      description: "Total number of LLM tokens consumed"
+    }),
+    inputTokenCounter: meter.createCounter("ax_llm_input_tokens_total", {
+      description: "Total number of input/prompt tokens consumed"
+    }),
+    outputTokenCounter: meter.createCounter("ax_llm_output_tokens_total", {
+      description: "Total number of output/completion tokens generated"
+    }),
+    errorRateGauge: meter.createGauge("ax_llm_error_rate", {
+      description: "Current error rate as a percentage (0-100)"
+    }),
+    meanLatencyGauge: meter.createGauge("ax_llm_mean_latency_ms", {
+      description: "Mean latency of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    p95LatencyGauge: meter.createGauge("ax_llm_p95_latency_ms", {
+      description: "95th percentile latency of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    p99LatencyGauge: meter.createGauge("ax_llm_p99_latency_ms", {
+      description: "99th percentile latency of LLM requests in milliseconds",
+      unit: "ms"
+    }),
+    streamingRequestsCounter: meter.createCounter(
+      "ax_llm_streaming_requests_total",
+      {
+        description: "Total number of streaming LLM requests"
+      }
+    ),
+    functionCallsCounter: meter.createCounter("ax_llm_function_calls_total", {
+      description: "Total number of function/tool calls made"
+    }),
+    functionCallLatencyHistogram: meter.createHistogram(
+      "ax_llm_function_call_latency_ms",
+      {
+        description: "Latency of function calls in milliseconds",
+        unit: "ms"
+      }
+    ),
+    requestSizeHistogram: meter.createHistogram("ax_llm_request_size_bytes", {
+      description: "Size of LLM request payloads in bytes",
+      unit: "By"
+    }),
+    responseSizeHistogram: meter.createHistogram("ax_llm_response_size_bytes", {
+      description: "Size of LLM response payloads in bytes",
+      unit: "By"
+    }),
+    temperatureGauge: meter.createGauge("ax_llm_temperature_gauge", {
+      description: "Temperature setting used for LLM requests"
+    }),
+    maxTokensGauge: meter.createGauge("ax_llm_max_tokens_gauge", {
+      description: "Maximum tokens setting used for LLM requests"
+    }),
+    estimatedCostCounter: meter.createCounter("ax_llm_estimated_cost_total", {
+      description: "Estimated cost of LLM requests in USD",
+      unit: "$"
+    }),
+    promptLengthHistogram: meter.createHistogram("ax_llm_prompt_length_chars", {
+      description: "Length of prompts in characters"
+    }),
+    contextWindowUsageGauge: meter.createGauge(
+      "ax_llm_context_window_usage_ratio",
+      {
+        description: "Context window utilization ratio (0-1)"
+      }
+    ),
+    timeoutsCounter: meter.createCounter("ax_llm_timeouts_total", {
+      description: "Total number of timed out LLM requests"
+    }),
+    abortsCounter: meter.createCounter("ax_llm_aborts_total", {
+      description: "Total number of aborted LLM requests"
+    }),
+    thinkingBudgetUsageCounter: meter.createCounter(
+      "ax_llm_thinking_budget_usage_total",
+      {
+        description: "Total thinking budget tokens used"
+      }
+    ),
+    multimodalRequestsCounter: meter.createCounter(
+      "ax_llm_multimodal_requests_total",
+      {
+        description: "Total number of multimodal requests (with images/audio)"
+      }
+    )
+  };
+};
+var recordLatencyMetric = (instruments, type, duration, aiService, model) => {
+  if (instruments.latencyHistogram) {
+    instruments.latencyHistogram.record(duration, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordLatencyStatsMetrics = (instruments, type, meanLatency, p95Latency, p99Latency, aiService, model) => {
+  const labels = {
+    operation: type,
+    ai_service: aiService,
+    ...model ? { model } : {}
+  };
+  if (instruments.meanLatencyGauge) {
+    instruments.meanLatencyGauge.record(meanLatency, labels);
+  }
+  if (instruments.p95LatencyGauge) {
+    instruments.p95LatencyGauge.record(p95Latency, labels);
+  }
+  if (instruments.p99LatencyGauge) {
+    instruments.p99LatencyGauge.record(p99Latency, labels);
+  }
+};
+var recordErrorMetric = (instruments, type, aiService, model) => {
+  if (instruments.errorCounter) {
+    instruments.errorCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordErrorRateMetric = (instruments, type, errorRate, aiService, model) => {
+  if (instruments.errorRateGauge) {
+    instruments.errorRateGauge.record(errorRate * 100, {
+      // Convert to percentage
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordRequestMetric = (instruments, type, aiService, model) => {
+  if (instruments.requestCounter) {
+    instruments.requestCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordTokenMetric = (instruments, type, tokens, aiService, model) => {
+  const labels = {
+    ai_service: aiService,
+    ...model ? { model } : {}
+  };
+  if (instruments.tokenCounter) {
+    instruments.tokenCounter.add(tokens, {
+      token_type: type,
+      ...labels
+    });
+  }
+  if (type === "input" && instruments.inputTokenCounter) {
+    instruments.inputTokenCounter.add(tokens, labels);
+  }
+  if (type === "output" && instruments.outputTokenCounter) {
+    instruments.outputTokenCounter.add(tokens, labels);
+  }
+};
+var recordStreamingRequestMetric = (instruments, type, isStreaming, aiService, model) => {
+  if (isStreaming && instruments.streamingRequestsCounter) {
+    instruments.streamingRequestsCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordFunctionCallMetric = (instruments, functionName, latency, aiService, model) => {
+  const labels = {
+    function_name: functionName,
+    ...aiService ? { ai_service: aiService } : {},
+    ...model ? { model } : {}
+  };
+  if (instruments.functionCallsCounter) {
+    instruments.functionCallsCounter.add(1, labels);
+  }
+  if (latency && instruments.functionCallLatencyHistogram) {
+    instruments.functionCallLatencyHistogram.record(latency, labels);
+  }
+};
+var recordRequestSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
+  if (instruments.requestSizeHistogram) {
+    instruments.requestSizeHistogram.record(sizeBytes, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordResponseSizeMetric = (instruments, type, sizeBytes, aiService, model) => {
+  if (instruments.responseSizeHistogram) {
+    instruments.responseSizeHistogram.record(sizeBytes, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordModelConfigMetrics = (instruments, temperature, maxTokens, aiService, model) => {
+  const labels = {
+    ...aiService ? { ai_service: aiService } : {},
+    ...model ? { model } : {}
+  };
+  if (temperature !== void 0 && instruments.temperatureGauge) {
+    instruments.temperatureGauge.record(temperature, labels);
+  }
+  if (maxTokens !== void 0 && instruments.maxTokensGauge) {
+    instruments.maxTokensGauge.record(maxTokens, labels);
+  }
+};
+var recordEstimatedCostMetric = (instruments, type, costUSD, aiService, model) => {
+  if (instruments.estimatedCostCounter) {
+    instruments.estimatedCostCounter.add(costUSD, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordPromptLengthMetric = (instruments, lengthChars, aiService, model) => {
+  if (instruments.promptLengthHistogram) {
+    instruments.promptLengthHistogram.record(lengthChars, {
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordContextWindowUsageMetric = (instruments, usageRatio, aiService, model) => {
+  if (instruments.contextWindowUsageGauge) {
+    instruments.contextWindowUsageGauge.record(usageRatio, {
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordTimeoutMetric = (instruments, type, aiService, model) => {
+  if (instruments.timeoutsCounter) {
+    instruments.timeoutsCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordAbortMetric = (instruments, type, aiService, model) => {
+  if (instruments.abortsCounter) {
+    instruments.abortsCounter.add(1, {
+      operation: type,
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordThinkingBudgetUsageMetric = (instruments, tokensUsed, aiService, model) => {
+  if (instruments.thinkingBudgetUsageCounter) {
+    instruments.thinkingBudgetUsageCounter.add(tokensUsed, {
+      ai_service: aiService,
+      ...model ? { model } : {}
+    });
+  }
+};
+var recordMultimodalRequestMetric = (instruments, hasImages, hasAudio, aiService, model) => {
+  if ((hasImages || hasAudio) && instruments.multimodalRequestsCounter) {
+    instruments.multimodalRequestsCounter.add(1, {
+      ai_service: aiService,
+      has_images: hasImages.toString(),
+      has_audio: hasAudio.toString(),
+      ...model ? { model } : {}
+    });
+  }
+};
 // ai/base.ts
 var axBaseAIDefaultConfig = () => structuredClone({
   temperature: 0,
@@ -1051,7 +1346,8 @@ var AxBaseAI = class {
     this.apiURL = apiURL;
     this.headers = headers;
     this.supportFor = supportFor;
-    this.tracer = options.tracer;
+    this.tracer = options.tracer ?? axGlobals.tracer;
+    this.meter = options.meter ?? axGlobals.meter;
     this.modelInfo = modelInfo;
     this.models = models;
     this.id = crypto2.randomUUID();
@@ -1062,6 +1358,7 @@ var AxBaseAI = class {
       throw new Error("No model defined");
     }
     this.setOptions(options);
+    this.initializeMetricsInstruments();
     if (models) {
       validateModels(models);
     }
@@ -1070,11 +1367,14 @@ var AxBaseAI = class {
   rt;
   fetch;
   tracer;
+  meter;
   timeout;
   excludeContentFromTrace;
   models;
   abortSignal;
   logger = defaultLogger2;
+  // OpenTelemetry metrics instruments
+  metricsInstruments;
   modelInfo;
   modelUsage;
   embedModelUsage;
@@ -1116,6 +1416,11 @@ var AxBaseAI = class {
       }
     }
   };
+  initializeMetricsInstruments() {
+    if (this.meter) {
+      this.metricsInstruments = createMetricsInstruments(this.meter);
+    }
+  }
   setName(name) {
     this.name = name;
   }
@@ -1133,10 +1438,12 @@ var AxBaseAI = class {
     this.rt = options.rateLimiter;
     this.fetch = options.fetch;
     this.timeout = options.timeout;
-    this.tracer = options.tracer;
+    this.tracer = options.tracer ?? axGlobals.tracer;
+    this.meter = options.meter ?? axGlobals.meter;
     this.excludeContentFromTrace = options.excludeContentFromTrace;
     this.abortSignal = options.abortSignal;
     this.logger = options.logger ?? defaultLogger2;
+    this.initializeMetricsInstruments();
   }
   getOptions() {
     return {
@@ -1144,6 +1451,7 @@ var AxBaseAI = class {
       rateLimiter: this.rt,
       fetch: this.fetch,
       tracer: this.tracer,
+      meter: this.meter,
       timeout: this.timeout,
       excludeContentFromTrace: this.excludeContentFromTrace,
       abortSignal: this.abortSignal,
@@ -1208,6 +1516,25 @@ var AxBaseAI = class {
     metrics.mean = metrics.samples.reduce((a, b) => a + b, 0) / metrics.samples.length;
     metrics.p95 = this.calculatePercentile(metrics.samples, 95);
     metrics.p99 = this.calculatePercentile(metrics.samples, 99);
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordLatencyMetric(
+        this.metricsInstruments,
+        type,
+        duration,
+        this.name,
+        model
+      );
+      recordLatencyStatsMetrics(
+        this.metricsInstruments,
+        type,
+        metrics.mean,
+        metrics.p95,
+        metrics.p99,
+        this.name,
+        model
+      );
+    }
   }
   // Method to update error metrics
   updateErrorMetrics(type, isError) {
@@ -1217,6 +1544,317 @@ var AxBaseAI = class {
       metrics.count++;
     }
     metrics.rate = metrics.count / metrics.total;
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordRequestMetric(this.metricsInstruments, type, this.name, model);
+      if (isError) {
+        recordErrorMetric(this.metricsInstruments, type, this.name, model);
+      }
+      recordErrorRateMetric(
+        this.metricsInstruments,
+        type,
+        metrics.rate,
+        this.name,
+        model
+      );
+    }
+  }
+  // Method to record token usage metrics
+  recordTokenUsage(modelUsage) {
+    if (this.metricsInstruments && modelUsage?.tokens) {
+      const { promptTokens, completionTokens, totalTokens, thoughtsTokens } = modelUsage.tokens;
+      if (promptTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "input",
+          promptTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+      if (completionTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "output",
+          completionTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+      if (totalTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "total",
+          totalTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+      if (thoughtsTokens) {
+        recordTokenMetric(
+          this.metricsInstruments,
+          "thoughts",
+          thoughtsTokens,
+          this.name,
+          modelUsage.model
+        );
+      }
+    }
+  }
+  // Helper method to calculate request size in bytes
+  calculateRequestSize(req) {
+    try {
+      return new TextEncoder().encode(JSON.stringify(req)).length;
+    } catch {
+      return 0;
+    }
+  }
+  // Helper method to calculate response size in bytes
+  calculateResponseSize(response) {
+    try {
+      return new TextEncoder().encode(JSON.stringify(response)).length;
+    } catch {
+      return 0;
+    }
+  }
+  // Helper method to detect multimodal content
+  detectMultimodalContent(req) {
+    let hasImages = false;
+    let hasAudio = false;
+    if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
+      for (const message of req.chatPrompt) {
+        if (message.role === "user" && Array.isArray(message.content)) {
+          for (const part of message.content) {
+            if (part.type === "image") {
+              hasImages = true;
+            } else if (part.type === "audio") {
+              hasAudio = true;
+            }
+          }
+        }
+      }
+    }
+    return { hasImages, hasAudio };
+  }
+  // Helper method to calculate prompt length
+  calculatePromptLength(req) {
+    let totalLength = 0;
+    if (req.chatPrompt && Array.isArray(req.chatPrompt)) {
+      for (const message of req.chatPrompt) {
+        if (message.role === "system" || message.role === "assistant") {
+          if (message.content) {
+            totalLength += message.content.length;
+          }
+        } else if (message.role === "user") {
+          if (typeof message.content === "string") {
+            totalLength += message.content.length;
+          } else if (Array.isArray(message.content)) {
+            for (const part of message.content) {
+              if (part.type === "text") {
+                totalLength += part.text.length;
+              }
+            }
+          }
+        } else if (message.role === "function") {
+          if (message.result) {
+            totalLength += message.result.length;
+          }
+        }
+      }
+    }
+    return totalLength;
+  }
+  // Helper method to calculate context window usage
+  calculateContextWindowUsage(model, modelUsage) {
+    if (!modelUsage?.tokens?.promptTokens) return 0;
+    const modelInfo = this.modelInfo.find(
+      (info) => info.name === model
+    );
+    if (!modelInfo?.contextWindow) return 0;
+    return modelUsage.tokens.promptTokens / modelInfo.contextWindow;
+  }
+  // Helper method to estimate cost
+  estimateCost(model, modelUsage) {
+    if (!modelUsage?.tokens) return 0;
+    const modelInfo = this.modelInfo.find(
+      (info) => info.name === model
+    );
+    if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
+      return 0;
+    const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
+    const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
+    const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
+    return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
+  }
+  // Helper method to estimate cost by model name
+  estimateCostByName(modelName, modelUsage) {
+    if (!modelUsage?.tokens) return 0;
+    const modelInfo = this.modelInfo.find((info) => info.name === modelName);
+    if (!modelInfo || !modelInfo.promptTokenCostPer1M && !modelInfo.completionTokenCostPer1M)
+      return 0;
+    const { promptTokens = 0, completionTokens = 0 } = modelUsage.tokens;
+    const promptCostPer1M = modelInfo.promptTokenCostPer1M || 0;
+    const completionCostPer1M = modelInfo.completionTokenCostPer1M || 0;
+    return promptTokens * promptCostPer1M / 1e6 + completionTokens * completionCostPer1M / 1e6;
+  }
+  // Helper method to record function call metrics
+  recordFunctionCallMetrics(functionCalls, model) {
+    if (!this.metricsInstruments || !functionCalls) return;
+    for (const call of functionCalls) {
+      if (call && typeof call === "object" && "function" in call && call.function && typeof call.function === "object" && "name" in call.function) {
+        recordFunctionCallMetric(
+          this.metricsInstruments,
+          call.function.name,
+          void 0,
+          // latency would need to be tracked separately
+          this.name,
+          model
+        );
+      }
+    }
+  }
+  // Helper method to record timeout metrics
+  recordTimeoutMetric(type) {
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordTimeoutMetric(this.metricsInstruments, type, this.name, model);
+    }
+  }
+  // Helper method to record abort metrics
+  recordAbortMetric(type) {
+    if (this.metricsInstruments) {
+      const model = type === "chat" ? this.lastUsedChatModel : this.lastUsedEmbedModel;
+      recordAbortMetric(this.metricsInstruments, type, this.name, model);
+    }
+  }
+  // Comprehensive method to record all chat-related metrics
+  recordChatMetrics(req, options, result) {
+    if (!this.metricsInstruments) return;
+    const model = this.lastUsedChatModel;
+    const modelConfig = this.lastUsedModelConfig;
+    const isStreaming = modelConfig?.stream ?? false;
+    recordStreamingRequestMetric(
+      this.metricsInstruments,
+      "chat",
+      isStreaming,
+      this.name,
+      model
+    );
+    const { hasImages, hasAudio } = this.detectMultimodalContent(req);
+    recordMultimodalRequestMetric(
+      this.metricsInstruments,
+      hasImages,
+      hasAudio,
+      this.name,
+      model
+    );
+    const promptLength = this.calculatePromptLength(req);
+    recordPromptLengthMetric(
+      this.metricsInstruments,
+      promptLength,
+      this.name,
+      model
+    );
+    recordModelConfigMetrics(
+      this.metricsInstruments,
+      modelConfig?.temperature,
+      modelConfig?.maxTokens,
+      this.name,
+      model
+    );
+    if (options?.thinkingTokenBudget && this.modelUsage?.tokens?.thoughtsTokens) {
+      recordThinkingBudgetUsageMetric(
+        this.metricsInstruments,
+        this.modelUsage.tokens.thoughtsTokens,
+        this.name,
+        model
+      );
+    }
+    const requestSize = this.calculateRequestSize(req);
+    recordRequestSizeMetric(
+      this.metricsInstruments,
+      "chat",
+      requestSize,
+      this.name,
+      model
+    );
+    if (result && !isStreaming) {
+      const chatResponse = result;
+      const responseSize = this.calculateResponseSize(chatResponse);
+      recordResponseSizeMetric(
+        this.metricsInstruments,
+        "chat",
+        responseSize,
+        this.name,
+        model
+      );
+      if (chatResponse.results) {
+        for (const chatResult of chatResponse.results) {
+          if (chatResult.functionCalls) {
+            this.recordFunctionCallMetrics(
+              chatResult.functionCalls,
+              this.lastUsedChatModel
+            );
+          }
+        }
+      }
+      const contextUsage = this.calculateContextWindowUsage(
+        this.lastUsedChatModel,
+        chatResponse.modelUsage
+      );
+      if (contextUsage > 0) {
+        recordContextWindowUsageMetric(
+          this.metricsInstruments,
+          contextUsage,
+          this.name,
+          model
+        );
+      }
+      const estimatedCost = this.estimateCost(
+        this.lastUsedChatModel,
+        chatResponse.modelUsage
+      );
+      if (estimatedCost > 0) {
+        recordEstimatedCostMetric(
+          this.metricsInstruments,
+          "chat",
+          estimatedCost,
+          this.name,
+          model
+        );
+      }
+    }
+  }
+  // Comprehensive method to record all embed-related metrics
+  recordEmbedMetrics(req, result) {
+    if (!this.metricsInstruments) return;
+    const model = this.lastUsedEmbedModel;
+    const requestSize = this.calculateRequestSize(req);
+    recordRequestSizeMetric(
+      this.metricsInstruments,
+      "embed",
+      requestSize,
+      this.name,
+      model
+    );
+    const responseSize = this.calculateResponseSize(result);
+    recordResponseSizeMetric(
+      this.metricsInstruments,
+      "embed",
+      responseSize,
+      this.name,
+      model
+    );
+    const estimatedCost = this.estimateCostByName(model, result.modelUsage);
+    if (estimatedCost > 0) {
+      recordEstimatedCostMetric(
+        this.metricsInstruments,
+        "embed",
+        estimatedCost,
+        this.name,
+        model
+      );
+    }
   }
   // Public method to get metrics
   getMetrics() {
@@ -1225,16 +1863,27 @@ var AxBaseAI = class {
   async chat(req, options) {
     const startTime = performance.now();
     let isError = false;
+    let result;
     try {
-      const result = await this._chat1(req, options);
+      result = await this._chat1(req, options);
       return result;
     } catch (error) {
       isError = true;
+      if (error instanceof Error) {
+        if (error.message.includes("timeout") || error.name === "TimeoutError") {
+          this.recordTimeoutMetric("chat");
+        } else if (error.message.includes("abort") || error.name === "AbortError") {
+          this.recordAbortMetric("chat");
+        }
+      }
       throw error;
     } finally {
       const duration = performance.now() - startTime;
       this.updateLatencyMetrics("chat", duration);
       this.updateErrorMetrics("chat", isError);
+      if (!isError) {
+        this.recordChatMetrics(req, options, result);
+      }
     }
   }
   async _chat1(req, options) {
@@ -1381,6 +2030,7 @@ var AxBaseAI = class {
           }
         }
         this.modelUsage = res2.modelUsage;
+        this.recordTokenUsage(res2.modelUsage);
         if (span?.isRecording()) {
           setChatResponseEvents(res2, span, this.excludeContentFromTrace);
         }
@@ -1423,6 +2073,7 @@ var AxBaseAI = class {
     }
     if (res.modelUsage) {
       this.modelUsage = res.modelUsage;
+      this.recordTokenUsage(res.modelUsage);
     }
     if (span?.isRecording()) {
       setChatResponseEvents(res, span, this.excludeContentFromTrace);
@@ -1439,15 +2090,27 @@ var AxBaseAI = class {
   async embed(req, options) {
     const startTime = performance.now();
     let isError = false;
+    let result;
     try {
-      return this._embed1(req, options);
+      result = await this._embed1(req, options);
+      return result;
     } catch (error) {
       isError = true;
+      if (error instanceof Error) {
+        if (error.message.includes("timeout") || error.name === "TimeoutError") {
+          this.recordTimeoutMetric("embed");
+        } else if (error.message.includes("abort") || error.name === "AbortError") {
+          this.recordAbortMetric("embed");
+        }
+      }
       throw error;
     } finally {
       const duration = performance.now() - startTime;
       this.updateLatencyMetrics("embed", duration);
       this.updateErrorMetrics("embed", isError);
+      if (!isError) {
+        this.recordEmbedMetrics(req, result);
+      }
     }
   }
   async _embed1(req, options) {
@@ -1522,6 +2185,7 @@ var AxBaseAI = class {
       }
     }
     this.embedModelUsage = res.modelUsage;
+    this.recordTokenUsage(res.modelUsage);
     if (span?.isRecording() && res.modelUsage?.tokens) {
       span.addEvent(axSpanEvents.GEN_AI_USAGE, {
         [axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: res.modelUsage.tokens.promptTokens,
@@ -7884,12 +8548,6 @@ function mergeFunctionCalls(functionCalls, functionCallDeltas) {
 // dsp/sig.ts
 import { createHash } from "crypto";
-// dsp/globals.ts
-var axGlobals = {
-  signatureStrict: true
-  // Controls reservedNames enforcement in signature parsing/validation
-};
 // dsp/parser.ts
 var SignatureValidationError = class extends Error {
   constructor(message, position, context3, suggestion) {