npm - langwatch - Versions diffs - 0.10.0 → 0.12.0 - Mend

langwatch 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

package/dist/index.js CHANGED Viewed

@@ -5,27 +5,28 @@
-var _chunkNM5OKM7Fjs = require('./chunk-NM5OKM7F.js');
+var _chunk6SSCBYJMjs = require('./chunk-6SSCBYJM.js');
-var _chunkSNDTNU3Tjs = require('./chunk-SNDTNU3T.js');
+var _chunkASTAIRXGjs = require('./chunk-ASTAIRXG.js');
-var _chunkBTCJWUS5js = require('./chunk-BTCJWUS5.js');
+var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
+var _chunkBQRUUTN3js = require('./chunk-BQRUUTN3.js');
-var _chunkYWO3NE5Ajs = require('./chunk-YWO3NE5A.js');
-var _chunkA43BYF5Qjs = require('./chunk-A43BYF5Q.js');
+var _chunkC4XUWCQRjs = require('./chunk-C4XUWCQR.js');
-var _chunkCKIZDPIJjs = require('./chunk-CKIZDPIJ.js');
+var _chunk5MQQRSVMjs = require('./chunk-5MQQRSVM.js');
 require('./chunk-WAAQLJ67.js');
 require('./chunk-AZHZ4NB4.js');
@@ -36,6 +37,1218 @@ require('./chunk-AZHZ4NB4.js');
 var _chunkOHM7JUMRjs = require('./chunk-OHM7JUMR.js');
+// src/client-sdk/services/datasets/errors.ts
+/**
+ * Base class for the errors thrown by the datasets service.
+ */
+var DatasetError = class extends Error {
+  constructor(message) {
+    super(message);
+    Object.assign(this, { name: "DatasetError" });
+  }
+};
+var DatasetNotFoundError = class extends DatasetError {
+  constructor(slugOrId) {
+    super(`Dataset not found: ${slugOrId}`);
+    this.name = "DatasetNotFoundError";
+  }
+};
+var DatasetApiError = class extends DatasetError {
+  constructor(message, status) {
+    super(message);
+    this.name = "DatasetApiError";
+    this.status = status;
+  }
+};
+// src/client-sdk/services/datasets/dataset.service.ts
+var _config;
+var DatasetService = class {
+  constructor(config) {
+    _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _config);
+    _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _config, config);
+  }
+  /**
+   * Fetches a dataset by its slug or ID
+   *
+   * @param slugOrId - The slug or ID of the dataset
+   * @param options - Optional configuration
+   * @returns The dataset with all entries
+   */
+  async getDataset(slugOrId, _options) {
+    _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _config).logger.debug(`Fetching dataset: ${slugOrId}`);
+    const response = await _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _config).langwatchApiClient.GET(
+      "/api/dataset/{slugOrId}",
+      {
+        params: {
+          path: {
+            slugOrId
+          }
+        }
+      }
+    );
+    if (response.error) {
+      const status = response.response.status;
+      if (status === 404) {
+        throw new DatasetNotFoundError(slugOrId);
+      }
+      const errorMessage = "message" in response.error ? response.error.message : "error" in response.error ? response.error.error : `Failed to fetch dataset: ${slugOrId}`;
+      throw new DatasetApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
+    }
+    const data = response.data;
+    const entries = data.data.map((item) => ({
+      id: item.id,
+      datasetId: item.datasetId,
+      projectId: item.projectId,
+      entry: item.entry,
+      createdAt: item.createdAt,
+      updatedAt: item.updatedAt
+    }));
+    _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _config).logger.debug(
+      `Fetched dataset ${slugOrId} with ${entries.length} entries`
+    );
+    return { entries };
+  }
+};
+_config = new WeakMap();
+// src/client-sdk/services/datasets/datasets.facade.ts
+var _datasetService;
+var DatasetsFacade = class {
+  constructor(config) {
+    _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _datasetService);
+    /**
+     * Fetches a dataset by its slug or ID
+     *
+     * @param slugOrId - The slug or ID of the dataset to fetch
+     * @param options - Optional configuration
+     * @returns The dataset with all entries
+     *
+     * @example
+     * ```typescript
+     * // Get dataset by slug
+     * const dataset = await langwatch.datasets.get("product-qa");
+     *
+     * // Get dataset by ID
+     * const dataset = await langwatch.datasets.get("ds_abc123");
+     *
+     * // Typed dataset
+     * type MyDatasetEntry = { input: string; expected_output: string; };
+     * const dataset = await langwatch.datasets.get<MyDatasetEntry>("my-dataset");
+     *
+     * // Iterate over entries
+     * for (const entry of dataset.entries) {
+     *   console.log(entry.entry.input);  // typed as string
+     * }
+     * ```
+     */
+    this.get = (slugOrId, options) => {
+      return _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _datasetService).getDataset(slugOrId, options);
+    };
+    _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _datasetService, new DatasetService(config));
+  }
+};
+_datasetService = new WeakMap();
+// src/client-sdk/services/evaluation/evaluation.ts
+var _async_hooks = require('async_hooks');
+var _api = require('@opentelemetry/api');
+// src/client-sdk/services/evaluation/humanReadableId.ts
+var ADJECTIVES = [
+  "swift",
+  "bright",
+  "calm",
+  "eager",
+  "bold",
+  "keen",
+  "warm",
+  "cool",
+  "wise",
+  "fair",
+  "glad",
+  "kind",
+  "neat",
+  "pure",
+  "safe",
+  "true",
+  "vast",
+  "wild",
+  "zesty",
+  "agile",
+  "brave",
+  "crisp",
+  "dense",
+  "epic",
+  "fresh",
+  "grand",
+  "happy",
+  "ideal",
+  "jolly",
+  "lively",
+  "merry",
+  "noble",
+  "proud",
+  "quick",
+  "rapid",
+  "sharp",
+  "smart",
+  "solid",
+  "sunny",
+  "vivid",
+  "gentle",
+  "silent",
+  "cosmic",
+  "golden",
+  "silver",
+  "ancient",
+  "modern",
+  "mighty",
+  "humble"
+];
+var NOUNS = [
+  "fox",
+  "owl",
+  "bee",
+  "elk",
+  "hawk",
+  "lynx",
+  "wolf",
+  "bear",
+  "deer",
+  "dove",
+  "eagle",
+  "finch",
+  "heron",
+  "koala",
+  "lemur",
+  "moose",
+  "otter",
+  "panda",
+  "raven",
+  "robin",
+  "seal",
+  "swan",
+  "tiger",
+  "whale",
+  "zebra",
+  "atlas",
+  "bloom",
+  "cloud",
+  "delta",
+  "ember",
+  "flame",
+  "grove",
+  "haven",
+  "iris",
+  "jade",
+  "leaf",
+  "moon",
+  "nova",
+  "ocean",
+  "peak",
+  "river",
+  "spark",
+  "storm",
+  "tide",
+  "wave",
+  "comet",
+  "prism",
+  "coral"
+];
+var generateHumanReadableId = (separator = "-") => {
+  const adj1Index = Math.floor(Math.random() * ADJECTIVES.length);
+  let adj2Index = Math.floor(Math.random() * ADJECTIVES.length);
+  if (adj2Index === adj1Index) {
+    adj2Index = (adj2Index + 1) % ADJECTIVES.length;
+  }
+  const adjective1 = ADJECTIVES[adj1Index];
+  const adjective2 = ADJECTIVES[adj2Index];
+  const noun = NOUNS[Math.floor(Math.random() * NOUNS.length)];
+  return `${adjective1}${separator}${adjective2}${separator}${noun}`;
+};
+// src/client-sdk/services/evaluation/errors/evaluation.error.ts
+/**
+ * Base class for the errors thrown by the SDK-defined evaluation flow.
+ */
+var EvaluationError = class extends Error {
+  constructor(message) {
+    super(message);
+    Object.assign(this, { name: "EvaluationError" });
+  }
+};
+var EvaluationInitError = class extends EvaluationError {
+  constructor(message, cause) {
+    super(message);
+    this.cause = cause;
+    this.name = "EvaluationInitError";
+  }
+};
+var EvaluationApiError = class extends EvaluationError {
+  constructor(message, statusCode, cause) {
+    super(message);
+    this.statusCode = statusCode;
+    this.cause = cause;
+    this.name = "EvaluationApiError";
+  }
+};
+var TargetMetadataConflictError = class extends EvaluationError {
+  constructor(targetName, existingMetadata, newMetadata) {
+    super(
+      `Target '${targetName}' was previously registered with different metadata.
+Original: ${JSON.stringify(existingMetadata)}
+New: ${JSON.stringify(newMetadata)}
+If you want to use different metadata, please use a different target name.`
+    );
+    this.targetName = targetName;
+    this.existingMetadata = existingMetadata;
+    this.newMetadata = newMetadata;
+    this.name = "TargetMetadataConflictError";
+  }
+};
+var EvaluatorError = class extends EvaluationError {
+  constructor(evaluatorSlug, message, cause) {
+    super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
+    this.evaluatorSlug = evaluatorSlug;
+    this.cause = cause;
+    this.name = "EvaluatorError";
+  }
+};
+// src/client-sdk/services/evaluation/evaluation.ts
+var DEFAULT_CONCURRENCY = 4; // used by the Evaluation constructor when options.concurrency is not given
+var DEBOUNCE_INTERVAL_MS = 1e3; // minimum gap, in ms, that scheduleSend() keeps between two batch sends
+var iterationContextStorage = new (0, _async_hooks.AsyncLocalStorage)(); // holds { index, item } while one dataset item is being executed
+var targetContextStorage = new (0, _async_hooks.AsyncLocalStorage)(); // holds { targetId, traceId, spanId, index } while a withTarget() callback runs
+var Evaluation = class _Evaluation {
+  constructor(name, options) {
+    this.initialized = false;
+    this.total = 0;
+    this.progress = 0;
+    // Batching state
+    this.batch = { dataset: [], evaluations: [], targets: [] };
+    this.lastSentMs = 0;
+    this.pendingFlush = null;
+    this.flushTimeout = null;
+    // Target registry
+    this.targets = /* @__PURE__ */ new Map();
+    // Current iteration context (for log/evaluate calls)
+    this.currentTraceId = null;
+    this.currentIndex = null;
+    // Track whether withTarget() was used in the current iteration
+    // If so, we don't create dataset entries in executeItem()
+    // Note: This is now checked via iterationContextStorage to be thread-safe
+    this.iterationUsedWithTarget = /* @__PURE__ */ new Map();
+    // Track whether withTarget() has EVER been used in this evaluation
+    // Once set to true, we stop creating iteration-level traces
+    this.evaluationUsesTargets = false;
+    var _a, _b;
+    this.name = name;
+    this.experimentSlug = name;
+    this.runId = (_a = options.runId) != null ? _a : generateHumanReadableId();
+    this.apiClient = options.apiClient;
+    this.endpoint = options.endpoint;
+    this.apiKey = options.apiKey;
+    this.logger = options.logger;
+    this.concurrency = (_b = options.concurrency) != null ? _b : DEFAULT_CONCURRENCY;
+    this.createdAtMs = Date.now();
+  }
+  /**
+   * Initialize an evaluation session
+   */
+  static async init(name, options) {
+    const evaluation = new _Evaluation(name, options);
+    await evaluation.initialize();
+    return evaluation;
+  }
+  /**
+   * Initialize the evaluation by creating/getting the experiment
+   */
+  async initialize() {
+    if (!this.apiKey) {
+      throw new EvaluationInitError(
+        "API key is required. Set LANGWATCH_API_KEY or pass apiKey to LangWatch constructor."
+      );
+    }
+    try {
+      const response = await fetch(`${this.endpoint}/api/experiment/init`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "X-Auth-Token": this.apiKey
+        },
+        body: JSON.stringify({
+          experiment_name: this.name,
+          experiment_slug: this.experimentSlug,
+          experiment_type: "BATCH_EVALUATION_V2"
+        })
+      });
+      if (response.status === 401) {
+        throw new EvaluationInitError("Invalid API key");
+      }
+      if (!response.ok) {
+        const text = await response.text();
+        throw new EvaluationInitError(`Failed to initialize experiment: ${text}`);
+      }
+      const data = await response.json();
+      this.experimentSlug = data.slug;
+      const encodedRunId = encodeURIComponent(this.runId);
+      console.log(`Follow results at: ${this.endpoint}${data.path}?runId=${encodedRunId}`);
+      this.initialized = true;
+    } catch (error) {
+      if (error instanceof EvaluationInitError) {
+        throw error;
+      }
+      throw new EvaluationInitError(
+        `Failed to initialize evaluation: ${error instanceof Error ? error.message : String(error)}`,
+        error instanceof Error ? error : void 0
+      );
+    }
+  }
+  /**
+   * Run evaluation over a dataset with a callback
+   *
+   * @param dataset - Array of items to evaluate
+   * @param callback - Function called for each item with { item, index, span }
+   * @param options - Concurrency options
+   *
+   * @example
+   * ```typescript
+   * await evaluation.run(dataset, async ({ item, index, span }) => {
+   *   const response = await myAgent(item.question);
+   *   evaluation.log('accuracy', { index, score: 0.95 });
+   * }, { concurrency: 4 });
+   * ```
+   */
+  async run(dataset, callback, options) {
+    var _a;
+    if (!this.initialized) {
+      await this.initialize();
+    }
+    const concurrency = (_a = options == null ? void 0 : options.concurrency) != null ? _a : this.concurrency;
+    this.total = dataset.length;
+    this.progress = 0;
+    const tracer2 = _api.trace.getTracer("langwatch-evaluation");
+    const executing = /* @__PURE__ */ new Set();
+    for (let index = 0; index < dataset.length; index++) {
+      const item = dataset[index];
+      const itemPromise = this.executeItem(tracer2, item, index, callback);
+      executing.add(itemPromise);
+      void itemPromise.finally(() => executing.delete(itemPromise));
+      if (executing.size >= concurrency) {
+        await Promise.race(executing);
+      }
+    }
+    await Promise.all(executing);
+    await this.flush(true);
+  }
+  /**
+   * Execute a single item in the dataset
+   */
+  async executeItem(tracer2, item, index, callback) { // runs the user callback for one dataset item and records the outcome
+    var _a;
+    const startTime = Date.now();
+    let error; // set when the callback throws; its message goes into the dataset entry
+    let capturedTraceId = null; // trace id of the iteration span, when one is created
+    this.iterationUsedWithTarget.set(index, false); // withTarget() flips this to true for the index
+    const iterationContext = { index, item };
+    if (this.evaluationUsesTargets) { // withTarget() was used before: no iteration-level span is opened
+      await iterationContextStorage.run(iterationContext, async () => {
+        this.currentIndex = index;
+        try {
+          const span = { // stub with no-op methods, passed to the callback in place of a real span
+            setStatus: () => {
+            },
+            recordException: () => {
+            },
+            end: () => {
+            }
+          };
+          const ctx = { item, index, span };
+          const result = callback(ctx);
+          if (result && typeof result.then === "function") { // await only when the callback returned a thenable
+            await result;
+          }
+        } catch (err) {
+          error = err instanceof Error ? err : new Error(String(err)); // normalize non-Error throws
+          this.logger.error(`Evaluation error at index ${index}:`, error);
+        } finally {
+          this.currentIndex = null;
+        }
+      });
+    } else { // no target used so far: wrap the callback in an "evaluation.iteration" span
+      await iterationContextStorage.run(iterationContext, async () => {
+        await tracer2.startActiveSpan(
+          "evaluation.iteration",
+          {
+            attributes: {
+              "evaluation.run_id": this.runId,
+              "evaluation.index": index
+            }
+          },
+          async (otelSpan) => {
+            const span = _chunkONXIZKC6js.createLangWatchSpan.call(void 0, otelSpan);
+            const spanContext = otelSpan.spanContext();
+            const traceId = spanContext.traceId;
+            this.currentTraceId = traceId; // instance-level state read by log()/evaluate()
+            this.currentIndex = index;
+            capturedTraceId = traceId;
+            try {
+              const ctx = { item, index, span };
+              const result = callback(ctx);
+              if (result && typeof result.then === "function") { // await only when the callback returned a thenable
+                await result;
+              }
+              span.setStatus({ code: _api.SpanStatusCode.OK });
+            } catch (err) { // the error is recorded and logged, not rethrown
+              error = err instanceof Error ? err : new Error(String(err));
+              span.setStatus({
+                code: _api.SpanStatusCode.ERROR,
+                message: error.message
+              });
+              span.recordException(error);
+              this.logger.error(`Evaluation error at index ${index}:`, error);
+            } finally {
+              span.end();
+              this.currentTraceId = null;
+              this.currentIndex = null;
+            }
+          }
+        );
+      });
+    }
+    if (!this.iterationUsedWithTarget.get(index)) { // withTarget() pushes its own entries; only add one here when it was not used
+      const duration = Date.now() - startTime;
+      const entry = {
+        index,
+        entry: this.serializeItem(item),
+        duration,
+        error: (_a = error == null ? void 0 : error.message) != null ? _a : null,
+        trace_id: capturedTraceId != null ? capturedTraceId : this.getTraceIdFromContext() // falls back to the active span's trace id ("" when none)
+      };
+      this.batch.dataset.push(entry);
+    }
+    this.iterationUsedWithTarget.delete(index); // drop the per-index flag once the item is done
+    this.progress++;
+    this.scheduleSend();
+  }
+  /**
+   * Log a custom metric result
+   *
+   * @param metric - Name of the metric
+   * @param options - Metric options including index, score, passed, etc.
+   *
+   * If called inside a withTarget() block, the target and index are automatically
+   * inferred from the context and don't need to be specified.
+   *
+   * @example
+   * ```typescript
+   * // Explicit target (outside withTarget)
+   * evaluation.log('accuracy', { index, score: 0.95, target: 'gpt-4' });
+   *
+   * // Implicit target (inside withTarget)
+   * await evaluation.withTarget('gpt-4', { model: 'openai/gpt-4' }, async () => {
+   *   evaluation.log('accuracy', { score: 0.95 }); // target and index auto-inferred
+   * });
+   * ```
+   */
+  log(metric, options) {
+    var _a, _b, _c, _d;
+    const targetContext = targetContextStorage.getStore();
+    const {
+      data = {},
+      score,
+      passed,
+      label,
+      details,
+      status = options.error ? "error" : "processed",
+      duration,
+      cost,
+      error,
+      // Use context values as defaults, allow explicit override
+      target = targetContext == null ? void 0 : targetContext.targetId,
+      metadata,
+      index = (_a = targetContext == null ? void 0 : targetContext.index) != null ? _a : options.index
+    } = options;
+    let targetId;
+    if (target) {
+      targetId = this.registerTarget(target, metadata);
+    }
+    const traceId = (_c = (_b = targetContext == null ? void 0 : targetContext.traceId) != null ? _b : this.currentTraceId) != null ? _c : this.getTraceIdFromContext();
+    const result = {
+      name: metric,
+      evaluator: metric,
+      trace_id: traceId,
+      status,
+      data,
+      score: score != null ? score : null,
+      passed: passed != null ? passed : null,
+      details: details != null ? details : error ? error.message : null,
+      index,
+      label: label != null ? label : null,
+      cost: cost != null ? cost : null,
+      duration: duration != null ? duration : null,
+      error_type: error ? error.name : null,
+      traceback: error ? [(_d = error.stack) != null ? _d : error.message] : null,
+      target_id: targetId != null ? targetId : null
+    };
+    this.batch.evaluations.push(result);
+    this.scheduleSend();
+  }
+  /**
+   * Run a built-in evaluator
+   *
+   * @param evaluatorSlug - The evaluator identifier (e.g., 'ragas/faithfulness')
+   * @param options - Evaluator options including data and settings
+   *
+   * If called inside a withTarget() block, the target and index are automatically
+   * inferred from the context and don't need to be specified.
+   *
+   * @example
+   * ```typescript
+   * // Inside withTarget() - target and index auto-inferred
+   * await evaluation.withTarget('gpt-4', { model: 'openai/gpt-4' }, async () => {
+   *   await evaluation.evaluate('ragas/faithfulness', {
+   *     data: { input, output, contexts },
+   *   });
+   * });
+   *
+   * // Or explicit index/target
+   * await evaluation.evaluate('ragas/faithfulness', {
+   *   index,
+   *   data: { input, output, contexts },
+   *   target: 'gpt-4',
+   * });
+   * ```
+   */
+  async evaluate(evaluatorSlug, options) {
+    var _a, _b, _c, _d, _e, _f, _g, _h, _i;
+    const targetContext = targetContextStorage.getStore();
+    const {
+      data,
+      settings,
+      name,
+      asGuardrail = false,
+      // Use context values as defaults, allow explicit override
+      target = targetContext == null ? void 0 : targetContext.targetId,
+      metadata,
+      index = (_a = targetContext == null ? void 0 : targetContext.index) != null ? _a : options.index
+    } = options;
+    const startTime = Date.now();
+    const traceId = (_c = (_b = targetContext == null ? void 0 : targetContext.traceId) != null ? _b : this.currentTraceId) != null ? _c : this.getTraceIdFromContext();
+    const spanId = (_d = targetContext == null ? void 0 : targetContext.spanId) != null ? _d : this.getSpanIdFromContext();
+    try {
+      const response = await fetch(
+        `${this.endpoint}/api/evaluations/${evaluatorSlug}/evaluate`,
+        {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "X-Auth-Token": this.apiKey
+          },
+          body: JSON.stringify({
+            trace_id: traceId != null ? traceId : null,
+            span_id: spanId != null ? spanId : null,
+            name: name != null ? name : evaluatorSlug,
+            data,
+            settings,
+            as_guardrail: asGuardrail
+          })
+        }
+      );
+      if (!response.ok) {
+        const text = await response.text();
+        throw new EvaluatorError(evaluatorSlug, text);
+      }
+      const result = await response.json();
+      const duration = Date.now() - startTime;
+      this.log(name != null ? name : evaluatorSlug, {
+        index,
+        data,
+        status: result.status,
+        score: (_e = result.score) != null ? _e : void 0,
+        passed: (_f = result.passed) != null ? _f : void 0,
+        details: (_g = result.details) != null ? _g : void 0,
+        label: (_h = result.label) != null ? _h : void 0,
+        duration,
+        cost: (_i = result.cost) == null ? void 0 : _i.amount,
+        target,
+        metadata
+      });
+    } catch (error) {
+      const duration = Date.now() - startTime;
+      if (error instanceof EvaluatorError) {
+        this.log(name != null ? name : evaluatorSlug, {
+          index,
+          data,
+          status: "error",
+          duration,
+          error,
+          target,
+          metadata
+        });
+        throw error;
+      }
+      const wrappedError = new EvaluatorError(
+        evaluatorSlug,
+        error instanceof Error ? error.message : String(error),
+        error instanceof Error ? error : void 0
+      );
+      this.log(name != null ? name : evaluatorSlug, {
+        index,
+        data,
+        status: "error",
+        duration,
+        error: wrappedError,
+        target,
+        metadata
+      });
+      throw wrappedError;
+    }
+  }
+  async withTarget(targetName, metadataOrCallback, maybeCallback) {
+    var _a, _b, _c;
+    const metadata = typeof metadataOrCallback === "function" ? null : metadataOrCallback;
+    const callback = typeof metadataOrCallback === "function" ? metadataOrCallback : maybeCallback;
+    if (!this.evaluationUsesTargets) {
+      this.evaluationUsesTargets = true;
+    }
+    const iterationContext = iterationContextStorage.getStore();
+    const index = (_b = (_a = iterationContext == null ? void 0 : iterationContext.index) != null ? _a : this.currentIndex) != null ? _b : 0;
+    const currentItem = iterationContext == null ? void 0 : iterationContext.item;
+    this.iterationUsedWithTarget.set(index, true);
+    this.registerTarget(targetName, metadata != null ? metadata : void 0);
+    const tracer2 = _api.trace.getTracer("langwatch-evaluation");
+    const startTime = Date.now();
+    let result;
+    let traceId = "";
+    let spanId = "";
+    let callbackError;
+    await tracer2.startActiveSpan(
+      `evaluation.target.${targetName}`,
+      {
+        attributes: {
+          "evaluation.run_id": this.runId,
+          "evaluation.target": targetName,
+          "evaluation.index": index
+        }
+      },
+      _api.ROOT_CONTEXT,
+      async (otelSpan) => {
+        const span = _chunkONXIZKC6js.createLangWatchSpan.call(void 0, otelSpan);
+        const spanContext = otelSpan.spanContext();
+        const rawTraceId = spanContext.traceId;
+        spanId = spanContext.spanId;
+        const isNoOpTrace = rawTraceId === "00000000000000000000000000000000";
+        traceId = isNoOpTrace ? "" : rawTraceId;
+        const executionContext = {
+          targetId: targetName,
+          traceId,
+          spanId,
+          index
+        };
+        try {
+          result = await targetContextStorage.run(executionContext, async () => {
+            const ctx = { span, traceId, spanId };
+            const callbackResult = callback(ctx);
+            if (callbackResult && typeof callbackResult.then === "function") {
+              return await callbackResult;
+            }
+            return callbackResult;
+          });
+          span.setStatus({ code: _api.SpanStatusCode.OK });
+        } catch (err) {
+          callbackError = err instanceof Error ? err : new Error(String(err));
+          span.setStatus({
+            code: _api.SpanStatusCode.ERROR,
+            message: callbackError.message
+          });
+          span.recordException(callbackError);
+          throw err;
+        } finally {
+          span.end();
+        }
+      }
+    );
+    const duration = Date.now() - startTime;
+    let predicted = null;
+    if (result !== void 0 && result !== null) {
+      predicted = typeof result === "object" ? result : { output: result };
+    }
+    const entry = {
+      index,
+      entry: this.serializeItem(currentItem),
+      duration,
+      error: (_c = callbackError == null ? void 0 : callbackError.message) != null ? _c : null,
+      trace_id: traceId || null,
+      // null if no tracer configured (no-op)
+      target_id: targetName,
+      predicted
+    };
+    this.batch.dataset.push(entry);
+    this.scheduleSend();
+    return {
+      result,
+      duration,
+      traceId,
+      spanId
+    };
+  }
+  /**
+   * Register a target for multi-target comparison
+   */
+  registerTarget(name, metadata) {
+    var _a;
+    const existing = this.targets.get(name);
+    if (existing) {
+      if (metadata) {
+        const existingMeta = (_a = existing.metadata) != null ? _a : {};
+        if (JSON.stringify(existingMeta) !== JSON.stringify(metadata)) {
+          throw new TargetMetadataConflictError(name, existingMeta, metadata);
+        }
+      }
+      return name;
+    }
+    const targetInfo = {
+      id: name,
+      name,
+      type: "custom",
+      metadata: metadata != null ? metadata : null
+    };
+    this.targets.set(name, targetInfo);
+    this.batch.targets.push(targetInfo);
+    return name;
+  }
+  /**
+   * Schedule a debounced send
+   */
+  scheduleSend() {
+    var _a;
+    const now = Date.now();
+    if (now - this.lastSentMs >= DEBOUNCE_INTERVAL_MS) {
+      this.sendBatch();
+    } else {
+      (_a = this.flushTimeout) != null ? _a : this.flushTimeout = setTimeout(() => {
+        this.flushTimeout = null;
+        this.sendBatch();
+      }, DEBOUNCE_INTERVAL_MS - (now - this.lastSentMs));
+    }
+  }
+  /**
+   * Send current batch to the API
+   */
+  sendBatch(finished = false) {
+    if (this.batch.dataset.length === 0 && this.batch.evaluations.length === 0 && this.batch.targets.length === 0 && !finished) {
+      return;
+    }
+    const body = {
+      experiment_slug: this.experimentSlug,
+      name: this.name,
+      run_id: this.runId,
+      dataset: this.batch.dataset.map((entry) => {
+        var _a, _b, _c;
+        return {
+          index: entry.index,
+          entry: entry.entry,
+          duration: entry.duration,
+          error: entry.error,
+          trace_id: entry.trace_id,
+          target_id: (_a = entry.target_id) != null ? _a : null,
+          cost: (_b = entry.cost) != null ? _b : null,
+          predicted: (_c = entry.predicted) != null ? _c : null
+        };
+      }),
+      evaluations: this.batch.evaluations.map((e) => ({
+        name: e.name,
+        evaluator: e.evaluator,
+        trace_id: e.trace_id,
+        status: e.status,
+        inputs: e.data,
+        score: e.score,
+        passed: e.passed,
+        details: e.details,
+        index: e.index,
+        label: e.label,
+        cost: e.cost,
+        duration: e.duration,
+        target_id: e.target_id
+      })),
+      targets: this.batch.targets,
+      progress: this.progress,
+      total: this.total,
+      timestamps: {
+        created_at: this.createdAtMs,
+        finished_at: finished ? Date.now() : null
+      }
+    };
+    this.pendingFlush = fetch(`${this.endpoint}/api/evaluations/batch/log_results`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${this.apiKey}`
+      },
+      body: JSON.stringify(body)
+    }).then((response) => {
+      if (!response.ok) {
+        this.logger.error(`Failed to send batch: ${response.status}`);
+      }
+    }).catch((error) => {
+      this.logger.error("Failed to send batch:", error);
+    });
+    this.batch = { dataset: [], evaluations: [], targets: [] };
+    this.lastSentMs = Date.now();
+  }
+  /**
+   * Flush all pending data
+   */
+  async flush(finished = false) {
+    if (this.flushTimeout) {
+      clearTimeout(this.flushTimeout);
+      this.flushTimeout = null;
+    }
+    this.sendBatch(finished);
+    if (this.pendingFlush) {
+      await this.pendingFlush;
+    }
+  }
+  /**
+   * Serialize a dataset item for the API
+   */
+  serializeItem(item) {
+    if (item === null || item === void 0) {
+      return item;
+    }
+    if (typeof item === "object") {
+      if ("toJSON" in item && typeof item.toJSON === "function") {
+        return item.toJSON();
+      }
+      return item;
+    }
+    return item;
+  }
+  /**
+   * Get trace ID from current OpenTelemetry context
+   */
+  getTraceIdFromContext() {
+    const span = _api.trace.getActiveSpan();
+    if (span) {
+      return span.spanContext().traceId;
+    }
+    return "";
+  }
+  /**
+   * Get span ID from current OpenTelemetry context
+   */
+  getSpanIdFromContext() {
+    const span = _api.trace.getActiveSpan();
+    if (span) {
+      return span.spanContext().spanId;
+    }
+    return null;
+  }
+};
+// src/client-sdk/services/evaluation/platformErrors.ts
+/**
+ * Base class for the errors of platform-configured evaluation runs.
+ */
+var EvaluationsError = class extends Error {
+  constructor(message) {
+    super(message);
+    Object.assign(this, { name: "EvaluationsError" });
+  }
+};
+var EvaluationNotFoundError = class extends EvaluationsError {
+  constructor(slug) {
+    super(`Evaluation not found: ${slug}`);
+    this.name = "EvaluationNotFoundError";
+  }
+};
+var EvaluationTimeoutError = class extends EvaluationsError {
+  constructor(runId, progress, total) {
+    super(`Evaluation run timed out: ${runId} (${progress}/${total} completed)`);
+    this.name = "EvaluationTimeoutError";
+    this.runId = runId;
+    this.progress = progress;
+    this.total = total;
+  }
+};
+var EvaluationRunFailedError = class extends EvaluationsError {
+  constructor(runId, errorMessage) {
+    super(`Evaluation run failed: ${errorMessage}`);
+    this.name = "EvaluationRunFailedError";
+    this.runId = runId;
+    this.errorMessage = errorMessage;
+  }
+};
+var EvaluationsApiError = class extends EvaluationsError {
+  constructor(message, statusCode) {
+    super(message);
+    this.name = "EvaluationsApiError";
+    this.statusCode = statusCode;
+  }
+};
+// src/client-sdk/services/evaluation/evaluation.facade.ts
+var DEFAULT_POLL_INTERVAL = 2e3; // fallback for options.pollInterval in runWithPolling(); presumably milliseconds (2 s) — confirm
+var DEFAULT_TIMEOUT = 6e5; // fallback for options.timeout in runWithPolling(); presumably milliseconds (10 min) — confirm
+var EvaluationFacade = class {
+  constructor(config) { // stores the shared client configuration
+    this.config = config; // read later for langwatchApiClient, endpoint, apiKey and logger
+  }
+  /**
+   * Initialize a new evaluation session (SDK-defined)
+   *
+   * @param name - Name of the experiment (used as slug)
+   * @param options - Optional configuration
+   * @returns An initialized Evaluation instance
+   *
+   * @example
+   * ```typescript
+   * const evaluation = await langwatch.evaluation.init('my-experiment');
+   *
+   * await evaluation.run(dataset, async ({ item, index }) => {
+   *   const response = await myAgent(item.question);
+   *   evaluation.log('accuracy', { index, score: 0.95 });
+   * });
+   * ```
+   */
+  async init(name, options) {
+    return Evaluation.init(name, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
+      apiClient: this.config.langwatchApiClient,
+      endpoint: this.config.endpoint,
+      apiKey: this.config.apiKey,
+      logger: this.config.logger
+    }, options));
+  }
+  /**
+   * Run a platform-configured evaluation (Evaluations V3)
+   *
+   * This runs an evaluation that was configured in the LangWatch platform.
+   * The method automatically prints a summary and exits with code 1 on failure
+   * (unless `exitOnFailure: false` is passed).
+   *
+   * @param slug - The slug of the evaluation (found in the evaluation URL)
+   * @param options - Optional configuration
+   * @returns The evaluation results including pass rate and summary
+   *
+   * @example
+   * ```typescript
+   * import { LangWatch } from "langwatch";
+   *
+   * const langwatch = new LangWatch();
+   *
+   * const result = await langwatch.evaluation.run("my-evaluation-slug");
+   * result.printSummary();
+   * ```
+   */
+  async run(slug, options) {
+    this.config.logger.info(`Running platform evaluation: ${slug}`);
+    const result = await this.runWithPolling(slug, options);
+    return result;
+  }
+  /**
+   * Run an evaluation and wait for completion using polling
+   */
+  async runWithPolling(slug, options = {}) {
+    var _a, _b, _c, _d, _e, _f, _g;
+    const pollInterval = (_a = options.pollInterval) != null ? _a : DEFAULT_POLL_INTERVAL;
+    const timeout = (_b = options.timeout) != null ? _b : DEFAULT_TIMEOUT;
+    const startResponse = await this.startRun(slug);
+    const { runId } = startResponse;
+    const apiRunUrl = (_c = startResponse.runUrl) != null ? _c : "";
+    const runUrl = apiRunUrl ? this.replaceUrlDomain(apiRunUrl, this.config.endpoint) : "";
+    console.log(`Started evaluation run: ${runId}`);
+    if (runUrl) {
+      console.log(`Follow live: ${runUrl}`);
+    }
+    const total = startResponse.total;
+    let lastProgress = 0;
+    if (total > 0) {
+      process.stdout.write(`Progress: 0/${total} (0%)`);
+    }
+    (_d = options.onProgress) == null ? void 0 : _d.call(options, 0, total);
+    const startTime = Date.now();
+    while (true) {
+      if (Date.now() - startTime > timeout) {
+        console.log();
+        const finalStatus = await this.getRunStatus(runId);
+        throw new EvaluationTimeoutError(runId, finalStatus.progress, finalStatus.total);
+      }
+      await this.sleep(pollInterval);
+      const status = await this.getRunStatus(runId);
+      const progress = status.progress;
+      if (progress !== lastProgress && status.total > 0) {
+        const percentage = Math.round(progress / status.total * 100);
+        process.stdout.write(`\rProgress: ${progress}/${status.total} (${percentage}%)`);
+        lastProgress = progress;
+      }
+      (_e = options.onProgress) == null ? void 0 : _e.call(options, status.progress, status.total);
+      if (status.status === "completed") {
+        console.log();
+        const summary = status.summary;
+        return this.buildResult(runId, "completed", summary, runUrl != null ? runUrl : "");
+      }
+      if (status.status === "failed") {
+        console.log();
+        throw new EvaluationRunFailedError(runId, (_f = status.error) != null ? _f : "Unknown error");
+      }
+      if (status.status === "stopped") {
+        console.log();
+        return this.buildResult(runId, "stopped", (_g = status.summary) != null ? _g : {
+          runId,
+          totalCells: status.total,
+          completedCells: status.progress,
+          failedCells: 0,
+          duration: Date.now() - startTime
+        }, runUrl != null ? runUrl : "");
+      }
+    }
+  }
+  /**
+   * Start an evaluation run
+   */
+  async startRun(slug) {
+    const response = await this.config.langwatchApiClient.POST(
+      "/api/evaluations/v3/{slug}/run",
+      {
+        params: {
+          path: { slug }
+        }
+      }
+    );
+    if (response.error) {
+      const status = response.response.status;
+      if (status === 404) {
+        throw new EvaluationNotFoundError(slug);
+      }
+      if (status === 401) {
+        throw new EvaluationsApiError("Unauthorized - check your API key", 401);
+      }
+      const errorMessage = "error" in response.error ? response.error.error : `Failed to start evaluation: ${slug}`;
+      throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
+    }
+    return response.data;
+  }
+  /**
+   * Get the status of a run
+   */
+  async getRunStatus(runId) {
+    const response = await this.config.langwatchApiClient.GET(
+      "/api/evaluations/v3/runs/{runId}",
+      {
+        params: {
+          path: { runId }
+        }
+      }
+    );
+    if (response.error) {
+      const status = response.response.status;
+      if (status === 404) {
+        throw new EvaluationsApiError(`Run not found: ${runId}`, 404);
+      }
+      if (status === 401) {
+        throw new EvaluationsApiError("Unauthorized - check your API key", 401);
+      }
+      const errorMessage = "error" in response.error ? response.error.error : `Failed to get run status: ${runId}`;
+      throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
+    }
+    return response.data;
+  }
+  /**
+   * Build the result object from API response
+   */
+  buildResult(runId, status, summary, runUrl) {
+    var _a, _b, _c, _d, _e, _f, _g;
+    const totalCells = (_a = summary.totalCells) != null ? _a : 0;
+    const completedCells = (_b = summary.completedCells) != null ? _b : 0;
+    const failedCells = (_c = summary.failedCells) != null ? _c : 0;
+    const duration = (_d = summary.duration) != null ? _d : 0;
+    const totalPassed = (_e = summary.totalPassed) != null ? _e : completedCells - failedCells;
+    const totalFailed = (_f = summary.totalFailed) != null ? _f : failedCells;
+    const passRate = (_g = summary.passRate) != null ? _g : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
+    return {
+      runId,
+      status,
+      passed: totalPassed,
+      failed: totalFailed,
+      passRate,
+      duration,
+      runUrl,
+      // Always use the endpoint-based URL we constructed
+      summary,
+      printSummary: (exitOnFailure = true) => {
+        var _a2;
+        this.printSummary({
+          runId,
+          status,
+          passed: totalPassed,
+          failed: totalFailed,
+          passRate,
+          duration,
+          runUrl: (_a2 = summary.runUrl) != null ? _a2 : runUrl,
+          summary
+        });
+        if (exitOnFailure && totalFailed > 0) {
+          process.exit(1);
+        }
+      }
+    };
+  }
+  /**
+   * Print a CI-friendly summary of the evaluation results
+   */
+  printSummary(result) {
+    const { runId, status, passed, failed, passRate, duration, runUrl, summary } = result;
+    console.log("\n" + "\u2550".repeat(60));
+    console.log("  EVALUATION RESULTS");
+    console.log("\u2550".repeat(60));
+    console.log(`  Run ID:     ${runId}`);
+    console.log(`  Status:     ${status.toUpperCase()}`);
+    console.log(`  Duration:   ${(duration / 1e3).toFixed(1)}s`);
+    console.log("\u2500".repeat(60));
+    console.log(`  Passed:     ${passed}`);
+    console.log(`  Failed:     ${failed}`);
+    console.log(`  Pass Rate:  ${passRate.toFixed(1)}%`);
+    if (summary.targets && summary.targets.length > 0) {
+      console.log("\u2500".repeat(60));
+      console.log("  TARGETS:");
+      for (const target of summary.targets) {
+        console.log(`    ${target.name}: ${target.passed} passed, ${target.failed} failed`);
+        if (target.avgLatency) {
+          console.log(`      Avg latency: ${target.avgLatency.toFixed(0)}ms`);
+        }
+        if (target.totalCost) {
+          console.log(`      Total cost: $${target.totalCost.toFixed(4)}`);
+        }
+      }
+    }
+    if (summary.evaluators && summary.evaluators.length > 0) {
+      console.log("\u2500".repeat(60));
+      console.log("  EVALUATORS:");
+      for (const evaluator of summary.evaluators) {
+        console.log(
+          `    ${evaluator.name}: ${evaluator.passRate.toFixed(1)}% pass rate`
+        );
+        if (evaluator.avgScore !== void 0) {
+          console.log(`      Avg score: ${evaluator.avgScore.toFixed(2)}`);
+        }
+      }
+    }
+    console.log("\u2500".repeat(60));
+    console.log(`  View details: ${runUrl}`);
+    console.log("\u2550".repeat(60) + "\n");
+  }
+  sleep(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+  /**
+   * Replace the domain of a URL with a new base URL, preserving the path
+   */
+  replaceUrlDomain(url, newBase) {
+    if (!url) return url;
+    try {
+      const parsedUrl = new URL(url);
+      const parsedNewBase = new URL(newBase);
+      return `${parsedNewBase.origin}${parsedUrl.pathname}${parsedUrl.search}${parsedUrl.hash}`;
+    } catch (e) {
+      return url;
+    }
+  }
+};
 // src/client-sdk/services/traces/types.ts
 var TracesError = class extends Error {
   constructor(message, operation, originalError) {
@@ -47,13 +1260,13 @@ var TracesError = class extends Error {
 };
 // src/client-sdk/services/traces/tracing/tracer.ts
-var tracer = _chunkA43BYF5Qjs.getLangWatchTracer.call(void 0, `${_chunkYWO3NE5Ajs.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkYWO3NE5Ajs.LANGWATCH_SDK_VERSION);
+var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkC4XUWCQRjs.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkC4XUWCQRjs.LANGWATCH_SDK_VERSION);
 // src/client-sdk/services/traces/service.ts
 var TracesService = class {
   constructor(config) {
     this.config = config;
-    return _chunkNM5OKM7Fjs.createTracingProxy.call(void 0,
+    return _chunk6SSCBYJMjs.createTracingProxy.call(void 0,
       this,
       tracer
     );
@@ -116,17 +1329,27 @@ var LangWatch = class {
     _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _LangWatch_instances);
     var _a, _b, _c, _d;
     const apiKey = (_b = (_a = options.apiKey) != null ? _a : process.env.LANGWATCH_API_KEY) != null ? _b : "";
-    const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkYWO3NE5Ajs.DEFAULT_ENDPOINT;
+    const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkC4XUWCQRjs.DEFAULT_ENDPOINT;
     this.config = _chunkOHM7JUMRjs.__privateMethod.call(void 0, this, _LangWatch_instances, createInternalConfig_fn).call(this, {
       apiKey,
       endpoint,
       options: options.options
     });
-    this.prompts = new (0, _chunkNM5OKM7Fjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
-      promptsApiService: new (0, _chunkNM5OKM7Fjs.PromptsApiService)(this.config),
-      localPromptsService: new (0, _chunkNM5OKM7Fjs.LocalPromptsService)()
+    this.prompts = new (0, _chunk6SSCBYJMjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
+      promptsApiService: new (0, _chunk6SSCBYJMjs.PromptsApiService)(this.config),
+      localPromptsService: new (0, _chunk6SSCBYJMjs.LocalPromptsService)()
     }, this.config));
     this.traces = new TracesFacade(this.config);
+    this.evaluation = new EvaluationFacade({
+      langwatchApiClient: this.config.langwatchApiClient,
+      endpoint: this.config.endpoint,
+      apiKey: this.config.apiKey,
+      logger: this.config.logger
+    });
+    this.datasets = new DatasetsFacade({
+      langwatchApiClient: this.config.langwatchApiClient,
+      logger: this.config.logger
+    });
   }
   get apiClient() {
     return this.config.langwatchApiClient;
@@ -140,15 +1363,17 @@ createInternalConfig_fn = function({
 }) {
   var _a;
   return {
-    logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunkCKIZDPIJjs.NoOpLogger)(),
-    langwatchApiClient: _chunkNM5OKM7Fjs.createLangWatchApiClient.call(void 0, apiKey, endpoint)
+    logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunk5MQQRSVMjs.NoOpLogger)(),
+    langwatchApiClient: _chunk6SSCBYJMjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
+    endpoint,
+    apiKey
   };
 };
 // src/index.ts
 var logger = {
-  ConsoleLogger: _chunkCKIZDPIJjs.ConsoleLogger,
-  NoOpLogger: _chunkCKIZDPIJjs.NoOpLogger
+  ConsoleLogger: _chunk5MQQRSVMjs.ConsoleLogger,
+  NoOpLogger: _chunk5MQQRSVMjs.NoOpLogger
 };
@@ -159,5 +1384,12 @@ var logger = {
-exports.FetchPolicy = _chunkNM5OKM7Fjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkSNDTNU3Tjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkBTCJWUS5js.LangWatchTraceExporter; exports.attributes = _chunkCKIZDPIJjs.attributes_exports; exports.getLangWatchLogger = _chunkBTCJWUS5js.getLangWatchLogger; exports.getLangWatchTracer = _chunkA43BYF5Qjs.getLangWatchTracer; exports.logger = logger;
+exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy = _chunk6SSCBYJMjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkBQRUUTN3js.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunkBQRUUTN3js.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
 //# sourceMappingURL=index.js.map