langwatch 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/{add-LUETMKBD.js → add-Z5UVUPCK.js} +7 -7
  2. package/dist/{add-LUETMKBD.js.map → add-Z5UVUPCK.js.map} +1 -1
  3. package/dist/{add-2UHFYNUA.mjs → add-ZAPD2GBO.mjs} +4 -4
  4. package/dist/{chunk-BQRUUTN3.js → chunk-4BNGSDYW.js} +14 -14
  5. package/dist/{chunk-BQRUUTN3.js.map → chunk-4BNGSDYW.js.map} +1 -1
  6. package/dist/{chunk-TB5KB737.mjs → chunk-77XIPD42.mjs} +2 -2
  7. package/dist/chunk-77XIPD42.mjs.map +1 -0
  8. package/dist/{chunk-6SSCBYJM.js → chunk-DXBTJGCK.js} +10 -10
  9. package/dist/{chunk-6SSCBYJM.js.map → chunk-DXBTJGCK.js.map} +1 -1
  10. package/dist/{chunk-OTID7S7K.mjs → chunk-J4HK6XZR.mjs} +5 -5
  11. package/dist/{chunk-C4XUWCQR.js → chunk-NPFWFQK6.js} +2 -2
  12. package/dist/chunk-NPFWFQK6.js.map +1 -0
  13. package/dist/chunk-OAKQ7UBU.mjs +317 -0
  14. package/dist/chunk-OAKQ7UBU.mjs.map +1 -0
  15. package/dist/chunk-RM2VUAFL.js +317 -0
  16. package/dist/chunk-RM2VUAFL.js.map +1 -0
  17. package/dist/{chunk-WCNDT5SD.mjs → chunk-SZRV7E6P.mjs} +2 -2
  18. package/dist/cli/index.js +6 -6
  19. package/dist/cli/index.mjs +6 -6
  20. package/dist/index.d.mts +239 -54
  21. package/dist/index.d.ts +239 -54
  22. package/dist/index.js +287 -89
  23. package/dist/index.js.map +1 -1
  24. package/dist/index.mjs +286 -88
  25. package/dist/index.mjs.map +1 -1
  26. package/dist/{list-7U3M64GY.js → list-LASBYRI4.js} +7 -7
  27. package/dist/{list-7U3M64GY.js.map → list-LASBYRI4.js.map} +1 -1
  28. package/dist/{list-WV5LA6LD.mjs → list-XX4VPNJA.mjs} +4 -4
  29. package/dist/{login-B7DKMN7P.js → login-2VCZDSLE.js} +3 -3
  30. package/dist/{login-B7DKMN7P.js.map → login-2VCZDSLE.js.map} +1 -1
  31. package/dist/{login-QKRT6PXA.mjs → login-CZ2257SV.mjs} +2 -2
  32. package/dist/observability-sdk/index.js +4 -4
  33. package/dist/observability-sdk/index.js.map +1 -1
  34. package/dist/observability-sdk/index.mjs +7 -7
  35. package/dist/observability-sdk/setup/node/index.d.mts +24 -1
  36. package/dist/observability-sdk/setup/node/index.d.ts +24 -1
  37. package/dist/observability-sdk/setup/node/index.js +7 -292
  38. package/dist/observability-sdk/setup/node/index.js.map +1 -1
  39. package/dist/observability-sdk/setup/node/index.mjs +8 -293
  40. package/dist/observability-sdk/setup/node/index.mjs.map +1 -1
  41. package/dist/{remove-2OGMXSTR.mjs → remove-KESD7YHL.mjs} +4 -4
  42. package/dist/{remove-A4DKCN7A.js → remove-XWN3XTF5.js} +6 -6
  43. package/dist/{remove-A4DKCN7A.js.map → remove-XWN3XTF5.js.map} +1 -1
  44. package/dist/{sync-WRZXIBZS.js → sync-IJ26JHEP.js} +6 -6
  45. package/dist/{sync-WRZXIBZS.js.map → sync-IJ26JHEP.js.map} +1 -1
  46. package/dist/{sync-TNVCKWTC.mjs → sync-SCVP7CHX.mjs} +4 -4
  47. package/package.json +3 -2
  48. package/dist/chunk-C4XUWCQR.js.map +0 -1
  49. package/dist/chunk-TB5KB737.mjs.map +0 -1
  50. /package/dist/{add-2UHFYNUA.mjs.map → add-ZAPD2GBO.mjs.map} +0 -0
  51. /package/dist/{chunk-OTID7S7K.mjs.map → chunk-J4HK6XZR.mjs.map} +0 -0
  52. /package/dist/{chunk-WCNDT5SD.mjs.map → chunk-SZRV7E6P.mjs.map} +0 -0
  53. /package/dist/{list-WV5LA6LD.mjs.map → list-XX4VPNJA.mjs.map} +0 -0
  54. /package/dist/{login-QKRT6PXA.mjs.map → login-CZ2257SV.mjs.map} +0 -0
  55. /package/dist/{remove-2OGMXSTR.mjs.map → remove-KESD7YHL.mjs.map} +0 -0
  56. /package/dist/{sync-TNVCKWTC.mjs.map → sync-SCVP7CHX.mjs.map} +0 -0
package/dist/index.js CHANGED
@@ -1,27 +1,30 @@
1
1
  "use strict";Object.defineProperty(exports, "__esModule", {value: true});
2
2
 
3
+ var _chunkRM2VUAFLjs = require('./chunk-RM2VUAFL.js');
3
4
 
4
5
 
5
6
 
6
7
 
7
8
 
8
- var _chunk6SSCBYJMjs = require('./chunk-6SSCBYJM.js');
9
+
10
+
11
+ var _chunkDXBTJGCKjs = require('./chunk-DXBTJGCK.js');
9
12
 
10
13
 
11
14
  var _chunkASTAIRXGjs = require('./chunk-ASTAIRXG.js');
12
15
 
13
16
 
14
17
 
15
- var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
18
+ var _chunk4BNGSDYWjs = require('./chunk-4BNGSDYW.js');
16
19
 
17
20
 
18
21
 
19
- var _chunkBQRUUTN3js = require('./chunk-BQRUUTN3.js');
20
22
 
23
+ var _chunkNPFWFQK6js = require('./chunk-NPFWFQK6.js');
21
24
 
22
25
 
23
26
 
24
- var _chunkC4XUWCQRjs = require('./chunk-C4XUWCQR.js');
27
+ var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
25
28
 
26
29
 
27
30
 
@@ -147,11 +150,11 @@ var DatasetsFacade = class {
147
150
  };
148
151
  _datasetService = new WeakMap();
149
152
 
150
- // src/client-sdk/services/evaluation/evaluation.ts
153
+ // src/client-sdk/services/experiments/experiment.ts
151
154
  var _async_hooks = require('async_hooks');
152
155
  var _api = require('@opentelemetry/api');
153
156
 
154
- // src/client-sdk/services/evaluation/humanReadableId.ts
157
+ // src/client-sdk/services/experiments/humanReadableId.ts
155
158
  var ADJECTIVES = [
156
159
  "swift",
157
160
  "bright",
@@ -265,29 +268,29 @@ var generateHumanReadableId = (separator = "-") => {
265
268
  return `${adjective1}${separator}${adjective2}${separator}${noun}`;
266
269
  };
267
270
 
268
- // src/client-sdk/services/evaluation/errors/evaluation.error.ts
269
- var EvaluationError = class extends Error {
271
+ // src/client-sdk/services/experiments/errors/experiment.error.ts
272
+ var ExperimentError = class extends Error {
270
273
  constructor(message) {
271
274
  super(message);
272
- this.name = "EvaluationError";
275
+ this.name = "ExperimentError";
273
276
  }
274
277
  };
275
- var EvaluationInitError = class extends EvaluationError {
278
+ var ExperimentInitError = class extends ExperimentError {
276
279
  constructor(message, cause) {
277
280
  super(message);
278
281
  this.cause = cause;
279
- this.name = "EvaluationInitError";
282
+ this.name = "ExperimentInitError";
280
283
  }
281
284
  };
282
- var EvaluationApiError = class extends EvaluationError {
285
+ var ExperimentApiError = class extends ExperimentError {
283
286
  constructor(message, statusCode, cause) {
284
287
  super(message);
285
288
  this.statusCode = statusCode;
286
289
  this.cause = cause;
287
- this.name = "EvaluationApiError";
290
+ this.name = "ExperimentApiError";
288
291
  }
289
292
  };
290
- var TargetMetadataConflictError = class extends EvaluationError {
293
+ var TargetMetadataConflictError = class extends ExperimentError {
291
294
  constructor(targetName, existingMetadata, newMetadata) {
292
295
  super(
293
296
  `Target '${targetName}' was previously registered with different metadata.
@@ -301,7 +304,7 @@ If you want to use different metadata, please use a different target name.`
301
304
  this.name = "TargetMetadataConflictError";
302
305
  }
303
306
  };
304
- var EvaluatorError = class extends EvaluationError {
307
+ var EvaluatorError = class extends ExperimentError {
305
308
  constructor(evaluatorSlug, message, cause) {
306
309
  super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
307
310
  this.evaluatorSlug = evaluatorSlug;
@@ -310,12 +313,12 @@ var EvaluatorError = class extends EvaluationError {
310
313
  }
311
314
  };
312
315
 
313
- // src/client-sdk/services/evaluation/evaluation.ts
316
+ // src/client-sdk/services/experiments/experiment.ts
314
317
  var DEFAULT_CONCURRENCY = 4;
315
318
  var DEBOUNCE_INTERVAL_MS = 1e3;
316
319
  var iterationContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
317
320
  var targetContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
318
- var Evaluation = class _Evaluation {
321
+ var Experiment = class _Experiment {
319
322
  constructor(name, options) {
320
323
  this.initialized = false;
321
324
  this.total = 0;
@@ -352,16 +355,17 @@ var Evaluation = class _Evaluation {
352
355
  * Initialize an evaluation session
353
356
  */
354
357
  static async init(name, options) {
355
- const evaluation = new _Evaluation(name, options);
356
- await evaluation.initialize();
357
- return evaluation;
358
+ _chunkRM2VUAFLjs.ensureSetup.call(void 0, );
359
+ const experiment = new _Experiment(name, options);
360
+ await experiment.initialize();
361
+ return experiment;
358
362
  }
359
363
  /**
360
364
  * Initialize the evaluation by creating/getting the experiment
361
365
  */
362
366
  async initialize() {
363
367
  if (!this.apiKey) {
364
- throw new EvaluationInitError(
368
+ throw new ExperimentInitError(
365
369
  "API key is required. Set LANGWATCH_API_KEY or pass apiKey to LangWatch constructor."
366
370
  );
367
371
  }
@@ -379,11 +383,11 @@ var Evaluation = class _Evaluation {
379
383
  })
380
384
  });
381
385
  if (response.status === 401) {
382
- throw new EvaluationInitError("Invalid API key");
386
+ throw new ExperimentInitError("Invalid API key");
383
387
  }
384
388
  if (!response.ok) {
385
389
  const text = await response.text();
386
- throw new EvaluationInitError(`Failed to initialize experiment: ${text}`);
390
+ throw new ExperimentInitError(`Failed to initialize experiment: ${text}`);
387
391
  }
388
392
  const data = await response.json();
389
393
  this.experimentSlug = data.slug;
@@ -391,10 +395,10 @@ var Evaluation = class _Evaluation {
391
395
  console.log(`Follow results at: ${this.endpoint}${data.path}?runId=${encodedRunId}`);
392
396
  this.initialized = true;
393
397
  } catch (error) {
394
- if (error instanceof EvaluationInitError) {
398
+ if (error instanceof ExperimentInitError) {
395
399
  throw error;
396
400
  }
397
- throw new EvaluationInitError(
401
+ throw new ExperimentInitError(
398
402
  `Failed to initialize evaluation: ${error instanceof Error ? error.message : String(error)}`,
399
403
  error instanceof Error ? error : void 0
400
404
  );
@@ -841,17 +845,16 @@ var Evaluation = class _Evaluation {
841
845
  name: this.name,
842
846
  run_id: this.runId,
843
847
  dataset: this.batch.dataset.map((entry) => {
844
- var _a, _b, _c;
845
- return {
848
+ var _a, _b;
849
+ return _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
846
850
  index: entry.index,
847
851
  entry: entry.entry,
848
852
  duration: entry.duration,
849
853
  error: entry.error,
850
854
  trace_id: entry.trace_id,
851
855
  target_id: (_a = entry.target_id) != null ? _a : null,
852
- cost: (_b = entry.cost) != null ? _b : null,
853
- predicted: (_c = entry.predicted) != null ? _c : null
854
- };
856
+ cost: (_b = entry.cost) != null ? _b : null
857
+ }, entry.predicted !== void 0 && entry.predicted !== null ? { predicted: entry.predicted } : {});
855
858
  }),
856
859
  evaluations: this.batch.evaluations.map((e) => ({
857
860
  name: e.name,
@@ -943,70 +946,70 @@ var Evaluation = class _Evaluation {
943
946
  }
944
947
  };
945
948
 
946
- // src/client-sdk/services/evaluation/platformErrors.ts
947
- var EvaluationsError = class extends Error {
949
+ // src/client-sdk/services/experiments/platformErrors.ts
950
+ var ExperimentsError = class extends Error {
948
951
  constructor(message) {
949
952
  super(message);
950
- this.name = "EvaluationsError";
953
+ this.name = "ExperimentsError";
951
954
  }
952
955
  };
953
- var EvaluationNotFoundError = class extends EvaluationsError {
956
+ var ExperimentNotFoundError = class extends ExperimentsError {
954
957
  constructor(slug) {
955
- super(`Evaluation not found: ${slug}`);
956
- this.name = "EvaluationNotFoundError";
958
+ super(`Experiment not found: ${slug}`);
959
+ this.name = "ExperimentNotFoundError";
957
960
  }
958
961
  };
959
- var EvaluationTimeoutError = class extends EvaluationsError {
962
+ var ExperimentTimeoutError = class extends ExperimentsError {
960
963
  constructor(runId, progress, total) {
961
- super(`Evaluation run timed out: ${runId} (${progress}/${total} completed)`);
962
- this.name = "EvaluationTimeoutError";
964
+ super(`Experiment run timed out: ${runId} (${progress}/${total} completed)`);
965
+ this.name = "ExperimentTimeoutError";
963
966
  this.runId = runId;
964
967
  this.progress = progress;
965
968
  this.total = total;
966
969
  }
967
970
  };
968
- var EvaluationRunFailedError = class extends EvaluationsError {
971
+ var ExperimentRunFailedError = class extends ExperimentsError {
969
972
  constructor(runId, errorMessage) {
970
- super(`Evaluation run failed: ${errorMessage}`);
971
- this.name = "EvaluationRunFailedError";
973
+ super(`Experiment run failed: ${errorMessage}`);
974
+ this.name = "ExperimentRunFailedError";
972
975
  this.runId = runId;
973
976
  this.errorMessage = errorMessage;
974
977
  }
975
978
  };
976
- var EvaluationsApiError = class extends EvaluationsError {
979
+ var ExperimentsApiError = class extends ExperimentsError {
977
980
  constructor(message, statusCode) {
978
981
  super(message);
979
- this.name = "EvaluationsApiError";
982
+ this.name = "ExperimentsApiError";
980
983
  this.statusCode = statusCode;
981
984
  }
982
985
  };
983
986
 
984
- // src/client-sdk/services/evaluation/evaluation.facade.ts
987
+ // src/client-sdk/services/experiments/experiments.facade.ts
985
988
  var DEFAULT_POLL_INTERVAL = 2e3;
986
989
  var DEFAULT_TIMEOUT = 6e5;
987
- var EvaluationFacade = class {
990
+ var ExperimentsFacade = class {
988
991
  constructor(config) {
989
992
  this.config = config;
990
993
  }
991
994
  /**
992
- * Initialize a new evaluation session (SDK-defined)
995
+ * Initialize a new experiment session (SDK-defined)
993
996
  *
994
997
  * @param name - Name of the experiment (used as slug)
995
998
  * @param options - Optional configuration
996
- * @returns An initialized Evaluation instance
999
+ * @returns An initialized Experiment instance
997
1000
  *
998
1001
  * @example
999
1002
  * ```typescript
1000
- * const evaluation = await langwatch.evaluation.init('my-experiment');
1003
+ * const experiment = await langwatch.experiments.init('my-experiment');
1001
1004
  *
1002
- * await evaluation.run(dataset, async ({ item, index }) => {
1005
+ * await experiment.run(dataset, async ({ item, index }) => {
1003
1006
  * const response = await myAgent(item.question);
1004
- * evaluation.log('accuracy', { index, score: 0.95 });
1007
+ * experiment.log('accuracy', { index, score: 0.95 });
1005
1008
  * });
1006
1009
  * ```
1007
1010
  */
1008
1011
  async init(name, options) {
1009
- return Evaluation.init(name, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1012
+ return Experiment.init(name, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1010
1013
  apiClient: this.config.langwatchApiClient,
1011
1014
  endpoint: this.config.endpoint,
1012
1015
  apiKey: this.config.apiKey,
@@ -1014,15 +1017,15 @@ var EvaluationFacade = class {
1014
1017
  }, options));
1015
1018
  }
1016
1019
  /**
1017
- * Run a platform-configured evaluation (Evaluations V3)
1020
+ * Run a platform-configured experiment (Experiments Workbench)
1018
1021
  *
1019
- * This runs an evaluation that was configured in the LangWatch platform.
1022
+ * This runs an experiment that was configured in the LangWatch platform.
1020
1023
  * The method automatically prints a summary and exits with code 1 on failure
1021
1024
  * (unless `exitOnFailure: false` is passed).
1022
1025
  *
1023
- * @param slug - The slug of the evaluation (found in the evaluation URL)
1026
+ * @param slug - The slug of the experiment (found in the experiment URL)
1024
1027
  * @param options - Optional configuration
1025
- * @returns The evaluation results including pass rate and summary
1028
+ * @returns The experiment results including pass rate and summary
1026
1029
  *
1027
1030
  * @example
1028
1031
  * ```typescript
@@ -1030,17 +1033,17 @@ var EvaluationFacade = class {
1030
1033
  *
1031
1034
  * const langwatch = new LangWatch();
1032
1035
  *
1033
- * const result = await langwatch.evaluation.run("my-evaluation-slug");
1036
+ * const result = await langwatch.experiments.run("my-experiment-slug");
1034
1037
  * result.printSummary();
1035
1038
  * ```
1036
1039
  */
1037
1040
  async run(slug, options) {
1038
- this.config.logger.info(`Running platform evaluation: ${slug}`);
1041
+ this.config.logger.info(`Running platform experiment: ${slug}`);
1039
1042
  const result = await this.runWithPolling(slug, options);
1040
1043
  return result;
1041
1044
  }
1042
1045
  /**
1043
- * Run an evaluation and wait for completion using polling
1046
+ * Run an experiment and wait for completion using polling
1044
1047
  */
1045
1048
  async runWithPolling(slug, options = {}) {
1046
1049
  var _a, _b, _c, _d, _e, _f, _g;
@@ -1050,7 +1053,7 @@ var EvaluationFacade = class {
1050
1053
  const { runId } = startResponse;
1051
1054
  const apiRunUrl = (_c = startResponse.runUrl) != null ? _c : "";
1052
1055
  const runUrl = apiRunUrl ? this.replaceUrlDomain(apiRunUrl, this.config.endpoint) : "";
1053
- console.log(`Started evaluation run: ${runId}`);
1056
+ console.log(`Started experiment run: ${runId}`);
1054
1057
  if (runUrl) {
1055
1058
  console.log(`Follow live: ${runUrl}`);
1056
1059
  }
@@ -1065,7 +1068,7 @@ var EvaluationFacade = class {
1065
1068
  if (Date.now() - startTime > timeout) {
1066
1069
  console.log();
1067
1070
  const finalStatus = await this.getRunStatus(runId);
1068
- throw new EvaluationTimeoutError(runId, finalStatus.progress, finalStatus.total);
1071
+ throw new ExperimentTimeoutError(runId, finalStatus.progress, finalStatus.total);
1069
1072
  }
1070
1073
  await this.sleep(pollInterval);
1071
1074
  const status = await this.getRunStatus(runId);
@@ -1083,7 +1086,7 @@ var EvaluationFacade = class {
1083
1086
  }
1084
1087
  if (status.status === "failed") {
1085
1088
  console.log();
1086
- throw new EvaluationRunFailedError(runId, (_f = status.error) != null ? _f : "Unknown error");
1089
+ throw new ExperimentRunFailedError(runId, (_f = status.error) != null ? _f : "Unknown error");
1087
1090
  }
1088
1091
  if (status.status === "stopped") {
1089
1092
  console.log();
@@ -1098,7 +1101,7 @@ var EvaluationFacade = class {
1098
1101
  }
1099
1102
  }
1100
1103
  /**
1101
- * Start an evaluation run
1104
+ * Start an experiment run
1102
1105
  */
1103
1106
  async startRun(slug) {
1104
1107
  const response = await this.config.langwatchApiClient.POST(
@@ -1112,13 +1115,13 @@ var EvaluationFacade = class {
1112
1115
  if (response.error) {
1113
1116
  const status = response.response.status;
1114
1117
  if (status === 404) {
1115
- throw new EvaluationNotFoundError(slug);
1118
+ throw new ExperimentNotFoundError(slug);
1116
1119
  }
1117
1120
  if (status === 401) {
1118
- throw new EvaluationsApiError("Unauthorized - check your API key", 401);
1121
+ throw new ExperimentsApiError("Unauthorized - check your API key", 401);
1119
1122
  }
1120
- const errorMessage = "error" in response.error ? response.error.error : `Failed to start evaluation: ${slug}`;
1121
- throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1123
+ const errorMessage = "error" in response.error ? response.error.error : `Failed to start experiment: ${slug}`;
1124
+ throw new ExperimentsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1122
1125
  }
1123
1126
  return response.data;
1124
1127
  }
@@ -1137,13 +1140,13 @@ var EvaluationFacade = class {
1137
1140
  if (response.error) {
1138
1141
  const status = response.response.status;
1139
1142
  if (status === 404) {
1140
- throw new EvaluationsApiError(`Run not found: ${runId}`, 404);
1143
+ throw new ExperimentsApiError(`Run not found: ${runId}`, 404);
1141
1144
  }
1142
1145
  if (status === 401) {
1143
- throw new EvaluationsApiError("Unauthorized - check your API key", 401);
1146
+ throw new ExperimentsApiError("Unauthorized - check your API key", 401);
1144
1147
  }
1145
1148
  const errorMessage = "error" in response.error ? response.error.error : `Failed to get run status: ${runId}`;
1146
- throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1149
+ throw new ExperimentsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1147
1150
  }
1148
1151
  return response.data;
1149
1152
  }
@@ -1151,14 +1154,13 @@ var EvaluationFacade = class {
1151
1154
  * Build the result object from API response
1152
1155
  */
1153
1156
  buildResult(runId, status, summary, runUrl) {
1154
- var _a, _b, _c, _d, _e, _f, _g;
1155
- const totalCells = (_a = summary.totalCells) != null ? _a : 0;
1156
- const completedCells = (_b = summary.completedCells) != null ? _b : 0;
1157
- const failedCells = (_c = summary.failedCells) != null ? _c : 0;
1158
- const duration = (_d = summary.duration) != null ? _d : 0;
1159
- const totalPassed = (_e = summary.totalPassed) != null ? _e : completedCells - failedCells;
1160
- const totalFailed = (_f = summary.totalFailed) != null ? _f : failedCells;
1161
- const passRate = (_g = summary.passRate) != null ? _g : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
1157
+ var _a, _b, _c, _d, _e, _f;
1158
+ const completedCells = (_a = summary.completedCells) != null ? _a : 0;
1159
+ const failedCells = (_b = summary.failedCells) != null ? _b : 0;
1160
+ const duration = (_c = summary.duration) != null ? _c : 0;
1161
+ const totalPassed = (_d = summary.totalPassed) != null ? _d : completedCells - failedCells;
1162
+ const totalFailed = (_e = summary.totalFailed) != null ? _e : failedCells;
1163
+ const passRate = (_f = summary.passRate) != null ? _f : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
1162
1164
  return {
1163
1165
  runId,
1164
1166
  status,
@@ -1188,12 +1190,12 @@ var EvaluationFacade = class {
1188
1190
  };
1189
1191
  }
1190
1192
  /**
1191
- * Print a CI-friendly summary of the evaluation results
1193
+ * Print a CI-friendly summary of the experiment results
1192
1194
  */
1193
1195
  printSummary(result) {
1194
1196
  const { runId, status, passed, failed, passRate, duration, runUrl, summary } = result;
1195
1197
  console.log("\n" + "\u2550".repeat(60));
1196
- console.log(" EVALUATION RESULTS");
1198
+ console.log(" EXPERIMENT RESULTS");
1197
1199
  console.log("\u2550".repeat(60));
1198
1200
  console.log(` Run ID: ${runId}`);
1199
1201
  console.log(` Status: ${status.toUpperCase()}`);
@@ -1249,6 +1251,192 @@ var EvaluationFacade = class {
1249
1251
  }
1250
1252
  };
1251
1253
 
1254
+ // src/client-sdk/services/evaluations/evaluations.facade.ts
1255
+
1256
+
1257
+ // src/client-sdk/services/evaluations/errors.ts
1258
+ var EvaluationError = class extends Error {
1259
+ constructor(message) {
1260
+ super(message);
1261
+ this.name = "EvaluationError";
1262
+ }
1263
+ };
1264
+ var EvaluatorCallError = class extends EvaluationError {
1265
+ constructor(evaluatorSlug, message, statusCode) {
1266
+ super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
1267
+ this.name = "EvaluatorCallError";
1268
+ this.evaluatorSlug = evaluatorSlug;
1269
+ this.statusCode = statusCode;
1270
+ }
1271
+ };
1272
+ var EvaluatorNotFoundError = class extends EvaluationError {
1273
+ constructor(evaluatorSlug) {
1274
+ super(`Evaluator not found: ${evaluatorSlug}`);
1275
+ this.name = "EvaluatorNotFoundError";
1276
+ this.evaluatorSlug = evaluatorSlug;
1277
+ }
1278
+ };
1279
+ var EvaluationsApiError = class extends EvaluationError {
1280
+ constructor(message, statusCode) {
1281
+ super(message);
1282
+ this.name = "EvaluationsApiError";
1283
+ this.statusCode = statusCode;
1284
+ }
1285
+ };
1286
+
1287
+ // src/client-sdk/services/evaluations/evaluations.facade.ts
1288
+ var _endpoint, _apiKey, _logger;
1289
+ var EvaluationsFacade = class {
1290
+ constructor(config) {
1291
+ _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _endpoint);
1292
+ _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _apiKey);
1293
+ _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _logger);
1294
+ /**
1295
+ * Run an evaluator or guardrail against provided data
1296
+ *
1297
+ * Creates an OpenTelemetry span attached to the current trace context,
1298
+ * calls the LangWatch evaluation API, and returns the result.
1299
+ *
1300
+ * @param slug - The evaluator slug (e.g., "presidio/pii_detection", "langevals/llm_boolean")
1301
+ * @param options - Evaluation options including data, name, settings, and asGuardrail flag
1302
+ * @returns The evaluation result with status, passed, score, details, label, and cost
1303
+ *
1304
+ * @example
1305
+ * ```typescript
1306
+ * // Run as a guardrail (synchronous evaluation that can block responses)
1307
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1308
+ * data: { input: userInput, output: generatedResponse },
1309
+ * name: "PII Detection Guardrail",
1310
+ * asGuardrail: true,
1311
+ * });
1312
+ *
1313
+ * if (!guardrail.passed) {
1314
+ * console.log("PII detected:", guardrail.details);
1315
+ * return "Sorry, I cannot process that request.";
1316
+ * }
1317
+ * ```
1318
+ *
1319
+ * @example
1320
+ * ```typescript
1321
+ * // Run as an online evaluation (async scoring for monitoring)
1322
+ * const result = await langwatch.evaluations.evaluate("langevals/llm_boolean", {
1323
+ * data: { input: question, output: response },
1324
+ * name: "Quality Check",
1325
+ * settings: { prompt: "Check if the response answers the question." },
1326
+ * });
1327
+ *
1328
+ * console.log("Score:", result.score);
1329
+ * console.log("Details:", result.details);
1330
+ * ```
1331
+ */
1332
+ this.evaluate = async (slug, options) => {
1333
+ var _a;
1334
+ const { data, name, settings, asGuardrail } = options;
1335
+ const spanName = name != null ? name : slug;
1336
+ const spanType = asGuardrail ? "guardrail" : "evaluation";
1337
+ const tracer2 = _api.trace.getTracer("langwatch-evaluations");
1338
+ const activeSpan = _api.trace.getActiveSpan();
1339
+ const traceId = activeSpan ? activeSpan.spanContext().traceId : void 0;
1340
+ const parentSpanId = activeSpan ? activeSpan.spanContext().spanId : void 0;
1341
+ const otelSpan = tracer2.startSpan(
1342
+ spanName,
1343
+ {
1344
+ attributes: {
1345
+ "langwatch.span.type": spanType
1346
+ }
1347
+ },
1348
+ _api.context.active()
1349
+ );
1350
+ const langwatchSpan = _chunkONXIZKC6js.createLangWatchSpan.call(void 0, otelSpan);
1351
+ langwatchSpan.setType(spanType);
1352
+ langwatchSpan.setInput(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1353
+ data
1354
+ }, settings && { settings }));
1355
+ try {
1356
+ const requestBody = {
1357
+ trace_id: traceId != null ? traceId : null,
1358
+ span_id: parentSpanId != null ? parentSpanId : null,
1359
+ name: name != null ? name : null,
1360
+ data,
1361
+ settings,
1362
+ as_guardrail: asGuardrail
1363
+ };
1364
+ const url = `${_chunkOHM7JUMRjs.__privateGet.call(void 0, this, _endpoint)}/api/evaluations/${slug}/evaluate`;
1365
+ _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _logger).debug(`Calling evaluation API: ${url}`);
1366
+ const response = await fetch(url, {
1367
+ method: "POST",
1368
+ headers: {
1369
+ "Content-Type": "application/json",
1370
+ "X-Auth-Token": _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _apiKey)
1371
+ },
1372
+ body: JSON.stringify(requestBody)
1373
+ });
1374
+ if (!response.ok) {
1375
+ const errorText = await response.text();
1376
+ if (response.status === 404) {
1377
+ throw new EvaluatorNotFoundError(slug);
1378
+ }
1379
+ throw new EvaluationsApiError(
1380
+ `Evaluation API returned ${response.status}: ${errorText}`,
1381
+ response.status
1382
+ );
1383
+ }
1384
+ const responseData = await response.json();
1385
+ const result = _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1386
+ status: responseData.status
1387
+ }, responseData.passed !== null && responseData.passed !== void 0 && { passed: responseData.passed }), responseData.score !== null && responseData.score !== void 0 && { score: responseData.score }), responseData.details !== null && responseData.details !== void 0 && { details: responseData.details }), responseData.label !== null && responseData.label !== void 0 && { label: responseData.label }), responseData.cost !== null && responseData.cost !== void 0 && { cost: responseData.cost });
1388
+ langwatchSpan.setOutput({
1389
+ type: asGuardrail ? "guardrail_result" : "evaluation_result",
1390
+ value: result
1391
+ });
1392
+ if (result.status === "error") {
1393
+ otelSpan.setStatus({
1394
+ code: _api.SpanStatusCode.ERROR,
1395
+ message: (_a = result.details) != null ? _a : "Evaluation failed"
1396
+ });
1397
+ } else {
1398
+ otelSpan.setStatus({ code: _api.SpanStatusCode.OK });
1399
+ }
1400
+ return result;
1401
+ } catch (error) {
1402
+ const errorResult = {
1403
+ status: "error",
1404
+ details: error instanceof Error ? error.message : String(error)
1405
+ };
1406
+ if (asGuardrail) {
1407
+ errorResult.passed = true;
1408
+ }
1409
+ langwatchSpan.setOutput({
1410
+ type: asGuardrail ? "guardrail_result" : "evaluation_result",
1411
+ value: errorResult
1412
+ });
1413
+ otelSpan.setStatus({
1414
+ code: _api.SpanStatusCode.ERROR,
1415
+ message: errorResult.details
1416
+ });
1417
+ if (error instanceof Error) {
1418
+ otelSpan.recordException(error);
1419
+ }
1420
+ if (error instanceof EvaluatorNotFoundError || error instanceof EvaluationsApiError || error instanceof EvaluatorCallError) {
1421
+ throw error;
1422
+ }
1423
+ throw new EvaluatorCallError(
1424
+ slug,
1425
+ error instanceof Error ? error.message : String(error)
1426
+ );
1427
+ } finally {
1428
+ otelSpan.end();
1429
+ }
1430
+ };
1431
+ _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _endpoint, config.endpoint);
1432
+ _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _apiKey, config.apiKey);
1433
+ _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _logger, config.logger);
1434
+ }
1435
+ };
1436
+ _endpoint = new WeakMap();
1437
+ _apiKey = new WeakMap();
1438
+ _logger = new WeakMap();
1439
+
1252
1440
  // src/client-sdk/services/traces/types.ts
1253
1441
  var TracesError = class extends Error {
1254
1442
  constructor(message, operation, originalError) {
@@ -1260,13 +1448,13 @@ var TracesError = class extends Error {
1260
1448
  };
1261
1449
 
1262
1450
  // src/client-sdk/services/traces/tracing/tracer.ts
1263
- var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkC4XUWCQRjs.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkC4XUWCQRjs.LANGWATCH_SDK_VERSION);
1451
+ var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkNPFWFQK6js.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkNPFWFQK6js.LANGWATCH_SDK_VERSION);
1264
1452
 
1265
1453
  // src/client-sdk/services/traces/service.ts
1266
1454
  var TracesService = class {
1267
1455
  constructor(config) {
1268
1456
  this.config = config;
1269
- return _chunk6SSCBYJMjs.createTracingProxy.call(void 0,
1457
+ return _chunkDXBTJGCKjs.createTracingProxy.call(void 0,
1270
1458
  this,
1271
1459
  tracer
1272
1460
  );
@@ -1329,18 +1517,18 @@ var LangWatch = class {
1329
1517
  _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _LangWatch_instances);
1330
1518
  var _a, _b, _c, _d;
1331
1519
  const apiKey = (_b = (_a = options.apiKey) != null ? _a : process.env.LANGWATCH_API_KEY) != null ? _b : "";
1332
- const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkC4XUWCQRjs.DEFAULT_ENDPOINT;
1520
+ const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkNPFWFQK6js.DEFAULT_ENDPOINT;
1333
1521
  this.config = _chunkOHM7JUMRjs.__privateMethod.call(void 0, this, _LangWatch_instances, createInternalConfig_fn).call(this, {
1334
1522
  apiKey,
1335
1523
  endpoint,
1336
1524
  options: options.options
1337
1525
  });
1338
- this.prompts = new (0, _chunk6SSCBYJMjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1339
- promptsApiService: new (0, _chunk6SSCBYJMjs.PromptsApiService)(this.config),
1340
- localPromptsService: new (0, _chunk6SSCBYJMjs.LocalPromptsService)()
1526
+ this.prompts = new (0, _chunkDXBTJGCKjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1527
+ promptsApiService: new (0, _chunkDXBTJGCKjs.PromptsApiService)(this.config),
1528
+ localPromptsService: new (0, _chunkDXBTJGCKjs.LocalPromptsService)()
1341
1529
  }, this.config));
1342
1530
  this.traces = new TracesFacade(this.config);
1343
- this.evaluation = new EvaluationFacade({
1531
+ this.experiments = new ExperimentsFacade({
1344
1532
  langwatchApiClient: this.config.langwatchApiClient,
1345
1533
  endpoint: this.config.endpoint,
1346
1534
  apiKey: this.config.apiKey,
@@ -1350,6 +1538,11 @@ var LangWatch = class {
1350
1538
  langwatchApiClient: this.config.langwatchApiClient,
1351
1539
  logger: this.config.logger
1352
1540
  });
1541
+ this.evaluations = new EvaluationsFacade({
1542
+ endpoint: this.config.endpoint,
1543
+ apiKey: this.config.apiKey,
1544
+ logger: this.config.logger
1545
+ });
1353
1546
  }
1354
1547
  get apiClient() {
1355
1548
  return this.config.langwatchApiClient;
@@ -1364,7 +1557,7 @@ createInternalConfig_fn = function({
1364
1557
  var _a;
1365
1558
  return {
1366
1559
  logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunk5MQQRSVMjs.NoOpLogger)(),
1367
- langwatchApiClient: _chunk6SSCBYJMjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
1560
+ langwatchApiClient: _chunkDXBTJGCKjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
1368
1561
  endpoint,
1369
1562
  apiKey
1370
1563
  };
@@ -1391,5 +1584,10 @@ var logger = {
1391
1584
 
1392
1585
 
1393
1586
 
1394
- exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy = _chunk6SSCBYJMjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkBQRUUTN3js.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunkBQRUUTN3js.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
1587
+
1588
+
1589
+
1590
+
1591
+
1592
+ exports.EvaluationError = EvaluationError; exports.EvaluationsApiError = EvaluationsApiError; exports.EvaluationsFacade = EvaluationsFacade; exports.EvaluatorCallError = EvaluatorCallError; exports.EvaluatorError = EvaluatorError; exports.EvaluatorNotFoundError = EvaluatorNotFoundError; exports.Experiment = Experiment; exports.ExperimentApiError = ExperimentApiError; exports.ExperimentError = ExperimentError; exports.ExperimentInitError = ExperimentInitError; exports.ExperimentsFacade = ExperimentsFacade; exports.FetchPolicy = _chunkDXBTJGCKjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunk4BNGSDYWjs.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunk4BNGSDYWjs.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
1395
1593
  //# sourceMappingURL=index.js.map