langwatch 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/{add-UB5U3K3M.js → add-Z5UVUPCK.js} +7 -7
  2. package/dist/{add-UB5U3K3M.js.map → add-Z5UVUPCK.js.map} +1 -1
  3. package/dist/{add-XV5SUAXF.mjs → add-ZAPD2GBO.mjs} +4 -4
  4. package/dist/{chunk-JQYW7RY7.js → chunk-4BNGSDYW.js} +14 -14
  5. package/dist/{chunk-JQYW7RY7.js.map → chunk-4BNGSDYW.js.map} +1 -1
  6. package/dist/{chunk-LKE6DMUP.mjs → chunk-77XIPD42.mjs} +2 -2
  7. package/dist/chunk-77XIPD42.mjs.map +1 -0
  8. package/dist/{chunk-D4H6PR6H.js → chunk-DXBTJGCK.js} +10 -10
  9. package/dist/{chunk-D4H6PR6H.js.map → chunk-DXBTJGCK.js.map} +1 -1
  10. package/dist/{chunk-WZ7FYUHN.mjs → chunk-J4HK6XZR.mjs} +5 -5
  11. package/dist/{chunk-N7PJJMU2.js → chunk-NPFWFQK6.js} +2 -2
  12. package/dist/chunk-NPFWFQK6.js.map +1 -0
  13. package/dist/chunk-OAKQ7UBU.mjs +317 -0
  14. package/dist/chunk-OAKQ7UBU.mjs.map +1 -0
  15. package/dist/chunk-RM2VUAFL.js +317 -0
  16. package/dist/chunk-RM2VUAFL.js.map +1 -0
  17. package/dist/{chunk-556ZFJMK.mjs → chunk-SZRV7E6P.mjs} +2 -2
  18. package/dist/cli/index.js +6 -6
  19. package/dist/cli/index.mjs +6 -6
  20. package/dist/{implementation-CPxv2BdW.d.ts → implementation-Bnc8Aymq.d.ts} +1 -1
  21. package/dist/{implementation-CVrmD0bz.d.mts → implementation-Ck58nRkT.d.mts} +1 -1
  22. package/dist/index.d.mts +347 -38
  23. package/dist/index.d.ts +347 -38
  24. package/dist/index.js +519 -47
  25. package/dist/index.js.map +1 -1
  26. package/dist/index.mjs +518 -46
  27. package/dist/index.mjs.map +1 -1
  28. package/dist/{list-DUNP46AD.js → list-LASBYRI4.js} +7 -7
  29. package/dist/{list-DUNP46AD.js.map → list-LASBYRI4.js.map} +1 -1
  30. package/dist/{list-T4QS6CT2.mjs → list-XX4VPNJA.mjs} +4 -4
  31. package/dist/{login-3H27NIOD.js → login-2VCZDSLE.js} +3 -3
  32. package/dist/{login-3H27NIOD.js.map → login-2VCZDSLE.js.map} +1 -1
  33. package/dist/{login-T2ET7TKH.mjs → login-CZ2257SV.mjs} +2 -2
  34. package/dist/observability-sdk/index.d.mts +3 -3
  35. package/dist/observability-sdk/index.d.ts +3 -3
  36. package/dist/observability-sdk/index.js +4 -4
  37. package/dist/observability-sdk/index.js.map +1 -1
  38. package/dist/observability-sdk/index.mjs +7 -7
  39. package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
  40. package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
  41. package/dist/observability-sdk/setup/node/index.d.mts +24 -1
  42. package/dist/observability-sdk/setup/node/index.d.ts +24 -1
  43. package/dist/observability-sdk/setup/node/index.js +7 -292
  44. package/dist/observability-sdk/setup/node/index.js.map +1 -1
  45. package/dist/observability-sdk/setup/node/index.mjs +8 -293
  46. package/dist/observability-sdk/setup/node/index.mjs.map +1 -1
  47. package/dist/{remove-F5RM4775.mjs → remove-KESD7YHL.mjs} +4 -4
  48. package/dist/{remove-V4JL5Z4U.js → remove-XWN3XTF5.js} +6 -6
  49. package/dist/{remove-V4JL5Z4U.js.map → remove-XWN3XTF5.js.map} +1 -1
  50. package/dist/{sync-DIOKWE6R.js → sync-IJ26JHEP.js} +6 -6
  51. package/dist/{sync-DIOKWE6R.js.map → sync-IJ26JHEP.js.map} +1 -1
  52. package/dist/{sync-VGWOLOLJ.mjs → sync-SCVP7CHX.mjs} +4 -4
  53. package/dist/{types-Kts5RGLY.d.mts → types-5h2Im4pl.d.mts} +162 -0
  54. package/dist/{types-usU5mTCX.d.ts → types-fo-Ij9pl.d.ts} +162 -0
  55. package/package.json +3 -2
  56. package/dist/chunk-LKE6DMUP.mjs.map +0 -1
  57. package/dist/chunk-N7PJJMU2.js.map +0 -1
  58. /package/dist/{add-XV5SUAXF.mjs.map → add-ZAPD2GBO.mjs.map} +0 -0
  59. /package/dist/{chunk-WZ7FYUHN.mjs.map → chunk-J4HK6XZR.mjs.map} +0 -0
  60. /package/dist/{chunk-556ZFJMK.mjs.map → chunk-SZRV7E6P.mjs.map} +0 -0
  61. /package/dist/{list-T4QS6CT2.mjs.map → list-XX4VPNJA.mjs.map} +0 -0
  62. /package/dist/{login-T2ET7TKH.mjs.map → login-CZ2257SV.mjs.map} +0 -0
  63. /package/dist/{remove-F5RM4775.mjs.map → remove-KESD7YHL.mjs.map} +0 -0
  64. /package/dist/{sync-VGWOLOLJ.mjs.map → sync-SCVP7CHX.mjs.map} +0 -0
package/dist/index.js CHANGED
@@ -1,27 +1,30 @@
1
1
  "use strict";Object.defineProperty(exports, "__esModule", {value: true});
2
2
 
3
+ var _chunkRM2VUAFLjs = require('./chunk-RM2VUAFL.js');
3
4
 
4
5
 
5
6
 
6
7
 
7
8
 
8
- var _chunkD4H6PR6Hjs = require('./chunk-D4H6PR6H.js');
9
+
10
+
11
+ var _chunkDXBTJGCKjs = require('./chunk-DXBTJGCK.js');
9
12
 
10
13
 
11
14
  var _chunkASTAIRXGjs = require('./chunk-ASTAIRXG.js');
12
15
 
13
16
 
14
17
 
15
- var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
18
+ var _chunk4BNGSDYWjs = require('./chunk-4BNGSDYW.js');
16
19
 
17
20
 
18
21
 
19
- var _chunkJQYW7RY7js = require('./chunk-JQYW7RY7.js');
20
22
 
23
+ var _chunkNPFWFQK6js = require('./chunk-NPFWFQK6.js');
21
24
 
22
25
 
23
26
 
24
- var _chunkN7PJJMU2js = require('./chunk-N7PJJMU2.js');
27
+ var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
25
28
 
26
29
 
27
30
 
@@ -147,11 +150,11 @@ var DatasetsFacade = class {
147
150
  };
148
151
  _datasetService = new WeakMap();
149
152
 
150
- // src/client-sdk/services/evaluation/evaluation.ts
153
+ // src/client-sdk/services/experiments/experiment.ts
151
154
  var _async_hooks = require('async_hooks');
152
155
  var _api = require('@opentelemetry/api');
153
156
 
154
- // src/client-sdk/services/evaluation/humanReadableId.ts
157
+ // src/client-sdk/services/experiments/humanReadableId.ts
155
158
  var ADJECTIVES = [
156
159
  "swift",
157
160
  "bright",
@@ -265,29 +268,29 @@ var generateHumanReadableId = (separator = "-") => {
265
268
  return `${adjective1}${separator}${adjective2}${separator}${noun}`;
266
269
  };
267
270
 
268
- // src/client-sdk/services/evaluation/errors/evaluation.error.ts
269
- var EvaluationError = class extends Error {
271
+ // src/client-sdk/services/experiments/errors/experiment.error.ts
272
+ var ExperimentError = class extends Error {
270
273
  constructor(message) {
271
274
  super(message);
272
- this.name = "EvaluationError";
275
+ this.name = "ExperimentError";
273
276
  }
274
277
  };
275
- var EvaluationInitError = class extends EvaluationError {
278
+ var ExperimentInitError = class extends ExperimentError {
276
279
  constructor(message, cause) {
277
280
  super(message);
278
281
  this.cause = cause;
279
- this.name = "EvaluationInitError";
282
+ this.name = "ExperimentInitError";
280
283
  }
281
284
  };
282
- var EvaluationApiError = class extends EvaluationError {
285
+ var ExperimentApiError = class extends ExperimentError {
283
286
  constructor(message, statusCode, cause) {
284
287
  super(message);
285
288
  this.statusCode = statusCode;
286
289
  this.cause = cause;
287
- this.name = "EvaluationApiError";
290
+ this.name = "ExperimentApiError";
288
291
  }
289
292
  };
290
- var TargetMetadataConflictError = class extends EvaluationError {
293
+ var TargetMetadataConflictError = class extends ExperimentError {
291
294
  constructor(targetName, existingMetadata, newMetadata) {
292
295
  super(
293
296
  `Target '${targetName}' was previously registered with different metadata.
@@ -301,7 +304,7 @@ If you want to use different metadata, please use a different target name.`
301
304
  this.name = "TargetMetadataConflictError";
302
305
  }
303
306
  };
304
- var EvaluatorError = class extends EvaluationError {
307
+ var EvaluatorError = class extends ExperimentError {
305
308
  constructor(evaluatorSlug, message, cause) {
306
309
  super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
307
310
  this.evaluatorSlug = evaluatorSlug;
@@ -310,12 +313,12 @@ var EvaluatorError = class extends EvaluationError {
310
313
  }
311
314
  };
312
315
 
313
- // src/client-sdk/services/evaluation/evaluation.ts
316
+ // src/client-sdk/services/experiments/experiment.ts
314
317
  var DEFAULT_CONCURRENCY = 4;
315
318
  var DEBOUNCE_INTERVAL_MS = 1e3;
316
319
  var iterationContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
317
320
  var targetContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
318
- var Evaluation = class _Evaluation {
321
+ var Experiment = class _Experiment {
319
322
  constructor(name, options) {
320
323
  this.initialized = false;
321
324
  this.total = 0;
@@ -352,16 +355,17 @@ var Evaluation = class _Evaluation {
352
355
  * Initialize an evaluation session
353
356
  */
354
357
  static async init(name, options) {
355
- const evaluation = new _Evaluation(name, options);
356
- await evaluation.initialize();
357
- return evaluation;
358
+ _chunkRM2VUAFLjs.ensureSetup.call(void 0, );
359
+ const experiment = new _Experiment(name, options);
360
+ await experiment.initialize();
361
+ return experiment;
358
362
  }
359
363
  /**
360
364
  * Initialize the evaluation by creating/getting the experiment
361
365
  */
362
366
  async initialize() {
363
367
  if (!this.apiKey) {
364
- throw new EvaluationInitError(
368
+ throw new ExperimentInitError(
365
369
  "API key is required. Set LANGWATCH_API_KEY or pass apiKey to LangWatch constructor."
366
370
  );
367
371
  }
@@ -379,11 +383,11 @@ var Evaluation = class _Evaluation {
379
383
  })
380
384
  });
381
385
  if (response.status === 401) {
382
- throw new EvaluationInitError("Invalid API key");
386
+ throw new ExperimentInitError("Invalid API key");
383
387
  }
384
388
  if (!response.ok) {
385
389
  const text = await response.text();
386
- throw new EvaluationInitError(`Failed to initialize experiment: ${text}`);
390
+ throw new ExperimentInitError(`Failed to initialize experiment: ${text}`);
387
391
  }
388
392
  const data = await response.json();
389
393
  this.experimentSlug = data.slug;
@@ -391,10 +395,10 @@ var Evaluation = class _Evaluation {
391
395
  console.log(`Follow results at: ${this.endpoint}${data.path}?runId=${encodedRunId}`);
392
396
  this.initialized = true;
393
397
  } catch (error) {
394
- if (error instanceof EvaluationInitError) {
398
+ if (error instanceof ExperimentInitError) {
395
399
  throw error;
396
400
  }
397
- throw new EvaluationInitError(
401
+ throw new ExperimentInitError(
398
402
  `Failed to initialize evaluation: ${error instanceof Error ? error.message : String(error)}`,
399
403
  error instanceof Error ? error : void 0
400
404
  );
@@ -841,17 +845,16 @@ var Evaluation = class _Evaluation {
841
845
  name: this.name,
842
846
  run_id: this.runId,
843
847
  dataset: this.batch.dataset.map((entry) => {
844
- var _a, _b, _c;
845
- return {
848
+ var _a, _b;
849
+ return _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
846
850
  index: entry.index,
847
851
  entry: entry.entry,
848
852
  duration: entry.duration,
849
853
  error: entry.error,
850
854
  trace_id: entry.trace_id,
851
855
  target_id: (_a = entry.target_id) != null ? _a : null,
852
- cost: (_b = entry.cost) != null ? _b : null,
853
- predicted: (_c = entry.predicted) != null ? _c : null
854
- };
856
+ cost: (_b = entry.cost) != null ? _b : null
857
+ }, entry.predicted !== void 0 && entry.predicted !== null ? { predicted: entry.predicted } : {});
855
858
  }),
856
859
  evaluations: this.batch.evaluations.map((e) => ({
857
860
  name: e.name,
@@ -943,38 +946,497 @@ var Evaluation = class _Evaluation {
943
946
  }
944
947
  };
945
948
 
946
- // src/client-sdk/services/evaluation/evaluation.facade.ts
947
- var EvaluationFacade = class {
949
+ // src/client-sdk/services/experiments/platformErrors.ts
950
+ var ExperimentsError = class extends Error {
951
+ constructor(message) {
952
+ super(message);
953
+ this.name = "ExperimentsError";
954
+ }
955
+ };
956
+ var ExperimentNotFoundError = class extends ExperimentsError {
957
+ constructor(slug) {
958
+ super(`Experiment not found: ${slug}`);
959
+ this.name = "ExperimentNotFoundError";
960
+ }
961
+ };
962
+ var ExperimentTimeoutError = class extends ExperimentsError {
963
+ constructor(runId, progress, total) {
964
+ super(`Experiment run timed out: ${runId} (${progress}/${total} completed)`);
965
+ this.name = "ExperimentTimeoutError";
966
+ this.runId = runId;
967
+ this.progress = progress;
968
+ this.total = total;
969
+ }
970
+ };
971
+ var ExperimentRunFailedError = class extends ExperimentsError {
972
+ constructor(runId, errorMessage) {
973
+ super(`Experiment run failed: ${errorMessage}`);
974
+ this.name = "ExperimentRunFailedError";
975
+ this.runId = runId;
976
+ this.errorMessage = errorMessage;
977
+ }
978
+ };
979
+ var ExperimentsApiError = class extends ExperimentsError {
980
+ constructor(message, statusCode) {
981
+ super(message);
982
+ this.name = "ExperimentsApiError";
983
+ this.statusCode = statusCode;
984
+ }
985
+ };
986
+
987
+ // src/client-sdk/services/experiments/experiments.facade.ts
988
+ var DEFAULT_POLL_INTERVAL = 2e3;
989
+ var DEFAULT_TIMEOUT = 6e5;
990
+ var ExperimentsFacade = class {
948
991
  constructor(config) {
949
992
  this.config = config;
950
993
  }
951
994
  /**
952
- * Initialize a new evaluation session
995
+ * Initialize a new experiment session (SDK-defined)
953
996
  *
954
997
  * @param name - Name of the experiment (used as slug)
955
998
  * @param options - Optional configuration
956
- * @returns An initialized Evaluation instance
999
+ * @returns An initialized Experiment instance
957
1000
  *
958
1001
  * @example
959
1002
  * ```typescript
960
- * const evaluation = await langwatch.evaluation.init('my-experiment');
1003
+ * const experiment = await langwatch.experiments.init('my-experiment');
961
1004
  *
962
- * await evaluation.run(dataset, async ({ item, index }) => {
1005
+ * await experiment.run(dataset, async ({ item, index }) => {
963
1006
  * const response = await myAgent(item.question);
964
- * evaluation.log('accuracy', { index, score: 0.95 });
1007
+ * experiment.log('accuracy', { index, score: 0.95 });
965
1008
  * });
966
1009
  * ```
967
1010
  */
968
1011
  async init(name, options) {
969
- return Evaluation.init(name, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1012
+ return Experiment.init(name, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
970
1013
  apiClient: this.config.langwatchApiClient,
971
1014
  endpoint: this.config.endpoint,
972
1015
  apiKey: this.config.apiKey,
973
1016
  logger: this.config.logger
974
1017
  }, options));
975
1018
  }
1019
+ /**
1020
+ * Run a platform-configured experiment (Experiments Workbench)
1021
+ *
1022
+ * This runs an experiment that was configured in the LangWatch platform.
1023
+ * The method automatically prints a summary and exits with code 1 on failure
1024
+ * (unless `exitOnFailure: false` is passed).
1025
+ *
1026
+ * @param slug - The slug of the experiment (found in the experiment URL)
1027
+ * @param options - Optional configuration
1028
+ * @returns The experiment results including pass rate and summary
1029
+ *
1030
+ * @example
1031
+ * ```typescript
1032
+ * import { LangWatch } from "langwatch";
1033
+ *
1034
+ * const langwatch = new LangWatch();
1035
+ *
1036
+ * const result = await langwatch.experiments.run("my-experiment-slug");
1037
+ * result.printSummary();
1038
+ * ```
1039
+ */
1040
+ async run(slug, options) {
1041
+ this.config.logger.info(`Running platform experiment: ${slug}`);
1042
+ const result = await this.runWithPolling(slug, options);
1043
+ return result;
1044
+ }
1045
+ /**
1046
+ * Run an experiment and wait for completion using polling
1047
+ */
1048
+ async runWithPolling(slug, options = {}) {
1049
+ var _a, _b, _c, _d, _e, _f, _g;
1050
+ const pollInterval = (_a = options.pollInterval) != null ? _a : DEFAULT_POLL_INTERVAL;
1051
+ const timeout = (_b = options.timeout) != null ? _b : DEFAULT_TIMEOUT;
1052
+ const startResponse = await this.startRun(slug);
1053
+ const { runId } = startResponse;
1054
+ const apiRunUrl = (_c = startResponse.runUrl) != null ? _c : "";
1055
+ const runUrl = apiRunUrl ? this.replaceUrlDomain(apiRunUrl, this.config.endpoint) : "";
1056
+ console.log(`Started experiment run: ${runId}`);
1057
+ if (runUrl) {
1058
+ console.log(`Follow live: ${runUrl}`);
1059
+ }
1060
+ const total = startResponse.total;
1061
+ let lastProgress = 0;
1062
+ if (total > 0) {
1063
+ process.stdout.write(`Progress: 0/${total} (0%)`);
1064
+ }
1065
+ (_d = options.onProgress) == null ? void 0 : _d.call(options, 0, total);
1066
+ const startTime = Date.now();
1067
+ while (true) {
1068
+ if (Date.now() - startTime > timeout) {
1069
+ console.log();
1070
+ const finalStatus = await this.getRunStatus(runId);
1071
+ throw new ExperimentTimeoutError(runId, finalStatus.progress, finalStatus.total);
1072
+ }
1073
+ await this.sleep(pollInterval);
1074
+ const status = await this.getRunStatus(runId);
1075
+ const progress = status.progress;
1076
+ if (progress !== lastProgress && status.total > 0) {
1077
+ const percentage = Math.round(progress / status.total * 100);
1078
+ process.stdout.write(`\rProgress: ${progress}/${status.total} (${percentage}%)`);
1079
+ lastProgress = progress;
1080
+ }
1081
+ (_e = options.onProgress) == null ? void 0 : _e.call(options, status.progress, status.total);
1082
+ if (status.status === "completed") {
1083
+ console.log();
1084
+ const summary = status.summary;
1085
+ return this.buildResult(runId, "completed", summary, runUrl != null ? runUrl : "");
1086
+ }
1087
+ if (status.status === "failed") {
1088
+ console.log();
1089
+ throw new ExperimentRunFailedError(runId, (_f = status.error) != null ? _f : "Unknown error");
1090
+ }
1091
+ if (status.status === "stopped") {
1092
+ console.log();
1093
+ return this.buildResult(runId, "stopped", (_g = status.summary) != null ? _g : {
1094
+ runId,
1095
+ totalCells: status.total,
1096
+ completedCells: status.progress,
1097
+ failedCells: 0,
1098
+ duration: Date.now() - startTime
1099
+ }, runUrl != null ? runUrl : "");
1100
+ }
1101
+ }
1102
+ }
1103
+ /**
1104
+ * Start an experiment run
1105
+ */
1106
+ async startRun(slug) {
1107
+ const response = await this.config.langwatchApiClient.POST(
1108
+ "/api/evaluations/v3/{slug}/run",
1109
+ {
1110
+ params: {
1111
+ path: { slug }
1112
+ }
1113
+ }
1114
+ );
1115
+ if (response.error) {
1116
+ const status = response.response.status;
1117
+ if (status === 404) {
1118
+ throw new ExperimentNotFoundError(slug);
1119
+ }
1120
+ if (status === 401) {
1121
+ throw new ExperimentsApiError("Unauthorized - check your API key", 401);
1122
+ }
1123
+ const errorMessage = "error" in response.error ? response.error.error : `Failed to start experiment: ${slug}`;
1124
+ throw new ExperimentsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1125
+ }
1126
+ return response.data;
1127
+ }
1128
+ /**
1129
+ * Get the status of a run
1130
+ */
1131
+ async getRunStatus(runId) {
1132
+ const response = await this.config.langwatchApiClient.GET(
1133
+ "/api/evaluations/v3/runs/{runId}",
1134
+ {
1135
+ params: {
1136
+ path: { runId }
1137
+ }
1138
+ }
1139
+ );
1140
+ if (response.error) {
1141
+ const status = response.response.status;
1142
+ if (status === 404) {
1143
+ throw new ExperimentsApiError(`Run not found: ${runId}`, 404);
1144
+ }
1145
+ if (status === 401) {
1146
+ throw new ExperimentsApiError("Unauthorized - check your API key", 401);
1147
+ }
1148
+ const errorMessage = "error" in response.error ? response.error.error : `Failed to get run status: ${runId}`;
1149
+ throw new ExperimentsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1150
+ }
1151
+ return response.data;
1152
+ }
1153
+ /**
1154
+ * Build the result object from API response
1155
+ */
1156
+ buildResult(runId, status, summary, runUrl) {
1157
+ var _a, _b, _c, _d, _e, _f;
1158
+ const completedCells = (_a = summary.completedCells) != null ? _a : 0;
1159
+ const failedCells = (_b = summary.failedCells) != null ? _b : 0;
1160
+ const duration = (_c = summary.duration) != null ? _c : 0;
1161
+ const totalPassed = (_d = summary.totalPassed) != null ? _d : completedCells - failedCells;
1162
+ const totalFailed = (_e = summary.totalFailed) != null ? _e : failedCells;
1163
+ const passRate = (_f = summary.passRate) != null ? _f : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
1164
+ return {
1165
+ runId,
1166
+ status,
1167
+ passed: totalPassed,
1168
+ failed: totalFailed,
1169
+ passRate,
1170
+ duration,
1171
+ runUrl,
1172
+ // Always use the endpoint-based URL we constructed
1173
+ summary,
1174
+ printSummary: (exitOnFailure = true) => {
1175
+ var _a2;
1176
+ this.printSummary({
1177
+ runId,
1178
+ status,
1179
+ passed: totalPassed,
1180
+ failed: totalFailed,
1181
+ passRate,
1182
+ duration,
1183
+ runUrl: (_a2 = summary.runUrl) != null ? _a2 : runUrl,
1184
+ summary
1185
+ });
1186
+ if (exitOnFailure && totalFailed > 0) {
1187
+ process.exit(1);
1188
+ }
1189
+ }
1190
+ };
1191
+ }
1192
+ /**
1193
+ * Print a CI-friendly summary of the experiment results
1194
+ */
1195
+ printSummary(result) {
1196
+ const { runId, status, passed, failed, passRate, duration, runUrl, summary } = result;
1197
+ console.log("\n" + "\u2550".repeat(60));
1198
+ console.log(" EXPERIMENT RESULTS");
1199
+ console.log("\u2550".repeat(60));
1200
+ console.log(` Run ID: ${runId}`);
1201
+ console.log(` Status: ${status.toUpperCase()}`);
1202
+ console.log(` Duration: ${(duration / 1e3).toFixed(1)}s`);
1203
+ console.log("\u2500".repeat(60));
1204
+ console.log(` Passed: ${passed}`);
1205
+ console.log(` Failed: ${failed}`);
1206
+ console.log(` Pass Rate: ${passRate.toFixed(1)}%`);
1207
+ if (summary.targets && summary.targets.length > 0) {
1208
+ console.log("\u2500".repeat(60));
1209
+ console.log(" TARGETS:");
1210
+ for (const target of summary.targets) {
1211
+ console.log(` ${target.name}: ${target.passed} passed, ${target.failed} failed`);
1212
+ if (target.avgLatency) {
1213
+ console.log(` Avg latency: ${target.avgLatency.toFixed(0)}ms`);
1214
+ }
1215
+ if (target.totalCost) {
1216
+ console.log(` Total cost: $${target.totalCost.toFixed(4)}`);
1217
+ }
1218
+ }
1219
+ }
1220
+ if (summary.evaluators && summary.evaluators.length > 0) {
1221
+ console.log("\u2500".repeat(60));
1222
+ console.log(" EVALUATORS:");
1223
+ for (const evaluator of summary.evaluators) {
1224
+ console.log(
1225
+ ` ${evaluator.name}: ${evaluator.passRate.toFixed(1)}% pass rate`
1226
+ );
1227
+ if (evaluator.avgScore !== void 0) {
1228
+ console.log(` Avg score: ${evaluator.avgScore.toFixed(2)}`);
1229
+ }
1230
+ }
1231
+ }
1232
+ console.log("\u2500".repeat(60));
1233
+ console.log(` View details: ${runUrl}`);
1234
+ console.log("\u2550".repeat(60) + "\n");
1235
+ }
1236
+ sleep(ms) {
1237
+ return new Promise((resolve) => setTimeout(resolve, ms));
1238
+ }
1239
+ /**
1240
+ * Replace the domain of a URL with a new base URL, preserving the path
1241
+ */
1242
+ replaceUrlDomain(url, newBase) {
1243
+ if (!url) return url;
1244
+ try {
1245
+ const parsedUrl = new URL(url);
1246
+ const parsedNewBase = new URL(newBase);
1247
+ return `${parsedNewBase.origin}${parsedUrl.pathname}${parsedUrl.search}${parsedUrl.hash}`;
1248
+ } catch (e) {
1249
+ return url;
1250
+ }
1251
+ }
976
1252
  };
977
1253
 
1254
+ // src/client-sdk/services/evaluations/evaluations.facade.ts
1255
+
1256
+
1257
+ // src/client-sdk/services/evaluations/errors.ts
1258
+ var EvaluationError = class extends Error {
1259
+ constructor(message) {
1260
+ super(message);
1261
+ this.name = "EvaluationError";
1262
+ }
1263
+ };
1264
+ var EvaluatorCallError = class extends EvaluationError {
1265
+ constructor(evaluatorSlug, message, statusCode) {
1266
+ super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
1267
+ this.name = "EvaluatorCallError";
1268
+ this.evaluatorSlug = evaluatorSlug;
1269
+ this.statusCode = statusCode;
1270
+ }
1271
+ };
1272
+ var EvaluatorNotFoundError = class extends EvaluationError {
1273
+ constructor(evaluatorSlug) {
1274
+ super(`Evaluator not found: ${evaluatorSlug}`);
1275
+ this.name = "EvaluatorNotFoundError";
1276
+ this.evaluatorSlug = evaluatorSlug;
1277
+ }
1278
+ };
1279
+ var EvaluationsApiError = class extends EvaluationError {
1280
+ constructor(message, statusCode) {
1281
+ super(message);
1282
+ this.name = "EvaluationsApiError";
1283
+ this.statusCode = statusCode;
1284
+ }
1285
+ };
1286
+
1287
+ // src/client-sdk/services/evaluations/evaluations.facade.ts
1288
+ var _endpoint, _apiKey, _logger;
1289
+ var EvaluationsFacade = class {
1290
+ constructor(config) {
1291
+ _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _endpoint);
1292
+ _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _apiKey);
1293
+ _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _logger);
1294
+ /**
1295
+ * Run an evaluator or guardrail against provided data
1296
+ *
1297
+ * Creates an OpenTelemetry span attached to the current trace context,
1298
+ * calls the LangWatch evaluation API, and returns the result.
1299
+ *
1300
+ * @param slug - The evaluator slug (e.g., "presidio/pii_detection", "langevals/llm_boolean")
1301
+ * @param options - Evaluation options including data, name, settings, and asGuardrail flag
1302
+ * @returns The evaluation result with status, passed, score, details, label, and cost
1303
+ *
1304
+ * @example
1305
+ * ```typescript
1306
+ * // Run as a guardrail (synchronous evaluation that can block responses)
1307
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1308
+ * data: { input: userInput, output: generatedResponse },
1309
+ * name: "PII Detection Guardrail",
1310
+ * asGuardrail: true,
1311
+ * });
1312
+ *
1313
+ * if (!guardrail.passed) {
1314
+ * console.log("PII detected:", guardrail.details);
1315
+ * return "Sorry, I cannot process that request.";
1316
+ * }
1317
+ * ```
1318
+ *
1319
+ * @example
1320
+ * ```typescript
1321
+ * // Run as an online evaluation (async scoring for monitoring)
1322
+ * const result = await langwatch.evaluations.evaluate("langevals/llm_boolean", {
1323
+ * data: { input: question, output: response },
1324
+ * name: "Quality Check",
1325
+ * settings: { prompt: "Check if the response answers the question." },
1326
+ * });
1327
+ *
1328
+ * console.log("Score:", result.score);
1329
+ * console.log("Details:", result.details);
1330
+ * ```
1331
+ */
1332
+ this.evaluate = async (slug, options) => {
1333
+ var _a;
1334
+ const { data, name, settings, asGuardrail } = options;
1335
+ const spanName = name != null ? name : slug;
1336
+ const spanType = asGuardrail ? "guardrail" : "evaluation";
1337
+ const tracer2 = _api.trace.getTracer("langwatch-evaluations");
1338
+ const activeSpan = _api.trace.getActiveSpan();
1339
+ const traceId = activeSpan ? activeSpan.spanContext().traceId : void 0;
1340
+ const parentSpanId = activeSpan ? activeSpan.spanContext().spanId : void 0;
1341
+ const otelSpan = tracer2.startSpan(
1342
+ spanName,
1343
+ {
1344
+ attributes: {
1345
+ "langwatch.span.type": spanType
1346
+ }
1347
+ },
1348
+ _api.context.active()
1349
+ );
1350
+ const langwatchSpan = _chunkONXIZKC6js.createLangWatchSpan.call(void 0, otelSpan);
1351
+ langwatchSpan.setType(spanType);
1352
+ langwatchSpan.setInput(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1353
+ data
1354
+ }, settings && { settings }));
1355
+ try {
1356
+ const requestBody = {
1357
+ trace_id: traceId != null ? traceId : null,
1358
+ span_id: parentSpanId != null ? parentSpanId : null,
1359
+ name: name != null ? name : null,
1360
+ data,
1361
+ settings,
1362
+ as_guardrail: asGuardrail
1363
+ };
1364
+ const url = `${_chunkOHM7JUMRjs.__privateGet.call(void 0, this, _endpoint)}/api/evaluations/${slug}/evaluate`;
1365
+ _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _logger).debug(`Calling evaluation API: ${url}`);
1366
+ const response = await fetch(url, {
1367
+ method: "POST",
1368
+ headers: {
1369
+ "Content-Type": "application/json",
1370
+ "X-Auth-Token": _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _apiKey)
1371
+ },
1372
+ body: JSON.stringify(requestBody)
1373
+ });
1374
+ if (!response.ok) {
1375
+ const errorText = await response.text();
1376
+ if (response.status === 404) {
1377
+ throw new EvaluatorNotFoundError(slug);
1378
+ }
1379
+ throw new EvaluationsApiError(
1380
+ `Evaluation API returned ${response.status}: ${errorText}`,
1381
+ response.status
1382
+ );
1383
+ }
1384
+ const responseData = await response.json();
1385
+ const result = _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1386
+ status: responseData.status
1387
+ }, responseData.passed !== null && responseData.passed !== void 0 && { passed: responseData.passed }), responseData.score !== null && responseData.score !== void 0 && { score: responseData.score }), responseData.details !== null && responseData.details !== void 0 && { details: responseData.details }), responseData.label !== null && responseData.label !== void 0 && { label: responseData.label }), responseData.cost !== null && responseData.cost !== void 0 && { cost: responseData.cost });
1388
+ langwatchSpan.setOutput({
1389
+ type: asGuardrail ? "guardrail_result" : "evaluation_result",
1390
+ value: result
1391
+ });
1392
+ if (result.status === "error") {
1393
+ otelSpan.setStatus({
1394
+ code: _api.SpanStatusCode.ERROR,
1395
+ message: (_a = result.details) != null ? _a : "Evaluation failed"
1396
+ });
1397
+ } else {
1398
+ otelSpan.setStatus({ code: _api.SpanStatusCode.OK });
1399
+ }
1400
+ return result;
1401
+ } catch (error) {
1402
+ const errorResult = {
1403
+ status: "error",
1404
+ details: error instanceof Error ? error.message : String(error)
1405
+ };
1406
+ if (asGuardrail) {
1407
+ errorResult.passed = true;
1408
+ }
1409
+ langwatchSpan.setOutput({
1410
+ type: asGuardrail ? "guardrail_result" : "evaluation_result",
1411
+ value: errorResult
1412
+ });
1413
+ otelSpan.setStatus({
1414
+ code: _api.SpanStatusCode.ERROR,
1415
+ message: errorResult.details
1416
+ });
1417
+ if (error instanceof Error) {
1418
+ otelSpan.recordException(error);
1419
+ }
1420
+ if (error instanceof EvaluatorNotFoundError || error instanceof EvaluationsApiError || error instanceof EvaluatorCallError) {
1421
+ throw error;
1422
+ }
1423
+ throw new EvaluatorCallError(
1424
+ slug,
1425
+ error instanceof Error ? error.message : String(error)
1426
+ );
1427
+ } finally {
1428
+ otelSpan.end();
1429
+ }
1430
+ };
1431
+ _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _endpoint, config.endpoint);
1432
+ _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _apiKey, config.apiKey);
1433
+ _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _logger, config.logger);
1434
+ }
1435
+ };
1436
+ _endpoint = new WeakMap();
1437
+ _apiKey = new WeakMap();
1438
+ _logger = new WeakMap();
1439
+
978
1440
  // src/client-sdk/services/traces/types.ts
979
1441
  var TracesError = class extends Error {
980
1442
  constructor(message, operation, originalError) {
@@ -986,13 +1448,13 @@ var TracesError = class extends Error {
986
1448
  };
987
1449
 
988
1450
  // src/client-sdk/services/traces/tracing/tracer.ts
989
- var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkN7PJJMU2js.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkN7PJJMU2js.LANGWATCH_SDK_VERSION);
1451
+ var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkNPFWFQK6js.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkNPFWFQK6js.LANGWATCH_SDK_VERSION);
990
1452
 
991
1453
  // src/client-sdk/services/traces/service.ts
992
1454
  var TracesService = class {
993
1455
  constructor(config) {
994
1456
  this.config = config;
995
- return _chunkD4H6PR6Hjs.createTracingProxy.call(void 0,
1457
+ return _chunkDXBTJGCKjs.createTracingProxy.call(void 0,
996
1458
  this,
997
1459
  tracer
998
1460
  );
@@ -1055,18 +1517,18 @@ var LangWatch = class {
1055
1517
  _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _LangWatch_instances);
1056
1518
  var _a, _b, _c, _d;
1057
1519
  const apiKey = (_b = (_a = options.apiKey) != null ? _a : process.env.LANGWATCH_API_KEY) != null ? _b : "";
1058
- const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkN7PJJMU2js.DEFAULT_ENDPOINT;
1520
+ const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkNPFWFQK6js.DEFAULT_ENDPOINT;
1059
1521
  this.config = _chunkOHM7JUMRjs.__privateMethod.call(void 0, this, _LangWatch_instances, createInternalConfig_fn).call(this, {
1060
1522
  apiKey,
1061
1523
  endpoint,
1062
1524
  options: options.options
1063
1525
  });
1064
- this.prompts = new (0, _chunkD4H6PR6Hjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1065
- promptsApiService: new (0, _chunkD4H6PR6Hjs.PromptsApiService)(this.config),
1066
- localPromptsService: new (0, _chunkD4H6PR6Hjs.LocalPromptsService)()
1526
+ this.prompts = new (0, _chunkDXBTJGCKjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1527
+ promptsApiService: new (0, _chunkDXBTJGCKjs.PromptsApiService)(this.config),
1528
+ localPromptsService: new (0, _chunkDXBTJGCKjs.LocalPromptsService)()
1067
1529
  }, this.config));
1068
1530
  this.traces = new TracesFacade(this.config);
1069
- this.evaluation = new EvaluationFacade({
1531
+ this.experiments = new ExperimentsFacade({
1070
1532
  langwatchApiClient: this.config.langwatchApiClient,
1071
1533
  endpoint: this.config.endpoint,
1072
1534
  apiKey: this.config.apiKey,
@@ -1076,6 +1538,11 @@ var LangWatch = class {
1076
1538
  langwatchApiClient: this.config.langwatchApiClient,
1077
1539
  logger: this.config.logger
1078
1540
  });
1541
+ this.evaluations = new EvaluationsFacade({
1542
+ endpoint: this.config.endpoint,
1543
+ apiKey: this.config.apiKey,
1544
+ logger: this.config.logger
1545
+ });
1079
1546
  }
1080
1547
  get apiClient() {
1081
1548
  return this.config.langwatchApiClient;
@@ -1090,7 +1557,7 @@ createInternalConfig_fn = function({
1090
1557
  var _a;
1091
1558
  return {
1092
1559
  logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunk5MQQRSVMjs.NoOpLogger)(),
1093
- langwatchApiClient: _chunkD4H6PR6Hjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
1560
+ langwatchApiClient: _chunkDXBTJGCKjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
1094
1561
  endpoint,
1095
1562
  apiKey
1096
1563
  };
@@ -1117,5 +1584,10 @@ var logger = {
1117
1584
 
1118
1585
 
1119
1586
 
1120
- exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy = _chunkD4H6PR6Hjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkJQYW7RY7js.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunkJQYW7RY7js.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
1587
+
1588
+
1589
+
1590
+
1591
+
1592
+ exports.EvaluationError = EvaluationError; exports.EvaluationsApiError = EvaluationsApiError; exports.EvaluationsFacade = EvaluationsFacade; exports.EvaluatorCallError = EvaluatorCallError; exports.EvaluatorError = EvaluatorError; exports.EvaluatorNotFoundError = EvaluatorNotFoundError; exports.Experiment = Experiment; exports.ExperimentApiError = ExperimentApiError; exports.ExperimentError = ExperimentError; exports.ExperimentInitError = ExperimentInitError; exports.ExperimentsFacade = ExperimentsFacade; exports.FetchPolicy = _chunkDXBTJGCKjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunk4BNGSDYWjs.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunk4BNGSDYWjs.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
1121
1593
  //# sourceMappingURL=index.js.map