langwatch 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/dist/{add-5FRWEQ32.mjs → add-2UHFYNUA.mjs} +8 -8
  2. package/dist/add-2UHFYNUA.mjs.map +1 -0
  3. package/dist/{add-CXUS4ZSQ.js → add-LUETMKBD.js} +11 -11
  4. package/dist/add-LUETMKBD.js.map +1 -0
  5. package/dist/{chunk-CKIZDPIJ.js → chunk-5MQQRSVM.js} +1 -1
  6. package/dist/{chunk-CKIZDPIJ.js.map → chunk-5MQQRSVM.js.map} +1 -1
  7. package/dist/{chunk-NM5OKM7F.js → chunk-6SSCBYJM.js} +21 -20
  8. package/dist/chunk-6SSCBYJM.js.map +1 -0
  9. package/dist/{chunk-SNDTNU3T.js → chunk-ASTAIRXG.js} +2 -2
  10. package/dist/{chunk-SNDTNU3T.js.map → chunk-ASTAIRXG.js.map} +1 -1
  11. package/dist/{chunk-BTCJWUS5.js → chunk-BQRUUTN3.js} +17 -17
  12. package/dist/{chunk-BTCJWUS5.js.map → chunk-BQRUUTN3.js.map} +1 -1
  13. package/dist/{chunk-YWO3NE5A.js → chunk-C4XUWCQR.js} +2 -2
  14. package/dist/{chunk-YWO3NE5A.js.map → chunk-C4XUWCQR.js.map} +1 -1
  15. package/dist/{chunk-WHPBZSTS.mjs → chunk-IIUI2XYW.mjs} +2 -2
  16. package/dist/{chunk-A43BYF5Q.js → chunk-ONXIZKC6.js} +11 -11
  17. package/dist/{chunk-A43BYF5Q.js.map → chunk-ONXIZKC6.js.map} +1 -1
  18. package/dist/{chunk-I3X7VMSP.mjs → chunk-OTID7S7K.mjs} +8 -7
  19. package/dist/chunk-OTID7S7K.mjs.map +1 -0
  20. package/dist/{chunk-I2SOBPAF.mjs → chunk-RSIPLYVA.mjs} +1 -1
  21. package/dist/{chunk-I2SOBPAF.mjs.map → chunk-RSIPLYVA.mjs.map} +1 -1
  22. package/dist/{chunk-W6FD5ZO3.mjs → chunk-TB5KB737.mjs} +2 -2
  23. package/dist/{chunk-W6FD5ZO3.mjs.map → chunk-TB5KB737.mjs.map} +1 -1
  24. package/dist/{chunk-VGVWXKVM.mjs → chunk-WCNDT5SD.mjs} +3 -3
  25. package/dist/{chunk-FEL5FLHA.mjs → chunk-ZEPKV5YO.mjs} +2 -2
  26. package/dist/cli/index.js +6 -6
  27. package/dist/cli/index.mjs +6 -6
  28. package/dist/{implementation-CPxv2BdW.d.ts → implementation-Bnc8Aymq.d.ts} +1 -1
  29. package/dist/{implementation-CVrmD0bz.d.mts → implementation-Ck58nRkT.d.mts} +1 -1
  30. package/dist/index.d.mts +666 -3
  31. package/dist/index.d.ts +666 -3
  32. package/dist/index.js +1249 -17
  33. package/dist/index.js.map +1 -1
  34. package/dist/index.mjs +1241 -9
  35. package/dist/index.mjs.map +1 -1
  36. package/dist/{list-K6E3OGYH.js → list-7U3M64GY.js} +10 -10
  37. package/dist/{list-K6E3OGYH.js.map → list-7U3M64GY.js.map} +1 -1
  38. package/dist/{list-DQ6XLQCK.mjs → list-WV5LA6LD.mjs} +7 -7
  39. package/dist/{login-HX7WMLPL.js → login-B7DKMN7P.js} +4 -4
  40. package/dist/{login-HX7WMLPL.js.map → login-B7DKMN7P.js.map} +1 -1
  41. package/dist/{login-TJ2NCUAJ.mjs → login-QKRT6PXA.mjs} +3 -3
  42. package/dist/login-QKRT6PXA.mjs.map +1 -0
  43. package/dist/observability-sdk/index.d.mts +3 -3
  44. package/dist/observability-sdk/index.d.ts +3 -3
  45. package/dist/observability-sdk/index.js +6 -6
  46. package/dist/observability-sdk/index.js.map +1 -1
  47. package/dist/observability-sdk/index.mjs +10 -10
  48. package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
  49. package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
  50. package/dist/observability-sdk/instrumentation/langchain/index.js +16 -16
  51. package/dist/observability-sdk/instrumentation/langchain/index.mjs +2 -2
  52. package/dist/observability-sdk/setup/node/index.js +13 -13
  53. package/dist/observability-sdk/setup/node/index.mjs +3 -3
  54. package/dist/{remove-5ZOYQTF4.mjs → remove-2OGMXSTR.mjs} +7 -7
  55. package/dist/{remove-45A7WUCB.js → remove-A4DKCN7A.js} +9 -9
  56. package/dist/{remove-45A7WUCB.js.map → remove-A4DKCN7A.js.map} +1 -1
  57. package/dist/{sync-LL6TTFMS.mjs → sync-TNVCKWTC.mjs} +9 -9
  58. package/dist/sync-TNVCKWTC.mjs.map +1 -0
  59. package/dist/{sync-BE7XZC2A.js → sync-WRZXIBZS.js} +11 -11
  60. package/dist/sync-WRZXIBZS.js.map +1 -0
  61. package/dist/{types-Kts5RGLY.d.mts → types-5h2Im4pl.d.mts} +162 -0
  62. package/dist/{types-usU5mTCX.d.ts → types-fo-Ij9pl.d.ts} +162 -0
  63. package/package.json +125 -142
  64. package/dist/add-5FRWEQ32.mjs.map +0 -1
  65. package/dist/add-CXUS4ZSQ.js.map +0 -1
  66. package/dist/chunk-I3X7VMSP.mjs.map +0 -1
  67. package/dist/chunk-NM5OKM7F.js.map +0 -1
  68. package/dist/login-TJ2NCUAJ.mjs.map +0 -1
  69. package/dist/sync-BE7XZC2A.js.map +0 -1
  70. package/dist/sync-LL6TTFMS.mjs.map +0 -1
  71. /package/dist/{chunk-WHPBZSTS.mjs.map → chunk-IIUI2XYW.mjs.map} +0 -0
  72. /package/dist/{chunk-VGVWXKVM.mjs.map → chunk-WCNDT5SD.mjs.map} +0 -0
  73. /package/dist/{chunk-FEL5FLHA.mjs.map → chunk-ZEPKV5YO.mjs.map} +0 -0
  74. /package/dist/{list-DQ6XLQCK.mjs.map → list-WV5LA6LD.mjs.map} +0 -0
  75. /package/dist/{remove-5ZOYQTF4.mjs.map → remove-2OGMXSTR.mjs.map} +0 -0
package/dist/index.js CHANGED
@@ -5,27 +5,28 @@
5
5
 
6
6
 
7
7
 
8
- var _chunkNM5OKM7Fjs = require('./chunk-NM5OKM7F.js');
8
+ var _chunk6SSCBYJMjs = require('./chunk-6SSCBYJM.js');
9
9
 
10
10
 
11
- var _chunkSNDTNU3Tjs = require('./chunk-SNDTNU3T.js');
11
+ var _chunkASTAIRXGjs = require('./chunk-ASTAIRXG.js');
12
12
 
13
13
 
14
14
 
15
- var _chunkBTCJWUS5js = require('./chunk-BTCJWUS5.js');
15
+ var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
16
16
 
17
17
 
18
18
 
19
+ var _chunkBQRUUTN3js = require('./chunk-BQRUUTN3.js');
19
20
 
20
- var _chunkYWO3NE5Ajs = require('./chunk-YWO3NE5A.js');
21
21
 
22
22
 
23
- var _chunkA43BYF5Qjs = require('./chunk-A43BYF5Q.js');
24
23
 
24
+ var _chunkC4XUWCQRjs = require('./chunk-C4XUWCQR.js');
25
25
 
26
26
 
27
27
 
28
- var _chunkCKIZDPIJjs = require('./chunk-CKIZDPIJ.js');
28
+
29
+ var _chunk5MQQRSVMjs = require('./chunk-5MQQRSVM.js');
29
30
  require('./chunk-WAAQLJ67.js');
30
31
  require('./chunk-AZHZ4NB4.js');
31
32
 
@@ -36,6 +37,1218 @@ require('./chunk-AZHZ4NB4.js');
36
37
 
37
38
  var _chunkOHM7JUMRjs = require('./chunk-OHM7JUMR.js');
38
39
 
40
+ // src/client-sdk/services/datasets/errors.ts
41
/**
 * Base class for every error raised by the dataset service.
 */
class DatasetError extends Error {
  /**
   * @param message - Human-readable description of the failure
   */
  constructor(message) {
    super(message);
    this.name = "DatasetError";
  }
}

/**
 * Raised when the requested dataset does not exist.
 */
class DatasetNotFoundError extends DatasetError {
  /**
   * @param slugOrId - The slug or ID that was looked up
   */
  constructor(slugOrId) {
    super(`Dataset not found: ${slugOrId}`);
    this.name = "DatasetNotFoundError";
  }
}

/**
 * Raised when the dataset API answers with a failure other than "not found".
 */
class DatasetApiError extends DatasetError {
  /**
   * @param message - Human-readable description of the failure
   * @param status - HTTP status code of the failed response
   */
  constructor(message, status) {
    super(message);
    this.name = "DatasetApiError";
    this.status = status;
  }
}
60
+
61
+ // src/client-sdk/services/datasets/dataset.service.ts
62
var _config;
var DatasetService = class {
  /**
   * @param config - Service configuration; this class reads `logger` and
   *   `langwatchApiClient` from it
   */
  constructor(config) {
    _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _config);
    _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _config, config);
  }
  /**
   * Fetches a dataset by its slug or ID
   *
   * @param slugOrId - The slug or ID of the dataset
   * @param _options - Optional configuration (not read by this method)
   * @returns An object whose `entries` array holds every entry of the dataset
   * @throws DatasetNotFoundError when the API answers with HTTP 404
   * @throws DatasetApiError for any other failed response
   */
  async getDataset(slugOrId, _options) {
    const config = _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _config);
    config.logger.debug(`Fetching dataset: ${slugOrId}`);
    const response = await config.langwatchApiClient.GET(
      "/api/dataset/{slugOrId}",
      { params: { path: { slugOrId } } }
    );
    // A failed request may carry an empty error body, which is falsy; treat a
    // missing payload as a failure too instead of crashing on `data.data`.
    if (response.error || response.data == null) {
      const status = response.response.status;
      if (status === 404) {
        throw new DatasetNotFoundError(slugOrId);
      }
      const apiError = response.error;
      let errorMessage;
      if (typeof apiError === "string") {
        // Non-JSON error bodies arrive as plain text; the `in` operator would
        // throw a TypeError on them.
        errorMessage = apiError;
      } else if (apiError !== null && typeof apiError === "object") {
        errorMessage = "message" in apiError
          ? apiError.message
          : "error" in apiError
            ? apiError.error
            : `Failed to fetch dataset: ${slugOrId}`;
      }
      const hasMessage = errorMessage != null && errorMessage !== "";
      throw new DatasetApiError(hasMessage ? errorMessage : `HTTP ${status}`, status);
    }
    const data = response.data;
    // Copy only the documented fields of each entry
    const entries = data.data.map((item) => ({
      id: item.id,
      datasetId: item.datasetId,
      projectId: item.projectId,
      entry: item.entry,
      createdAt: item.createdAt,
      updatedAt: item.updatedAt
    }));
    config.logger.debug(
      `Fetched dataset ${slugOrId} with ${entries.length} entries`
    );
    return { entries };
  }
};
_config = new WeakMap();
111
+
112
+ // src/client-sdk/services/datasets/datasets.facade.ts
113
var _datasetService;
var DatasetsFacade = class {
  /**
   * @param config - Configuration handed unchanged to the underlying DatasetService
   */
  constructor(config) {
    _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _datasetService);
    /**
     * Retrieves a dataset, looked up by slug or by ID
     *
     * @param slugOrId - Slug or ID identifying the dataset
     * @param options - Optional configuration, forwarded to the service
     * @returns Whatever the service's getDataset() resolves to
     *
     * @example
     * ```typescript
     * // Look up by slug
     * const dataset = await langwatch.datasets.get("product-qa");
     *
     * // Look up by ID
     * const dataset = await langwatch.datasets.get("ds_abc123");
     *
     * // With a typed entry shape
     * type MyDatasetEntry = { input: string; expected_output: string; };
     * const dataset = await langwatch.datasets.get<MyDatasetEntry>("my-dataset");
     *
     * // Walk the entries
     * for (const entry of dataset.entries) {
     *   console.log(entry.entry.input); // typed as string
     * }
     * ```
     */
    this.get = (slugOrId, options) =>
      _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _datasetService).getDataset(slugOrId, options);
    _chunkOHM7JUMRjs.__privateSet.call(void 0, this, _datasetService, new DatasetService(config));
  }
};
_datasetService = new WeakMap();
149
+
150
+ // src/client-sdk/services/evaluation/evaluation.ts
151
+ var _async_hooks = require('async_hooks');
152
+ var _api = require('@opentelemetry/api');
153
+
154
+ // src/client-sdk/services/evaluation/humanReadableId.ts
155
// Word lists used to build human-readable run identifiers
var ADJECTIVES = [
  "swift", "bright", "calm", "eager", "bold", "keen", "warm", "cool",
  "wise", "fair", "glad", "kind", "neat", "pure", "safe", "true",
  "vast", "wild", "zesty", "agile", "brave", "crisp", "dense", "epic",
  "fresh", "grand", "happy", "ideal", "jolly", "lively", "merry", "noble",
  "proud", "quick", "rapid", "sharp", "smart", "solid", "sunny", "vivid",
  "gentle", "silent", "cosmic", "golden", "silver", "ancient", "modern", "mighty",
  "humble"
];
var NOUNS = [
  "fox", "owl", "bee", "elk", "hawk", "lynx", "wolf", "bear",
  "deer", "dove", "eagle", "finch", "heron", "koala", "lemur", "moose",
  "otter", "panda", "raven", "robin", "seal", "swan", "tiger", "whale",
  "zebra", "atlas", "bloom", "cloud", "delta", "ember", "flame", "grove",
  "haven", "iris", "jade", "leaf", "moon", "nova", "ocean", "peak",
  "river", "spark", "storm", "tide", "wave", "comet", "prism", "coral"
];
/**
 * Builds an identifier of the form `adjective<sep>adjective<sep>noun`
 * (for example "swift-bright-fox") from the word lists above.
 *
 * The two adjectives are always different. Uses Math.random(), so the result
 * is not suitable for anything security-sensitive.
 *
 * @param separator - String placed between the three words (default "-")
 * @returns The generated identifier
 */
var generateHumanReadableId = (separator = "-") => {
  const firstIndex = Math.floor(Math.random() * ADJECTIVES.length);
  // Draw the second adjective from the remaining words and skip over the first
  // one, so every other adjective is equally likely. Bumping to "next index"
  // only on a collision would make that neighbour twice as likely.
  let secondIndex = Math.floor(Math.random() * (ADJECTIVES.length - 1));
  if (secondIndex >= firstIndex) {
    secondIndex += 1;
  }
  const noun = NOUNS[Math.floor(Math.random() * NOUNS.length)];
  return [ADJECTIVES[firstIndex], ADJECTIVES[secondIndex], noun].join(separator);
};
267
+
268
+ // src/client-sdk/services/evaluation/errors/evaluation.error.ts
269
/**
 * Base class for every error raised by the evaluation service.
 */
class EvaluationError extends Error {
  /**
   * @param message - Human-readable description of the failure
   */
  constructor(message) {
    super(message);
    this.name = "EvaluationError";
  }
}

/**
 * Raised when an evaluation session cannot be initialized.
 */
class EvaluationInitError extends EvaluationError {
  /**
   * @param message - Human-readable description of the failure
   * @param cause - Underlying error, if any
   */
  constructor(message, cause) {
    super(message);
    this.cause = cause;
    this.name = "EvaluationInitError";
  }
}

/**
 * Raised for a failed evaluation API call.
 */
class EvaluationApiError extends EvaluationError {
  /**
   * @param message - Human-readable description of the failure
   * @param statusCode - Status code associated with the failure
   * @param cause - Underlying error, if any
   */
  constructor(message, statusCode, cause) {
    super(message);
    this.statusCode = statusCode;
    this.cause = cause;
    this.name = "EvaluationApiError";
  }
}

/**
 * Raised when a target name is reused with metadata that differs from the
 * metadata it was first registered with.
 */
class TargetMetadataConflictError extends EvaluationError {
  /**
   * @param targetName - Name of the conflicting target
   * @param existingMetadata - Metadata recorded at first registration
   * @param newMetadata - Metadata supplied by the conflicting registration
   */
  constructor(targetName, existingMetadata, newMetadata) {
    super(
      `Target '${targetName}' was previously registered with different metadata.
Original: ${JSON.stringify(existingMetadata)}
New: ${JSON.stringify(newMetadata)}
If you want to use different metadata, please use a different target name.`
    );
    this.targetName = targetName;
    this.existingMetadata = existingMetadata;
    this.newMetadata = newMetadata;
    this.name = "TargetMetadataConflictError";
  }
}

/**
 * Raised when running an evaluator fails.
 */
class EvaluatorError extends EvaluationError {
  /**
   * @param evaluatorSlug - Identifier of the evaluator that failed
   * @param message - Description of the failure (embedded in the final message)
   * @param cause - Underlying error, if any
   */
  constructor(evaluatorSlug, message, cause) {
    super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
    this.evaluatorSlug = evaluatorSlug;
    this.cause = cause;
    this.name = "EvaluatorError";
  }
}
312
+
313
+ // src/client-sdk/services/evaluation/evaluation.ts
314
+ var DEFAULT_CONCURRENCY = 4;
315
+ var DEBOUNCE_INTERVAL_MS = 1e3;
316
+ var iterationContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
317
+ var targetContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
318
+ var Evaluation = class _Evaluation {
319
+ constructor(name, options) {
320
+ this.initialized = false;
321
+ this.total = 0;
322
+ this.progress = 0;
323
+ // Batching state
324
+ this.batch = { dataset: [], evaluations: [], targets: [] };
325
+ this.lastSentMs = 0;
326
+ this.pendingFlush = null;
327
+ this.flushTimeout = null;
328
+ // Target registry
329
+ this.targets = /* @__PURE__ */ new Map();
330
+ // Current iteration context (for log/evaluate calls)
331
+ this.currentTraceId = null;
332
+ this.currentIndex = null;
333
+ // Track whether withTarget() was used in the current iteration
334
+ // If so, we don't create dataset entries in executeItem()
335
+ // Note: This is now checked via iterationContextStorage to be thread-safe
336
+ this.iterationUsedWithTarget = /* @__PURE__ */ new Map();
337
+ // Track whether withTarget() has EVER been used in this evaluation
338
+ // Once set to true, we stop creating iteration-level traces
339
+ this.evaluationUsesTargets = false;
340
+ var _a, _b;
341
+ this.name = name;
342
+ this.experimentSlug = name;
343
+ this.runId = (_a = options.runId) != null ? _a : generateHumanReadableId();
344
+ this.apiClient = options.apiClient;
345
+ this.endpoint = options.endpoint;
346
+ this.apiKey = options.apiKey;
347
+ this.logger = options.logger;
348
+ this.concurrency = (_b = options.concurrency) != null ? _b : DEFAULT_CONCURRENCY;
349
+ this.createdAtMs = Date.now();
350
+ }
351
+ /**
352
+ * Initialize an evaluation session
353
+ */
354
+ static async init(name, options) {
355
+ const evaluation = new _Evaluation(name, options);
356
+ await evaluation.initialize();
357
+ return evaluation;
358
+ }
359
+ /**
360
+ * Initialize the evaluation by creating/getting the experiment
361
+ */
362
+ async initialize() {
363
+ if (!this.apiKey) {
364
+ throw new EvaluationInitError(
365
+ "API key is required. Set LANGWATCH_API_KEY or pass apiKey to LangWatch constructor."
366
+ );
367
+ }
368
+ try {
369
+ const response = await fetch(`${this.endpoint}/api/experiment/init`, {
370
+ method: "POST",
371
+ headers: {
372
+ "Content-Type": "application/json",
373
+ "X-Auth-Token": this.apiKey
374
+ },
375
+ body: JSON.stringify({
376
+ experiment_name: this.name,
377
+ experiment_slug: this.experimentSlug,
378
+ experiment_type: "BATCH_EVALUATION_V2"
379
+ })
380
+ });
381
+ if (response.status === 401) {
382
+ throw new EvaluationInitError("Invalid API key");
383
+ }
384
+ if (!response.ok) {
385
+ const text = await response.text();
386
+ throw new EvaluationInitError(`Failed to initialize experiment: ${text}`);
387
+ }
388
+ const data = await response.json();
389
+ this.experimentSlug = data.slug;
390
+ const encodedRunId = encodeURIComponent(this.runId);
391
+ console.log(`Follow results at: ${this.endpoint}${data.path}?runId=${encodedRunId}`);
392
+ this.initialized = true;
393
+ } catch (error) {
394
+ if (error instanceof EvaluationInitError) {
395
+ throw error;
396
+ }
397
+ throw new EvaluationInitError(
398
+ `Failed to initialize evaluation: ${error instanceof Error ? error.message : String(error)}`,
399
+ error instanceof Error ? error : void 0
400
+ );
401
+ }
402
+ }
403
+ /**
404
+ * Run evaluation over a dataset with a callback
405
+ *
406
+ * @param dataset - Array of items to evaluate
407
+ * @param callback - Function called for each item with { item, index, span }
408
+ * @param options - Concurrency options
409
+ *
410
+ * @example
411
+ * ```typescript
412
+ * await evaluation.run(dataset, async ({ item, index, span }) => {
413
+ * const response = await myAgent(item.question);
414
+ * evaluation.log('accuracy', { index, score: 0.95 });
415
+ * }, { concurrency: 4 });
416
+ * ```
417
+ */
418
+ async run(dataset, callback, options) {
419
+ var _a;
420
+ if (!this.initialized) {
421
+ await this.initialize();
422
+ }
423
+ const concurrency = (_a = options == null ? void 0 : options.concurrency) != null ? _a : this.concurrency;
424
+ this.total = dataset.length;
425
+ this.progress = 0;
426
+ const tracer2 = _api.trace.getTracer("langwatch-evaluation");
427
+ const executing = /* @__PURE__ */ new Set();
428
+ for (let index = 0; index < dataset.length; index++) {
429
+ const item = dataset[index];
430
+ const itemPromise = this.executeItem(tracer2, item, index, callback);
431
+ executing.add(itemPromise);
432
+ void itemPromise.finally(() => executing.delete(itemPromise));
433
+ if (executing.size >= concurrency) {
434
+ await Promise.race(executing);
435
+ }
436
+ }
437
+ await Promise.all(executing);
438
+ await this.flush(true);
439
+ }
440
+ /**
441
+ * Execute a single item in the dataset
442
+ */
443
+ async executeItem(tracer2, item, index, callback) {
444
+ var _a;
445
+ const startTime = Date.now();
446
+ let error;
447
+ let capturedTraceId = null;
448
+ this.iterationUsedWithTarget.set(index, false);
449
+ const iterationContext = { index, item };
450
+ if (this.evaluationUsesTargets) {
451
+ await iterationContextStorage.run(iterationContext, async () => {
452
+ this.currentIndex = index;
453
+ try {
454
+ const span = {
455
+ setStatus: () => {
456
+ },
457
+ recordException: () => {
458
+ },
459
+ end: () => {
460
+ }
461
+ };
462
+ const ctx = { item, index, span };
463
+ const result = callback(ctx);
464
+ if (result && typeof result.then === "function") {
465
+ await result;
466
+ }
467
+ } catch (err) {
468
+ error = err instanceof Error ? err : new Error(String(err));
469
+ this.logger.error(`Evaluation error at index ${index}:`, error);
470
+ } finally {
471
+ this.currentIndex = null;
472
+ }
473
+ });
474
+ } else {
475
+ await iterationContextStorage.run(iterationContext, async () => {
476
+ await tracer2.startActiveSpan(
477
+ "evaluation.iteration",
478
+ {
479
+ attributes: {
480
+ "evaluation.run_id": this.runId,
481
+ "evaluation.index": index
482
+ }
483
+ },
484
+ async (otelSpan) => {
485
+ const span = _chunkONXIZKC6js.createLangWatchSpan.call(void 0, otelSpan);
486
+ const spanContext = otelSpan.spanContext();
487
+ const traceId = spanContext.traceId;
488
+ this.currentTraceId = traceId;
489
+ this.currentIndex = index;
490
+ capturedTraceId = traceId;
491
+ try {
492
+ const ctx = { item, index, span };
493
+ const result = callback(ctx);
494
+ if (result && typeof result.then === "function") {
495
+ await result;
496
+ }
497
+ span.setStatus({ code: _api.SpanStatusCode.OK });
498
+ } catch (err) {
499
+ error = err instanceof Error ? err : new Error(String(err));
500
+ span.setStatus({
501
+ code: _api.SpanStatusCode.ERROR,
502
+ message: error.message
503
+ });
504
+ span.recordException(error);
505
+ this.logger.error(`Evaluation error at index ${index}:`, error);
506
+ } finally {
507
+ span.end();
508
+ this.currentTraceId = null;
509
+ this.currentIndex = null;
510
+ }
511
+ }
512
+ );
513
+ });
514
+ }
515
+ if (!this.iterationUsedWithTarget.get(index)) {
516
+ const duration = Date.now() - startTime;
517
+ const entry = {
518
+ index,
519
+ entry: this.serializeItem(item),
520
+ duration,
521
+ error: (_a = error == null ? void 0 : error.message) != null ? _a : null,
522
+ trace_id: capturedTraceId != null ? capturedTraceId : this.getTraceIdFromContext()
523
+ };
524
+ this.batch.dataset.push(entry);
525
+ }
526
+ this.iterationUsedWithTarget.delete(index);
527
+ this.progress++;
528
+ this.scheduleSend();
529
+ }
530
+ /**
531
+ * Log a custom metric result
532
+ *
533
+ * @param metric - Name of the metric
534
+ * @param options - Metric options including index, score, passed, etc.
535
+ *
536
+ * If called inside a withTarget() block, the target and index are automatically
537
+ * inferred from the context and don't need to be specified.
538
+ *
539
+ * @example
540
+ * ```typescript
541
+ * // Explicit target (outside withTarget)
542
+ * evaluation.log('accuracy', { index, score: 0.95, target: 'gpt-4' });
543
+ *
544
+ * // Implicit target (inside withTarget)
545
+ * await evaluation.withTarget('gpt-4', { model: 'openai/gpt-4' }, async () => {
546
+ * evaluation.log('accuracy', { score: 0.95 }); // target and index auto-inferred
547
+ * });
548
+ * ```
549
+ */
550
+ log(metric, options) {
551
+ var _a, _b, _c, _d;
552
+ const targetContext = targetContextStorage.getStore();
553
+ const {
554
+ data = {},
555
+ score,
556
+ passed,
557
+ label,
558
+ details,
559
+ status = options.error ? "error" : "processed",
560
+ duration,
561
+ cost,
562
+ error,
563
+ // Use context values as defaults, allow explicit override
564
+ target = targetContext == null ? void 0 : targetContext.targetId,
565
+ metadata,
566
+ index = (_a = targetContext == null ? void 0 : targetContext.index) != null ? _a : options.index
567
+ } = options;
568
+ let targetId;
569
+ if (target) {
570
+ targetId = this.registerTarget(target, metadata);
571
+ }
572
+ const traceId = (_c = (_b = targetContext == null ? void 0 : targetContext.traceId) != null ? _b : this.currentTraceId) != null ? _c : this.getTraceIdFromContext();
573
+ const result = {
574
+ name: metric,
575
+ evaluator: metric,
576
+ trace_id: traceId,
577
+ status,
578
+ data,
579
+ score: score != null ? score : null,
580
+ passed: passed != null ? passed : null,
581
+ details: details != null ? details : error ? error.message : null,
582
+ index,
583
+ label: label != null ? label : null,
584
+ cost: cost != null ? cost : null,
585
+ duration: duration != null ? duration : null,
586
+ error_type: error ? error.name : null,
587
+ traceback: error ? [(_d = error.stack) != null ? _d : error.message] : null,
588
+ target_id: targetId != null ? targetId : null
589
+ };
590
+ this.batch.evaluations.push(result);
591
+ this.scheduleSend();
592
+ }
593
+ /**
594
+ * Run a built-in evaluator
595
+ *
596
+ * @param evaluatorSlug - The evaluator identifier (e.g., 'ragas/faithfulness')
597
+ * @param options - Evaluator options including data and settings
598
+ *
599
+ * If called inside a withTarget() block, the target and index are automatically
600
+ * inferred from the context and don't need to be specified.
601
+ *
602
+ * @example
603
+ * ```typescript
604
+ * // Inside withTarget() - target and index auto-inferred
605
+ * await evaluation.withTarget('gpt-4', { model: 'openai/gpt-4' }, async () => {
606
+ * await evaluation.evaluate('ragas/faithfulness', {
607
+ * data: { input, output, contexts },
608
+ * });
609
+ * });
610
+ *
611
+ * // Or explicit index/target
612
+ * await evaluation.evaluate('ragas/faithfulness', {
613
+ * index,
614
+ * data: { input, output, contexts },
615
+ * target: 'gpt-4',
616
+ * });
617
+ * ```
618
+ */
619
+ async evaluate(evaluatorSlug, options) {
620
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i;
621
+ const targetContext = targetContextStorage.getStore();
622
+ const {
623
+ data,
624
+ settings,
625
+ name,
626
+ asGuardrail = false,
627
+ // Use context values as defaults, allow explicit override
628
+ target = targetContext == null ? void 0 : targetContext.targetId,
629
+ metadata,
630
+ index = (_a = targetContext == null ? void 0 : targetContext.index) != null ? _a : options.index
631
+ } = options;
632
+ const startTime = Date.now();
633
+ const traceId = (_c = (_b = targetContext == null ? void 0 : targetContext.traceId) != null ? _b : this.currentTraceId) != null ? _c : this.getTraceIdFromContext();
634
+ const spanId = (_d = targetContext == null ? void 0 : targetContext.spanId) != null ? _d : this.getSpanIdFromContext();
635
+ try {
636
+ const response = await fetch(
637
+ `${this.endpoint}/api/evaluations/${evaluatorSlug}/evaluate`,
638
+ {
639
+ method: "POST",
640
+ headers: {
641
+ "Content-Type": "application/json",
642
+ "X-Auth-Token": this.apiKey
643
+ },
644
+ body: JSON.stringify({
645
+ trace_id: traceId != null ? traceId : null,
646
+ span_id: spanId != null ? spanId : null,
647
+ name: name != null ? name : evaluatorSlug,
648
+ data,
649
+ settings,
650
+ as_guardrail: asGuardrail
651
+ })
652
+ }
653
+ );
654
+ if (!response.ok) {
655
+ const text = await response.text();
656
+ throw new EvaluatorError(evaluatorSlug, text);
657
+ }
658
+ const result = await response.json();
659
+ const duration = Date.now() - startTime;
660
+ this.log(name != null ? name : evaluatorSlug, {
661
+ index,
662
+ data,
663
+ status: result.status,
664
+ score: (_e = result.score) != null ? _e : void 0,
665
+ passed: (_f = result.passed) != null ? _f : void 0,
666
+ details: (_g = result.details) != null ? _g : void 0,
667
+ label: (_h = result.label) != null ? _h : void 0,
668
+ duration,
669
+ cost: (_i = result.cost) == null ? void 0 : _i.amount,
670
+ target,
671
+ metadata
672
+ });
673
+ } catch (error) {
674
+ const duration = Date.now() - startTime;
675
+ if (error instanceof EvaluatorError) {
676
+ this.log(name != null ? name : evaluatorSlug, {
677
+ index,
678
+ data,
679
+ status: "error",
680
+ duration,
681
+ error,
682
+ target,
683
+ metadata
684
+ });
685
+ throw error;
686
+ }
687
+ const wrappedError = new EvaluatorError(
688
+ evaluatorSlug,
689
+ error instanceof Error ? error.message : String(error),
690
+ error instanceof Error ? error : void 0
691
+ );
692
+ this.log(name != null ? name : evaluatorSlug, {
693
+ index,
694
+ data,
695
+ status: "error",
696
+ duration,
697
+ error: wrappedError,
698
+ target,
699
+ metadata
700
+ });
701
+ throw wrappedError;
702
+ }
703
+ }
704
+ async withTarget(targetName, metadataOrCallback, maybeCallback) {
705
+ var _a, _b, _c;
706
+ const metadata = typeof metadataOrCallback === "function" ? null : metadataOrCallback;
707
+ const callback = typeof metadataOrCallback === "function" ? metadataOrCallback : maybeCallback;
708
+ if (!this.evaluationUsesTargets) {
709
+ this.evaluationUsesTargets = true;
710
+ }
711
+ const iterationContext = iterationContextStorage.getStore();
712
+ const index = (_b = (_a = iterationContext == null ? void 0 : iterationContext.index) != null ? _a : this.currentIndex) != null ? _b : 0;
713
+ const currentItem = iterationContext == null ? void 0 : iterationContext.item;
714
+ this.iterationUsedWithTarget.set(index, true);
715
+ this.registerTarget(targetName, metadata != null ? metadata : void 0);
716
+ const tracer2 = _api.trace.getTracer("langwatch-evaluation");
717
+ const startTime = Date.now();
718
+ let result;
719
+ let traceId = "";
720
+ let spanId = "";
721
+ let callbackError;
722
+ await tracer2.startActiveSpan(
723
+ `evaluation.target.${targetName}`,
724
+ {
725
+ attributes: {
726
+ "evaluation.run_id": this.runId,
727
+ "evaluation.target": targetName,
728
+ "evaluation.index": index
729
+ }
730
+ },
731
+ _api.ROOT_CONTEXT,
732
+ async (otelSpan) => {
733
+ const span = _chunkONXIZKC6js.createLangWatchSpan.call(void 0, otelSpan);
734
+ const spanContext = otelSpan.spanContext();
735
+ const rawTraceId = spanContext.traceId;
736
+ spanId = spanContext.spanId;
737
+ const isNoOpTrace = rawTraceId === "00000000000000000000000000000000";
738
+ traceId = isNoOpTrace ? "" : rawTraceId;
739
+ const executionContext = {
740
+ targetId: targetName,
741
+ traceId,
742
+ spanId,
743
+ index
744
+ };
745
+ try {
746
+ result = await targetContextStorage.run(executionContext, async () => {
747
+ const ctx = { span, traceId, spanId };
748
+ const callbackResult = callback(ctx);
749
+ if (callbackResult && typeof callbackResult.then === "function") {
750
+ return await callbackResult;
751
+ }
752
+ return callbackResult;
753
+ });
754
+ span.setStatus({ code: _api.SpanStatusCode.OK });
755
+ } catch (err) {
756
+ callbackError = err instanceof Error ? err : new Error(String(err));
757
+ span.setStatus({
758
+ code: _api.SpanStatusCode.ERROR,
759
+ message: callbackError.message
760
+ });
761
+ span.recordException(callbackError);
762
+ throw err;
763
+ } finally {
764
+ span.end();
765
+ }
766
+ }
767
+ );
768
+ const duration = Date.now() - startTime;
769
+ let predicted = null;
770
+ if (result !== void 0 && result !== null) {
771
+ predicted = typeof result === "object" ? result : { output: result };
772
+ }
773
+ const entry = {
774
+ index,
775
+ entry: this.serializeItem(currentItem),
776
+ duration,
777
+ error: (_c = callbackError == null ? void 0 : callbackError.message) != null ? _c : null,
778
+ trace_id: traceId || null,
779
+ // null if no tracer configured (no-op)
780
+ target_id: targetName,
781
+ predicted
782
+ };
783
+ this.batch.dataset.push(entry);
784
+ this.scheduleSend();
785
+ return {
786
+ result,
787
+ duration,
788
+ traceId,
789
+ spanId
790
+ };
791
+ }
792
+ /**
793
+ * Register a target for multi-target comparison
794
+ */
795
+ registerTarget(name, metadata) {
796
+ var _a;
797
+ const existing = this.targets.get(name);
798
+ if (existing) {
799
+ if (metadata) {
800
+ const existingMeta = (_a = existing.metadata) != null ? _a : {};
801
+ if (JSON.stringify(existingMeta) !== JSON.stringify(metadata)) {
802
+ throw new TargetMetadataConflictError(name, existingMeta, metadata);
803
+ }
804
+ }
805
+ return name;
806
+ }
807
+ const targetInfo = {
808
+ id: name,
809
+ name,
810
+ type: "custom",
811
+ metadata: metadata != null ? metadata : null
812
+ };
813
+ this.targets.set(name, targetInfo);
814
+ this.batch.targets.push(targetInfo);
815
+ return name;
816
+ }
817
+ /**
818
+ * Schedule a debounced send
819
+ */
820
+ scheduleSend() {
821
+ var _a;
822
+ const now = Date.now();
823
+ if (now - this.lastSentMs >= DEBOUNCE_INTERVAL_MS) {
824
+ this.sendBatch();
825
+ } else {
826
+ (_a = this.flushTimeout) != null ? _a : this.flushTimeout = setTimeout(() => {
827
+ this.flushTimeout = null;
828
+ this.sendBatch();
829
+ }, DEBOUNCE_INTERVAL_MS - (now - this.lastSentMs));
830
+ }
831
+ }
832
+ /**
833
+ * Send current batch to the API
834
+ */
835
+ sendBatch(finished = false) {
836
+ if (this.batch.dataset.length === 0 && this.batch.evaluations.length === 0 && this.batch.targets.length === 0 && !finished) {
837
+ return;
838
+ }
839
+ const body = {
840
+ experiment_slug: this.experimentSlug,
841
+ name: this.name,
842
+ run_id: this.runId,
843
+ dataset: this.batch.dataset.map((entry) => {
844
+ var _a, _b, _c;
845
+ return {
846
+ index: entry.index,
847
+ entry: entry.entry,
848
+ duration: entry.duration,
849
+ error: entry.error,
850
+ trace_id: entry.trace_id,
851
+ target_id: (_a = entry.target_id) != null ? _a : null,
852
+ cost: (_b = entry.cost) != null ? _b : null,
853
+ predicted: (_c = entry.predicted) != null ? _c : null
854
+ };
855
+ }),
856
+ evaluations: this.batch.evaluations.map((e) => ({
857
+ name: e.name,
858
+ evaluator: e.evaluator,
859
+ trace_id: e.trace_id,
860
+ status: e.status,
861
+ inputs: e.data,
862
+ score: e.score,
863
+ passed: e.passed,
864
+ details: e.details,
865
+ index: e.index,
866
+ label: e.label,
867
+ cost: e.cost,
868
+ duration: e.duration,
869
+ target_id: e.target_id
870
+ })),
871
+ targets: this.batch.targets,
872
+ progress: this.progress,
873
+ total: this.total,
874
+ timestamps: {
875
+ created_at: this.createdAtMs,
876
+ finished_at: finished ? Date.now() : null
877
+ }
878
+ };
879
+ this.pendingFlush = fetch(`${this.endpoint}/api/evaluations/batch/log_results`, {
880
+ method: "POST",
881
+ headers: {
882
+ "Content-Type": "application/json",
883
+ Authorization: `Bearer ${this.apiKey}`
884
+ },
885
+ body: JSON.stringify(body)
886
+ }).then((response) => {
887
+ if (!response.ok) {
888
+ this.logger.error(`Failed to send batch: ${response.status}`);
889
+ }
890
+ }).catch((error) => {
891
+ this.logger.error("Failed to send batch:", error);
892
+ });
893
+ this.batch = { dataset: [], evaluations: [], targets: [] };
894
+ this.lastSentMs = Date.now();
895
+ }
896
+ /**
897
+ * Flush all pending data
898
+ */
899
+ async flush(finished = false) {
900
+ if (this.flushTimeout) {
901
+ clearTimeout(this.flushTimeout);
902
+ this.flushTimeout = null;
903
+ }
904
+ this.sendBatch(finished);
905
+ if (this.pendingFlush) {
906
+ await this.pendingFlush;
907
+ }
908
+ }
909
+ /**
910
+ * Serialize a dataset item for the API
911
+ */
912
+ serializeItem(item) {
913
+ if (item === null || item === void 0) {
914
+ return item;
915
+ }
916
+ if (typeof item === "object") {
917
+ if ("toJSON" in item && typeof item.toJSON === "function") {
918
+ return item.toJSON();
919
+ }
920
+ return item;
921
+ }
922
+ return item;
923
+ }
924
+ /**
925
+ * Get trace ID from current OpenTelemetry context
926
+ */
927
+ getTraceIdFromContext() {
928
+ const span = _api.trace.getActiveSpan();
929
+ if (span) {
930
+ return span.spanContext().traceId;
931
+ }
932
+ return "";
933
+ }
934
+ /**
935
+ * Get span ID from current OpenTelemetry context
936
+ */
937
+ getSpanIdFromContext() {
938
+ const span = _api.trace.getActiveSpan();
939
+ if (span) {
940
+ return span.spanContext().spanId;
941
+ }
942
+ return null;
943
+ }
944
+ };
945
+
946
+ // src/client-sdk/services/evaluation/platformErrors.ts
947
/** Base class of the platform-evaluation errors declared below. */
var EvaluationsError = class extends Error {
  constructor(message) {
    super(message);
    this.name = "EvaluationsError";
  }
};

/** Raised for an evaluation slug that does not exist. */
var EvaluationNotFoundError = class extends EvaluationsError {
  constructor(slug) {
    const text = `Evaluation not found: ${slug}`;
    super(text);
    this.name = "EvaluationNotFoundError";
  }
};

/** Raised when a run times out; carries the run id and the progress reached. */
var EvaluationTimeoutError = class extends EvaluationsError {
  constructor(runId, progress, total) {
    const text = `Evaluation run timed out: ${runId} (${progress}/${total} completed)`;
    super(text);
    this.name = "EvaluationTimeoutError";
    Object.assign(this, { runId, progress, total });
  }
};

/** Raised when a run fails; carries the run id and the reported error message. */
var EvaluationRunFailedError = class extends EvaluationsError {
  constructor(runId, errorMessage) {
    const text = `Evaluation run failed: ${errorMessage}`;
    super(text);
    this.name = "EvaluationRunFailedError";
    Object.assign(this, { runId, errorMessage });
  }
};

/** Raised for a failed API call; carries the HTTP status code. */
var EvaluationsApiError = class extends EvaluationsError {
  constructor(message, statusCode) {
    super(message);
    this.name = "EvaluationsApiError";
    Object.assign(this, { statusCode });
  }
};
983
+
984
+ // src/client-sdk/services/evaluation/evaluation.facade.ts
985
/** Default delay between two status polls, in milliseconds. */
var DEFAULT_POLL_INTERVAL = 2e3;
/** Default polling budget before a run is reported as timed out, in milliseconds. */
var DEFAULT_TIMEOUT = 6e5;

/**
 * Entry point for evaluations: creates SDK-defined evaluation sessions and
 * runs platform-configured evaluations, polling until they finish.
 */
var EvaluationFacade = class {
  /**
   * @param config - Object providing `langwatchApiClient`, `endpoint`,
   *   `apiKey` and `logger`.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Initialize a new evaluation session (SDK-defined)
   *
   * @param name - Name of the experiment (used as slug)
   * @param options - Optional configuration
   * @returns An initialized Evaluation instance
   *
   * @example
   * ```typescript
   * const evaluation = await langwatch.evaluation.init('my-experiment');
   *
   * await evaluation.run(dataset, async ({ item, index }) => {
   *   const response = await myAgent(item.question);
   *   evaluation.log('accuracy', { index, score: 0.95 });
   * });
   * ```
   */
  async init(name, options) {
    return Evaluation.init(name, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
      apiClient: this.config.langwatchApiClient,
      endpoint: this.config.endpoint,
      apiKey: this.config.apiKey,
      logger: this.config.logger
    }, options));
  }

  /**
   * Run a platform-configured evaluation (Evaluations V3)
   *
   * This runs an evaluation that was configured in the LangWatch platform and
   * resolves once the run has completed or was stopped. Progress is written to
   * stdout while polling. The summary is NOT printed automatically: call
   * `result.printSummary()`, which exits the process with code 1 when there
   * are failures unless `false` is passed.
   *
   * @param slug - The slug of the evaluation (found in the evaluation URL)
   * @param options - Optional configuration (`pollInterval`, `timeout`, `onProgress`)
   * @returns The evaluation results including pass rate and summary
   * @throws EvaluationNotFoundError, EvaluationsApiError,
   *   EvaluationTimeoutError, EvaluationRunFailedError
   *
   * @example
   * ```typescript
   * import { LangWatch } from "langwatch";
   *
   * const langwatch = new LangWatch();
   *
   * const result = await langwatch.evaluation.run("my-evaluation-slug");
   * result.printSummary();
   * ```
   */
  async run(slug, options) {
    this.config.logger.info(`Running platform evaluation: ${slug}`);
    const result = await this.runWithPolling(slug, options);
    return result;
  }

  /**
   * Run an evaluation and wait for completion using polling
   *
   * @param slug - Evaluation slug.
   * @param options - `pollInterval` and `timeout` in milliseconds, and an
   *   optional `onProgress(progress, total)` callback invoked after every poll.
   */
  async runWithPolling(slug, options = {}) {
    // The default parameter only covers `undefined`; tolerate an explicit null too.
    const settings = options != null ? options : {};
    const pollInterval = settings.pollInterval != null ? settings.pollInterval : DEFAULT_POLL_INTERVAL;
    const timeout = settings.timeout != null ? settings.timeout : DEFAULT_TIMEOUT;
    const reportProgress = (progress, total) => {
      if (settings.onProgress != null) {
        settings.onProgress(progress, total);
      }
    };

    const startResponse = await this.startRun(slug);
    const { runId } = startResponse;
    // Re-point the URL returned by the API at the configured endpoint.
    const runUrl = startResponse.runUrl ? this.replaceUrlDomain(startResponse.runUrl, this.config.endpoint) : "";
    console.log(`Started evaluation run: ${runId}`);
    if (runUrl) {
      console.log(`Follow live: ${runUrl}`);
    }

    const total = startResponse.total;
    let lastProgress = 0;
    if (total > 0) {
      process.stdout.write(`Progress: 0/${total} (0%)`);
    }
    reportProgress(0, total);

    const startTime = Date.now();
    while (true) {
      if (Date.now() - startTime > timeout) {
        console.log();
        const finalStatus = await this.getRunStatus(runId);
        throw new EvaluationTimeoutError(runId, finalStatus.progress, finalStatus.total);
      }

      await this.sleep(pollInterval);
      const status = await this.getRunStatus(runId);

      if (status.progress !== lastProgress && status.total > 0) {
        const percentage = Math.round(status.progress / status.total * 100);
        // "\r" rewrites the same terminal line on every update.
        process.stdout.write(`\rProgress: ${status.progress}/${status.total} (${percentage}%)`);
        lastProgress = status.progress;
      }
      reportProgress(status.progress, status.total);

      if (status.status === "failed") {
        console.log();
        throw new EvaluationRunFailedError(runId, status.error != null ? status.error : "Unknown error");
      }

      if (status.status === "completed" || status.status === "stopped") {
        console.log();
        // A terminal status may arrive without a summary; derive a minimal one
        // from the polled counters instead of failing while building the result.
        const summary = status.summary != null ? status.summary : {
          runId,
          totalCells: status.total,
          completedCells: status.progress,
          failedCells: 0,
          duration: Date.now() - startTime
        };
        return this.buildResult(runId, status.status, summary, runUrl);
      }
    }
  }

  /**
   * Start an evaluation run
   *
   * @throws EvaluationNotFoundError on HTTP 404, EvaluationsApiError otherwise.
   */
  async startRun(slug) {
    const response = await this.config.langwatchApiClient.POST(
      "/api/evaluations/v3/{slug}/run",
      {
        params: {
          path: { slug }
        }
      }
    );
    const failure = this.apiErrorFor(
      response,
      `Failed to start evaluation: ${slug}`,
      () => new EvaluationNotFoundError(slug)
    );
    if (failure) {
      throw failure;
    }
    return response.data;
  }

  /**
   * Get the status of a run
   *
   * @throws EvaluationsApiError when the request fails.
   */
  async getRunStatus(runId) {
    const response = await this.config.langwatchApiClient.GET(
      "/api/evaluations/v3/runs/{runId}",
      {
        params: {
          path: { runId }
        }
      }
    );
    const failure = this.apiErrorFor(
      response,
      `Failed to get run status: ${runId}`,
      () => new EvaluationsApiError(`Run not found: ${runId}`, 404)
    );
    if (failure) {
      throw failure;
    }
    return response.data;
  }

  /**
   * Map a failed API client response to the error to throw.
   *
   * @param response - Client result with optional `error` and `response` fields.
   * @param fallbackMessage - Message used when the payload carries no `error` field.
   * @param createNotFoundError - Factory for the error raised on HTTP 404.
   * @returns The error to throw, or null when the response is not a failure.
   */
  apiErrorFor(response, fallbackMessage, createNotFoundError) {
    const httpResponse = response.response;
    const failed = Boolean(response.error) || (httpResponse != null && httpResponse.ok === false);
    if (!failed) {
      return null;
    }

    const status = httpResponse != null ? httpResponse.status : void 0;
    if (status === 404) {
      return createNotFoundError();
    }
    if (status === 401) {
      return new EvaluationsApiError("Unauthorized - check your API key", 401);
    }

    // The payload is not guaranteed to be an object (e.g. an HTML or plain-text
    // body); the `in` operator throws a TypeError on primitives, so guard it.
    const payload = response.error;
    let message = fallbackMessage;
    if (payload !== null && typeof payload === "object" && "error" in payload) {
      message = payload.error != null ? payload.error : `HTTP ${status}`;
    }
    return new EvaluationsApiError(message, status);
  }

  /**
   * Build the result object from API response
   *
   * Counters missing from `summary` default to 0; passed/failed totals and
   * the pass rate are derived from the cell counters when not provided.
   *
   * @param runId - Run identifier.
   * @param status - Final run status, e.g. "completed" or "stopped".
   * @param summary - Summary object from the API; may be missing.
   * @param runUrl - Endpoint-based run URL, or "" when unknown.
   */
  buildResult(runId, status, summary, runUrl) {
    const safeSummary = summary != null ? summary : {};
    const orDefault = (value, fallback) => value != null ? value : fallback;

    const completedCells = orDefault(safeSummary.completedCells, 0);
    const failedCells = orDefault(safeSummary.failedCells, 0);
    const duration = orDefault(safeSummary.duration, 0);
    const totalPassed = orDefault(safeSummary.totalPassed, completedCells - failedCells);
    const totalFailed = orDefault(safeSummary.totalFailed, failedCells);
    const passRate = orDefault(
      safeSummary.passRate,
      completedCells > 0 ? totalPassed / completedCells * 100 : 0
    );

    // Always prefer the endpoint-based URL. Only when none was constructed,
    // fall back to the summary's URL, re-pointed at the configured endpoint.
    let resolvedRunUrl = runUrl || "";
    if (!resolvedRunUrl && safeSummary.runUrl) {
      resolvedRunUrl = this.replaceUrlDomain(safeSummary.runUrl, this.config.endpoint);
    }

    const result = {
      runId,
      status,
      passed: totalPassed,
      failed: totalFailed,
      passRate,
      duration,
      runUrl: resolvedRunUrl,
      summary: safeSummary
    };
    return Object.assign({}, result, {
      printSummary: (exitOnFailure = true) => {
        // Prints the same values (including the URL) that the result exposes.
        this.printSummary(result);
        if (exitOnFailure && totalFailed > 0) {
          process.exit(1);
        }
      }
    });
  }

  /**
   * Print a CI-friendly summary of the evaluation results
   */
  printSummary(result) {
    const { runId, status, passed, failed, passRate, duration, runUrl, summary } = result;
    const heavyRule = "\u2550".repeat(60);
    const lightRule = "\u2500".repeat(60);

    console.log("\n" + heavyRule);
    console.log(" EVALUATION RESULTS");
    console.log(heavyRule);
    console.log(` Run ID: ${runId}`);
    console.log(` Status: ${status.toUpperCase()}`);
    console.log(` Duration: ${(duration / 1e3).toFixed(1)}s`);
    console.log(lightRule);
    console.log(` Passed: ${passed}`);
    console.log(` Failed: ${failed}`);
    console.log(` Pass Rate: ${passRate.toFixed(1)}%`);

    if (summary.targets && summary.targets.length > 0) {
      console.log(lightRule);
      console.log(" TARGETS:");
      for (const target of summary.targets) {
        console.log(` ${target.name}: ${target.passed} passed, ${target.failed} failed`);
        if (target.avgLatency) {
          console.log(` Avg latency: ${target.avgLatency.toFixed(0)}ms`);
        }
        if (target.totalCost) {
          console.log(` Total cost: $${target.totalCost.toFixed(4)}`);
        }
      }
    }

    if (summary.evaluators && summary.evaluators.length > 0) {
      console.log(lightRule);
      console.log(" EVALUATORS:");
      for (const evaluator of summary.evaluators) {
        console.log(
          ` ${evaluator.name}: ${evaluator.passRate.toFixed(1)}% pass rate`
        );
        // `!= null` skips both undefined and null scores.
        if (evaluator.avgScore != null) {
          console.log(` Avg score: ${evaluator.avgScore.toFixed(2)}`);
        }
      }
    }

    console.log(lightRule);
    console.log(` View details: ${runUrl}`);
    console.log(heavyRule + "\n");
  }

  /** Resolve after `ms` milliseconds. */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Replace the domain of a URL with a new base URL, preserving the path
   *
   * Only the origin of `newBase` is used; path, query string and fragment come
   * from `url`. When either value cannot be parsed as a URL, `url` is returned
   * unchanged.
   */
  replaceUrlDomain(url, newBase) {
    if (!url) return url;
    try {
      const parsedUrl = new URL(url);
      const parsedNewBase = new URL(newBase);
      return `${parsedNewBase.origin}${parsedUrl.pathname}${parsedUrl.search}${parsedUrl.hash}`;
    } catch (e) {
      // Deliberate best effort: an unparsable URL is passed through as-is.
      return url;
    }
  }
};
1251
+
39
1252
  // src/client-sdk/services/traces/types.ts
40
1253
  var TracesError = class extends Error {
41
1254
  constructor(message, operation, originalError) {
@@ -47,13 +1260,13 @@ var TracesError = class extends Error {
47
1260
  };
48
1261
 
49
1262
  // src/client-sdk/services/traces/tracing/tracer.ts
50
- var tracer = _chunkA43BYF5Qjs.getLangWatchTracer.call(void 0, `${_chunkYWO3NE5Ajs.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkYWO3NE5Ajs.LANGWATCH_SDK_VERSION);
1263
+ var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkC4XUWCQRjs.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkC4XUWCQRjs.LANGWATCH_SDK_VERSION);
51
1264
 
52
1265
  // src/client-sdk/services/traces/service.ts
53
1266
  var TracesService = class {
54
1267
  constructor(config) {
55
1268
  this.config = config;
56
- return _chunkNM5OKM7Fjs.createTracingProxy.call(void 0,
1269
+ return _chunk6SSCBYJMjs.createTracingProxy.call(void 0,
57
1270
  this,
58
1271
  tracer
59
1272
  );
@@ -116,17 +1329,27 @@ var LangWatch = class {
116
1329
  _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _LangWatch_instances);
117
1330
  var _a, _b, _c, _d;
118
1331
  const apiKey = (_b = (_a = options.apiKey) != null ? _a : process.env.LANGWATCH_API_KEY) != null ? _b : "";
119
- const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkYWO3NE5Ajs.DEFAULT_ENDPOINT;
1332
+ const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkC4XUWCQRjs.DEFAULT_ENDPOINT;
120
1333
  this.config = _chunkOHM7JUMRjs.__privateMethod.call(void 0, this, _LangWatch_instances, createInternalConfig_fn).call(this, {
121
1334
  apiKey,
122
1335
  endpoint,
123
1336
  options: options.options
124
1337
  });
125
- this.prompts = new (0, _chunkNM5OKM7Fjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
126
- promptsApiService: new (0, _chunkNM5OKM7Fjs.PromptsApiService)(this.config),
127
- localPromptsService: new (0, _chunkNM5OKM7Fjs.LocalPromptsService)()
1338
+ this.prompts = new (0, _chunk6SSCBYJMjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1339
+ promptsApiService: new (0, _chunk6SSCBYJMjs.PromptsApiService)(this.config),
1340
+ localPromptsService: new (0, _chunk6SSCBYJMjs.LocalPromptsService)()
128
1341
  }, this.config));
129
1342
  this.traces = new TracesFacade(this.config);
1343
+ this.evaluation = new EvaluationFacade({
1344
+ langwatchApiClient: this.config.langwatchApiClient,
1345
+ endpoint: this.config.endpoint,
1346
+ apiKey: this.config.apiKey,
1347
+ logger: this.config.logger
1348
+ });
1349
+ this.datasets = new DatasetsFacade({
1350
+ langwatchApiClient: this.config.langwatchApiClient,
1351
+ logger: this.config.logger
1352
+ });
130
1353
  }
131
1354
  get apiClient() {
132
1355
  return this.config.langwatchApiClient;
@@ -140,15 +1363,17 @@ createInternalConfig_fn = function({
140
1363
  }) {
141
1364
  var _a;
142
1365
  return {
143
- logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunkCKIZDPIJjs.NoOpLogger)(),
144
- langwatchApiClient: _chunkNM5OKM7Fjs.createLangWatchApiClient.call(void 0, apiKey, endpoint)
1366
+ logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunk5MQQRSVMjs.NoOpLogger)(),
1367
+ langwatchApiClient: _chunk6SSCBYJMjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
1368
+ endpoint,
1369
+ apiKey
145
1370
  };
146
1371
  };
147
1372
 
148
1373
  // src/index.ts
149
1374
  var logger = {
150
- ConsoleLogger: _chunkCKIZDPIJjs.ConsoleLogger,
151
- NoOpLogger: _chunkCKIZDPIJjs.NoOpLogger
1375
+ ConsoleLogger: _chunk5MQQRSVMjs.ConsoleLogger,
1376
+ NoOpLogger: _chunk5MQQRSVMjs.NoOpLogger
152
1377
  };
153
1378
 
154
1379
 
@@ -159,5 +1384,12 @@ var logger = {
159
1384
 
160
1385
 
161
1386
 
162
- exports.FetchPolicy = _chunkNM5OKM7Fjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkSNDTNU3Tjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkBTCJWUS5js.LangWatchTraceExporter; exports.attributes = _chunkCKIZDPIJjs.attributes_exports; exports.getLangWatchLogger = _chunkBTCJWUS5js.getLangWatchLogger; exports.getLangWatchTracer = _chunkA43BYF5Qjs.getLangWatchTracer; exports.logger = logger;
1387
+
1388
+
1389
+
1390
+
1391
+
1392
+
1393
+
1394
+ exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy = _chunk6SSCBYJMjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkBQRUUTN3js.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunkBQRUUTN3js.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
163
1395
  //# sourceMappingURL=index.js.map