langwatch 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{add-LUETMKBD.js → add-Z5UVUPCK.js} +7 -7
- package/dist/{add-LUETMKBD.js.map → add-Z5UVUPCK.js.map} +1 -1
- package/dist/{add-2UHFYNUA.mjs → add-ZAPD2GBO.mjs} +4 -4
- package/dist/{chunk-BQRUUTN3.js → chunk-4BNGSDYW.js} +14 -14
- package/dist/{chunk-BQRUUTN3.js.map → chunk-4BNGSDYW.js.map} +1 -1
- package/dist/{chunk-TB5KB737.mjs → chunk-77XIPD42.mjs} +2 -2
- package/dist/chunk-77XIPD42.mjs.map +1 -0
- package/dist/{chunk-6SSCBYJM.js → chunk-DXBTJGCK.js} +10 -10
- package/dist/{chunk-6SSCBYJM.js.map → chunk-DXBTJGCK.js.map} +1 -1
- package/dist/{chunk-OTID7S7K.mjs → chunk-J4HK6XZR.mjs} +5 -5
- package/dist/{chunk-C4XUWCQR.js → chunk-NPFWFQK6.js} +2 -2
- package/dist/chunk-NPFWFQK6.js.map +1 -0
- package/dist/chunk-OAKQ7UBU.mjs +317 -0
- package/dist/chunk-OAKQ7UBU.mjs.map +1 -0
- package/dist/chunk-RM2VUAFL.js +317 -0
- package/dist/chunk-RM2VUAFL.js.map +1 -0
- package/dist/{chunk-WCNDT5SD.mjs → chunk-SZRV7E6P.mjs} +2 -2
- package/dist/cli/index.js +6 -6
- package/dist/cli/index.mjs +6 -6
- package/dist/index.d.mts +239 -54
- package/dist/index.d.ts +239 -54
- package/dist/index.js +287 -89
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +286 -88
- package/dist/index.mjs.map +1 -1
- package/dist/{list-7U3M64GY.js → list-LASBYRI4.js} +7 -7
- package/dist/{list-7U3M64GY.js.map → list-LASBYRI4.js.map} +1 -1
- package/dist/{list-WV5LA6LD.mjs → list-XX4VPNJA.mjs} +4 -4
- package/dist/{login-B7DKMN7P.js → login-2VCZDSLE.js} +3 -3
- package/dist/{login-B7DKMN7P.js.map → login-2VCZDSLE.js.map} +1 -1
- package/dist/{login-QKRT6PXA.mjs → login-CZ2257SV.mjs} +2 -2
- package/dist/observability-sdk/index.js +4 -4
- package/dist/observability-sdk/index.js.map +1 -1
- package/dist/observability-sdk/index.mjs +7 -7
- package/dist/observability-sdk/setup/node/index.d.mts +24 -1
- package/dist/observability-sdk/setup/node/index.d.ts +24 -1
- package/dist/observability-sdk/setup/node/index.js +7 -292
- package/dist/observability-sdk/setup/node/index.js.map +1 -1
- package/dist/observability-sdk/setup/node/index.mjs +8 -293
- package/dist/observability-sdk/setup/node/index.mjs.map +1 -1
- package/dist/{remove-2OGMXSTR.mjs → remove-KESD7YHL.mjs} +4 -4
- package/dist/{remove-A4DKCN7A.js → remove-XWN3XTF5.js} +6 -6
- package/dist/{remove-A4DKCN7A.js.map → remove-XWN3XTF5.js.map} +1 -1
- package/dist/{sync-WRZXIBZS.js → sync-IJ26JHEP.js} +6 -6
- package/dist/{sync-WRZXIBZS.js.map → sync-IJ26JHEP.js.map} +1 -1
- package/dist/{sync-TNVCKWTC.mjs → sync-SCVP7CHX.mjs} +4 -4
- package/package.json +3 -2
- package/dist/chunk-C4XUWCQR.js.map +0 -1
- package/dist/chunk-TB5KB737.mjs.map +0 -1
- /package/dist/{add-2UHFYNUA.mjs.map → add-ZAPD2GBO.mjs.map} +0 -0
- /package/dist/{chunk-OTID7S7K.mjs.map → chunk-J4HK6XZR.mjs.map} +0 -0
- /package/dist/{chunk-WCNDT5SD.mjs.map → chunk-SZRV7E6P.mjs.map} +0 -0
- /package/dist/{list-WV5LA6LD.mjs.map → list-XX4VPNJA.mjs.map} +0 -0
- /package/dist/{login-QKRT6PXA.mjs.map → login-CZ2257SV.mjs.map} +0 -0
- /package/dist/{remove-2OGMXSTR.mjs.map → remove-KESD7YHL.mjs.map} +0 -0
- /package/dist/{sync-TNVCKWTC.mjs.map → sync-SCVP7CHX.mjs.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -1,27 +1,30 @@
|
|
|
1
1
|
"use strict";Object.defineProperty(exports, "__esModule", {value: true});
|
|
2
2
|
|
|
3
|
+
var _chunkRM2VUAFLjs = require('./chunk-RM2VUAFL.js');
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
var _chunkDXBTJGCKjs = require('./chunk-DXBTJGCK.js');
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
var _chunkASTAIRXGjs = require('./chunk-ASTAIRXG.js');
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
|
|
15
|
-
var
|
|
18
|
+
var _chunk4BNGSDYWjs = require('./chunk-4BNGSDYW.js');
|
|
16
19
|
|
|
17
20
|
|
|
18
21
|
|
|
19
|
-
var _chunkBQRUUTN3js = require('./chunk-BQRUUTN3.js');
|
|
20
22
|
|
|
23
|
+
var _chunkNPFWFQK6js = require('./chunk-NPFWFQK6.js');
|
|
21
24
|
|
|
22
25
|
|
|
23
26
|
|
|
24
|
-
var
|
|
27
|
+
var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
|
|
25
28
|
|
|
26
29
|
|
|
27
30
|
|
|
@@ -147,11 +150,11 @@ var DatasetsFacade = class {
|
|
|
147
150
|
};
|
|
148
151
|
_datasetService = new WeakMap();
|
|
149
152
|
|
|
150
|
-
// src/client-sdk/services/
|
|
153
|
+
// src/client-sdk/services/experiments/experiment.ts
|
|
151
154
|
var _async_hooks = require('async_hooks');
|
|
152
155
|
var _api = require('@opentelemetry/api');
|
|
153
156
|
|
|
154
|
-
// src/client-sdk/services/
|
|
157
|
+
// src/client-sdk/services/experiments/humanReadableId.ts
|
|
155
158
|
var ADJECTIVES = [
|
|
156
159
|
"swift",
|
|
157
160
|
"bright",
|
|
@@ -265,29 +268,29 @@ var generateHumanReadableId = (separator = "-") => {
|
|
|
265
268
|
return `${adjective1}${separator}${adjective2}${separator}${noun}`;
|
|
266
269
|
};
|
|
267
270
|
|
|
268
|
-
// src/client-sdk/services/
|
|
269
|
-
var
|
|
271
|
+
// src/client-sdk/services/experiments/errors/experiment.error.ts
|
|
272
|
+
var ExperimentError = class extends Error {
|
|
270
273
|
constructor(message) {
|
|
271
274
|
super(message);
|
|
272
|
-
this.name = "
|
|
275
|
+
this.name = "ExperimentError";
|
|
273
276
|
}
|
|
274
277
|
};
|
|
275
|
-
var
|
|
278
|
+
var ExperimentInitError = class extends ExperimentError {
|
|
276
279
|
constructor(message, cause) {
|
|
277
280
|
super(message);
|
|
278
281
|
this.cause = cause;
|
|
279
|
-
this.name = "
|
|
282
|
+
this.name = "ExperimentInitError";
|
|
280
283
|
}
|
|
281
284
|
};
|
|
282
|
-
var
|
|
285
|
+
var ExperimentApiError = class extends ExperimentError {
|
|
283
286
|
constructor(message, statusCode, cause) {
|
|
284
287
|
super(message);
|
|
285
288
|
this.statusCode = statusCode;
|
|
286
289
|
this.cause = cause;
|
|
287
|
-
this.name = "
|
|
290
|
+
this.name = "ExperimentApiError";
|
|
288
291
|
}
|
|
289
292
|
};
|
|
290
|
-
var TargetMetadataConflictError = class extends
|
|
293
|
+
var TargetMetadataConflictError = class extends ExperimentError {
|
|
291
294
|
constructor(targetName, existingMetadata, newMetadata) {
|
|
292
295
|
super(
|
|
293
296
|
`Target '${targetName}' was previously registered with different metadata.
|
|
@@ -301,7 +304,7 @@ If you want to use different metadata, please use a different target name.`
|
|
|
301
304
|
this.name = "TargetMetadataConflictError";
|
|
302
305
|
}
|
|
303
306
|
};
|
|
304
|
-
var EvaluatorError = class extends
|
|
307
|
+
var EvaluatorError = class extends ExperimentError {
|
|
305
308
|
constructor(evaluatorSlug, message, cause) {
|
|
306
309
|
super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
|
|
307
310
|
this.evaluatorSlug = evaluatorSlug;
|
|
@@ -310,12 +313,12 @@ var EvaluatorError = class extends EvaluationError {
|
|
|
310
313
|
}
|
|
311
314
|
};
|
|
312
315
|
|
|
313
|
-
// src/client-sdk/services/
|
|
316
|
+
// src/client-sdk/services/experiments/experiment.ts
|
|
314
317
|
var DEFAULT_CONCURRENCY = 4;
|
|
315
318
|
var DEBOUNCE_INTERVAL_MS = 1e3;
|
|
316
319
|
var iterationContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
|
|
317
320
|
var targetContextStorage = new (0, _async_hooks.AsyncLocalStorage)();
|
|
318
|
-
var
|
|
321
|
+
var Experiment = class _Experiment {
|
|
319
322
|
constructor(name, options) {
|
|
320
323
|
this.initialized = false;
|
|
321
324
|
this.total = 0;
|
|
@@ -352,16 +355,17 @@ var Evaluation = class _Evaluation {
|
|
|
352
355
|
* Initialize an evaluation session
|
|
353
356
|
*/
|
|
354
357
|
static async init(name, options) {
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
+
_chunkRM2VUAFLjs.ensureSetup.call(void 0, );
|
|
359
|
+
const experiment = new _Experiment(name, options);
|
|
360
|
+
await experiment.initialize();
|
|
361
|
+
return experiment;
|
|
358
362
|
}
|
|
359
363
|
/**
|
|
360
364
|
* Initialize the evaluation by creating/getting the experiment
|
|
361
365
|
*/
|
|
362
366
|
async initialize() {
|
|
363
367
|
if (!this.apiKey) {
|
|
364
|
-
throw new
|
|
368
|
+
throw new ExperimentInitError(
|
|
365
369
|
"API key is required. Set LANGWATCH_API_KEY or pass apiKey to LangWatch constructor."
|
|
366
370
|
);
|
|
367
371
|
}
|
|
@@ -379,11 +383,11 @@ var Evaluation = class _Evaluation {
|
|
|
379
383
|
})
|
|
380
384
|
});
|
|
381
385
|
if (response.status === 401) {
|
|
382
|
-
throw new
|
|
386
|
+
throw new ExperimentInitError("Invalid API key");
|
|
383
387
|
}
|
|
384
388
|
if (!response.ok) {
|
|
385
389
|
const text = await response.text();
|
|
386
|
-
throw new
|
|
390
|
+
throw new ExperimentInitError(`Failed to initialize experiment: ${text}`);
|
|
387
391
|
}
|
|
388
392
|
const data = await response.json();
|
|
389
393
|
this.experimentSlug = data.slug;
|
|
@@ -391,10 +395,10 @@ var Evaluation = class _Evaluation {
|
|
|
391
395
|
console.log(`Follow results at: ${this.endpoint}${data.path}?runId=${encodedRunId}`);
|
|
392
396
|
this.initialized = true;
|
|
393
397
|
} catch (error) {
|
|
394
|
-
if (error instanceof
|
|
398
|
+
if (error instanceof ExperimentInitError) {
|
|
395
399
|
throw error;
|
|
396
400
|
}
|
|
397
|
-
throw new
|
|
401
|
+
throw new ExperimentInitError(
|
|
398
402
|
`Failed to initialize evaluation: ${error instanceof Error ? error.message : String(error)}`,
|
|
399
403
|
error instanceof Error ? error : void 0
|
|
400
404
|
);
|
|
@@ -841,17 +845,16 @@ var Evaluation = class _Evaluation {
|
|
|
841
845
|
name: this.name,
|
|
842
846
|
run_id: this.runId,
|
|
843
847
|
dataset: this.batch.dataset.map((entry) => {
|
|
844
|
-
var _a, _b
|
|
845
|
-
return {
|
|
848
|
+
var _a, _b;
|
|
849
|
+
return _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
|
|
846
850
|
index: entry.index,
|
|
847
851
|
entry: entry.entry,
|
|
848
852
|
duration: entry.duration,
|
|
849
853
|
error: entry.error,
|
|
850
854
|
trace_id: entry.trace_id,
|
|
851
855
|
target_id: (_a = entry.target_id) != null ? _a : null,
|
|
852
|
-
cost: (_b = entry.cost) != null ? _b : null
|
|
853
|
-
|
|
854
|
-
};
|
|
856
|
+
cost: (_b = entry.cost) != null ? _b : null
|
|
857
|
+
}, entry.predicted !== void 0 && entry.predicted !== null ? { predicted: entry.predicted } : {});
|
|
855
858
|
}),
|
|
856
859
|
evaluations: this.batch.evaluations.map((e) => ({
|
|
857
860
|
name: e.name,
|
|
@@ -943,70 +946,70 @@ var Evaluation = class _Evaluation {
|
|
|
943
946
|
}
|
|
944
947
|
};
|
|
945
948
|
|
|
946
|
-
// src/client-sdk/services/
|
|
947
|
-
var
|
|
949
|
+
// src/client-sdk/services/experiments/platformErrors.ts
|
|
950
|
+
var ExperimentsError = class extends Error {
|
|
948
951
|
constructor(message) {
|
|
949
952
|
super(message);
|
|
950
|
-
this.name = "
|
|
953
|
+
this.name = "ExperimentsError";
|
|
951
954
|
}
|
|
952
955
|
};
|
|
953
|
-
var
|
|
956
|
+
var ExperimentNotFoundError = class extends ExperimentsError {
|
|
954
957
|
constructor(slug) {
|
|
955
|
-
super(`
|
|
956
|
-
this.name = "
|
|
958
|
+
super(`Experiment not found: ${slug}`);
|
|
959
|
+
this.name = "ExperimentNotFoundError";
|
|
957
960
|
}
|
|
958
961
|
};
|
|
959
|
-
var
|
|
962
|
+
var ExperimentTimeoutError = class extends ExperimentsError {
|
|
960
963
|
constructor(runId, progress, total) {
|
|
961
|
-
super(`
|
|
962
|
-
this.name = "
|
|
964
|
+
super(`Experiment run timed out: ${runId} (${progress}/${total} completed)`);
|
|
965
|
+
this.name = "ExperimentTimeoutError";
|
|
963
966
|
this.runId = runId;
|
|
964
967
|
this.progress = progress;
|
|
965
968
|
this.total = total;
|
|
966
969
|
}
|
|
967
970
|
};
|
|
968
|
-
var
|
|
971
|
+
var ExperimentRunFailedError = class extends ExperimentsError {
|
|
969
972
|
constructor(runId, errorMessage) {
|
|
970
|
-
super(`
|
|
971
|
-
this.name = "
|
|
973
|
+
super(`Experiment run failed: ${errorMessage}`);
|
|
974
|
+
this.name = "ExperimentRunFailedError";
|
|
972
975
|
this.runId = runId;
|
|
973
976
|
this.errorMessage = errorMessage;
|
|
974
977
|
}
|
|
975
978
|
};
|
|
976
|
-
var
|
|
979
|
+
var ExperimentsApiError = class extends ExperimentsError {
|
|
977
980
|
constructor(message, statusCode) {
|
|
978
981
|
super(message);
|
|
979
|
-
this.name = "
|
|
982
|
+
this.name = "ExperimentsApiError";
|
|
980
983
|
this.statusCode = statusCode;
|
|
981
984
|
}
|
|
982
985
|
};
|
|
983
986
|
|
|
984
|
-
// src/client-sdk/services/
|
|
987
|
+
// src/client-sdk/services/experiments/experiments.facade.ts
|
|
985
988
|
var DEFAULT_POLL_INTERVAL = 2e3;
|
|
986
989
|
var DEFAULT_TIMEOUT = 6e5;
|
|
987
|
-
var
|
|
990
|
+
var ExperimentsFacade = class {
|
|
988
991
|
constructor(config) {
|
|
989
992
|
this.config = config;
|
|
990
993
|
}
|
|
991
994
|
/**
|
|
992
|
-
* Initialize a new
|
|
995
|
+
* Initialize a new experiment session (SDK-defined)
|
|
993
996
|
*
|
|
994
997
|
* @param name - Name of the experiment (used as slug)
|
|
995
998
|
* @param options - Optional configuration
|
|
996
|
-
* @returns An initialized
|
|
999
|
+
* @returns An initialized Experiment instance
|
|
997
1000
|
*
|
|
998
1001
|
* @example
|
|
999
1002
|
* ```typescript
|
|
1000
|
-
* const
|
|
1003
|
+
* const experiment = await langwatch.experiments.init('my-experiment');
|
|
1001
1004
|
*
|
|
1002
|
-
* await
|
|
1005
|
+
* await experiment.run(dataset, async ({ item, index }) => {
|
|
1003
1006
|
* const response = await myAgent(item.question);
|
|
1004
|
-
*
|
|
1007
|
+
* experiment.log('accuracy', { index, score: 0.95 });
|
|
1005
1008
|
* });
|
|
1006
1009
|
* ```
|
|
1007
1010
|
*/
|
|
1008
1011
|
async init(name, options) {
|
|
1009
|
-
return
|
|
1012
|
+
return Experiment.init(name, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
|
|
1010
1013
|
apiClient: this.config.langwatchApiClient,
|
|
1011
1014
|
endpoint: this.config.endpoint,
|
|
1012
1015
|
apiKey: this.config.apiKey,
|
|
@@ -1014,15 +1017,15 @@ var EvaluationFacade = class {
|
|
|
1014
1017
|
}, options));
|
|
1015
1018
|
}
|
|
1016
1019
|
/**
|
|
1017
|
-
* Run a platform-configured
|
|
1020
|
+
* Run a platform-configured experiment (Experiments Workbench)
|
|
1018
1021
|
*
|
|
1019
|
-
* This runs an
|
|
1022
|
+
* This runs an experiment that was configured in the LangWatch platform.
|
|
1020
1023
|
* The method automatically prints a summary and exits with code 1 on failure
|
|
1021
1024
|
* (unless `exitOnFailure: false` is passed).
|
|
1022
1025
|
*
|
|
1023
|
-
* @param slug - The slug of the
|
|
1026
|
+
* @param slug - The slug of the experiment (found in the experiment URL)
|
|
1024
1027
|
* @param options - Optional configuration
|
|
1025
|
-
* @returns The
|
|
1028
|
+
* @returns The experiment results including pass rate and summary
|
|
1026
1029
|
*
|
|
1027
1030
|
* @example
|
|
1028
1031
|
* ```typescript
|
|
@@ -1030,17 +1033,17 @@ var EvaluationFacade = class {
|
|
|
1030
1033
|
*
|
|
1031
1034
|
* const langwatch = new LangWatch();
|
|
1032
1035
|
*
|
|
1033
|
-
* const result = await langwatch.
|
|
1036
|
+
* const result = await langwatch.experiments.run("my-experiment-slug");
|
|
1034
1037
|
* result.printSummary();
|
|
1035
1038
|
* ```
|
|
1036
1039
|
*/
|
|
1037
1040
|
async run(slug, options) {
|
|
1038
|
-
this.config.logger.info(`Running platform
|
|
1041
|
+
this.config.logger.info(`Running platform experiment: ${slug}`);
|
|
1039
1042
|
const result = await this.runWithPolling(slug, options);
|
|
1040
1043
|
return result;
|
|
1041
1044
|
}
|
|
1042
1045
|
/**
|
|
1043
|
-
* Run an
|
|
1046
|
+
* Run an experiment and wait for completion using polling
|
|
1044
1047
|
*/
|
|
1045
1048
|
async runWithPolling(slug, options = {}) {
|
|
1046
1049
|
var _a, _b, _c, _d, _e, _f, _g;
|
|
@@ -1050,7 +1053,7 @@ var EvaluationFacade = class {
|
|
|
1050
1053
|
const { runId } = startResponse;
|
|
1051
1054
|
const apiRunUrl = (_c = startResponse.runUrl) != null ? _c : "";
|
|
1052
1055
|
const runUrl = apiRunUrl ? this.replaceUrlDomain(apiRunUrl, this.config.endpoint) : "";
|
|
1053
|
-
console.log(`Started
|
|
1056
|
+
console.log(`Started experiment run: ${runId}`);
|
|
1054
1057
|
if (runUrl) {
|
|
1055
1058
|
console.log(`Follow live: ${runUrl}`);
|
|
1056
1059
|
}
|
|
@@ -1065,7 +1068,7 @@ var EvaluationFacade = class {
|
|
|
1065
1068
|
if (Date.now() - startTime > timeout) {
|
|
1066
1069
|
console.log();
|
|
1067
1070
|
const finalStatus = await this.getRunStatus(runId);
|
|
1068
|
-
throw new
|
|
1071
|
+
throw new ExperimentTimeoutError(runId, finalStatus.progress, finalStatus.total);
|
|
1069
1072
|
}
|
|
1070
1073
|
await this.sleep(pollInterval);
|
|
1071
1074
|
const status = await this.getRunStatus(runId);
|
|
@@ -1083,7 +1086,7 @@ var EvaluationFacade = class {
|
|
|
1083
1086
|
}
|
|
1084
1087
|
if (status.status === "failed") {
|
|
1085
1088
|
console.log();
|
|
1086
|
-
throw new
|
|
1089
|
+
throw new ExperimentRunFailedError(runId, (_f = status.error) != null ? _f : "Unknown error");
|
|
1087
1090
|
}
|
|
1088
1091
|
if (status.status === "stopped") {
|
|
1089
1092
|
console.log();
|
|
@@ -1098,7 +1101,7 @@ var EvaluationFacade = class {
|
|
|
1098
1101
|
}
|
|
1099
1102
|
}
|
|
1100
1103
|
/**
|
|
1101
|
-
* Start an
|
|
1104
|
+
* Start an experiment run
|
|
1102
1105
|
*/
|
|
1103
1106
|
async startRun(slug) {
|
|
1104
1107
|
const response = await this.config.langwatchApiClient.POST(
|
|
@@ -1112,13 +1115,13 @@ var EvaluationFacade = class {
|
|
|
1112
1115
|
if (response.error) {
|
|
1113
1116
|
const status = response.response.status;
|
|
1114
1117
|
if (status === 404) {
|
|
1115
|
-
throw new
|
|
1118
|
+
throw new ExperimentNotFoundError(slug);
|
|
1116
1119
|
}
|
|
1117
1120
|
if (status === 401) {
|
|
1118
|
-
throw new
|
|
1121
|
+
throw new ExperimentsApiError("Unauthorized - check your API key", 401);
|
|
1119
1122
|
}
|
|
1120
|
-
const errorMessage = "error" in response.error ? response.error.error : `Failed to start
|
|
1121
|
-
throw new
|
|
1123
|
+
const errorMessage = "error" in response.error ? response.error.error : `Failed to start experiment: ${slug}`;
|
|
1124
|
+
throw new ExperimentsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
|
|
1122
1125
|
}
|
|
1123
1126
|
return response.data;
|
|
1124
1127
|
}
|
|
@@ -1137,13 +1140,13 @@ var EvaluationFacade = class {
|
|
|
1137
1140
|
if (response.error) {
|
|
1138
1141
|
const status = response.response.status;
|
|
1139
1142
|
if (status === 404) {
|
|
1140
|
-
throw new
|
|
1143
|
+
throw new ExperimentsApiError(`Run not found: ${runId}`, 404);
|
|
1141
1144
|
}
|
|
1142
1145
|
if (status === 401) {
|
|
1143
|
-
throw new
|
|
1146
|
+
throw new ExperimentsApiError("Unauthorized - check your API key", 401);
|
|
1144
1147
|
}
|
|
1145
1148
|
const errorMessage = "error" in response.error ? response.error.error : `Failed to get run status: ${runId}`;
|
|
1146
|
-
throw new
|
|
1149
|
+
throw new ExperimentsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
|
|
1147
1150
|
}
|
|
1148
1151
|
return response.data;
|
|
1149
1152
|
}
|
|
@@ -1151,14 +1154,13 @@ var EvaluationFacade = class {
|
|
|
1151
1154
|
* Build the result object from API response
|
|
1152
1155
|
*/
|
|
1153
1156
|
buildResult(runId, status, summary, runUrl) {
|
|
1154
|
-
var _a, _b, _c, _d, _e, _f
|
|
1155
|
-
const
|
|
1156
|
-
const
|
|
1157
|
-
const
|
|
1158
|
-
const
|
|
1159
|
-
const
|
|
1160
|
-
const
|
|
1161
|
-
const passRate = (_g = summary.passRate) != null ? _g : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
|
|
1157
|
+
var _a, _b, _c, _d, _e, _f;
|
|
1158
|
+
const completedCells = (_a = summary.completedCells) != null ? _a : 0;
|
|
1159
|
+
const failedCells = (_b = summary.failedCells) != null ? _b : 0;
|
|
1160
|
+
const duration = (_c = summary.duration) != null ? _c : 0;
|
|
1161
|
+
const totalPassed = (_d = summary.totalPassed) != null ? _d : completedCells - failedCells;
|
|
1162
|
+
const totalFailed = (_e = summary.totalFailed) != null ? _e : failedCells;
|
|
1163
|
+
const passRate = (_f = summary.passRate) != null ? _f : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
|
|
1162
1164
|
return {
|
|
1163
1165
|
runId,
|
|
1164
1166
|
status,
|
|
@@ -1188,12 +1190,12 @@ var EvaluationFacade = class {
|
|
|
1188
1190
|
};
|
|
1189
1191
|
}
|
|
1190
1192
|
/**
|
|
1191
|
-
* Print a CI-friendly summary of the
|
|
1193
|
+
* Print a CI-friendly summary of the experiment results
|
|
1192
1194
|
*/
|
|
1193
1195
|
printSummary(result) {
|
|
1194
1196
|
const { runId, status, passed, failed, passRate, duration, runUrl, summary } = result;
|
|
1195
1197
|
console.log("\n" + "\u2550".repeat(60));
|
|
1196
|
-
console.log("
|
|
1198
|
+
console.log(" EXPERIMENT RESULTS");
|
|
1197
1199
|
console.log("\u2550".repeat(60));
|
|
1198
1200
|
console.log(` Run ID: ${runId}`);
|
|
1199
1201
|
console.log(` Status: ${status.toUpperCase()}`);
|
|
@@ -1249,6 +1251,192 @@ var EvaluationFacade = class {
|
|
|
1249
1251
|
}
|
|
1250
1252
|
};
|
|
1251
1253
|
|
|
1254
|
+
// src/client-sdk/services/evaluations/evaluations.facade.ts
|
|
1255
|
+
|
|
1256
|
+
|
|
1257
|
+
// src/client-sdk/services/evaluations/errors.ts
|
|
1258
|
+
var EvaluationError = class extends Error {
|
|
1259
|
+
constructor(message) {
|
|
1260
|
+
super(message);
|
|
1261
|
+
this.name = "EvaluationError";
|
|
1262
|
+
}
|
|
1263
|
+
};
|
|
1264
|
+
var EvaluatorCallError = class extends EvaluationError {
|
|
1265
|
+
constructor(evaluatorSlug, message, statusCode) {
|
|
1266
|
+
super(`Evaluator '${evaluatorSlug}' failed: ${message}`);
|
|
1267
|
+
this.name = "EvaluatorCallError";
|
|
1268
|
+
this.evaluatorSlug = evaluatorSlug;
|
|
1269
|
+
this.statusCode = statusCode;
|
|
1270
|
+
}
|
|
1271
|
+
};
|
|
1272
|
+
var EvaluatorNotFoundError = class extends EvaluationError {
|
|
1273
|
+
constructor(evaluatorSlug) {
|
|
1274
|
+
super(`Evaluator not found: ${evaluatorSlug}`);
|
|
1275
|
+
this.name = "EvaluatorNotFoundError";
|
|
1276
|
+
this.evaluatorSlug = evaluatorSlug;
|
|
1277
|
+
}
|
|
1278
|
+
};
|
|
1279
|
+
var EvaluationsApiError = class extends EvaluationError {
|
|
1280
|
+
constructor(message, statusCode) {
|
|
1281
|
+
super(message);
|
|
1282
|
+
this.name = "EvaluationsApiError";
|
|
1283
|
+
this.statusCode = statusCode;
|
|
1284
|
+
}
|
|
1285
|
+
};
|
|
1286
|
+
|
|
1287
|
+
// src/client-sdk/services/evaluations/evaluations.facade.ts
|
|
1288
|
+
var _endpoint, _apiKey, _logger;
|
|
1289
|
+
var EvaluationsFacade = class {
|
|
1290
|
+
constructor(config) {
|
|
1291
|
+
_chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _endpoint);
|
|
1292
|
+
_chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _apiKey);
|
|
1293
|
+
_chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _logger);
|
|
1294
|
+
/**
|
|
1295
|
+
* Run an evaluator or guardrail against provided data
|
|
1296
|
+
*
|
|
1297
|
+
* Creates an OpenTelemetry span attached to the current trace context,
|
|
1298
|
+
* calls the LangWatch evaluation API, and returns the result.
|
|
1299
|
+
*
|
|
1300
|
+
* @param slug - The evaluator slug (e.g., "presidio/pii_detection", "langevals/llm_boolean")
|
|
1301
|
+
* @param options - Evaluation options including data, name, settings, and asGuardrail flag
|
|
1302
|
+
* @returns The evaluation result with status, passed, score, details, label, and cost
|
|
1303
|
+
*
|
|
1304
|
+
* @example
|
|
1305
|
+
* ```typescript
|
|
1306
|
+
* // Run as a guardrail (synchronous evaluation that can block responses)
|
|
1307
|
+
* const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
|
|
1308
|
+
* data: { input: userInput, output: generatedResponse },
|
|
1309
|
+
* name: "PII Detection Guardrail",
|
|
1310
|
+
* asGuardrail: true,
|
|
1311
|
+
* });
|
|
1312
|
+
*
|
|
1313
|
+
* if (!guardrail.passed) {
|
|
1314
|
+
* console.log("PII detected:", guardrail.details);
|
|
1315
|
+
* return "Sorry, I cannot process that request.";
|
|
1316
|
+
* }
|
|
1317
|
+
* ```
|
|
1318
|
+
*
|
|
1319
|
+
* @example
|
|
1320
|
+
* ```typescript
|
|
1321
|
+
* // Run as an online evaluation (async scoring for monitoring)
|
|
1322
|
+
* const result = await langwatch.evaluations.evaluate("langevals/llm_boolean", {
|
|
1323
|
+
* data: { input: question, output: response },
|
|
1324
|
+
* name: "Quality Check",
|
|
1325
|
+
* settings: { prompt: "Check if the response answers the question." },
|
|
1326
|
+
* });
|
|
1327
|
+
*
|
|
1328
|
+
* console.log("Score:", result.score);
|
|
1329
|
+
* console.log("Details:", result.details);
|
|
1330
|
+
* ```
|
|
1331
|
+
*/
|
|
1332
|
+
this.evaluate = async (slug, options) => {
|
|
1333
|
+
var _a;
|
|
1334
|
+
const { data, name, settings, asGuardrail } = options;
|
|
1335
|
+
const spanName = name != null ? name : slug;
|
|
1336
|
+
const spanType = asGuardrail ? "guardrail" : "evaluation";
|
|
1337
|
+
const tracer2 = _api.trace.getTracer("langwatch-evaluations");
|
|
1338
|
+
const activeSpan = _api.trace.getActiveSpan();
|
|
1339
|
+
const traceId = activeSpan ? activeSpan.spanContext().traceId : void 0;
|
|
1340
|
+
const parentSpanId = activeSpan ? activeSpan.spanContext().spanId : void 0;
|
|
1341
|
+
const otelSpan = tracer2.startSpan(
|
|
1342
|
+
spanName,
|
|
1343
|
+
{
|
|
1344
|
+
attributes: {
|
|
1345
|
+
"langwatch.span.type": spanType
|
|
1346
|
+
}
|
|
1347
|
+
},
|
|
1348
|
+
_api.context.active()
|
|
1349
|
+
);
|
|
1350
|
+
const langwatchSpan = _chunkONXIZKC6js.createLangWatchSpan.call(void 0, otelSpan);
|
|
1351
|
+
langwatchSpan.setType(spanType);
|
|
1352
|
+
langwatchSpan.setInput(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
|
|
1353
|
+
data
|
|
1354
|
+
}, settings && { settings }));
|
|
1355
|
+
try {
|
|
1356
|
+
const requestBody = {
|
|
1357
|
+
trace_id: traceId != null ? traceId : null,
|
|
1358
|
+
span_id: parentSpanId != null ? parentSpanId : null,
|
|
1359
|
+
name: name != null ? name : null,
|
|
1360
|
+
data,
|
|
1361
|
+
settings,
|
|
1362
|
+
as_guardrail: asGuardrail
|
|
1363
|
+
};
|
|
1364
|
+
const url = `${_chunkOHM7JUMRjs.__privateGet.call(void 0, this, _endpoint)}/api/evaluations/${slug}/evaluate`;
|
|
1365
|
+
_chunkOHM7JUMRjs.__privateGet.call(void 0, this, _logger).debug(`Calling evaluation API: ${url}`);
|
|
1366
|
+
const response = await fetch(url, {
|
|
1367
|
+
method: "POST",
|
|
1368
|
+
headers: {
|
|
1369
|
+
"Content-Type": "application/json",
|
|
1370
|
+
"X-Auth-Token": _chunkOHM7JUMRjs.__privateGet.call(void 0, this, _apiKey)
|
|
1371
|
+
},
|
|
1372
|
+
body: JSON.stringify(requestBody)
|
|
1373
|
+
});
|
|
1374
|
+
if (!response.ok) {
|
|
1375
|
+
const errorText = await response.text();
|
|
1376
|
+
if (response.status === 404) {
|
|
1377
|
+
throw new EvaluatorNotFoundError(slug);
|
|
1378
|
+
}
|
|
1379
|
+
throw new EvaluationsApiError(
|
|
1380
|
+
`Evaluation API returned ${response.status}: ${errorText}`,
|
|
1381
|
+
response.status
|
|
1382
|
+
);
|
|
1383
|
+
}
|
|
1384
|
+
const responseData = await response.json();
|
|
1385
|
+
const result = _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, _chunkOHM7JUMRjs.__spreadValues.call(void 0, {
|
|
1386
|
+
status: responseData.status
|
|
1387
|
+
}, responseData.passed !== null && responseData.passed !== void 0 && { passed: responseData.passed }), responseData.score !== null && responseData.score !== void 0 && { score: responseData.score }), responseData.details !== null && responseData.details !== void 0 && { details: responseData.details }), responseData.label !== null && responseData.label !== void 0 && { label: responseData.label }), responseData.cost !== null && responseData.cost !== void 0 && { cost: responseData.cost });
|
|
1388
|
+
langwatchSpan.setOutput({
|
|
1389
|
+
type: asGuardrail ? "guardrail_result" : "evaluation_result",
|
|
1390
|
+
value: result
|
|
1391
|
+
});
|
|
1392
|
+
if (result.status === "error") {
|
|
1393
|
+
otelSpan.setStatus({
|
|
1394
|
+
code: _api.SpanStatusCode.ERROR,
|
|
1395
|
+
message: (_a = result.details) != null ? _a : "Evaluation failed"
|
|
1396
|
+
});
|
|
1397
|
+
} else {
|
|
1398
|
+
otelSpan.setStatus({ code: _api.SpanStatusCode.OK });
|
|
1399
|
+
}
|
|
1400
|
+
return result;
|
|
1401
|
+
} catch (error) {
|
|
1402
|
+
const errorResult = {
|
|
1403
|
+
status: "error",
|
|
1404
|
+
details: error instanceof Error ? error.message : String(error)
|
|
1405
|
+
};
|
|
1406
|
+
if (asGuardrail) {
|
|
1407
|
+
errorResult.passed = true;
|
|
1408
|
+
}
|
|
1409
|
+
langwatchSpan.setOutput({
|
|
1410
|
+
type: asGuardrail ? "guardrail_result" : "evaluation_result",
|
|
1411
|
+
value: errorResult
|
|
1412
|
+
});
|
|
1413
|
+
otelSpan.setStatus({
|
|
1414
|
+
code: _api.SpanStatusCode.ERROR,
|
|
1415
|
+
message: errorResult.details
|
|
1416
|
+
});
|
|
1417
|
+
if (error instanceof Error) {
|
|
1418
|
+
otelSpan.recordException(error);
|
|
1419
|
+
}
|
|
1420
|
+
if (error instanceof EvaluatorNotFoundError || error instanceof EvaluationsApiError || error instanceof EvaluatorCallError) {
|
|
1421
|
+
throw error;
|
|
1422
|
+
}
|
|
1423
|
+
throw new EvaluatorCallError(
|
|
1424
|
+
slug,
|
|
1425
|
+
error instanceof Error ? error.message : String(error)
|
|
1426
|
+
);
|
|
1427
|
+
} finally {
|
|
1428
|
+
otelSpan.end();
|
|
1429
|
+
}
|
|
1430
|
+
};
|
|
1431
|
+
_chunkOHM7JUMRjs.__privateSet.call(void 0, this, _endpoint, config.endpoint);
|
|
1432
|
+
_chunkOHM7JUMRjs.__privateSet.call(void 0, this, _apiKey, config.apiKey);
|
|
1433
|
+
_chunkOHM7JUMRjs.__privateSet.call(void 0, this, _logger, config.logger);
|
|
1434
|
+
}
|
|
1435
|
+
};
|
|
1436
|
+
_endpoint = new WeakMap();
|
|
1437
|
+
_apiKey = new WeakMap();
|
|
1438
|
+
_logger = new WeakMap();
|
|
1439
|
+
|
|
1252
1440
|
// src/client-sdk/services/traces/types.ts
|
|
1253
1441
|
var TracesError = class extends Error {
|
|
1254
1442
|
constructor(message, operation, originalError) {
|
|
@@ -1260,13 +1448,13 @@ var TracesError = class extends Error {
|
|
|
1260
1448
|
};
|
|
1261
1449
|
|
|
1262
1450
|
// src/client-sdk/services/traces/tracing/tracer.ts
|
|
1263
|
-
var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${
|
|
1451
|
+
var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkNPFWFQK6js.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkNPFWFQK6js.LANGWATCH_SDK_VERSION);
|
|
1264
1452
|
|
|
1265
1453
|
// src/client-sdk/services/traces/service.ts
|
|
1266
1454
|
var TracesService = class {
|
|
1267
1455
|
constructor(config) {
|
|
1268
1456
|
this.config = config;
|
|
1269
|
-
return
|
|
1457
|
+
return _chunkDXBTJGCKjs.createTracingProxy.call(void 0,
|
|
1270
1458
|
this,
|
|
1271
1459
|
tracer
|
|
1272
1460
|
);
|
|
@@ -1329,18 +1517,18 @@ var LangWatch = class {
|
|
|
1329
1517
|
_chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _LangWatch_instances);
|
|
1330
1518
|
var _a, _b, _c, _d;
|
|
1331
1519
|
const apiKey = (_b = (_a = options.apiKey) != null ? _a : process.env.LANGWATCH_API_KEY) != null ? _b : "";
|
|
1332
|
-
const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d :
|
|
1520
|
+
const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkNPFWFQK6js.DEFAULT_ENDPOINT;
|
|
1333
1521
|
this.config = _chunkOHM7JUMRjs.__privateMethod.call(void 0, this, _LangWatch_instances, createInternalConfig_fn).call(this, {
|
|
1334
1522
|
apiKey,
|
|
1335
1523
|
endpoint,
|
|
1336
1524
|
options: options.options
|
|
1337
1525
|
});
|
|
1338
|
-
this.prompts = new (0,
|
|
1339
|
-
promptsApiService: new (0,
|
|
1340
|
-
localPromptsService: new (0,
|
|
1526
|
+
this.prompts = new (0, _chunkDXBTJGCKjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
|
|
1527
|
+
promptsApiService: new (0, _chunkDXBTJGCKjs.PromptsApiService)(this.config),
|
|
1528
|
+
localPromptsService: new (0, _chunkDXBTJGCKjs.LocalPromptsService)()
|
|
1341
1529
|
}, this.config));
|
|
1342
1530
|
this.traces = new TracesFacade(this.config);
|
|
1343
|
-
this.
|
|
1531
|
+
this.experiments = new ExperimentsFacade({
|
|
1344
1532
|
langwatchApiClient: this.config.langwatchApiClient,
|
|
1345
1533
|
endpoint: this.config.endpoint,
|
|
1346
1534
|
apiKey: this.config.apiKey,
|
|
@@ -1350,6 +1538,11 @@ var LangWatch = class {
|
|
|
1350
1538
|
langwatchApiClient: this.config.langwatchApiClient,
|
|
1351
1539
|
logger: this.config.logger
|
|
1352
1540
|
});
|
|
1541
|
+
this.evaluations = new EvaluationsFacade({
|
|
1542
|
+
endpoint: this.config.endpoint,
|
|
1543
|
+
apiKey: this.config.apiKey,
|
|
1544
|
+
logger: this.config.logger
|
|
1545
|
+
});
|
|
1353
1546
|
}
|
|
1354
1547
|
get apiClient() {
|
|
1355
1548
|
return this.config.langwatchApiClient;
|
|
@@ -1364,7 +1557,7 @@ createInternalConfig_fn = function({
|
|
|
1364
1557
|
var _a;
|
|
1365
1558
|
return {
|
|
1366
1559
|
logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunk5MQQRSVMjs.NoOpLogger)(),
|
|
1367
|
-
langwatchApiClient:
|
|
1560
|
+
langwatchApiClient: _chunkDXBTJGCKjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
|
|
1368
1561
|
endpoint,
|
|
1369
1562
|
apiKey
|
|
1370
1563
|
};
|
|
@@ -1391,5 +1584,10 @@ var logger = {
|
|
|
1391
1584
|
|
|
1392
1585
|
|
|
1393
1586
|
|
|
1394
|
-
|
|
1587
|
+
|
|
1588
|
+
|
|
1589
|
+
|
|
1590
|
+
|
|
1591
|
+
|
|
1592
|
+
exports.EvaluationError = EvaluationError; exports.EvaluationsApiError = EvaluationsApiError; exports.EvaluationsFacade = EvaluationsFacade; exports.EvaluatorCallError = EvaluatorCallError; exports.EvaluatorError = EvaluatorError; exports.EvaluatorNotFoundError = EvaluatorNotFoundError; exports.Experiment = Experiment; exports.ExperimentApiError = ExperimentApiError; exports.ExperimentError = ExperimentError; exports.ExperimentInitError = ExperimentInitError; exports.ExperimentsFacade = ExperimentsFacade; exports.FetchPolicy = _chunkDXBTJGCKjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunk4BNGSDYWjs.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunk4BNGSDYWjs.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
|
|
1395
1593
|
//# sourceMappingURL=index.js.map
|