langwatch 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{add-2UHFYNUA.mjs → add-36C7337U.mjs} +5 -5
- package/dist/{add-LUETMKBD.js → add-HNUQULX7.js} +8 -8
- package/dist/{add-LUETMKBD.js.map → add-HNUQULX7.js.map} +1 -1
- package/dist/chunk-2BALUJ3B.js +317 -0
- package/dist/chunk-2BALUJ3B.js.map +1 -0
- package/dist/{chunk-6SSCBYJM.js → chunk-4VAQQB35.js} +10 -10
- package/dist/{chunk-6SSCBYJM.js.map → chunk-4VAQQB35.js.map} +1 -1
- package/dist/{chunk-C4XUWCQR.js → chunk-D3YWZYYD.js} +2 -2
- package/dist/chunk-D3YWZYYD.js.map +1 -0
- package/dist/{chunk-BQRUUTN3.js → chunk-GNHX3RA6.js} +14 -14
- package/dist/{chunk-BQRUUTN3.js.map → chunk-GNHX3RA6.js.map} +1 -1
- package/dist/{chunk-WCNDT5SD.mjs → chunk-JX2FBF5Z.mjs} +2 -2
- package/dist/{chunk-IIUI2XYW.mjs → chunk-KD5U3MP3.mjs} +11 -2
- package/dist/chunk-KD5U3MP3.mjs.map +1 -0
- package/dist/chunk-KJQNNRAB.mjs +317 -0
- package/dist/chunk-KJQNNRAB.mjs.map +1 -0
- package/dist/{chunk-OTID7S7K.mjs → chunk-QOSICUZE.mjs} +3 -3
- package/dist/{chunk-ONXIZKC6.js → chunk-SHJZRXU7.js} +11 -2
- package/dist/chunk-SHJZRXU7.js.map +1 -0
- package/dist/{chunk-TB5KB737.mjs → chunk-WIB3D4PL.mjs} +2 -2
- package/dist/chunk-WIB3D4PL.mjs.map +1 -0
- package/dist/cli/index.js +6 -6
- package/dist/cli/index.mjs +6 -6
- package/dist/{implementation-Ck58nRkT.d.mts → implementation-Dl15eRjo.d.mts} +1 -1
- package/dist/{implementation-Bnc8Aymq.d.ts → implementation-gLzM6qpI.d.ts} +1 -1
- package/dist/index.d.mts +269 -56
- package/dist/index.d.ts +269 -56
- package/dist/index.js +344 -92
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +338 -86
- package/dist/index.mjs.map +1 -1
- package/dist/{list-WV5LA6LD.mjs → list-S5UIBHSU.mjs} +5 -5
- package/dist/{list-7U3M64GY.js → list-U5HX65KF.js} +8 -8
- package/dist/{list-7U3M64GY.js.map → list-U5HX65KF.js.map} +1 -1
- package/dist/{login-QKRT6PXA.mjs → login-ALPTL4S6.mjs} +2 -2
- package/dist/{login-B7DKMN7P.js → login-IINYTYEF.js} +3 -3
- package/dist/{login-B7DKMN7P.js.map → login-IINYTYEF.js.map} +1 -1
- package/dist/observability-sdk/index.d.mts +3 -3
- package/dist/observability-sdk/index.d.ts +3 -3
- package/dist/observability-sdk/index.js +4 -4
- package/dist/observability-sdk/index.mjs +3 -3
- package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
- package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
- package/dist/observability-sdk/instrumentation/langchain/index.js +3 -3
- package/dist/observability-sdk/instrumentation/langchain/index.mjs +1 -1
- package/dist/observability-sdk/setup/node/index.d.mts +24 -1
- package/dist/observability-sdk/setup/node/index.d.ts +24 -1
- package/dist/observability-sdk/setup/node/index.js +7 -292
- package/dist/observability-sdk/setup/node/index.js.map +1 -1
- package/dist/observability-sdk/setup/node/index.mjs +8 -293
- package/dist/observability-sdk/setup/node/index.mjs.map +1 -1
- package/dist/{remove-A4DKCN7A.js → remove-4NB23DL3.js} +7 -7
- package/dist/{remove-A4DKCN7A.js.map → remove-4NB23DL3.js.map} +1 -1
- package/dist/{remove-2OGMXSTR.mjs → remove-KVFBJ4XE.mjs} +5 -5
- package/dist/{sync-TNVCKWTC.mjs → sync-ITMRHKSW.mjs} +5 -5
- package/dist/{sync-WRZXIBZS.js → sync-UI7HBIDR.js} +7 -7
- package/dist/{sync-WRZXIBZS.js.map → sync-UI7HBIDR.js.map} +1 -1
- package/dist/{types-5h2Im4pl.d.mts → types-ExKeJEM0.d.mts} +1179 -17
- package/dist/{types-fo-Ij9pl.d.ts → types-WRcnQUom.d.ts} +1179 -17
- package/package.json +3 -2
- package/dist/chunk-C4XUWCQR.js.map +0 -1
- package/dist/chunk-IIUI2XYW.mjs.map +0 -1
- package/dist/chunk-ONXIZKC6.js.map +0 -1
- package/dist/chunk-TB5KB737.mjs.map +0 -1
- /package/dist/{add-2UHFYNUA.mjs.map → add-36C7337U.mjs.map} +0 -0
- /package/dist/{chunk-WCNDT5SD.mjs.map → chunk-JX2FBF5Z.mjs.map} +0 -0
- /package/dist/{chunk-OTID7S7K.mjs.map → chunk-QOSICUZE.mjs.map} +0 -0
- /package/dist/{list-WV5LA6LD.mjs.map → list-S5UIBHSU.mjs.map} +0 -0
- /package/dist/{login-QKRT6PXA.mjs.map → login-ALPTL4S6.mjs.map} +0 -0
- /package/dist/{remove-2OGMXSTR.mjs.map → remove-KVFBJ4XE.mjs.map} +0 -0
- /package/dist/{sync-TNVCKWTC.mjs.map → sync-ITMRHKSW.mjs.map} +0 -0
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.mjs';
|
|
2
|
-
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Ck58nRkT.mjs';
|
|
3
|
-
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-5h2Im4pl.mjs';
|
|
2
|
+
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Dl15eRjo.mjs';
|
|
3
|
+
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-ExKeJEM0.mjs';
|
|
4
4
|
import openApiCreateClient from 'openapi-fetch';
|
|
5
5
|
import { z } from 'zod';
|
|
6
6
|
export { l as attributes } from './types-DRiQaKFG.mjs';
|
|
@@ -405,7 +405,7 @@ type DatasetsFacadeConfig = {
|
|
|
405
405
|
* const dataset = await langwatch.datasets.get("my-dataset");
|
|
406
406
|
*
|
|
407
407
|
* // Use with evaluation
|
|
408
|
-
* const evaluation = langwatch.
|
|
408
|
+
* const evaluation = langwatch.experiments.init("my-experiment");
|
|
409
409
|
* await evaluation.run(dataset.entries.map(e => e.entry), async ({ item, index }) => {
|
|
410
410
|
* const output = await myLLM(item.input);
|
|
411
411
|
* await evaluation.evaluate("my-evaluator", {
|
|
@@ -447,16 +447,16 @@ declare class DatasetsFacade {
|
|
|
447
447
|
}
|
|
448
448
|
|
|
449
449
|
/**
|
|
450
|
-
* Types for the
|
|
450
|
+
* Types for the Experiments API
|
|
451
451
|
*
|
|
452
|
-
* These types define the structure for batch
|
|
452
|
+
* These types define the structure for batch experiments, including
|
|
453
453
|
* logging metrics, running evaluators, and managing targets.
|
|
454
454
|
*/
|
|
455
455
|
|
|
456
456
|
/**
|
|
457
457
|
* Status of an evaluation result
|
|
458
458
|
*/
|
|
459
|
-
type EvaluationStatus = "processed" | "error" | "skipped";
|
|
459
|
+
type EvaluationStatus$1 = "processed" | "error" | "skipped";
|
|
460
460
|
/**
|
|
461
461
|
* Target types for batch evaluations
|
|
462
462
|
*/
|
|
@@ -503,11 +503,11 @@ type TargetInfo = z.infer<typeof targetInfoSchema>;
|
|
|
503
503
|
/**
|
|
504
504
|
* Result of an evaluation
|
|
505
505
|
*/
|
|
506
|
-
type EvaluationResult = z.infer<typeof evaluationResultSchema>;
|
|
506
|
+
type EvaluationResult$1 = z.infer<typeof evaluationResultSchema>;
|
|
507
507
|
/**
|
|
508
|
-
* Options for initializing an
|
|
508
|
+
* Options for initializing an experiment
|
|
509
509
|
*/
|
|
510
|
-
type
|
|
510
|
+
type ExperimentInitOptions = {
|
|
511
511
|
/** Custom run ID (auto-generated if not provided) */
|
|
512
512
|
runId?: string;
|
|
513
513
|
/** Number of parallel threads for submit() */
|
|
@@ -533,7 +533,7 @@ type LogOptions = {
|
|
|
533
533
|
/** Human-readable description of the result */
|
|
534
534
|
details?: string;
|
|
535
535
|
/** Status of the evaluation */
|
|
536
|
-
status?: EvaluationStatus;
|
|
536
|
+
status?: EvaluationStatus$1;
|
|
537
537
|
/** Duration in milliseconds */
|
|
538
538
|
duration?: number;
|
|
539
539
|
/** Cost amount in USD */
|
|
@@ -551,7 +551,7 @@ type LogOptions = {
|
|
|
551
551
|
/**
|
|
552
552
|
* Options for the evaluate() method (built-in evaluators)
|
|
553
553
|
*/
|
|
554
|
-
type EvaluateOptions = {
|
|
554
|
+
type EvaluateOptions$1 = {
|
|
555
555
|
/**
|
|
556
556
|
* Row index in the dataset.
|
|
557
557
|
* Optional when called inside withTarget() - will be auto-inferred from context.
|
|
@@ -627,9 +627,9 @@ type TargetResult<R> = {
|
|
|
627
627
|
};
|
|
628
628
|
|
|
629
629
|
/**
|
|
630
|
-
*
|
|
630
|
+
* Experiment - Main class for running batch experiments
|
|
631
631
|
*
|
|
632
|
-
* Provides a clean API for running
|
|
632
|
+
* Provides a clean API for running experiments over datasets with:
|
|
633
633
|
* - Automatic tracing per iteration
|
|
634
634
|
* - Parallel execution with concurrency control
|
|
635
635
|
* - Batched result sending
|
|
@@ -638,9 +638,9 @@ type TargetResult<R> = {
|
|
|
638
638
|
*/
|
|
639
639
|
|
|
640
640
|
/**
|
|
641
|
-
*
|
|
641
|
+
* Experiment session for running batch experiments
|
|
642
642
|
*/
|
|
643
|
-
declare class Evaluation {
|
|
643
|
+
declare class Experiment {
|
|
644
644
|
readonly name: string;
|
|
645
645
|
readonly runId: string;
|
|
646
646
|
readonly experimentSlug: string;
|
|
@@ -671,7 +671,7 @@ declare class Evaluation {
|
|
|
671
671
|
endpoint: string;
|
|
672
672
|
apiKey: string;
|
|
673
673
|
logger: Logger;
|
|
674
|
-
} &
|
|
674
|
+
} & ExperimentInitOptions): Promise<Experiment>;
|
|
675
675
|
/**
|
|
676
676
|
* Initialize the evaluation by creating/getting the experiment
|
|
677
677
|
*/
|
|
@@ -743,7 +743,7 @@ declare class Evaluation {
|
|
|
743
743
|
* });
|
|
744
744
|
* ```
|
|
745
745
|
*/
|
|
746
|
-
evaluate(evaluatorSlug: string, options: EvaluateOptions): Promise<void>;
|
|
746
|
+
evaluate(evaluatorSlug: string, options: EvaluateOptions$1): Promise<void>;
|
|
747
747
|
/**
|
|
748
748
|
* Execute code within a target context with automatic tracing
|
|
749
749
|
*
|
|
@@ -811,12 +811,12 @@ declare class Evaluation {
|
|
|
811
811
|
}
|
|
812
812
|
|
|
813
813
|
/**
|
|
814
|
-
* Types for platform-configured
|
|
814
|
+
* Types for platform-configured experiments (Experiments Workbench)
|
|
815
815
|
*/
|
|
816
816
|
/**
|
|
817
|
-
* Summary of a completed
|
|
817
|
+
* Summary of a completed experiment run
|
|
818
818
|
*/
|
|
819
|
-
type EvaluationRunSummary = {
|
|
819
|
+
type ExperimentRunSummary = {
|
|
820
820
|
runId?: string;
|
|
821
821
|
totalCells?: number;
|
|
822
822
|
completedCells?: number;
|
|
@@ -850,9 +850,9 @@ type EvaluationRunSummary = {
|
|
|
850
850
|
totalCost?: number;
|
|
851
851
|
};
|
|
852
852
|
/**
|
|
853
|
-
* Options for running a platform
|
|
853
|
+
* Options for running a platform experiment
|
|
854
854
|
*/
|
|
855
|
-
type RunEvaluationOptions = {
|
|
855
|
+
type RunExperimentOptions = {
|
|
856
856
|
/**
|
|
857
857
|
* Polling interval in milliseconds (default: 2000)
|
|
858
858
|
*/
|
|
@@ -867,9 +867,9 @@ type RunEvaluationOptions = {
|
|
|
867
867
|
onProgress?: (progress: number, total: number) => void;
|
|
868
868
|
};
|
|
869
869
|
/**
|
|
870
|
-
* Final result of a platform
|
|
870
|
+
* Final result of a platform experiment run
|
|
871
871
|
*/
|
|
872
|
-
type EvaluationRunResult = {
|
|
872
|
+
type ExperimentRunResult = {
|
|
873
873
|
runId: string;
|
|
874
874
|
status: "completed" | "failed" | "stopped";
|
|
875
875
|
passed: number;
|
|
@@ -877,7 +877,7 @@ type EvaluationRunResult = {
|
|
|
877
877
|
passRate: number;
|
|
878
878
|
duration: number;
|
|
879
879
|
runUrl: string;
|
|
880
|
-
summary: EvaluationRunSummary;
|
|
880
|
+
summary: ExperimentRunSummary;
|
|
881
881
|
/**
|
|
882
882
|
* Print a CI-friendly summary of the results
|
|
883
883
|
* @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
|
|
@@ -886,53 +886,53 @@ type EvaluationRunResult = {
|
|
|
886
886
|
};
|
|
887
887
|
|
|
888
888
|
/**
|
|
889
|
-
*
|
|
889
|
+
* ExperimentsFacade - Entry point for the experiments API
|
|
890
890
|
*
|
|
891
891
|
* Provides:
|
|
892
|
-
* - `init()` method to create
|
|
893
|
-
* - `run()` method to execute platform-configured
|
|
892
|
+
* - `init()` method to create experiment sessions (SDK-defined experiments)
|
|
893
|
+
* - `run()` method to execute platform-configured experiments (Experiments Workbench)
|
|
894
894
|
*/
|
|
895
895
|
|
|
896
|
-
type
|
|
896
|
+
type ExperimentsFacadeConfig = {
|
|
897
897
|
langwatchApiClient: LangwatchApiClient;
|
|
898
898
|
endpoint: string;
|
|
899
899
|
apiKey: string;
|
|
900
900
|
logger: Logger;
|
|
901
901
|
};
|
|
902
902
|
/**
|
|
903
|
-
* Facade for creating
|
|
903
|
+
* Facade for creating experiment sessions and running platform-configured experiments
|
|
904
904
|
*/
|
|
905
|
-
declare class EvaluationFacade {
|
|
905
|
+
declare class ExperimentsFacade {
|
|
906
906
|
private readonly config;
|
|
907
|
-
constructor(config:
|
|
907
|
+
constructor(config: ExperimentsFacadeConfig);
|
|
908
908
|
/**
|
|
909
|
-
* Initialize a new
|
|
909
|
+
* Initialize a new experiment session (SDK-defined)
|
|
910
910
|
*
|
|
911
911
|
* @param name - Name of the experiment (used as slug)
|
|
912
912
|
* @param options - Optional configuration
|
|
913
|
-
* @returns An initialized
|
|
913
|
+
* @returns An initialized Experiment instance
|
|
914
914
|
*
|
|
915
915
|
* @example
|
|
916
916
|
* ```typescript
|
|
917
|
-
* const
|
|
917
|
+
* const experiment = await langwatch.experiments.init('my-experiment');
|
|
918
918
|
*
|
|
919
|
-
* await
|
|
919
|
+
* await experiment.run(dataset, async ({ item, index }) => {
|
|
920
920
|
* const response = await myAgent(item.question);
|
|
921
|
-
*
|
|
921
|
+
* experiment.log('accuracy', { index, score: 0.95 });
|
|
922
922
|
* });
|
|
923
923
|
* ```
|
|
924
924
|
*/
|
|
925
|
-
init(name: string, options?:
|
|
925
|
+
init(name: string, options?: ExperimentInitOptions): Promise<Experiment>;
|
|
926
926
|
/**
|
|
927
|
-
* Run a platform-configured
|
|
927
|
+
* Run a platform-configured experiment (Experiments Workbench)
|
|
928
928
|
*
|
|
929
|
-
* This runs an
|
|
929
|
+
* This runs an experiment that was configured in the LangWatch platform.
|
|
930
930
|
* The method automatically prints a summary and exits with code 1 on failure
|
|
931
931
|
* (unless `exitOnFailure: false` is passed).
|
|
932
932
|
*
|
|
933
|
-
* @param slug - The slug of the
|
|
933
|
+
* @param slug - The slug of the experiment (found in the experiment URL)
|
|
934
934
|
* @param options - Optional configuration
|
|
935
|
-
* @returns The
|
|
935
|
+
* @returns The experiment results including pass rate and summary
|
|
936
936
|
*
|
|
937
937
|
* @example
|
|
938
938
|
* ```typescript
|
|
@@ -940,17 +940,17 @@ declare class EvaluationFacade {
|
|
|
940
940
|
*
|
|
941
941
|
* const langwatch = new LangWatch();
|
|
942
942
|
*
|
|
943
|
-
* const result = await langwatch.
|
|
943
|
+
* const result = await langwatch.experiments.run("my-experiment-slug");
|
|
944
944
|
* result.printSummary();
|
|
945
945
|
* ```
|
|
946
946
|
*/
|
|
947
|
-
run(slug: string, options?:
|
|
947
|
+
run(slug: string, options?: RunExperimentOptions): Promise<ExperimentRunResult>;
|
|
948
948
|
/**
|
|
949
|
-
* Run an
|
|
949
|
+
* Run an experiment and wait for completion using polling
|
|
950
950
|
*/
|
|
951
951
|
private runWithPolling;
|
|
952
952
|
/**
|
|
953
|
-
* Start an
|
|
953
|
+
* Start an experiment run
|
|
954
954
|
*/
|
|
955
955
|
private startRun;
|
|
956
956
|
/**
|
|
@@ -962,7 +962,7 @@ declare class EvaluationFacade {
|
|
|
962
962
|
*/
|
|
963
963
|
private buildResult;
|
|
964
964
|
/**
|
|
965
|
-
* Print a CI-friendly summary of the
|
|
965
|
+
* Print a CI-friendly summary of the experiment results
|
|
966
966
|
*/
|
|
967
967
|
private printSummary;
|
|
968
968
|
private sleep;
|
|
@@ -973,25 +973,25 @@ declare class EvaluationFacade {
|
|
|
973
973
|
}
|
|
974
974
|
|
|
975
975
|
/**
|
|
976
|
-
* Errors for the
|
|
976
|
+
* Errors for the Experiments API
|
|
977
977
|
*/
|
|
978
978
|
/**
|
|
979
|
-
* Base error for
|
|
979
|
+
* Base error for experiment-related issues
|
|
980
980
|
*/
|
|
981
|
-
declare class EvaluationError extends Error {
|
|
981
|
+
declare class ExperimentError extends Error {
|
|
982
982
|
constructor(message: string);
|
|
983
983
|
}
|
|
984
984
|
/**
|
|
985
985
|
* Thrown when initialization fails
|
|
986
986
|
*/
|
|
987
|
-
declare class
|
|
987
|
+
declare class ExperimentInitError extends ExperimentError {
|
|
988
988
|
readonly cause?: Error | undefined;
|
|
989
989
|
constructor(message: string, cause?: Error | undefined);
|
|
990
990
|
}
|
|
991
991
|
/**
|
|
992
992
|
* Thrown when API calls fail
|
|
993
993
|
*/
|
|
994
|
-
declare class EvaluationApiError extends EvaluationError {
|
|
994
|
+
declare class ExperimentApiError extends ExperimentError {
|
|
995
995
|
readonly statusCode?: number | undefined;
|
|
996
996
|
readonly cause?: Error | undefined;
|
|
997
997
|
constructor(message: string, statusCode?: number | undefined, cause?: Error | undefined);
|
|
@@ -999,7 +999,7 @@ declare class EvaluationApiError extends EvaluationError {
|
|
|
999
999
|
/**
|
|
1000
1000
|
* Thrown when target metadata conflicts
|
|
1001
1001
|
*/
|
|
1002
|
-
declare class TargetMetadataConflictError extends EvaluationError {
|
|
1002
|
+
declare class TargetMetadataConflictError extends ExperimentError {
|
|
1003
1003
|
readonly targetName: string;
|
|
1004
1004
|
readonly existingMetadata: Record<string, unknown>;
|
|
1005
1005
|
readonly newMetadata: Record<string, unknown>;
|
|
@@ -1008,12 +1008,164 @@ declare class TargetMetadataConflictError extends EvaluationError {
|
|
|
1008
1008
|
/**
|
|
1009
1009
|
* Thrown when an evaluator call fails
|
|
1010
1010
|
*/
|
|
1011
|
-
declare class EvaluatorError extends
|
|
1011
|
+
declare class EvaluatorError extends ExperimentError {
|
|
1012
1012
|
readonly evaluatorSlug: string;
|
|
1013
1013
|
readonly cause?: Error | undefined;
|
|
1014
1014
|
constructor(evaluatorSlug: string, message: string, cause?: Error | undefined);
|
|
1015
1015
|
}
|
|
1016
1016
|
|
|
1017
|
+
/**
|
|
1018
|
+
* Types for the Evaluations API (Online Evaluations / Guardrails)
|
|
1019
|
+
*
|
|
1020
|
+
* These types define the structure for running evaluators and guardrails
|
|
1021
|
+
* in real-time against LLM inputs/outputs.
|
|
1022
|
+
*/
|
|
1023
|
+
/**
|
|
1024
|
+
* Status of an evaluation result
|
|
1025
|
+
*/
|
|
1026
|
+
type EvaluationStatus = "processed" | "skipped" | "error";
|
|
1027
|
+
/**
|
|
1028
|
+
* Cost information from an evaluation
|
|
1029
|
+
*/
|
|
1030
|
+
type EvaluationCost = {
|
|
1031
|
+
currency: string;
|
|
1032
|
+
amount: number;
|
|
1033
|
+
};
|
|
1034
|
+
/**
|
|
1035
|
+
* Result returned from running an evaluator
|
|
1036
|
+
*/
|
|
1037
|
+
type EvaluationResult = {
|
|
1038
|
+
/** Status of the evaluation */
|
|
1039
|
+
status: EvaluationStatus;
|
|
1040
|
+
/** Whether the evaluation passed (for guardrails) */
|
|
1041
|
+
passed?: boolean;
|
|
1042
|
+
/** Numeric score (typically 0-1) */
|
|
1043
|
+
score?: number;
|
|
1044
|
+
/** Human-readable details about the result */
|
|
1045
|
+
details?: string;
|
|
1046
|
+
/** Label/category for the result */
|
|
1047
|
+
label?: string;
|
|
1048
|
+
/** Cost of running the evaluation */
|
|
1049
|
+
cost?: EvaluationCost;
|
|
1050
|
+
};
|
|
1051
|
+
/**
|
|
1052
|
+
* Options for the evaluate() method
|
|
1053
|
+
*/
|
|
1054
|
+
type EvaluateOptions = {
|
|
1055
|
+
/** Data to pass to the evaluator (input, output, contexts, etc.) */
|
|
1056
|
+
data: Record<string, unknown>;
|
|
1057
|
+
/** Human-readable name for this evaluation */
|
|
1058
|
+
name?: string;
|
|
1059
|
+
/** Evaluator-specific settings */
|
|
1060
|
+
settings?: Record<string, unknown>;
|
|
1061
|
+
/** Whether to run as a guardrail (affects error handling) */
|
|
1062
|
+
asGuardrail?: boolean;
|
|
1063
|
+
};
|
|
1064
|
+
|
|
1065
|
+
/**
|
|
1066
|
+
* EvaluationsFacade - Entry point for the Evaluations API (Online Evaluations / Guardrails)
|
|
1067
|
+
*
|
|
1068
|
+
* Provides an API for running evaluators and guardrails in real-time against LLM inputs/outputs.
|
|
1069
|
+
*
|
|
1070
|
+
* @example
|
|
1071
|
+
* ```typescript
|
|
1072
|
+
* const langwatch = new LangWatch({ apiKey: "your-api-key" });
|
|
1073
|
+
*
|
|
1074
|
+
* // Run a guardrail
|
|
1075
|
+
* const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
|
|
1076
|
+
* data: { input: userInput, output: generatedResponse },
|
|
1077
|
+
* name: "PII Detection",
|
|
1078
|
+
* asGuardrail: true,
|
|
1079
|
+
* settings: {},
|
|
1080
|
+
* });
|
|
1081
|
+
*
|
|
1082
|
+
* if (!guardrail.passed) {
|
|
1083
|
+
* return "I'm sorry, I can't do that.";
|
|
1084
|
+
* }
|
|
1085
|
+
* ```
|
|
1086
|
+
*/
|
|
1087
|
+
|
|
1088
|
+
type EvaluationsFacadeConfig = {
|
|
1089
|
+
endpoint: string;
|
|
1090
|
+
apiKey: string;
|
|
1091
|
+
logger: Logger;
|
|
1092
|
+
};
|
|
1093
|
+
declare class EvaluationsFacade {
|
|
1094
|
+
#private;
|
|
1095
|
+
constructor(config: EvaluationsFacadeConfig);
|
|
1096
|
+
/**
|
|
1097
|
+
* Run an evaluator or guardrail against provided data
|
|
1098
|
+
*
|
|
1099
|
+
* Creates an OpenTelemetry span attached to the current trace context,
|
|
1100
|
+
* calls the LangWatch evaluation API, and returns the result.
|
|
1101
|
+
*
|
|
1102
|
+
* @param slug - The evaluator slug (e.g., "presidio/pii_detection", "langevals/llm_boolean")
|
|
1103
|
+
* @param options - Evaluation options including data, name, settings, and asGuardrail flag
|
|
1104
|
+
* @returns The evaluation result with status, passed, score, details, label, and cost
|
|
1105
|
+
*
|
|
1106
|
+
* @example
|
|
1107
|
+
* ```typescript
|
|
1108
|
+
* // Run as a guardrail (synchronous evaluation that can block responses)
|
|
1109
|
+
* const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
|
|
1110
|
+
* data: { input: userInput, output: generatedResponse },
|
|
1111
|
+
* name: "PII Detection Guardrail",
|
|
1112
|
+
* asGuardrail: true,
|
|
1113
|
+
* });
|
|
1114
|
+
*
|
|
1115
|
+
* if (!guardrail.passed) {
|
|
1116
|
+
* console.log("PII detected:", guardrail.details);
|
|
1117
|
+
* return "Sorry, I cannot process that request.";
|
|
1118
|
+
* }
|
|
1119
|
+
* ```
|
|
1120
|
+
*
|
|
1121
|
+
* @example
|
|
1122
|
+
* ```typescript
|
|
1123
|
+
* // Run as an online evaluation (async scoring for monitoring)
|
|
1124
|
+
* const result = await langwatch.evaluations.evaluate("langevals/llm_boolean", {
|
|
1125
|
+
* data: { input: question, output: response },
|
|
1126
|
+
* name: "Quality Check",
|
|
1127
|
+
* settings: { prompt: "Check if the response answers the question." },
|
|
1128
|
+
* });
|
|
1129
|
+
*
|
|
1130
|
+
* console.log("Score:", result.score);
|
|
1131
|
+
* console.log("Details:", result.details);
|
|
1132
|
+
* ```
|
|
1133
|
+
*/
|
|
1134
|
+
evaluate: (slug: string, options: EvaluateOptions) => Promise<EvaluationResult>;
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
/**
|
|
1138
|
+
* Error classes for the Evaluations API
|
|
1139
|
+
*/
|
|
1140
|
+
/**
|
|
1141
|
+
* Base error for evaluation operations
|
|
1142
|
+
*/
|
|
1143
|
+
declare class EvaluationError extends Error {
|
|
1144
|
+
constructor(message: string);
|
|
1145
|
+
}
|
|
1146
|
+
/**
|
|
1147
|
+
* Error when an evaluator call fails
|
|
1148
|
+
*/
|
|
1149
|
+
declare class EvaluatorCallError extends EvaluationError {
|
|
1150
|
+
readonly evaluatorSlug: string;
|
|
1151
|
+
readonly statusCode?: number;
|
|
1152
|
+
constructor(evaluatorSlug: string, message: string, statusCode?: number);
|
|
1153
|
+
}
|
|
1154
|
+
/**
|
|
1155
|
+
* Error when evaluator is not found
|
|
1156
|
+
*/
|
|
1157
|
+
declare class EvaluatorNotFoundError extends EvaluationError {
|
|
1158
|
+
readonly evaluatorSlug: string;
|
|
1159
|
+
constructor(evaluatorSlug: string);
|
|
1160
|
+
}
|
|
1161
|
+
/**
|
|
1162
|
+
* Error from the evaluations API
|
|
1163
|
+
*/
|
|
1164
|
+
declare class EvaluationsApiError extends EvaluationError {
|
|
1165
|
+
readonly statusCode: number;
|
|
1166
|
+
constructor(message: string, statusCode: number);
|
|
1167
|
+
}
|
|
1168
|
+
|
|
1017
1169
|
interface GetTraceParams {
|
|
1018
1170
|
includeSpans?: boolean;
|
|
1019
1171
|
}
|
|
@@ -1037,15 +1189,76 @@ declare class LangWatch {
|
|
|
1037
1189
|
private readonly config;
|
|
1038
1190
|
readonly prompts: PromptsFacade;
|
|
1039
1191
|
readonly traces: TracesFacade;
|
|
1040
|
-
readonly evaluation: EvaluationFacade;
|
|
1041
1192
|
readonly datasets: DatasetsFacade;
|
|
1193
|
+
/**
|
|
1194
|
+
* Run experiments on LangWatch platform or via SDK.
|
|
1195
|
+
*
|
|
1196
|
+
* Platform experiments (CI/CD):
|
|
1197
|
+
* ```typescript
|
|
1198
|
+
* const result = await langwatch.experiments.run("my-experiment-slug");
|
|
1199
|
+
* result.printSummary();
|
|
1200
|
+
* ```
|
|
1201
|
+
*
|
|
1202
|
+
* SDK-defined experiments:
|
|
1203
|
+
* ```typescript
|
|
1204
|
+
* const experiment = await langwatch.experiments.init("my-experiment");
|
|
1205
|
+
* // ... run evaluators using experiment.evaluate()
|
|
1206
|
+
* ```
|
|
1207
|
+
*/
|
|
1208
|
+
readonly experiments: ExperimentsFacade;
|
|
1209
|
+
/**
|
|
1210
|
+
* Run evaluators and guardrails in real-time (Online Evaluations).
|
|
1211
|
+
*
|
|
1212
|
+
* @example
|
|
1213
|
+
* ```typescript
|
|
1214
|
+
* // Run a guardrail
|
|
1215
|
+
* const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
|
|
1216
|
+
* data: { input: userInput, output: generatedResponse },
|
|
1217
|
+
* name: "PII Detection",
|
|
1218
|
+
* asGuardrail: true,
|
|
1219
|
+
* });
|
|
1220
|
+
*
|
|
1221
|
+
* if (!guardrail.passed) {
|
|
1222
|
+
* return "I'm sorry, I can't do that.";
|
|
1223
|
+
* }
|
|
1224
|
+
* ```
|
|
1225
|
+
*/
|
|
1226
|
+
readonly evaluations: EvaluationsFacade;
|
|
1042
1227
|
constructor(options?: LangWatchConstructorOptions);
|
|
1043
1228
|
get apiClient(): LangwatchApiClient;
|
|
1044
1229
|
}
|
|
1045
1230
|
|
|
1231
|
+
type EvaluatorResponse = NonNullable<paths["/api/evaluators"]["get"]["responses"]["200"]["content"]["application/json"]>[number];
|
|
1232
|
+
type EvaluatorField = EvaluatorResponse["fields"][number];
|
|
1233
|
+
|
|
1234
|
+
/**
|
|
1235
|
+
* Service for retrieving evaluator resources via the LangWatch API.
|
|
1236
|
+
*
|
|
1237
|
+
* Provides read-only access to project evaluators with computed fields.
|
|
1238
|
+
*/
|
|
1239
|
+
declare class EvaluatorsApiService {
|
|
1240
|
+
private readonly apiClient;
|
|
1241
|
+
constructor(config?: Pick<InternalConfig, "langwatchApiClient">);
|
|
1242
|
+
private handleApiError;
|
|
1243
|
+
/**
|
|
1244
|
+
* Fetches all evaluators for the project.
|
|
1245
|
+
*/
|
|
1246
|
+
getAll(): Promise<EvaluatorResponse[]>;
|
|
1247
|
+
/**
|
|
1248
|
+
* Fetches a single evaluator by its ID or slug.
|
|
1249
|
+
*/
|
|
1250
|
+
get(idOrSlug: string): Promise<EvaluatorResponse>;
|
|
1251
|
+
}
|
|
1252
|
+
|
|
1253
|
+
declare class EvaluatorsApiError extends Error {
|
|
1254
|
+
readonly operation: string;
|
|
1255
|
+
readonly originalError?: unknown | undefined;
|
|
1256
|
+
constructor(message: string, operation: string, originalError?: unknown | undefined);
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1046
1259
|
declare const logger: {
|
|
1047
1260
|
ConsoleLogger: typeof ConsoleLogger;
|
|
1048
1261
|
NoOpLogger: typeof NoOpLogger;
|
|
1049
1262
|
};
|
|
1050
1263
|
|
|
1051
|
-
export { type EvaluateOptions,
|
|
1264
|
+
export { type EvaluateOptions, type EvaluationCost, EvaluationError, type EvaluationResult, type EvaluationStatus, EvaluationsApiError, EvaluationsFacade, EvaluatorCallError, EvaluatorError, type EvaluatorField, EvaluatorNotFoundError, type EvaluatorResponse, EvaluatorsApiError, EvaluatorsApiService, Experiment, ExperimentApiError, ExperimentError, type EvaluateOptions$1 as ExperimentEvaluateOptions, type EvaluationResult$1 as ExperimentEvaluationResult, type EvaluationStatus$1 as ExperimentEvaluationStatus, ExperimentInitError, type ExperimentInitOptions, ExperimentsFacade, FetchPolicy, type GetPromptOptions, LangWatch, type LogOptions, type RunCallback, type RunContext, type RunOptions, type TargetInfo, type TargetMetadata, TargetMetadataConflictError, type TargetType, logger };
|