langwatch 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{add-UB5U3K3M.js → add-Z5UVUPCK.js} +7 -7
- package/dist/{add-UB5U3K3M.js.map → add-Z5UVUPCK.js.map} +1 -1
- package/dist/{add-XV5SUAXF.mjs → add-ZAPD2GBO.mjs} +4 -4
- package/dist/{chunk-JQYW7RY7.js → chunk-4BNGSDYW.js} +14 -14
- package/dist/{chunk-JQYW7RY7.js.map → chunk-4BNGSDYW.js.map} +1 -1
- package/dist/{chunk-LKE6DMUP.mjs → chunk-77XIPD42.mjs} +2 -2
- package/dist/chunk-77XIPD42.mjs.map +1 -0
- package/dist/{chunk-D4H6PR6H.js → chunk-DXBTJGCK.js} +10 -10
- package/dist/{chunk-D4H6PR6H.js.map → chunk-DXBTJGCK.js.map} +1 -1
- package/dist/{chunk-WZ7FYUHN.mjs → chunk-J4HK6XZR.mjs} +5 -5
- package/dist/{chunk-N7PJJMU2.js → chunk-NPFWFQK6.js} +2 -2
- package/dist/chunk-NPFWFQK6.js.map +1 -0
- package/dist/chunk-OAKQ7UBU.mjs +317 -0
- package/dist/chunk-OAKQ7UBU.mjs.map +1 -0
- package/dist/chunk-RM2VUAFL.js +317 -0
- package/dist/chunk-RM2VUAFL.js.map +1 -0
- package/dist/{chunk-556ZFJMK.mjs → chunk-SZRV7E6P.mjs} +2 -2
- package/dist/cli/index.js +6 -6
- package/dist/cli/index.mjs +6 -6
- package/dist/{implementation-CPxv2BdW.d.ts → implementation-Bnc8Aymq.d.ts} +1 -1
- package/dist/{implementation-CVrmD0bz.d.mts → implementation-Ck58nRkT.d.mts} +1 -1
- package/dist/index.d.mts +347 -38
- package/dist/index.d.ts +347 -38
- package/dist/index.js +519 -47
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +518 -46
- package/dist/index.mjs.map +1 -1
- package/dist/{list-DUNP46AD.js → list-LASBYRI4.js} +7 -7
- package/dist/{list-DUNP46AD.js.map → list-LASBYRI4.js.map} +1 -1
- package/dist/{list-T4QS6CT2.mjs → list-XX4VPNJA.mjs} +4 -4
- package/dist/{login-3H27NIOD.js → login-2VCZDSLE.js} +3 -3
- package/dist/{login-3H27NIOD.js.map → login-2VCZDSLE.js.map} +1 -1
- package/dist/{login-T2ET7TKH.mjs → login-CZ2257SV.mjs} +2 -2
- package/dist/observability-sdk/index.d.mts +3 -3
- package/dist/observability-sdk/index.d.ts +3 -3
- package/dist/observability-sdk/index.js +4 -4
- package/dist/observability-sdk/index.js.map +1 -1
- package/dist/observability-sdk/index.mjs +7 -7
- package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
- package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
- package/dist/observability-sdk/setup/node/index.d.mts +24 -1
- package/dist/observability-sdk/setup/node/index.d.ts +24 -1
- package/dist/observability-sdk/setup/node/index.js +7 -292
- package/dist/observability-sdk/setup/node/index.js.map +1 -1
- package/dist/observability-sdk/setup/node/index.mjs +8 -293
- package/dist/observability-sdk/setup/node/index.mjs.map +1 -1
- package/dist/{remove-F5RM4775.mjs → remove-KESD7YHL.mjs} +4 -4
- package/dist/{remove-V4JL5Z4U.js → remove-XWN3XTF5.js} +6 -6
- package/dist/{remove-V4JL5Z4U.js.map → remove-XWN3XTF5.js.map} +1 -1
- package/dist/{sync-DIOKWE6R.js → sync-IJ26JHEP.js} +6 -6
- package/dist/{sync-DIOKWE6R.js.map → sync-IJ26JHEP.js.map} +1 -1
- package/dist/{sync-VGWOLOLJ.mjs → sync-SCVP7CHX.mjs} +4 -4
- package/dist/{types-Kts5RGLY.d.mts → types-5h2Im4pl.d.mts} +162 -0
- package/dist/{types-usU5mTCX.d.ts → types-fo-Ij9pl.d.ts} +162 -0
- package/package.json +3 -2
- package/dist/chunk-LKE6DMUP.mjs.map +0 -1
- package/dist/chunk-N7PJJMU2.js.map +0 -1
- /package/dist/{add-XV5SUAXF.mjs.map → add-ZAPD2GBO.mjs.map} +0 -0
- /package/dist/{chunk-WZ7FYUHN.mjs.map → chunk-J4HK6XZR.mjs.map} +0 -0
- /package/dist/{chunk-556ZFJMK.mjs.map → chunk-SZRV7E6P.mjs.map} +0 -0
- /package/dist/{list-T4QS6CT2.mjs.map → list-XX4VPNJA.mjs.map} +0 -0
- /package/dist/{login-T2ET7TKH.mjs.map → login-CZ2257SV.mjs.map} +0 -0
- /package/dist/{remove-F5RM4775.mjs.map → remove-KESD7YHL.mjs.map} +0 -0
- /package/dist/{sync-VGWOLOLJ.mjs.map → sync-SCVP7CHX.mjs.map} +0 -0
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.mjs';
|
|
2
|
-
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-
|
|
3
|
-
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-
|
|
2
|
+
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Ck58nRkT.mjs';
|
|
3
|
+
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-5h2Im4pl.mjs';
|
|
4
4
|
import openApiCreateClient from 'openapi-fetch';
|
|
5
5
|
import { z } from 'zod';
|
|
6
6
|
export { l as attributes } from './types-DRiQaKFG.mjs';
|
|
@@ -405,7 +405,7 @@ type DatasetsFacadeConfig = {
|
|
|
405
405
|
* const dataset = await langwatch.datasets.get("my-dataset");
|
|
406
406
|
*
|
|
407
407
|
* // Use with evaluation
|
|
408
|
-
* const evaluation = langwatch.
|
|
408
|
+
* const evaluation = langwatch.experiments.init("my-experiment");
|
|
409
409
|
* await evaluation.run(dataset.entries.map(e => e.entry), async ({ item, index }) => {
|
|
410
410
|
* const output = await myLLM(item.input);
|
|
411
411
|
* await evaluation.evaluate("my-evaluator", {
|
|
@@ -447,16 +447,16 @@ declare class DatasetsFacade {
|
|
|
447
447
|
}
|
|
448
448
|
|
|
449
449
|
/**
|
|
450
|
-
* Types for the
|
|
450
|
+
* Types for the Experiments API
|
|
451
451
|
*
|
|
452
|
-
* These types define the structure for batch
|
|
452
|
+
* These types define the structure for batch experiments, including
|
|
453
453
|
* logging metrics, running evaluators, and managing targets.
|
|
454
454
|
*/
|
|
455
455
|
|
|
456
456
|
/**
|
|
457
457
|
* Status of an evaluation result
|
|
458
458
|
*/
|
|
459
|
-
type EvaluationStatus = "processed" | "error" | "skipped";
|
|
459
|
+
type EvaluationStatus$1 = "processed" | "error" | "skipped";
|
|
460
460
|
/**
|
|
461
461
|
* Target types for batch evaluations
|
|
462
462
|
*/
|
|
@@ -503,11 +503,11 @@ type TargetInfo = z.infer<typeof targetInfoSchema>;
|
|
|
503
503
|
/**
|
|
504
504
|
* Result of an evaluation
|
|
505
505
|
*/
|
|
506
|
-
type EvaluationResult = z.infer<typeof evaluationResultSchema>;
|
|
506
|
+
type EvaluationResult$1 = z.infer<typeof evaluationResultSchema>;
|
|
507
507
|
/**
|
|
508
|
-
* Options for initializing an
|
|
508
|
+
* Options for initializing an experiment
|
|
509
509
|
*/
|
|
510
|
-
type
|
|
510
|
+
type ExperimentInitOptions = {
|
|
511
511
|
/** Custom run ID (auto-generated if not provided) */
|
|
512
512
|
runId?: string;
|
|
513
513
|
/** Number of parallel threads for submit() */
|
|
@@ -533,7 +533,7 @@ type LogOptions = {
|
|
|
533
533
|
/** Human-readable description of the result */
|
|
534
534
|
details?: string;
|
|
535
535
|
/** Status of the evaluation */
|
|
536
|
-
status?: EvaluationStatus;
|
|
536
|
+
status?: EvaluationStatus$1;
|
|
537
537
|
/** Duration in milliseconds */
|
|
538
538
|
duration?: number;
|
|
539
539
|
/** Cost amount in USD */
|
|
@@ -551,7 +551,7 @@ type LogOptions = {
|
|
|
551
551
|
/**
|
|
552
552
|
* Options for the evaluate() method (built-in evaluators)
|
|
553
553
|
*/
|
|
554
|
-
type EvaluateOptions = {
|
|
554
|
+
type EvaluateOptions$1 = {
|
|
555
555
|
/**
|
|
556
556
|
* Row index in the dataset.
|
|
557
557
|
* Optional when called inside withTarget() - will be auto-inferred from context.
|
|
@@ -627,9 +627,9 @@ type TargetResult<R> = {
|
|
|
627
627
|
};
|
|
628
628
|
|
|
629
629
|
/**
|
|
630
|
-
*
|
|
630
|
+
* Experiment - Main class for running batch experiments
|
|
631
631
|
*
|
|
632
|
-
* Provides a clean API for running
|
|
632
|
+
* Provides a clean API for running experiments over datasets with:
|
|
633
633
|
* - Automatic tracing per iteration
|
|
634
634
|
* - Parallel execution with concurrency control
|
|
635
635
|
* - Batched result sending
|
|
@@ -638,9 +638,9 @@ type TargetResult<R> = {
|
|
|
638
638
|
*/
|
|
639
639
|
|
|
640
640
|
/**
|
|
641
|
-
*
|
|
641
|
+
* Experiment session for running batch experiments
|
|
642
642
|
*/
|
|
643
|
-
declare class
|
|
643
|
+
declare class Experiment {
|
|
644
644
|
readonly name: string;
|
|
645
645
|
readonly runId: string;
|
|
646
646
|
readonly experimentSlug: string;
|
|
@@ -671,7 +671,7 @@ declare class Evaluation {
|
|
|
671
671
|
endpoint: string;
|
|
672
672
|
apiKey: string;
|
|
673
673
|
logger: Logger;
|
|
674
|
-
} &
|
|
674
|
+
} & ExperimentInitOptions): Promise<Experiment>;
|
|
675
675
|
/**
|
|
676
676
|
* Initialize the evaluation by creating/getting the experiment
|
|
677
677
|
*/
|
|
@@ -743,7 +743,7 @@ declare class Evaluation {
|
|
|
743
743
|
* });
|
|
744
744
|
* ```
|
|
745
745
|
*/
|
|
746
|
-
evaluate(evaluatorSlug: string, options: EvaluateOptions): Promise<void>;
|
|
746
|
+
evaluate(evaluatorSlug: string, options: EvaluateOptions$1): Promise<void>;
|
|
747
747
|
/**
|
|
748
748
|
* Execute code within a target context with automatic tracing
|
|
749
749
|
*
|
|
@@ -811,63 +811,187 @@ declare class Evaluation {
|
|
|
811
811
|
}
|
|
812
812
|
|
|
813
813
|
/**
|
|
814
|
-
*
|
|
814
|
+
* Types for platform-configured experiments (Experiments Workbench)
|
|
815
|
+
*/
|
|
816
|
+
/**
|
|
817
|
+
* Summary of a completed experiment run
|
|
818
|
+
*/
|
|
819
|
+
type ExperimentRunSummary = {
|
|
820
|
+
runId?: string;
|
|
821
|
+
totalCells?: number;
|
|
822
|
+
completedCells?: number;
|
|
823
|
+
failedCells?: number;
|
|
824
|
+
duration?: number;
|
|
825
|
+
runUrl?: string;
|
|
826
|
+
timestamps?: {
|
|
827
|
+
startedAt: number;
|
|
828
|
+
finishedAt?: number;
|
|
829
|
+
stoppedAt?: number;
|
|
830
|
+
};
|
|
831
|
+
targets?: Array<{
|
|
832
|
+
targetId: string;
|
|
833
|
+
name: string;
|
|
834
|
+
passed: number;
|
|
835
|
+
failed: number;
|
|
836
|
+
avgLatency: number;
|
|
837
|
+
totalCost: number;
|
|
838
|
+
}>;
|
|
839
|
+
evaluators?: Array<{
|
|
840
|
+
evaluatorId: string;
|
|
841
|
+
name: string;
|
|
842
|
+
passed: number;
|
|
843
|
+
failed: number;
|
|
844
|
+
passRate: number;
|
|
845
|
+
avgScore?: number;
|
|
846
|
+
}>;
|
|
847
|
+
totalPassed?: number;
|
|
848
|
+
totalFailed?: number;
|
|
849
|
+
passRate?: number;
|
|
850
|
+
totalCost?: number;
|
|
851
|
+
};
|
|
852
|
+
/**
|
|
853
|
+
* Options for running a platform experiment
|
|
854
|
+
*/
|
|
855
|
+
type RunExperimentOptions = {
|
|
856
|
+
/**
|
|
857
|
+
* Polling interval in milliseconds (default: 2000)
|
|
858
|
+
*/
|
|
859
|
+
pollInterval?: number;
|
|
860
|
+
/**
|
|
861
|
+
* Maximum time to wait for completion in milliseconds (default: 600000 = 10 minutes)
|
|
862
|
+
*/
|
|
863
|
+
timeout?: number;
|
|
864
|
+
/**
|
|
865
|
+
* Callback for progress updates
|
|
866
|
+
*/
|
|
867
|
+
onProgress?: (progress: number, total: number) => void;
|
|
868
|
+
};
|
|
869
|
+
/**
|
|
870
|
+
* Final result of a platform experiment run
|
|
871
|
+
*/
|
|
872
|
+
type ExperimentRunResult = {
|
|
873
|
+
runId: string;
|
|
874
|
+
status: "completed" | "failed" | "stopped";
|
|
875
|
+
passed: number;
|
|
876
|
+
failed: number;
|
|
877
|
+
passRate: number;
|
|
878
|
+
duration: number;
|
|
879
|
+
runUrl: string;
|
|
880
|
+
summary: ExperimentRunSummary;
|
|
881
|
+
/**
|
|
882
|
+
* Print a CI-friendly summary of the results
|
|
883
|
+
* @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
|
|
884
|
+
*/
|
|
885
|
+
printSummary: (exitOnFailure?: boolean) => void;
|
|
886
|
+
};
|
|
887
|
+
|
|
888
|
+
/**
|
|
889
|
+
* ExperimentsFacade - Entry point for the experiments API
|
|
815
890
|
*
|
|
816
|
-
* Provides
|
|
891
|
+
* Provides:
|
|
892
|
+
* - `init()` method to create experiment sessions (SDK-defined experiments)
|
|
893
|
+
* - `run()` method to execute platform-configured experiments (Experiments Workbench)
|
|
817
894
|
*/
|
|
818
895
|
|
|
819
|
-
type
|
|
896
|
+
type ExperimentsFacadeConfig = {
|
|
820
897
|
langwatchApiClient: LangwatchApiClient;
|
|
821
898
|
endpoint: string;
|
|
822
899
|
apiKey: string;
|
|
823
900
|
logger: Logger;
|
|
824
901
|
};
|
|
825
902
|
/**
|
|
826
|
-
* Facade for creating
|
|
903
|
+
* Facade for creating experiment sessions and running platform-configured experiments
|
|
827
904
|
*/
|
|
828
|
-
declare class
|
|
905
|
+
declare class ExperimentsFacade {
|
|
829
906
|
private readonly config;
|
|
830
|
-
constructor(config:
|
|
907
|
+
constructor(config: ExperimentsFacadeConfig);
|
|
831
908
|
/**
|
|
832
|
-
* Initialize a new
|
|
909
|
+
* Initialize a new experiment session (SDK-defined)
|
|
833
910
|
*
|
|
834
911
|
* @param name - Name of the experiment (used as slug)
|
|
835
912
|
* @param options - Optional configuration
|
|
836
|
-
* @returns An initialized
|
|
913
|
+
* @returns An initialized Experiment instance
|
|
837
914
|
*
|
|
838
915
|
* @example
|
|
839
916
|
* ```typescript
|
|
840
|
-
* const
|
|
917
|
+
* const experiment = await langwatch.experiments.init('my-experiment');
|
|
841
918
|
*
|
|
842
|
-
* await
|
|
919
|
+
* await experiment.run(dataset, async ({ item, index }) => {
|
|
843
920
|
* const response = await myAgent(item.question);
|
|
844
|
-
*
|
|
921
|
+
* experiment.log('accuracy', { index, score: 0.95 });
|
|
845
922
|
* });
|
|
846
923
|
* ```
|
|
847
924
|
*/
|
|
848
|
-
init(name: string, options?:
|
|
925
|
+
init(name: string, options?: ExperimentInitOptions): Promise<Experiment>;
|
|
926
|
+
/**
|
|
927
|
+
* Run a platform-configured experiment (Experiments Workbench)
|
|
928
|
+
*
|
|
929
|
+
* This runs an experiment that was configured in the LangWatch platform.
|
|
930
|
+
* The method automatically prints a summary and exits with code 1 on failure
|
|
931
|
+
* (unless `exitOnFailure: false` is passed).
|
|
932
|
+
*
|
|
933
|
+
* @param slug - The slug of the experiment (found in the experiment URL)
|
|
934
|
+
* @param options - Optional configuration
|
|
935
|
+
* @returns The experiment results including pass rate and summary
|
|
936
|
+
*
|
|
937
|
+
* @example
|
|
938
|
+
* ```typescript
|
|
939
|
+
* import { LangWatch } from "langwatch";
|
|
940
|
+
*
|
|
941
|
+
* const langwatch = new LangWatch();
|
|
942
|
+
*
|
|
943
|
+
* const result = await langwatch.experiments.run("my-experiment-slug");
|
|
944
|
+
* result.printSummary();
|
|
945
|
+
* ```
|
|
946
|
+
*/
|
|
947
|
+
run(slug: string, options?: RunExperimentOptions): Promise<ExperimentRunResult>;
|
|
948
|
+
/**
|
|
949
|
+
* Run an experiment and wait for completion using polling
|
|
950
|
+
*/
|
|
951
|
+
private runWithPolling;
|
|
952
|
+
/**
|
|
953
|
+
* Start an experiment run
|
|
954
|
+
*/
|
|
955
|
+
private startRun;
|
|
956
|
+
/**
|
|
957
|
+
* Get the status of a run
|
|
958
|
+
*/
|
|
959
|
+
private getRunStatus;
|
|
960
|
+
/**
|
|
961
|
+
* Build the result object from API response
|
|
962
|
+
*/
|
|
963
|
+
private buildResult;
|
|
964
|
+
/**
|
|
965
|
+
* Print a CI-friendly summary of the experiment results
|
|
966
|
+
*/
|
|
967
|
+
private printSummary;
|
|
968
|
+
private sleep;
|
|
969
|
+
/**
|
|
970
|
+
* Replace the domain of a URL with a new base URL, preserving the path
|
|
971
|
+
*/
|
|
972
|
+
private replaceUrlDomain;
|
|
849
973
|
}
|
|
850
974
|
|
|
851
975
|
/**
|
|
852
|
-
* Errors for the
|
|
976
|
+
* Errors for the Experiments API
|
|
853
977
|
*/
|
|
854
978
|
/**
|
|
855
|
-
* Base error for
|
|
979
|
+
* Base error for experiment-related issues
|
|
856
980
|
*/
|
|
857
|
-
declare class
|
|
981
|
+
declare class ExperimentError extends Error {
|
|
858
982
|
constructor(message: string);
|
|
859
983
|
}
|
|
860
984
|
/**
|
|
861
985
|
* Thrown when initialization fails
|
|
862
986
|
*/
|
|
863
|
-
declare class
|
|
987
|
+
declare class ExperimentInitError extends ExperimentError {
|
|
864
988
|
readonly cause?: Error | undefined;
|
|
865
989
|
constructor(message: string, cause?: Error | undefined);
|
|
866
990
|
}
|
|
867
991
|
/**
|
|
868
992
|
* Thrown when API calls fail
|
|
869
993
|
*/
|
|
870
|
-
declare class
|
|
994
|
+
declare class ExperimentApiError extends ExperimentError {
|
|
871
995
|
readonly statusCode?: number | undefined;
|
|
872
996
|
readonly cause?: Error | undefined;
|
|
873
997
|
constructor(message: string, statusCode?: number | undefined, cause?: Error | undefined);
|
|
@@ -875,7 +999,7 @@ declare class EvaluationApiError extends EvaluationError {
|
|
|
875
999
|
/**
|
|
876
1000
|
* Thrown when target metadata conflicts
|
|
877
1001
|
*/
|
|
878
|
-
declare class TargetMetadataConflictError extends
|
|
1002
|
+
declare class TargetMetadataConflictError extends ExperimentError {
|
|
879
1003
|
readonly targetName: string;
|
|
880
1004
|
readonly existingMetadata: Record<string, unknown>;
|
|
881
1005
|
readonly newMetadata: Record<string, unknown>;
|
|
@@ -884,12 +1008,164 @@ declare class TargetMetadataConflictError extends EvaluationError {
|
|
|
884
1008
|
/**
|
|
885
1009
|
* Thrown when an evaluator call fails
|
|
886
1010
|
*/
|
|
887
|
-
declare class EvaluatorError extends
|
|
1011
|
+
declare class EvaluatorError extends ExperimentError {
|
|
888
1012
|
readonly evaluatorSlug: string;
|
|
889
1013
|
readonly cause?: Error | undefined;
|
|
890
1014
|
constructor(evaluatorSlug: string, message: string, cause?: Error | undefined);
|
|
891
1015
|
}
|
|
892
1016
|
|
|
1017
|
+
/**
|
|
1018
|
+
* Types for the Evaluations API (Online Evaluations / Guardrails)
|
|
1019
|
+
*
|
|
1020
|
+
* These types define the structure for running evaluators and guardrails
|
|
1021
|
+
* in real-time against LLM inputs/outputs.
|
|
1022
|
+
*/
|
|
1023
|
+
/**
|
|
1024
|
+
* Status of an evaluation result
|
|
1025
|
+
*/
|
|
1026
|
+
type EvaluationStatus = "processed" | "skipped" | "error";
|
|
1027
|
+
/**
|
|
1028
|
+
* Cost information from an evaluation
|
|
1029
|
+
*/
|
|
1030
|
+
type EvaluationCost = {
|
|
1031
|
+
currency: string;
|
|
1032
|
+
amount: number;
|
|
1033
|
+
};
|
|
1034
|
+
/**
|
|
1035
|
+
* Result returned from running an evaluator
|
|
1036
|
+
*/
|
|
1037
|
+
type EvaluationResult = {
|
|
1038
|
+
/** Status of the evaluation */
|
|
1039
|
+
status: EvaluationStatus;
|
|
1040
|
+
/** Whether the evaluation passed (for guardrails) */
|
|
1041
|
+
passed?: boolean;
|
|
1042
|
+
/** Numeric score (typically 0-1) */
|
|
1043
|
+
score?: number;
|
|
1044
|
+
/** Human-readable details about the result */
|
|
1045
|
+
details?: string;
|
|
1046
|
+
/** Label/category for the result */
|
|
1047
|
+
label?: string;
|
|
1048
|
+
/** Cost of running the evaluation */
|
|
1049
|
+
cost?: EvaluationCost;
|
|
1050
|
+
};
|
|
1051
|
+
/**
|
|
1052
|
+
* Options for the evaluate() method
|
|
1053
|
+
*/
|
|
1054
|
+
type EvaluateOptions = {
|
|
1055
|
+
/** Data to pass to the evaluator (input, output, contexts, etc.) */
|
|
1056
|
+
data: Record<string, unknown>;
|
|
1057
|
+
/** Human-readable name for this evaluation */
|
|
1058
|
+
name?: string;
|
|
1059
|
+
/** Evaluator-specific settings */
|
|
1060
|
+
settings?: Record<string, unknown>;
|
|
1061
|
+
/** Whether to run as a guardrail (affects error handling) */
|
|
1062
|
+
asGuardrail?: boolean;
|
|
1063
|
+
};
|
|
1064
|
+
|
|
1065
|
+
/**
|
|
1066
|
+
* EvaluationsFacade - Entry point for the Evaluations API (Online Evaluations / Guardrails)
|
|
1067
|
+
*
|
|
1068
|
+
* Provides an API for running evaluators and guardrails in real-time against LLM inputs/outputs.
|
|
1069
|
+
*
|
|
1070
|
+
* @example
|
|
1071
|
+
* ```typescript
|
|
1072
|
+
* const langwatch = new LangWatch({ apiKey: "your-api-key" });
|
|
1073
|
+
*
|
|
1074
|
+
* // Run a guardrail
|
|
1075
|
+
* const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
|
|
1076
|
+
* data: { input: userInput, output: generatedResponse },
|
|
1077
|
+
* name: "PII Detection",
|
|
1078
|
+
* asGuardrail: true,
|
|
1079
|
+
* settings: {},
|
|
1080
|
+
* });
|
|
1081
|
+
*
|
|
1082
|
+
* if (!guardrail.passed) {
|
|
1083
|
+
* return "I'm sorry, I can't do that.";
|
|
1084
|
+
* }
|
|
1085
|
+
* ```
|
|
1086
|
+
*/
|
|
1087
|
+
|
|
1088
|
+
type EvaluationsFacadeConfig = {
|
|
1089
|
+
endpoint: string;
|
|
1090
|
+
apiKey: string;
|
|
1091
|
+
logger: Logger;
|
|
1092
|
+
};
|
|
1093
|
+
declare class EvaluationsFacade {
|
|
1094
|
+
#private;
|
|
1095
|
+
constructor(config: EvaluationsFacadeConfig);
|
|
1096
|
+
/**
|
|
1097
|
+
* Run an evaluator or guardrail against provided data
|
|
1098
|
+
*
|
|
1099
|
+
* Creates an OpenTelemetry span attached to the current trace context,
|
|
1100
|
+
* calls the LangWatch evaluation API, and returns the result.
|
|
1101
|
+
*
|
|
1102
|
+
* @param slug - The evaluator slug (e.g., "presidio/pii_detection", "langevals/llm_boolean")
|
|
1103
|
+
* @param options - Evaluation options including data, name, settings, and asGuardrail flag
|
|
1104
|
+
* @returns The evaluation result with status, passed, score, details, label, and cost
|
|
1105
|
+
*
|
|
1106
|
+
* @example
|
|
1107
|
+
* ```typescript
|
|
1108
|
+
* // Run as a guardrail (synchronous evaluation that can block responses)
|
|
1109
|
+
* const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
|
|
1110
|
+
* data: { input: userInput, output: generatedResponse },
|
|
1111
|
+
* name: "PII Detection Guardrail",
|
|
1112
|
+
* asGuardrail: true,
|
|
1113
|
+
* });
|
|
1114
|
+
*
|
|
1115
|
+
* if (!guardrail.passed) {
|
|
1116
|
+
* console.log("PII detected:", guardrail.details);
|
|
1117
|
+
* return "Sorry, I cannot process that request.";
|
|
1118
|
+
* }
|
|
1119
|
+
* ```
|
|
1120
|
+
*
|
|
1121
|
+
* @example
|
|
1122
|
+
* ```typescript
|
|
1123
|
+
* // Run as an online evaluation (async scoring for monitoring)
|
|
1124
|
+
* const result = await langwatch.evaluations.evaluate("langevals/llm_boolean", {
|
|
1125
|
+
* data: { input: question, output: response },
|
|
1126
|
+
* name: "Quality Check",
|
|
1127
|
+
* settings: { prompt: "Check if the response answers the question." },
|
|
1128
|
+
* });
|
|
1129
|
+
*
|
|
1130
|
+
* console.log("Score:", result.score);
|
|
1131
|
+
* console.log("Details:", result.details);
|
|
1132
|
+
* ```
|
|
1133
|
+
*/
|
|
1134
|
+
evaluate: (slug: string, options: EvaluateOptions) => Promise<EvaluationResult>;
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
/**
|
|
1138
|
+
* Error classes for the Evaluations API
|
|
1139
|
+
*/
|
|
1140
|
+
/**
|
|
1141
|
+
* Base error for evaluation operations
|
|
1142
|
+
*/
|
|
1143
|
+
declare class EvaluationError extends Error {
|
|
1144
|
+
constructor(message: string);
|
|
1145
|
+
}
|
|
1146
|
+
/**
|
|
1147
|
+
* Error when an evaluator call fails
|
|
1148
|
+
*/
|
|
1149
|
+
declare class EvaluatorCallError extends EvaluationError {
|
|
1150
|
+
readonly evaluatorSlug: string;
|
|
1151
|
+
readonly statusCode?: number;
|
|
1152
|
+
constructor(evaluatorSlug: string, message: string, statusCode?: number);
|
|
1153
|
+
}
|
|
1154
|
+
/**
|
|
1155
|
+
* Error when evaluator is not found
|
|
1156
|
+
*/
|
|
1157
|
+
declare class EvaluatorNotFoundError extends EvaluationError {
|
|
1158
|
+
readonly evaluatorSlug: string;
|
|
1159
|
+
constructor(evaluatorSlug: string);
|
|
1160
|
+
}
|
|
1161
|
+
/**
|
|
1162
|
+
* Error from the evaluations API
|
|
1163
|
+
*/
|
|
1164
|
+
declare class EvaluationsApiError extends EvaluationError {
|
|
1165
|
+
readonly statusCode: number;
|
|
1166
|
+
constructor(message: string, statusCode: number);
|
|
1167
|
+
}
|
|
1168
|
+
|
|
893
1169
|
interface GetTraceParams {
|
|
894
1170
|
includeSpans?: boolean;
|
|
895
1171
|
}
|
|
@@ -913,8 +1189,41 @@ declare class LangWatch {
|
|
|
913
1189
|
private readonly config;
|
|
914
1190
|
readonly prompts: PromptsFacade;
|
|
915
1191
|
readonly traces: TracesFacade;
|
|
916
|
-
readonly evaluation: EvaluationFacade;
|
|
917
1192
|
readonly datasets: DatasetsFacade;
|
|
1193
|
+
/**
|
|
1194
|
+
* Run experiments on LangWatch platform or via SDK.
|
|
1195
|
+
*
|
|
1196
|
+
* Platform experiments (CI/CD):
|
|
1197
|
+
* ```typescript
|
|
1198
|
+
* const result = await langwatch.experiments.run("my-experiment-slug");
|
|
1199
|
+
* result.printSummary();
|
|
1200
|
+
* ```
|
|
1201
|
+
*
|
|
1202
|
+
* SDK-defined experiments:
|
|
1203
|
+
* ```typescript
|
|
1204
|
+
* const experiment = await langwatch.experiments.init("my-experiment");
|
|
1205
|
+
* // ... run evaluators using experiment.evaluate()
|
|
1206
|
+
* ```
|
|
1207
|
+
*/
|
|
1208
|
+
readonly experiments: ExperimentsFacade;
|
|
1209
|
+
/**
|
|
1210
|
+
* Run evaluators and guardrails in real-time (Online Evaluations).
|
|
1211
|
+
*
|
|
1212
|
+
* @example
|
|
1213
|
+
* ```typescript
|
|
1214
|
+
* // Run a guardrail
|
|
1215
|
+
* const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
|
|
1216
|
+
* data: { input: userInput, output: generatedResponse },
|
|
1217
|
+
* name: "PII Detection",
|
|
1218
|
+
* asGuardrail: true,
|
|
1219
|
+
* });
|
|
1220
|
+
*
|
|
1221
|
+
* if (!guardrail.passed) {
|
|
1222
|
+
* return "I'm sorry, I can't do that.";
|
|
1223
|
+
* }
|
|
1224
|
+
* ```
|
|
1225
|
+
*/
|
|
1226
|
+
readonly evaluations: EvaluationsFacade;
|
|
918
1227
|
constructor(options?: LangWatchConstructorOptions);
|
|
919
1228
|
get apiClient(): LangwatchApiClient;
|
|
920
1229
|
}
|
|
@@ -924,4 +1233,4 @@ declare const logger: {
|
|
|
924
1233
|
NoOpLogger: typeof NoOpLogger;
|
|
925
1234
|
};
|
|
926
1235
|
|
|
927
|
-
export { type EvaluateOptions,
|
|
1236
|
+
export { type EvaluateOptions, type EvaluationCost, EvaluationError, type EvaluationResult, type EvaluationStatus, EvaluationsApiError, EvaluationsFacade, EvaluatorCallError, EvaluatorError, EvaluatorNotFoundError, Experiment, ExperimentApiError, ExperimentError, type EvaluateOptions$1 as ExperimentEvaluateOptions, type EvaluationResult$1 as ExperimentEvaluationResult, type EvaluationStatus$1 as ExperimentEvaluationStatus, ExperimentInitError, type ExperimentInitOptions, ExperimentsFacade, FetchPolicy, type GetPromptOptions, LangWatch, type LogOptions, type RunCallback, type RunContext, type RunOptions, type TargetInfo, type TargetMetadata, TargetMetadataConflictError, type TargetType, logger };
|