langwatch 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/{add-UB5U3K3M.js → add-Z5UVUPCK.js} +7 -7
  2. package/dist/{add-UB5U3K3M.js.map → add-Z5UVUPCK.js.map} +1 -1
  3. package/dist/{add-XV5SUAXF.mjs → add-ZAPD2GBO.mjs} +4 -4
  4. package/dist/{chunk-JQYW7RY7.js → chunk-4BNGSDYW.js} +14 -14
  5. package/dist/{chunk-JQYW7RY7.js.map → chunk-4BNGSDYW.js.map} +1 -1
  6. package/dist/{chunk-LKE6DMUP.mjs → chunk-77XIPD42.mjs} +2 -2
  7. package/dist/chunk-77XIPD42.mjs.map +1 -0
  8. package/dist/{chunk-D4H6PR6H.js → chunk-DXBTJGCK.js} +10 -10
  9. package/dist/{chunk-D4H6PR6H.js.map → chunk-DXBTJGCK.js.map} +1 -1
  10. package/dist/{chunk-WZ7FYUHN.mjs → chunk-J4HK6XZR.mjs} +5 -5
  11. package/dist/{chunk-N7PJJMU2.js → chunk-NPFWFQK6.js} +2 -2
  12. package/dist/chunk-NPFWFQK6.js.map +1 -0
  13. package/dist/chunk-OAKQ7UBU.mjs +317 -0
  14. package/dist/chunk-OAKQ7UBU.mjs.map +1 -0
  15. package/dist/chunk-RM2VUAFL.js +317 -0
  16. package/dist/chunk-RM2VUAFL.js.map +1 -0
  17. package/dist/{chunk-556ZFJMK.mjs → chunk-SZRV7E6P.mjs} +2 -2
  18. package/dist/cli/index.js +6 -6
  19. package/dist/cli/index.mjs +6 -6
  20. package/dist/{implementation-CPxv2BdW.d.ts → implementation-Bnc8Aymq.d.ts} +1 -1
  21. package/dist/{implementation-CVrmD0bz.d.mts → implementation-Ck58nRkT.d.mts} +1 -1
  22. package/dist/index.d.mts +347 -38
  23. package/dist/index.d.ts +347 -38
  24. package/dist/index.js +519 -47
  25. package/dist/index.js.map +1 -1
  26. package/dist/index.mjs +518 -46
  27. package/dist/index.mjs.map +1 -1
  28. package/dist/{list-DUNP46AD.js → list-LASBYRI4.js} +7 -7
  29. package/dist/{list-DUNP46AD.js.map → list-LASBYRI4.js.map} +1 -1
  30. package/dist/{list-T4QS6CT2.mjs → list-XX4VPNJA.mjs} +4 -4
  31. package/dist/{login-3H27NIOD.js → login-2VCZDSLE.js} +3 -3
  32. package/dist/{login-3H27NIOD.js.map → login-2VCZDSLE.js.map} +1 -1
  33. package/dist/{login-T2ET7TKH.mjs → login-CZ2257SV.mjs} +2 -2
  34. package/dist/observability-sdk/index.d.mts +3 -3
  35. package/dist/observability-sdk/index.d.ts +3 -3
  36. package/dist/observability-sdk/index.js +4 -4
  37. package/dist/observability-sdk/index.js.map +1 -1
  38. package/dist/observability-sdk/index.mjs +7 -7
  39. package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
  40. package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
  41. package/dist/observability-sdk/setup/node/index.d.mts +24 -1
  42. package/dist/observability-sdk/setup/node/index.d.ts +24 -1
  43. package/dist/observability-sdk/setup/node/index.js +7 -292
  44. package/dist/observability-sdk/setup/node/index.js.map +1 -1
  45. package/dist/observability-sdk/setup/node/index.mjs +8 -293
  46. package/dist/observability-sdk/setup/node/index.mjs.map +1 -1
  47. package/dist/{remove-F5RM4775.mjs → remove-KESD7YHL.mjs} +4 -4
  48. package/dist/{remove-V4JL5Z4U.js → remove-XWN3XTF5.js} +6 -6
  49. package/dist/{remove-V4JL5Z4U.js.map → remove-XWN3XTF5.js.map} +1 -1
  50. package/dist/{sync-DIOKWE6R.js → sync-IJ26JHEP.js} +6 -6
  51. package/dist/{sync-DIOKWE6R.js.map → sync-IJ26JHEP.js.map} +1 -1
  52. package/dist/{sync-VGWOLOLJ.mjs → sync-SCVP7CHX.mjs} +4 -4
  53. package/dist/{types-Kts5RGLY.d.mts → types-5h2Im4pl.d.mts} +162 -0
  54. package/dist/{types-usU5mTCX.d.ts → types-fo-Ij9pl.d.ts} +162 -0
  55. package/package.json +3 -2
  56. package/dist/chunk-LKE6DMUP.mjs.map +0 -1
  57. package/dist/chunk-N7PJJMU2.js.map +0 -1
  58. package/dist/{add-XV5SUAXF.mjs.map → add-ZAPD2GBO.mjs.map} +0 -0
  59. package/dist/{chunk-WZ7FYUHN.mjs.map → chunk-J4HK6XZR.mjs.map} +0 -0
  60. package/dist/{chunk-556ZFJMK.mjs.map → chunk-SZRV7E6P.mjs.map} +0 -0
  61. package/dist/{list-T4QS6CT2.mjs.map → list-XX4VPNJA.mjs.map} +0 -0
  62. package/dist/{login-T2ET7TKH.mjs.map → login-CZ2257SV.mjs.map} +0 -0
  63. package/dist/{remove-F5RM4775.mjs.map → remove-KESD7YHL.mjs.map} +0 -0
  64. package/dist/{sync-VGWOLOLJ.mjs.map → sync-SCVP7CHX.mjs.map} +0 -0
package/dist/index.d.mts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.mjs';
2
- export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-CVrmD0bz.mjs';
3
- import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-Kts5RGLY.mjs';
2
+ export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Ck58nRkT.mjs';
3
+ import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-5h2Im4pl.mjs';
4
4
  import openApiCreateClient from 'openapi-fetch';
5
5
  import { z } from 'zod';
6
6
  export { l as attributes } from './types-DRiQaKFG.mjs';
@@ -405,7 +405,7 @@ type DatasetsFacadeConfig = {
405
405
  * const dataset = await langwatch.datasets.get("my-dataset");
406
406
  *
407
407
  * // Use with evaluation
408
- * const evaluation = langwatch.evaluation.init("my-experiment");
408
+ * const evaluation = langwatch.experiments.init("my-experiment");
409
409
  * await evaluation.run(dataset.entries.map(e => e.entry), async ({ item, index }) => {
410
410
  * const output = await myLLM(item.input);
411
411
  * await evaluation.evaluate("my-evaluator", {
@@ -447,16 +447,16 @@ declare class DatasetsFacade {
447
447
  }
448
448
 
449
449
  /**
450
- * Types for the Evaluation API
450
+ * Types for the Experiments API
451
451
  *
452
- * These types define the structure for batch evaluations, including
452
+ * These types define the structure for batch experiments, including
453
453
  * logging metrics, running evaluators, and managing targets.
454
454
  */
455
455
 
456
456
  /**
457
457
  * Status of an evaluation result
458
458
  */
459
- type EvaluationStatus = "processed" | "error" | "skipped";
459
+ type EvaluationStatus$1 = "processed" | "error" | "skipped";
460
460
  /**
461
461
  * Target types for batch evaluations
462
462
  */
@@ -503,11 +503,11 @@ type TargetInfo = z.infer<typeof targetInfoSchema>;
503
503
  /**
504
504
  * Result of an evaluation
505
505
  */
506
- type EvaluationResult = z.infer<typeof evaluationResultSchema>;
506
+ type EvaluationResult$1 = z.infer<typeof evaluationResultSchema>;
507
507
  /**
508
- * Options for initializing an evaluation
508
+ * Options for initializing an experiment
509
509
  */
510
- type EvaluationInitOptions = {
510
+ type ExperimentInitOptions = {
511
511
  /** Custom run ID (auto-generated if not provided) */
512
512
  runId?: string;
513
513
  /** Number of parallel threads for submit() */
@@ -533,7 +533,7 @@ type LogOptions = {
533
533
  /** Human-readable description of the result */
534
534
  details?: string;
535
535
  /** Status of the evaluation */
536
- status?: EvaluationStatus;
536
+ status?: EvaluationStatus$1;
537
537
  /** Duration in milliseconds */
538
538
  duration?: number;
539
539
  /** Cost amount in USD */
@@ -551,7 +551,7 @@ type LogOptions = {
551
551
  /**
552
552
  * Options for the evaluate() method (built-in evaluators)
553
553
  */
554
- type EvaluateOptions = {
554
+ type EvaluateOptions$1 = {
555
555
  /**
556
556
  * Row index in the dataset.
557
557
  * Optional when called inside withTarget() - will be auto-inferred from context.
@@ -627,9 +627,9 @@ type TargetResult<R> = {
627
627
  };
628
628
 
629
629
  /**
630
- * Evaluation - Main class for running batch evaluations
630
+ * Experiment - Main class for running batch experiments
631
631
  *
632
- * Provides a clean API for running evaluations over datasets with:
632
+ * Provides a clean API for running experiments over datasets with:
633
633
  * - Automatic tracing per iteration
634
634
  * - Parallel execution with concurrency control
635
635
  * - Batched result sending
@@ -638,9 +638,9 @@ type TargetResult<R> = {
638
638
  */
639
639
 
640
640
  /**
641
- * Evaluation session for running batch evaluations
641
+ * Experiment session for running batch experiments
642
642
  */
643
- declare class Evaluation {
643
+ declare class Experiment {
644
644
  readonly name: string;
645
645
  readonly runId: string;
646
646
  readonly experimentSlug: string;
@@ -671,7 +671,7 @@ declare class Evaluation {
671
671
  endpoint: string;
672
672
  apiKey: string;
673
673
  logger: Logger;
674
- } & EvaluationInitOptions): Promise<Evaluation>;
674
+ } & ExperimentInitOptions): Promise<Experiment>;
675
675
  /**
676
676
  * Initialize the evaluation by creating/getting the experiment
677
677
  */
@@ -743,7 +743,7 @@ declare class Evaluation {
743
743
  * });
744
744
  * ```
745
745
  */
746
- evaluate(evaluatorSlug: string, options: EvaluateOptions): Promise<void>;
746
+ evaluate(evaluatorSlug: string, options: EvaluateOptions$1): Promise<void>;
747
747
  /**
748
748
  * Execute code within a target context with automatic tracing
749
749
  *
@@ -811,63 +811,187 @@ declare class Evaluation {
811
811
  }
812
812
 
813
813
  /**
814
- * EvaluationFacade - Entry point for the evaluation API
814
+ * Types for platform-configured experiments (Experiments Workbench)
815
+ */
816
+ /**
817
+ * Summary of a completed experiment run
818
+ */
819
+ type ExperimentRunSummary = {
820
+ runId?: string;
821
+ totalCells?: number;
822
+ completedCells?: number;
823
+ failedCells?: number;
824
+ duration?: number;
825
+ runUrl?: string;
826
+ timestamps?: {
827
+ startedAt: number;
828
+ finishedAt?: number;
829
+ stoppedAt?: number;
830
+ };
831
+ targets?: Array<{
832
+ targetId: string;
833
+ name: string;
834
+ passed: number;
835
+ failed: number;
836
+ avgLatency: number;
837
+ totalCost: number;
838
+ }>;
839
+ evaluators?: Array<{
840
+ evaluatorId: string;
841
+ name: string;
842
+ passed: number;
843
+ failed: number;
844
+ passRate: number;
845
+ avgScore?: number;
846
+ }>;
847
+ totalPassed?: number;
848
+ totalFailed?: number;
849
+ passRate?: number;
850
+ totalCost?: number;
851
+ };
852
+ /**
853
+ * Options for running a platform experiment
854
+ */
855
+ type RunExperimentOptions = {
856
+ /**
857
+ * Polling interval in milliseconds (default: 2000)
858
+ */
859
+ pollInterval?: number;
860
+ /**
861
+ * Maximum time to wait for completion in milliseconds (default: 600000 = 10 minutes)
862
+ */
863
+ timeout?: number;
864
+ /**
865
+ * Callback for progress updates
866
+ */
867
+ onProgress?: (progress: number, total: number) => void;
868
+ };
869
+ /**
870
+ * Final result of a platform experiment run
871
+ */
872
+ type ExperimentRunResult = {
873
+ runId: string;
874
+ status: "completed" | "failed" | "stopped";
875
+ passed: number;
876
+ failed: number;
877
+ passRate: number;
878
+ duration: number;
879
+ runUrl: string;
880
+ summary: ExperimentRunSummary;
881
+ /**
882
+ * Print a CI-friendly summary of the results
883
+ * @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
884
+ */
885
+ printSummary: (exitOnFailure?: boolean) => void;
886
+ };
887
+
888
+ /**
889
+ * ExperimentsFacade - Entry point for the experiments API
815
890
  *
816
- * Provides the `init()` method to create evaluation sessions.
891
+ * Provides:
892
+ * - `init()` method to create experiment sessions (SDK-defined experiments)
893
+ * - `run()` method to execute platform-configured experiments (Experiments Workbench)
817
894
  */
818
895
 
819
- type EvaluationFacadeConfig = {
896
+ type ExperimentsFacadeConfig = {
820
897
  langwatchApiClient: LangwatchApiClient;
821
898
  endpoint: string;
822
899
  apiKey: string;
823
900
  logger: Logger;
824
901
  };
825
902
  /**
826
- * Facade for creating evaluation sessions
903
+ * Facade for creating experiment sessions and running platform-configured experiments
827
904
  */
828
- declare class EvaluationFacade {
905
+ declare class ExperimentsFacade {
829
906
  private readonly config;
830
- constructor(config: EvaluationFacadeConfig);
907
+ constructor(config: ExperimentsFacadeConfig);
831
908
  /**
832
- * Initialize a new evaluation session
909
+ * Initialize a new experiment session (SDK-defined)
833
910
  *
834
911
  * @param name - Name of the experiment (used as slug)
835
912
  * @param options - Optional configuration
836
- * @returns An initialized Evaluation instance
913
+ * @returns An initialized Experiment instance
837
914
  *
838
915
  * @example
839
916
  * ```typescript
840
- * const evaluation = await langwatch.evaluation.init('my-experiment');
917
+ * const experiment = await langwatch.experiments.init('my-experiment');
841
918
  *
842
- * await evaluation.run(dataset, async ({ item, index }) => {
919
+ * await experiment.run(dataset, async ({ item, index }) => {
843
920
  * const response = await myAgent(item.question);
844
- * evaluation.log('accuracy', { index, score: 0.95 });
921
+ * experiment.log('accuracy', { index, score: 0.95 });
845
922
  * });
846
923
  * ```
847
924
  */
848
- init(name: string, options?: EvaluationInitOptions): Promise<Evaluation>;
925
+ init(name: string, options?: ExperimentInitOptions): Promise<Experiment>;
926
+ /**
927
+ * Run a platform-configured experiment (Experiments Workbench)
928
+ *
929
+ * This runs an experiment that was configured in the LangWatch platform.
930
+ * The method automatically prints a summary and exits with code 1 on failure
931
+ * (unless `exitOnFailure: false` is passed).
932
+ *
933
+ * @param slug - The slug of the experiment (found in the experiment URL)
934
+ * @param options - Optional configuration
935
+ * @returns The experiment results including pass rate and summary
936
+ *
937
+ * @example
938
+ * ```typescript
939
+ * import { LangWatch } from "langwatch";
940
+ *
941
+ * const langwatch = new LangWatch();
942
+ *
943
+ * const result = await langwatch.experiments.run("my-experiment-slug");
944
+ * result.printSummary();
945
+ * ```
946
+ */
947
+ run(slug: string, options?: RunExperimentOptions): Promise<ExperimentRunResult>;
948
+ /**
949
+ * Run an experiment and wait for completion using polling
950
+ */
951
+ private runWithPolling;
952
+ /**
953
+ * Start an experiment run
954
+ */
955
+ private startRun;
956
+ /**
957
+ * Get the status of a run
958
+ */
959
+ private getRunStatus;
960
+ /**
961
+ * Build the result object from API response
962
+ */
963
+ private buildResult;
964
+ /**
965
+ * Print a CI-friendly summary of the experiment results
966
+ */
967
+ private printSummary;
968
+ private sleep;
969
+ /**
970
+ * Replace the domain of a URL with a new base URL, preserving the path
971
+ */
972
+ private replaceUrlDomain;
849
973
  }
850
974
 
851
975
  /**
852
- * Errors for the Evaluation API
976
+ * Errors for the Experiments API
853
977
  */
854
978
  /**
855
- * Base error for evaluation-related issues
979
+ * Base error for experiment-related issues
856
980
  */
857
- declare class EvaluationError extends Error {
981
+ declare class ExperimentError extends Error {
858
982
  constructor(message: string);
859
983
  }
860
984
  /**
861
985
  * Thrown when initialization fails
862
986
  */
863
- declare class EvaluationInitError extends EvaluationError {
987
+ declare class ExperimentInitError extends ExperimentError {
864
988
  readonly cause?: Error | undefined;
865
989
  constructor(message: string, cause?: Error | undefined);
866
990
  }
867
991
  /**
868
992
  * Thrown when API calls fail
869
993
  */
870
- declare class EvaluationApiError extends EvaluationError {
994
+ declare class ExperimentApiError extends ExperimentError {
871
995
  readonly statusCode?: number | undefined;
872
996
  readonly cause?: Error | undefined;
873
997
  constructor(message: string, statusCode?: number | undefined, cause?: Error | undefined);
@@ -875,7 +999,7 @@ declare class EvaluationApiError extends EvaluationError {
875
999
  /**
876
1000
  * Thrown when target metadata conflicts
877
1001
  */
878
- declare class TargetMetadataConflictError extends EvaluationError {
1002
+ declare class TargetMetadataConflictError extends ExperimentError {
879
1003
  readonly targetName: string;
880
1004
  readonly existingMetadata: Record<string, unknown>;
881
1005
  readonly newMetadata: Record<string, unknown>;
@@ -884,12 +1008,164 @@ declare class TargetMetadataConflictError extends EvaluationError {
884
1008
  /**
885
1009
  * Thrown when an evaluator call fails
886
1010
  */
887
- declare class EvaluatorError extends EvaluationError {
1011
+ declare class EvaluatorError extends ExperimentError {
888
1012
  readonly evaluatorSlug: string;
889
1013
  readonly cause?: Error | undefined;
890
1014
  constructor(evaluatorSlug: string, message: string, cause?: Error | undefined);
891
1015
  }
892
1016
 
1017
+ /**
1018
+ * Types for the Evaluations API (Online Evaluations / Guardrails)
1019
+ *
1020
+ * These types define the structure for running evaluators and guardrails
1021
+ * in real-time against LLM inputs/outputs.
1022
+ */
1023
+ /**
1024
+ * Status of an evaluation result
1025
+ */
1026
+ type EvaluationStatus = "processed" | "skipped" | "error";
1027
+ /**
1028
+ * Cost information from an evaluation
1029
+ */
1030
+ type EvaluationCost = {
1031
+ currency: string;
1032
+ amount: number;
1033
+ };
1034
+ /**
1035
+ * Result returned from running an evaluator
1036
+ */
1037
+ type EvaluationResult = {
1038
+ /** Status of the evaluation */
1039
+ status: EvaluationStatus;
1040
+ /** Whether the evaluation passed (for guardrails) */
1041
+ passed?: boolean;
1042
+ /** Numeric score (typically 0-1) */
1043
+ score?: number;
1044
+ /** Human-readable details about the result */
1045
+ details?: string;
1046
+ /** Label/category for the result */
1047
+ label?: string;
1048
+ /** Cost of running the evaluation */
1049
+ cost?: EvaluationCost;
1050
+ };
1051
+ /**
1052
+ * Options for the evaluate() method
1053
+ */
1054
+ type EvaluateOptions = {
1055
+ /** Data to pass to the evaluator (input, output, contexts, etc.) */
1056
+ data: Record<string, unknown>;
1057
+ /** Human-readable name for this evaluation */
1058
+ name?: string;
1059
+ /** Evaluator-specific settings */
1060
+ settings?: Record<string, unknown>;
1061
+ /** Whether to run as a guardrail (affects error handling) */
1062
+ asGuardrail?: boolean;
1063
+ };
1064
+
1065
+ /**
1066
+ * EvaluationsFacade - Entry point for the Evaluations API (Online Evaluations / Guardrails)
1067
+ *
1068
+ * Provides an API for running evaluators and guardrails in real-time against LLM inputs/outputs.
1069
+ *
1070
+ * @example
1071
+ * ```typescript
1072
+ * const langwatch = new LangWatch({ apiKey: "your-api-key" });
1073
+ *
1074
+ * // Run a guardrail
1075
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1076
+ * data: { input: userInput, output: generatedResponse },
1077
+ * name: "PII Detection",
1078
+ * asGuardrail: true,
1079
+ * settings: {},
1080
+ * });
1081
+ *
1082
+ * if (!guardrail.passed) {
1083
+ * return "I'm sorry, I can't do that.";
1084
+ * }
1085
+ * ```
1086
+ */
1087
+
1088
+ type EvaluationsFacadeConfig = {
1089
+ endpoint: string;
1090
+ apiKey: string;
1091
+ logger: Logger;
1092
+ };
1093
+ declare class EvaluationsFacade {
1094
+ #private;
1095
+ constructor(config: EvaluationsFacadeConfig);
1096
+ /**
1097
+ * Run an evaluator or guardrail against provided data
1098
+ *
1099
+ * Creates an OpenTelemetry span attached to the current trace context,
1100
+ * calls the LangWatch evaluation API, and returns the result.
1101
+ *
1102
+ * @param slug - The evaluator slug (e.g., "presidio/pii_detection", "langevals/llm_boolean")
1103
+ * @param options - Evaluation options including data, name, settings, and asGuardrail flag
1104
+ * @returns The evaluation result with status, passed, score, details, label, and cost
1105
+ *
1106
+ * @example
1107
+ * ```typescript
1108
+ * // Run as a guardrail (synchronous evaluation that can block responses)
1109
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1110
+ * data: { input: userInput, output: generatedResponse },
1111
+ * name: "PII Detection Guardrail",
1112
+ * asGuardrail: true,
1113
+ * });
1114
+ *
1115
+ * if (!guardrail.passed) {
1116
+ * console.log("PII detected:", guardrail.details);
1117
+ * return "Sorry, I cannot process that request.";
1118
+ * }
1119
+ * ```
1120
+ *
1121
+ * @example
1122
+ * ```typescript
1123
+ * // Run as an online evaluation (async scoring for monitoring)
1124
+ * const result = await langwatch.evaluations.evaluate("langevals/llm_boolean", {
1125
+ * data: { input: question, output: response },
1126
+ * name: "Quality Check",
1127
+ * settings: { prompt: "Check if the response answers the question." },
1128
+ * });
1129
+ *
1130
+ * console.log("Score:", result.score);
1131
+ * console.log("Details:", result.details);
1132
+ * ```
1133
+ */
1134
+ evaluate: (slug: string, options: EvaluateOptions) => Promise<EvaluationResult>;
1135
+ }
1136
+
1137
+ /**
1138
+ * Error classes for the Evaluations API
1139
+ */
1140
+ /**
1141
+ * Base error for evaluation operations
1142
+ */
1143
+ declare class EvaluationError extends Error {
1144
+ constructor(message: string);
1145
+ }
1146
+ /**
1147
+ * Error when an evaluator call fails
1148
+ */
1149
+ declare class EvaluatorCallError extends EvaluationError {
1150
+ readonly evaluatorSlug: string;
1151
+ readonly statusCode?: number;
1152
+ constructor(evaluatorSlug: string, message: string, statusCode?: number);
1153
+ }
1154
+ /**
1155
+ * Error when evaluator is not found
1156
+ */
1157
+ declare class EvaluatorNotFoundError extends EvaluationError {
1158
+ readonly evaluatorSlug: string;
1159
+ constructor(evaluatorSlug: string);
1160
+ }
1161
+ /**
1162
+ * Error from the evaluations API
1163
+ */
1164
+ declare class EvaluationsApiError extends EvaluationError {
1165
+ readonly statusCode: number;
1166
+ constructor(message: string, statusCode: number);
1167
+ }
1168
+
893
1169
  interface GetTraceParams {
894
1170
  includeSpans?: boolean;
895
1171
  }
@@ -913,8 +1189,41 @@ declare class LangWatch {
913
1189
  private readonly config;
914
1190
  readonly prompts: PromptsFacade;
915
1191
  readonly traces: TracesFacade;
916
- readonly evaluation: EvaluationFacade;
917
1192
  readonly datasets: DatasetsFacade;
1193
+ /**
1194
+ * Run experiments on LangWatch platform or via SDK.
1195
+ *
1196
+ * Platform experiments (CI/CD):
1197
+ * ```typescript
1198
+ * const result = await langwatch.experiments.run("my-experiment-slug");
1199
+ * result.printSummary();
1200
+ * ```
1201
+ *
1202
+ * SDK-defined experiments:
1203
+ * ```typescript
1204
+ * const experiment = await langwatch.experiments.init("my-experiment");
1205
+ * // ... run evaluators using experiment.evaluate()
1206
+ * ```
1207
+ */
1208
+ readonly experiments: ExperimentsFacade;
1209
+ /**
1210
+ * Run evaluators and guardrails in real-time (Online Evaluations).
1211
+ *
1212
+ * @example
1213
+ * ```typescript
1214
+ * // Run a guardrail
1215
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1216
+ * data: { input: userInput, output: generatedResponse },
1217
+ * name: "PII Detection",
1218
+ * asGuardrail: true,
1219
+ * });
1220
+ *
1221
+ * if (!guardrail.passed) {
1222
+ * return "I'm sorry, I can't do that.";
1223
+ * }
1224
+ * ```
1225
+ */
1226
+ readonly evaluations: EvaluationsFacade;
918
1227
  constructor(options?: LangWatchConstructorOptions);
919
1228
  get apiClient(): LangwatchApiClient;
920
1229
  }
@@ -924,4 +1233,4 @@ declare const logger: {
924
1233
  NoOpLogger: typeof NoOpLogger;
925
1234
  };
926
1235
 
927
- export { type EvaluateOptions, Evaluation, EvaluationApiError, EvaluationError, EvaluationFacade, EvaluationInitError, type EvaluationInitOptions, type EvaluationResult, type EvaluationStatus, EvaluatorError, FetchPolicy, type GetPromptOptions, LangWatch, type LogOptions, type RunCallback, type RunContext, type RunOptions, type TargetInfo, type TargetMetadata, TargetMetadataConflictError, type TargetType, logger };
1236
+ export { type EvaluateOptions, type EvaluationCost, EvaluationError, type EvaluationResult, type EvaluationStatus, EvaluationsApiError, EvaluationsFacade, EvaluatorCallError, EvaluatorError, EvaluatorNotFoundError, Experiment, ExperimentApiError, ExperimentError, type EvaluateOptions$1 as ExperimentEvaluateOptions, type EvaluationResult$1 as ExperimentEvaluationResult, type EvaluationStatus$1 as ExperimentEvaluationStatus, ExperimentInitError, type ExperimentInitOptions, ExperimentsFacade, FetchPolicy, type GetPromptOptions, LangWatch, type LogOptions, type RunCallback, type RunContext, type RunOptions, type TargetInfo, type TargetMetadata, TargetMetadataConflictError, type TargetType, logger };