langwatch 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/dist/{add-2UHFYNUA.mjs → add-36C7337U.mjs} +5 -5
  2. package/dist/{add-LUETMKBD.js → add-HNUQULX7.js} +8 -8
  3. package/dist/{add-LUETMKBD.js.map → add-HNUQULX7.js.map} +1 -1
  4. package/dist/chunk-2BALUJ3B.js +317 -0
  5. package/dist/chunk-2BALUJ3B.js.map +1 -0
  6. package/dist/{chunk-6SSCBYJM.js → chunk-4VAQQB35.js} +10 -10
  7. package/dist/{chunk-6SSCBYJM.js.map → chunk-4VAQQB35.js.map} +1 -1
  8. package/dist/{chunk-C4XUWCQR.js → chunk-D3YWZYYD.js} +2 -2
  9. package/dist/chunk-D3YWZYYD.js.map +1 -0
  10. package/dist/{chunk-BQRUUTN3.js → chunk-GNHX3RA6.js} +14 -14
  11. package/dist/{chunk-BQRUUTN3.js.map → chunk-GNHX3RA6.js.map} +1 -1
  12. package/dist/{chunk-WCNDT5SD.mjs → chunk-JX2FBF5Z.mjs} +2 -2
  13. package/dist/{chunk-IIUI2XYW.mjs → chunk-KD5U3MP3.mjs} +11 -2
  14. package/dist/chunk-KD5U3MP3.mjs.map +1 -0
  15. package/dist/chunk-KJQNNRAB.mjs +317 -0
  16. package/dist/chunk-KJQNNRAB.mjs.map +1 -0
  17. package/dist/{chunk-OTID7S7K.mjs → chunk-QOSICUZE.mjs} +3 -3
  18. package/dist/{chunk-ONXIZKC6.js → chunk-SHJZRXU7.js} +11 -2
  19. package/dist/chunk-SHJZRXU7.js.map +1 -0
  20. package/dist/{chunk-TB5KB737.mjs → chunk-WIB3D4PL.mjs} +2 -2
  21. package/dist/chunk-WIB3D4PL.mjs.map +1 -0
  22. package/dist/cli/index.js +6 -6
  23. package/dist/cli/index.mjs +6 -6
  24. package/dist/{implementation-Ck58nRkT.d.mts → implementation-Dl15eRjo.d.mts} +1 -1
  25. package/dist/{implementation-Bnc8Aymq.d.ts → implementation-gLzM6qpI.d.ts} +1 -1
  26. package/dist/index.d.mts +269 -56
  27. package/dist/index.d.ts +269 -56
  28. package/dist/index.js +344 -92
  29. package/dist/index.js.map +1 -1
  30. package/dist/index.mjs +338 -86
  31. package/dist/index.mjs.map +1 -1
  32. package/dist/{list-WV5LA6LD.mjs → list-S5UIBHSU.mjs} +5 -5
  33. package/dist/{list-7U3M64GY.js → list-U5HX65KF.js} +8 -8
  34. package/dist/{list-7U3M64GY.js.map → list-U5HX65KF.js.map} +1 -1
  35. package/dist/{login-QKRT6PXA.mjs → login-ALPTL4S6.mjs} +2 -2
  36. package/dist/{login-B7DKMN7P.js → login-IINYTYEF.js} +3 -3
  37. package/dist/{login-B7DKMN7P.js.map → login-IINYTYEF.js.map} +1 -1
  38. package/dist/observability-sdk/index.d.mts +3 -3
  39. package/dist/observability-sdk/index.d.ts +3 -3
  40. package/dist/observability-sdk/index.js +4 -4
  41. package/dist/observability-sdk/index.mjs +3 -3
  42. package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
  43. package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
  44. package/dist/observability-sdk/instrumentation/langchain/index.js +3 -3
  45. package/dist/observability-sdk/instrumentation/langchain/index.mjs +1 -1
  46. package/dist/observability-sdk/setup/node/index.d.mts +24 -1
  47. package/dist/observability-sdk/setup/node/index.d.ts +24 -1
  48. package/dist/observability-sdk/setup/node/index.js +7 -292
  49. package/dist/observability-sdk/setup/node/index.js.map +1 -1
  50. package/dist/observability-sdk/setup/node/index.mjs +8 -293
  51. package/dist/observability-sdk/setup/node/index.mjs.map +1 -1
  52. package/dist/{remove-A4DKCN7A.js → remove-4NB23DL3.js} +7 -7
  53. package/dist/{remove-A4DKCN7A.js.map → remove-4NB23DL3.js.map} +1 -1
  54. package/dist/{remove-2OGMXSTR.mjs → remove-KVFBJ4XE.mjs} +5 -5
  55. package/dist/{sync-TNVCKWTC.mjs → sync-ITMRHKSW.mjs} +5 -5
  56. package/dist/{sync-WRZXIBZS.js → sync-UI7HBIDR.js} +7 -7
  57. package/dist/{sync-WRZXIBZS.js.map → sync-UI7HBIDR.js.map} +1 -1
  58. package/dist/{types-5h2Im4pl.d.mts → types-ExKeJEM0.d.mts} +1179 -17
  59. package/dist/{types-fo-Ij9pl.d.ts → types-WRcnQUom.d.ts} +1179 -17
  60. package/package.json +3 -2
  61. package/dist/chunk-C4XUWCQR.js.map +0 -1
  62. package/dist/chunk-IIUI2XYW.mjs.map +0 -1
  63. package/dist/chunk-ONXIZKC6.js.map +0 -1
  64. package/dist/chunk-TB5KB737.mjs.map +0 -1
  65. /package/dist/{add-2UHFYNUA.mjs.map → add-36C7337U.mjs.map} +0 -0
  66. /package/dist/{chunk-WCNDT5SD.mjs.map → chunk-JX2FBF5Z.mjs.map} +0 -0
  67. /package/dist/{chunk-OTID7S7K.mjs.map → chunk-QOSICUZE.mjs.map} +0 -0
  68. /package/dist/{list-WV5LA6LD.mjs.map → list-S5UIBHSU.mjs.map} +0 -0
  69. /package/dist/{login-QKRT6PXA.mjs.map → login-ALPTL4S6.mjs.map} +0 -0
  70. /package/dist/{remove-2OGMXSTR.mjs.map → remove-KVFBJ4XE.mjs.map} +0 -0
  71. /package/dist/{sync-TNVCKWTC.mjs.map → sync-ITMRHKSW.mjs.map} +0 -0
package/dist/index.d.mts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.mjs';
2
- export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Ck58nRkT.mjs';
3
- import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-5h2Im4pl.mjs';
2
+ export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Dl15eRjo.mjs';
3
+ import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-ExKeJEM0.mjs';
4
4
  import openApiCreateClient from 'openapi-fetch';
5
5
  import { z } from 'zod';
6
6
  export { l as attributes } from './types-DRiQaKFG.mjs';
@@ -405,7 +405,7 @@ type DatasetsFacadeConfig = {
405
405
  * const dataset = await langwatch.datasets.get("my-dataset");
406
406
  *
407
407
  * // Use with evaluation
408
- * const evaluation = langwatch.evaluation.init("my-experiment");
408
+ * const evaluation = langwatch.experiments.init("my-experiment");
409
409
  * await evaluation.run(dataset.entries.map(e => e.entry), async ({ item, index }) => {
410
410
  * const output = await myLLM(item.input);
411
411
  * await evaluation.evaluate("my-evaluator", {
@@ -447,16 +447,16 @@ declare class DatasetsFacade {
447
447
  }
448
448
 
449
449
  /**
450
- * Types for the Evaluation API
450
+ * Types for the Experiments API
451
451
  *
452
- * These types define the structure for batch evaluations, including
452
+ * These types define the structure for batch experiments, including
453
453
  * logging metrics, running evaluators, and managing targets.
454
454
  */
455
455
 
456
456
  /**
457
457
  * Status of an evaluation result
458
458
  */
459
- type EvaluationStatus = "processed" | "error" | "skipped";
459
+ type EvaluationStatus$1 = "processed" | "error" | "skipped";
460
460
  /**
461
461
  * Target types for batch evaluations
462
462
  */
@@ -503,11 +503,11 @@ type TargetInfo = z.infer<typeof targetInfoSchema>;
503
503
  /**
504
504
  * Result of an evaluation
505
505
  */
506
- type EvaluationResult = z.infer<typeof evaluationResultSchema>;
506
+ type EvaluationResult$1 = z.infer<typeof evaluationResultSchema>;
507
507
  /**
508
- * Options for initializing an evaluation
508
+ * Options for initializing an experiment
509
509
  */
510
- type EvaluationInitOptions = {
510
+ type ExperimentInitOptions = {
511
511
  /** Custom run ID (auto-generated if not provided) */
512
512
  runId?: string;
513
513
  /** Number of parallel threads for submit() */
@@ -533,7 +533,7 @@ type LogOptions = {
533
533
  /** Human-readable description of the result */
534
534
  details?: string;
535
535
  /** Status of the evaluation */
536
- status?: EvaluationStatus;
536
+ status?: EvaluationStatus$1;
537
537
  /** Duration in milliseconds */
538
538
  duration?: number;
539
539
  /** Cost amount in USD */
@@ -551,7 +551,7 @@ type LogOptions = {
551
551
  /**
552
552
  * Options for the evaluate() method (built-in evaluators)
553
553
  */
554
- type EvaluateOptions = {
554
+ type EvaluateOptions$1 = {
555
555
  /**
556
556
  * Row index in the dataset.
557
557
  * Optional when called inside withTarget() - will be auto-inferred from context.
@@ -627,9 +627,9 @@ type TargetResult<R> = {
627
627
  };
628
628
 
629
629
  /**
630
- * Evaluation - Main class for running batch evaluations
630
+ * Experiment - Main class for running batch experiments
631
631
  *
632
- * Provides a clean API for running evaluations over datasets with:
632
+ * Provides a clean API for running experiments over datasets with:
633
633
  * - Automatic tracing per iteration
634
634
  * - Parallel execution with concurrency control
635
635
  * - Batched result sending
@@ -638,9 +638,9 @@ type TargetResult<R> = {
638
638
  */
639
639
 
640
640
  /**
641
- * Evaluation session for running batch evaluations
641
+ * Experiment session for running batch experiments
642
642
  */
643
- declare class Evaluation {
643
+ declare class Experiment {
644
644
  readonly name: string;
645
645
  readonly runId: string;
646
646
  readonly experimentSlug: string;
@@ -671,7 +671,7 @@ declare class Evaluation {
671
671
  endpoint: string;
672
672
  apiKey: string;
673
673
  logger: Logger;
674
- } & EvaluationInitOptions): Promise<Evaluation>;
674
+ } & ExperimentInitOptions): Promise<Experiment>;
675
675
  /**
676
676
  * Initialize the evaluation by creating/getting the experiment
677
677
  */
@@ -743,7 +743,7 @@ declare class Evaluation {
743
743
  * });
744
744
  * ```
745
745
  */
746
- evaluate(evaluatorSlug: string, options: EvaluateOptions): Promise<void>;
746
+ evaluate(evaluatorSlug: string, options: EvaluateOptions$1): Promise<void>;
747
747
  /**
748
748
  * Execute code within a target context with automatic tracing
749
749
  *
@@ -811,12 +811,12 @@ declare class Evaluation {
811
811
  }
812
812
 
813
813
  /**
814
- * Types for platform-configured evaluations (Evaluations V3)
814
+ * Types for platform-configured experiments (Experiments Workbench)
815
815
  */
816
816
  /**
817
- * Summary of a completed evaluation run
817
+ * Summary of a completed experiment run
818
818
  */
819
- type EvaluationRunSummary = {
819
+ type ExperimentRunSummary = {
820
820
  runId?: string;
821
821
  totalCells?: number;
822
822
  completedCells?: number;
@@ -850,9 +850,9 @@ type EvaluationRunSummary = {
850
850
  totalCost?: number;
851
851
  };
852
852
  /**
853
- * Options for running a platform evaluation
853
+ * Options for running a platform experiment
854
854
  */
855
- type RunEvaluationOptions = {
855
+ type RunExperimentOptions = {
856
856
  /**
857
857
  * Polling interval in milliseconds (default: 2000)
858
858
  */
@@ -867,9 +867,9 @@ type RunEvaluationOptions = {
867
867
  onProgress?: (progress: number, total: number) => void;
868
868
  };
869
869
  /**
870
- * Final result of a platform evaluation run
870
+ * Final result of a platform experiment run
871
871
  */
872
- type EvaluationRunResult = {
872
+ type ExperimentRunResult = {
873
873
  runId: string;
874
874
  status: "completed" | "failed" | "stopped";
875
875
  passed: number;
@@ -877,7 +877,7 @@ type EvaluationRunResult = {
877
877
  passRate: number;
878
878
  duration: number;
879
879
  runUrl: string;
880
- summary: EvaluationRunSummary;
880
+ summary: ExperimentRunSummary;
881
881
  /**
882
882
  * Print a CI-friendly summary of the results
883
883
  * @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
@@ -886,53 +886,53 @@ type EvaluationRunResult = {
886
886
  };
887
887
 
888
888
  /**
889
- * EvaluationFacade - Entry point for the evaluation API
889
+ * ExperimentsFacade - Entry point for the experiments API
890
890
  *
891
891
  * Provides:
892
- * - `init()` method to create evaluation sessions (SDK-defined evaluations)
893
- * - `run()` method to execute platform-configured evaluations (Evaluations V3)
892
+ * - `init()` method to create experiment sessions (SDK-defined experiments)
893
+ * - `run()` method to execute platform-configured experiments (Experiments Workbench)
894
894
  */
895
895
 
896
- type EvaluationFacadeConfig = {
896
+ type ExperimentsFacadeConfig = {
897
897
  langwatchApiClient: LangwatchApiClient;
898
898
  endpoint: string;
899
899
  apiKey: string;
900
900
  logger: Logger;
901
901
  };
902
902
  /**
903
- * Facade for creating evaluation sessions and running platform-configured evaluations
903
+ * Facade for creating experiment sessions and running platform-configured experiments
904
904
  */
905
- declare class EvaluationFacade {
905
+ declare class ExperimentsFacade {
906
906
  private readonly config;
907
- constructor(config: EvaluationFacadeConfig);
907
+ constructor(config: ExperimentsFacadeConfig);
908
908
  /**
909
- * Initialize a new evaluation session (SDK-defined)
909
+ * Initialize a new experiment session (SDK-defined)
910
910
  *
911
911
  * @param name - Name of the experiment (used as slug)
912
912
  * @param options - Optional configuration
913
- * @returns An initialized Evaluation instance
913
+ * @returns An initialized Experiment instance
914
914
  *
915
915
  * @example
916
916
  * ```typescript
917
- * const evaluation = await langwatch.evaluation.init('my-experiment');
917
+ * const experiment = await langwatch.experiments.init('my-experiment');
918
918
  *
919
- * await evaluation.run(dataset, async ({ item, index }) => {
919
+ * await experiment.run(dataset, async ({ item, index }) => {
920
920
  * const response = await myAgent(item.question);
921
- * evaluation.log('accuracy', { index, score: 0.95 });
921
+ * experiment.log('accuracy', { index, score: 0.95 });
922
922
  * });
923
923
  * ```
924
924
  */
925
- init(name: string, options?: EvaluationInitOptions): Promise<Evaluation>;
925
+ init(name: string, options?: ExperimentInitOptions): Promise<Experiment>;
926
926
  /**
927
- * Run a platform-configured evaluation (Evaluations V3)
927
+ * Run a platform-configured experiment (Experiments Workbench)
928
928
  *
929
- * This runs an evaluation that was configured in the LangWatch platform.
929
+ * This runs an experiment that was configured in the LangWatch platform.
930
930
  * The method automatically prints a summary and exits with code 1 on failure
931
931
  * (unless `exitOnFailure: false` is passed).
932
932
  *
933
- * @param slug - The slug of the evaluation (found in the evaluation URL)
933
+ * @param slug - The slug of the experiment (found in the experiment URL)
934
934
  * @param options - Optional configuration
935
- * @returns The evaluation results including pass rate and summary
935
+ * @returns The experiment results including pass rate and summary
936
936
  *
937
937
  * @example
938
938
  * ```typescript
@@ -940,17 +940,17 @@ declare class EvaluationFacade {
940
940
  *
941
941
  * const langwatch = new LangWatch();
942
942
  *
943
- * const result = await langwatch.evaluation.run("my-evaluation-slug");
943
+ * const result = await langwatch.experiments.run("my-experiment-slug");
944
944
  * result.printSummary();
945
945
  * ```
946
946
  */
947
- run(slug: string, options?: RunEvaluationOptions): Promise<EvaluationRunResult>;
947
+ run(slug: string, options?: RunExperimentOptions): Promise<ExperimentRunResult>;
948
948
  /**
949
- * Run an evaluation and wait for completion using polling
949
+ * Run an experiment and wait for completion using polling
950
950
  */
951
951
  private runWithPolling;
952
952
  /**
953
- * Start an evaluation run
953
+ * Start an experiment run
954
954
  */
955
955
  private startRun;
956
956
  /**
@@ -962,7 +962,7 @@ declare class EvaluationFacade {
962
962
  */
963
963
  private buildResult;
964
964
  /**
965
- * Print a CI-friendly summary of the evaluation results
965
+ * Print a CI-friendly summary of the experiment results
966
966
  */
967
967
  private printSummary;
968
968
  private sleep;
@@ -973,25 +973,25 @@ declare class EvaluationFacade {
973
973
  }
974
974
 
975
975
  /**
976
- * Errors for the Evaluation API
976
+ * Errors for the Experiments API
977
977
  */
978
978
  /**
979
- * Base error for evaluation-related issues
979
+ * Base error for experiment-related issues
980
980
  */
981
- declare class EvaluationError extends Error {
981
+ declare class ExperimentError extends Error {
982
982
  constructor(message: string);
983
983
  }
984
984
  /**
985
985
  * Thrown when initialization fails
986
986
  */
987
- declare class EvaluationInitError extends EvaluationError {
987
+ declare class ExperimentInitError extends ExperimentError {
988
988
  readonly cause?: Error | undefined;
989
989
  constructor(message: string, cause?: Error | undefined);
990
990
  }
991
991
  /**
992
992
  * Thrown when API calls fail
993
993
  */
994
- declare class EvaluationApiError extends EvaluationError {
994
+ declare class ExperimentApiError extends ExperimentError {
995
995
  readonly statusCode?: number | undefined;
996
996
  readonly cause?: Error | undefined;
997
997
  constructor(message: string, statusCode?: number | undefined, cause?: Error | undefined);
@@ -999,7 +999,7 @@ declare class EvaluationApiError extends EvaluationError {
999
999
  /**
1000
1000
  * Thrown when target metadata conflicts
1001
1001
  */
1002
- declare class TargetMetadataConflictError extends EvaluationError {
1002
+ declare class TargetMetadataConflictError extends ExperimentError {
1003
1003
  readonly targetName: string;
1004
1004
  readonly existingMetadata: Record<string, unknown>;
1005
1005
  readonly newMetadata: Record<string, unknown>;
@@ -1008,12 +1008,164 @@ declare class TargetMetadataConflictError extends EvaluationError {
1008
1008
  /**
1009
1009
  * Thrown when an evaluator call fails
1010
1010
  */
1011
- declare class EvaluatorError extends EvaluationError {
1011
+ declare class EvaluatorError extends ExperimentError {
1012
1012
  readonly evaluatorSlug: string;
1013
1013
  readonly cause?: Error | undefined;
1014
1014
  constructor(evaluatorSlug: string, message: string, cause?: Error | undefined);
1015
1015
  }
1016
1016
 
1017
+ /**
1018
+ * Types for the Evaluations API (Online Evaluations / Guardrails)
1019
+ *
1020
+ * These types define the structure for running evaluators and guardrails
1021
+ * in real-time against LLM inputs/outputs.
1022
+ */
1023
+ /**
1024
+ * Status of an evaluation result
1025
+ */
1026
+ type EvaluationStatus = "processed" | "skipped" | "error";
1027
+ /**
1028
+ * Cost information from an evaluation
1029
+ */
1030
+ type EvaluationCost = {
1031
+ currency: string;
1032
+ amount: number;
1033
+ };
1034
+ /**
1035
+ * Result returned from running an evaluator
1036
+ */
1037
+ type EvaluationResult = {
1038
+ /** Status of the evaluation */
1039
+ status: EvaluationStatus;
1040
+ /** Whether the evaluation passed (for guardrails) */
1041
+ passed?: boolean;
1042
+ /** Numeric score (typically 0-1) */
1043
+ score?: number;
1044
+ /** Human-readable details about the result */
1045
+ details?: string;
1046
+ /** Label/category for the result */
1047
+ label?: string;
1048
+ /** Cost of running the evaluation */
1049
+ cost?: EvaluationCost;
1050
+ };
1051
+ /**
1052
+ * Options for the evaluate() method
1053
+ */
1054
+ type EvaluateOptions = {
1055
+ /** Data to pass to the evaluator (input, output, contexts, etc.) */
1056
+ data: Record<string, unknown>;
1057
+ /** Human-readable name for this evaluation */
1058
+ name?: string;
1059
+ /** Evaluator-specific settings */
1060
+ settings?: Record<string, unknown>;
1061
+ /** Whether to run as a guardrail (affects error handling) */
1062
+ asGuardrail?: boolean;
1063
+ };
1064
+
1065
+ /**
1066
+ * EvaluationsFacade - Entry point for the Evaluations API (Online Evaluations / Guardrails)
1067
+ *
1068
+ * Provides an API for running evaluators and guardrails in real-time against LLM inputs/outputs.
1069
+ *
1070
+ * @example
1071
+ * ```typescript
1072
+ * const langwatch = new LangWatch({ apiKey: "your-api-key" });
1073
+ *
1074
+ * // Run a guardrail
1075
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1076
+ * data: { input: userInput, output: generatedResponse },
1077
+ * name: "PII Detection",
1078
+ * asGuardrail: true,
1079
+ * settings: {},
1080
+ * });
1081
+ *
1082
+ * if (!guardrail.passed) {
1083
+ * return "I'm sorry, I can't do that.";
1084
+ * }
1085
+ * ```
1086
+ */
1087
+
1088
+ type EvaluationsFacadeConfig = {
1089
+ endpoint: string;
1090
+ apiKey: string;
1091
+ logger: Logger;
1092
+ };
1093
+ declare class EvaluationsFacade {
1094
+ #private;
1095
+ constructor(config: EvaluationsFacadeConfig);
1096
+ /**
1097
+ * Run an evaluator or guardrail against provided data
1098
+ *
1099
+ * Creates an OpenTelemetry span attached to the current trace context,
1100
+ * calls the LangWatch evaluation API, and returns the result.
1101
+ *
1102
+ * @param slug - The evaluator slug (e.g., "presidio/pii_detection", "langevals/llm_boolean")
1103
+ * @param options - Evaluation options including data, name, settings, and asGuardrail flag
1104
+ * @returns The evaluation result with status, passed, score, details, label, and cost
1105
+ *
1106
+ * @example
1107
+ * ```typescript
1108
+ * // Run as a guardrail (synchronous evaluation that can block responses)
1109
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1110
+ * data: { input: userInput, output: generatedResponse },
1111
+ * name: "PII Detection Guardrail",
1112
+ * asGuardrail: true,
1113
+ * });
1114
+ *
1115
+ * if (!guardrail.passed) {
1116
+ * console.log("PII detected:", guardrail.details);
1117
+ * return "Sorry, I cannot process that request.";
1118
+ * }
1119
+ * ```
1120
+ *
1121
+ * @example
1122
+ * ```typescript
1123
+ * // Run as an online evaluation (async scoring for monitoring)
1124
+ * const result = await langwatch.evaluations.evaluate("langevals/llm_boolean", {
1125
+ * data: { input: question, output: response },
1126
+ * name: "Quality Check",
1127
+ * settings: { prompt: "Check if the response answers the question." },
1128
+ * });
1129
+ *
1130
+ * console.log("Score:", result.score);
1131
+ * console.log("Details:", result.details);
1132
+ * ```
1133
+ */
1134
+ evaluate: (slug: string, options: EvaluateOptions) => Promise<EvaluationResult>;
1135
+ }
1136
+
1137
+ /**
1138
+ * Error classes for the Evaluations API
1139
+ */
1140
+ /**
1141
+ * Base error for evaluation operations
1142
+ */
1143
+ declare class EvaluationError extends Error {
1144
+ constructor(message: string);
1145
+ }
1146
+ /**
1147
+ * Error when an evaluator call fails
1148
+ */
1149
+ declare class EvaluatorCallError extends EvaluationError {
1150
+ readonly evaluatorSlug: string;
1151
+ readonly statusCode?: number;
1152
+ constructor(evaluatorSlug: string, message: string, statusCode?: number);
1153
+ }
1154
+ /**
1155
+ * Error when evaluator is not found
1156
+ */
1157
+ declare class EvaluatorNotFoundError extends EvaluationError {
1158
+ readonly evaluatorSlug: string;
1159
+ constructor(evaluatorSlug: string);
1160
+ }
1161
+ /**
1162
+ * Error from the evaluations API
1163
+ */
1164
+ declare class EvaluationsApiError extends EvaluationError {
1165
+ readonly statusCode: number;
1166
+ constructor(message: string, statusCode: number);
1167
+ }
1168
+
1017
1169
  interface GetTraceParams {
1018
1170
  includeSpans?: boolean;
1019
1171
  }
@@ -1037,15 +1189,76 @@ declare class LangWatch {
1037
1189
  private readonly config;
1038
1190
  readonly prompts: PromptsFacade;
1039
1191
  readonly traces: TracesFacade;
1040
- readonly evaluation: EvaluationFacade;
1041
1192
  readonly datasets: DatasetsFacade;
1193
+ /**
1194
+ * Run experiments on LangWatch platform or via SDK.
1195
+ *
1196
+ * Platform experiments (CI/CD):
1197
+ * ```typescript
1198
+ * const result = await langwatch.experiments.run("my-experiment-slug");
1199
+ * result.printSummary();
1200
+ * ```
1201
+ *
1202
+ * SDK-defined experiments:
1203
+ * ```typescript
1204
+ * const experiment = await langwatch.experiments.init("my-experiment");
1205
+ * // ... run evaluators using experiment.evaluate()
1206
+ * ```
1207
+ */
1208
+ readonly experiments: ExperimentsFacade;
1209
+ /**
1210
+ * Run evaluators and guardrails in real-time (Online Evaluations).
1211
+ *
1212
+ * @example
1213
+ * ```typescript
1214
+ * // Run a guardrail
1215
+ * const guardrail = await langwatch.evaluations.evaluate("presidio/pii_detection", {
1216
+ * data: { input: userInput, output: generatedResponse },
1217
+ * name: "PII Detection",
1218
+ * asGuardrail: true,
1219
+ * });
1220
+ *
1221
+ * if (!guardrail.passed) {
1222
+ * return "I'm sorry, I can't do that.";
1223
+ * }
1224
+ * ```
1225
+ */
1226
+ readonly evaluations: EvaluationsFacade;
1042
1227
  constructor(options?: LangWatchConstructorOptions);
1043
1228
  get apiClient(): LangwatchApiClient;
1044
1229
  }
1045
1230
 
1231
+ type EvaluatorResponse = NonNullable<paths["/api/evaluators"]["get"]["responses"]["200"]["content"]["application/json"]>[number];
1232
+ type EvaluatorField = EvaluatorResponse["fields"][number];
1233
+
1234
+ /**
1235
+ * Service for retrieving evaluator resources via the LangWatch API.
1236
+ *
1237
+ * Provides read-only access to project evaluators with computed fields.
1238
+ */
1239
+ declare class EvaluatorsApiService {
1240
+ private readonly apiClient;
1241
+ constructor(config?: Pick<InternalConfig, "langwatchApiClient">);
1242
+ private handleApiError;
1243
+ /**
1244
+ * Fetches all evaluators for the project.
1245
+ */
1246
+ getAll(): Promise<EvaluatorResponse[]>;
1247
+ /**
1248
+ * Fetches a single evaluator by its ID or slug.
1249
+ */
1250
+ get(idOrSlug: string): Promise<EvaluatorResponse>;
1251
+ }
1252
+
1253
+ declare class EvaluatorsApiError extends Error {
1254
+ readonly operation: string;
1255
+ readonly originalError?: unknown | undefined;
1256
+ constructor(message: string, operation: string, originalError?: unknown | undefined);
1257
+ }
1258
+
1046
1259
  declare const logger: {
1047
1260
  ConsoleLogger: typeof ConsoleLogger;
1048
1261
  NoOpLogger: typeof NoOpLogger;
1049
1262
  };
1050
1263
 
1051
- export { type EvaluateOptions, Evaluation, EvaluationApiError, EvaluationError, EvaluationFacade, EvaluationInitError, type EvaluationInitOptions, type EvaluationResult, type EvaluationStatus, EvaluatorError, FetchPolicy, type GetPromptOptions, LangWatch, type LogOptions, type RunCallback, type RunContext, type RunOptions, type TargetInfo, type TargetMetadata, TargetMetadataConflictError, type TargetType, logger };
1264
+ export { type EvaluateOptions, type EvaluationCost, EvaluationError, type EvaluationResult, type EvaluationStatus, EvaluationsApiError, EvaluationsFacade, EvaluatorCallError, EvaluatorError, type EvaluatorField, EvaluatorNotFoundError, type EvaluatorResponse, EvaluatorsApiError, EvaluatorsApiService, Experiment, ExperimentApiError, ExperimentError, type EvaluateOptions$1 as ExperimentEvaluateOptions, type EvaluationResult$1 as ExperimentEvaluationResult, type EvaluationStatus$1 as ExperimentEvaluationStatus, ExperimentInitError, type ExperimentInitOptions, ExperimentsFacade, FetchPolicy, type GetPromptOptions, LangWatch, type LogOptions, type RunCallback, type RunContext, type RunOptions, type TargetInfo, type TargetMetadata, TargetMetadataConflictError, type TargetType, logger };