langwatch 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/dist/{add-XV5SUAXF.mjs → add-2UHFYNUA.mjs} +4 -4
  2. package/dist/{add-UB5U3K3M.js → add-LUETMKBD.js} +7 -7
  3. package/dist/{add-UB5U3K3M.js.map → add-LUETMKBD.js.map} +1 -1
  4. package/dist/{chunk-D4H6PR6H.js → chunk-6SSCBYJM.js} +9 -9
  5. package/dist/{chunk-D4H6PR6H.js.map → chunk-6SSCBYJM.js.map} +1 -1
  6. package/dist/{chunk-JQYW7RY7.js → chunk-BQRUUTN3.js} +14 -14
  7. package/dist/{chunk-JQYW7RY7.js.map → chunk-BQRUUTN3.js.map} +1 -1
  8. package/dist/{chunk-N7PJJMU2.js → chunk-C4XUWCQR.js} +2 -2
  9. package/dist/{chunk-N7PJJMU2.js.map → chunk-C4XUWCQR.js.map} +1 -1
  10. package/dist/{chunk-WZ7FYUHN.mjs → chunk-OTID7S7K.mjs} +2 -2
  11. package/dist/{chunk-LKE6DMUP.mjs → chunk-TB5KB737.mjs} +2 -2
  12. package/dist/{chunk-LKE6DMUP.mjs.map → chunk-TB5KB737.mjs.map} +1 -1
  13. package/dist/{chunk-556ZFJMK.mjs → chunk-WCNDT5SD.mjs} +2 -2
  14. package/dist/cli/index.js +6 -6
  15. package/dist/cli/index.mjs +6 -6
  16. package/dist/{implementation-CPxv2BdW.d.ts → implementation-Bnc8Aymq.d.ts} +1 -1
  17. package/dist/{implementation-CVrmD0bz.d.mts → implementation-Ck58nRkT.d.mts} +1 -1
  18. package/dist/index.d.mts +129 -5
  19. package/dist/index.d.ts +129 -5
  20. package/dist/index.js +286 -12
  21. package/dist/index.js.map +1 -1
  22. package/dist/index.mjs +278 -4
  23. package/dist/index.mjs.map +1 -1
  24. package/dist/{list-DUNP46AD.js → list-7U3M64GY.js} +7 -7
  25. package/dist/{list-DUNP46AD.js.map → list-7U3M64GY.js.map} +1 -1
  26. package/dist/{list-T4QS6CT2.mjs → list-WV5LA6LD.mjs} +4 -4
  27. package/dist/{login-3H27NIOD.js → login-B7DKMN7P.js} +3 -3
  28. package/dist/{login-3H27NIOD.js.map → login-B7DKMN7P.js.map} +1 -1
  29. package/dist/{login-T2ET7TKH.mjs → login-QKRT6PXA.mjs} +2 -2
  30. package/dist/observability-sdk/index.d.mts +3 -3
  31. package/dist/observability-sdk/index.d.ts +3 -3
  32. package/dist/observability-sdk/index.js +3 -3
  33. package/dist/observability-sdk/index.mjs +2 -2
  34. package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
  35. package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
  36. package/dist/observability-sdk/setup/node/index.js +10 -10
  37. package/dist/observability-sdk/setup/node/index.mjs +2 -2
  38. package/dist/{remove-F5RM4775.mjs → remove-2OGMXSTR.mjs} +4 -4
  39. package/dist/{remove-V4JL5Z4U.js → remove-A4DKCN7A.js} +6 -6
  40. package/dist/{remove-V4JL5Z4U.js.map → remove-A4DKCN7A.js.map} +1 -1
  41. package/dist/{sync-VGWOLOLJ.mjs → sync-TNVCKWTC.mjs} +4 -4
  42. package/dist/{sync-DIOKWE6R.js → sync-WRZXIBZS.js} +6 -6
  43. package/dist/{sync-DIOKWE6R.js.map → sync-WRZXIBZS.js.map} +1 -1
  44. package/dist/{types-Kts5RGLY.d.mts → types-5h2Im4pl.d.mts} +162 -0
  45. package/dist/{types-usU5mTCX.d.ts → types-fo-Ij9pl.d.ts} +162 -0
  46. package/package.json +1 -1
  47. /package/dist/{add-XV5SUAXF.mjs.map → add-2UHFYNUA.mjs.map} +0 -0
  48. /package/dist/{chunk-WZ7FYUHN.mjs.map → chunk-OTID7S7K.mjs.map} +0 -0
  49. /package/dist/{chunk-556ZFJMK.mjs.map → chunk-WCNDT5SD.mjs.map} +0 -0
  50. /package/dist/{list-T4QS6CT2.mjs.map → list-WV5LA6LD.mjs.map} +0 -0
  51. /package/dist/{login-T2ET7TKH.mjs.map → login-QKRT6PXA.mjs.map} +0 -0
  52. /package/dist/{remove-F5RM4775.mjs.map → remove-2OGMXSTR.mjs.map} +0 -0
  53. /package/dist/{sync-VGWOLOLJ.mjs.map → sync-TNVCKWTC.mjs.map} +0 -0
package/dist/index.d.mts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.mjs';
2
- export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-CVrmD0bz.mjs';
3
- import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-Kts5RGLY.mjs';
2
+ export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Ck58nRkT.mjs';
3
+ import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-5h2Im4pl.mjs';
4
4
  import openApiCreateClient from 'openapi-fetch';
5
5
  import { z } from 'zod';
6
6
  export { l as attributes } from './types-DRiQaKFG.mjs';
@@ -810,10 +810,87 @@ declare class Evaluation {
810
810
  private getSpanIdFromContext;
811
811
  }
812
812
 
813
+ /**
814
+ * Types for platform-configured evaluations (Evaluations V3)
815
+ */
816
+ /**
817
+ * Summary of a completed evaluation run
818
+ */
819
+ type EvaluationRunSummary = {
820
+ runId?: string;
821
+ totalCells?: number;
822
+ completedCells?: number;
823
+ failedCells?: number;
824
+ duration?: number;
825
+ runUrl?: string;
826
+ timestamps?: {
827
+ startedAt: number;
828
+ finishedAt?: number;
829
+ stoppedAt?: number;
830
+ };
831
+ targets?: Array<{
832
+ targetId: string;
833
+ name: string;
834
+ passed: number;
835
+ failed: number;
836
+ avgLatency: number;
837
+ totalCost: number;
838
+ }>;
839
+ evaluators?: Array<{
840
+ evaluatorId: string;
841
+ name: string;
842
+ passed: number;
843
+ failed: number;
844
+ passRate: number;
845
+ avgScore?: number;
846
+ }>;
847
+ totalPassed?: number;
848
+ totalFailed?: number;
849
+ passRate?: number;
850
+ totalCost?: number;
851
+ };
852
+ /**
853
+ * Options for running a platform evaluation
854
+ */
855
+ type RunEvaluationOptions = {
856
+ /**
857
+ * Polling interval in milliseconds (default: 2000)
858
+ */
859
+ pollInterval?: number;
860
+ /**
861
+ * Maximum time to wait for completion in milliseconds (default: 600000 = 10 minutes)
862
+ */
863
+ timeout?: number;
864
+ /**
865
+ * Callback for progress updates
866
+ */
867
+ onProgress?: (progress: number, total: number) => void;
868
+ };
869
+ /**
870
+ * Final result of a platform evaluation run
871
+ */
872
+ type EvaluationRunResult = {
873
+ runId: string;
874
+ status: "completed" | "failed" | "stopped";
875
+ passed: number;
876
+ failed: number;
877
+ passRate: number;
878
+ duration: number;
879
+ runUrl: string;
880
+ summary: EvaluationRunSummary;
881
+ /**
882
+ * Print a CI-friendly summary of the results
883
+ * @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
884
+ */
885
+ printSummary: (exitOnFailure?: boolean) => void;
886
+ };
887
+
813
888
  /**
814
889
  * EvaluationFacade - Entry point for the evaluation API
815
890
  *
816
- * Provides the `init()` method to create evaluation sessions.
891
+ * Provides:
892
+ * - `init()` method to create evaluation sessions (SDK-defined evaluations)
893
+ * - `run()` method to execute platform-configured evaluations (Evaluations V3)
817
894
  */
818
895
 
819
896
  type EvaluationFacadeConfig = {
@@ -823,13 +900,13 @@ type EvaluationFacadeConfig = {
823
900
  logger: Logger;
824
901
  };
825
902
  /**
826
- * Facade for creating evaluation sessions
903
+ * Facade for creating evaluation sessions and running platform-configured evaluations
827
904
  */
828
905
  declare class EvaluationFacade {
829
906
  private readonly config;
830
907
  constructor(config: EvaluationFacadeConfig);
831
908
  /**
832
- * Initialize a new evaluation session
909
+ * Initialize a new evaluation session (SDK-defined)
833
910
  *
834
911
  * @param name - Name of the experiment (used as slug)
835
912
  * @param options - Optional configuration
@@ -846,6 +923,53 @@ declare class EvaluationFacade {
846
923
  * ```
847
924
  */
848
925
  init(name: string, options?: EvaluationInitOptions): Promise<Evaluation>;
926
+ /**
927
+ * Run a platform-configured evaluation (Evaluations V3)
928
+ *
929
+ * This runs an evaluation that was configured in the LangWatch platform.
930
+ * The method waits for the run to finish and returns its result; call
931
+ * `printSummary()` on that result to print a summary and exit with code 1 on failure (pass `false` to skip the exit).
932
+ *
933
+ * @param slug - The slug of the evaluation (found in the evaluation URL)
934
+ * @param options - Optional configuration
935
+ * @returns The evaluation results including pass rate and summary
936
+ *
937
+ * @example
938
+ * ```typescript
939
+ * import { LangWatch } from "langwatch";
940
+ *
941
+ * const langwatch = new LangWatch();
942
+ *
943
+ * const result = await langwatch.evaluation.run("my-evaluation-slug");
944
+ * result.printSummary();
945
+ * ```
946
+ */
947
+ run(slug: string, options?: RunEvaluationOptions): Promise<EvaluationRunResult>;
948
+ /**
949
+ * Run an evaluation and wait for completion using polling
950
+ */
951
+ private runWithPolling;
952
+ /**
953
+ * Start an evaluation run
954
+ */
955
+ private startRun;
956
+ /**
957
+ * Get the status of a run
958
+ */
959
+ private getRunStatus;
960
+ /**
961
+ * Build the result object from API response
962
+ */
963
+ private buildResult;
964
+ /**
965
+ * Print a CI-friendly summary of the evaluation results
966
+ */
967
+ private printSummary;
968
+ private sleep;
969
+ /**
970
+ * Replace the domain of a URL with a new base URL, preserving the path
971
+ */
972
+ private replaceUrlDomain;
849
973
  }
850
974
 
851
975
  /**
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.js';
2
- export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-CPxv2BdW.js';
3
- import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-usU5mTCX.js';
2
+ export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Bnc8Aymq.js';
3
+ import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-fo-Ij9pl.js';
4
4
  import openApiCreateClient from 'openapi-fetch';
5
5
  import { z } from 'zod';
6
6
  export { l as attributes } from './types-DRiQaKFG.js';
@@ -810,10 +810,87 @@ declare class Evaluation {
810
810
  private getSpanIdFromContext;
811
811
  }
812
812
 
813
+ /**
814
+ * Types for platform-configured evaluations (Evaluations V3)
815
+ */
816
+ /**
817
+ * Summary of a completed evaluation run
818
+ */
819
+ type EvaluationRunSummary = {
820
+ runId?: string;
821
+ totalCells?: number;
822
+ completedCells?: number;
823
+ failedCells?: number;
824
+ duration?: number;
825
+ runUrl?: string;
826
+ timestamps?: {
827
+ startedAt: number;
828
+ finishedAt?: number;
829
+ stoppedAt?: number;
830
+ };
831
+ targets?: Array<{
832
+ targetId: string;
833
+ name: string;
834
+ passed: number;
835
+ failed: number;
836
+ avgLatency: number;
837
+ totalCost: number;
838
+ }>;
839
+ evaluators?: Array<{
840
+ evaluatorId: string;
841
+ name: string;
842
+ passed: number;
843
+ failed: number;
844
+ passRate: number;
845
+ avgScore?: number;
846
+ }>;
847
+ totalPassed?: number;
848
+ totalFailed?: number;
849
+ passRate?: number;
850
+ totalCost?: number;
851
+ };
852
+ /**
853
+ * Options for running a platform evaluation
854
+ */
855
+ type RunEvaluationOptions = {
856
+ /**
857
+ * Polling interval in milliseconds (default: 2000)
858
+ */
859
+ pollInterval?: number;
860
+ /**
861
+ * Maximum time to wait for completion in milliseconds (default: 600000 = 10 minutes)
862
+ */
863
+ timeout?: number;
864
+ /**
865
+ * Callback for progress updates
866
+ */
867
+ onProgress?: (progress: number, total: number) => void;
868
+ };
869
+ /**
870
+ * Final result of a platform evaluation run
871
+ */
872
+ type EvaluationRunResult = {
873
+ runId: string;
874
+ status: "completed" | "failed" | "stopped";
875
+ passed: number;
876
+ failed: number;
877
+ passRate: number;
878
+ duration: number;
879
+ runUrl: string;
880
+ summary: EvaluationRunSummary;
881
+ /**
882
+ * Print a CI-friendly summary of the results
883
+ * @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
884
+ */
885
+ printSummary: (exitOnFailure?: boolean) => void;
886
+ };
887
+
813
888
  /**
814
889
  * EvaluationFacade - Entry point for the evaluation API
815
890
  *
816
- * Provides the `init()` method to create evaluation sessions.
891
+ * Provides:
892
+ * - `init()` method to create evaluation sessions (SDK-defined evaluations)
893
+ * - `run()` method to execute platform-configured evaluations (Evaluations V3)
817
894
  */
818
895
 
819
896
  type EvaluationFacadeConfig = {
@@ -823,13 +900,13 @@ type EvaluationFacadeConfig = {
823
900
  logger: Logger;
824
901
  };
825
902
  /**
826
- * Facade for creating evaluation sessions
903
+ * Facade for creating evaluation sessions and running platform-configured evaluations
827
904
  */
828
905
  declare class EvaluationFacade {
829
906
  private readonly config;
830
907
  constructor(config: EvaluationFacadeConfig);
831
908
  /**
832
- * Initialize a new evaluation session
909
+ * Initialize a new evaluation session (SDK-defined)
833
910
  *
834
911
  * @param name - Name of the experiment (used as slug)
835
912
  * @param options - Optional configuration
@@ -846,6 +923,53 @@ declare class EvaluationFacade {
846
923
  * ```
847
924
  */
848
925
  init(name: string, options?: EvaluationInitOptions): Promise<Evaluation>;
926
+ /**
927
+ * Run a platform-configured evaluation (Evaluations V3)
928
+ *
929
+ * This runs an evaluation that was configured in the LangWatch platform.
930
+ * The method waits for the run to finish and returns its result; call
931
+ * `printSummary()` on that result to print a summary and exit with code 1 on failure (pass `false` to skip the exit).
932
+ *
933
+ * @param slug - The slug of the evaluation (found in the evaluation URL)
934
+ * @param options - Optional configuration
935
+ * @returns The evaluation results including pass rate and summary
936
+ *
937
+ * @example
938
+ * ```typescript
939
+ * import { LangWatch } from "langwatch";
940
+ *
941
+ * const langwatch = new LangWatch();
942
+ *
943
+ * const result = await langwatch.evaluation.run("my-evaluation-slug");
944
+ * result.printSummary();
945
+ * ```
946
+ */
947
+ run(slug: string, options?: RunEvaluationOptions): Promise<EvaluationRunResult>;
948
+ /**
949
+ * Run an evaluation and wait for completion using polling
950
+ */
951
+ private runWithPolling;
952
+ /**
953
+ * Start an evaluation run
954
+ */
955
+ private startRun;
956
+ /**
957
+ * Get the status of a run
958
+ */
959
+ private getRunStatus;
960
+ /**
961
+ * Build the result object from API response
962
+ */
963
+ private buildResult;
964
+ /**
965
+ * Print a CI-friendly summary of the evaluation results
966
+ */
967
+ private printSummary;
968
+ private sleep;
969
+ /**
970
+ * Replace the domain of a URL with a new base URL, preserving the path
971
+ */
972
+ private replaceUrlDomain;
849
973
  }
850
974
 
851
975
  /**
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
 
7
7
 
8
- var _chunkD4H6PR6Hjs = require('./chunk-D4H6PR6H.js');
8
+ var _chunk6SSCBYJMjs = require('./chunk-6SSCBYJM.js');
9
9
 
10
10
 
11
11
  var _chunkASTAIRXGjs = require('./chunk-ASTAIRXG.js');
@@ -16,12 +16,12 @@ var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
16
16
 
17
17
 
18
18
 
19
- var _chunkJQYW7RY7js = require('./chunk-JQYW7RY7.js');
19
+ var _chunkBQRUUTN3js = require('./chunk-BQRUUTN3.js');
20
20
 
21
21
 
22
22
 
23
23
 
24
- var _chunkN7PJJMU2js = require('./chunk-N7PJJMU2.js');
24
+ var _chunkC4XUWCQRjs = require('./chunk-C4XUWCQR.js');
25
25
 
26
26
 
27
27
 
@@ -943,13 +943,53 @@ var Evaluation = class _Evaluation {
943
943
  }
944
944
  };
945
945
 
946
+ // src/client-sdk/services/evaluation/platformErrors.ts
947
+ var EvaluationsError = class extends Error {
948
+ constructor(message) {
949
+ super(message);
950
+ this.name = "EvaluationsError";
951
+ }
952
+ };
953
+ var EvaluationNotFoundError = class extends EvaluationsError {
954
+ constructor(slug) {
955
+ super(`Evaluation not found: ${slug}`);
956
+ this.name = "EvaluationNotFoundError";
957
+ }
958
+ };
959
+ var EvaluationTimeoutError = class extends EvaluationsError {
960
+ constructor(runId, progress, total) {
961
+ super(`Evaluation run timed out: ${runId} (${progress}/${total} completed)`);
962
+ this.name = "EvaluationTimeoutError";
963
+ this.runId = runId;
964
+ this.progress = progress;
965
+ this.total = total;
966
+ }
967
+ };
968
+ var EvaluationRunFailedError = class extends EvaluationsError {
969
+ constructor(runId, errorMessage) {
970
+ super(`Evaluation run failed: ${errorMessage}`);
971
+ this.name = "EvaluationRunFailedError";
972
+ this.runId = runId;
973
+ this.errorMessage = errorMessage;
974
+ }
975
+ };
976
+ var EvaluationsApiError = class extends EvaluationsError {
977
+ constructor(message, statusCode) {
978
+ super(message);
979
+ this.name = "EvaluationsApiError";
980
+ this.statusCode = statusCode;
981
+ }
982
+ };
983
+
946
984
  // src/client-sdk/services/evaluation/evaluation.facade.ts
985
+ var DEFAULT_POLL_INTERVAL = 2e3;
986
+ var DEFAULT_TIMEOUT = 6e5;
947
987
  var EvaluationFacade = class {
948
988
  constructor(config) {
949
989
  this.config = config;
950
990
  }
951
991
  /**
952
- * Initialize a new evaluation session
992
+ * Initialize a new evaluation session (SDK-defined)
953
993
  *
954
994
  * @param name - Name of the experiment (used as slug)
955
995
  * @param options - Optional configuration
@@ -973,6 +1013,240 @@ var EvaluationFacade = class {
973
1013
  logger: this.config.logger
974
1014
  }, options));
975
1015
  }
1016
+ /**
1017
+ * Run a platform-configured evaluation (Evaluations V3)
1018
+ *
1019
+ * This runs an evaluation that was configured in the LangWatch platform.
1020
+ * The method waits for the run to finish and returns its result; call
1021
+ * `printSummary()` on that result to print a summary and exit with code 1 on failure (pass `false` to skip the exit).
1022
+ *
1023
+ * @param slug - The slug of the evaluation (found in the evaluation URL)
1024
+ * @param options - Optional configuration
1025
+ * @returns The evaluation results including pass rate and summary
1026
+ *
1027
+ * @example
1028
+ * ```typescript
1029
+ * import { LangWatch } from "langwatch";
1030
+ *
1031
+ * const langwatch = new LangWatch();
1032
+ *
1033
+ * const result = await langwatch.evaluation.run("my-evaluation-slug");
1034
+ * result.printSummary();
1035
+ * ```
1036
+ */
1037
+ async run(slug, options) {
1038
+ this.config.logger.info(`Running platform evaluation: ${slug}`);
1039
+ const result = await this.runWithPolling(slug, options);
1040
+ return result;
1041
+ }
1042
+ /**
1043
+ * Run an evaluation and wait for completion using polling
1044
+ */
1045
+ async runWithPolling(slug, options = {}) {
1046
+ var _a, _b, _c, _d, _e, _f, _g;
1047
+ const pollInterval = (_a = options.pollInterval) != null ? _a : DEFAULT_POLL_INTERVAL;
1048
+ const timeout = (_b = options.timeout) != null ? _b : DEFAULT_TIMEOUT;
1049
+ const startResponse = await this.startRun(slug);
1050
+ const { runId } = startResponse;
1051
+ const apiRunUrl = (_c = startResponse.runUrl) != null ? _c : "";
1052
+ const runUrl = apiRunUrl ? this.replaceUrlDomain(apiRunUrl, this.config.endpoint) : "";
1053
+ console.log(`Started evaluation run: ${runId}`);
1054
+ if (runUrl) {
1055
+ console.log(`Follow live: ${runUrl}`);
1056
+ }
1057
+ const total = startResponse.total;
1058
+ let lastProgress = 0;
1059
+ if (total > 0) {
1060
+ process.stdout.write(`Progress: 0/${total} (0%)`);
1061
+ }
1062
+ (_d = options.onProgress) == null ? void 0 : _d.call(options, 0, total);
1063
+ const startTime = Date.now();
1064
+ while (true) {
1065
+ if (Date.now() - startTime > timeout) {
1066
+ console.log();
1067
+ const finalStatus = await this.getRunStatus(runId);
1068
+ throw new EvaluationTimeoutError(runId, finalStatus.progress, finalStatus.total);
1069
+ }
1070
+ await this.sleep(pollInterval);
1071
+ const status = await this.getRunStatus(runId);
1072
+ const progress = status.progress;
1073
+ if (progress !== lastProgress && status.total > 0) {
1074
+ const percentage = Math.round(progress / status.total * 100);
1075
+ process.stdout.write(`\rProgress: ${progress}/${status.total} (${percentage}%)`);
1076
+ lastProgress = progress;
1077
+ }
1078
+ (_e = options.onProgress) == null ? void 0 : _e.call(options, status.progress, status.total);
1079
+ if (status.status === "completed") {
1080
+ console.log();
1081
+ const summary = status.summary;
1082
+ return this.buildResult(runId, "completed", summary, runUrl != null ? runUrl : "");
1083
+ }
1084
+ if (status.status === "failed") {
1085
+ console.log();
1086
+ throw new EvaluationRunFailedError(runId, (_f = status.error) != null ? _f : "Unknown error");
1087
+ }
1088
+ if (status.status === "stopped") {
1089
+ console.log();
1090
+ return this.buildResult(runId, "stopped", (_g = status.summary) != null ? _g : {
1091
+ runId,
1092
+ totalCells: status.total,
1093
+ completedCells: status.progress,
1094
+ failedCells: 0,
1095
+ duration: Date.now() - startTime
1096
+ }, runUrl != null ? runUrl : "");
1097
+ }
1098
+ }
1099
+ }
1100
+ /**
1101
+ * Start an evaluation run
1102
+ */
1103
+ async startRun(slug) {
1104
+ const response = await this.config.langwatchApiClient.POST(
1105
+ "/api/evaluations/v3/{slug}/run",
1106
+ {
1107
+ params: {
1108
+ path: { slug }
1109
+ }
1110
+ }
1111
+ );
1112
+ if (response.error) {
1113
+ const status = response.response.status;
1114
+ if (status === 404) {
1115
+ throw new EvaluationNotFoundError(slug);
1116
+ }
1117
+ if (status === 401) {
1118
+ throw new EvaluationsApiError("Unauthorized - check your API key", 401);
1119
+ }
1120
+ const errorMessage = "error" in response.error ? response.error.error : `Failed to start evaluation: ${slug}`;
1121
+ throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1122
+ }
1123
+ return response.data;
1124
+ }
1125
+ /**
1126
+ * Get the status of a run
1127
+ */
1128
+ async getRunStatus(runId) {
1129
+ const response = await this.config.langwatchApiClient.GET(
1130
+ "/api/evaluations/v3/runs/{runId}",
1131
+ {
1132
+ params: {
1133
+ path: { runId }
1134
+ }
1135
+ }
1136
+ );
1137
+ if (response.error) {
1138
+ const status = response.response.status;
1139
+ if (status === 404) {
1140
+ throw new EvaluationsApiError(`Run not found: ${runId}`, 404);
1141
+ }
1142
+ if (status === 401) {
1143
+ throw new EvaluationsApiError("Unauthorized - check your API key", 401);
1144
+ }
1145
+ const errorMessage = "error" in response.error ? response.error.error : `Failed to get run status: ${runId}`;
1146
+ throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
1147
+ }
1148
+ return response.data;
1149
+ }
1150
+ /**
1151
+ * Build the result object from API response
1152
+ */
1153
+ buildResult(runId, status, summary, runUrl) {
1154
+ var _a, _b, _c, _d, _e, _f, _g;
1155
+ const totalCells = (_a = summary.totalCells) != null ? _a : 0;
1156
+ const completedCells = (_b = summary.completedCells) != null ? _b : 0;
1157
+ const failedCells = (_c = summary.failedCells) != null ? _c : 0;
1158
+ const duration = (_d = summary.duration) != null ? _d : 0;
1159
+ const totalPassed = (_e = summary.totalPassed) != null ? _e : completedCells - failedCells;
1160
+ const totalFailed = (_f = summary.totalFailed) != null ? _f : failedCells;
1161
+ const passRate = (_g = summary.passRate) != null ? _g : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
1162
+ return {
1163
+ runId,
1164
+ status,
1165
+ passed: totalPassed,
1166
+ failed: totalFailed,
1167
+ passRate,
1168
+ duration,
1169
+ runUrl,
1170
+ // Always use the endpoint-based URL we constructed
1171
+ summary,
1172
+ printSummary: (exitOnFailure = true) => {
1173
+ var _a2;
1174
+ this.printSummary({
1175
+ runId,
1176
+ status,
1177
+ passed: totalPassed,
1178
+ failed: totalFailed,
1179
+ passRate,
1180
+ duration,
1181
+ runUrl: (_a2 = summary.runUrl) != null ? _a2 : runUrl,
1182
+ summary
1183
+ });
1184
+ if (exitOnFailure && totalFailed > 0) {
1185
+ process.exit(1);
1186
+ }
1187
+ }
1188
+ };
1189
+ }
1190
+ /**
1191
+ * Print a CI-friendly summary of the evaluation results
1192
+ */
1193
+ printSummary(result) {
1194
+ const { runId, status, passed, failed, passRate, duration, runUrl, summary } = result;
1195
+ console.log("\n" + "\u2550".repeat(60));
1196
+ console.log(" EVALUATION RESULTS");
1197
+ console.log("\u2550".repeat(60));
1198
+ console.log(` Run ID: ${runId}`);
1199
+ console.log(` Status: ${status.toUpperCase()}`);
1200
+ console.log(` Duration: ${(duration / 1e3).toFixed(1)}s`);
1201
+ console.log("\u2500".repeat(60));
1202
+ console.log(` Passed: ${passed}`);
1203
+ console.log(` Failed: ${failed}`);
1204
+ console.log(` Pass Rate: ${passRate.toFixed(1)}%`);
1205
+ if (summary.targets && summary.targets.length > 0) {
1206
+ console.log("\u2500".repeat(60));
1207
+ console.log(" TARGETS:");
1208
+ for (const target of summary.targets) {
1209
+ console.log(` ${target.name}: ${target.passed} passed, ${target.failed} failed`);
1210
+ if (target.avgLatency) {
1211
+ console.log(` Avg latency: ${target.avgLatency.toFixed(0)}ms`);
1212
+ }
1213
+ if (target.totalCost) {
1214
+ console.log(` Total cost: $${target.totalCost.toFixed(4)}`);
1215
+ }
1216
+ }
1217
+ }
1218
+ if (summary.evaluators && summary.evaluators.length > 0) {
1219
+ console.log("\u2500".repeat(60));
1220
+ console.log(" EVALUATORS:");
1221
+ for (const evaluator of summary.evaluators) {
1222
+ console.log(
1223
+ ` ${evaluator.name}: ${evaluator.passRate.toFixed(1)}% pass rate`
1224
+ );
1225
+ if (evaluator.avgScore !== void 0) {
1226
+ console.log(` Avg score: ${evaluator.avgScore.toFixed(2)}`);
1227
+ }
1228
+ }
1229
+ }
1230
+ console.log("\u2500".repeat(60));
1231
+ console.log(` View details: ${runUrl}`);
1232
+ console.log("\u2550".repeat(60) + "\n");
1233
+ }
1234
+ sleep(ms) {
1235
+ return new Promise((resolve) => setTimeout(resolve, ms));
1236
+ }
1237
+ /**
1238
+ * Replace the domain of a URL with a new base URL, preserving the path
1239
+ */
1240
+ replaceUrlDomain(url, newBase) {
1241
+ if (!url) return url;
1242
+ try {
1243
+ const parsedUrl = new URL(url);
1244
+ const parsedNewBase = new URL(newBase);
1245
+ return `${parsedNewBase.origin}${parsedUrl.pathname}${parsedUrl.search}${parsedUrl.hash}`;
1246
+ } catch (e) {
1247
+ return url;
1248
+ }
1249
+ }
976
1250
  };
977
1251
 
978
1252
  // src/client-sdk/services/traces/types.ts
@@ -986,13 +1260,13 @@ var TracesError = class extends Error {
986
1260
  };
987
1261
 
988
1262
  // src/client-sdk/services/traces/tracing/tracer.ts
989
- var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkN7PJJMU2js.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkN7PJJMU2js.LANGWATCH_SDK_VERSION);
1263
+ var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkC4XUWCQRjs.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkC4XUWCQRjs.LANGWATCH_SDK_VERSION);
990
1264
 
991
1265
  // src/client-sdk/services/traces/service.ts
992
1266
  var TracesService = class {
993
1267
  constructor(config) {
994
1268
  this.config = config;
995
- return _chunkD4H6PR6Hjs.createTracingProxy.call(void 0,
1269
+ return _chunk6SSCBYJMjs.createTracingProxy.call(void 0,
996
1270
  this,
997
1271
  tracer
998
1272
  );
@@ -1055,15 +1329,15 @@ var LangWatch = class {
1055
1329
  _chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _LangWatch_instances);
1056
1330
  var _a, _b, _c, _d;
1057
1331
  const apiKey = (_b = (_a = options.apiKey) != null ? _a : process.env.LANGWATCH_API_KEY) != null ? _b : "";
1058
- const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkN7PJJMU2js.DEFAULT_ENDPOINT;
1332
+ const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkC4XUWCQRjs.DEFAULT_ENDPOINT;
1059
1333
  this.config = _chunkOHM7JUMRjs.__privateMethod.call(void 0, this, _LangWatch_instances, createInternalConfig_fn).call(this, {
1060
1334
  apiKey,
1061
1335
  endpoint,
1062
1336
  options: options.options
1063
1337
  });
1064
- this.prompts = new (0, _chunkD4H6PR6Hjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1065
- promptsApiService: new (0, _chunkD4H6PR6Hjs.PromptsApiService)(this.config),
1066
- localPromptsService: new (0, _chunkD4H6PR6Hjs.LocalPromptsService)()
1338
+ this.prompts = new (0, _chunk6SSCBYJMjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
1339
+ promptsApiService: new (0, _chunk6SSCBYJMjs.PromptsApiService)(this.config),
1340
+ localPromptsService: new (0, _chunk6SSCBYJMjs.LocalPromptsService)()
1067
1341
  }, this.config));
1068
1342
  this.traces = new TracesFacade(this.config);
1069
1343
  this.evaluation = new EvaluationFacade({
@@ -1090,7 +1364,7 @@ createInternalConfig_fn = function({
1090
1364
  var _a;
1091
1365
  return {
1092
1366
  logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunk5MQQRSVMjs.NoOpLogger)(),
1093
- langwatchApiClient: _chunkD4H6PR6Hjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
1367
+ langwatchApiClient: _chunk6SSCBYJMjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
1094
1368
  endpoint,
1095
1369
  apiKey
1096
1370
  };
@@ -1117,5 +1391,5 @@ var logger = {
1117
1391
 
1118
1392
 
1119
1393
 
1120
- exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy = _chunkD4H6PR6Hjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkJQYW7RY7js.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunkJQYW7RY7js.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
1394
+ exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy = _chunk6SSCBYJMjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkBQRUUTN3js.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunkBQRUUTN3js.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
1121
1395
  //# sourceMappingURL=index.js.map