langwatch 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{add-XV5SUAXF.mjs → add-2UHFYNUA.mjs} +4 -4
- package/dist/{add-UB5U3K3M.js → add-LUETMKBD.js} +7 -7
- package/dist/{add-UB5U3K3M.js.map → add-LUETMKBD.js.map} +1 -1
- package/dist/{chunk-D4H6PR6H.js → chunk-6SSCBYJM.js} +9 -9
- package/dist/{chunk-D4H6PR6H.js.map → chunk-6SSCBYJM.js.map} +1 -1
- package/dist/{chunk-JQYW7RY7.js → chunk-BQRUUTN3.js} +14 -14
- package/dist/{chunk-JQYW7RY7.js.map → chunk-BQRUUTN3.js.map} +1 -1
- package/dist/{chunk-N7PJJMU2.js → chunk-C4XUWCQR.js} +2 -2
- package/dist/{chunk-N7PJJMU2.js.map → chunk-C4XUWCQR.js.map} +1 -1
- package/dist/{chunk-WZ7FYUHN.mjs → chunk-OTID7S7K.mjs} +2 -2
- package/dist/{chunk-LKE6DMUP.mjs → chunk-TB5KB737.mjs} +2 -2
- package/dist/{chunk-LKE6DMUP.mjs.map → chunk-TB5KB737.mjs.map} +1 -1
- package/dist/{chunk-556ZFJMK.mjs → chunk-WCNDT5SD.mjs} +2 -2
- package/dist/cli/index.js +6 -6
- package/dist/cli/index.mjs +6 -6
- package/dist/{implementation-CPxv2BdW.d.ts → implementation-Bnc8Aymq.d.ts} +1 -1
- package/dist/{implementation-CVrmD0bz.d.mts → implementation-Ck58nRkT.d.mts} +1 -1
- package/dist/index.d.mts +129 -5
- package/dist/index.d.ts +129 -5
- package/dist/index.js +286 -12
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +278 -4
- package/dist/index.mjs.map +1 -1
- package/dist/{list-DUNP46AD.js → list-7U3M64GY.js} +7 -7
- package/dist/{list-DUNP46AD.js.map → list-7U3M64GY.js.map} +1 -1
- package/dist/{list-T4QS6CT2.mjs → list-WV5LA6LD.mjs} +4 -4
- package/dist/{login-3H27NIOD.js → login-B7DKMN7P.js} +3 -3
- package/dist/{login-3H27NIOD.js.map → login-B7DKMN7P.js.map} +1 -1
- package/dist/{login-T2ET7TKH.mjs → login-QKRT6PXA.mjs} +2 -2
- package/dist/observability-sdk/index.d.mts +3 -3
- package/dist/observability-sdk/index.d.ts +3 -3
- package/dist/observability-sdk/index.js +3 -3
- package/dist/observability-sdk/index.mjs +2 -2
- package/dist/observability-sdk/instrumentation/langchain/index.d.mts +1 -1
- package/dist/observability-sdk/instrumentation/langchain/index.d.ts +1 -1
- package/dist/observability-sdk/setup/node/index.js +10 -10
- package/dist/observability-sdk/setup/node/index.mjs +2 -2
- package/dist/{remove-F5RM4775.mjs → remove-2OGMXSTR.mjs} +4 -4
- package/dist/{remove-V4JL5Z4U.js → remove-A4DKCN7A.js} +6 -6
- package/dist/{remove-V4JL5Z4U.js.map → remove-A4DKCN7A.js.map} +1 -1
- package/dist/{sync-VGWOLOLJ.mjs → sync-TNVCKWTC.mjs} +4 -4
- package/dist/{sync-DIOKWE6R.js → sync-WRZXIBZS.js} +6 -6
- package/dist/{sync-DIOKWE6R.js.map → sync-WRZXIBZS.js.map} +1 -1
- package/dist/{types-Kts5RGLY.d.mts → types-5h2Im4pl.d.mts} +162 -0
- package/dist/{types-usU5mTCX.d.ts → types-fo-Ij9pl.d.ts} +162 -0
- package/package.json +1 -1
- /package/dist/{add-XV5SUAXF.mjs.map → add-2UHFYNUA.mjs.map} +0 -0
- /package/dist/{chunk-WZ7FYUHN.mjs.map → chunk-OTID7S7K.mjs.map} +0 -0
- /package/dist/{chunk-556ZFJMK.mjs.map → chunk-WCNDT5SD.mjs.map} +0 -0
- /package/dist/{list-T4QS6CT2.mjs.map → list-WV5LA6LD.mjs.map} +0 -0
- /package/dist/{login-T2ET7TKH.mjs.map → login-QKRT6PXA.mjs.map} +0 -0
- /package/dist/{remove-F5RM4775.mjs.map → remove-2OGMXSTR.mjs.map} +0 -0
- /package/dist/{sync-VGWOLOLJ.mjs.map → sync-TNVCKWTC.mjs.map} +0 -0
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.mjs';
|
|
2
|
-
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-
|
|
3
|
-
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-
|
|
2
|
+
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Ck58nRkT.mjs';
|
|
3
|
+
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-5h2Im4pl.mjs';
|
|
4
4
|
import openApiCreateClient from 'openapi-fetch';
|
|
5
5
|
import { z } from 'zod';
|
|
6
6
|
export { l as attributes } from './types-DRiQaKFG.mjs';
|
|
@@ -810,10 +810,87 @@ declare class Evaluation {
|
|
|
810
810
|
private getSpanIdFromContext;
|
|
811
811
|
}
|
|
812
812
|
|
|
813
|
+
/**
|
|
814
|
+
* Types for platform-configured evaluations (Evaluations V3)
|
|
815
|
+
*/
|
|
816
|
+
/**
|
|
817
|
+
* Summary of a completed evaluation run
|
|
818
|
+
*/
|
|
819
|
+
type EvaluationRunSummary = {
|
|
820
|
+
runId?: string;
|
|
821
|
+
totalCells?: number;
|
|
822
|
+
completedCells?: number;
|
|
823
|
+
failedCells?: number;
|
|
824
|
+
duration?: number;
|
|
825
|
+
runUrl?: string;
|
|
826
|
+
timestamps?: {
|
|
827
|
+
startedAt: number;
|
|
828
|
+
finishedAt?: number;
|
|
829
|
+
stoppedAt?: number;
|
|
830
|
+
};
|
|
831
|
+
targets?: Array<{
|
|
832
|
+
targetId: string;
|
|
833
|
+
name: string;
|
|
834
|
+
passed: number;
|
|
835
|
+
failed: number;
|
|
836
|
+
avgLatency: number;
|
|
837
|
+
totalCost: number;
|
|
838
|
+
}>;
|
|
839
|
+
evaluators?: Array<{
|
|
840
|
+
evaluatorId: string;
|
|
841
|
+
name: string;
|
|
842
|
+
passed: number;
|
|
843
|
+
failed: number;
|
|
844
|
+
passRate: number;
|
|
845
|
+
avgScore?: number;
|
|
846
|
+
}>;
|
|
847
|
+
totalPassed?: number;
|
|
848
|
+
totalFailed?: number;
|
|
849
|
+
passRate?: number;
|
|
850
|
+
totalCost?: number;
|
|
851
|
+
};
|
|
852
|
+
/**
|
|
853
|
+
* Options for running a platform evaluation
|
|
854
|
+
*/
|
|
855
|
+
type RunEvaluationOptions = {
|
|
856
|
+
/**
|
|
857
|
+
* Polling interval in milliseconds (default: 2000)
|
|
858
|
+
*/
|
|
859
|
+
pollInterval?: number;
|
|
860
|
+
/**
|
|
861
|
+
* Maximum time to wait for completion in milliseconds (default: 600000 = 10 minutes)
|
|
862
|
+
*/
|
|
863
|
+
timeout?: number;
|
|
864
|
+
/**
|
|
865
|
+
* Callback for progress updates
|
|
866
|
+
*/
|
|
867
|
+
onProgress?: (progress: number, total: number) => void;
|
|
868
|
+
};
|
|
869
|
+
/**
|
|
870
|
+
* Final result of a platform evaluation run
|
|
871
|
+
*/
|
|
872
|
+
type EvaluationRunResult = {
|
|
873
|
+
runId: string;
|
|
874
|
+
status: "completed" | "failed" | "stopped";
|
|
875
|
+
passed: number;
|
|
876
|
+
failed: number;
|
|
877
|
+
passRate: number;
|
|
878
|
+
duration: number;
|
|
879
|
+
runUrl: string;
|
|
880
|
+
summary: EvaluationRunSummary;
|
|
881
|
+
/**
|
|
882
|
+
* Print a CI-friendly summary of the results
|
|
883
|
+
* @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
|
|
884
|
+
*/
|
|
885
|
+
printSummary: (exitOnFailure?: boolean) => void;
|
|
886
|
+
};
|
|
887
|
+
|
|
813
888
|
/**
|
|
814
889
|
* EvaluationFacade - Entry point for the evaluation API
|
|
815
890
|
*
|
|
816
|
-
* Provides
|
|
891
|
+
* Provides:
|
|
892
|
+
* - `init()` method to create evaluation sessions (SDK-defined evaluations)
|
|
893
|
+
* - `run()` method to execute platform-configured evaluations (Evaluations V3)
|
|
817
894
|
*/
|
|
818
895
|
|
|
819
896
|
type EvaluationFacadeConfig = {
|
|
@@ -823,13 +900,13 @@ type EvaluationFacadeConfig = {
|
|
|
823
900
|
logger: Logger;
|
|
824
901
|
};
|
|
825
902
|
/**
|
|
826
|
-
* Facade for creating evaluation sessions
|
|
903
|
+
* Facade for creating evaluation sessions and running platform-configured evaluations
|
|
827
904
|
*/
|
|
828
905
|
declare class EvaluationFacade {
|
|
829
906
|
private readonly config;
|
|
830
907
|
constructor(config: EvaluationFacadeConfig);
|
|
831
908
|
/**
|
|
832
|
-
* Initialize a new evaluation session
|
|
909
|
+
* Initialize a new evaluation session (SDK-defined)
|
|
833
910
|
*
|
|
834
911
|
* @param name - Name of the experiment (used as slug)
|
|
835
912
|
* @param options - Optional configuration
|
|
@@ -846,6 +923,53 @@ declare class EvaluationFacade {
|
|
|
846
923
|
* ```
|
|
847
924
|
*/
|
|
848
925
|
init(name: string, options?: EvaluationInitOptions): Promise<Evaluation>;
|
|
926
|
+
/**
|
|
927
|
+
* Run a platform-configured evaluation (Evaluations V3)
|
|
928
|
+
*
|
|
929
|
+
* This runs an evaluation that was configured in the LangWatch platform.
|
|
930
|
+
* The method automatically prints a summary and exits with code 1 on failure
|
|
931
|
+
* (unless `exitOnFailure: false` is passed).
|
|
932
|
+
*
|
|
933
|
+
* @param slug - The slug of the evaluation (found in the evaluation URL)
|
|
934
|
+
* @param options - Optional configuration
|
|
935
|
+
* @returns The evaluation results including pass rate and summary
|
|
936
|
+
*
|
|
937
|
+
* @example
|
|
938
|
+
* ```typescript
|
|
939
|
+
* import { LangWatch } from "langwatch";
|
|
940
|
+
*
|
|
941
|
+
* const langwatch = new LangWatch();
|
|
942
|
+
*
|
|
943
|
+
* const result = await langwatch.evaluation.run("my-evaluation-slug");
|
|
944
|
+
* result.printSummary();
|
|
945
|
+
* ```
|
|
946
|
+
*/
|
|
947
|
+
run(slug: string, options?: RunEvaluationOptions): Promise<EvaluationRunResult>;
|
|
948
|
+
/**
|
|
949
|
+
* Run an evaluation and wait for completion using polling
|
|
950
|
+
*/
|
|
951
|
+
private runWithPolling;
|
|
952
|
+
/**
|
|
953
|
+
* Start an evaluation run
|
|
954
|
+
*/
|
|
955
|
+
private startRun;
|
|
956
|
+
/**
|
|
957
|
+
* Get the status of a run
|
|
958
|
+
*/
|
|
959
|
+
private getRunStatus;
|
|
960
|
+
/**
|
|
961
|
+
* Build the result object from API response
|
|
962
|
+
*/
|
|
963
|
+
private buildResult;
|
|
964
|
+
/**
|
|
965
|
+
* Print a CI-friendly summary of the evaluation results
|
|
966
|
+
*/
|
|
967
|
+
private printSummary;
|
|
968
|
+
private sleep;
|
|
969
|
+
/**
|
|
970
|
+
* Replace the domain of a URL with a new base URL, preserving the path
|
|
971
|
+
*/
|
|
972
|
+
private replaceUrlDomain;
|
|
849
973
|
}
|
|
850
974
|
|
|
851
975
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { L as Logger, C as ConsoleLogger, N as NoOpLogger } from './index-D7rKIGrO.js';
|
|
2
|
-
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-
|
|
3
|
-
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-
|
|
2
|
+
export { F as FilterableBatchSpanProcessor, L as LangWatchExporter, S as SpanProcessingExcludeRule, g as getLangWatchLogger, d as getLangWatchTracer } from './implementation-Bnc8Aymq.js';
|
|
3
|
+
import { p as paths, P as PromptResponse, g as CreatePromptBody, U as UpdatePromptBody, h as PromptData, i as Prompt, F as FetchPolicy, L as LangWatchSpan } from './types-fo-Ij9pl.js';
|
|
4
4
|
import openApiCreateClient from 'openapi-fetch';
|
|
5
5
|
import { z } from 'zod';
|
|
6
6
|
export { l as attributes } from './types-DRiQaKFG.js';
|
|
@@ -810,10 +810,87 @@ declare class Evaluation {
|
|
|
810
810
|
private getSpanIdFromContext;
|
|
811
811
|
}
|
|
812
812
|
|
|
813
|
+
/**
|
|
814
|
+
* Types for platform-configured evaluations (Evaluations V3)
|
|
815
|
+
*/
|
|
816
|
+
/**
|
|
817
|
+
* Summary of a completed evaluation run
|
|
818
|
+
*/
|
|
819
|
+
type EvaluationRunSummary = {
|
|
820
|
+
runId?: string;
|
|
821
|
+
totalCells?: number;
|
|
822
|
+
completedCells?: number;
|
|
823
|
+
failedCells?: number;
|
|
824
|
+
duration?: number;
|
|
825
|
+
runUrl?: string;
|
|
826
|
+
timestamps?: {
|
|
827
|
+
startedAt: number;
|
|
828
|
+
finishedAt?: number;
|
|
829
|
+
stoppedAt?: number;
|
|
830
|
+
};
|
|
831
|
+
targets?: Array<{
|
|
832
|
+
targetId: string;
|
|
833
|
+
name: string;
|
|
834
|
+
passed: number;
|
|
835
|
+
failed: number;
|
|
836
|
+
avgLatency: number;
|
|
837
|
+
totalCost: number;
|
|
838
|
+
}>;
|
|
839
|
+
evaluators?: Array<{
|
|
840
|
+
evaluatorId: string;
|
|
841
|
+
name: string;
|
|
842
|
+
passed: number;
|
|
843
|
+
failed: number;
|
|
844
|
+
passRate: number;
|
|
845
|
+
avgScore?: number;
|
|
846
|
+
}>;
|
|
847
|
+
totalPassed?: number;
|
|
848
|
+
totalFailed?: number;
|
|
849
|
+
passRate?: number;
|
|
850
|
+
totalCost?: number;
|
|
851
|
+
};
|
|
852
|
+
/**
|
|
853
|
+
* Options for running a platform evaluation
|
|
854
|
+
*/
|
|
855
|
+
type RunEvaluationOptions = {
|
|
856
|
+
/**
|
|
857
|
+
* Polling interval in milliseconds (default: 2000)
|
|
858
|
+
*/
|
|
859
|
+
pollInterval?: number;
|
|
860
|
+
/**
|
|
861
|
+
* Maximum time to wait for completion in milliseconds (default: 600000 = 10 minutes)
|
|
862
|
+
*/
|
|
863
|
+
timeout?: number;
|
|
864
|
+
/**
|
|
865
|
+
* Callback for progress updates
|
|
866
|
+
*/
|
|
867
|
+
onProgress?: (progress: number, total: number) => void;
|
|
868
|
+
};
|
|
869
|
+
/**
|
|
870
|
+
* Final result of a platform evaluation run
|
|
871
|
+
*/
|
|
872
|
+
type EvaluationRunResult = {
|
|
873
|
+
runId: string;
|
|
874
|
+
status: "completed" | "failed" | "stopped";
|
|
875
|
+
passed: number;
|
|
876
|
+
failed: number;
|
|
877
|
+
passRate: number;
|
|
878
|
+
duration: number;
|
|
879
|
+
runUrl: string;
|
|
880
|
+
summary: EvaluationRunSummary;
|
|
881
|
+
/**
|
|
882
|
+
* Print a CI-friendly summary of the results
|
|
883
|
+
* @param exitOnFailure - If true (default), calls process.exit(1) when there are failures
|
|
884
|
+
*/
|
|
885
|
+
printSummary: (exitOnFailure?: boolean) => void;
|
|
886
|
+
};
|
|
887
|
+
|
|
813
888
|
/**
|
|
814
889
|
* EvaluationFacade - Entry point for the evaluation API
|
|
815
890
|
*
|
|
816
|
-
* Provides
|
|
891
|
+
* Provides:
|
|
892
|
+
* - `init()` method to create evaluation sessions (SDK-defined evaluations)
|
|
893
|
+
* - `run()` method to execute platform-configured evaluations (Evaluations V3)
|
|
817
894
|
*/
|
|
818
895
|
|
|
819
896
|
type EvaluationFacadeConfig = {
|
|
@@ -823,13 +900,13 @@ type EvaluationFacadeConfig = {
|
|
|
823
900
|
logger: Logger;
|
|
824
901
|
};
|
|
825
902
|
/**
|
|
826
|
-
* Facade for creating evaluation sessions
|
|
903
|
+
* Facade for creating evaluation sessions and running platform-configured evaluations
|
|
827
904
|
*/
|
|
828
905
|
declare class EvaluationFacade {
|
|
829
906
|
private readonly config;
|
|
830
907
|
constructor(config: EvaluationFacadeConfig);
|
|
831
908
|
/**
|
|
832
|
-
* Initialize a new evaluation session
|
|
909
|
+
* Initialize a new evaluation session (SDK-defined)
|
|
833
910
|
*
|
|
834
911
|
* @param name - Name of the experiment (used as slug)
|
|
835
912
|
* @param options - Optional configuration
|
|
@@ -846,6 +923,53 @@ declare class EvaluationFacade {
|
|
|
846
923
|
* ```
|
|
847
924
|
*/
|
|
848
925
|
init(name: string, options?: EvaluationInitOptions): Promise<Evaluation>;
|
|
926
|
+
/**
|
|
927
|
+
* Run a platform-configured evaluation (Evaluations V3)
|
|
928
|
+
*
|
|
929
|
+
* This runs an evaluation that was configured in the LangWatch platform.
|
|
930
|
+
* The method automatically prints a summary and exits with code 1 on failure
|
|
931
|
+
* (unless `exitOnFailure: false` is passed).
|
|
932
|
+
*
|
|
933
|
+
* @param slug - The slug of the evaluation (found in the evaluation URL)
|
|
934
|
+
* @param options - Optional configuration
|
|
935
|
+
* @returns The evaluation results including pass rate and summary
|
|
936
|
+
*
|
|
937
|
+
* @example
|
|
938
|
+
* ```typescript
|
|
939
|
+
* import { LangWatch } from "langwatch";
|
|
940
|
+
*
|
|
941
|
+
* const langwatch = new LangWatch();
|
|
942
|
+
*
|
|
943
|
+
* const result = await langwatch.evaluation.run("my-evaluation-slug");
|
|
944
|
+
* result.printSummary();
|
|
945
|
+
* ```
|
|
946
|
+
*/
|
|
947
|
+
run(slug: string, options?: RunEvaluationOptions): Promise<EvaluationRunResult>;
|
|
948
|
+
/**
|
|
949
|
+
* Run an evaluation and wait for completion using polling
|
|
950
|
+
*/
|
|
951
|
+
private runWithPolling;
|
|
952
|
+
/**
|
|
953
|
+
* Start an evaluation run
|
|
954
|
+
*/
|
|
955
|
+
private startRun;
|
|
956
|
+
/**
|
|
957
|
+
* Get the status of a run
|
|
958
|
+
*/
|
|
959
|
+
private getRunStatus;
|
|
960
|
+
/**
|
|
961
|
+
* Build the result object from API response
|
|
962
|
+
*/
|
|
963
|
+
private buildResult;
|
|
964
|
+
/**
|
|
965
|
+
* Print a CI-friendly summary of the evaluation results
|
|
966
|
+
*/
|
|
967
|
+
private printSummary;
|
|
968
|
+
private sleep;
|
|
969
|
+
/**
|
|
970
|
+
* Replace the domain of a URL with a new base URL, preserving the path
|
|
971
|
+
*/
|
|
972
|
+
private replaceUrlDomain;
|
|
849
973
|
}
|
|
850
974
|
|
|
851
975
|
/**
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
var
|
|
8
|
+
var _chunk6SSCBYJMjs = require('./chunk-6SSCBYJM.js');
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
var _chunkASTAIRXGjs = require('./chunk-ASTAIRXG.js');
|
|
@@ -16,12 +16,12 @@ var _chunkONXIZKC6js = require('./chunk-ONXIZKC6.js');
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkBQRUUTN3js = require('./chunk-BQRUUTN3.js');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
var
|
|
24
|
+
var _chunkC4XUWCQRjs = require('./chunk-C4XUWCQR.js');
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
|
|
@@ -943,13 +943,53 @@ var Evaluation = class _Evaluation {
|
|
|
943
943
|
}
|
|
944
944
|
};
|
|
945
945
|
|
|
946
|
+
// src/client-sdk/services/evaluation/platformErrors.ts
|
|
947
|
+
var EvaluationsError = class extends Error {
|
|
948
|
+
constructor(message) {
|
|
949
|
+
super(message);
|
|
950
|
+
this.name = "EvaluationsError";
|
|
951
|
+
}
|
|
952
|
+
};
|
|
953
|
+
var EvaluationNotFoundError = class extends EvaluationsError {
|
|
954
|
+
constructor(slug) {
|
|
955
|
+
super(`Evaluation not found: ${slug}`);
|
|
956
|
+
this.name = "EvaluationNotFoundError";
|
|
957
|
+
}
|
|
958
|
+
};
|
|
959
|
+
var EvaluationTimeoutError = class extends EvaluationsError {
|
|
960
|
+
constructor(runId, progress, total) {
|
|
961
|
+
super(`Evaluation run timed out: ${runId} (${progress}/${total} completed)`);
|
|
962
|
+
this.name = "EvaluationTimeoutError";
|
|
963
|
+
this.runId = runId;
|
|
964
|
+
this.progress = progress;
|
|
965
|
+
this.total = total;
|
|
966
|
+
}
|
|
967
|
+
};
|
|
968
|
+
var EvaluationRunFailedError = class extends EvaluationsError {
|
|
969
|
+
constructor(runId, errorMessage) {
|
|
970
|
+
super(`Evaluation run failed: ${errorMessage}`);
|
|
971
|
+
this.name = "EvaluationRunFailedError";
|
|
972
|
+
this.runId = runId;
|
|
973
|
+
this.errorMessage = errorMessage;
|
|
974
|
+
}
|
|
975
|
+
};
|
|
976
|
+
var EvaluationsApiError = class extends EvaluationsError {
|
|
977
|
+
constructor(message, statusCode) {
|
|
978
|
+
super(message);
|
|
979
|
+
this.name = "EvaluationsApiError";
|
|
980
|
+
this.statusCode = statusCode;
|
|
981
|
+
}
|
|
982
|
+
};
|
|
983
|
+
|
|
946
984
|
// src/client-sdk/services/evaluation/evaluation.facade.ts
|
|
985
|
+
var DEFAULT_POLL_INTERVAL = 2e3;
|
|
986
|
+
var DEFAULT_TIMEOUT = 6e5;
|
|
947
987
|
var EvaluationFacade = class {
|
|
948
988
|
constructor(config) {
|
|
949
989
|
this.config = config;
|
|
950
990
|
}
|
|
951
991
|
/**
|
|
952
|
-
* Initialize a new evaluation session
|
|
992
|
+
* Initialize a new evaluation session (SDK-defined)
|
|
953
993
|
*
|
|
954
994
|
* @param name - Name of the experiment (used as slug)
|
|
955
995
|
* @param options - Optional configuration
|
|
@@ -973,6 +1013,240 @@ var EvaluationFacade = class {
|
|
|
973
1013
|
logger: this.config.logger
|
|
974
1014
|
}, options));
|
|
975
1015
|
}
|
|
1016
|
+
/**
|
|
1017
|
+
* Run a platform-configured evaluation (Evaluations V3)
|
|
1018
|
+
*
|
|
1019
|
+
* This runs an evaluation that was configured in the LangWatch platform.
|
|
1020
|
+
* The method automatically prints a summary and exits with code 1 on failure
|
|
1021
|
+
* (unless `exitOnFailure: false` is passed).
|
|
1022
|
+
*
|
|
1023
|
+
* @param slug - The slug of the evaluation (found in the evaluation URL)
|
|
1024
|
+
* @param options - Optional configuration
|
|
1025
|
+
* @returns The evaluation results including pass rate and summary
|
|
1026
|
+
*
|
|
1027
|
+
* @example
|
|
1028
|
+
* ```typescript
|
|
1029
|
+
* import { LangWatch } from "langwatch";
|
|
1030
|
+
*
|
|
1031
|
+
* const langwatch = new LangWatch();
|
|
1032
|
+
*
|
|
1033
|
+
* const result = await langwatch.evaluation.run("my-evaluation-slug");
|
|
1034
|
+
* result.printSummary();
|
|
1035
|
+
* ```
|
|
1036
|
+
*/
|
|
1037
|
+
async run(slug, options) {
|
|
1038
|
+
this.config.logger.info(`Running platform evaluation: ${slug}`);
|
|
1039
|
+
const result = await this.runWithPolling(slug, options);
|
|
1040
|
+
return result;
|
|
1041
|
+
}
|
|
1042
|
+
/**
|
|
1043
|
+
* Run an evaluation and wait for completion using polling
|
|
1044
|
+
*/
|
|
1045
|
+
async runWithPolling(slug, options = {}) {
|
|
1046
|
+
var _a, _b, _c, _d, _e, _f, _g;
|
|
1047
|
+
const pollInterval = (_a = options.pollInterval) != null ? _a : DEFAULT_POLL_INTERVAL;
|
|
1048
|
+
const timeout = (_b = options.timeout) != null ? _b : DEFAULT_TIMEOUT;
|
|
1049
|
+
const startResponse = await this.startRun(slug);
|
|
1050
|
+
const { runId } = startResponse;
|
|
1051
|
+
const apiRunUrl = (_c = startResponse.runUrl) != null ? _c : "";
|
|
1052
|
+
const runUrl = apiRunUrl ? this.replaceUrlDomain(apiRunUrl, this.config.endpoint) : "";
|
|
1053
|
+
console.log(`Started evaluation run: ${runId}`);
|
|
1054
|
+
if (runUrl) {
|
|
1055
|
+
console.log(`Follow live: ${runUrl}`);
|
|
1056
|
+
}
|
|
1057
|
+
const total = startResponse.total;
|
|
1058
|
+
let lastProgress = 0;
|
|
1059
|
+
if (total > 0) {
|
|
1060
|
+
process.stdout.write(`Progress: 0/${total} (0%)`);
|
|
1061
|
+
}
|
|
1062
|
+
(_d = options.onProgress) == null ? void 0 : _d.call(options, 0, total);
|
|
1063
|
+
const startTime = Date.now();
|
|
1064
|
+
while (true) {
|
|
1065
|
+
if (Date.now() - startTime > timeout) {
|
|
1066
|
+
console.log();
|
|
1067
|
+
const finalStatus = await this.getRunStatus(runId);
|
|
1068
|
+
throw new EvaluationTimeoutError(runId, finalStatus.progress, finalStatus.total);
|
|
1069
|
+
}
|
|
1070
|
+
await this.sleep(pollInterval);
|
|
1071
|
+
const status = await this.getRunStatus(runId);
|
|
1072
|
+
const progress = status.progress;
|
|
1073
|
+
if (progress !== lastProgress && status.total > 0) {
|
|
1074
|
+
const percentage = Math.round(progress / status.total * 100);
|
|
1075
|
+
process.stdout.write(`\rProgress: ${progress}/${status.total} (${percentage}%)`);
|
|
1076
|
+
lastProgress = progress;
|
|
1077
|
+
}
|
|
1078
|
+
(_e = options.onProgress) == null ? void 0 : _e.call(options, status.progress, status.total);
|
|
1079
|
+
if (status.status === "completed") {
|
|
1080
|
+
console.log();
|
|
1081
|
+
const summary = status.summary;
|
|
1082
|
+
return this.buildResult(runId, "completed", summary, runUrl != null ? runUrl : "");
|
|
1083
|
+
}
|
|
1084
|
+
if (status.status === "failed") {
|
|
1085
|
+
console.log();
|
|
1086
|
+
throw new EvaluationRunFailedError(runId, (_f = status.error) != null ? _f : "Unknown error");
|
|
1087
|
+
}
|
|
1088
|
+
if (status.status === "stopped") {
|
|
1089
|
+
console.log();
|
|
1090
|
+
return this.buildResult(runId, "stopped", (_g = status.summary) != null ? _g : {
|
|
1091
|
+
runId,
|
|
1092
|
+
totalCells: status.total,
|
|
1093
|
+
completedCells: status.progress,
|
|
1094
|
+
failedCells: 0,
|
|
1095
|
+
duration: Date.now() - startTime
|
|
1096
|
+
}, runUrl != null ? runUrl : "");
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1099
|
+
}
|
|
1100
|
+
/**
|
|
1101
|
+
* Start an evaluation run
|
|
1102
|
+
*/
|
|
1103
|
+
async startRun(slug) {
|
|
1104
|
+
const response = await this.config.langwatchApiClient.POST(
|
|
1105
|
+
"/api/evaluations/v3/{slug}/run",
|
|
1106
|
+
{
|
|
1107
|
+
params: {
|
|
1108
|
+
path: { slug }
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
);
|
|
1112
|
+
if (response.error) {
|
|
1113
|
+
const status = response.response.status;
|
|
1114
|
+
if (status === 404) {
|
|
1115
|
+
throw new EvaluationNotFoundError(slug);
|
|
1116
|
+
}
|
|
1117
|
+
if (status === 401) {
|
|
1118
|
+
throw new EvaluationsApiError("Unauthorized - check your API key", 401);
|
|
1119
|
+
}
|
|
1120
|
+
const errorMessage = "error" in response.error ? response.error.error : `Failed to start evaluation: ${slug}`;
|
|
1121
|
+
throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
|
|
1122
|
+
}
|
|
1123
|
+
return response.data;
|
|
1124
|
+
}
|
|
1125
|
+
/**
|
|
1126
|
+
* Get the status of a run
|
|
1127
|
+
*/
|
|
1128
|
+
async getRunStatus(runId) {
|
|
1129
|
+
const response = await this.config.langwatchApiClient.GET(
|
|
1130
|
+
"/api/evaluations/v3/runs/{runId}",
|
|
1131
|
+
{
|
|
1132
|
+
params: {
|
|
1133
|
+
path: { runId }
|
|
1134
|
+
}
|
|
1135
|
+
}
|
|
1136
|
+
);
|
|
1137
|
+
if (response.error) {
|
|
1138
|
+
const status = response.response.status;
|
|
1139
|
+
if (status === 404) {
|
|
1140
|
+
throw new EvaluationsApiError(`Run not found: ${runId}`, 404);
|
|
1141
|
+
}
|
|
1142
|
+
if (status === 401) {
|
|
1143
|
+
throw new EvaluationsApiError("Unauthorized - check your API key", 401);
|
|
1144
|
+
}
|
|
1145
|
+
const errorMessage = "error" in response.error ? response.error.error : `Failed to get run status: ${runId}`;
|
|
1146
|
+
throw new EvaluationsApiError(errorMessage != null ? errorMessage : `HTTP ${status}`, status);
|
|
1147
|
+
}
|
|
1148
|
+
return response.data;
|
|
1149
|
+
}
|
|
1150
|
+
/**
|
|
1151
|
+
* Build the result object from API response
|
|
1152
|
+
*/
|
|
1153
|
+
buildResult(runId, status, summary, runUrl) {
|
|
1154
|
+
var _a, _b, _c, _d, _e, _f, _g;
|
|
1155
|
+
const totalCells = (_a = summary.totalCells) != null ? _a : 0;
|
|
1156
|
+
const completedCells = (_b = summary.completedCells) != null ? _b : 0;
|
|
1157
|
+
const failedCells = (_c = summary.failedCells) != null ? _c : 0;
|
|
1158
|
+
const duration = (_d = summary.duration) != null ? _d : 0;
|
|
1159
|
+
const totalPassed = (_e = summary.totalPassed) != null ? _e : completedCells - failedCells;
|
|
1160
|
+
const totalFailed = (_f = summary.totalFailed) != null ? _f : failedCells;
|
|
1161
|
+
const passRate = (_g = summary.passRate) != null ? _g : completedCells > 0 ? totalPassed / completedCells * 100 : 0;
|
|
1162
|
+
return {
|
|
1163
|
+
runId,
|
|
1164
|
+
status,
|
|
1165
|
+
passed: totalPassed,
|
|
1166
|
+
failed: totalFailed,
|
|
1167
|
+
passRate,
|
|
1168
|
+
duration,
|
|
1169
|
+
runUrl,
|
|
1170
|
+
// Always use the endpoint-based URL we constructed
|
|
1171
|
+
summary,
|
|
1172
|
+
printSummary: (exitOnFailure = true) => {
|
|
1173
|
+
var _a2;
|
|
1174
|
+
this.printSummary({
|
|
1175
|
+
runId,
|
|
1176
|
+
status,
|
|
1177
|
+
passed: totalPassed,
|
|
1178
|
+
failed: totalFailed,
|
|
1179
|
+
passRate,
|
|
1180
|
+
duration,
|
|
1181
|
+
runUrl: (_a2 = summary.runUrl) != null ? _a2 : runUrl,
|
|
1182
|
+
summary
|
|
1183
|
+
});
|
|
1184
|
+
if (exitOnFailure && totalFailed > 0) {
|
|
1185
|
+
process.exit(1);
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
1188
|
+
};
|
|
1189
|
+
}
|
|
1190
|
+
/**
|
|
1191
|
+
* Print a CI-friendly summary of the evaluation results
|
|
1192
|
+
*/
|
|
1193
|
+
printSummary(result) {
|
|
1194
|
+
const { runId, status, passed, failed, passRate, duration, runUrl, summary } = result;
|
|
1195
|
+
console.log("\n" + "\u2550".repeat(60));
|
|
1196
|
+
console.log(" EVALUATION RESULTS");
|
|
1197
|
+
console.log("\u2550".repeat(60));
|
|
1198
|
+
console.log(` Run ID: ${runId}`);
|
|
1199
|
+
console.log(` Status: ${status.toUpperCase()}`);
|
|
1200
|
+
console.log(` Duration: ${(duration / 1e3).toFixed(1)}s`);
|
|
1201
|
+
console.log("\u2500".repeat(60));
|
|
1202
|
+
console.log(` Passed: ${passed}`);
|
|
1203
|
+
console.log(` Failed: ${failed}`);
|
|
1204
|
+
console.log(` Pass Rate: ${passRate.toFixed(1)}%`);
|
|
1205
|
+
if (summary.targets && summary.targets.length > 0) {
|
|
1206
|
+
console.log("\u2500".repeat(60));
|
|
1207
|
+
console.log(" TARGETS:");
|
|
1208
|
+
for (const target of summary.targets) {
|
|
1209
|
+
console.log(` ${target.name}: ${target.passed} passed, ${target.failed} failed`);
|
|
1210
|
+
if (target.avgLatency) {
|
|
1211
|
+
console.log(` Avg latency: ${target.avgLatency.toFixed(0)}ms`);
|
|
1212
|
+
}
|
|
1213
|
+
if (target.totalCost) {
|
|
1214
|
+
console.log(` Total cost: $${target.totalCost.toFixed(4)}`);
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
if (summary.evaluators && summary.evaluators.length > 0) {
|
|
1219
|
+
console.log("\u2500".repeat(60));
|
|
1220
|
+
console.log(" EVALUATORS:");
|
|
1221
|
+
for (const evaluator of summary.evaluators) {
|
|
1222
|
+
console.log(
|
|
1223
|
+
` ${evaluator.name}: ${evaluator.passRate.toFixed(1)}% pass rate`
|
|
1224
|
+
);
|
|
1225
|
+
if (evaluator.avgScore !== void 0) {
|
|
1226
|
+
console.log(` Avg score: ${evaluator.avgScore.toFixed(2)}`);
|
|
1227
|
+
}
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
console.log("\u2500".repeat(60));
|
|
1231
|
+
console.log(` View details: ${runUrl}`);
|
|
1232
|
+
console.log("\u2550".repeat(60) + "\n");
|
|
1233
|
+
}
|
|
1234
|
+
sleep(ms) {
|
|
1235
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1236
|
+
}
|
|
1237
|
+
/**
|
|
1238
|
+
* Replace the domain of a URL with a new base URL, preserving the path
|
|
1239
|
+
*/
|
|
1240
|
+
replaceUrlDomain(url, newBase) {
|
|
1241
|
+
if (!url) return url;
|
|
1242
|
+
try {
|
|
1243
|
+
const parsedUrl = new URL(url);
|
|
1244
|
+
const parsedNewBase = new URL(newBase);
|
|
1245
|
+
return `${parsedNewBase.origin}${parsedUrl.pathname}${parsedUrl.search}${parsedUrl.hash}`;
|
|
1246
|
+
} catch (e) {
|
|
1247
|
+
return url;
|
|
1248
|
+
}
|
|
1249
|
+
}
|
|
976
1250
|
};
|
|
977
1251
|
|
|
978
1252
|
// src/client-sdk/services/traces/types.ts
|
|
@@ -986,13 +1260,13 @@ var TracesError = class extends Error {
|
|
|
986
1260
|
};
|
|
987
1261
|
|
|
988
1262
|
// src/client-sdk/services/traces/tracing/tracer.ts
|
|
989
|
-
var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${
|
|
1263
|
+
var tracer = _chunkONXIZKC6js.getLangWatchTracer.call(void 0, `${_chunkC4XUWCQRjs.LANGWATCH_SDK_NAME_CLIENT}.traces`, _chunkC4XUWCQRjs.LANGWATCH_SDK_VERSION);
|
|
990
1264
|
|
|
991
1265
|
// src/client-sdk/services/traces/service.ts
|
|
992
1266
|
var TracesService = class {
|
|
993
1267
|
constructor(config) {
|
|
994
1268
|
this.config = config;
|
|
995
|
-
return
|
|
1269
|
+
return _chunk6SSCBYJMjs.createTracingProxy.call(void 0,
|
|
996
1270
|
this,
|
|
997
1271
|
tracer
|
|
998
1272
|
);
|
|
@@ -1055,15 +1329,15 @@ var LangWatch = class {
|
|
|
1055
1329
|
_chunkOHM7JUMRjs.__privateAdd.call(void 0, this, _LangWatch_instances);
|
|
1056
1330
|
var _a, _b, _c, _d;
|
|
1057
1331
|
const apiKey = (_b = (_a = options.apiKey) != null ? _a : process.env.LANGWATCH_API_KEY) != null ? _b : "";
|
|
1058
|
-
const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d :
|
|
1332
|
+
const endpoint = (_d = (_c = options.endpoint) != null ? _c : process.env.LANGWATCH_ENDPOINT) != null ? _d : _chunkC4XUWCQRjs.DEFAULT_ENDPOINT;
|
|
1059
1333
|
this.config = _chunkOHM7JUMRjs.__privateMethod.call(void 0, this, _LangWatch_instances, createInternalConfig_fn).call(this, {
|
|
1060
1334
|
apiKey,
|
|
1061
1335
|
endpoint,
|
|
1062
1336
|
options: options.options
|
|
1063
1337
|
});
|
|
1064
|
-
this.prompts = new (0,
|
|
1065
|
-
promptsApiService: new (0,
|
|
1066
|
-
localPromptsService: new (0,
|
|
1338
|
+
this.prompts = new (0, _chunk6SSCBYJMjs.PromptsFacade)(_chunkOHM7JUMRjs.__spreadValues.call(void 0, {
|
|
1339
|
+
promptsApiService: new (0, _chunk6SSCBYJMjs.PromptsApiService)(this.config),
|
|
1340
|
+
localPromptsService: new (0, _chunk6SSCBYJMjs.LocalPromptsService)()
|
|
1067
1341
|
}, this.config));
|
|
1068
1342
|
this.traces = new TracesFacade(this.config);
|
|
1069
1343
|
this.evaluation = new EvaluationFacade({
|
|
@@ -1090,7 +1364,7 @@ createInternalConfig_fn = function({
|
|
|
1090
1364
|
var _a;
|
|
1091
1365
|
return {
|
|
1092
1366
|
logger: (_a = options == null ? void 0 : options.logger) != null ? _a : new (0, _chunk5MQQRSVMjs.NoOpLogger)(),
|
|
1093
|
-
langwatchApiClient:
|
|
1367
|
+
langwatchApiClient: _chunk6SSCBYJMjs.createLangWatchApiClient.call(void 0, apiKey, endpoint),
|
|
1094
1368
|
endpoint,
|
|
1095
1369
|
apiKey
|
|
1096
1370
|
};
|
|
@@ -1117,5 +1391,5 @@ var logger = {
|
|
|
1117
1391
|
|
|
1118
1392
|
|
|
1119
1393
|
|
|
1120
|
-
exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy =
|
|
1394
|
+
exports.Evaluation = Evaluation; exports.EvaluationApiError = EvaluationApiError; exports.EvaluationError = EvaluationError; exports.EvaluationFacade = EvaluationFacade; exports.EvaluationInitError = EvaluationInitError; exports.EvaluatorError = EvaluatorError; exports.FetchPolicy = _chunk6SSCBYJMjs.FetchPolicy; exports.FilterableBatchSpanProcessor = _chunkASTAIRXGjs.FilterableBatchSpanProcessor; exports.LangWatch = LangWatch; exports.LangWatchExporter = _chunkBQRUUTN3js.LangWatchTraceExporter; exports.TargetMetadataConflictError = TargetMetadataConflictError; exports.attributes = _chunk5MQQRSVMjs.attributes_exports; exports.getLangWatchLogger = _chunkBQRUUTN3js.getLangWatchLogger; exports.getLangWatchTracer = _chunkONXIZKC6js.getLangWatchTracer; exports.logger = logger;
|
|
1121
1395
|
//# sourceMappingURL=index.js.map
|