@wix/evalforge-types 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +185 -180
- package/build/index.js.map +3 -3
- package/build/index.mjs +185 -180
- package/build/index.mjs.map +3 -3
- package/build/types/evaluation/eval-result.d.ts +14 -0
- package/build/types/evaluation/eval-run.d.ts +14 -0
- package/build/types/evaluation/live-trace.d.ts +3 -1
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -634,128 +634,46 @@ var LLMTraceSchema = import_zod21.z.object({
|
|
|
634
634
|
});
|
|
635
635
|
|
|
636
636
|
// src/evaluation/eval-result.ts
|
|
637
|
-
var
|
|
638
|
-
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
639
|
-
AssertionResultStatus2["PASSED"] = "passed";
|
|
640
|
-
AssertionResultStatus2["FAILED"] = "failed";
|
|
641
|
-
AssertionResultStatus2["SKIPPED"] = "skipped";
|
|
642
|
-
AssertionResultStatus2["ERROR"] = "error";
|
|
643
|
-
return AssertionResultStatus2;
|
|
644
|
-
})(AssertionResultStatus || {});
|
|
645
|
-
var AssertionResultSchema = import_zod22.z.object({
|
|
646
|
-
id: import_zod22.z.string(),
|
|
647
|
-
assertionId: import_zod22.z.string(),
|
|
648
|
-
assertionType: import_zod22.z.string(),
|
|
649
|
-
assertionName: import_zod22.z.string(),
|
|
650
|
-
status: import_zod22.z.enum(AssertionResultStatus),
|
|
651
|
-
message: import_zod22.z.string().optional(),
|
|
652
|
-
expected: import_zod22.z.string().optional(),
|
|
653
|
-
actual: import_zod22.z.string().optional(),
|
|
654
|
-
duration: import_zod22.z.number().optional(),
|
|
655
|
-
details: import_zod22.z.record(import_zod22.z.string(), import_zod22.z.unknown()).optional(),
|
|
656
|
-
llmTraceSteps: import_zod22.z.array(LLMTraceStepSchema).optional()
|
|
657
|
-
});
|
|
658
|
-
var EvalRunResultSchema = import_zod22.z.object({
|
|
659
|
-
id: import_zod22.z.string(),
|
|
660
|
-
targetId: import_zod22.z.string(),
|
|
661
|
-
targetName: import_zod22.z.string().optional(),
|
|
662
|
-
scenarioId: import_zod22.z.string(),
|
|
663
|
-
scenarioName: import_zod22.z.string(),
|
|
664
|
-
modelConfig: ModelConfigSchema.optional(),
|
|
665
|
-
assertionResults: import_zod22.z.array(AssertionResultSchema),
|
|
666
|
-
metrics: EvalMetricsSchema.optional(),
|
|
667
|
-
passed: import_zod22.z.number(),
|
|
668
|
-
failed: import_zod22.z.number(),
|
|
669
|
-
passRate: import_zod22.z.number(),
|
|
670
|
-
duration: import_zod22.z.number(),
|
|
671
|
-
outputText: import_zod22.z.string().optional(),
|
|
672
|
-
files: import_zod22.z.array(ExpectedFileSchema).optional(),
|
|
673
|
-
startedAt: import_zod22.z.string().optional(),
|
|
674
|
-
completedAt: import_zod22.z.string().optional(),
|
|
675
|
-
llmTrace: LLMTraceSchema.optional()
|
|
676
|
-
});
|
|
677
|
-
var PromptResultSchema = import_zod22.z.object({
|
|
678
|
-
text: import_zod22.z.string(),
|
|
679
|
-
files: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
680
|
-
finishReason: import_zod22.z.string().optional(),
|
|
681
|
-
reasoning: import_zod22.z.string().optional(),
|
|
682
|
-
reasoningDetails: import_zod22.z.unknown().optional(),
|
|
683
|
-
toolCalls: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
684
|
-
toolResults: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
685
|
-
warnings: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
686
|
-
sources: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
687
|
-
steps: import_zod22.z.array(import_zod22.z.unknown()),
|
|
688
|
-
generationTimeMs: import_zod22.z.number(),
|
|
689
|
-
prompt: import_zod22.z.string(),
|
|
690
|
-
systemPrompt: import_zod22.z.string(),
|
|
691
|
-
usage: import_zod22.z.object({
|
|
692
|
-
totalTokens: import_zod22.z.number().optional(),
|
|
693
|
-
totalMicrocentsSpent: import_zod22.z.number().optional()
|
|
694
|
-
})
|
|
695
|
-
});
|
|
696
|
-
var EvaluationResultSchema = import_zod22.z.object({
|
|
697
|
-
id: import_zod22.z.string(),
|
|
698
|
-
runId: import_zod22.z.string(),
|
|
699
|
-
timestamp: import_zod22.z.number(),
|
|
700
|
-
promptResult: PromptResultSchema,
|
|
701
|
-
testResults: import_zod22.z.array(import_zod22.z.unknown()),
|
|
702
|
-
tags: import_zod22.z.array(import_zod22.z.string()).optional(),
|
|
703
|
-
feedback: import_zod22.z.string().optional(),
|
|
704
|
-
score: import_zod22.z.number(),
|
|
705
|
-
suiteId: import_zod22.z.string().optional()
|
|
706
|
-
});
|
|
707
|
-
var LeanEvaluationResultSchema = import_zod22.z.object({
|
|
708
|
-
id: import_zod22.z.string(),
|
|
709
|
-
runId: import_zod22.z.string(),
|
|
710
|
-
timestamp: import_zod22.z.number(),
|
|
711
|
-
tags: import_zod22.z.array(import_zod22.z.string()).optional(),
|
|
712
|
-
scenarioId: import_zod22.z.string(),
|
|
713
|
-
scenarioVersion: import_zod22.z.number().optional(),
|
|
714
|
-
targetId: import_zod22.z.string(),
|
|
715
|
-
targetVersion: import_zod22.z.number().optional(),
|
|
716
|
-
suiteId: import_zod22.z.string().optional(),
|
|
717
|
-
score: import_zod22.z.number(),
|
|
718
|
-
time: import_zod22.z.number().optional(),
|
|
719
|
-
microcentsSpent: import_zod22.z.number().optional()
|
|
720
|
-
});
|
|
637
|
+
var import_zod24 = require("zod");
|
|
721
638
|
|
|
722
639
|
// src/evaluation/eval-run.ts
|
|
723
|
-
var
|
|
640
|
+
var import_zod23 = require("zod");
|
|
724
641
|
|
|
725
642
|
// src/evaluation/live-trace.ts
|
|
726
|
-
var
|
|
643
|
+
var import_zod22 = require("zod");
|
|
727
644
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
728
645
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
729
646
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
730
647
|
LiveTraceEventType2["COMPLETION"] = "completion";
|
|
731
648
|
LiveTraceEventType2["TOOL_RESULT"] = "tool_result";
|
|
649
|
+
LiveTraceEventType2["DIAGNOSTIC"] = "diagnostic";
|
|
732
650
|
return LiveTraceEventType2;
|
|
733
651
|
})(LiveTraceEventType || {});
|
|
734
|
-
var LiveTraceEventSchema =
|
|
652
|
+
var LiveTraceEventSchema = import_zod22.z.object({
|
|
735
653
|
/** The evaluation run ID */
|
|
736
|
-
evalRunId:
|
|
654
|
+
evalRunId: import_zod22.z.string(),
|
|
737
655
|
/** The scenario ID being executed */
|
|
738
|
-
scenarioId:
|
|
656
|
+
scenarioId: import_zod22.z.string(),
|
|
739
657
|
/** The scenario name for display */
|
|
740
|
-
scenarioName:
|
|
658
|
+
scenarioName: import_zod22.z.string(),
|
|
741
659
|
/** The target ID (skill, agent, etc.) */
|
|
742
|
-
targetId:
|
|
660
|
+
targetId: import_zod22.z.string(),
|
|
743
661
|
/** The target name for display */
|
|
744
|
-
targetName:
|
|
662
|
+
targetName: import_zod22.z.string(),
|
|
745
663
|
/** Step number in the current scenario execution */
|
|
746
|
-
stepNumber:
|
|
664
|
+
stepNumber: import_zod22.z.number(),
|
|
747
665
|
/** Type of trace event */
|
|
748
|
-
type:
|
|
666
|
+
type: import_zod22.z.enum(LiveTraceEventType),
|
|
749
667
|
/** Tool name if this is a tool_use event */
|
|
750
|
-
toolName:
|
|
668
|
+
toolName: import_zod22.z.string().optional(),
|
|
751
669
|
/** Tool arguments preview (truncated JSON) */
|
|
752
|
-
toolArgs:
|
|
670
|
+
toolArgs: import_zod22.z.string().optional(),
|
|
753
671
|
/** Output preview (truncated text) */
|
|
754
|
-
outputPreview:
|
|
672
|
+
outputPreview: import_zod22.z.string().optional(),
|
|
755
673
|
/** Timestamp when this event occurred */
|
|
756
|
-
timestamp:
|
|
674
|
+
timestamp: import_zod22.z.string(),
|
|
757
675
|
/** Whether this is the final event for this scenario */
|
|
758
|
-
isComplete:
|
|
676
|
+
isComplete: import_zod22.z.boolean()
|
|
759
677
|
});
|
|
760
678
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
761
679
|
function parseTraceEventLine(line) {
|
|
@@ -783,14 +701,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
783
701
|
TriggerType2["MANUAL"] = "MANUAL";
|
|
784
702
|
return TriggerType2;
|
|
785
703
|
})(TriggerType || {});
|
|
786
|
-
var TriggerMetadataSchema =
|
|
787
|
-
version:
|
|
788
|
-
resourceUpdated:
|
|
704
|
+
var TriggerMetadataSchema = import_zod23.z.object({
|
|
705
|
+
version: import_zod23.z.string().optional(),
|
|
706
|
+
resourceUpdated: import_zod23.z.array(import_zod23.z.string()).optional()
|
|
789
707
|
});
|
|
790
|
-
var TriggerSchema =
|
|
791
|
-
id:
|
|
708
|
+
var TriggerSchema = import_zod23.z.object({
|
|
709
|
+
id: import_zod23.z.string(),
|
|
792
710
|
metadata: TriggerMetadataSchema.optional(),
|
|
793
|
-
type:
|
|
711
|
+
type: import_zod23.z.enum(TriggerType)
|
|
794
712
|
});
|
|
795
713
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
796
714
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -808,89 +726,89 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
808
726
|
FailureSeverity2["LOW"] = "low";
|
|
809
727
|
return FailureSeverity2;
|
|
810
728
|
})(FailureSeverity || {});
|
|
811
|
-
var DiffLineTypeSchema =
|
|
812
|
-
var DiffLineSchema =
|
|
729
|
+
var DiffLineTypeSchema = import_zod23.z.enum(["added", "removed", "unchanged"]);
|
|
730
|
+
var DiffLineSchema = import_zod23.z.object({
|
|
813
731
|
type: DiffLineTypeSchema,
|
|
814
|
-
content:
|
|
815
|
-
lineNumber:
|
|
816
|
-
});
|
|
817
|
-
var DiffContentSchema =
|
|
818
|
-
path:
|
|
819
|
-
expected:
|
|
820
|
-
actual:
|
|
821
|
-
diffLines:
|
|
822
|
-
});
|
|
823
|
-
var CommandExecutionSchema =
|
|
824
|
-
command:
|
|
825
|
-
exitCode:
|
|
826
|
-
output:
|
|
827
|
-
duration:
|
|
828
|
-
});
|
|
829
|
-
var FileModificationSchema =
|
|
830
|
-
path:
|
|
831
|
-
action:
|
|
832
|
-
});
|
|
833
|
-
var ApiCallSchema =
|
|
834
|
-
endpoint:
|
|
835
|
-
tokensUsed:
|
|
836
|
-
duration:
|
|
837
|
-
});
|
|
838
|
-
var ExecutionTraceSchema =
|
|
839
|
-
commands:
|
|
840
|
-
filesModified:
|
|
841
|
-
apiCalls:
|
|
842
|
-
totalDuration:
|
|
843
|
-
});
|
|
844
|
-
var FailureAnalysisSchema =
|
|
845
|
-
category:
|
|
846
|
-
severity:
|
|
847
|
-
summary:
|
|
848
|
-
details:
|
|
849
|
-
rootCause:
|
|
850
|
-
suggestedFix:
|
|
851
|
-
relatedAssertions:
|
|
852
|
-
codeSnippet:
|
|
853
|
-
similarIssues:
|
|
854
|
-
patternId:
|
|
732
|
+
content: import_zod23.z.string(),
|
|
733
|
+
lineNumber: import_zod23.z.number()
|
|
734
|
+
});
|
|
735
|
+
var DiffContentSchema = import_zod23.z.object({
|
|
736
|
+
path: import_zod23.z.string(),
|
|
737
|
+
expected: import_zod23.z.string(),
|
|
738
|
+
actual: import_zod23.z.string(),
|
|
739
|
+
diffLines: import_zod23.z.array(DiffLineSchema)
|
|
740
|
+
});
|
|
741
|
+
var CommandExecutionSchema = import_zod23.z.object({
|
|
742
|
+
command: import_zod23.z.string(),
|
|
743
|
+
exitCode: import_zod23.z.number(),
|
|
744
|
+
output: import_zod23.z.string().optional(),
|
|
745
|
+
duration: import_zod23.z.number()
|
|
746
|
+
});
|
|
747
|
+
var FileModificationSchema = import_zod23.z.object({
|
|
748
|
+
path: import_zod23.z.string(),
|
|
749
|
+
action: import_zod23.z.enum(["created", "modified", "deleted"])
|
|
750
|
+
});
|
|
751
|
+
var ApiCallSchema = import_zod23.z.object({
|
|
752
|
+
endpoint: import_zod23.z.string(),
|
|
753
|
+
tokensUsed: import_zod23.z.number(),
|
|
754
|
+
duration: import_zod23.z.number()
|
|
755
|
+
});
|
|
756
|
+
var ExecutionTraceSchema = import_zod23.z.object({
|
|
757
|
+
commands: import_zod23.z.array(CommandExecutionSchema),
|
|
758
|
+
filesModified: import_zod23.z.array(FileModificationSchema),
|
|
759
|
+
apiCalls: import_zod23.z.array(ApiCallSchema),
|
|
760
|
+
totalDuration: import_zod23.z.number()
|
|
761
|
+
});
|
|
762
|
+
var FailureAnalysisSchema = import_zod23.z.object({
|
|
763
|
+
category: import_zod23.z.enum(FailureCategory),
|
|
764
|
+
severity: import_zod23.z.enum(FailureSeverity),
|
|
765
|
+
summary: import_zod23.z.string(),
|
|
766
|
+
details: import_zod23.z.string(),
|
|
767
|
+
rootCause: import_zod23.z.string(),
|
|
768
|
+
suggestedFix: import_zod23.z.string(),
|
|
769
|
+
relatedAssertions: import_zod23.z.array(import_zod23.z.string()),
|
|
770
|
+
codeSnippet: import_zod23.z.string().optional(),
|
|
771
|
+
similarIssues: import_zod23.z.array(import_zod23.z.string()).optional(),
|
|
772
|
+
patternId: import_zod23.z.string().optional(),
|
|
855
773
|
// Extended fields for detailed debugging
|
|
856
774
|
diff: DiffContentSchema.optional(),
|
|
857
775
|
executionTrace: ExecutionTraceSchema.optional()
|
|
858
776
|
});
|
|
859
777
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
860
778
|
/** Agent ID for this run */
|
|
861
|
-
agentId:
|
|
779
|
+
agentId: import_zod23.z.string().optional(),
|
|
862
780
|
/** Skills group ID for this run */
|
|
863
|
-
skillsGroupId:
|
|
781
|
+
skillsGroupId: import_zod23.z.string().optional(),
|
|
864
782
|
/** Scenario IDs to run */
|
|
865
|
-
scenarioIds:
|
|
783
|
+
scenarioIds: import_zod23.z.array(import_zod23.z.string()),
|
|
866
784
|
/** Current status */
|
|
867
785
|
status: EvalStatusSchema,
|
|
868
786
|
/** Progress percentage (0-100) */
|
|
869
|
-
progress:
|
|
787
|
+
progress: import_zod23.z.number(),
|
|
870
788
|
/** Results for each scenario/target combination */
|
|
871
|
-
results:
|
|
789
|
+
results: import_zod23.z.array(EvalRunResultSchema),
|
|
872
790
|
/** Aggregated metrics across all results */
|
|
873
791
|
aggregateMetrics: EvalMetricsSchema,
|
|
874
792
|
/** Failure analyses */
|
|
875
|
-
failureAnalyses:
|
|
793
|
+
failureAnalyses: import_zod23.z.array(FailureAnalysisSchema).optional(),
|
|
876
794
|
/** Aggregated LLM trace summary */
|
|
877
795
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
878
796
|
/** What triggered this run */
|
|
879
797
|
trigger: TriggerSchema.optional(),
|
|
880
798
|
/** When the run started (set when evaluation is triggered) */
|
|
881
|
-
startedAt:
|
|
799
|
+
startedAt: import_zod23.z.string().optional(),
|
|
882
800
|
/** When the run completed */
|
|
883
|
-
completedAt:
|
|
801
|
+
completedAt: import_zod23.z.string().optional(),
|
|
884
802
|
/** Live trace events captured during execution (for playback on results page) */
|
|
885
|
-
liveTraceEvents:
|
|
803
|
+
liveTraceEvents: import_zod23.z.array(LiveTraceEventSchema).optional(),
|
|
886
804
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
887
|
-
jobId:
|
|
805
|
+
jobId: import_zod23.z.string().optional(),
|
|
888
806
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
889
|
-
jobStatus:
|
|
807
|
+
jobStatus: import_zod23.z.string().optional(),
|
|
890
808
|
/** Remote job error message if the job failed */
|
|
891
|
-
jobError:
|
|
809
|
+
jobError: import_zod23.z.string().optional(),
|
|
892
810
|
/** Timestamp of the last job status check */
|
|
893
|
-
jobStatusCheckedAt:
|
|
811
|
+
jobStatusCheckedAt: import_zod23.z.string().optional()
|
|
894
812
|
});
|
|
895
813
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
896
814
|
id: true,
|
|
@@ -903,32 +821,119 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
903
821
|
startedAt: true,
|
|
904
822
|
completedAt: true
|
|
905
823
|
});
|
|
906
|
-
var EvaluationProgressSchema =
|
|
907
|
-
runId:
|
|
908
|
-
targetId:
|
|
909
|
-
totalScenarios:
|
|
910
|
-
completedScenarios:
|
|
911
|
-
scenarioProgress:
|
|
912
|
-
|
|
913
|
-
scenarioId:
|
|
914
|
-
currentStep:
|
|
915
|
-
error:
|
|
824
|
+
var EvaluationProgressSchema = import_zod23.z.object({
|
|
825
|
+
runId: import_zod23.z.string(),
|
|
826
|
+
targetId: import_zod23.z.string(),
|
|
827
|
+
totalScenarios: import_zod23.z.number(),
|
|
828
|
+
completedScenarios: import_zod23.z.number(),
|
|
829
|
+
scenarioProgress: import_zod23.z.array(
|
|
830
|
+
import_zod23.z.object({
|
|
831
|
+
scenarioId: import_zod23.z.string(),
|
|
832
|
+
currentStep: import_zod23.z.string(),
|
|
833
|
+
error: import_zod23.z.string().optional()
|
|
916
834
|
})
|
|
917
835
|
),
|
|
918
|
-
createdAt:
|
|
836
|
+
createdAt: import_zod23.z.number()
|
|
919
837
|
});
|
|
920
|
-
var EvaluationLogSchema =
|
|
921
|
-
runId:
|
|
922
|
-
scenarioId:
|
|
923
|
-
log:
|
|
924
|
-
level:
|
|
925
|
-
message:
|
|
926
|
-
args:
|
|
927
|
-
error:
|
|
838
|
+
var EvaluationLogSchema = import_zod23.z.object({
|
|
839
|
+
runId: import_zod23.z.string(),
|
|
840
|
+
scenarioId: import_zod23.z.string(),
|
|
841
|
+
log: import_zod23.z.object({
|
|
842
|
+
level: import_zod23.z.enum(["info", "error", "debug"]),
|
|
843
|
+
message: import_zod23.z.string().optional(),
|
|
844
|
+
args: import_zod23.z.array(import_zod23.z.any()).optional(),
|
|
845
|
+
error: import_zod23.z.string().optional()
|
|
928
846
|
})
|
|
929
847
|
});
|
|
930
848
|
var LLM_TIMEOUT = 12e4;
|
|
931
849
|
|
|
850
|
+
// src/evaluation/eval-result.ts
|
|
851
|
+
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
852
|
+
AssertionResultStatus2["PASSED"] = "passed";
|
|
853
|
+
AssertionResultStatus2["FAILED"] = "failed";
|
|
854
|
+
AssertionResultStatus2["SKIPPED"] = "skipped";
|
|
855
|
+
AssertionResultStatus2["ERROR"] = "error";
|
|
856
|
+
return AssertionResultStatus2;
|
|
857
|
+
})(AssertionResultStatus || {});
|
|
858
|
+
var AssertionResultSchema = import_zod24.z.object({
|
|
859
|
+
id: import_zod24.z.string(),
|
|
860
|
+
assertionId: import_zod24.z.string(),
|
|
861
|
+
assertionType: import_zod24.z.string(),
|
|
862
|
+
assertionName: import_zod24.z.string(),
|
|
863
|
+
status: import_zod24.z.enum(AssertionResultStatus),
|
|
864
|
+
message: import_zod24.z.string().optional(),
|
|
865
|
+
expected: import_zod24.z.string().optional(),
|
|
866
|
+
actual: import_zod24.z.string().optional(),
|
|
867
|
+
duration: import_zod24.z.number().optional(),
|
|
868
|
+
details: import_zod24.z.record(import_zod24.z.string(), import_zod24.z.unknown()).optional(),
|
|
869
|
+
llmTraceSteps: import_zod24.z.array(LLMTraceStepSchema).optional()
|
|
870
|
+
});
|
|
871
|
+
var EvalRunResultSchema = import_zod24.z.object({
|
|
872
|
+
id: import_zod24.z.string(),
|
|
873
|
+
targetId: import_zod24.z.string(),
|
|
874
|
+
targetName: import_zod24.z.string().optional(),
|
|
875
|
+
scenarioId: import_zod24.z.string(),
|
|
876
|
+
scenarioName: import_zod24.z.string(),
|
|
877
|
+
modelConfig: ModelConfigSchema.optional(),
|
|
878
|
+
assertionResults: import_zod24.z.array(AssertionResultSchema),
|
|
879
|
+
metrics: EvalMetricsSchema.optional(),
|
|
880
|
+
passed: import_zod24.z.number(),
|
|
881
|
+
failed: import_zod24.z.number(),
|
|
882
|
+
passRate: import_zod24.z.number(),
|
|
883
|
+
duration: import_zod24.z.number(),
|
|
884
|
+
outputText: import_zod24.z.string().optional(),
|
|
885
|
+
files: import_zod24.z.array(ExpectedFileSchema).optional(),
|
|
886
|
+
/** File diffs showing changes made by the agent during execution */
|
|
887
|
+
fileDiffs: import_zod24.z.array(DiffContentSchema).optional(),
|
|
888
|
+
startedAt: import_zod24.z.string().optional(),
|
|
889
|
+
completedAt: import_zod24.z.string().optional(),
|
|
890
|
+
llmTrace: LLMTraceSchema.optional()
|
|
891
|
+
});
|
|
892
|
+
var PromptResultSchema = import_zod24.z.object({
|
|
893
|
+
text: import_zod24.z.string(),
|
|
894
|
+
files: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
895
|
+
finishReason: import_zod24.z.string().optional(),
|
|
896
|
+
reasoning: import_zod24.z.string().optional(),
|
|
897
|
+
reasoningDetails: import_zod24.z.unknown().optional(),
|
|
898
|
+
toolCalls: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
899
|
+
toolResults: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
900
|
+
warnings: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
901
|
+
sources: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
902
|
+
steps: import_zod24.z.array(import_zod24.z.unknown()),
|
|
903
|
+
generationTimeMs: import_zod24.z.number(),
|
|
904
|
+
prompt: import_zod24.z.string(),
|
|
905
|
+
systemPrompt: import_zod24.z.string(),
|
|
906
|
+
usage: import_zod24.z.object({
|
|
907
|
+
totalTokens: import_zod24.z.number().optional(),
|
|
908
|
+
totalMicrocentsSpent: import_zod24.z.number().optional()
|
|
909
|
+
})
|
|
910
|
+
});
|
|
911
|
+
var EvaluationResultSchema = import_zod24.z.object({
|
|
912
|
+
id: import_zod24.z.string(),
|
|
913
|
+
runId: import_zod24.z.string(),
|
|
914
|
+
timestamp: import_zod24.z.number(),
|
|
915
|
+
promptResult: PromptResultSchema,
|
|
916
|
+
testResults: import_zod24.z.array(import_zod24.z.unknown()),
|
|
917
|
+
tags: import_zod24.z.array(import_zod24.z.string()).optional(),
|
|
918
|
+
feedback: import_zod24.z.string().optional(),
|
|
919
|
+
score: import_zod24.z.number(),
|
|
920
|
+
suiteId: import_zod24.z.string().optional()
|
|
921
|
+
});
|
|
922
|
+
var LeanEvaluationResultSchema = import_zod24.z.object({
|
|
923
|
+
id: import_zod24.z.string(),
|
|
924
|
+
runId: import_zod24.z.string(),
|
|
925
|
+
timestamp: import_zod24.z.number(),
|
|
926
|
+
tags: import_zod24.z.array(import_zod24.z.string()).optional(),
|
|
927
|
+
scenarioId: import_zod24.z.string(),
|
|
928
|
+
scenarioVersion: import_zod24.z.number().optional(),
|
|
929
|
+
targetId: import_zod24.z.string(),
|
|
930
|
+
targetVersion: import_zod24.z.number().optional(),
|
|
931
|
+
suiteId: import_zod24.z.string().optional(),
|
|
932
|
+
score: import_zod24.z.number(),
|
|
933
|
+
time: import_zod24.z.number().optional(),
|
|
934
|
+
microcentsSpent: import_zod24.z.number().optional()
|
|
935
|
+
});
|
|
936
|
+
|
|
932
937
|
// src/project/project.ts
|
|
933
938
|
var import_zod25 = require("zod");
|
|
934
939
|
var ProjectSchema = BaseEntitySchema.extend({
|