@wix/evalforge-types 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +184 -180
- package/build/index.js.map +3 -3
- package/build/index.mjs +184 -180
- package/build/index.mjs.map +3 -3
- package/build/types/evaluation/eval-result.d.ts +14 -0
- package/build/types/evaluation/eval-run.d.ts +14 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -634,96 +634,13 @@ var LLMTraceSchema = import_zod21.z.object({
|
|
|
634
634
|
});
|
|
635
635
|
|
|
636
636
|
// src/evaluation/eval-result.ts
|
|
637
|
-
var
|
|
638
|
-
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
639
|
-
AssertionResultStatus2["PASSED"] = "passed";
|
|
640
|
-
AssertionResultStatus2["FAILED"] = "failed";
|
|
641
|
-
AssertionResultStatus2["SKIPPED"] = "skipped";
|
|
642
|
-
AssertionResultStatus2["ERROR"] = "error";
|
|
643
|
-
return AssertionResultStatus2;
|
|
644
|
-
})(AssertionResultStatus || {});
|
|
645
|
-
var AssertionResultSchema = import_zod22.z.object({
|
|
646
|
-
id: import_zod22.z.string(),
|
|
647
|
-
assertionId: import_zod22.z.string(),
|
|
648
|
-
assertionType: import_zod22.z.string(),
|
|
649
|
-
assertionName: import_zod22.z.string(),
|
|
650
|
-
status: import_zod22.z.enum(AssertionResultStatus),
|
|
651
|
-
message: import_zod22.z.string().optional(),
|
|
652
|
-
expected: import_zod22.z.string().optional(),
|
|
653
|
-
actual: import_zod22.z.string().optional(),
|
|
654
|
-
duration: import_zod22.z.number().optional(),
|
|
655
|
-
details: import_zod22.z.record(import_zod22.z.string(), import_zod22.z.unknown()).optional(),
|
|
656
|
-
llmTraceSteps: import_zod22.z.array(LLMTraceStepSchema).optional()
|
|
657
|
-
});
|
|
658
|
-
var EvalRunResultSchema = import_zod22.z.object({
|
|
659
|
-
id: import_zod22.z.string(),
|
|
660
|
-
targetId: import_zod22.z.string(),
|
|
661
|
-
targetName: import_zod22.z.string().optional(),
|
|
662
|
-
scenarioId: import_zod22.z.string(),
|
|
663
|
-
scenarioName: import_zod22.z.string(),
|
|
664
|
-
modelConfig: ModelConfigSchema.optional(),
|
|
665
|
-
assertionResults: import_zod22.z.array(AssertionResultSchema),
|
|
666
|
-
metrics: EvalMetricsSchema.optional(),
|
|
667
|
-
passed: import_zod22.z.number(),
|
|
668
|
-
failed: import_zod22.z.number(),
|
|
669
|
-
passRate: import_zod22.z.number(),
|
|
670
|
-
duration: import_zod22.z.number(),
|
|
671
|
-
outputText: import_zod22.z.string().optional(),
|
|
672
|
-
files: import_zod22.z.array(ExpectedFileSchema).optional(),
|
|
673
|
-
startedAt: import_zod22.z.string().optional(),
|
|
674
|
-
completedAt: import_zod22.z.string().optional(),
|
|
675
|
-
llmTrace: LLMTraceSchema.optional()
|
|
676
|
-
});
|
|
677
|
-
var PromptResultSchema = import_zod22.z.object({
|
|
678
|
-
text: import_zod22.z.string(),
|
|
679
|
-
files: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
680
|
-
finishReason: import_zod22.z.string().optional(),
|
|
681
|
-
reasoning: import_zod22.z.string().optional(),
|
|
682
|
-
reasoningDetails: import_zod22.z.unknown().optional(),
|
|
683
|
-
toolCalls: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
684
|
-
toolResults: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
685
|
-
warnings: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
686
|
-
sources: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
687
|
-
steps: import_zod22.z.array(import_zod22.z.unknown()),
|
|
688
|
-
generationTimeMs: import_zod22.z.number(),
|
|
689
|
-
prompt: import_zod22.z.string(),
|
|
690
|
-
systemPrompt: import_zod22.z.string(),
|
|
691
|
-
usage: import_zod22.z.object({
|
|
692
|
-
totalTokens: import_zod22.z.number().optional(),
|
|
693
|
-
totalMicrocentsSpent: import_zod22.z.number().optional()
|
|
694
|
-
})
|
|
695
|
-
});
|
|
696
|
-
var EvaluationResultSchema = import_zod22.z.object({
|
|
697
|
-
id: import_zod22.z.string(),
|
|
698
|
-
runId: import_zod22.z.string(),
|
|
699
|
-
timestamp: import_zod22.z.number(),
|
|
700
|
-
promptResult: PromptResultSchema,
|
|
701
|
-
testResults: import_zod22.z.array(import_zod22.z.unknown()),
|
|
702
|
-
tags: import_zod22.z.array(import_zod22.z.string()).optional(),
|
|
703
|
-
feedback: import_zod22.z.string().optional(),
|
|
704
|
-
score: import_zod22.z.number(),
|
|
705
|
-
suiteId: import_zod22.z.string().optional()
|
|
706
|
-
});
|
|
707
|
-
var LeanEvaluationResultSchema = import_zod22.z.object({
|
|
708
|
-
id: import_zod22.z.string(),
|
|
709
|
-
runId: import_zod22.z.string(),
|
|
710
|
-
timestamp: import_zod22.z.number(),
|
|
711
|
-
tags: import_zod22.z.array(import_zod22.z.string()).optional(),
|
|
712
|
-
scenarioId: import_zod22.z.string(),
|
|
713
|
-
scenarioVersion: import_zod22.z.number().optional(),
|
|
714
|
-
targetId: import_zod22.z.string(),
|
|
715
|
-
targetVersion: import_zod22.z.number().optional(),
|
|
716
|
-
suiteId: import_zod22.z.string().optional(),
|
|
717
|
-
score: import_zod22.z.number(),
|
|
718
|
-
time: import_zod22.z.number().optional(),
|
|
719
|
-
microcentsSpent: import_zod22.z.number().optional()
|
|
720
|
-
});
|
|
637
|
+
var import_zod24 = require("zod");
|
|
721
638
|
|
|
722
639
|
// src/evaluation/eval-run.ts
|
|
723
|
-
var
|
|
640
|
+
var import_zod23 = require("zod");
|
|
724
641
|
|
|
725
642
|
// src/evaluation/live-trace.ts
|
|
726
|
-
var
|
|
643
|
+
var import_zod22 = require("zod");
|
|
727
644
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
728
645
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
729
646
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -732,31 +649,31 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
732
649
|
LiveTraceEventType2["DIAGNOSTIC"] = "diagnostic";
|
|
733
650
|
return LiveTraceEventType2;
|
|
734
651
|
})(LiveTraceEventType || {});
|
|
735
|
-
var LiveTraceEventSchema =
|
|
652
|
+
var LiveTraceEventSchema = import_zod22.z.object({
|
|
736
653
|
/** The evaluation run ID */
|
|
737
|
-
evalRunId:
|
|
654
|
+
evalRunId: import_zod22.z.string(),
|
|
738
655
|
/** The scenario ID being executed */
|
|
739
|
-
scenarioId:
|
|
656
|
+
scenarioId: import_zod22.z.string(),
|
|
740
657
|
/** The scenario name for display */
|
|
741
|
-
scenarioName:
|
|
658
|
+
scenarioName: import_zod22.z.string(),
|
|
742
659
|
/** The target ID (skill, agent, etc.) */
|
|
743
|
-
targetId:
|
|
660
|
+
targetId: import_zod22.z.string(),
|
|
744
661
|
/** The target name for display */
|
|
745
|
-
targetName:
|
|
662
|
+
targetName: import_zod22.z.string(),
|
|
746
663
|
/** Step number in the current scenario execution */
|
|
747
|
-
stepNumber:
|
|
664
|
+
stepNumber: import_zod22.z.number(),
|
|
748
665
|
/** Type of trace event */
|
|
749
|
-
type:
|
|
666
|
+
type: import_zod22.z.enum(LiveTraceEventType),
|
|
750
667
|
/** Tool name if this is a tool_use event */
|
|
751
|
-
toolName:
|
|
668
|
+
toolName: import_zod22.z.string().optional(),
|
|
752
669
|
/** Tool arguments preview (truncated JSON) */
|
|
753
|
-
toolArgs:
|
|
670
|
+
toolArgs: import_zod22.z.string().optional(),
|
|
754
671
|
/** Output preview (truncated text) */
|
|
755
|
-
outputPreview:
|
|
672
|
+
outputPreview: import_zod22.z.string().optional(),
|
|
756
673
|
/** Timestamp when this event occurred */
|
|
757
|
-
timestamp:
|
|
674
|
+
timestamp: import_zod22.z.string(),
|
|
758
675
|
/** Whether this is the final event for this scenario */
|
|
759
|
-
isComplete:
|
|
676
|
+
isComplete: import_zod22.z.boolean()
|
|
760
677
|
});
|
|
761
678
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
762
679
|
function parseTraceEventLine(line) {
|
|
@@ -784,14 +701,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
784
701
|
TriggerType2["MANUAL"] = "MANUAL";
|
|
785
702
|
return TriggerType2;
|
|
786
703
|
})(TriggerType || {});
|
|
787
|
-
var TriggerMetadataSchema =
|
|
788
|
-
version:
|
|
789
|
-
resourceUpdated:
|
|
704
|
+
var TriggerMetadataSchema = import_zod23.z.object({
|
|
705
|
+
version: import_zod23.z.string().optional(),
|
|
706
|
+
resourceUpdated: import_zod23.z.array(import_zod23.z.string()).optional()
|
|
790
707
|
});
|
|
791
|
-
var TriggerSchema =
|
|
792
|
-
id:
|
|
708
|
+
var TriggerSchema = import_zod23.z.object({
|
|
709
|
+
id: import_zod23.z.string(),
|
|
793
710
|
metadata: TriggerMetadataSchema.optional(),
|
|
794
|
-
type:
|
|
711
|
+
type: import_zod23.z.enum(TriggerType)
|
|
795
712
|
});
|
|
796
713
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
797
714
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -809,89 +726,89 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
809
726
|
FailureSeverity2["LOW"] = "low";
|
|
810
727
|
return FailureSeverity2;
|
|
811
728
|
})(FailureSeverity || {});
|
|
812
|
-
var DiffLineTypeSchema =
|
|
813
|
-
var DiffLineSchema =
|
|
729
|
+
var DiffLineTypeSchema = import_zod23.z.enum(["added", "removed", "unchanged"]);
|
|
730
|
+
var DiffLineSchema = import_zod23.z.object({
|
|
814
731
|
type: DiffLineTypeSchema,
|
|
815
|
-
content:
|
|
816
|
-
lineNumber:
|
|
817
|
-
});
|
|
818
|
-
var DiffContentSchema =
|
|
819
|
-
path:
|
|
820
|
-
expected:
|
|
821
|
-
actual:
|
|
822
|
-
diffLines:
|
|
823
|
-
});
|
|
824
|
-
var CommandExecutionSchema =
|
|
825
|
-
command:
|
|
826
|
-
exitCode:
|
|
827
|
-
output:
|
|
828
|
-
duration:
|
|
829
|
-
});
|
|
830
|
-
var FileModificationSchema =
|
|
831
|
-
path:
|
|
832
|
-
action:
|
|
833
|
-
});
|
|
834
|
-
var ApiCallSchema =
|
|
835
|
-
endpoint:
|
|
836
|
-
tokensUsed:
|
|
837
|
-
duration:
|
|
838
|
-
});
|
|
839
|
-
var ExecutionTraceSchema =
|
|
840
|
-
commands:
|
|
841
|
-
filesModified:
|
|
842
|
-
apiCalls:
|
|
843
|
-
totalDuration:
|
|
844
|
-
});
|
|
845
|
-
var FailureAnalysisSchema =
|
|
846
|
-
category:
|
|
847
|
-
severity:
|
|
848
|
-
summary:
|
|
849
|
-
details:
|
|
850
|
-
rootCause:
|
|
851
|
-
suggestedFix:
|
|
852
|
-
relatedAssertions:
|
|
853
|
-
codeSnippet:
|
|
854
|
-
similarIssues:
|
|
855
|
-
patternId:
|
|
732
|
+
content: import_zod23.z.string(),
|
|
733
|
+
lineNumber: import_zod23.z.number()
|
|
734
|
+
});
|
|
735
|
+
var DiffContentSchema = import_zod23.z.object({
|
|
736
|
+
path: import_zod23.z.string(),
|
|
737
|
+
expected: import_zod23.z.string(),
|
|
738
|
+
actual: import_zod23.z.string(),
|
|
739
|
+
diffLines: import_zod23.z.array(DiffLineSchema)
|
|
740
|
+
});
|
|
741
|
+
var CommandExecutionSchema = import_zod23.z.object({
|
|
742
|
+
command: import_zod23.z.string(),
|
|
743
|
+
exitCode: import_zod23.z.number(),
|
|
744
|
+
output: import_zod23.z.string().optional(),
|
|
745
|
+
duration: import_zod23.z.number()
|
|
746
|
+
});
|
|
747
|
+
var FileModificationSchema = import_zod23.z.object({
|
|
748
|
+
path: import_zod23.z.string(),
|
|
749
|
+
action: import_zod23.z.enum(["created", "modified", "deleted"])
|
|
750
|
+
});
|
|
751
|
+
var ApiCallSchema = import_zod23.z.object({
|
|
752
|
+
endpoint: import_zod23.z.string(),
|
|
753
|
+
tokensUsed: import_zod23.z.number(),
|
|
754
|
+
duration: import_zod23.z.number()
|
|
755
|
+
});
|
|
756
|
+
var ExecutionTraceSchema = import_zod23.z.object({
|
|
757
|
+
commands: import_zod23.z.array(CommandExecutionSchema),
|
|
758
|
+
filesModified: import_zod23.z.array(FileModificationSchema),
|
|
759
|
+
apiCalls: import_zod23.z.array(ApiCallSchema),
|
|
760
|
+
totalDuration: import_zod23.z.number()
|
|
761
|
+
});
|
|
762
|
+
var FailureAnalysisSchema = import_zod23.z.object({
|
|
763
|
+
category: import_zod23.z.enum(FailureCategory),
|
|
764
|
+
severity: import_zod23.z.enum(FailureSeverity),
|
|
765
|
+
summary: import_zod23.z.string(),
|
|
766
|
+
details: import_zod23.z.string(),
|
|
767
|
+
rootCause: import_zod23.z.string(),
|
|
768
|
+
suggestedFix: import_zod23.z.string(),
|
|
769
|
+
relatedAssertions: import_zod23.z.array(import_zod23.z.string()),
|
|
770
|
+
codeSnippet: import_zod23.z.string().optional(),
|
|
771
|
+
similarIssues: import_zod23.z.array(import_zod23.z.string()).optional(),
|
|
772
|
+
patternId: import_zod23.z.string().optional(),
|
|
856
773
|
// Extended fields for detailed debugging
|
|
857
774
|
diff: DiffContentSchema.optional(),
|
|
858
775
|
executionTrace: ExecutionTraceSchema.optional()
|
|
859
776
|
});
|
|
860
777
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
861
778
|
/** Agent ID for this run */
|
|
862
|
-
agentId:
|
|
779
|
+
agentId: import_zod23.z.string().optional(),
|
|
863
780
|
/** Skills group ID for this run */
|
|
864
|
-
skillsGroupId:
|
|
781
|
+
skillsGroupId: import_zod23.z.string().optional(),
|
|
865
782
|
/** Scenario IDs to run */
|
|
866
|
-
scenarioIds:
|
|
783
|
+
scenarioIds: import_zod23.z.array(import_zod23.z.string()),
|
|
867
784
|
/** Current status */
|
|
868
785
|
status: EvalStatusSchema,
|
|
869
786
|
/** Progress percentage (0-100) */
|
|
870
|
-
progress:
|
|
787
|
+
progress: import_zod23.z.number(),
|
|
871
788
|
/** Results for each scenario/target combination */
|
|
872
|
-
results:
|
|
789
|
+
results: import_zod23.z.array(EvalRunResultSchema),
|
|
873
790
|
/** Aggregated metrics across all results */
|
|
874
791
|
aggregateMetrics: EvalMetricsSchema,
|
|
875
792
|
/** Failure analyses */
|
|
876
|
-
failureAnalyses:
|
|
793
|
+
failureAnalyses: import_zod23.z.array(FailureAnalysisSchema).optional(),
|
|
877
794
|
/** Aggregated LLM trace summary */
|
|
878
795
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
879
796
|
/** What triggered this run */
|
|
880
797
|
trigger: TriggerSchema.optional(),
|
|
881
798
|
/** When the run started (set when evaluation is triggered) */
|
|
882
|
-
startedAt:
|
|
799
|
+
startedAt: import_zod23.z.string().optional(),
|
|
883
800
|
/** When the run completed */
|
|
884
|
-
completedAt:
|
|
801
|
+
completedAt: import_zod23.z.string().optional(),
|
|
885
802
|
/** Live trace events captured during execution (for playback on results page) */
|
|
886
|
-
liveTraceEvents:
|
|
803
|
+
liveTraceEvents: import_zod23.z.array(LiveTraceEventSchema).optional(),
|
|
887
804
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
888
|
-
jobId:
|
|
805
|
+
jobId: import_zod23.z.string().optional(),
|
|
889
806
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
890
|
-
jobStatus:
|
|
807
|
+
jobStatus: import_zod23.z.string().optional(),
|
|
891
808
|
/** Remote job error message if the job failed */
|
|
892
|
-
jobError:
|
|
809
|
+
jobError: import_zod23.z.string().optional(),
|
|
893
810
|
/** Timestamp of the last job status check */
|
|
894
|
-
jobStatusCheckedAt:
|
|
811
|
+
jobStatusCheckedAt: import_zod23.z.string().optional()
|
|
895
812
|
});
|
|
896
813
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
897
814
|
id: true,
|
|
@@ -904,32 +821,119 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
904
821
|
startedAt: true,
|
|
905
822
|
completedAt: true
|
|
906
823
|
});
|
|
907
|
-
var EvaluationProgressSchema =
|
|
908
|
-
runId:
|
|
909
|
-
targetId:
|
|
910
|
-
totalScenarios:
|
|
911
|
-
completedScenarios:
|
|
912
|
-
scenarioProgress:
|
|
913
|
-
|
|
914
|
-
scenarioId:
|
|
915
|
-
currentStep:
|
|
916
|
-
error:
|
|
824
|
+
var EvaluationProgressSchema = import_zod23.z.object({
|
|
825
|
+
runId: import_zod23.z.string(),
|
|
826
|
+
targetId: import_zod23.z.string(),
|
|
827
|
+
totalScenarios: import_zod23.z.number(),
|
|
828
|
+
completedScenarios: import_zod23.z.number(),
|
|
829
|
+
scenarioProgress: import_zod23.z.array(
|
|
830
|
+
import_zod23.z.object({
|
|
831
|
+
scenarioId: import_zod23.z.string(),
|
|
832
|
+
currentStep: import_zod23.z.string(),
|
|
833
|
+
error: import_zod23.z.string().optional()
|
|
917
834
|
})
|
|
918
835
|
),
|
|
919
|
-
createdAt:
|
|
836
|
+
createdAt: import_zod23.z.number()
|
|
920
837
|
});
|
|
921
|
-
var EvaluationLogSchema =
|
|
922
|
-
runId:
|
|
923
|
-
scenarioId:
|
|
924
|
-
log:
|
|
925
|
-
level:
|
|
926
|
-
message:
|
|
927
|
-
args:
|
|
928
|
-
error:
|
|
838
|
+
var EvaluationLogSchema = import_zod23.z.object({
|
|
839
|
+
runId: import_zod23.z.string(),
|
|
840
|
+
scenarioId: import_zod23.z.string(),
|
|
841
|
+
log: import_zod23.z.object({
|
|
842
|
+
level: import_zod23.z.enum(["info", "error", "debug"]),
|
|
843
|
+
message: import_zod23.z.string().optional(),
|
|
844
|
+
args: import_zod23.z.array(import_zod23.z.any()).optional(),
|
|
845
|
+
error: import_zod23.z.string().optional()
|
|
929
846
|
})
|
|
930
847
|
});
|
|
931
848
|
var LLM_TIMEOUT = 12e4;
|
|
932
849
|
|
|
850
|
+
// src/evaluation/eval-result.ts
|
|
851
|
+
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
852
|
+
AssertionResultStatus2["PASSED"] = "passed";
|
|
853
|
+
AssertionResultStatus2["FAILED"] = "failed";
|
|
854
|
+
AssertionResultStatus2["SKIPPED"] = "skipped";
|
|
855
|
+
AssertionResultStatus2["ERROR"] = "error";
|
|
856
|
+
return AssertionResultStatus2;
|
|
857
|
+
})(AssertionResultStatus || {});
|
|
858
|
+
var AssertionResultSchema = import_zod24.z.object({
|
|
859
|
+
id: import_zod24.z.string(),
|
|
860
|
+
assertionId: import_zod24.z.string(),
|
|
861
|
+
assertionType: import_zod24.z.string(),
|
|
862
|
+
assertionName: import_zod24.z.string(),
|
|
863
|
+
status: import_zod24.z.enum(AssertionResultStatus),
|
|
864
|
+
message: import_zod24.z.string().optional(),
|
|
865
|
+
expected: import_zod24.z.string().optional(),
|
|
866
|
+
actual: import_zod24.z.string().optional(),
|
|
867
|
+
duration: import_zod24.z.number().optional(),
|
|
868
|
+
details: import_zod24.z.record(import_zod24.z.string(), import_zod24.z.unknown()).optional(),
|
|
869
|
+
llmTraceSteps: import_zod24.z.array(LLMTraceStepSchema).optional()
|
|
870
|
+
});
|
|
871
|
+
var EvalRunResultSchema = import_zod24.z.object({
|
|
872
|
+
id: import_zod24.z.string(),
|
|
873
|
+
targetId: import_zod24.z.string(),
|
|
874
|
+
targetName: import_zod24.z.string().optional(),
|
|
875
|
+
scenarioId: import_zod24.z.string(),
|
|
876
|
+
scenarioName: import_zod24.z.string(),
|
|
877
|
+
modelConfig: ModelConfigSchema.optional(),
|
|
878
|
+
assertionResults: import_zod24.z.array(AssertionResultSchema),
|
|
879
|
+
metrics: EvalMetricsSchema.optional(),
|
|
880
|
+
passed: import_zod24.z.number(),
|
|
881
|
+
failed: import_zod24.z.number(),
|
|
882
|
+
passRate: import_zod24.z.number(),
|
|
883
|
+
duration: import_zod24.z.number(),
|
|
884
|
+
outputText: import_zod24.z.string().optional(),
|
|
885
|
+
files: import_zod24.z.array(ExpectedFileSchema).optional(),
|
|
886
|
+
/** File diffs showing changes made by the agent during execution */
|
|
887
|
+
fileDiffs: import_zod24.z.array(DiffContentSchema).optional(),
|
|
888
|
+
startedAt: import_zod24.z.string().optional(),
|
|
889
|
+
completedAt: import_zod24.z.string().optional(),
|
|
890
|
+
llmTrace: LLMTraceSchema.optional()
|
|
891
|
+
});
|
|
892
|
+
var PromptResultSchema = import_zod24.z.object({
|
|
893
|
+
text: import_zod24.z.string(),
|
|
894
|
+
files: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
895
|
+
finishReason: import_zod24.z.string().optional(),
|
|
896
|
+
reasoning: import_zod24.z.string().optional(),
|
|
897
|
+
reasoningDetails: import_zod24.z.unknown().optional(),
|
|
898
|
+
toolCalls: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
899
|
+
toolResults: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
900
|
+
warnings: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
901
|
+
sources: import_zod24.z.array(import_zod24.z.unknown()).optional(),
|
|
902
|
+
steps: import_zod24.z.array(import_zod24.z.unknown()),
|
|
903
|
+
generationTimeMs: import_zod24.z.number(),
|
|
904
|
+
prompt: import_zod24.z.string(),
|
|
905
|
+
systemPrompt: import_zod24.z.string(),
|
|
906
|
+
usage: import_zod24.z.object({
|
|
907
|
+
totalTokens: import_zod24.z.number().optional(),
|
|
908
|
+
totalMicrocentsSpent: import_zod24.z.number().optional()
|
|
909
|
+
})
|
|
910
|
+
});
|
|
911
|
+
var EvaluationResultSchema = import_zod24.z.object({
|
|
912
|
+
id: import_zod24.z.string(),
|
|
913
|
+
runId: import_zod24.z.string(),
|
|
914
|
+
timestamp: import_zod24.z.number(),
|
|
915
|
+
promptResult: PromptResultSchema,
|
|
916
|
+
testResults: import_zod24.z.array(import_zod24.z.unknown()),
|
|
917
|
+
tags: import_zod24.z.array(import_zod24.z.string()).optional(),
|
|
918
|
+
feedback: import_zod24.z.string().optional(),
|
|
919
|
+
score: import_zod24.z.number(),
|
|
920
|
+
suiteId: import_zod24.z.string().optional()
|
|
921
|
+
});
|
|
922
|
+
var LeanEvaluationResultSchema = import_zod24.z.object({
|
|
923
|
+
id: import_zod24.z.string(),
|
|
924
|
+
runId: import_zod24.z.string(),
|
|
925
|
+
timestamp: import_zod24.z.number(),
|
|
926
|
+
tags: import_zod24.z.array(import_zod24.z.string()).optional(),
|
|
927
|
+
scenarioId: import_zod24.z.string(),
|
|
928
|
+
scenarioVersion: import_zod24.z.number().optional(),
|
|
929
|
+
targetId: import_zod24.z.string(),
|
|
930
|
+
targetVersion: import_zod24.z.number().optional(),
|
|
931
|
+
suiteId: import_zod24.z.string().optional(),
|
|
932
|
+
score: import_zod24.z.number(),
|
|
933
|
+
time: import_zod24.z.number().optional(),
|
|
934
|
+
microcentsSpent: import_zod24.z.number().optional()
|
|
935
|
+
});
|
|
936
|
+
|
|
933
937
|
// src/project/project.ts
|
|
934
938
|
var import_zod25 = require("zod");
|
|
935
939
|
var ProjectSchema = BaseEntitySchema.extend({
|