@wix/evalforge-types 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +209 -163
- package/build/index.js.map +4 -4
- package/build/index.mjs +205 -163
- package/build/index.mjs.map +4 -4
- package/build/types/evaluation/eval-result.d.ts +14 -0
- package/build/types/evaluation/eval-run.d.ts +14 -0
- package/build/types/scenario/assertions.d.ts +57 -0
- package/build/types/scenario/index.d.ts +1 -0
- package/build/types/scenario/test-scenario.d.ts +48 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -27,9 +27,11 @@ __export(index_exports, {
|
|
|
27
27
|
ApiCallSchema: () => ApiCallSchema,
|
|
28
28
|
AssertionResultSchema: () => AssertionResultSchema,
|
|
29
29
|
AssertionResultStatus: () => AssertionResultStatus,
|
|
30
|
+
AssertionSchema: () => AssertionSchema,
|
|
30
31
|
BaseEntitySchema: () => BaseEntitySchema,
|
|
31
32
|
BaseTestSchema: () => BaseTestSchema,
|
|
32
33
|
BuildCheckTestSchema: () => BuildCheckTestSchema,
|
|
34
|
+
BuildPassedAssertionSchema: () => BuildPassedAssertionSchema,
|
|
33
35
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
34
36
|
CommandExecutionTestSchema: () => CommandExecutionTestSchema,
|
|
35
37
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
@@ -71,6 +73,7 @@ __export(index_exports, {
|
|
|
71
73
|
LeanEvaluationResultSchema: () => LeanEvaluationResultSchema,
|
|
72
74
|
LiveTraceEventSchema: () => LiveTraceEventSchema,
|
|
73
75
|
LiveTraceEventType: () => LiveTraceEventType,
|
|
76
|
+
LlmJudgeAssertionSchema: () => LlmJudgeAssertionSchema,
|
|
74
77
|
LocalProjectConfigSchema: () => LocalProjectConfigSchema,
|
|
75
78
|
MCPServerConfigSchema: () => MCPServerConfigSchema,
|
|
76
79
|
MetaSiteConfigSchema: () => MetaSiteConfigSchema,
|
|
@@ -86,6 +89,7 @@ __export(index_exports, {
|
|
|
86
89
|
SkillMetadataSchema: () => SkillMetadataSchema,
|
|
87
90
|
SkillSchema: () => SkillSchema,
|
|
88
91
|
SkillVersionSchema: () => SkillVersionSchema,
|
|
92
|
+
SkillWasCalledAssertionSchema: () => SkillWasCalledAssertionSchema,
|
|
89
93
|
SkillsGroupSchema: () => SkillsGroupSchema,
|
|
90
94
|
TRACE_EVENT_PREFIX: () => TRACE_EVENT_PREFIX,
|
|
91
95
|
TargetSchema: () => TargetSchema,
|
|
@@ -492,34 +496,67 @@ var TestSchema = import_zod17.z.discriminatedUnion("type", [
|
|
|
492
496
|
PlaywrightNLTestSchema
|
|
493
497
|
]);
|
|
494
498
|
|
|
495
|
-
// src/scenario/
|
|
499
|
+
// src/scenario/assertions.ts
|
|
496
500
|
var import_zod18 = require("zod");
|
|
497
|
-
var
|
|
501
|
+
var SkillWasCalledAssertionSchema = import_zod18.z.object({
|
|
502
|
+
type: import_zod18.z.literal("skill_was_called"),
|
|
503
|
+
/** Name of the skill that must have been called (matched against trace Skill tool args) */
|
|
504
|
+
skillName: import_zod18.z.string()
|
|
505
|
+
});
|
|
506
|
+
var BuildPassedAssertionSchema = import_zod18.z.object({
|
|
507
|
+
type: import_zod18.z.literal("build_passed"),
|
|
508
|
+
/** Command to run (default: "yarn build") */
|
|
509
|
+
command: import_zod18.z.string().optional(),
|
|
510
|
+
/** Expected exit code (default: 0) */
|
|
511
|
+
expectedExitCode: import_zod18.z.number().int().optional()
|
|
512
|
+
});
|
|
513
|
+
var LlmJudgeAssertionSchema = import_zod18.z.object({
|
|
514
|
+
type: import_zod18.z.literal("llm_judge"),
|
|
515
|
+
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
516
|
+
prompt: import_zod18.z.string(),
|
|
517
|
+
/** Optional system prompt for the judge (default asks for JSON with score) */
|
|
518
|
+
systemPrompt: import_zod18.z.string().optional(),
|
|
519
|
+
/** Minimum score to pass (0–100, default 70) */
|
|
520
|
+
minScore: import_zod18.z.number().int().min(0).max(100).optional(),
|
|
521
|
+
/** Model for the judge (e.g. claude-3-5-haiku) */
|
|
522
|
+
model: import_zod18.z.string().optional(),
|
|
523
|
+
maxTokens: import_zod18.z.number().int().optional(),
|
|
524
|
+
temperature: import_zod18.z.number().min(0).max(1).optional()
|
|
525
|
+
});
|
|
526
|
+
var AssertionSchema = import_zod18.z.discriminatedUnion("type", [
|
|
527
|
+
SkillWasCalledAssertionSchema,
|
|
528
|
+
BuildPassedAssertionSchema,
|
|
529
|
+
LlmJudgeAssertionSchema
|
|
530
|
+
]);
|
|
531
|
+
|
|
532
|
+
// src/scenario/environment.ts
|
|
533
|
+
var import_zod19 = require("zod");
|
|
534
|
+
var LocalProjectConfigSchema = import_zod19.z.object({
|
|
498
535
|
/** Template ID to use for the local project */
|
|
499
|
-
templateId:
|
|
536
|
+
templateId: import_zod19.z.string().optional(),
|
|
500
537
|
/** Files to create in the project */
|
|
501
|
-
files:
|
|
502
|
-
|
|
503
|
-
path:
|
|
504
|
-
content:
|
|
538
|
+
files: import_zod19.z.array(
|
|
539
|
+
import_zod19.z.object({
|
|
540
|
+
path: import_zod19.z.string().min(1),
|
|
541
|
+
content: import_zod19.z.string().min(1)
|
|
505
542
|
})
|
|
506
543
|
).optional()
|
|
507
544
|
});
|
|
508
|
-
var MetaSiteConfigSchema =
|
|
509
|
-
configurations:
|
|
510
|
-
|
|
511
|
-
name:
|
|
512
|
-
apiCalls:
|
|
513
|
-
|
|
514
|
-
url:
|
|
515
|
-
method:
|
|
516
|
-
body:
|
|
545
|
+
var MetaSiteConfigSchema = import_zod19.z.object({
|
|
546
|
+
configurations: import_zod19.z.array(
|
|
547
|
+
import_zod19.z.object({
|
|
548
|
+
name: import_zod19.z.string().min(1),
|
|
549
|
+
apiCalls: import_zod19.z.array(
|
|
550
|
+
import_zod19.z.object({
|
|
551
|
+
url: import_zod19.z.string().url(),
|
|
552
|
+
method: import_zod19.z.enum(["POST", "PUT"]),
|
|
553
|
+
body: import_zod19.z.string()
|
|
517
554
|
})
|
|
518
555
|
)
|
|
519
556
|
})
|
|
520
557
|
).optional()
|
|
521
558
|
});
|
|
522
|
-
var EnvironmentSchema =
|
|
559
|
+
var EnvironmentSchema = import_zod19.z.object({
|
|
523
560
|
/** Local project configuration */
|
|
524
561
|
localProject: LocalProjectConfigSchema.optional(),
|
|
525
562
|
/** Meta site configuration */
|
|
@@ -527,18 +564,20 @@ var EnvironmentSchema = import_zod18.z.object({
|
|
|
527
564
|
});
|
|
528
565
|
|
|
529
566
|
// src/scenario/test-scenario.ts
|
|
530
|
-
var
|
|
531
|
-
var ExpectedFileSchema =
|
|
567
|
+
var import_zod20 = require("zod");
|
|
568
|
+
var ExpectedFileSchema = import_zod20.z.object({
|
|
532
569
|
/** Relative path where the file should be created */
|
|
533
|
-
path:
|
|
570
|
+
path: import_zod20.z.string(),
|
|
534
571
|
/** Optional expected content */
|
|
535
|
-
content:
|
|
572
|
+
content: import_zod20.z.string().optional()
|
|
536
573
|
});
|
|
537
574
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
538
575
|
/** The prompt sent to the agent to trigger the task */
|
|
539
|
-
triggerPrompt:
|
|
576
|
+
triggerPrompt: import_zod20.z.string().min(10),
|
|
540
577
|
/** ID of the template to use for this scenario */
|
|
541
|
-
templateId:
|
|
578
|
+
templateId: import_zod20.z.string().optional(),
|
|
579
|
+
/** Assertions to evaluate for this scenario */
|
|
580
|
+
assertions: import_zod20.z.array(AssertionSchema).optional()
|
|
542
581
|
});
|
|
543
582
|
var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
544
583
|
id: true,
|
|
@@ -549,10 +588,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
|
549
588
|
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
550
589
|
|
|
551
590
|
// src/suite/test-suite.ts
|
|
552
|
-
var
|
|
591
|
+
var import_zod21 = require("zod");
|
|
553
592
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
554
593
|
/** IDs of test scenarios in this suite */
|
|
555
|
-
scenarioIds:
|
|
594
|
+
scenarioIds: import_zod21.z.array(import_zod21.z.string())
|
|
556
595
|
});
|
|
557
596
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
558
597
|
id: true,
|
|
@@ -563,21 +602,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
563
602
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
564
603
|
|
|
565
604
|
// src/evaluation/metrics.ts
|
|
566
|
-
var
|
|
567
|
-
var TokenUsageSchema =
|
|
568
|
-
prompt:
|
|
569
|
-
completion:
|
|
570
|
-
total:
|
|
571
|
-
});
|
|
572
|
-
var EvalMetricsSchema =
|
|
573
|
-
totalAssertions:
|
|
574
|
-
passed:
|
|
575
|
-
failed:
|
|
576
|
-
skipped:
|
|
577
|
-
errors:
|
|
578
|
-
passRate:
|
|
579
|
-
avgDuration:
|
|
580
|
-
totalDuration:
|
|
605
|
+
var import_zod22 = require("zod");
|
|
606
|
+
var TokenUsageSchema = import_zod22.z.object({
|
|
607
|
+
prompt: import_zod22.z.number(),
|
|
608
|
+
completion: import_zod22.z.number(),
|
|
609
|
+
total: import_zod22.z.number()
|
|
610
|
+
});
|
|
611
|
+
var EvalMetricsSchema = import_zod22.z.object({
|
|
612
|
+
totalAssertions: import_zod22.z.number(),
|
|
613
|
+
passed: import_zod22.z.number(),
|
|
614
|
+
failed: import_zod22.z.number(),
|
|
615
|
+
skipped: import_zod22.z.number(),
|
|
616
|
+
errors: import_zod22.z.number(),
|
|
617
|
+
passRate: import_zod22.z.number(),
|
|
618
|
+
avgDuration: import_zod22.z.number(),
|
|
619
|
+
totalDuration: import_zod22.z.number()
|
|
581
620
|
});
|
|
582
621
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
583
622
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -587,7 +626,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
587
626
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
588
627
|
return EvalStatus2;
|
|
589
628
|
})(EvalStatus || {});
|
|
590
|
-
var EvalStatusSchema =
|
|
629
|
+
var EvalStatusSchema = import_zod22.z.enum(EvalStatus);
|
|
591
630
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
592
631
|
LLMStepType2["COMPLETION"] = "completion";
|
|
593
632
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -595,129 +634,46 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
595
634
|
LLMStepType2["THINKING"] = "thinking";
|
|
596
635
|
return LLMStepType2;
|
|
597
636
|
})(LLMStepType || {});
|
|
598
|
-
var LLMTraceStepSchema =
|
|
599
|
-
id:
|
|
600
|
-
stepNumber:
|
|
601
|
-
type:
|
|
602
|
-
model:
|
|
603
|
-
provider:
|
|
604
|
-
startedAt:
|
|
605
|
-
durationMs:
|
|
637
|
+
var LLMTraceStepSchema = import_zod22.z.object({
|
|
638
|
+
id: import_zod22.z.string(),
|
|
639
|
+
stepNumber: import_zod22.z.number(),
|
|
640
|
+
type: import_zod22.z.enum(LLMStepType),
|
|
641
|
+
model: import_zod22.z.string(),
|
|
642
|
+
provider: import_zod22.z.string(),
|
|
643
|
+
startedAt: import_zod22.z.string(),
|
|
644
|
+
durationMs: import_zod22.z.number(),
|
|
606
645
|
tokenUsage: TokenUsageSchema,
|
|
607
|
-
costUsd:
|
|
608
|
-
toolName:
|
|
609
|
-
toolArguments:
|
|
610
|
-
inputPreview:
|
|
611
|
-
outputPreview:
|
|
612
|
-
success:
|
|
613
|
-
error:
|
|
614
|
-
});
|
|
615
|
-
var LLMBreakdownStatsSchema =
|
|
616
|
-
count:
|
|
617
|
-
durationMs:
|
|
618
|
-
tokens:
|
|
619
|
-
costUsd:
|
|
620
|
-
});
|
|
621
|
-
var LLMTraceSummarySchema =
|
|
622
|
-
totalSteps:
|
|
623
|
-
totalDurationMs:
|
|
646
|
+
costUsd: import_zod22.z.number(),
|
|
647
|
+
toolName: import_zod22.z.string().optional(),
|
|
648
|
+
toolArguments: import_zod22.z.string().optional(),
|
|
649
|
+
inputPreview: import_zod22.z.string().optional(),
|
|
650
|
+
outputPreview: import_zod22.z.string().optional(),
|
|
651
|
+
success: import_zod22.z.boolean(),
|
|
652
|
+
error: import_zod22.z.string().optional()
|
|
653
|
+
});
|
|
654
|
+
var LLMBreakdownStatsSchema = import_zod22.z.object({
|
|
655
|
+
count: import_zod22.z.number(),
|
|
656
|
+
durationMs: import_zod22.z.number(),
|
|
657
|
+
tokens: import_zod22.z.number(),
|
|
658
|
+
costUsd: import_zod22.z.number()
|
|
659
|
+
});
|
|
660
|
+
var LLMTraceSummarySchema = import_zod22.z.object({
|
|
661
|
+
totalSteps: import_zod22.z.number(),
|
|
662
|
+
totalDurationMs: import_zod22.z.number(),
|
|
624
663
|
totalTokens: TokenUsageSchema,
|
|
625
|
-
totalCostUsd:
|
|
626
|
-
stepTypeBreakdown:
|
|
627
|
-
modelBreakdown:
|
|
628
|
-
modelsUsed:
|
|
629
|
-
});
|
|
630
|
-
var LLMTraceSchema =
|
|
631
|
-
id:
|
|
632
|
-
steps:
|
|
664
|
+
totalCostUsd: import_zod22.z.number(),
|
|
665
|
+
stepTypeBreakdown: import_zod22.z.record(import_zod22.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
666
|
+
modelBreakdown: import_zod22.z.record(import_zod22.z.string(), LLMBreakdownStatsSchema),
|
|
667
|
+
modelsUsed: import_zod22.z.array(import_zod22.z.string())
|
|
668
|
+
});
|
|
669
|
+
var LLMTraceSchema = import_zod22.z.object({
|
|
670
|
+
id: import_zod22.z.string(),
|
|
671
|
+
steps: import_zod22.z.array(LLMTraceStepSchema),
|
|
633
672
|
summary: LLMTraceSummarySchema
|
|
634
673
|
});
|
|
635
674
|
|
|
636
675
|
// src/evaluation/eval-result.ts
|
|
637
|
-
var
|
|
638
|
-
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
639
|
-
AssertionResultStatus2["PASSED"] = "passed";
|
|
640
|
-
AssertionResultStatus2["FAILED"] = "failed";
|
|
641
|
-
AssertionResultStatus2["SKIPPED"] = "skipped";
|
|
642
|
-
AssertionResultStatus2["ERROR"] = "error";
|
|
643
|
-
return AssertionResultStatus2;
|
|
644
|
-
})(AssertionResultStatus || {});
|
|
645
|
-
var AssertionResultSchema = import_zod22.z.object({
|
|
646
|
-
id: import_zod22.z.string(),
|
|
647
|
-
assertionId: import_zod22.z.string(),
|
|
648
|
-
assertionType: import_zod22.z.string(),
|
|
649
|
-
assertionName: import_zod22.z.string(),
|
|
650
|
-
status: import_zod22.z.enum(AssertionResultStatus),
|
|
651
|
-
message: import_zod22.z.string().optional(),
|
|
652
|
-
expected: import_zod22.z.string().optional(),
|
|
653
|
-
actual: import_zod22.z.string().optional(),
|
|
654
|
-
duration: import_zod22.z.number().optional(),
|
|
655
|
-
details: import_zod22.z.record(import_zod22.z.string(), import_zod22.z.unknown()).optional(),
|
|
656
|
-
llmTraceSteps: import_zod22.z.array(LLMTraceStepSchema).optional()
|
|
657
|
-
});
|
|
658
|
-
var EvalRunResultSchema = import_zod22.z.object({
|
|
659
|
-
id: import_zod22.z.string(),
|
|
660
|
-
targetId: import_zod22.z.string(),
|
|
661
|
-
targetName: import_zod22.z.string().optional(),
|
|
662
|
-
scenarioId: import_zod22.z.string(),
|
|
663
|
-
scenarioName: import_zod22.z.string(),
|
|
664
|
-
modelConfig: ModelConfigSchema.optional(),
|
|
665
|
-
assertionResults: import_zod22.z.array(AssertionResultSchema),
|
|
666
|
-
metrics: EvalMetricsSchema.optional(),
|
|
667
|
-
passed: import_zod22.z.number(),
|
|
668
|
-
failed: import_zod22.z.number(),
|
|
669
|
-
passRate: import_zod22.z.number(),
|
|
670
|
-
duration: import_zod22.z.number(),
|
|
671
|
-
outputText: import_zod22.z.string().optional(),
|
|
672
|
-
files: import_zod22.z.array(ExpectedFileSchema).optional(),
|
|
673
|
-
startedAt: import_zod22.z.string().optional(),
|
|
674
|
-
completedAt: import_zod22.z.string().optional(),
|
|
675
|
-
llmTrace: LLMTraceSchema.optional()
|
|
676
|
-
});
|
|
677
|
-
var PromptResultSchema = import_zod22.z.object({
|
|
678
|
-
text: import_zod22.z.string(),
|
|
679
|
-
files: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
680
|
-
finishReason: import_zod22.z.string().optional(),
|
|
681
|
-
reasoning: import_zod22.z.string().optional(),
|
|
682
|
-
reasoningDetails: import_zod22.z.unknown().optional(),
|
|
683
|
-
toolCalls: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
684
|
-
toolResults: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
685
|
-
warnings: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
686
|
-
sources: import_zod22.z.array(import_zod22.z.unknown()).optional(),
|
|
687
|
-
steps: import_zod22.z.array(import_zod22.z.unknown()),
|
|
688
|
-
generationTimeMs: import_zod22.z.number(),
|
|
689
|
-
prompt: import_zod22.z.string(),
|
|
690
|
-
systemPrompt: import_zod22.z.string(),
|
|
691
|
-
usage: import_zod22.z.object({
|
|
692
|
-
totalTokens: import_zod22.z.number().optional(),
|
|
693
|
-
totalMicrocentsSpent: import_zod22.z.number().optional()
|
|
694
|
-
})
|
|
695
|
-
});
|
|
696
|
-
var EvaluationResultSchema = import_zod22.z.object({
|
|
697
|
-
id: import_zod22.z.string(),
|
|
698
|
-
runId: import_zod22.z.string(),
|
|
699
|
-
timestamp: import_zod22.z.number(),
|
|
700
|
-
promptResult: PromptResultSchema,
|
|
701
|
-
testResults: import_zod22.z.array(import_zod22.z.unknown()),
|
|
702
|
-
tags: import_zod22.z.array(import_zod22.z.string()).optional(),
|
|
703
|
-
feedback: import_zod22.z.string().optional(),
|
|
704
|
-
score: import_zod22.z.number(),
|
|
705
|
-
suiteId: import_zod22.z.string().optional()
|
|
706
|
-
});
|
|
707
|
-
var LeanEvaluationResultSchema = import_zod22.z.object({
|
|
708
|
-
id: import_zod22.z.string(),
|
|
709
|
-
runId: import_zod22.z.string(),
|
|
710
|
-
timestamp: import_zod22.z.number(),
|
|
711
|
-
tags: import_zod22.z.array(import_zod22.z.string()).optional(),
|
|
712
|
-
scenarioId: import_zod22.z.string(),
|
|
713
|
-
scenarioVersion: import_zod22.z.number().optional(),
|
|
714
|
-
targetId: import_zod22.z.string(),
|
|
715
|
-
targetVersion: import_zod22.z.number().optional(),
|
|
716
|
-
suiteId: import_zod22.z.string().optional(),
|
|
717
|
-
score: import_zod22.z.number(),
|
|
718
|
-
time: import_zod22.z.number().optional(),
|
|
719
|
-
microcentsSpent: import_zod22.z.number().optional()
|
|
720
|
-
});
|
|
676
|
+
var import_zod25 = require("zod");
|
|
721
677
|
|
|
722
678
|
// src/evaluation/eval-run.ts
|
|
723
679
|
var import_zod24 = require("zod");
|
|
@@ -930,11 +886,97 @@ var EvaluationLogSchema = import_zod24.z.object({
|
|
|
930
886
|
});
|
|
931
887
|
var LLM_TIMEOUT = 12e4;
|
|
932
888
|
|
|
889
|
+
// src/evaluation/eval-result.ts
|
|
890
|
+
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
891
|
+
AssertionResultStatus2["PASSED"] = "passed";
|
|
892
|
+
AssertionResultStatus2["FAILED"] = "failed";
|
|
893
|
+
AssertionResultStatus2["SKIPPED"] = "skipped";
|
|
894
|
+
AssertionResultStatus2["ERROR"] = "error";
|
|
895
|
+
return AssertionResultStatus2;
|
|
896
|
+
})(AssertionResultStatus || {});
|
|
897
|
+
var AssertionResultSchema = import_zod25.z.object({
|
|
898
|
+
id: import_zod25.z.string(),
|
|
899
|
+
assertionId: import_zod25.z.string(),
|
|
900
|
+
assertionType: import_zod25.z.string(),
|
|
901
|
+
assertionName: import_zod25.z.string(),
|
|
902
|
+
status: import_zod25.z.enum(AssertionResultStatus),
|
|
903
|
+
message: import_zod25.z.string().optional(),
|
|
904
|
+
expected: import_zod25.z.string().optional(),
|
|
905
|
+
actual: import_zod25.z.string().optional(),
|
|
906
|
+
duration: import_zod25.z.number().optional(),
|
|
907
|
+
details: import_zod25.z.record(import_zod25.z.string(), import_zod25.z.unknown()).optional(),
|
|
908
|
+
llmTraceSteps: import_zod25.z.array(LLMTraceStepSchema).optional()
|
|
909
|
+
});
|
|
910
|
+
var EvalRunResultSchema = import_zod25.z.object({
|
|
911
|
+
id: import_zod25.z.string(),
|
|
912
|
+
targetId: import_zod25.z.string(),
|
|
913
|
+
targetName: import_zod25.z.string().optional(),
|
|
914
|
+
scenarioId: import_zod25.z.string(),
|
|
915
|
+
scenarioName: import_zod25.z.string(),
|
|
916
|
+
modelConfig: ModelConfigSchema.optional(),
|
|
917
|
+
assertionResults: import_zod25.z.array(AssertionResultSchema),
|
|
918
|
+
metrics: EvalMetricsSchema.optional(),
|
|
919
|
+
passed: import_zod25.z.number(),
|
|
920
|
+
failed: import_zod25.z.number(),
|
|
921
|
+
passRate: import_zod25.z.number(),
|
|
922
|
+
duration: import_zod25.z.number(),
|
|
923
|
+
outputText: import_zod25.z.string().optional(),
|
|
924
|
+
files: import_zod25.z.array(ExpectedFileSchema).optional(),
|
|
925
|
+
fileDiffs: import_zod25.z.array(DiffContentSchema).optional(),
|
|
926
|
+
startedAt: import_zod25.z.string().optional(),
|
|
927
|
+
completedAt: import_zod25.z.string().optional(),
|
|
928
|
+
llmTrace: LLMTraceSchema.optional()
|
|
929
|
+
});
|
|
930
|
+
var PromptResultSchema = import_zod25.z.object({
|
|
931
|
+
text: import_zod25.z.string(),
|
|
932
|
+
files: import_zod25.z.array(import_zod25.z.unknown()).optional(),
|
|
933
|
+
finishReason: import_zod25.z.string().optional(),
|
|
934
|
+
reasoning: import_zod25.z.string().optional(),
|
|
935
|
+
reasoningDetails: import_zod25.z.unknown().optional(),
|
|
936
|
+
toolCalls: import_zod25.z.array(import_zod25.z.unknown()).optional(),
|
|
937
|
+
toolResults: import_zod25.z.array(import_zod25.z.unknown()).optional(),
|
|
938
|
+
warnings: import_zod25.z.array(import_zod25.z.unknown()).optional(),
|
|
939
|
+
sources: import_zod25.z.array(import_zod25.z.unknown()).optional(),
|
|
940
|
+
steps: import_zod25.z.array(import_zod25.z.unknown()),
|
|
941
|
+
generationTimeMs: import_zod25.z.number(),
|
|
942
|
+
prompt: import_zod25.z.string(),
|
|
943
|
+
systemPrompt: import_zod25.z.string(),
|
|
944
|
+
usage: import_zod25.z.object({
|
|
945
|
+
totalTokens: import_zod25.z.number().optional(),
|
|
946
|
+
totalMicrocentsSpent: import_zod25.z.number().optional()
|
|
947
|
+
})
|
|
948
|
+
});
|
|
949
|
+
var EvaluationResultSchema = import_zod25.z.object({
|
|
950
|
+
id: import_zod25.z.string(),
|
|
951
|
+
runId: import_zod25.z.string(),
|
|
952
|
+
timestamp: import_zod25.z.number(),
|
|
953
|
+
promptResult: PromptResultSchema,
|
|
954
|
+
testResults: import_zod25.z.array(import_zod25.z.unknown()),
|
|
955
|
+
tags: import_zod25.z.array(import_zod25.z.string()).optional(),
|
|
956
|
+
feedback: import_zod25.z.string().optional(),
|
|
957
|
+
score: import_zod25.z.number(),
|
|
958
|
+
suiteId: import_zod25.z.string().optional()
|
|
959
|
+
});
|
|
960
|
+
var LeanEvaluationResultSchema = import_zod25.z.object({
|
|
961
|
+
id: import_zod25.z.string(),
|
|
962
|
+
runId: import_zod25.z.string(),
|
|
963
|
+
timestamp: import_zod25.z.number(),
|
|
964
|
+
tags: import_zod25.z.array(import_zod25.z.string()).optional(),
|
|
965
|
+
scenarioId: import_zod25.z.string(),
|
|
966
|
+
scenarioVersion: import_zod25.z.number().optional(),
|
|
967
|
+
targetId: import_zod25.z.string(),
|
|
968
|
+
targetVersion: import_zod25.z.number().optional(),
|
|
969
|
+
suiteId: import_zod25.z.string().optional(),
|
|
970
|
+
score: import_zod25.z.number(),
|
|
971
|
+
time: import_zod25.z.number().optional(),
|
|
972
|
+
microcentsSpent: import_zod25.z.number().optional()
|
|
973
|
+
});
|
|
974
|
+
|
|
933
975
|
// src/project/project.ts
|
|
934
|
-
var
|
|
976
|
+
var import_zod26 = require("zod");
|
|
935
977
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
936
|
-
appId:
|
|
937
|
-
appSecret:
|
|
978
|
+
appId: import_zod26.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
979
|
+
appSecret: import_zod26.z.string().optional().describe("The secret of the app in Dev Center")
|
|
938
980
|
});
|
|
939
981
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
940
982
|
id: true,
|
|
@@ -945,10 +987,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
945
987
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
946
988
|
|
|
947
989
|
// src/template/template.ts
|
|
948
|
-
var
|
|
990
|
+
var import_zod27 = require("zod");
|
|
949
991
|
var TemplateSchema = TenantEntitySchema.extend({
|
|
950
992
|
/** URL to download the template from */
|
|
951
|
-
downloadUrl:
|
|
993
|
+
downloadUrl: import_zod27.z.url()
|
|
952
994
|
});
|
|
953
995
|
var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
954
996
|
id: true,
|
|
@@ -966,9 +1008,11 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
|
966
1008
|
ApiCallSchema,
|
|
967
1009
|
AssertionResultSchema,
|
|
968
1010
|
AssertionResultStatus,
|
|
1011
|
+
AssertionSchema,
|
|
969
1012
|
BaseEntitySchema,
|
|
970
1013
|
BaseTestSchema,
|
|
971
1014
|
BuildCheckTestSchema,
|
|
1015
|
+
BuildPassedAssertionSchema,
|
|
972
1016
|
CommandExecutionSchema,
|
|
973
1017
|
CommandExecutionTestSchema,
|
|
974
1018
|
CreateAgentInputSchema,
|
|
@@ -1010,6 +1054,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
|
1010
1054
|
LeanEvaluationResultSchema,
|
|
1011
1055
|
LiveTraceEventSchema,
|
|
1012
1056
|
LiveTraceEventType,
|
|
1057
|
+
LlmJudgeAssertionSchema,
|
|
1013
1058
|
LocalProjectConfigSchema,
|
|
1014
1059
|
MCPServerConfigSchema,
|
|
1015
1060
|
MetaSiteConfigSchema,
|
|
@@ -1025,6 +1070,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
|
1025
1070
|
SkillMetadataSchema,
|
|
1026
1071
|
SkillSchema,
|
|
1027
1072
|
SkillVersionSchema,
|
|
1073
|
+
SkillWasCalledAssertionSchema,
|
|
1028
1074
|
SkillsGroupSchema,
|
|
1029
1075
|
TRACE_EVENT_PREFIX,
|
|
1030
1076
|
TargetSchema,
|