@wix/evalforge-types 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +439 -317
- package/build/index.js.map +4 -4
- package/build/index.mjs +431 -317
- package/build/index.mjs.map +4 -4
- package/build/types/assertion/assertion.d.ts +134 -37
- package/build/types/assertion/index.d.ts +2 -1
- package/build/types/assertion/system-assertions.d.ts +42 -0
- package/build/types/scenario/test-scenario.d.ts +12 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -454,22 +454,145 @@ var EnvironmentSchema = z19.object({
|
|
|
454
454
|
});
|
|
455
455
|
|
|
456
456
|
// src/scenario/test-scenario.ts
|
|
457
|
+
import { z as z21 } from "zod";
|
|
458
|
+
|
|
459
|
+
// src/assertion/assertion.ts
|
|
457
460
|
import { z as z20 } from "zod";
|
|
458
|
-
var
|
|
461
|
+
var AssertionTypeSchema = z20.enum([
|
|
462
|
+
"skill_was_called",
|
|
463
|
+
"build_passed",
|
|
464
|
+
"llm_judge"
|
|
465
|
+
]);
|
|
466
|
+
var AssertionParameterTypeSchema = z20.enum([
|
|
467
|
+
"string",
|
|
468
|
+
"number",
|
|
469
|
+
"boolean"
|
|
470
|
+
]);
|
|
471
|
+
var AssertionParameterSchema = z20.object({
|
|
472
|
+
/** Parameter name (used as key in params object) */
|
|
473
|
+
name: z20.string().min(1),
|
|
474
|
+
/** Display label for the parameter */
|
|
475
|
+
label: z20.string().min(1),
|
|
476
|
+
/** Parameter type */
|
|
477
|
+
type: AssertionParameterTypeSchema,
|
|
478
|
+
/** Whether this parameter is required */
|
|
479
|
+
required: z20.boolean(),
|
|
480
|
+
/** Default value (optional, used when not provided) */
|
|
481
|
+
defaultValue: z20.union([z20.string(), z20.number(), z20.boolean()]).optional()
|
|
482
|
+
});
|
|
483
|
+
var ScenarioAssertionLinkSchema = z20.object({
|
|
484
|
+
/** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
|
|
485
|
+
assertionId: z20.string(),
|
|
486
|
+
/** Parameter values for this assertion in this scenario */
|
|
487
|
+
params: z20.record(
|
|
488
|
+
z20.string(),
|
|
489
|
+
z20.union([z20.string(), z20.number(), z20.boolean(), z20.null()])
|
|
490
|
+
).optional()
|
|
491
|
+
});
|
|
492
|
+
var SkillWasCalledConfigSchema = z20.object({
|
|
493
|
+
/** Name of the skill that must have been called */
|
|
494
|
+
skillName: z20.string().min(1)
|
|
495
|
+
});
|
|
496
|
+
var BuildPassedConfigSchema = z20.strictObject({
|
|
497
|
+
/** Command to run (default: "yarn build") */
|
|
498
|
+
command: z20.string().optional(),
|
|
499
|
+
/** Expected exit code (default: 0) */
|
|
500
|
+
expectedExitCode: z20.number().int().optional()
|
|
501
|
+
});
|
|
502
|
+
var LlmJudgeConfigSchema = z20.object({
|
|
503
|
+
/**
|
|
504
|
+
* Prompt template with placeholders:
|
|
505
|
+
* - {{output}}: agent's final output
|
|
506
|
+
* - {{cwd}}: working directory
|
|
507
|
+
* - {{changedFiles}}: all files changed (new, modified)
|
|
508
|
+
* - {{modifiedFiles}}: only existing files that were modified
|
|
509
|
+
* - {{newFiles}}: only new files that were created
|
|
510
|
+
* - {{trace}}: step-by-step trace of tool calls
|
|
511
|
+
* - Custom parameters defined in the parameters array
|
|
512
|
+
*/
|
|
513
|
+
prompt: z20.string().min(1),
|
|
514
|
+
/** Optional system prompt for the judge */
|
|
515
|
+
systemPrompt: z20.string().optional(),
|
|
516
|
+
/** Minimum score to pass (0-100, default 70) */
|
|
517
|
+
minScore: z20.number().int().min(0).max(100).optional(),
|
|
518
|
+
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
519
|
+
model: z20.string().optional(),
|
|
520
|
+
/** Max output tokens */
|
|
521
|
+
maxTokens: z20.number().int().optional(),
|
|
522
|
+
/** Temperature (0-1) */
|
|
523
|
+
temperature: z20.number().min(0).max(1).optional(),
|
|
524
|
+
/** User-defined parameters for this assertion */
|
|
525
|
+
parameters: z20.array(AssertionParameterSchema).optional()
|
|
526
|
+
});
|
|
527
|
+
var AssertionConfigSchema = z20.union([
|
|
528
|
+
LlmJudgeConfigSchema,
|
|
529
|
+
// requires prompt - check first
|
|
530
|
+
SkillWasCalledConfigSchema,
|
|
531
|
+
// requires skillName
|
|
532
|
+
BuildPassedConfigSchema,
|
|
533
|
+
// all optional, uses strictObject to reject unknown keys
|
|
534
|
+
z20.object({})
|
|
535
|
+
// fallback empty config
|
|
536
|
+
]);
|
|
537
|
+
var CustomAssertionSchema = TenantEntitySchema.extend({
|
|
538
|
+
/** The assertion type */
|
|
539
|
+
type: AssertionTypeSchema,
|
|
540
|
+
/** Type-specific configuration */
|
|
541
|
+
config: AssertionConfigSchema
|
|
542
|
+
});
|
|
543
|
+
var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
|
|
544
|
+
id: true,
|
|
545
|
+
createdAt: true,
|
|
546
|
+
updatedAt: true,
|
|
547
|
+
deleted: true
|
|
548
|
+
});
|
|
549
|
+
var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
|
|
550
|
+
function validateAssertionConfig(type, config) {
|
|
551
|
+
switch (type) {
|
|
552
|
+
case "skill_was_called":
|
|
553
|
+
return SkillWasCalledConfigSchema.safeParse(config).success;
|
|
554
|
+
case "build_passed":
|
|
555
|
+
return BuildPassedConfigSchema.safeParse(config).success;
|
|
556
|
+
case "llm_judge":
|
|
557
|
+
return LlmJudgeConfigSchema.safeParse(config).success;
|
|
558
|
+
default:
|
|
559
|
+
return false;
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
function getSkillWasCalledConfig(assertion) {
|
|
563
|
+
if (assertion.type !== "skill_was_called") return null;
|
|
564
|
+
const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
|
|
565
|
+
return result.success ? result.data : null;
|
|
566
|
+
}
|
|
567
|
+
function getBuildPassedConfig(assertion) {
|
|
568
|
+
if (assertion.type !== "build_passed") return null;
|
|
569
|
+
const result = BuildPassedConfigSchema.safeParse(assertion.config);
|
|
570
|
+
return result.success ? result.data : null;
|
|
571
|
+
}
|
|
572
|
+
function getLlmJudgeConfig(assertion) {
|
|
573
|
+
if (assertion.type !== "llm_judge") return null;
|
|
574
|
+
const result = LlmJudgeConfigSchema.safeParse(assertion.config);
|
|
575
|
+
return result.success ? result.data : null;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
// src/scenario/test-scenario.ts
|
|
579
|
+
var ExpectedFileSchema = z21.object({
|
|
459
580
|
/** Relative path where the file should be created */
|
|
460
|
-
path:
|
|
581
|
+
path: z21.string(),
|
|
461
582
|
/** Optional expected content */
|
|
462
|
-
content:
|
|
583
|
+
content: z21.string().optional()
|
|
463
584
|
});
|
|
464
585
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
465
586
|
/** The prompt sent to the agent to trigger the task */
|
|
466
|
-
triggerPrompt:
|
|
587
|
+
triggerPrompt: z21.string().min(10),
|
|
467
588
|
/** ID of the template to use for this scenario (null = no template) */
|
|
468
|
-
templateId:
|
|
589
|
+
templateId: z21.string().nullish(),
|
|
469
590
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
470
|
-
assertions:
|
|
471
|
-
/** IDs of saved assertions to evaluate (from assertions table) */
|
|
472
|
-
assertionIds:
|
|
591
|
+
assertions: z21.array(AssertionSchema).optional(),
|
|
592
|
+
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
593
|
+
assertionIds: z21.array(z21.string()).optional(),
|
|
594
|
+
/** Linked assertions with per-scenario parameter values */
|
|
595
|
+
assertionLinks: z21.array(ScenarioAssertionLinkSchema).optional()
|
|
473
596
|
});
|
|
474
597
|
var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
475
598
|
id: true,
|
|
@@ -480,10 +603,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
|
480
603
|
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
481
604
|
|
|
482
605
|
// src/suite/test-suite.ts
|
|
483
|
-
import { z as
|
|
606
|
+
import { z as z22 } from "zod";
|
|
484
607
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
485
608
|
/** IDs of test scenarios in this suite */
|
|
486
|
-
scenarioIds:
|
|
609
|
+
scenarioIds: z22.array(z22.string())
|
|
487
610
|
});
|
|
488
611
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
489
612
|
id: true,
|
|
@@ -494,21 +617,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
494
617
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
495
618
|
|
|
496
619
|
// src/evaluation/metrics.ts
|
|
497
|
-
import { z as
|
|
498
|
-
var TokenUsageSchema =
|
|
499
|
-
prompt:
|
|
500
|
-
completion:
|
|
501
|
-
total:
|
|
502
|
-
});
|
|
503
|
-
var EvalMetricsSchema =
|
|
504
|
-
totalAssertions:
|
|
505
|
-
passed:
|
|
506
|
-
failed:
|
|
507
|
-
skipped:
|
|
508
|
-
errors:
|
|
509
|
-
passRate:
|
|
510
|
-
avgDuration:
|
|
511
|
-
totalDuration:
|
|
620
|
+
import { z as z23 } from "zod";
|
|
621
|
+
var TokenUsageSchema = z23.object({
|
|
622
|
+
prompt: z23.number(),
|
|
623
|
+
completion: z23.number(),
|
|
624
|
+
total: z23.number()
|
|
625
|
+
});
|
|
626
|
+
var EvalMetricsSchema = z23.object({
|
|
627
|
+
totalAssertions: z23.number(),
|
|
628
|
+
passed: z23.number(),
|
|
629
|
+
failed: z23.number(),
|
|
630
|
+
skipped: z23.number(),
|
|
631
|
+
errors: z23.number(),
|
|
632
|
+
passRate: z23.number(),
|
|
633
|
+
avgDuration: z23.number(),
|
|
634
|
+
totalDuration: z23.number()
|
|
512
635
|
});
|
|
513
636
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
514
637
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -518,7 +641,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
518
641
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
519
642
|
return EvalStatus2;
|
|
520
643
|
})(EvalStatus || {});
|
|
521
|
-
var EvalStatusSchema =
|
|
644
|
+
var EvalStatusSchema = z23.enum(EvalStatus);
|
|
522
645
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
523
646
|
LLMStepType2["COMPLETION"] = "completion";
|
|
524
647
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -526,52 +649,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
526
649
|
LLMStepType2["THINKING"] = "thinking";
|
|
527
650
|
return LLMStepType2;
|
|
528
651
|
})(LLMStepType || {});
|
|
529
|
-
var LLMTraceStepSchema =
|
|
530
|
-
id:
|
|
531
|
-
stepNumber:
|
|
532
|
-
type:
|
|
533
|
-
model:
|
|
534
|
-
provider:
|
|
535
|
-
startedAt:
|
|
536
|
-
durationMs:
|
|
652
|
+
var LLMTraceStepSchema = z23.object({
|
|
653
|
+
id: z23.string(),
|
|
654
|
+
stepNumber: z23.number(),
|
|
655
|
+
type: z23.enum(LLMStepType),
|
|
656
|
+
model: z23.string(),
|
|
657
|
+
provider: z23.string(),
|
|
658
|
+
startedAt: z23.string(),
|
|
659
|
+
durationMs: z23.number(),
|
|
537
660
|
tokenUsage: TokenUsageSchema,
|
|
538
|
-
costUsd:
|
|
539
|
-
toolName:
|
|
540
|
-
toolArguments:
|
|
541
|
-
inputPreview:
|
|
542
|
-
outputPreview:
|
|
543
|
-
success:
|
|
544
|
-
error:
|
|
545
|
-
});
|
|
546
|
-
var LLMBreakdownStatsSchema =
|
|
547
|
-
count:
|
|
548
|
-
durationMs:
|
|
549
|
-
tokens:
|
|
550
|
-
costUsd:
|
|
551
|
-
});
|
|
552
|
-
var LLMTraceSummarySchema =
|
|
553
|
-
totalSteps:
|
|
554
|
-
totalDurationMs:
|
|
661
|
+
costUsd: z23.number(),
|
|
662
|
+
toolName: z23.string().optional(),
|
|
663
|
+
toolArguments: z23.string().optional(),
|
|
664
|
+
inputPreview: z23.string().optional(),
|
|
665
|
+
outputPreview: z23.string().optional(),
|
|
666
|
+
success: z23.boolean(),
|
|
667
|
+
error: z23.string().optional()
|
|
668
|
+
});
|
|
669
|
+
var LLMBreakdownStatsSchema = z23.object({
|
|
670
|
+
count: z23.number(),
|
|
671
|
+
durationMs: z23.number(),
|
|
672
|
+
tokens: z23.number(),
|
|
673
|
+
costUsd: z23.number()
|
|
674
|
+
});
|
|
675
|
+
var LLMTraceSummarySchema = z23.object({
|
|
676
|
+
totalSteps: z23.number(),
|
|
677
|
+
totalDurationMs: z23.number(),
|
|
555
678
|
totalTokens: TokenUsageSchema,
|
|
556
|
-
totalCostUsd:
|
|
557
|
-
stepTypeBreakdown:
|
|
558
|
-
modelBreakdown:
|
|
559
|
-
modelsUsed:
|
|
560
|
-
});
|
|
561
|
-
var LLMTraceSchema =
|
|
562
|
-
id:
|
|
563
|
-
steps:
|
|
679
|
+
totalCostUsd: z23.number(),
|
|
680
|
+
stepTypeBreakdown: z23.record(z23.string(), LLMBreakdownStatsSchema).optional(),
|
|
681
|
+
modelBreakdown: z23.record(z23.string(), LLMBreakdownStatsSchema),
|
|
682
|
+
modelsUsed: z23.array(z23.string())
|
|
683
|
+
});
|
|
684
|
+
var LLMTraceSchema = z23.object({
|
|
685
|
+
id: z23.string(),
|
|
686
|
+
steps: z23.array(LLMTraceStepSchema),
|
|
564
687
|
summary: LLMTraceSummarySchema
|
|
565
688
|
});
|
|
566
689
|
|
|
567
690
|
// src/evaluation/eval-result.ts
|
|
568
|
-
import { z as
|
|
691
|
+
import { z as z26 } from "zod";
|
|
569
692
|
|
|
570
693
|
// src/evaluation/eval-run.ts
|
|
571
|
-
import { z as
|
|
694
|
+
import { z as z25 } from "zod";
|
|
572
695
|
|
|
573
696
|
// src/evaluation/live-trace.ts
|
|
574
|
-
import { z as
|
|
697
|
+
import { z as z24 } from "zod";
|
|
575
698
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
576
699
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
577
700
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -585,37 +708,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
585
708
|
LiveTraceEventType2["USER"] = "user";
|
|
586
709
|
return LiveTraceEventType2;
|
|
587
710
|
})(LiveTraceEventType || {});
|
|
588
|
-
var LiveTraceEventSchema =
|
|
711
|
+
var LiveTraceEventSchema = z24.object({
|
|
589
712
|
/** The evaluation run ID */
|
|
590
|
-
evalRunId:
|
|
713
|
+
evalRunId: z24.string(),
|
|
591
714
|
/** The scenario ID being executed */
|
|
592
|
-
scenarioId:
|
|
715
|
+
scenarioId: z24.string(),
|
|
593
716
|
/** The scenario name for display */
|
|
594
|
-
scenarioName:
|
|
717
|
+
scenarioName: z24.string(),
|
|
595
718
|
/** The target ID (skill, agent, etc.) */
|
|
596
|
-
targetId:
|
|
719
|
+
targetId: z24.string(),
|
|
597
720
|
/** The target name for display */
|
|
598
|
-
targetName:
|
|
721
|
+
targetName: z24.string(),
|
|
599
722
|
/** Step number in the current scenario execution */
|
|
600
|
-
stepNumber:
|
|
723
|
+
stepNumber: z24.number(),
|
|
601
724
|
/** Type of trace event */
|
|
602
|
-
type:
|
|
725
|
+
type: z24.enum(LiveTraceEventType),
|
|
603
726
|
/** Tool name if this is a tool_use event */
|
|
604
|
-
toolName:
|
|
727
|
+
toolName: z24.string().optional(),
|
|
605
728
|
/** Tool arguments preview (truncated JSON) */
|
|
606
|
-
toolArgs:
|
|
729
|
+
toolArgs: z24.string().optional(),
|
|
607
730
|
/** Output preview (truncated text) */
|
|
608
|
-
outputPreview:
|
|
731
|
+
outputPreview: z24.string().optional(),
|
|
609
732
|
/** File path for file operations */
|
|
610
|
-
filePath:
|
|
733
|
+
filePath: z24.string().optional(),
|
|
611
734
|
/** Elapsed time in milliseconds for progress events */
|
|
612
|
-
elapsedMs:
|
|
735
|
+
elapsedMs: z24.number().optional(),
|
|
613
736
|
/** Thinking/reasoning text from Claude */
|
|
614
|
-
thinking:
|
|
737
|
+
thinking: z24.string().optional(),
|
|
615
738
|
/** Timestamp when this event occurred */
|
|
616
|
-
timestamp:
|
|
739
|
+
timestamp: z24.string(),
|
|
617
740
|
/** Whether this is the final event for this scenario */
|
|
618
|
-
isComplete:
|
|
741
|
+
isComplete: z24.boolean()
|
|
619
742
|
});
|
|
620
743
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
621
744
|
function parseTraceEventLine(line) {
|
|
@@ -643,14 +766,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
643
766
|
TriggerType2["MANUAL"] = "MANUAL";
|
|
644
767
|
return TriggerType2;
|
|
645
768
|
})(TriggerType || {});
|
|
646
|
-
var TriggerMetadataSchema =
|
|
647
|
-
version:
|
|
648
|
-
resourceUpdated:
|
|
769
|
+
var TriggerMetadataSchema = z25.object({
|
|
770
|
+
version: z25.string().optional(),
|
|
771
|
+
resourceUpdated: z25.array(z25.string()).optional()
|
|
649
772
|
});
|
|
650
|
-
var TriggerSchema =
|
|
651
|
-
id:
|
|
773
|
+
var TriggerSchema = z25.object({
|
|
774
|
+
id: z25.string(),
|
|
652
775
|
metadata: TriggerMetadataSchema.optional(),
|
|
653
|
-
type:
|
|
776
|
+
type: z25.enum(TriggerType)
|
|
654
777
|
});
|
|
655
778
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
656
779
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -668,28 +791,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
668
791
|
FailureSeverity2["LOW"] = "low";
|
|
669
792
|
return FailureSeverity2;
|
|
670
793
|
})(FailureSeverity || {});
|
|
671
|
-
var DiffLineTypeSchema =
|
|
672
|
-
var DiffLineSchema =
|
|
794
|
+
var DiffLineTypeSchema = z25.enum(["added", "removed", "unchanged"]);
|
|
795
|
+
var DiffLineSchema = z25.object({
|
|
673
796
|
type: DiffLineTypeSchema,
|
|
674
|
-
content:
|
|
675
|
-
lineNumber:
|
|
676
|
-
});
|
|
677
|
-
var DiffContentSchema =
|
|
678
|
-
path:
|
|
679
|
-
expected:
|
|
680
|
-
actual:
|
|
681
|
-
diffLines:
|
|
682
|
-
renamedFrom:
|
|
683
|
-
});
|
|
684
|
-
var CommandExecutionSchema =
|
|
685
|
-
command:
|
|
686
|
-
exitCode:
|
|
687
|
-
output:
|
|
688
|
-
duration:
|
|
689
|
-
});
|
|
690
|
-
var FileModificationSchema =
|
|
691
|
-
path:
|
|
692
|
-
action:
|
|
797
|
+
content: z25.string(),
|
|
798
|
+
lineNumber: z25.number()
|
|
799
|
+
});
|
|
800
|
+
var DiffContentSchema = z25.object({
|
|
801
|
+
path: z25.string(),
|
|
802
|
+
expected: z25.string(),
|
|
803
|
+
actual: z25.string(),
|
|
804
|
+
diffLines: z25.array(DiffLineSchema),
|
|
805
|
+
renamedFrom: z25.string().optional()
|
|
806
|
+
});
|
|
807
|
+
var CommandExecutionSchema = z25.object({
|
|
808
|
+
command: z25.string(),
|
|
809
|
+
exitCode: z25.number(),
|
|
810
|
+
output: z25.string().optional(),
|
|
811
|
+
duration: z25.number()
|
|
812
|
+
});
|
|
813
|
+
var FileModificationSchema = z25.object({
|
|
814
|
+
path: z25.string(),
|
|
815
|
+
action: z25.enum(["created", "modified", "deleted"])
|
|
693
816
|
});
|
|
694
817
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
695
818
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -697,75 +820,75 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
697
820
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
698
821
|
return TemplateFileStatus2;
|
|
699
822
|
})(TemplateFileStatus || {});
|
|
700
|
-
var TemplateFileSchema =
|
|
823
|
+
var TemplateFileSchema = z25.object({
|
|
701
824
|
/** Relative path within the template */
|
|
702
|
-
path:
|
|
825
|
+
path: z25.string(),
|
|
703
826
|
/** Full file content after execution */
|
|
704
|
-
content:
|
|
827
|
+
content: z25.string(),
|
|
705
828
|
/** File status (new, modified, unchanged) */
|
|
706
|
-
status:
|
|
707
|
-
});
|
|
708
|
-
var ApiCallSchema =
|
|
709
|
-
endpoint:
|
|
710
|
-
tokensUsed:
|
|
711
|
-
duration:
|
|
712
|
-
});
|
|
713
|
-
var ExecutionTraceSchema =
|
|
714
|
-
commands:
|
|
715
|
-
filesModified:
|
|
716
|
-
apiCalls:
|
|
717
|
-
totalDuration:
|
|
718
|
-
});
|
|
719
|
-
var FailureAnalysisSchema =
|
|
720
|
-
category:
|
|
721
|
-
severity:
|
|
722
|
-
summary:
|
|
723
|
-
details:
|
|
724
|
-
rootCause:
|
|
725
|
-
suggestedFix:
|
|
726
|
-
relatedAssertions:
|
|
727
|
-
codeSnippet:
|
|
728
|
-
similarIssues:
|
|
729
|
-
patternId:
|
|
829
|
+
status: z25.enum(["new", "modified", "unchanged"])
|
|
830
|
+
});
|
|
831
|
+
var ApiCallSchema = z25.object({
|
|
832
|
+
endpoint: z25.string(),
|
|
833
|
+
tokensUsed: z25.number(),
|
|
834
|
+
duration: z25.number()
|
|
835
|
+
});
|
|
836
|
+
var ExecutionTraceSchema = z25.object({
|
|
837
|
+
commands: z25.array(CommandExecutionSchema),
|
|
838
|
+
filesModified: z25.array(FileModificationSchema),
|
|
839
|
+
apiCalls: z25.array(ApiCallSchema),
|
|
840
|
+
totalDuration: z25.number()
|
|
841
|
+
});
|
|
842
|
+
var FailureAnalysisSchema = z25.object({
|
|
843
|
+
category: z25.enum(FailureCategory),
|
|
844
|
+
severity: z25.enum(FailureSeverity),
|
|
845
|
+
summary: z25.string(),
|
|
846
|
+
details: z25.string(),
|
|
847
|
+
rootCause: z25.string(),
|
|
848
|
+
suggestedFix: z25.string(),
|
|
849
|
+
relatedAssertions: z25.array(z25.string()),
|
|
850
|
+
codeSnippet: z25.string().optional(),
|
|
851
|
+
similarIssues: z25.array(z25.string()).optional(),
|
|
852
|
+
patternId: z25.string().optional(),
|
|
730
853
|
// Extended fields for detailed debugging
|
|
731
854
|
diff: DiffContentSchema.optional(),
|
|
732
855
|
executionTrace: ExecutionTraceSchema.optional()
|
|
733
856
|
});
|
|
734
857
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
735
858
|
/** Agent ID for this run */
|
|
736
|
-
agentId:
|
|
859
|
+
agentId: z25.string().optional(),
|
|
737
860
|
/** Skills group ID for this run */
|
|
738
|
-
skillsGroupId:
|
|
861
|
+
skillsGroupId: z25.string().optional(),
|
|
739
862
|
/** Scenario IDs to run */
|
|
740
|
-
scenarioIds:
|
|
863
|
+
scenarioIds: z25.array(z25.string()),
|
|
741
864
|
/** Current status */
|
|
742
865
|
status: EvalStatusSchema,
|
|
743
866
|
/** Progress percentage (0-100) */
|
|
744
|
-
progress:
|
|
867
|
+
progress: z25.number(),
|
|
745
868
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
746
|
-
results:
|
|
869
|
+
results: z25.array(z25.lazy(() => EvalRunResultSchema)),
|
|
747
870
|
/** Aggregated metrics across all results */
|
|
748
871
|
aggregateMetrics: EvalMetricsSchema,
|
|
749
872
|
/** Failure analyses */
|
|
750
|
-
failureAnalyses:
|
|
873
|
+
failureAnalyses: z25.array(FailureAnalysisSchema).optional(),
|
|
751
874
|
/** Aggregated LLM trace summary */
|
|
752
875
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
753
876
|
/** What triggered this run */
|
|
754
877
|
trigger: TriggerSchema.optional(),
|
|
755
878
|
/** When the run started (set when evaluation is triggered) */
|
|
756
|
-
startedAt:
|
|
879
|
+
startedAt: z25.string().optional(),
|
|
757
880
|
/** When the run completed */
|
|
758
|
-
completedAt:
|
|
881
|
+
completedAt: z25.string().optional(),
|
|
759
882
|
/** Live trace events captured during execution (for playback on results page) */
|
|
760
|
-
liveTraceEvents:
|
|
883
|
+
liveTraceEvents: z25.array(LiveTraceEventSchema).optional(),
|
|
761
884
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
762
|
-
jobId:
|
|
885
|
+
jobId: z25.string().optional(),
|
|
763
886
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
764
|
-
jobStatus:
|
|
887
|
+
jobStatus: z25.string().optional(),
|
|
765
888
|
/** Remote job error message if the job failed */
|
|
766
|
-
jobError:
|
|
889
|
+
jobError: z25.string().optional(),
|
|
767
890
|
/** Timestamp of the last job status check */
|
|
768
|
-
jobStatusCheckedAt:
|
|
891
|
+
jobStatusCheckedAt: z25.string().optional()
|
|
769
892
|
});
|
|
770
893
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
771
894
|
id: true,
|
|
@@ -778,28 +901,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
778
901
|
startedAt: true,
|
|
779
902
|
completedAt: true
|
|
780
903
|
});
|
|
781
|
-
var EvaluationProgressSchema =
|
|
782
|
-
runId:
|
|
783
|
-
targetId:
|
|
784
|
-
totalScenarios:
|
|
785
|
-
completedScenarios:
|
|
786
|
-
scenarioProgress:
|
|
787
|
-
|
|
788
|
-
scenarioId:
|
|
789
|
-
currentStep:
|
|
790
|
-
error:
|
|
904
|
+
var EvaluationProgressSchema = z25.object({
|
|
905
|
+
runId: z25.string(),
|
|
906
|
+
targetId: z25.string(),
|
|
907
|
+
totalScenarios: z25.number(),
|
|
908
|
+
completedScenarios: z25.number(),
|
|
909
|
+
scenarioProgress: z25.array(
|
|
910
|
+
z25.object({
|
|
911
|
+
scenarioId: z25.string(),
|
|
912
|
+
currentStep: z25.string(),
|
|
913
|
+
error: z25.string().optional()
|
|
791
914
|
})
|
|
792
915
|
),
|
|
793
|
-
createdAt:
|
|
916
|
+
createdAt: z25.number()
|
|
794
917
|
});
|
|
795
|
-
var EvaluationLogSchema =
|
|
796
|
-
runId:
|
|
797
|
-
scenarioId:
|
|
798
|
-
log:
|
|
799
|
-
level:
|
|
800
|
-
message:
|
|
801
|
-
args:
|
|
802
|
-
error:
|
|
918
|
+
var EvaluationLogSchema = z25.object({
|
|
919
|
+
runId: z25.string(),
|
|
920
|
+
scenarioId: z25.string(),
|
|
921
|
+
log: z25.object({
|
|
922
|
+
level: z25.enum(["info", "error", "debug"]),
|
|
923
|
+
message: z25.string().optional(),
|
|
924
|
+
args: z25.array(z25.any()).optional(),
|
|
925
|
+
error: z25.string().optional()
|
|
803
926
|
})
|
|
804
927
|
});
|
|
805
928
|
var LLM_TIMEOUT = 12e4;
|
|
@@ -812,91 +935,91 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
812
935
|
AssertionResultStatus2["ERROR"] = "error";
|
|
813
936
|
return AssertionResultStatus2;
|
|
814
937
|
})(AssertionResultStatus || {});
|
|
815
|
-
var AssertionResultSchema =
|
|
816
|
-
id:
|
|
817
|
-
assertionId:
|
|
818
|
-
assertionType:
|
|
819
|
-
assertionName:
|
|
820
|
-
status:
|
|
821
|
-
message:
|
|
822
|
-
expected:
|
|
823
|
-
actual:
|
|
824
|
-
duration:
|
|
825
|
-
details:
|
|
826
|
-
llmTraceSteps:
|
|
827
|
-
});
|
|
828
|
-
var EvalRunResultSchema =
|
|
829
|
-
id:
|
|
830
|
-
targetId:
|
|
831
|
-
targetName:
|
|
832
|
-
scenarioId:
|
|
833
|
-
scenarioName:
|
|
938
|
+
var AssertionResultSchema = z26.object({
|
|
939
|
+
id: z26.string(),
|
|
940
|
+
assertionId: z26.string(),
|
|
941
|
+
assertionType: z26.string(),
|
|
942
|
+
assertionName: z26.string(),
|
|
943
|
+
status: z26.enum(AssertionResultStatus),
|
|
944
|
+
message: z26.string().optional(),
|
|
945
|
+
expected: z26.string().optional(),
|
|
946
|
+
actual: z26.string().optional(),
|
|
947
|
+
duration: z26.number().optional(),
|
|
948
|
+
details: z26.record(z26.string(), z26.unknown()).optional(),
|
|
949
|
+
llmTraceSteps: z26.array(LLMTraceStepSchema).optional()
|
|
950
|
+
});
|
|
951
|
+
var EvalRunResultSchema = z26.object({
|
|
952
|
+
id: z26.string(),
|
|
953
|
+
targetId: z26.string(),
|
|
954
|
+
targetName: z26.string().optional(),
|
|
955
|
+
scenarioId: z26.string(),
|
|
956
|
+
scenarioName: z26.string(),
|
|
834
957
|
modelConfig: ModelConfigSchema.optional(),
|
|
835
|
-
assertionResults:
|
|
958
|
+
assertionResults: z26.array(AssertionResultSchema),
|
|
836
959
|
metrics: EvalMetricsSchema.optional(),
|
|
837
|
-
passed:
|
|
838
|
-
failed:
|
|
839
|
-
passRate:
|
|
840
|
-
duration:
|
|
841
|
-
outputText:
|
|
842
|
-
files:
|
|
843
|
-
fileDiffs:
|
|
960
|
+
passed: z26.number(),
|
|
961
|
+
failed: z26.number(),
|
|
962
|
+
passRate: z26.number(),
|
|
963
|
+
duration: z26.number(),
|
|
964
|
+
outputText: z26.string().optional(),
|
|
965
|
+
files: z26.array(ExpectedFileSchema).optional(),
|
|
966
|
+
fileDiffs: z26.array(DiffContentSchema).optional(),
|
|
844
967
|
/** Full template files after execution with status indicators */
|
|
845
|
-
templateFiles:
|
|
846
|
-
startedAt:
|
|
847
|
-
completedAt:
|
|
968
|
+
templateFiles: z26.array(TemplateFileSchema).optional(),
|
|
969
|
+
startedAt: z26.string().optional(),
|
|
970
|
+
completedAt: z26.string().optional(),
|
|
848
971
|
llmTrace: LLMTraceSchema.optional()
|
|
849
972
|
});
|
|
850
|
-
var PromptResultSchema =
|
|
851
|
-
text:
|
|
852
|
-
files:
|
|
853
|
-
finishReason:
|
|
854
|
-
reasoning:
|
|
855
|
-
reasoningDetails:
|
|
856
|
-
toolCalls:
|
|
857
|
-
toolResults:
|
|
858
|
-
warnings:
|
|
859
|
-
sources:
|
|
860
|
-
steps:
|
|
861
|
-
generationTimeMs:
|
|
862
|
-
prompt:
|
|
863
|
-
systemPrompt:
|
|
864
|
-
usage:
|
|
865
|
-
totalTokens:
|
|
866
|
-
totalMicrocentsSpent:
|
|
973
|
+
var PromptResultSchema = z26.object({
|
|
974
|
+
text: z26.string(),
|
|
975
|
+
files: z26.array(z26.unknown()).optional(),
|
|
976
|
+
finishReason: z26.string().optional(),
|
|
977
|
+
reasoning: z26.string().optional(),
|
|
978
|
+
reasoningDetails: z26.unknown().optional(),
|
|
979
|
+
toolCalls: z26.array(z26.unknown()).optional(),
|
|
980
|
+
toolResults: z26.array(z26.unknown()).optional(),
|
|
981
|
+
warnings: z26.array(z26.unknown()).optional(),
|
|
982
|
+
sources: z26.array(z26.unknown()).optional(),
|
|
983
|
+
steps: z26.array(z26.unknown()),
|
|
984
|
+
generationTimeMs: z26.number(),
|
|
985
|
+
prompt: z26.string(),
|
|
986
|
+
systemPrompt: z26.string(),
|
|
987
|
+
usage: z26.object({
|
|
988
|
+
totalTokens: z26.number().optional(),
|
|
989
|
+
totalMicrocentsSpent: z26.number().optional()
|
|
867
990
|
})
|
|
868
991
|
});
|
|
869
|
-
var EvaluationResultSchema =
|
|
870
|
-
id:
|
|
871
|
-
runId:
|
|
872
|
-
timestamp:
|
|
992
|
+
var EvaluationResultSchema = z26.object({
|
|
993
|
+
id: z26.string(),
|
|
994
|
+
runId: z26.string(),
|
|
995
|
+
timestamp: z26.number(),
|
|
873
996
|
promptResult: PromptResultSchema,
|
|
874
|
-
testResults:
|
|
875
|
-
tags:
|
|
876
|
-
feedback:
|
|
877
|
-
score:
|
|
878
|
-
suiteId:
|
|
879
|
-
});
|
|
880
|
-
var LeanEvaluationResultSchema =
|
|
881
|
-
id:
|
|
882
|
-
runId:
|
|
883
|
-
timestamp:
|
|
884
|
-
tags:
|
|
885
|
-
scenarioId:
|
|
886
|
-
scenarioVersion:
|
|
887
|
-
targetId:
|
|
888
|
-
targetVersion:
|
|
889
|
-
suiteId:
|
|
890
|
-
score:
|
|
891
|
-
time:
|
|
892
|
-
microcentsSpent:
|
|
997
|
+
testResults: z26.array(z26.unknown()),
|
|
998
|
+
tags: z26.array(z26.string()).optional(),
|
|
999
|
+
feedback: z26.string().optional(),
|
|
1000
|
+
score: z26.number(),
|
|
1001
|
+
suiteId: z26.string().optional()
|
|
1002
|
+
});
|
|
1003
|
+
var LeanEvaluationResultSchema = z26.object({
|
|
1004
|
+
id: z26.string(),
|
|
1005
|
+
runId: z26.string(),
|
|
1006
|
+
timestamp: z26.number(),
|
|
1007
|
+
tags: z26.array(z26.string()).optional(),
|
|
1008
|
+
scenarioId: z26.string(),
|
|
1009
|
+
scenarioVersion: z26.number().optional(),
|
|
1010
|
+
targetId: z26.string(),
|
|
1011
|
+
targetVersion: z26.number().optional(),
|
|
1012
|
+
suiteId: z26.string().optional(),
|
|
1013
|
+
score: z26.number(),
|
|
1014
|
+
time: z26.number().optional(),
|
|
1015
|
+
microcentsSpent: z26.number().optional()
|
|
893
1016
|
});
|
|
894
1017
|
|
|
895
1018
|
// src/project/project.ts
|
|
896
|
-
import { z as
|
|
1019
|
+
import { z as z27 } from "zod";
|
|
897
1020
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
898
|
-
appId:
|
|
899
|
-
appSecret:
|
|
1021
|
+
appId: z27.string().optional().describe("The ID of the app in Dev Center"),
|
|
1022
|
+
appSecret: z27.string().optional().describe("The secret of the app in Dev Center")
|
|
900
1023
|
});
|
|
901
1024
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
902
1025
|
id: true,
|
|
@@ -907,10 +1030,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
907
1030
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
908
1031
|
|
|
909
1032
|
// src/template/template.ts
|
|
910
|
-
import { z as
|
|
1033
|
+
import { z as z28 } from "zod";
|
|
911
1034
|
var TemplateSchema = TenantEntitySchema.extend({
|
|
912
1035
|
/** URL to download the template from */
|
|
913
|
-
downloadUrl:
|
|
1036
|
+
downloadUrl: z28.url()
|
|
914
1037
|
});
|
|
915
1038
|
var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
916
1039
|
id: true,
|
|
@@ -920,86 +1043,69 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
920
1043
|
});
|
|
921
1044
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
922
1045
|
|
|
923
|
-
// src/assertion/
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
"
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
"
|
|
930
|
-
]);
|
|
931
|
-
var SkillWasCalledConfigSchema = z28.object({
|
|
932
|
-
/** Name of the skill that must have been called */
|
|
933
|
-
skillName: z28.string().min(1)
|
|
934
|
-
});
|
|
935
|
-
var BuildPassedConfigSchema = z28.object({
|
|
936
|
-
/** Command to run (default: "yarn build") */
|
|
937
|
-
command: z28.string().optional(),
|
|
938
|
-
/** Expected exit code (default: 0) */
|
|
939
|
-
expectedExitCode: z28.number().int().optional()
|
|
940
|
-
});
|
|
941
|
-
var LlmJudgeConfigSchema = z28.object({
|
|
942
|
-
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
943
|
-
prompt: z28.string().min(1),
|
|
944
|
-
/** Optional system prompt for the judge */
|
|
945
|
-
systemPrompt: z28.string().optional(),
|
|
946
|
-
/** Minimum score to pass (0-100, default 70) */
|
|
947
|
-
minScore: z28.number().int().min(0).max(100).optional(),
|
|
948
|
-
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
949
|
-
model: z28.string().optional(),
|
|
950
|
-
/** Max output tokens */
|
|
951
|
-
maxTokens: z28.number().int().optional(),
|
|
952
|
-
/** Temperature (0-1) */
|
|
953
|
-
temperature: z28.number().min(0).max(1).optional()
|
|
954
|
-
});
|
|
955
|
-
var AssertionConfigSchema = z28.union([
|
|
956
|
-
SkillWasCalledConfigSchema,
|
|
957
|
-
BuildPassedConfigSchema,
|
|
958
|
-
LlmJudgeConfigSchema,
|
|
959
|
-
z28.object({})
|
|
960
|
-
// Empty config for cases where defaults are used
|
|
961
|
-
]);
|
|
962
|
-
var CustomAssertionSchema = TenantEntitySchema.extend({
|
|
963
|
-
/** The assertion type */
|
|
964
|
-
type: AssertionTypeSchema,
|
|
965
|
-
/** Type-specific configuration */
|
|
966
|
-
config: AssertionConfigSchema
|
|
967
|
-
});
|
|
968
|
-
var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
|
|
969
|
-
id: true,
|
|
970
|
-
createdAt: true,
|
|
971
|
-
updatedAt: true,
|
|
972
|
-
deleted: true
|
|
973
|
-
});
|
|
974
|
-
var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
|
|
975
|
-
function validateAssertionConfig(type, config) {
|
|
976
|
-
switch (type) {
|
|
977
|
-
case "skill_was_called":
|
|
978
|
-
return SkillWasCalledConfigSchema.safeParse(config).success;
|
|
979
|
-
case "build_passed":
|
|
980
|
-
return BuildPassedConfigSchema.safeParse(config).success;
|
|
981
|
-
case "llm_judge":
|
|
982
|
-
case "custom":
|
|
983
|
-
return LlmJudgeConfigSchema.safeParse(config).success;
|
|
984
|
-
default:
|
|
985
|
-
return false;
|
|
986
|
-
}
|
|
987
|
-
}
|
|
988
|
-
function getSkillWasCalledConfig(assertion) {
|
|
989
|
-
if (assertion.type !== "skill_was_called") return null;
|
|
990
|
-
const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
|
|
991
|
-
return result.success ? result.data : null;
|
|
1046
|
+
// src/assertion/system-assertions.ts
|
|
1047
|
+
var SYSTEM_ASSERTION_IDS = {
|
|
1048
|
+
SKILL_WAS_CALLED: "system:skill_was_called",
|
|
1049
|
+
BUILD_PASSED: "system:build_passed"
|
|
1050
|
+
};
|
|
1051
|
+
function isSystemAssertionId(id) {
|
|
1052
|
+
return id.startsWith("system:");
|
|
992
1053
|
}
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
1054
|
+
var SYSTEM_ASSERTIONS = {
|
|
1055
|
+
[SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED]: {
|
|
1056
|
+
id: SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED,
|
|
1057
|
+
name: "Skill Was Called",
|
|
1058
|
+
description: "Check if a specific skill was invoked during the agent run",
|
|
1059
|
+
type: "skill_was_called",
|
|
1060
|
+
parameters: [
|
|
1061
|
+
{
|
|
1062
|
+
name: "skillName",
|
|
1063
|
+
label: "Skill Name",
|
|
1064
|
+
type: "string",
|
|
1065
|
+
required: true
|
|
1066
|
+
}
|
|
1067
|
+
]
|
|
1068
|
+
},
|
|
1069
|
+
[SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
|
|
1070
|
+
id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
|
|
1071
|
+
name: "Build Passed",
|
|
1072
|
+
description: "Run a build command and verify it exits with expected code",
|
|
1073
|
+
type: "build_passed",
|
|
1074
|
+
parameters: [
|
|
1075
|
+
{
|
|
1076
|
+
name: "command",
|
|
1077
|
+
label: "Build Command",
|
|
1078
|
+
type: "string",
|
|
1079
|
+
required: false,
|
|
1080
|
+
defaultValue: "yarn build"
|
|
1081
|
+
},
|
|
1082
|
+
{
|
|
1083
|
+
name: "expectedExitCode",
|
|
1084
|
+
label: "Expected Exit Code",
|
|
1085
|
+
type: "number",
|
|
1086
|
+
required: false,
|
|
1087
|
+
defaultValue: 0
|
|
1088
|
+
},
|
|
1089
|
+
{
|
|
1090
|
+
name: "maxBuildTime",
|
|
1091
|
+
label: "Max Build Time (ms)",
|
|
1092
|
+
type: "number",
|
|
1093
|
+
required: false
|
|
1094
|
+
},
|
|
1095
|
+
{
|
|
1096
|
+
name: "maxMemory",
|
|
1097
|
+
label: "Max Memory (MB)",
|
|
1098
|
+
type: "number",
|
|
1099
|
+
required: false
|
|
1100
|
+
}
|
|
1101
|
+
]
|
|
1102
|
+
}
|
|
1103
|
+
};
|
|
1104
|
+
function getSystemAssertions() {
|
|
1105
|
+
return Object.values(SYSTEM_ASSERTIONS);
|
|
997
1106
|
}
|
|
998
|
-
function
|
|
999
|
-
|
|
1000
|
-
return null;
|
|
1001
|
-
const result = LlmJudgeConfigSchema.safeParse(assertion.config);
|
|
1002
|
-
return result.success ? result.data : null;
|
|
1107
|
+
function getSystemAssertion(id) {
|
|
1108
|
+
return SYSTEM_ASSERTIONS[id];
|
|
1003
1109
|
}
|
|
1004
1110
|
export {
|
|
1005
1111
|
AVAILABLE_MODELS,
|
|
@@ -1008,6 +1114,8 @@ export {
|
|
|
1008
1114
|
AllowedCommands,
|
|
1009
1115
|
ApiCallSchema,
|
|
1010
1116
|
AssertionConfigSchema,
|
|
1117
|
+
AssertionParameterSchema,
|
|
1118
|
+
AssertionParameterTypeSchema,
|
|
1011
1119
|
AssertionResultSchema,
|
|
1012
1120
|
AssertionResultStatus,
|
|
1013
1121
|
AssertionSchema,
|
|
@@ -1074,6 +1182,9 @@ export {
|
|
|
1074
1182
|
ProjectSchema,
|
|
1075
1183
|
PromptResultSchema,
|
|
1076
1184
|
SKILL_FOLDER_NAME_REGEX,
|
|
1185
|
+
SYSTEM_ASSERTIONS,
|
|
1186
|
+
SYSTEM_ASSERTION_IDS,
|
|
1187
|
+
ScenarioAssertionLinkSchema,
|
|
1077
1188
|
SiteConfigTestSchema,
|
|
1078
1189
|
SkillMetadataSchema,
|
|
1079
1190
|
SkillSchema,
|
|
@@ -1112,6 +1223,9 @@ export {
|
|
|
1112
1223
|
getBuildPassedConfig,
|
|
1113
1224
|
getLlmJudgeConfig,
|
|
1114
1225
|
getSkillWasCalledConfig,
|
|
1226
|
+
getSystemAssertion,
|
|
1227
|
+
getSystemAssertions,
|
|
1228
|
+
isSystemAssertionId,
|
|
1115
1229
|
isValidSkillFolderName,
|
|
1116
1230
|
parseTraceEventLine,
|
|
1117
1231
|
validateAssertionConfig
|