@wix/evalforge-types 0.74.0 → 0.75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.js +703 -489
- package/build/index.js.map +4 -4
- package/build/index.mjs +684 -489
- package/build/index.mjs.map +4 -4
- package/build/types/evaluation/eval-run.d.ts +4 -0
- package/build/types/target/capability-converters.d.ts +25 -0
- package/build/types/target/capability.d.ts +254 -0
- package/build/types/target/index.d.ts +2 -0
- package/build/types/target/preset.d.ts +6 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -349,10 +349,14 @@ var PresetSchema = TenantEntitySchema.extend({
|
|
|
349
349
|
/** Sub-agent IDs included in this preset */
|
|
350
350
|
subAgentIds: z9.array(z9.string()).default([]),
|
|
351
351
|
/** Rule IDs included in this preset */
|
|
352
|
-
ruleIds: z9.array(z9.string()).default([])
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
352
|
+
ruleIds: z9.array(z9.string()).default([]),
|
|
353
|
+
/** Unified capability IDs (replaces skill/mcp/subAgent/ruleIds) */
|
|
354
|
+
capabilityIds: z9.array(z9.string()).optional(),
|
|
355
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
356
|
+
capabilityVersions: z9.record(z9.string(), z9.string()).optional()
|
|
357
|
+
});
|
|
358
|
+
var atLeastOneEntity = (data) => (data.capabilityIds?.length ?? 0) > 0 || (data.skillIds?.length ?? 0) > 0 || (data.mcpIds?.length ?? 0) > 0 || (data.subAgentIds?.length ?? 0) > 0 || (data.ruleIds?.length ?? 0) > 0;
|
|
359
|
+
var AT_LEAST_ONE_ENTITY_MESSAGE = "At least one of capabilityIds, skillIds, mcpIds, subAgentIds, or ruleIds must be non-empty";
|
|
356
360
|
var CreatePresetInputSchema = PresetSchema.omit({
|
|
357
361
|
id: true,
|
|
358
362
|
createdAt: true,
|
|
@@ -366,11 +370,179 @@ var UpdatePresetInputSchema = PresetSchema.omit({
|
|
|
366
370
|
deleted: true
|
|
367
371
|
}).partial();
|
|
368
372
|
|
|
373
|
+
// src/target/capability.ts
|
|
374
|
+
import { z as z10 } from "zod";
|
|
375
|
+
var CapabilityTypeSchema = z10.enum([
|
|
376
|
+
"SKILL",
|
|
377
|
+
"SUB_AGENT",
|
|
378
|
+
"RULE",
|
|
379
|
+
"MCP"
|
|
380
|
+
]);
|
|
381
|
+
var CAPABILITY_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
|
|
382
|
+
function isValidCapabilityName(name) {
|
|
383
|
+
return typeof name === "string" && name.length > 0 && CAPABILITY_NAME_REGEX.test(name);
|
|
384
|
+
}
|
|
385
|
+
var KEBAB_CASE_MESSAGE2 = "Name must be in kebab-case (lowercase letters, numbers, hyphens only, e.g. my-capability)";
|
|
386
|
+
var CapabilityContentSchema = z10.record(z10.string(), z10.unknown());
|
|
387
|
+
var CapabilityVersionOriginSchema = z10.enum(["manual", "pr", "master"]);
|
|
388
|
+
var CapabilitySchema = TenantEntitySchema.extend({
|
|
389
|
+
capabilityType: CapabilityTypeSchema,
|
|
390
|
+
source: GitHubSourceSchema.optional()
|
|
391
|
+
});
|
|
392
|
+
var CapabilityVersionSchema = z10.object({
|
|
393
|
+
id: z10.string(),
|
|
394
|
+
projectId: z10.string(),
|
|
395
|
+
capabilityId: z10.string(),
|
|
396
|
+
version: z10.string(),
|
|
397
|
+
origin: CapabilityVersionOriginSchema,
|
|
398
|
+
source: GitHubSourceSchema.optional(),
|
|
399
|
+
content: CapabilityContentSchema.optional(),
|
|
400
|
+
notes: z10.string().optional(),
|
|
401
|
+
createdAt: z10.string()
|
|
402
|
+
});
|
|
403
|
+
var CapabilityWithLatestVersionSchema = CapabilitySchema.extend({
|
|
404
|
+
latestVersion: CapabilityVersionSchema.optional()
|
|
405
|
+
});
|
|
406
|
+
var CapabilityInputBaseSchema = CapabilitySchema.omit({
|
|
407
|
+
id: true,
|
|
408
|
+
createdAt: true,
|
|
409
|
+
updatedAt: true,
|
|
410
|
+
deleted: true,
|
|
411
|
+
description: true,
|
|
412
|
+
source: true
|
|
413
|
+
}).extend({
|
|
414
|
+
description: z10.string().optional(),
|
|
415
|
+
source: GitHubSourceSchema.optional()
|
|
416
|
+
});
|
|
417
|
+
var InitialCapabilityVersionInputSchema = z10.object({
|
|
418
|
+
content: CapabilityContentSchema.optional(),
|
|
419
|
+
notes: z10.string().optional(),
|
|
420
|
+
source: GitHubSourceSchema.optional(),
|
|
421
|
+
version: z10.string().optional(),
|
|
422
|
+
origin: CapabilityVersionOriginSchema.optional()
|
|
423
|
+
});
|
|
424
|
+
var CreateCapabilityInputSchema = CapabilityInputBaseSchema.extend({
|
|
425
|
+
initialVersion: InitialCapabilityVersionInputSchema.optional()
|
|
426
|
+
}).refine((data) => isValidCapabilityName(data.name), {
|
|
427
|
+
message: KEBAB_CASE_MESSAGE2,
|
|
428
|
+
path: ["name"]
|
|
429
|
+
});
|
|
430
|
+
var UpdateCapabilityInputSchema = CapabilityInputBaseSchema.omit({
|
|
431
|
+
capabilityType: true
|
|
432
|
+
}).partial().refine(
|
|
433
|
+
(data) => data.name === void 0 || isValidCapabilityName(data.name),
|
|
434
|
+
{ message: KEBAB_CASE_MESSAGE2, path: ["name"] }
|
|
435
|
+
);
|
|
436
|
+
var CreateCapabilityVersionInputSchema = z10.object({
|
|
437
|
+
source: GitHubSourceSchema.optional(),
|
|
438
|
+
version: z10.string().min(1),
|
|
439
|
+
notes: z10.string().optional(),
|
|
440
|
+
origin: CapabilityVersionOriginSchema.optional(),
|
|
441
|
+
content: CapabilityContentSchema.optional()
|
|
442
|
+
});
|
|
443
|
+
|
|
444
|
+
// src/target/capability-converters.ts
|
|
445
|
+
function capabilityToSkill(cap) {
|
|
446
|
+
return {
|
|
447
|
+
id: cap.id,
|
|
448
|
+
projectId: cap.projectId,
|
|
449
|
+
name: cap.name,
|
|
450
|
+
description: cap.description,
|
|
451
|
+
source: cap.source,
|
|
452
|
+
createdAt: cap.createdAt,
|
|
453
|
+
updatedAt: cap.updatedAt,
|
|
454
|
+
deleted: cap.deleted
|
|
455
|
+
};
|
|
456
|
+
}
|
|
457
|
+
function capabilityVersionToSkillVersion(cv) {
|
|
458
|
+
const content = cv.content;
|
|
459
|
+
return {
|
|
460
|
+
id: cv.id,
|
|
461
|
+
projectId: cv.projectId,
|
|
462
|
+
skillId: cv.capabilityId,
|
|
463
|
+
version: cv.version,
|
|
464
|
+
origin: cv.origin,
|
|
465
|
+
source: cv.source,
|
|
466
|
+
files: content?.files,
|
|
467
|
+
notes: cv.notes,
|
|
468
|
+
createdAt: cv.createdAt
|
|
469
|
+
};
|
|
470
|
+
}
|
|
471
|
+
function capabilityToSkillWithLatestVersion(cap) {
|
|
472
|
+
const skill = capabilityToSkill(cap);
|
|
473
|
+
const latestVersion = cap.latestVersion ? capabilityVersionToSkillVersion(cap.latestVersion) : void 0;
|
|
474
|
+
return { ...skill, latestVersion };
|
|
475
|
+
}
|
|
476
|
+
function capabilityToSubAgent(cap) {
|
|
477
|
+
const content = cap.latestVersion?.content;
|
|
478
|
+
return {
|
|
479
|
+
id: cap.id,
|
|
480
|
+
projectId: cap.projectId,
|
|
481
|
+
name: cap.name,
|
|
482
|
+
description: cap.description,
|
|
483
|
+
subAgentMd: content?.subAgentMd ?? "",
|
|
484
|
+
source: cap.source,
|
|
485
|
+
createdAt: cap.createdAt,
|
|
486
|
+
updatedAt: cap.updatedAt,
|
|
487
|
+
deleted: cap.deleted
|
|
488
|
+
};
|
|
489
|
+
}
|
|
490
|
+
function capabilityToRule(cap) {
|
|
491
|
+
const content = cap.latestVersion?.content;
|
|
492
|
+
return {
|
|
493
|
+
id: cap.id,
|
|
494
|
+
projectId: cap.projectId,
|
|
495
|
+
name: cap.name,
|
|
496
|
+
description: cap.description,
|
|
497
|
+
ruleType: content?.ruleType ?? "claude-md",
|
|
498
|
+
content: content?.content ?? "",
|
|
499
|
+
createdAt: cap.createdAt,
|
|
500
|
+
updatedAt: cap.updatedAt,
|
|
501
|
+
deleted: cap.deleted
|
|
502
|
+
};
|
|
503
|
+
}
|
|
504
|
+
function capabilityToMcp(cap) {
|
|
505
|
+
const content = cap.latestVersion?.content;
|
|
506
|
+
return {
|
|
507
|
+
id: cap.id,
|
|
508
|
+
projectId: cap.projectId,
|
|
509
|
+
name: cap.name,
|
|
510
|
+
description: cap.description,
|
|
511
|
+
config: content?.config ?? {},
|
|
512
|
+
createdAt: cap.createdAt,
|
|
513
|
+
updatedAt: cap.updatedAt,
|
|
514
|
+
deleted: cap.deleted
|
|
515
|
+
};
|
|
516
|
+
}
|
|
517
|
+
function groupCapabilitiesByType(capabilities) {
|
|
518
|
+
const skills = [];
|
|
519
|
+
const subAgents = [];
|
|
520
|
+
const rules = [];
|
|
521
|
+
const mcps = [];
|
|
522
|
+
for (const cap of capabilities) {
|
|
523
|
+
switch (cap.capabilityType) {
|
|
524
|
+
case "SKILL":
|
|
525
|
+
skills.push(capabilityToSkillWithLatestVersion(cap));
|
|
526
|
+
break;
|
|
527
|
+
case "SUB_AGENT":
|
|
528
|
+
subAgents.push(capabilityToSubAgent(cap));
|
|
529
|
+
break;
|
|
530
|
+
case "RULE":
|
|
531
|
+
rules.push(capabilityToRule(cap));
|
|
532
|
+
break;
|
|
533
|
+
case "MCP":
|
|
534
|
+
mcps.push(capabilityToMcp(cap));
|
|
535
|
+
break;
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
return { skills, subAgents, rules, mcps };
|
|
539
|
+
}
|
|
540
|
+
|
|
369
541
|
// src/test/index.ts
|
|
370
|
-
import { z as
|
|
542
|
+
import { z as z21 } from "zod";
|
|
371
543
|
|
|
372
544
|
// src/test/base.ts
|
|
373
|
-
import { z as
|
|
545
|
+
import { z as z11 } from "zod";
|
|
374
546
|
var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
375
547
|
TestType2["LLM"] = "LLM";
|
|
376
548
|
TestType2["TOOL"] = "TOOL";
|
|
@@ -383,7 +555,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
|
383
555
|
TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
|
|
384
556
|
return TestType2;
|
|
385
557
|
})(TestType || {});
|
|
386
|
-
var TestTypeSchema =
|
|
558
|
+
var TestTypeSchema = z11.enum(TestType);
|
|
387
559
|
var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
388
560
|
TestImportance2["LOW"] = "low";
|
|
389
561
|
TestImportance2["MEDIUM"] = "medium";
|
|
@@ -391,153 +563,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
|
391
563
|
TestImportance2["CRITICAL"] = "critical";
|
|
392
564
|
return TestImportance2;
|
|
393
565
|
})(TestImportance || {});
|
|
394
|
-
var TestImportanceSchema =
|
|
395
|
-
var BaseTestSchema =
|
|
396
|
-
id:
|
|
566
|
+
var TestImportanceSchema = z11.enum(TestImportance);
|
|
567
|
+
var BaseTestSchema = z11.object({
|
|
568
|
+
id: z11.string(),
|
|
397
569
|
type: TestTypeSchema,
|
|
398
|
-
name:
|
|
399
|
-
description:
|
|
570
|
+
name: z11.string().min(3),
|
|
571
|
+
description: z11.string().optional(),
|
|
400
572
|
importance: TestImportanceSchema.optional()
|
|
401
573
|
});
|
|
402
574
|
|
|
403
575
|
// src/test/llm.ts
|
|
404
|
-
import { z as
|
|
576
|
+
import { z as z12 } from "zod";
|
|
405
577
|
var LLMTestSchema = BaseTestSchema.extend({
|
|
406
|
-
type:
|
|
578
|
+
type: z12.literal("LLM" /* LLM */),
|
|
407
579
|
/** Maximum steps for the LLM to take */
|
|
408
|
-
maxSteps:
|
|
580
|
+
maxSteps: z12.number().min(1).max(100),
|
|
409
581
|
/** Prompt to send to the evaluator */
|
|
410
|
-
prompt:
|
|
582
|
+
prompt: z12.string().min(1),
|
|
411
583
|
/** ID of the evaluator agent to use */
|
|
412
|
-
evaluatorId:
|
|
584
|
+
evaluatorId: z12.string()
|
|
413
585
|
});
|
|
414
586
|
|
|
415
587
|
// src/test/tool.ts
|
|
416
|
-
import { z as
|
|
588
|
+
import { z as z13 } from "zod";
|
|
417
589
|
var ToolTestSchema = BaseTestSchema.extend({
|
|
418
|
-
type:
|
|
590
|
+
type: z13.literal("TOOL" /* TOOL */),
|
|
419
591
|
/** Name of the tool that should be called */
|
|
420
|
-
toolName:
|
|
592
|
+
toolName: z13.string().min(3),
|
|
421
593
|
/** Expected arguments for the tool call */
|
|
422
|
-
args:
|
|
594
|
+
args: z13.record(z13.string(), z13.any()),
|
|
423
595
|
/** Expected content in the tool results */
|
|
424
|
-
resultsContent:
|
|
596
|
+
resultsContent: z13.string()
|
|
425
597
|
});
|
|
426
598
|
|
|
427
599
|
// src/test/site-config.ts
|
|
428
|
-
import { z as
|
|
600
|
+
import { z as z14 } from "zod";
|
|
429
601
|
var SiteConfigTestSchema = BaseTestSchema.extend({
|
|
430
|
-
type:
|
|
602
|
+
type: z14.literal("SITE_CONFIG" /* SITE_CONFIG */),
|
|
431
603
|
/** URL to call */
|
|
432
|
-
url:
|
|
604
|
+
url: z14.string().url(),
|
|
433
605
|
/** HTTP method */
|
|
434
|
-
method:
|
|
606
|
+
method: z14.enum(["GET", "POST"]),
|
|
435
607
|
/** Request body (for POST) */
|
|
436
|
-
body:
|
|
608
|
+
body: z14.string().optional(),
|
|
437
609
|
/** Expected HTTP status code */
|
|
438
|
-
expectedStatusCode:
|
|
610
|
+
expectedStatusCode: z14.number().int().min(100).max(599),
|
|
439
611
|
/** Expected response content */
|
|
440
|
-
expectedResponse:
|
|
612
|
+
expectedResponse: z14.string().optional(),
|
|
441
613
|
/** JMESPath expression to extract from response */
|
|
442
|
-
expectedResponseJMESPath:
|
|
614
|
+
expectedResponseJMESPath: z14.string().optional()
|
|
443
615
|
});
|
|
444
616
|
|
|
445
617
|
// src/test/command-execution.ts
|
|
446
|
-
import { z as
|
|
618
|
+
import { z as z15 } from "zod";
|
|
447
619
|
var AllowedCommands = [
|
|
448
620
|
"yarn install --no-immutable && yarn build",
|
|
449
621
|
"npm run build",
|
|
450
622
|
"yarn typecheck"
|
|
451
623
|
];
|
|
452
624
|
var CommandExecutionTestSchema = BaseTestSchema.extend({
|
|
453
|
-
type:
|
|
625
|
+
type: z15.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
|
|
454
626
|
/** Command to execute (must be in AllowedCommands) */
|
|
455
|
-
command:
|
|
627
|
+
command: z15.string().refine((value) => AllowedCommands.includes(value), {
|
|
456
628
|
message: `Command must be one of: ${AllowedCommands.join(", ")}`
|
|
457
629
|
}),
|
|
458
630
|
/** Expected exit code (default: 0) */
|
|
459
|
-
expectedExitCode:
|
|
631
|
+
expectedExitCode: z15.number().default(0).optional()
|
|
460
632
|
});
|
|
461
633
|
|
|
462
634
|
// src/test/file-presence.ts
|
|
463
|
-
import { z as
|
|
635
|
+
import { z as z16 } from "zod";
|
|
464
636
|
var FilePresenceTestSchema = BaseTestSchema.extend({
|
|
465
|
-
type:
|
|
637
|
+
type: z16.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
|
|
466
638
|
/** Paths to check */
|
|
467
|
-
paths:
|
|
639
|
+
paths: z16.array(z16.string()),
|
|
468
640
|
/** Whether files should exist (true) or not exist (false) */
|
|
469
|
-
shouldExist:
|
|
641
|
+
shouldExist: z16.boolean()
|
|
470
642
|
});
|
|
471
643
|
|
|
472
644
|
// src/test/file-content.ts
|
|
473
|
-
import { z as
|
|
474
|
-
var FileContentCheckSchema =
|
|
645
|
+
import { z as z17 } from "zod";
|
|
646
|
+
var FileContentCheckSchema = z17.object({
|
|
475
647
|
/** Strings that must be present in the file */
|
|
476
|
-
contains:
|
|
648
|
+
contains: z17.array(z17.string()).optional(),
|
|
477
649
|
/** Strings that must NOT be present in the file */
|
|
478
|
-
notContains:
|
|
650
|
+
notContains: z17.array(z17.string()).optional(),
|
|
479
651
|
/** Regex pattern the content must match */
|
|
480
|
-
matches:
|
|
652
|
+
matches: z17.string().optional(),
|
|
481
653
|
/** JSON path checks for structured content */
|
|
482
|
-
jsonPath:
|
|
483
|
-
|
|
484
|
-
path:
|
|
485
|
-
value:
|
|
654
|
+
jsonPath: z17.array(
|
|
655
|
+
z17.object({
|
|
656
|
+
path: z17.string(),
|
|
657
|
+
value: z17.unknown()
|
|
486
658
|
})
|
|
487
659
|
).optional(),
|
|
488
660
|
/** Lines that should be added (for diff checking) */
|
|
489
|
-
added:
|
|
661
|
+
added: z17.array(z17.string()).optional(),
|
|
490
662
|
/** Lines that should be removed (for diff checking) */
|
|
491
|
-
removed:
|
|
663
|
+
removed: z17.array(z17.string()).optional()
|
|
492
664
|
});
|
|
493
665
|
var FileContentTestSchema = BaseTestSchema.extend({
|
|
494
|
-
type:
|
|
666
|
+
type: z17.literal("FILE_CONTENT" /* FILE_CONTENT */),
|
|
495
667
|
/** Path to the file to check */
|
|
496
|
-
path:
|
|
668
|
+
path: z17.string(),
|
|
497
669
|
/** Content checks to perform */
|
|
498
670
|
checks: FileContentCheckSchema
|
|
499
671
|
});
|
|
500
672
|
|
|
501
673
|
// src/test/build-check.ts
|
|
502
|
-
import { z as
|
|
674
|
+
import { z as z18 } from "zod";
|
|
503
675
|
var BuildCheckTestSchema = BaseTestSchema.extend({
|
|
504
|
-
type:
|
|
676
|
+
type: z18.literal("BUILD_CHECK" /* BUILD_CHECK */),
|
|
505
677
|
/** Build command to execute */
|
|
506
|
-
command:
|
|
678
|
+
command: z18.string(),
|
|
507
679
|
/** Whether the build should succeed */
|
|
508
|
-
expectSuccess:
|
|
680
|
+
expectSuccess: z18.boolean(),
|
|
509
681
|
/** Maximum allowed warnings (optional) */
|
|
510
|
-
allowedWarnings:
|
|
682
|
+
allowedWarnings: z18.number().optional(),
|
|
511
683
|
/** Timeout in milliseconds */
|
|
512
|
-
timeout:
|
|
684
|
+
timeout: z18.number().optional()
|
|
513
685
|
});
|
|
514
686
|
|
|
515
687
|
// src/test/vitest.ts
|
|
516
|
-
import { z as
|
|
688
|
+
import { z as z19 } from "zod";
|
|
517
689
|
var VitestTestSchema = BaseTestSchema.extend({
|
|
518
|
-
type:
|
|
690
|
+
type: z19.literal("VITEST" /* VITEST */),
|
|
519
691
|
/** Test file content */
|
|
520
|
-
testFile:
|
|
692
|
+
testFile: z19.string(),
|
|
521
693
|
/** Name of the test file */
|
|
522
|
-
testFileName:
|
|
694
|
+
testFileName: z19.string(),
|
|
523
695
|
/** Minimum pass rate required (0-100) */
|
|
524
|
-
minPassRate:
|
|
696
|
+
minPassRate: z19.number().min(0).max(100)
|
|
525
697
|
});
|
|
526
698
|
|
|
527
699
|
// src/test/playwright-nl.ts
|
|
528
|
-
import { z as
|
|
700
|
+
import { z as z20 } from "zod";
|
|
529
701
|
var PlaywrightNLTestSchema = BaseTestSchema.extend({
|
|
530
|
-
type:
|
|
702
|
+
type: z20.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
|
|
531
703
|
/** Natural language steps to execute */
|
|
532
|
-
steps:
|
|
704
|
+
steps: z20.array(z20.string()),
|
|
533
705
|
/** Expected outcome description */
|
|
534
|
-
expectedOutcome:
|
|
706
|
+
expectedOutcome: z20.string(),
|
|
535
707
|
/** Timeout in milliseconds */
|
|
536
|
-
timeout:
|
|
708
|
+
timeout: z20.number().optional()
|
|
537
709
|
});
|
|
538
710
|
|
|
539
711
|
// src/test/index.ts
|
|
540
|
-
var TestSchema =
|
|
712
|
+
var TestSchema = z21.discriminatedUnion("type", [
|
|
541
713
|
LLMTestSchema,
|
|
542
714
|
ToolTestSchema,
|
|
543
715
|
SiteConfigTestSchema,
|
|
@@ -550,33 +722,33 @@ var TestSchema = z20.discriminatedUnion("type", [
|
|
|
550
722
|
]);
|
|
551
723
|
|
|
552
724
|
// src/scenario/environment.ts
|
|
553
|
-
import { z as
|
|
554
|
-
var LocalProjectConfigSchema =
|
|
725
|
+
import { z as z22 } from "zod";
|
|
726
|
+
var LocalProjectConfigSchema = z22.object({
|
|
555
727
|
/** Template ID to use for the local project */
|
|
556
|
-
templateId:
|
|
728
|
+
templateId: z22.string().optional(),
|
|
557
729
|
/** Files to create in the project */
|
|
558
|
-
files:
|
|
559
|
-
|
|
560
|
-
path:
|
|
561
|
-
content:
|
|
730
|
+
files: z22.array(
|
|
731
|
+
z22.object({
|
|
732
|
+
path: z22.string().min(1),
|
|
733
|
+
content: z22.string().min(1)
|
|
562
734
|
})
|
|
563
735
|
).optional()
|
|
564
736
|
});
|
|
565
|
-
var MetaSiteConfigSchema =
|
|
566
|
-
configurations:
|
|
567
|
-
|
|
568
|
-
name:
|
|
569
|
-
apiCalls:
|
|
570
|
-
|
|
571
|
-
url:
|
|
572
|
-
method:
|
|
573
|
-
body:
|
|
737
|
+
var MetaSiteConfigSchema = z22.object({
|
|
738
|
+
configurations: z22.array(
|
|
739
|
+
z22.object({
|
|
740
|
+
name: z22.string().min(1),
|
|
741
|
+
apiCalls: z22.array(
|
|
742
|
+
z22.object({
|
|
743
|
+
url: z22.string().url(),
|
|
744
|
+
method: z22.enum(["POST", "PUT"]),
|
|
745
|
+
body: z22.string()
|
|
574
746
|
})
|
|
575
747
|
)
|
|
576
748
|
})
|
|
577
749
|
).optional()
|
|
578
750
|
});
|
|
579
|
-
var EnvironmentSchema =
|
|
751
|
+
var EnvironmentSchema = z22.object({
|
|
580
752
|
/** Local project configuration */
|
|
581
753
|
localProject: LocalProjectConfigSchema.optional(),
|
|
582
754
|
/** Meta site configuration */
|
|
@@ -584,13 +756,13 @@ var EnvironmentSchema = z21.object({
|
|
|
584
756
|
});
|
|
585
757
|
|
|
586
758
|
// src/scenario/test-scenario.ts
|
|
587
|
-
import { z as
|
|
759
|
+
import { z as z25 } from "zod";
|
|
588
760
|
|
|
589
761
|
// src/assertion/assertion.ts
|
|
590
|
-
import { z as
|
|
762
|
+
import { z as z24 } from "zod";
|
|
591
763
|
|
|
592
764
|
// src/assertion/build-passed-command.ts
|
|
593
|
-
import { z as
|
|
765
|
+
import { z as z23 } from "zod";
|
|
594
766
|
var ALLOWED_BUILD_COMMANDS = [
|
|
595
767
|
"yarn build",
|
|
596
768
|
"npm run build",
|
|
@@ -616,10 +788,10 @@ function parseBuildCommandToArgv(command) {
|
|
|
616
788
|
return BUILD_COMMAND_ARGV[trimmed];
|
|
617
789
|
}
|
|
618
790
|
var enumTuple = ALLOWED_BUILD_COMMANDS;
|
|
619
|
-
var BuildPassedCommandStringSchema =
|
|
791
|
+
var BuildPassedCommandStringSchema = z23.enum(enumTuple);
|
|
620
792
|
|
|
621
793
|
// src/assertion/assertion.ts
|
|
622
|
-
var AssertionTypeSchema =
|
|
794
|
+
var AssertionTypeSchema = z24.enum([
|
|
623
795
|
"skill_was_called",
|
|
624
796
|
"tool_called_with_param",
|
|
625
797
|
"build_passed",
|
|
@@ -628,61 +800,61 @@ var AssertionTypeSchema = z23.enum([
|
|
|
628
800
|
"llm_judge",
|
|
629
801
|
"api_call"
|
|
630
802
|
]);
|
|
631
|
-
var AssertionParameterTypeSchema =
|
|
803
|
+
var AssertionParameterTypeSchema = z24.enum([
|
|
632
804
|
"string",
|
|
633
805
|
"number",
|
|
634
806
|
"boolean"
|
|
635
807
|
]);
|
|
636
|
-
var AssertionParameterSchema =
|
|
808
|
+
var AssertionParameterSchema = z24.object({
|
|
637
809
|
/** Parameter name (used as key in params object) */
|
|
638
|
-
name:
|
|
810
|
+
name: z24.string().min(1),
|
|
639
811
|
/** Display label for the parameter */
|
|
640
|
-
label:
|
|
812
|
+
label: z24.string().min(1),
|
|
641
813
|
/** Parameter type */
|
|
642
814
|
type: AssertionParameterTypeSchema,
|
|
643
815
|
/** Whether this parameter is required */
|
|
644
|
-
required:
|
|
816
|
+
required: z24.boolean(),
|
|
645
817
|
/** Default value (optional, used when not provided) */
|
|
646
|
-
defaultValue:
|
|
818
|
+
defaultValue: z24.union([z24.string(), z24.number(), z24.boolean()]).optional(),
|
|
647
819
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
648
|
-
advanced:
|
|
820
|
+
advanced: z24.boolean().optional()
|
|
649
821
|
});
|
|
650
|
-
var ScenarioAssertionLinkSchema =
|
|
822
|
+
var ScenarioAssertionLinkSchema = z24.object({
|
|
651
823
|
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
652
|
-
assertionId:
|
|
824
|
+
assertionId: z24.string(),
|
|
653
825
|
/** Parameter values for this assertion in this scenario */
|
|
654
|
-
params:
|
|
655
|
-
|
|
656
|
-
|
|
826
|
+
params: z24.record(
|
|
827
|
+
z24.string(),
|
|
828
|
+
z24.union([z24.string(), z24.number(), z24.boolean(), z24.null()])
|
|
657
829
|
).optional()
|
|
658
830
|
});
|
|
659
|
-
var SkillWasCalledConfigSchema =
|
|
831
|
+
var SkillWasCalledConfigSchema = z24.object({
|
|
660
832
|
/** Names of the skills that must have been called */
|
|
661
|
-
skillNames:
|
|
833
|
+
skillNames: z24.array(z24.string().min(1)).min(1)
|
|
662
834
|
});
|
|
663
|
-
var CostConfigSchema =
|
|
835
|
+
var CostConfigSchema = z24.strictObject({
|
|
664
836
|
/** Maximum allowed cost in USD */
|
|
665
|
-
maxCostUsd:
|
|
837
|
+
maxCostUsd: z24.number().positive()
|
|
666
838
|
});
|
|
667
|
-
var ToolCalledWithParamConfigSchema =
|
|
839
|
+
var ToolCalledWithParamConfigSchema = z24.strictObject({
|
|
668
840
|
/** Name of the tool that must have been called */
|
|
669
|
-
toolName:
|
|
841
|
+
toolName: z24.string().min(1),
|
|
670
842
|
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
671
|
-
expectedParams:
|
|
843
|
+
expectedParams: z24.string().min(1).optional(),
|
|
672
844
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
673
|
-
requireSuccess:
|
|
845
|
+
requireSuccess: z24.boolean().optional()
|
|
674
846
|
});
|
|
675
|
-
var BuildPassedConfigSchema =
|
|
847
|
+
var BuildPassedConfigSchema = z24.strictObject({
|
|
676
848
|
/** Allowlisted command only (default at runtime: "yarn build") */
|
|
677
849
|
command: BuildPassedCommandStringSchema.optional(),
|
|
678
850
|
/** Expected exit code (default: 0) */
|
|
679
|
-
expectedExitCode:
|
|
851
|
+
expectedExitCode: z24.number().int().optional()
|
|
680
852
|
});
|
|
681
|
-
var TimeConfigSchema =
|
|
853
|
+
var TimeConfigSchema = z24.strictObject({
|
|
682
854
|
/** Maximum allowed duration in milliseconds */
|
|
683
|
-
maxDurationMs:
|
|
855
|
+
maxDurationMs: z24.number().int().positive()
|
|
684
856
|
});
|
|
685
|
-
var LlmJudgeConfigSchema =
|
|
857
|
+
var LlmJudgeConfigSchema = z24.object({
|
|
686
858
|
/**
|
|
687
859
|
* Prompt template with placeholders:
|
|
688
860
|
* - {{output}}: agent's final output
|
|
@@ -693,65 +865,65 @@ var LlmJudgeConfigSchema = z23.object({
|
|
|
693
865
|
* - {{trace}}: step-by-step trace of tool calls
|
|
694
866
|
* - Custom parameters defined in the parameters array
|
|
695
867
|
*/
|
|
696
|
-
prompt:
|
|
868
|
+
prompt: z24.string().min(1),
|
|
697
869
|
/** Minimum score to pass (0-10, default 7) */
|
|
698
|
-
minScore:
|
|
870
|
+
minScore: z24.number().int().min(0).max(10).optional(),
|
|
699
871
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
700
|
-
model:
|
|
872
|
+
model: z24.string().optional(),
|
|
701
873
|
/** Max output tokens */
|
|
702
|
-
maxTokens:
|
|
874
|
+
maxTokens: z24.number().int().optional(),
|
|
703
875
|
/** Temperature (0-1) */
|
|
704
|
-
temperature:
|
|
876
|
+
temperature: z24.number().min(0).max(1).optional(),
|
|
705
877
|
/** User-defined parameters for this assertion */
|
|
706
|
-
parameters:
|
|
878
|
+
parameters: z24.array(AssertionParameterSchema).optional()
|
|
707
879
|
});
|
|
708
|
-
var ApiCallConfigSchema =
|
|
880
|
+
var ApiCallConfigSchema = z24.strictObject({
|
|
709
881
|
/** URL to call */
|
|
710
|
-
url:
|
|
882
|
+
url: z24.string().min(1),
|
|
711
883
|
/** HTTP method (default GET) */
|
|
712
|
-
method:
|
|
884
|
+
method: z24.enum(["GET", "POST"]).optional(),
|
|
713
885
|
/** Request body (JSON string, for POST requests) */
|
|
714
|
-
requestBody:
|
|
886
|
+
requestBody: z24.string().optional(),
|
|
715
887
|
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
716
|
-
expectedResponse:
|
|
888
|
+
expectedResponse: z24.string().min(1),
|
|
717
889
|
/** Request headers as JSON string of key-value pairs */
|
|
718
|
-
requestHeaders:
|
|
890
|
+
requestHeaders: z24.string().optional(),
|
|
719
891
|
/** Request timeout in milliseconds (default 30000) */
|
|
720
|
-
timeoutMs:
|
|
892
|
+
timeoutMs: z24.number().int().positive().optional()
|
|
721
893
|
});
|
|
722
894
|
var AssertionBaseFields = {
|
|
723
895
|
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
724
|
-
negate:
|
|
896
|
+
negate: z24.boolean().optional()
|
|
725
897
|
};
|
|
726
898
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
727
|
-
type:
|
|
899
|
+
type: z24.literal("skill_was_called"),
|
|
728
900
|
...AssertionBaseFields
|
|
729
901
|
});
|
|
730
902
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
731
|
-
type:
|
|
903
|
+
type: z24.literal("tool_called_with_param"),
|
|
732
904
|
...AssertionBaseFields
|
|
733
905
|
});
|
|
734
906
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
735
|
-
type:
|
|
907
|
+
type: z24.literal("build_passed"),
|
|
736
908
|
...AssertionBaseFields
|
|
737
909
|
});
|
|
738
910
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
739
|
-
type:
|
|
911
|
+
type: z24.literal("cost"),
|
|
740
912
|
...AssertionBaseFields
|
|
741
913
|
});
|
|
742
914
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
743
|
-
type:
|
|
915
|
+
type: z24.literal("llm_judge"),
|
|
744
916
|
...AssertionBaseFields
|
|
745
917
|
});
|
|
746
918
|
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
747
|
-
type:
|
|
919
|
+
type: z24.literal("api_call"),
|
|
748
920
|
...AssertionBaseFields
|
|
749
921
|
});
|
|
750
922
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
751
|
-
type:
|
|
923
|
+
type: z24.literal("time_limit"),
|
|
752
924
|
...AssertionBaseFields
|
|
753
925
|
});
|
|
754
|
-
var AssertionSchema =
|
|
926
|
+
var AssertionSchema = z24.union([
|
|
755
927
|
SkillWasCalledAssertionSchema,
|
|
756
928
|
ToolCalledWithParamAssertionSchema,
|
|
757
929
|
BuildPassedAssertionSchema,
|
|
@@ -760,7 +932,7 @@ var AssertionSchema = z23.union([
|
|
|
760
932
|
LlmJudgeAssertionSchema,
|
|
761
933
|
ApiCallAssertionSchema
|
|
762
934
|
]);
|
|
763
|
-
var AssertionConfigSchema =
|
|
935
|
+
var AssertionConfigSchema = z24.union([
|
|
764
936
|
LlmJudgeConfigSchema,
|
|
765
937
|
// requires prompt - check first
|
|
766
938
|
SkillWasCalledConfigSchema,
|
|
@@ -775,7 +947,7 @@ var AssertionConfigSchema = z23.union([
|
|
|
775
947
|
// requires maxCostUsd, uses strictObject
|
|
776
948
|
BuildPassedConfigSchema,
|
|
777
949
|
// all optional, uses strictObject to reject unknown keys
|
|
778
|
-
|
|
950
|
+
z24.object({})
|
|
779
951
|
// fallback empty config
|
|
780
952
|
]);
|
|
781
953
|
function validateAssertionConfig(type, config) {
|
|
@@ -1021,35 +1193,35 @@ function getSystemAssertion(id) {
|
|
|
1021
1193
|
|
|
1022
1194
|
// src/scenario/test-scenario.ts
|
|
1023
1195
|
var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
|
|
1024
|
-
var TriggerPromptImageSchema =
|
|
1196
|
+
var TriggerPromptImageSchema = z25.object({
|
|
1025
1197
|
/** Base64-encoded image data (no data URL prefix) */
|
|
1026
|
-
base64:
|
|
1198
|
+
base64: z25.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
|
|
1027
1199
|
/** MIME type of the image */
|
|
1028
|
-
mediaType:
|
|
1200
|
+
mediaType: z25.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
|
1029
1201
|
/** Original filename of the image */
|
|
1030
|
-
name:
|
|
1202
|
+
name: z25.string()
|
|
1031
1203
|
});
|
|
1032
|
-
var ExpectedFileSchema =
|
|
1204
|
+
var ExpectedFileSchema = z25.object({
|
|
1033
1205
|
/** Relative path where the file should be created */
|
|
1034
|
-
path:
|
|
1206
|
+
path: z25.string(),
|
|
1035
1207
|
/** Optional expected content */
|
|
1036
|
-
content:
|
|
1208
|
+
content: z25.string().optional()
|
|
1037
1209
|
});
|
|
1038
1210
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
1039
1211
|
/** The prompt sent to the agent to trigger the task */
|
|
1040
|
-
triggerPrompt:
|
|
1212
|
+
triggerPrompt: z25.string().min(10),
|
|
1041
1213
|
/** ID of the template to use for this scenario (null = no template) */
|
|
1042
|
-
templateId:
|
|
1214
|
+
templateId: z25.string().nullish(),
|
|
1043
1215
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
1044
|
-
assertions:
|
|
1216
|
+
assertions: z25.array(AssertionSchema).optional(),
|
|
1045
1217
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
1046
|
-
assertionIds:
|
|
1218
|
+
assertionIds: z25.array(z25.string()).optional(),
|
|
1047
1219
|
/** Linked assertions with per-scenario parameter values */
|
|
1048
|
-
assertionLinks:
|
|
1220
|
+
assertionLinks: z25.array(ScenarioAssertionLinkSchema).optional(),
|
|
1049
1221
|
/** Tags for categorisation and filtering */
|
|
1050
|
-
tags:
|
|
1222
|
+
tags: z25.array(z25.string()).optional(),
|
|
1051
1223
|
/** Base64-encoded images attached to the trigger prompt (max 3) */
|
|
1052
|
-
triggerPromptImages:
|
|
1224
|
+
triggerPromptImages: z25.array(TriggerPromptImageSchema).max(3).optional()
|
|
1053
1225
|
});
|
|
1054
1226
|
function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
1055
1227
|
if (!links) return;
|
|
@@ -1060,7 +1232,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1060
1232
|
if (cmd === void 0 || cmd === null) continue;
|
|
1061
1233
|
if (typeof cmd !== "string") {
|
|
1062
1234
|
ctx.addIssue({
|
|
1063
|
-
code:
|
|
1235
|
+
code: z25.ZodIssueCode.custom,
|
|
1064
1236
|
message: "build_passed command must be a string",
|
|
1065
1237
|
path: ["assertionLinks", i, "params", "command"]
|
|
1066
1238
|
});
|
|
@@ -1068,7 +1240,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1068
1240
|
}
|
|
1069
1241
|
if (!isAllowedBuildCommandString(cmd)) {
|
|
1070
1242
|
ctx.addIssue({
|
|
1071
|
-
code:
|
|
1243
|
+
code: z25.ZodIssueCode.custom,
|
|
1072
1244
|
message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
|
|
1073
1245
|
path: ["assertionLinks", i, "params", "command"]
|
|
1074
1246
|
});
|
|
@@ -1091,19 +1263,19 @@ var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().super
|
|
|
1091
1263
|
});
|
|
1092
1264
|
|
|
1093
1265
|
// src/scenario/batch-import.ts
|
|
1094
|
-
import { z as
|
|
1266
|
+
import { z as z26 } from "zod";
|
|
1095
1267
|
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1096
|
-
var BatchAssertionLinkSchema =
|
|
1097
|
-
|
|
1268
|
+
var BatchAssertionLinkSchema = z26.union([
|
|
1269
|
+
z26.string().min(1),
|
|
1098
1270
|
ScenarioAssertionLinkSchema
|
|
1099
1271
|
]);
|
|
1100
|
-
var BatchScenarioEntrySchema =
|
|
1101
|
-
name:
|
|
1102
|
-
description:
|
|
1103
|
-
triggerPrompt:
|
|
1104
|
-
templateId:
|
|
1105
|
-
tags:
|
|
1106
|
-
assertionLinks:
|
|
1272
|
+
var BatchScenarioEntrySchema = z26.object({
|
|
1273
|
+
name: z26.string().min(1, "name: Required"),
|
|
1274
|
+
description: z26.string().optional().default(""),
|
|
1275
|
+
triggerPrompt: z26.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1276
|
+
templateId: z26.string().nullish(),
|
|
1277
|
+
tags: z26.array(z26.string()).optional(),
|
|
1278
|
+
assertionLinks: z26.array(BatchAssertionLinkSchema).optional()
|
|
1107
1279
|
}).superRefine((data, ctx) => {
|
|
1108
1280
|
if (!data.assertionLinks) return;
|
|
1109
1281
|
const objectLinks = data.assertionLinks.filter(
|
|
@@ -1113,8 +1285,8 @@ var BatchScenarioEntrySchema = z25.object({
|
|
|
1113
1285
|
validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
|
|
1114
1286
|
}
|
|
1115
1287
|
});
|
|
1116
|
-
var BatchImportPayloadSchema =
|
|
1117
|
-
scenarios:
|
|
1288
|
+
var BatchImportPayloadSchema = z26.object({
|
|
1289
|
+
scenarios: z26.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1118
1290
|
});
|
|
1119
1291
|
var BATCH_IMPORT_LIMITS = {
|
|
1120
1292
|
MAX_SCENARIOS: 100,
|
|
@@ -1136,29 +1308,29 @@ function normalizeBatchAssertionLink(link) {
|
|
|
1136
1308
|
}
|
|
1137
1309
|
return link;
|
|
1138
1310
|
}
|
|
1139
|
-
var BatchResultItemSchema =
|
|
1140
|
-
index:
|
|
1141
|
-
name:
|
|
1142
|
-
status:
|
|
1143
|
-
id:
|
|
1144
|
-
errors:
|
|
1145
|
-
});
|
|
1146
|
-
var BatchSummarySchema =
|
|
1147
|
-
total:
|
|
1148
|
-
valid:
|
|
1149
|
-
invalid:
|
|
1150
|
-
created:
|
|
1151
|
-
});
|
|
1152
|
-
var BatchImportResponseSchema =
|
|
1311
|
+
var BatchResultItemSchema = z26.object({
|
|
1312
|
+
index: z26.number(),
|
|
1313
|
+
name: z26.string(),
|
|
1314
|
+
status: z26.enum(["valid", "invalid"]),
|
|
1315
|
+
id: z26.string().nullable().optional(),
|
|
1316
|
+
errors: z26.array(z26.string()).optional()
|
|
1317
|
+
});
|
|
1318
|
+
var BatchSummarySchema = z26.object({
|
|
1319
|
+
total: z26.number(),
|
|
1320
|
+
valid: z26.number(),
|
|
1321
|
+
invalid: z26.number(),
|
|
1322
|
+
created: z26.number()
|
|
1323
|
+
});
|
|
1324
|
+
var BatchImportResponseSchema = z26.object({
|
|
1153
1325
|
summary: BatchSummarySchema,
|
|
1154
|
-
results:
|
|
1326
|
+
results: z26.array(BatchResultItemSchema)
|
|
1155
1327
|
});
|
|
1156
1328
|
|
|
1157
1329
|
// src/suite/test-suite.ts
|
|
1158
|
-
import { z as
|
|
1330
|
+
import { z as z27 } from "zod";
|
|
1159
1331
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1160
1332
|
/** IDs of test scenarios in this suite */
|
|
1161
|
-
scenarioIds:
|
|
1333
|
+
scenarioIds: z27.array(z27.string())
|
|
1162
1334
|
});
|
|
1163
1335
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1164
1336
|
id: true,
|
|
@@ -1169,21 +1341,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1169
1341
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1170
1342
|
|
|
1171
1343
|
// src/evaluation/metrics.ts
|
|
1172
|
-
import { z as
|
|
1173
|
-
var TokenUsageSchema =
|
|
1174
|
-
prompt:
|
|
1175
|
-
completion:
|
|
1176
|
-
total:
|
|
1177
|
-
});
|
|
1178
|
-
var EvalMetricsSchema =
|
|
1179
|
-
totalAssertions:
|
|
1180
|
-
passed:
|
|
1181
|
-
failed:
|
|
1182
|
-
skipped:
|
|
1183
|
-
errors:
|
|
1184
|
-
passRate:
|
|
1185
|
-
avgDuration:
|
|
1186
|
-
totalDuration:
|
|
1344
|
+
import { z as z28 } from "zod";
|
|
1345
|
+
var TokenUsageSchema = z28.object({
|
|
1346
|
+
prompt: z28.number(),
|
|
1347
|
+
completion: z28.number(),
|
|
1348
|
+
total: z28.number()
|
|
1349
|
+
});
|
|
1350
|
+
var EvalMetricsSchema = z28.object({
|
|
1351
|
+
totalAssertions: z28.number(),
|
|
1352
|
+
passed: z28.number(),
|
|
1353
|
+
failed: z28.number(),
|
|
1354
|
+
skipped: z28.number(),
|
|
1355
|
+
errors: z28.number(),
|
|
1356
|
+
passRate: z28.number(),
|
|
1357
|
+
avgDuration: z28.number(),
|
|
1358
|
+
totalDuration: z28.number()
|
|
1187
1359
|
});
|
|
1188
1360
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1189
1361
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1193,7 +1365,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1193
1365
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1194
1366
|
return EvalStatus2;
|
|
1195
1367
|
})(EvalStatus || {});
|
|
1196
|
-
var EvalStatusSchema =
|
|
1368
|
+
var EvalStatusSchema = z28.enum(EvalStatus);
|
|
1197
1369
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1198
1370
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1199
1371
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1201,54 +1373,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1201
1373
|
LLMStepType2["THINKING"] = "thinking";
|
|
1202
1374
|
return LLMStepType2;
|
|
1203
1375
|
})(LLMStepType || {});
|
|
1204
|
-
var LLMTraceStepSchema =
|
|
1205
|
-
id:
|
|
1206
|
-
stepNumber:
|
|
1207
|
-
type:
|
|
1208
|
-
model:
|
|
1209
|
-
provider:
|
|
1210
|
-
startedAt:
|
|
1211
|
-
durationMs:
|
|
1376
|
+
var LLMTraceStepSchema = z28.object({
|
|
1377
|
+
id: z28.string(),
|
|
1378
|
+
stepNumber: z28.number(),
|
|
1379
|
+
type: z28.enum(LLMStepType),
|
|
1380
|
+
model: z28.string(),
|
|
1381
|
+
provider: z28.string(),
|
|
1382
|
+
startedAt: z28.string(),
|
|
1383
|
+
durationMs: z28.number(),
|
|
1212
1384
|
tokenUsage: TokenUsageSchema,
|
|
1213
|
-
costUsd:
|
|
1214
|
-
toolName:
|
|
1215
|
-
toolArguments:
|
|
1216
|
-
inputPreview:
|
|
1217
|
-
outputPreview:
|
|
1218
|
-
success:
|
|
1219
|
-
error:
|
|
1220
|
-
turnIndex:
|
|
1221
|
-
});
|
|
1222
|
-
var LLMBreakdownStatsSchema =
|
|
1223
|
-
count:
|
|
1224
|
-
durationMs:
|
|
1225
|
-
tokens:
|
|
1226
|
-
costUsd:
|
|
1227
|
-
});
|
|
1228
|
-
var LLMTraceSummarySchema =
|
|
1229
|
-
totalSteps:
|
|
1230
|
-
totalTurns:
|
|
1231
|
-
totalDurationMs:
|
|
1385
|
+
costUsd: z28.number(),
|
|
1386
|
+
toolName: z28.string().optional(),
|
|
1387
|
+
toolArguments: z28.string().optional(),
|
|
1388
|
+
inputPreview: z28.string().optional(),
|
|
1389
|
+
outputPreview: z28.string().optional(),
|
|
1390
|
+
success: z28.boolean(),
|
|
1391
|
+
error: z28.string().optional(),
|
|
1392
|
+
turnIndex: z28.number().optional()
|
|
1393
|
+
});
|
|
1394
|
+
var LLMBreakdownStatsSchema = z28.object({
|
|
1395
|
+
count: z28.number(),
|
|
1396
|
+
durationMs: z28.number(),
|
|
1397
|
+
tokens: z28.number(),
|
|
1398
|
+
costUsd: z28.number()
|
|
1399
|
+
});
|
|
1400
|
+
var LLMTraceSummarySchema = z28.object({
|
|
1401
|
+
totalSteps: z28.number(),
|
|
1402
|
+
totalTurns: z28.number().optional(),
|
|
1403
|
+
totalDurationMs: z28.number(),
|
|
1232
1404
|
totalTokens: TokenUsageSchema,
|
|
1233
|
-
totalCostUsd:
|
|
1234
|
-
stepTypeBreakdown:
|
|
1235
|
-
modelBreakdown:
|
|
1236
|
-
modelsUsed:
|
|
1237
|
-
});
|
|
1238
|
-
var LLMTraceSchema =
|
|
1239
|
-
id:
|
|
1240
|
-
steps:
|
|
1405
|
+
totalCostUsd: z28.number(),
|
|
1406
|
+
stepTypeBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema).optional(),
|
|
1407
|
+
modelBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema),
|
|
1408
|
+
modelsUsed: z28.array(z28.string())
|
|
1409
|
+
});
|
|
1410
|
+
var LLMTraceSchema = z28.object({
|
|
1411
|
+
id: z28.string(),
|
|
1412
|
+
steps: z28.array(LLMTraceStepSchema),
|
|
1241
1413
|
summary: LLMTraceSummarySchema
|
|
1242
1414
|
});
|
|
1243
1415
|
|
|
1244
1416
|
// src/evaluation/eval-result.ts
|
|
1245
|
-
import { z as
|
|
1417
|
+
import { z as z32 } from "zod";
|
|
1246
1418
|
|
|
1247
1419
|
// src/evaluation/eval-run.ts
|
|
1248
|
-
import { z as
|
|
1420
|
+
import { z as z30 } from "zod";
|
|
1249
1421
|
|
|
1250
1422
|
// src/evaluation/live-trace.ts
|
|
1251
|
-
import { z as
|
|
1423
|
+
import { z as z29 } from "zod";
|
|
1252
1424
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1253
1425
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1254
1426
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1262,37 +1434,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1262
1434
|
LiveTraceEventType2["USER"] = "user";
|
|
1263
1435
|
return LiveTraceEventType2;
|
|
1264
1436
|
})(LiveTraceEventType || {});
|
|
1265
|
-
var LiveTraceEventSchema =
|
|
1437
|
+
var LiveTraceEventSchema = z29.object({
|
|
1266
1438
|
/** The evaluation run ID */
|
|
1267
|
-
evalRunId:
|
|
1439
|
+
evalRunId: z29.string(),
|
|
1268
1440
|
/** The scenario ID being executed */
|
|
1269
|
-
scenarioId:
|
|
1441
|
+
scenarioId: z29.string(),
|
|
1270
1442
|
/** The scenario name for display */
|
|
1271
|
-
scenarioName:
|
|
1443
|
+
scenarioName: z29.string(),
|
|
1272
1444
|
/** The target ID (skill, agent, etc.) */
|
|
1273
|
-
targetId:
|
|
1445
|
+
targetId: z29.string(),
|
|
1274
1446
|
/** The target name for display */
|
|
1275
|
-
targetName:
|
|
1447
|
+
targetName: z29.string(),
|
|
1276
1448
|
/** Step number in the current scenario execution */
|
|
1277
|
-
stepNumber:
|
|
1449
|
+
stepNumber: z29.number(),
|
|
1278
1450
|
/** Type of trace event */
|
|
1279
|
-
type:
|
|
1451
|
+
type: z29.enum(LiveTraceEventType),
|
|
1280
1452
|
/** Tool name if this is a tool_use event */
|
|
1281
|
-
toolName:
|
|
1453
|
+
toolName: z29.string().optional(),
|
|
1282
1454
|
/** Tool arguments preview (truncated JSON) */
|
|
1283
|
-
toolArgs:
|
|
1455
|
+
toolArgs: z29.string().optional(),
|
|
1284
1456
|
/** Output preview (truncated text) */
|
|
1285
|
-
outputPreview:
|
|
1457
|
+
outputPreview: z29.string().optional(),
|
|
1286
1458
|
/** File path for file operations */
|
|
1287
|
-
filePath:
|
|
1459
|
+
filePath: z29.string().optional(),
|
|
1288
1460
|
/** Elapsed time in milliseconds for progress events */
|
|
1289
|
-
elapsedMs:
|
|
1461
|
+
elapsedMs: z29.number().optional(),
|
|
1290
1462
|
/** Thinking/reasoning text from Claude */
|
|
1291
|
-
thinking:
|
|
1463
|
+
thinking: z29.string().optional(),
|
|
1292
1464
|
/** Timestamp when this event occurred */
|
|
1293
|
-
timestamp:
|
|
1465
|
+
timestamp: z29.string(),
|
|
1294
1466
|
/** Whether this is the final event for this scenario */
|
|
1295
|
-
isComplete:
|
|
1467
|
+
isComplete: z29.boolean()
|
|
1296
1468
|
});
|
|
1297
1469
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1298
1470
|
function parseTraceEventLine(line) {
|
|
@@ -1321,40 +1493,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1321
1493
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1322
1494
|
return TriggerType2;
|
|
1323
1495
|
})(TriggerType || {});
|
|
1324
|
-
var TriggerMetadataSchema =
|
|
1325
|
-
version:
|
|
1326
|
-
resourceUpdated:
|
|
1327
|
-
scheduleId:
|
|
1496
|
+
var TriggerMetadataSchema = z30.object({
|
|
1497
|
+
version: z30.string().optional(),
|
|
1498
|
+
resourceUpdated: z30.array(z30.string()).optional(),
|
|
1499
|
+
scheduleId: z30.string().optional()
|
|
1328
1500
|
});
|
|
1329
|
-
var TriggerSchema =
|
|
1330
|
-
id:
|
|
1501
|
+
var TriggerSchema = z30.object({
|
|
1502
|
+
id: z30.string(),
|
|
1331
1503
|
metadata: TriggerMetadataSchema.optional(),
|
|
1332
|
-
type:
|
|
1504
|
+
type: z30.nativeEnum(TriggerType)
|
|
1333
1505
|
});
|
|
1334
|
-
var DiffLineTypeSchema =
|
|
1335
|
-
var DiffLineSchema =
|
|
1506
|
+
var DiffLineTypeSchema = z30.enum(["added", "removed", "unchanged"]);
|
|
1507
|
+
var DiffLineSchema = z30.object({
|
|
1336
1508
|
type: DiffLineTypeSchema,
|
|
1337
|
-
content:
|
|
1338
|
-
lineNumber:
|
|
1339
|
-
});
|
|
1340
|
-
var DiffContentSchema =
|
|
1341
|
-
path:
|
|
1342
|
-
expected:
|
|
1343
|
-
actual:
|
|
1344
|
-
diffLines:
|
|
1345
|
-
renamedFrom:
|
|
1509
|
+
content: z30.string(),
|
|
1510
|
+
lineNumber: z30.number()
|
|
1511
|
+
});
|
|
1512
|
+
var DiffContentSchema = z30.object({
|
|
1513
|
+
path: z30.string(),
|
|
1514
|
+
expected: z30.string(),
|
|
1515
|
+
actual: z30.string(),
|
|
1516
|
+
diffLines: z30.array(DiffLineSchema),
|
|
1517
|
+
renamedFrom: z30.string().optional(),
|
|
1346
1518
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1347
|
-
isInfrastructure:
|
|
1519
|
+
isInfrastructure: z30.boolean().optional()
|
|
1348
1520
|
});
|
|
1349
|
-
var CommandExecutionSchema =
|
|
1350
|
-
command:
|
|
1351
|
-
exitCode:
|
|
1352
|
-
output:
|
|
1353
|
-
duration:
|
|
1521
|
+
var CommandExecutionSchema = z30.object({
|
|
1522
|
+
command: z30.string(),
|
|
1523
|
+
exitCode: z30.number(),
|
|
1524
|
+
output: z30.string().optional(),
|
|
1525
|
+
duration: z30.number()
|
|
1354
1526
|
});
|
|
1355
|
-
var FileModificationSchema =
|
|
1356
|
-
path:
|
|
1357
|
-
action:
|
|
1527
|
+
var FileModificationSchema = z30.object({
|
|
1528
|
+
path: z30.string(),
|
|
1529
|
+
action: z30.enum(["created", "modified", "deleted"])
|
|
1358
1530
|
});
|
|
1359
1531
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1360
1532
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1362,62 +1534,62 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1362
1534
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1363
1535
|
return TemplateFileStatus2;
|
|
1364
1536
|
})(TemplateFileStatus || {});
|
|
1365
|
-
var TemplateFileSchema =
|
|
1537
|
+
var TemplateFileSchema = z30.object({
|
|
1366
1538
|
/** Relative path within the template */
|
|
1367
|
-
path:
|
|
1539
|
+
path: z30.string(),
|
|
1368
1540
|
/** Full file content after execution */
|
|
1369
|
-
content:
|
|
1541
|
+
content: z30.string(),
|
|
1370
1542
|
/** File status (new, modified, unchanged) */
|
|
1371
|
-
status:
|
|
1543
|
+
status: z30.enum(["new", "modified", "unchanged"]),
|
|
1372
1544
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1373
|
-
isInfrastructure:
|
|
1545
|
+
isInfrastructure: z30.boolean().optional()
|
|
1374
1546
|
});
|
|
1375
|
-
var ApiCallSchema =
|
|
1376
|
-
endpoint:
|
|
1377
|
-
tokensUsed:
|
|
1378
|
-
duration:
|
|
1547
|
+
var ApiCallSchema = z30.object({
|
|
1548
|
+
endpoint: z30.string(),
|
|
1549
|
+
tokensUsed: z30.number(),
|
|
1550
|
+
duration: z30.number()
|
|
1379
1551
|
});
|
|
1380
|
-
var ExecutionTraceSchema =
|
|
1381
|
-
commands:
|
|
1382
|
-
filesModified:
|
|
1383
|
-
apiCalls:
|
|
1384
|
-
totalDuration:
|
|
1552
|
+
var ExecutionTraceSchema = z30.object({
|
|
1553
|
+
commands: z30.array(CommandExecutionSchema),
|
|
1554
|
+
filesModified: z30.array(FileModificationSchema),
|
|
1555
|
+
apiCalls: z30.array(ApiCallSchema),
|
|
1556
|
+
totalDuration: z30.number()
|
|
1385
1557
|
});
|
|
1386
|
-
var RunAnalysisFindingSchema =
|
|
1387
|
-
category:
|
|
1558
|
+
var RunAnalysisFindingSchema = z30.object({
|
|
1559
|
+
category: z30.enum([
|
|
1388
1560
|
"failure_pattern",
|
|
1389
1561
|
"cost_waste",
|
|
1390
1562
|
"flakiness",
|
|
1391
1563
|
"inefficiency",
|
|
1392
1564
|
"positive"
|
|
1393
1565
|
]),
|
|
1394
|
-
severity:
|
|
1395
|
-
description:
|
|
1396
|
-
affectedScenarios:
|
|
1397
|
-
recommendation:
|
|
1566
|
+
severity: z30.enum(["high", "medium", "low"]),
|
|
1567
|
+
description: z30.string(),
|
|
1568
|
+
affectedScenarios: z30.array(z30.string()),
|
|
1569
|
+
recommendation: z30.string().optional()
|
|
1398
1570
|
});
|
|
1399
|
-
var RunAnalysisSchema =
|
|
1400
|
-
generatedAt:
|
|
1401
|
-
summary:
|
|
1402
|
-
findings:
|
|
1571
|
+
var RunAnalysisSchema = z30.object({
|
|
1572
|
+
generatedAt: z30.string(),
|
|
1573
|
+
summary: z30.string(),
|
|
1574
|
+
findings: z30.array(RunAnalysisFindingSchema)
|
|
1403
1575
|
});
|
|
1404
1576
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1405
1577
|
/** Agent ID for this run */
|
|
1406
|
-
agentId:
|
|
1578
|
+
agentId: z30.string().optional(),
|
|
1407
1579
|
/** Preset ID that originated this run (optional) */
|
|
1408
|
-
presetId:
|
|
1580
|
+
presetId: z30.string().optional(),
|
|
1409
1581
|
/** Skill IDs for this run */
|
|
1410
|
-
skillIds:
|
|
1582
|
+
skillIds: z30.array(z30.string()).optional(),
|
|
1411
1583
|
/** Map of skillId to skillVersionId for this run */
|
|
1412
|
-
skillVersions:
|
|
1584
|
+
skillVersions: z30.record(z30.string(), z30.string()).optional(),
|
|
1413
1585
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1414
|
-
scenarioIds:
|
|
1586
|
+
scenarioIds: z30.array(z30.string()),
|
|
1415
1587
|
/** Current status */
|
|
1416
1588
|
status: EvalStatusSchema,
|
|
1417
1589
|
/** Progress percentage (0-100) */
|
|
1418
|
-
progress:
|
|
1590
|
+
progress: z30.number(),
|
|
1419
1591
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1420
|
-
results:
|
|
1592
|
+
results: z30.array(z30.lazy(() => EvalRunResultSchema)),
|
|
1421
1593
|
/** Aggregated metrics across all results */
|
|
1422
1594
|
aggregateMetrics: EvalMetricsSchema,
|
|
1423
1595
|
/** Aggregated LLM trace summary */
|
|
@@ -1425,41 +1597,45 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1425
1597
|
/** What triggered this run */
|
|
1426
1598
|
trigger: TriggerSchema.optional(),
|
|
1427
1599
|
/** When the run started (set when evaluation is triggered) */
|
|
1428
|
-
startedAt:
|
|
1600
|
+
startedAt: z30.string().optional(),
|
|
1429
1601
|
/** When the run completed */
|
|
1430
|
-
completedAt:
|
|
1602
|
+
completedAt: z30.string().optional(),
|
|
1431
1603
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1432
|
-
liveTraceEvents:
|
|
1604
|
+
liveTraceEvents: z30.array(LiveTraceEventSchema).optional(),
|
|
1433
1605
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1434
|
-
jobId:
|
|
1606
|
+
jobId: z30.string().optional(),
|
|
1435
1607
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1436
|
-
jobStatus:
|
|
1608
|
+
jobStatus: z30.string().optional(),
|
|
1437
1609
|
/** Remote job error message if the job failed */
|
|
1438
|
-
jobError:
|
|
1610
|
+
jobError: z30.string().optional(),
|
|
1439
1611
|
/** Timestamp of the last job status check */
|
|
1440
|
-
jobStatusCheckedAt:
|
|
1612
|
+
jobStatusCheckedAt: z30.string().optional(),
|
|
1441
1613
|
/** MCP server IDs to enable for this run (optional) */
|
|
1442
|
-
mcpIds:
|
|
1614
|
+
mcpIds: z30.array(z30.string()).optional(),
|
|
1443
1615
|
/** Sub-agent IDs to enable for this run (optional) */
|
|
1444
|
-
subAgentIds:
|
|
1616
|
+
subAgentIds: z30.array(z30.string()).optional(),
|
|
1445
1617
|
/** Rule IDs to enable for this run (optional) */
|
|
1446
|
-
ruleIds:
|
|
1618
|
+
ruleIds: z30.array(z30.string()).optional(),
|
|
1619
|
+
/** Unified capability IDs (replaces skill/mcp/subAgent/ruleIds) */
|
|
1620
|
+
capabilityIds: z30.array(z30.string()).optional(),
|
|
1621
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
1622
|
+
capabilityVersions: z30.record(z30.string(), z30.string()).optional(),
|
|
1447
1623
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1448
|
-
tags:
|
|
1624
|
+
tags: z30.array(z30.string()).optional(),
|
|
1449
1625
|
/** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
|
|
1450
|
-
runsPerScenario:
|
|
1626
|
+
runsPerScenario: z30.number().int().min(1).max(20).optional(),
|
|
1451
1627
|
/** Snapshot of agent configuration captured at run creation time */
|
|
1452
|
-
agentSnapshot:
|
|
1453
|
-
name:
|
|
1628
|
+
agentSnapshot: z30.object({
|
|
1629
|
+
name: z30.string().optional(),
|
|
1454
1630
|
agentType: AgentTypeSchema.optional(),
|
|
1455
1631
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1456
|
-
systemPrompt:
|
|
1632
|
+
systemPrompt: z30.string().nullable().optional(),
|
|
1457
1633
|
modelConfig: ModelConfigSchema.optional()
|
|
1458
1634
|
}).optional(),
|
|
1459
1635
|
/** UUID linking all runs in a comparison group */
|
|
1460
|
-
comparisonGroupId:
|
|
1636
|
+
comparisonGroupId: z30.string().optional(),
|
|
1461
1637
|
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1462
|
-
comparisonLabel:
|
|
1638
|
+
comparisonLabel: z30.string().optional(),
|
|
1463
1639
|
/** LLM-generated analysis of the completed run */
|
|
1464
1640
|
runAnalysis: RunAnalysisSchema.optional()
|
|
1465
1641
|
});
|
|
@@ -1477,60 +1653,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1477
1653
|
agentSnapshot: true
|
|
1478
1654
|
}).extend({
|
|
1479
1655
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1480
|
-
scenarioIds:
|
|
1656
|
+
scenarioIds: z30.array(z30.string()).optional()
|
|
1481
1657
|
}).refine(
|
|
1482
1658
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1483
1659
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1484
1660
|
);
|
|
1485
|
-
var EvaluationProgressSchema =
|
|
1486
|
-
runId:
|
|
1487
|
-
targetId:
|
|
1488
|
-
totalScenarios:
|
|
1489
|
-
completedScenarios:
|
|
1490
|
-
scenarioProgress:
|
|
1491
|
-
|
|
1492
|
-
scenarioId:
|
|
1493
|
-
currentStep:
|
|
1494
|
-
error:
|
|
1661
|
+
var EvaluationProgressSchema = z30.object({
|
|
1662
|
+
runId: z30.string(),
|
|
1663
|
+
targetId: z30.string(),
|
|
1664
|
+
totalScenarios: z30.number(),
|
|
1665
|
+
completedScenarios: z30.number(),
|
|
1666
|
+
scenarioProgress: z30.array(
|
|
1667
|
+
z30.object({
|
|
1668
|
+
scenarioId: z30.string(),
|
|
1669
|
+
currentStep: z30.string(),
|
|
1670
|
+
error: z30.string().optional()
|
|
1495
1671
|
})
|
|
1496
1672
|
),
|
|
1497
|
-
createdAt:
|
|
1498
|
-
});
|
|
1499
|
-
var EvaluationLogSchema =
|
|
1500
|
-
runId:
|
|
1501
|
-
scenarioId:
|
|
1502
|
-
log:
|
|
1503
|
-
level:
|
|
1504
|
-
message:
|
|
1505
|
-
args:
|
|
1506
|
-
error:
|
|
1673
|
+
createdAt: z30.number()
|
|
1674
|
+
});
|
|
1675
|
+
var EvaluationLogSchema = z30.object({
|
|
1676
|
+
runId: z30.string(),
|
|
1677
|
+
scenarioId: z30.string(),
|
|
1678
|
+
log: z30.object({
|
|
1679
|
+
level: z30.enum(["info", "error", "debug"]),
|
|
1680
|
+
message: z30.string().optional(),
|
|
1681
|
+
args: z30.array(z30.any()).optional(),
|
|
1682
|
+
error: z30.string().optional()
|
|
1507
1683
|
})
|
|
1508
1684
|
});
|
|
1509
1685
|
var LLM_TIMEOUT = 12e4;
|
|
1510
1686
|
|
|
1511
1687
|
// src/evaluation/conversation.ts
|
|
1512
|
-
import { z as
|
|
1513
|
-
var TextBlockSchema =
|
|
1514
|
-
type:
|
|
1515
|
-
text:
|
|
1516
|
-
});
|
|
1517
|
-
var ThinkingBlockSchema =
|
|
1518
|
-
type:
|
|
1519
|
-
thinking:
|
|
1520
|
-
});
|
|
1521
|
-
var ToolUseBlockSchema =
|
|
1522
|
-
type:
|
|
1523
|
-
toolName:
|
|
1524
|
-
toolId:
|
|
1525
|
-
input:
|
|
1526
|
-
});
|
|
1527
|
-
var ToolResultBlockSchema =
|
|
1528
|
-
type:
|
|
1529
|
-
toolUseId:
|
|
1530
|
-
content:
|
|
1531
|
-
isError:
|
|
1532
|
-
});
|
|
1533
|
-
var ConversationBlockSchema =
|
|
1688
|
+
import { z as z31 } from "zod";
|
|
1689
|
+
var TextBlockSchema = z31.object({
|
|
1690
|
+
type: z31.literal("text"),
|
|
1691
|
+
text: z31.string()
|
|
1692
|
+
});
|
|
1693
|
+
var ThinkingBlockSchema = z31.object({
|
|
1694
|
+
type: z31.literal("thinking"),
|
|
1695
|
+
thinking: z31.string()
|
|
1696
|
+
});
|
|
1697
|
+
var ToolUseBlockSchema = z31.object({
|
|
1698
|
+
type: z31.literal("tool_use"),
|
|
1699
|
+
toolName: z31.string(),
|
|
1700
|
+
toolId: z31.string(),
|
|
1701
|
+
input: z31.unknown()
|
|
1702
|
+
});
|
|
1703
|
+
var ToolResultBlockSchema = z31.object({
|
|
1704
|
+
type: z31.literal("tool_result"),
|
|
1705
|
+
toolUseId: z31.string(),
|
|
1706
|
+
content: z31.string(),
|
|
1707
|
+
isError: z31.boolean().optional()
|
|
1708
|
+
});
|
|
1709
|
+
var ConversationBlockSchema = z31.discriminatedUnion("type", [
|
|
1534
1710
|
TextBlockSchema,
|
|
1535
1711
|
ThinkingBlockSchema,
|
|
1536
1712
|
ToolUseBlockSchema,
|
|
@@ -1541,18 +1717,18 @@ var ConversationMessageRoles = [
|
|
|
1541
1717
|
"user",
|
|
1542
1718
|
"system"
|
|
1543
1719
|
];
|
|
1544
|
-
var ConversationMessageSchema =
|
|
1545
|
-
role:
|
|
1546
|
-
content:
|
|
1547
|
-
timestamp:
|
|
1720
|
+
var ConversationMessageSchema = z31.object({
|
|
1721
|
+
role: z31.enum(ConversationMessageRoles),
|
|
1722
|
+
content: z31.array(ConversationBlockSchema),
|
|
1723
|
+
timestamp: z31.string()
|
|
1548
1724
|
});
|
|
1549
|
-
var ScenarioConversationSchema =
|
|
1550
|
-
id:
|
|
1551
|
-
projectId:
|
|
1552
|
-
evalRunId:
|
|
1553
|
-
resultId:
|
|
1554
|
-
messages:
|
|
1555
|
-
createdAt:
|
|
1725
|
+
var ScenarioConversationSchema = z31.object({
|
|
1726
|
+
id: z31.string(),
|
|
1727
|
+
projectId: z31.string(),
|
|
1728
|
+
evalRunId: z31.string(),
|
|
1729
|
+
resultId: z31.string(),
|
|
1730
|
+
messages: z31.array(ConversationMessageSchema),
|
|
1731
|
+
createdAt: z31.string()
|
|
1556
1732
|
});
|
|
1557
1733
|
|
|
1558
1734
|
// src/evaluation/eval-result.ts
|
|
@@ -1563,98 +1739,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1563
1739
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1564
1740
|
return AssertionResultStatus2;
|
|
1565
1741
|
})(AssertionResultStatus || {});
|
|
1566
|
-
var AssertionResultSchema =
|
|
1567
|
-
id:
|
|
1568
|
-
assertionId:
|
|
1569
|
-
assertionType:
|
|
1570
|
-
assertionName:
|
|
1571
|
-
status:
|
|
1572
|
-
message:
|
|
1573
|
-
expected:
|
|
1574
|
-
actual:
|
|
1575
|
-
duration:
|
|
1576
|
-
details:
|
|
1577
|
-
llmTraceSteps:
|
|
1578
|
-
});
|
|
1579
|
-
var EvalRunResultSchema =
|
|
1580
|
-
id:
|
|
1581
|
-
targetId:
|
|
1582
|
-
targetName:
|
|
1742
|
+
var AssertionResultSchema = z32.object({
|
|
1743
|
+
id: z32.string(),
|
|
1744
|
+
assertionId: z32.string(),
|
|
1745
|
+
assertionType: z32.string(),
|
|
1746
|
+
assertionName: z32.string(),
|
|
1747
|
+
status: z32.enum(AssertionResultStatus),
|
|
1748
|
+
message: z32.string().optional(),
|
|
1749
|
+
expected: z32.string().optional(),
|
|
1750
|
+
actual: z32.string().optional(),
|
|
1751
|
+
duration: z32.number().optional(),
|
|
1752
|
+
details: z32.record(z32.string(), z32.unknown()).optional(),
|
|
1753
|
+
llmTraceSteps: z32.array(LLMTraceStepSchema).optional()
|
|
1754
|
+
});
|
|
1755
|
+
var EvalRunResultSchema = z32.object({
|
|
1756
|
+
id: z32.string(),
|
|
1757
|
+
targetId: z32.string(),
|
|
1758
|
+
targetName: z32.string().optional(),
|
|
1583
1759
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1584
|
-
skillVersionId:
|
|
1760
|
+
skillVersionId: z32.string().optional(),
|
|
1585
1761
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1586
|
-
skillVersion:
|
|
1587
|
-
scenarioId:
|
|
1588
|
-
scenarioName:
|
|
1762
|
+
skillVersion: z32.string().optional(),
|
|
1763
|
+
scenarioId: z32.string(),
|
|
1764
|
+
scenarioName: z32.string(),
|
|
1589
1765
|
/** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
|
|
1590
|
-
triggerPrompt:
|
|
1766
|
+
triggerPrompt: z32.string().optional(),
|
|
1591
1767
|
modelConfig: ModelConfigSchema.optional(),
|
|
1592
|
-
assertionResults:
|
|
1768
|
+
assertionResults: z32.array(AssertionResultSchema),
|
|
1593
1769
|
metrics: EvalMetricsSchema.optional(),
|
|
1594
|
-
passed:
|
|
1595
|
-
failed:
|
|
1596
|
-
passRate:
|
|
1597
|
-
duration:
|
|
1598
|
-
outputText:
|
|
1599
|
-
files:
|
|
1600
|
-
fileDiffs:
|
|
1770
|
+
passed: z32.number(),
|
|
1771
|
+
failed: z32.number(),
|
|
1772
|
+
passRate: z32.number(),
|
|
1773
|
+
duration: z32.number(),
|
|
1774
|
+
outputText: z32.string().optional(),
|
|
1775
|
+
files: z32.array(ExpectedFileSchema).optional(),
|
|
1776
|
+
fileDiffs: z32.array(DiffContentSchema).optional(),
|
|
1601
1777
|
/** Full template files after execution with status indicators */
|
|
1602
|
-
templateFiles:
|
|
1603
|
-
startedAt:
|
|
1604
|
-
completedAt:
|
|
1778
|
+
templateFiles: z32.array(TemplateFileSchema).optional(),
|
|
1779
|
+
startedAt: z32.string().optional(),
|
|
1780
|
+
completedAt: z32.string().optional(),
|
|
1605
1781
|
llmTrace: LLMTraceSchema.optional(),
|
|
1606
1782
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1607
|
-
conversation:
|
|
1783
|
+
conversation: z32.array(ConversationMessageSchema).optional(),
|
|
1608
1784
|
/** 0-based iteration index when a scenario is run multiple times within a single eval run */
|
|
1609
|
-
iterationIndex:
|
|
1610
|
-
});
|
|
1611
|
-
var PromptResultSchema =
|
|
1612
|
-
text:
|
|
1613
|
-
files:
|
|
1614
|
-
finishReason:
|
|
1615
|
-
reasoning:
|
|
1616
|
-
reasoningDetails:
|
|
1617
|
-
toolCalls:
|
|
1618
|
-
toolResults:
|
|
1619
|
-
warnings:
|
|
1620
|
-
sources:
|
|
1621
|
-
steps:
|
|
1622
|
-
generationTimeMs:
|
|
1623
|
-
prompt:
|
|
1624
|
-
systemPrompt:
|
|
1625
|
-
usage:
|
|
1626
|
-
totalTokens:
|
|
1627
|
-
totalMicrocentsSpent:
|
|
1785
|
+
iterationIndex: z32.number().int().min(0).optional()
|
|
1786
|
+
});
|
|
1787
|
+
var PromptResultSchema = z32.object({
|
|
1788
|
+
text: z32.string(),
|
|
1789
|
+
files: z32.array(z32.unknown()).optional(),
|
|
1790
|
+
finishReason: z32.string().optional(),
|
|
1791
|
+
reasoning: z32.string().optional(),
|
|
1792
|
+
reasoningDetails: z32.unknown().optional(),
|
|
1793
|
+
toolCalls: z32.array(z32.unknown()).optional(),
|
|
1794
|
+
toolResults: z32.array(z32.unknown()).optional(),
|
|
1795
|
+
warnings: z32.array(z32.unknown()).optional(),
|
|
1796
|
+
sources: z32.array(z32.unknown()).optional(),
|
|
1797
|
+
steps: z32.array(z32.unknown()),
|
|
1798
|
+
generationTimeMs: z32.number(),
|
|
1799
|
+
prompt: z32.string(),
|
|
1800
|
+
systemPrompt: z32.string(),
|
|
1801
|
+
usage: z32.object({
|
|
1802
|
+
totalTokens: z32.number().optional(),
|
|
1803
|
+
totalMicrocentsSpent: z32.number().optional()
|
|
1628
1804
|
})
|
|
1629
1805
|
});
|
|
1630
|
-
var EvaluationResultSchema =
|
|
1631
|
-
id:
|
|
1632
|
-
runId:
|
|
1633
|
-
timestamp:
|
|
1806
|
+
var EvaluationResultSchema = z32.object({
|
|
1807
|
+
id: z32.string(),
|
|
1808
|
+
runId: z32.string(),
|
|
1809
|
+
timestamp: z32.number(),
|
|
1634
1810
|
promptResult: PromptResultSchema,
|
|
1635
|
-
testResults:
|
|
1636
|
-
tags:
|
|
1637
|
-
feedback:
|
|
1638
|
-
score:
|
|
1639
|
-
suiteId:
|
|
1640
|
-
});
|
|
1641
|
-
var LeanEvaluationResultSchema =
|
|
1642
|
-
id:
|
|
1643
|
-
runId:
|
|
1644
|
-
timestamp:
|
|
1645
|
-
tags:
|
|
1646
|
-
scenarioId:
|
|
1647
|
-
scenarioVersion:
|
|
1648
|
-
targetId:
|
|
1649
|
-
targetVersion:
|
|
1650
|
-
suiteId:
|
|
1651
|
-
score:
|
|
1652
|
-
time:
|
|
1653
|
-
microcentsSpent:
|
|
1811
|
+
testResults: z32.array(z32.unknown()),
|
|
1812
|
+
tags: z32.array(z32.string()).optional(),
|
|
1813
|
+
feedback: z32.string().optional(),
|
|
1814
|
+
score: z32.number(),
|
|
1815
|
+
suiteId: z32.string().optional()
|
|
1816
|
+
});
|
|
1817
|
+
var LeanEvaluationResultSchema = z32.object({
|
|
1818
|
+
id: z32.string(),
|
|
1819
|
+
runId: z32.string(),
|
|
1820
|
+
timestamp: z32.number(),
|
|
1821
|
+
tags: z32.array(z32.string()).optional(),
|
|
1822
|
+
scenarioId: z32.string(),
|
|
1823
|
+
scenarioVersion: z32.number().optional(),
|
|
1824
|
+
targetId: z32.string(),
|
|
1825
|
+
targetVersion: z32.number().optional(),
|
|
1826
|
+
suiteId: z32.string().optional(),
|
|
1827
|
+
score: z32.number(),
|
|
1828
|
+
time: z32.number().optional(),
|
|
1829
|
+
microcentsSpent: z32.number().optional()
|
|
1654
1830
|
});
|
|
1655
1831
|
|
|
1656
1832
|
// src/evaluation/eval-run-folder.ts
|
|
1657
|
-
import { z as
|
|
1833
|
+
import { z as z33 } from "zod";
|
|
1658
1834
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1659
1835
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1660
1836
|
id: true,
|
|
@@ -1668,26 +1844,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1668
1844
|
updatedAt: true,
|
|
1669
1845
|
deleted: true
|
|
1670
1846
|
}).partial();
|
|
1671
|
-
var EvalRunFolderMembershipSchema =
|
|
1672
|
-
folderId:
|
|
1673
|
-
evalRunId:
|
|
1674
|
-
projectId:
|
|
1675
|
-
createdAt:
|
|
1847
|
+
var EvalRunFolderMembershipSchema = z33.object({
|
|
1848
|
+
folderId: z33.string(),
|
|
1849
|
+
evalRunId: z33.string(),
|
|
1850
|
+
projectId: z33.string(),
|
|
1851
|
+
createdAt: z33.string()
|
|
1676
1852
|
});
|
|
1677
1853
|
|
|
1678
1854
|
// src/project/project.ts
|
|
1679
|
-
import { z as
|
|
1855
|
+
import { z as z34 } from "zod";
|
|
1680
1856
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1681
|
-
appId:
|
|
1682
|
-
scenarioTags:
|
|
1857
|
+
appId: z34.string().optional().describe("The ID of the app in Dev Center"),
|
|
1858
|
+
scenarioTags: z34.array(z34.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1683
1859
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1684
|
-
wixAuthToken:
|
|
1860
|
+
wixAuthToken: z34.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1685
1861
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1686
|
-
base44AuthFile:
|
|
1862
|
+
base44AuthFile: z34.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1687
1863
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1688
|
-
wixAuthEmail:
|
|
1864
|
+
wixAuthEmail: z34.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1689
1865
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1690
|
-
base44AuthEmail:
|
|
1866
|
+
base44AuthEmail: z34.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1691
1867
|
});
|
|
1692
1868
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1693
1869
|
id: true,
|
|
@@ -1697,7 +1873,7 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1697
1873
|
wixAuthEmail: true,
|
|
1698
1874
|
base44AuthEmail: true
|
|
1699
1875
|
}).extend({
|
|
1700
|
-
appId:
|
|
1876
|
+
appId: z34.string().describe(
|
|
1701
1877
|
"Required: The ID of the app in Dev Center for credential scoping"
|
|
1702
1878
|
)
|
|
1703
1879
|
});
|
|
@@ -1717,7 +1893,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1717
1893
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1718
1894
|
|
|
1719
1895
|
// src/schedule/eval-schedule.ts
|
|
1720
|
-
import { z as
|
|
1896
|
+
import { z as z35 } from "zod";
|
|
1721
1897
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1722
1898
|
FrequencyType2["DAILY"] = "daily";
|
|
1723
1899
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1727,29 +1903,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1727
1903
|
})(FrequencyType || {});
|
|
1728
1904
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1729
1905
|
/** Whether the schedule is active */
|
|
1730
|
-
enabled:
|
|
1906
|
+
enabled: z35.boolean(),
|
|
1731
1907
|
/** Test suite to run */
|
|
1732
|
-
suiteId:
|
|
1908
|
+
suiteId: z35.string(),
|
|
1733
1909
|
/** Preset that provides agent + entities for this schedule */
|
|
1734
|
-
presetId:
|
|
1910
|
+
presetId: z35.string(),
|
|
1735
1911
|
/** How often to run */
|
|
1736
|
-
frequencyType:
|
|
1912
|
+
frequencyType: z35.nativeEnum(FrequencyType),
|
|
1737
1913
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1738
|
-
timeOfDay:
|
|
1914
|
+
timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1739
1915
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1740
|
-
dayOfWeek:
|
|
1916
|
+
dayOfWeek: z35.number().min(0).max(6).optional(),
|
|
1741
1917
|
/** Day of month (1-31) for monthly schedules */
|
|
1742
|
-
dayOfMonth:
|
|
1918
|
+
dayOfMonth: z35.number().min(1).max(31).optional(),
|
|
1743
1919
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1744
|
-
timezone:
|
|
1920
|
+
timezone: z35.string(),
|
|
1745
1921
|
/** ID of the last eval run created by this schedule */
|
|
1746
|
-
lastRunId:
|
|
1922
|
+
lastRunId: z35.string().optional(),
|
|
1747
1923
|
/** Denormalized status of the last run */
|
|
1748
|
-
lastRunStatus:
|
|
1924
|
+
lastRunStatus: z35.string().optional(),
|
|
1749
1925
|
/** ISO timestamp of the last run */
|
|
1750
|
-
lastRunAt:
|
|
1926
|
+
lastRunAt: z35.string().optional(),
|
|
1751
1927
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1752
|
-
nextRunAt:
|
|
1928
|
+
nextRunAt: z35.string().optional()
|
|
1753
1929
|
});
|
|
1754
1930
|
function isValidTimezone(tz) {
|
|
1755
1931
|
try {
|
|
@@ -1762,14 +1938,14 @@ function isValidTimezone(tz) {
|
|
|
1762
1938
|
function validateScheduleFields(data, ctx, options) {
|
|
1763
1939
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1764
1940
|
ctx.addIssue({
|
|
1765
|
-
code:
|
|
1941
|
+
code: z35.ZodIssueCode.custom,
|
|
1766
1942
|
message: "dayOfWeek is required for weekly schedules",
|
|
1767
1943
|
path: ["dayOfWeek"]
|
|
1768
1944
|
});
|
|
1769
1945
|
}
|
|
1770
1946
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1771
1947
|
ctx.addIssue({
|
|
1772
|
-
code:
|
|
1948
|
+
code: z35.ZodIssueCode.custom,
|
|
1773
1949
|
message: "dayOfMonth is required for monthly schedules",
|
|
1774
1950
|
path: ["dayOfMonth"]
|
|
1775
1951
|
});
|
|
@@ -1777,7 +1953,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1777
1953
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1778
1954
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1779
1955
|
ctx.addIssue({
|
|
1780
|
-
code:
|
|
1956
|
+
code: z35.ZodIssueCode.custom,
|
|
1781
1957
|
message: "Invalid IANA timezone",
|
|
1782
1958
|
path: ["timezone"]
|
|
1783
1959
|
});
|
|
@@ -1841,6 +2017,13 @@ export {
|
|
|
1841
2017
|
BulkImportResultItemSchema,
|
|
1842
2018
|
BulkImportResultSchema,
|
|
1843
2019
|
BulkImportSkillsInputSchema,
|
|
2020
|
+
CAPABILITY_NAME_REGEX,
|
|
2021
|
+
CapabilityContentSchema,
|
|
2022
|
+
CapabilitySchema,
|
|
2023
|
+
CapabilityTypeSchema,
|
|
2024
|
+
CapabilityVersionOriginSchema,
|
|
2025
|
+
CapabilityVersionSchema,
|
|
2026
|
+
CapabilityWithLatestVersionSchema,
|
|
1844
2027
|
ClaudeModel,
|
|
1845
2028
|
ClaudeModelSchema,
|
|
1846
2029
|
CommandExecutionSchema,
|
|
@@ -1851,6 +2034,8 @@ export {
|
|
|
1851
2034
|
CostAssertionSchema,
|
|
1852
2035
|
CostConfigSchema,
|
|
1853
2036
|
CreateAgentInputSchema,
|
|
2037
|
+
CreateCapabilityInputSchema,
|
|
2038
|
+
CreateCapabilityVersionInputSchema,
|
|
1854
2039
|
CreateEvalRunFolderInputSchema,
|
|
1855
2040
|
CreateEvalRunInputSchema,
|
|
1856
2041
|
CreateEvalScheduleInputSchema,
|
|
@@ -1890,6 +2075,7 @@ export {
|
|
|
1890
2075
|
FilePresenceTestSchema,
|
|
1891
2076
|
FrequencyType,
|
|
1892
2077
|
GitHubSourceSchema,
|
|
2078
|
+
InitialCapabilityVersionInputSchema,
|
|
1893
2079
|
InitialVersionInputSchema,
|
|
1894
2080
|
LEGACY_MODEL_ID_MAP,
|
|
1895
2081
|
LLMBreakdownStatsSchema,
|
|
@@ -1966,6 +2152,7 @@ export {
|
|
|
1966
2152
|
TriggerSchema,
|
|
1967
2153
|
TriggerType,
|
|
1968
2154
|
UpdateAgentInputSchema,
|
|
2155
|
+
UpdateCapabilityInputSchema,
|
|
1969
2156
|
UpdateEvalRunFolderInputSchema,
|
|
1970
2157
|
UpdateEvalScheduleInputSchema,
|
|
1971
2158
|
UpdateMcpInputSchema,
|
|
@@ -1978,12 +2165,20 @@ export {
|
|
|
1978
2165
|
UpdateTestScenarioInputSchema,
|
|
1979
2166
|
UpdateTestSuiteInputSchema,
|
|
1980
2167
|
VitestTestSchema,
|
|
2168
|
+
capabilityToMcp,
|
|
2169
|
+
capabilityToRule,
|
|
2170
|
+
capabilityToSkill,
|
|
2171
|
+
capabilityToSkillWithLatestVersion,
|
|
2172
|
+
capabilityToSubAgent,
|
|
2173
|
+
capabilityVersionToSkillVersion,
|
|
1981
2174
|
classifyAssertionRef,
|
|
1982
2175
|
formatTraceEventLine,
|
|
1983
2176
|
getSystemAssertion,
|
|
1984
2177
|
getSystemAssertions,
|
|
2178
|
+
groupCapabilitiesByType,
|
|
1985
2179
|
isAllowedBuildCommandString,
|
|
1986
2180
|
isSystemAssertionId,
|
|
2181
|
+
isValidCapabilityName,
|
|
1987
2182
|
isValidSkillFolderName,
|
|
1988
2183
|
normalizeBatchAssertionLink,
|
|
1989
2184
|
normalizeModelId,
|