@wix/evalforge-types 0.74.0 → 0.76.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.js +698 -504
- package/build/index.js.map +4 -4
- package/build/index.mjs +679 -504
- package/build/index.mjs.map +4 -4
- package/build/types/evaluation/eval-run.d.ts +4 -10
- package/build/types/target/capability-converters.d.ts +25 -0
- package/build/types/target/capability.d.ts +254 -0
- package/build/types/target/index.d.ts +2 -0
- package/build/types/target/preset.d.ts +6 -15
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -340,25 +340,19 @@ import { z as z9 } from "zod";
|
|
|
340
340
|
var PresetSchema = TenantEntitySchema.extend({
|
|
341
341
|
/** Agent ID for this preset */
|
|
342
342
|
agentId: z9.string(),
|
|
343
|
-
/**
|
|
344
|
-
|
|
345
|
-
/**
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
subAgentIds: z9.array(z9.string()).default([]),
|
|
351
|
-
/** Rule IDs included in this preset */
|
|
352
|
-
ruleIds: z9.array(z9.string()).default([])
|
|
353
|
-
});
|
|
354
|
-
var atLeastOneEntity = (data) => (data.skillIds?.length ?? 0) > 0 || (data.mcpIds?.length ?? 0) > 0 || (data.subAgentIds?.length ?? 0) > 0 || (data.ruleIds?.length ?? 0) > 0;
|
|
355
|
-
var AT_LEAST_ONE_ENTITY_MESSAGE = "At least one of skillIds, mcpIds, subAgentIds, or ruleIds must be non-empty";
|
|
343
|
+
/** Unified capability IDs */
|
|
344
|
+
capabilityIds: z9.array(z9.string()).optional(),
|
|
345
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
346
|
+
capabilityVersions: z9.record(z9.string(), z9.string()).optional()
|
|
347
|
+
});
|
|
348
|
+
var hasCapabilities = (data) => (data.capabilityIds?.length ?? 0) > 0;
|
|
349
|
+
var CAPABILITY_IDS_REQUIRED_MESSAGE = "capabilityIds must be non-empty";
|
|
356
350
|
var CreatePresetInputSchema = PresetSchema.omit({
|
|
357
351
|
id: true,
|
|
358
352
|
createdAt: true,
|
|
359
353
|
updatedAt: true,
|
|
360
354
|
deleted: true
|
|
361
|
-
}).refine(
|
|
355
|
+
}).refine(hasCapabilities, { message: CAPABILITY_IDS_REQUIRED_MESSAGE });
|
|
362
356
|
var UpdatePresetInputSchema = PresetSchema.omit({
|
|
363
357
|
id: true,
|
|
364
358
|
createdAt: true,
|
|
@@ -366,11 +360,179 @@ var UpdatePresetInputSchema = PresetSchema.omit({
|
|
|
366
360
|
deleted: true
|
|
367
361
|
}).partial();
|
|
368
362
|
|
|
363
|
+
// src/target/capability.ts
|
|
364
|
+
import { z as z10 } from "zod";
|
|
365
|
+
var CapabilityTypeSchema = z10.enum([
|
|
366
|
+
"SKILL",
|
|
367
|
+
"SUB_AGENT",
|
|
368
|
+
"RULE",
|
|
369
|
+
"MCP"
|
|
370
|
+
]);
|
|
371
|
+
var CAPABILITY_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
|
|
372
|
+
function isValidCapabilityName(name) {
|
|
373
|
+
return typeof name === "string" && name.length > 0 && CAPABILITY_NAME_REGEX.test(name);
|
|
374
|
+
}
|
|
375
|
+
var KEBAB_CASE_MESSAGE2 = "Name must be in kebab-case (lowercase letters, numbers, hyphens only, e.g. my-capability)";
|
|
376
|
+
var CapabilityContentSchema = z10.record(z10.string(), z10.unknown());
|
|
377
|
+
var CapabilityVersionOriginSchema = z10.enum(["manual", "pr", "master"]);
|
|
378
|
+
var CapabilitySchema = TenantEntitySchema.extend({
|
|
379
|
+
capabilityType: CapabilityTypeSchema,
|
|
380
|
+
source: GitHubSourceSchema.optional()
|
|
381
|
+
});
|
|
382
|
+
var CapabilityVersionSchema = z10.object({
|
|
383
|
+
id: z10.string(),
|
|
384
|
+
projectId: z10.string(),
|
|
385
|
+
capabilityId: z10.string(),
|
|
386
|
+
version: z10.string(),
|
|
387
|
+
origin: CapabilityVersionOriginSchema,
|
|
388
|
+
source: GitHubSourceSchema.optional(),
|
|
389
|
+
content: CapabilityContentSchema.optional(),
|
|
390
|
+
notes: z10.string().optional(),
|
|
391
|
+
createdAt: z10.string()
|
|
392
|
+
});
|
|
393
|
+
var CapabilityWithLatestVersionSchema = CapabilitySchema.extend({
|
|
394
|
+
latestVersion: CapabilityVersionSchema.optional()
|
|
395
|
+
});
|
|
396
|
+
var CapabilityInputBaseSchema = CapabilitySchema.omit({
|
|
397
|
+
id: true,
|
|
398
|
+
createdAt: true,
|
|
399
|
+
updatedAt: true,
|
|
400
|
+
deleted: true,
|
|
401
|
+
description: true,
|
|
402
|
+
source: true
|
|
403
|
+
}).extend({
|
|
404
|
+
description: z10.string().optional(),
|
|
405
|
+
source: GitHubSourceSchema.optional()
|
|
406
|
+
});
|
|
407
|
+
var InitialCapabilityVersionInputSchema = z10.object({
|
|
408
|
+
content: CapabilityContentSchema.optional(),
|
|
409
|
+
notes: z10.string().optional(),
|
|
410
|
+
source: GitHubSourceSchema.optional(),
|
|
411
|
+
version: z10.string().optional(),
|
|
412
|
+
origin: CapabilityVersionOriginSchema.optional()
|
|
413
|
+
});
|
|
414
|
+
var CreateCapabilityInputSchema = CapabilityInputBaseSchema.extend({
|
|
415
|
+
initialVersion: InitialCapabilityVersionInputSchema.optional()
|
|
416
|
+
}).refine((data) => isValidCapabilityName(data.name), {
|
|
417
|
+
message: KEBAB_CASE_MESSAGE2,
|
|
418
|
+
path: ["name"]
|
|
419
|
+
});
|
|
420
|
+
var UpdateCapabilityInputSchema = CapabilityInputBaseSchema.omit({
|
|
421
|
+
capabilityType: true
|
|
422
|
+
}).partial().refine(
|
|
423
|
+
(data) => data.name === void 0 || isValidCapabilityName(data.name),
|
|
424
|
+
{ message: KEBAB_CASE_MESSAGE2, path: ["name"] }
|
|
425
|
+
);
|
|
426
|
+
var CreateCapabilityVersionInputSchema = z10.object({
|
|
427
|
+
source: GitHubSourceSchema.optional(),
|
|
428
|
+
version: z10.string().min(1),
|
|
429
|
+
notes: z10.string().optional(),
|
|
430
|
+
origin: CapabilityVersionOriginSchema.optional(),
|
|
431
|
+
content: CapabilityContentSchema.optional()
|
|
432
|
+
});
|
|
433
|
+
|
|
434
|
+
// src/target/capability-converters.ts
|
|
435
|
+
function capabilityToSkill(cap) {
|
|
436
|
+
return {
|
|
437
|
+
id: cap.id,
|
|
438
|
+
projectId: cap.projectId,
|
|
439
|
+
name: cap.name,
|
|
440
|
+
description: cap.description,
|
|
441
|
+
source: cap.source,
|
|
442
|
+
createdAt: cap.createdAt,
|
|
443
|
+
updatedAt: cap.updatedAt,
|
|
444
|
+
deleted: cap.deleted
|
|
445
|
+
};
|
|
446
|
+
}
|
|
447
|
+
function capabilityVersionToSkillVersion(cv) {
|
|
448
|
+
const content = cv.content;
|
|
449
|
+
return {
|
|
450
|
+
id: cv.id,
|
|
451
|
+
projectId: cv.projectId,
|
|
452
|
+
skillId: cv.capabilityId,
|
|
453
|
+
version: cv.version,
|
|
454
|
+
origin: cv.origin,
|
|
455
|
+
source: cv.source,
|
|
456
|
+
files: content?.files,
|
|
457
|
+
notes: cv.notes,
|
|
458
|
+
createdAt: cv.createdAt
|
|
459
|
+
};
|
|
460
|
+
}
|
|
461
|
+
function capabilityToSkillWithLatestVersion(cap) {
|
|
462
|
+
const skill = capabilityToSkill(cap);
|
|
463
|
+
const latestVersion = cap.latestVersion ? capabilityVersionToSkillVersion(cap.latestVersion) : void 0;
|
|
464
|
+
return { ...skill, latestVersion };
|
|
465
|
+
}
|
|
466
|
+
function capabilityToSubAgent(cap) {
|
|
467
|
+
const content = cap.latestVersion?.content;
|
|
468
|
+
return {
|
|
469
|
+
id: cap.id,
|
|
470
|
+
projectId: cap.projectId,
|
|
471
|
+
name: cap.name,
|
|
472
|
+
description: cap.description,
|
|
473
|
+
subAgentMd: content?.subAgentMd ?? "",
|
|
474
|
+
source: cap.source,
|
|
475
|
+
createdAt: cap.createdAt,
|
|
476
|
+
updatedAt: cap.updatedAt,
|
|
477
|
+
deleted: cap.deleted
|
|
478
|
+
};
|
|
479
|
+
}
|
|
480
|
+
function capabilityToRule(cap) {
|
|
481
|
+
const content = cap.latestVersion?.content;
|
|
482
|
+
return {
|
|
483
|
+
id: cap.id,
|
|
484
|
+
projectId: cap.projectId,
|
|
485
|
+
name: cap.name,
|
|
486
|
+
description: cap.description,
|
|
487
|
+
ruleType: content?.ruleType ?? "claude-md",
|
|
488
|
+
content: content?.content ?? "",
|
|
489
|
+
createdAt: cap.createdAt,
|
|
490
|
+
updatedAt: cap.updatedAt,
|
|
491
|
+
deleted: cap.deleted
|
|
492
|
+
};
|
|
493
|
+
}
|
|
494
|
+
function capabilityToMcp(cap) {
|
|
495
|
+
const content = cap.latestVersion?.content;
|
|
496
|
+
return {
|
|
497
|
+
id: cap.id,
|
|
498
|
+
projectId: cap.projectId,
|
|
499
|
+
name: cap.name,
|
|
500
|
+
description: cap.description,
|
|
501
|
+
config: content?.config ?? {},
|
|
502
|
+
createdAt: cap.createdAt,
|
|
503
|
+
updatedAt: cap.updatedAt,
|
|
504
|
+
deleted: cap.deleted
|
|
505
|
+
};
|
|
506
|
+
}
|
|
507
|
+
function groupCapabilitiesByType(capabilities) {
|
|
508
|
+
const skills = [];
|
|
509
|
+
const subAgents = [];
|
|
510
|
+
const rules = [];
|
|
511
|
+
const mcps = [];
|
|
512
|
+
for (const cap of capabilities) {
|
|
513
|
+
switch (cap.capabilityType) {
|
|
514
|
+
case "SKILL":
|
|
515
|
+
skills.push(capabilityToSkillWithLatestVersion(cap));
|
|
516
|
+
break;
|
|
517
|
+
case "SUB_AGENT":
|
|
518
|
+
subAgents.push(capabilityToSubAgent(cap));
|
|
519
|
+
break;
|
|
520
|
+
case "RULE":
|
|
521
|
+
rules.push(capabilityToRule(cap));
|
|
522
|
+
break;
|
|
523
|
+
case "MCP":
|
|
524
|
+
mcps.push(capabilityToMcp(cap));
|
|
525
|
+
break;
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
return { skills, subAgents, rules, mcps };
|
|
529
|
+
}
|
|
530
|
+
|
|
369
531
|
// src/test/index.ts
|
|
370
|
-
import { z as
|
|
532
|
+
import { z as z21 } from "zod";
|
|
371
533
|
|
|
372
534
|
// src/test/base.ts
|
|
373
|
-
import { z as
|
|
535
|
+
import { z as z11 } from "zod";
|
|
374
536
|
var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
375
537
|
TestType2["LLM"] = "LLM";
|
|
376
538
|
TestType2["TOOL"] = "TOOL";
|
|
@@ -383,7 +545,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
|
383
545
|
TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
|
|
384
546
|
return TestType2;
|
|
385
547
|
})(TestType || {});
|
|
386
|
-
var TestTypeSchema =
|
|
548
|
+
var TestTypeSchema = z11.enum(TestType);
|
|
387
549
|
var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
388
550
|
TestImportance2["LOW"] = "low";
|
|
389
551
|
TestImportance2["MEDIUM"] = "medium";
|
|
@@ -391,153 +553,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
|
391
553
|
TestImportance2["CRITICAL"] = "critical";
|
|
392
554
|
return TestImportance2;
|
|
393
555
|
})(TestImportance || {});
|
|
394
|
-
var TestImportanceSchema =
|
|
395
|
-
var BaseTestSchema =
|
|
396
|
-
id:
|
|
556
|
+
var TestImportanceSchema = z11.enum(TestImportance);
|
|
557
|
+
var BaseTestSchema = z11.object({
|
|
558
|
+
id: z11.string(),
|
|
397
559
|
type: TestTypeSchema,
|
|
398
|
-
name:
|
|
399
|
-
description:
|
|
560
|
+
name: z11.string().min(3),
|
|
561
|
+
description: z11.string().optional(),
|
|
400
562
|
importance: TestImportanceSchema.optional()
|
|
401
563
|
});
|
|
402
564
|
|
|
403
565
|
// src/test/llm.ts
|
|
404
|
-
import { z as
|
|
566
|
+
import { z as z12 } from "zod";
|
|
405
567
|
var LLMTestSchema = BaseTestSchema.extend({
|
|
406
|
-
type:
|
|
568
|
+
type: z12.literal("LLM" /* LLM */),
|
|
407
569
|
/** Maximum steps for the LLM to take */
|
|
408
|
-
maxSteps:
|
|
570
|
+
maxSteps: z12.number().min(1).max(100),
|
|
409
571
|
/** Prompt to send to the evaluator */
|
|
410
|
-
prompt:
|
|
572
|
+
prompt: z12.string().min(1),
|
|
411
573
|
/** ID of the evaluator agent to use */
|
|
412
|
-
evaluatorId:
|
|
574
|
+
evaluatorId: z12.string()
|
|
413
575
|
});
|
|
414
576
|
|
|
415
577
|
// src/test/tool.ts
|
|
416
|
-
import { z as
|
|
578
|
+
import { z as z13 } from "zod";
|
|
417
579
|
var ToolTestSchema = BaseTestSchema.extend({
|
|
418
|
-
type:
|
|
580
|
+
type: z13.literal("TOOL" /* TOOL */),
|
|
419
581
|
/** Name of the tool that should be called */
|
|
420
|
-
toolName:
|
|
582
|
+
toolName: z13.string().min(3),
|
|
421
583
|
/** Expected arguments for the tool call */
|
|
422
|
-
args:
|
|
584
|
+
args: z13.record(z13.string(), z13.any()),
|
|
423
585
|
/** Expected content in the tool results */
|
|
424
|
-
resultsContent:
|
|
586
|
+
resultsContent: z13.string()
|
|
425
587
|
});
|
|
426
588
|
|
|
427
589
|
// src/test/site-config.ts
|
|
428
|
-
import { z as
|
|
590
|
+
import { z as z14 } from "zod";
|
|
429
591
|
var SiteConfigTestSchema = BaseTestSchema.extend({
|
|
430
|
-
type:
|
|
592
|
+
type: z14.literal("SITE_CONFIG" /* SITE_CONFIG */),
|
|
431
593
|
/** URL to call */
|
|
432
|
-
url:
|
|
594
|
+
url: z14.string().url(),
|
|
433
595
|
/** HTTP method */
|
|
434
|
-
method:
|
|
596
|
+
method: z14.enum(["GET", "POST"]),
|
|
435
597
|
/** Request body (for POST) */
|
|
436
|
-
body:
|
|
598
|
+
body: z14.string().optional(),
|
|
437
599
|
/** Expected HTTP status code */
|
|
438
|
-
expectedStatusCode:
|
|
600
|
+
expectedStatusCode: z14.number().int().min(100).max(599),
|
|
439
601
|
/** Expected response content */
|
|
440
|
-
expectedResponse:
|
|
602
|
+
expectedResponse: z14.string().optional(),
|
|
441
603
|
/** JMESPath expression to extract from response */
|
|
442
|
-
expectedResponseJMESPath:
|
|
604
|
+
expectedResponseJMESPath: z14.string().optional()
|
|
443
605
|
});
|
|
444
606
|
|
|
445
607
|
// src/test/command-execution.ts
|
|
446
|
-
import { z as
|
|
608
|
+
import { z as z15 } from "zod";
|
|
447
609
|
var AllowedCommands = [
|
|
448
610
|
"yarn install --no-immutable && yarn build",
|
|
449
611
|
"npm run build",
|
|
450
612
|
"yarn typecheck"
|
|
451
613
|
];
|
|
452
614
|
var CommandExecutionTestSchema = BaseTestSchema.extend({
|
|
453
|
-
type:
|
|
615
|
+
type: z15.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
|
|
454
616
|
/** Command to execute (must be in AllowedCommands) */
|
|
455
|
-
command:
|
|
617
|
+
command: z15.string().refine((value) => AllowedCommands.includes(value), {
|
|
456
618
|
message: `Command must be one of: ${AllowedCommands.join(", ")}`
|
|
457
619
|
}),
|
|
458
620
|
/** Expected exit code (default: 0) */
|
|
459
|
-
expectedExitCode:
|
|
621
|
+
expectedExitCode: z15.number().default(0).optional()
|
|
460
622
|
});
|
|
461
623
|
|
|
462
624
|
// src/test/file-presence.ts
|
|
463
|
-
import { z as
|
|
625
|
+
import { z as z16 } from "zod";
|
|
464
626
|
var FilePresenceTestSchema = BaseTestSchema.extend({
|
|
465
|
-
type:
|
|
627
|
+
type: z16.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
|
|
466
628
|
/** Paths to check */
|
|
467
|
-
paths:
|
|
629
|
+
paths: z16.array(z16.string()),
|
|
468
630
|
/** Whether files should exist (true) or not exist (false) */
|
|
469
|
-
shouldExist:
|
|
631
|
+
shouldExist: z16.boolean()
|
|
470
632
|
});
|
|
471
633
|
|
|
472
634
|
// src/test/file-content.ts
|
|
473
|
-
import { z as
|
|
474
|
-
var FileContentCheckSchema =
|
|
635
|
+
import { z as z17 } from "zod";
|
|
636
|
+
var FileContentCheckSchema = z17.object({
|
|
475
637
|
/** Strings that must be present in the file */
|
|
476
|
-
contains:
|
|
638
|
+
contains: z17.array(z17.string()).optional(),
|
|
477
639
|
/** Strings that must NOT be present in the file */
|
|
478
|
-
notContains:
|
|
640
|
+
notContains: z17.array(z17.string()).optional(),
|
|
479
641
|
/** Regex pattern the content must match */
|
|
480
|
-
matches:
|
|
642
|
+
matches: z17.string().optional(),
|
|
481
643
|
/** JSON path checks for structured content */
|
|
482
|
-
jsonPath:
|
|
483
|
-
|
|
484
|
-
path:
|
|
485
|
-
value:
|
|
644
|
+
jsonPath: z17.array(
|
|
645
|
+
z17.object({
|
|
646
|
+
path: z17.string(),
|
|
647
|
+
value: z17.unknown()
|
|
486
648
|
})
|
|
487
649
|
).optional(),
|
|
488
650
|
/** Lines that should be added (for diff checking) */
|
|
489
|
-
added:
|
|
651
|
+
added: z17.array(z17.string()).optional(),
|
|
490
652
|
/** Lines that should be removed (for diff checking) */
|
|
491
|
-
removed:
|
|
653
|
+
removed: z17.array(z17.string()).optional()
|
|
492
654
|
});
|
|
493
655
|
var FileContentTestSchema = BaseTestSchema.extend({
|
|
494
|
-
type:
|
|
656
|
+
type: z17.literal("FILE_CONTENT" /* FILE_CONTENT */),
|
|
495
657
|
/** Path to the file to check */
|
|
496
|
-
path:
|
|
658
|
+
path: z17.string(),
|
|
497
659
|
/** Content checks to perform */
|
|
498
660
|
checks: FileContentCheckSchema
|
|
499
661
|
});
|
|
500
662
|
|
|
501
663
|
// src/test/build-check.ts
|
|
502
|
-
import { z as
|
|
664
|
+
import { z as z18 } from "zod";
|
|
503
665
|
var BuildCheckTestSchema = BaseTestSchema.extend({
|
|
504
|
-
type:
|
|
666
|
+
type: z18.literal("BUILD_CHECK" /* BUILD_CHECK */),
|
|
505
667
|
/** Build command to execute */
|
|
506
|
-
command:
|
|
668
|
+
command: z18.string(),
|
|
507
669
|
/** Whether the build should succeed */
|
|
508
|
-
expectSuccess:
|
|
670
|
+
expectSuccess: z18.boolean(),
|
|
509
671
|
/** Maximum allowed warnings (optional) */
|
|
510
|
-
allowedWarnings:
|
|
672
|
+
allowedWarnings: z18.number().optional(),
|
|
511
673
|
/** Timeout in milliseconds */
|
|
512
|
-
timeout:
|
|
674
|
+
timeout: z18.number().optional()
|
|
513
675
|
});
|
|
514
676
|
|
|
515
677
|
// src/test/vitest.ts
|
|
516
|
-
import { z as
|
|
678
|
+
import { z as z19 } from "zod";
|
|
517
679
|
var VitestTestSchema = BaseTestSchema.extend({
|
|
518
|
-
type:
|
|
680
|
+
type: z19.literal("VITEST" /* VITEST */),
|
|
519
681
|
/** Test file content */
|
|
520
|
-
testFile:
|
|
682
|
+
testFile: z19.string(),
|
|
521
683
|
/** Name of the test file */
|
|
522
|
-
testFileName:
|
|
684
|
+
testFileName: z19.string(),
|
|
523
685
|
/** Minimum pass rate required (0-100) */
|
|
524
|
-
minPassRate:
|
|
686
|
+
minPassRate: z19.number().min(0).max(100)
|
|
525
687
|
});
|
|
526
688
|
|
|
527
689
|
// src/test/playwright-nl.ts
|
|
528
|
-
import { z as
|
|
690
|
+
import { z as z20 } from "zod";
|
|
529
691
|
var PlaywrightNLTestSchema = BaseTestSchema.extend({
|
|
530
|
-
type:
|
|
692
|
+
type: z20.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
|
|
531
693
|
/** Natural language steps to execute */
|
|
532
|
-
steps:
|
|
694
|
+
steps: z20.array(z20.string()),
|
|
533
695
|
/** Expected outcome description */
|
|
534
|
-
expectedOutcome:
|
|
696
|
+
expectedOutcome: z20.string(),
|
|
535
697
|
/** Timeout in milliseconds */
|
|
536
|
-
timeout:
|
|
698
|
+
timeout: z20.number().optional()
|
|
537
699
|
});
|
|
538
700
|
|
|
539
701
|
// src/test/index.ts
|
|
540
|
-
var TestSchema =
|
|
702
|
+
var TestSchema = z21.discriminatedUnion("type", [
|
|
541
703
|
LLMTestSchema,
|
|
542
704
|
ToolTestSchema,
|
|
543
705
|
SiteConfigTestSchema,
|
|
@@ -550,33 +712,33 @@ var TestSchema = z20.discriminatedUnion("type", [
|
|
|
550
712
|
]);
|
|
551
713
|
|
|
552
714
|
// src/scenario/environment.ts
|
|
553
|
-
import { z as
|
|
554
|
-
var LocalProjectConfigSchema =
|
|
715
|
+
import { z as z22 } from "zod";
|
|
716
|
+
var LocalProjectConfigSchema = z22.object({
|
|
555
717
|
/** Template ID to use for the local project */
|
|
556
|
-
templateId:
|
|
718
|
+
templateId: z22.string().optional(),
|
|
557
719
|
/** Files to create in the project */
|
|
558
|
-
files:
|
|
559
|
-
|
|
560
|
-
path:
|
|
561
|
-
content:
|
|
720
|
+
files: z22.array(
|
|
721
|
+
z22.object({
|
|
722
|
+
path: z22.string().min(1),
|
|
723
|
+
content: z22.string().min(1)
|
|
562
724
|
})
|
|
563
725
|
).optional()
|
|
564
726
|
});
|
|
565
|
-
var MetaSiteConfigSchema =
|
|
566
|
-
configurations:
|
|
567
|
-
|
|
568
|
-
name:
|
|
569
|
-
apiCalls:
|
|
570
|
-
|
|
571
|
-
url:
|
|
572
|
-
method:
|
|
573
|
-
body:
|
|
727
|
+
var MetaSiteConfigSchema = z22.object({
|
|
728
|
+
configurations: z22.array(
|
|
729
|
+
z22.object({
|
|
730
|
+
name: z22.string().min(1),
|
|
731
|
+
apiCalls: z22.array(
|
|
732
|
+
z22.object({
|
|
733
|
+
url: z22.string().url(),
|
|
734
|
+
method: z22.enum(["POST", "PUT"]),
|
|
735
|
+
body: z22.string()
|
|
574
736
|
})
|
|
575
737
|
)
|
|
576
738
|
})
|
|
577
739
|
).optional()
|
|
578
740
|
});
|
|
579
|
-
var EnvironmentSchema =
|
|
741
|
+
var EnvironmentSchema = z22.object({
|
|
580
742
|
/** Local project configuration */
|
|
581
743
|
localProject: LocalProjectConfigSchema.optional(),
|
|
582
744
|
/** Meta site configuration */
|
|
@@ -584,13 +746,13 @@ var EnvironmentSchema = z21.object({
|
|
|
584
746
|
});
|
|
585
747
|
|
|
586
748
|
// src/scenario/test-scenario.ts
|
|
587
|
-
import { z as
|
|
749
|
+
import { z as z25 } from "zod";
|
|
588
750
|
|
|
589
751
|
// src/assertion/assertion.ts
|
|
590
|
-
import { z as
|
|
752
|
+
import { z as z24 } from "zod";
|
|
591
753
|
|
|
592
754
|
// src/assertion/build-passed-command.ts
|
|
593
|
-
import { z as
|
|
755
|
+
import { z as z23 } from "zod";
|
|
594
756
|
var ALLOWED_BUILD_COMMANDS = [
|
|
595
757
|
"yarn build",
|
|
596
758
|
"npm run build",
|
|
@@ -616,10 +778,10 @@ function parseBuildCommandToArgv(command) {
|
|
|
616
778
|
return BUILD_COMMAND_ARGV[trimmed];
|
|
617
779
|
}
|
|
618
780
|
var enumTuple = ALLOWED_BUILD_COMMANDS;
|
|
619
|
-
var BuildPassedCommandStringSchema =
|
|
781
|
+
var BuildPassedCommandStringSchema = z23.enum(enumTuple);
|
|
620
782
|
|
|
621
783
|
// src/assertion/assertion.ts
|
|
622
|
-
var AssertionTypeSchema =
|
|
784
|
+
var AssertionTypeSchema = z24.enum([
|
|
623
785
|
"skill_was_called",
|
|
624
786
|
"tool_called_with_param",
|
|
625
787
|
"build_passed",
|
|
@@ -628,61 +790,61 @@ var AssertionTypeSchema = z23.enum([
|
|
|
628
790
|
"llm_judge",
|
|
629
791
|
"api_call"
|
|
630
792
|
]);
|
|
631
|
-
var AssertionParameterTypeSchema =
|
|
793
|
+
var AssertionParameterTypeSchema = z24.enum([
|
|
632
794
|
"string",
|
|
633
795
|
"number",
|
|
634
796
|
"boolean"
|
|
635
797
|
]);
|
|
636
|
-
var AssertionParameterSchema =
|
|
798
|
+
var AssertionParameterSchema = z24.object({
|
|
637
799
|
/** Parameter name (used as key in params object) */
|
|
638
|
-
name:
|
|
800
|
+
name: z24.string().min(1),
|
|
639
801
|
/** Display label for the parameter */
|
|
640
|
-
label:
|
|
802
|
+
label: z24.string().min(1),
|
|
641
803
|
/** Parameter type */
|
|
642
804
|
type: AssertionParameterTypeSchema,
|
|
643
805
|
/** Whether this parameter is required */
|
|
644
|
-
required:
|
|
806
|
+
required: z24.boolean(),
|
|
645
807
|
/** Default value (optional, used when not provided) */
|
|
646
|
-
defaultValue:
|
|
808
|
+
defaultValue: z24.union([z24.string(), z24.number(), z24.boolean()]).optional(),
|
|
647
809
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
648
|
-
advanced:
|
|
810
|
+
advanced: z24.boolean().optional()
|
|
649
811
|
});
|
|
650
|
-
var ScenarioAssertionLinkSchema =
|
|
812
|
+
var ScenarioAssertionLinkSchema = z24.object({
|
|
651
813
|
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
652
|
-
assertionId:
|
|
814
|
+
assertionId: z24.string(),
|
|
653
815
|
/** Parameter values for this assertion in this scenario */
|
|
654
|
-
params:
|
|
655
|
-
|
|
656
|
-
|
|
816
|
+
params: z24.record(
|
|
817
|
+
z24.string(),
|
|
818
|
+
z24.union([z24.string(), z24.number(), z24.boolean(), z24.null()])
|
|
657
819
|
).optional()
|
|
658
820
|
});
|
|
659
|
-
var SkillWasCalledConfigSchema =
|
|
821
|
+
var SkillWasCalledConfigSchema = z24.object({
|
|
660
822
|
/** Names of the skills that must have been called */
|
|
661
|
-
skillNames:
|
|
823
|
+
skillNames: z24.array(z24.string().min(1)).min(1)
|
|
662
824
|
});
|
|
663
|
-
var CostConfigSchema =
|
|
825
|
+
var CostConfigSchema = z24.strictObject({
|
|
664
826
|
/** Maximum allowed cost in USD */
|
|
665
|
-
maxCostUsd:
|
|
827
|
+
maxCostUsd: z24.number().positive()
|
|
666
828
|
});
|
|
667
|
-
var ToolCalledWithParamConfigSchema =
|
|
829
|
+
var ToolCalledWithParamConfigSchema = z24.strictObject({
|
|
668
830
|
/** Name of the tool that must have been called */
|
|
669
|
-
toolName:
|
|
831
|
+
toolName: z24.string().min(1),
|
|
670
832
|
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
671
|
-
expectedParams:
|
|
833
|
+
expectedParams: z24.string().min(1).optional(),
|
|
672
834
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
673
|
-
requireSuccess:
|
|
835
|
+
requireSuccess: z24.boolean().optional()
|
|
674
836
|
});
|
|
675
|
-
var BuildPassedConfigSchema =
|
|
837
|
+
var BuildPassedConfigSchema = z24.strictObject({
|
|
676
838
|
/** Allowlisted command only (default at runtime: "yarn build") */
|
|
677
839
|
command: BuildPassedCommandStringSchema.optional(),
|
|
678
840
|
/** Expected exit code (default: 0) */
|
|
679
|
-
expectedExitCode:
|
|
841
|
+
expectedExitCode: z24.number().int().optional()
|
|
680
842
|
});
|
|
681
|
-
var TimeConfigSchema =
|
|
843
|
+
var TimeConfigSchema = z24.strictObject({
|
|
682
844
|
/** Maximum allowed duration in milliseconds */
|
|
683
|
-
maxDurationMs:
|
|
845
|
+
maxDurationMs: z24.number().int().positive()
|
|
684
846
|
});
|
|
685
|
-
var LlmJudgeConfigSchema =
|
|
847
|
+
var LlmJudgeConfigSchema = z24.object({
|
|
686
848
|
/**
|
|
687
849
|
* Prompt template with placeholders:
|
|
688
850
|
* - {{output}}: agent's final output
|
|
@@ -693,65 +855,65 @@ var LlmJudgeConfigSchema = z23.object({
|
|
|
693
855
|
* - {{trace}}: step-by-step trace of tool calls
|
|
694
856
|
* - Custom parameters defined in the parameters array
|
|
695
857
|
*/
|
|
696
|
-
prompt:
|
|
858
|
+
prompt: z24.string().min(1),
|
|
697
859
|
/** Minimum score to pass (0-10, default 7) */
|
|
698
|
-
minScore:
|
|
860
|
+
minScore: z24.number().int().min(0).max(10).optional(),
|
|
699
861
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
700
|
-
model:
|
|
862
|
+
model: z24.string().optional(),
|
|
701
863
|
/** Max output tokens */
|
|
702
|
-
maxTokens:
|
|
864
|
+
maxTokens: z24.number().int().optional(),
|
|
703
865
|
/** Temperature (0-1) */
|
|
704
|
-
temperature:
|
|
866
|
+
temperature: z24.number().min(0).max(1).optional(),
|
|
705
867
|
/** User-defined parameters for this assertion */
|
|
706
|
-
parameters:
|
|
868
|
+
parameters: z24.array(AssertionParameterSchema).optional()
|
|
707
869
|
});
|
|
708
|
-
var ApiCallConfigSchema =
|
|
870
|
+
var ApiCallConfigSchema = z24.strictObject({
|
|
709
871
|
/** URL to call */
|
|
710
|
-
url:
|
|
872
|
+
url: z24.string().min(1),
|
|
711
873
|
/** HTTP method (default GET) */
|
|
712
|
-
method:
|
|
874
|
+
method: z24.enum(["GET", "POST"]).optional(),
|
|
713
875
|
/** Request body (JSON string, for POST requests) */
|
|
714
|
-
requestBody:
|
|
876
|
+
requestBody: z24.string().optional(),
|
|
715
877
|
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
716
|
-
expectedResponse:
|
|
878
|
+
expectedResponse: z24.string().min(1),
|
|
717
879
|
/** Request headers as JSON string of key-value pairs */
|
|
718
|
-
requestHeaders:
|
|
880
|
+
requestHeaders: z24.string().optional(),
|
|
719
881
|
/** Request timeout in milliseconds (default 30000) */
|
|
720
|
-
timeoutMs:
|
|
882
|
+
timeoutMs: z24.number().int().positive().optional()
|
|
721
883
|
});
|
|
722
884
|
var AssertionBaseFields = {
|
|
723
885
|
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
724
|
-
negate:
|
|
886
|
+
negate: z24.boolean().optional()
|
|
725
887
|
};
|
|
726
888
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
727
|
-
type:
|
|
889
|
+
type: z24.literal("skill_was_called"),
|
|
728
890
|
...AssertionBaseFields
|
|
729
891
|
});
|
|
730
892
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
731
|
-
type:
|
|
893
|
+
type: z24.literal("tool_called_with_param"),
|
|
732
894
|
...AssertionBaseFields
|
|
733
895
|
});
|
|
734
896
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
735
|
-
type:
|
|
897
|
+
type: z24.literal("build_passed"),
|
|
736
898
|
...AssertionBaseFields
|
|
737
899
|
});
|
|
738
900
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
739
|
-
type:
|
|
901
|
+
type: z24.literal("cost"),
|
|
740
902
|
...AssertionBaseFields
|
|
741
903
|
});
|
|
742
904
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
743
|
-
type:
|
|
905
|
+
type: z24.literal("llm_judge"),
|
|
744
906
|
...AssertionBaseFields
|
|
745
907
|
});
|
|
746
908
|
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
747
|
-
type:
|
|
909
|
+
type: z24.literal("api_call"),
|
|
748
910
|
...AssertionBaseFields
|
|
749
911
|
});
|
|
750
912
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
751
|
-
type:
|
|
913
|
+
type: z24.literal("time_limit"),
|
|
752
914
|
...AssertionBaseFields
|
|
753
915
|
});
|
|
754
|
-
var AssertionSchema =
|
|
916
|
+
var AssertionSchema = z24.union([
|
|
755
917
|
SkillWasCalledAssertionSchema,
|
|
756
918
|
ToolCalledWithParamAssertionSchema,
|
|
757
919
|
BuildPassedAssertionSchema,
|
|
@@ -760,7 +922,7 @@ var AssertionSchema = z23.union([
|
|
|
760
922
|
LlmJudgeAssertionSchema,
|
|
761
923
|
ApiCallAssertionSchema
|
|
762
924
|
]);
|
|
763
|
-
var AssertionConfigSchema =
|
|
925
|
+
var AssertionConfigSchema = z24.union([
|
|
764
926
|
LlmJudgeConfigSchema,
|
|
765
927
|
// requires prompt - check first
|
|
766
928
|
SkillWasCalledConfigSchema,
|
|
@@ -775,7 +937,7 @@ var AssertionConfigSchema = z23.union([
|
|
|
775
937
|
// requires maxCostUsd, uses strictObject
|
|
776
938
|
BuildPassedConfigSchema,
|
|
777
939
|
// all optional, uses strictObject to reject unknown keys
|
|
778
|
-
|
|
940
|
+
z24.object({})
|
|
779
941
|
// fallback empty config
|
|
780
942
|
]);
|
|
781
943
|
function validateAssertionConfig(type, config) {
|
|
@@ -1021,35 +1183,35 @@ function getSystemAssertion(id) {
|
|
|
1021
1183
|
|
|
1022
1184
|
// src/scenario/test-scenario.ts
|
|
1023
1185
|
var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
|
|
1024
|
-
var TriggerPromptImageSchema =
|
|
1186
|
+
var TriggerPromptImageSchema = z25.object({
|
|
1025
1187
|
/** Base64-encoded image data (no data URL prefix) */
|
|
1026
|
-
base64:
|
|
1188
|
+
base64: z25.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
|
|
1027
1189
|
/** MIME type of the image */
|
|
1028
|
-
mediaType:
|
|
1190
|
+
mediaType: z25.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
|
1029
1191
|
/** Original filename of the image */
|
|
1030
|
-
name:
|
|
1192
|
+
name: z25.string()
|
|
1031
1193
|
});
|
|
1032
|
-
var ExpectedFileSchema =
|
|
1194
|
+
var ExpectedFileSchema = z25.object({
|
|
1033
1195
|
/** Relative path where the file should be created */
|
|
1034
|
-
path:
|
|
1196
|
+
path: z25.string(),
|
|
1035
1197
|
/** Optional expected content */
|
|
1036
|
-
content:
|
|
1198
|
+
content: z25.string().optional()
|
|
1037
1199
|
});
|
|
1038
1200
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
1039
1201
|
/** The prompt sent to the agent to trigger the task */
|
|
1040
|
-
triggerPrompt:
|
|
1202
|
+
triggerPrompt: z25.string().min(10),
|
|
1041
1203
|
/** ID of the template to use for this scenario (null = no template) */
|
|
1042
|
-
templateId:
|
|
1204
|
+
templateId: z25.string().nullish(),
|
|
1043
1205
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
1044
|
-
assertions:
|
|
1206
|
+
assertions: z25.array(AssertionSchema).optional(),
|
|
1045
1207
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
1046
|
-
assertionIds:
|
|
1208
|
+
assertionIds: z25.array(z25.string()).optional(),
|
|
1047
1209
|
/** Linked assertions with per-scenario parameter values */
|
|
1048
|
-
assertionLinks:
|
|
1210
|
+
assertionLinks: z25.array(ScenarioAssertionLinkSchema).optional(),
|
|
1049
1211
|
/** Tags for categorisation and filtering */
|
|
1050
|
-
tags:
|
|
1212
|
+
tags: z25.array(z25.string()).optional(),
|
|
1051
1213
|
/** Base64-encoded images attached to the trigger prompt (max 3) */
|
|
1052
|
-
triggerPromptImages:
|
|
1214
|
+
triggerPromptImages: z25.array(TriggerPromptImageSchema).max(3).optional()
|
|
1053
1215
|
});
|
|
1054
1216
|
function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
1055
1217
|
if (!links) return;
|
|
@@ -1060,7 +1222,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1060
1222
|
if (cmd === void 0 || cmd === null) continue;
|
|
1061
1223
|
if (typeof cmd !== "string") {
|
|
1062
1224
|
ctx.addIssue({
|
|
1063
|
-
code:
|
|
1225
|
+
code: z25.ZodIssueCode.custom,
|
|
1064
1226
|
message: "build_passed command must be a string",
|
|
1065
1227
|
path: ["assertionLinks", i, "params", "command"]
|
|
1066
1228
|
});
|
|
@@ -1068,7 +1230,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1068
1230
|
}
|
|
1069
1231
|
if (!isAllowedBuildCommandString(cmd)) {
|
|
1070
1232
|
ctx.addIssue({
|
|
1071
|
-
code:
|
|
1233
|
+
code: z25.ZodIssueCode.custom,
|
|
1072
1234
|
message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
|
|
1073
1235
|
path: ["assertionLinks", i, "params", "command"]
|
|
1074
1236
|
});
|
|
@@ -1091,19 +1253,19 @@ var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().super
|
|
|
1091
1253
|
});
|
|
1092
1254
|
|
|
1093
1255
|
// src/scenario/batch-import.ts
|
|
1094
|
-
import { z as
|
|
1256
|
+
import { z as z26 } from "zod";
|
|
1095
1257
|
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1096
|
-
var BatchAssertionLinkSchema =
|
|
1097
|
-
|
|
1258
|
+
var BatchAssertionLinkSchema = z26.union([
|
|
1259
|
+
z26.string().min(1),
|
|
1098
1260
|
ScenarioAssertionLinkSchema
|
|
1099
1261
|
]);
|
|
1100
|
-
var BatchScenarioEntrySchema =
|
|
1101
|
-
name:
|
|
1102
|
-
description:
|
|
1103
|
-
triggerPrompt:
|
|
1104
|
-
templateId:
|
|
1105
|
-
tags:
|
|
1106
|
-
assertionLinks:
|
|
1262
|
+
var BatchScenarioEntrySchema = z26.object({
|
|
1263
|
+
name: z26.string().min(1, "name: Required"),
|
|
1264
|
+
description: z26.string().optional().default(""),
|
|
1265
|
+
triggerPrompt: z26.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1266
|
+
templateId: z26.string().nullish(),
|
|
1267
|
+
tags: z26.array(z26.string()).optional(),
|
|
1268
|
+
assertionLinks: z26.array(BatchAssertionLinkSchema).optional()
|
|
1107
1269
|
}).superRefine((data, ctx) => {
|
|
1108
1270
|
if (!data.assertionLinks) return;
|
|
1109
1271
|
const objectLinks = data.assertionLinks.filter(
|
|
@@ -1113,8 +1275,8 @@ var BatchScenarioEntrySchema = z25.object({
|
|
|
1113
1275
|
validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
|
|
1114
1276
|
}
|
|
1115
1277
|
});
|
|
1116
|
-
var BatchImportPayloadSchema =
|
|
1117
|
-
scenarios:
|
|
1278
|
+
var BatchImportPayloadSchema = z26.object({
|
|
1279
|
+
scenarios: z26.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1118
1280
|
});
|
|
1119
1281
|
var BATCH_IMPORT_LIMITS = {
|
|
1120
1282
|
MAX_SCENARIOS: 100,
|
|
@@ -1136,29 +1298,29 @@ function normalizeBatchAssertionLink(link) {
|
|
|
1136
1298
|
}
|
|
1137
1299
|
return link;
|
|
1138
1300
|
}
|
|
1139
|
-
var BatchResultItemSchema =
|
|
1140
|
-
index:
|
|
1141
|
-
name:
|
|
1142
|
-
status:
|
|
1143
|
-
id:
|
|
1144
|
-
errors:
|
|
1145
|
-
});
|
|
1146
|
-
var BatchSummarySchema =
|
|
1147
|
-
total:
|
|
1148
|
-
valid:
|
|
1149
|
-
invalid:
|
|
1150
|
-
created:
|
|
1151
|
-
});
|
|
1152
|
-
var BatchImportResponseSchema =
|
|
1301
|
+
var BatchResultItemSchema = z26.object({
|
|
1302
|
+
index: z26.number(),
|
|
1303
|
+
name: z26.string(),
|
|
1304
|
+
status: z26.enum(["valid", "invalid"]),
|
|
1305
|
+
id: z26.string().nullable().optional(),
|
|
1306
|
+
errors: z26.array(z26.string()).optional()
|
|
1307
|
+
});
|
|
1308
|
+
var BatchSummarySchema = z26.object({
|
|
1309
|
+
total: z26.number(),
|
|
1310
|
+
valid: z26.number(),
|
|
1311
|
+
invalid: z26.number(),
|
|
1312
|
+
created: z26.number()
|
|
1313
|
+
});
|
|
1314
|
+
var BatchImportResponseSchema = z26.object({
|
|
1153
1315
|
summary: BatchSummarySchema,
|
|
1154
|
-
results:
|
|
1316
|
+
results: z26.array(BatchResultItemSchema)
|
|
1155
1317
|
});
|
|
1156
1318
|
|
|
1157
1319
|
// src/suite/test-suite.ts
|
|
1158
|
-
import { z as
|
|
1320
|
+
import { z as z27 } from "zod";
|
|
1159
1321
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1160
1322
|
/** IDs of test scenarios in this suite */
|
|
1161
|
-
scenarioIds:
|
|
1323
|
+
scenarioIds: z27.array(z27.string())
|
|
1162
1324
|
});
|
|
1163
1325
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1164
1326
|
id: true,
|
|
@@ -1169,21 +1331,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1169
1331
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1170
1332
|
|
|
1171
1333
|
// src/evaluation/metrics.ts
|
|
1172
|
-
import { z as
|
|
1173
|
-
var TokenUsageSchema =
|
|
1174
|
-
prompt:
|
|
1175
|
-
completion:
|
|
1176
|
-
total:
|
|
1177
|
-
});
|
|
1178
|
-
var EvalMetricsSchema =
|
|
1179
|
-
totalAssertions:
|
|
1180
|
-
passed:
|
|
1181
|
-
failed:
|
|
1182
|
-
skipped:
|
|
1183
|
-
errors:
|
|
1184
|
-
passRate:
|
|
1185
|
-
avgDuration:
|
|
1186
|
-
totalDuration:
|
|
1334
|
+
import { z as z28 } from "zod";
|
|
1335
|
+
var TokenUsageSchema = z28.object({
|
|
1336
|
+
prompt: z28.number(),
|
|
1337
|
+
completion: z28.number(),
|
|
1338
|
+
total: z28.number()
|
|
1339
|
+
});
|
|
1340
|
+
var EvalMetricsSchema = z28.object({
|
|
1341
|
+
totalAssertions: z28.number(),
|
|
1342
|
+
passed: z28.number(),
|
|
1343
|
+
failed: z28.number(),
|
|
1344
|
+
skipped: z28.number(),
|
|
1345
|
+
errors: z28.number(),
|
|
1346
|
+
passRate: z28.number(),
|
|
1347
|
+
avgDuration: z28.number(),
|
|
1348
|
+
totalDuration: z28.number()
|
|
1187
1349
|
});
|
|
1188
1350
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1189
1351
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1193,7 +1355,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1193
1355
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1194
1356
|
return EvalStatus2;
|
|
1195
1357
|
})(EvalStatus || {});
|
|
1196
|
-
var EvalStatusSchema =
|
|
1358
|
+
var EvalStatusSchema = z28.enum(EvalStatus);
|
|
1197
1359
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1198
1360
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1199
1361
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1201,54 +1363,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1201
1363
|
LLMStepType2["THINKING"] = "thinking";
|
|
1202
1364
|
return LLMStepType2;
|
|
1203
1365
|
})(LLMStepType || {});
|
|
1204
|
-
var LLMTraceStepSchema =
|
|
1205
|
-
id:
|
|
1206
|
-
stepNumber:
|
|
1207
|
-
type:
|
|
1208
|
-
model:
|
|
1209
|
-
provider:
|
|
1210
|
-
startedAt:
|
|
1211
|
-
durationMs:
|
|
1366
|
+
var LLMTraceStepSchema = z28.object({
|
|
1367
|
+
id: z28.string(),
|
|
1368
|
+
stepNumber: z28.number(),
|
|
1369
|
+
type: z28.enum(LLMStepType),
|
|
1370
|
+
model: z28.string(),
|
|
1371
|
+
provider: z28.string(),
|
|
1372
|
+
startedAt: z28.string(),
|
|
1373
|
+
durationMs: z28.number(),
|
|
1212
1374
|
tokenUsage: TokenUsageSchema,
|
|
1213
|
-
costUsd:
|
|
1214
|
-
toolName:
|
|
1215
|
-
toolArguments:
|
|
1216
|
-
inputPreview:
|
|
1217
|
-
outputPreview:
|
|
1218
|
-
success:
|
|
1219
|
-
error:
|
|
1220
|
-
turnIndex:
|
|
1221
|
-
});
|
|
1222
|
-
var LLMBreakdownStatsSchema =
|
|
1223
|
-
count:
|
|
1224
|
-
durationMs:
|
|
1225
|
-
tokens:
|
|
1226
|
-
costUsd:
|
|
1227
|
-
});
|
|
1228
|
-
var LLMTraceSummarySchema =
|
|
1229
|
-
totalSteps:
|
|
1230
|
-
totalTurns:
|
|
1231
|
-
totalDurationMs:
|
|
1375
|
+
costUsd: z28.number(),
|
|
1376
|
+
toolName: z28.string().optional(),
|
|
1377
|
+
toolArguments: z28.string().optional(),
|
|
1378
|
+
inputPreview: z28.string().optional(),
|
|
1379
|
+
outputPreview: z28.string().optional(),
|
|
1380
|
+
success: z28.boolean(),
|
|
1381
|
+
error: z28.string().optional(),
|
|
1382
|
+
turnIndex: z28.number().optional()
|
|
1383
|
+
});
|
|
1384
|
+
var LLMBreakdownStatsSchema = z28.object({
|
|
1385
|
+
count: z28.number(),
|
|
1386
|
+
durationMs: z28.number(),
|
|
1387
|
+
tokens: z28.number(),
|
|
1388
|
+
costUsd: z28.number()
|
|
1389
|
+
});
|
|
1390
|
+
var LLMTraceSummarySchema = z28.object({
|
|
1391
|
+
totalSteps: z28.number(),
|
|
1392
|
+
totalTurns: z28.number().optional(),
|
|
1393
|
+
totalDurationMs: z28.number(),
|
|
1232
1394
|
totalTokens: TokenUsageSchema,
|
|
1233
|
-
totalCostUsd:
|
|
1234
|
-
stepTypeBreakdown:
|
|
1235
|
-
modelBreakdown:
|
|
1236
|
-
modelsUsed:
|
|
1237
|
-
});
|
|
1238
|
-
var LLMTraceSchema =
|
|
1239
|
-
id:
|
|
1240
|
-
steps:
|
|
1395
|
+
totalCostUsd: z28.number(),
|
|
1396
|
+
stepTypeBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema).optional(),
|
|
1397
|
+
modelBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema),
|
|
1398
|
+
modelsUsed: z28.array(z28.string())
|
|
1399
|
+
});
|
|
1400
|
+
var LLMTraceSchema = z28.object({
|
|
1401
|
+
id: z28.string(),
|
|
1402
|
+
steps: z28.array(LLMTraceStepSchema),
|
|
1241
1403
|
summary: LLMTraceSummarySchema
|
|
1242
1404
|
});
|
|
1243
1405
|
|
|
1244
1406
|
// src/evaluation/eval-result.ts
|
|
1245
|
-
import { z as
|
|
1407
|
+
import { z as z32 } from "zod";
|
|
1246
1408
|
|
|
1247
1409
|
// src/evaluation/eval-run.ts
|
|
1248
|
-
import { z as
|
|
1410
|
+
import { z as z30 } from "zod";
|
|
1249
1411
|
|
|
1250
1412
|
// src/evaluation/live-trace.ts
|
|
1251
|
-
import { z as
|
|
1413
|
+
import { z as z29 } from "zod";
|
|
1252
1414
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1253
1415
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1254
1416
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1262,37 +1424,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1262
1424
|
LiveTraceEventType2["USER"] = "user";
|
|
1263
1425
|
return LiveTraceEventType2;
|
|
1264
1426
|
})(LiveTraceEventType || {});
|
|
1265
|
-
var LiveTraceEventSchema =
|
|
1427
|
+
var LiveTraceEventSchema = z29.object({
|
|
1266
1428
|
/** The evaluation run ID */
|
|
1267
|
-
evalRunId:
|
|
1429
|
+
evalRunId: z29.string(),
|
|
1268
1430
|
/** The scenario ID being executed */
|
|
1269
|
-
scenarioId:
|
|
1431
|
+
scenarioId: z29.string(),
|
|
1270
1432
|
/** The scenario name for display */
|
|
1271
|
-
scenarioName:
|
|
1433
|
+
scenarioName: z29.string(),
|
|
1272
1434
|
/** The target ID (skill, agent, etc.) */
|
|
1273
|
-
targetId:
|
|
1435
|
+
targetId: z29.string(),
|
|
1274
1436
|
/** The target name for display */
|
|
1275
|
-
targetName:
|
|
1437
|
+
targetName: z29.string(),
|
|
1276
1438
|
/** Step number in the current scenario execution */
|
|
1277
|
-
stepNumber:
|
|
1439
|
+
stepNumber: z29.number(),
|
|
1278
1440
|
/** Type of trace event */
|
|
1279
|
-
type:
|
|
1441
|
+
type: z29.enum(LiveTraceEventType),
|
|
1280
1442
|
/** Tool name if this is a tool_use event */
|
|
1281
|
-
toolName:
|
|
1443
|
+
toolName: z29.string().optional(),
|
|
1282
1444
|
/** Tool arguments preview (truncated JSON) */
|
|
1283
|
-
toolArgs:
|
|
1445
|
+
toolArgs: z29.string().optional(),
|
|
1284
1446
|
/** Output preview (truncated text) */
|
|
1285
|
-
outputPreview:
|
|
1447
|
+
outputPreview: z29.string().optional(),
|
|
1286
1448
|
/** File path for file operations */
|
|
1287
|
-
filePath:
|
|
1449
|
+
filePath: z29.string().optional(),
|
|
1288
1450
|
/** Elapsed time in milliseconds for progress events */
|
|
1289
|
-
elapsedMs:
|
|
1451
|
+
elapsedMs: z29.number().optional(),
|
|
1290
1452
|
/** Thinking/reasoning text from Claude */
|
|
1291
|
-
thinking:
|
|
1453
|
+
thinking: z29.string().optional(),
|
|
1292
1454
|
/** Timestamp when this event occurred */
|
|
1293
|
-
timestamp:
|
|
1455
|
+
timestamp: z29.string(),
|
|
1294
1456
|
/** Whether this is the final event for this scenario */
|
|
1295
|
-
isComplete:
|
|
1457
|
+
isComplete: z29.boolean()
|
|
1296
1458
|
});
|
|
1297
1459
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1298
1460
|
function parseTraceEventLine(line) {
|
|
@@ -1321,40 +1483,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1321
1483
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1322
1484
|
return TriggerType2;
|
|
1323
1485
|
})(TriggerType || {});
|
|
1324
|
-
var TriggerMetadataSchema =
|
|
1325
|
-
version:
|
|
1326
|
-
resourceUpdated:
|
|
1327
|
-
scheduleId:
|
|
1486
|
+
var TriggerMetadataSchema = z30.object({
|
|
1487
|
+
version: z30.string().optional(),
|
|
1488
|
+
resourceUpdated: z30.array(z30.string()).optional(),
|
|
1489
|
+
scheduleId: z30.string().optional()
|
|
1328
1490
|
});
|
|
1329
|
-
var TriggerSchema =
|
|
1330
|
-
id:
|
|
1491
|
+
var TriggerSchema = z30.object({
|
|
1492
|
+
id: z30.string(),
|
|
1331
1493
|
metadata: TriggerMetadataSchema.optional(),
|
|
1332
|
-
type:
|
|
1494
|
+
type: z30.nativeEnum(TriggerType)
|
|
1333
1495
|
});
|
|
1334
|
-
var DiffLineTypeSchema =
|
|
1335
|
-
var DiffLineSchema =
|
|
1496
|
+
var DiffLineTypeSchema = z30.enum(["added", "removed", "unchanged"]);
|
|
1497
|
+
var DiffLineSchema = z30.object({
|
|
1336
1498
|
type: DiffLineTypeSchema,
|
|
1337
|
-
content:
|
|
1338
|
-
lineNumber:
|
|
1339
|
-
});
|
|
1340
|
-
var DiffContentSchema =
|
|
1341
|
-
path:
|
|
1342
|
-
expected:
|
|
1343
|
-
actual:
|
|
1344
|
-
diffLines:
|
|
1345
|
-
renamedFrom:
|
|
1499
|
+
content: z30.string(),
|
|
1500
|
+
lineNumber: z30.number()
|
|
1501
|
+
});
|
|
1502
|
+
var DiffContentSchema = z30.object({
|
|
1503
|
+
path: z30.string(),
|
|
1504
|
+
expected: z30.string(),
|
|
1505
|
+
actual: z30.string(),
|
|
1506
|
+
diffLines: z30.array(DiffLineSchema),
|
|
1507
|
+
renamedFrom: z30.string().optional(),
|
|
1346
1508
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1347
|
-
isInfrastructure:
|
|
1509
|
+
isInfrastructure: z30.boolean().optional()
|
|
1348
1510
|
});
|
|
1349
|
-
var CommandExecutionSchema =
|
|
1350
|
-
command:
|
|
1351
|
-
exitCode:
|
|
1352
|
-
output:
|
|
1353
|
-
duration:
|
|
1511
|
+
var CommandExecutionSchema = z30.object({
|
|
1512
|
+
command: z30.string(),
|
|
1513
|
+
exitCode: z30.number(),
|
|
1514
|
+
output: z30.string().optional(),
|
|
1515
|
+
duration: z30.number()
|
|
1354
1516
|
});
|
|
1355
|
-
var FileModificationSchema =
|
|
1356
|
-
path:
|
|
1357
|
-
action:
|
|
1517
|
+
var FileModificationSchema = z30.object({
|
|
1518
|
+
path: z30.string(),
|
|
1519
|
+
action: z30.enum(["created", "modified", "deleted"])
|
|
1358
1520
|
});
|
|
1359
1521
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1360
1522
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1362,62 +1524,58 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1362
1524
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1363
1525
|
return TemplateFileStatus2;
|
|
1364
1526
|
})(TemplateFileStatus || {});
|
|
1365
|
-
var TemplateFileSchema =
|
|
1527
|
+
var TemplateFileSchema = z30.object({
|
|
1366
1528
|
/** Relative path within the template */
|
|
1367
|
-
path:
|
|
1529
|
+
path: z30.string(),
|
|
1368
1530
|
/** Full file content after execution */
|
|
1369
|
-
content:
|
|
1531
|
+
content: z30.string(),
|
|
1370
1532
|
/** File status (new, modified, unchanged) */
|
|
1371
|
-
status:
|
|
1533
|
+
status: z30.enum(["new", "modified", "unchanged"]),
|
|
1372
1534
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1373
|
-
isInfrastructure:
|
|
1535
|
+
isInfrastructure: z30.boolean().optional()
|
|
1374
1536
|
});
|
|
1375
|
-
var ApiCallSchema =
|
|
1376
|
-
endpoint:
|
|
1377
|
-
tokensUsed:
|
|
1378
|
-
duration:
|
|
1537
|
+
var ApiCallSchema = z30.object({
|
|
1538
|
+
endpoint: z30.string(),
|
|
1539
|
+
tokensUsed: z30.number(),
|
|
1540
|
+
duration: z30.number()
|
|
1379
1541
|
});
|
|
1380
|
-
var ExecutionTraceSchema =
|
|
1381
|
-
commands:
|
|
1382
|
-
filesModified:
|
|
1383
|
-
apiCalls:
|
|
1384
|
-
totalDuration:
|
|
1542
|
+
var ExecutionTraceSchema = z30.object({
|
|
1543
|
+
commands: z30.array(CommandExecutionSchema),
|
|
1544
|
+
filesModified: z30.array(FileModificationSchema),
|
|
1545
|
+
apiCalls: z30.array(ApiCallSchema),
|
|
1546
|
+
totalDuration: z30.number()
|
|
1385
1547
|
});
|
|
1386
|
-
var RunAnalysisFindingSchema =
|
|
1387
|
-
category:
|
|
1548
|
+
var RunAnalysisFindingSchema = z30.object({
|
|
1549
|
+
category: z30.enum([
|
|
1388
1550
|
"failure_pattern",
|
|
1389
1551
|
"cost_waste",
|
|
1390
1552
|
"flakiness",
|
|
1391
1553
|
"inefficiency",
|
|
1392
1554
|
"positive"
|
|
1393
1555
|
]),
|
|
1394
|
-
severity:
|
|
1395
|
-
description:
|
|
1396
|
-
affectedScenarios:
|
|
1397
|
-
recommendation:
|
|
1556
|
+
severity: z30.enum(["high", "medium", "low"]),
|
|
1557
|
+
description: z30.string(),
|
|
1558
|
+
affectedScenarios: z30.array(z30.string()),
|
|
1559
|
+
recommendation: z30.string().optional()
|
|
1398
1560
|
});
|
|
1399
|
-
var RunAnalysisSchema =
|
|
1400
|
-
generatedAt:
|
|
1401
|
-
summary:
|
|
1402
|
-
findings:
|
|
1561
|
+
var RunAnalysisSchema = z30.object({
|
|
1562
|
+
generatedAt: z30.string(),
|
|
1563
|
+
summary: z30.string(),
|
|
1564
|
+
findings: z30.array(RunAnalysisFindingSchema)
|
|
1403
1565
|
});
|
|
1404
1566
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1405
1567
|
/** Agent ID for this run */
|
|
1406
|
-
agentId:
|
|
1568
|
+
agentId: z30.string().optional(),
|
|
1407
1569
|
/** Preset ID that originated this run (optional) */
|
|
1408
|
-
presetId:
|
|
1409
|
-
/** Skill IDs for this run */
|
|
1410
|
-
skillIds: z29.array(z29.string()).optional(),
|
|
1411
|
-
/** Map of skillId to skillVersionId for this run */
|
|
1412
|
-
skillVersions: z29.record(z29.string(), z29.string()).optional(),
|
|
1570
|
+
presetId: z30.string().optional(),
|
|
1413
1571
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1414
|
-
scenarioIds:
|
|
1572
|
+
scenarioIds: z30.array(z30.string()),
|
|
1415
1573
|
/** Current status */
|
|
1416
1574
|
status: EvalStatusSchema,
|
|
1417
1575
|
/** Progress percentage (0-100) */
|
|
1418
|
-
progress:
|
|
1576
|
+
progress: z30.number(),
|
|
1419
1577
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1420
|
-
results:
|
|
1578
|
+
results: z30.array(z30.lazy(() => EvalRunResultSchema)),
|
|
1421
1579
|
/** Aggregated metrics across all results */
|
|
1422
1580
|
aggregateMetrics: EvalMetricsSchema,
|
|
1423
1581
|
/** Aggregated LLM trace summary */
|
|
@@ -1425,41 +1583,39 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1425
1583
|
/** What triggered this run */
|
|
1426
1584
|
trigger: TriggerSchema.optional(),
|
|
1427
1585
|
/** When the run started (set when evaluation is triggered) */
|
|
1428
|
-
startedAt:
|
|
1586
|
+
startedAt: z30.string().optional(),
|
|
1429
1587
|
/** When the run completed */
|
|
1430
|
-
completedAt:
|
|
1588
|
+
completedAt: z30.string().optional(),
|
|
1431
1589
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1432
|
-
liveTraceEvents:
|
|
1590
|
+
liveTraceEvents: z30.array(LiveTraceEventSchema).optional(),
|
|
1433
1591
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1434
|
-
jobId:
|
|
1592
|
+
jobId: z30.string().optional(),
|
|
1435
1593
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1436
|
-
jobStatus:
|
|
1594
|
+
jobStatus: z30.string().optional(),
|
|
1437
1595
|
/** Remote job error message if the job failed */
|
|
1438
|
-
jobError:
|
|
1596
|
+
jobError: z30.string().optional(),
|
|
1439
1597
|
/** Timestamp of the last job status check */
|
|
1440
|
-
jobStatusCheckedAt:
|
|
1441
|
-
/**
|
|
1442
|
-
|
|
1443
|
-
/**
|
|
1444
|
-
|
|
1445
|
-
/** Rule IDs to enable for this run (optional) */
|
|
1446
|
-
ruleIds: z29.array(z29.string()).optional(),
|
|
1598
|
+
jobStatusCheckedAt: z30.string().optional(),
|
|
1599
|
+
/** Unified capability IDs */
|
|
1600
|
+
capabilityIds: z30.array(z30.string()).optional(),
|
|
1601
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
1602
|
+
capabilityVersions: z30.record(z30.string(), z30.string()).optional(),
|
|
1447
1603
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1448
|
-
tags:
|
|
1604
|
+
tags: z30.array(z30.string()).optional(),
|
|
1449
1605
|
/** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
|
|
1450
|
-
runsPerScenario:
|
|
1606
|
+
runsPerScenario: z30.number().int().min(1).max(20).optional(),
|
|
1451
1607
|
/** Snapshot of agent configuration captured at run creation time */
|
|
1452
|
-
agentSnapshot:
|
|
1453
|
-
name:
|
|
1608
|
+
agentSnapshot: z30.object({
|
|
1609
|
+
name: z30.string().optional(),
|
|
1454
1610
|
agentType: AgentTypeSchema.optional(),
|
|
1455
1611
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1456
|
-
systemPrompt:
|
|
1612
|
+
systemPrompt: z30.string().nullable().optional(),
|
|
1457
1613
|
modelConfig: ModelConfigSchema.optional()
|
|
1458
1614
|
}).optional(),
|
|
1459
1615
|
/** UUID linking all runs in a comparison group */
|
|
1460
|
-
comparisonGroupId:
|
|
1616
|
+
comparisonGroupId: z30.string().optional(),
|
|
1461
1617
|
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1462
|
-
comparisonLabel:
|
|
1618
|
+
comparisonLabel: z30.string().optional(),
|
|
1463
1619
|
/** LLM-generated analysis of the completed run */
|
|
1464
1620
|
runAnalysis: RunAnalysisSchema.optional()
|
|
1465
1621
|
});
|
|
@@ -1477,60 +1633,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1477
1633
|
agentSnapshot: true
|
|
1478
1634
|
}).extend({
|
|
1479
1635
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1480
|
-
scenarioIds:
|
|
1636
|
+
scenarioIds: z30.array(z30.string()).optional()
|
|
1481
1637
|
}).refine(
|
|
1482
1638
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1483
1639
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1484
1640
|
);
|
|
1485
|
-
var EvaluationProgressSchema =
|
|
1486
|
-
runId:
|
|
1487
|
-
targetId:
|
|
1488
|
-
totalScenarios:
|
|
1489
|
-
completedScenarios:
|
|
1490
|
-
scenarioProgress:
|
|
1491
|
-
|
|
1492
|
-
scenarioId:
|
|
1493
|
-
currentStep:
|
|
1494
|
-
error:
|
|
1641
|
+
var EvaluationProgressSchema = z30.object({
|
|
1642
|
+
runId: z30.string(),
|
|
1643
|
+
targetId: z30.string(),
|
|
1644
|
+
totalScenarios: z30.number(),
|
|
1645
|
+
completedScenarios: z30.number(),
|
|
1646
|
+
scenarioProgress: z30.array(
|
|
1647
|
+
z30.object({
|
|
1648
|
+
scenarioId: z30.string(),
|
|
1649
|
+
currentStep: z30.string(),
|
|
1650
|
+
error: z30.string().optional()
|
|
1495
1651
|
})
|
|
1496
1652
|
),
|
|
1497
|
-
createdAt:
|
|
1498
|
-
});
|
|
1499
|
-
var EvaluationLogSchema =
|
|
1500
|
-
runId:
|
|
1501
|
-
scenarioId:
|
|
1502
|
-
log:
|
|
1503
|
-
level:
|
|
1504
|
-
message:
|
|
1505
|
-
args:
|
|
1506
|
-
error:
|
|
1653
|
+
createdAt: z30.number()
|
|
1654
|
+
});
|
|
1655
|
+
var EvaluationLogSchema = z30.object({
|
|
1656
|
+
runId: z30.string(),
|
|
1657
|
+
scenarioId: z30.string(),
|
|
1658
|
+
log: z30.object({
|
|
1659
|
+
level: z30.enum(["info", "error", "debug"]),
|
|
1660
|
+
message: z30.string().optional(),
|
|
1661
|
+
args: z30.array(z30.any()).optional(),
|
|
1662
|
+
error: z30.string().optional()
|
|
1507
1663
|
})
|
|
1508
1664
|
});
|
|
1509
1665
|
var LLM_TIMEOUT = 12e4;
|
|
1510
1666
|
|
|
1511
1667
|
// src/evaluation/conversation.ts
|
|
1512
|
-
import { z as
|
|
1513
|
-
var TextBlockSchema =
|
|
1514
|
-
type:
|
|
1515
|
-
text:
|
|
1516
|
-
});
|
|
1517
|
-
var ThinkingBlockSchema =
|
|
1518
|
-
type:
|
|
1519
|
-
thinking:
|
|
1520
|
-
});
|
|
1521
|
-
var ToolUseBlockSchema =
|
|
1522
|
-
type:
|
|
1523
|
-
toolName:
|
|
1524
|
-
toolId:
|
|
1525
|
-
input:
|
|
1526
|
-
});
|
|
1527
|
-
var ToolResultBlockSchema =
|
|
1528
|
-
type:
|
|
1529
|
-
toolUseId:
|
|
1530
|
-
content:
|
|
1531
|
-
isError:
|
|
1532
|
-
});
|
|
1533
|
-
var ConversationBlockSchema =
|
|
1668
|
+
import { z as z31 } from "zod";
|
|
1669
|
+
var TextBlockSchema = z31.object({
|
|
1670
|
+
type: z31.literal("text"),
|
|
1671
|
+
text: z31.string()
|
|
1672
|
+
});
|
|
1673
|
+
var ThinkingBlockSchema = z31.object({
|
|
1674
|
+
type: z31.literal("thinking"),
|
|
1675
|
+
thinking: z31.string()
|
|
1676
|
+
});
|
|
1677
|
+
var ToolUseBlockSchema = z31.object({
|
|
1678
|
+
type: z31.literal("tool_use"),
|
|
1679
|
+
toolName: z31.string(),
|
|
1680
|
+
toolId: z31.string(),
|
|
1681
|
+
input: z31.unknown()
|
|
1682
|
+
});
|
|
1683
|
+
var ToolResultBlockSchema = z31.object({
|
|
1684
|
+
type: z31.literal("tool_result"),
|
|
1685
|
+
toolUseId: z31.string(),
|
|
1686
|
+
content: z31.string(),
|
|
1687
|
+
isError: z31.boolean().optional()
|
|
1688
|
+
});
|
|
1689
|
+
var ConversationBlockSchema = z31.discriminatedUnion("type", [
|
|
1534
1690
|
TextBlockSchema,
|
|
1535
1691
|
ThinkingBlockSchema,
|
|
1536
1692
|
ToolUseBlockSchema,
|
|
@@ -1541,18 +1697,18 @@ var ConversationMessageRoles = [
|
|
|
1541
1697
|
"user",
|
|
1542
1698
|
"system"
|
|
1543
1699
|
];
|
|
1544
|
-
var ConversationMessageSchema =
|
|
1545
|
-
role:
|
|
1546
|
-
content:
|
|
1547
|
-
timestamp:
|
|
1700
|
+
var ConversationMessageSchema = z31.object({
|
|
1701
|
+
role: z31.enum(ConversationMessageRoles),
|
|
1702
|
+
content: z31.array(ConversationBlockSchema),
|
|
1703
|
+
timestamp: z31.string()
|
|
1548
1704
|
});
|
|
1549
|
-
var ScenarioConversationSchema =
|
|
1550
|
-
id:
|
|
1551
|
-
projectId:
|
|
1552
|
-
evalRunId:
|
|
1553
|
-
resultId:
|
|
1554
|
-
messages:
|
|
1555
|
-
createdAt:
|
|
1705
|
+
var ScenarioConversationSchema = z31.object({
|
|
1706
|
+
id: z31.string(),
|
|
1707
|
+
projectId: z31.string(),
|
|
1708
|
+
evalRunId: z31.string(),
|
|
1709
|
+
resultId: z31.string(),
|
|
1710
|
+
messages: z31.array(ConversationMessageSchema),
|
|
1711
|
+
createdAt: z31.string()
|
|
1556
1712
|
});
|
|
1557
1713
|
|
|
1558
1714
|
// src/evaluation/eval-result.ts
|
|
@@ -1563,98 +1719,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1563
1719
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1564
1720
|
return AssertionResultStatus2;
|
|
1565
1721
|
})(AssertionResultStatus || {});
|
|
1566
|
-
var AssertionResultSchema =
|
|
1567
|
-
id:
|
|
1568
|
-
assertionId:
|
|
1569
|
-
assertionType:
|
|
1570
|
-
assertionName:
|
|
1571
|
-
status:
|
|
1572
|
-
message:
|
|
1573
|
-
expected:
|
|
1574
|
-
actual:
|
|
1575
|
-
duration:
|
|
1576
|
-
details:
|
|
1577
|
-
llmTraceSteps:
|
|
1578
|
-
});
|
|
1579
|
-
var EvalRunResultSchema =
|
|
1580
|
-
id:
|
|
1581
|
-
targetId:
|
|
1582
|
-
targetName:
|
|
1722
|
+
var AssertionResultSchema = z32.object({
|
|
1723
|
+
id: z32.string(),
|
|
1724
|
+
assertionId: z32.string(),
|
|
1725
|
+
assertionType: z32.string(),
|
|
1726
|
+
assertionName: z32.string(),
|
|
1727
|
+
status: z32.enum(AssertionResultStatus),
|
|
1728
|
+
message: z32.string().optional(),
|
|
1729
|
+
expected: z32.string().optional(),
|
|
1730
|
+
actual: z32.string().optional(),
|
|
1731
|
+
duration: z32.number().optional(),
|
|
1732
|
+
details: z32.record(z32.string(), z32.unknown()).optional(),
|
|
1733
|
+
llmTraceSteps: z32.array(LLMTraceStepSchema).optional()
|
|
1734
|
+
});
|
|
1735
|
+
var EvalRunResultSchema = z32.object({
|
|
1736
|
+
id: z32.string(),
|
|
1737
|
+
targetId: z32.string(),
|
|
1738
|
+
targetName: z32.string().optional(),
|
|
1583
1739
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1584
|
-
skillVersionId:
|
|
1740
|
+
skillVersionId: z32.string().optional(),
|
|
1585
1741
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1586
|
-
skillVersion:
|
|
1587
|
-
scenarioId:
|
|
1588
|
-
scenarioName:
|
|
1742
|
+
skillVersion: z32.string().optional(),
|
|
1743
|
+
scenarioId: z32.string(),
|
|
1744
|
+
scenarioName: z32.string(),
|
|
1589
1745
|
/** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
|
|
1590
|
-
triggerPrompt:
|
|
1746
|
+
triggerPrompt: z32.string().optional(),
|
|
1591
1747
|
modelConfig: ModelConfigSchema.optional(),
|
|
1592
|
-
assertionResults:
|
|
1748
|
+
assertionResults: z32.array(AssertionResultSchema),
|
|
1593
1749
|
metrics: EvalMetricsSchema.optional(),
|
|
1594
|
-
passed:
|
|
1595
|
-
failed:
|
|
1596
|
-
passRate:
|
|
1597
|
-
duration:
|
|
1598
|
-
outputText:
|
|
1599
|
-
files:
|
|
1600
|
-
fileDiffs:
|
|
1750
|
+
passed: z32.number(),
|
|
1751
|
+
failed: z32.number(),
|
|
1752
|
+
passRate: z32.number(),
|
|
1753
|
+
duration: z32.number(),
|
|
1754
|
+
outputText: z32.string().optional(),
|
|
1755
|
+
files: z32.array(ExpectedFileSchema).optional(),
|
|
1756
|
+
fileDiffs: z32.array(DiffContentSchema).optional(),
|
|
1601
1757
|
/** Full template files after execution with status indicators */
|
|
1602
|
-
templateFiles:
|
|
1603
|
-
startedAt:
|
|
1604
|
-
completedAt:
|
|
1758
|
+
templateFiles: z32.array(TemplateFileSchema).optional(),
|
|
1759
|
+
startedAt: z32.string().optional(),
|
|
1760
|
+
completedAt: z32.string().optional(),
|
|
1605
1761
|
llmTrace: LLMTraceSchema.optional(),
|
|
1606
1762
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1607
|
-
conversation:
|
|
1763
|
+
conversation: z32.array(ConversationMessageSchema).optional(),
|
|
1608
1764
|
/** 0-based iteration index when a scenario is run multiple times within a single eval run */
|
|
1609
|
-
iterationIndex:
|
|
1610
|
-
});
|
|
1611
|
-
var PromptResultSchema =
|
|
1612
|
-
text:
|
|
1613
|
-
files:
|
|
1614
|
-
finishReason:
|
|
1615
|
-
reasoning:
|
|
1616
|
-
reasoningDetails:
|
|
1617
|
-
toolCalls:
|
|
1618
|
-
toolResults:
|
|
1619
|
-
warnings:
|
|
1620
|
-
sources:
|
|
1621
|
-
steps:
|
|
1622
|
-
generationTimeMs:
|
|
1623
|
-
prompt:
|
|
1624
|
-
systemPrompt:
|
|
1625
|
-
usage:
|
|
1626
|
-
totalTokens:
|
|
1627
|
-
totalMicrocentsSpent:
|
|
1765
|
+
iterationIndex: z32.number().int().min(0).optional()
|
|
1766
|
+
});
|
|
1767
|
+
var PromptResultSchema = z32.object({
|
|
1768
|
+
text: z32.string(),
|
|
1769
|
+
files: z32.array(z32.unknown()).optional(),
|
|
1770
|
+
finishReason: z32.string().optional(),
|
|
1771
|
+
reasoning: z32.string().optional(),
|
|
1772
|
+
reasoningDetails: z32.unknown().optional(),
|
|
1773
|
+
toolCalls: z32.array(z32.unknown()).optional(),
|
|
1774
|
+
toolResults: z32.array(z32.unknown()).optional(),
|
|
1775
|
+
warnings: z32.array(z32.unknown()).optional(),
|
|
1776
|
+
sources: z32.array(z32.unknown()).optional(),
|
|
1777
|
+
steps: z32.array(z32.unknown()),
|
|
1778
|
+
generationTimeMs: z32.number(),
|
|
1779
|
+
prompt: z32.string(),
|
|
1780
|
+
systemPrompt: z32.string(),
|
|
1781
|
+
usage: z32.object({
|
|
1782
|
+
totalTokens: z32.number().optional(),
|
|
1783
|
+
totalMicrocentsSpent: z32.number().optional()
|
|
1628
1784
|
})
|
|
1629
1785
|
});
|
|
1630
|
-
var EvaluationResultSchema =
|
|
1631
|
-
id:
|
|
1632
|
-
runId:
|
|
1633
|
-
timestamp:
|
|
1786
|
+
var EvaluationResultSchema = z32.object({
|
|
1787
|
+
id: z32.string(),
|
|
1788
|
+
runId: z32.string(),
|
|
1789
|
+
timestamp: z32.number(),
|
|
1634
1790
|
promptResult: PromptResultSchema,
|
|
1635
|
-
testResults:
|
|
1636
|
-
tags:
|
|
1637
|
-
feedback:
|
|
1638
|
-
score:
|
|
1639
|
-
suiteId:
|
|
1640
|
-
});
|
|
1641
|
-
var LeanEvaluationResultSchema =
|
|
1642
|
-
id:
|
|
1643
|
-
runId:
|
|
1644
|
-
timestamp:
|
|
1645
|
-
tags:
|
|
1646
|
-
scenarioId:
|
|
1647
|
-
scenarioVersion:
|
|
1648
|
-
targetId:
|
|
1649
|
-
targetVersion:
|
|
1650
|
-
suiteId:
|
|
1651
|
-
score:
|
|
1652
|
-
time:
|
|
1653
|
-
microcentsSpent:
|
|
1791
|
+
testResults: z32.array(z32.unknown()),
|
|
1792
|
+
tags: z32.array(z32.string()).optional(),
|
|
1793
|
+
feedback: z32.string().optional(),
|
|
1794
|
+
score: z32.number(),
|
|
1795
|
+
suiteId: z32.string().optional()
|
|
1796
|
+
});
|
|
1797
|
+
var LeanEvaluationResultSchema = z32.object({
|
|
1798
|
+
id: z32.string(),
|
|
1799
|
+
runId: z32.string(),
|
|
1800
|
+
timestamp: z32.number(),
|
|
1801
|
+
tags: z32.array(z32.string()).optional(),
|
|
1802
|
+
scenarioId: z32.string(),
|
|
1803
|
+
scenarioVersion: z32.number().optional(),
|
|
1804
|
+
targetId: z32.string(),
|
|
1805
|
+
targetVersion: z32.number().optional(),
|
|
1806
|
+
suiteId: z32.string().optional(),
|
|
1807
|
+
score: z32.number(),
|
|
1808
|
+
time: z32.number().optional(),
|
|
1809
|
+
microcentsSpent: z32.number().optional()
|
|
1654
1810
|
});
|
|
1655
1811
|
|
|
1656
1812
|
// src/evaluation/eval-run-folder.ts
|
|
1657
|
-
import { z as
|
|
1813
|
+
import { z as z33 } from "zod";
|
|
1658
1814
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1659
1815
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1660
1816
|
id: true,
|
|
@@ -1668,26 +1824,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1668
1824
|
updatedAt: true,
|
|
1669
1825
|
deleted: true
|
|
1670
1826
|
}).partial();
|
|
1671
|
-
var EvalRunFolderMembershipSchema =
|
|
1672
|
-
folderId:
|
|
1673
|
-
evalRunId:
|
|
1674
|
-
projectId:
|
|
1675
|
-
createdAt:
|
|
1827
|
+
var EvalRunFolderMembershipSchema = z33.object({
|
|
1828
|
+
folderId: z33.string(),
|
|
1829
|
+
evalRunId: z33.string(),
|
|
1830
|
+
projectId: z33.string(),
|
|
1831
|
+
createdAt: z33.string()
|
|
1676
1832
|
});
|
|
1677
1833
|
|
|
1678
1834
|
// src/project/project.ts
|
|
1679
|
-
import { z as
|
|
1835
|
+
import { z as z34 } from "zod";
|
|
1680
1836
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1681
|
-
appId:
|
|
1682
|
-
scenarioTags:
|
|
1837
|
+
appId: z34.string().optional().describe("The ID of the app in Dev Center"),
|
|
1838
|
+
scenarioTags: z34.array(z34.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1683
1839
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1684
|
-
wixAuthToken:
|
|
1840
|
+
wixAuthToken: z34.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1685
1841
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1686
|
-
base44AuthFile:
|
|
1842
|
+
base44AuthFile: z34.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1687
1843
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1688
|
-
wixAuthEmail:
|
|
1844
|
+
wixAuthEmail: z34.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1689
1845
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1690
|
-
base44AuthEmail:
|
|
1846
|
+
base44AuthEmail: z34.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1691
1847
|
});
|
|
1692
1848
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1693
1849
|
id: true,
|
|
@@ -1697,7 +1853,7 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1697
1853
|
wixAuthEmail: true,
|
|
1698
1854
|
base44AuthEmail: true
|
|
1699
1855
|
}).extend({
|
|
1700
|
-
appId:
|
|
1856
|
+
appId: z34.string().describe(
|
|
1701
1857
|
"Required: The ID of the app in Dev Center for credential scoping"
|
|
1702
1858
|
)
|
|
1703
1859
|
});
|
|
@@ -1717,7 +1873,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1717
1873
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1718
1874
|
|
|
1719
1875
|
// src/schedule/eval-schedule.ts
|
|
1720
|
-
import { z as
|
|
1876
|
+
import { z as z35 } from "zod";
|
|
1721
1877
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1722
1878
|
FrequencyType2["DAILY"] = "daily";
|
|
1723
1879
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1727,29 +1883,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1727
1883
|
})(FrequencyType || {});
|
|
1728
1884
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1729
1885
|
/** Whether the schedule is active */
|
|
1730
|
-
enabled:
|
|
1886
|
+
enabled: z35.boolean(),
|
|
1731
1887
|
/** Test suite to run */
|
|
1732
|
-
suiteId:
|
|
1888
|
+
suiteId: z35.string(),
|
|
1733
1889
|
/** Preset that provides agent + entities for this schedule */
|
|
1734
|
-
presetId:
|
|
1890
|
+
presetId: z35.string(),
|
|
1735
1891
|
/** How often to run */
|
|
1736
|
-
frequencyType:
|
|
1892
|
+
frequencyType: z35.nativeEnum(FrequencyType),
|
|
1737
1893
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1738
|
-
timeOfDay:
|
|
1894
|
+
timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1739
1895
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1740
|
-
dayOfWeek:
|
|
1896
|
+
dayOfWeek: z35.number().min(0).max(6).optional(),
|
|
1741
1897
|
/** Day of month (1-31) for monthly schedules */
|
|
1742
|
-
dayOfMonth:
|
|
1898
|
+
dayOfMonth: z35.number().min(1).max(31).optional(),
|
|
1743
1899
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1744
|
-
timezone:
|
|
1900
|
+
timezone: z35.string(),
|
|
1745
1901
|
/** ID of the last eval run created by this schedule */
|
|
1746
|
-
lastRunId:
|
|
1902
|
+
lastRunId: z35.string().optional(),
|
|
1747
1903
|
/** Denormalized status of the last run */
|
|
1748
|
-
lastRunStatus:
|
|
1904
|
+
lastRunStatus: z35.string().optional(),
|
|
1749
1905
|
/** ISO timestamp of the last run */
|
|
1750
|
-
lastRunAt:
|
|
1906
|
+
lastRunAt: z35.string().optional(),
|
|
1751
1907
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1752
|
-
nextRunAt:
|
|
1908
|
+
nextRunAt: z35.string().optional()
|
|
1753
1909
|
});
|
|
1754
1910
|
function isValidTimezone(tz) {
|
|
1755
1911
|
try {
|
|
@@ -1762,14 +1918,14 @@ function isValidTimezone(tz) {
|
|
|
1762
1918
|
function validateScheduleFields(data, ctx, options) {
|
|
1763
1919
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1764
1920
|
ctx.addIssue({
|
|
1765
|
-
code:
|
|
1921
|
+
code: z35.ZodIssueCode.custom,
|
|
1766
1922
|
message: "dayOfWeek is required for weekly schedules",
|
|
1767
1923
|
path: ["dayOfWeek"]
|
|
1768
1924
|
});
|
|
1769
1925
|
}
|
|
1770
1926
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1771
1927
|
ctx.addIssue({
|
|
1772
|
-
code:
|
|
1928
|
+
code: z35.ZodIssueCode.custom,
|
|
1773
1929
|
message: "dayOfMonth is required for monthly schedules",
|
|
1774
1930
|
path: ["dayOfMonth"]
|
|
1775
1931
|
});
|
|
@@ -1777,7 +1933,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1777
1933
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1778
1934
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1779
1935
|
ctx.addIssue({
|
|
1780
|
-
code:
|
|
1936
|
+
code: z35.ZodIssueCode.custom,
|
|
1781
1937
|
message: "Invalid IANA timezone",
|
|
1782
1938
|
path: ["timezone"]
|
|
1783
1939
|
});
|
|
@@ -1841,6 +1997,13 @@ export {
|
|
|
1841
1997
|
BulkImportResultItemSchema,
|
|
1842
1998
|
BulkImportResultSchema,
|
|
1843
1999
|
BulkImportSkillsInputSchema,
|
|
2000
|
+
CAPABILITY_NAME_REGEX,
|
|
2001
|
+
CapabilityContentSchema,
|
|
2002
|
+
CapabilitySchema,
|
|
2003
|
+
CapabilityTypeSchema,
|
|
2004
|
+
CapabilityVersionOriginSchema,
|
|
2005
|
+
CapabilityVersionSchema,
|
|
2006
|
+
CapabilityWithLatestVersionSchema,
|
|
1844
2007
|
ClaudeModel,
|
|
1845
2008
|
ClaudeModelSchema,
|
|
1846
2009
|
CommandExecutionSchema,
|
|
@@ -1851,6 +2014,8 @@ export {
|
|
|
1851
2014
|
CostAssertionSchema,
|
|
1852
2015
|
CostConfigSchema,
|
|
1853
2016
|
CreateAgentInputSchema,
|
|
2017
|
+
CreateCapabilityInputSchema,
|
|
2018
|
+
CreateCapabilityVersionInputSchema,
|
|
1854
2019
|
CreateEvalRunFolderInputSchema,
|
|
1855
2020
|
CreateEvalRunInputSchema,
|
|
1856
2021
|
CreateEvalScheduleInputSchema,
|
|
@@ -1890,6 +2055,7 @@ export {
|
|
|
1890
2055
|
FilePresenceTestSchema,
|
|
1891
2056
|
FrequencyType,
|
|
1892
2057
|
GitHubSourceSchema,
|
|
2058
|
+
InitialCapabilityVersionInputSchema,
|
|
1893
2059
|
InitialVersionInputSchema,
|
|
1894
2060
|
LEGACY_MODEL_ID_MAP,
|
|
1895
2061
|
LLMBreakdownStatsSchema,
|
|
@@ -1966,6 +2132,7 @@ export {
|
|
|
1966
2132
|
TriggerSchema,
|
|
1967
2133
|
TriggerType,
|
|
1968
2134
|
UpdateAgentInputSchema,
|
|
2135
|
+
UpdateCapabilityInputSchema,
|
|
1969
2136
|
UpdateEvalRunFolderInputSchema,
|
|
1970
2137
|
UpdateEvalScheduleInputSchema,
|
|
1971
2138
|
UpdateMcpInputSchema,
|
|
@@ -1978,12 +2145,20 @@ export {
|
|
|
1978
2145
|
UpdateTestScenarioInputSchema,
|
|
1979
2146
|
UpdateTestSuiteInputSchema,
|
|
1980
2147
|
VitestTestSchema,
|
|
2148
|
+
capabilityToMcp,
|
|
2149
|
+
capabilityToRule,
|
|
2150
|
+
capabilityToSkill,
|
|
2151
|
+
capabilityToSkillWithLatestVersion,
|
|
2152
|
+
capabilityToSubAgent,
|
|
2153
|
+
capabilityVersionToSkillVersion,
|
|
1981
2154
|
classifyAssertionRef,
|
|
1982
2155
|
formatTraceEventLine,
|
|
1983
2156
|
getSystemAssertion,
|
|
1984
2157
|
getSystemAssertions,
|
|
2158
|
+
groupCapabilitiesByType,
|
|
1985
2159
|
isAllowedBuildCommandString,
|
|
1986
2160
|
isSystemAssertionId,
|
|
2161
|
+
isValidCapabilityName,
|
|
1987
2162
|
isValidSkillFolderName,
|
|
1988
2163
|
normalizeBatchAssertionLink,
|
|
1989
2164
|
normalizeModelId,
|