@wix/evalforge-types 0.52.0 → 0.54.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -1168,6 +1168,21 @@ var UpdateSkillInputSchema = SkillInputBaseSchema.partial().refine(
1168
1168
  var SkillWithLatestVersionSchema = SkillSchema.extend({
1169
1169
  latestVersion: SkillVersionSchema.optional()
1170
1170
  });
1171
+ var BulkImportSkillsInputSchema = z7.object({
1172
+ source: GitHubSourceSchema
1173
+ });
1174
+ var BulkImportResultItemSchema = z7.object({
1175
+ name: z7.string(),
1176
+ status: z7.enum(["created", "skipped", "failed"]),
1177
+ skillId: z7.string().optional(),
1178
+ reason: z7.string().optional()
1179
+ });
1180
+ var BulkImportResultSchema = z7.object({
1181
+ created: z7.number(),
1182
+ skipped: z7.number(),
1183
+ failed: z7.number(),
1184
+ items: z7.array(BulkImportResultItemSchema)
1185
+ });
1171
1186
 
1172
1187
  // src/target/sub-agent.ts
1173
1188
  import { z as z8 } from "zod";
@@ -1400,85 +1415,34 @@ var TestSchema = z20.discriminatedUnion("type", [
1400
1415
  PlaywrightNLTestSchema
1401
1416
  ]);
1402
1417
 
1403
- // src/scenario/assertions.ts
1404
- import { z as z21 } from "zod";
1405
- var SkillWasCalledAssertionSchema = z21.object({
1406
- type: z21.literal("skill_was_called"),
1407
- /** Names of the skills that must have been called (matched against trace Skill tool args) */
1408
- skillNames: z21.array(z21.string().min(1)).min(1)
1409
- });
1410
- var ToolCalledWithParamAssertionSchema = z21.object({
1411
- type: z21.literal("tool_called_with_param"),
1412
- /** Name of the tool that must have been called */
1413
- toolName: z21.string().min(1),
1414
- /** JSON string of key-value pairs for expected parameters (substring match) */
1415
- expectedParams: z21.string().min(1)
1416
- });
1417
- var BuildPassedAssertionSchema = z21.object({
1418
- type: z21.literal("build_passed"),
1419
- /** Command to run (default: "yarn build") */
1420
- command: z21.string().optional(),
1421
- /** Expected exit code (default: 0) */
1422
- expectedExitCode: z21.number().int().optional()
1423
- });
1424
- var CostAssertionSchema = z21.object({
1425
- type: z21.literal("cost"),
1426
- /** Maximum allowed cost in USD */
1427
- maxCostUsd: z21.number().positive()
1428
- });
1429
- var LlmJudgeAssertionSchema = z21.object({
1430
- type: z21.literal("llm_judge"),
1431
- /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
1432
- prompt: z21.string(),
1433
- /** Minimum score to pass (0-10, default 7) */
1434
- minScore: z21.number().int().min(0).max(10).optional(),
1435
- /** Model for the judge (e.g. claude-3-5-haiku) */
1436
- model: z21.string().optional(),
1437
- maxTokens: z21.number().int().optional(),
1438
- temperature: z21.number().min(0).max(1).optional()
1439
- });
1440
- var TimeAssertionSchema = z21.object({
1441
- type: z21.literal("time_limit"),
1442
- /** Maximum allowed duration in milliseconds */
1443
- maxDurationMs: z21.number().int().positive()
1444
- });
1445
- var AssertionSchema = z21.union([
1446
- SkillWasCalledAssertionSchema,
1447
- ToolCalledWithParamAssertionSchema,
1448
- BuildPassedAssertionSchema,
1449
- TimeAssertionSchema,
1450
- CostAssertionSchema,
1451
- LlmJudgeAssertionSchema
1452
- ]);
1453
-
1454
1418
  // src/scenario/environment.ts
1455
- import { z as z22 } from "zod";
1456
- var LocalProjectConfigSchema = z22.object({
1419
+ import { z as z21 } from "zod";
1420
+ var LocalProjectConfigSchema = z21.object({
1457
1421
  /** Template ID to use for the local project */
1458
- templateId: z22.string().optional(),
1422
+ templateId: z21.string().optional(),
1459
1423
  /** Files to create in the project */
1460
- files: z22.array(
1461
- z22.object({
1462
- path: z22.string().min(1),
1463
- content: z22.string().min(1)
1424
+ files: z21.array(
1425
+ z21.object({
1426
+ path: z21.string().min(1),
1427
+ content: z21.string().min(1)
1464
1428
  })
1465
1429
  ).optional()
1466
1430
  });
1467
- var MetaSiteConfigSchema = z22.object({
1468
- configurations: z22.array(
1469
- z22.object({
1470
- name: z22.string().min(1),
1471
- apiCalls: z22.array(
1472
- z22.object({
1473
- url: z22.string().url(),
1474
- method: z22.enum(["POST", "PUT"]),
1475
- body: z22.string()
1431
+ var MetaSiteConfigSchema = z21.object({
1432
+ configurations: z21.array(
1433
+ z21.object({
1434
+ name: z21.string().min(1),
1435
+ apiCalls: z21.array(
1436
+ z21.object({
1437
+ url: z21.string().url(),
1438
+ method: z21.enum(["POST", "PUT"]),
1439
+ body: z21.string()
1476
1440
  })
1477
1441
  )
1478
1442
  })
1479
1443
  ).optional()
1480
1444
  });
1481
- var EnvironmentSchema = z22.object({
1445
+ var EnvironmentSchema = z21.object({
1482
1446
  /** Local project configuration */
1483
1447
  localProject: LocalProjectConfigSchema.optional(),
1484
1448
  /** Meta site configuration */
@@ -1486,11 +1450,11 @@ var EnvironmentSchema = z22.object({
1486
1450
  });
1487
1451
 
1488
1452
  // src/scenario/test-scenario.ts
1489
- import { z as z24 } from "zod";
1453
+ import { z as z23 } from "zod";
1490
1454
 
1491
1455
  // src/assertion/assertion.ts
1492
- import { z as z23 } from "zod";
1493
- var AssertionTypeSchema = z23.enum([
1456
+ import { z as z22 } from "zod";
1457
+ var AssertionTypeSchema = z22.enum([
1494
1458
  "skill_was_called",
1495
1459
  "tool_called_with_param",
1496
1460
  "build_passed",
@@ -1498,59 +1462,61 @@ var AssertionTypeSchema = z23.enum([
1498
1462
  "cost",
1499
1463
  "llm_judge"
1500
1464
  ]);
1501
- var AssertionParameterTypeSchema = z23.enum([
1465
+ var AssertionParameterTypeSchema = z22.enum([
1502
1466
  "string",
1503
1467
  "number",
1504
1468
  "boolean"
1505
1469
  ]);
1506
- var AssertionParameterSchema = z23.object({
1470
+ var AssertionParameterSchema = z22.object({
1507
1471
  /** Parameter name (used as key in params object) */
1508
- name: z23.string().min(1),
1472
+ name: z22.string().min(1),
1509
1473
  /** Display label for the parameter */
1510
- label: z23.string().min(1),
1474
+ label: z22.string().min(1),
1511
1475
  /** Parameter type */
1512
1476
  type: AssertionParameterTypeSchema,
1513
1477
  /** Whether this parameter is required */
1514
- required: z23.boolean(),
1478
+ required: z22.boolean(),
1515
1479
  /** Default value (optional, used when not provided) */
1516
- defaultValue: z23.union([z23.string(), z23.number(), z23.boolean()]).optional(),
1480
+ defaultValue: z22.union([z22.string(), z22.number(), z22.boolean()]).optional(),
1517
1481
  /** If true, parameter is hidden by default behind "Show advanced options" */
1518
- advanced: z23.boolean().optional()
1482
+ advanced: z22.boolean().optional()
1519
1483
  });
1520
- var ScenarioAssertionLinkSchema = z23.object({
1484
+ var ScenarioAssertionLinkSchema = z22.object({
1521
1485
  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
1522
- assertionId: z23.string(),
1486
+ assertionId: z22.string(),
1523
1487
  /** Parameter values for this assertion in this scenario */
1524
- params: z23.record(
1525
- z23.string(),
1526
- z23.union([z23.string(), z23.number(), z23.boolean(), z23.null()])
1488
+ params: z22.record(
1489
+ z22.string(),
1490
+ z22.union([z22.string(), z22.number(), z22.boolean(), z22.null()])
1527
1491
  ).optional()
1528
1492
  });
1529
- var SkillWasCalledConfigSchema = z23.object({
1493
+ var SkillWasCalledConfigSchema = z22.object({
1530
1494
  /** Names of the skills that must have been called */
1531
- skillNames: z23.array(z23.string().min(1)).min(1)
1495
+ skillNames: z22.array(z22.string().min(1)).min(1)
1532
1496
  });
1533
- var CostConfigSchema = z23.strictObject({
1497
+ var CostConfigSchema = z22.strictObject({
1534
1498
  /** Maximum allowed cost in USD */
1535
- maxCostUsd: z23.number().positive()
1499
+ maxCostUsd: z22.number().positive()
1536
1500
  });
1537
- var ToolCalledWithParamConfigSchema = z23.strictObject({
1501
+ var ToolCalledWithParamConfigSchema = z22.strictObject({
1538
1502
  /** Name of the tool that must have been called */
1539
- toolName: z23.string().min(1),
1503
+ toolName: z22.string().min(1),
1540
1504
  /** JSON string of key-value pairs for expected parameters (substring match) */
1541
- expectedParams: z23.string().min(1)
1505
+ expectedParams: z22.string().min(1),
1506
+ /** If true, the matching tool call must also have succeeded (step.success === true) */
1507
+ requireSuccess: z22.boolean().optional()
1542
1508
  });
1543
- var BuildPassedConfigSchema = z23.strictObject({
1509
+ var BuildPassedConfigSchema = z22.strictObject({
1544
1510
  /** Command to run (default: "yarn build") */
1545
- command: z23.string().optional(),
1511
+ command: z22.string().optional(),
1546
1512
  /** Expected exit code (default: 0) */
1547
- expectedExitCode: z23.number().int().optional()
1513
+ expectedExitCode: z22.number().int().optional()
1548
1514
  });
1549
- var TimeConfigSchema = z23.strictObject({
1515
+ var TimeConfigSchema = z22.strictObject({
1550
1516
  /** Maximum allowed duration in milliseconds */
1551
- maxDurationMs: z23.number().int().positive()
1517
+ maxDurationMs: z22.number().int().positive()
1552
1518
  });
1553
- var LlmJudgeConfigSchema = z23.object({
1519
+ var LlmJudgeConfigSchema = z22.object({
1554
1520
  /**
1555
1521
  * Prompt template with placeholders:
1556
1522
  * - {{output}}: agent's final output
@@ -1561,19 +1527,45 @@ var LlmJudgeConfigSchema = z23.object({
1561
1527
  * - {{trace}}: step-by-step trace of tool calls
1562
1528
  * - Custom parameters defined in the parameters array
1563
1529
  */
1564
- prompt: z23.string().min(1),
1530
+ prompt: z22.string().min(1),
1565
1531
  /** Minimum score to pass (0-10, default 7) */
1566
- minScore: z23.number().int().min(0).max(10).optional(),
1532
+ minScore: z22.number().int().min(0).max(10).optional(),
1567
1533
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
1568
- model: z23.string().optional(),
1534
+ model: z22.string().optional(),
1569
1535
  /** Max output tokens */
1570
- maxTokens: z23.number().int().optional(),
1536
+ maxTokens: z22.number().int().optional(),
1571
1537
  /** Temperature (0-1) */
1572
- temperature: z23.number().min(0).max(1).optional(),
1538
+ temperature: z22.number().min(0).max(1).optional(),
1573
1539
  /** User-defined parameters for this assertion */
1574
- parameters: z23.array(AssertionParameterSchema).optional()
1540
+ parameters: z22.array(AssertionParameterSchema).optional()
1541
+ });
1542
+ var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
1543
+ type: z22.literal("skill_was_called")
1544
+ });
1545
+ var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
1546
+ type: z22.literal("tool_called_with_param")
1575
1547
  });
1576
- var AssertionConfigSchema = z23.union([
1548
+ var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
1549
+ type: z22.literal("build_passed")
1550
+ });
1551
+ var CostAssertionSchema = CostConfigSchema.extend({
1552
+ type: z22.literal("cost")
1553
+ });
1554
+ var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
1555
+ type: z22.literal("llm_judge")
1556
+ });
1557
+ var TimeAssertionSchema = TimeConfigSchema.extend({
1558
+ type: z22.literal("time_limit")
1559
+ });
1560
+ var AssertionSchema = z22.union([
1561
+ SkillWasCalledAssertionSchema,
1562
+ ToolCalledWithParamAssertionSchema,
1563
+ BuildPassedAssertionSchema,
1564
+ TimeAssertionSchema,
1565
+ CostAssertionSchema,
1566
+ LlmJudgeAssertionSchema
1567
+ ]);
1568
+ var AssertionConfigSchema = z22.union([
1577
1569
  LlmJudgeConfigSchema,
1578
1570
  // requires prompt - check first
1579
1571
  SkillWasCalledConfigSchema,
@@ -1586,7 +1578,7 @@ var AssertionConfigSchema = z23.union([
1586
1578
  // requires maxCostUsd, uses strictObject
1587
1579
  BuildPassedConfigSchema,
1588
1580
  // all optional, uses strictObject to reject unknown keys
1589
- z23.object({})
1581
+ z22.object({})
1590
1582
  // fallback empty config
1591
1583
  ]);
1592
1584
  var CustomAssertionSchema = TenantEntitySchema.extend({
@@ -1637,25 +1629,25 @@ function getLlmJudgeConfig(assertion) {
1637
1629
  }
1638
1630
 
1639
1631
  // src/scenario/test-scenario.ts
1640
- var ExpectedFileSchema = z24.object({
1632
+ var ExpectedFileSchema = z23.object({
1641
1633
  /** Relative path where the file should be created */
1642
- path: z24.string(),
1634
+ path: z23.string(),
1643
1635
  /** Optional expected content */
1644
- content: z24.string().optional()
1636
+ content: z23.string().optional()
1645
1637
  });
1646
1638
  var TestScenarioSchema = TenantEntitySchema.extend({
1647
1639
  /** The prompt sent to the agent to trigger the task */
1648
- triggerPrompt: z24.string().min(10),
1640
+ triggerPrompt: z23.string().min(10),
1649
1641
  /** ID of the template to use for this scenario (null = no template) */
1650
- templateId: z24.string().nullish(),
1642
+ templateId: z23.string().nullish(),
1651
1643
  /** Inline assertions to evaluate for this scenario (legacy) */
1652
- assertions: z24.array(AssertionSchema).optional(),
1644
+ assertions: z23.array(AssertionSchema).optional(),
1653
1645
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
1654
- assertionIds: z24.array(z24.string()).optional(),
1646
+ assertionIds: z23.array(z23.string()).optional(),
1655
1647
  /** Linked assertions with per-scenario parameter values */
1656
- assertionLinks: z24.array(ScenarioAssertionLinkSchema).optional(),
1648
+ assertionLinks: z23.array(ScenarioAssertionLinkSchema).optional(),
1657
1649
  /** Tags for categorisation and filtering */
1658
- tags: z24.array(z24.string()).optional()
1650
+ tags: z23.array(z23.string()).optional()
1659
1651
  });
1660
1652
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1661
1653
  id: true,
@@ -1666,10 +1658,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1666
1658
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
1667
1659
 
1668
1660
  // src/suite/test-suite.ts
1669
- import { z as z25 } from "zod";
1661
+ import { z as z24 } from "zod";
1670
1662
  var TestSuiteSchema = TenantEntitySchema.extend({
1671
1663
  /** IDs of test scenarios in this suite */
1672
- scenarioIds: z25.array(z25.string())
1664
+ scenarioIds: z24.array(z24.string())
1673
1665
  });
1674
1666
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1675
1667
  id: true,
@@ -1680,21 +1672,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1680
1672
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
1681
1673
 
1682
1674
  // src/evaluation/metrics.ts
1683
- import { z as z26 } from "zod";
1684
- var TokenUsageSchema = z26.object({
1685
- prompt: z26.number(),
1686
- completion: z26.number(),
1687
- total: z26.number()
1688
- });
1689
- var EvalMetricsSchema = z26.object({
1690
- totalAssertions: z26.number(),
1691
- passed: z26.number(),
1692
- failed: z26.number(),
1693
- skipped: z26.number(),
1694
- errors: z26.number(),
1695
- passRate: z26.number(),
1696
- avgDuration: z26.number(),
1697
- totalDuration: z26.number()
1675
+ import { z as z25 } from "zod";
1676
+ var TokenUsageSchema = z25.object({
1677
+ prompt: z25.number(),
1678
+ completion: z25.number(),
1679
+ total: z25.number()
1680
+ });
1681
+ var EvalMetricsSchema = z25.object({
1682
+ totalAssertions: z25.number(),
1683
+ passed: z25.number(),
1684
+ failed: z25.number(),
1685
+ skipped: z25.number(),
1686
+ errors: z25.number(),
1687
+ passRate: z25.number(),
1688
+ avgDuration: z25.number(),
1689
+ totalDuration: z25.number()
1698
1690
  });
1699
1691
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1700
1692
  EvalStatus2["PENDING"] = "pending";
@@ -1704,7 +1696,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1704
1696
  EvalStatus2["CANCELLED"] = "cancelled";
1705
1697
  return EvalStatus2;
1706
1698
  })(EvalStatus || {});
1707
- var EvalStatusSchema = z26.enum(EvalStatus);
1699
+ var EvalStatusSchema = z25.enum(EvalStatus);
1708
1700
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1709
1701
  LLMStepType2["COMPLETION"] = "completion";
1710
1702
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -1712,52 +1704,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1712
1704
  LLMStepType2["THINKING"] = "thinking";
1713
1705
  return LLMStepType2;
1714
1706
  })(LLMStepType || {});
1715
- var LLMTraceStepSchema = z26.object({
1716
- id: z26.string(),
1717
- stepNumber: z26.number(),
1718
- type: z26.enum(LLMStepType),
1719
- model: z26.string(),
1720
- provider: z26.string(),
1721
- startedAt: z26.string(),
1722
- durationMs: z26.number(),
1707
+ var LLMTraceStepSchema = z25.object({
1708
+ id: z25.string(),
1709
+ stepNumber: z25.number(),
1710
+ type: z25.enum(LLMStepType),
1711
+ model: z25.string(),
1712
+ provider: z25.string(),
1713
+ startedAt: z25.string(),
1714
+ durationMs: z25.number(),
1723
1715
  tokenUsage: TokenUsageSchema,
1724
- costUsd: z26.number(),
1725
- toolName: z26.string().optional(),
1726
- toolArguments: z26.string().optional(),
1727
- inputPreview: z26.string().optional(),
1728
- outputPreview: z26.string().optional(),
1729
- success: z26.boolean(),
1730
- error: z26.string().optional()
1731
- });
1732
- var LLMBreakdownStatsSchema = z26.object({
1733
- count: z26.number(),
1734
- durationMs: z26.number(),
1735
- tokens: z26.number(),
1736
- costUsd: z26.number()
1737
- });
1738
- var LLMTraceSummarySchema = z26.object({
1739
- totalSteps: z26.number(),
1740
- totalDurationMs: z26.number(),
1716
+ costUsd: z25.number(),
1717
+ toolName: z25.string().optional(),
1718
+ toolArguments: z25.string().optional(),
1719
+ inputPreview: z25.string().optional(),
1720
+ outputPreview: z25.string().optional(),
1721
+ success: z25.boolean(),
1722
+ error: z25.string().optional()
1723
+ });
1724
+ var LLMBreakdownStatsSchema = z25.object({
1725
+ count: z25.number(),
1726
+ durationMs: z25.number(),
1727
+ tokens: z25.number(),
1728
+ costUsd: z25.number()
1729
+ });
1730
+ var LLMTraceSummarySchema = z25.object({
1731
+ totalSteps: z25.number(),
1732
+ totalDurationMs: z25.number(),
1741
1733
  totalTokens: TokenUsageSchema,
1742
- totalCostUsd: z26.number(),
1743
- stepTypeBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema).optional(),
1744
- modelBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema),
1745
- modelsUsed: z26.array(z26.string())
1746
- });
1747
- var LLMTraceSchema = z26.object({
1748
- id: z26.string(),
1749
- steps: z26.array(LLMTraceStepSchema),
1734
+ totalCostUsd: z25.number(),
1735
+ stepTypeBreakdown: z25.record(z25.string(), LLMBreakdownStatsSchema).optional(),
1736
+ modelBreakdown: z25.record(z25.string(), LLMBreakdownStatsSchema),
1737
+ modelsUsed: z25.array(z25.string())
1738
+ });
1739
+ var LLMTraceSchema = z25.object({
1740
+ id: z25.string(),
1741
+ steps: z25.array(LLMTraceStepSchema),
1750
1742
  summary: LLMTraceSummarySchema
1751
1743
  });
1752
1744
 
1753
1745
  // src/evaluation/eval-result.ts
1754
- import { z as z30 } from "zod";
1746
+ import { z as z29 } from "zod";
1755
1747
 
1756
1748
  // src/evaluation/eval-run.ts
1757
- import { z as z28 } from "zod";
1749
+ import { z as z27 } from "zod";
1758
1750
 
1759
1751
  // src/evaluation/live-trace.ts
1760
- import { z as z27 } from "zod";
1752
+ import { z as z26 } from "zod";
1761
1753
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1762
1754
  LiveTraceEventType2["THINKING"] = "thinking";
1763
1755
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -1771,37 +1763,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1771
1763
  LiveTraceEventType2["USER"] = "user";
1772
1764
  return LiveTraceEventType2;
1773
1765
  })(LiveTraceEventType || {});
1774
- var LiveTraceEventSchema = z27.object({
1766
+ var LiveTraceEventSchema = z26.object({
1775
1767
  /** The evaluation run ID */
1776
- evalRunId: z27.string(),
1768
+ evalRunId: z26.string(),
1777
1769
  /** The scenario ID being executed */
1778
- scenarioId: z27.string(),
1770
+ scenarioId: z26.string(),
1779
1771
  /** The scenario name for display */
1780
- scenarioName: z27.string(),
1772
+ scenarioName: z26.string(),
1781
1773
  /** The target ID (skill, agent, etc.) */
1782
- targetId: z27.string(),
1774
+ targetId: z26.string(),
1783
1775
  /** The target name for display */
1784
- targetName: z27.string(),
1776
+ targetName: z26.string(),
1785
1777
  /** Step number in the current scenario execution */
1786
- stepNumber: z27.number(),
1778
+ stepNumber: z26.number(),
1787
1779
  /** Type of trace event */
1788
- type: z27.enum(LiveTraceEventType),
1780
+ type: z26.enum(LiveTraceEventType),
1789
1781
  /** Tool name if this is a tool_use event */
1790
- toolName: z27.string().optional(),
1782
+ toolName: z26.string().optional(),
1791
1783
  /** Tool arguments preview (truncated JSON) */
1792
- toolArgs: z27.string().optional(),
1784
+ toolArgs: z26.string().optional(),
1793
1785
  /** Output preview (truncated text) */
1794
- outputPreview: z27.string().optional(),
1786
+ outputPreview: z26.string().optional(),
1795
1787
  /** File path for file operations */
1796
- filePath: z27.string().optional(),
1788
+ filePath: z26.string().optional(),
1797
1789
  /** Elapsed time in milliseconds for progress events */
1798
- elapsedMs: z27.number().optional(),
1790
+ elapsedMs: z26.number().optional(),
1799
1791
  /** Thinking/reasoning text from Claude */
1800
- thinking: z27.string().optional(),
1792
+ thinking: z26.string().optional(),
1801
1793
  /** Timestamp when this event occurred */
1802
- timestamp: z27.string(),
1794
+ timestamp: z26.string(),
1803
1795
  /** Whether this is the final event for this scenario */
1804
- isComplete: z27.boolean()
1796
+ isComplete: z26.boolean()
1805
1797
  });
1806
1798
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
1807
1799
  function parseTraceEventLine(line) {
@@ -1829,14 +1821,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1829
1821
  TriggerType2["MANUAL"] = "MANUAL";
1830
1822
  return TriggerType2;
1831
1823
  })(TriggerType || {});
1832
- var TriggerMetadataSchema = z28.object({
1833
- version: z28.string().optional(),
1834
- resourceUpdated: z28.array(z28.string()).optional()
1824
+ var TriggerMetadataSchema = z27.object({
1825
+ version: z27.string().optional(),
1826
+ resourceUpdated: z27.array(z27.string()).optional()
1835
1827
  });
1836
- var TriggerSchema = z28.object({
1837
- id: z28.string(),
1828
+ var TriggerSchema = z27.object({
1829
+ id: z27.string(),
1838
1830
  metadata: TriggerMetadataSchema.optional(),
1839
- type: z28.enum(TriggerType)
1831
+ type: z27.enum(TriggerType)
1840
1832
  });
1841
1833
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
1842
1834
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -1854,28 +1846,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
1854
1846
  FailureSeverity2["LOW"] = "low";
1855
1847
  return FailureSeverity2;
1856
1848
  })(FailureSeverity || {});
1857
- var DiffLineTypeSchema = z28.enum(["added", "removed", "unchanged"]);
1858
- var DiffLineSchema = z28.object({
1849
+ var DiffLineTypeSchema = z27.enum(["added", "removed", "unchanged"]);
1850
+ var DiffLineSchema = z27.object({
1859
1851
  type: DiffLineTypeSchema,
1860
- content: z28.string(),
1861
- lineNumber: z28.number()
1862
- });
1863
- var DiffContentSchema = z28.object({
1864
- path: z28.string(),
1865
- expected: z28.string(),
1866
- actual: z28.string(),
1867
- diffLines: z28.array(DiffLineSchema),
1868
- renamedFrom: z28.string().optional()
1869
- });
1870
- var CommandExecutionSchema = z28.object({
1871
- command: z28.string(),
1872
- exitCode: z28.number(),
1873
- output: z28.string().optional(),
1874
- duration: z28.number()
1875
- });
1876
- var FileModificationSchema = z28.object({
1877
- path: z28.string(),
1878
- action: z28.enum(["created", "modified", "deleted"])
1852
+ content: z27.string(),
1853
+ lineNumber: z27.number()
1854
+ });
1855
+ var DiffContentSchema = z27.object({
1856
+ path: z27.string(),
1857
+ expected: z27.string(),
1858
+ actual: z27.string(),
1859
+ diffLines: z27.array(DiffLineSchema),
1860
+ renamedFrom: z27.string().optional()
1861
+ });
1862
+ var CommandExecutionSchema = z27.object({
1863
+ command: z27.string(),
1864
+ exitCode: z27.number(),
1865
+ output: z27.string().optional(),
1866
+ duration: z27.number()
1867
+ });
1868
+ var FileModificationSchema = z27.object({
1869
+ path: z27.string(),
1870
+ action: z27.enum(["created", "modified", "deleted"])
1879
1871
  });
1880
1872
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1881
1873
  TemplateFileStatus2["NEW"] = "new";
@@ -1883,87 +1875,87 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1883
1875
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1884
1876
  return TemplateFileStatus2;
1885
1877
  })(TemplateFileStatus || {});
1886
- var TemplateFileSchema = z28.object({
1878
+ var TemplateFileSchema = z27.object({
1887
1879
  /** Relative path within the template */
1888
- path: z28.string(),
1880
+ path: z27.string(),
1889
1881
  /** Full file content after execution */
1890
- content: z28.string(),
1882
+ content: z27.string(),
1891
1883
  /** File status (new, modified, unchanged) */
1892
- status: z28.enum(["new", "modified", "unchanged"])
1893
- });
1894
- var ApiCallSchema = z28.object({
1895
- endpoint: z28.string(),
1896
- tokensUsed: z28.number(),
1897
- duration: z28.number()
1898
- });
1899
- var ExecutionTraceSchema = z28.object({
1900
- commands: z28.array(CommandExecutionSchema),
1901
- filesModified: z28.array(FileModificationSchema),
1902
- apiCalls: z28.array(ApiCallSchema),
1903
- totalDuration: z28.number()
1904
- });
1905
- var FailureAnalysisSchema = z28.object({
1906
- category: z28.enum(FailureCategory),
1907
- severity: z28.enum(FailureSeverity),
1908
- summary: z28.string(),
1909
- details: z28.string(),
1910
- rootCause: z28.string(),
1911
- suggestedFix: z28.string(),
1912
- relatedAssertions: z28.array(z28.string()),
1913
- codeSnippet: z28.string().optional(),
1914
- similarIssues: z28.array(z28.string()).optional(),
1915
- patternId: z28.string().optional(),
1884
+ status: z27.enum(["new", "modified", "unchanged"])
1885
+ });
1886
+ var ApiCallSchema = z27.object({
1887
+ endpoint: z27.string(),
1888
+ tokensUsed: z27.number(),
1889
+ duration: z27.number()
1890
+ });
1891
+ var ExecutionTraceSchema = z27.object({
1892
+ commands: z27.array(CommandExecutionSchema),
1893
+ filesModified: z27.array(FileModificationSchema),
1894
+ apiCalls: z27.array(ApiCallSchema),
1895
+ totalDuration: z27.number()
1896
+ });
1897
+ var FailureAnalysisSchema = z27.object({
1898
+ category: z27.enum(FailureCategory),
1899
+ severity: z27.enum(FailureSeverity),
1900
+ summary: z27.string(),
1901
+ details: z27.string(),
1902
+ rootCause: z27.string(),
1903
+ suggestedFix: z27.string(),
1904
+ relatedAssertions: z27.array(z27.string()),
1905
+ codeSnippet: z27.string().optional(),
1906
+ similarIssues: z27.array(z27.string()).optional(),
1907
+ patternId: z27.string().optional(),
1916
1908
  // Extended fields for detailed debugging
1917
1909
  diff: DiffContentSchema.optional(),
1918
1910
  executionTrace: ExecutionTraceSchema.optional()
1919
1911
  });
1920
1912
  var EvalRunSchema = TenantEntitySchema.extend({
1921
1913
  /** Agent ID for this run */
1922
- agentId: z28.string().optional(),
1914
+ agentId: z27.string().optional(),
1923
1915
  /** Preset ID that originated this run (optional) */
1924
- presetId: z28.string().optional(),
1916
+ presetId: z27.string().optional(),
1925
1917
  /** Skill IDs for this run */
1926
- skillIds: z28.array(z28.string()).optional(),
1918
+ skillIds: z27.array(z27.string()).optional(),
1927
1919
  /** Map of skillId to skillVersionId for this run */
1928
- skillVersions: z28.record(z28.string(), z28.string()).optional(),
1920
+ skillVersions: z27.record(z27.string(), z27.string()).optional(),
1929
1921
  /** Scenario IDs to run (always present — resolved server-side from tags when needed) */
1930
- scenarioIds: z28.array(z28.string()),
1922
+ scenarioIds: z27.array(z27.string()),
1931
1923
  /** Current status */
1932
1924
  status: EvalStatusSchema,
1933
1925
  /** Progress percentage (0-100) */
1934
- progress: z28.number(),
1926
+ progress: z27.number(),
1935
1927
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1936
- results: z28.array(z28.lazy(() => EvalRunResultSchema)),
1928
+ results: z27.array(z27.lazy(() => EvalRunResultSchema)),
1937
1929
  /** Aggregated metrics across all results */
1938
1930
  aggregateMetrics: EvalMetricsSchema,
1939
1931
  /** Failure analyses */
1940
- failureAnalyses: z28.array(FailureAnalysisSchema).optional(),
1932
+ failureAnalyses: z27.array(FailureAnalysisSchema).optional(),
1941
1933
  /** Aggregated LLM trace summary */
1942
1934
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1943
1935
  /** What triggered this run */
1944
1936
  trigger: TriggerSchema.optional(),
1945
1937
  /** When the run started (set when evaluation is triggered) */
1946
- startedAt: z28.string().optional(),
1938
+ startedAt: z27.string().optional(),
1947
1939
  /** When the run completed */
1948
- completedAt: z28.string().optional(),
1940
+ completedAt: z27.string().optional(),
1949
1941
  /** Live trace events captured during execution (for playback on results page) */
1950
- liveTraceEvents: z28.array(LiveTraceEventSchema).optional(),
1942
+ liveTraceEvents: z27.array(LiveTraceEventSchema).optional(),
1951
1943
  /** Remote job ID for tracking execution in Dev Machines */
1952
- jobId: z28.string().optional(),
1944
+ jobId: z27.string().optional(),
1953
1945
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1954
- jobStatus: z28.string().optional(),
1946
+ jobStatus: z27.string().optional(),
1955
1947
  /** Remote job error message if the job failed */
1956
- jobError: z28.string().optional(),
1948
+ jobError: z27.string().optional(),
1957
1949
  /** Timestamp of the last job status check */
1958
- jobStatusCheckedAt: z28.string().optional(),
1950
+ jobStatusCheckedAt: z27.string().optional(),
1959
1951
  /** MCP server IDs to enable for this run (optional) */
1960
- mcpIds: z28.array(z28.string()).optional(),
1952
+ mcpIds: z27.array(z27.string()).optional(),
1961
1953
  /** Sub-agent IDs to enable for this run (optional) */
1962
- subAgentIds: z28.array(z28.string()).optional(),
1954
+ subAgentIds: z27.array(z27.string()).optional(),
1963
1955
  /** Rule IDs to enable for this run (optional) */
1964
- ruleIds: z28.array(z28.string()).optional(),
1956
+ ruleIds: z27.array(z27.string()).optional(),
1965
1957
  /** Tags used to select scenarios for this run (for traceability) */
1966
- tags: z28.array(z28.string()).optional()
1958
+ tags: z27.array(z27.string()).optional()
1967
1959
  });
1968
1960
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1969
1961
  id: true,
@@ -1978,60 +1970,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1978
1970
  scenarioIds: true
1979
1971
  }).extend({
1980
1972
  /** Optional on input — backend resolves from tags when not provided */
1981
- scenarioIds: z28.array(z28.string()).optional()
1973
+ scenarioIds: z27.array(z27.string()).optional()
1982
1974
  }).refine(
1983
1975
  (data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
1984
1976
  { message: "Either scenarioIds or tags must be provided" }
1985
1977
  );
1986
- var EvaluationProgressSchema = z28.object({
1987
- runId: z28.string(),
1988
- targetId: z28.string(),
1989
- totalScenarios: z28.number(),
1990
- completedScenarios: z28.number(),
1991
- scenarioProgress: z28.array(
1992
- z28.object({
1993
- scenarioId: z28.string(),
1994
- currentStep: z28.string(),
1995
- error: z28.string().optional()
1978
+ var EvaluationProgressSchema = z27.object({
1979
+ runId: z27.string(),
1980
+ targetId: z27.string(),
1981
+ totalScenarios: z27.number(),
1982
+ completedScenarios: z27.number(),
1983
+ scenarioProgress: z27.array(
1984
+ z27.object({
1985
+ scenarioId: z27.string(),
1986
+ currentStep: z27.string(),
1987
+ error: z27.string().optional()
1996
1988
  })
1997
1989
  ),
1998
- createdAt: z28.number()
1999
- });
2000
- var EvaluationLogSchema = z28.object({
2001
- runId: z28.string(),
2002
- scenarioId: z28.string(),
2003
- log: z28.object({
2004
- level: z28.enum(["info", "error", "debug"]),
2005
- message: z28.string().optional(),
2006
- args: z28.array(z28.any()).optional(),
2007
- error: z28.string().optional()
1990
+ createdAt: z27.number()
1991
+ });
1992
+ var EvaluationLogSchema = z27.object({
1993
+ runId: z27.string(),
1994
+ scenarioId: z27.string(),
1995
+ log: z27.object({
1996
+ level: z27.enum(["info", "error", "debug"]),
1997
+ message: z27.string().optional(),
1998
+ args: z27.array(z27.any()).optional(),
1999
+ error: z27.string().optional()
2008
2000
  })
2009
2001
  });
2010
2002
  var LLM_TIMEOUT = 12e4;
2011
2003
 
2012
2004
  // src/evaluation/conversation.ts
2013
- import { z as z29 } from "zod";
2014
- var TextBlockSchema = z29.object({
2015
- type: z29.literal("text"),
2016
- text: z29.string()
2017
- });
2018
- var ThinkingBlockSchema = z29.object({
2019
- type: z29.literal("thinking"),
2020
- thinking: z29.string()
2021
- });
2022
- var ToolUseBlockSchema = z29.object({
2023
- type: z29.literal("tool_use"),
2024
- toolName: z29.string(),
2025
- toolId: z29.string(),
2026
- input: z29.unknown()
2027
- });
2028
- var ToolResultBlockSchema = z29.object({
2029
- type: z29.literal("tool_result"),
2030
- toolUseId: z29.string(),
2031
- content: z29.string(),
2032
- isError: z29.boolean().optional()
2033
- });
2034
- var ConversationBlockSchema = z29.discriminatedUnion("type", [
2005
+ import { z as z28 } from "zod";
2006
+ var TextBlockSchema = z28.object({
2007
+ type: z28.literal("text"),
2008
+ text: z28.string()
2009
+ });
2010
+ var ThinkingBlockSchema = z28.object({
2011
+ type: z28.literal("thinking"),
2012
+ thinking: z28.string()
2013
+ });
2014
+ var ToolUseBlockSchema = z28.object({
2015
+ type: z28.literal("tool_use"),
2016
+ toolName: z28.string(),
2017
+ toolId: z28.string(),
2018
+ input: z28.unknown()
2019
+ });
2020
+ var ToolResultBlockSchema = z28.object({
2021
+ type: z28.literal("tool_result"),
2022
+ toolUseId: z28.string(),
2023
+ content: z28.string(),
2024
+ isError: z28.boolean().optional()
2025
+ });
2026
+ var ConversationBlockSchema = z28.discriminatedUnion("type", [
2035
2027
  TextBlockSchema,
2036
2028
  ThinkingBlockSchema,
2037
2029
  ToolUseBlockSchema,
@@ -2042,18 +2034,18 @@ var ConversationMessageRoles = [
2042
2034
  "user",
2043
2035
  "system"
2044
2036
  ];
2045
- var ConversationMessageSchema = z29.object({
2046
- role: z29.enum(ConversationMessageRoles),
2047
- content: z29.array(ConversationBlockSchema),
2048
- timestamp: z29.string()
2037
+ var ConversationMessageSchema = z28.object({
2038
+ role: z28.enum(ConversationMessageRoles),
2039
+ content: z28.array(ConversationBlockSchema),
2040
+ timestamp: z28.string()
2049
2041
  });
2050
- var ScenarioConversationSchema = z29.object({
2051
- id: z29.string(),
2052
- projectId: z29.string(),
2053
- evalRunId: z29.string(),
2054
- resultId: z29.string(),
2055
- messages: z29.array(ConversationMessageSchema),
2056
- createdAt: z29.string()
2042
+ var ScenarioConversationSchema = z28.object({
2043
+ id: z28.string(),
2044
+ projectId: z28.string(),
2045
+ evalRunId: z28.string(),
2046
+ resultId: z28.string(),
2047
+ messages: z28.array(ConversationMessageSchema),
2048
+ createdAt: z28.string()
2057
2049
  });
2058
2050
 
2059
2051
  // src/evaluation/eval-result.ts
@@ -2064,100 +2056,100 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
2064
2056
  AssertionResultStatus2["ERROR"] = "error";
2065
2057
  return AssertionResultStatus2;
2066
2058
  })(AssertionResultStatus || {});
2067
- var AssertionResultSchema = z30.object({
2068
- id: z30.string(),
2069
- assertionId: z30.string(),
2070
- assertionType: z30.string(),
2071
- assertionName: z30.string(),
2072
- status: z30.enum(AssertionResultStatus),
2073
- message: z30.string().optional(),
2074
- expected: z30.string().optional(),
2075
- actual: z30.string().optional(),
2076
- duration: z30.number().optional(),
2077
- details: z30.record(z30.string(), z30.unknown()).optional(),
2078
- llmTraceSteps: z30.array(LLMTraceStepSchema).optional()
2079
- });
2080
- var EvalRunResultSchema = z30.object({
2081
- id: z30.string(),
2082
- targetId: z30.string(),
2083
- targetName: z30.string().optional(),
2059
+ var AssertionResultSchema = z29.object({
2060
+ id: z29.string(),
2061
+ assertionId: z29.string(),
2062
+ assertionType: z29.string(),
2063
+ assertionName: z29.string(),
2064
+ status: z29.enum(AssertionResultStatus),
2065
+ message: z29.string().optional(),
2066
+ expected: z29.string().optional(),
2067
+ actual: z29.string().optional(),
2068
+ duration: z29.number().optional(),
2069
+ details: z29.record(z29.string(), z29.unknown()).optional(),
2070
+ llmTraceSteps: z29.array(LLMTraceStepSchema).optional()
2071
+ });
2072
+ var EvalRunResultSchema = z29.object({
2073
+ id: z29.string(),
2074
+ targetId: z29.string(),
2075
+ targetName: z29.string().optional(),
2084
2076
  /** SkillVersion ID used for this evaluation (for version tracking) */
2085
- skillVersionId: z30.string().optional(),
2077
+ skillVersionId: z29.string().optional(),
2086
2078
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
2087
- skillVersion: z30.string().optional(),
2088
- scenarioId: z30.string(),
2089
- scenarioName: z30.string(),
2079
+ skillVersion: z29.string().optional(),
2080
+ scenarioId: z29.string(),
2081
+ scenarioName: z29.string(),
2090
2082
  modelConfig: ModelConfigSchema.optional(),
2091
- assertionResults: z30.array(AssertionResultSchema),
2083
+ assertionResults: z29.array(AssertionResultSchema),
2092
2084
  metrics: EvalMetricsSchema.optional(),
2093
- passed: z30.number(),
2094
- failed: z30.number(),
2095
- passRate: z30.number(),
2096
- duration: z30.number(),
2097
- outputText: z30.string().optional(),
2098
- files: z30.array(ExpectedFileSchema).optional(),
2099
- fileDiffs: z30.array(DiffContentSchema).optional(),
2085
+ passed: z29.number(),
2086
+ failed: z29.number(),
2087
+ passRate: z29.number(),
2088
+ duration: z29.number(),
2089
+ outputText: z29.string().optional(),
2090
+ files: z29.array(ExpectedFileSchema).optional(),
2091
+ fileDiffs: z29.array(DiffContentSchema).optional(),
2100
2092
  /** Full template files after execution with status indicators */
2101
- templateFiles: z30.array(TemplateFileSchema).optional(),
2102
- startedAt: z30.string().optional(),
2103
- completedAt: z30.string().optional(),
2093
+ templateFiles: z29.array(TemplateFileSchema).optional(),
2094
+ startedAt: z29.string().optional(),
2095
+ completedAt: z29.string().optional(),
2104
2096
  llmTrace: LLMTraceSchema.optional(),
2105
2097
  /** Full conversation messages (only present in transit; stripped before DB storage) */
2106
- conversation: z30.array(ConversationMessageSchema).optional()
2107
- });
2108
- var PromptResultSchema = z30.object({
2109
- text: z30.string(),
2110
- files: z30.array(z30.unknown()).optional(),
2111
- finishReason: z30.string().optional(),
2112
- reasoning: z30.string().optional(),
2113
- reasoningDetails: z30.unknown().optional(),
2114
- toolCalls: z30.array(z30.unknown()).optional(),
2115
- toolResults: z30.array(z30.unknown()).optional(),
2116
- warnings: z30.array(z30.unknown()).optional(),
2117
- sources: z30.array(z30.unknown()).optional(),
2118
- steps: z30.array(z30.unknown()),
2119
- generationTimeMs: z30.number(),
2120
- prompt: z30.string(),
2121
- systemPrompt: z30.string(),
2122
- usage: z30.object({
2123
- totalTokens: z30.number().optional(),
2124
- totalMicrocentsSpent: z30.number().optional()
2098
+ conversation: z29.array(ConversationMessageSchema).optional()
2099
+ });
2100
+ var PromptResultSchema = z29.object({
2101
+ text: z29.string(),
2102
+ files: z29.array(z29.unknown()).optional(),
2103
+ finishReason: z29.string().optional(),
2104
+ reasoning: z29.string().optional(),
2105
+ reasoningDetails: z29.unknown().optional(),
2106
+ toolCalls: z29.array(z29.unknown()).optional(),
2107
+ toolResults: z29.array(z29.unknown()).optional(),
2108
+ warnings: z29.array(z29.unknown()).optional(),
2109
+ sources: z29.array(z29.unknown()).optional(),
2110
+ steps: z29.array(z29.unknown()),
2111
+ generationTimeMs: z29.number(),
2112
+ prompt: z29.string(),
2113
+ systemPrompt: z29.string(),
2114
+ usage: z29.object({
2115
+ totalTokens: z29.number().optional(),
2116
+ totalMicrocentsSpent: z29.number().optional()
2125
2117
  })
2126
2118
  });
2127
- var EvaluationResultSchema = z30.object({
2128
- id: z30.string(),
2129
- runId: z30.string(),
2130
- timestamp: z30.number(),
2119
+ var EvaluationResultSchema = z29.object({
2120
+ id: z29.string(),
2121
+ runId: z29.string(),
2122
+ timestamp: z29.number(),
2131
2123
  promptResult: PromptResultSchema,
2132
- testResults: z30.array(z30.unknown()),
2133
- tags: z30.array(z30.string()).optional(),
2134
- feedback: z30.string().optional(),
2135
- score: z30.number(),
2136
- suiteId: z30.string().optional()
2137
- });
2138
- var LeanEvaluationResultSchema = z30.object({
2139
- id: z30.string(),
2140
- runId: z30.string(),
2141
- timestamp: z30.number(),
2142
- tags: z30.array(z30.string()).optional(),
2143
- scenarioId: z30.string(),
2144
- scenarioVersion: z30.number().optional(),
2145
- targetId: z30.string(),
2146
- targetVersion: z30.number().optional(),
2147
- suiteId: z30.string().optional(),
2148
- score: z30.number(),
2149
- time: z30.number().optional(),
2150
- microcentsSpent: z30.number().optional()
2124
+ testResults: z29.array(z29.unknown()),
2125
+ tags: z29.array(z29.string()).optional(),
2126
+ feedback: z29.string().optional(),
2127
+ score: z29.number(),
2128
+ suiteId: z29.string().optional()
2129
+ });
2130
+ var LeanEvaluationResultSchema = z29.object({
2131
+ id: z29.string(),
2132
+ runId: z29.string(),
2133
+ timestamp: z29.number(),
2134
+ tags: z29.array(z29.string()).optional(),
2135
+ scenarioId: z29.string(),
2136
+ scenarioVersion: z29.number().optional(),
2137
+ targetId: z29.string(),
2138
+ targetVersion: z29.number().optional(),
2139
+ suiteId: z29.string().optional(),
2140
+ score: z29.number(),
2141
+ time: z29.number().optional(),
2142
+ microcentsSpent: z29.number().optional()
2151
2143
  });
2152
2144
 
2153
2145
  // src/project/project.ts
2154
- import { z as z31 } from "zod";
2146
+ import { z as z30 } from "zod";
2155
2147
  var ProjectSchema = BaseEntitySchema.extend({
2156
- appId: z31.string().optional().describe("The ID of the app in Dev Center"),
2157
- appSecret: z31.string().optional().describe("The secret of the app in Dev Center"),
2158
- useWixAuth: z31.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2159
- useBase44Auth: z31.boolean().optional().describe("Enable Base44 auth for evaluations"),
2160
- scenarioTags: z31.array(z31.string()).optional().describe("Project-level tag vocabulary for scenarios")
2148
+ appId: z30.string().optional().describe("The ID of the app in Dev Center"),
2149
+ appSecret: z30.string().optional().describe("The secret of the app in Dev Center"),
2150
+ useWixAuth: z30.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2151
+ useBase44Auth: z30.boolean().optional().describe("Enable Base44 auth for evaluations"),
2152
+ scenarioTags: z30.array(z30.string()).optional().describe("Project-level tag vocabulary for scenarios")
2161
2153
  });
2162
2154
  var CreateProjectInputSchema = ProjectSchema.omit({
2163
2155
  id: true,
@@ -2210,7 +2202,7 @@ var SYSTEM_ASSERTIONS = {
2210
2202
  [SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
2211
2203
  id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
2212
2204
  name: "Tool Called With Param",
2213
- description: "Check that a tool was called with expected parameters",
2205
+ description: "Check that a tool was called with expected parameters (tool name is substring matched)",
2214
2206
  type: "tool_called_with_param",
2215
2207
  parameters: [
2216
2208
  {
@@ -2224,6 +2216,14 @@ var SYSTEM_ASSERTIONS = {
2224
2216
  label: "Expected Parameters (JSON, substring match)",
2225
2217
  type: "string",
2226
2218
  required: true
2219
+ },
2220
+ {
2221
+ name: "requireSuccess",
2222
+ label: "Require Successful Call",
2223
+ type: "boolean",
2224
+ required: false,
2225
+ defaultValue: false,
2226
+ advanced: true
2227
2227
  }
2228
2228
  ]
2229
2229
  },
@@ -2355,6 +2355,9 @@ export {
2355
2355
  BuildCheckTestSchema,
2356
2356
  BuildPassedAssertionSchema,
2357
2357
  BuildPassedConfigSchema,
2358
+ BulkImportResultItemSchema,
2359
+ BulkImportResultSchema,
2360
+ BulkImportSkillsInputSchema,
2358
2361
  ClaudeModel2 as ClaudeModel,
2359
2362
  ClaudeModelSchema,
2360
2363
  CommandExecutionSchema,