@wix/evalforge-types 0.53.0 → 0.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -1415,85 +1415,34 @@ var TestSchema = z20.discriminatedUnion("type", [
1415
1415
  PlaywrightNLTestSchema
1416
1416
  ]);
1417
1417
 
1418
- // src/scenario/assertions.ts
1419
- import { z as z21 } from "zod";
1420
- var SkillWasCalledAssertionSchema = z21.object({
1421
- type: z21.literal("skill_was_called"),
1422
- /** Names of the skills that must have been called (matched against trace Skill tool args) */
1423
- skillNames: z21.array(z21.string().min(1)).min(1)
1424
- });
1425
- var ToolCalledWithParamAssertionSchema = z21.object({
1426
- type: z21.literal("tool_called_with_param"),
1427
- /** Name of the tool that must have been called */
1428
- toolName: z21.string().min(1),
1429
- /** JSON string of key-value pairs for expected parameters (substring match) */
1430
- expectedParams: z21.string().min(1)
1431
- });
1432
- var BuildPassedAssertionSchema = z21.object({
1433
- type: z21.literal("build_passed"),
1434
- /** Command to run (default: "yarn build") */
1435
- command: z21.string().optional(),
1436
- /** Expected exit code (default: 0) */
1437
- expectedExitCode: z21.number().int().optional()
1438
- });
1439
- var CostAssertionSchema = z21.object({
1440
- type: z21.literal("cost"),
1441
- /** Maximum allowed cost in USD */
1442
- maxCostUsd: z21.number().positive()
1443
- });
1444
- var LlmJudgeAssertionSchema = z21.object({
1445
- type: z21.literal("llm_judge"),
1446
- /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
1447
- prompt: z21.string(),
1448
- /** Minimum score to pass (0-10, default 7) */
1449
- minScore: z21.number().int().min(0).max(10).optional(),
1450
- /** Model for the judge (e.g. claude-3-5-haiku) */
1451
- model: z21.string().optional(),
1452
- maxTokens: z21.number().int().optional(),
1453
- temperature: z21.number().min(0).max(1).optional()
1454
- });
1455
- var TimeAssertionSchema = z21.object({
1456
- type: z21.literal("time_limit"),
1457
- /** Maximum allowed duration in milliseconds */
1458
- maxDurationMs: z21.number().int().positive()
1459
- });
1460
- var AssertionSchema = z21.union([
1461
- SkillWasCalledAssertionSchema,
1462
- ToolCalledWithParamAssertionSchema,
1463
- BuildPassedAssertionSchema,
1464
- TimeAssertionSchema,
1465
- CostAssertionSchema,
1466
- LlmJudgeAssertionSchema
1467
- ]);
1468
-
1469
1418
  // src/scenario/environment.ts
1470
- import { z as z22 } from "zod";
1471
- var LocalProjectConfigSchema = z22.object({
1419
+ import { z as z21 } from "zod";
1420
+ var LocalProjectConfigSchema = z21.object({
1472
1421
  /** Template ID to use for the local project */
1473
- templateId: z22.string().optional(),
1422
+ templateId: z21.string().optional(),
1474
1423
  /** Files to create in the project */
1475
- files: z22.array(
1476
- z22.object({
1477
- path: z22.string().min(1),
1478
- content: z22.string().min(1)
1424
+ files: z21.array(
1425
+ z21.object({
1426
+ path: z21.string().min(1),
1427
+ content: z21.string().min(1)
1479
1428
  })
1480
1429
  ).optional()
1481
1430
  });
1482
- var MetaSiteConfigSchema = z22.object({
1483
- configurations: z22.array(
1484
- z22.object({
1485
- name: z22.string().min(1),
1486
- apiCalls: z22.array(
1487
- z22.object({
1488
- url: z22.string().url(),
1489
- method: z22.enum(["POST", "PUT"]),
1490
- body: z22.string()
1431
+ var MetaSiteConfigSchema = z21.object({
1432
+ configurations: z21.array(
1433
+ z21.object({
1434
+ name: z21.string().min(1),
1435
+ apiCalls: z21.array(
1436
+ z21.object({
1437
+ url: z21.string().url(),
1438
+ method: z21.enum(["POST", "PUT"]),
1439
+ body: z21.string()
1491
1440
  })
1492
1441
  )
1493
1442
  })
1494
1443
  ).optional()
1495
1444
  });
1496
- var EnvironmentSchema = z22.object({
1445
+ var EnvironmentSchema = z21.object({
1497
1446
  /** Local project configuration */
1498
1447
  localProject: LocalProjectConfigSchema.optional(),
1499
1448
  /** Meta site configuration */
@@ -1501,11 +1450,11 @@ var EnvironmentSchema = z22.object({
1501
1450
  });
1502
1451
 
1503
1452
  // src/scenario/test-scenario.ts
1504
- import { z as z24 } from "zod";
1453
+ import { z as z23 } from "zod";
1505
1454
 
1506
1455
  // src/assertion/assertion.ts
1507
- import { z as z23 } from "zod";
1508
- var AssertionTypeSchema = z23.enum([
1456
+ import { z as z22 } from "zod";
1457
+ var AssertionTypeSchema = z22.enum([
1509
1458
  "skill_was_called",
1510
1459
  "tool_called_with_param",
1511
1460
  "build_passed",
@@ -1513,59 +1462,61 @@ var AssertionTypeSchema = z23.enum([
1513
1462
  "cost",
1514
1463
  "llm_judge"
1515
1464
  ]);
1516
- var AssertionParameterTypeSchema = z23.enum([
1465
+ var AssertionParameterTypeSchema = z22.enum([
1517
1466
  "string",
1518
1467
  "number",
1519
1468
  "boolean"
1520
1469
  ]);
1521
- var AssertionParameterSchema = z23.object({
1470
+ var AssertionParameterSchema = z22.object({
1522
1471
  /** Parameter name (used as key in params object) */
1523
- name: z23.string().min(1),
1472
+ name: z22.string().min(1),
1524
1473
  /** Display label for the parameter */
1525
- label: z23.string().min(1),
1474
+ label: z22.string().min(1),
1526
1475
  /** Parameter type */
1527
1476
  type: AssertionParameterTypeSchema,
1528
1477
  /** Whether this parameter is required */
1529
- required: z23.boolean(),
1478
+ required: z22.boolean(),
1530
1479
  /** Default value (optional, used when not provided) */
1531
- defaultValue: z23.union([z23.string(), z23.number(), z23.boolean()]).optional(),
1480
+ defaultValue: z22.union([z22.string(), z22.number(), z22.boolean()]).optional(),
1532
1481
  /** If true, parameter is hidden by default behind "Show advanced options" */
1533
- advanced: z23.boolean().optional()
1482
+ advanced: z22.boolean().optional()
1534
1483
  });
1535
- var ScenarioAssertionLinkSchema = z23.object({
1484
+ var ScenarioAssertionLinkSchema = z22.object({
1536
1485
  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
1537
- assertionId: z23.string(),
1486
+ assertionId: z22.string(),
1538
1487
  /** Parameter values for this assertion in this scenario */
1539
- params: z23.record(
1540
- z23.string(),
1541
- z23.union([z23.string(), z23.number(), z23.boolean(), z23.null()])
1488
+ params: z22.record(
1489
+ z22.string(),
1490
+ z22.union([z22.string(), z22.number(), z22.boolean(), z22.null()])
1542
1491
  ).optional()
1543
1492
  });
1544
- var SkillWasCalledConfigSchema = z23.object({
1493
+ var SkillWasCalledConfigSchema = z22.object({
1545
1494
  /** Names of the skills that must have been called */
1546
- skillNames: z23.array(z23.string().min(1)).min(1)
1495
+ skillNames: z22.array(z22.string().min(1)).min(1)
1547
1496
  });
1548
- var CostConfigSchema = z23.strictObject({
1497
+ var CostConfigSchema = z22.strictObject({
1549
1498
  /** Maximum allowed cost in USD */
1550
- maxCostUsd: z23.number().positive()
1499
+ maxCostUsd: z22.number().positive()
1551
1500
  });
1552
- var ToolCalledWithParamConfigSchema = z23.strictObject({
1501
+ var ToolCalledWithParamConfigSchema = z22.strictObject({
1553
1502
  /** Name of the tool that must have been called */
1554
- toolName: z23.string().min(1),
1503
+ toolName: z22.string().min(1),
1555
1504
  /** JSON string of key-value pairs for expected parameters (substring match) */
1556
- expectedParams: z23.string().min(1)
1505
+ expectedParams: z22.string().min(1),
1506
+ /** If true, the matching tool call must also have succeeded (step.success === true) */
1507
+ requireSuccess: z22.boolean().optional()
1557
1508
  });
1558
- var BuildPassedConfigSchema = z23.strictObject({
1509
+ var BuildPassedConfigSchema = z22.strictObject({
1559
1510
  /** Command to run (default: "yarn build") */
1560
- command: z23.string().optional(),
1511
+ command: z22.string().optional(),
1561
1512
  /** Expected exit code (default: 0) */
1562
- expectedExitCode: z23.number().int().optional()
1513
+ expectedExitCode: z22.number().int().optional()
1563
1514
  });
1564
- var TimeConfigSchema = z23.strictObject({
1515
+ var TimeConfigSchema = z22.strictObject({
1565
1516
  /** Maximum allowed duration in milliseconds */
1566
- maxDurationMs: z23.number().int().positive()
1517
+ maxDurationMs: z22.number().int().positive()
1567
1518
  });
1568
- var LlmJudgeConfigSchema = z23.object({
1519
+ var LlmJudgeConfigSchema = z22.object({
1569
1520
  /**
1570
1521
  * Prompt template with placeholders:
1571
1522
  * - {{output}}: agent's final output
@@ -1576,19 +1527,45 @@ var LlmJudgeConfigSchema = z23.object({
1576
1527
  * - {{trace}}: step-by-step trace of tool calls
1577
1528
  * - Custom parameters defined in the parameters array
1578
1529
  */
1579
- prompt: z23.string().min(1),
1530
+ prompt: z22.string().min(1),
1580
1531
  /** Minimum score to pass (0-10, default 7) */
1581
- minScore: z23.number().int().min(0).max(10).optional(),
1532
+ minScore: z22.number().int().min(0).max(10).optional(),
1582
1533
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
1583
- model: z23.string().optional(),
1534
+ model: z22.string().optional(),
1584
1535
  /** Max output tokens */
1585
- maxTokens: z23.number().int().optional(),
1536
+ maxTokens: z22.number().int().optional(),
1586
1537
  /** Temperature (0-1) */
1587
- temperature: z23.number().min(0).max(1).optional(),
1538
+ temperature: z22.number().min(0).max(1).optional(),
1588
1539
  /** User-defined parameters for this assertion */
1589
- parameters: z23.array(AssertionParameterSchema).optional()
1540
+ parameters: z22.array(AssertionParameterSchema).optional()
1541
+ });
1542
+ var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
1543
+ type: z22.literal("skill_was_called")
1544
+ });
1545
+ var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
1546
+ type: z22.literal("tool_called_with_param")
1547
+ });
1548
+ var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
1549
+ type: z22.literal("build_passed")
1550
+ });
1551
+ var CostAssertionSchema = CostConfigSchema.extend({
1552
+ type: z22.literal("cost")
1553
+ });
1554
+ var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
1555
+ type: z22.literal("llm_judge")
1590
1556
  });
1591
- var AssertionConfigSchema = z23.union([
1557
+ var TimeAssertionSchema = TimeConfigSchema.extend({
1558
+ type: z22.literal("time_limit")
1559
+ });
1560
+ var AssertionSchema = z22.union([
1561
+ SkillWasCalledAssertionSchema,
1562
+ ToolCalledWithParamAssertionSchema,
1563
+ BuildPassedAssertionSchema,
1564
+ TimeAssertionSchema,
1565
+ CostAssertionSchema,
1566
+ LlmJudgeAssertionSchema
1567
+ ]);
1568
+ var AssertionConfigSchema = z22.union([
1592
1569
  LlmJudgeConfigSchema,
1593
1570
  // requires prompt - check first
1594
1571
  SkillWasCalledConfigSchema,
@@ -1601,7 +1578,7 @@ var AssertionConfigSchema = z23.union([
1601
1578
  // requires maxCostUsd, uses strictObject
1602
1579
  BuildPassedConfigSchema,
1603
1580
  // all optional, uses strictObject to reject unknown keys
1604
- z23.object({})
1581
+ z22.object({})
1605
1582
  // fallback empty config
1606
1583
  ]);
1607
1584
  var CustomAssertionSchema = TenantEntitySchema.extend({
@@ -1652,25 +1629,25 @@ function getLlmJudgeConfig(assertion) {
1652
1629
  }
1653
1630
 
1654
1631
  // src/scenario/test-scenario.ts
1655
- var ExpectedFileSchema = z24.object({
1632
+ var ExpectedFileSchema = z23.object({
1656
1633
  /** Relative path where the file should be created */
1657
- path: z24.string(),
1634
+ path: z23.string(),
1658
1635
  /** Optional expected content */
1659
- content: z24.string().optional()
1636
+ content: z23.string().optional()
1660
1637
  });
1661
1638
  var TestScenarioSchema = TenantEntitySchema.extend({
1662
1639
  /** The prompt sent to the agent to trigger the task */
1663
- triggerPrompt: z24.string().min(10),
1640
+ triggerPrompt: z23.string().min(10),
1664
1641
  /** ID of the template to use for this scenario (null = no template) */
1665
- templateId: z24.string().nullish(),
1642
+ templateId: z23.string().nullish(),
1666
1643
  /** Inline assertions to evaluate for this scenario (legacy) */
1667
- assertions: z24.array(AssertionSchema).optional(),
1644
+ assertions: z23.array(AssertionSchema).optional(),
1668
1645
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
1669
- assertionIds: z24.array(z24.string()).optional(),
1646
+ assertionIds: z23.array(z23.string()).optional(),
1670
1647
  /** Linked assertions with per-scenario parameter values */
1671
- assertionLinks: z24.array(ScenarioAssertionLinkSchema).optional(),
1648
+ assertionLinks: z23.array(ScenarioAssertionLinkSchema).optional(),
1672
1649
  /** Tags for categorisation and filtering */
1673
- tags: z24.array(z24.string()).optional()
1650
+ tags: z23.array(z23.string()).optional()
1674
1651
  });
1675
1652
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1676
1653
  id: true,
@@ -1681,10 +1658,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1681
1658
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
1682
1659
 
1683
1660
  // src/suite/test-suite.ts
1684
- import { z as z25 } from "zod";
1661
+ import { z as z24 } from "zod";
1685
1662
  var TestSuiteSchema = TenantEntitySchema.extend({
1686
1663
  /** IDs of test scenarios in this suite */
1687
- scenarioIds: z25.array(z25.string())
1664
+ scenarioIds: z24.array(z24.string())
1688
1665
  });
1689
1666
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1690
1667
  id: true,
@@ -1695,21 +1672,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1695
1672
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
1696
1673
 
1697
1674
  // src/evaluation/metrics.ts
1698
- import { z as z26 } from "zod";
1699
- var TokenUsageSchema = z26.object({
1700
- prompt: z26.number(),
1701
- completion: z26.number(),
1702
- total: z26.number()
1703
- });
1704
- var EvalMetricsSchema = z26.object({
1705
- totalAssertions: z26.number(),
1706
- passed: z26.number(),
1707
- failed: z26.number(),
1708
- skipped: z26.number(),
1709
- errors: z26.number(),
1710
- passRate: z26.number(),
1711
- avgDuration: z26.number(),
1712
- totalDuration: z26.number()
1675
+ import { z as z25 } from "zod";
1676
+ var TokenUsageSchema = z25.object({
1677
+ prompt: z25.number(),
1678
+ completion: z25.number(),
1679
+ total: z25.number()
1680
+ });
1681
+ var EvalMetricsSchema = z25.object({
1682
+ totalAssertions: z25.number(),
1683
+ passed: z25.number(),
1684
+ failed: z25.number(),
1685
+ skipped: z25.number(),
1686
+ errors: z25.number(),
1687
+ passRate: z25.number(),
1688
+ avgDuration: z25.number(),
1689
+ totalDuration: z25.number()
1713
1690
  });
1714
1691
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1715
1692
  EvalStatus2["PENDING"] = "pending";
@@ -1719,7 +1696,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1719
1696
  EvalStatus2["CANCELLED"] = "cancelled";
1720
1697
  return EvalStatus2;
1721
1698
  })(EvalStatus || {});
1722
- var EvalStatusSchema = z26.enum(EvalStatus);
1699
+ var EvalStatusSchema = z25.enum(EvalStatus);
1723
1700
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1724
1701
  LLMStepType2["COMPLETION"] = "completion";
1725
1702
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -1727,52 +1704,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1727
1704
  LLMStepType2["THINKING"] = "thinking";
1728
1705
  return LLMStepType2;
1729
1706
  })(LLMStepType || {});
1730
- var LLMTraceStepSchema = z26.object({
1731
- id: z26.string(),
1732
- stepNumber: z26.number(),
1733
- type: z26.enum(LLMStepType),
1734
- model: z26.string(),
1735
- provider: z26.string(),
1736
- startedAt: z26.string(),
1737
- durationMs: z26.number(),
1707
+ var LLMTraceStepSchema = z25.object({
1708
+ id: z25.string(),
1709
+ stepNumber: z25.number(),
1710
+ type: z25.enum(LLMStepType),
1711
+ model: z25.string(),
1712
+ provider: z25.string(),
1713
+ startedAt: z25.string(),
1714
+ durationMs: z25.number(),
1738
1715
  tokenUsage: TokenUsageSchema,
1739
- costUsd: z26.number(),
1740
- toolName: z26.string().optional(),
1741
- toolArguments: z26.string().optional(),
1742
- inputPreview: z26.string().optional(),
1743
- outputPreview: z26.string().optional(),
1744
- success: z26.boolean(),
1745
- error: z26.string().optional()
1746
- });
1747
- var LLMBreakdownStatsSchema = z26.object({
1748
- count: z26.number(),
1749
- durationMs: z26.number(),
1750
- tokens: z26.number(),
1751
- costUsd: z26.number()
1752
- });
1753
- var LLMTraceSummarySchema = z26.object({
1754
- totalSteps: z26.number(),
1755
- totalDurationMs: z26.number(),
1716
+ costUsd: z25.number(),
1717
+ toolName: z25.string().optional(),
1718
+ toolArguments: z25.string().optional(),
1719
+ inputPreview: z25.string().optional(),
1720
+ outputPreview: z25.string().optional(),
1721
+ success: z25.boolean(),
1722
+ error: z25.string().optional()
1723
+ });
1724
+ var LLMBreakdownStatsSchema = z25.object({
1725
+ count: z25.number(),
1726
+ durationMs: z25.number(),
1727
+ tokens: z25.number(),
1728
+ costUsd: z25.number()
1729
+ });
1730
+ var LLMTraceSummarySchema = z25.object({
1731
+ totalSteps: z25.number(),
1732
+ totalDurationMs: z25.number(),
1756
1733
  totalTokens: TokenUsageSchema,
1757
- totalCostUsd: z26.number(),
1758
- stepTypeBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema).optional(),
1759
- modelBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema),
1760
- modelsUsed: z26.array(z26.string())
1761
- });
1762
- var LLMTraceSchema = z26.object({
1763
- id: z26.string(),
1764
- steps: z26.array(LLMTraceStepSchema),
1734
+ totalCostUsd: z25.number(),
1735
+ stepTypeBreakdown: z25.record(z25.string(), LLMBreakdownStatsSchema).optional(),
1736
+ modelBreakdown: z25.record(z25.string(), LLMBreakdownStatsSchema),
1737
+ modelsUsed: z25.array(z25.string())
1738
+ });
1739
+ var LLMTraceSchema = z25.object({
1740
+ id: z25.string(),
1741
+ steps: z25.array(LLMTraceStepSchema),
1765
1742
  summary: LLMTraceSummarySchema
1766
1743
  });
1767
1744
 
1768
1745
  // src/evaluation/eval-result.ts
1769
- import { z as z30 } from "zod";
1746
+ import { z as z29 } from "zod";
1770
1747
 
1771
1748
  // src/evaluation/eval-run.ts
1772
- import { z as z28 } from "zod";
1749
+ import { z as z27 } from "zod";
1773
1750
 
1774
1751
  // src/evaluation/live-trace.ts
1775
- import { z as z27 } from "zod";
1752
+ import { z as z26 } from "zod";
1776
1753
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1777
1754
  LiveTraceEventType2["THINKING"] = "thinking";
1778
1755
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -1786,37 +1763,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1786
1763
  LiveTraceEventType2["USER"] = "user";
1787
1764
  return LiveTraceEventType2;
1788
1765
  })(LiveTraceEventType || {});
1789
- var LiveTraceEventSchema = z27.object({
1766
+ var LiveTraceEventSchema = z26.object({
1790
1767
  /** The evaluation run ID */
1791
- evalRunId: z27.string(),
1768
+ evalRunId: z26.string(),
1792
1769
  /** The scenario ID being executed */
1793
- scenarioId: z27.string(),
1770
+ scenarioId: z26.string(),
1794
1771
  /** The scenario name for display */
1795
- scenarioName: z27.string(),
1772
+ scenarioName: z26.string(),
1796
1773
  /** The target ID (skill, agent, etc.) */
1797
- targetId: z27.string(),
1774
+ targetId: z26.string(),
1798
1775
  /** The target name for display */
1799
- targetName: z27.string(),
1776
+ targetName: z26.string(),
1800
1777
  /** Step number in the current scenario execution */
1801
- stepNumber: z27.number(),
1778
+ stepNumber: z26.number(),
1802
1779
  /** Type of trace event */
1803
- type: z27.enum(LiveTraceEventType),
1780
+ type: z26.enum(LiveTraceEventType),
1804
1781
  /** Tool name if this is a tool_use event */
1805
- toolName: z27.string().optional(),
1782
+ toolName: z26.string().optional(),
1806
1783
  /** Tool arguments preview (truncated JSON) */
1807
- toolArgs: z27.string().optional(),
1784
+ toolArgs: z26.string().optional(),
1808
1785
  /** Output preview (truncated text) */
1809
- outputPreview: z27.string().optional(),
1786
+ outputPreview: z26.string().optional(),
1810
1787
  /** File path for file operations */
1811
- filePath: z27.string().optional(),
1788
+ filePath: z26.string().optional(),
1812
1789
  /** Elapsed time in milliseconds for progress events */
1813
- elapsedMs: z27.number().optional(),
1790
+ elapsedMs: z26.number().optional(),
1814
1791
  /** Thinking/reasoning text from Claude */
1815
- thinking: z27.string().optional(),
1792
+ thinking: z26.string().optional(),
1816
1793
  /** Timestamp when this event occurred */
1817
- timestamp: z27.string(),
1794
+ timestamp: z26.string(),
1818
1795
  /** Whether this is the final event for this scenario */
1819
- isComplete: z27.boolean()
1796
+ isComplete: z26.boolean()
1820
1797
  });
1821
1798
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
1822
1799
  function parseTraceEventLine(line) {
@@ -1844,14 +1821,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1844
1821
  TriggerType2["MANUAL"] = "MANUAL";
1845
1822
  return TriggerType2;
1846
1823
  })(TriggerType || {});
1847
- var TriggerMetadataSchema = z28.object({
1848
- version: z28.string().optional(),
1849
- resourceUpdated: z28.array(z28.string()).optional()
1824
+ var TriggerMetadataSchema = z27.object({
1825
+ version: z27.string().optional(),
1826
+ resourceUpdated: z27.array(z27.string()).optional()
1850
1827
  });
1851
- var TriggerSchema = z28.object({
1852
- id: z28.string(),
1828
+ var TriggerSchema = z27.object({
1829
+ id: z27.string(),
1853
1830
  metadata: TriggerMetadataSchema.optional(),
1854
- type: z28.enum(TriggerType)
1831
+ type: z27.enum(TriggerType)
1855
1832
  });
1856
1833
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
1857
1834
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -1869,28 +1846,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
1869
1846
  FailureSeverity2["LOW"] = "low";
1870
1847
  return FailureSeverity2;
1871
1848
  })(FailureSeverity || {});
1872
- var DiffLineTypeSchema = z28.enum(["added", "removed", "unchanged"]);
1873
- var DiffLineSchema = z28.object({
1849
+ var DiffLineTypeSchema = z27.enum(["added", "removed", "unchanged"]);
1850
+ var DiffLineSchema = z27.object({
1874
1851
  type: DiffLineTypeSchema,
1875
- content: z28.string(),
1876
- lineNumber: z28.number()
1877
- });
1878
- var DiffContentSchema = z28.object({
1879
- path: z28.string(),
1880
- expected: z28.string(),
1881
- actual: z28.string(),
1882
- diffLines: z28.array(DiffLineSchema),
1883
- renamedFrom: z28.string().optional()
1884
- });
1885
- var CommandExecutionSchema = z28.object({
1886
- command: z28.string(),
1887
- exitCode: z28.number(),
1888
- output: z28.string().optional(),
1889
- duration: z28.number()
1890
- });
1891
- var FileModificationSchema = z28.object({
1892
- path: z28.string(),
1893
- action: z28.enum(["created", "modified", "deleted"])
1852
+ content: z27.string(),
1853
+ lineNumber: z27.number()
1854
+ });
1855
+ var DiffContentSchema = z27.object({
1856
+ path: z27.string(),
1857
+ expected: z27.string(),
1858
+ actual: z27.string(),
1859
+ diffLines: z27.array(DiffLineSchema),
1860
+ renamedFrom: z27.string().optional()
1861
+ });
1862
+ var CommandExecutionSchema = z27.object({
1863
+ command: z27.string(),
1864
+ exitCode: z27.number(),
1865
+ output: z27.string().optional(),
1866
+ duration: z27.number()
1867
+ });
1868
+ var FileModificationSchema = z27.object({
1869
+ path: z27.string(),
1870
+ action: z27.enum(["created", "modified", "deleted"])
1894
1871
  });
1895
1872
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1896
1873
  TemplateFileStatus2["NEW"] = "new";
@@ -1898,87 +1875,87 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1898
1875
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1899
1876
  return TemplateFileStatus2;
1900
1877
  })(TemplateFileStatus || {});
1901
- var TemplateFileSchema = z28.object({
1878
+ var TemplateFileSchema = z27.object({
1902
1879
  /** Relative path within the template */
1903
- path: z28.string(),
1880
+ path: z27.string(),
1904
1881
  /** Full file content after execution */
1905
- content: z28.string(),
1882
+ content: z27.string(),
1906
1883
  /** File status (new, modified, unchanged) */
1907
- status: z28.enum(["new", "modified", "unchanged"])
1908
- });
1909
- var ApiCallSchema = z28.object({
1910
- endpoint: z28.string(),
1911
- tokensUsed: z28.number(),
1912
- duration: z28.number()
1913
- });
1914
- var ExecutionTraceSchema = z28.object({
1915
- commands: z28.array(CommandExecutionSchema),
1916
- filesModified: z28.array(FileModificationSchema),
1917
- apiCalls: z28.array(ApiCallSchema),
1918
- totalDuration: z28.number()
1919
- });
1920
- var FailureAnalysisSchema = z28.object({
1921
- category: z28.enum(FailureCategory),
1922
- severity: z28.enum(FailureSeverity),
1923
- summary: z28.string(),
1924
- details: z28.string(),
1925
- rootCause: z28.string(),
1926
- suggestedFix: z28.string(),
1927
- relatedAssertions: z28.array(z28.string()),
1928
- codeSnippet: z28.string().optional(),
1929
- similarIssues: z28.array(z28.string()).optional(),
1930
- patternId: z28.string().optional(),
1884
+ status: z27.enum(["new", "modified", "unchanged"])
1885
+ });
1886
+ var ApiCallSchema = z27.object({
1887
+ endpoint: z27.string(),
1888
+ tokensUsed: z27.number(),
1889
+ duration: z27.number()
1890
+ });
1891
+ var ExecutionTraceSchema = z27.object({
1892
+ commands: z27.array(CommandExecutionSchema),
1893
+ filesModified: z27.array(FileModificationSchema),
1894
+ apiCalls: z27.array(ApiCallSchema),
1895
+ totalDuration: z27.number()
1896
+ });
1897
+ var FailureAnalysisSchema = z27.object({
1898
+ category: z27.enum(FailureCategory),
1899
+ severity: z27.enum(FailureSeverity),
1900
+ summary: z27.string(),
1901
+ details: z27.string(),
1902
+ rootCause: z27.string(),
1903
+ suggestedFix: z27.string(),
1904
+ relatedAssertions: z27.array(z27.string()),
1905
+ codeSnippet: z27.string().optional(),
1906
+ similarIssues: z27.array(z27.string()).optional(),
1907
+ patternId: z27.string().optional(),
1931
1908
  // Extended fields for detailed debugging
1932
1909
  diff: DiffContentSchema.optional(),
1933
1910
  executionTrace: ExecutionTraceSchema.optional()
1934
1911
  });
1935
1912
  var EvalRunSchema = TenantEntitySchema.extend({
1936
1913
  /** Agent ID for this run */
1937
- agentId: z28.string().optional(),
1914
+ agentId: z27.string().optional(),
1938
1915
  /** Preset ID that originated this run (optional) */
1939
- presetId: z28.string().optional(),
1916
+ presetId: z27.string().optional(),
1940
1917
  /** Skill IDs for this run */
1941
- skillIds: z28.array(z28.string()).optional(),
1918
+ skillIds: z27.array(z27.string()).optional(),
1942
1919
  /** Map of skillId to skillVersionId for this run */
1943
- skillVersions: z28.record(z28.string(), z28.string()).optional(),
1920
+ skillVersions: z27.record(z27.string(), z27.string()).optional(),
1944
1921
  /** Scenario IDs to run (always present — resolved server-side from tags when needed) */
1945
- scenarioIds: z28.array(z28.string()),
1922
+ scenarioIds: z27.array(z27.string()),
1946
1923
  /** Current status */
1947
1924
  status: EvalStatusSchema,
1948
1925
  /** Progress percentage (0-100) */
1949
- progress: z28.number(),
1926
+ progress: z27.number(),
1950
1927
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1951
- results: z28.array(z28.lazy(() => EvalRunResultSchema)),
1928
+ results: z27.array(z27.lazy(() => EvalRunResultSchema)),
1952
1929
  /** Aggregated metrics across all results */
1953
1930
  aggregateMetrics: EvalMetricsSchema,
1954
1931
  /** Failure analyses */
1955
- failureAnalyses: z28.array(FailureAnalysisSchema).optional(),
1932
+ failureAnalyses: z27.array(FailureAnalysisSchema).optional(),
1956
1933
  /** Aggregated LLM trace summary */
1957
1934
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1958
1935
  /** What triggered this run */
1959
1936
  trigger: TriggerSchema.optional(),
1960
1937
  /** When the run started (set when evaluation is triggered) */
1961
- startedAt: z28.string().optional(),
1938
+ startedAt: z27.string().optional(),
1962
1939
  /** When the run completed */
1963
- completedAt: z28.string().optional(),
1940
+ completedAt: z27.string().optional(),
1964
1941
  /** Live trace events captured during execution (for playback on results page) */
1965
- liveTraceEvents: z28.array(LiveTraceEventSchema).optional(),
1942
+ liveTraceEvents: z27.array(LiveTraceEventSchema).optional(),
1966
1943
  /** Remote job ID for tracking execution in Dev Machines */
1967
- jobId: z28.string().optional(),
1944
+ jobId: z27.string().optional(),
1968
1945
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1969
- jobStatus: z28.string().optional(),
1946
+ jobStatus: z27.string().optional(),
1970
1947
  /** Remote job error message if the job failed */
1971
- jobError: z28.string().optional(),
1948
+ jobError: z27.string().optional(),
1972
1949
  /** Timestamp of the last job status check */
1973
- jobStatusCheckedAt: z28.string().optional(),
1950
+ jobStatusCheckedAt: z27.string().optional(),
1974
1951
  /** MCP server IDs to enable for this run (optional) */
1975
- mcpIds: z28.array(z28.string()).optional(),
1952
+ mcpIds: z27.array(z27.string()).optional(),
1976
1953
  /** Sub-agent IDs to enable for this run (optional) */
1977
- subAgentIds: z28.array(z28.string()).optional(),
1954
+ subAgentIds: z27.array(z27.string()).optional(),
1978
1955
  /** Rule IDs to enable for this run (optional) */
1979
- ruleIds: z28.array(z28.string()).optional(),
1956
+ ruleIds: z27.array(z27.string()).optional(),
1980
1957
  /** Tags used to select scenarios for this run (for traceability) */
1981
- tags: z28.array(z28.string()).optional()
1958
+ tags: z27.array(z27.string()).optional()
1982
1959
  });
1983
1960
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1984
1961
  id: true,
@@ -1993,60 +1970,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1993
1970
  scenarioIds: true
1994
1971
  }).extend({
1995
1972
  /** Optional on input — backend resolves from tags when not provided */
1996
- scenarioIds: z28.array(z28.string()).optional()
1973
+ scenarioIds: z27.array(z27.string()).optional()
1997
1974
  }).refine(
1998
1975
  (data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
1999
1976
  { message: "Either scenarioIds or tags must be provided" }
2000
1977
  );
2001
- var EvaluationProgressSchema = z28.object({
2002
- runId: z28.string(),
2003
- targetId: z28.string(),
2004
- totalScenarios: z28.number(),
2005
- completedScenarios: z28.number(),
2006
- scenarioProgress: z28.array(
2007
- z28.object({
2008
- scenarioId: z28.string(),
2009
- currentStep: z28.string(),
2010
- error: z28.string().optional()
1978
+ var EvaluationProgressSchema = z27.object({
1979
+ runId: z27.string(),
1980
+ targetId: z27.string(),
1981
+ totalScenarios: z27.number(),
1982
+ completedScenarios: z27.number(),
1983
+ scenarioProgress: z27.array(
1984
+ z27.object({
1985
+ scenarioId: z27.string(),
1986
+ currentStep: z27.string(),
1987
+ error: z27.string().optional()
2011
1988
  })
2012
1989
  ),
2013
- createdAt: z28.number()
2014
- });
2015
- var EvaluationLogSchema = z28.object({
2016
- runId: z28.string(),
2017
- scenarioId: z28.string(),
2018
- log: z28.object({
2019
- level: z28.enum(["info", "error", "debug"]),
2020
- message: z28.string().optional(),
2021
- args: z28.array(z28.any()).optional(),
2022
- error: z28.string().optional()
1990
+ createdAt: z27.number()
1991
+ });
1992
+ var EvaluationLogSchema = z27.object({
1993
+ runId: z27.string(),
1994
+ scenarioId: z27.string(),
1995
+ log: z27.object({
1996
+ level: z27.enum(["info", "error", "debug"]),
1997
+ message: z27.string().optional(),
1998
+ args: z27.array(z27.any()).optional(),
1999
+ error: z27.string().optional()
2023
2000
  })
2024
2001
  });
2025
2002
  var LLM_TIMEOUT = 12e4;
2026
2003
 
2027
2004
  // src/evaluation/conversation.ts
2028
- import { z as z29 } from "zod";
2029
- var TextBlockSchema = z29.object({
2030
- type: z29.literal("text"),
2031
- text: z29.string()
2032
- });
2033
- var ThinkingBlockSchema = z29.object({
2034
- type: z29.literal("thinking"),
2035
- thinking: z29.string()
2036
- });
2037
- var ToolUseBlockSchema = z29.object({
2038
- type: z29.literal("tool_use"),
2039
- toolName: z29.string(),
2040
- toolId: z29.string(),
2041
- input: z29.unknown()
2042
- });
2043
- var ToolResultBlockSchema = z29.object({
2044
- type: z29.literal("tool_result"),
2045
- toolUseId: z29.string(),
2046
- content: z29.string(),
2047
- isError: z29.boolean().optional()
2048
- });
2049
- var ConversationBlockSchema = z29.discriminatedUnion("type", [
2005
+ import { z as z28 } from "zod";
2006
+ var TextBlockSchema = z28.object({
2007
+ type: z28.literal("text"),
2008
+ text: z28.string()
2009
+ });
2010
+ var ThinkingBlockSchema = z28.object({
2011
+ type: z28.literal("thinking"),
2012
+ thinking: z28.string()
2013
+ });
2014
+ var ToolUseBlockSchema = z28.object({
2015
+ type: z28.literal("tool_use"),
2016
+ toolName: z28.string(),
2017
+ toolId: z28.string(),
2018
+ input: z28.unknown()
2019
+ });
2020
+ var ToolResultBlockSchema = z28.object({
2021
+ type: z28.literal("tool_result"),
2022
+ toolUseId: z28.string(),
2023
+ content: z28.string(),
2024
+ isError: z28.boolean().optional()
2025
+ });
2026
+ var ConversationBlockSchema = z28.discriminatedUnion("type", [
2050
2027
  TextBlockSchema,
2051
2028
  ThinkingBlockSchema,
2052
2029
  ToolUseBlockSchema,
@@ -2057,18 +2034,18 @@ var ConversationMessageRoles = [
2057
2034
  "user",
2058
2035
  "system"
2059
2036
  ];
2060
- var ConversationMessageSchema = z29.object({
2061
- role: z29.enum(ConversationMessageRoles),
2062
- content: z29.array(ConversationBlockSchema),
2063
- timestamp: z29.string()
2037
+ var ConversationMessageSchema = z28.object({
2038
+ role: z28.enum(ConversationMessageRoles),
2039
+ content: z28.array(ConversationBlockSchema),
2040
+ timestamp: z28.string()
2064
2041
  });
2065
- var ScenarioConversationSchema = z29.object({
2066
- id: z29.string(),
2067
- projectId: z29.string(),
2068
- evalRunId: z29.string(),
2069
- resultId: z29.string(),
2070
- messages: z29.array(ConversationMessageSchema),
2071
- createdAt: z29.string()
2042
+ var ScenarioConversationSchema = z28.object({
2043
+ id: z28.string(),
2044
+ projectId: z28.string(),
2045
+ evalRunId: z28.string(),
2046
+ resultId: z28.string(),
2047
+ messages: z28.array(ConversationMessageSchema),
2048
+ createdAt: z28.string()
2072
2049
  });
2073
2050
 
2074
2051
  // src/evaluation/eval-result.ts
@@ -2079,100 +2056,100 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
2079
2056
  AssertionResultStatus2["ERROR"] = "error";
2080
2057
  return AssertionResultStatus2;
2081
2058
  })(AssertionResultStatus || {});
2082
- var AssertionResultSchema = z30.object({
2083
- id: z30.string(),
2084
- assertionId: z30.string(),
2085
- assertionType: z30.string(),
2086
- assertionName: z30.string(),
2087
- status: z30.enum(AssertionResultStatus),
2088
- message: z30.string().optional(),
2089
- expected: z30.string().optional(),
2090
- actual: z30.string().optional(),
2091
- duration: z30.number().optional(),
2092
- details: z30.record(z30.string(), z30.unknown()).optional(),
2093
- llmTraceSteps: z30.array(LLMTraceStepSchema).optional()
2094
- });
2095
- var EvalRunResultSchema = z30.object({
2096
- id: z30.string(),
2097
- targetId: z30.string(),
2098
- targetName: z30.string().optional(),
2059
+ var AssertionResultSchema = z29.object({
2060
+ id: z29.string(),
2061
+ assertionId: z29.string(),
2062
+ assertionType: z29.string(),
2063
+ assertionName: z29.string(),
2064
+ status: z29.enum(AssertionResultStatus),
2065
+ message: z29.string().optional(),
2066
+ expected: z29.string().optional(),
2067
+ actual: z29.string().optional(),
2068
+ duration: z29.number().optional(),
2069
+ details: z29.record(z29.string(), z29.unknown()).optional(),
2070
+ llmTraceSteps: z29.array(LLMTraceStepSchema).optional()
2071
+ });
2072
+ var EvalRunResultSchema = z29.object({
2073
+ id: z29.string(),
2074
+ targetId: z29.string(),
2075
+ targetName: z29.string().optional(),
2099
2076
  /** SkillVersion ID used for this evaluation (for version tracking) */
2100
- skillVersionId: z30.string().optional(),
2077
+ skillVersionId: z29.string().optional(),
2101
2078
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
2102
- skillVersion: z30.string().optional(),
2103
- scenarioId: z30.string(),
2104
- scenarioName: z30.string(),
2079
+ skillVersion: z29.string().optional(),
2080
+ scenarioId: z29.string(),
2081
+ scenarioName: z29.string(),
2105
2082
  modelConfig: ModelConfigSchema.optional(),
2106
- assertionResults: z30.array(AssertionResultSchema),
2083
+ assertionResults: z29.array(AssertionResultSchema),
2107
2084
  metrics: EvalMetricsSchema.optional(),
2108
- passed: z30.number(),
2109
- failed: z30.number(),
2110
- passRate: z30.number(),
2111
- duration: z30.number(),
2112
- outputText: z30.string().optional(),
2113
- files: z30.array(ExpectedFileSchema).optional(),
2114
- fileDiffs: z30.array(DiffContentSchema).optional(),
2085
+ passed: z29.number(),
2086
+ failed: z29.number(),
2087
+ passRate: z29.number(),
2088
+ duration: z29.number(),
2089
+ outputText: z29.string().optional(),
2090
+ files: z29.array(ExpectedFileSchema).optional(),
2091
+ fileDiffs: z29.array(DiffContentSchema).optional(),
2115
2092
  /** Full template files after execution with status indicators */
2116
- templateFiles: z30.array(TemplateFileSchema).optional(),
2117
- startedAt: z30.string().optional(),
2118
- completedAt: z30.string().optional(),
2093
+ templateFiles: z29.array(TemplateFileSchema).optional(),
2094
+ startedAt: z29.string().optional(),
2095
+ completedAt: z29.string().optional(),
2119
2096
  llmTrace: LLMTraceSchema.optional(),
2120
2097
  /** Full conversation messages (only present in transit; stripped before DB storage) */
2121
- conversation: z30.array(ConversationMessageSchema).optional()
2122
- });
2123
- var PromptResultSchema = z30.object({
2124
- text: z30.string(),
2125
- files: z30.array(z30.unknown()).optional(),
2126
- finishReason: z30.string().optional(),
2127
- reasoning: z30.string().optional(),
2128
- reasoningDetails: z30.unknown().optional(),
2129
- toolCalls: z30.array(z30.unknown()).optional(),
2130
- toolResults: z30.array(z30.unknown()).optional(),
2131
- warnings: z30.array(z30.unknown()).optional(),
2132
- sources: z30.array(z30.unknown()).optional(),
2133
- steps: z30.array(z30.unknown()),
2134
- generationTimeMs: z30.number(),
2135
- prompt: z30.string(),
2136
- systemPrompt: z30.string(),
2137
- usage: z30.object({
2138
- totalTokens: z30.number().optional(),
2139
- totalMicrocentsSpent: z30.number().optional()
2098
+ conversation: z29.array(ConversationMessageSchema).optional()
2099
+ });
2100
+ var PromptResultSchema = z29.object({
2101
+ text: z29.string(),
2102
+ files: z29.array(z29.unknown()).optional(),
2103
+ finishReason: z29.string().optional(),
2104
+ reasoning: z29.string().optional(),
2105
+ reasoningDetails: z29.unknown().optional(),
2106
+ toolCalls: z29.array(z29.unknown()).optional(),
2107
+ toolResults: z29.array(z29.unknown()).optional(),
2108
+ warnings: z29.array(z29.unknown()).optional(),
2109
+ sources: z29.array(z29.unknown()).optional(),
2110
+ steps: z29.array(z29.unknown()),
2111
+ generationTimeMs: z29.number(),
2112
+ prompt: z29.string(),
2113
+ systemPrompt: z29.string(),
2114
+ usage: z29.object({
2115
+ totalTokens: z29.number().optional(),
2116
+ totalMicrocentsSpent: z29.number().optional()
2140
2117
  })
2141
2118
  });
2142
- var EvaluationResultSchema = z30.object({
2143
- id: z30.string(),
2144
- runId: z30.string(),
2145
- timestamp: z30.number(),
2119
+ var EvaluationResultSchema = z29.object({
2120
+ id: z29.string(),
2121
+ runId: z29.string(),
2122
+ timestamp: z29.number(),
2146
2123
  promptResult: PromptResultSchema,
2147
- testResults: z30.array(z30.unknown()),
2148
- tags: z30.array(z30.string()).optional(),
2149
- feedback: z30.string().optional(),
2150
- score: z30.number(),
2151
- suiteId: z30.string().optional()
2152
- });
2153
- var LeanEvaluationResultSchema = z30.object({
2154
- id: z30.string(),
2155
- runId: z30.string(),
2156
- timestamp: z30.number(),
2157
- tags: z30.array(z30.string()).optional(),
2158
- scenarioId: z30.string(),
2159
- scenarioVersion: z30.number().optional(),
2160
- targetId: z30.string(),
2161
- targetVersion: z30.number().optional(),
2162
- suiteId: z30.string().optional(),
2163
- score: z30.number(),
2164
- time: z30.number().optional(),
2165
- microcentsSpent: z30.number().optional()
2124
+ testResults: z29.array(z29.unknown()),
2125
+ tags: z29.array(z29.string()).optional(),
2126
+ feedback: z29.string().optional(),
2127
+ score: z29.number(),
2128
+ suiteId: z29.string().optional()
2129
+ });
2130
+ var LeanEvaluationResultSchema = z29.object({
2131
+ id: z29.string(),
2132
+ runId: z29.string(),
2133
+ timestamp: z29.number(),
2134
+ tags: z29.array(z29.string()).optional(),
2135
+ scenarioId: z29.string(),
2136
+ scenarioVersion: z29.number().optional(),
2137
+ targetId: z29.string(),
2138
+ targetVersion: z29.number().optional(),
2139
+ suiteId: z29.string().optional(),
2140
+ score: z29.number(),
2141
+ time: z29.number().optional(),
2142
+ microcentsSpent: z29.number().optional()
2166
2143
  });
2167
2144
 
2168
2145
  // src/project/project.ts
2169
- import { z as z31 } from "zod";
2146
+ import { z as z30 } from "zod";
2170
2147
  var ProjectSchema = BaseEntitySchema.extend({
2171
- appId: z31.string().optional().describe("The ID of the app in Dev Center"),
2172
- appSecret: z31.string().optional().describe("The secret of the app in Dev Center"),
2173
- useWixAuth: z31.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2174
- useBase44Auth: z31.boolean().optional().describe("Enable Base44 auth for evaluations"),
2175
- scenarioTags: z31.array(z31.string()).optional().describe("Project-level tag vocabulary for scenarios")
2148
+ appId: z30.string().optional().describe("The ID of the app in Dev Center"),
2149
+ appSecret: z30.string().optional().describe("The secret of the app in Dev Center"),
2150
+ useWixAuth: z30.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2151
+ useBase44Auth: z30.boolean().optional().describe("Enable Base44 auth for evaluations"),
2152
+ scenarioTags: z30.array(z30.string()).optional().describe("Project-level tag vocabulary for scenarios")
2176
2153
  });
2177
2154
  var CreateProjectInputSchema = ProjectSchema.omit({
2178
2155
  id: true,
@@ -2225,7 +2202,7 @@ var SYSTEM_ASSERTIONS = {
2225
2202
  [SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
2226
2203
  id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
2227
2204
  name: "Tool Called With Param",
2228
- description: "Check that a tool was called with expected parameters",
2205
+ description: "Check that a tool was called with expected parameters (tool name is substring matched)",
2229
2206
  type: "tool_called_with_param",
2230
2207
  parameters: [
2231
2208
  {
@@ -2239,6 +2216,14 @@ var SYSTEM_ASSERTIONS = {
2239
2216
  label: "Expected Parameters (JSON, substring match)",
2240
2217
  type: "string",
2241
2218
  required: true
2219
+ },
2220
+ {
2221
+ name: "requireSuccess",
2222
+ label: "Require Successful Call",
2223
+ type: "boolean",
2224
+ required: false,
2225
+ defaultValue: false,
2226
+ advanced: true
2242
2227
  }
2243
2228
  ]
2244
2229
  },