@wix/evalforge-evaluator 0.91.0 → 0.93.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,12 +9,13 @@ evaluator <project-id> <eval-run-id>
9
9
  ```
10
10
 
11
11
  1. **Load configuration** from environment variables (server URL, AI Gateway credentials, etc.)
12
- 2. **Fetch evaluation data** from the backend API — eval run, scenarios, agent config, skills, MCPs, sub-agents, and templates
12
+ 2. **Fetch evaluation data** from the backend API — eval run, scenarios, agent config, skills, MCPs, sub-agents, rules, and templates
13
13
  3. **For each scenario:**
14
14
  - Prepare a working directory (download and extract template)
15
15
  - Write skills to `.claude/skills/<name>/SKILL.md`
16
16
  - Write MCPs to `.mcp.json`
17
17
  - Write sub-agents to `.claude/agents/<name>.md`
18
+ - Write rules to `CLAUDE.md`, `AGENTS.md`, or `.cursor/rules/<name>.md` based on rule type
18
19
  - Launch the Claude Code agent with the scenario's trigger prompt via `@anthropic-ai/claude-agent-sdk`
19
20
  - Stream trace events back to the backend
20
21
  - Run assertions on the agent's output
package/build/index.js CHANGED
@@ -182,6 +182,9 @@ function createApiClient(serverUrl, options = "") {
182
182
  getSubAgent(projectId2, id) {
183
183
  return fetchJson(`/projects/${projectId2}/sub-agents/${id}`);
184
184
  },
185
+ getRule(projectId2, id) {
186
+ return fetchJson(`/projects/${projectId2}/rules/${id}`);
187
+ },
185
188
  getAssertion(projectId2, id) {
186
189
  return fetchJson(`/projects/${projectId2}/assertions/${id}`);
187
190
  },
@@ -378,6 +381,12 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
378
381
  evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
379
382
  );
380
383
  }
384
+ let rules = [];
385
+ if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
386
+ rules = await Promise.all(
387
+ evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
388
+ );
389
+ }
381
390
  const templateIds = [
382
391
  ...new Set(
383
392
  scenarios.map((s) => s.templateId).filter((id) => !!id)
@@ -429,6 +438,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
429
438
  skillsGroupName,
430
439
  mcps,
431
440
  subAgents,
441
+ rules,
432
442
  scenarioItems
433
443
  };
434
444
  }
@@ -729,6 +739,56 @@ async function writeSubAgentsToFilesystem(cwd, subAgents) {
729
739
  console.log(`[SubAgents] Written to ${agentsDir}`);
730
740
  }
731
741
 
742
+ // src/run-scenario/agents/claude-code/write-rules.ts
743
+ var import_promises4 = require("fs/promises");
744
+ var import_path5 = require("path");
745
+ var CURSOR_RULES_DIR = ".cursor/rules";
746
+ function toRuleFilename(name, index, nameCount) {
747
+ const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`;
748
+ const count = nameCount.get(base) ?? 0;
749
+ nameCount.set(base, count + 1);
750
+ return count === 0 ? base : `${base}-${count + 1}`;
751
+ }
752
+ async function appendToFile(filePath, content) {
753
+ let existing = "";
754
+ try {
755
+ existing = await (0, import_promises4.readFile)(filePath, "utf8");
756
+ } catch {
757
+ }
758
+ const merged = existing ? `${existing.trimEnd()}
759
+
760
+ ${content}` : content;
761
+ await (0, import_promises4.writeFile)(filePath, merged, "utf8");
762
+ }
763
+ async function writeRulesToFilesystem(cwd, rules) {
764
+ if (rules.length === 0) return;
765
+ const nameCount = /* @__PURE__ */ new Map();
766
+ let hasCursorRules = false;
767
+ for (const [i, rule] of rules.entries()) {
768
+ switch (rule.ruleType) {
769
+ case "claude-md": {
770
+ await appendToFile((0, import_path5.join)(cwd, "CLAUDE.md"), rule.content);
771
+ break;
772
+ }
773
+ case "agents-md": {
774
+ await appendToFile((0, import_path5.join)(cwd, "AGENTS.md"), rule.content);
775
+ break;
776
+ }
777
+ case "cursor-rule": {
778
+ if (!hasCursorRules) {
779
+ await (0, import_promises4.mkdir)((0, import_path5.join)(cwd, CURSOR_RULES_DIR), { recursive: true });
780
+ hasCursorRules = true;
781
+ }
782
+ const filename = toRuleFilename(rule.name, i, nameCount);
783
+ const filePath = (0, import_path5.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
784
+ await (0, import_promises4.writeFile)(filePath, rule.content, "utf8");
785
+ break;
786
+ }
787
+ }
788
+ }
789
+ console.log(`[Rules] Written ${rules.length} rule(s) to ${cwd}`);
790
+ }
791
+
732
792
  // src/run-scenario/agents/claude-code/execute.ts
733
793
  var DEFAULT_MODEL = import_evalforge_types3.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
734
794
  function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
@@ -943,10 +1003,10 @@ async function executeWithClaudeCode(skills, scenario, options) {
943
1003
  }
944
1004
  const startTime = /* @__PURE__ */ new Date();
945
1005
  const allMessages = [];
946
- const { mkdir: mkdirAsync, writeFile: writeFile4 } = await import("fs/promises");
1006
+ const { mkdir: mkdirAsync, writeFile: writeFile5 } = await import("fs/promises");
947
1007
  const claudeDir = `${options.cwd}/.claude`;
948
1008
  await mkdirAsync(claudeDir, { recursive: true });
949
- await writeFile4(`${claudeDir}/settings.json`, "{}", {
1009
+ await writeFile5(`${claudeDir}/settings.json`, "{}", {
950
1010
  flag: "wx"
951
1011
  }).catch(() => {
952
1012
  });
@@ -956,6 +1016,9 @@ async function executeWithClaudeCode(skills, scenario, options) {
956
1016
  if (options.subAgents && options.subAgents.length > 0) {
957
1017
  await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
958
1018
  }
1019
+ if (options.rules && options.rules.length > 0) {
1020
+ await writeRulesToFilesystem(options.cwd, options.rules);
1021
+ }
959
1022
  console.error(
960
1023
  "[DEBUG-H4] writeSkillsToFilesystem START",
961
1024
  JSON.stringify({
@@ -1707,7 +1770,8 @@ var ClaudeCodeAdapter = class {
1707
1770
  aiGatewayHeaders,
1708
1771
  traceContext,
1709
1772
  mcps,
1710
- subAgents
1773
+ subAgents,
1774
+ rules
1711
1775
  } = context;
1712
1776
  const modelForSdk = modelConfig?.model;
1713
1777
  const options = {
@@ -1719,7 +1783,8 @@ var ClaudeCodeAdapter = class {
1719
1783
  aiGatewayHeaders,
1720
1784
  traceContext,
1721
1785
  mcps,
1722
- subAgents
1786
+ subAgents,
1787
+ rules
1723
1788
  };
1724
1789
  const { result, llmTrace } = await executeWithClaudeCode(
1725
1790
  skills,
@@ -1746,7 +1811,7 @@ defaultRegistry.register(claudeCodeAdapter);
1746
1811
 
1747
1812
  // src/run-scenario/file-diff.ts
1748
1813
  var import_fs2 = require("fs");
1749
- var import_path5 = require("path");
1814
+ var import_path6 = require("path");
1750
1815
 
1751
1816
  // ../../node_modules/diff/lib/index.mjs
1752
1817
  function Diff() {
@@ -1922,7 +1987,7 @@ Diff.prototype = {
1922
1987
  tokenize: function tokenize(value) {
1923
1988
  return Array.from(value);
1924
1989
  },
1925
- join: function join4(chars) {
1990
+ join: function join5(chars) {
1926
1991
  return chars.join("");
1927
1992
  },
1928
1993
  postProcess: function postProcess(changeObjects) {
@@ -2362,8 +2427,8 @@ function snapshotDirectory(dir, baseDir) {
2362
2427
  }
2363
2428
  const entries = (0, import_fs2.readdirSync)(dir, { withFileTypes: true });
2364
2429
  for (const entry of entries) {
2365
- const fullPath = (0, import_path5.join)(dir, entry.name);
2366
- const relativePath = (0, import_path5.relative)(base, fullPath);
2430
+ const fullPath = (0, import_path6.join)(dir, entry.name);
2431
+ const relativePath = (0, import_path6.relative)(base, fullPath);
2367
2432
  if (shouldIgnore(entry.name)) {
2368
2433
  continue;
2369
2434
  }
@@ -2501,7 +2566,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2501
2566
  authToken: config.authToken
2502
2567
  },
2503
2568
  mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
2504
- subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0
2569
+ subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
2570
+ rules: evalData.rules?.length > 0 ? evalData.rules : void 0
2505
2571
  };
2506
2572
  const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
2507
2573
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();