npm - @wix/evalforge-evaluator - Versions diffs - 0.90.0 → 0.92.0 - Mend

@wix/evalforge-evaluator 0.90.0 → 0.92.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +2 -1
package/build/index.js +81 -9
package/build/index.js.map +4 -4
package/build/index.mjs +80 -8
package/build/index.mjs.map +4 -4
package/build/types/api-client.d.ts +2 -1
package/build/types/fetch-evaluation-data.d.ts +2 -1
package/build/types/run-scenario/agents/claude-code/types.d.ts +3 -1
package/build/types/run-scenario/agents/claude-code/write-rules.d.ts +13 -0
package/package.json +5 -5

package/README.md CHANGED

@@ -9,12 +9,13 @@ evaluator <project-id> <eval-run-id>
 ```
 1. **Load configuration** from environment variables (server URL, AI Gateway credentials, etc.)
-2. **Fetch evaluation data** from the backend API — eval run, scenarios, agent config, skills, MCPs, sub-agents, and templates
+2. **Fetch evaluation data** from the backend API — eval run, scenarios, agent config, skills, MCPs, sub-agents, rules, and templates
 3. **For each scenario:**
    - Prepare a working directory (download and extract template)
    - Write skills to `.claude/skills/<name>/SKILL.md`
    - Write MCPs to `.mcp.json`
    - Write sub-agents to `.claude/agents/<name>.md`
+   - Write rules to `CLAUDE.md`, `AGENTS.md`, or `.cursor/rules/<name>.md` based on rule type
    - Launch the Claude Code agent with the scenario's trigger prompt via `@anthropic-ai/claude-agent-sdk`
    - Stream trace events back to the backend
    - Run assertions on the agent's output

package/build/index.js CHANGED

@@ -182,6 +182,9 @@ function createApiClient(serverUrl, options = "") {
     getSubAgent(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/sub-agents/${id}`);
     },
+    getRule(projectId2, id) {
+      return fetchJson(`/projects/${projectId2}/rules/${id}`);
+    },
     getAssertion(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/assertions/${id}`);
     },
@@ -286,6 +289,12 @@ function resolveSystemAssertion(assertionId, params) {
         maxDurationMs: params?.maxDurationMs ?? 3e5
       };
       break;
+    case "cost":
+      baseAssertion = {
+        type: "cost",
+        maxCostUsd: params?.maxCostUsd ?? 1
+      };
+      break;
     case "llm_judge":
       baseAssertion = {
         type: "llm_judge",
@@ -372,6 +381,12 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
       evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
     );
   }
+  let rules = [];
+  if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
+    rules = await Promise.all(
+      evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
+    );
+  }
   const templateIds = [
     ...new Set(
       scenarios.map((s) => s.templateId).filter((id) => !!id)
@@ -423,6 +438,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
     skillsGroupName,
     mcps,
     subAgents,
+    rules,
     scenarioItems
   };
 }
@@ -723,6 +739,56 @@ async function writeSubAgentsToFilesystem(cwd, subAgents) {
   console.log(`[SubAgents] Written to ${agentsDir}`);
 }
+// src/run-scenario/agents/claude-code/write-rules.ts
+var import_promises4 = require("fs/promises");
+var import_path5 = require("path");
+var CURSOR_RULES_DIR = ".cursor/rules";
+function toRuleFilename(name, index, nameCount) { // Derives a filename stem (no extension) from a rule name; repeats of the same stem get a numeric suffix.
+  const base = (name || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `rule-${index}`; // Slug: lowercase, whitespace runs become "-", characters outside [a-z0-9-] are dropped, edge dashes are trimmed; an empty result falls back to rule-<index>.
+  const count = nameCount.get(base) ?? 0; // Number of times this stem has already been handed out (nameCount maps stem -> count).
+  nameCount.set(base, count + 1); // Mutates the caller-supplied map so later calls see this use.
+  return count === 0 ? base : `${base}-${count + 1}`; // First use returns the bare stem; the second use returns "<stem>-2", the third "<stem>-3", and so on. NOTE(review): the suffixed result is not itself recorded in nameCount, so it can equal the stem of another rule whose own slug is "<stem>-2" — confirm whether that collision matters.
+}
+async function appendToFile(filePath, content) { // Writes content after whatever text filePath already holds, or writes content alone when nothing was read.
+  let existing = ""; // Stays empty when the read below fails.
+  try {
+    existing = await (0, import_promises4.readFile)(filePath, "utf8");
+  } catch { // Every read failure is ignored here, not only a missing file. NOTE(review): confirm that treating e.g. a permission error as "no existing content" is intended.
+  }
+  const merged = existing ? `${existing.trimEnd()}
+${content}` : content; // Old text has its trailing whitespace trimmed and is joined to content by the line break inside the template literal; an empty string read (or a failed read) yields content alone.
+  await (0, import_promises4.writeFile)(filePath, merged, "utf8"); // The file is rewritten in full with the merged text rather than opened in append mode.
+}
+async function writeRulesToFilesystem(cwd, rules) { // Writes each rule's content into cwd, choosing the target file from rule.ruleType.
+  if (rules.length === 0) return; // Nothing to write (and nothing logged) for an empty list.
+  const nameCount = /* @__PURE__ */ new Map(); // Stem -> use count, passed to toRuleFilename so repeated names get distinct suffixes.
+  let hasCursorRules = false; // Set once the .cursor/rules directory has been created during this call.
+  for (const [i, rule] of rules.entries()) { // Rules are handled one at a time in array order; i is the rule's position in rules.
+    switch (rule.ruleType) { // There is no default case: a ruleType other than the three below is skipped without any message.
+      case "claude-md": {
+        await appendToFile((0, import_path5.join)(cwd, "CLAUDE.md"), rule.content); // All "claude-md" rules accumulate in a single CLAUDE.md directly under cwd.
+        break;
+      }
+      case "agents-md": {
+        await appendToFile((0, import_path5.join)(cwd, "AGENTS.md"), rule.content); // All "agents-md" rules accumulate in a single AGENTS.md directly under cwd.
+        break;
+      }
+      case "cursor-rule": {
+        if (!hasCursorRules) {
+          await (0, import_promises4.mkdir)((0, import_path5.join)(cwd, CURSOR_RULES_DIR), { recursive: true }); // recursive: true creates missing parent directories too and does not fail if the directory already exists.
+          hasCursorRules = true;
+        }
+        const filename = toRuleFilename(rule.name, i, nameCount); // i feeds the rule-<index> fallback used when the name slugifies to an empty string.
+        const filePath = (0, import_path5.join)(cwd, CURSOR_RULES_DIR, `${filename}.md`);
+        await (0, import_promises4.writeFile)(filePath, rule.content, "utf8"); // Each cursor rule gets its own file; unlike the two cases above, this writes rule.content directly instead of appending.
+        break;
+      }
+    }
+  }
+  console.log(`[Rules] Written ${rules.length} rule(s) to ${cwd}`); // The count is rules.length, so it also includes rules whose ruleType matched no case above.
+}
 // src/run-scenario/agents/claude-code/execute.ts
 var DEFAULT_MODEL = import_evalforge_types3.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
 function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
@@ -937,10 +1003,10 @@ async function executeWithClaudeCode(skills, scenario, options) {
   }
   const startTime = /* @__PURE__ */ new Date();
   const allMessages = [];
-  const { mkdir: mkdirAsync, writeFile: writeFile4 } = await import("fs/promises");
+  const { mkdir: mkdirAsync, writeFile: writeFile5 } = await import("fs/promises");
   const claudeDir = `${options.cwd}/.claude`;
   await mkdirAsync(claudeDir, { recursive: true });
-  await writeFile4(`${claudeDir}/settings.json`, "{}", {
+  await writeFile5(`${claudeDir}/settings.json`, "{}", {
     flag: "wx"
   }).catch(() => {
   });
@@ -950,6 +1016,9 @@ async function executeWithClaudeCode(skills, scenario, options) {
   if (options.subAgents && options.subAgents.length > 0) {
     await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
   }
+  if (options.rules && options.rules.length > 0) {
+    await writeRulesToFilesystem(options.cwd, options.rules);
+  }
   console.error(
     "[DEBUG-H4] writeSkillsToFilesystem START",
     JSON.stringify({
@@ -1701,7 +1770,8 @@ var ClaudeCodeAdapter = class {
       aiGatewayHeaders,
       traceContext,
       mcps,
-      subAgents
+      subAgents,
+      rules
     } = context;
     const modelForSdk = modelConfig?.model;
     const options = {
@@ -1713,7 +1783,8 @@ var ClaudeCodeAdapter = class {
       aiGatewayHeaders,
       traceContext,
       mcps,
-      subAgents
+      subAgents,
+      rules
     };
     const { result, llmTrace } = await executeWithClaudeCode(
       skills,
@@ -1740,7 +1811,7 @@ defaultRegistry.register(claudeCodeAdapter);
 // src/run-scenario/file-diff.ts
 var import_fs2 = require("fs");
-var import_path5 = require("path");
+var import_path6 = require("path");
 // ../../node_modules/diff/lib/index.mjs
 function Diff() {
@@ -1916,7 +1987,7 @@ Diff.prototype = {
   tokenize: function tokenize(value) {
     return Array.from(value);
   },
-  join: function join4(chars) {
+  join: function join5(chars) {
     return chars.join("");
   },
   postProcess: function postProcess(changeObjects) {
@@ -2356,8 +2427,8 @@ function snapshotDirectory(dir, baseDir) {
   }
   const entries = (0, import_fs2.readdirSync)(dir, { withFileTypes: true });
   for (const entry of entries) {
-    const fullPath = (0, import_path5.join)(dir, entry.name);
-    const relativePath = (0, import_path5.relative)(base, fullPath);
+    const fullPath = (0, import_path6.join)(dir, entry.name);
+    const relativePath = (0, import_path6.relative)(base, fullPath);
     if (shouldIgnore(entry.name)) {
       continue;
     }
@@ -2495,7 +2566,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
       authToken: config.authToken
     },
     mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
-    subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0
+    subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
+    rules: evalData.rules?.length > 0 ? evalData.rules : void 0
   };
   const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
   const completedAt = (/* @__PURE__ */ new Date()).toISOString();