@wix/evalforge-evaluator 0.91.0 → 0.92.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/build/index.js +75 -9
- package/build/index.js.map +4 -4
- package/build/index.mjs +74 -8
- package/build/index.mjs.map +4 -4
- package/build/types/api-client.d.ts +2 -1
- package/build/types/fetch-evaluation-data.d.ts +2 -1
- package/build/types/run-scenario/agents/claude-code/types.d.ts +3 -1
- package/build/types/run-scenario/agents/claude-code/write-rules.d.ts +13 -0
- package/package.json +4 -4
package/build/index.mjs
CHANGED
|
@@ -159,6 +159,9 @@ function createApiClient(serverUrl, options = "") {
|
|
|
159
159
|
getSubAgent(projectId2, id) {
|
|
160
160
|
return fetchJson(`/projects/${projectId2}/sub-agents/${id}`);
|
|
161
161
|
},
|
|
162
|
+
getRule(projectId2, id) {
|
|
163
|
+
return fetchJson(`/projects/${projectId2}/rules/${id}`);
|
|
164
|
+
},
|
|
162
165
|
getAssertion(projectId2, id) {
|
|
163
166
|
return fetchJson(`/projects/${projectId2}/assertions/${id}`);
|
|
164
167
|
},
|
|
@@ -358,6 +361,12 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
358
361
|
evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
|
|
359
362
|
);
|
|
360
363
|
}
|
|
364
|
+
let rules = [];
|
|
365
|
+
if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
|
|
366
|
+
rules = await Promise.all(
|
|
367
|
+
evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
|
|
368
|
+
);
|
|
369
|
+
}
|
|
361
370
|
const templateIds = [
|
|
362
371
|
...new Set(
|
|
363
372
|
scenarios.map((s) => s.templateId).filter((id) => !!id)
|
|
@@ -409,6 +418,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
409
418
|
skillsGroupName,
|
|
410
419
|
mcps,
|
|
411
420
|
subAgents,
|
|
421
|
+
rules,
|
|
412
422
|
scenarioItems
|
|
413
423
|
};
|
|
414
424
|
}
|
|
@@ -719,6 +729,56 @@ async function writeSubAgentsToFilesystem(cwd, subAgents) {
|
|
|
719
729
|
console.log(`[SubAgents] Written to ${agentsDir}`);
|
|
720
730
|
}
|
|
721
731
|
|
|
732
|
+
// src/run-scenario/agents/claude-code/write-rules.ts
|
|
733
|
+
import { mkdir as mkdir3, writeFile as writeFile4, readFile } from "fs/promises";
|
|
734
|
+
import { join as join4 } from "path";
|
|
735
|
+
var CURSOR_RULES_DIR = ".cursor/rules";
|
|
736
|
+
/**
 * Derives a unique, filesystem-safe base filename (without extension) for a rule.
 *
 * The name is lower-cased, whitespace runs become "-", every character outside
 * `[a-z0-9-]` is dropped, and leading/trailing dashes are trimmed. When nothing
 * survives sanitizing, the name falls back to `rule-${index}`.
 *
 * Repeated slugs get a numeric suffix ("foo", "foo-2", "foo-3", ...). Generated
 * names are reserved in `nameCount` as well, so a rule that is literally named
 * "foo-2" can never collide with the suffix produced for a second "foo".
 *
 * @param {string} [name] - Human-readable rule name; may be empty or missing.
 * @param {number} index - Position of the rule; used only for the fallback name.
 * @param {Map<string, number>} nameCount - Registry of names handed out so far.
 *   Mutated by this call; share one instance across all rules of a run.
 * @returns {string} A filename stem not previously returned for this `nameCount`.
 */
function toRuleFilename(name, index, nameCount) {
  const slug = String(name || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^a-z0-9-]/g, "")
    .replace(/^-+|-+$/g, "");
  const base = slug || `rule-${index}`;

  let count = nameCount.get(base) ?? 0;
  let candidate = count === 0 ? base : `${base}-${count + 1}`;
  // A suffixed candidate may already be taken by a rule whose own slug equals
  // it; keep bumping the suffix until an unused name is found.
  while (candidate !== base && nameCount.has(candidate)) {
    count += 1;
    candidate = `${base}-${count + 1}`;
  }

  nameCount.set(base, count + 1);
  if (candidate !== base) {
    // Reserve the generated name so a later rule with that exact slug is
    // de-duplicated against it instead of overwriting the file.
    nameCount.set(candidate, (nameCount.get(candidate) ?? 0) + 1);
  }
  return candidate;
}
|
|
742
|
+
/**
 * Appends `content` to the file at `filePath`, creating the file if needed.
 *
 * Existing text has its trailing whitespace trimmed and is separated from the
 * new content by exactly one blank line. If the file is missing, empty, or
 * contains only whitespace, it ends up holding `content` alone.
 *
 * @param {string} filePath - Path of the file to extend.
 * @param {string} content - Text to append.
 * @returns {Promise<void>}
 * @throws Rethrows any read error other than "file not found", and any write error.
 */
async function appendToFile(filePath, content) {
  let existing = "";
  try {
    existing = await readFile(filePath, "utf8");
  } catch (err) {
    // A missing file simply means there is nothing to merge with. Any other
    // failure must surface: continuing would overwrite content we could not read.
    if (err?.code !== "ENOENT") {
      throw err;
    }
  }
  const head = existing.trimEnd();
  const merged = head ? `${head}\n\n${content}` : content;
  await writeFile4(filePath, merged, "utf8");
}
|
|
753
|
+
/**
 * Writes evaluation rules into the working directory `cwd`.
 *
 * Rules are processed sequentially, in array order, by `rule.ruleType`:
 *  - "claude-md":   `rule.content` is appended to `<cwd>/CLAUDE.md`.
 *  - "agents-md":   `rule.content` is appended to `<cwd>/AGENTS.md`.
 *  - "cursor-rule": `rule.content` is written to its own
 *    `<cwd>/.cursor/rules/<name>.md` file, where `<name>` comes from
 *    `toRuleFilename`; the directory is created on first use.
 * Any other `ruleType` is skipped with a warning instead of being dropped
 * silently, and the final log line reports how many rules were actually written.
 *
 * @param {string} cwd - Directory the rule files are written into.
 * @param {Array<{ruleType: string, name?: string, content: string}>} rules -
 *   Rules to write; an empty array is a no-op.
 * @returns {Promise<void>}
 */
async function writeRulesToFilesystem(cwd, rules) {
  if (rules.length === 0) return;
  const nameCount = /* @__PURE__ */ new Map();
  let hasCursorRules = false;
  let written = 0;
  for (const [i, rule] of rules.entries()) {
    switch (rule.ruleType) {
      case "claude-md": {
        await appendToFile(join4(cwd, "CLAUDE.md"), rule.content);
        break;
      }
      case "agents-md": {
        await appendToFile(join4(cwd, "AGENTS.md"), rule.content);
        break;
      }
      case "cursor-rule": {
        // Create the rules directory lazily, and only once per call.
        if (!hasCursorRules) {
          await mkdir3(join4(cwd, CURSOR_RULES_DIR), { recursive: true });
          hasCursorRules = true;
        }
        const filename = toRuleFilename(rule.name, i, nameCount);
        const filePath = join4(cwd, CURSOR_RULES_DIR, `${filename}.md`);
        await writeFile4(filePath, rule.content, "utf8");
        break;
      }
      default: {
        // Make the omission visible; do not count the rule as written.
        console.warn(
          `[Rules] Skipping rule "${rule.name}" with unsupported ruleType "${rule.ruleType}"`
        );
        continue;
      }
    }
    written += 1;
  }
  console.log(`[Rules] Written ${written} rule(s) to ${cwd}`);
}
|
|
781
|
+
|
|
722
782
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
723
783
|
var DEFAULT_MODEL = ClaudeModel.CLAUDE_4_5_SONNET_1_0;
|
|
724
784
|
function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
|
|
@@ -933,10 +993,10 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
933
993
|
}
|
|
934
994
|
const startTime = /* @__PURE__ */ new Date();
|
|
935
995
|
const allMessages = [];
|
|
936
|
-
const { mkdir: mkdirAsync, writeFile:
|
|
996
|
+
const { mkdir: mkdirAsync, writeFile: writeFile5 } = await import("fs/promises");
|
|
937
997
|
const claudeDir = `${options.cwd}/.claude`;
|
|
938
998
|
await mkdirAsync(claudeDir, { recursive: true });
|
|
939
|
-
await
|
|
999
|
+
await writeFile5(`${claudeDir}/settings.json`, "{}", {
|
|
940
1000
|
flag: "wx"
|
|
941
1001
|
}).catch(() => {
|
|
942
1002
|
});
|
|
@@ -946,6 +1006,9 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
946
1006
|
if (options.subAgents && options.subAgents.length > 0) {
|
|
947
1007
|
await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
|
|
948
1008
|
}
|
|
1009
|
+
if (options.rules && options.rules.length > 0) {
|
|
1010
|
+
await writeRulesToFilesystem(options.cwd, options.rules);
|
|
1011
|
+
}
|
|
949
1012
|
console.error(
|
|
950
1013
|
"[DEBUG-H4] writeSkillsToFilesystem START",
|
|
951
1014
|
JSON.stringify({
|
|
@@ -1697,7 +1760,8 @@ var ClaudeCodeAdapter = class {
|
|
|
1697
1760
|
aiGatewayHeaders,
|
|
1698
1761
|
traceContext,
|
|
1699
1762
|
mcps,
|
|
1700
|
-
subAgents
|
|
1763
|
+
subAgents,
|
|
1764
|
+
rules
|
|
1701
1765
|
} = context;
|
|
1702
1766
|
const modelForSdk = modelConfig?.model;
|
|
1703
1767
|
const options = {
|
|
@@ -1709,7 +1773,8 @@ var ClaudeCodeAdapter = class {
|
|
|
1709
1773
|
aiGatewayHeaders,
|
|
1710
1774
|
traceContext,
|
|
1711
1775
|
mcps,
|
|
1712
|
-
subAgents
|
|
1776
|
+
subAgents,
|
|
1777
|
+
rules
|
|
1713
1778
|
};
|
|
1714
1779
|
const { result, llmTrace } = await executeWithClaudeCode(
|
|
1715
1780
|
skills,
|
|
@@ -1736,7 +1801,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
1736
1801
|
|
|
1737
1802
|
// src/run-scenario/file-diff.ts
|
|
1738
1803
|
import { readdirSync, readFileSync as readFileSync2, statSync, existsSync as existsSync2 } from "fs";
|
|
1739
|
-
import { join as
|
|
1804
|
+
import { join as join6, relative } from "path";
|
|
1740
1805
|
|
|
1741
1806
|
// ../../node_modules/diff/lib/index.mjs
|
|
1742
1807
|
function Diff() {
|
|
@@ -1912,7 +1977,7 @@ Diff.prototype = {
|
|
|
1912
1977
|
tokenize: function tokenize(value) {
|
|
1913
1978
|
return Array.from(value);
|
|
1914
1979
|
},
|
|
1915
|
-
join: function
|
|
1980
|
+
join: function join5(chars) {
|
|
1916
1981
|
return chars.join("");
|
|
1917
1982
|
},
|
|
1918
1983
|
postProcess: function postProcess(changeObjects) {
|
|
@@ -2352,7 +2417,7 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
2352
2417
|
}
|
|
2353
2418
|
const entries = readdirSync(dir, { withFileTypes: true });
|
|
2354
2419
|
for (const entry of entries) {
|
|
2355
|
-
const fullPath =
|
|
2420
|
+
const fullPath = join6(dir, entry.name);
|
|
2356
2421
|
const relativePath = relative(base, fullPath);
|
|
2357
2422
|
if (shouldIgnore(entry.name)) {
|
|
2358
2423
|
continue;
|
|
@@ -2491,7 +2556,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
2491
2556
|
authToken: config.authToken
|
|
2492
2557
|
},
|
|
2493
2558
|
mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
|
|
2494
|
-
subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0
|
|
2559
|
+
subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
|
|
2560
|
+
rules: evalData.rules?.length > 0 ? evalData.rules : void 0
|
|
2495
2561
|
};
|
|
2496
2562
|
const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
|
|
2497
2563
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|