@wix/evalforge-evaluator 0.96.0 → 0.97.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +22 -16
- package/build/index.js.map +2 -2
- package/build/index.mjs +23 -16
- package/build/index.mjs.map +2 -2
- package/build/types/run-scenario/agents/claude-code/types.d.ts +7 -0
- package/package.json +4 -4
package/build/index.js
CHANGED
|
@@ -1138,6 +1138,20 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
1138
1138
|
permissionMode: "default",
|
|
1139
1139
|
canUseTool
|
|
1140
1140
|
};
|
|
1141
|
+
if (options.systemPrompt === null || options.systemPrompt === "") {
|
|
1142
|
+
} else if (options.systemPrompt != null) {
|
|
1143
|
+
queryOptions.systemPrompt = {
|
|
1144
|
+
type: "preset",
|
|
1145
|
+
preset: "claude_code",
|
|
1146
|
+
append: options.systemPrompt
|
|
1147
|
+
};
|
|
1148
|
+
} else {
|
|
1149
|
+
queryOptions.systemPrompt = {
|
|
1150
|
+
type: "preset",
|
|
1151
|
+
preset: "claude_code",
|
|
1152
|
+
append: import_evalforge_types3.DEFAULT_EVALUATOR_SYSTEM_PROMPT
|
|
1153
|
+
};
|
|
1154
|
+
}
|
|
1141
1155
|
if (options.temperature !== void 0) {
|
|
1142
1156
|
queryOptions.temperature = options.temperature;
|
|
1143
1157
|
}
|
|
@@ -1158,6 +1172,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
1158
1172
|
"[SDK-DEBUG] canUseTool:",
|
|
1159
1173
|
queryOptions.canUseTool ? "custom handler (auto-allow)" : "not set"
|
|
1160
1174
|
);
|
|
1175
|
+
console.log("[SDK-DEBUG] systemPrompt:", queryOptions.systemPrompt);
|
|
1161
1176
|
console.log("[SDK-DEBUG] settingSources:", queryOptions.settingSources);
|
|
1162
1177
|
console.log("[SDK-DEBUG] allowedTools:", queryOptions.allowedTools);
|
|
1163
1178
|
console.log("[SDK-DEBUG] Calling SDK query()...");
|
|
@@ -1255,20 +1270,8 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
1255
1270
|
}, HEARTBEAT_INTERVAL_MS);
|
|
1256
1271
|
}
|
|
1257
1272
|
const sdkPromise = (async () => {
|
|
1258
|
-
const evaluatorPromptSuffix = `
|
|
1259
|
-
|
|
1260
|
-
IMPORTANT: This is an automated evaluation run. Follow these guidelines:
|
|
1261
|
-
1. Execute the requested changes immediately without asking for confirmation.
|
|
1262
|
-
2. Do NOT ask "would you like me to proceed?" or similar questions.
|
|
1263
|
-
3. Do NOT use the Task tool to delegate simple operations - do them directly yourself.
|
|
1264
|
-
4. Keep your approach simple and direct - avoid excessive planning.
|
|
1265
|
-
5. Make targeted edits using Read and Edit tools rather than exploring the entire codebase.
|
|
1266
|
-
6. If you encounter an error, fix it directly rather than starting over.
|
|
1267
|
-
7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.
|
|
1268
|
-
8. Before finishing, run the project's package manager install command (e.g. \`npm install\`, \`yarn install\`, or \`pnpm install\` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.`;
|
|
1269
|
-
const fullPrompt = scenario.triggerPrompt + evaluatorPromptSuffix;
|
|
1270
1273
|
for await (const message of query({
|
|
1271
|
-
prompt: fullPrompt,
|
|
1274
|
+
prompt: scenario.triggerPrompt,
|
|
1272
1275
|
options: queryOptions
|
|
1273
1276
|
})) {
|
|
1274
1277
|
messageCount++;
|
|
@@ -1779,7 +1782,8 @@ var ClaudeCodeAdapter = class {
|
|
|
1779
1782
|
traceContext,
|
|
1780
1783
|
mcps,
|
|
1781
1784
|
subAgents,
|
|
1782
|
-
rules
|
|
1785
|
+
rules,
|
|
1786
|
+
systemPrompt
|
|
1783
1787
|
} = context;
|
|
1784
1788
|
const modelForSdk = modelConfig?.model;
|
|
1785
1789
|
const options = {
|
|
@@ -1792,7 +1796,8 @@ var ClaudeCodeAdapter = class {
|
|
|
1792
1796
|
traceContext,
|
|
1793
1797
|
mcps,
|
|
1794
1798
|
subAgents,
|
|
1795
|
-
rules
|
|
1799
|
+
rules,
|
|
1800
|
+
systemPrompt
|
|
1796
1801
|
};
|
|
1797
1802
|
const { result, llmTrace } = await executeWithClaudeCode(
|
|
1798
1803
|
skills,
|
|
@@ -2575,7 +2580,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
2575
2580
|
},
|
|
2576
2581
|
mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
|
|
2577
2582
|
subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
|
|
2578
|
-
rules: evalData.rules?.length > 0 ? evalData.rules : void 0
|
|
2583
|
+
rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
|
|
2584
|
+
systemPrompt: agent?.systemPrompt
|
|
2579
2585
|
};
|
|
2580
2586
|
const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
|
|
2581
2587
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|