@wix/evalforge-evaluator 0.96.0 → 0.97.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -1138,6 +1138,20 @@ async function executeWithClaudeCode(skills, scenario, options) {
1138
1138
  permissionMode: "default",
1139
1139
  canUseTool
1140
1140
  };
1141
+ if (options.systemPrompt === null || options.systemPrompt === "") {
1142
+ } else if (options.systemPrompt != null) {
1143
+ queryOptions.systemPrompt = {
1144
+ type: "preset",
1145
+ preset: "claude_code",
1146
+ append: options.systemPrompt
1147
+ };
1148
+ } else {
1149
+ queryOptions.systemPrompt = {
1150
+ type: "preset",
1151
+ preset: "claude_code",
1152
+ append: import_evalforge_types3.DEFAULT_EVALUATOR_SYSTEM_PROMPT
1153
+ };
1154
+ }
1141
1155
  if (options.temperature !== void 0) {
1142
1156
  queryOptions.temperature = options.temperature;
1143
1157
  }
@@ -1158,6 +1172,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
1158
1172
  "[SDK-DEBUG] canUseTool:",
1159
1173
  queryOptions.canUseTool ? "custom handler (auto-allow)" : "not set"
1160
1174
  );
1175
+ console.log("[SDK-DEBUG] systemPrompt:", queryOptions.systemPrompt);
1161
1176
  console.log("[SDK-DEBUG] settingSources:", queryOptions.settingSources);
1162
1177
  console.log("[SDK-DEBUG] allowedTools:", queryOptions.allowedTools);
1163
1178
  console.log("[SDK-DEBUG] Calling SDK query()...");
@@ -1255,20 +1270,8 @@ async function executeWithClaudeCode(skills, scenario, options) {
1255
1270
  }, HEARTBEAT_INTERVAL_MS);
1256
1271
  }
1257
1272
  const sdkPromise = (async () => {
1258
- const evaluatorPromptSuffix = `
1259
-
1260
- IMPORTANT: This is an automated evaluation run. Follow these guidelines:
1261
- 1. Execute the requested changes immediately without asking for confirmation.
1262
- 2. Do NOT ask "would you like me to proceed?" or similar questions.
1263
- 3. Do NOT use the Task tool to delegate simple operations - do them directly yourself.
1264
- 4. Keep your approach simple and direct - avoid excessive planning.
1265
- 5. Make targeted edits using Read and Edit tools rather than exploring the entire codebase.
1266
- 6. If you encounter an error, fix it directly rather than starting over.
1267
- 7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.
1268
- 8. Before finishing, run the project's package manager install command (e.g. \`npm install\`, \`yarn install\`, or \`pnpm install\` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.`;
1269
- const fullPrompt = scenario.triggerPrompt + evaluatorPromptSuffix;
1270
1273
  for await (const message of query({
1271
- prompt: fullPrompt,
1274
+ prompt: scenario.triggerPrompt,
1272
1275
  options: queryOptions
1273
1276
  })) {
1274
1277
  messageCount++;
@@ -1779,7 +1782,8 @@ var ClaudeCodeAdapter = class {
1779
1782
  traceContext,
1780
1783
  mcps,
1781
1784
  subAgents,
1782
- rules
1785
+ rules,
1786
+ systemPrompt
1783
1787
  } = context;
1784
1788
  const modelForSdk = modelConfig?.model;
1785
1789
  const options = {
@@ -1792,7 +1796,8 @@ var ClaudeCodeAdapter = class {
1792
1796
  traceContext,
1793
1797
  mcps,
1794
1798
  subAgents,
1795
- rules
1799
+ rules,
1800
+ systemPrompt
1796
1801
  };
1797
1802
  const { result, llmTrace } = await executeWithClaudeCode(
1798
1803
  skills,
@@ -2575,7 +2580,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2575
2580
  },
2576
2581
  mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
2577
2582
  subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
2578
- rules: evalData.rules?.length > 0 ? evalData.rules : void 0
2583
+ rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
2584
+ systemPrompt: agent?.systemPrompt
2579
2585
  };
2580
2586
  const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
2581
2587
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();