@wix/evalforge-evaluator 0.96.0 → 0.97.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -680,6 +680,7 @@ import { AgentRunCommand } from "@wix/evalforge-types";
680
680
  // src/run-scenario/agents/claude-code/execute.ts
681
681
  import {
682
682
  ClaudeModel,
683
+ DEFAULT_EVALUATOR_SYSTEM_PROMPT,
683
684
  LLMStepType,
684
685
  LiveTraceEventType,
685
686
  TRACE_EVENT_PREFIX
@@ -1128,6 +1129,20 @@ async function executeWithClaudeCode(skills, scenario, options) {
1128
1129
  permissionMode: "default",
1129
1130
  canUseTool
1130
1131
  };
1132
+ if (options.systemPrompt === null || options.systemPrompt === "") {
1133
+ } else if (options.systemPrompt != null) {
1134
+ queryOptions.systemPrompt = {
1135
+ type: "preset",
1136
+ preset: "claude_code",
1137
+ append: options.systemPrompt
1138
+ };
1139
+ } else {
1140
+ queryOptions.systemPrompt = {
1141
+ type: "preset",
1142
+ preset: "claude_code",
1143
+ append: DEFAULT_EVALUATOR_SYSTEM_PROMPT
1144
+ };
1145
+ }
1131
1146
  if (options.temperature !== void 0) {
1132
1147
  queryOptions.temperature = options.temperature;
1133
1148
  }
@@ -1148,6 +1163,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
1148
1163
  "[SDK-DEBUG] canUseTool:",
1149
1164
  queryOptions.canUseTool ? "custom handler (auto-allow)" : "not set"
1150
1165
  );
1166
+ console.log("[SDK-DEBUG] systemPrompt:", queryOptions.systemPrompt);
1151
1167
  console.log("[SDK-DEBUG] settingSources:", queryOptions.settingSources);
1152
1168
  console.log("[SDK-DEBUG] allowedTools:", queryOptions.allowedTools);
1153
1169
  console.log("[SDK-DEBUG] Calling SDK query()...");
@@ -1245,20 +1261,8 @@ async function executeWithClaudeCode(skills, scenario, options) {
1245
1261
  }, HEARTBEAT_INTERVAL_MS);
1246
1262
  }
1247
1263
  const sdkPromise = (async () => {
1248
- const evaluatorPromptSuffix = `
1249
-
1250
- IMPORTANT: This is an automated evaluation run. Follow these guidelines:
1251
- 1. Execute the requested changes immediately without asking for confirmation.
1252
- 2. Do NOT ask "would you like me to proceed?" or similar questions.
1253
- 3. Do NOT use the Task tool to delegate simple operations - do them directly yourself.
1254
- 4. Keep your approach simple and direct - avoid excessive planning.
1255
- 5. Make targeted edits using Read and Edit tools rather than exploring the entire codebase.
1256
- 6. If you encounter an error, fix it directly rather than starting over.
1257
- 7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.
1258
- 8. Before finishing, run the project's package manager install command (e.g. \`npm install\`, \`yarn install\`, or \`pnpm install\` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.`;
1259
- const fullPrompt = scenario.triggerPrompt + evaluatorPromptSuffix;
1260
1264
  for await (const message of query({
1261
- prompt: fullPrompt,
1265
+ prompt: scenario.triggerPrompt,
1262
1266
  options: queryOptions
1263
1267
  })) {
1264
1268
  messageCount++;
@@ -1769,7 +1773,8 @@ var ClaudeCodeAdapter = class {
1769
1773
  traceContext,
1770
1774
  mcps,
1771
1775
  subAgents,
1772
- rules
1776
+ rules,
1777
+ systemPrompt
1773
1778
  } = context;
1774
1779
  const modelForSdk = modelConfig?.model;
1775
1780
  const options = {
@@ -1782,7 +1787,8 @@ var ClaudeCodeAdapter = class {
1782
1787
  traceContext,
1783
1788
  mcps,
1784
1789
  subAgents,
1785
- rules
1790
+ rules,
1791
+ systemPrompt
1786
1792
  };
1787
1793
  const { result, llmTrace } = await executeWithClaudeCode(
1788
1794
  skills,
@@ -2565,7 +2571,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
2565
2571
  },
2566
2572
  mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
2567
2573
  subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
2568
- rules: evalData.rules?.length > 0 ? evalData.rules : void 0
2574
+ rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
2575
+ systemPrompt: agent?.systemPrompt
2569
2576
  };
2570
2577
  const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
2571
2578
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();