npm - @wix/evalforge-evaluator - Versions diffs - 0.114.0 → 0.116.0 - Mend

@wix/evalforge-evaluator 0.114.0 → 0.116.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10)

package/build/index.js +98 -46
package/build/index.js.map +3 -3
package/build/index.mjs +98 -46
package/build/index.mjs.map +3 -3
package/build/types/run-scenario/agents/claude-code/claude-code-adapter.d.ts +5 -0
package/build/types/run-scenario/agents/claude-code/execute.d.ts +6 -0
package/build/types/run-scenario/agents/opencode/execute.d.ts +8 -0
package/build/types/run-scenario/agents/opencode/opencode-adapter.d.ts +1 -0
package/build/types/run-scenario/file-diff.d.ts +10 -2
package/package.json +5 -5

package/build/types/run-scenario/agents/claude-code/claude-code-adapter.d.ts CHANGED Viewed

@@ -13,6 +13,11 @@ export declare class ClaudeCodeAdapter implements AgentAdapter {
     readonly id = "claude-code";
     readonly name = "Claude Code";
     readonly supportedCommands: readonly [AgentRunCommand.CLAUDE];
+    /**
+     * Write infrastructure files (settings, MCPs, sub-agents, rules, skills)
+     * before the baseline snapshot is taken.
+     */
+    prepareEnvironment(context: AgentExecutionContext): Promise<void>;
     /**
      * Execute a skill using the Claude Code SDK.
      *

package/build/types/run-scenario/agents/claude-code/execute.d.ts CHANGED Viewed

@@ -13,6 +13,12 @@ export interface TimestampedMessage {
     message: SDKMessage;
     receivedAt: Date;
 }
+/**
+ * Write all infrastructure files (settings, MCPs, sub-agents, rules, skills)
+ * to the working directory. Called by the adapter's `prepareEnvironment()` so
+ * that the orchestrator can take the baseline snapshot *after* infra is in place.
+ */
+export declare function prepareClaudeCodeEnvironment(cwd: string, skills: SkillWithLatestVersion[], options: Pick<ClaudeCodeExecutionOptions, 'mcps' | 'subAgents' | 'rules'>): Promise<void>;
 /**
  * Execute skills using the Claude Agent SDK.
  *

package/build/types/run-scenario/agents/opencode/execute.d.ts CHANGED Viewed

@@ -1,5 +1,13 @@
 import type { SkillWithLatestVersion, TestScenario, LLMTrace, ConversationMessage } from '@wix/evalforge-types';
 import type { OpenCodeExecutionOptions, OpenCodeExecutionResult } from './types.js';
+/**
+ * Write all infrastructure files (sub-agents, rules, skills)
+ * to the working directory. Called by the adapter's `prepareEnvironment()` so
+ * that the orchestrator can take the baseline snapshot *after* infra is in place.
+ *
+ * Note: MCPs for OpenCode are passed inline via config, not written to filesystem.
+ */
+export declare function prepareOpenCodeEnvironment(cwd: string, skills: SkillWithLatestVersion[], options: Pick<OpenCodeExecutionOptions, 'mcps' | 'subAgents' | 'rules'>): Promise<void>;
 /**
  * Execute skills using the OpenCode SDK.
  *

package/build/types/run-scenario/agents/opencode/opencode-adapter.d.ts CHANGED Viewed

@@ -13,6 +13,7 @@ export declare class OpenCodeAdapter implements AgentAdapter {
     readonly id = "opencode";
     readonly name = "OpenCode";
     readonly supportedCommands: readonly [AgentRunCommand.OPENCODE];
+    prepareEnvironment(context: AgentExecutionContext): Promise<void>;
     execute(context: AgentExecutionContext): Promise<AgentExecutionResult>;
 }
 export declare const openCodeAdapter: OpenCodeAdapter;

package/build/types/run-scenario/file-diff.d.ts CHANGED Viewed

@@ -5,6 +5,12 @@
  * when running against a template.
  */
 import type { DiffContent, TemplateFile } from '@wix/evalforge-types';
+/**
+ * Derive the set of infrastructure file paths by comparing a pre-prepare
+ * snapshot with the post-prepare (baseline) snapshot.
+ * Any file that was created or modified by `prepareEnvironment()` is infrastructure.
+ */
+export declare function deriveInfrastructurePaths(prePrep: FileSnapshot, postPrep: FileSnapshot): Set<string>;
 /**
  * A snapshot of file contents in a directory.
  * Maps relative file paths to their contents.
@@ -26,14 +32,16 @@ export declare function snapshotDirectory(dir: string, baseDir?: string): FileSn
  *
  * @param before - Snapshot before execution
  * @param after - Snapshot after execution
+ * @param infrastructurePaths - Optional set of paths known to be infrastructure files
  * @returns Array of DiffContent for files that were created, modified, or renamed
  */
-export declare function diffSnapshots(before: FileSnapshot, after: FileSnapshot): DiffContent[];
+export declare function diffSnapshots(before: FileSnapshot, after: FileSnapshot, infrastructurePaths?: Set<string>): DiffContent[];
 /**
  * Extract template files with their status from before/after snapshots.
  *
  * @param before - Snapshot before execution
  * @param after - Snapshot after execution
+ * @param infrastructurePaths - Optional set of paths known to be infrastructure files
  * @returns Array of TemplateFile with status indicators
  */
-export declare function extractTemplateFiles(before: FileSnapshot, after: FileSnapshot): TemplateFile[];
+export declare function extractTemplateFiles(before: FileSnapshot, after: FileSnapshot, infrastructurePaths?: Set<string>): TemplateFile[];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@wix/evalforge-evaluator",
-  "version": "0.114.0",
+  "version": "0.116.0",
   "description": "EvalForge Evaluator",
   "bin": "./build/index.js",
   "files": [
@@ -22,9 +22,9 @@
     "@anthropic-ai/claude-agent-sdk": "^0.2.63",
     "@anthropic-ai/claude-code": "^2.1.63",
     "@opencode-ai/sdk": "^1.2.15",
-    "@wix/eval-assertions": "0.27.0",
-    "@wix/evalforge-github-client": "0.32.0",
-    "@wix/evalforge-types": "0.57.0",
+    "@wix/eval-assertions": "0.28.0",
+    "@wix/evalforge-github-client": "0.33.0",
+    "@wix/evalforge-types": "0.58.0",
     "ai": "^6.0.107",
     "diff": "^7.0.0",
     "tar": "^7.5.3",
@@ -63,5 +63,5 @@
       "artifactId": "evalforge-evaluator"
     }
   },
-  "falconPackageHash": "7580e561b029720761957e0a2ecad733cd2bc610cfe82d11e3c8060a"
+  "falconPackageHash": "9c7374a0596e12fcb8bdb6e1c2fd18bb9160ac34ed39cc196975dd98"
 }