npm - @wix/evalforge-evaluator - Versions diffs - 0.99.0 → 0.100.0 - Mend

@wix/evalforge-evaluator 0.99.0 → 0.100.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/build/index.js +147 -148
package/build/index.js.map +4 -4
package/build/index.mjs +127 -128
package/build/index.mjs.map +4 -4
package/build/types/fetch-evaluation-data.d.ts +2 -2
package/build/types/run-scenario/agents/claude-code/write-skills.d.ts +3 -1
package/build/types/run-scenario/agents/registry.d.ts +32 -63
package/build/types/run-scenario/index.d.ts +1 -1
package/build/types/run-scenario/run-agent-with-context.d.ts +3 -3
package/build/types/run-scenario/utils/write-files.d.ts +6 -0
package/package.json +4 -4

package/build/types/fetch-evaluation-data.d.ts CHANGED Viewed

@@ -31,11 +31,11 @@ export interface ScenarioItem {
 }
 /**
  * Data shape returned by fetchEvaluationData for the run flow.
- * Run-level code agent + skills + skills group + mcps + subAgents; per-scenario template.
+ * Run-level agent + skills + skills group + mcps + subAgents; per-scenario template.
  */
 export interface EvaluationData {
     evalRun: EvalRun;
-    codeAgent: Agent | null;
+    agent: Agent | null;
     skills: SkillWithLatestVersion[];
     skillsGroup: SkillsGroup | null;
     /** Display name for the skills group (from skillsGroup.name when present) */

package/build/types/run-scenario/agents/claude-code/write-skills.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { GitHubSource, SkillFile, SkillWithLatestVersion } from '@wix/evalforge-types';
+import { writeFilesToDirectory } from '../../utils/write-files.js';
 export type FetchGitHubFolderFn = (source: GitHubSource, options?: {
     userAgent?: string;
 }) => Promise<SkillFile[]>;
@@ -17,5 +18,6 @@ export declare function writeSkillsToFilesystem(cwd: string, skills: SkillWithLa
 export declare function writeSkillToFilesystem(cwd: string, skill: SkillWithLatestVersion, fetchFn?: FetchGitHubFolderFn): Promise<void>;
 /**
  * Write skill files from a snapshot array to the filesystem.
+ * @deprecated Use writeFilesToDirectory from utils/write-files.ts instead
  */
-export declare function writeSkillFiles(skillDir: string, files: SkillFile[]): Promise<void>;
+export declare const writeSkillFiles: typeof writeFilesToDirectory;

package/build/types/run-scenario/agents/registry.d.ts CHANGED Viewed

@@ -1,103 +1,72 @@
-import type { AgentAdapter, AgentRunCommand } from '@wix/evalforge-types';
+import type { AgentAdapter } from '@wix/evalforge-types';
 /**
  * Registry for agent adapters.
  *
- * Provides registration and lookup of agent adapters by their supported
- * commands. This enables the evaluation system to dynamically select
- * the appropriate adapter based on the agent's runCommand property.
+ * Supports two lookup strategies:
+ * - By CLI command (e.g. 'claude') for CLI-based agents
+ * - By adapter ID (e.g. 'simple-agent') for SDK-based agents
  *
  * @example
  * ```typescript
  * const registry = new AgentAdapterRegistry();
  * registry.register(new ClaudeCodeAdapter());
  *
- * const adapter = registry.get(AgentRunCommand.CLAUDE);
+ * const adapter = registry.resolve('claude');
  * if (adapter) {
  *   const result = await adapter.execute(context);
  * }
  * ```
  */
 export declare class AgentAdapterRegistry {
-    /**
-     * Map of run commands to their registered adapters.
-     * Multiple commands can map to the same adapter.
-     */
-    private adapters;
-    /**
-     * Set of all registered adapter instances (for getAll).
-     */
+    /** Map of CLI commands to their registered adapters. */
+    private commandMap;
+    /** Map of adapter IDs to their registered adapters. */
+    private idMap;
+    /** Set of all registered adapter instances (for getAll). */
     private registeredAdapters;
     /**
      * Register an agent adapter.
      *
-     * The adapter will be registered for all commands in its supportedCommands array.
-     * If a command is already registered, it will be overwritten with a warning.
-     *
-     * @param adapter - The adapter to register
+     * The adapter is registered by its ID and for all commands in its supportedCommands array.
+     * If a command or ID is already registered, it will be overwritten with a warning.
      */
     register(adapter: AgentAdapter): void;
+    /** Get an adapter by CLI command. */
+    getByCommand(command: string): AgentAdapter | undefined;
+    /** Get an adapter by adapter ID. */
+    getById(adapterId: string): AgentAdapter | undefined;
     /**
-     * Get an adapter by run command.
-     *
-     * @param runCommand - The run command to look up
-     * @returns The registered adapter, or undefined if not found
-     */
-    get(runCommand: AgentRunCommand): AgentAdapter | undefined;
-    /**
-     * Check if a command has a registered adapter.
-     *
-     * @param runCommand - The run command to check
-     * @returns True if an adapter is registered for this command
-     */
-    has(runCommand: AgentRunCommand): boolean;
-    /**
-     * Get all registered adapters.
-     *
-     * @returns Array of all unique registered adapters
+     * Unified lookup: tries CLI command first, then adapter ID.
+     * Use this when the identifier could be either a command or an adapter ID.
      */
+    resolve(identifier: string): AgentAdapter | undefined;
+    /** Check if a command or adapter ID has a registered adapter. */
+    has(identifier: string): boolean;
+    /** Get all registered adapters. */
     getAll(): AgentAdapter[];
-    /**
-     * Get all supported commands.
-     *
-     * @returns Array of all registered run commands
-     */
-    getSupportedCommands(): AgentRunCommand[];
+    /** Get all supported CLI commands. */
+    getSupportedCommands(): string[];
+    /** Get all registered adapter IDs. */
+    getAdapterIds(): string[];
     /**
      * Unregister an adapter by its ID.
-     *
      * Removes the adapter and all its command mappings.
-     *
-     * @param adapterId - The ID of the adapter to remove
-     * @returns True if the adapter was found and removed
      */
     unregister(adapterId: string): boolean;
-    /**
-     * Clear all registered adapters.
-     * Primarily useful for testing.
-     */
+    /** Clear all registered adapters. Primarily useful for testing. */
     clear(): void;
 }
 /**
  * Default global registry instance.
- *
- * This is the main registry used by the evaluation system.
  * Adapters are auto-registered here when their modules are imported.
  */
 export declare const defaultRegistry: AgentAdapterRegistry;
 /**
  * Get an adapter from the default registry.
  *
- * Convenience function that throws a helpful error if the adapter is not found.
- *
- * @param runCommand - The run command to look up
- * @returns The registered adapter
- * @throws Error if no adapter is registered for the command
- */
-export declare function getAdapter(runCommand: AgentRunCommand): AgentAdapter;
-/**
- * Check if a command has a registered adapter in the default registry.
- *
- * @param runCommand - The run command to check
- * @returns True if an adapter is registered for this command
+ * Uses unified lookup (command first, then adapter ID).
+ * Throws a helpful error if the adapter is not found.
  */
-export declare function hasAdapter(runCommand: AgentRunCommand): boolean;
+export declare function getAdapter(identifier: string): AgentAdapter;
+/** Check if an identifier has a registered adapter in the default registry. */
+export declare function hasAdapter(identifier: string): boolean;

package/build/types/run-scenario/index.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export type { ScenarioItem, EvaluationData } from '../fetch-evaluation-data.js';
  * @param config - Evaluator configuration
  * @param evalRunId - The evaluation run ID
  * @param scenario - The test scenario to run
- * @param evalData - Fetched evaluation data (skills, skillsGroup, codeAgent, mcps, subAgents)
+ * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
  * @param template - Optional pre-fetched template entity
  * @param resolvedAssertions - Optional assertions resolved from assertionIds
  * @returns Complete evaluation result

package/build/types/run-scenario/run-agent-with-context.d.ts CHANGED Viewed

@@ -6,13 +6,13 @@ import type { EvaluationData } from '../fetch-evaluation-data.js';
  * Run the agent with the full execution context (skills, MCPs, sub-agents, etc.).
  *
  * Uses the agent adapter registry to select the appropriate adapter based on
- * the agent's runCommand. If no agent is specified, defaults to 'claude'.
- * The context includes skills group, optional MCPs, optional sub-agents.
+ * the agent's runCommand (for CLI agents) or adapter ID (for SDK agents).
+ * If no agent is specified, defaults to 'claude'.
  *
  * @param config - Evaluator configuration
  * @param evalRunId - The evaluation run ID (for live trace context)
  * @param scenario - The test scenario to run
- * @param evalData - Fetched evaluation data (skills, skillsGroup, codeAgent, mcps, subAgents)
+ * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
  * @param workDir - Optional working directory for the scenario
  * @returns Partial result without assertion fields
  */

package/build/types/run-scenario/utils/write-files.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import type { SkillFile } from '@wix/evalforge-types';
+/**
+ * Write an array of {path, content} files to a target directory.
+ * Validates paths to prevent directory traversal.
+ */
+export declare function writeFilesToDirectory(targetDir: string, files: SkillFile[]): Promise<void>;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@wix/evalforge-evaluator",
-  "version": "0.99.0",
+  "version": "0.100.0",
   "description": "EvalForge Evaluator",
   "bin": "./build/index.js",
   "files": [
@@ -20,8 +20,8 @@
     "@anthropic-ai/claude-agent-sdk": "^0.2.44",
     "@anthropic-ai/claude-code": "^2.1.44",
     "@wix/eval-assertions": "0.21.0",
-    "@wix/evalforge-github-client": "0.17.0",
-    "@wix/evalforge-types": "0.42.0",
+    "@wix/evalforge-github-client": "0.18.0",
+    "@wix/evalforge-types": "0.43.0",
     "ai": "^6.0.6",
     "diff": "^7.0.0",
     "tar": "^7.5.3",
@@ -60,5 +60,5 @@
       "artifactId": "evalforge-evaluator"
     }
   },
-  "falconPackageHash": "8a408bc699dc103228032cd55ff124376df9fefe004f21315d98ae9d"
+  "falconPackageHash": "04a26e31de9664dd48aa040a4bd2f8da8bcef67de281fc36482e8272"
 }