@wix/evalforge-evaluator 0.99.0 → 0.100.0

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -31,11 +31,11 @@ export interface ScenarioItem {
31
31
  }
32
32
  /**
33
33
  * Data shape returned by fetchEvaluationData for the run flow.
34
- * Run-level code agent + skills + skills group + mcps + subAgents; per-scenario template.
34
+ * Run-level agent + skills + skills group + mcps + subAgents; per-scenario template.
35
35
  */
36
36
  export interface EvaluationData {
37
37
  evalRun: EvalRun;
38
- codeAgent: Agent | null;
38
+ agent: Agent | null;
39
39
  skills: SkillWithLatestVersion[];
40
40
  skillsGroup: SkillsGroup | null;
41
41
  /** Display name for the skills group (from skillsGroup.name when present) */
@@ -1,4 +1,5 @@
1
1
  import type { GitHubSource, SkillFile, SkillWithLatestVersion } from '@wix/evalforge-types';
2
+ import { writeFilesToDirectory } from '../../utils/write-files.js';
2
3
  export type FetchGitHubFolderFn = (source: GitHubSource, options?: {
3
4
  userAgent?: string;
4
5
  }) => Promise<SkillFile[]>;
@@ -17,5 +18,6 @@ export declare function writeSkillsToFilesystem(cwd: string, skills: SkillWithLa
17
18
  export declare function writeSkillToFilesystem(cwd: string, skill: SkillWithLatestVersion, fetchFn?: FetchGitHubFolderFn): Promise<void>;
18
19
  /**
19
20
  * Write skill files from a snapshot array to the filesystem.
21
+ * @deprecated Use writeFilesToDirectory from utils/write-files.ts instead
20
22
  */
21
- export declare function writeSkillFiles(skillDir: string, files: SkillFile[]): Promise<void>;
23
+ export declare const writeSkillFiles: typeof writeFilesToDirectory;
@@ -1,103 +1,72 @@
1
- import type { AgentAdapter, AgentRunCommand } from '@wix/evalforge-types';
1
+ import type { AgentAdapter } from '@wix/evalforge-types';
2
2
  /**
3
3
  * Registry for agent adapters.
4
4
  *
5
- * Provides registration and lookup of agent adapters by their supported
6
- * commands. This enables the evaluation system to dynamically select
7
- * the appropriate adapter based on the agent's runCommand property.
5
+ * Supports two lookup strategies:
6
+ * - By CLI command (e.g. 'claude') for CLI-based agents
7
+ * - By adapter ID (e.g. 'simple-agent') for SDK-based agents
8
8
  *
9
9
  * @example
10
10
  * ```typescript
11
11
  * const registry = new AgentAdapterRegistry();
12
12
  * registry.register(new ClaudeCodeAdapter());
13
13
  *
14
- * const adapter = registry.get(AgentRunCommand.CLAUDE);
14
+ * const adapter = registry.resolve('claude');
15
15
  * if (adapter) {
16
16
  * const result = await adapter.execute(context);
17
17
  * }
18
18
  * ```
19
19
  */
20
20
  export declare class AgentAdapterRegistry {
21
- /**
22
- * Map of run commands to their registered adapters.
23
- * Multiple commands can map to the same adapter.
24
- */
25
- private adapters;
26
- /**
27
- * Set of all registered adapter instances (for getAll).
28
- */
21
+ /** Map of CLI commands to their registered adapters. */
22
+ private commandMap;
23
+ /** Map of adapter IDs to their registered adapters. */
24
+ private idMap;
25
+ /** Set of all registered adapter instances (for getAll). */
29
26
  private registeredAdapters;
30
27
  /**
31
28
  * Register an agent adapter.
32
29
  *
33
- * The adapter will be registered for all commands in its supportedCommands array.
34
- * If a command is already registered, it will be overwritten with a warning.
35
- *
36
- * @param adapter - The adapter to register
30
+ * The adapter is registered by its ID and for all commands in its supportedCommands array.
31
+ * If a command or ID is already registered, it will be overwritten with a warning.
37
32
  */
38
33
  register(adapter: AgentAdapter): void;
34
+ /** Get an adapter by CLI command. */
35
+ getByCommand(command: string): AgentAdapter | undefined;
36
+ /** Get an adapter by adapter ID. */
37
+ getById(adapterId: string): AgentAdapter | undefined;
39
38
  /**
40
- * Get an adapter by run command.
41
- *
42
- * @param runCommand - The run command to look up
43
- * @returns The registered adapter, or undefined if not found
44
- */
45
- get(runCommand: AgentRunCommand): AgentAdapter | undefined;
46
- /**
47
- * Check if a command has a registered adapter.
48
- *
49
- * @param runCommand - The run command to check
50
- * @returns True if an adapter is registered for this command
51
- */
52
- has(runCommand: AgentRunCommand): boolean;
53
- /**
54
- * Get all registered adapters.
55
- *
56
- * @returns Array of all unique registered adapters
39
+ * Unified lookup: tries CLI command first, then adapter ID.
40
+ * Use this when the identifier could be either a command or an adapter ID.
57
41
  */
42
+ resolve(identifier: string): AgentAdapter | undefined;
43
+ /** Check if a command or adapter ID has a registered adapter. */
44
+ has(identifier: string): boolean;
45
+ /** Get all registered adapters. */
58
46
  getAll(): AgentAdapter[];
59
- /**
60
- * Get all supported commands.
61
- *
62
- * @returns Array of all registered run commands
63
- */
64
- getSupportedCommands(): AgentRunCommand[];
47
+ /** Get all supported CLI commands. */
48
+ getSupportedCommands(): string[];
49
+ /** Get all registered adapter IDs. */
50
+ getAdapterIds(): string[];
65
51
  /**
66
52
  * Unregister an adapter by its ID.
67
- *
68
53
  * Removes the adapter and all its command mappings.
69
- *
70
- * @param adapterId - The ID of the adapter to remove
71
- * @returns True if the adapter was found and removed
72
54
  */
73
55
  unregister(adapterId: string): boolean;
74
- /**
75
- * Clear all registered adapters.
76
- * Primarily useful for testing.
77
- */
56
+ /** Clear all registered adapters. Primarily useful for testing. */
78
57
  clear(): void;
79
58
  }
80
59
  /**
81
60
  * Default global registry instance.
82
- *
83
- * This is the main registry used by the evaluation system.
84
61
  * Adapters are auto-registered here when their modules are imported.
85
62
  */
86
63
  export declare const defaultRegistry: AgentAdapterRegistry;
87
64
  /**
88
65
  * Get an adapter from the default registry.
89
66
  *
90
- * Convenience function that throws a helpful error if the adapter is not found.
91
- *
92
- * @param runCommand - The run command to look up
93
- * @returns The registered adapter
94
- * @throws Error if no adapter is registered for the command
95
- */
96
- export declare function getAdapter(runCommand: AgentRunCommand): AgentAdapter;
97
- /**
98
- * Check if a command has a registered adapter in the default registry.
99
- *
100
- * @param runCommand - The run command to check
101
- * @returns True if an adapter is registered for this command
67
+ * Uses unified lookup (command first, then adapter ID).
68
+ * Throws a helpful error if the adapter is not found.
102
69
  */
103
- export declare function hasAdapter(runCommand: AgentRunCommand): boolean;
70
+ export declare function getAdapter(identifier: string): AgentAdapter;
71
+ /** Check if an identifier has a registered adapter in the default registry. */
72
+ export declare function hasAdapter(identifier: string): boolean;
@@ -10,7 +10,7 @@ export type { ScenarioItem, EvaluationData } from '../fetch-evaluation-data.js';
10
10
  * @param config - Evaluator configuration
11
11
  * @param evalRunId - The evaluation run ID
12
12
  * @param scenario - The test scenario to run
13
- * @param evalData - Fetched evaluation data (skills, skillsGroup, codeAgent, mcps, subAgents)
13
+ * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
14
14
  * @param template - Optional pre-fetched template entity
15
15
  * @param resolvedAssertions - Optional assertions resolved from assertionIds
16
16
  * @returns Complete evaluation result
@@ -6,13 +6,13 @@ import type { EvaluationData } from '../fetch-evaluation-data.js';
6
6
  * Run the agent with the full execution context (skills, MCPs, sub-agents, etc.).
7
7
  *
8
8
  * Uses the agent adapter registry to select the appropriate adapter based on
9
- * the agent's runCommand. If no agent is specified, defaults to 'claude'.
10
- * The context includes skills group, optional MCPs, optional sub-agents.
9
+ * the agent's runCommand (for CLI agents) or adapter ID (for SDK agents).
10
+ * If no agent is specified, defaults to 'claude'.
11
11
  *
12
12
  * @param config - Evaluator configuration
13
13
  * @param evalRunId - The evaluation run ID (for live trace context)
14
14
  * @param scenario - The test scenario to run
15
- * @param evalData - Fetched evaluation data (skills, skillsGroup, codeAgent, mcps, subAgents)
15
+ * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
16
16
  * @param workDir - Optional working directory for the scenario
17
17
  * @returns Partial result without assertion fields
18
18
  */
@@ -0,0 +1,6 @@
1
+ import type { SkillFile } from '@wix/evalforge-types';
2
+ /**
3
+ * Write an array of {path, content} files to a target directory.
4
+ * Validates paths to prevent directory traversal.
5
+ */
6
+ export declare function writeFilesToDirectory(targetDir: string, files: SkillFile[]): Promise<void>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.99.0",
3
+ "version": "0.100.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -20,8 +20,8 @@
20
20
  "@anthropic-ai/claude-agent-sdk": "^0.2.44",
21
21
  "@anthropic-ai/claude-code": "^2.1.44",
22
22
  "@wix/eval-assertions": "0.21.0",
23
- "@wix/evalforge-github-client": "0.17.0",
24
- "@wix/evalforge-types": "0.42.0",
23
+ "@wix/evalforge-github-client": "0.18.0",
24
+ "@wix/evalforge-types": "0.43.0",
25
25
  "ai": "^6.0.6",
26
26
  "diff": "^7.0.0",
27
27
  "tar": "^7.5.3",
@@ -60,5 +60,5 @@
60
60
  "artifactId": "evalforge-evaluator"
61
61
  }
62
62
  },
63
- "falconPackageHash": "8a408bc699dc103228032cd55ff124376df9fefe004f21315d98ae9d"
63
+ "falconPackageHash": "04a26e31de9664dd48aa040a4bd2f8da8bcef67de281fc36482e8272"
64
64
  }