npm - @wix/evalforge-evaluator - Versions diffs - 0.105.0 → 0.107.0 - Mend

@wix/evalforge-evaluator 0.105.0 → 0.107.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (10)

package/build/index.js +77 -38
package/build/index.js.map +3 -3
package/build/index.mjs +79 -38
package/build/index.mjs.map +3 -3
package/build/types/api-client.d.ts +2 -2
package/build/types/fetch-evaluation-data.d.ts +6 -7
package/build/types/run-scenario/agents/claude-code/write-sub-agents.d.ts +8 -4
package/build/types/run-scenario/index.d.ts +1 -1
package/build/types/run-scenario/run-agent-with-context.d.ts +1 -1
package/package.json +4 -4

package/build/types/api-client.d.ts CHANGED Viewed

@@ -1,10 +1,9 @@
 /**
  * API Client for fetching data from the eval server.
  */
-import type { EvalRun, EvalRunResult, SkillsGroup, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, CustomAssertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
+import type { EvalRun, EvalRunResult, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, Preset, CustomAssertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
 export interface ApiClient {
     getEvalRun(projectId: string, id: string): Promise<EvalRun>;
-    getSkillsGroup(projectId: string, id: string): Promise<SkillsGroup>;
     getScenario(projectId: string, id: string): Promise<TestScenario>;
     getSkill(projectId: string, id: string): Promise<SkillWithLatestVersion>;
     getSkillVersion(projectId: string, skillId: string, versionId: string): Promise<SkillVersion>;
@@ -14,6 +13,7 @@ export interface ApiClient {
     getMcp(projectId: string, id: string): Promise<MCPEntity>;
     getSubAgent(projectId: string, id: string): Promise<SubAgent>;
     getRule(projectId: string, id: string): Promise<Rule>;
+    getPreset(projectId: string, id: string): Promise<Preset>;
     getAssertion(projectId: string, id: string): Promise<CustomAssertion>;
     addResult(projectId: string, evalRunId: string, result: EvalRunResult): Promise<void>;
     clearResults(projectId: string, evalRunId: string): Promise<void>;

package/build/types/fetch-evaluation-data.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { EvalRun, TestScenario, SkillWithLatestVersion, SkillsGroup, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
+import type { EvalRun, TestScenario, SkillWithLatestVersion, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
 import { type SystemAssertionId } from '@wix/evalforge-types';
 import type { ApiClient } from './api-client.js';
 /**
@@ -31,15 +31,14 @@ export interface ScenarioItem {
 }
 /**
  * Data shape returned by fetchEvaluationData for the run flow.
- * Run-level agent + skills + skills group + mcps + subAgents; per-scenario template.
+ * Run-level agent + skills + mcps + subAgents; per-scenario template.
  */
 export interface EvaluationData {
     evalRun: EvalRun;
     agent: Agent | null;
     skills: SkillWithLatestVersion[];
-    skillsGroup: SkillsGroup | null;
-    /** Display name for the skills group (from skillsGroup.name when present) */
-    skillsGroupName: string;
+    /** Display name: preset name from DB when available, otherwise joined skill names */
+    presetName: string;
     mcps: MCPEntity[];
     subAgents: SubAgent[];
     rules: Rule[];
@@ -48,8 +47,8 @@ export interface EvaluationData {
 /**
  * Fetch all data needed to run an evaluation.
  *
- * Uses evalRun.agentId and evalRun.skillsGroupId as run-level inputs.
- * Loads skills from the skills group; loads code agent; loads scenarios and templates by scenario.templateId.
+ * Uses evalRun.agentId and evalRun.skillIds as run-level inputs.
+ * Loads skills by ID; loads code agent; loads scenarios and templates by scenario.templateId.
  *
  * @throws Error if eval run has no code agent when expected
  * @throws Error if eval run has no skills group or skills group has no skills when expected

package/build/types/run-scenario/agents/claude-code/write-sub-agents.d.ts CHANGED Viewed

@@ -1,12 +1,16 @@
-import type { SubAgent } from '@wix/evalforge-types';
+import type { SubAgent, GitHubSource } from '@wix/evalforge-types';
+import { type FetchGitHubFolderOptions } from '@wix/evalforge-github-client';
+/** Signature for the single-file GitHub fetch function (injectable for tests). */
+export type FetchGitHubFileFn = (source: GitHubSource, options?: FetchGitHubFolderOptions) => Promise<string>;
 /**
  * Write sub-agent markdown files to .claude/agents/ for Claude Code to discover.
  *
- * Each sub-agent's subAgentMd (YAML frontmatter + body) is written to
- * .claude/agents/{name}.md. Claude Code loads these at session start.
+ * For sub-agents with a GitHub source, the latest .md file is live-fetched.
+ * For inline sub-agents, the stored subAgentMd is used directly.
  *
  * @see https://code.claude.com/docs/en/sub-agents#write-subagent-files
  * @param cwd - Working directory (project root for Claude Code)
  * @param subAgents - Sub-agent entities to write
+ * @param fetchFn - Optional fetch function for testing (defaults to fetchGitHubFile)
  */
-export declare function writeSubAgentsToFilesystem(cwd: string, subAgents: SubAgent[]): Promise<void>;
+export declare function writeSubAgentsToFilesystem(cwd: string, subAgents: SubAgent[], fetchFn?: FetchGitHubFileFn): Promise<void>;

package/build/types/run-scenario/index.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export type { ScenarioItem, EvaluationData } from '../fetch-evaluation-data.js';
  * @param config - Evaluator configuration
  * @param evalRunId - The evaluation run ID
  * @param scenario - The test scenario to run
- * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
+ * @param evalData - Fetched evaluation data (skills, agent, mcps, subAgents)
  * @param template - Optional pre-fetched template entity
  * @param resolvedAssertions - Optional assertions resolved from assertionIds
  * @returns Complete evaluation result

package/build/types/run-scenario/run-agent-with-context.d.ts CHANGED Viewed

@@ -12,7 +12,7 @@ import type { EvaluationData } from '../fetch-evaluation-data.js';
  * @param config - Evaluator configuration
  * @param evalRunId - The evaluation run ID (for live trace context)
  * @param scenario - The test scenario to run
- * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
+ * @param evalData - Fetched evaluation data (skills, agent, mcps, subAgents)
  * @param workDir - Optional working directory for the scenario
  * @returns Partial result without assertion fields
  */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@wix/evalforge-evaluator",
-  "version": "0.105.0",
+  "version": "0.107.0",
   "description": "EvalForge Evaluator",
   "bin": "./build/index.js",
   "files": [
@@ -22,8 +22,8 @@
     "@anthropic-ai/claude-agent-sdk": "^0.2.49",
     "@anthropic-ai/claude-code": "^2.1.49",
     "@wix/eval-assertions": "0.23.0",
-    "@wix/evalforge-github-client": "0.23.0",
-    "@wix/evalforge-types": "0.48.0",
+    "@wix/evalforge-github-client": "0.25.0",
+    "@wix/evalforge-types": "0.50.0",
     "ai": "^6.0.93",
     "diff": "^7.0.0",
     "tar": "^7.5.3",
@@ -62,5 +62,5 @@
       "artifactId": "evalforge-evaluator"
     }
   },
-  "falconPackageHash": "7f3f2d1b27312daf480e195d122e0885ac476d0f881100f407a34755"
+  "falconPackageHash": "59ea86dd554511d348988564544eee3ea330a005592d9aa9a7d22719"
 }