@wix/evalforge-types 0.21.0 → 0.23.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -2,6 +2,8 @@ import type { Skill } from '../target/skill.js';
2
2
  import type { TestScenario } from '../scenario/test-scenario.js';
3
3
  import type { ModelConfig } from '../common/models.js';
4
4
  import type { LLMTrace } from '../evaluation/metrics.js';
5
+ import type { MCPEntity } from '../common/mcp.js';
6
+ import type { SubAgent } from '../target/sub-agent.js';
5
7
  /**
6
8
  * Trace context for live streaming of agent execution.
7
9
  * This is agent-agnostic and can be used by any adapter implementation.
@@ -27,14 +29,17 @@ export interface TraceContext {
27
29
  /**
28
30
  * Agent-agnostic execution context.
29
31
  *
30
- * Contains all the information an agent adapter needs to execute a skill
32
+ * Contains all the information an agent adapter needs to execute skills
31
33
  * against a test scenario. This abstraction allows different agent
32
34
  * implementations (Claude Code, Cursor, Aider, etc.) to receive the
33
35
  * same execution context.
36
+ *
37
+ * When running a skills group, all skills are provided and the agent
38
+ * has access to all of them together (e.g. all written to .claude/skills/).
34
39
  */
35
40
  export interface AgentExecutionContext {
36
- /** The skill to execute (contains skillMd content) */
37
- skill: Skill;
41
+ /** The skills to execute (each contains skillMd content). Run all together as a group. */
42
+ skills: Skill[];
38
43
  /** The test scenario containing the trigger prompt */
39
44
  scenario: TestScenario;
40
45
  /** Working directory for the execution */
@@ -47,6 +52,10 @@ export interface AgentExecutionContext {
47
52
  aiGatewayHeaders?: Record<string, string>;
48
53
  /** Trace context for live streaming (optional) */
49
54
  traceContext?: TraceContext;
55
+ /** MCPs to load (when present, .mcp.json is written and mcp__* allowed) */
56
+ mcps?: MCPEntity[];
57
+ /** Sub-agents to load (when present, written to .claude/agents/*.md) */
58
+ subAgents?: SubAgent[];
50
59
  }
51
60
  /**
52
61
  * Token usage statistics from agent execution.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-types",
3
- "version": "0.21.0",
3
+ "version": "0.23.0",
4
4
  "description": "Unified types for EvalForge agent evaluation system",
5
5
  "files": [
6
6
  "build"
@@ -46,5 +46,5 @@
46
46
  "artifactId": "evalforge-types"
47
47
  }
48
48
  },
49
- "falconPackageHash": "dd1b66b435e8b08bf00fbdc1de02908cece45b1816e2873a5082a827"
49
+ "falconPackageHash": "4e6aa15f6059d39f2e6e6d7f78fd5ac866ff773d2081878471c2f6b5"
50
50
  }