@wix/evalforge-types 0.41.0 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,11 @@
1
1
  import type { SkillWithLatestVersion } from '../target/skill.js';
2
- import type { AgentRunCommand } from '../target/agent.js';
3
2
  import type { TestScenario } from '../scenario/test-scenario.js';
4
3
  import type { ModelConfig } from '../common/models.js';
5
4
  import type { LLMTrace } from '../evaluation/metrics.js';
6
5
  import type { MCPEntity } from '../common/mcp.js';
7
6
  import type { SubAgent } from '../target/sub-agent.js';
8
7
  import type { Rule } from '../common/rule.js';
8
+ import type { AgentRunCommand } from '../target/agent.js';
9
9
  /**
10
10
  * Trace context for live streaming of agent execution.
11
11
  * This is agent-agnostic and can be used by any adapter implementation.
@@ -60,6 +60,13 @@ export interface AgentExecutionContext {
60
60
  subAgents?: SubAgent[];
61
61
  /** Rules to write (CLAUDE.md, AGENTS.md, .cursor/rules/*.md based on ruleType) */
62
62
  rules?: Rule[];
63
+ /**
64
+ * System prompt override for evaluation runs.
65
+ * - undefined: use default evaluator behavioral instructions
66
+ * - null: no system prompt (raw agent behavior)
67
+ * - string: custom system prompt text
68
+ */
69
+ systemPrompt?: string | null;
63
70
  }
64
71
  /**
65
72
  * Token usage statistics from agent execution.
@@ -107,23 +114,29 @@ export interface AgentExecutionResult {
107
114
  *
108
115
  * @example
109
116
  * ```typescript
117
+ * // CLI-based adapter (looked up by command)
110
118
  * class ClaudeAdapter implements AgentAdapter {
111
119
  * readonly id = 'claude';
112
120
  * readonly name = 'Claude Code CLI';
113
- * readonly supportedCommands = [AgentRunCommand.CURSOR];
121
+ * readonly supportedCommands = [AgentRunCommand.CLAUDE];
122
+ * async execute(context: AgentExecutionContext): Promise<AgentExecutionResult> { ... }
123
+ * }
114
124
  *
115
- * async execute(context: AgentExecutionContext): Promise<AgentExecutionResult> {
116
- * // Implementation
117
- * }
125
+ * // SDK-based adapter (looked up by adapter ID)
126
+ * class SimpleAgentAdapter implements AgentAdapter {
127
+ * readonly id = 'simple-agent';
128
+ * readonly name = 'Simple Agent';
129
+ * readonly supportedCommands: AgentRunCommand[] = [];
130
+ * async execute(context: AgentExecutionContext): Promise<AgentExecutionResult> { ... }
118
131
  * }
119
132
  * ```
120
133
  */
121
134
  export interface AgentAdapter {
122
- /** Unique identifier for this adapter */
135
+ /** Unique identifier for this adapter (used for SDK agent lookup) */
123
136
  readonly id: string;
124
137
  /** Human-readable name for display */
125
138
  readonly name: string;
126
- /** CLI commands this adapter handles (e.g., [AgentRunCommand.CLAUDE]) */
139
+ /** CLI commands this adapter handles (e.g., [AgentRunCommand.CLAUDE]). Can be empty for SDK-only adapters. */
127
140
  readonly supportedCommands: readonly AgentRunCommand[];
128
141
  /**
129
142
  * Execute a skill against a test scenario using this agent.
@@ -10,6 +10,10 @@ export declare const AVAILABLE_OPENAI_MODEL_IDS: OpenAIModel[];
10
10
  export declare const OpenAIModelSchema: z.ZodEnum<{
11
11
  [x: string]: string;
12
12
  }>;
13
+ export declare const ALL_AVAILABLE_MODEL_IDS: string[];
14
+ export declare const AnyModelSchema: z.ZodEnum<{
15
+ [x: string]: string;
16
+ }>;
13
17
  export declare const ModelConfigSchema: z.ZodObject<{
14
18
  model: z.ZodEnum<{
15
19
  [x: string]: string;
@@ -7,7 +7,7 @@
7
7
  * - BaseEntity: id, name, description, dates
8
8
  * - TenantEntity: extends BaseEntity with projectId
9
9
  * - Target: extends TenantEntity (base for testable entities)
10
- * - Agent: CLI-based agent (runCommand, modelConfig)
10
+ * - Agent: CLI- or SDK-based agent (agentType, runCommand, modelConfig, systemPrompt)
11
11
  * - Skill: SKILL.md-based capability
12
12
  *
13
13
  * Test Types (9 total):
@@ -1,9 +1,31 @@
1
1
  import { z } from 'zod';
2
+ /**
3
+ * Default behavioral instructions appended to the Claude Code system prompt
4
+ * during evaluation runs. These ensure the agent executes autonomously without
5
+ * waiting for human confirmation.
6
+ */
7
+ export declare const DEFAULT_EVALUATOR_SYSTEM_PROMPT = "IMPORTANT: This is an automated evaluation run. Follow these guidelines:\n1. Execute the requested changes immediately without asking for confirmation.\n2. Do NOT ask \"would you like me to proceed?\" or similar questions.\n3. Do NOT use the Task tool to delegate simple operations - do them directly yourself.\n4. Keep your approach simple and direct - avoid excessive planning.\n5. Make targeted edits using Read and Edit tools rather than exploring the entire codebase.\n6. If you encounter an error, fix it directly rather than starting over.\n7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.\n8. Before finishing, run the project's package manager install command (e.g. `npm install`, `yarn install`, or `pnpm install` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.";
8
+ /**
9
+ * Agent type discriminator.
10
+ * - CLI: External CLI tool (e.g. Claude Code, Codex)
11
+ * - SDK: In-process SDK agent (e.g. Simple Agent via AI SDK)
12
+ */
13
+ export declare const AgentType: {
14
+ readonly CLI: "cli";
15
+ readonly SDK: "sdk";
16
+ };
17
+ export type AgentTypeValue = (typeof AgentType)[keyof typeof AgentType];
18
+ export declare const AgentTypeSchema: z.ZodEnum<{
19
+ cli: "cli";
20
+ sdk: "sdk";
21
+ }>;
22
+ /** Labels for agent types in UI dropdowns. */
23
+ export declare const AGENT_TYPE_LABELS: Record<AgentTypeValue, string>;
2
24
  /**
3
25
  * Supported agent CLI commands.
4
26
  *
5
27
  * Each value corresponds to a registered AgentAdapter in the evaluator.
6
- * When adding a new agent adapter, add its command here first.
28
+ * When adding a new CLI agent adapter, add its command here first.
7
29
  */
8
30
  export declare enum AgentRunCommand {
9
31
  CLAUDE = "claude"
@@ -14,10 +36,10 @@ export declare const AVAILABLE_RUN_COMMANDS: AgentRunCommand[];
14
36
  export declare const RUN_COMMAND_LABELS: Record<AgentRunCommand, string>;
15
37
  export declare const AgentRunCommandSchema: z.ZodEnum<typeof AgentRunCommand>;
16
38
  /**
17
- * Agent schema - a CLI-based coding agent.
39
+ * Agent schema.
18
40
  *
19
- * Agents are external CLI tools that can execute coding tasks.
20
- * Examples: Claude Code CLI, Codex CLI, Cursor CLI.
41
+ * Agents can be CLI-based (external process) or SDK-based (in-process).
42
+ * CLI agents use runCommand to select the adapter; SDK agents use their adapter ID.
21
43
  */
22
44
  export declare const AgentSchema: z.ZodObject<{
23
45
  id: z.ZodString;
@@ -27,7 +49,11 @@ export declare const AgentSchema: z.ZodObject<{
27
49
  updatedAt: z.ZodString;
28
50
  deleted: z.ZodOptional<z.ZodBoolean>;
29
51
  projectId: z.ZodString;
30
- runCommand: z.ZodEnum<typeof AgentRunCommand>;
52
+ agentType: z.ZodDefault<z.ZodEnum<{
53
+ cli: "cli";
54
+ sdk: "sdk";
55
+ }>>;
56
+ runCommand: z.ZodOptional<z.ZodEnum<typeof AgentRunCommand>>;
31
57
  modelConfig: z.ZodOptional<z.ZodObject<{
32
58
  model: z.ZodEnum<{
33
59
  [x: string]: string;
@@ -35,6 +61,7 @@ export declare const AgentSchema: z.ZodObject<{
35
61
  temperature: z.ZodPipe<z.ZodTransform<{} | undefined, unknown>, z.ZodOptional<z.ZodNumber>>;
36
62
  maxTokens: z.ZodPipe<z.ZodTransform<{} | undefined, unknown>, z.ZodOptional<z.ZodNumber>>;
37
63
  }, z.core.$strip>>;
64
+ systemPrompt: z.ZodOptional<z.ZodNullable<z.ZodString>>;
38
65
  }, z.core.$strip>;
39
66
  export type Agent = z.infer<typeof AgentSchema>;
40
67
  /**
@@ -44,7 +71,11 @@ export declare const CreateAgentInputSchema: z.ZodObject<{
44
71
  name: z.ZodString;
45
72
  description: z.ZodString;
46
73
  projectId: z.ZodString;
47
- runCommand: z.ZodEnum<typeof AgentRunCommand>;
74
+ agentType: z.ZodDefault<z.ZodEnum<{
75
+ cli: "cli";
76
+ sdk: "sdk";
77
+ }>>;
78
+ runCommand: z.ZodOptional<z.ZodEnum<typeof AgentRunCommand>>;
48
79
  modelConfig: z.ZodOptional<z.ZodObject<{
49
80
  model: z.ZodEnum<{
50
81
  [x: string]: string;
@@ -52,17 +83,22 @@ export declare const CreateAgentInputSchema: z.ZodObject<{
52
83
  temperature: z.ZodPipe<z.ZodTransform<{} | undefined, unknown>, z.ZodOptional<z.ZodNumber>>;
53
84
  maxTokens: z.ZodPipe<z.ZodTransform<{} | undefined, unknown>, z.ZodOptional<z.ZodNumber>>;
54
85
  }, z.core.$strip>>;
86
+ systemPrompt: z.ZodOptional<z.ZodNullable<z.ZodString>>;
55
87
  }, z.core.$strip>;
56
88
  export type CreateAgentInput = z.infer<typeof CreateAgentInputSchema>;
57
89
  /**
58
90
  * Input schema for updating an Agent.
59
- * modelConfig can be null to explicitly clear it (vs undefined = keep existing).
91
+ * modelConfig and systemPrompt can be null to explicitly clear them (vs undefined = keep existing).
60
92
  */
61
93
  export declare const UpdateAgentInputSchema: z.ZodObject<{
62
94
  name: z.ZodOptional<z.ZodString>;
63
95
  description: z.ZodOptional<z.ZodString>;
64
96
  projectId: z.ZodOptional<z.ZodString>;
65
- runCommand: z.ZodOptional<z.ZodEnum<typeof AgentRunCommand>>;
97
+ agentType: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
98
+ cli: "cli";
99
+ sdk: "sdk";
100
+ }>>>;
101
+ runCommand: z.ZodOptional<z.ZodOptional<z.ZodEnum<typeof AgentRunCommand>>>;
66
102
  modelConfig: z.ZodNullable<z.ZodOptional<z.ZodObject<{
67
103
  model: z.ZodEnum<{
68
104
  [x: string]: string;
@@ -70,5 +106,6 @@ export declare const UpdateAgentInputSchema: z.ZodObject<{
70
106
  temperature: z.ZodPipe<z.ZodTransform<{} | undefined, unknown>, z.ZodOptional<z.ZodNumber>>;
71
107
  maxTokens: z.ZodPipe<z.ZodTransform<{} | undefined, unknown>, z.ZodOptional<z.ZodNumber>>;
72
108
  }, z.core.$strip>>>;
109
+ systemPrompt: z.ZodNullable<z.ZodOptional<z.ZodString>>;
73
110
  }, z.core.$strip>;
74
111
  export type UpdateAgentInput = z.infer<typeof UpdateAgentInputSchema>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-types",
3
- "version": "0.41.0",
3
+ "version": "0.43.0",
4
4
  "description": "Unified types for EvalForge agent evaluation system",
5
5
  "files": [
6
6
  "build"
@@ -47,5 +47,5 @@
47
47
  "artifactId": "evalforge-types"
48
48
  }
49
49
  },
50
- "falconPackageHash": "05f127add95c6bdf6bee7bc611c14e751a58f44328d5e4f9621c92b2"
50
+ "falconPackageHash": "22e0c8e929bd0d6cd5bc802978eb373db3d7d290794c908608da12c0"
51
51
  }