@wix/evalforge-types 0.41.0 → 0.43.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -949,9 +949,16 @@ var AVAILABLE_OPENAI_MODEL_IDS = Object.values(
949
949
  var OpenAIModelSchema = z4.enum(
950
950
  AVAILABLE_OPENAI_MODEL_IDS
951
951
  );
952
+ var ALL_AVAILABLE_MODEL_IDS = [
953
+ ...AVAILABLE_MODEL_IDS,
954
+ ...AVAILABLE_OPENAI_MODEL_IDS
955
+ ];
956
+ var AnyModelSchema = z4.enum(
957
+ ALL_AVAILABLE_MODEL_IDS
958
+ );
952
959
  var nullToUndefined = (val) => val === null ? void 0 : val;
953
960
  var ModelConfigSchema = z4.object({
954
- model: ClaudeModelSchema,
961
+ model: AnyModelSchema,
955
962
  temperature: z4.preprocess(
956
963
  nullToUndefined,
957
964
  z4.number().min(0).max(1).optional()
@@ -994,6 +1001,24 @@ var TargetSchema = TenantEntitySchema.extend({
994
1001
 
995
1002
  // src/target/agent.ts
996
1003
  import { z as z6 } from "zod";
1004
+ var DEFAULT_EVALUATOR_SYSTEM_PROMPT = `IMPORTANT: This is an automated evaluation run. Follow these guidelines:
1005
+ 1. Execute the requested changes immediately without asking for confirmation.
1006
+ 2. Do NOT ask "would you like me to proceed?" or similar questions.
1007
+ 3. Do NOT use the Task tool to delegate simple operations - do them directly yourself.
1008
+ 4. Keep your approach simple and direct - avoid excessive planning.
1009
+ 5. Make targeted edits using Read and Edit tools rather than exploring the entire codebase.
1010
+ 6. If you encounter an error, fix it directly rather than starting over.
1011
+ 7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.
1012
+ 8. Before finishing, run the project's package manager install command (e.g. \`npm install\`, \`yarn install\`, or \`pnpm install\` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.`;
1013
+ var AgentType = {
1014
+ CLI: "cli",
1015
+ SDK: "sdk"
1016
+ };
1017
+ var AgentTypeSchema = z6.enum([AgentType.CLI, AgentType.SDK]);
1018
+ var AGENT_TYPE_LABELS = {
1019
+ [AgentType.CLI]: "CLI Agent",
1020
+ [AgentType.SDK]: "Simple Agent"
1021
+ };
997
1022
  var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
998
1023
  AgentRunCommand2["CLAUDE"] = "claude";
999
1024
  return AgentRunCommand2;
@@ -1004,10 +1029,15 @@ var RUN_COMMAND_LABELS = {
1004
1029
  };
1005
1030
  var AgentRunCommandSchema = z6.nativeEnum(AgentRunCommand);
1006
1031
  var AgentSchema = TargetSchema.extend({
1007
- /** Command to run the agent */
1008
- runCommand: AgentRunCommandSchema,
1032
+ /** Agent type: 'cli' for external CLI tools, 'sdk' for in-process SDK agents */
1033
+ agentType: AgentTypeSchema.default(AgentType.CLI),
1034
+ /** Command to run the agent (required for CLI agents, absent for SDK agents) */
1035
+ runCommand: AgentRunCommandSchema.optional(),
1009
1036
  /** Optional model configuration override */
1010
- modelConfig: ModelConfigSchema.optional()
1037
+ modelConfig: ModelConfigSchema.optional(),
1038
+ systemPrompt: z6.string().nullish().describe(
1039
+ "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
1040
+ )
1011
1041
  });
1012
1042
  var CreateAgentInputSchema = AgentSchema.omit({
1013
1043
  id: true,
@@ -1016,7 +1046,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
1016
1046
  deleted: true
1017
1047
  });
1018
1048
  var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
1019
- modelConfig: ModelConfigSchema.optional().nullable()
1049
+ modelConfig: ModelConfigSchema.optional().nullable(),
1050
+ systemPrompt: z6.string().optional().nullable()
1020
1051
  });
1021
1052
 
1022
1053
  // src/target/skill.ts
@@ -2181,6 +2212,8 @@ function getSystemAssertion(id) {
2181
2212
  return SYSTEM_ASSERTIONS[id];
2182
2213
  }
2183
2214
  export {
2215
+ AGENT_TYPE_LABELS,
2216
+ ALL_AVAILABLE_MODEL_IDS,
2184
2217
  AVAILABLE_MODEL_IDS,
2185
2218
  AVAILABLE_OPENAI_MODEL_IDS,
2186
2219
  AVAILABLE_RUN_COMMANDS,
@@ -2188,7 +2221,10 @@ export {
2188
2221
  AgentRunCommand,
2189
2222
  AgentRunCommandSchema,
2190
2223
  AgentSchema,
2224
+ AgentType,
2225
+ AgentTypeSchema,
2191
2226
  AllowedCommands,
2227
+ AnyModelSchema,
2192
2228
  ApiCallSchema,
2193
2229
  AssertionConfigSchema,
2194
2230
  AssertionParameterSchema,
@@ -2222,6 +2258,7 @@ export {
2222
2258
  CreateTestScenarioInputSchema,
2223
2259
  CreateTestSuiteInputSchema,
2224
2260
  CustomAssertionSchema,
2261
+ DEFAULT_EVALUATOR_SYSTEM_PROMPT,
2225
2262
  DEFAULT_JUDGE_MODEL,
2226
2263
  DiffContentSchema,
2227
2264
  DiffLineSchema,