npm - @wix/evalforge-types - Versions diffs - 0.42.0 → 0.44.0 - Mend

@wix/evalforge-types 0.42.0 → 0.44.0

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (10)

package/build/index.js +38 -10
package/build/index.js.map +2 -2
package/build/index.mjs +33 -10
package/build/index.mjs.map +3 -3
package/build/types/agent/adapter.d.ts +13 -7
package/build/types/common/models.d.ts +4 -0
package/build/types/scenario/assertions.d.ts +1 -1
package/build/types/target/agent.d.ts +35 -7
package/build/types/test/llm.d.ts +2 -2
package/package.json +2 -2

package/build/index.mjs CHANGED

@@ -949,9 +949,16 @@ var AVAILABLE_OPENAI_MODEL_IDS = Object.values(
 var OpenAIModelSchema = z4.enum(
   AVAILABLE_OPENAI_MODEL_IDS
 );
+var ALL_AVAILABLE_MODEL_IDS = [
+  ...AVAILABLE_MODEL_IDS,
+  ...AVAILABLE_OPENAI_MODEL_IDS
+];
+var AnyModelSchema = z4.enum(
+  ALL_AVAILABLE_MODEL_IDS
+);
 var nullToUndefined = (val) => val === null ? void 0 : val;
 var ModelConfigSchema = z4.object({
-  model: ClaudeModelSchema,
+  model: AnyModelSchema,
   temperature: z4.preprocess(
     nullToUndefined,
     z4.number().min(0).max(1).optional()
@@ -1003,6 +1010,15 @@ var DEFAULT_EVALUATOR_SYSTEM_PROMPT = `IMPORTANT: This is an automated evaluatio
 6. If you encounter an error, fix it directly rather than starting over.
 7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.
 8. Before finishing, run the project's package manager install command (e.g. \`npm install\`, \`yarn install\`, or \`pnpm install\` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.`;
+var AgentType = {
+  CLI: "cli",
+  SDK: "sdk"
+};
+var AgentTypeSchema = z6.enum([AgentType.CLI, AgentType.SDK]);
+var AGENT_TYPE_LABELS = {
+  [AgentType.CLI]: "CLI Agent",
+  [AgentType.SDK]: "Simple Agent"
+};
 var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
   AgentRunCommand2["CLAUDE"] = "claude";
   return AgentRunCommand2;
@@ -1013,8 +1029,10 @@ var RUN_COMMAND_LABELS = {
 };
 var AgentRunCommandSchema = z6.nativeEnum(AgentRunCommand);
 var AgentSchema = TargetSchema.extend({
-  /** Command to run the agent */
-  runCommand: AgentRunCommandSchema,
+  /** Agent type: 'cli' for external CLI tools, 'sdk' for in-process SDK agents */
+  agentType: AgentTypeSchema.default(AgentType.CLI),
+  /** Command to run the agent (required for CLI agents, absent for SDK agents) */
+  runCommand: AgentRunCommandSchema.optional(),
   /** Optional model configuration override */
   modelConfig: ModelConfigSchema.optional(),
   systemPrompt: z6.string().nullish().describe(
@@ -1360,8 +1378,8 @@ var LlmJudgeAssertionSchema = z21.object({
   type: z21.literal("llm_judge"),
   /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
   prompt: z21.string(),
-  /** Minimum score to pass (0-100, default 70) */
-  minScore: z21.number().int().min(0).max(100).optional(),
+  /** Minimum score to pass (0-10, default 7) */
+  minScore: z21.number().int().min(0).max(10).optional(),
   /** Model for the judge (e.g. claude-3-5-haiku) */
   model: z21.string().optional(),
   maxTokens: z21.number().int().optional(),
@@ -1492,8 +1510,8 @@ var LlmJudgeConfigSchema = z23.object({
    * - Custom parameters defined in the parameters array
    */
   prompt: z23.string().min(1),
-  /** Minimum score to pass (0-100, default 70) */
-  minScore: z23.number().int().min(0).max(100).optional(),
+  /** Minimum score to pass (0-10, default 7) */
+  minScore: z23.number().int().min(0).max(10).optional(),
   /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
   model: z23.string().optional(),
   /** Max output tokens */
@@ -2161,7 +2179,7 @@ var SYSTEM_ASSERTIONS = {
   [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
     id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
     name: "LLM Judge",
-    description: "LLM evaluates the output and assigns a score (0-100)",
+    description: "LLM evaluates the output and assigns a score (0-10)",
     type: "llm_judge",
     parameters: [
       {
@@ -2173,10 +2191,10 @@ var SYSTEM_ASSERTIONS = {
       },
       {
         name: "minScore",
-        label: "Minimum Score (0-100)",
+        label: "Minimum Score (0-10)",
         type: "number",
         required: false,
-        defaultValue: 70
+        defaultValue: 7
       },
       {
         name: "model",
@@ -2194,6 +2212,8 @@ function getSystemAssertion(id) {
   return SYSTEM_ASSERTIONS[id];
 }
 export {
+  AGENT_TYPE_LABELS,
+  ALL_AVAILABLE_MODEL_IDS,
   AVAILABLE_MODEL_IDS,
   AVAILABLE_OPENAI_MODEL_IDS,
   AVAILABLE_RUN_COMMANDS,
@@ -2201,7 +2221,10 @@ export {
   AgentRunCommand,
   AgentRunCommandSchema,
   AgentSchema,
+  AgentType,
+  AgentTypeSchema,
   AllowedCommands,
+  AnyModelSchema,
   ApiCallSchema,
   AssertionConfigSchema,
   AssertionParameterSchema,