npm - create-agentmark - Versions diffs - 0.10.4 → 0.10.5 - Mend

create-agentmark 0.10.4 → 0.10.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.js +26 -32
package/dist/index.js.map +1 -1
package/dist/utils/examples/templates/index.js +18 -22
package/dist/utils/examples/templates/index.js.map +1 -1
package/package.json +1 -1

package/dist/index.js CHANGED

@@ -815,7 +815,7 @@ import type { ToolsInput } from '@mastra/core/agent';
 import { z } from 'zod';` : `import { tool } from 'ai';
 import type { Tool } from 'ai';
 import { z } from 'zod';`;
-  const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, scores, adapterOptions, mcpServers: { 'customer-support': customerSupportTools } });` : `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, tools, scores });`;
+  const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, evals, adapterOptions, mcpServers: { 'customer-support': customerSupportTools } });` : `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, tools, evals });`;
   const toolSchemaField = isMastra ? `parameters: z.object({ query: z.string().describe('The search query') })` : `inputSchema: z.object({ query: z.string().describe('The search query') })`;
   const toolsReturnType = isMastra ? "ToolsInput" : "Record<string, Tool>";
   const toolsSetup = isClaudeAgentSdk ? `
@@ -868,7 +868,7 @@ import path from 'node:path';
 import dotenv from 'dotenv';
 dotenv.config({ path: path.resolve(__dirname, '.env') });
 import { createAgentMarkClient, ${modelRegistry} } from "${adapterConfig.package}";
-import type { ScoreRegistry } from "@agentmark-ai/prompt-core";
+import type { EvalRegistry } from "@agentmark-ai/prompt-core";
 ${loaderImport}
 import AgentMarkTypes from './agentmark.types';
 ${providerImport}
@@ -878,26 +878,22 @@ ${adapterOptionsImport}
 ${modelRegistrySetup}
 ${toolsSetup}
-const scores: ScoreRegistry = {
-  exact_match_json: {
-    schema: { type: 'boolean' },
-    description: 'Whether output matches expected JSON exactly',
-    eval: ({ output, expectedOutput }) => {
-      if (!expectedOutput) {
-        return { score: 0, label: 'error', reason: 'No expected output provided', passed: false };
-      }
-      try {
-        const ok = JSON.stringify(output) === JSON.stringify(JSON.parse(expectedOutput));
-        return {
-          score: ok ? 1 : 0,
-          label: ok ? 'correct' : 'incorrect',
-          reason: ok ? 'Exact match' : 'Mismatch',
-          passed: ok
-        };
-      } catch (e) {
-        return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
-      }
-    },
+const evals: EvalRegistry = {
+  exact_match_json: async ({ output, expectedOutput }) => {
+    if (!expectedOutput) {
+      return { score: 0, label: 'error', reason: 'No expected output provided', passed: false };
+    }
+    try {
+      const ok = JSON.stringify(output) === JSON.stringify(JSON.parse(expectedOutput));
+      return {
+        score: ok ? 1 : 0,
+        label: ok ? 'correct' : 'incorrect',
+        reason: ok ? 'Exact match' : 'Mismatch',
+        passed: ok
+      };
+    } catch (e) {
+      return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
+    }
   },
 };
@@ -1604,6 +1600,7 @@ from pathlib import Path
 from dotenv import load_dotenv
 ${loaderImport}
+from agentmark.prompt_core import EvalRegistry
 from agentmark_claude_agent_sdk import (
     create_claude_agent_client,
     ClaudeAgentModelRegistry,
@@ -1620,9 +1617,7 @@ model_registry.register_providers({
 })
-# Eval registry \u2014 define evaluation functions for experiments
-# TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
-# The TypeScript SDK already uses scores with schema definitions.
+# Evaluation functions \u2014 used by experiments to score model outputs
 def exact_match_json(params):
     """Check if output matches expected output exactly."""
     output = params.get("output")
@@ -1642,7 +1637,7 @@ def exact_match_json(params):
     except (json.JSONDecodeError, TypeError):
         return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
-eval_registry = {
+evals: EvalRegistry = {
     "exact_match_json": exact_match_json,
 }
@@ -1652,7 +1647,7 @@ ${loaderSetup}
 # Claude Agent SDK handles tools natively through the SDK
 client = create_claude_agent_client(
     model_registry=model_registry,
-    eval_registry=eval_registry,
+    evals=evals,
     loader=loader,
 )
@@ -1671,6 +1666,7 @@ from pathlib import Path
 from dotenv import load_dotenv
 ${loaderImport}
+from agentmark.prompt_core import EvalRegistry
 from agentmark_pydantic_ai_v0 import (
     create_pydantic_ai_client,
     PydanticAIModelRegistry,
@@ -1695,9 +1691,7 @@ model_registry.register_providers({
 tools = []
-# Eval registry \u2014 define evaluation functions for experiments
-# TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
-# The TypeScript SDK already uses scores with schema definitions.
+# Evaluation functions \u2014 used by experiments to score model outputs
 def exact_match_json(params):
     """Check if output matches expected output exactly."""
     output = params.get("output")
@@ -1717,7 +1711,7 @@ def exact_match_json(params):
     except (json.JSONDecodeError, TypeError):
         return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
-eval_registry = {
+evals: EvalRegistry = {
     "exact_match_json": exact_match_json,
 }
@@ -1727,7 +1721,7 @@ ${loaderSetup}
 client = create_pydantic_ai_client(
     model_registry=model_registry,
     tools=tools,
-    eval_registry=eval_registry,
+    evals=evals,
     loader=loader,
 )