create-agentmark 0.10.4 → 0.10.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -815,7 +815,7 @@ import type { ToolsInput } from '@mastra/core/agent';
815
815
  import { z } from 'zod';` : `import { tool } from 'ai';
816
816
  import type { Tool } from 'ai';
817
817
  import { z } from 'zod';`;
818
- const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, scores, adapterOptions, mcpServers: { 'customer-support': customerSupportTools } });` : `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, tools, scores });`;
818
+ const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, evals, adapterOptions, mcpServers: { 'customer-support': customerSupportTools } });` : `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, tools, evals });`;
819
819
  const toolSchemaField = isMastra ? `parameters: z.object({ query: z.string().describe('The search query') })` : `inputSchema: z.object({ query: z.string().describe('The search query') })`;
820
820
  const toolsReturnType = isMastra ? "ToolsInput" : "Record<string, Tool>";
821
821
  const toolsSetup = isClaudeAgentSdk ? `
@@ -868,7 +868,7 @@ import path from 'node:path';
868
868
  import dotenv from 'dotenv';
869
869
  dotenv.config({ path: path.resolve(__dirname, '.env') });
870
870
  import { createAgentMarkClient, ${modelRegistry} } from "${adapterConfig.package}";
871
- import type { ScoreRegistry } from "@agentmark-ai/prompt-core";
871
+ import type { EvalRegistry } from "@agentmark-ai/prompt-core";
872
872
  ${loaderImport}
873
873
  import AgentMarkTypes from './agentmark.types';
874
874
  ${providerImport}
@@ -878,26 +878,22 @@ ${adapterOptionsImport}
878
878
  ${modelRegistrySetup}
879
879
  ${toolsSetup}
880
880
 
881
- const scores: ScoreRegistry = {
882
- exact_match_json: {
883
- schema: { type: 'boolean' },
884
- description: 'Whether output matches expected JSON exactly',
885
- eval: ({ output, expectedOutput }) => {
886
- if (!expectedOutput) {
887
- return { score: 0, label: 'error', reason: 'No expected output provided', passed: false };
888
- }
889
- try {
890
- const ok = JSON.stringify(output) === JSON.stringify(JSON.parse(expectedOutput));
891
- return {
892
- score: ok ? 1 : 0,
893
- label: ok ? 'correct' : 'incorrect',
894
- reason: ok ? 'Exact match' : 'Mismatch',
895
- passed: ok
896
- };
897
- } catch (e) {
898
- return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
899
- }
900
- },
881
+ const evals: EvalRegistry = {
882
+ exact_match_json: async ({ output, expectedOutput }) => {
883
+ if (!expectedOutput) {
884
+ return { score: 0, label: 'error', reason: 'No expected output provided', passed: false };
885
+ }
886
+ try {
887
+ const ok = JSON.stringify(output) === JSON.stringify(JSON.parse(expectedOutput));
888
+ return {
889
+ score: ok ? 1 : 0,
890
+ label: ok ? 'correct' : 'incorrect',
891
+ reason: ok ? 'Exact match' : 'Mismatch',
892
+ passed: ok
893
+ };
894
+ } catch (e) {
895
+ return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
896
+ }
901
897
  },
902
898
  };
903
899
 
@@ -1604,6 +1600,7 @@ from pathlib import Path
1604
1600
  from dotenv import load_dotenv
1605
1601
 
1606
1602
  ${loaderImport}
1603
+ from agentmark.prompt_core import EvalRegistry
1607
1604
  from agentmark_claude_agent_sdk import (
1608
1605
  create_claude_agent_client,
1609
1606
  ClaudeAgentModelRegistry,
@@ -1620,9 +1617,7 @@ model_registry.register_providers({
1620
1617
  })
1621
1618
 
1622
1619
 
1623
- # Eval registry \u2014 define evaluation functions for experiments
1624
- # TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
1625
- # The TypeScript SDK already uses scores with schema definitions.
1620
+ # Evaluation functions \u2014 used by experiments to score model outputs
1626
1621
  def exact_match_json(params):
1627
1622
  """Check if output matches expected output exactly."""
1628
1623
  output = params.get("output")
@@ -1642,7 +1637,7 @@ def exact_match_json(params):
1642
1637
  except (json.JSONDecodeError, TypeError):
1643
1638
  return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
1644
1639
 
1645
- eval_registry = {
1640
+ evals: EvalRegistry = {
1646
1641
  "exact_match_json": exact_match_json,
1647
1642
  }
1648
1643
 
@@ -1652,7 +1647,7 @@ ${loaderSetup}
1652
1647
  # Claude Agent SDK handles tools natively through the SDK
1653
1648
  client = create_claude_agent_client(
1654
1649
  model_registry=model_registry,
1655
- eval_registry=eval_registry,
1650
+ evals=evals,
1656
1651
  loader=loader,
1657
1652
  )
1658
1653
 
@@ -1671,6 +1666,7 @@ from pathlib import Path
1671
1666
  from dotenv import load_dotenv
1672
1667
 
1673
1668
  ${loaderImport}
1669
+ from agentmark.prompt_core import EvalRegistry
1674
1670
  from agentmark_pydantic_ai_v0 import (
1675
1671
  create_pydantic_ai_client,
1676
1672
  PydanticAIModelRegistry,
@@ -1695,9 +1691,7 @@ model_registry.register_providers({
1695
1691
  tools = []
1696
1692
 
1697
1693
 
1698
- # Eval registry \u2014 define evaluation functions for experiments
1699
- # TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
1700
- # The TypeScript SDK already uses scores with schema definitions.
1694
+ # Evaluation functions \u2014 used by experiments to score model outputs
1701
1695
  def exact_match_json(params):
1702
1696
  """Check if output matches expected output exactly."""
1703
1697
  output = params.get("output")
@@ -1717,7 +1711,7 @@ def exact_match_json(params):
1717
1711
  except (json.JSONDecodeError, TypeError):
1718
1712
  return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
1719
1713
 
1720
- eval_registry = {
1714
+ evals: EvalRegistry = {
1721
1715
  "exact_match_json": exact_match_json,
1722
1716
  }
1723
1717
 
@@ -1727,7 +1721,7 @@ ${loaderSetup}
1727
1721
  client = create_pydantic_ai_client(
1728
1722
  model_registry=model_registry,
1729
1723
  tools=tools,
1730
- eval_registry=eval_registry,
1724
+ evals=evals,
1731
1725
  loader=loader,
1732
1726
  )
1733
1727