create-agentmark 0.10.3 → 0.10.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -815,7 +815,7 @@ import type { ToolsInput } from '@mastra/core/agent';
|
|
|
815
815
|
import { z } from 'zod';` : `import { tool } from 'ai';
|
|
816
816
|
import type { Tool } from 'ai';
|
|
817
817
|
import { z } from 'zod';`;
|
|
818
|
-
const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry,
|
|
818
|
+
const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, evals, adapterOptions, mcpServers: { 'customer-support': customerSupportTools } });` : `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, tools, evals });`;
|
|
819
819
|
const toolSchemaField = isMastra ? `parameters: z.object({ query: z.string().describe('The search query') })` : `inputSchema: z.object({ query: z.string().describe('The search query') })`;
|
|
820
820
|
const toolsReturnType = isMastra ? "ToolsInput" : "Record<string, Tool>";
|
|
821
821
|
const toolsSetup = isClaudeAgentSdk ? `
|
|
@@ -868,7 +868,7 @@ import path from 'node:path';
|
|
|
868
868
|
import dotenv from 'dotenv';
|
|
869
869
|
dotenv.config({ path: path.resolve(__dirname, '.env') });
|
|
870
870
|
import { createAgentMarkClient, ${modelRegistry} } from "${adapterConfig.package}";
|
|
871
|
-
import type {
|
|
871
|
+
import type { EvalRegistry } from "@agentmark-ai/prompt-core";
|
|
872
872
|
${loaderImport}
|
|
873
873
|
import AgentMarkTypes from './agentmark.types';
|
|
874
874
|
${providerImport}
|
|
@@ -878,26 +878,22 @@ ${adapterOptionsImport}
|
|
|
878
878
|
${modelRegistrySetup}
|
|
879
879
|
${toolsSetup}
|
|
880
880
|
|
|
881
|
-
const
|
|
882
|
-
exact_match_json: {
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
} catch (e) {
|
|
898
|
-
return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
|
|
899
|
-
}
|
|
900
|
-
},
|
|
881
|
+
const evals: EvalRegistry = {
|
|
882
|
+
exact_match_json: async ({ output, expectedOutput }) => {
|
|
883
|
+
if (!expectedOutput) {
|
|
884
|
+
return { score: 0, label: 'error', reason: 'No expected output provided', passed: false };
|
|
885
|
+
}
|
|
886
|
+
try {
|
|
887
|
+
const ok = JSON.stringify(output) === JSON.stringify(JSON.parse(expectedOutput));
|
|
888
|
+
return {
|
|
889
|
+
score: ok ? 1 : 0,
|
|
890
|
+
label: ok ? 'correct' : 'incorrect',
|
|
891
|
+
reason: ok ? 'Exact match' : 'Mismatch',
|
|
892
|
+
passed: ok
|
|
893
|
+
};
|
|
894
|
+
} catch (e) {
|
|
895
|
+
return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
|
|
896
|
+
}
|
|
901
897
|
},
|
|
902
898
|
};
|
|
903
899
|
|
|
@@ -1604,6 +1600,7 @@ from pathlib import Path
|
|
|
1604
1600
|
from dotenv import load_dotenv
|
|
1605
1601
|
|
|
1606
1602
|
${loaderImport}
|
|
1603
|
+
from agentmark.prompt_core import EvalRegistry
|
|
1607
1604
|
from agentmark_claude_agent_sdk import (
|
|
1608
1605
|
create_claude_agent_client,
|
|
1609
1606
|
ClaudeAgentModelRegistry,
|
|
@@ -1620,9 +1617,7 @@ model_registry.register_providers({
|
|
|
1620
1617
|
})
|
|
1621
1618
|
|
|
1622
1619
|
|
|
1623
|
-
#
|
|
1624
|
-
# TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
|
|
1625
|
-
# The TypeScript SDK already uses scores with schema definitions.
|
|
1620
|
+
# Evaluation functions \u2014 used by experiments to score model outputs
|
|
1626
1621
|
def exact_match_json(params):
|
|
1627
1622
|
"""Check if output matches expected output exactly."""
|
|
1628
1623
|
output = params.get("output")
|
|
@@ -1642,7 +1637,7 @@ def exact_match_json(params):
|
|
|
1642
1637
|
except (json.JSONDecodeError, TypeError):
|
|
1643
1638
|
return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
|
|
1644
1639
|
|
|
1645
|
-
|
|
1640
|
+
evals: EvalRegistry = {
|
|
1646
1641
|
"exact_match_json": exact_match_json,
|
|
1647
1642
|
}
|
|
1648
1643
|
|
|
@@ -1652,7 +1647,7 @@ ${loaderSetup}
|
|
|
1652
1647
|
# Claude Agent SDK handles tools natively through the SDK
|
|
1653
1648
|
client = create_claude_agent_client(
|
|
1654
1649
|
model_registry=model_registry,
|
|
1655
|
-
|
|
1650
|
+
evals=evals,
|
|
1656
1651
|
loader=loader,
|
|
1657
1652
|
)
|
|
1658
1653
|
|
|
@@ -1671,6 +1666,7 @@ from pathlib import Path
|
|
|
1671
1666
|
from dotenv import load_dotenv
|
|
1672
1667
|
|
|
1673
1668
|
${loaderImport}
|
|
1669
|
+
from agentmark.prompt_core import EvalRegistry
|
|
1674
1670
|
from agentmark_pydantic_ai_v0 import (
|
|
1675
1671
|
create_pydantic_ai_client,
|
|
1676
1672
|
PydanticAIModelRegistry,
|
|
@@ -1695,9 +1691,7 @@ model_registry.register_providers({
|
|
|
1695
1691
|
tools = []
|
|
1696
1692
|
|
|
1697
1693
|
|
|
1698
|
-
#
|
|
1699
|
-
# TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
|
|
1700
|
-
# The TypeScript SDK already uses scores with schema definitions.
|
|
1694
|
+
# Evaluation functions \u2014 used by experiments to score model outputs
|
|
1701
1695
|
def exact_match_json(params):
|
|
1702
1696
|
"""Check if output matches expected output exactly."""
|
|
1703
1697
|
output = params.get("output")
|
|
@@ -1717,7 +1711,7 @@ def exact_match_json(params):
|
|
|
1717
1711
|
except (json.JSONDecodeError, TypeError):
|
|
1718
1712
|
return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
|
|
1719
1713
|
|
|
1720
|
-
|
|
1714
|
+
evals: EvalRegistry = {
|
|
1721
1715
|
"exact_match_json": exact_match_json,
|
|
1722
1716
|
}
|
|
1723
1717
|
|
|
@@ -1727,7 +1721,7 @@ ${loaderSetup}
|
|
|
1727
1721
|
client = create_pydantic_ai_client(
|
|
1728
1722
|
model_registry=model_registry,
|
|
1729
1723
|
tools=tools,
|
|
1730
|
-
|
|
1724
|
+
evals=evals,
|
|
1731
1725
|
loader=loader,
|
|
1732
1726
|
)
|
|
1733
1727
|
|