create-agentmark 0.10.4 → 0.10.6
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in that public registry.
package/dist/index.js
CHANGED
|
@@ -71,7 +71,7 @@ sdk.initTracing({ disableBatch: true });
|
|
|
71
71
|
`;
|
|
72
72
|
const staticTracingInit = `
|
|
73
73
|
// Initialize tracing - traces will be sent to local dev server
|
|
74
|
-
// Make sure to run "
|
|
74
|
+
// Make sure to run "npx agentmark dev" in another terminal first
|
|
75
75
|
// To disable tracing, comment out sdk.initTracing() below
|
|
76
76
|
const sdk = new AgentMarkSDK({
|
|
77
77
|
apiKey: "",
|
|
@@ -242,7 +242,7 @@ AGENTMARK_APP_ID=your_agentmark_app_id
|
|
|
242
242
|
`;
|
|
243
243
|
return `${apiKeyName}=${apiKeyValue}
|
|
244
244
|
${cloudEnvVars}
|
|
245
|
-
# Learn more: https://docs.agentmark.co/
|
|
245
|
+
# Learn more: https://docs.agentmark.co/getting-started/quickstart
|
|
246
246
|
`;
|
|
247
247
|
};
|
|
248
248
|
|
|
@@ -487,9 +487,10 @@ var setupPackageJson = (targetPath = ".", deploymentMode = "cloud", projectInfo
|
|
|
487
487
|
console.log("Creating package.json...");
|
|
488
488
|
execSync("npm init -y", { cwd: targetPath });
|
|
489
489
|
}
|
|
490
|
+
const demoScript = deploymentMode === "static" ? "npx agentmark build --out dist/agentmark && npx tsx index.ts" : "npx tsx index.ts";
|
|
490
491
|
if (isExistingProject && fs2.existsSync(packageJsonPath)) {
|
|
491
492
|
const scriptsToAdd = {
|
|
492
|
-
"demo":
|
|
493
|
+
"demo": demoScript,
|
|
493
494
|
"agentmark": "agentmark"
|
|
494
495
|
};
|
|
495
496
|
if (deploymentMode === "static") {
|
|
@@ -509,9 +510,12 @@ var setupPackageJson = (targetPath = ".", deploymentMode = "cloud", projectInfo
|
|
|
509
510
|
const pkgJson = fs2.readJsonSync(packageJsonPath);
|
|
510
511
|
pkgJson.name = pkgJson.name === "test" || !pkgJson.name ? "agentmark-example-app" : pkgJson.name;
|
|
511
512
|
pkgJson.description = pkgJson.description || "A simple Node.js app using the Agentmark SDK";
|
|
513
|
+
if (pkgJson.type === "commonjs") {
|
|
514
|
+
delete pkgJson.type;
|
|
515
|
+
}
|
|
512
516
|
const scripts = {
|
|
513
517
|
...pkgJson.scripts,
|
|
514
|
-
"demo":
|
|
518
|
+
"demo": demoScript,
|
|
515
519
|
"agentmark": "agentmark"
|
|
516
520
|
};
|
|
517
521
|
if (deploymentMode === "static") {
|
|
@@ -641,7 +645,7 @@ object_config:
|
|
|
641
645
|
- names
|
|
642
646
|
test_settings:
|
|
643
647
|
dataset: party.jsonl
|
|
644
|
-
|
|
648
|
+
evals:
|
|
645
649
|
- exact_match_json
|
|
646
650
|
props:
|
|
647
651
|
party_text: "We're having a party with Alice, Bob, and Carol."
|
|
@@ -757,12 +761,16 @@ var getClientConfigContent = (options) => {
|
|
|
757
761
|
import { FileLoader } from "@agentmark-ai/loader-file";`;
|
|
758
762
|
const loaderSetup = deploymentMode === "cloud" ? ` // ApiLoader works for both development and production
|
|
759
763
|
// - Development: 'agentmark dev' sets AGENTMARK_BASE_URL to localhost
|
|
760
|
-
// - Production: Set AGENTMARK_API_KEY and AGENTMARK_APP_ID for cloud
|
|
764
|
+
// - Production: Set AGENTMARK_API_KEY and AGENTMARK_APP_ID for cloud.
|
|
765
|
+
// AGENTMARK_BASE_URL overrides the default https://api.agentmark.co
|
|
766
|
+
// target \u2014 managed deployments use this to point back at the gateway
|
|
767
|
+
// that dispatched the job.
|
|
761
768
|
const loader = process.env.NODE_ENV === 'development'
|
|
762
769
|
? ApiLoader.local({ baseUrl: process.env.AGENTMARK_BASE_URL || 'http://localhost:9418' })
|
|
763
770
|
: ApiLoader.cloud({
|
|
764
771
|
apiKey: process.env.AGENTMARK_API_KEY!,
|
|
765
772
|
appId: process.env.AGENTMARK_APP_ID!,
|
|
773
|
+
baseUrl: process.env.AGENTMARK_BASE_URL,
|
|
766
774
|
});` : ` const loader = process.env.NODE_ENV === 'development'
|
|
767
775
|
? ApiLoader.local({ baseUrl: process.env.AGENTMARK_BASE_URL || 'http://localhost:9418' })
|
|
768
776
|
: new FileLoader('./dist/agentmark');`;
|
|
@@ -815,7 +823,7 @@ import type { ToolsInput } from '@mastra/core/agent';
|
|
|
815
823
|
import { z } from 'zod';` : `import { tool } from 'ai';
|
|
816
824
|
import type { Tool } from 'ai';
|
|
817
825
|
import { z } from 'zod';`;
|
|
818
|
-
const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry,
|
|
826
|
+
const createClientCall = isClaudeAgentSdk ? `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, evals, adapterOptions, mcpServers: { 'customer-support': customerSupportTools } });` : `return createAgentMarkClient<AgentMarkTypes>({ loader, modelRegistry, tools, evals });`;
|
|
819
827
|
const toolSchemaField = isMastra ? `parameters: z.object({ query: z.string().describe('The search query') })` : `inputSchema: z.object({ query: z.string().describe('The search query') })`;
|
|
820
828
|
const toolsReturnType = isMastra ? "ToolsInput" : "Record<string, Tool>";
|
|
821
829
|
const toolsSetup = isClaudeAgentSdk ? `
|
|
@@ -868,7 +876,7 @@ import path from 'node:path';
|
|
|
868
876
|
import dotenv from 'dotenv';
|
|
869
877
|
dotenv.config({ path: path.resolve(__dirname, '.env') });
|
|
870
878
|
import { createAgentMarkClient, ${modelRegistry} } from "${adapterConfig.package}";
|
|
871
|
-
import type {
|
|
879
|
+
import type { EvalRegistry } from "@agentmark-ai/prompt-core";
|
|
872
880
|
${loaderImport}
|
|
873
881
|
import AgentMarkTypes from './agentmark.types';
|
|
874
882
|
${providerImport}
|
|
@@ -878,26 +886,22 @@ ${adapterOptionsImport}
|
|
|
878
886
|
${modelRegistrySetup}
|
|
879
887
|
${toolsSetup}
|
|
880
888
|
|
|
881
|
-
const
|
|
882
|
-
exact_match_json: {
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
} catch (e) {
|
|
898
|
-
return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
|
|
899
|
-
}
|
|
900
|
-
},
|
|
889
|
+
const evals: EvalRegistry = {
|
|
890
|
+
exact_match_json: async ({ output, expectedOutput }) => {
|
|
891
|
+
if (!expectedOutput) {
|
|
892
|
+
return { score: 0, label: 'error', reason: 'No expected output provided', passed: false };
|
|
893
|
+
}
|
|
894
|
+
try {
|
|
895
|
+
const ok = JSON.stringify(output) === JSON.stringify(JSON.parse(expectedOutput));
|
|
896
|
+
return {
|
|
897
|
+
score: ok ? 1 : 0,
|
|
898
|
+
label: ok ? 'correct' : 'incorrect',
|
|
899
|
+
reason: ok ? 'Exact match' : 'Mismatch',
|
|
900
|
+
passed: ok
|
|
901
|
+
};
|
|
902
|
+
} catch (e) {
|
|
903
|
+
return { score: 0, label: 'error', reason: 'Failed to parse expected output as JSON', passed: false };
|
|
904
|
+
}
|
|
901
905
|
},
|
|
902
906
|
};
|
|
903
907
|
|
|
@@ -1140,6 +1144,19 @@ var createExampleApp = async (client, targetPath = ".", apiKey = "", adapter = "
|
|
|
1140
1144
|
`${targetPath}/agentmark.client.ts`,
|
|
1141
1145
|
getClientConfigContent({ provider: modelProvider, adapter, deploymentMode })
|
|
1142
1146
|
);
|
|
1147
|
+
const gitignoreEntries = ["node_modules/", ".env", "*.agentmark-outputs/", "dist/"];
|
|
1148
|
+
if (shouldMergeFile(".gitignore", projectInfo, resolutions)) {
|
|
1149
|
+
const result = appendGitignore(targetPath, gitignoreEntries);
|
|
1150
|
+
if (result.added.length > 0) {
|
|
1151
|
+
console.log(`\u2705 Added to .gitignore: ${result.added.join(", ")}`);
|
|
1152
|
+
}
|
|
1153
|
+
if (result.skipped.length > 0) {
|
|
1154
|
+
console.log(`\u23ED\uFE0F Already in .gitignore: ${result.skipped.join(", ")}`);
|
|
1155
|
+
}
|
|
1156
|
+
} else {
|
|
1157
|
+
const gitignore = gitignoreEntries.join("\n");
|
|
1158
|
+
fs4.writeFileSync(`${targetPath}/.gitignore`, gitignore);
|
|
1159
|
+
}
|
|
1143
1160
|
if (shouldMergeFile(".env", projectInfo, resolutions)) {
|
|
1144
1161
|
const envVars = {};
|
|
1145
1162
|
const apiKeyEnvVar = adapter === "claude-agent-sdk" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY";
|
|
@@ -1162,19 +1179,6 @@ var createExampleApp = async (client, targetPath = ".", apiKey = "", adapter = "
|
|
|
1162
1179
|
} else {
|
|
1163
1180
|
fs4.writeFileSync(`${targetPath}/.env`, getEnvFileContent(modelProvider, apiKey, adapter, deploymentMode));
|
|
1164
1181
|
}
|
|
1165
|
-
const gitignoreEntries = ["node_modules/", ".env", "*.agentmark-outputs/", "dist/"];
|
|
1166
|
-
if (shouldMergeFile(".gitignore", projectInfo, resolutions)) {
|
|
1167
|
-
const result = appendGitignore(targetPath, gitignoreEntries);
|
|
1168
|
-
if (result.added.length > 0) {
|
|
1169
|
-
console.log(`\u2705 Added to .gitignore: ${result.added.join(", ")}`);
|
|
1170
|
-
}
|
|
1171
|
-
if (result.skipped.length > 0) {
|
|
1172
|
-
console.log(`\u23ED\uFE0F Already in .gitignore: ${result.skipped.join(", ")}`);
|
|
1173
|
-
}
|
|
1174
|
-
} else {
|
|
1175
|
-
const gitignore = gitignoreEntries.join("\n");
|
|
1176
|
-
fs4.writeFileSync(`${targetPath}/.gitignore`, gitignore);
|
|
1177
|
-
}
|
|
1178
1182
|
if (!isExistingProject) {
|
|
1179
1183
|
fs4.writeFileSync(
|
|
1180
1184
|
`${targetPath}/index.ts`,
|
|
@@ -1223,15 +1227,23 @@ sdk.initTracing({ disableBatch: true });
|
|
|
1223
1227
|
const adapter = new ${handlerClass}(client as any);
|
|
1224
1228
|
|
|
1225
1229
|
export default async function handler(request: {
|
|
1226
|
-
type: 'prompt-run' | 'dataset-run';
|
|
1230
|
+
type: 'prompt-run' | 'dataset-run' | 'get-evals';
|
|
1227
1231
|
data: {
|
|
1228
|
-
ast
|
|
1232
|
+
ast?: any;
|
|
1229
1233
|
customProps?: Record<string, unknown>;
|
|
1230
1234
|
options?: { shouldStream?: boolean };
|
|
1231
1235
|
experimentId?: string;
|
|
1232
1236
|
datasetPath?: string;
|
|
1233
1237
|
};
|
|
1234
1238
|
}) {
|
|
1239
|
+
if (request.type === 'get-evals') {
|
|
1240
|
+
return {
|
|
1241
|
+
type: 'evals',
|
|
1242
|
+
result: JSON.stringify(Object.keys(client.getEvalRegistry())),
|
|
1243
|
+
traceId: '',
|
|
1244
|
+
};
|
|
1245
|
+
}
|
|
1246
|
+
|
|
1235
1247
|
if (request.type === 'prompt-run') {
|
|
1236
1248
|
return adapter.runPrompt(request.data.ast, {
|
|
1237
1249
|
shouldStream: request.data.options?.shouldStream,
|
|
@@ -1331,20 +1343,11 @@ main().catch((err) => {
|
|
|
1331
1343
|
console.log("\n" + "\u2550".repeat(70));
|
|
1332
1344
|
console.log("Next Steps");
|
|
1333
1345
|
console.log("\u2550".repeat(70));
|
|
1334
|
-
const runCmd = packageManager?.runCmd ?? "npm run";
|
|
1335
|
-
const pkgJsonPath = path2.join(targetPath, "package.json");
|
|
1336
|
-
let agentmarkScriptName = "agentmark";
|
|
1337
|
-
if (fs4.existsSync(pkgJsonPath)) {
|
|
1338
|
-
const pkgJson = fs4.readJsonSync(pkgJsonPath);
|
|
1339
|
-
if (pkgJson.scripts?.["agentmark:agentmark"]) {
|
|
1340
|
-
agentmarkScriptName = "agentmark:agentmark";
|
|
1341
|
-
}
|
|
1342
|
-
}
|
|
1343
1346
|
console.log("\n Get Started:");
|
|
1344
1347
|
if (folderName !== "." && folderName !== "./" && !isExistingProject) {
|
|
1345
1348
|
console.log(` $ cd ${folderName}`);
|
|
1346
1349
|
}
|
|
1347
|
-
console.log(` $
|
|
1350
|
+
console.log(` $ npx agentmark dev
|
|
1348
1351
|
`);
|
|
1349
1352
|
console.log("\u2500".repeat(70));
|
|
1350
1353
|
console.log("Resources");
|
|
@@ -1471,7 +1474,8 @@ var setupMCPServer2 = (client, targetPath) => {
|
|
|
1471
1474
|
return;
|
|
1472
1475
|
}
|
|
1473
1476
|
};
|
|
1474
|
-
var getPyprojectContent = (projectName, adapter) => {
|
|
1477
|
+
var getPyprojectContent = (projectName, adapter, deploymentMode) => {
|
|
1478
|
+
const pyModules = deploymentMode === "cloud" ? `["agentmark_client", "main", "handler"]` : `["agentmark_client", "main"]`;
|
|
1475
1479
|
if (adapter === "claude-agent-sdk") {
|
|
1476
1480
|
return `[project]
|
|
1477
1481
|
name = "${projectName}"
|
|
@@ -1479,9 +1483,9 @@ version = "0.1.0"
|
|
|
1479
1483
|
description = "An AgentMark application using Claude Agent SDK"
|
|
1480
1484
|
requires-python = ">=3.12"
|
|
1481
1485
|
dependencies = [
|
|
1482
|
-
"agentmark-sdk>=0.
|
|
1483
|
-
"agentmark-claude-agent-sdk-v0>=0.1.
|
|
1484
|
-
"agentmark-prompt-core>=0.1.
|
|
1486
|
+
"agentmark-sdk>=0.2.0",
|
|
1487
|
+
"agentmark-claude-agent-sdk-v0>=0.1.4",
|
|
1488
|
+
"agentmark-prompt-core>=0.1.2",
|
|
1485
1489
|
"python-dotenv>=1.0.0",
|
|
1486
1490
|
"claude-agent-sdk>=0.1.0",
|
|
1487
1491
|
]
|
|
@@ -1494,8 +1498,11 @@ dev = [
|
|
|
1494
1498
|
]
|
|
1495
1499
|
|
|
1496
1500
|
[build-system]
|
|
1497
|
-
requires = ["
|
|
1498
|
-
build-backend = "
|
|
1501
|
+
requires = ["setuptools>=61", "wheel"]
|
|
1502
|
+
build-backend = "setuptools.build_meta"
|
|
1503
|
+
|
|
1504
|
+
[tool.setuptools]
|
|
1505
|
+
py-modules = ${pyModules}
|
|
1499
1506
|
|
|
1500
1507
|
[tool.pytest.ini_options]
|
|
1501
1508
|
asyncio_mode = "auto"
|
|
@@ -1510,11 +1517,11 @@ version = "0.1.0"
|
|
|
1510
1517
|
description = "An AgentMark application using Pydantic AI"
|
|
1511
1518
|
requires-python = ">=3.12"
|
|
1512
1519
|
dependencies = [
|
|
1513
|
-
"agentmark-sdk>=0.
|
|
1514
|
-
"agentmark-pydantic-ai-v0>=0.1.
|
|
1515
|
-
"agentmark-prompt-core>=0.1.
|
|
1520
|
+
"agentmark-sdk>=0.2.0",
|
|
1521
|
+
"agentmark-pydantic-ai-v0>=0.1.4",
|
|
1522
|
+
"agentmark-prompt-core>=0.1.2",
|
|
1516
1523
|
"python-dotenv>=1.0.0",
|
|
1517
|
-
"pydantic-ai[openai]>=
|
|
1524
|
+
"pydantic-ai-slim[openai]>=1.0,<2.0",
|
|
1518
1525
|
]
|
|
1519
1526
|
|
|
1520
1527
|
[project.optional-dependencies]
|
|
@@ -1523,12 +1530,13 @@ dev = [
|
|
|
1523
1530
|
"pytest-asyncio>=0.21",
|
|
1524
1531
|
"mypy>=1.0",
|
|
1525
1532
|
]
|
|
1526
|
-
anthropic = ["pydantic-ai[anthropic]"]
|
|
1527
|
-
gemini = ["pydantic-ai[gemini]"]
|
|
1528
1533
|
|
|
1529
1534
|
[build-system]
|
|
1530
|
-
requires = ["
|
|
1531
|
-
build-backend = "
|
|
1535
|
+
requires = ["setuptools>=61", "wheel"]
|
|
1536
|
+
build-backend = "setuptools.build_meta"
|
|
1537
|
+
|
|
1538
|
+
[tool.setuptools]
|
|
1539
|
+
py-modules = ${pyModules}
|
|
1532
1540
|
|
|
1533
1541
|
[tool.pytest.ini_options]
|
|
1534
1542
|
asyncio_mode = "auto"
|
|
@@ -1538,8 +1546,8 @@ strict = true
|
|
|
1538
1546
|
`;
|
|
1539
1547
|
};
|
|
1540
1548
|
var getHandlerPyContent = (adapter) => {
|
|
1541
|
-
const webhookClass = adapter === "claude-agent-sdk" ? "
|
|
1542
|
-
const webhookImport = adapter === "claude-agent-sdk" ? "from
|
|
1549
|
+
const webhookClass = adapter === "claude-agent-sdk" ? "ClaudeAgentWebhookHandler" : "PydanticAIWebhookHandler";
|
|
1550
|
+
const webhookImport = adapter === "claude-agent-sdk" ? "from agentmark_claude_agent_sdk_v0 import ClaudeAgentWebhookHandler" : "from agentmark_pydantic_ai_v0 import PydanticAIWebhookHandler";
|
|
1543
1551
|
return `"""AgentMark handler for managed cloud deployments.
|
|
1544
1552
|
|
|
1545
1553
|
This file is used by the AgentMark platform to execute prompts and experiments
|
|
@@ -1564,10 +1572,18 @@ adapter = ${webhookClass}(client)
|
|
|
1564
1572
|
|
|
1565
1573
|
|
|
1566
1574
|
async def handler(request: dict):
|
|
1567
|
-
"""Handle prompt-run
|
|
1575
|
+
"""Handle prompt-run, dataset-run, and get-evals requests from the platform."""
|
|
1568
1576
|
req_type = request.get("type")
|
|
1569
1577
|
data = request.get("data", {})
|
|
1570
1578
|
|
|
1579
|
+
if req_type == "get-evals":
|
|
1580
|
+
import json
|
|
1581
|
+
return {
|
|
1582
|
+
"type": "evals",
|
|
1583
|
+
"result": json.dumps(list(client.get_eval_registry().keys())),
|
|
1584
|
+
"traceId": "",
|
|
1585
|
+
}
|
|
1586
|
+
|
|
1571
1587
|
if req_type == "prompt-run":
|
|
1572
1588
|
return await adapter.run_prompt(data["ast"], {
|
|
1573
1589
|
"shouldStream": data.get("options", {}).get("shouldStream", True),
|
|
@@ -1604,7 +1620,8 @@ from pathlib import Path
|
|
|
1604
1620
|
from dotenv import load_dotenv
|
|
1605
1621
|
|
|
1606
1622
|
${loaderImport}
|
|
1607
|
-
from
|
|
1623
|
+
from agentmark.prompt_core import EvalRegistry
|
|
1624
|
+
from agentmark_claude_agent_sdk_v0 import (
|
|
1608
1625
|
create_claude_agent_client,
|
|
1609
1626
|
ClaudeAgentModelRegistry,
|
|
1610
1627
|
)
|
|
@@ -1620,9 +1637,7 @@ model_registry.register_providers({
|
|
|
1620
1637
|
})
|
|
1621
1638
|
|
|
1622
1639
|
|
|
1623
|
-
#
|
|
1624
|
-
# TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
|
|
1625
|
-
# The TypeScript SDK already uses scores with schema definitions.
|
|
1640
|
+
# Evaluation functions \u2014 used by experiments to score model outputs
|
|
1626
1641
|
def exact_match_json(params):
|
|
1627
1642
|
"""Check if output matches expected output exactly."""
|
|
1628
1643
|
output = params.get("output")
|
|
@@ -1642,7 +1657,7 @@ def exact_match_json(params):
|
|
|
1642
1657
|
except (json.JSONDecodeError, TypeError):
|
|
1643
1658
|
return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
|
|
1644
1659
|
|
|
1645
|
-
|
|
1660
|
+
evals: EvalRegistry = {
|
|
1646
1661
|
"exact_match_json": exact_match_json,
|
|
1647
1662
|
}
|
|
1648
1663
|
|
|
@@ -1652,7 +1667,7 @@ ${loaderSetup}
|
|
|
1652
1667
|
# Claude Agent SDK handles tools natively through the SDK
|
|
1653
1668
|
client = create_claude_agent_client(
|
|
1654
1669
|
model_registry=model_registry,
|
|
1655
|
-
|
|
1670
|
+
evals=evals,
|
|
1656
1671
|
loader=loader,
|
|
1657
1672
|
)
|
|
1658
1673
|
|
|
@@ -1671,6 +1686,7 @@ from pathlib import Path
|
|
|
1671
1686
|
from dotenv import load_dotenv
|
|
1672
1687
|
|
|
1673
1688
|
${loaderImport}
|
|
1689
|
+
from agentmark.prompt_core import EvalRegistry
|
|
1674
1690
|
from agentmark_pydantic_ai_v0 import (
|
|
1675
1691
|
create_pydantic_ai_client,
|
|
1676
1692
|
PydanticAIModelRegistry,
|
|
@@ -1695,9 +1711,7 @@ model_registry.register_providers({
|
|
|
1695
1711
|
tools = []
|
|
1696
1712
|
|
|
1697
1713
|
|
|
1698
|
-
#
|
|
1699
|
-
# TODO: Update to use scores (ScoreRegistry) once the Python SDK supports it.
|
|
1700
|
-
# The TypeScript SDK already uses scores with schema definitions.
|
|
1714
|
+
# Evaluation functions \u2014 used by experiments to score model outputs
|
|
1701
1715
|
def exact_match_json(params):
|
|
1702
1716
|
"""Check if output matches expected output exactly."""
|
|
1703
1717
|
output = params.get("output")
|
|
@@ -1717,7 +1731,7 @@ def exact_match_json(params):
|
|
|
1717
1731
|
except (json.JSONDecodeError, TypeError):
|
|
1718
1732
|
return {"score": 0, "label": "error", "reason": "Failed to parse JSON", "passed": False}
|
|
1719
1733
|
|
|
1720
|
-
|
|
1734
|
+
evals: EvalRegistry = {
|
|
1721
1735
|
"exact_match_json": exact_match_json,
|
|
1722
1736
|
}
|
|
1723
1737
|
|
|
@@ -1727,7 +1741,7 @@ ${loaderSetup}
|
|
|
1727
1741
|
client = create_pydantic_ai_client(
|
|
1728
1742
|
model_registry=model_registry,
|
|
1729
1743
|
tools=tools,
|
|
1730
|
-
|
|
1744
|
+
evals=evals,
|
|
1731
1745
|
loader=loader,
|
|
1732
1746
|
)
|
|
1733
1747
|
|
|
@@ -1747,7 +1761,7 @@ sdk.init_tracing(disable_batch=True)
|
|
|
1747
1761
|
`;
|
|
1748
1762
|
const staticTracingInit = `
|
|
1749
1763
|
# Initialize tracing - traces will be sent to local dev server
|
|
1750
|
-
# Make sure to run "
|
|
1764
|
+
# Make sure to run "npx agentmark dev" in another terminal first
|
|
1751
1765
|
# To disable tracing, comment out sdk.init_tracing() below
|
|
1752
1766
|
sdk = AgentMarkSDK(
|
|
1753
1767
|
api_key="",
|
|
@@ -1760,49 +1774,40 @@ sdk.init_tracing(disable_batch=True)
|
|
|
1760
1774
|
if (adapter === "claude-agent-sdk") {
|
|
1761
1775
|
return `"""Example usage of AgentMark with Claude Agent SDK.
|
|
1762
1776
|
|
|
1763
|
-
Run with:
|
|
1777
|
+
Run with:
|
|
1778
|
+
npx agentmark build # compile party-planner.prompt.mdx -> dist/agentmark/*.json
|
|
1779
|
+
python main.py
|
|
1764
1780
|
"""
|
|
1765
1781
|
|
|
1766
1782
|
import asyncio
|
|
1767
|
-
import json
|
|
1768
1783
|
import os
|
|
1769
|
-
from pathlib import Path
|
|
1770
1784
|
|
|
1771
1785
|
from agentmark_sdk import AgentMarkSDK
|
|
1772
|
-
from
|
|
1786
|
+
from agentmark_claude_agent_sdk_v0 import traced_query
|
|
1773
1787
|
from agentmark_client import client
|
|
1774
1788
|
${tracingInit}
|
|
1775
1789
|
|
|
1776
1790
|
async def main():
|
|
1777
|
-
"""Run the party planner prompt."""
|
|
1778
|
-
#
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1791
|
+
"""Run the party planner prompt (object_config: extracts attendee names)."""
|
|
1792
|
+
# \`agentmark build\` writes pre-compiled prompts to dist/agentmark/.
|
|
1793
|
+
# The FileLoader reads them by name \u2014 extension is optional.
|
|
1794
|
+
try:
|
|
1795
|
+
prompt = await client.load_object_prompt("party-planner.prompt.mdx")
|
|
1796
|
+
except FileNotFoundError:
|
|
1797
|
+
print("Pre-built prompt not found. Run 'npx agentmark build' first.")
|
|
1783
1798
|
return
|
|
1784
1799
|
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
# Load and format the prompt
|
|
1789
|
-
prompt = await client.load_text_prompt(ast)
|
|
1790
|
-
params = await prompt.format(props={
|
|
1791
|
-
"numberOfGuests": 10,
|
|
1792
|
-
"theme": "80s disco",
|
|
1793
|
-
"dietaryRestrictions": ["vegetarian", "gluten-free"],
|
|
1800
|
+
adapted = await prompt.format(props={
|
|
1801
|
+
"party_text": "We're having a party with Alice, Bob, and Carol.",
|
|
1794
1802
|
})
|
|
1795
1803
|
|
|
1796
|
-
# Execute
|
|
1804
|
+
# Execute via Claude Agent SDK (streamed) with automatic OTEL tracing.
|
|
1805
|
+
# adapted.query.options.output_format is set to the object schema.
|
|
1797
1806
|
print("Running party planner prompt...")
|
|
1798
|
-
result = await run_text_prompt(params)
|
|
1799
|
-
|
|
1800
|
-
print("\\n" + "=" * 50)
|
|
1801
|
-
print("Party Plan:")
|
|
1802
1807
|
print("=" * 50)
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
print(
|
|
1808
|
+
async for message in traced_query(adapted):
|
|
1809
|
+
print(message)
|
|
1810
|
+
print("=" * 50)
|
|
1806
1811
|
|
|
1807
1812
|
|
|
1808
1813
|
if __name__ == "__main__":
|
|
@@ -1811,47 +1816,44 @@ if __name__ == "__main__":
|
|
|
1811
1816
|
}
|
|
1812
1817
|
return `"""Example usage of AgentMark with Pydantic AI.
|
|
1813
1818
|
|
|
1814
|
-
Run with:
|
|
1819
|
+
Run with:
|
|
1820
|
+
npx agentmark build # compile party-planner.prompt.mdx -> dist/agentmark/*.json
|
|
1821
|
+
python main.py
|
|
1815
1822
|
"""
|
|
1823
|
+
# To use a different LLM provider, install: pip install "pydantic-ai-slim[anthropic]" (or [google], [bedrock], etc.)
|
|
1816
1824
|
|
|
1817
1825
|
import asyncio
|
|
1818
|
-
import json
|
|
1819
1826
|
import os
|
|
1820
|
-
from pathlib import Path
|
|
1821
1827
|
|
|
1822
1828
|
from agentmark_sdk import AgentMarkSDK
|
|
1823
|
-
from agentmark_pydantic_ai_v0 import
|
|
1829
|
+
from agentmark_pydantic_ai_v0 import run_object_prompt
|
|
1824
1830
|
from agentmark_client import client
|
|
1825
1831
|
${tracingInit}
|
|
1826
1832
|
|
|
1827
1833
|
async def main():
|
|
1828
|
-
"""Run the party planner prompt."""
|
|
1829
|
-
#
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
+
"""Run the party planner prompt (object_config: extracts attendee names)."""
|
|
1835
|
+
# \`agentmark build\` writes pre-compiled prompts to dist/agentmark/.
|
|
1836
|
+
# The FileLoader reads them by name \u2014 extension is optional, and it
|
|
1837
|
+
# extracts the inner AST from the { ast, metadata } wrapper for you.
|
|
1838
|
+
try:
|
|
1839
|
+
prompt = await client.load_object_prompt("party-planner.prompt.mdx")
|
|
1840
|
+
except FileNotFoundError:
|
|
1841
|
+
print("Pre-built prompt not found. Run 'npx agentmark build' first.")
|
|
1834
1842
|
return
|
|
1835
1843
|
|
|
1836
|
-
with open(prompt_path) as f:
|
|
1837
|
-
ast = json.load(f)
|
|
1838
|
-
|
|
1839
|
-
# Load and format the prompt
|
|
1840
|
-
prompt = await client.load_text_prompt(ast)
|
|
1841
1844
|
params = await prompt.format(props={
|
|
1842
|
-
"
|
|
1843
|
-
"theme": "80s disco",
|
|
1844
|
-
"dietaryRestrictions": ["vegetarian", "gluten-free"],
|
|
1845
|
+
"party_text": "We're having a party with Alice, Bob, and Carol.",
|
|
1845
1846
|
})
|
|
1846
1847
|
|
|
1847
1848
|
# Execute the prompt
|
|
1848
1849
|
print("Running party planner prompt...")
|
|
1849
|
-
result = await
|
|
1850
|
+
result = await run_object_prompt(params)
|
|
1850
1851
|
|
|
1851
1852
|
print("\\n" + "=" * 50)
|
|
1852
|
-
print("
|
|
1853
|
+
print("Extracted attendees:")
|
|
1853
1854
|
print("=" * 50)
|
|
1854
|
-
|
|
1855
|
+
# result.output is a Pydantic model instance with the schema's fields
|
|
1856
|
+
print(result.output.names)
|
|
1855
1857
|
print("\\n" + "-" * 50)
|
|
1856
1858
|
print(f"Tokens used: {result.usage.total_tokens}")
|
|
1857
1859
|
|
|
@@ -1861,10 +1863,10 @@ if __name__ == "__main__":
|
|
|
1861
1863
|
`;
|
|
1862
1864
|
};
|
|
1863
1865
|
var getDevServerContent = (adapter) => {
|
|
1864
|
-
const adapterPackage = adapter === "claude-agent-sdk" ? "
|
|
1866
|
+
const adapterPackage = adapter === "claude-agent-sdk" ? "agentmark_claude_agent_sdk_v0" : "agentmark_pydantic_ai_v0";
|
|
1865
1867
|
return `"""Auto-generated webhook server for AgentMark development.
|
|
1866
1868
|
|
|
1867
|
-
This server is started by '
|
|
1869
|
+
This server is started by 'npx agentmark dev' (agentmark dev) and handles
|
|
1868
1870
|
prompt execution requests from the CLI.
|
|
1869
1871
|
"""
|
|
1870
1872
|
|
|
@@ -1904,6 +1906,62 @@ OPENAI_API_KEY=${apiKey}
|
|
|
1904
1906
|
# GOOGLE_API_KEY=your-key-here
|
|
1905
1907
|
`;
|
|
1906
1908
|
};
|
|
1909
|
+
var getReadmeContent = (projectName, adapter) => {
|
|
1910
|
+
const adapterName = adapter === "claude-agent-sdk" ? "Claude Agent SDK" : "Pydantic AI";
|
|
1911
|
+
const apiKeyEnvVar = adapter === "claude-agent-sdk" ? "ANTHROPIC_API_KEY" : "OPENAI_API_KEY";
|
|
1912
|
+
const directInstallCmd = adapter === "claude-agent-sdk" ? "pip install agentmark-sdk agentmark-claude-agent-sdk-v0 agentmark-prompt-core python-dotenv claude-agent-sdk" : 'pip install agentmark-sdk agentmark-pydantic-ai-v0 agentmark-prompt-core python-dotenv "pydantic-ai-slim[openai]>=1.0,<2.0"';
|
|
1913
|
+
return `# ${projectName}
|
|
1914
|
+
|
|
1915
|
+
An AgentMark application using ${adapterName}.
|
|
1916
|
+
|
|
1917
|
+
## Prerequisites
|
|
1918
|
+
|
|
1919
|
+
- Python 3.12+
|
|
1920
|
+
- **pip 26.1+** (older pip versions fail to resolve the dependency graph \u2014 see "Why pip 26.1?" below)
|
|
1921
|
+
|
|
1922
|
+
## Setup
|
|
1923
|
+
|
|
1924
|
+
\`\`\`bash
|
|
1925
|
+
# 1. Create and activate a virtual environment
|
|
1926
|
+
python -m venv .venv
|
|
1927
|
+
source .venv/bin/activate # On Windows: .venv\\Scripts\\activate
|
|
1928
|
+
|
|
1929
|
+
# 2. Upgrade pip (REQUIRED \u2014 older pip cannot resolve pydantic-ai's transitive graph)
|
|
1930
|
+
python -m pip install --upgrade "pip>=26.1"
|
|
1931
|
+
|
|
1932
|
+
# 3. Install dependencies
|
|
1933
|
+
${directInstallCmd}
|
|
1934
|
+
|
|
1935
|
+
# 4. Set your API key
|
|
1936
|
+
echo "${apiKeyEnvVar}=your-key-here" > .env
|
|
1937
|
+
\`\`\`
|
|
1938
|
+
|
|
1939
|
+
## Run
|
|
1940
|
+
|
|
1941
|
+
\`\`\`bash
|
|
1942
|
+
# Start the AgentMark dev server (in another terminal)
|
|
1943
|
+
npx agentmark dev
|
|
1944
|
+
|
|
1945
|
+
# Run the example prompt
|
|
1946
|
+
python main.py
|
|
1947
|
+
\`\`\`
|
|
1948
|
+
|
|
1949
|
+
Then open [http://localhost:9418](http://localhost:9418) to view your traces.
|
|
1950
|
+
|
|
1951
|
+
## Why pip 26.1?
|
|
1952
|
+
|
|
1953
|
+
The \`pydantic-ai-slim\` package transitively depends on \`mcp\`, \`fastmcp\`, and
|
|
1954
|
+
\`logfire\`, which together produce a deep dependency graph. Pip versions before
|
|
1955
|
+
26.1 fall into long backtracking loops and abort with \`resolution-too-deep\`.
|
|
1956
|
+
Pip 26.1's resolver handles this graph efficiently. If you skip the upgrade and
|
|
1957
|
+
hit the error, run \`python -m pip install --upgrade pip\` and retry.
|
|
1958
|
+
|
|
1959
|
+
## Resources
|
|
1960
|
+
|
|
1961
|
+
- [Documentation](https://docs.agentmark.co)
|
|
1962
|
+
- [GitHub](https://github.com/agentmark-ai/agentmark)
|
|
1963
|
+
`;
|
|
1964
|
+
};
|
|
1907
1965
|
var getGitignoreContent = () => {
|
|
1908
1966
|
return `# Python
|
|
1909
1967
|
__pycache__/
|
|
@@ -1950,7 +2008,7 @@ var createPythonApp = async (client, targetPath = ".", apiKey = "", deploymentMo
|
|
|
1950
2008
|
console.log(`Example prompts and datasets created in ${folderName}/agentmark/`);
|
|
1951
2009
|
if (!isExistingProject) {
|
|
1952
2010
|
const projectName = path3.basename(targetPath).replace(/[^a-zA-Z0-9_-]/g, "-");
|
|
1953
|
-
fs5.writeFileSync(`${targetPath}/pyproject.toml`, getPyprojectContent(projectName, adapter));
|
|
2011
|
+
fs5.writeFileSync(`${targetPath}/pyproject.toml`, getPyprojectContent(projectName, adapter, deploymentMode));
|
|
1954
2012
|
} else {
|
|
1955
2013
|
console.log("\u23ED\uFE0F Skipped pyproject.toml (existing project)");
|
|
1956
2014
|
}
|
|
@@ -2017,6 +2075,14 @@ var createPythonApp = async (client, targetPath = ".", apiKey = "", deploymentMo
|
|
|
2017
2075
|
} else {
|
|
2018
2076
|
fs5.writeFileSync(`${targetPath}/.gitignore`, getGitignoreContent());
|
|
2019
2077
|
}
|
|
2078
|
+
if (!isExistingProject) {
|
|
2079
|
+
const readmePath = path3.join(targetPath, "README.md");
|
|
2080
|
+
if (!fs5.existsSync(readmePath)) {
|
|
2081
|
+
const projectName = path3.basename(targetPath).replace(/[^a-zA-Z0-9_-]/g, "-");
|
|
2082
|
+
fs5.writeFileSync(readmePath, getReadmeContent(projectName, adapter));
|
|
2083
|
+
console.log(`\u2705 Created README.md`);
|
|
2084
|
+
}
|
|
2085
|
+
}
|
|
2020
2086
|
const agentmarkInternalDir = path3.join(targetPath, ".agentmark");
|
|
2021
2087
|
fs5.ensureDirSync(agentmarkInternalDir);
|
|
2022
2088
|
fs5.writeFileSync(path3.join(agentmarkInternalDir, "dev_server.py"), getDevServerContent(adapter));
|
|
@@ -2049,16 +2115,20 @@ var createPythonApp = async (client, targetPath = ".", apiKey = "", deploymentMo
|
|
|
2049
2115
|
if (folderName !== "." && folderName !== "./" && !isExistingProject) {
|
|
2050
2116
|
console.log(` $ cd ${folderName}`);
|
|
2051
2117
|
}
|
|
2118
|
+
const pipInstallCmd = adapter === "claude-agent-sdk" ? " $ pip install agentmark-sdk agentmark-claude-agent-sdk-v0 agentmark-prompt-core python-dotenv claude-agent-sdk" : ' $ pip install agentmark-sdk agentmark-pydantic-ai-v0 agentmark-prompt-core python-dotenv "pydantic-ai-slim[openai]>=1.0,<2.0"';
|
|
2119
|
+
const pipUpgradeCmd = ' $ python -m pip install --upgrade "pip>=26.1"';
|
|
2052
2120
|
if (pythonVenv) {
|
|
2053
2121
|
const activateCmd = process.platform === "win32" ? `${pythonVenv.name}\\Scripts\\activate` : `source ${pythonVenv.name}/bin/activate`;
|
|
2054
2122
|
console.log(` $ ${activateCmd}`);
|
|
2055
|
-
console.log(
|
|
2123
|
+
console.log(pipUpgradeCmd);
|
|
2124
|
+
console.log(pipInstallCmd);
|
|
2056
2125
|
} else {
|
|
2057
2126
|
console.log(" $ python -m venv .venv");
|
|
2058
2127
|
console.log(" $ source .venv/bin/activate # On Windows: .venv\\Scripts\\activate");
|
|
2059
|
-
console.log(
|
|
2128
|
+
console.log(pipUpgradeCmd);
|
|
2129
|
+
console.log(pipInstallCmd);
|
|
2060
2130
|
}
|
|
2061
|
-
console.log(" $
|
|
2131
|
+
console.log(" $ npx agentmark dev\n");
|
|
2062
2132
|
console.log("\u2500".repeat(70));
|
|
2063
2133
|
console.log("Resources");
|
|
2064
2134
|
console.log("\u2500".repeat(70));
|