@poncho-ai/cli 0.36.9 → 0.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,563 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { existsSync } from "node:fs";
3
+ import { dirname, relative, resolve } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Parent of the directory holding this module; package.json is looked up here.
const packageRoot = resolve(__dirname, "..");
8
+
9
/**
 * Read the `version` field from the package.json located at `packageRoot`.
 *
 * @returns The declared version when it is a non-blank string; otherwise the
 *   hard-coded fallback `"0.1.0"` (also used when the file cannot be read or
 *   is not valid JSON).
 */
const readCliVersion = async (): Promise<string> => {
  let declaredVersion: unknown;
  try {
    const manifestPath = resolve(packageRoot, "package.json");
    const manifestText = await readFile(manifestPath, "utf8");
    const manifest = JSON.parse(manifestText) as { version?: unknown };
    declaredVersion = manifest.version;
  } catch {
    // Package metadata is unreadable or malformed: fall through to the default.
  }

  if (typeof declaredVersion === "string" && declaredVersion.trim().length > 0) {
    return declaredVersion;
  }
  return "0.1.0";
};
23
+
24
/**
 * Render the AGENT.md scaffold: YAML frontmatter followed by the default
 * system prompt.
 *
 * @param name - Agent name written to the `name` frontmatter key.
 * @param id - Agent id written to the `id` frontmatter key.
 * @param options - Model provider and model name written under `model`.
 * @returns The full file contents, ending with a trailing newline. The
 *   `{{name}}` / `{{runtime.*}}` placeholders are emitted literally.
 */
export const AGENT_TEMPLATE = (
  name: string,
  id: string,
  options: { modelProvider: "anthropic" | "openai" | "openai-codex"; modelName: string },
): string => {
  const frontmatter = [
    "---",
    `name: ${name}`,
    `id: ${id}`,
    "description: A helpful Poncho assistant",
    "model:",
    `  provider: ${options.modelProvider}`,
    `  name: ${options.modelName}`,
    "  temperature: 0.2",
    "limits:",
    "  maxSteps: 20",
    "  timeout: 300",
    "---",
  ];

  const prompt = [
    "",
    "# {{name}}",
    "",
    "You are **{{name}}**, a helpful assistant built with Poncho.",
    "",
    "Working directory: {{runtime.workingDir}}",
    "Environment: {{runtime.environment}}",
    "",
    "## Task Guidance",
    "",
    "- Use tools when needed",
    "- Explain your reasoning clearly",
    "- Ask clarifying questions when requirements are ambiguous",
    "- Never claim a file/tool change unless the corresponding tool call actually succeeded",
    // Final empty entry yields the trailing newline after the join.
    "",
  ];

  return [...frontmatter, ...prompt].join("\n");
};
55
+
56
/**
 * Detect whether the CLI is running from a local monorepo build.
 *
 * Looks two directories above this module's directory for a sibling
 * `harness/package.json`; when it exists, that directory is treated as the
 * monorepo `packages/` root.
 *
 * @returns The absolute path of the `packages/` directory, or `null` when the
 *   harness manifest is not found (for example in an npm-installed copy).
 */
export const resolveLocalPackagesRoot = (): string | null => {
  const packagesDir = resolve(__dirname, "..", "..");
  const harnessManifest = resolve(packagesDir, "harness", "package.json");
  return existsSync(harnessManifest) ? packagesDir : null;
};
69
+
70
/**
 * Compute the `@poncho-ai/cli` dependency specifier for a scaffolded project.
 *
 * @param projectDir - Directory of the project being scaffolded; a local link
 *   path is expressed relative to it.
 * @returns `link:<relative path to the local cli package>` when a local
 *   packages root is detected, otherwise a caret range on the CLI version.
 */
export const resolveCliDep = async (projectDir: string): Promise<string> => {
  const localPackages = resolveLocalPackagesRoot();
  if (!localPackages) {
    // No local monorepo detected: depend on the published package.
    const publishedVersion = await readCliVersion();
    return `^${publishedVersion}`;
  }
  const localCliDir = resolve(localPackages, "cli");
  return `link:${relative(projectDir, localCliDir)}`;
};
84
+
85
/**
 * Render the package.json for a scaffolded project.
 *
 * @param name - Value of the package `name` field.
 * @param projectDir - Project directory, forwarded to `resolveCliDep` to
 *   compute the `@poncho-ai/cli` dependency specifier.
 * @returns The manifest serialized as JSON with two-space indentation.
 */
export const PACKAGE_TEMPLATE = async (name: string, projectDir: string): Promise<string> => {
  const scripts = {
    dev: "poncho dev",
    start: "poncho dev",
    test: "poncho test",
  };
  const dependencies = {
    "@poncho-ai/cli": await resolveCliDep(projectDir),
  };
  // Key order here is the key order of the emitted JSON.
  const manifest = { name, private: true, type: "module", scripts, dependencies };
  return JSON.stringify(manifest, null, 2);
};
105
+
106
/**
 * Render the README.md scaffolded into a new Poncho project.
 *
 * The project name is interpolated into the title, the "Project Structure"
 * tree, and the Docker build/run commands. Backticks are escaped because the
 * whole document lives inside a template literal.
 *
 * @param name - Project name to embed in the document.
 * @returns The complete Markdown document, ending with a trailing newline.
 */
export const README_TEMPLATE = (name: string): string => `# ${name}

An AI agent built with [Poncho](https://github.com/cesr/poncho-ai).

## Prerequisites

- Node.js 20+
- npm (or pnpm/yarn)
- Anthropic API key, OpenAI API key, or OpenAI Codex OAuth refresh token

## Quick Start

\`\`\`bash
npm install
# If you didn't enter credentials during init:
cp .env.example .env
# Then edit .env and add provider credentials
poncho dev
\`\`\`

For OpenAI Codex OAuth bootstrap:

\`\`\`bash
poncho auth login --provider openai-codex --device
poncho auth export --provider openai-codex --format env
\`\`\`

Open \`http://localhost:3000\` for the web UI, or \`http://localhost:3000/api/docs\` for interactive API documentation.

The web UI supports file attachments (drag-and-drop, paste, or attach button), conversation management (sidebar), a context window usage ring, and tool approval prompts. It can be installed as a PWA.

On your first interactive session, the agent introduces its configurable capabilities.
While a response is streaming, you can stop it:
- Web UI: click the send button again (it switches to a stop icon)
- Interactive CLI: press \`Ctrl+C\`

Stopping is best-effort and keeps partial assistant output/tool activity already produced.

Interactive CLI commands: \`/help\`, \`/clear\`, \`/tools\`, \`/exit\`, \`/attach <path>\`, \`/files\`, \`/list\`, \`/open <id>\`, \`/new [title]\`, \`/delete [id]\`, \`/continue\`, \`/compact [focus]\`, \`/reset [all]\`.

## Common Commands

\`\`\`bash
# Local web UI + API server
poncho dev
poncho dev --port 8080

# Local interactive CLI
poncho run --interactive

# One-off run
poncho run "Your task here"
poncho run "Explain this code" --file ./src/index.ts
poncho run "Review the code" --param projectName=my-app

# Run tests
poncho test

# List available tools
poncho tools

# OpenAI Codex auth (OAuth subscription)
poncho auth login --provider openai-codex --device
poncho auth status --provider openai-codex
poncho auth export --provider openai-codex --format env

# Remove deprecated guidance from AGENT.md after upgrading
poncho update-agent

# Multi-tenancy: create a tenant token
poncho auth create-token --tenant acme-corp --ttl 24h

# Manage per-tenant secrets
poncho secrets set --tenant acme-corp LINEAR_API_KEY lk_123
poncho secrets list --tenant acme-corp
poncho secrets delete --tenant acme-corp LINEAR_API_KEY
\`\`\`

## Add Skills

Install skills from a local path or remote repository, then verify discovery:

\`\`\`bash
# Install all skills from a source package/repo
poncho skills add <repo-or-path>

# Install one specific skill path from a source
poncho skills add <repo-or-path> <relative-skill-path>

# Remove all installed skills from a source
poncho skills remove <repo-or-path>

# Remove one installed skill path from a source
poncho skills remove <repo-or-path> <relative-skill-path>

# List installed skills
poncho skills list

# Verify loaded tools
poncho tools
\`\`\`

\`poncho skills add\` copies discovered skill directories (folders that contain \`SKILL.md\`) into \`skills/<source>/...\`.
If a destination folder already exists, the command fails instead of overwriting files.
\`poncho add\` and \`poncho remove\` remain available as aliases.

After adding skills, run \`poncho dev\` or \`poncho run --interactive\` and ask the agent to use them.

## Configure MCP Servers (Remote)

Connect remote MCP servers and expose their tools to the agent:

\`\`\`bash
# Add remote MCP server
poncho mcp add --url https://mcp.example.com/github --name github --auth-bearer-env GITHUB_TOKEN

# Server with custom headers (e.g. Arcade)
poncho mcp add --url https://mcp.arcade.dev --name arcade \\
  --auth-bearer-env ARCADE_API_KEY --header "Arcade-User-ID: user@example.com"

# List configured servers
poncho mcp list

# Discover MCP tools and print frontmatter intent snippets
poncho mcp tools list github
poncho mcp tools select github

# Remove a server
poncho mcp remove github
\`\`\`

Set required secrets in \`.env\` (for example, \`GITHUB_TOKEN=...\`).

## Tool Intent and Approvals in Frontmatter

Declare tool intent directly in \`AGENT.md\` and \`SKILL.md\` frontmatter:

\`\`\`yaml
allowed-tools:
  - mcp:github/list_issues
  - mcp:github/*
approval-required:
  - mcp:github/create_issue
  - ./scripts/deploy.ts
\`\`\`

How it works:

- \`AGENT.md\` provides fallback MCP intent when no skill is active.
- \`SKILL.md\` intent applies when you activate that skill (\`activate_skill\`).
- Scripts in a sibling \`scripts/\` directory are available by convention.
- For non-standard script folders (for example \`tools/\`), add explicit relative entries in \`allowed-tools\`.
- Use \`approval-required\` to require human approval for specific MCP calls or script files.
- Deactivating a skill (\`deactivate_skill\`) removes its MCP tools from runtime registration.

Pattern format:

- MCP: \`mcp:server/tool\`, \`mcp:server/*\` (protocol-like prefix)
- Scripts: relative paths such as \`./scripts/file.ts\`, \`./scripts/*\`, \`./tools/deploy.ts\`

Skill authoring guardrails:

- Every \`SKILL.md\` must include YAML frontmatter between \`---\` markers.
- Include at least \`name\` (required for discovery) and \`description\`.
- Put tool intent in frontmatter using \`allowed-tools\` and \`approval-required\`.
- \`approval-required\` is stricter than allowed access:
  - MCP entries in \`approval-required\` must also appear in \`allowed-tools\`.
  - Script entries outside \`./scripts/\` must also appear in \`allowed-tools\`.
- Keep MCP server connection details in \`poncho.config.js\`, not in \`SKILL.md\`.

## Configuration

Core files:

- \`AGENT.md\`: behavior, model selection, runtime guidance
- \`poncho.config.js\`: runtime config (storage, auth, telemetry, MCP, tools)
- \`.env\`: secrets and environment variables (loaded before the harness starts, so \`process.env\` is available in skill scripts)

Example \`poncho.config.js\`:

\`\`\`javascript
export default {
  storage: {
    provider: "local", // local | memory | redis | upstash | dynamodb
    memory: {
      enabled: true,
      maxRecallConversations: 20,
    },
  },
  auth: {
    required: false,
  },
  telemetry: {
    enabled: true,
  },
  mcp: [
    {
      name: "github",
      url: "https://mcp.example.com/github",
      auth: { type: "bearer", tokenEnv: "GITHUB_TOKEN" },
    },
    // Custom headers for servers that require them (e.g. Arcade)
    // { name: "arcade", url: "https://mcp.arcade.dev", auth: { type: "bearer", tokenEnv: "ARCADE_API_KEY" }, headers: { "Arcade-User-ID": "user@example.com" } },
  ],
  // Tool access: true (available), false (disabled), 'approval' (requires human approval)
  tools: {
    list_directory: true,
    read_file: true,
    write_file: true, // gated by environment for writes
    edit_file: true, // gated by environment for writes
    delete_file: 'approval', // requires human approval
    delete_directory: 'approval', // requires human approval
    send_email: 'approval', // requires human approval
    byEnvironment: {
      production: {
        write_file: false,
        edit_file: false,
        delete_file: false,
        delete_directory: false,
      },
      development: {
        send_email: true, // skip approval in dev
      },
    },
  },
  // browser: true, // Enable browser automation tools (requires @poncho-ai/browser)
  // browser: { provider: 'browserbase' }, // Cloud browser for serverless (Vercel, Lambda)
  // webUi: false, // Disable built-in UI for API-only deployments
  // uploads: { provider: 'local' }, // 'local' | 'vercel-blob' | 's3'
};
\`\`\`

## Project Structure

\`\`\`
${name}/
\u251C\u2500\u2500 AGENT.md # Agent definition and system prompt
\u251C\u2500\u2500 poncho.config.js # Configuration (MCP servers, auth, etc.)
\u251C\u2500\u2500 package.json # Dependencies
\u251C\u2500\u2500 .env.example # Environment variables template
\u251C\u2500\u2500 tests/
\u2502   \u2514\u2500\u2500 basic.yaml # Test suite
\u2514\u2500\u2500 skills/
    \u2514\u2500\u2500 starter/
        \u251C\u2500\u2500 SKILL.md
        \u2514\u2500\u2500 scripts/
            \u2514\u2500\u2500 starter-echo.ts
\`\`\`

## Cron Jobs

Define scheduled tasks in \`AGENT.md\` frontmatter:

\`\`\`yaml
cron:
  daily-report:
    schedule: "0 9 * * *"
    task: "Generate the daily sales report"
  morning-checkin:
    schedule: "0 8 * * 1-5"
    task: "Check in with the user about their day"
    channel: telegram
\`\`\`

- \`poncho dev\`: jobs run via an in-process scheduler.
- \`poncho build vercel\`: generates \`vercel.json\` cron entries. Set \`CRON_SECRET\` to the same value as \`PONCHO_AUTH_TOKEN\` so Vercel can authenticate.
- Docker/Fly.io: scheduler runs automatically.
- Lambda: use AWS EventBridge to trigger \`GET /api/cron/<jobName>\` with \`Authorization: Bearer <token>\`.
- Trigger manually: \`curl http://localhost:3000/api/cron/daily-report\`

Add \`channel: telegram\` (or another platform) to have the agent proactively send the response to all known chats on that platform. The bot must have received at least one message from each user first.

## Reminders

One-off reminders are enabled by default. The agent gets \`set_reminder\`, \`list_reminders\`, and \`cancel_reminder\` tools. Users can say things like "remind me tomorrow at 9am to check the report."

Configure in \`poncho.config.js\`:

\`\`\`javascript
export default {
  reminders: {
    enabled: true,
    pollSchedule: '*/10 * * * *', // how often to check for due reminders
  },
};
\`\`\`

- Reminders fire via a polling loop (same interval locally and on serverless).
- On Vercel, \`poncho build vercel\` adds a cron entry for \`/api/reminders/check\`.
- Channel reminders (Telegram/Slack) reply in the original conversation.
- Non-channel reminders create a new \`[reminder]\` conversation visible in the web UI.

## Messaging (Slack)

Connect your agent to Slack so it responds to @mentions:

1. Create a Slack App at [api.slack.com/apps](https://api.slack.com/apps)
2. Add Bot Token Scopes: \`app_mentions:read\`, \`chat:write\`, \`reactions:write\`
3. Enable Event Subscriptions, set Request URL to \`https://<your-url>/api/messaging/slack\`, subscribe to \`app_mention\`
4. Install to workspace, copy Bot Token and Signing Secret
5. Set env vars:
   \`\`\`
   SLACK_BOT_TOKEN=xoxb-...
   SLACK_SIGNING_SECRET=...
   \`\`\`
6. Add to \`poncho.config.js\`:
   \`\`\`javascript
   messaging: [{ platform: 'slack' }]
   \`\`\`

**Vercel deployments:** install \`@vercel/functions\` so Poncho can keep the serverless function alive while processing: \`npm install @vercel/functions\`

## Messaging (Telegram)

Connect your agent to Telegram so it responds to messages and @mentions:

1. Talk to [@BotFather](https://t.me/BotFather) on Telegram, send \`/newbot\`, and follow the prompts
2. Copy the Bot Token
3. Set env vars:
   \`\`\`
   TELEGRAM_BOT_TOKEN=123456:ABC-...
   TELEGRAM_WEBHOOK_SECRET=my-secret-token # optional but recommended
   \`\`\`
4. Add to \`poncho.config.js\`:
   \`\`\`javascript
   messaging: [{ platform: 'telegram' }]
   \`\`\`
5. Register the webhook after deploying:
   \`\`\`bash
   curl -X POST "https://api.telegram.org/bot<TOKEN>/setWebhook" \\
     -H "Content-Type: application/json" \\
     -d '{"url": "https://<your-url>/api/messaging/telegram", "secret_token": "<SECRET>"}'
   \`\`\`

The bot responds to all messages in private chats and only to @mentions in groups. Use \`/new\` to reset the conversation.

**Vercel deployments:** install \`@vercel/functions\` so Poncho can keep the serverless function alive while processing: \`npm install @vercel/functions\`

## Messaging (Email via Resend)

Connect your agent to email so users can interact by sending emails:

1. Set up a domain and enable Inbound at [resend.com](https://resend.com)
2. Create a webhook for \`email.received\` pointing to \`https://<your-url>/api/messaging/resend\`
3. Install the Resend SDK: \`npm install resend\`
4. Set env vars:
   \`\`\`
   RESEND_API_KEY=re_...
   RESEND_WEBHOOK_SECRET=whsec_...
   RESEND_FROM=Agent <agent@yourdomain.com>
   RESEND_REPLY_TO=support@yourdomain.com # optional
   \`\`\`
5. Add to \`poncho.config.js\`:
   \`\`\`javascript
   messaging: [{ platform: 'resend' }]
   \`\`\`

For full control over outbound emails, use **tool mode** (\`mode: 'tool'\`) — the agent gets a \`send_email\` tool instead of auto-replying. See the repo README for details.

**Vercel deployments:** install \`@vercel/functions\` so Poncho can keep the serverless function alive while processing: \`npm install @vercel/functions\`

## Subagents

Your agent can spawn **subagents** — independent background tasks that run in their own conversations. Subagents are useful for parallelizing work or isolating subtasks.

The agent gets four tools automatically: \`spawn_subagent\` (create and run a subagent), \`message_subagent\` (send follow-ups), \`stop_subagent\`, and \`list_subagents\`. Calls return immediately — subagents run in the background and their results are delivered to the parent automatically.

- **Limits**: subagents cannot spawn other subagents; max 5 concurrent per parent.
- **Memory**: subagents have read-only access to the parent's persistent memory.
- **Approvals**: subagent tool approvals are tunneled to the parent conversation thread.
- **Web UI**: subagent conversations appear nested under the parent in the sidebar.

## Deployment

\`\`\`bash
# Build for Vercel
poncho build vercel
vercel deploy --prod

# Build for Docker
poncho build docker
docker build -t ${name} .
docker run -p 3000:3000 -e ANTHROPIC_API_KEY=sk-ant-... ${name}

# AWS Lambda
poncho build lambda

# Fly.io
poncho build fly
fly deploy
\`\`\`

Set environment variables on your deployment platform:

\`\`\`bash
ANTHROPIC_API_KEY=sk-ant-... # Required
# OR for OpenAI API key provider:
# OPENAI_API_KEY=sk-...
# OR for OpenAI Codex OAuth provider:
# OPENAI_CODEX_REFRESH_TOKEN=rt_...
# OPENAI_CODEX_ACCOUNT_ID=... # Optional
PONCHO_AUTH_TOKEN=your-secret # Optional: protect your endpoint
PONCHO_MAX_DURATION=55 # Optional: serverless timeout in seconds (enables auto-continuation)
PONCHO_INTERNAL_SECRET=... # Recommended on serverless: shared secret for internal callback auth
\`\`\`

When \`PONCHO_MAX_DURATION\` is set, the agent automatically checkpoints and resumes across
request cycles when it approaches the platform timeout. The web UI and client SDK handle
this transparently.

For serverless deployments with subagents or background callbacks, use a shared state backend
(\`upstash\`, \`redis\`, or \`dynamodb\`) instead of \`state.provider: 'local'\` / \`'memory'\`.

## Troubleshooting

### Vercel deploy issues

- After upgrading \`@poncho-ai/cli\`, re-run \`poncho build vercel --force\` to refresh generated deploy files.
- If Vercel fails during \`pnpm install\` due to a lockfile mismatch, run \`pnpm install --no-frozen-lockfile\` locally and commit \`pnpm-lock.yaml\`.
- Deploy from the project root: \`vercel deploy --prod\`.
- For subagents/background callbacks, set \`PONCHO_INTERNAL_SECRET\` and use non-local state storage.

For full reference:
https://github.com/cesr/poncho-ai
`;
531
+
532
/** Contents of the scaffolded env file: one placeholder Anthropic key line. */
export const ENV_TEMPLATE = ["ANTHROPIC_API_KEY=sk-ant-...", ""].join("\n");
533
/** Contents of the scaffolded .gitignore: one ignored path per line. */
export const GITIGNORE_TEMPLATE = [
  ".env",
  "node_modules",
  "dist",
  ".poncho/",
  "interactive-session.json",
  ".vercel",
]
  .map((entry) => `${entry}\n`)
  .join("");
535
/** Starter YAML test suite: a single arithmetic sanity check. */
export const TEST_TEMPLATE = [
  "tests:",
  '  - name: "Basic sanity"',
  '    task: "What is 2 + 2?"',
  "    expect:",
  '      contains: "4"',
  // Final empty entry yields the trailing newline after the join.
  "",
].join("\n");
541
+
542
/**
 * Starter SKILL.md: frontmatter declaring the bundled echo script as an
 * allowed tool, followed by short authoring notes.
 */
export const SKILL_TEMPLATE = [
  "---",
  "name: starter-skill",
  "description: Starter local skill template",
  "allowed-tools:",
  "  - ./scripts/starter-echo.ts",
  "---",
  "",
  "# Starter Skill",
  "",
  "This is a starter local skill created by `poncho init`.",
  "",
  "## Authoring Notes",
  "",
  "- Put executable JavaScript/TypeScript files in `scripts/`.",
  "- Ask the agent to call `run_skill_script` with `skill`, `script`, and optional `input`.",
  // Final empty entry yields the trailing newline after the join.
  "",
].join("\n");
558
+
559
/**
 * Source text of the starter skill script: a default-exported async function
 * that echoes back `input.message` when it is a string, else an empty string.
 */
export const SKILL_TOOL_TEMPLATE = [
  "export default async function run(input) {",
  '  const message = typeof input?.message === "string" ? input.message : "";',
  "  return { echoed: message };",
  "}",
  // Final empty entry yields the trailing newline after the join.
  "",
].join("\n");
package/src/testing.ts ADDED
@@ -0,0 +1,108 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { resolve } from "node:path";
3
+ import { AgentHarness } from "@poncho-ai/harness";
4
+ import dotenv from "dotenv";
5
+ import YAML from "yaml";
6
+ import {
7
+ normalizeDeployTarget,
8
+ checkVercelCronDrift,
9
+ scaffoldDeployTarget,
10
+ } from "./scaffolding.js";
11
+
12
/**
 * Run a YAML-defined test suite against the agent in `workingDir`.
 *
 * Loads `<workingDir>/.env`, parses the suite, runs every test task through a
 * freshly initialized `AgentHarness`, and writes one PASS/FAIL line per test
 * plus a summary to stdout.
 *
 * A test with no expectations passes when the run status is `"completed"`;
 * otherwise every declared expectation must hold. An error thrown while
 * running a test is reported as a failure of that test and does not stop the
 * suite.
 *
 * @param workingDir - Project directory containing the agent and `.env`.
 * @param filePath - Suite file; defaults to `<workingDir>/tests/basic.yaml`.
 * @returns Counts of passed and failed tests.
 * @throws If the suite file cannot be read or parsed, or the harness fails to
 *   initialize.
 */
export const runTests = async (
  workingDir: string,
  filePath?: string,
): Promise<{ passed: number; failed: number }> => {
  dotenv.config({ path: resolve(workingDir, ".env") });
  const testFilePath = filePath ?? resolve(workingDir, "tests", "basic.yaml");
  const content = await readFile(testFilePath, "utf8");
  // An empty YAML document parses to null, so the result itself may be absent.
  const parsed = YAML.parse(content) as
    | {
        tests?: Array<{
          name: string;
          task: string;
          expect?: {
            contains?: string;
            refusal?: boolean;
            toolCalled?: string;
            maxSteps?: number;
            maxTokens?: number;
          };
        }>;
      }
    | null
    | undefined;
  // Treat a missing or non-list `tests` key as an empty suite instead of crashing.
  const declaredTests = parsed?.tests;
  const tests = Array.isArray(declaredTests) ? declaredTests : [];

  const harness = new AgentHarness({ workingDir });
  await harness.initialize();

  let passed = 0;
  let failed = 0;

  for (const testCase of tests) {
    try {
      const output = await harness.runToCompletion({ task: testCase.task });
      const response = output.result.response ?? "";
      const events = output.events;
      const expectation = testCase.expect ?? {};
      const checks: boolean[] = [];

      if (expectation.contains) {
        checks.push(response.includes(expectation.contains));
      }
      if (typeof expectation.maxSteps === "number") {
        checks.push(output.result.steps <= expectation.maxSteps);
      }
      if (typeof expectation.maxTokens === "number") {
        checks.push(
          output.result.tokens.input + output.result.tokens.output <= expectation.maxTokens,
        );
      }
      if (expectation.refusal) {
        // Fold the typographic apostrophe (U+2019) to ASCII so "can’t" is
        // recognized as a refusal as well as "can't".
        const normalized = response.toLowerCase().replace(/\u2019/g, "'");
        checks.push(normalized.includes("can't") || normalized.includes("cannot"));
      }
      if (expectation.toolCalled) {
        checks.push(
          events.some(
            (event) => event.type === "tool:started" && event.tool === expectation.toolCalled,
          ),
        );
      }

      // With no explicit expectations, fall back to the run's completion status.
      const ok = checks.length === 0 ? output.result.status === "completed" : checks.every(Boolean);
      if (ok) {
        passed += 1;
        process.stdout.write(`PASS ${testCase.name}\n`);
      } else {
        failed += 1;
        process.stdout.write(`FAIL ${testCase.name}\n`);
      }
    } catch (error) {
      failed += 1;
      process.stdout.write(
        `FAIL ${testCase.name} (${error instanceof Error ? error.message : "Unknown test error"})\n`,
      );
    }
  }

  process.stdout.write(`\nTest summary: ${passed} passed, ${failed} failed\n`);
  return { passed, failed };
};
91
+
92
/**
 * Scaffold deployment files for a target platform and list them on stdout.
 *
 * @param workingDir - Project directory to scaffold into.
 * @param target - Deploy target name; passed through `normalizeDeployTarget`.
 * @param options - `force` is forwarded to the scaffolder and, when truthy,
 *   skips the Vercel cron drift check.
 */
export const buildTarget = async (
  workingDir: string,
  target: string,
  options?: { force?: boolean },
): Promise<void> => {
  const deployTarget = normalizeDeployTarget(target);
  const force = options?.force;

  // The drift check applies to Vercel builds only, and never to forced builds.
  const needsDriftCheck = deployTarget === "vercel" && !force;
  if (needsDriftCheck) {
    await checkVercelCronDrift(workingDir);
  }

  const scaffolded = await scaffoldDeployTarget(workingDir, deployTarget, { force });

  process.stdout.write(`Scaffolded deploy files for ${deployTarget}:\n`);
  for (const scaffoldedPath of scaffolded) {
    process.stdout.write(` - ${scaffoldedPath}\n`);
  }
};