@poncho-ai/harness 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.2.0",
3
+ "version": "0.3.1",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -27,13 +27,11 @@
27
27
  "mustache": "^4.2.0",
28
28
  "openai": "^6.3.0",
29
29
  "redis": "^5.10.0",
30
- "ws": "^8.18.0",
31
30
  "yaml": "^2.4.0",
32
31
  "@poncho-ai/sdk": "0.2.0"
33
32
  },
34
33
  "devDependencies": {
35
34
  "@types/mustache": "^4.2.6",
36
- "@types/ws": "^8.18.1",
37
35
  "tsup": "^8.0.0",
38
36
  "vitest": "^1.4.0"
39
37
  },
@@ -3,6 +3,7 @@ import { readFile } from "node:fs/promises";
3
3
  import { resolve } from "node:path";
4
4
  import Mustache from "mustache";
5
5
  import YAML from "yaml";
6
+ import { validateMcpPattern, validateScriptPattern } from "./tool-policy.js";
6
7
 
7
8
  export interface AgentModelConfig {
8
9
  provider: string;
@@ -21,6 +22,10 @@ export interface AgentFrontmatter {
21
22
  description?: string;
22
23
  model?: AgentModelConfig;
23
24
  limits?: AgentLimitsConfig;
25
+ tools?: {
26
+ mcp?: string[];
27
+ scripts?: string[];
28
+ };
24
29
  }
25
30
 
26
31
  export interface ParsedAgent {
@@ -66,6 +71,19 @@ export const parseAgentMarkdown = (content: string): ParsedAgent => {
66
71
 
67
72
  const modelValue = asRecord(parsed.model);
68
73
  const limitsValue = asRecord(parsed.limits);
74
+ const toolsValue = asRecord(parsed.tools);
75
+ const mcpTools = Array.isArray(toolsValue.mcp)
76
+ ? toolsValue.mcp.filter((item): item is string => typeof item === "string")
77
+ : undefined;
78
+ const scriptTools = Array.isArray(toolsValue.scripts)
79
+ ? toolsValue.scripts.filter((item): item is string => typeof item === "string")
80
+ : undefined;
81
+ for (const [index, pattern] of (mcpTools ?? []).entries()) {
82
+ validateMcpPattern(pattern, `AGENT.md frontmatter tools.mcp[${index}]`);
83
+ }
84
+ for (const [index, pattern] of (scriptTools ?? []).entries()) {
85
+ validateScriptPattern(pattern, `AGENT.md frontmatter tools.scripts[${index}]`);
86
+ }
69
87
 
70
88
  const frontmatter: AgentFrontmatter = {
71
89
  name: parsed.name,
@@ -93,6 +111,13 @@ export const parseAgentMarkdown = (content: string): ParsedAgent => {
93
111
  timeout: asNumberOrUndefined(limitsValue.timeout),
94
112
  }
95
113
  : undefined,
114
+ tools:
115
+ mcpTools || scriptTools
116
+ ? {
117
+ mcp: mcpTools,
118
+ scripts: scriptTools,
119
+ }
120
+ : undefined,
96
121
  };
97
122
 
98
123
  return {
package/src/config.ts CHANGED
@@ -3,6 +3,7 @@ import { resolve } from "node:path";
3
3
  import type { MemoryConfig } from "./memory.js";
4
4
  import type { McpConfig } from "./mcp.js";
5
5
  import type { StateConfig } from "./state.js";
6
+ import type { ToolPatternPolicy } from "./tool-policy.js";
6
7
 
7
8
  export interface StorageConfig {
8
9
  provider?: "local" | "memory" | "redis" | "upstash" | "dynamodb";
@@ -63,6 +64,7 @@ export interface PonchoConfig extends McpConfig {
63
64
  handler?: (event: unknown) => Promise<void> | void;
64
65
  };
65
66
  skills?: Record<string, Record<string, unknown>>;
67
+ scripts?: ToolPatternPolicy;
66
68
  /** Extra directories (relative to project root) to scan for skills.
67
69
  * `skills/` and `.poncho/skills/` are always scanned. */
68
70
  skillPaths?: string[];
package/src/harness.ts CHANGED
@@ -20,7 +20,15 @@ import { LocalMcpBridge } from "./mcp.js";
20
20
  import type { ModelClient, ModelResponse } from "./model-client.js";
21
21
  import { createModelClient } from "./model-factory.js";
22
22
  import { buildSkillContextWindow, loadSkillMetadata } from "./skill-context.js";
23
+ import type { SkillMetadata } from "./skill-context.js";
23
24
  import { createSkillTools } from "./skill-tools.js";
25
+ import {
26
+ applyToolPolicy,
27
+ matchesSlashPattern,
28
+ mergePolicyForEnvironment,
29
+ type RuntimeEnvironment,
30
+ validateScriptPattern,
31
+ } from "./tool-policy.js";
24
32
  import { ToolDispatcher } from "./tool-dispatcher.js";
25
33
 
26
34
  export interface HarnessOptions {
@@ -51,14 +59,71 @@ const trimMessageWindow = (messages: Message[]): Message[] =>
51
59
  ? messages
52
60
  : messages.slice(messages.length - MAX_CONTEXT_MESSAGES);
53
61
 
62
/** Shape of a tool name that can be sent to the model as-is: 1-128 chars of letters, digits, `_` or `-`. */
const MODEL_TOOL_NAME_PATTERN = /^[a-zA-Z0-9_-]{1,128}$/;

/**
 * Derives a tool name that matches `MODEL_TOOL_NAME_PATTERN` and has not been
 * handed out yet, recording the result in `used`.
 *
 * @param originalName - The tool name as registered at runtime.
 * @param index - Position of the tool in the list; only used to build the
 *   `tool_<index + 1>` fallback when sanitising leaves nothing.
 * @param used - Names already handed out. Mutated: the returned name is added.
 * @returns `originalName` unchanged when it is valid and unused; otherwise a
 *   sanitised variant, with a numeric `_<n>` suffix (starting at 2) on collision.
 */
const toProviderSafeToolName = (
  originalName: string,
  index: number,
  used: Set<string>,
): string => {
  const isAvailable = (name: string): boolean =>
    MODEL_TOOL_NAME_PATTERN.test(name) && !used.has(name);
  const claim = (name: string): string => {
    used.add(name);
    return name;
  };

  if (isAvailable(originalName)) {
    return claim(originalName);
  }

  // Replace every disallowed character, collapse underscore runs, trim underscores at both ends.
  const sanitized = originalName
    .replace(/[^a-zA-Z0-9_-]/g, "_")
    .replace(/_+/g, "_")
    .replace(/^_+|_+$/g, "");
  const nonEmpty = sanitized.length === 0 ? `tool_${index + 1}` : sanitized;
  // Cap at 120 characters so short suffixes fit under the 128 limit.
  const base = nonEmpty.length > 120 ? nonEmpty.slice(0, 120) : nonEmpty;

  if (isAvailable(base)) {
    return claim(base);
  }

  // Try `_2`, `_3`, ... shortening the base if needed so the total never exceeds 128.
  for (let suffix = 2; ; suffix += 1) {
    const suffixText = `_${suffix}`;
    const maxBaseLength = Math.max(1, 128 - suffixText.length);
    const candidate = `${base.slice(0, maxBaseLength)}${suffixText}`;
    if (isAvailable(candidate)) {
      return claim(candidate);
    }
  }
};
94
+
54
95
  const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
55
96
 
56
97
  You are running locally in development mode. Treat this as an editable agent workspace.
57
98
 
58
- When users ask about customization:
99
+ ## Understanding Your Environment
100
+
101
+ - Built-in tools: \`list_directory\` and \`read_file\`
102
+ - \`write_file\` is available in development (disabled by default in production)
103
+ - A starter local skill is included (\`starter-echo\`)
104
+ - Bash/shell commands are **not** available unless you install and enable a shell tool/skill
105
+ - Git operations are only available if a git-capable tool/skill is configured
106
+ - For setup/configuration/skills/MCP questions, proactively read \`README.md\` with \`read_file\` before answering
107
+ - Prefer concrete commands and examples from \`README.md\` over assumptions
108
+
109
+ ## When users ask about customization:
110
+
59
111
  - Explain and edit \`poncho.config.js\` for model/provider, storage+memory, auth, telemetry, and MCP settings.
60
112
  - Help create or update local skills under \`skills/<skill-name>/SKILL.md\`.
61
113
  - For executable skills, add JavaScript/TypeScript scripts under \`skills/<skill-name>/scripts/\` and run them via \`run_skill_script\`.
114
+ - For MCP setup, default to direct \`poncho.config.js\` edits (\`mcp\` entries with URL, bearer token env, and tool policy).
115
+ - Keep MCP server connection details in \`poncho.config.js\` only (name/url/auth/tools policy). Do not move server definitions into \`SKILL.md\`.
116
+ - In \`AGENT.md\`/\`SKILL.md\`, declare MCP intent only as \`tools.mcp\` string patterns (for example \`linear/*\` or \`linear/list_issues\`).
117
+ - Never use nested MCP objects in skill frontmatter (for example \`mcp: [{ name, url, auth }]\`) and never use underscore/colon tool patterns.
118
+ - To scope tools to a skill: keep server config in \`poncho.config.js\`, add desired \`tools.mcp\` patterns in that skill's \`SKILL.md\`, and remove global \`AGENT.md tools.mcp\` fallback if you do not want global availability.
119
+ - Do not invent unsupported top-level config keys (for example \`model\` in \`poncho.config.js\`). Keep existing config structure unless README/spec explicitly says otherwise.
120
+ - In \`poncho.config.js\`, MCP tool allowlist patterns must be slash-based (for example \`linear/list_initiatives\` or \`linear/*\`), not underscored names like \`linear_list_initiatives\`.
121
+ - Keep \`poncho.config.js\` valid JavaScript and preserve existing imports/types/comments. If there is a JSDoc type import, do not rewrite it to a different package name.
122
+ - Preferred MCP config shape in \`poncho.config.js\`:
123
+ \`mcp: [{ name: "linear", url: "https://mcp.linear.app/mcp", auth: { type: "bearer", tokenEnv: "LINEAR_TOKEN" }, tools: { mode: "allowlist", include: ["linear/*"] } }]\`
124
+ - If shell/CLI access exists, you can use \`poncho mcp add --url ... --name ... --auth-bearer-env ...\`, then \`poncho mcp tools list <server>\` and \`poncho mcp tools select <server>\`.
125
+ - If shell/CLI access is unavailable, ask the user to run needed commands and provide exact copy-paste commands.
126
+ - Use strict slash patterns for MCP tool selections (\`server/tool\`, \`server/*\`) and verify by inspecting config/tool state.
62
127
  - For setup, skills, MCP, auth, storage, telemetry, or "how do I..." questions, proactively read \`README.md\` with \`read_file\` before answering.
63
128
  - Prefer quoting concrete commands and examples from \`README.md\` over guessing.
64
129
  - Keep edits minimal, preserve unrelated settings/code, and summarize what changed.`;
@@ -71,6 +136,10 @@ export class AgentHarness {
71
136
  private readonly approvalHandler?: HarnessOptions["approvalHandler"];
72
137
  private skillContextWindow = "";
73
138
  private memoryStore?: MemoryStore;
139
+ private loadedConfig?: PonchoConfig;
140
+ private loadedSkills: SkillMetadata[] = [];
141
+ private readonly activeSkillNames = new Set<string>();
142
+ private readonly registeredMcpToolNames = new Set<string>();
74
143
 
75
144
  private parsedAgent?: ParsedAgent;
76
145
  private mcpBridge?: LocalMcpBridge;
@@ -141,9 +210,145 @@ export class AgentHarness {
141
210
  }
142
211
  }
143
212
 
213
/** Returns the harness environment, falling back to `"development"` when none is set. */
private runtimeEnvironment(): RuntimeEnvironment {
  const configured = this.environment;
  return configured ?? "development";
}
216
+
217
/** Returns the names of the currently active skills as a fresh array in default sort order. */
private listActiveSkills(): string[] {
  const names = Array.from(this.activeSkillNames);
  names.sort();
  return names;
}
220
+
221
/** Returns the `tools.mcp` patterns from the parsed agent frontmatter, or an empty array when absent. */
private getAgentMcpIntent(): string[] {
  const declared = this.parsedAgent?.frontmatter.tools?.mcp;
  return declared ?? [];
}
224
+
225
/** Returns the `tools.scripts` patterns from the parsed agent frontmatter, or an empty array when absent. */
private getAgentScriptIntent(): string[] {
  const declared = this.parsedAgent?.frontmatter.tools?.scripts;
  return declared ?? [];
}
228
+
229
/**
 * Collects the MCP tool patterns that should currently be requested.
 *
 * Patterns declared by active skills are gathered (de-duplicated) first; the
 * agent-level `tools.mcp` list is used only when the active skills contribute
 * no pattern at all. Active names with no matching loaded skill are ignored.
 */
private getRequestedMcpPatterns(): string[] {
  const fromSkills = new Set<string>();
  for (const activeName of this.activeSkillNames) {
    const match = this.loadedSkills.find((entry) => entry.name === activeName);
    if (match) {
      for (const pattern of match.tools.mcp) {
        fromSkills.add(pattern);
      }
    }
  }
  return fromSkills.size > 0 ? [...fromSkills] : this.getAgentMcpIntent();
}
245
+
246
/**
 * Collects the script patterns that should currently be honoured.
 *
 * Patterns declared by active skills are gathered (de-duplicated) first; the
 * agent-level `tools.scripts` list is used only when the active skills
 * contribute no pattern at all. Active names with no matching loaded skill
 * are ignored.
 */
private getRequestedScriptPatterns(): string[] {
  const fromSkills = new Set<string>();
  for (const activeName of this.activeSkillNames) {
    const match = this.loadedSkills.find((entry) => entry.name === activeName);
    if (match) {
      for (const pattern of match.tools.scripts) {
        fromSkills.add(pattern);
      }
    }
  }
  return fromSkills.size > 0 ? [...fromSkills] : this.getAgentScriptIntent();
}
262
+
263
/**
 * Decides whether a skill script may run.
 *
 * The script is identified as `<skill>/<scriptPath>`. It must first match one
 * of the requested script patterns (an empty pattern list matches everything),
 * and must then survive the `scripts` policy from the loaded config merged for
 * the current runtime environment.
 *
 * @param skill - Name of the skill that owns the script.
 * @param scriptPath - Script path, appended to the skill name after a slash.
 * @returns `true` when both the intent check and the policy allow the script.
 */
private isScriptAllowedByPolicy(skill: string, scriptPath: string): boolean {
  const identifier = `${skill}/${scriptPath}`;
  const intentPatterns = this.getRequestedScriptPatterns();
  const rejectedByIntent =
    intentPatterns.length !== 0 &&
    !intentPatterns.some((pattern) => matchesSlashPattern(identifier, pattern));
  if (rejectedByIntent) {
    return false;
  }
  const environment = this.runtimeEnvironment();
  const policy = mergePolicyForEnvironment(this.loadedConfig?.scripts, environment);
  const { allowed } = applyToolPolicy([identifier], policy);
  return allowed.length > 0;
}
280
+
281
/**
 * Re-synchronises the MCP tools registered on the dispatcher with the
 * currently requested patterns.
 *
 * Does nothing when no MCP bridge exists. Otherwise the previously registered
 * MCP tool names are unregistered and forgotten before anything is loaded, so
 * when no patterns are requested the dispatcher is left with no MCP tools.
 * Each outcome is logged through `console.info` with a `[poncho][mcp]` prefix.
 *
 * @param reason - Free-form label included in the log entry.
 */
private async refreshMcpTools(reason: string): Promise<void> {
  const bridge = this.mcpBridge;
  if (!bridge) {
    return;
  }

  const requestedPatterns = this.getRequestedMcpPatterns();

  // Drop the previous registration before loading the new set.
  this.dispatcher.unregisterMany(this.registeredMcpToolNames);
  this.registeredMcpToolNames.clear();

  if (requestedPatterns.length === 0) {
    const cleared = JSON.stringify({ event: "tools.cleared", reason, requestedPatterns });
    console.info(`[poncho][mcp] ${cleared}`);
    return;
  }

  const tools = await bridge.loadTools(requestedPatterns, this.runtimeEnvironment());
  this.dispatcher.registerMany(tools);
  for (const { name } of tools) {
    this.registeredMcpToolNames.add(name);
  }

  const refreshed = JSON.stringify({
    event: "tools.refreshed",
    reason,
    requestedPatterns,
    registeredCount: tools.length,
    activeSkills: this.listActiveSkills(),
  });
  console.info(`[poncho][mcp] ${refreshed}`);
}
312
+
313
/**
 * Runs `validateScriptPattern` over every script pattern in the config's
 * `scripts` policy: the top-level `include`/`exclude` lists, then the
 * `include`/`exclude` lists under `byEnvironment` for development, staging
 * and production, in that order. Missing lists are skipped.
 *
 * Each pattern is reported to the validator with a label of the form
 * `poncho.config.js scripts.<path>[<index>]`.
 *
 * @param config - The loaded config, or `undefined` when there is none.
 */
private validateScriptPolicyConfig(config: PonchoConfig | undefined): void {
  const scripts = config?.scripts;
  const perEnvironment = scripts?.byEnvironment;

  // Pattern list paired with the label prefix used in validation messages.
  const targets: Array<[string[] | undefined, string]> = [
    [scripts?.include, "poncho.config.js scripts.include"],
    [scripts?.exclude, "poncho.config.js scripts.exclude"],
    [
      perEnvironment?.development?.include,
      "poncho.config.js scripts.byEnvironment.development.include",
    ],
    [
      perEnvironment?.development?.exclude,
      "poncho.config.js scripts.byEnvironment.development.exclude",
    ],
    [
      perEnvironment?.staging?.include,
      "poncho.config.js scripts.byEnvironment.staging.include",
    ],
    [
      perEnvironment?.staging?.exclude,
      "poncho.config.js scripts.byEnvironment.staging.exclude",
    ],
    [
      perEnvironment?.production?.include,
      "poncho.config.js scripts.byEnvironment.production.include",
    ],
    [
      perEnvironment?.production?.exclude,
      "poncho.config.js scripts.byEnvironment.production.exclude",
    ],
  ];

  for (const [patterns, path] of targets) {
    const list = patterns ?? [];
    for (let index = 0; index < list.length; index += 1) {
      validateScriptPattern(list[index], `${path}[${index}]`);
    }
  }
}
346
+
144
347
  async initialize(): Promise<void> {
145
348
  this.parsedAgent = await parseAgentFile(this.workingDir);
146
349
  const config = await loadPonchoConfig(this.workingDir);
350
+ this.validateScriptPolicyConfig(config);
351
+ this.loadedConfig = config;
147
352
  this.registerConfiguredBuiltInTools(config);
148
353
  const provider = this.parsedAgent.frontmatter.model?.provider ?? "anthropic";
149
354
  const memoryConfig = resolveMemoryConfig(config);
@@ -165,8 +370,25 @@ export class AgentHarness {
165
370
  this.mcpBridge = bridge;
166
371
  const extraSkillPaths = config?.skillPaths;
167
372
  const skillMetadata = await loadSkillMetadata(this.workingDir, extraSkillPaths);
373
+ this.loadedSkills = skillMetadata;
168
374
  this.skillContextWindow = buildSkillContextWindow(skillMetadata);
169
- this.dispatcher.registerMany(createSkillTools(skillMetadata));
375
+ this.dispatcher.registerMany(
376
+ createSkillTools(skillMetadata, {
377
+ onActivateSkill: async (name: string) => {
378
+ this.activeSkillNames.add(name);
379
+ await this.refreshMcpTools(`activate:${name}`);
380
+ return this.listActiveSkills();
381
+ },
382
+ onDeactivateSkill: async (name: string) => {
383
+ this.activeSkillNames.delete(name);
384
+ await this.refreshMcpTools(`deactivate:${name}`);
385
+ return this.listActiveSkills();
386
+ },
387
+ onListActiveSkills: () => this.listActiveSkills(),
388
+ isScriptAllowed: (skill: string, scriptPath: string) =>
389
+ this.isScriptAllowedByPolicy(skill, scriptPath),
390
+ }),
391
+ );
170
392
  if (memoryConfig?.enabled) {
171
393
  this.memoryStore = createMemoryStore(
172
394
  this.parsedAgent.frontmatter.name,
@@ -180,7 +402,8 @@ export class AgentHarness {
180
402
  );
181
403
  }
182
404
  await bridge.startLocalServers();
183
- this.dispatcher.registerMany(await bridge.loadTools());
405
+ await bridge.discoverTools();
406
+ await this.refreshMcpTools("initialize");
184
407
  }
185
408
 
186
409
  async shutdown(): Promise<void> {
@@ -279,13 +502,25 @@ ${boundedMainMemory.trim()}`
279
502
  yield pushEvent({ type: "step:started", step });
280
503
  yield pushEvent({ type: "model:request", tokens: 0 });
281
504
 
505
+ const dispatcherTools = this.dispatcher.list();
506
+ const exposedToolNames = new Map<string, string>();
507
+ const usedProviderToolNames = new Set<string>();
508
+ const modelTools = dispatcherTools.map((tool, index) => {
509
+ const safeName = toProviderSafeToolName(tool.name, index, usedProviderToolNames);
510
+ exposedToolNames.set(safeName, tool.name);
511
+ if (safeName === tool.name) {
512
+ return tool;
513
+ }
514
+ return { ...tool, name: safeName };
515
+ });
516
+
282
517
  const modelCallInput = {
283
518
  modelName: agent.frontmatter.model?.name ?? "claude-opus-4-5",
284
519
  temperature: agent.frontmatter.model?.temperature,
285
520
  maxTokens: agent.frontmatter.model?.maxTokens,
286
521
  systemPrompt: integrityPrompt,
287
522
  messages: trimMessageWindow(messages),
288
- tools: this.dispatcher.list(),
523
+ tools: modelTools,
289
524
  };
290
525
  let modelResponse: ModelResponse | undefined;
291
526
  let streamedAnyChunk = false;
@@ -366,19 +601,20 @@ ${boundedMainMemory.trim()}`
366
601
  }> = [];
367
602
 
368
603
  for (const call of modelResponse.toolCalls) {
369
- yield pushEvent({ type: "tool:started", tool: call.name, input: call.input });
370
- const definition = this.dispatcher.get(call.name);
604
+ const runtimeToolName = exposedToolNames.get(call.name) ?? call.name;
605
+ yield pushEvent({ type: "tool:started", tool: runtimeToolName, input: call.input });
606
+ const definition = this.dispatcher.get(runtimeToolName);
371
607
  if (definition?.requiresApproval) {
372
608
  const approvalId = `approval_${randomUUID()}`;
373
609
  yield pushEvent({
374
610
  type: "tool:approval:required",
375
- tool: call.name,
611
+ tool: runtimeToolName,
376
612
  input: call.input,
377
613
  approvalId,
378
614
  });
379
615
  const approved = this.approvalHandler
380
616
  ? await this.approvalHandler({
381
- tool: call.name,
617
+ tool: runtimeToolName,
382
618
  input: call.input,
383
619
  runId,
384
620
  step,
@@ -408,7 +644,7 @@ ${boundedMainMemory.trim()}`
408
644
  }
409
645
  approvedCalls.push({
410
646
  id: call.id,
411
- name: call.name,
647
+ name: runtimeToolName,
412
648
  input: call.input,
413
649
  });
414
650
  }