@poncho-ai/harness 0.40.0 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/harness.ts CHANGED
@@ -102,6 +102,27 @@ export interface HarnessOptions {
102
102
  toolDefinitions?: ToolDefinition[];
103
103
  modelProvider?: ModelProviderFactory;
104
104
  uploadStore?: UploadStore;
105
+ /**
106
+ * Inject the agent definition directly instead of reading AGENT.md from
107
+ * `workingDir`. Pass raw markdown (string) or a pre-parsed `ParsedAgent`.
108
+ * When provided, `storageEngine` is also required — the engine's
109
+ * `agentId` becomes the source of truth for partitioning, and the
110
+ * filesystem identity dance (`ensureAgentIdentity`) is skipped.
111
+ */
112
+ agentDefinition?: string | ParsedAgent;
113
+ /**
114
+ * Pre-constructed storage engine. When provided, the harness will not
115
+ * create one internally. The engine's `agentId` is used wherever the
116
+ * harness today reads `parsedAgent.frontmatter.id`.
117
+ */
118
+ storageEngine?: StorageEngine;
119
+ /**
120
+ * Inject a `PonchoConfig` object directly instead of importing
121
+ * `poncho.config.js` from `workingDir`. When provided, the disk-based
122
+ * loader is skipped. Downstream resolvers (`resolveMemoryConfig`,
123
+ * `resolveStateConfig`, etc.) run as today regardless of source.
124
+ */
125
+ config?: PonchoConfig;
105
126
  }
106
127
 
107
128
  export interface HarnessRunOutput {
@@ -805,6 +826,7 @@ export class AgentHarness {
805
826
  reminderStore?: ReminderStore;
806
827
  secretsStore?: SecretsStore;
807
828
  private loadedConfig?: PonchoConfig;
829
+ private readonly injectedConfig?: PonchoConfig;
808
830
  private loadedSkills: SkillMetadata[] = [];
809
831
  private skillFingerprint = "";
810
832
  private lastSkillRefreshAt = 0;
@@ -823,6 +845,8 @@ export class AgentHarness {
823
845
 
824
846
  private parsedAgent?: ParsedAgent;
825
847
  private agentFileFingerprint = "";
848
+ private injectedAgentDefinition?: string | ParsedAgent;
849
+ private injectedStorageEngine = false;
826
850
  private mcpBridge?: LocalMcpBridge;
827
851
  private subagentManager?: SubagentManager;
828
852
  private readonly archivedToolResultsByConversation = new Map<string, Record<string, ArchivedToolResult>>();
@@ -1011,6 +1035,19 @@ export class AgentHarness {
1011
1035
  this.modelProviderInjected = !!options.modelProvider;
1012
1036
  this.modelProvider = options.modelProvider ?? createModelProvider("anthropic");
1013
1037
  this.uploadStore = options.uploadStore;
1038
+ this.injectedConfig = options.config;
1039
+
1040
+ if (options.agentDefinition !== undefined && options.storageEngine === undefined) {
1041
+ throw new Error(
1042
+ "HarnessOptions.agentDefinition requires HarnessOptions.storageEngine — " +
1043
+ "construct a StorageEngine with the desired agentId and pass both.",
1044
+ );
1045
+ }
1046
+ this.injectedAgentDefinition = options.agentDefinition;
1047
+ if (options.storageEngine) {
1048
+ this.storageEngine = options.storageEngine;
1049
+ this.injectedStorageEngine = true;
1050
+ }
1014
1051
 
1015
1052
  if (options.toolDefinitions?.length) {
1016
1053
  this.dispatcher.registerMany(options.toolDefinitions);
@@ -1508,6 +1545,11 @@ export class AgentHarness {
1508
1545
  if (this.environment !== "development") {
1509
1546
  return false;
1510
1547
  }
1548
+ if (this.injectedAgentDefinition !== undefined) {
1549
+ // Caller owns the agent definition — re-instantiate the harness to
1550
+ // pick up changes rather than re-reading from disk.
1551
+ return false;
1552
+ }
1511
1553
  try {
1512
1554
  const agentFilePath = resolve(this.workingDir, "AGENT.md");
1513
1555
  const rawContent = await readFile(agentFilePath, "utf8");
@@ -1586,15 +1628,28 @@ export class AgentHarness {
1586
1628
  }
1587
1629
 
1588
1630
  async initialize(): Promise<void> {
1589
- const agentFilePath = resolve(this.workingDir, "AGENT.md");
1590
- const agentRawContent = await readFile(agentFilePath, "utf8");
1591
- this.parsedAgent = parseAgentMarkdown(agentRawContent);
1592
- this.agentFileFingerprint = agentRawContent;
1593
- const identity = await ensureAgentIdentity(this.workingDir);
1594
- if (!this.parsedAgent.frontmatter.id) {
1595
- this.parsedAgent.frontmatter.id = identity.id;
1596
- }
1597
- const config = await loadPonchoConfig(this.workingDir);
1631
+ if (this.injectedAgentDefinition !== undefined) {
1632
+ this.parsedAgent = typeof this.injectedAgentDefinition === "string"
1633
+ ? parseAgentMarkdown(this.injectedAgentDefinition)
1634
+ : this.injectedAgentDefinition;
1635
+ this.agentFileFingerprint = "";
1636
+ // The injected StorageEngine is the source of truth for agentId.
1637
+ // Mirror it onto frontmatter.id so existing downstream readers
1638
+ // (`frontmatter.id ?? frontmatter.name`) keep resolving correctly.
1639
+ if (this.storageEngine) {
1640
+ this.parsedAgent.frontmatter.id = this.storageEngine.agentId;
1641
+ }
1642
+ } else {
1643
+ const agentFilePath = resolve(this.workingDir, "AGENT.md");
1644
+ const agentRawContent = await readFile(agentFilePath, "utf8");
1645
+ this.parsedAgent = parseAgentMarkdown(agentRawContent);
1646
+ this.agentFileFingerprint = agentRawContent;
1647
+ const identity = await ensureAgentIdentity(this.workingDir);
1648
+ if (!this.parsedAgent.frontmatter.id) {
1649
+ this.parsedAgent.frontmatter.id = identity.id;
1650
+ }
1651
+ }
1652
+ const config = this.injectedConfig ?? await loadPonchoConfig(this.workingDir);
1598
1653
  this.loadedConfig = config;
1599
1654
  this.registerConfiguredBuiltInTools(config);
1600
1655
  const provider = this.parsedAgent.frontmatter.model?.provider ?? "anthropic";
@@ -1612,15 +1667,23 @@ export class AgentHarness {
1612
1667
  const agentId = this.parsedAgent.frontmatter.id ?? this.parsedAgent.frontmatter.name;
1613
1668
 
1614
1669
  // --- Unified Storage Engine ---
1615
- const storageProvider = (config?.storage?.provider ?? "sqlite") as StorageProvider;
1616
- const engine = createStorageEngine({
1617
- provider: storageProvider,
1618
- workingDir: this.workingDir,
1619
- agentId,
1620
- urlEnv: config?.storage?.urlEnv,
1621
- });
1622
- await engine.initialize();
1623
- this.storageEngine = engine;
1670
+ let engine: StorageEngine;
1671
+ if (this.injectedStorageEngine && this.storageEngine) {
1672
+ // Caller-constructed engine: it may already be initialized, so calling
1673
+ // initialize() again here relies on it being idempotent (true of current impls).
1674
+ engine = this.storageEngine;
1675
+ await engine.initialize();
1676
+ } else {
1677
+ const storageProvider = (config?.storage?.provider ?? "sqlite") as StorageProvider;
1678
+ engine = createStorageEngine({
1679
+ provider: storageProvider,
1680
+ workingDir: this.workingDir,
1681
+ agentId,
1682
+ urlEnv: config?.storage?.urlEnv,
1683
+ });
1684
+ await engine.initialize();
1685
+ this.storageEngine = engine;
1686
+ }
1624
1687
 
1625
1688
  // --- Bash Environment Manager ---
1626
1689
  const maxFileSize = config?.storage?.limits?.maxFileSize ?? 100 * 1024 * 1024; // 100MB
package/src/mcp.ts CHANGED
@@ -46,6 +46,12 @@ class McpHttpError extends Error {
46
46
  }
47
47
  }
48
48
 
49
/**
 * Signals that the MCP server no longer recognises the client's session.
 *
 * Thrown by the HTTP layer when a request that carried a session id is
 * answered with 404; `request()` catches it, clears the session state and
 * retries once with a fresh `initialize` handshake.
 */
class McpSessionExpiredError extends Error {
  constructor() {
    super("MCP session expired");
    // Without this the error reports itself as a plain "Error" in logs and
    // `toString()`, which hides the fact that the failure is retryable.
    this.name = "McpSessionExpiredError";
  }
}
54
+
49
55
  class StreamableHttpMcpRpcClient implements McpRpcClient {
50
56
  private readonly endpoint: string;
51
57
  private readonly timeoutMs: number;
@@ -106,6 +112,9 @@ class StreamableHttpMcpRpcClient implements McpRpcClient {
106
112
  if (response.status === 403) {
107
113
  throw new McpHttpError(403, "MCP server forbidden");
108
114
  }
115
+ if (response.status === 404 && this.sessionId) {
116
+ throw new McpSessionExpiredError();
117
+ }
109
118
  if (!response.ok) {
110
119
  throw new Error(`MCP HTTP request failed with status ${response.status}`);
111
120
  }
@@ -192,20 +201,32 @@ class StreamableHttpMcpRpcClient implements McpRpcClient {
192
201
  }
193
202
 
194
203
  private async request(method: string, params?: Record<string, unknown>): Promise<unknown> {
195
- await this.ensureInitialized();
196
- const id = this.idCounter++;
197
- const payload = {
198
- jsonrpc: "2.0",
199
- id,
200
- method,
201
- params: params ?? {},
202
- };
203
- const payloads = await this.postMessage(payload);
204
- const result = this.extractResult(payloads, id);
205
- if (result.error) {
206
- throw new Error(result.error.message ?? `MCP error on ${method}`);
204
+ for (let attempt = 0; attempt < 2; attempt++) {
205
+ try {
206
+ await this.ensureInitialized();
207
+ const id = this.idCounter++;
208
+ const payload = {
209
+ jsonrpc: "2.0",
210
+ id,
211
+ method,
212
+ params: params ?? {},
213
+ };
214
+ const payloads = await this.postMessage(payload);
215
+ const result = this.extractResult(payloads, id);
216
+ if (result.error) {
217
+ throw new Error(result.error.message ?? `MCP error on ${method}`);
218
+ }
219
+ return result.result;
220
+ } catch (error) {
221
+ if (error instanceof McpSessionExpiredError && attempt === 0) {
222
+ this.sessionId = undefined;
223
+ this.initialized = false;
224
+ continue;
225
+ }
226
+ throw error;
227
+ }
207
228
  }
208
- return result.result;
229
+ throw new Error(`MCP request to ${method} failed after session retry`);
209
230
  }
210
231
 
211
232
  private async ensureInitialized(): Promise<void> {
@@ -270,6 +291,16 @@ class StreamableHttpMcpRpcClient implements McpRpcClient {
270
291
  }
271
292
  }
272
293
 
294
/**
 * One entry of the per-tenant MCP client cache: the client itself plus the
 * two values consulted when deciding whether it may be reused.
 */
interface CachedTenantClient {
  /** Client handed back to callers on a cache hit. */
  client: StreamableHttpMcpRpcClient;
  /** Bearer token the client was constructed with; a different token invalidates the entry. */
  token: string;
  /** `Date.now()` value recorded at creation and on every hit; compared against the idle TTL. */
  lastUsed: number;
}
299
+
300
/** Default idle lifetime of a cached per-tenant client: 15 minutes, in milliseconds. */
const TENANT_CLIENT_TTL_MS = 15 * 60 * 1_000;

/**
 * Builds the cache key for a `(serverName, tenantId)` pair by joining the two
 * parts with a NUL character.
 */
const tenantClientKey = (serverName: string, tenantId: string): string =>
  [serverName, tenantId].join("\0");
303
+
273
304
  export class LocalMcpBridge {
274
305
  private readonly remoteServers: RemoteMcpServerConfig[];
275
306
  private readonly rpcClients = new Map<string, McpRpcClient>();
@@ -277,6 +308,18 @@ export class LocalMcpBridge {
277
308
  private readonly unavailableServers = new Map<string, string>();
278
309
  private readonly authFailedServers = new Set<string>();
279
310
  private envResolver?: (tenantId: string | undefined, envName: string) => Promise<string | undefined>;
311
+ /**
312
+ * Per-tenant MCP client cache. For consumer/SaaS deployments where every
313
+ * call resolves a different bearer token, building a fresh
314
+ * `StreamableHttpMcpRpcClient` per call would force a fresh `initialize`
315
+ * round-trip every time. We keep one client per `(serverName, tenantId)`
316
+ * with TTL-based idle eviction; on token rotation we evict the entry
317
+ * lazily and rebuild.
318
+ */
319
+ private readonly tenantClients = new Map<string, CachedTenantClient>();
320
+ /** Test/observability hook: bumped every time a new tenant client is constructed. */
321
+ tenantClientConstructions = 0;
322
+ private readonly tenantClientTtlMs: number;
280
323
 
281
324
  /**
282
325
  * Set a resolver for per-tenant env vars (e.g. MCP auth tokens).
@@ -286,7 +329,8 @@ export class LocalMcpBridge {
286
329
  this.envResolver = resolver;
287
330
  }
288
331
 
289
- constructor(config: McpConfig | undefined) {
332
+ constructor(config: McpConfig | undefined, options?: { tenantClientTtlMs?: number }) {
333
+ this.tenantClientTtlMs = options?.tenantClientTtlMs ?? TENANT_CLIENT_TTL_MS;
290
334
  this.remoteServers = (config?.mcp ?? []).filter((entry): entry is RemoteMcpServerConfig =>
291
335
  typeof entry.url === "string",
292
336
  );
@@ -477,6 +521,43 @@ export class LocalMcpBridge {
477
521
  await client.close();
478
522
  }
479
523
  this.rpcClients.clear();
524
+ for (const [, entry] of this.tenantClients) {
525
+ await entry.client.close();
526
+ }
527
+ this.tenantClients.clear();
528
+ }
529
+
530
/**
 * Returns the MCP client cached for `(serverName, tenantId)`, constructing a
 * new one when there is no entry, the entry has been idle for longer than
 * `tenantClientTtlMs`, or it was built with a different token.
 *
 * Eviction is lazy (no background timer) and runs on every cache miss: the
 * superseded entry for this key and every other entry whose idle time has
 * exceeded the TTL are removed and closed. Sweeping the whole map, rather
 * than only the requested key, keeps entries for tenants that never come
 * back from accumulating until `close()`.
 *
 * @param serverName Name of the MCP server; first half of the cache key.
 * @param tenantId Tenant the call is made for; second half of the cache key.
 * @param token Bearer token resolved for this tenant.
 * @param server Server config supplying `url`, `timeoutMs` and `headers` for a new client.
 * @returns The reusable cached client, or a newly constructed one.
 */
private getOrCreateTenantClient(
  serverName: string,
  tenantId: string,
  token: string,
  server: RemoteMcpServerConfig,
): StreamableHttpMcpRpcClient {
  const key = tenantClientKey(serverName, tenantId);
  const now = Date.now();
  const isIdle = (entry: CachedTenantClient): boolean =>
    now - entry.lastUsed > this.tenantClientTtlMs;

  const existing = this.tenantClients.get(key);
  if (existing && !isIdle(existing) && existing.token === token) {
    existing.lastUsed = now;
    return existing.client;
  }

  // Cache miss: drop the stale entry for this key (if any) together with all
  // other idle entries. Deleting from a Map while iterating it is well-defined.
  // NOTE(review): an entry is only idle if no call has started on it for a
  // full TTL, so closing it should not interrupt in-flight work — confirm if
  // very small TTLs are used.
  for (const [cachedKey, entry] of this.tenantClients) {
    if (cachedKey !== key && !isIdle(entry)) {
      continue;
    }
    this.tenantClients.delete(cachedKey);
    // Best-effort close: the entry is already superseded, so a failing close
    // must not surface as an unhandled promise rejection.
    void Promise.resolve(entry.client.close()).catch(() => undefined);
  }

  const client = new StreamableHttpMcpRpcClient(
    server.url,
    server.timeoutMs ?? 10_000,
    token,
    server.headers,
  );
  this.tenantClients.set(key, { client, token, lastUsed: now });
  this.tenantClientConstructions += 1;
  return client;
}
481
562
 
482
563
  listServers(): RemoteMcpServerConfig[] {
@@ -617,20 +698,18 @@ export class LocalMcpBridge {
617
698
  handler: async (input, context) => {
618
699
  try {
619
700
  // Per-tenant token resolution: if we have a resolver and the server uses tokenEnv,
620
- // create a per-request client with the tenant-specific token
701
+ // resolve the tenant token and reuse a cached per-tenant client when present.
621
702
  const tokenEnv = server?.auth?.tokenEnv;
622
- let callClient = client;
623
- if (tokenEnv && this.envResolver && context?.tenantId) {
703
+ let callClient: McpRpcClient = client;
704
+ if (tokenEnv && this.envResolver && context?.tenantId && server) {
624
705
  const tenantToken = await this.envResolver(context.tenantId, tokenEnv);
625
706
  const defaultToken = process.env[tokenEnv];
626
- // Only create a per-request client when the tenant has a different token.
627
- // Using the original client preserves the established MCP session.
628
707
  if (tenantToken && tenantToken !== defaultToken) {
629
- callClient = new StreamableHttpMcpRpcClient(
630
- server.url,
631
- server.timeoutMs ?? 10_000,
708
+ callClient = this.getOrCreateTenantClient(
709
+ serverName,
710
+ context.tenantId,
632
711
  tenantToken,
633
- server.headers,
712
+ server,
634
713
  );
635
714
  }
636
715
  }
@@ -33,6 +33,8 @@ export interface VfsDirEntry {
33
33
  // ---------------------------------------------------------------------------
34
34
 
35
35
  export interface StorageEngine {
36
+ /** Partition key: every read/write is scoped to this agent id. */
37
+ readonly agentId: string;
36
38
  /** Run migrations and prepare the storage backend. */
37
39
  initialize(): Promise<void>;
38
40
  /** Gracefully release resources. */
@@ -55,7 +55,7 @@ const vfsKey = (tenantId: string, path: string) => `${tenantId}\0${path}`;
55
55
  // ---------------------------------------------------------------------------
56
56
 
57
57
  export class InMemoryEngine implements StorageEngine {
58
- private readonly agentId: string;
58
+ readonly agentId: string;
59
59
 
60
60
  // Conversation data
61
61
  private convs = new Map<string, Conversation>();
@@ -191,7 +191,7 @@ const colBytes = (v: unknown): number => {
191
191
 
192
192
  export abstract class SqlStorageEngine implements StorageEngine {
193
193
  protected readonly dialect: Dialect;
194
- protected readonly agentId: string;
194
+ readonly agentId: string;
195
195
  protected abstract readonly executor: QueryExecutor;
196
196
  protected readonly egressMeter = new ConversationEgressMeter();
197
197
 
@@ -166,6 +166,24 @@ cron:
166
166
  expect(parsed.frontmatter.cron!["job"]!.timezone).toBe("Europe/London");
167
167
  });
168
168
 
169
+ it.each(["UTC", "GMT", "Etc/UTC"])(
170
+ "accepts %s as a timezone",
171
+ (tz) => {
172
+ const parsed = parseAgentMarkdown(`---
173
+ name: test-agent
174
+ cron:
175
+ job:
176
+ schedule: "0 9 * * *"
177
+ timezone: "${tz}"
178
+ task: "Do something"
179
+ ---
180
+
181
+ # Agent
182
+ `);
183
+ expect(parsed.frontmatter.cron!["job"]!.timezone).toBe(tz);
184
+ },
185
+ );
186
+
169
187
  it("parses maxRuns when present", () => {
170
188
  const parsed = parseAgentMarkdown(`---
171
189
  name: test-agent
@@ -0,0 +1,63 @@
1
+ import { mkdtemp, rm, writeFile } from "node:fs/promises";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { describe, expect, it } from "vitest";
5
+ import { AgentHarness } from "../src/harness.js";
6
+ import type { PonchoConfig } from "../src/config.js";
7
+
8
+ const AGENT_MD = `---
9
+ name: config-inject-agent
10
+ model:
11
+ provider: anthropic
12
+ name: claude-opus-4-5
13
+ ---
14
+
15
+ # Config injection test
16
+ `;
17
+
18
// Verifies that `HarnessOptions.config` replaces the on-disk poncho.config.js
// loader, and that the disk loader still runs when no config is injected.
describe("HarnessOptions.config injection (PR 2)", () => {
  // Creates a scratch working dir containing only AGENT.md, passes it to
  // `run`, and removes the directory afterwards whether or not `run` throws.
  const withAgentDir = async (
    prefix: string,
    run: (dir: string) => Promise<void>,
  ): Promise<void> => {
    const dir = await mkdtemp(join(tmpdir(), prefix));
    try {
      await writeFile(join(dir, "AGENT.md"), AGENT_MD, "utf8");
      await run(dir);
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  };

  // Names of every tool the harness currently exposes.
  const toolNamesOf = (harness: AgentHarness) => harness.listTools().map((tool) => tool.name);

  it("uses an injected PonchoConfig instead of reading poncho.config.js from disk", () =>
    withAgentDir("poncho-cfg-injected-", async (dir) => {
      // No poncho.config.js is written on purpose: the injected object must
      // be the only configuration source.
      const config: PonchoConfig = {
        tools: { web_search: false },
        storage: { provider: "memory" },
      };

      const harness = new AgentHarness({ workingDir: dir, config });
      await harness.initialize();

      // web_search is disabled by the injected config; bash is expected to
      // remain registered as a default built-in.
      const names = toolNamesOf(harness);
      expect(names).not.toContain("web_search");
      expect(names).toContain("bash");
    }));

  it("disk-loaded behaviour is unchanged when no config option is provided", () =>
    withAgentDir("poncho-cfg-disk-", async (dir) => {
      // The on-disk config disables web_search, so its absence from the tool
      // list shows that loadPonchoConfig actually ran.
      await writeFile(
        join(dir, "poncho.config.js"),
        "export default { tools: { web_search: false }, storage: { provider: 'memory' } };\n",
        "utf8",
      );

      const harness = new AgentHarness({ workingDir: dir });
      await harness.initialize();

      const names = toolNamesOf(harness);
      expect(names).not.toContain("web_search");
      expect(names).toContain("bash");
    }));
});
@@ -0,0 +1,93 @@
1
+ import { mkdtemp, rm } from "node:fs/promises";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { describe, expect, it } from "vitest";
5
+ import { AgentHarness } from "../src/harness.js";
6
+ import { InMemoryEngine } from "../src/storage/memory-engine.js";
7
+
8
+ const AGENT_MD = `---
9
+ name: injected-agent
10
+ model:
11
+ provider: anthropic
12
+ name: claude-opus-4-5
13
+ ---
14
+
15
+ # Injected agent
16
+
17
+ You are a test agent.
18
+ `;
19
+
20
// Covers injecting the agent definition and storage engine through
// HarnessOptions instead of reading AGENT.md from the working directory.
describe("HarnessOptions injection (PR 1)", () => {
  // Creates an empty scratch directory, passes it to `run`, and removes it
  // afterwards whether or not `run` throws. No AGENT.md is ever written.
  const inScratchDir = async (
    prefix: string,
    run: (dir: string) => Promise<void>,
  ): Promise<void> => {
    const dir = await mkdtemp(join(tmpdir(), prefix));
    try {
      await run(dir);
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  };

  it("initializes without an AGENT.md on disk when agentDefinition + storageEngine are provided", () =>
    inScratchDir("poncho-injection-", async (dir) => {
      const engine = new InMemoryEngine("user-123");
      const harness = new AgentHarness({
        workingDir: dir,
        agentDefinition: AGENT_MD,
        storageEngine: engine,
      });
      await expect(harness.initialize()).resolves.toBeUndefined();
      // `dir` holds no AGENT.md, so the name can only come from the injected markdown.
      expect(harness.frontmatter?.name).toBe("injected-agent");
    }));

  it("mirrors storageEngine.agentId onto frontmatter.id on the injected path", () =>
    inScratchDir("poncho-injection-id-", async (dir) => {
      const engine = new InMemoryEngine("user-456");
      const harness = new AgentHarness({
        workingDir: dir,
        agentDefinition: AGENT_MD,
        storageEngine: engine,
      });
      await harness.initialize();
      expect(harness.frontmatter?.id).toBe("user-456");
    }));

  it("accepts a pre-parsed ParsedAgent as agentDefinition", () =>
    inScratchDir("poncho-injection-parsed-", async (dir) => {
      const engine = new InMemoryEngine("user-789");
      const parsed = {
        frontmatter: {
          name: "preparsed-agent",
          model: { provider: "anthropic" as const, name: "claude-opus-4-5" },
        },
        body: "# Pre-parsed agent\n",
      };
      const harness = new AgentHarness({
        workingDir: dir,
        agentDefinition: parsed,
        storageEngine: engine,
      });
      await harness.initialize();
      expect(harness.frontmatter?.name).toBe("preparsed-agent");
      expect(harness.frontmatter?.id).toBe("user-789");
    }));

  it("throws when agentDefinition is provided without storageEngine", () => {
    const construct = () =>
      new AgentHarness({
        agentDefinition: AGENT_MD,
      });
    expect(construct).toThrow(/agentDefinition requires HarnessOptions\.storageEngine/);
  });

  it("falls back to disk path when neither agentDefinition nor storageEngine is provided (existing behaviour unchanged)", async () => {
    // Only the constructor is exercised here: it must accept options that
    // carry no injection fields. initialize() is not called in this test.
    const construct = () => new AgentHarness({ workingDir: tmpdir() });
    expect(construct).not.toThrow();
  });
});