@poncho-ai/harness 0.14.2 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.14.2 build /Users/cesar/Dev/latitude/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.15.1 build /Users/cesar/Dev/latitude/poncho-ai/packages/harness
3
3
  > tsup src/index.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/index.ts
@@ -7,8 +7,8 @@
7
7
  CLI tsup v8.5.1
8
8
  CLI Target: es2022
9
9
  ESM Build start
10
- ESM dist/index.js 177.01 KB
11
- ESM ⚡️ Build success in 73ms
10
+ ESM dist/index.js 182.55 KB
11
+ ESM ⚡️ Build success in 131ms
12
12
  DTS Build start
13
- DTS ⚡️ Build success in 3459ms
14
- DTS dist/index.d.ts 21.71 KB
13
+ DTS ⚡️ Build success in 6605ms
14
+ DTS dist/index.d.ts 22.34 KB
@@ -1,18 +1,20 @@
1
1
 
2
- > @poncho-ai/harness@0.14.0 test /Users/cesar/Dev/latitude/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.14.2 test /Users/cesar/Dev/latitude/poncho-ai/packages/harness
3
3
  > vitest
4
4
 
5
5
 
6
6
   RUN  v1.6.1 /Users/cesar/Dev/latitude/poncho-ai/packages/harness
7
7
 
8
8
  [event] step:completed {"type":"step:completed","step":1,"duration":1}
9
+ ✓ test/telemetry.test.ts  (3 tests) 3ms
9
10
  [event] step:started {"type":"step:started","step":2}
10
- ✓ test/telemetry.test.ts  (3 tests) 11ms
11
- ✓ test/schema-converter.test.ts  (27 tests) 21ms
11
+ ✓ test/schema-converter.test.ts  (27 tests) 16ms
12
12
  stdout | test/mcp.test.ts > mcp bridge protocol transports > discovers and calls tools over streamable HTTP
13
13
  [poncho][mcp] {"event":"catalog.loaded","server":"remote","discoveredCount":1}
14
14
  [poncho][mcp] {"event":"tools.selected","requestedPatternCount":1,"registeredCount":1,"filteredByPolicyCount":0,"filteredByIntentCount":0}
15
15
 
16
+ ✓ test/agent-parser.test.ts  (10 tests) 17ms
17
+ ✓ test/memory.test.ts  (4 tests) 16ms
16
18
  stdout | test/mcp.test.ts > mcp bridge protocol transports > selects discovered tools by requested patterns
17
19
  [poncho][mcp] {"event":"catalog.loaded","server":"remote","discoveredCount":2}
18
20
  [poncho][mcp] {"event":"tools.selected","requestedPatternCount":1,"registeredCount":1,"filteredByPolicyCount":0,"filteredByIntentCount":1}
@@ -28,12 +30,10 @@
28
30
  [poncho][mcp] {"event":"catalog.loaded","server":"remote","discoveredCount":1}
29
31
  [poncho][mcp] {"event":"tools.selected","requestedPatternCount":1,"registeredCount":1,"filteredByPolicyCount":0,"filteredByIntentCount":0}
30
32
 
31
- ✓ test/mcp.test.ts  (6 tests) 516ms
32
- ✓ test/agent-parser.test.ts  (10 tests) 130ms
33
- ✓ test/memory.test.ts  (4 tests) 192ms
34
- ✓ test/state.test.ts  (5 tests) 294ms
35
- ✓ test/model-factory.test.ts  (4 tests) 8ms
36
- ✓ test/agent-identity.test.ts  (2 tests) 33ms
33
+ ✓ test/mcp.test.ts  (6 tests) 97ms
34
+ ✓ test/state.test.ts  (5 tests) 217ms
35
+ ✓ test/agent-identity.test.ts  (2 tests) 15ms
36
+ ✓ test/model-factory.test.ts  (4 tests) 3ms
37
37
  stdout | test/harness.test.ts > agent harness > registers default filesystem tools
38
38
  [poncho][mcp] {"event":"tools.cleared","reason":"initialize","requestedPatterns":[]}
39
39
 
@@ -57,6 +57,8 @@
57
57
 
58
58
  stdout | test/harness.test.ts > agent harness > refreshes skill metadata and tools in development mode
59
59
  [poncho][mcp] {"event":"tools.cleared","reason":"skills:changed","requestedPatterns":[]}
60
+
61
+ stdout | test/harness.test.ts > agent harness > refreshes skill metadata and tools in development mode
60
62
  [poncho][mcp] {"event":"tools.cleared","reason":"activate:beta","requestedPatterns":[]}
61
63
 
62
64
  stdout | test/harness.test.ts > agent harness > prunes removed active skills after refresh in development mode
@@ -71,8 +73,6 @@
71
73
 
72
74
  stdout | test/harness.test.ts > agent harness > clears active skills when skill metadata changes in development mode
73
75
  [poncho][mcp] {"event":"tools.cleared","reason":"initialize","requestedPatterns":[]}
74
-
75
- stdout | test/harness.test.ts > agent harness > clears active skills when skill metadata changes in development mode
76
76
  [poncho][mcp] {"event":"tools.cleared","reason":"activate:alpha","requestedPatterns":[]}
77
77
 
78
78
  stdout | test/harness.test.ts > agent harness > clears active skills when skill metadata changes in development mode
@@ -98,6 +98,8 @@
98
98
  [poncho][mcp] {"event":"tools.cleared","reason":"initialize","requestedPatterns":[]}
99
99
  [poncho][mcp] {"event":"tools.selected","requestedPatternCount":1,"registeredCount":1,"filteredByPolicyCount":0,"filteredByIntentCount":1}
100
100
  [poncho][mcp] {"event":"tools.refreshed","reason":"activate:skill-a","requestedPatterns":["remote/a"],"registeredCount":1,"activeSkills":["skill-a"]}
101
+
102
+ stdout | test/harness.test.ts > agent harness > registers MCP tools dynamically for stacked active skills and supports deactivation
101
103
  [poncho][mcp] {"event":"tools.selected","requestedPatternCount":2,"registeredCount":2,"filteredByPolicyCount":0,"filteredByIntentCount":0}
102
104
  [poncho][mcp] {"event":"tools.refreshed","reason":"activate:skill-b","requestedPatterns":["remote/a","remote/b"],"registeredCount":2,"activeSkills":["skill-a","skill-b"]}
103
105
  [poncho][mcp] {"event":"tools.selected","requestedPatternCount":1,"registeredCount":1,"filteredByPolicyCount":0,"filteredByIntentCount":1}
@@ -128,10 +130,10 @@
128
130
  [poncho][mcp] {"event":"tools.refreshed","reason":"activate:skill-slow","requestedPatterns":["remote/slow"],"registeredCount":1,"activeSkills":["skill-slow"]}
129
131
  [poncho][mcp] {"event":"tools.cleared","reason":"deactivate:skill-slow","requestedPatterns":[]}
130
132
 
131
- ✓ test/harness.test.ts  (25 tests) 429ms
133
+ ✓ test/harness.test.ts  (25 tests) 365ms
132
134
 
133
135
   Test Files  9 passed (9)
134
136
   Tests  86 passed (86)
135
-  Start at  10:02:54
136
-  Duration  4.19s (transform 2.16s, setup 0ms, collect 7.51s, tests 1.63s, environment 1ms, prepare 1.97s)
137
+  Start at  13:30:36
138
+  Duration  2.19s (transform 1.11s, setup 0ms, collect 2.81s, tests 749ms, environment 5ms, prepare 1.23s)
137
139
 
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.15.1
4
+
5
+ ### Patch Changes
6
+
7
+ - Fix browser session reconnection, tab lifecycle management, and web UI panel state handling.
8
+
9
+ - Updated dependencies []:
10
+ - @poncho-ai/sdk@1.1.1
11
+
12
+ ## 0.15.0
13
+
14
+ ### Minor Changes
15
+
16
+ - [`139ed89`](https://github.com/cesr/poncho-ai/commit/139ed89a5df2372cfb0a124c967f51f4d8158c3b) Thanks [@cesr](https://github.com/cesr)! - Add browser automation for Poncho agents with real-time viewport streaming, per-conversation tab management, interactive browser control in the web UI, and shared agent-level profiles for authentication persistence.
17
+
18
+ ### Patch Changes
19
+
20
+ - Updated dependencies [[`139ed89`](https://github.com/cesr/poncho-ai/commit/139ed89a5df2372cfb0a124c967f51f4d8158c3b)]:
21
+ - @poncho-ai/sdk@1.1.0
22
+
3
23
  ## 0.14.2
4
24
 
5
25
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -316,6 +316,19 @@ interface PonchoConfig extends McpConfig {
316
316
  };
317
317
  /** Set to `false` to disable the built-in web UI (headless / API-only mode). */
318
318
  webUi?: false;
319
+ /** Enable browser automation tools. Set `true` for defaults, or provide config. */
320
+ browser?: boolean | {
321
+ viewport?: {
322
+ width?: number;
323
+ height?: number;
324
+ };
325
+ quality?: number;
326
+ everyNthFrame?: number;
327
+ profileDir?: string;
328
+ sessionName?: string;
329
+ executablePath?: string;
330
+ headless?: boolean;
331
+ };
319
332
  }
320
333
  declare const resolveStateConfig: (config: PonchoConfig | undefined) => StateConfig | undefined;
321
334
  declare const resolveMemoryConfig: (config: PonchoConfig | undefined) => MemoryConfig | undefined;
@@ -439,6 +452,8 @@ declare class AgentHarness {
439
452
  private readonly registeredMcpToolNames;
440
453
  private latitudeTelemetry?;
441
454
  private insideTelemetryCapture;
455
+ private _browserSession?;
456
+ private _browserMod?;
442
457
  private parsedAgent?;
443
458
  private mcpBridge?;
444
459
  private resolveToolAccess;
@@ -471,6 +486,10 @@ declare class AgentHarness {
471
486
  private registerSkillTools;
472
487
  private refreshSkillsIfChanged;
473
488
  initialize(): Promise<void>;
489
+ private initBrowserTools;
490
+ /** Conversation ID of the currently executing run (set during run, cleared after). */
491
+ private _currentRunConversationId?;
492
+ get browserSession(): unknown;
474
493
  shutdown(): Promise<void>;
475
494
  listTools(): ToolDefinition[];
476
495
  /**
package/dist/index.js CHANGED
@@ -2884,6 +2884,30 @@ Since all fields have defaults, you only need to specify \`*Env\` when your env
2884
2884
  - For setup, skills, MCP, auth, storage, telemetry, or "how do I..." questions, proactively read \`README.md\` with \`read_file\` before answering.
2885
2885
  - Prefer quoting concrete commands and examples from \`README.md\` over guessing.
2886
2886
  - Keep edits minimal, preserve unrelated settings/code, and summarize what changed.`;
2887
+ function extractMediaFromToolOutput(output) {
2888
+ const mediaItems = [];
2889
+ function walk(node) {
2890
+ if (node === null || node === void 0) return node;
2891
+ if (Array.isArray(node)) return node.map(walk);
2892
+ if (typeof node === "object") {
2893
+ const obj = node;
2894
+ if (obj.type === "file" && typeof obj.data === "string" && typeof obj.mediaType === "string" && obj.mediaType.startsWith("image/")) {
2895
+ mediaItems.push({
2896
+ type: "media",
2897
+ data: obj.data,
2898
+ mediaType: obj.mediaType
2899
+ });
2900
+ return { type: "file", mediaType: obj.mediaType, filename: obj.filename ?? "image", _stripped: true };
2901
+ }
2902
+ const out = {};
2903
+ for (const [k, v] of Object.entries(obj)) out[k] = walk(v);
2904
+ return out;
2905
+ }
2906
+ return node;
2907
+ }
2908
+ const strippedOutput = walk(output);
2909
+ return { mediaItems, strippedOutput };
2910
+ }
2887
2911
  var AgentHarness = class {
2888
2912
  workingDir;
2889
2913
  environment;
@@ -2900,6 +2924,8 @@ var AgentHarness = class {
2900
2924
  registeredMcpToolNames = /* @__PURE__ */ new Set();
2901
2925
  latitudeTelemetry;
2902
2926
  insideTelemetryCapture = false;
2927
+ _browserSession;
2928
+ _browserMod;
2903
2929
  parsedAgent;
2904
2930
  mcpBridge;
2905
2931
  resolveToolAccess(toolName) {
@@ -3219,6 +3245,13 @@ var AgentHarness = class {
3219
3245
  })
3220
3246
  );
3221
3247
  }
3248
+ if (config?.browser) {
3249
+ await this.initBrowserTools(config).catch((e) => {
3250
+ console.warn(
3251
+ `[poncho][browser] Failed to load browser tools: ${e instanceof Error ? e.message : String(e)}`
3252
+ );
3253
+ });
3254
+ }
3222
3255
  await bridge.startLocalServers();
3223
3256
  await bridge.discoverTools();
3224
3257
  await this.refreshMcpTools("initialize");
@@ -3257,7 +3290,61 @@ var AgentHarness = class {
3257
3290
  );
3258
3291
  }
3259
3292
  }
3293
+ async initBrowserTools(config) {
3294
+ const spec = ["@poncho-ai", "browser"].join("/");
3295
+ let browserMod;
3296
+ try {
3297
+ const { existsSync } = await import("fs");
3298
+ const { join, dirname: dirname5 } = await import("path");
3299
+ const { pathToFileURL: pathToFileURL2 } = await import("url");
3300
+ let searchDir = this.workingDir;
3301
+ let entryPath;
3302
+ for (; ; ) {
3303
+ const candidate = join(searchDir, "node_modules", "@poncho-ai", "browser", "dist", "index.js");
3304
+ if (existsSync(candidate)) {
3305
+ entryPath = candidate;
3306
+ break;
3307
+ }
3308
+ const parent = dirname5(searchDir);
3309
+ if (parent === searchDir) break;
3310
+ searchDir = parent;
3311
+ }
3312
+ if (!entryPath) throw new Error("not installed");
3313
+ browserMod = await import(pathToFileURL2(entryPath).href);
3314
+ } catch {
3315
+ throw new Error(
3316
+ `browser: true is set in poncho.config but @poncho-ai/browser is not installed.
3317
+ Run: pnpm add @poncho-ai/browser`
3318
+ );
3319
+ }
3320
+ this._browserMod = browserMod;
3321
+ const browserCfg = typeof config.browser === "object" ? config.browser : {};
3322
+ const agentId = this.parsedAgent?.frontmatter.id ?? this.parsedAgent?.frontmatter.name ?? "default";
3323
+ const session = new browserMod.BrowserSession(`poncho-${agentId}`, browserCfg);
3324
+ this._browserSession = session;
3325
+ const tools = browserMod.createBrowserTools(
3326
+ () => session,
3327
+ () => this._currentRunConversationId ?? "__default__"
3328
+ );
3329
+ for (const tool of tools) {
3330
+ if (this.isToolEnabled(tool.name)) {
3331
+ this.registerIfMissing(tool);
3332
+ }
3333
+ }
3334
+ }
3335
+ /** Conversation ID of the currently executing run (set during run, cleared after). */
3336
+ _currentRunConversationId;
3337
+ get browserSession() {
3338
+ return this._browserSession;
3339
+ }
3260
3340
  async shutdown() {
3341
+ if (this._browserSession) {
3342
+ try {
3343
+ await this._browserSession.close();
3344
+ } catch {
3345
+ }
3346
+ this._browserSession = void 0;
3347
+ }
3261
3348
  await this.mcpBridge?.stopLocalServers();
3262
3349
  if (this.latitudeTelemetry) {
3263
3350
  await this.latitudeTelemetry.shutdown().catch((err) => {
@@ -3343,6 +3430,7 @@ var AgentHarness = class {
3343
3430
  await this.initialize();
3344
3431
  }
3345
3432
  await this.refreshSkillsIfChanged();
3433
+ this._currentRunConversationId = input.conversationId;
3346
3434
  const agent = this.parsedAgent;
3347
3435
  const runId = `run_${randomUUID3()}`;
3348
3436
  const start = now();
@@ -3402,6 +3490,25 @@ ${boundedMainMemory.trim()}` : "";
3402
3490
  agentId: agent.frontmatter.id ?? agent.frontmatter.name,
3403
3491
  contextWindow
3404
3492
  });
3493
+ const browserEventQueue = [];
3494
+ const browserCleanups = [];
3495
+ const browserSession = this._browserSession;
3496
+ const conversationId = input.conversationId ?? "__default__";
3497
+ if (browserSession) {
3498
+ browserCleanups.push(
3499
+ browserSession.onFrame(conversationId, (frame) => {
3500
+ browserEventQueue.push({ type: "browser:frame", data: frame.data, width: frame.width, height: frame.height });
3501
+ }),
3502
+ browserSession.onStatus(conversationId, (status) => {
3503
+ browserEventQueue.push({ type: "browser:status", ...status });
3504
+ })
3505
+ );
3506
+ }
3507
+ const drainBrowserEvents = function* () {
3508
+ while (browserEventQueue.length > 0) {
3509
+ yield browserEventQueue.shift();
3510
+ }
3511
+ };
3405
3512
  if (input.task != null) {
3406
3513
  if (input.files && input.files.length > 0) {
3407
3514
  const parts = [
@@ -3447,6 +3554,7 @@ ${boundedMainMemory.trim()}` : "";
3447
3554
  let transientStepRetryCount = 0;
3448
3555
  for (let step = 1; step <= maxSteps; step += 1) {
3449
3556
  try {
3557
+ yield* drainBrowserEvents();
3450
3558
  if (isCancelled()) {
3451
3559
  yield emitCancellation();
3452
3560
  return;
@@ -3498,6 +3606,11 @@ ${boundedMainMemory.trim()}` : "";
3498
3606
  }
3499
3607
  const convertMessage = async (msg) => {
3500
3608
  if (msg.role === "tool") {
3609
+ const meta = msg.metadata;
3610
+ const rich = meta?._richToolResults;
3611
+ if (rich && rich.length > 0) {
3612
+ return [{ role: "tool", content: rich }];
3613
+ }
3501
3614
  const textContent = typeof msg.content === "string" ? msg.content : getTextContent(msg);
3502
3615
  try {
3503
3616
  const parsed = JSON.parse(textContent);
@@ -3860,6 +3973,7 @@ ${textContent}` };
3860
3973
  abortSignal: input.abortSignal
3861
3974
  };
3862
3975
  const toolResultsForModel = [];
3976
+ const richToolResults = [];
3863
3977
  const approvedCalls = [];
3864
3978
  for (const call of toolCalls) {
3865
3979
  if (isCancelled()) {
@@ -3953,6 +4067,12 @@ ${textContent}` };
3953
4067
  tool_name: result2.tool,
3954
4068
  content: `Tool error: ${result2.error}`
3955
4069
  });
4070
+ richToolResults.push({
4071
+ type: "tool-result",
4072
+ toolCallId: result2.callId,
4073
+ toolName: result2.tool,
4074
+ output: { type: "json", value: { error: result2.error } }
4075
+ });
3956
4076
  } else {
3957
4077
  span?.end({ result: { value: result2.output ?? null, isError: false } });
3958
4078
  yield pushEvent({
@@ -3961,12 +4081,34 @@ ${textContent}` };
3961
4081
  output: result2.output,
3962
4082
  duration: now() - batchStart
3963
4083
  });
4084
+ const { mediaItems, strippedOutput } = extractMediaFromToolOutput(result2.output);
3964
4085
  toolResultsForModel.push({
3965
4086
  type: "tool_result",
3966
4087
  tool_use_id: result2.callId,
3967
4088
  tool_name: result2.tool,
3968
- content: JSON.stringify(result2.output ?? null)
4089
+ content: JSON.stringify(strippedOutput ?? null)
3969
4090
  });
4091
+ if (mediaItems.length > 0) {
4092
+ richToolResults.push({
4093
+ type: "tool-result",
4094
+ toolCallId: result2.callId,
4095
+ toolName: result2.tool,
4096
+ output: {
4097
+ type: "content",
4098
+ value: [
4099
+ { type: "text", text: JSON.stringify(strippedOutput ?? null) },
4100
+ ...mediaItems
4101
+ ]
4102
+ }
4103
+ });
4104
+ } else {
4105
+ richToolResults.push({
4106
+ type: "tool-result",
4107
+ toolCallId: result2.callId,
4108
+ toolName: result2.tool,
4109
+ output: { type: "json", value: result2.output ?? null }
4110
+ });
4111
+ }
3970
4112
  }
3971
4113
  }
3972
4114
  const assistantContent = toolCalls.length > 0 ? JSON.stringify({
@@ -3982,10 +4124,11 @@ ${textContent}` };
3982
4124
  content: assistantContent,
3983
4125
  metadata: { timestamp: now(), id: randomUUID3(), step }
3984
4126
  });
4127
+ const toolMsgMeta = { timestamp: now(), id: randomUUID3(), step, _richToolResults: richToolResults };
3985
4128
  messages.push({
3986
4129
  role: "tool",
3987
4130
  content: JSON.stringify(toolResultsForModel),
3988
- metadata: { timestamp: now(), id: randomUUID3(), step }
4131
+ metadata: toolMsgMeta
3989
4132
  });
3990
4133
  yield pushEvent({
3991
4134
  type: "step:completed",
@@ -4040,6 +4183,8 @@ ${textContent}` };
4040
4183
  }
4041
4184
  });
4042
4185
  }
4186
+ yield* drainBrowserEvents();
4187
+ for (const cleanup of browserCleanups) cleanup();
4043
4188
  }
4044
4189
  async executeTools(calls, context) {
4045
4190
  return this.dispatcher.executeBatch(calls, context);
@@ -5133,6 +5278,15 @@ var createConversationStore = (config, options) => {
5133
5278
  };
5134
5279
 
5135
5280
  // src/telemetry.ts
5281
+ var MAX_FIELD_LENGTH = 200;
5282
+ function sanitizeEventForLog(event) {
5283
+ return JSON.stringify(event, (_key, value) => {
5284
+ if (typeof value === "string" && value.length > MAX_FIELD_LENGTH) {
5285
+ return `${value.slice(0, 80)}...[${value.length} chars]`;
5286
+ }
5287
+ return value;
5288
+ });
5289
+ }
5136
5290
  var TelemetryEmitter = class {
5137
5291
  config;
5138
5292
  constructor(config) {
@@ -5149,7 +5303,7 @@ var TelemetryEmitter = class {
5149
5303
  if (this.config?.otlp) {
5150
5304
  await this.sendOtlp(event);
5151
5305
  }
5152
- process.stdout.write(`[event] ${event.type} ${JSON.stringify(event)}
5306
+ process.stdout.write(`[event] ${event.type} ${sanitizeEventForLog(event)}
5153
5307
  `);
5154
5308
  }
5155
5309
  async sendOtlp(event) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.14.2",
3
+ "version": "0.15.1",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -31,7 +31,7 @@
31
31
  "redis": "^5.10.0",
32
32
  "yaml": "^2.4.0",
33
33
  "zod": "^3.22.0",
34
- "@poncho-ai/sdk": "1.0.3"
34
+ "@poncho-ai/sdk": "1.1.1"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/mustache": "^4.2.6",
package/src/config.ts CHANGED
@@ -114,6 +114,18 @@ export interface PonchoConfig extends McpConfig {
114
114
  };
115
115
  /** Set to `false` to disable the built-in web UI (headless / API-only mode). */
116
116
  webUi?: false;
117
+ /** Enable browser automation tools. Set `true` for defaults, or provide config. */
118
+ browser?:
119
+ | boolean
120
+ | {
121
+ viewport?: { width?: number; height?: number };
122
+ quality?: number;
123
+ everyNthFrame?: number;
124
+ profileDir?: string;
125
+ sessionName?: string;
126
+ executablePath?: string;
127
+ headless?: boolean;
128
+ };
117
129
  }
118
130
 
119
131
  const resolveTtl = (
package/src/harness.ts CHANGED
@@ -435,6 +435,49 @@ Since all fields have defaults, you only need to specify \`*Env\` when your env
435
435
  - Prefer quoting concrete commands and examples from \`README.md\` over guessing.
436
436
  - Keep edits minimal, preserve unrelated settings/code, and summarize what changed.`;
437
437
 
438
+ /**
439
+ * Detect FileContentPart objects ({ type:"file", data, mediaType }) in a tool
440
+ * output value and split them into:
441
+ * - `mediaItems` – items suitable for the AI SDK multi-part `content` output
442
+ * (images become proper vision tokens, not base64 text).
443
+ * - `strippedOutput` – the original output with base64 `data` fields replaced
444
+ * by a short placeholder so the stored conversation stays small.
445
+ */
446
+ function extractMediaFromToolOutput(output: unknown): {
447
+ mediaItems: Array<{ type: "media"; data: string; mediaType: string }>;
448
+ strippedOutput: unknown;
449
+ } {
450
+ const mediaItems: Array<{ type: "media"; data: string; mediaType: string }> = [];
451
+
452
+ function walk(node: unknown): unknown {
453
+ if (node === null || node === undefined) return node;
454
+ if (Array.isArray(node)) return node.map(walk);
455
+ if (typeof node === "object") {
456
+ const obj = node as Record<string, unknown>;
457
+ if (
458
+ obj.type === "file" &&
459
+ typeof obj.data === "string" &&
460
+ typeof obj.mediaType === "string" &&
461
+ (obj.mediaType as string).startsWith("image/")
462
+ ) {
463
+ mediaItems.push({
464
+ type: "media",
465
+ data: obj.data as string,
466
+ mediaType: obj.mediaType as string,
467
+ });
468
+ return { type: "file", mediaType: obj.mediaType, filename: obj.filename ?? "image", _stripped: true };
469
+ }
470
+ const out: Record<string, unknown> = {};
471
+ for (const [k, v] of Object.entries(obj)) out[k] = walk(v);
472
+ return out;
473
+ }
474
+ return node;
475
+ }
476
+
477
+ const strippedOutput = walk(output);
478
+ return { mediaItems, strippedOutput };
479
+ }
480
+
438
481
  export class AgentHarness {
439
482
  private readonly workingDir: string;
440
483
  private readonly environment: HarnessOptions["environment"];
@@ -451,6 +494,11 @@ export class AgentHarness {
451
494
  private readonly registeredMcpToolNames = new Set<string>();
452
495
  private latitudeTelemetry?: LatitudeTelemetry;
453
496
  private insideTelemetryCapture = false;
497
+ private _browserSession?: unknown;
498
+ private _browserMod?: {
499
+ createBrowserTools: (getSession: () => unknown, getConversationId: () => string) => ToolDefinition[];
500
+ BrowserSession: new (sessionId: string, config: Record<string, unknown>) => unknown;
501
+ };
454
502
 
455
503
  private parsedAgent?: ParsedAgent;
456
504
  private mcpBridge?: LocalMcpBridge;
@@ -816,6 +864,16 @@ export class AgentHarness {
816
864
  }),
817
865
  );
818
866
  }
867
+
868
+ if (config?.browser) {
869
+ await this.initBrowserTools(config)
870
+ .catch((e) => {
871
+ console.warn(
872
+ `[poncho][browser] Failed to load browser tools: ${e instanceof Error ? e.message : String(e)}`,
873
+ );
874
+ });
875
+ }
876
+
819
877
  await bridge.startLocalServers();
820
878
  await bridge.discoverTools();
821
879
  await this.refreshMcpTools("initialize");
@@ -856,7 +914,68 @@ export class AgentHarness {
856
914
  }
857
915
  }
858
916
 
917
+ private async initBrowserTools(config: PonchoConfig): Promise<void> {
918
+ const spec = ["@poncho-ai", "browser"].join("/");
919
+ let browserMod: {
920
+ createBrowserTools: (getSession: () => unknown, getConversationId: () => string) => ToolDefinition[];
921
+ BrowserSession: new (sessionId: string, cfg?: Record<string, unknown>) => unknown;
922
+ };
923
+ try {
924
+ // Resolve from the agent project's node_modules (not the harness dist
925
+ // location). Walk up from workingDir the same way Node's resolution
926
+ // algorithm does, then dynamically import the ESM entry point.
927
+ const { existsSync } = await import("node:fs");
928
+ const { join, dirname } = await import("node:path");
929
+ const { pathToFileURL } = await import("node:url");
930
+
931
+ let searchDir = this.workingDir;
932
+ let entryPath: string | undefined;
933
+ for (;;) {
934
+ const candidate = join(searchDir, "node_modules", "@poncho-ai", "browser", "dist", "index.js");
935
+ if (existsSync(candidate)) { entryPath = candidate; break; }
936
+ const parent = dirname(searchDir);
937
+ if (parent === searchDir) break;
938
+ searchDir = parent;
939
+ }
940
+ if (!entryPath) throw new Error("not installed");
941
+ browserMod = await import(pathToFileURL(entryPath).href);
942
+ } catch {
943
+ throw new Error(
944
+ `browser: true is set in poncho.config but @poncho-ai/browser is not installed.\n` +
945
+ ` Run: pnpm add @poncho-ai/browser`,
946
+ );
947
+ }
948
+
949
+ this._browserMod = browserMod;
950
+ const browserCfg = typeof config.browser === "object" ? config.browser : {};
951
+ const agentId = this.parsedAgent?.frontmatter.id ?? this.parsedAgent?.frontmatter.name ?? "default";
952
+ const session = new browserMod.BrowserSession(`poncho-${agentId}`, browserCfg);
953
+ this._browserSession = session;
954
+
955
+ const tools = browserMod.createBrowserTools(
956
+ () => session,
957
+ () => this._currentRunConversationId ?? "__default__",
958
+ );
959
+ for (const tool of tools) {
960
+ if (this.isToolEnabled(tool.name)) {
961
+ this.registerIfMissing(tool);
962
+ }
963
+ }
964
+ }
965
+
966
+ /** Conversation ID of the currently executing run (set during run, cleared after). */
967
+ private _currentRunConversationId?: string;
968
+
969
+ get browserSession(): unknown {
970
+ return this._browserSession;
971
+ }
972
+
859
973
  async shutdown(): Promise<void> {
974
+ if (this._browserSession) {
975
+ try { await (this._browserSession as { close(): Promise<void> }).close(); } catch { /* best-effort */ }
976
+ this._browserSession = undefined;
977
+ }
978
+
860
979
  await this.mcpBridge?.stopLocalServers();
861
980
  if (this.latitudeTelemetry) {
862
981
  await this.latitudeTelemetry.shutdown().catch((err) => {
@@ -966,6 +1085,9 @@ export class AgentHarness {
966
1085
  }
967
1086
  await this.refreshSkillsIfChanged();
968
1087
 
1088
+ // Track which conversation this run belongs to so browser tools resolve the right session
1089
+ this._currentRunConversationId = input.conversationId;
1090
+
969
1091
  const agent = this.parsedAgent as ParsedAgent;
970
1092
  const runId = `run_${randomUUID()}`;
971
1093
  const start = now();
@@ -1041,6 +1163,34 @@ ${boundedMainMemory.trim()}`
1041
1163
  contextWindow,
1042
1164
  });
1043
1165
 
1166
+ // Subscribe to browser frame/status events for this conversation's tab.
1167
+ const browserEventQueue: AgentEvent[] = [];
1168
+ const browserCleanups: Array<() => void> = [];
1169
+ const browserSession = this._browserSession as
1170
+ | { onFrame: (cid: string, cb: (f: { data: string; width: number; height: number }) => void) => () => void;
1171
+ onStatus: (cid: string, cb: (s: { active: boolean; url?: string; interactionAllowed: boolean }) => void) => () => void;
1172
+ saveState: (path: string) => Promise<void>;
1173
+ close: () => Promise<void>;
1174
+ profileDir: string;
1175
+ isLaunched: boolean }
1176
+ | undefined;
1177
+ const conversationId = input.conversationId ?? "__default__";
1178
+ if (browserSession) {
1179
+ browserCleanups.push(
1180
+ browserSession.onFrame(conversationId, (frame) => {
1181
+ browserEventQueue.push({ type: "browser:frame", data: frame.data, width: frame.width, height: frame.height });
1182
+ }),
1183
+ browserSession.onStatus(conversationId, (status) => {
1184
+ browserEventQueue.push({ type: "browser:status", ...status });
1185
+ }),
1186
+ );
1187
+ }
1188
+ const drainBrowserEvents = function* (): Generator<AgentEvent> {
1189
+ while (browserEventQueue.length > 0) {
1190
+ yield browserEventQueue.shift()!;
1191
+ }
1192
+ };
1193
+
1044
1194
  if (input.task != null) {
1045
1195
  if (input.files && input.files.length > 0) {
1046
1196
  const parts: ContentPart[] = [
@@ -1088,6 +1238,7 @@ ${boundedMainMemory.trim()}`
1088
1238
 
1089
1239
  for (let step = 1; step <= maxSteps; step += 1) {
1090
1240
  try {
1241
+ yield* drainBrowserEvents();
1091
1242
  if (isCancelled()) {
1092
1243
  yield emitCancellation();
1093
1244
  return;
@@ -1145,8 +1296,22 @@ ${boundedMainMemory.trim()}`
1145
1296
  // Convert messages to ModelMessage format
1146
1297
  const convertMessage = async (msg: Message): Promise<ModelMessage[]> => {
1147
1298
  if (msg.role === "tool") {
1148
- // Tool messages are provider-sensitive; skip malformed historical records
1149
- // instead of failing the entire run continuation.
1299
+ // When rich (multi-part) tool results are attached from the
1300
+ // current run, use them directly — they include proper image
1301
+ // content blocks instead of base64 text.
1302
+ const meta = msg.metadata as Record<string, unknown> | undefined;
1303
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1304
+ const rich = (meta as any)?._richToolResults as unknown[] | undefined;
1305
+ if (rich && rich.length > 0) {
1306
+ // The rich array already conforms to the AI SDK ToolContent shape
1307
+ // (tool-result parts with multi-part content outputs). Cast
1308
+ // through `any` because the exact generic types are internal.
1309
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1310
+ return [{ role: "tool" as const, content: rich as any }];
1311
+ }
1312
+
1313
+ // Fallback for historical messages loaded from storage (base64
1314
+ // already stripped, so this is always token-safe).
1150
1315
  const textContent = typeof msg.content === "string" ? msg.content : getTextContent(msg);
1151
1316
  try {
1152
1317
  const parsed: unknown = JSON.parse(textContent);
@@ -1171,7 +1336,6 @@ ${boundedMainMemory.trim()}`
1171
1336
  return [{
1172
1337
  role: "tool" as const,
1173
1338
  content: toolResults.map((tr) => {
1174
- // Parse JSON content for successful results, keep error messages as strings.
1175
1339
  if (tr.content.startsWith("Tool error:")) {
1176
1340
  return {
1177
1341
  type: "tool-result" as const,
@@ -1589,6 +1753,17 @@ ${boundedMainMemory.trim()}`
1589
1753
  content: string;
1590
1754
  }> = [];
1591
1755
 
1756
+ // Rich tool results that use multi-part content for images (proper
1757
+ // vision tokens instead of base64 text). Used for the *current* step
1758
+ // model call; the `toolResultsForModel` array holds the storage-safe
1759
+ // version with base64 stripped.
1760
+ const richToolResults: Array<{
1761
+ type: "tool-result";
1762
+ toolCallId: string;
1763
+ toolName: string;
1764
+ output: { type: "json"; value: unknown } | { type: "content"; value: Array<{ type: "text"; text: string } | { type: "media"; data: string; mediaType: string }> };
1765
+ }> = [];
1766
+
1592
1767
  const approvedCalls: Array<{
1593
1768
  id: string;
1594
1769
  name: string;
@@ -1697,6 +1872,12 @@ ${boundedMainMemory.trim()}`
1697
1872
  tool_name: result.tool,
1698
1873
  content: `Tool error: ${result.error}`,
1699
1874
  });
1875
+ richToolResults.push({
1876
+ type: "tool-result",
1877
+ toolCallId: result.callId,
1878
+ toolName: result.tool,
1879
+ output: { type: "json", value: { error: result.error } },
1880
+ });
1700
1881
  } else {
1701
1882
  span?.end({ result: { value: result.output ?? null, isError: false } });
1702
1883
  yield pushEvent({
@@ -1705,12 +1886,36 @@ ${boundedMainMemory.trim()}`
1705
1886
  output: result.output,
1706
1887
  duration: now() - batchStart,
1707
1888
  });
1889
+
1890
+ const { mediaItems, strippedOutput } = extractMediaFromToolOutput(result.output);
1708
1891
  toolResultsForModel.push({
1709
1892
  type: "tool_result",
1710
1893
  tool_use_id: result.callId,
1711
1894
  tool_name: result.tool,
1712
- content: JSON.stringify(result.output ?? null),
1895
+ content: JSON.stringify(strippedOutput ?? null),
1713
1896
  });
1897
+
1898
+ if (mediaItems.length > 0) {
1899
+ richToolResults.push({
1900
+ type: "tool-result",
1901
+ toolCallId: result.callId,
1902
+ toolName: result.tool,
1903
+ output: {
1904
+ type: "content",
1905
+ value: [
1906
+ { type: "text", text: JSON.stringify(strippedOutput ?? null) },
1907
+ ...mediaItems,
1908
+ ],
1909
+ },
1910
+ });
1911
+ } else {
1912
+ richToolResults.push({
1913
+ type: "tool-result",
1914
+ toolCallId: result.callId,
1915
+ toolName: result.tool,
1916
+ output: { type: "json", value: result.output ?? null },
1917
+ });
1918
+ }
1714
1919
  }
1715
1920
  }
1716
1921
 
@@ -1731,10 +1936,11 @@ ${boundedMainMemory.trim()}`
1731
1936
  content: assistantContent,
1732
1937
  metadata: { timestamp: now(), id: randomUUID(), step },
1733
1938
  });
1939
+ const toolMsgMeta: Record<string, unknown> = { timestamp: now(), id: randomUUID(), step, _richToolResults: richToolResults };
1734
1940
  messages.push({
1735
1941
  role: "tool",
1736
1942
  content: JSON.stringify(toolResultsForModel),
1737
- metadata: { timestamp: now(), id: randomUUID(), step },
1943
+ metadata: toolMsgMeta as Message["metadata"],
1738
1944
  });
1739
1945
 
1740
1946
  yield pushEvent({
@@ -1793,6 +1999,10 @@ ${boundedMainMemory.trim()}`
1793
1999
  },
1794
2000
  });
1795
2001
  }
2002
+
2003
+ // Drain any remaining browser events and clean up subscriptions
2004
+ yield* drainBrowserEvents();
2005
+ for (const cleanup of browserCleanups) cleanup();
1796
2006
  }
1797
2007
 
1798
2008
  async executeTools(
package/src/telemetry.ts CHANGED
@@ -1,5 +1,16 @@
1
1
  import type { AgentEvent } from "@poncho-ai/sdk";
2
2
 
3
+ const MAX_FIELD_LENGTH = 200;
4
+
5
+ function sanitizeEventForLog(event: AgentEvent): string {
6
+ return JSON.stringify(event, (_key, value) => {
7
+ if (typeof value === "string" && value.length > MAX_FIELD_LENGTH) {
8
+ return `${value.slice(0, 80)}...[${value.length} chars]`;
9
+ }
10
+ return value;
11
+ });
12
+ }
13
+
3
14
  export interface TelemetryConfig {
4
15
  enabled?: boolean;
5
16
  otlp?: string;
@@ -33,7 +44,8 @@ export class TelemetryEmitter {
33
44
  // Latitude telemetry is handled by LatitudeTelemetry (from
34
45
  // @latitude-data/telemetry) via harness.runWithTelemetry().
35
46
  // Default behavior in local dev: print concise structured logs.
36
- process.stdout.write(`[event] ${event.type} ${JSON.stringify(event)}\n`);
47
+ // Strip large binary payloads (e.g. base64 images) to keep logs readable.
48
+ process.stdout.write(`[event] ${event.type} ${sanitizeEventForLog(event)}\n`);
37
49
  }
38
50
 
39
51
  private async sendOtlp(event: AgentEvent): Promise<void> {