agiagent-dev 2026.1.32 → 2026.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -768,12 +768,16 @@ export function createExecTool(defaults) {
768
768
  // Fall back to requiring approval if node approvals cannot be fetched.
769
769
  }
770
770
  }
771
- const requiresAsk = requiresExecApproval({
772
- ask: hostAsk,
773
- security: hostSecurity,
774
- analysisOk,
775
- allowlistSatisfied,
776
- });
771
+ // In hosted mode, skip approval entirely - commands are pre-authorized by the gateway
772
+ const hostedAutoApprove = process.env.AGIAGENT_HOSTED_MODE === "1";
773
+ const requiresAsk = hostedAutoApprove
774
+ ? false
775
+ : requiresExecApproval({
776
+ ask: hostAsk,
777
+ security: hostSecurity,
778
+ analysisOk,
779
+ allowlistSatisfied,
780
+ });
777
781
  const commandText = params.command;
778
782
  const invokeTimeoutMs = Math.max(10_000, (typeof params.timeout === "number" ? params.timeout : defaultTimeoutSec) * 1000 + 5_000);
779
783
  const buildInvokeParams = (approvedByAsk, approvalDecision, runId) => ({
@@ -782,7 +786,9 @@ export function createExecTool(defaults) {
782
786
  params: {
783
787
  command: argv,
784
788
  rawCommand: params.command,
785
- cwd: workdir,
789
+ // Don't pass gateway's cwd to remote nodes - use null to let node use its own cwd
790
+ // The gateway's cwd (e.g., /app or /data on Fly.io) doesn't exist on user's machine
791
+ cwd: params.workdir?.trim() || null,
786
792
  env: nodeEnv,
787
793
  timeoutMs: typeof params.timeout === "number" ? params.timeout * 1000 : undefined,
788
794
  agentId,
@@ -790,6 +796,8 @@ export function createExecTool(defaults) {
790
796
  approved: approvedByAsk,
791
797
  approvalDecision: approvalDecision ?? undefined,
792
798
  runId: runId ?? undefined,
799
+ // Tell node to skip macOS app exec host and use direct spawn in hosted mode
800
+ hostedMode: process.env.AGIAGENT_HOSTED_MODE === "1" ? true : undefined,
793
801
  },
794
802
  idempotencyKey: crypto.randomUUID(),
795
803
  });
@@ -893,7 +901,8 @@ export function createExecTool(defaults) {
893
901
  };
894
902
  }
895
903
  const startedAt = Date.now();
896
- const raw = await callGatewayTool("node.invoke", { timeoutMs: invokeTimeoutMs }, buildInvokeParams(false, null));
904
+ // In hosted mode, hostedAutoApprove is true, so we send approved=true to the node
905
+ const raw = await callGatewayTool("node.invoke", { timeoutMs: invokeTimeoutMs }, buildInvokeParams(hostedAutoApprove, hostedAutoApprove ? "allow-once" : null));
897
906
  const payload = raw && typeof raw === "object" ? raw.payload : undefined;
898
907
  const payloadObj = payload && typeof payload === "object" ? payload : {};
899
908
  const stdout = typeof payloadObj.stdout === "string" ? payloadObj.stdout : "";
@@ -314,6 +314,23 @@ export function buildAgentSystemPrompt(params) {
314
314
  "",
315
315
  ...skillsSection,
316
316
  ...memorySection,
317
+ // File type guidance for document handling
318
+ !isMinimal ? "## File Type Guidance" : "",
319
+ !isMinimal
320
+ ? [
321
+ "When working with specific file types, ALWAYS read the corresponding skill BEFORE any editing:",
322
+ "",
323
+ "### Word Documents (.docx)",
324
+ "1. Find the `docx` skill in <available_skills> above",
325
+ `2. Use \`${readToolName}\` to read the skill's SKILL.md at its <location>`,
326
+ "3. Follow ALL instructions in that skill - it contains CRITICAL rules",
327
+ "4. Use python-docx (NOT raw XML/zipfile manipulation)",
328
+ "5. Count paragraphs/bullets before AND after editing - counts must match unless user requested reduction",
329
+ "",
330
+ "NEVER skip reading the skill. NEVER use zipfile+ElementTree for .docx files.",
331
+ ].join("\n")
332
+ : "",
333
+ !isMinimal ? "" : "",
317
334
  // Skip self-update for subagent/none modes
318
335
  hasGateway && !isMinimal ? "## AGIAgent Self-Update" : "",
319
336
  hasGateway && !isMinimal
@@ -19,19 +19,34 @@ async function resolveBrowserNodeTarget(params) {
19
19
  const cfg = loadConfig();
20
20
  const policy = cfg.gateway?.nodes?.browser;
21
21
  const mode = policy?.mode ?? "auto";
22
+ // In hosted mode, prefer node browser since there's no local browser on the gateway
23
+ const isHostedMode = process.env.AGIAGENT_HOSTED_MODE === "1";
22
24
  if (mode === "off") {
23
25
  if (params.target === "node" || params.requestedNode) {
24
26
  throw new Error("Node browser proxy is disabled (gateway.nodes.browser.mode=off).");
25
27
  }
26
- return null;
28
+ // In hosted mode, we still need a node even if mode is off (no local browser)
29
+ if (!isHostedMode) {
30
+ return null;
31
+ }
27
32
  }
28
33
  if (params.sandboxBridgeUrl?.trim() && params.target !== "node" && !params.requestedNode) {
29
- return null;
34
+ // Sandbox browser available, but in hosted mode prefer node if no explicit target
35
+ if (!isHostedMode) {
36
+ return null;
37
+ }
30
38
  }
31
39
  if (params.target && params.target !== "node") {
32
- return null;
40
+ // Explicit non-node target requested
41
+ // In hosted mode with target=host, fall through to check for nodes since host won't work
42
+ if (params.target === "host" && isHostedMode) {
43
+ // Let it fall through to try node browser
44
+ }
45
+ else {
46
+ return null;
47
+ }
33
48
  }
34
- if (mode === "manual" && params.target !== "node" && !params.requestedNode) {
49
+ if (mode === "manual" && params.target !== "node" && !params.requestedNode && !isHostedMode) {
35
50
  return null;
36
51
  }
37
52
  const nodes = await listNodes({});
@@ -40,6 +55,10 @@ async function resolveBrowserNodeTarget(params) {
40
55
  if (params.target === "node" || params.requestedNode) {
41
56
  throw new Error("No connected browser-capable nodes.");
42
57
  }
58
+ // In hosted mode, we need a node browser but none available
59
+ if (isHostedMode) {
60
+ throw new Error("No connected browser-capable nodes. Connect a device with browser support to use browser features.");
61
+ }
43
62
  return null;
44
63
  }
45
64
  const requested = params.requestedNode?.trim() || policy?.node?.trim();
@@ -48,7 +67,7 @@ async function resolveBrowserNodeTarget(params) {
48
67
  const node = browserNodes.find((entry) => entry.nodeId === nodeId);
49
68
  return { nodeId, label: node?.displayName ?? node?.remoteIp ?? nodeId };
50
69
  }
51
- if (params.target === "node") {
70
+ if (params.target === "node" || isHostedMode) {
52
71
  if (browserNodes.length === 1) {
53
72
  const node = browserNodes[0];
54
73
  return { nodeId: node.nodeId, label: node.displayName ?? node.remoteIp ?? node.nodeId };
@@ -147,7 +166,7 @@ export function createBrowserTool(opts) {
147
166
  label: "Browser",
148
167
  name: "browser",
149
168
  description: [
150
- "Control the browser via AGIAgent's browser control server (status/start/stop/profiles/tabs/open/snapshot/screenshot/actions).",
169
+ "Control the browser via AGIAgent's browser control server (status/start/stop/profiles/tabs/open/snapshot/screenshot/actions/list_upload_inputs/attach_file).",
151
170
  'Profiles: use profile="chrome" for Chrome extension relay takeover (your existing Chrome tabs). Use profile="agiagent" for the isolated agiagent-managed browser.',
152
171
  'If the user mentions the Chrome extension / Browser Relay / toolbar button / “attach tab”, ALWAYS use profile="chrome" (do not ask which profile).',
153
172
  'When a node-hosted browser proxy is available, the tool may auto-route to it. Pin a node with node=<id|name> or target="node".',
@@ -595,6 +614,44 @@ export function createBrowserTool(opts) {
595
614
  throw err;
596
615
  }
597
616
  }
617
+ case "list_upload_inputs": {
618
+ const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
619
+ if (proxyRequest) {
620
+ const result = await proxyRequest({
621
+ method: "GET",
622
+ path: "/upload-inputs",
623
+ profile,
624
+ query: { targetId },
625
+ });
626
+ return jsonResult(result);
627
+ }
628
+ // Local execution not yet implemented - requires browser control server
629
+ throw new Error("list_upload_inputs requires a connected browser node");
630
+ }
631
+ case "attach_file": {
632
+ const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
633
+ const inputIndex = typeof params.inputIndex === "number" && Number.isFinite(params.inputIndex)
634
+ ? params.inputIndex
635
+ : undefined;
636
+ const filePath = readStringParam(params, "filePath");
637
+ if (inputIndex === undefined) {
638
+ throw new Error("inputIndex is required");
639
+ }
640
+ if (!filePath) {
641
+ throw new Error("filePath is required");
642
+ }
643
+ if (proxyRequest) {
644
+ const result = await proxyRequest({
645
+ method: "POST",
646
+ path: "/attach-file",
647
+ profile,
648
+ body: { targetId, inputIndex, filePath },
649
+ });
650
+ return jsonResult(result);
651
+ }
652
+ // Local execution not yet implemented - requires browser control server
653
+ throw new Error("attach_file requires a connected browser node");
654
+ }
598
655
  default:
599
656
  throw new Error(`Unknown action: ${action}`);
600
657
  }
@@ -1,5 +1,5 @@
1
1
  export declare const BrowserToolSchema: import("@sinclair/typebox").TObject<{
2
- action: import("@sinclair/typebox").TUnsafe<"close" | "status" | "start" | "open" | "navigate" | "profiles" | "upload" | "snapshot" | "stop" | "tabs" | "focus" | "screenshot" | "console" | "pdf" | "dialog" | "act">;
2
+ action: import("@sinclair/typebox").TUnsafe<"close" | "status" | "start" | "open" | "navigate" | "profiles" | "upload" | "snapshot" | "stop" | "tabs" | "focus" | "screenshot" | "console" | "pdf" | "dialog" | "act" | "list_upload_inputs" | "attach_file">;
3
3
  target: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TUnsafe<"sandbox" | "node" | "host">>;
4
4
  node: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
5
5
  profile: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
@@ -47,4 +47,6 @@ export declare const BrowserToolSchema: import("@sinclair/typebox").TObject<{
47
47
  textGone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
48
48
  fn: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
49
49
  }>>;
50
+ inputIndex: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
51
+ filePath: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString>;
50
52
  }>;
@@ -30,6 +30,8 @@ const BROWSER_TOOL_ACTIONS = [
30
30
  "upload",
31
31
  "dialog",
32
32
  "act",
33
+ "list_upload_inputs",
34
+ "attach_file",
33
35
  ];
34
36
  const BROWSER_TARGETS = ["sandbox", "host", "node"];
35
37
  const BROWSER_SNAPSHOT_FORMATS = ["aria", "ai"];
@@ -102,4 +104,7 @@ export const BrowserToolSchema = Type.Object({
102
104
  accept: Type.Optional(Type.Boolean()),
103
105
  promptText: Type.Optional(Type.String()),
104
106
  request: Type.Optional(BrowserActSchema),
107
+ // attach_file action
108
+ inputIndex: Type.Optional(Type.Number()),
109
+ filePath: Type.Optional(Type.String()),
105
110
  });
@@ -364,6 +364,8 @@ export function createNodesTool(options) {
364
364
  const needsScreenRecording = typeof params.needsScreenRecording === "boolean"
365
365
  ? params.needsScreenRecording
366
366
  : undefined;
367
+ // In hosted mode, auto-approve commands - they're pre-authorized by the gateway
368
+ const isHostedMode = process.env.AGIAGENT_HOSTED_MODE === "1";
367
369
  const raw = await callGatewayTool("node.invoke", gatewayOpts, {
368
370
  nodeId,
369
371
  command: "system.run",
@@ -375,6 +377,11 @@ export function createNodesTool(options) {
375
377
  needsScreenRecording,
376
378
  agentId,
377
379
  sessionKey,
380
+ // Hosted mode: pass approval flags so node skips its approval check
381
+ approved: isHostedMode ? true : undefined,
382
+ approvalDecision: isHostedMode ? "allow-once" : undefined,
383
+ // Tell node to skip macOS app exec host and use direct spawn
384
+ hostedMode: isHostedMode ? true : undefined,
378
385
  },
379
386
  timeoutMs: invokeTimeoutMs,
380
387
  idempotencyKey: crypto.randomUUID(),
@@ -1,2 +1,3 @@
1
1
  export { type BrowserConsoleMessage, closePageByTargetIdViaPlaywright, closePlaywrightBrowserConnection, createPageViaPlaywright, ensurePageState, focusPageByTargetIdViaPlaywright, getPageForTargetId, listPagesViaPlaywright, refLocator, type WithSnapshotForAI, } from "./pw-session.js";
2
2
  export { armDialogViaPlaywright, armFileUploadViaPlaywright, clickViaPlaywright, closePageViaPlaywright, cookiesClearViaPlaywright, cookiesGetViaPlaywright, cookiesSetViaPlaywright, downloadViaPlaywright, dragViaPlaywright, emulateMediaViaPlaywright, evaluateViaPlaywright, fillFormViaPlaywright, getConsoleMessagesViaPlaywright, getNetworkRequestsViaPlaywright, getPageErrorsViaPlaywright, highlightViaPlaywright, hoverViaPlaywright, navigateViaPlaywright, pdfViaPlaywright, pressKeyViaPlaywright, resizeViewportViaPlaywright, responseBodyViaPlaywright, scrollIntoViewViaPlaywright, selectOptionViaPlaywright, setDeviceViaPlaywright, setExtraHTTPHeadersViaPlaywright, setGeolocationViaPlaywright, setHttpCredentialsViaPlaywright, setInputFilesViaPlaywright, setLocaleViaPlaywright, setOfflineViaPlaywright, setTimezoneViaPlaywright, snapshotAiViaPlaywright, snapshotAriaViaPlaywright, snapshotRoleViaPlaywright, screenshotWithLabelsViaPlaywright, storageClearViaPlaywright, storageGetViaPlaywright, storageSetViaPlaywright, takeScreenshotViaPlaywright, traceStartViaPlaywright, traceStopViaPlaywright, typeViaPlaywright, waitForDownloadViaPlaywright, waitForViaPlaywright, } from "./pw-tools-core.js";
3
+ export { type FileInputInfo, type ListFileInputsResult, type AttachFileResult, listFileInputsViaPlaywright, attachFileToInputViaPlaywright, isFileTypeAccepted, } from "./pw-tools-core.uploads.js";
@@ -1,2 +1,3 @@
1
1
  export { closePageByTargetIdViaPlaywright, closePlaywrightBrowserConnection, createPageViaPlaywright, ensurePageState, focusPageByTargetIdViaPlaywright, getPageForTargetId, listPagesViaPlaywright, refLocator, } from "./pw-session.js";
2
2
  export { armDialogViaPlaywright, armFileUploadViaPlaywright, clickViaPlaywright, closePageViaPlaywright, cookiesClearViaPlaywright, cookiesGetViaPlaywright, cookiesSetViaPlaywright, downloadViaPlaywright, dragViaPlaywright, emulateMediaViaPlaywright, evaluateViaPlaywright, fillFormViaPlaywright, getConsoleMessagesViaPlaywright, getNetworkRequestsViaPlaywright, getPageErrorsViaPlaywright, highlightViaPlaywright, hoverViaPlaywright, navigateViaPlaywright, pdfViaPlaywright, pressKeyViaPlaywright, resizeViewportViaPlaywright, responseBodyViaPlaywright, scrollIntoViewViaPlaywright, selectOptionViaPlaywright, setDeviceViaPlaywright, setExtraHTTPHeadersViaPlaywright, setGeolocationViaPlaywright, setHttpCredentialsViaPlaywright, setInputFilesViaPlaywright, setLocaleViaPlaywright, setOfflineViaPlaywright, setTimezoneViaPlaywright, snapshotAiViaPlaywright, snapshotAriaViaPlaywright, snapshotRoleViaPlaywright, screenshotWithLabelsViaPlaywright, storageClearViaPlaywright, storageGetViaPlaywright, storageSetViaPlaywright, takeScreenshotViaPlaywright, traceStartViaPlaywright, traceStopViaPlaywright, typeViaPlaywright, waitForDownloadViaPlaywright, waitForViaPlaywright, } from "./pw-tools-core.js";
3
+ export { listFileInputsViaPlaywright, attachFileToInputViaPlaywright, isFileTypeAccepted, } from "./pw-tools-core.uploads.js";
@@ -0,0 +1,51 @@
1
+ /**
2
+ * File upload discovery and attachment functions for browser automation.
3
+ *
4
+ * These functions support the LLM-driven file upload workflow:
5
+ * 1. listFileInputsViaPlaywright() - discovers all file inputs on page + iframes
6
+ * 2. attachFileToInputViaPlaywright() - uploads file to a specific input by index
7
+ */
8
+ export type FileInputInfo = {
9
+ index: number;
10
+ id: string;
11
+ name: string;
12
+ accept: string;
13
+ multiple: boolean;
14
+ frameUrl: string;
15
+ nearbyText: string;
16
+ ariaLabel: string | null;
17
+ };
18
+ export type ListFileInputsResult = {
19
+ inputs: FileInputInfo[];
20
+ targetId?: string;
21
+ };
22
+ export type AttachFileResult = {
23
+ success: boolean;
24
+ inputIndex: number;
25
+ inputId: string;
26
+ fileName: string;
27
+ frameUrl: string;
28
+ };
29
+ /**
30
+ * Discovers all file inputs on the page and all iframes.
31
+ * Returns structured metadata that the LLM can use to decide which input to target.
32
+ */
33
+ export declare function listFileInputsViaPlaywright(opts: {
34
+ cdpUrl: string;
35
+ targetId?: string;
36
+ }): Promise<ListFileInputsResult>;
37
+ /**
38
+ * Attaches a file to a specific file input by index.
39
+ * The index corresponds to the index returned by listFileInputsViaPlaywright.
40
+ */
41
+ export declare function attachFileToInputViaPlaywright(opts: {
42
+ cdpUrl: string;
43
+ targetId?: string;
44
+ inputIndex: number;
45
+ filePath: string;
46
+ }): Promise<AttachFileResult>;
47
+ /**
48
+ * Validates that a file extension matches an accept attribute.
49
+ * Returns true if the file is allowed, false otherwise.
50
+ */
51
+ export declare function isFileTypeAccepted(filePath: string, accept: string): boolean;
@@ -0,0 +1,225 @@
1
+ /**
2
+ * File upload discovery and attachment functions for browser automation.
3
+ *
4
+ * These functions support the LLM-driven file upload workflow:
5
+ * 1. listFileInputsViaPlaywright() - discovers all file inputs on page + iframes
6
+ * 2. attachFileToInputViaPlaywright() - uploads file to a specific input by index
7
+ */
8
+ import { existsSync } from "node:fs";
9
+ import { basename, extname } from "node:path";
10
+ import { getPageForTargetId, ensurePageState } from "./pw-session.js";
/** Selector used to discover file-upload inputs inside a page or frame. */
const FILE_INPUT_SELECTOR = "input[type='file']";

/**
 * Reads descriptive metadata from a single file-input element handle.
 *
 * The callback passed to `evaluate` is executed inside the browser page, so it
 * must stay self-contained (no references to module-level bindings).
 *
 * @param handle - element handle exposing `evaluate(fn)`.
 * @returns a promise of `{ id, name, accept, multiple, ariaLabel, nearbyText }`.
 */
function describeFileInput(handle) {
    return handle.evaluate((el) => {
        // Context text is taken from the first source that yields something:
        // an associated <label for>, then the parent, then the previous sibling.
        let nearbyText = "";
        if (el.id) {
            // Escape the id before embedding it in a selector; ids containing
            // quotes or backslashes would otherwise make querySelector throw.
            const escapedId = typeof CSS !== "undefined" && typeof CSS.escape === "function"
                ? CSS.escape(el.id)
                : el.id.replace(/["\\]/g, "\\$&");
            const labelFor = document.querySelector(`label[for="${escapedId}"]`);
            if (labelFor) {
                nearbyText = labelFor.textContent?.trim().substring(0, 100) || "";
            }
        }
        if (!nearbyText && el.parentElement) {
            const parentText = el.parentElement.textContent?.trim() || "";
            // Only use short parent text (avoid grabbing entire page text).
            if (parentText.length < 200) {
                nearbyText = parentText.substring(0, 100);
            }
        }
        if (!nearbyText && el.previousElementSibling) {
            nearbyText = el.previousElementSibling.textContent?.trim().substring(0, 100) || "";
        }
        return {
            id: el.id || "",
            name: el.name || "",
            accept: el.accept || "",
            multiple: el.multiple,
            ariaLabel: el.getAttribute("aria-label"),
            nearbyText,
        };
    });
}

/**
 * Extracts metadata about file inputs from a page or frame.
 *
 * @param frame - page or frame exposing `$$(selector)`.
 * @param frameUrl - URL recorded on every returned entry.
 * @param startIndex - global index assigned to the first input found here.
 * @returns `{ inputs, total }`: `inputs` holds the entries that could be read,
 *   `total` is the number of matching elements found (including unreadable
 *   ones). Each entry's `index` is `startIndex` plus the element's position in
 *   the query result, so a skipped element leaves a gap instead of shifting
 *   the indices that follow it. If the query itself fails, `total` is 0.
 */
async function extractFileInputsFromFrame(frame, frameUrl, startIndex) {
    let handles;
    try {
        handles = await frame.$$(FILE_INPUT_SELECTOR);
    }
    catch {
        // Frame could not be queried; report it as containing no inputs.
        return { inputs: [], total: 0 };
    }
    const inputs = [];
    for (const [offset, handle] of handles.entries()) {
        try {
            const info = await describeFileInput(handle);
            inputs.push({
                index: startIndex + offset,
                id: info.id,
                name: info.name,
                accept: info.accept,
                multiple: info.multiple,
                frameUrl,
                nearbyText: info.nearbyText,
                ariaLabel: info.ariaLabel,
            });
        }
        catch {
            // Skip inputs that can't be accessed
        }
    }
    return { inputs, total: handles.length };
}

/**
 * Discovers all file inputs on the page and all iframes.
 * Returns structured metadata that the LLM can use to decide which input to target.
 *
 * Indices are assigned by counting every matching element (main page first,
 * then each non-main, non-blank frame in `page.frames()` order), including
 * elements whose metadata could not be read. Counting this way keeps indices
 * unique across frames even when an input is skipped.
 *
 * @param opts - options forwarded to `getPageForTargetId`; `opts.targetId` is
 *   echoed back in the result.
 * @returns `{ inputs, targetId }`.
 */
export async function listFileInputsViaPlaywright(opts) {
    const page = await getPageForTargetId(opts);
    ensurePageState(page);
    const allInputs = [];
    // Running count of matching elements seen so far, NOT of entries collected.
    let nextIndex = 0;
    const mainScan = await extractFileInputsFromFrame(page, page.url(), nextIndex);
    allInputs.push(...mainScan.inputs);
    nextIndex += mainScan.total;
    const mainFrame = page.mainFrame();
    for (const frame of page.frames()) {
        // Skip main frame (already processed) and empty frames
        if (frame === mainFrame) {
            continue;
        }
        const frameUrl = frame.url();
        if (!frameUrl || frameUrl === "about:blank") {
            continue;
        }
        const frameScan = await extractFileInputsFromFrame(frame, frameUrl, nextIndex);
        allInputs.push(...frameScan.inputs);
        nextIndex += frameScan.total;
    }
    return {
        inputs: allInputs,
        targetId: opts.targetId,
    };
}
/**
 * Internal: find the file input element by index across page and iframes.
 *
 * Inputs are counted in query order: the main page first, then every frame
 * from `page.frames()` that is not the main frame and whose URL is neither
 * empty nor "about:blank". Frames whose query throws contribute no inputs.
 *
 * @param page - page exposing `$$`, `frames()` and `mainFrame()`.
 * @param targetIndex - zero-based global index of the wanted input.
 * @returns `{ element, frame }` for the matching input (`frame` is `page` for
 *   main-page inputs), or `null` when the index is not a non-negative integer
 *   or lies past the last input.
 */
async function findInputByIndex(page, targetIndex) {
    // A negative or fractional index would pass the "<" range checks below and
    // then index the handle array with an invalid key, yielding an undefined
    // element; reject it up front so "not found" is always reported as null.
    if (!Number.isInteger(targetIndex) || targetIndex < 0) {
        return null;
    }
    // Check main page first
    const mainInputs = await page.$$("input[type='file']");
    if (targetIndex < mainInputs.length) {
        return { element: mainInputs[targetIndex], frame: page };
    }
    let seen = mainInputs.length;
    // Check frames
    const mainFrame = page.mainFrame();
    for (const frame of page.frames()) {
        if (frame === mainFrame) {
            continue;
        }
        const frameUrl = frame.url();
        if (!frameUrl || frameUrl === "about:blank") {
            continue;
        }
        try {
            const frameInputs = await frame.$$("input[type='file']");
            if (targetIndex < seen + frameInputs.length) {
                return { element: frameInputs[targetIndex - seen], frame };
            }
            seen += frameInputs.length;
        }
        catch {
            // Frame might be detached, continue
        }
    }
    return null;
}
/**
 * Attaches a local file to the file input identified by a global index.
 * The index is the one reported by listFileInputsViaPlaywright.
 *
 * @param opts - forwarded to `getPageForTargetId`; also supplies `filePath`
 *   (the file to attach) and `inputIndex` (which input to use).
 * @returns `{ success, inputIndex, inputId, fileName, frameUrl }`.
 * @throws Error when the file does not exist or no input is found at the index.
 */
export async function attachFileToInputViaPlaywright(opts) {
    const page = await getPageForTargetId(opts);
    ensurePageState(page);
    // Fail early when the file is missing on this machine.
    const fileIsPresent = existsSync(opts.filePath);
    if (!fileIsPresent) {
        throw new Error(`File not found: ${opts.filePath}`);
    }
    // Resolve the global index to a concrete element handle.
    const match = await findInputByIndex(page, opts.inputIndex);
    if (!match?.element) {
        throw new Error(`No file input found at index ${opts.inputIndex}. Run list_upload_inputs first.`);
    }
    const target = match.element;
    const owner = match.frame;
    // Capture the element id up front so it can be reported back.
    const inputId = await target.evaluate((node) => node.id || "(no id)");
    await target.setInputFiles(opts.filePath);
    // Some sites only react to explicit input/change events; failures here are
    // deliberately ignored because the file has already been set.
    try {
        await target.evaluate((node) => {
            for (const type of ["input", "change"]) {
                node.dispatchEvent(new Event(type, { bubbles: true }));
            }
        });
    }
    catch {
        // Best-effort event dispatch
    }
    const fileName = basename(opts.filePath);
    const ownerHasUrl = "url" in owner && typeof owner.url === "function";
    const frameUrl = ownerHasUrl ? owner.url() : page.url();
    return {
        success: true,
        inputIndex: opts.inputIndex,
        inputId,
        fileName,
        frameUrl,
    };
}
/**
 * MIME types associated with common file extensions, used to match MIME
 * entries of an `accept` attribute against a file path. Built once at module
 * load instead of once per accept token.
 */
const UPLOAD_EXTENSION_MIME_TYPES = new Map([
    [".pdf", ["application/pdf"]],
    [".doc", ["application/msword"]],
    [".docx", ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]],
    [".xls", ["application/vnd.ms-excel"]],
    [".xlsx", ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]],
    [".ppt", ["application/vnd.ms-powerpoint"]],
    [".pptx", ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]],
    [".rtf", ["application/rtf"]],
    [".zip", ["application/zip"]],
    [".json", ["application/json"]],
    [".xml", ["application/xml", "text/xml"]],
    [".png", ["image/png"]],
    [".jpg", ["image/jpeg"]],
    [".jpeg", ["image/jpeg"]],
    [".gif", ["image/gif"]],
    [".webp", ["image/webp"]],
    [".svg", ["image/svg+xml"]],
    [".bmp", ["image/bmp"]],
    [".tif", ["image/tiff"]],
    [".tiff", ["image/tiff"]],
    [".txt", ["text/plain"]],
    [".md", ["text/markdown", "text/plain"]],
    [".csv", ["text/csv"]],
    [".html", ["text/html"]],
    [".htm", ["text/html"]],
    [".mp3", ["audio/mpeg"]],
    [".wav", ["audio/wav"]],
    [".mp4", ["video/mp4"]],
    [".webm", ["video/webm"]],
    [".mov", ["video/quicktime"]],
]);

/**
 * Validates that a file extension matches an accept attribute.
 *
 * Matching is case-insensitive and based only on the path's extension:
 * - an empty or whitespace-only `accept` places no restriction;
 * - `.ext` entries must equal the file's extension;
 * - `type/subtype` entries must equal a MIME type known for the extension;
 * - `type/*` entries match any known MIME type with that prefix;
 * - the full wildcard entry (star/star) accepts every file.
 *
 * @param filePath - path or name of the file to check.
 * @param accept - comma-separated value of an input's `accept` attribute.
 * @returns true if the file is allowed, false otherwise.
 */
export function isFileTypeAccepted(filePath, accept) {
    if (!accept || accept.trim() === "") {
        return true; // No restriction
    }
    const fileExt = extname(filePath).toLowerCase();
    const fileMimes = UPLOAD_EXTENSION_MIME_TYPES.get(fileExt) ?? [];
    const acceptedTypes = accept.split(",").map((t) => t.trim().toLowerCase());
    return acceptedTypes.some((accepted) => {
        // Extension entries (.pdf, .doc, etc)
        if (accepted.startsWith(".")) {
            return fileExt === accepted;
        }
        // Anything without a slash is not a MIME entry and never matches.
        if (!accepted.includes("/")) {
            return false;
        }
        if (accepted === "*/*") {
            return true;
        }
        if (accepted.endsWith("/*")) {
            // Wildcard like image/* -> compare against the "image/" prefix.
            const prefix = accepted.slice(0, -1);
            return fileMimes.some((mime) => mime.startsWith(prefix));
        }
        // Exact MIME match
        return fileMimes.includes(accepted);
    });
}
@@ -474,6 +474,61 @@ export function registerBrowserAgentActRoutes(app, ctx) {
474
474
  handleRouteError(ctx, res, err);
475
475
  }
476
476
  });
477
+ app.get("/upload-inputs", async (req, res) => {
478
+ const profileCtx = resolveProfileContext(req, res, ctx);
479
+ if (!profileCtx) {
480
+ return;
481
+ }
482
+ const targetId = toStringOrEmpty(req.query.targetId) || undefined;
483
+ try {
484
+ const tab = await profileCtx.ensureTabAvailable(targetId);
485
+ const pw = await requirePwAi(res, "list upload inputs");
486
+ if (!pw) {
487
+ return;
488
+ }
489
+ const result = await pw.listFileInputsViaPlaywright({
490
+ cdpUrl: profileCtx.profile.cdpUrl,
491
+ targetId: tab.targetId,
492
+ });
493
+ res.json({ ok: true, targetId: tab.targetId, ...result });
494
+ }
495
+ catch (err) {
496
+ handleRouteError(ctx, res, err);
497
+ }
498
+ });
499
+ app.post("/attach-file", async (req, res) => {
500
+ const profileCtx = resolveProfileContext(req, res, ctx);
501
+ if (!profileCtx) {
502
+ return;
503
+ }
504
+ const body = readBody(req);
505
+ const targetId = toStringOrEmpty(body.targetId) || undefined;
506
+ const inputIndex = toNumber(body.inputIndex);
507
+ const filePath = toStringOrEmpty(body.filePath);
508
+ if (inputIndex === undefined || inputIndex === null) {
509
+ return jsonError(res, 400, "inputIndex is required");
510
+ }
511
+ if (!filePath) {
512
+ return jsonError(res, 400, "filePath is required");
513
+ }
514
+ try {
515
+ const tab = await profileCtx.ensureTabAvailable(targetId);
516
+ const pw = await requirePwAi(res, "attach file");
517
+ if (!pw) {
518
+ return;
519
+ }
520
+ const result = await pw.attachFileToInputViaPlaywright({
521
+ cdpUrl: profileCtx.profile.cdpUrl,
522
+ targetId: tab.targetId,
523
+ inputIndex,
524
+ filePath,
525
+ });
526
+ res.json({ ok: true, targetId: tab.targetId, ...result });
527
+ }
528
+ catch (err) {
529
+ handleRouteError(ctx, res, err);
530
+ }
531
+ });
477
532
  app.post("/highlight", async (req, res) => {
478
533
  const profileCtx = resolveProfileContext(req, res, ctx);
479
534
  if (!profileCtx) {
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2026.1.32",
3
- "commit": "63a5a6cdc28f72ec1bb0ec415ff148eba790b2d8",
4
- "builtAt": "2026-02-02T03:31:57.234Z"
2
+ "version": "2026.1.35",
3
+ "commit": "60d46f62b6417f469753f0bc785f4481b74c76eb",
4
+ "builtAt": "2026-02-03T06:50:10.937Z"
5
5
  }
@@ -1 +1 @@
1
- 0c8337264f78308ab09fce8a670fbfae17efd8cad8a60f1d46a50e9a48a5eec6
1
+ c2e45112d75a09927b99786eaa6500c5433a73d88440d47ec00a0598c2359d7f
@@ -44,7 +44,11 @@ ${theme.muted("Your WhatsApp messages will trigger AI that runs commands on this
44
44
  if (gatewayUrl.includes("://")) {
45
45
  const url = new URL(gatewayUrl);
46
46
  host = url.hostname;
47
- port = url.port ? parseInt(url.port, 10) : (url.protocol === "wss:" || url.protocol === "https:" ? 443 : 80);
47
+ port = url.port
48
+ ? parseInt(url.port, 10)
49
+ : url.protocol === "wss:" || url.protocol === "https:"
50
+ ? 443
51
+ : 80;
48
52
  useTls = url.protocol === "wss:" || url.protocol === "https:";
49
53
  }
50
54
  else if (gatewayUrl.includes(":")) {