@phnx-labs/agents-cli 1.17.4 → 1.17.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.17.6
4
+
5
+ **Workflows**
6
+
7
+ - New `workflows` skill — author-and-run guide for workflow bundles (`WORKFLOW.md` frontmatter, `subagents/` directory for multi-agent pipelines, scoped `skills/` and `plugins/`, sharing via `agents repo push` or GitHub install). Calls out the `--mode plan` deadlock that bites workflows which need to post comments or edit files.
8
+ - `agents workflows --help` rewritten with a structure diagram, project > user > system resolution order, and an explicit note that workflows mutating state need `--mode edit` or `--mode full` to avoid a headless deadlock at `ExitPlanMode`.
9
+ - README gains a `Workflows` section between Teams and Browser covering the bundle layout, frontmatter, subagents/skills/plugins, and the `--mode` requirement.
10
+
3
11
  ## 1.17.4
4
12
 
5
13
  **Browser**
package/README.md CHANGED
@@ -47,6 +47,7 @@ Also available as `ag` -- all commands work with both `agents` and `ag`.
47
47
  - [Sessions across agents](#sessions-across-agents)
48
48
  - [Run open models through Claude Code](#run-open-models-through-claude-code)
49
49
  - [Teams](#teams)
50
+ - [Workflows](#workflows)
50
51
  - [Browser](#browser)
51
52
  - [Secrets](#secrets)
52
53
  - [Routines](#routines)
@@ -244,6 +245,57 @@ Team state is observable via `agents teams list --json` / `agents teams status -
244
245
 
245
246
  ---
246
247
 
248
+ ## Workflows
249
+
250
+ Bundle an orchestrator prompt with optional subagents, skills, and plugins into a named, reusable pipeline. One bundle, one invocation.
251
+
252
+ ```bash
253
+ # Use a workflow — workflow name goes in the agent slot
254
+ agents run code-review "review PR #42 on acme/api"
255
+
256
+ # List + inspect
257
+ agents workflows list
258
+ agents workflows view code-review
259
+
260
+ # Install from GitHub or local
261
+ agents workflows add gh:yourteam/code-review
262
+ agents workflows add ./my-workflow
263
+ ```
264
+
265
+ A workflow is a directory:
266
+
267
+ ```
268
+ ~/.agents/workflows/code-review/
269
+ WORKFLOW.md # YAML frontmatter + orchestrator system prompt
270
+ subagents/ # optional: *.md files exposed to the orchestrator
271
+ security.md
272
+ style.md
273
+ skills/ # optional: knowledge packs scoped to this workflow
274
+ plugins/ # optional: plugin bundles
275
+ ```
276
+
277
+ `WORKFLOW.md`'s Markdown body is the orchestrator's system prompt. Files under `subagents/` get copied to `~/.claude/agents/` at run time so the built-in Agent tool can dispatch to them by name — including in parallel. `skills/` and `plugins/` sync into the version home just for the run.
278
+
279
+ ```yaml
280
+ # WORKFLOW.md frontmatter
281
+ ---
282
+ name: Code Review
283
+ description: Evidence-grounded PR review with file:line citations.
284
+ model: claude-opus-4-7
285
+ tools:
286
+ - Read
287
+ - Grep
288
+ - Bash
289
+ - WebFetch
290
+ ---
291
+ ```
292
+
293
+ Workflows that need to write — post PR comments, edit files, send Slack messages — should run with `--mode edit` or `--mode full`. `agents run` defaults to `--mode plan` (read-only), which deadlocks at `ExitPlanMode` in headless runs.
294
+
295
+ Resolution is project > user > system: a `<repo>/.agents/workflows/<name>/` overrides a same-named workflow in `~/.agents/workflows/`. Commit project workflows with your repo so teammates get the same pipeline.
296
+
297
+ ---
298
+
247
299
  ## Browser
248
300
 
249
301
  Give agents access to a real browser — no relay extension, no cloud service, no Playwright getting blocked.
@@ -871,6 +871,53 @@ function registerTaskCommands(browser) {
871
871
  }
872
872
  console.log('Scrolled');
873
873
  });
874
// `agents browser upload <task>` — forwards an upload request to the browser
// daemon over IPC. The upload mode is derived from the flags: --trigger selects
// chooser interception, --drop / --input force the drag-drop or file-input
// pattern, and otherwise 'auto' is sent so the receiving side picks a pattern.
browser
    .command('upload <task>')
    .description('Upload file(s) — supports hidden file inputs, drag-drop targets, and OS chooser interception')
    .option('-t, --tab <tabId>', 'Tab ID (defaults to current)')
    .option('-r, --ref <n>', 'Ref of the upload target element (file input or drop zone)', (v) => parseInt(v, 10))
    .option('--trigger <n>', 'Ref of a button that opens the OS file chooser (Pattern C)', (v) => parseInt(v, 10))
    .option('-f, --file <path...>', 'Absolute path(s) to file(s) to upload (repeatable)')
    .option('--drop', 'Force drag-drop pattern even if ref is an <input type=file>')
    .option('--input', 'Force file-input pattern (DOM.setFileInputFiles)')
    .option('--timeout <ms>', 'Timeout for chooser interception (Pattern C)', (v) => parseInt(v, 10))
    .action(async (task, opts) => {
        const files = opts.file ?? [];
        if (files.length === 0) {
            console.error('--file <path> is required (repeat for multiple files)');
            process.exit(1);
        }
        if (opts.ref === undefined && opts.trigger === undefined) {
            console.error('--ref <n> or --trigger <n> is required');
            process.exit(1);
        }
        if (opts.drop && opts.input) {
            console.error('--drop and --input are mutually exclusive');
            process.exit(1);
        }
        // parseInt returns NaN for non-numeric input. NaN is not `undefined`, so it
        // would slip past the presence check above and be forwarded as a bogus
        // ref/timeout; reject it here with a message that names the flag.
        const numericOptions = [
            ['--ref', opts.ref],
            ['--trigger', opts.trigger],
            ['--timeout', opts.timeout],
        ];
        for (const [flag, value] of numericOptions) {
            if (value !== undefined && !Number.isInteger(value)) {
                console.error(`${flag} must be an integer`);
                process.exit(1);
            }
        }
        // --trigger wins over --drop / --input because it implies chooser interception.
        let mode = 'auto';
        if (opts.trigger !== undefined) {
            mode = 'chooser';
        }
        else if (opts.drop) {
            mode = 'drop';
        }
        else if (opts.input) {
            mode = 'input';
        }
        const response = await sendIPCRequest({
            action: 'upload',
            task,
            tabId: opts.tab,
            ref: opts.ref,
            trigger: opts.trigger,
            files,
            uploadMode: mode,
            timeout: opts.timeout,
        });
        if (!response.ok) {
            console.error(response.error);
            process.exit(1);
        }
        // response.uploadMode is the pattern the daemon reports having used.
        console.log(`Uploaded ${files.length} file${files.length === 1 ? '' : 's'} (${response.uploadMode})`);
    });
874
921
  // ─── Viewport & Device ───────────────────────────────────────────────────────
875
922
  const setCmd = browser.command('set').description('Set browser emulation options');
876
923
  setCmd
@@ -17,21 +17,42 @@ export function registerWorkflowsCommands(program) {
17
17
  .command('workflows')
18
18
  .description('Manage multi-agent pipeline workflows (WORKFLOW.md bundles)')
19
19
  .addHelpText('after', `
20
- Workflows are directory bundles (WORKFLOW.md + subagents/) that define multi-agent pipelines run via:
21
- agents run <workflow-name>
20
+ Workflows are directory bundles that define reusable named agent pipelines.
21
+ Run a workflow with:
22
+ agents run <workflow-name> [prompt]
23
+
24
+ Structure:
25
+ ~/.agents/workflows/<name>/
26
+ WORKFLOW.md required: YAML frontmatter + orchestrator system prompt
27
+ subagents/*.md optional: subagents the orchestrator can dispatch to
28
+ skills/ optional: knowledge packs scoped to this workflow
29
+ plugins/ optional: plugin bundles scoped to this workflow
30
+
31
+ Resolution: project (.agents/workflows/) > user (~/.agents/workflows/) > system.
32
+
33
+ Note: agents run defaults to --mode plan (read-only). For workflows that
34
+ write files, post comments, or otherwise mutate state, pass --mode edit or
35
+ --mode full or the run will deadlock at ExitPlanMode.
22
36
 
23
37
  Examples:
24
38
  # See what workflows are available
25
39
  agents workflows list
26
40
 
27
- # Install from GitHub
41
+ # Install from GitHub or a local directory
28
42
  agents workflows add gh:user/workflows
43
+ agents workflows add ./code-review
29
44
 
30
- # Install a local workflow directory
31
- agents workflows add ./rdev
45
+ # Inspect a workflow's frontmatter and subagents
46
+ agents workflows view code-review
32
47
 
33
- # Remove a workflow
34
- agents workflows remove rdev
48
+ # Run it (workflow name goes in the agent slot)
49
+ agents run code-review "review PR #42"
50
+
51
+ # Run a workflow that posts comments / edits files
52
+ agents run code-review --mode full "review PR #42 and post the review"
53
+
54
+ # Remove from version homes (and central storage on second run)
55
+ agents workflows remove code-review
35
56
  `);
36
57
  workflowsCmd
37
58
  .command('list [agent]')
@@ -296,6 +296,19 @@ export class BrowserIPCServer {
296
296
  const downloadPath = await this.service.waitForDownload(request.task, request.timeout);
297
297
  return { ok: true, downloadPath };
298
298
  }
299
+ case 'upload': {
300
+ if (!request.task || !request.files || request.files.length === 0) {
301
+ return { ok: false, error: 'Task and at least one file required' };
302
+ }
303
+ const result = await this.service.upload(request.task, request.files, {
304
+ ref: request.ref,
305
+ trigger: request.trigger,
306
+ mode: request.uploadMode,
307
+ tabHint: request.tabId,
308
+ timeout: request.timeout,
309
+ });
310
+ return { ok: true, uploadMode: result.mode };
311
+ }
299
312
  default:
300
313
  return { ok: false, error: `Unknown action: ${request.action}` };
301
314
  }
@@ -1,6 +1,7 @@
1
1
  import { type TabInfo, type ProfileStatus, type HistoricalTask } from './types.js';
2
2
  import { type RefOpts, type RefNode } from './refs.js';
3
3
  import type { TargetFilter } from './types.js';
4
+ export type UploadMode = 'auto' | 'input' | 'drop' | 'chooser';
4
5
  /**
5
6
  * Parse a `targetFilter` string into its kind + value, or return `null`
6
7
  * when the input is missing or malformed. Filter syntax:
@@ -103,6 +104,15 @@ export declare class BrowserService {
103
104
  press(taskId: string, key: string, tabHint?: string): Promise<void>;
104
105
  hover(taskId: string, ref: number, tabHint?: string): Promise<void>;
105
106
  scroll(taskId: string, deltaX: number, deltaY: number, atX?: number, atY?: number, tabHint?: string): Promise<void>;
107
+ upload(taskId: string, files: string[], options: {
108
+ ref?: number;
109
+ trigger?: number;
110
+ mode?: UploadMode;
111
+ tabHint?: string;
112
+ timeout?: number;
113
+ }): Promise<{
114
+ mode: 'input' | 'drop' | 'chooser';
115
+ }>;
106
116
  status(profileName?: string): Promise<ProfileStatus[]>;
107
117
  private reconcileFromDisk;
108
118
  setViewport(taskId: string, width: number, height: number, options?: {
@@ -9,6 +9,7 @@ import { generateTaskId, generateShortId, generateFunName, } from './types.js';
9
9
  import { getRefs, resolveRefToCoords } from './refs.js';
10
10
  import { clickAtCoords, hoverAtCoords, scrollAtCoords, typeText, pressKey, focusNode } from './input.js';
11
11
  import { typeEditorText } from './editor.js';
12
+ import { detectUploadPattern, uploadToDropTarget, uploadToFileInput, uploadViaFileChooser, } from './upload.js';
12
13
  import { emit } from '../events.js';
13
14
  /**
14
15
  * Parse a `targetFilter` string into its kind + value, or return `null`
@@ -557,6 +558,60 @@ export class BrowserService {
557
558
  const sessionId = await this.getSessionId(conn, target.targetId);
558
559
  await scrollAtCoords(conn.cdp, sessionId, atX ?? 0, atY ?? 0, deltaX, deltaY);
559
560
  }
561
+ async upload(taskId, files, options) {
562
+ const { conn, task } = await this.findTask(taskId);
563
+ const shortId = options.tabHint
564
+ ? await this.resolveTabHint(conn, task, options.tabHint)
565
+ : this.resolveCurrentTab(task);
566
+ const cdpTargetId = this.getCdpTargetId(task, shortId);
567
+ const target = await this.getTarget(conn, cdpTargetId);
568
+ if (!target)
569
+ throw new Error(`Tab ${shortId} not found`);
570
+ const sessionId = await this.getSessionId(conn, target.targetId);
571
+ // Match the user-facing ref numbering from `agents browser refs` (which
572
+ // defaults to interactive=true). The other action helpers in this file
573
+ // use interactive=false historically, but that produces ref numbers the
574
+ // user never sees — `--ref 1` then resolves to the RootWebArea instead of
575
+ // the first interactive element. Match the listing the user actually saw.
576
+ const { nodeMap } = await getRefs(conn.cdp, sessionId, { interactive: true, limit: 1000 });
577
+ const mode = options.mode ?? 'auto';
578
+ if (options.trigger !== undefined || mode === 'chooser') {
579
+ const ref = options.trigger ?? options.ref;
580
+ if (ref === undefined) {
581
+ throw new Error('chooser mode requires --trigger <ref> (or --ref) pointing at the button that opens the file dialog');
582
+ }
583
+ const node = nodeMap.get(ref);
584
+ if (!node)
585
+ throw new Error(`Ref ${ref} not found`);
586
+ await uploadViaFileChooser(conn.cdp, sessionId, { node, nodeMap }, files, options.timeout);
587
+ return { mode: 'chooser' };
588
+ }
589
+ if (options.ref === undefined) {
590
+ throw new Error('upload requires --ref <n> (target element) or --trigger <n> (button that opens chooser)');
591
+ }
592
+ const node = nodeMap.get(options.ref);
593
+ if (!node)
594
+ throw new Error(`Ref ${options.ref} not found`);
595
+ if (!node.backendNodeId)
596
+ throw new Error(`Ref ${options.ref} has no DOM node`);
597
+ let resolved;
598
+ if (mode === 'input') {
599
+ resolved = 'input';
600
+ }
601
+ else if (mode === 'drop') {
602
+ resolved = 'drop';
603
+ }
604
+ else {
605
+ resolved = await detectUploadPattern(conn.cdp, sessionId, node.backendNodeId);
606
+ }
607
+ if (resolved === 'input') {
608
+ await uploadToFileInput(conn.cdp, sessionId, node.backendNodeId, files);
609
+ }
610
+ else {
611
+ await uploadToDropTarget(conn.cdp, sessionId, node.backendNodeId, files);
612
+ }
613
+ return { mode: resolved };
614
+ }
560
615
  async status(profileName) {
561
616
  const seen = new Set();
562
617
  const statuses = [];
@@ -83,7 +83,7 @@ export interface HistoricalTask {
83
83
  domains: string[];
84
84
  tabCount: number;
85
85
  }
86
- export type IPCAction = 'start' | 'launch-profile' | 'done' | 'stop' | 'status' | 'history' | 'navigate' | 'tab-add' | 'tab-focus' | 'tab-close' | 'tab-list' | 'evaluate' | 'screenshot' | 'refs' | 'click' | 'type' | 'press' | 'hover' | 'scroll' | 'set-viewport' | 'set-device' | 'console' | 'errors' | 'requests' | 'response-body' | 'wait' | 'set-download-path' | 'wait-download';
86
+ export type IPCAction = 'start' | 'launch-profile' | 'done' | 'stop' | 'status' | 'history' | 'navigate' | 'tab-add' | 'tab-focus' | 'tab-close' | 'tab-list' | 'evaluate' | 'screenshot' | 'refs' | 'click' | 'type' | 'press' | 'hover' | 'scroll' | 'set-viewport' | 'set-device' | 'console' | 'errors' | 'requests' | 'response-body' | 'wait' | 'set-download-path' | 'wait-download' | 'upload';
87
87
  export interface IPCRequest {
88
88
  action: IPCAction;
89
89
  task?: string;
@@ -116,6 +116,9 @@ export interface IPCRequest {
116
116
  waitValue?: string | number;
117
117
  timeout?: number;
118
118
  downloadPath?: string;
119
+ files?: string[];
120
+ trigger?: number;
121
+ uploadMode?: 'auto' | 'input' | 'drop' | 'chooser';
119
122
  }
120
123
  export interface IPCResponse {
121
124
  ok: boolean;
@@ -138,6 +141,7 @@ export interface IPCResponse {
138
141
  body?: string;
139
142
  downloadPath?: string;
140
143
  devices?: string[];
144
+ uploadMode?: 'input' | 'drop' | 'chooser';
141
145
  }
142
146
  export interface ConsoleEntry {
143
147
  level: 'log' | 'info' | 'warn' | 'error';
@@ -0,0 +1,54 @@
1
+ import type { CDPClient } from './cdp.js';
2
+ import { type RefNode } from './refs.js';
3
+ /**
4
+ * File upload strategies for `agents browser upload`.
5
+ *
6
+ * Every uploader on the web is one of three patterns:
7
+ *
8
+ * A. Direct file input — the page exposes (or hides) `<input type=file>`.
9
+ * `DOM.setFileInputFiles` plants the paths directly. The cleanest
10
+ * path; works whenever the input is in the DOM, even when CSS-hidden
11
+ * or visually offscreen.
12
+ *
13
+ * B. Drag-drop target — the page listens for `drop` events on a region
14
+ * (Canva, Notion, Linear, GitHub PRs). We dispatch synthetic
15
+ * `dragenter`/`dragover`/`drop` events with a `DataTransfer` whose
16
+ * `files` list carries a `File` built from disk bytes. The dispatch
17
+ * uses real elementFromPoint coordinates so React/DOM listeners fire.
18
+ *
19
+ * C. Native chooser interception — the user clicks a button that calls
20
+ * `input.click()` and the only file input is dynamically created in
21
+ * response. `Page.setInterceptFileChooserDialog` flips the chooser
22
+ * from a blocking OS dialog into a CDP event; we click the trigger,
23
+ * wait for `Page.fileChooserOpened`, then satisfy it with
24
+ * `Page.handleFileChooser({ action: 'accept', files })`. Lifecycle:
25
+ * enable interception -> click -> wait -> accept -> disable.
26
+ */
27
+ export interface UploadOptions {
28
+ files: string[];
29
+ }
30
+ /** Pattern A — direct file input via `DOM.setFileInputFiles`. */
31
+ export declare function uploadToFileInput(cdp: CDPClient, sessionId: string, backendNodeId: number, files: string[]): Promise<void>;
32
+ /** Pattern B — synthetic drag-drop onto a target node. */
33
+ export declare function uploadToDropTarget(cdp: CDPClient, sessionId: string, backendNodeId: number, files: string[]): Promise<void>;
34
+ /**
35
+ * Pattern C — click a trigger, intercept the OS file chooser, feed files.
36
+ *
37
+ * `Page.setInterceptFileChooserDialog` must be enabled before the click; the
38
+ * chooser only fires once, so we register a single-shot handler ahead of time.
39
+ * Auto-attached child sessions matter here: the chooser event arrives on the
40
+ * session whose page hosts the input, which is the same session we used for
41
+ * the click — so we filter event params by sessionId.
42
+ */
43
+ export declare function uploadViaFileChooser(cdp: CDPClient, sessionId: string, triggerRef: {
44
+ node: RefNode;
45
+ nodeMap: Map<number, RefNode>;
46
+ }, files: string[], timeoutMs?: number): Promise<void>;
47
+ export declare function mimeFromExt(filePath: string): string;
48
+ /**
49
+ * Inspect a ref's DOM node to decide which pattern fits when the caller
50
+ * didn't specify. Returns 'input' if the node is `<input type=file>`,
51
+ * otherwise 'drop'. Chooser interception (Pattern C) is never auto-selected
52
+ * because it requires clicking the ref, which mutates page state — opt-in only.
53
+ */
54
+ export declare function detectUploadPattern(cdp: CDPClient, sessionId: string, backendNodeId: number): Promise<'input' | 'drop'>;
@@ -0,0 +1,264 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import { clickAtCoords } from './input.js';
4
+ import { resolveRefToCoords } from './refs.js';
5
/**
 * Source of a page-side function, meant to be run with `this` bound to the
 * element behind a ref (e.g. via `Runtime.callFunctionOn`). It returns the
 * `<input type=file>` that element stands for, or `null` when none is found.
 */
const RESOLVE_FILE_INPUT_FN = `(function() {
    const start = this;
    const isFileInput = (n) => !!n && n.tagName === 'INPUT' && n.type === 'file';
    // Walk multiple paths to find an <input type=file>:
    //   1. The node itself (when the AX backend node IS the input).
    //   2. Its ancestors via parentElement (custom button wrappers).
    //   3. closest('input[type=file]') (handles label/span/button inside or
    //      near a file input).
    //   4. Across user-agent shadow boundaries via getRootNode().host
    //      (Chromium's internal shadow-button pseudo-element for file inputs).
    //   5. If the node is a <label>, follow its labeled control. label.control
    //      covers both <label for=...> and a label that wraps its input; the
    //      htmlFor lookup is kept as a fallback.
    //   6. Last resort: if the start node was a click target that fires
    //      input.click() inside a click handler, fall back to the unique
    //      <input type=file> on the page (when there is exactly one).
    let el = start;
    for (let i = 0; i < 8 && el; i++) {
        if (isFileInput(el)) return el;
        if (el.closest) {
            const found = el.closest('input[type=file]');
            if (found) return found;
        }
        if (el.tagName === 'LABEL') {
            if (isFileInput(el.control)) return el.control;
            if (el.htmlFor) {
                const t = document.getElementById(el.htmlFor);
                if (isFileInput(t)) return t;
            }
        }
        const root = el.getRootNode && el.getRootNode();
        if (root && root.host && root !== document) {
            el = root.host;
            continue;
        }
        el = el.parentElement;
    }
    // Final fallback: if exactly one file input exists on the page, use it.
    // This handles cases where the AX tree exposes the input as an internal
    // pseudo-button whose parentElement is null. A page with a single uploader
    // (Slack composer, Notion image block, Canva ingredient) hits this branch.
    const all = document.querySelectorAll('input[type=file]');
    if (all.length === 1) return all[0];
    return null;
})`;
45
/**
 * Pattern A — direct file input via `DOM.setFileInputFiles`.
 *
 * Validates the paths, maps the ref's node to the file input it stands for,
 * then sets the file list on that input.
 *
 * @param cdp CDP client exposing `send(method, params, sessionId)`.
 * @param sessionId Session the commands are sent on.
 * @param backendNodeId Backend node id of the ref the caller pointed at.
 * @param files Paths to upload; must pass `validateFiles`.
 */
export async function uploadToFileInput(cdp, sessionId, backendNodeId, files) {
    validateFiles(files);
    const inputNodeId = await resolveActualFileInput(cdp, sessionId, backendNodeId);
    const params = { files, backendNodeId: inputNodeId };
    await cdp.send('DOM.setFileInputFiles', params, sessionId);
}
51
/**
 * Map the node behind a ref to the backend node id of its `<input type=file>`.
 *
 * Resolves the node to a remote object, runs `RESOLVE_FILE_INPUT_FN` on it and
 * describes the element that comes back. Both remote objects are released
 * again, inner one first, whether or not the lookup succeeds.
 *
 * @returns The input's backend node id, or `backendNodeId` unchanged when the
 *   node could not be resolved to a remote object.
 * @throws Error when the page-side lookup returns no element.
 */
async function resolveActualFileInput(cdp, sessionId, backendNodeId) {
    const resolved = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    const startObjectId = resolved.object.objectId;
    if (!startObjectId) {
        return backendNodeId;
    }
    const release = (objectId) => cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    try {
        const lookup = await cdp.send('Runtime.callFunctionOn', { objectId: startObjectId, functionDeclaration: RESOLVE_FILE_INPUT_FN, returnByValue: false }, sessionId);
        const inputObjectId = lookup.result.objectId;
        if (!inputObjectId) {
            throw new Error('Ref is not (and is not contained in) an <input type=file>');
        }
        try {
            const described = await cdp.send('DOM.describeNode', { objectId: inputObjectId }, sessionId);
            return described.node.backendNodeId;
        }
        finally {
            await release(inputObjectId);
        }
    }
    finally {
        await release(startObjectId);
    }
}
74
// Source of a page-side function run with `this` bound to the drop target and
// `files` = [{ name, type, bytes }], where `bytes` is the base64-encoded file
// content. It rebuilds a File per entry, collects them in a DataTransfer, then
// dispatches dragenter, dragover and drop on the element, with client
// coordinates at the centre of its bounding box.
const DRAG_DROP_FN = `(function(files) {
    const el = this;
    const rect = el.getBoundingClientRect();
    const x = rect.left + rect.width / 2;
    const y = rect.top + rect.height / 2;
    const dt = new DataTransfer();
    for (const f of files) {
        const u8 = Uint8Array.from(atob(f.bytes), c => c.charCodeAt(0));
        const blob = new Blob([u8], { type: f.type || 'application/octet-stream' });
        const file = new File([blob], f.name, { type: f.type || 'application/octet-stream' });
        dt.items.add(file);
    }
    function dispatch(type) {
        // Chromium does not honor the dataTransfer field in DragEventInit for
        // synthetic events. Build the DragEvent with no dataTransfer in the init,
        // then override the event dataTransfer getter via defineProperty so
        // page-level listeners see the File list we constructed.
        const ev = new DragEvent(type, {
            bubbles: true,
            cancelable: true,
            composed: true,
            clientX: x,
            clientY: y,
        });
        Object.defineProperty(ev, 'dataTransfer', { value: dt });
        el.dispatchEvent(ev);
    }
    dispatch('dragenter');
    dispatch('dragover');
    dispatch('drop');
    return { dispatched: 3, files: files.length };
})`;
106
/**
 * Pattern B — synthetic drag-drop onto a target node.
 *
 * Each file is read from disk and shipped base64-encoded as an argument to
 * `DRAG_DROP_FN`, which is run on the resolved target node. The remote object
 * for the node is released whether or not the dispatch succeeds.
 *
 * @param cdp CDP client exposing `send(method, params, sessionId)`.
 * @param sessionId Session the commands are sent on.
 * @param backendNodeId Backend node id of the drop target.
 * @param files Paths to upload; must pass `validateFiles`.
 * @throws Error when the node cannot be resolved or the page-side dispatch
 *   reports an exception; file read errors propagate as rejections.
 */
export async function uploadToDropTarget(cdp, sessionId, backendNodeId, files) {
    validateFiles(files);
    // Read with the promise API: a synchronous read would block the event loop
    // of the calling process for as long as the files take to load.
    const payload = await Promise.all(files.map(async (p) => ({
        name: path.basename(p),
        type: mimeFromExt(p),
        bytes: (await fs.promises.readFile(p)).toString('base64'),
    })));
    const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    if (!object.objectId) {
        throw new Error('Drop target node could not be resolved');
    }
    const objectId = object.objectId;
    try {
        const r = await cdp.send('Runtime.callFunctionOn', {
            objectId,
            functionDeclaration: DRAG_DROP_FN,
            arguments: [{ value: payload }],
            returnByValue: true,
            awaitPromise: true,
        }, sessionId);
        if (r.exceptionDetails) {
            throw new Error('Drop dispatch threw: ' + JSON.stringify(r.exceptionDetails));
        }
    }
    finally {
        await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    }
}
135
/**
 * Pattern C — click a trigger, intercept the OS file chooser, feed files.
 *
 * `Page.setInterceptFileChooserDialog` is enabled before the click and a
 * `Page.fileChooserOpened` listener is registered ahead of time; the first
 * event carrying a numeric `backendNodeId` settles the wait. Interception is
 * switched off and the listener removed again on every exit path.
 * NOTE(review): the listener itself does not filter events by sessionId —
 * confirm whether `cdp.on` already scopes events to a session.
 *
 * @param cdp CDP client exposing `send`, `on` and `off`.
 * @param sessionId Session used for the click and the chooser commands.
 * @param triggerRef `{ node, nodeMap }` — the ref to click and the map it came from.
 * @param files Paths to upload; must pass `validateFiles`.
 * @param timeoutMs How long to wait for the chooser. Defaults to 5000; values
 *   that are not a positive finite number (null, NaN, 0, ...) also fall back
 *   to 5000 instead of timing out immediately.
 * @throws Error when the chooser does not open in time, or when the click or
 *   the chooser commands fail.
 */
export async function uploadViaFileChooser(cdp, sessionId, triggerRef, files, timeoutMs = 5000) {
    validateFiles(files);
    // The parameter default only covers `undefined`; a null/NaN timeout would
    // otherwise make the timer below fire almost immediately.
    const waitMs = Number.isFinite(timeoutMs) && timeoutMs > 0 ? timeoutMs : 5000;
    await cdp.send('Page.enable', {}, sessionId);
    await cdp.send('Page.setInterceptFileChooserDialog', { enabled: true }, sessionId);
    let opened = null;
    let resolve;
    let reject;
    const wait = new Promise((res, rej) => {
        resolve = res;
        reject = rej;
    });
    // The timer can reject `wait` while the click is still in flight, i.e.
    // before `await wait` has attached a handler. Mark the promise as handled
    // so that is not reported as an unhandled rejection; the `await` below
    // still observes the rejection.
    wait.catch(() => { });
    const handler = (params) => {
        if (typeof params?.backendNodeId === 'number') {
            opened = { backendNodeId: params.backendNodeId };
            resolve();
        }
    };
    cdp.on('Page.fileChooserOpened', handler);
    const timer = setTimeout(() => {
        reject(new Error(`File chooser did not open within ${waitMs}ms — is the trigger ref correct?`));
    }, waitMs);
    try {
        const { x, y } = await resolveRefToCoords(cdp, sessionId, triggerRef.nodeMap, triggerRef.node.ref);
        await clickAtCoords(cdp, sessionId, x, y);
        await wait;
        await cdp.send('Page.handleFileChooser', { action: 'accept', files }, sessionId);
    }
    catch (err) {
        // Some Chromium builds expect setFileInputFiles instead of handleFileChooser.
        // handleFileChooser is tried first; if the chooser did open and the error
        // looks like an unsupported/unknown method, fall back to setFileInputFiles
        // using the backendNodeId from the event. Anything else is rethrown.
        if (opened && err instanceof Error && /not supported|not found|Method/i.test(err.message)) {
            await cdp.send('DOM.setFileInputFiles', { files, backendNodeId: opened.backendNodeId }, sessionId);
        }
        else {
            throw err;
        }
    }
    finally {
        clearTimeout(timer);
        cdp.off('Page.fileChooserOpened', handler);
        // Best effort: failing to switch interception off must not mask the outcome.
        await cdp.send('Page.setInterceptFileChooserDialog', { enabled: false }, sessionId).catch(() => { });
    }
}
190
/**
 * Check the upload paths before any browser command is sent.
 *
 * @param files Paths to check.
 * @throws Error when the list is missing or empty, or on the first path that
 *   is not absolute or does not exist on disk.
 */
function validateFiles(files) {
    const isEmpty = !files || files.length === 0;
    if (isEmpty) {
        throw new Error('At least one file path is required');
    }
    for (const candidate of files) {
        let problem = null;
        if (!path.isAbsolute(candidate)) {
            problem = `Upload path must be absolute: ${candidate}`;
        }
        else if (!fs.existsSync(candidate)) {
            problem = `File not found: ${candidate}`;
        }
        if (problem !== null) {
            throw new Error(problem);
        }
    }
}
203
// Lower-case file extension (leading dot included) -> MIME type.
const MIME_BY_EXT = new Map([
    // images
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.gif', 'image/gif'],
    ['.webp', 'image/webp'],
    ['.svg', 'image/svg+xml'],
    // documents and data
    ['.pdf', 'application/pdf'],
    ['.txt', 'text/plain'],
    ['.csv', 'text/csv'],
    ['.json', 'application/json'],
    ['.zip', 'application/zip'],
    // video and audio
    ['.mp4', 'video/mp4'],
    ['.mov', 'video/quicktime'],
    ['.mp3', 'audio/mpeg'],
    ['.wav', 'audio/wav'],
]);
/**
 * Guess a MIME type from a file path's extension, case-insensitively.
 *
 * @param filePath Path or file name.
 * @returns The mapped MIME type, or 'application/octet-stream' when the
 *   extension is missing or not in the table.
 */
export function mimeFromExt(filePath) {
    const extension = path.extname(filePath).toLowerCase();
    const known = MIME_BY_EXT.get(extension);
    return known ?? 'application/octet-stream';
}
224
/**
 * Decide which upload pattern fits a ref when the caller didn't specify one.
 *
 * Returns 'input' when the node itself is `<input type=file>`, or when running
 * `RESOLVE_FILE_INPUT_FN` on it yields an element (the accessibility tree can
 * surface a button or shadow-DOM descendant in place of the input). Otherwise
 * returns 'drop'. Chooser interception (Pattern C) is never auto-selected
 * because it requires clicking the ref, which mutates page state — opt-in only.
 *
 * @param cdp CDP client exposing `send(method, params, sessionId)`.
 * @param sessionId Session the commands are sent on.
 * @param backendNodeId Backend node id of the ref to inspect.
 * @returns 'input' or 'drop'.
 */
export async function detectUploadPattern(cdp, sessionId, backendNodeId) {
    const described = await cdp.send('DOM.describeNode', { backendNodeId, depth: 0 }, sessionId);
    if (isFileInputNode(described.node)) {
        return 'input';
    }
    // Not an input by itself: look it up from the page side before settling on drop.
    const resolved = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    const startObjectId = resolved.object.objectId;
    if (!startObjectId) {
        return 'drop';
    }
    try {
        const lookup = await cdp.send('Runtime.callFunctionOn', { objectId: startObjectId, functionDeclaration: RESOLVE_FILE_INPUT_FN, returnByValue: false }, sessionId);
        const foundObjectId = lookup.result.objectId;
        if (!foundObjectId) {
            return 'drop';
        }
        // Only the existence of a match matters here; drop the handle right away.
        await cdp.send('Runtime.releaseObject', { objectId: foundObjectId }, sessionId);
        return 'input';
    }
    finally {
        await cdp.send('Runtime.releaseObject', { objectId: startObjectId }, sessionId);
    }
}
253
/**
 * Tell whether a described DOM node is an `<input type=file>`.
 *
 * `node.attributes` is read as a flat list of alternating names and values
 * ([name1, value1, name2, value2, ...]); the tag name and the `type` value
 * are compared case-insensitively.
 *
 * @param node Object with optional `nodeName` and `attributes`.
 * @returns true only for an input whose `type` attribute is "file".
 */
function isFileInputNode(node) {
    const tagName = (node.nodeName ?? '').toLowerCase();
    if (tagName !== 'input') {
        return false;
    }
    const flat = node.attributes ?? [];
    let at = 0;
    while (at < flat.length) {
        const name = flat[at];
        const value = flat[at + 1];
        if (name === 'type' && value?.toLowerCase() === 'file') {
            return true;
        }
        at += 2;
    }
    return false;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@phnx-labs/agents-cli",
3
- "version": "1.17.4",
3
+ "version": "1.17.6",
4
4
  "description": "One CLI for all your AI coding agents - versions, config, cloud dispatch, sessions, and teams",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",