gologin-agent-browser-cli 0.1.2 → 0.1.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -148,6 +148,10 @@ gologin-agent-browser click @e3
148
148
 
149
149
  Refs are best-effort and should be regenerated after navigation or major DOM changes.
150
150
 
151
+ Most mutating commands will leave the page in a `snapshot=stale` state. When that happens, run `snapshot` again to regenerate refs before relying on them; refs from the old snapshot may no longer resolve.
152
+
153
+ On dynamic pages, `find ...` is usually a better fallback than stale refs because it re-resolves against the live page instead of the last snapshot.
154
+
151
155
  ## More Examples
152
156
 
153
157
  ```bash
@@ -164,6 +168,7 @@ gologin-agent-browser scrollintoview "#submit"
164
168
  gologin-agent-browser find label "Email" fill "test@example.com"
165
169
  gologin-agent-browser upload "input[type='file']" /absolute/path/to/avatar.png
166
170
  gologin-agent-browser wait --text "Welcome"
171
+ gologin-agent-browser screenshot page.png --annotate --press-escape
167
172
  ```
168
173
 
169
174
  ## Commands
@@ -188,7 +193,7 @@ gologin-agent-browser wait --text "Welcome"
188
193
  - `find <role|text|label|placeholder|first|last|nth> ...`
189
194
  - `upload <target> <file...> [--session <sessionId>]`
190
195
  - `pdf <path> [--session <sessionId>]`
191
- - `screenshot <path> [--annotate] [--session <sessionId>]`
196
+ - `screenshot <path> [--annotate] [--press-escape] [--session <sessionId>]`
192
197
  - `close [--session <sessionId>]`
193
198
  - `sessions`
194
199
  - `current`
@@ -218,6 +223,8 @@ gologin-agent-browser screenshot page.png --annotate
218
223
 
219
224
  Targets can be either snapshot refs like `@e4` or raw Playwright/CSS selectors. `find` adds semantic locator flows similar to agent-browser.
220
225
 
226
+ If a ref stops resolving after navigation or a DOM update, take a fresh `snapshot` or use a semantic `find ...` command instead.
227
+
221
228
  `open`, `current`, and `sessions` also expose session metadata in a shell-friendly form:
222
229
 
223
230
  ```text
@@ -240,6 +247,7 @@ Supported aliases:
240
247
  - Snapshot and ref resolution are best-effort. Dynamic pages can invalidate refs after heavy DOM changes or navigation.
241
248
  - Snapshot output is compact and accessibility-informed, but it is not a full accessibility tree dump.
242
249
  - Annotated screenshots are based on the current snapshot/ref model, so labels are also best-effort on highly dynamic pages.
250
+ - `screenshot` has a hard timeout and supports `--press-escape`, which sends an Escape key press before capturing, for pages with modals, chat widgets, or overlay-driven render issues.
243
251
  - The daemon keeps only the latest snapshot ref map for each session.
244
252
  - Real browser sessions require a valid Gologin Cloud Browser account and token. A profile id is optional.
245
253
  - Token-only mode works by provisioning a temporary cloud profile through the Gologin API before connecting to Cloud Browser.
package/dist/cli.js CHANGED
@@ -61,7 +61,7 @@ function printUsage() {
61
61
  " find <role|text|label|placeholder|first|last|nth> ... [--exact]",
62
62
  " upload <target> <file...> [--session <sessionId>]",
63
63
  " pdf <path> [--session <sessionId>]",
64
- " screenshot <path> [--annotate] [--session <sessionId>]",
64
+ " screenshot <path> [--annotate] [--press-escape] [--session <sessionId>]",
65
65
  " close [--session <sessionId>] (aliases: quit, exit)",
66
66
  " sessions",
67
67
  " current",
@@ -9,12 +9,14 @@ async function runScreenshotCommand(context, argv) {
9
9
  const inputPath = parsed.positional[0];
10
10
  const sessionId = (0, utils_1.getFlagString)(parsed, "session");
11
11
  const annotate = (0, utils_1.getFlagBoolean)(parsed, "annotate");
12
+ const pressEscape = (0, utils_1.getFlagBoolean)(parsed, "press-escape");
12
13
  if (!inputPath) {
13
- throw new errors_1.AppError("BAD_REQUEST", "Usage: gologin-agent-browser screenshot <path> [--session <sessionId>]", 400);
14
+ throw new errors_1.AppError("BAD_REQUEST", "Usage: gologin-agent-browser screenshot <path> [--annotate] [--press-escape] [--session <sessionId>]", 400);
14
15
  }
15
16
  const resolvedSessionId = await (0, shared_1.resolveSessionId)(context, sessionId);
16
17
  const targetPath = (0, utils_1.ensureAbsolutePath)(context.cwd, inputPath);
17
- const response = await context.client.request("POST", `/sessions/${resolvedSessionId}/screenshot`, { path: targetPath, annotate });
18
+ const response = await context.client.request("POST", `/sessions/${resolvedSessionId}/screenshot`, { path: targetPath, annotate, pressEscape });
18
19
  const annotated = response.annotated ? " annotated=yes" : "";
19
- context.stdout.write(`screenshot=${response.path} session=${response.sessionId}${annotated}\n`);
20
+ const escaped = response.pressedEscape ? " escape=yes" : "";
21
+ context.stdout.write(`screenshot=${response.path} session=${response.sessionId}${annotated}${escaped}\n`);
20
22
  }
@@ -28,6 +28,7 @@ export declare function scrollElement(locator: Locator, direction: ScrollDirecti
28
28
  export declare function scrollLocatorIntoView(locator: Locator, timeoutMs: number): Promise<void>;
29
29
  export declare function uploadFiles(locator: Locator, files: string[], timeoutMs: number): Promise<void>;
30
30
  export declare function savePdf(page: Page, targetPath: string): Promise<void>;
31
+ export declare function captureScreenshot(page: Page, targetPath: string, timeoutMs: number): Promise<void>;
31
32
  export declare function annotatePageWithRefs(page: Page, refs: Array<{
32
33
  ref: string;
33
34
  x: number;
@@ -25,6 +25,7 @@ exports.scrollElement = scrollElement;
25
25
  exports.scrollLocatorIntoView = scrollLocatorIntoView;
26
26
  exports.uploadFiles = uploadFiles;
27
27
  exports.savePdf = savePdf;
28
+ exports.captureScreenshot = captureScreenshot;
28
29
  exports.annotatePageWithRefs = annotatePageWithRefs;
29
30
  exports.clearPageAnnotations = clearPageAnnotations;
30
31
  exports.closeSessionHandles = closeSessionHandles;
@@ -363,7 +364,7 @@ async function resolveDescriptorLocator(page, descriptor) {
363
364
  return locator;
364
365
  }
365
366
  }
366
- throw new errors_1.AppError("REF_NOT_FOUND", `ref ${descriptor.ref} is not available on the current page`, 404, {
367
+ throw new errors_1.AppError("REF_NOT_FOUND", `ref ${descriptor.ref} is stale or unavailable on the current page; run snapshot again`, 404, {
367
368
  ref: descriptor.ref
368
369
  });
369
370
  }
@@ -514,6 +515,13 @@ async function savePdf(page, targetPath) {
514
515
  printBackground: true
515
516
  });
516
517
  }
518
+ async function captureScreenshot(page, targetPath, timeoutMs) {
519
+ await page.screenshot({
520
+ path: targetPath,
521
+ fullPage: true,
522
+ timeout: timeoutMs
523
+ });
524
+ }
517
525
  async function annotatePageWithRefs(page, refs) {
518
526
  await page.evaluate((labels) => {
519
527
  document.getElementById("__gologin-agent-browser-annotations")?.remove();
@@ -151,7 +151,7 @@ async function handleRequest(request, response) {
151
151
  const screenshotSessionId = matchSessionRoute(pathname, "screenshot");
152
152
  if (method === "POST" && screenshotSessionId) {
153
153
  const body = (await (0, utils_1.readJsonBody)(request));
154
- (0, utils_1.writeJsonResponse)(response, 200, await sessionManager.screenshot(screenshotSessionId, body.path, body.annotate === true));
154
+ (0, utils_1.writeJsonResponse)(response, 200, await sessionManager.screenshot(screenshotSessionId, body.path, body.annotate === true, body.pressEscape === true));
155
155
  return;
156
156
  }
157
157
  const uploadSessionId = matchSessionRoute(pathname, "upload");
@@ -41,7 +41,7 @@ export declare class SessionManager {
41
41
  }): Promise<WaitResponse>;
42
42
  get(sessionId: string | undefined, kind: GetKind, target?: string): Promise<GetResponse>;
43
43
  find(sessionId: string | undefined, request: FindRequest): Promise<FindResponse>;
44
- screenshot(sessionId: string | undefined, targetPath: string, annotate?: boolean): Promise<ScreenshotResponse>;
44
+ screenshot(sessionId: string | undefined, targetPath: string, annotate?: boolean, pressEscape?: boolean): Promise<ScreenshotResponse>;
45
45
  pdf(sessionId: string | undefined, targetPath: string): Promise<PdfResponse>;
46
46
  close(sessionId?: string): Promise<CloseSessionResponse>;
47
47
  listSessions(): Promise<SessionsResponse>;
@@ -110,7 +110,7 @@ class SessionManager {
110
110
  if ((0, utils_1.isRefTarget)(target)) {
111
111
  const descriptor = this.refStore.get(session.sessionId, target);
112
112
  if (!descriptor) {
113
- throw new errors_1.AppError("REF_NOT_FOUND", `ref ${target} is not available in session ${session.sessionId}`, 404, {
113
+ throw new errors_1.AppError("REF_NOT_FOUND", `ref ${target} is stale or unavailable in session ${session.sessionId}; run snapshot again`, 404, {
114
114
  ref: target,
115
115
  sessionId: session.sessionId
116
116
  });
@@ -412,10 +412,16 @@ class SessionManager {
412
412
  value
413
413
  };
414
414
  }
415
- async screenshot(sessionId, targetPath, annotate = false) {
415
+ async screenshot(sessionId, targetPath, annotate = false, pressEscape = false) {
416
416
  const session = await this.getSessionOrThrow(sessionId);
417
+ let pressedEscape = false;
417
418
  try {
418
419
  node_fs_1.default.mkdirSync(node_path_1.default.dirname(targetPath), { recursive: true });
420
+ if (pressEscape) {
421
+ await (0, browser_1.pressKey)(session.page, "Escape", this.config.actionTimeoutMs);
422
+ await new Promise((resolve) => setTimeout(resolve, 200));
423
+ pressedEscape = true;
424
+ }
419
425
  if (annotate) {
420
426
  const snapshot = await (0, snapshot_1.buildSnapshot)(session.page, { interactive: true });
421
427
  this.refStore.set(session.sessionId, snapshot.refs);
@@ -439,10 +445,7 @@ class SessionManager {
439
445
  }
440
446
  await (0, browser_1.annotatePageWithRefs)(session.page, labels);
441
447
  }
442
- await session.page.screenshot({
443
- path: targetPath,
444
- fullPage: true
445
- });
448
+ await (0, browser_1.captureScreenshot)(session.page, targetPath, this.config.navigationTimeoutMs);
446
449
  if (annotate) {
447
450
  await (0, browser_1.clearPageAnnotations)(session.page).catch(() => undefined);
448
451
  }
@@ -451,17 +454,26 @@ class SessionManager {
451
454
  if (annotate) {
452
455
  await (0, browser_1.clearPageAnnotations)(session.page).catch(() => undefined);
453
456
  }
454
- throw new errors_1.AppError("SCREENSHOT_FAILED", error instanceof Error ? error.message : String(error), 500, {
455
- path: targetPath
457
+ const baseMessage = error instanceof Error ? error.message : String(error);
458
+ const hint = pressEscape
459
+ ? "Screenshot timed out or failed even after pressing Escape"
460
+ : "Screenshot timed out or failed; try pressing Escape first or re-run with --press-escape";
461
+ throw new errors_1.AppError("SCREENSHOT_FAILED", `${hint}: ${baseMessage}`, 500, {
462
+ path: targetPath,
463
+ pressEscape
456
464
  });
457
465
  }
466
+ if (pressedEscape && !annotate && session.hasSnapshot) {
467
+ session.staleSnapshot = true;
468
+ }
458
469
  session.lastScreenshotPath = targetPath;
459
470
  this.touchSession(session);
460
471
  return {
461
472
  sessionId: session.sessionId,
462
473
  path: targetPath,
463
474
  url: session.currentUrl,
464
- annotated: annotate
475
+ annotated: annotate,
476
+ pressedEscape
465
477
  };
466
478
  }
467
479
  async pdf(sessionId, targetPath) {
@@ -169,12 +169,14 @@ export interface PressResponse extends ActionResponse {
169
169
  export interface ScreenshotRequest {
170
170
  path: string;
171
171
  annotate?: boolean;
172
+ pressEscape?: boolean;
172
173
  }
173
174
  export interface ScreenshotResponse {
174
175
  sessionId: string;
175
176
  path: string;
176
177
  url: string;
177
178
  annotated?: boolean;
179
+ pressedEscape?: boolean;
178
180
  }
179
181
  export interface CloseSessionResponse {
180
182
  sessionId: string;
package/dist/lib/utils.js CHANGED
@@ -26,7 +26,7 @@ const errors_1 = require("./errors");
26
26
  function parseArgs(argv) {
27
27
  const positional = [];
28
28
  const flags = {};
29
- const booleanFlags = new Set(["interactive", "exact", "annotate"]);
29
+ const booleanFlags = new Set(["interactive", "exact", "annotate", "press-escape"]);
30
30
  for (let index = 0; index < argv.length; index += 1) {
31
31
  const token = argv[index];
32
32
  if (token === "-i") {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gologin-agent-browser-cli",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "Agent-native cloud browser automation CLI for Gologin",
5
5
  "main": "dist/cli.js",
6
6
  "types": "dist/lib/types.d.ts",