npm - gologin-web-access - Versions diffs - 0.3.0 - Mend

gologin-web-access 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

package/CHANGELOG.md +19 -0
package/LICENSE +21 -0
package/README.md +344 -0
package/dist/cli.js +173 -0
package/dist/commands/back.js +13 -0
package/dist/commands/batch.js +81 -0
package/dist/commands/batchChangeTrack.js +99 -0
package/dist/commands/batchExtract.js +97 -0
package/dist/commands/batchScrape.js +140 -0
package/dist/commands/changeTrack.js +65 -0
package/dist/commands/check.js +14 -0
package/dist/commands/click.js +14 -0
package/dist/commands/close.js +19 -0
package/dist/commands/configInit.js +77 -0
package/dist/commands/configShow.js +23 -0
package/dist/commands/cookies.js +22 -0
package/dist/commands/cookiesClear.js +13 -0
package/dist/commands/cookiesImport.js +14 -0
package/dist/commands/crawl.js +71 -0
package/dist/commands/crawlErrors.js +20 -0
package/dist/commands/crawlResult.js +27 -0
package/dist/commands/crawlStart.js +56 -0
package/dist/commands/crawlStatus.js +25 -0
package/dist/commands/current.js +14 -0
package/dist/commands/dblclick.js +14 -0
package/dist/commands/eval.js +20 -0
package/dist/commands/extract.js +44 -0
package/dist/commands/fill.js +15 -0
package/dist/commands/find.js +16 -0
package/dist/commands/focus.js +14 -0
package/dist/commands/forward.js +13 -0
package/dist/commands/get.js +15 -0
package/dist/commands/hover.js +14 -0
package/dist/commands/jobs.js +47 -0
package/dist/commands/map.js +61 -0
package/dist/commands/open.js +22 -0
package/dist/commands/parseDocument.js +34 -0
package/dist/commands/pdf.js +14 -0
package/dist/commands/press.js +15 -0
package/dist/commands/read.js +51 -0
package/dist/commands/reload.js +13 -0
package/dist/commands/run.js +76 -0
package/dist/commands/scrape.js +19 -0
package/dist/commands/scrapeJson.js +24 -0
package/dist/commands/scrapeMarkdown.js +37 -0
package/dist/commands/scrapeScreenshot.js +65 -0
package/dist/commands/scrapeText.js +37 -0
package/dist/commands/screenshot.js +23 -0
package/dist/commands/scroll.js +23 -0
package/dist/commands/scrollIntoView.js +14 -0
package/dist/commands/search.js +39 -0
package/dist/commands/searchBrowser.js +28 -0
package/dist/commands/select.js +15 -0
package/dist/commands/sessions.js +14 -0
package/dist/commands/shared.js +102 -0
package/dist/commands/snapshot.js +18 -0
package/dist/commands/storageClear.js +18 -0
package/dist/commands/storageExport.js +26 -0
package/dist/commands/storageImport.js +23 -0
package/dist/commands/tabClose.js +18 -0
package/dist/commands/tabFocus.js +15 -0
package/dist/commands/tabOpen.js +19 -0
package/dist/commands/tabs.js +13 -0
package/dist/commands/type.js +15 -0
package/dist/commands/uncheck.js +14 -0
package/dist/commands/upload.js +15 -0
package/dist/commands/wait.js +27 -0
package/dist/config.js +260 -0
package/dist/doctor.js +86 -0
package/dist/internal-agent/cli.js +336 -0
package/dist/internal-agent/commands/back.js +12 -0
package/dist/internal-agent/commands/check.js +17 -0
package/dist/internal-agent/commands/click.js +17 -0
package/dist/internal-agent/commands/close.js +12 -0
package/dist/internal-agent/commands/cookies.js +23 -0
package/dist/internal-agent/commands/cookiesClear.js +12 -0
package/dist/internal-agent/commands/cookiesImport.js +18 -0
package/dist/internal-agent/commands/current.js +9 -0
package/dist/internal-agent/commands/dblclick.js +17 -0
package/dist/internal-agent/commands/doctor.js +53 -0
package/dist/internal-agent/commands/eval.js +30 -0
package/dist/internal-agent/commands/fill.js +18 -0
package/dist/internal-agent/commands/find.js +86 -0
package/dist/internal-agent/commands/focus.js +17 -0
package/dist/internal-agent/commands/forward.js +12 -0
package/dist/internal-agent/commands/get.js +19 -0
package/dist/internal-agent/commands/hover.js +17 -0
package/dist/internal-agent/commands/open.js +67 -0
package/dist/internal-agent/commands/pdf.js +18 -0
package/dist/internal-agent/commands/press.js +19 -0
package/dist/internal-agent/commands/reload.js +12 -0
package/dist/internal-agent/commands/screenshot.js +22 -0
package/dist/internal-agent/commands/scroll.js +25 -0
package/dist/internal-agent/commands/scrollIntoView.js +17 -0
package/dist/internal-agent/commands/select.js +18 -0
package/dist/internal-agent/commands/sessions.js +15 -0
package/dist/internal-agent/commands/shared.js +51 -0
package/dist/internal-agent/commands/snapshot.js +16 -0
package/dist/internal-agent/commands/storageClear.js +13 -0
package/dist/internal-agent/commands/storageExport.js +24 -0
package/dist/internal-agent/commands/storageImport.js +20 -0
package/dist/internal-agent/commands/tabClose.js +21 -0
package/dist/internal-agent/commands/tabFocus.js +21 -0
package/dist/internal-agent/commands/tabOpen.js +13 -0
package/dist/internal-agent/commands/tabs.js +17 -0
package/dist/internal-agent/commands/type.js +18 -0
package/dist/internal-agent/commands/uncheck.js +17 -0
package/dist/internal-agent/commands/upload.js +18 -0
package/dist/internal-agent/commands/wait.js +41 -0
package/dist/internal-agent/daemon/browser.js +818 -0
package/dist/internal-agent/daemon/refStore.js +26 -0
package/dist/internal-agent/daemon/server.js +330 -0
package/dist/internal-agent/daemon/sessionManager.js +684 -0
package/dist/internal-agent/daemon/snapshot.js +285 -0
package/dist/internal-agent/lib/config.js +59 -0
package/dist/internal-agent/lib/daemon.js +300 -0
package/dist/internal-agent/lib/errors.js +63 -0
package/dist/internal-agent/lib/types.js +2 -0
package/dist/internal-agent/lib/utils.js +165 -0
package/dist/jobRunner.js +56 -0
package/dist/lib/agentCli.js +158 -0
package/dist/lib/browserRead.js +125 -0
package/dist/lib/browserStructured.js +77 -0
package/dist/lib/changeTracking.js +117 -0
package/dist/lib/cloudApi.js +41 -0
package/dist/lib/concurrency.js +15 -0
package/dist/lib/crawl.js +313 -0
package/dist/lib/document.js +170 -0
package/dist/lib/errors.js +55 -0
package/dist/lib/extract.js +65 -0
package/dist/lib/extractRunner.js +22 -0
package/dist/lib/jobRegistry.js +164 -0
package/dist/lib/output.js +122 -0
package/dist/lib/readSource.js +297 -0
package/dist/lib/runbooks.js +193 -0
package/dist/lib/search.js +727 -0
package/dist/lib/selfCli.js +136 -0
package/dist/lib/structuredScrape.js +83 -0
package/dist/lib/types.js +2 -0
package/dist/lib/unlocker.js +383 -0
package/package.json +67 -0

package/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Changelog
+## Unreleased
+- browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contains both Web Unlocker and Cloud Browser flows
+- doctor now reports the embedded browser runtime source and version
+## 0.1.0 - 2026-03-10
+Initial public release of Gologin Web Access.
+Highlights:
+- Unified CLI entry point for Gologin Web Unlocker and Gologin Cloud Browser workflows
+- Scraping commands: `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, `batch-scrape`
+- Browser commands: `open`, `snapshot`, `click`, `type`, `screenshot`, `close`, `sessions`, `current`
+- Clear two-key configuration model with `GOLOGIN_WEB_UNLOCKER_API_KEY` and `GOLOGIN_CLOUD_TOKEN`
+- `doctor`, `config show`, and `config init` to reduce setup friction
+- Compatibility support for legacy env names used by existing Gologin tools

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Gologin
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,344 @@
+# Gologin Web Access
+Gologin Web Access lets developers and AI agents read and interact with the web using Gologin Web Unlocker and Gologin Cloud Browser.
+This is a unified web access layer, not just a scraping tool and not just a browser automation tool.
+- Read the web through stateless extraction APIs
+- Interact with the web through stateful cloud browser sessions
+- Carry Gologin’s browser-side strengths into those workflows: profiles, identity-aware browser sessions, cloud browser infrastructure, and Gologin’s profile/proxy stack when you run against a configured profile
+Package name and binary are the same:
+- npm package: `gologin-web-access`
+- command: `gologin-web-access`
+## What It Unifies
+Gologin Web Access combines two existing product surfaces behind one CLI:
+- Web Unlocker
+  Stateless read and extraction. Best when you want page content quickly without maintaining a browser session.
+- Cloud Browser
+  Stateful interaction. Best when you need navigation, clicks, typing, screenshots, or multi-step flows that persist across commands.
+The point of the unified CLI is that both modes live in one product with one command surface and one config model, while still being honest about which credential powers which workflow. Recommended setup is still to configure both credentials up front so agents do not stop to ask for missing keys mid-task.
+## Command Groups
+### Scraping / Read
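+These commands use Gologin Web Unlocker by default (`run`, `batch`, `jobs`, and `job` are workflow/job commands that the CLI help lists under a separate "Agent" group):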
+- `gologin-web-access scrape <url>`
+- `gologin-web-access read <url> [--format text|markdown|html] [--source auto|unlocker|browser]`
+- `gologin-web-access scrape-markdown <url> [--source auto|unlocker|browser]`
+- `gologin-web-access scrape-text <url> [--source auto|unlocker|browser]`
+- `gologin-web-access scrape-json <url> [--fallback none|browser]`
+- `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|unlocker|browser] [--only-main-content] [--retry <n>] [--backoff-ms <ms>] [--summary]`
+- `gologin-web-access batch-extract <url...> --schema <schema.json> [--source auto|unlocker|browser] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
+- `gologin-web-access search <query> [--limit <n>] [--country <cc>] [--language <lang>] [--source auto|unlocker|browser]`
+- `gologin-web-access map <url> [--limit <n>] [--max-depth <n>] [--concurrency <n>] [--strict]`
+- `gologin-web-access crawl <url> [--format html|markdown|text|json] [--limit <n>] [--max-depth <n>] [--only-main-content] [--strict]`
+- `gologin-web-access crawl-start <url> ...`
+- `gologin-web-access crawl-status <jobId>`
+- `gologin-web-access crawl-result <jobId>`
+- `gologin-web-access crawl-errors <jobId>`
+- `gologin-web-access extract <url> --schema <schema.json> [--source auto|unlocker|browser]`
+- `gologin-web-access change-track <url> [--format html|markdown|text|json]`
+- `gologin-web-access batch-change-track <url...> [--format html|markdown|text|json] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
+- `gologin-web-access parse-document <url-or-path>`
+- `gologin-web-access run <runbook.json>`
+- `gologin-web-access batch <runbook.json> --targets <targets.json>`
+- `gologin-web-access jobs`
+- `gologin-web-access job <jobId>`
+Use these when you want stateless page retrieval or extracted content.
+### Browser / Interact
+These commands use Gologin Cloud Browser through the local daemon-backed agent layer:
+- `gologin-web-access open <url> [--profile <id>]`
+- `gologin-web-access search-browser <query> [--profile <id>]`
+- `gologin-web-access scrape-screenshot <url> [path] [--profile <id>]`
+- `gologin-web-access tabs`
+- `gologin-web-access tabopen [url]`
+- `gologin-web-access tabfocus <index>`
+- `gologin-web-access tabclose [index]`
+- `gologin-web-access snapshot`
+- `gologin-web-access click <ref>`
+- `gologin-web-access dblclick <ref>`
+- `gologin-web-access focus <ref>`
+- `gologin-web-access type <ref> <text>`
+- `gologin-web-access fill <ref> <text>`
+- `gologin-web-access hover <ref>`
+- `gologin-web-access select <ref> <value>`
+- `gologin-web-access check <ref>`
+- `gologin-web-access uncheck <ref>`
+- `gologin-web-access press <key> [target]`
+- `gologin-web-access scroll <direction> [pixels]`
+- `gologin-web-access scrollintoview <ref>`
+- `gologin-web-access wait <target|ms>`
+- `gologin-web-access get <kind> [target]`
+- `gologin-web-access back`
+- `gologin-web-access forward`
+- `gologin-web-access reload`
+- `gologin-web-access find ...`
+- `gologin-web-access cookies [--output <path>] [--json]`
+- `gologin-web-access cookies-import <cookies.json>`
+- `gologin-web-access cookies-clear`
+- `gologin-web-access storage-export [path] [--scope <local|session|both>]`
+- `gologin-web-access storage-import <storage.json> [--scope <local|session|both>] [--clear]`
+- `gologin-web-access storage-clear [--scope <local|session|both>]`
+- `gologin-web-access eval <expression>`
+- `gologin-web-access upload <ref> <file...>`
+- `gologin-web-access pdf <path>`
+- `gologin-web-access screenshot <path>`
+- `gologin-web-access close`
+- `gologin-web-access sessions`
+- `gologin-web-access current`
+Use these when you need state, interaction, or multi-step browser flows.
+## When To Use `scrape` vs `browser`
+- Use `scrape` commands when you need page content, extracted text, markdown, or simple structured output.
+- Use `search` when you need web discovery or SERP results before deciding what to scrape. It now tries multiple search paths automatically, validates that the response is a real SERP, and reuses a short local cache for repeated queries.
+- Use `map` when you need internal link discovery or a site inventory.
+- Use `crawl` when you need multi-page read-only extraction across a site.
+- Use `crawl-start` plus `crawl-status` and `crawl-result` when the crawl should run detached.
+- Use `extract` when you want deterministic structured output from CSS selectors rather than generic page summaries.
+- Use `batch-extract` when the same selector schema should run across many known URLs.
+- Use `change-track` when you want local change detection against the last stored snapshot of a page.
+- Use `batch-change-track` when you want to monitor a watchlist of pages in one pass.
+- Use `parse-document` when the source is a PDF, DOCX, XLSX, HTML, or local document path instead of a normal HTML page.
+- Use browser commands when you need clicks, forms, navigation, screenshots, sessions, or logged-in/profile-backed flows.
+- Use browser commands when you need ref-based interaction, uploads, PDFs, semantic find flows, keyboard control, or a browser-visible search journey.
+- Use `run` and `batch` when you want reusable workflows or multi-target execution on top of the CLI surface.
+- Use `scrape` when stateless speed matters more than interaction.
+- Use browser commands when the site requires state, continuity, or real browser behavior.
+## Why This Is Not Just A Read-Only Crawler
+The read layer matters, but this product is broader than a Firecrawl-like “read the page” use case.
+What makes Gologin Web Access different is the ability to move from stateless extraction into stateful browser interaction without leaving the CLI:
+- Browser sessions can run through Gologin Cloud Browser instead of a local one-off browser process.
+- Browser workflows can use a Gologin profile via `--profile` or `GOLOGIN_DEFAULT_PROFILE_ID`.
+- That gives the CLI access to Gologin’s identity/profile model and session layer, instead of stopping at raw fetches.
+- When a configured profile carries proxy settings, those browser-side capabilities come from the Gologin browser stack rather than from a separate scraping-only pipeline.
+This README only documents what the current CLI actually implements. It does not claim extra browser capabilities beyond the commands listed above.
+## Command Structure Choice
+The current CLI keeps commands flat:
+- `gologin-web-access scrape ...`
+- `gologin-web-access scrape-markdown ...`
+- `gologin-web-access open ...`
+- `gologin-web-access snapshot`
+This is clearer right now than introducing a `browser` namespace such as `gologin-web-access browser open`.
+Why:
+- The command surface is still compact.
+- Flat commands are shorter for both humans and AI agents.
+- The read vs interact split is already explicit through the command names and documentation.
+If the browser surface grows substantially later, a nested namespace may become worth adding. For the current product, flat commands are simpler.
+## Credentials And Config
+This CLI uses two different Gologin credentials on purpose, because the underlying products are different.
+- `GOLOGIN_WEB_UNLOCKER_API_KEY`
+  Required for Scraping / Read commands.
+- `GOLOGIN_CLOUD_TOKEN`
+  Required for `gologin-web-access open` and for profile validation in `gologin-web-access doctor`.
+- `GOLOGIN_DEFAULT_PROFILE_ID`
+  Optional default profile for browser flows.
+- `GOLOGIN_DAEMON_PORT`
+  Optional local daemon port for browser workflows.
+Recommended full setup for agents is to configure both `GOLOGIN_WEB_UNLOCKER_API_KEY` and `GOLOGIN_CLOUD_TOKEN` before starting work, even if the current task looks read-only or browser-only.
+Missing-key errors are command-group specific. Example:
+``Missing GOLOGIN_WEB_UNLOCKER_API_KEY. This is required for scraping commands like `gologin-web-access scrape`.``
+Environment variables are the primary configuration mechanism:
+```bash
+export GOLOGIN_WEB_UNLOCKER_API_KEY="wu_..."
+export GOLOGIN_CLOUD_TOKEN="gl_..."
+export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
+export GOLOGIN_DAEMON_PORT="4590"
+```
+If you do not want to `source ~/.zprofile` in every shell, run:
+```bash
+gologin-web-access config init
+```
+Useful variants:
+```bash
+gologin-web-access config init --web-unlocker-api-key wu_... --cloud-token gl_...
+gologin-web-access config init --web-unlocker-key wu_... --cloud-token gl_...
+```
+That writes `~/.gologin-web-access/config.json` once and the CLI will keep reading it on later runs.
+By default `config init` also validates both keys immediately so you find bad credentials during setup instead of on the first real request. Use `--no-validate` only when you intentionally want an offline write.
+You can also write a minimal config file at `~/.gologin-web-access/config.json`:
+```json
+{
+  "webUnlockerApiKey": "wu_...",
+  "cloudToken": "gl_...",
+  "defaultProfileId": "profile_123",
+  "daemonPort": 4590
+}
+```
+Gologin Web Access will also read the older path `~/.gologin-web/config.json` if it already exists, but new config writes go to `~/.gologin-web-access/config.json`.
+Backward-compatible aliases are also accepted for existing setups:
+- `GOLOGIN_WEBUNLOCKER_API_KEY`
+- `GOLOGIN_TOKEN`
+- `GOLOGIN_PROFILE_ID`
+Useful config commands:
+```bash
+gologin-web-access version
+gologin-web-access config init
+gologin-web-access config show
+gologin-web-access doctor
+```
+`doctor` reports the embedded Cloud Browser runtime bundled inside this package, whether the local daemon is reachable, and whether the recommended two-key setup is complete.
+## Install
+```bash
+npm install -g gologin-web-access
+```
+## Quickstart
+### Read A Page
+```bash
+export GOLOGIN_WEB_UNLOCKER_API_KEY="wu_..."
+gologin-web-access scrape https://example.com
+gologin-web-access read https://docs.browserbase.com/features/stealth-mode
+gologin-web-access scrape-markdown https://example.com/docs
+gologin-web-access scrape-text https://docs.browserbase.com/features/stealth-mode
+gologin-web-access scrape-json https://example.com --fallback browser
+gologin-web-access batch-scrape https://docs.browserbase.com/features/contexts https://docs.browserbase.com/features/proxies --format text --only-main-content --summary
+gologin-web-access batch-extract https://example.com https://www.iana.org/help/example-domains --schema ./schema.json --summary --output ./artifacts/extract.json
+gologin-web-access search "gologin antidetect browser" --limit 5
+gologin-web-access search "gologin antidetect browser" --limit 5 --source auto
+gologin-web-access map https://example.com --limit 50 --max-depth 2
+gologin-web-access crawl https://docs.browserbase.com --format text --limit 20 --max-depth 2 --only-main-content
+gologin-web-access crawl-start https://example.com --limit 20 --max-depth 2
+gologin-web-access extract https://example.com --schema ./schema.json
+gologin-web-access change-track https://example.com --format markdown
+gologin-web-access batch-change-track https://example.com https://example.org --format text --summary --output ./artifacts/watchlist.json
+gologin-web-access parse-document ./example.pdf
+```
+### Interact With A Site
+```bash
+export GOLOGIN_CLOUD_TOKEN="gl_..."
+export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
+gologin-web-access open https://example.com
+gologin-web-access tabs
+gologin-web-access snapshot
+gologin-web-access click e3
+gologin-web-access type e5 "search terms"
+gologin-web-access wait 1500
+gologin-web-access get title
+gologin-web-access eval "document.title"
+gologin-web-access cookies --output ./cookies.json
+gologin-web-access storage-export ./storage.json
+gologin-web-access screenshot ./page.png
+gologin-web-access current
+gologin-web-access close
+```
+### Search In A Real Browser
+```bash
+export GOLOGIN_CLOUD_TOKEN="gl_..."
+gologin-web-access search-browser "gologin antidetect browser"
+gologin-web-access snapshot -i
+```
+## Structured Output And Retry Controls
+- `scrape-markdown` and `scrape-text` now default to `--source auto`: they start with Unlocker, isolate the most readable content block, and can auto-retry with Cloud Browser when the output still looks like JS-rendered docs chrome.
+- `read` is the shortest path for "look at this docs page" work: it targets the most readable content block and defaults to `--format text --source auto`.
+- `scrape-markdown` and `scrape-text` also accept `--source unlocker` and `--source browser` when you want to force one path.
+- `extract` now accepts `--source auto|unlocker|browser` and returns `renderSource`, fallback flags, and request metadata with the extracted JSON.
+- `batch-extract` reuses the same extraction path across many URLs and returns one structured result per URL, including request and fallback metadata. Add `--output <path>` to save the full array directly.
+- `scrape-json` now returns both a flat `headings` array and `headingsByLevel` buckets for `h1` through `h6`.
+- `scrape-json --fallback browser` is available for JS-heavy pages where stateless extraction returns weak heading data.
+- `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, and `batch-scrape` accept `--retry`, `--backoff-ms`, and `--timeout-ms`.
+- `batch-scrape --only-main-content` lets markdown, text, and html batch runs use the same readable-content isolation path as `read`.
+- `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless unlocker path.
+- `batch-scrape --summary` prints a one-line success/failure summary to `stderr` after the JSON payload.
+- `batch-scrape --format json` now returns the same structured scrape envelope as `scrape-json`, including `renderSource`, `fallbackAttempted`, `fallbackUsed`, and `request.attemptCount/retryCount/attempts`.
+- `search` now returns `requestedLimit`, `returnedCount`, `warnings`, `cacheTtlMs`, and per-result `position`.
+- `search` may return fewer results than the requested `--limit` when the upstream SERP contains fewer valid results; inspect `returnedCount`, `warnings`, and `attempts`.
+- `change-track` now accepts `--retry`, `--backoff-ms`, and `--timeout-ms`, and JSON output includes request metadata.
+- `batch-change-track` tracks many pages in one pass and reports per-URL `new|same|changed` status plus a summary line when `--summary` is used. Add `--output <path>` to save the full watchlist result directly.
+### Reusable Workflows
+```bash
+gologin-web-access run ./examples/runbook.json --session s1
+gologin-web-access batch ./examples/runbook.json --targets ./examples/targets.json --concurrency 2
+gologin-web-access jobs
+```
+`snapshot` prints refs such as `e1`, `e2`, `e3`. Those refs stay valid until the page changes or you take a new snapshot.
+`map` and `crawl` now return `status: ok|partial|failed`. By default, partial results stay usable and do not exit non-zero. Add `--strict` when any failed page should fail the command.
+## Product Boundaries
+Gologin Web Access still has two runtime layers:
+- Web Unlocker for stateless read and extraction
+- Cloud Browser for stateful interaction
+But both are now shipped inside the same package and the same repository. One install gives you the full read layer and the full browser/session layer.
+## Development
+```bash
+npm install
+npm run build
+npm run typecheck
+npm test
+```
+## Publish
+```bash
+npm publish --access public
+```
+Prepublish checks run automatically through `prepublishOnly`.

package/dist/cli.js ADDED Viewed

@@ -0,0 +1,173 @@
+#!/usr/bin/env node
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const commander_1 = require("commander");
+const back_1 = require("./commands/back");
+const batch_1 = require("./commands/batch");
+const batchChangeTrack_1 = require("./commands/batchChangeTrack");
+const batchExtract_1 = require("./commands/batchExtract");
+const batchScrape_1 = require("./commands/batchScrape");
+const check_1 = require("./commands/check");
+const changeTrack_1 = require("./commands/changeTrack");
+const crawl_1 = require("./commands/crawl");
+const crawlErrors_1 = require("./commands/crawlErrors");
+const crawlResult_1 = require("./commands/crawlResult");
+const crawlStart_1 = require("./commands/crawlStart");
+const crawlStatus_1 = require("./commands/crawlStatus");
+const click_1 = require("./commands/click");
+const close_1 = require("./commands/close");
+const configInit_1 = require("./commands/configInit");
+const configShow_1 = require("./commands/configShow");
+const cookies_1 = require("./commands/cookies");
+const cookiesClear_1 = require("./commands/cookiesClear");
+const cookiesImport_1 = require("./commands/cookiesImport");
+const current_1 = require("./commands/current");
+const dblclick_1 = require("./commands/dblclick");
+const eval_1 = require("./commands/eval");
+const extract_1 = require("./commands/extract");
+const fill_1 = require("./commands/fill");
+const find_1 = require("./commands/find");
+const focus_1 = require("./commands/focus");
+const forward_1 = require("./commands/forward");
+const get_1 = require("./commands/get");
+const hover_1 = require("./commands/hover");
+const jobs_1 = require("./commands/jobs");
+const map_1 = require("./commands/map");
+const open_1 = require("./commands/open");
+const parseDocument_1 = require("./commands/parseDocument");
+const pdf_1 = require("./commands/pdf");
+const press_1 = require("./commands/press");
+const reload_1 = require("./commands/reload");
+const read_1 = require("./commands/read");
+const run_1 = require("./commands/run");
+const scrape_1 = require("./commands/scrape");
+const scrapeJson_1 = require("./commands/scrapeJson");
+const scrapeMarkdown_1 = require("./commands/scrapeMarkdown");
+const scrapeScreenshot_1 = require("./commands/scrapeScreenshot");
+const scrapeText_1 = require("./commands/scrapeText");
+const scroll_1 = require("./commands/scroll");
+const scrollIntoView_1 = require("./commands/scrollIntoView");
+const searchBrowser_1 = require("./commands/searchBrowser");
+const search_1 = require("./commands/search");
+const select_1 = require("./commands/select");
+const screenshot_1 = require("./commands/screenshot");
+const sessions_1 = require("./commands/sessions");
+const snapshot_1 = require("./commands/snapshot");
+const storageClear_1 = require("./commands/storageClear");
+const storageExport_1 = require("./commands/storageExport");
+const storageImport_1 = require("./commands/storageImport");
+const tabClose_1 = require("./commands/tabClose");
+const tabFocus_1 = require("./commands/tabFocus");
+const tabOpen_1 = require("./commands/tabOpen");
+const tabs_1 = require("./commands/tabs");
+const uncheck_1 = require("./commands/uncheck");
+const type_1 = require("./commands/type");
+const upload_1 = require("./commands/upload");
+const wait_1 = require("./commands/wait");
+const doctor_1 = require("./doctor");
+const errors_1 = require("./lib/errors");
+const output_1 = require("./lib/output");
+// Version string reported by commander's built-in version flag and by the
+// explicit `version` subcommand registered in main().
+// NOTE(review): hard-coded here; presumably has to be kept in sync with
+// package.json on every release — confirm against the build process.
+const CLI_VERSION = "0.3.0";
+/**
+ * CLI entry point.
+ *
+ * Builds the root commander program, registers every subcommand builder,
+ * defines the inline `doctor`, `version`, and `config` commands, appends a
+ * grouped help footer, and finally parses `process.argv`.
+ *
+ * @returns {Promise<void>} Resolves once `program.parseAsync` has finished.
+ */
+async function main() {
+    const program = new commander_1.Command();
+    program
+        .name("gologin-web-access")
+        .description("Read and interact with the web using Gologin Web Unlocker and Cloud Browser.")
+        .version(CLI_VERSION)
+        .showHelpAfterError()
+        .showSuggestionAfterError();
+    // Builders below presumably correspond to the "Scraping" group named in
+    // the help footer at the end of this function (scrape ... parse-document).
+    program.addCommand((0, scrape_1.buildScrapeCommand)());
+    program.addCommand((0, read_1.buildReadCommand)());
+    program.addCommand((0, scrapeMarkdown_1.buildScrapeMarkdownCommand)());
+    program.addCommand((0, scrapeText_1.buildScrapeTextCommand)());
+    program.addCommand((0, scrapeJson_1.buildScrapeJsonCommand)());
+    program.addCommand((0, batchScrape_1.buildBatchScrapeCommand)());
+    program.addCommand((0, batchExtract_1.buildBatchExtractCommand)());
+    program.addCommand((0, search_1.buildSearchCommand)());
+    program.addCommand((0, map_1.buildMapCommand)());
+    program.addCommand((0, crawl_1.buildCrawlCommand)());
+    program.addCommand((0, crawlStart_1.buildCrawlStartCommand)());
+    program.addCommand((0, crawlStatus_1.buildCrawlStatusCommand)());
+    program.addCommand((0, crawlResult_1.buildCrawlResultCommand)());
+    program.addCommand((0, crawlErrors_1.buildCrawlErrorsCommand)());
+    program.addCommand((0, extract_1.buildExtractCommand)());
+    program.addCommand((0, changeTrack_1.buildChangeTrackCommand)());
+    program.addCommand((0, batchChangeTrack_1.buildBatchChangeTrackCommand)());
+    program.addCommand((0, parseDocument_1.buildParseDocumentCommand)());
+    // Presumably the "Agent" group from the help footer (run|batch|jobs|job).
+    // Note that ./commands/jobs supplies both the jobs and the job builder.
+    program.addCommand((0, run_1.buildRunCommand)());
+    program.addCommand((0, batch_1.buildBatchCommand)());
+    program.addCommand((0, jobs_1.buildJobsCommand)());
+    program.addCommand((0, jobs_1.buildJobCommand)());
+    // Presumably the "Browser" group from the help footer (open ... current).
+    program.addCommand((0, open_1.buildOpenCommand)());
+    program.addCommand((0, searchBrowser_1.buildSearchBrowserCommand)());
+    program.addCommand((0, scrapeScreenshot_1.buildScrapeScreenshotCommand)());
+    program.addCommand((0, tabs_1.buildTabsCommand)());
+    program.addCommand((0, tabOpen_1.buildTabOpenCommand)());
+    program.addCommand((0, tabFocus_1.buildTabFocusCommand)());
+    program.addCommand((0, tabClose_1.buildTabCloseCommand)());
+    program.addCommand((0, snapshot_1.buildSnapshotCommand)());
+    program.addCommand((0, click_1.buildClickCommand)());
+    program.addCommand((0, dblclick_1.buildDoubleClickCommand)());
+    program.addCommand((0, focus_1.buildFocusCommand)());
+    program.addCommand((0, type_1.buildTypeCommand)());
+    program.addCommand((0, fill_1.buildFillCommand)());
+    program.addCommand((0, hover_1.buildHoverCommand)());
+    program.addCommand((0, select_1.buildSelectCommand)());
+    program.addCommand((0, check_1.buildCheckCommand)());
+    program.addCommand((0, uncheck_1.buildUncheckCommand)());
+    program.addCommand((0, press_1.buildPressCommand)());
+    program.addCommand((0, scroll_1.buildScrollCommand)());
+    program.addCommand((0, scrollIntoView_1.buildScrollIntoViewCommand)());
+    program.addCommand((0, wait_1.buildWaitCommand)());
+    program.addCommand((0, get_1.buildGetCommand)());
+    program.addCommand((0, back_1.buildBackCommand)());
+    program.addCommand((0, forward_1.buildForwardCommand)());
+    program.addCommand((0, reload_1.buildReloadCommand)());
+    program.addCommand((0, find_1.buildFindCommand)());
+    program.addCommand((0, cookies_1.buildCookiesCommand)());
+    program.addCommand((0, cookiesImport_1.buildCookiesImportCommand)());
+    program.addCommand((0, cookiesClear_1.buildCookiesClearCommand)());
+    program.addCommand((0, storageExport_1.buildStorageExportCommand)());
+    program.addCommand((0, storageImport_1.buildStorageImportCommand)());
+    program.addCommand((0, storageClear_1.buildStorageClearCommand)());
+    program.addCommand((0, eval_1.buildEvalCommand)());
+    program.addCommand((0, upload_1.buildUploadCommand)());
+    program.addCommand((0, pdf_1.buildPdfCommand)());
+    program.addCommand((0, screenshot_1.buildScreenshotCommand)());
+    program.addCommand((0, close_1.buildCloseCommand)());
+    program.addCommand((0, sessions_1.buildSessionsCommand)());
+    program.addCommand((0, current_1.buildCurrentCommand)());
+    // `doctor` is defined inline; the parsed options object (which may carry
+    // the --json flag) is handed to runDoctor unchanged.
+    program
+        .command("doctor")
+        .description("Inspect both recommended keys, profile configuration, and local daemon health.")
+        .option("--json", "Print JSON output")
+        .action(async (options) => {
+        await (0, doctor_1.runDoctor)(options);
+    });
+    // Explicit `version` subcommand; prints the same CLI_VERSION constant
+    // that was passed to .version() above.
+    program
+        .command("version")
+        .description("Print the CLI version.")
+        .action(() => {
+        (0, output_1.printText)(CLI_VERSION);
+    });
+    // `config` is a parent command whose subcommands come from the
+    // configShow and configInit builders.
+    const configGroup = program.command("config").description("Inspect or initialize CLI configuration.");
+    configGroup.addCommand((0, configShow_1.buildConfigShowCommand)());
+    configGroup.addCommand((0, configInit_1.buildConfigInitCommand)());
+    // Help footer appended after commander's generated help. The ${"..."}
+    // interpolations wrap constant string literals, so the text is fixed.
+    program.addHelpText("after", `
+Command groups:
+  Scraping: gologin-web-access scrape|read|scrape-markdown|scrape-text|scrape-json|batch-scrape|batch-extract|search|map|crawl|crawl-start|crawl-status|crawl-result|crawl-errors|extract|change-track|batch-change-track|parse-document
+  Browser:  gologin-web-access open|search-browser|scrape-screenshot|tabs|tabopen|tabfocus|tabclose|snapshot|click|dblclick|focus|type|fill|hover|select|check|uncheck|press|scroll|scrollintoview|wait|get|back|forward|reload|find|cookies|cookies-import|cookies-clear|storage-export|storage-import|storage-clear|eval|upload|pdf|screenshot|close|sessions|current
+  Agent:    gologin-web-access run|batch|jobs|job
+Key model:
+  ${"GOLOGIN_WEB_UNLOCKER_API_KEY"} powers scraping commands.
+  ${"GOLOGIN_CLOUD_TOKEN"} powers browser commands.
+  Recommended setup: configure both keys up front, even if the current task only needs one path.
+`);
+    // parseAsync is awaited so that rejections from async command actions
+    // surface as a rejection of main().
+    await program.parseAsync(process.argv);
+}
+// Top-level launch. Any rejection from main() is converted with toCliError,
+// reported through printError, and the process exits with the resulting
+// error's exitCode. The leading `void` discards the promise returned by
+// .catch(), marking it as intentionally not awaited.
+void main().catch((error) => {
+    const cliError = (0, errors_1.toCliError)(error);
+    (0, output_1.printError)(cliError);
+    process.exit(cliError.exitCode);
+});

package/dist/commands/back.js ADDED Viewed

@@ -0,0 +1,13 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.buildBackCommand = buildBackCommand;
+const commander_1 = require("commander");
+const shared_1 = require("./shared");
+function buildBackCommand() {
+    const command = new commander_1.Command("back")
+        .description("Navigate back in the active Cloud Browser tab history.")
+        .action(async (options) => {
+        await (0, shared_1.runBrowserCommand)(["back"], { session: options.session });
+    });
+    return (0, shared_1.addSessionOption)(command);
+}

package/dist/commands/batch.js ADDED Viewed

@@ -0,0 +1,81 @@
+"use strict";
+// Interop helper for default imports: a module flagged `__esModule` is returned unchanged,
+// anything else is wrapped as `{ default: mod }`. An existing `__importDefault` found on
+// `this` is reused instead of defining a new one.
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.buildBatchCommand = buildBatchCommand;
+const path_1 = __importDefault(require("path"));
+const commander_1 = require("commander");
+const config_1 = require("../config");
+const jobRegistry_1 = require("../lib/jobRegistry");
+const runbooks_1 = require("../lib/runbooks");
+const output_1 = require("../lib/output");
+function buildBatchCommand() {
+    return new commander_1.Command("batch")
+        .description("Execute a runbook across multiple profile/session targets.")
+        .argument("<runbookPath>", "Path to the runbook JSON file")
+        .requiredOption("--targets <path>", "Path to a batch targets JSON file")
+        .option("--concurrency <count>", "Maximum number of targets to run in parallel")
+        .option("--vars <path>", "Path to a JSON variables file")
+        .option("--name <name>", "Override the stored job name")
+        .option("--continue-on-error", "Continue after failed steps inside each target")
+        .option("--json", "Print the final job record as JSON")
+        .action(async (runbookPath, options) => {
+        const config = await (0, config_1.loadConfig)();
+        const runbook = (0, runbooks_1.loadRunbookDefinition)(process.cwd(), runbookPath);
+        const batch = (0, runbooks_1.loadBatchDefinition)(process.cwd(), options.targets);
+        const variables = options.vars ? (0, runbooks_1.loadVariablesFile)(process.cwd(), options.vars) : undefined;
+        const absoluteRunbookPath = path_1.default.resolve(runbookPath);
+        const job = await (0, jobRegistry_1.createJob)(config, {
+            kind: "batch",
+            name: options.name ?? path_1.default.basename(absoluteRunbookPath, path_1.default.extname(absoluteRunbookPath)),
+            cwd: process.cwd(),
+            args: process.argv.slice(2),
+            metadata: {
+                runbookPath: absoluteRunbookPath,
+                targetsPath: path_1.default.resolve(options.targets)
+            }
+        });
+        await (0, jobRegistry_1.markJobRunning)(config, job.jobId);
+        try {
+            const results = await (0, runbooks_1.executeBatch)(runbook, batch, {
+                cwd: process.cwd(),
+                concurrency: options.concurrency ? Number(options.concurrency) : undefined,
+                variables,
+                continueOnError: options.continueOnError === true
+            });
+            const failed = results.filter((target) => target.status === "failed").length;
+            const output = results
+                .map((target) => {
+                const lines = [`target=${target.name} status=${target.status} durationMs=${target.durationMs}`];
+                for (const step of target.steps) {
+                    lines.push(`  step=${step.command} status=${step.status} durationMs=${step.durationMs}`);
+                }
+                return lines.join("\n");
+            })
+                .join("\n");
+            const record = await (0, jobRegistry_1.finalizeJob)(config, job.jobId, {
+                status: failed > 0 ? "partial" : "ok",
+                output,
+                result: results
+            });
+            if (options.json) {
+                (0, output_1.printJson)(record);
+                return;
+            }
+            (0, output_1.printText)(output);
+        }
+        catch (error) {
+            const record = await (0, jobRegistry_1.finalizeJob)(config, job.jobId, {
+                status: "failed",
+                error: error instanceof Error ? error.message : String(error)
+            });
+            if (options.json) {
+                (0, output_1.printJson)(record);
+                process.exitCode = 1;
+                return;
+            }
+            throw error;
+        }
+    });
+}