gologin-web-access 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/LICENSE +21 -0
  3. package/README.md +344 -0
  4. package/dist/cli.js +173 -0
  5. package/dist/commands/back.js +13 -0
  6. package/dist/commands/batch.js +81 -0
  7. package/dist/commands/batchChangeTrack.js +99 -0
  8. package/dist/commands/batchExtract.js +97 -0
  9. package/dist/commands/batchScrape.js +140 -0
  10. package/dist/commands/changeTrack.js +65 -0
  11. package/dist/commands/check.js +14 -0
  12. package/dist/commands/click.js +14 -0
  13. package/dist/commands/close.js +19 -0
  14. package/dist/commands/configInit.js +77 -0
  15. package/dist/commands/configShow.js +23 -0
  16. package/dist/commands/cookies.js +22 -0
  17. package/dist/commands/cookiesClear.js +13 -0
  18. package/dist/commands/cookiesImport.js +14 -0
  19. package/dist/commands/crawl.js +71 -0
  20. package/dist/commands/crawlErrors.js +20 -0
  21. package/dist/commands/crawlResult.js +27 -0
  22. package/dist/commands/crawlStart.js +56 -0
  23. package/dist/commands/crawlStatus.js +25 -0
  24. package/dist/commands/current.js +14 -0
  25. package/dist/commands/dblclick.js +14 -0
  26. package/dist/commands/eval.js +20 -0
  27. package/dist/commands/extract.js +44 -0
  28. package/dist/commands/fill.js +15 -0
  29. package/dist/commands/find.js +16 -0
  30. package/dist/commands/focus.js +14 -0
  31. package/dist/commands/forward.js +13 -0
  32. package/dist/commands/get.js +15 -0
  33. package/dist/commands/hover.js +14 -0
  34. package/dist/commands/jobs.js +47 -0
  35. package/dist/commands/map.js +61 -0
  36. package/dist/commands/open.js +22 -0
  37. package/dist/commands/parseDocument.js +34 -0
  38. package/dist/commands/pdf.js +14 -0
  39. package/dist/commands/press.js +15 -0
  40. package/dist/commands/read.js +51 -0
  41. package/dist/commands/reload.js +13 -0
  42. package/dist/commands/run.js +76 -0
  43. package/dist/commands/scrape.js +19 -0
  44. package/dist/commands/scrapeJson.js +24 -0
  45. package/dist/commands/scrapeMarkdown.js +37 -0
  46. package/dist/commands/scrapeScreenshot.js +65 -0
  47. package/dist/commands/scrapeText.js +37 -0
  48. package/dist/commands/screenshot.js +23 -0
  49. package/dist/commands/scroll.js +23 -0
  50. package/dist/commands/scrollIntoView.js +14 -0
  51. package/dist/commands/search.js +39 -0
  52. package/dist/commands/searchBrowser.js +28 -0
  53. package/dist/commands/select.js +15 -0
  54. package/dist/commands/sessions.js +14 -0
  55. package/dist/commands/shared.js +102 -0
  56. package/dist/commands/snapshot.js +18 -0
  57. package/dist/commands/storageClear.js +18 -0
  58. package/dist/commands/storageExport.js +26 -0
  59. package/dist/commands/storageImport.js +23 -0
  60. package/dist/commands/tabClose.js +18 -0
  61. package/dist/commands/tabFocus.js +15 -0
  62. package/dist/commands/tabOpen.js +19 -0
  63. package/dist/commands/tabs.js +13 -0
  64. package/dist/commands/type.js +15 -0
  65. package/dist/commands/uncheck.js +14 -0
  66. package/dist/commands/upload.js +15 -0
  67. package/dist/commands/wait.js +27 -0
  68. package/dist/config.js +260 -0
  69. package/dist/doctor.js +86 -0
  70. package/dist/internal-agent/cli.js +336 -0
  71. package/dist/internal-agent/commands/back.js +12 -0
  72. package/dist/internal-agent/commands/check.js +17 -0
  73. package/dist/internal-agent/commands/click.js +17 -0
  74. package/dist/internal-agent/commands/close.js +12 -0
  75. package/dist/internal-agent/commands/cookies.js +23 -0
  76. package/dist/internal-agent/commands/cookiesClear.js +12 -0
  77. package/dist/internal-agent/commands/cookiesImport.js +18 -0
  78. package/dist/internal-agent/commands/current.js +9 -0
  79. package/dist/internal-agent/commands/dblclick.js +17 -0
  80. package/dist/internal-agent/commands/doctor.js +53 -0
  81. package/dist/internal-agent/commands/eval.js +30 -0
  82. package/dist/internal-agent/commands/fill.js +18 -0
  83. package/dist/internal-agent/commands/find.js +86 -0
  84. package/dist/internal-agent/commands/focus.js +17 -0
  85. package/dist/internal-agent/commands/forward.js +12 -0
  86. package/dist/internal-agent/commands/get.js +19 -0
  87. package/dist/internal-agent/commands/hover.js +17 -0
  88. package/dist/internal-agent/commands/open.js +67 -0
  89. package/dist/internal-agent/commands/pdf.js +18 -0
  90. package/dist/internal-agent/commands/press.js +19 -0
  91. package/dist/internal-agent/commands/reload.js +12 -0
  92. package/dist/internal-agent/commands/screenshot.js +22 -0
  93. package/dist/internal-agent/commands/scroll.js +25 -0
  94. package/dist/internal-agent/commands/scrollIntoView.js +17 -0
  95. package/dist/internal-agent/commands/select.js +18 -0
  96. package/dist/internal-agent/commands/sessions.js +15 -0
  97. package/dist/internal-agent/commands/shared.js +51 -0
  98. package/dist/internal-agent/commands/snapshot.js +16 -0
  99. package/dist/internal-agent/commands/storageClear.js +13 -0
  100. package/dist/internal-agent/commands/storageExport.js +24 -0
  101. package/dist/internal-agent/commands/storageImport.js +20 -0
  102. package/dist/internal-agent/commands/tabClose.js +21 -0
  103. package/dist/internal-agent/commands/tabFocus.js +21 -0
  104. package/dist/internal-agent/commands/tabOpen.js +13 -0
  105. package/dist/internal-agent/commands/tabs.js +17 -0
  106. package/dist/internal-agent/commands/type.js +18 -0
  107. package/dist/internal-agent/commands/uncheck.js +17 -0
  108. package/dist/internal-agent/commands/upload.js +18 -0
  109. package/dist/internal-agent/commands/wait.js +41 -0
  110. package/dist/internal-agent/daemon/browser.js +818 -0
  111. package/dist/internal-agent/daemon/refStore.js +26 -0
  112. package/dist/internal-agent/daemon/server.js +330 -0
  113. package/dist/internal-agent/daemon/sessionManager.js +684 -0
  114. package/dist/internal-agent/daemon/snapshot.js +285 -0
  115. package/dist/internal-agent/lib/config.js +59 -0
  116. package/dist/internal-agent/lib/daemon.js +300 -0
  117. package/dist/internal-agent/lib/errors.js +63 -0
  118. package/dist/internal-agent/lib/types.js +2 -0
  119. package/dist/internal-agent/lib/utils.js +165 -0
  120. package/dist/jobRunner.js +56 -0
  121. package/dist/lib/agentCli.js +158 -0
  122. package/dist/lib/browserRead.js +125 -0
  123. package/dist/lib/browserStructured.js +77 -0
  124. package/dist/lib/changeTracking.js +117 -0
  125. package/dist/lib/cloudApi.js +41 -0
  126. package/dist/lib/concurrency.js +15 -0
  127. package/dist/lib/crawl.js +313 -0
  128. package/dist/lib/document.js +170 -0
  129. package/dist/lib/errors.js +55 -0
  130. package/dist/lib/extract.js +65 -0
  131. package/dist/lib/extractRunner.js +22 -0
  132. package/dist/lib/jobRegistry.js +164 -0
  133. package/dist/lib/output.js +122 -0
  134. package/dist/lib/readSource.js +297 -0
  135. package/dist/lib/runbooks.js +193 -0
  136. package/dist/lib/search.js +727 -0
  137. package/dist/lib/selfCli.js +136 -0
  138. package/dist/lib/structuredScrape.js +83 -0
  139. package/dist/lib/types.js +2 -0
  140. package/dist/lib/unlocker.js +383 -0
  141. package/package.json +67 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,19 @@
1
+ # Changelog
2
+
3
+ ## Unreleased
4
+
5
+ - browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contain both Web Unlocker and Cloud Browser flows
6
+ - doctor now reports the embedded browser runtime source and version
7
+
8
+ ## 0.1.0 - 2026-03-10
9
+
10
+ Initial public release of Gologin Web Access.
11
+
12
+ Highlights:
13
+
14
+ - Unified CLI entry point for Gologin Web Unlocker and Gologin Cloud Browser workflows
15
+ - Scraping commands: `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, `batch-scrape`
16
+ - Browser commands: `open`, `snapshot`, `click`, `type`, `screenshot`, `close`, `sessions`, `current`
17
+ - Clear two-key configuration model with `GOLOGIN_WEB_UNLOCKER_API_KEY` and `GOLOGIN_CLOUD_TOKEN`
18
+ - `doctor`, `config show`, and `config init` to reduce setup friction
19
+ - Compatibility support for legacy env names used by existing Gologin tools
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gologin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,344 @@
1
+ # Gologin Web Access
2
+
3
+ Gologin Web Access lets developers and AI agents read and interact with the web using Gologin Web Unlocker and Gologin Cloud Browser.
4
+
5
+ This is a unified web access layer, not just a scraping tool and not just a browser automation tool.
6
+
7
+ - Read the web through stateless extraction APIs
8
+ - Interact with the web through stateful cloud browser sessions
9
+ - Carry Gologin’s browser-side strengths into those workflows: profiles, identity-aware browser sessions, cloud browser infrastructure, and Gologin’s profile/proxy stack when you run against a configured profile
10
+
11
+ Package name and binary are the same:
12
+
13
+ - npm package: `gologin-web-access`
14
+ - command: `gologin-web-access`
15
+
16
+ ## What It Unifies
17
+
18
+ Gologin Web Access combines two existing product surfaces behind one CLI:
19
+
20
+ - Web Unlocker
21
+ Stateless read and extraction. Best when you want page content quickly without maintaining a browser session.
22
+ - Cloud Browser
23
+ Stateful interaction. Best when you need navigation, clicks, typing, screenshots, or multi-step flows that persist across commands.
24
+
25
+ The point of the unified CLI is that both modes live in one product with one command surface and one config model, while still being honest about which credential powers which workflow. Recommended setup is still to configure both credentials up front so agents do not stop to ask for missing keys mid-task.
26
+
27
+ ## Command Groups
28
+
29
+ ### Scraping / Read
30
+
31
+ These commands use Gologin Web Unlocker:
32
+
33
+ - `gologin-web-access scrape <url>`
34
+ - `gologin-web-access read <url> [--format text|markdown|html] [--source auto|unlocker|browser]`
35
+ - `gologin-web-access scrape-markdown <url> [--source auto|unlocker|browser]`
36
+ - `gologin-web-access scrape-text <url> [--source auto|unlocker|browser]`
37
+ - `gologin-web-access scrape-json <url> [--fallback none|browser]`
38
+ - `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|unlocker|browser] [--only-main-content] [--retry <n>] [--backoff-ms <ms>] [--summary]`
39
+ - `gologin-web-access batch-extract <url...> --schema <schema.json> [--source auto|unlocker|browser] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
40
+ - `gologin-web-access search <query> [--limit <n>] [--country <cc>] [--language <lang>] [--source auto|unlocker|browser]`
41
+ - `gologin-web-access map <url> [--limit <n>] [--max-depth <n>] [--concurrency <n>] [--strict]`
42
+ - `gologin-web-access crawl <url> [--format html|markdown|text|json] [--limit <n>] [--max-depth <n>] [--only-main-content] [--strict]`
43
+ - `gologin-web-access crawl-start <url> ...`
44
+ - `gologin-web-access crawl-status <jobId>`
45
+ - `gologin-web-access crawl-result <jobId>`
46
+ - `gologin-web-access crawl-errors <jobId>`
47
+ - `gologin-web-access extract <url> --schema <schema.json> [--source auto|unlocker|browser]`
48
+ - `gologin-web-access change-track <url> [--format html|markdown|text|json]`
49
+ - `gologin-web-access batch-change-track <url...> [--format html|markdown|text|json] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
50
+ - `gologin-web-access parse-document <url-or-path>`
51
+ - `gologin-web-access run <runbook.json>`
52
+ - `gologin-web-access batch <runbook.json> --targets <targets.json>`
53
+ - `gologin-web-access jobs`
54
+ - `gologin-web-access job <jobId>`
55
+
56
+ Use these when you want stateless page retrieval or extracted content.
57
+
58
+ ### Browser / Interact
59
+
60
+ These commands use Gologin Cloud Browser through the local daemon-backed agent layer:
61
+
62
+ - `gologin-web-access open <url> [--profile <id>]`
63
+ - `gologin-web-access search-browser <query> [--profile <id>]`
64
+ - `gologin-web-access scrape-screenshot <url> [path] [--profile <id>]`
65
+ - `gologin-web-access tabs`
66
+ - `gologin-web-access tabopen [url]`
67
+ - `gologin-web-access tabfocus <index>`
68
+ - `gologin-web-access tabclose [index]`
69
+ - `gologin-web-access snapshot`
70
+ - `gologin-web-access click <ref>`
71
+ - `gologin-web-access dblclick <ref>`
72
+ - `gologin-web-access focus <ref>`
73
+ - `gologin-web-access type <ref> <text>`
74
+ - `gologin-web-access fill <ref> <text>`
75
+ - `gologin-web-access hover <ref>`
76
+ - `gologin-web-access select <ref> <value>`
77
+ - `gologin-web-access check <ref>`
78
+ - `gologin-web-access uncheck <ref>`
79
+ - `gologin-web-access press <key> [target]`
80
+ - `gologin-web-access scroll <direction> [pixels]`
81
+ - `gologin-web-access scrollintoview <ref>`
82
+ - `gologin-web-access wait <target|ms>`
83
+ - `gologin-web-access get <kind> [target]`
84
+ - `gologin-web-access back`
85
+ - `gologin-web-access forward`
86
+ - `gologin-web-access reload`
87
+ - `gologin-web-access find ...`
88
+ - `gologin-web-access cookies [--output <path>] [--json]`
89
+ - `gologin-web-access cookies-import <cookies.json>`
90
+ - `gologin-web-access cookies-clear`
91
+ - `gologin-web-access storage-export [path] [--scope <local|session|both>]`
92
+ - `gologin-web-access storage-import <storage.json> [--scope <local|session|both>] [--clear]`
93
+ - `gologin-web-access storage-clear [--scope <local|session|both>]`
94
+ - `gologin-web-access eval <expression>`
95
+ - `gologin-web-access upload <ref> <file...>`
96
+ - `gologin-web-access pdf <path>`
97
+ - `gologin-web-access screenshot <path>`
98
+ - `gologin-web-access close`
99
+ - `gologin-web-access sessions`
100
+ - `gologin-web-access current`
101
+
102
+ Use these when you need state, interaction, or multi-step browser flows.
103
+
104
+ ## When To Use `scrape` vs `browser`
105
+
106
+ - Use `scrape` commands when you need page content, extracted text, markdown, or simple structured output.
107
+ - Use `search` when you need web discovery or SERP results before deciding what to scrape. It now tries multiple search paths automatically, validates that the response is a real SERP, and reuses a short local cache for repeated queries.
108
+ - Use `map` when you need internal link discovery or a site inventory.
109
+ - Use `crawl` when you need multi-page read-only extraction across a site.
110
+ - Use `crawl-start` plus `crawl-status` and `crawl-result` when the crawl should run detached.
111
+ - Use `extract` when you want deterministic structured output from CSS selectors rather than generic page summaries.
112
+ - Use `batch-extract` when the same selector schema should run across many known URLs.
113
+ - Use `change-track` when you want local change detection against the last stored snapshot of a page.
114
+ - Use `batch-change-track` when you want to monitor a watchlist of pages in one pass.
115
+ - Use `parse-document` when the source is a document (a PDF, DOCX, XLSX, or HTML file, given as a URL or a local path) rather than a normal web page.
116
+ - Use browser commands when you need clicks, forms, navigation, screenshots, sessions, or logged-in/profile-backed flows.
117
+ - Use browser commands when you need ref-based interaction, uploads, PDFs, semantic find flows, keyboard control, or a browser-visible search journey.
118
+ - Use `run` and `batch` when you want reusable workflows or multi-target execution on top of the CLI surface.
119
+ - Use `scrape` when stateless speed matters more than interaction.
120
+ - Use browser commands when the site requires state, continuity, or real browser behavior.
121
+
122
+ ## Why This Is Not Just A Read-Only Crawler
123
+
124
+ The read layer matters, but this product is broader than a Firecrawl-like “read the page” use case.
125
+
126
+ What makes Gologin Web Access different is the ability to move from stateless extraction into stateful browser interaction without leaving the CLI:
127
+
128
+ - Browser sessions can run through Gologin Cloud Browser instead of a local one-off browser process.
129
+ - Browser workflows can use a Gologin profile via `--profile` or `GOLOGIN_DEFAULT_PROFILE_ID`.
130
+ - That gives the CLI access to Gologin’s identity/profile model and session layer, instead of stopping at raw fetches.
131
+ - When a configured profile carries proxy settings, those browser-side capabilities come from the Gologin browser stack rather than from a separate scraping-only pipeline.
132
+
133
+ This README only documents what the current CLI actually implements. It does not claim extra browser capabilities beyond the commands listed above.
134
+
135
+ ## Command Structure Choice
136
+
137
+ The current CLI keeps commands flat:
138
+
139
+ - `gologin-web-access scrape ...`
140
+ - `gologin-web-access scrape-markdown ...`
141
+ - `gologin-web-access open ...`
142
+ - `gologin-web-access snapshot`
143
+
144
+ This is clearer right now than introducing a `browser` namespace such as `gologin-web-access browser open`.
145
+
146
+ Why:
147
+
148
+ - The command surface is still compact.
149
+ - Flat commands are shorter for both humans and AI agents.
150
+ - The read vs interact split is already explicit through the command names and documentation.
151
+
152
+ If the browser surface grows substantially later, a nested namespace may become worth adding. For the current product, flat commands are simpler.
153
+
154
+ ## Credentials And Config
155
+
156
+ This CLI uses two different Gologin credentials on purpose, because the underlying products are different.
157
+
158
+ - `GOLOGIN_WEB_UNLOCKER_API_KEY`
159
+ Required for Scraping / Read commands.
160
+ - `GOLOGIN_CLOUD_TOKEN`
161
+ Required for `gologin-web-access open` and for profile validation in `gologin-web-access doctor`.
162
+ - `GOLOGIN_DEFAULT_PROFILE_ID`
163
+ Optional default profile for browser flows.
164
+ - `GOLOGIN_DAEMON_PORT`
165
+ Optional local daemon port for browser workflows.
166
+
167
+ Recommended full setup for agents is to configure both `GOLOGIN_WEB_UNLOCKER_API_KEY` and `GOLOGIN_CLOUD_TOKEN` before starting work, even if the current task looks read-only or browser-only.
168
+
169
+ Missing-key errors are command-group specific. Example:
170
+
171
+ ``Missing GOLOGIN_WEB_UNLOCKER_API_KEY. This is required for scraping commands like `gologin-web-access scrape`.``
172
+
173
+ Environment variables are the primary configuration mechanism:
174
+
175
+ ```bash
176
+ export GOLOGIN_WEB_UNLOCKER_API_KEY="wu_..."
177
+ export GOLOGIN_CLOUD_TOKEN="gl_..."
178
+ export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
179
+ export GOLOGIN_DAEMON_PORT="4590"
180
+ ```
181
+
182
+ If you do not want to export these variables (or `source ~/.zprofile`) in every shell, persist them once by running:
183
+
184
+ ```bash
185
+ gologin-web-access config init
186
+ ```
187
+
188
+ Useful variants:
189
+
190
+ ```bash
191
+ gologin-web-access config init --web-unlocker-api-key wu_... --cloud-token gl_...
192
+ gologin-web-access config init --web-unlocker-key wu_... --cloud-token gl_...
193
+ ```
194
+
195
+ That writes `~/.gologin-web-access/config.json` once and the CLI will keep reading it on later runs.
196
+ By default `config init` also validates both keys immediately so you find bad credentials during setup instead of on the first real request. Use `--no-validate` only when you intentionally want an offline write.
197
+
198
+ You can also write a minimal config file at `~/.gologin-web-access/config.json`:
199
+
200
+ ```json
201
+ {
202
+ "webUnlockerApiKey": "wu_...",
203
+ "cloudToken": "gl_...",
204
+ "defaultProfileId": "profile_123",
205
+ "daemonPort": 4590
206
+ }
207
+ ```
208
+
209
+ Gologin Web Access will also read the older path `~/.gologin-web/config.json` if it already exists, but new config writes go to `~/.gologin-web-access/config.json`.
210
+
211
+ Backward-compatible aliases are also accepted for existing setups:
212
+
213
+ - `GOLOGIN_WEBUNLOCKER_API_KEY`
214
+ - `GOLOGIN_TOKEN`
215
+ - `GOLOGIN_PROFILE_ID`
216
+
217
+ Useful config commands:
218
+
219
+ ```bash
220
+ gologin-web-access version
221
+ gologin-web-access config init
222
+ gologin-web-access config show
223
+ gologin-web-access doctor
224
+ ```
225
+
226
+ `doctor` reports the embedded Cloud Browser runtime bundled inside this package, whether the local daemon is reachable, and whether the recommended two-key setup is complete.
227
+
228
+ ## Install
229
+
230
+ ```bash
231
+ npm install -g gologin-web-access
232
+ ```
233
+
234
+ ## Quickstart
235
+
236
+ ### Read A Page
237
+
238
+ ```bash
239
+ export GOLOGIN_WEB_UNLOCKER_API_KEY="wu_..."
240
+
241
+ gologin-web-access scrape https://example.com
242
+ gologin-web-access read https://docs.browserbase.com/features/stealth-mode
243
+ gologin-web-access scrape-markdown https://example.com/docs
244
+ gologin-web-access scrape-text https://docs.browserbase.com/features/stealth-mode
245
+ gologin-web-access scrape-json https://example.com --fallback browser
246
+ gologin-web-access batch-scrape https://docs.browserbase.com/features/contexts https://docs.browserbase.com/features/proxies --format text --only-main-content --summary
247
+ gologin-web-access batch-extract https://example.com https://www.iana.org/help/example-domains --schema ./schema.json --summary --output ./artifacts/extract.json
248
+ gologin-web-access search "gologin antidetect browser" --limit 5
249
+ gologin-web-access search "gologin antidetect browser" --limit 5 --source auto
250
+ gologin-web-access map https://example.com --limit 50 --max-depth 2
251
+ gologin-web-access crawl https://docs.browserbase.com --format text --limit 20 --max-depth 2 --only-main-content
252
+ gologin-web-access crawl-start https://example.com --limit 20 --max-depth 2
253
+ gologin-web-access extract https://example.com --schema ./schema.json
254
+ gologin-web-access change-track https://example.com --format markdown
255
+ gologin-web-access batch-change-track https://example.com https://example.org --format text --summary --output ./artifacts/watchlist.json
256
+ gologin-web-access parse-document ./example.pdf
257
+ ```
258
+
259
+ ### Interact With A Site
260
+
261
+ ```bash
262
+ export GOLOGIN_CLOUD_TOKEN="gl_..."
263
+ export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
264
+
265
+ gologin-web-access open https://example.com
266
+ gologin-web-access tabs
267
+ gologin-web-access snapshot
268
+ gologin-web-access click e3
269
+ gologin-web-access type e5 "search terms"
270
+ gologin-web-access wait 1500
271
+ gologin-web-access get title
272
+ gologin-web-access eval "document.title"
273
+ gologin-web-access cookies --output ./cookies.json
274
+ gologin-web-access storage-export ./storage.json
275
+ gologin-web-access screenshot ./page.png
276
+ gologin-web-access current
277
+ gologin-web-access close
278
+ ```
279
+
280
+ ### Search In A Real Browser
281
+
282
+ ```bash
283
+ export GOLOGIN_CLOUD_TOKEN="gl_..."
284
+
285
+ gologin-web-access search-browser "gologin antidetect browser"
286
+ gologin-web-access snapshot -i
287
+ ```
288
+
289
+ ## Structured Output And Retry Controls
290
+
291
+ - `scrape-markdown` and `scrape-text` now default to `--source auto`: they start with Unlocker, isolate the most readable content block, and can auto-retry with Cloud Browser when the output still looks like JS-rendered docs chrome.
292
+ - `read` is the shortest path for "look at this docs page" work: it targets the most readable content block and defaults to `--format text --source auto`.
293
+ - `scrape-markdown` and `scrape-text` also accept `--source unlocker` and `--source browser` when you want to force one path.
294
+ - `extract` now accepts `--source auto|unlocker|browser` and returns `renderSource`, fallback flags, and request metadata with the extracted JSON.
295
+ - `batch-extract` reuses the same extraction path across many URLs and returns one structured result per URL, including request and fallback metadata. Add `--output <path>` to save the full array directly.
296
+ - `scrape-json` now returns both a flat `headings` array and `headingsByLevel` buckets for `h1` through `h6`.
297
+ - `scrape-json --fallback browser` is available for JS-heavy pages where stateless extraction returns weak heading data.
298
+ - `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, and `batch-scrape` accept `--retry`, `--backoff-ms`, and `--timeout-ms`.
299
+ - `batch-scrape --only-main-content` lets markdown, text, and html batch runs use the same readable-content isolation path as `read`.
300
+ - `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless unlocker path.
301
+ - `batch-scrape --summary` prints a one-line success/failure summary to `stderr` after the JSON payload.
302
+ - `batch-scrape --format json` now returns the same structured scrape envelope as `scrape-json`, including `renderSource`, `fallbackAttempted`, `fallbackUsed`, and `request.attemptCount/retryCount/attempts`.
303
+ - `search` now returns `requestedLimit`, `returnedCount`, `warnings`, `cacheTtlMs`, and per-result `position`.
304
+ - `search` may return fewer results than the requested `--limit` when the upstream SERP contains fewer valid results; inspect `returnedCount`, `warnings`, and `attempts`.
305
+ - `change-track` now accepts `--retry`, `--backoff-ms`, and `--timeout-ms`, and JSON output includes request metadata.
306
+ - `batch-change-track` tracks many pages in one pass and reports per-URL `new|same|changed` status plus a summary line when `--summary` is used. Add `--output <path>` to save the full watchlist result directly.
307
+
308
+ ### Reusable Workflows
309
+
310
+ ```bash
311
+ gologin-web-access run ./examples/runbook.json --session s1
312
+ gologin-web-access batch ./examples/runbook.json --targets ./examples/targets.json --concurrency 2
313
+ gologin-web-access jobs
314
+ ```
315
+
316
+ `snapshot` prints refs such as `e1`, `e2`, `e3`. Those refs stay valid until the page changes or you take a new snapshot.
317
+
318
+ `map` and `crawl` now return `status: ok|partial|failed`. By default, partial results stay usable and do not exit non-zero. Add `--strict` when any failed page should fail the command.
319
+
320
+ ## Product Boundaries
321
+
322
+ Gologin Web Access still has two runtime layers:
323
+
324
+ - Web Unlocker for stateless read and extraction
325
+ - Cloud Browser for stateful interaction
326
+
327
+ But both are now shipped inside the same package and the same repository. One install gives you the full read layer and the full browser/session layer.
328
+
329
+ ## Development
330
+
331
+ ```bash
332
+ npm install
333
+ npm run build
334
+ npm run typecheck
335
+ npm test
336
+ ```
337
+
338
+ ## Publish
339
+
340
+ ```bash
341
+ npm publish --access public
342
+ ```
343
+
344
+ Prepublish checks run automatically through `prepublishOnly`.
package/dist/cli.js ADDED
@@ -0,0 +1,173 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ const commander_1 = require("commander");
5
+ const back_1 = require("./commands/back");
6
+ const batch_1 = require("./commands/batch");
7
+ const batchChangeTrack_1 = require("./commands/batchChangeTrack");
8
+ const batchExtract_1 = require("./commands/batchExtract");
9
+ const batchScrape_1 = require("./commands/batchScrape");
10
+ const check_1 = require("./commands/check");
11
+ const changeTrack_1 = require("./commands/changeTrack");
12
+ const crawl_1 = require("./commands/crawl");
13
+ const crawlErrors_1 = require("./commands/crawlErrors");
14
+ const crawlResult_1 = require("./commands/crawlResult");
15
+ const crawlStart_1 = require("./commands/crawlStart");
16
+ const crawlStatus_1 = require("./commands/crawlStatus");
17
+ const click_1 = require("./commands/click");
18
+ const close_1 = require("./commands/close");
19
+ const configInit_1 = require("./commands/configInit");
20
+ const configShow_1 = require("./commands/configShow");
21
+ const cookies_1 = require("./commands/cookies");
22
+ const cookiesClear_1 = require("./commands/cookiesClear");
23
+ const cookiesImport_1 = require("./commands/cookiesImport");
24
+ const current_1 = require("./commands/current");
25
+ const dblclick_1 = require("./commands/dblclick");
26
+ const eval_1 = require("./commands/eval");
27
+ const extract_1 = require("./commands/extract");
28
+ const fill_1 = require("./commands/fill");
29
+ const find_1 = require("./commands/find");
30
+ const focus_1 = require("./commands/focus");
31
+ const forward_1 = require("./commands/forward");
32
+ const get_1 = require("./commands/get");
33
+ const hover_1 = require("./commands/hover");
34
+ const jobs_1 = require("./commands/jobs");
35
+ const map_1 = require("./commands/map");
36
+ const open_1 = require("./commands/open");
37
+ const parseDocument_1 = require("./commands/parseDocument");
38
+ const pdf_1 = require("./commands/pdf");
39
+ const press_1 = require("./commands/press");
40
+ const reload_1 = require("./commands/reload");
41
+ const read_1 = require("./commands/read");
42
+ const run_1 = require("./commands/run");
43
+ const scrape_1 = require("./commands/scrape");
44
+ const scrapeJson_1 = require("./commands/scrapeJson");
45
+ const scrapeMarkdown_1 = require("./commands/scrapeMarkdown");
46
+ const scrapeScreenshot_1 = require("./commands/scrapeScreenshot");
47
+ const scrapeText_1 = require("./commands/scrapeText");
48
+ const scroll_1 = require("./commands/scroll");
49
+ const scrollIntoView_1 = require("./commands/scrollIntoView");
50
+ const searchBrowser_1 = require("./commands/searchBrowser");
51
+ const search_1 = require("./commands/search");
52
+ const select_1 = require("./commands/select");
53
+ const screenshot_1 = require("./commands/screenshot");
54
+ const sessions_1 = require("./commands/sessions");
55
+ const snapshot_1 = require("./commands/snapshot");
56
+ const storageClear_1 = require("./commands/storageClear");
57
+ const storageExport_1 = require("./commands/storageExport");
58
+ const storageImport_1 = require("./commands/storageImport");
59
+ const tabClose_1 = require("./commands/tabClose");
60
+ const tabFocus_1 = require("./commands/tabFocus");
61
+ const tabOpen_1 = require("./commands/tabOpen");
62
+ const tabs_1 = require("./commands/tabs");
63
+ const uncheck_1 = require("./commands/uncheck");
64
+ const type_1 = require("./commands/type");
65
+ const upload_1 = require("./commands/upload");
66
+ const wait_1 = require("./commands/wait");
67
+ const doctor_1 = require("./doctor");
68
+ const errors_1 = require("./lib/errors");
69
+ const output_1 = require("./lib/output");
70
/** Version string reported by `--version` and by the `version` subcommand. */
const CLI_VERSION = "0.3.0";
/**
 * Assembles the `gologin-web-access` command tree and parses `process.argv`.
 *
 * Subcommands are registered in a fixed order: the builder-provided commands
 * first, then `doctor`, `version`, and the `config` group. A help footer that
 * lists the command groups and the two API keys is appended after the
 * generated help.
 *
 * @returns {Promise<void>} Settles once the selected command's action has finished.
 */
async function main() {
    const program = new commander_1.Command();
    program.name("gologin-web-access");
    program.description("Read and interact with the web using Gologin Web Unlocker and Cloud Browser.");
    program.version(CLI_VERSION);
    program.showHelpAfterError();
    program.showSuggestionAfterError();
    // Builders for every top-level subcommand, kept in registration order.
    const topLevelBuilders = [
        scrape_1.buildScrapeCommand,
        read_1.buildReadCommand,
        scrapeMarkdown_1.buildScrapeMarkdownCommand,
        scrapeText_1.buildScrapeTextCommand,
        scrapeJson_1.buildScrapeJsonCommand,
        batchScrape_1.buildBatchScrapeCommand,
        batchExtract_1.buildBatchExtractCommand,
        search_1.buildSearchCommand,
        map_1.buildMapCommand,
        crawl_1.buildCrawlCommand,
        crawlStart_1.buildCrawlStartCommand,
        crawlStatus_1.buildCrawlStatusCommand,
        crawlResult_1.buildCrawlResultCommand,
        crawlErrors_1.buildCrawlErrorsCommand,
        extract_1.buildExtractCommand,
        changeTrack_1.buildChangeTrackCommand,
        batchChangeTrack_1.buildBatchChangeTrackCommand,
        parseDocument_1.buildParseDocumentCommand,
        run_1.buildRunCommand,
        batch_1.buildBatchCommand,
        jobs_1.buildJobsCommand,
        jobs_1.buildJobCommand,
        open_1.buildOpenCommand,
        searchBrowser_1.buildSearchBrowserCommand,
        scrapeScreenshot_1.buildScrapeScreenshotCommand,
        tabs_1.buildTabsCommand,
        tabOpen_1.buildTabOpenCommand,
        tabFocus_1.buildTabFocusCommand,
        tabClose_1.buildTabCloseCommand,
        snapshot_1.buildSnapshotCommand,
        click_1.buildClickCommand,
        dblclick_1.buildDoubleClickCommand,
        focus_1.buildFocusCommand,
        type_1.buildTypeCommand,
        fill_1.buildFillCommand,
        hover_1.buildHoverCommand,
        select_1.buildSelectCommand,
        check_1.buildCheckCommand,
        uncheck_1.buildUncheckCommand,
        press_1.buildPressCommand,
        scroll_1.buildScrollCommand,
        scrollIntoView_1.buildScrollIntoViewCommand,
        wait_1.buildWaitCommand,
        get_1.buildGetCommand,
        back_1.buildBackCommand,
        forward_1.buildForwardCommand,
        reload_1.buildReloadCommand,
        find_1.buildFindCommand,
        cookies_1.buildCookiesCommand,
        cookiesImport_1.buildCookiesImportCommand,
        cookiesClear_1.buildCookiesClearCommand,
        storageExport_1.buildStorageExportCommand,
        storageImport_1.buildStorageImportCommand,
        storageClear_1.buildStorageClearCommand,
        eval_1.buildEvalCommand,
        upload_1.buildUploadCommand,
        pdf_1.buildPdfCommand,
        screenshot_1.buildScreenshotCommand,
        close_1.buildCloseCommand,
        sessions_1.buildSessionsCommand,
        current_1.buildCurrentCommand
    ];
    for (const buildCommand of topLevelBuilders) {
        program.addCommand(buildCommand());
    }
    // `doctor` and `version` are small enough to be declared inline.
    const doctorCommand = program.command("doctor");
    doctorCommand.description("Inspect both recommended keys, profile configuration, and local daemon health.");
    doctorCommand.option("--json", "Print JSON output");
    doctorCommand.action(async (doctorOptions) => {
        await (0, doctor_1.runDoctor)(doctorOptions);
    });
    const versionCommand = program.command("version");
    versionCommand.description("Print the CLI version.");
    versionCommand.action(() => {
        (0, output_1.printText)(CLI_VERSION);
    });
    // `config` is a command group with its own nested subcommands.
    const configGroup = program.command("config").description("Inspect or initialize CLI configuration.");
    for (const buildConfigSubcommand of [configShow_1.buildConfigShowCommand, configInit_1.buildConfigInitCommand]) {
        configGroup.addCommand(buildConfigSubcommand());
    }
    const unlockerKeyName = "GOLOGIN_WEB_UNLOCKER_API_KEY";
    const cloudTokenName = "GOLOGIN_CLOUD_TOKEN";
    const helpFooter = `
Command groups:
Scraping: gologin-web-access scrape|read|scrape-markdown|scrape-text|scrape-json|batch-scrape|batch-extract|search|map|crawl|crawl-start|crawl-status|crawl-result|crawl-errors|extract|change-track|batch-change-track|parse-document
Browser: gologin-web-access open|search-browser|scrape-screenshot|tabs|tabopen|tabfocus|tabclose|snapshot|click|dblclick|focus|type|fill|hover|select|check|uncheck|press|scroll|scrollintoview|wait|get|back|forward|reload|find|cookies|cookies-import|cookies-clear|storage-export|storage-import|storage-clear|eval|upload|pdf|screenshot|close|sessions|current
Agent: gologin-web-access run|batch|jobs|job

Key model:
${unlockerKeyName} powers scraping commands.
${cloudTokenName} powers browser commands.
Recommended setup: configure both keys up front, even if the current task only needs one path.
`;
    program.addHelpText("after", helpFooter);
    await program.parseAsync(process.argv);
}
169
/**
 * Last-resort failure handler for the CLI: converts whatever was thrown into
 * a CLI error, prints it, and terminates the process with that error's
 * `exitCode`.
 *
 * @param {unknown} failure - The rejection reason produced by `main()`.
 */
function exitWithCliError(failure) {
    const normalized = (0, errors_1.toCliError)(failure);
    (0, output_1.printError)(normalized);
    process.exit(normalized.exitCode);
}
// Start the CLI; `void` marks the promise as intentionally not awaited.
void main().catch(exitWithCliError);
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildBackCommand = buildBackCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Creates the `back` subcommand, which navigates back in the active Cloud
 * Browser tab history.
 *
 * The action forwards the `back` browser command together with the session
 * taken from the parsed options. The command is passed through
 * `addSessionOption` before being returned (presumably that is what declares
 * the session flag; confirm in `./shared`).
 *
 * @returns The configured command, as returned by `addSessionOption`.
 */
function buildBackCommand() {
    const navigateBack = async ({ session }) => {
        await (0, shared_1.runBrowserCommand)(["back"], { session });
    };
    const backCommand = new commander_1.Command("back");
    backCommand.description("Navigate back in the active Cloud Browser tab history.");
    backCommand.action(navigateBack);
    return (0, shared_1.addSessionOption)(backCommand);
}
@@ -0,0 +1,81 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.buildBatchCommand = buildBatchCommand;
7
+ const path_1 = __importDefault(require("path"));
8
+ const commander_1 = require("commander");
9
+ const config_1 = require("../config");
10
+ const jobRegistry_1 = require("../lib/jobRegistry");
11
+ const runbooks_1 = require("../lib/runbooks");
12
+ const output_1 = require("../lib/output");
13
/**
 * Interprets the raw `--concurrency` option value.
 *
 * @param {string|undefined} rawValue - Option value as received from the command line.
 * @returns {number|undefined} A positive integer, or `undefined` when the option was not supplied.
 * @throws {Error} When a value was supplied but is not a positive integer
 *   (for example `abc`, `0`, `-2`, `2.5`, or an empty string).
 */
function parseBatchConcurrency(rawValue) {
    if (rawValue === undefined || rawValue === null) {
        return undefined;
    }
    const parsed = Number(rawValue);
    if (!Number.isInteger(parsed) || parsed < 1) {
        throw new Error(`Invalid --concurrency value "${rawValue}": expected a positive integer.`);
    }
    return parsed;
}
/**
 * Renders batch results as plain text: one summary line per target followed
 * by one line per executed step.
 *
 * @param {Array} results - Per-target results returned by `executeBatch`.
 * @returns {string} The newline-joined report.
 */
function formatBatchReport(results) {
    return results
        .map((target) => {
        const lines = [`target=${target.name} status=${target.status} durationMs=${target.durationMs}`];
        for (const step of target.steps) {
            lines.push(` step=${step.command} status=${step.status} durationMs=${step.durationMs}`);
        }
        return lines.join("\n");
    })
        .join("\n");
}
/**
 * Creates the `batch` subcommand, which executes a runbook across multiple
 * profile/session targets and records the run as a job.
 *
 * Action flow:
 *  1. Validate `--concurrency` before anything is persisted, so a malformed
 *     value fails fast instead of creating a job that runs with `NaN` or `0`.
 *  2. Load the configuration, the runbook, the targets and optional variables.
 *  3. Create a job record, mark it running, and execute the batch.
 *  4. Finalize the job as `partial` when any target failed, otherwise `ok`,
 *     and print either the job record (`--json`) or the text report.
 *
 * When execution throws, the job is finalized as `failed`. With `--json` the
 * failed record is printed and the process exit code is set to 1; without it
 * the original error is rethrown to the caller.
 *
 * @returns The configured `batch` command.
 */
function buildBatchCommand() {
    return new commander_1.Command("batch")
        .description("Execute a runbook across multiple profile/session targets.")
        .argument("<runbookPath>", "Path to the runbook JSON file")
        .requiredOption("--targets <path>", "Path to a batch targets JSON file")
        .option("--concurrency <count>", "Maximum number of targets to run in parallel")
        .option("--vars <path>", "Path to a JSON variables file")
        .option("--name <name>", "Override the stored job name")
        .option("--continue-on-error", "Continue after failed steps inside each target")
        .option("--json", "Print the final job record as JSON")
        .action(async (runbookPath, options) => {
        // Reject bad input up front, before a job record exists.
        const concurrency = parseBatchConcurrency(options.concurrency);
        const config = await (0, config_1.loadConfig)();
        const runbook = (0, runbooks_1.loadRunbookDefinition)(process.cwd(), runbookPath);
        const batch = (0, runbooks_1.loadBatchDefinition)(process.cwd(), options.targets);
        const variables = options.vars ? (0, runbooks_1.loadVariablesFile)(process.cwd(), options.vars) : undefined;
        const absoluteRunbookPath = path_1.default.resolve(runbookPath);
        const job = await (0, jobRegistry_1.createJob)(config, {
            kind: "batch",
            // Default job name: the runbook file name without its extension.
            name: options.name ?? path_1.default.basename(absoluteRunbookPath, path_1.default.extname(absoluteRunbookPath)),
            cwd: process.cwd(),
            args: process.argv.slice(2),
            metadata: {
                runbookPath: absoluteRunbookPath,
                targetsPath: path_1.default.resolve(options.targets)
            }
        });
        await (0, jobRegistry_1.markJobRunning)(config, job.jobId);
        try {
            const results = await (0, runbooks_1.executeBatch)(runbook, batch, {
                cwd: process.cwd(),
                concurrency,
                variables,
                continueOnError: options.continueOnError === true
            });
            const failed = results.filter((target) => target.status === "failed").length;
            const output = formatBatchReport(results);
            const record = await (0, jobRegistry_1.finalizeJob)(config, job.jobId, {
                status: failed > 0 ? "partial" : "ok",
                output,
                result: results
            });
            if (options.json) {
                (0, output_1.printJson)(record);
                return;
            }
            (0, output_1.printText)(output);
        }
        catch (error) {
            const record = await (0, jobRegistry_1.finalizeJob)(config, job.jobId, {
                status: "failed",
                error: error instanceof Error ? error.message : String(error)
            });
            if (options.json) {
                // JSON consumers get the failed record on stdout plus a non-zero exit code.
                (0, output_1.printJson)(record);
                process.exitCode = 1;
                return;
            }
            throw error;
        }
    });
}