gologin-web-access 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +3 -3
- package/README.md +57 -23
- package/dist/cli.js +11 -3
- package/dist/commands/batchExtract.js +1 -1
- package/dist/commands/batchScrape.js +2 -2
- package/dist/commands/configInit.js +13 -7
- package/dist/commands/crawl.js +1 -1
- package/dist/commands/extract.js +1 -1
- package/dist/commands/gologinApi.js +324 -0
- package/dist/commands/map.js +1 -1
- package/dist/commands/read.js +1 -1
- package/dist/commands/scrape.js +1 -1
- package/dist/commands/scrapeJson.js +1 -1
- package/dist/commands/scrapeMarkdown.js +2 -2
- package/dist/commands/scrapeText.js +2 -2
- package/dist/commands/search.js +4 -1
- package/dist/config.js +4 -4
- package/dist/doctor.js +2 -2
- package/dist/lib/cloudApi.js +61 -0
- package/dist/lib/errors.js +1 -1
- package/dist/lib/output.js +1 -1
- package/dist/lib/readSource.js +3 -0
- package/dist/lib/search.js +1 -1
- package/dist/lib/unlocker.js +7 -7
- package/package.json +3 -2
package/CHANGELOG.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
-
- browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contains both
|
|
5
|
+
- browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contains both Scraping API and Cloud Browser flows
|
|
6
6
|
- doctor now reports the embedded browser runtime source and version
|
|
7
7
|
|
|
8
8
|
## 0.3.2 - 2026-04-03
|
|
@@ -17,9 +17,9 @@ Initial public release of Gologin Web Access.
|
|
|
17
17
|
|
|
18
18
|
Highlights:
|
|
19
19
|
|
|
20
|
-
- Unified CLI entry point for
|
|
20
|
+
- Unified CLI entry point for GoLogin Scraping API and Gologin Cloud Browser workflows
|
|
21
21
|
- Scraping commands: `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, `batch-scrape`
|
|
22
22
|
- Browser commands: `open`, `snapshot`, `click`, `type`, `screenshot`, `close`, `sessions`, `current`
|
|
23
|
-
- Clear two-key configuration model with `
|
|
23
|
+
- Clear two-key configuration model with `GOLOGIN_SCRAPING_API_KEY` and `GOLOGIN_TOKEN`
|
|
24
24
|
- `doctor`, `config show`, and `config init` to reduce setup friction
|
|
25
25
|
- Compatibility support for legacy env names used by existing Gologin tools
|
package/README.md
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# Gologin Web Access
|
|
2
2
|
|
|
3
|
-
Gologin Web Access lets developers and AI agents read and interact with the web using
|
|
3
|
+
Gologin Web Access lets developers and AI agents read and interact with the web using GoLogin Scraping API and Gologin Cloud Browser.
|
|
4
4
|
|
|
5
5
|
This is a unified web access layer, not just a scraping tool and not just a browser automation tool.
|
|
6
6
|
|
|
7
7
|
- Read the web through stateless extraction APIs
|
|
8
8
|
- Interact with the web through stateful cloud browser sessions
|
|
9
9
|
- Carry Gologin’s browser-side strengths into those workflows: profiles, identity-aware browser sessions, cloud browser infrastructure, and Gologin’s profile/proxy stack when you run against a configured profile
|
|
10
|
+
- Manage common GoLogin profile/proxy API operations without leaving the CLI: cloud usage, cloud profile start/stop, profile cookies, fingerprint refresh, managed proxies, and user-agent updates
|
|
10
11
|
|
|
11
12
|
Package name and binary are the same:
|
|
12
13
|
|
|
@@ -17,7 +18,7 @@ Package name and binary are the same:
|
|
|
17
18
|
|
|
18
19
|
Gologin Web Access combines two existing product surfaces behind one CLI:
|
|
19
20
|
|
|
20
|
-
-
|
|
21
|
+
- Scraping API
|
|
21
22
|
Stateless read and extraction. Best when you want page content quickly without maintaining a browser session.
|
|
22
23
|
- Cloud Browser
|
|
23
24
|
Stateful interaction. Best when you need navigation, clicks, typing, screenshots, or multi-step flows that persist across commands.
|
|
@@ -35,23 +36,23 @@ The point of the unified CLI is that both modes live in one product with one com
|
|
|
35
36
|
|
|
36
37
|
### Scraping / Read
|
|
37
38
|
|
|
38
|
-
These commands use
|
|
39
|
+
These commands use GoLogin Scraping API:
|
|
39
40
|
|
|
40
41
|
- `gologin-web-access scrape <url>`
|
|
41
|
-
- `gologin-web-access read <url> [--format text|markdown|html] [--source auto|
|
|
42
|
-
- `gologin-web-access scrape-markdown <url> [--source auto|
|
|
43
|
-
- `gologin-web-access scrape-text <url> [--source auto|
|
|
42
|
+
- `gologin-web-access read <url> [--format text|markdown|html] [--source auto|scraping|browser]`
|
|
43
|
+
- `gologin-web-access scrape-markdown <url> [--source auto|scraping|browser]`
|
|
44
|
+
- `gologin-web-access scrape-text <url> [--source auto|scraping|browser]`
|
|
44
45
|
- `gologin-web-access scrape-json <url> [--fallback none|browser]`
|
|
45
|
-
- `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|
|
|
46
|
-
- `gologin-web-access batch-extract <url...> --schema <schema.json> [--source auto|
|
|
47
|
-
- `gologin-web-access search <query> [--limit <n>] [--country <cc>] [--language <lang>] [--source auto|
|
|
46
|
+
- `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|scraping|browser] [--only-main-content] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>] [--strict]`
|
|
47
|
+
- `gologin-web-access batch-extract <url...> --schema <schema.json> [--source auto|scraping|browser] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
|
|
48
|
+
- `gologin-web-access search <query> [--limit <n>] [--country <cc>] [--language <lang>] [--source auto|scraping|browser]`
|
|
48
49
|
- `gologin-web-access map <url> [--limit <n>] [--max-depth <n>] [--concurrency <n>] [--strict]`
|
|
49
50
|
- `gologin-web-access crawl <url> [--format html|markdown|text|json] [--limit <n>] [--max-depth <n>] [--only-main-content] [--strict]`
|
|
50
51
|
- `gologin-web-access crawl-start <url> ...`
|
|
51
52
|
- `gologin-web-access crawl-status <jobId>`
|
|
52
53
|
- `gologin-web-access crawl-result <jobId>`
|
|
53
54
|
- `gologin-web-access crawl-errors <jobId>`
|
|
54
|
-
- `gologin-web-access extract <url> --schema <schema.json> [--source auto|
|
|
55
|
+
- `gologin-web-access extract <url> --schema <schema.json> [--source auto|scraping|browser]`
|
|
55
56
|
- `gologin-web-access change-track <url> [--format html|markdown|text|json]`
|
|
56
57
|
- `gologin-web-access batch-change-track <url...> [--format html|markdown|text|json] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
|
|
57
58
|
- `gologin-web-access parse-document <url-or-path>`
|
|
@@ -108,6 +109,24 @@ These commands use Gologin Cloud Browser through the local daemon-backed agent l
|
|
|
108
109
|
|
|
109
110
|
Use these when you need state, interaction, or multi-step browser flows.
|
|
110
111
|
|
|
112
|
+
### GoLogin API Helpers
|
|
113
|
+
|
|
114
|
+
These commands use the GoLogin REST API directly through `GOLOGIN_TOKEN`. They do not require Scraping API and do not start the browser daemon:
|
|
115
|
+
|
|
116
|
+
- `gologin-web-access cloud-usage --profile <profileId> | --workspace <workspaceId> [--days <1-30>] [--json]`
|
|
117
|
+
- `gologin-web-access profile-cloud start <profileId> [--json]`
|
|
118
|
+
- `gologin-web-access profile-cloud stop <profileId> [--json]`
|
|
119
|
+
- `gologin-web-access profile-cookies export <profileId> [--output <path>] [--json]`
|
|
120
|
+
- `gologin-web-access profile-cookies import <profileId> <cookies.json> [--clean] [--json]`
|
|
121
|
+
- `gologin-web-access profile-fingerprint refresh <profileId...> [--json]`
|
|
122
|
+
- `gologin-web-access profile-proxy list [--page <n>] [--json]`
|
|
123
|
+
- `gologin-web-access profile-proxy traffic`
|
|
124
|
+
- `gologin-web-access profile-proxy add-gologin <profileId> --country <cc> [--city <city>] [--type residential|mobile|dc] [--json]`
|
|
125
|
+
- `gologin-web-access profile-ua latest [--os lin|mac|win|android|android-cloud] [--json]`
|
|
126
|
+
- `gologin-web-access profile-ua update <profileId...> [--all-profiles] [--workspace <id>] [--json]`
|
|
127
|
+
|
|
128
|
+
Use these when an agent needs GoLogin account/profile operations and would otherwise drop into raw REST calls or SDK code.
|
|
129
|
+
|
|
111
130
|
## When To Use `scrape` vs `browser`
|
|
112
131
|
|
|
113
132
|
- Use `scrape` commands when you need page content, extracted text, markdown, or simple structured output.
|
|
@@ -124,6 +143,7 @@ Use these when you need state, interaction, or multi-step browser flows.
|
|
|
124
143
|
- Use `batch-change-track` when you want to monitor a watchlist of pages in one pass.
|
|
125
144
|
- Use `parse-document` when the source is a PDF, DOCX, XLSX, HTML, or local document path instead of a normal HTML page.
|
|
126
145
|
- Use browser commands when you need clicks, forms, navigation, screenshots, sessions, or logged-in/profile-backed flows.
|
|
146
|
+
- Use GoLogin API helper commands when you need to attach managed proxy traffic, export/import profile cookies, refresh fingerprints, update user agents, inspect usage, or start/stop a cloud profile.
|
|
127
147
|
- Use browser commands when you need ref-based interaction, uploads, PDFs, semantic find flows, keyboard control, or a browser-visible search journey.
|
|
128
148
|
- Use `run` and `batch` when you want reusable workflows or multi-target execution on top of the CLI surface.
|
|
129
149
|
- Use `scrape` when stateless speed matters more than interaction.
|
|
@@ -165,25 +185,25 @@ If the browser surface grows substantially later, a nested namespace may become
|
|
|
165
185
|
|
|
166
186
|
This CLI uses two different GoLogin credentials on purpose, because the underlying products are different.
|
|
167
187
|
|
|
168
|
-
- `
|
|
188
|
+
- `GOLOGIN_SCRAPING_API_KEY`
|
|
169
189
|
Required for Scraping / Read commands.
|
|
170
190
|
- `GOLOGIN_TOKEN`
|
|
171
|
-
Required for `gologin-web-access open
|
|
191
|
+
Required for `gologin-web-access open`, GoLogin API helper commands, and profile validation in `gologin-web-access doctor`.
|
|
172
192
|
- `GOLOGIN_DEFAULT_PROFILE_ID`
|
|
173
193
|
Optional default profile for browser flows.
|
|
174
194
|
- `GOLOGIN_DAEMON_PORT`
|
|
175
195
|
Optional local daemon port for browser workflows.
|
|
176
196
|
|
|
177
|
-
Recommended full setup for agents is to configure both `
|
|
197
|
+
Recommended full setup for agents is to configure both `GOLOGIN_SCRAPING_API_KEY` and `GOLOGIN_TOKEN` before starting work, even if the current task looks read-only or browser-only.
|
|
178
198
|
|
|
179
199
|
Missing-key errors are command-group specific. Example:
|
|
180
200
|
|
|
181
|
-
`Missing
|
|
201
|
+
`Missing GOLOGIN_SCRAPING_API_KEY. This is required for scraping commands like \`gologin-web-access scrape\`.`
|
|
182
202
|
|
|
183
203
|
Environment variables are the primary configuration mechanism:
|
|
184
204
|
|
|
185
205
|
```bash
|
|
186
|
-
export
|
|
206
|
+
export GOLOGIN_SCRAPING_API_KEY="wu_..."
|
|
187
207
|
export GOLOGIN_TOKEN="gl_..."
|
|
188
208
|
export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
|
|
189
209
|
export GOLOGIN_DAEMON_PORT="4590"
|
|
@@ -198,8 +218,8 @@ gologin-web-access config init
|
|
|
198
218
|
Useful variants:
|
|
199
219
|
|
|
200
220
|
```bash
|
|
201
|
-
gologin-web-access config init --
|
|
202
|
-
gologin-web-access config init --web-unlocker-key wu_... --token gl_...
|
|
221
|
+
gologin-web-access config init --scraping-api-key wu_... --token gl_...
|
|
222
|
+
gologin-web-access config init --web-unlocker-key wu_... --token gl_... # legacy alias
|
|
203
223
|
```
|
|
204
224
|
|
|
205
225
|
That writes `~/.gologin-web-access/config.json` once and the CLI will keep reading it on later runs.
|
|
@@ -246,7 +266,7 @@ npm install -g gologin-web-access
|
|
|
246
266
|
### Read A Page
|
|
247
267
|
|
|
248
268
|
```bash
|
|
249
|
-
export
|
|
269
|
+
export GOLOGIN_SCRAPING_API_KEY="wu_..."
|
|
250
270
|
|
|
251
271
|
gologin-web-access scrape https://example.com
|
|
252
272
|
gologin-web-access read https://docs.browserbase.com/features/stealth-mode
|
|
@@ -287,6 +307,20 @@ gologin-web-access current
|
|
|
287
307
|
gologin-web-access close
|
|
288
308
|
```
|
|
289
309
|
|
|
310
|
+
### Manage Profiles And Proxies
|
|
311
|
+
|
|
312
|
+
```bash
|
|
313
|
+
export GOLOGIN_TOKEN="gl_..."
|
|
314
|
+
|
|
315
|
+
gologin-web-access cloud-usage --profile profile_123
|
|
316
|
+
gologin-web-access profile-proxy add-gologin profile_123 --country us --type residential
|
|
317
|
+
gologin-web-access profile-proxy traffic
|
|
318
|
+
gologin-web-access profile-cookies export profile_123 --output ./cookies.json
|
|
319
|
+
gologin-web-access profile-fingerprint refresh profile_123
|
|
320
|
+
gologin-web-access profile-ua latest --os mac
|
|
321
|
+
gologin-web-access profile-ua update profile_123
|
|
322
|
+
```
|
|
323
|
+
|
|
290
324
|
### Search In A Real Browser
|
|
291
325
|
|
|
292
326
|
```bash
|
|
@@ -298,17 +332,17 @@ gologin-web-access snapshot -i
|
|
|
298
332
|
|
|
299
333
|
## Structured Output And Retry Controls
|
|
300
334
|
|
|
301
|
-
- `scrape-markdown` and `scrape-text` now default to `--source auto`: they start with
|
|
335
|
+
- `scrape-markdown` and `scrape-text` now default to `--source auto`: they start with Scraping API, isolate the most readable content block, and can auto-retry with Cloud Browser when the output still looks like JS-rendered docs chrome.
|
|
302
336
|
- `read` is the shortest path for "look at this docs page" work: it targets the most readable content block and defaults to `--format text --source auto`.
|
|
303
|
-
- `scrape-markdown` and `scrape-text` also accept `--source
|
|
304
|
-
- `extract` now accepts `--source auto|
|
|
337
|
+
- `scrape-markdown` and `scrape-text` also accept `--source scraping` and `--source browser` when you want to force one path. `--source unlocker` remains as a legacy alias.
|
|
338
|
+
- `extract` now accepts `--source auto|scraping|browser` and returns `renderSource`, fallback flags, and request metadata with the extracted JSON.
|
|
305
339
|
- `batch-extract` reuses the same extraction path across many URLs and returns one structured result per URL, including request and fallback metadata. Add `--output <path>` to save the full array directly.
|
|
306
340
|
- `scrape-json` now returns both a flat `headings` array and `headingsByLevel` buckets for `h1` through `h6`.
|
|
307
341
|
- `scrape-json --fallback browser` is available for JS-heavy pages where stateless extraction returns weak heading data.
|
|
308
342
|
- `scrape-json` now also classifies the page outcome as `ok`, `empty`, `incomplete`, `authwall`, `challenge`, `blocked`, or `cookie_wall`, and includes `nextActionHint` when the result is weak or gated.
|
|
309
343
|
- `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, and `batch-scrape` accept `--retry`, `--backoff-ms`, and `--timeout-ms`.
|
|
310
344
|
- `batch-scrape --only-main-content` lets markdown, text, and html batch runs use the same readable-content isolation path as `read`.
|
|
311
|
-
- `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless
|
|
345
|
+
- `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless Scraping API path.
|
|
312
346
|
- `batch-scrape --summary` prints a one-line success/failure summary to `stderr` after the JSON payload.
|
|
313
347
|
- `batch-scrape` now returns exit code `0` on partial success by default and only fails the command when every URL failed. Add `--strict` if any single failed URL should make the whole batch exit non-zero.
|
|
314
348
|
- `batch-scrape --output <path>` writes the full JSON to disk so shells and agent consoles cannot truncate a large payload silently.
|
|
@@ -336,7 +370,7 @@ gologin-web-access jobs
|
|
|
336
370
|
|
|
337
371
|
Gologin Web Access still has two runtime layers:
|
|
338
372
|
|
|
339
|
-
-
|
|
373
|
+
- Scraping API for stateless read and extraction
|
|
340
374
|
- Cloud Browser for stateful interaction
|
|
341
375
|
|
|
342
376
|
But both are now shipped inside the same package and the same repository. One install gives you the full read layer and the full browser/session layer.
|
package/dist/cli.js
CHANGED
|
@@ -30,6 +30,7 @@ const find_1 = require("./commands/find");
|
|
|
30
30
|
const focus_1 = require("./commands/focus");
|
|
31
31
|
const forward_1 = require("./commands/forward");
|
|
32
32
|
const get_1 = require("./commands/get");
|
|
33
|
+
const gologinApi_1 = require("./commands/gologinApi");
|
|
33
34
|
const hover_1 = require("./commands/hover");
|
|
34
35
|
const jobs_1 = require("./commands/jobs");
|
|
35
36
|
const map_1 = require("./commands/map");
|
|
@@ -67,12 +68,12 @@ const wait_1 = require("./commands/wait");
|
|
|
67
68
|
const doctor_1 = require("./doctor");
|
|
68
69
|
const errors_1 = require("./lib/errors");
|
|
69
70
|
const output_1 = require("./lib/output");
|
|
70
|
-
const CLI_VERSION = "0.3.
|
|
71
|
+
const CLI_VERSION = "0.3.4";
|
|
71
72
|
async function main() {
|
|
72
73
|
const program = new commander_1.Command();
|
|
73
74
|
program
|
|
74
75
|
.name("gologin-web-access")
|
|
75
|
-
.description("Read and interact with the web using
|
|
76
|
+
.description("Read and interact with the web using the GoLogin Scraping API and Cloud Browser.")
|
|
76
77
|
.version(CLI_VERSION)
|
|
77
78
|
.showHelpAfterError()
|
|
78
79
|
.showSuggestionAfterError();
|
|
@@ -137,6 +138,12 @@ async function main() {
|
|
|
137
138
|
program.addCommand((0, close_1.buildCloseCommand)());
|
|
138
139
|
program.addCommand((0, sessions_1.buildSessionsCommand)());
|
|
139
140
|
program.addCommand((0, current_1.buildCurrentCommand)());
|
|
141
|
+
program.addCommand((0, gologinApi_1.buildCloudUsageCommand)());
|
|
142
|
+
program.addCommand((0, gologinApi_1.buildProfileCloudCommand)());
|
|
143
|
+
program.addCommand((0, gologinApi_1.buildProfileCookiesCommand)());
|
|
144
|
+
program.addCommand((0, gologinApi_1.buildProfileFingerprintCommand)());
|
|
145
|
+
program.addCommand((0, gologinApi_1.buildProfileProxyCommand)());
|
|
146
|
+
program.addCommand((0, gologinApi_1.buildProfileUaCommand)());
|
|
140
147
|
program
|
|
141
148
|
.command("doctor")
|
|
142
149
|
.description("Inspect both recommended keys, profile configuration, and local daemon health.")
|
|
@@ -163,10 +170,11 @@ Quick picks:
|
|
|
163
170
|
Command groups:
|
|
164
171
|
Scraping: gologin-web-access scrape|read|scrape-markdown|scrape-text|scrape-json|batch-scrape|batch-extract|search|map|crawl|crawl-start|crawl-status|crawl-result|crawl-errors|extract|change-track|batch-change-track|parse-document
|
|
165
172
|
Browser: gologin-web-access open|search-browser|scrape-screenshot|tabs|tabopen|tabfocus|tabclose|snapshot|click|dblclick|focus|type|fill|hover|select|check|uncheck|press|scroll|scrollintoview|wait|get|back|forward|reload|find|cookies|cookies-import|cookies-clear|storage-export|storage-import|storage-clear|eval|upload|pdf|screenshot|close|sessions|current
|
|
173
|
+
GoLogin API: gologin-web-access cloud-usage|profile-cloud|profile-cookies|profile-fingerprint|profile-proxy|profile-ua
|
|
166
174
|
Agent: gologin-web-access run|batch|jobs|job
|
|
167
175
|
|
|
168
176
|
Key model:
|
|
169
|
-
${"
|
|
177
|
+
${"GOLOGIN_SCRAPING_API_KEY"} powers scraping commands.
|
|
170
178
|
${"GOLOGIN_TOKEN"} powers browser commands.
|
|
171
179
|
Recommended setup: configure both keys up front, even if the current task only needs one path.
|
|
172
180
|
`);
|
|
@@ -18,7 +18,7 @@ function buildBatchExtractCommand() {
|
|
|
18
18
|
.description("Extract structured data from multiple pages using one selector schema.")
|
|
19
19
|
.argument("<urls...>", "One or more URLs")
|
|
20
20
|
.requiredOption("--schema <path>", "Path to a JSON extraction schema")
|
|
21
|
-
.option("--source <source>", "Read source: auto,
|
|
21
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
22
22
|
.option("--concurrency <count>", "Number of concurrent requests", "4")
|
|
23
23
|
.option("--output <path>", "Write the full batch result JSON to a file")
|
|
24
24
|
.option("--summary", "Print one-line summary stats to stderr after the JSON output")
|
|
@@ -17,12 +17,12 @@ const unlocker_1 = require("../lib/unlocker");
|
|
|
17
17
|
const shared_1 = require("./shared");
|
|
18
18
|
function buildBatchScrapeCommand() {
|
|
19
19
|
return (0, shared_1.addProfileOption)((0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("batch-scrape")
|
|
20
|
-
.description("Fetch multiple pages through
|
|
20
|
+
.description("Fetch multiple pages through Scraping API and print a JSON array of results.")
|
|
21
21
|
.argument("<urls...>", "One or more URLs")
|
|
22
22
|
.option("--format <format>", "html, markdown, text, or json", "html")
|
|
23
23
|
.option("--concurrency <count>", "Number of concurrent requests", "4")
|
|
24
24
|
.option("--fallback <mode>", "Structured scrape fallback: none or browser", "none")
|
|
25
|
-
.option("--source <source>", "Read source for --only-main-content mode: auto,
|
|
25
|
+
.option("--source <source>", "Read source for --only-main-content mode: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
26
26
|
.option("--only-main-content", "For html, markdown, or text formats, isolate the most readable content block per page")
|
|
27
27
|
.option("--output <path>", "Write the full batch result JSON to a file")
|
|
28
28
|
.option("--summary", "Print one-line summary stats to stderr after the JSON output")
|
|
@@ -8,9 +8,10 @@ const output_1 = require("../lib/output");
|
|
|
8
8
|
const unlocker_1 = require("../lib/unlocker");
|
|
9
9
|
function buildConfigInitCommand() {
|
|
10
10
|
return new commander_1.Command("init")
|
|
11
|
-
.description("Write ~/.gologin-web-access/config.json with current values or placeholders. Recommended: persist both the
|
|
12
|
-
.option("--
|
|
13
|
-
.option("--web-unlocker-key <key>", "
|
|
11
|
+
.description("Write ~/.gologin-web-access/config.json with current values or placeholders. Recommended: persist both the Scraping API key and the GoLogin token.")
|
|
12
|
+
.option("--scraping-api-key <key>", "Persist a Scraping API key")
|
|
13
|
+
.option("--web-unlocker-api-key <key>", "Legacy alias for --scraping-api-key")
|
|
14
|
+
.option("--web-unlocker-key <key>", "Legacy alias for --scraping-api-key")
|
|
14
15
|
.option("--token <token>", "Persist a GoLogin token")
|
|
15
16
|
.option("--cloud-token <token>", "Backward-compatible alias for --token")
|
|
16
17
|
.option("--default-profile-id <id>", "Persist a default Gologin profile ID")
|
|
@@ -18,7 +19,12 @@ function buildConfigInitCommand() {
|
|
|
18
19
|
.option("--no-validate", "Skip live key validation after writing config")
|
|
19
20
|
.option("--force", "Overwrite an existing config file")
|
|
20
21
|
.action(async (options) => {
|
|
21
|
-
const webUnlockerApiKey = options.
|
|
22
|
+
const webUnlockerApiKey = options.scrapingApiKey ??
|
|
23
|
+
options.webUnlockerApiKey ??
|
|
24
|
+
options.webUnlockerKey ??
|
|
25
|
+
process.env[config_1.ENV_NAMES.webUnlockerApiKey] ??
|
|
26
|
+
process.env.GOLOGIN_WEB_UNLOCKER_API_KEY ??
|
|
27
|
+
process.env.GOLOGIN_WEBUNLOCKER_API_KEY;
|
|
22
28
|
const result = await (0, config_1.initConfigFile)({
|
|
23
29
|
webUnlockerApiKey,
|
|
24
30
|
cloudToken: options.token ??
|
|
@@ -37,7 +43,7 @@ function buildConfigInitCommand() {
|
|
|
37
43
|
(0, output_1.printKeyValueRows)([
|
|
38
44
|
{ label: "Config file", value: result.path },
|
|
39
45
|
{
|
|
40
|
-
label: "
|
|
46
|
+
label: "Scraping API key",
|
|
41
47
|
value: result.config.webUnlockerApiKey ? "written" : "left empty",
|
|
42
48
|
},
|
|
43
49
|
{
|
|
@@ -54,7 +60,7 @@ function buildConfigInitCommand() {
|
|
|
54
60
|
},
|
|
55
61
|
]);
|
|
56
62
|
if (!result.config.webUnlockerApiKey || !result.config.cloudToken) {
|
|
57
|
-
(0, output_1.printText)("Recommended next step: configure both
|
|
63
|
+
(0, output_1.printText)("Recommended next step: configure both GOLOGIN_SCRAPING_API_KEY and GOLOGIN_TOKEN so agents can use scraping and browser flows without asking again.");
|
|
58
64
|
}
|
|
59
65
|
if (options.validate === false) {
|
|
60
66
|
return;
|
|
@@ -63,7 +69,7 @@ function buildConfigInitCommand() {
|
|
|
63
69
|
if (result.config.webUnlockerApiKey) {
|
|
64
70
|
const validation = await (0, unlocker_1.validateWebUnlockerKey)(result.config.webUnlockerApiKey);
|
|
65
71
|
validationRows.push({
|
|
66
|
-
label: "
|
|
72
|
+
label: "Scraping API validation",
|
|
67
73
|
value: validation.ok ? "ok" : `failed${validation.status ? ` (${validation.status})` : ""}: ${validation.detail}`,
|
|
68
74
|
});
|
|
69
75
|
}
|
package/dist/commands/crawl.js
CHANGED
|
@@ -7,7 +7,7 @@ const crawl_1 = require("../lib/crawl");
|
|
|
7
7
|
const output_1 = require("../lib/output");
|
|
8
8
|
function buildCrawlCommand() {
|
|
9
9
|
return new commander_1.Command("crawl")
|
|
10
|
-
.description("Crawl a website through
|
|
10
|
+
.description("Crawl a website through GoLogin Scraping API and return per-page extracted content.")
|
|
11
11
|
.argument("<url>", "Root website URL to crawl")
|
|
12
12
|
.option("--format <format>", "html, markdown, text, or json", "markdown")
|
|
13
13
|
.option("--limit <count>", "Maximum number of pages to visit", "25")
|
package/dist/commands/extract.js
CHANGED
|
@@ -18,7 +18,7 @@ function buildExtractCommand() {
|
|
|
18
18
|
.argument("<url>", "Target URL")
|
|
19
19
|
.requiredOption("--schema <path>", "Path to a JSON extraction schema")
|
|
20
20
|
.option("--output <path>", "Write extracted JSON to a file")
|
|
21
|
-
.option("--source <source>", "Read source: auto,
|
|
21
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
22
22
|
.action(async (url, options) => {
|
|
23
23
|
const config = await (0, config_1.loadConfig)();
|
|
24
24
|
const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.buildCloudUsageCommand = buildCloudUsageCommand;
|
|
7
|
+
exports.buildProfileCloudCommand = buildProfileCloudCommand;
|
|
8
|
+
exports.buildProfileCookiesCommand = buildProfileCookiesCommand;
|
|
9
|
+
exports.buildProfileFingerprintCommand = buildProfileFingerprintCommand;
|
|
10
|
+
exports.buildProfileProxyCommand = buildProfileProxyCommand;
|
|
11
|
+
exports.buildProfileUaCommand = buildProfileUaCommand;
|
|
12
|
+
const fs_1 = require("fs");
|
|
13
|
+
const path_1 = __importDefault(require("path"));
|
|
14
|
+
const commander_1 = require("commander");
|
|
15
|
+
const config_1 = require("../config");
|
|
16
|
+
const cloudApi_1 = require("../lib/cloudApi");
|
|
17
|
+
const errors_1 = require("../lib/errors");
|
|
18
|
+
const output_1 = require("../lib/output");
|
|
19
|
+
/**
 * Resolve the GoLogin token used by the REST API helper commands.
 *
 * Loads the CLI configuration and hands it to `requireCloudToken`,
 * returning whatever that helper returns.
 * NOTE(review): presumably `requireCloudToken` throws when no token is
 * configured — confirm against ../config.
 *
 * @returns {Promise<*>} The value produced by `requireCloudToken`.
 */
async function getCloudToken() {
    return (0, config_1.requireCloudToken)(await (0, config_1.loadConfig)());
}
|
|
23
|
+
/**
 * Parse the `--days` option of `cloud-usage`.
 *
 * A falsy value (undefined, empty string) falls back to 7. Anything else is
 * converted with `Number()` and must be an integer between 1 and 30 inclusive.
 *
 * @param {string | undefined} value Raw option value.
 * @returns {number} The validated number of days.
 * @throws {CliError} When the value is not an integer in the range 1..30.
 */
function parseDays(value) {
    const parsed = value ? Number(value) : 7;
    const withinRange = Number.isInteger(parsed) && parsed >= 1 && parsed <= 30;
    if (withinRange) {
        return parsed;
    }
    throw new errors_1.CliError("--days must be an integer from 1 to 30.");
}
|
|
30
|
+
/**
 * Parse the optional `--page` option.
 *
 * A falsy value means "no page requested" and yields `undefined`. Anything
 * else is converted with `Number()` and must be an integer of at least 1.
 *
 * @param {string | undefined} value Raw option value.
 * @returns {number | undefined} The page number, or `undefined` when omitted.
 * @throws {CliError} When the value is not a positive integer.
 */
function parsePage(value) {
    const requested = value ? Number(value) : undefined;
    if (requested === undefined) {
        return undefined;
    }
    const isPositiveInteger = Number.isInteger(requested) && requested >= 1;
    if (isPositiveInteger) {
        return requested;
    }
    throw new errors_1.CliError("--page must be a positive integer.");
}
|
|
40
|
+
/**
 * Normalize the `--country` option to a lower-case two-letter code.
 *
 * @param {string} value Raw option value, in any letter case.
 * @returns {string} The lower-cased code.
 * @throws {CliError} When the lower-cased value is not exactly two letters a-z.
 */
function normalizeCountryCode(value) {
    const lowered = value.toLowerCase();
    const isTwoLetterCode = /^[a-z]{2}$/.test(lowered);
    if (isTwoLetterCode) {
        return lowered;
    }
    throw new errors_1.CliError("--country must be a 2-letter country code, for example us.");
}
|
|
47
|
+
/**
 * Translate the `--type` option into proxy flags plus a display label.
 *
 * Matching is case-insensitive so that `--type Mobile` or `--type DC` behave
 * like their lower-case forms, consistent with how `--country` is normalized.
 * `datacenter` is accepted as a synonym for `dc` even though the error
 * message only advertises `dc`.
 *
 * @param {string | undefined} value Raw option value; defaults to "residential"
 *   when null or undefined.
 * @returns {{ label: string, isMobile?: true, isDC?: true }} `isMobile` is set
 *   for mobile, `isDC` for datacenter, and neither for residential.
 * @throws {CliError} When the value is not a recognised proxy type.
 */
function normalizeProxyType(value) {
    // String() keeps unexpected non-string input on the CliError path instead
    // of failing with a TypeError on .toLowerCase().
    const type = String(value ?? "residential").toLowerCase();
    switch (type) {
        case "mobile":
            return { isMobile: true, label: "mobile" };
        case "dc":
        case "datacenter":
            return { isDC: true, label: "datacenter" };
        case "residential":
            return { label: "residential" };
        default:
            throw new errors_1.CliError("--type must be one of residential, mobile, or dc.");
    }
}
|
|
60
|
+
/**
 * Map the current Node.js platform to one of the short OS identifiers used
 * by the `--os` option.
 *
 * @returns {"mac" | "win" | "lin"} "mac" on darwin, "win" on win32, and "lin"
 *   for every other platform.
 */
function detectHostOs() {
    switch (process.platform) {
        case "darwin":
            return "mac";
        case "win32":
            return "win";
        default:
            return "lin";
    }
}
|
|
69
|
+
/**
 * Validate the `--os` option, defaulting to the host operating system.
 *
 * Matching is case-insensitive (`Mac`, `WIN`, ...) so the option behaves like
 * `--country`, which is also case-normalized. The returned value is always
 * the canonical lower-case identifier.
 *
 * @param {string | undefined} value Raw option value; when null or undefined
 *   the result of `detectHostOs()` is used.
 * @returns {string} One of "lin", "mac", "win", "android", "android-cloud".
 * @throws {CliError} When the value is not one of the supported identifiers.
 */
function normalizeOs(value) {
    const os = String(value ?? detectHostOs()).toLowerCase();
    if (!["lin", "mac", "win", "android", "android-cloud"].includes(os)) {
        throw new errors_1.CliError("--os must be one of lin, mac, win, android, or android-cloud.");
    }
    return os;
}
|
|
76
|
+
/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * The path is resolved to an absolute path first. Both read failures and
 * JSON syntax errors are reported through the same CliError message, which
 * includes the absolute path and the underlying error text.
 *
 * @param {string} targetPath Path to the JSON file, relative or absolute.
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {CliError} When the file cannot be read or does not contain valid JSON.
 */
async function readJsonFile(targetPath) {
    const resolvedPath = path_1.default.resolve(targetPath);
    try {
        const contents = await fs_1.promises.readFile(resolvedPath, "utf8");
        return JSON.parse(contents);
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        throw new errors_1.CliError(`Failed to read JSON file ${resolvedPath}: ${reason}`);
    }
}
|
|
85
|
+
/**
 * Serialize a value as pretty-printed JSON and write it to disk.
 *
 * Missing parent directories are created first. The output uses two-space
 * indentation and ends with a trailing newline.
 *
 * @param {string} targetPath Destination path, relative or absolute.
 * @param {*} payload Value to serialize with `JSON.stringify`.
 * @returns {Promise<string>} The absolute path that was written.
 */
async function writeJsonFile(targetPath, payload) {
    const destination = path_1.default.resolve(targetPath);
    const parentDirectory = path_1.default.dirname(destination);
    await fs_1.promises.mkdir(parentDirectory, { recursive: true });
    const serialized = `${JSON.stringify(payload, null, 2)}\n`;
    await fs_1.promises.writeFile(destination, serialized, "utf8");
    return destination;
}
|
|
91
|
+
/**
 * Build the `cloud-usage` command.
 *
 * Exactly one of `--profile` or `--workspace` must be given:
 * - `--profile <id>` requests `GET /cloud-usage/profile/<id>/stats`.
 * - `--workspace <id>` requests `GET /cloud-usage/stats` with `workspaceId`
 *   and `days` query parameters; `--days` is validated by `parseDays` and is
 *   only used on this path.
 *
 * With `--json` the raw payload is printed as JSON; otherwise a single
 * key=value line containing the JSON-encoded payload is printed.
 *
 * @returns {Command} The configured commander command.
 */
function buildCloudUsageCommand() {
    return new commander_1.Command("cloud-usage")
        .description("Read GoLogin Cloud Browser usage statistics.")
        .option("--profile <profileId>", "Profile ID to inspect")
        .option("--workspace <workspaceId>", "Workspace ID to inspect")
        .option("--days <days>", "Workspace stats range from 1 to 30 days", "7")
        .option("--json", "Print JSON output")
        .action(async (options) => {
        if (options.profile && options.workspace) {
            throw new errors_1.CliError("Use either --profile or --workspace, not both.");
        }
        const token = await getCloudToken();
        if (options.profile) {
            // Percent-encode the id so characters such as "/", "?" or "#" cannot
            // change which endpoint is requested. Ordinary ids are unaffected.
            const profileSegment = encodeURIComponent(options.profile);
            const payload = await (0, cloudApi_1.gologinApiRequest)(token, "GET", `/cloud-usage/profile/${profileSegment}/stats`);
            if (options.json) {
                (0, output_1.printJson)(payload);
                return;
            }
            (0, output_1.printText)(`profile=${options.profile} usage=${JSON.stringify(payload)}`);
            return;
        }
        if (!options.workspace) {
            throw new errors_1.CliError("Usage: gologin-web-access cloud-usage --profile <profileId> | --workspace <workspaceId> [--days <1-30>] [--json]");
        }
        const days = parseDays(options.days);
        const payload = await (0, cloudApi_1.gologinApiRequest)(token, "GET", "/cloud-usage/stats", {
            query: { workspaceId: options.workspace, days },
        });
        if (options.json) {
            (0, output_1.printJson)(payload);
            return;
        }
        (0, output_1.printText)(`workspace=${options.workspace} days=${days} usage=${JSON.stringify(payload)}`);
    });
}
|
|
126
|
+
/**
 * Build the `profile-cloud` command group with two subcommands:
 * `start <profileId>` (POST /browser/<id>/web) and
 * `stop <profileId>` (DELETE /browser/<id>/web).
 *
 * @returns The configured commander command.
 */
function buildProfileCloudCommand() {
    const root = new commander_1.Command("profile-cloud").description("Start or stop a GoLogin profile in Cloud Browser.");
    // Start the profile and report the remote URL when the API returns one.
    const startProfile = async (profileId, options) => {
        const token = await getCloudToken();
        const payload = await (0, cloudApi_1.gologinApiRequest)(token, "POST", `/browser/${profileId}/web`, { body: {} });
        if (!options.json) {
            const details = (0, cloudApi_1.asObjectPayload)(payload);
            const remoteSuffix = typeof details.remoteOrbitaUrl === "string" ? ` remote=${details.remoteOrbitaUrl}` : "";
            (0, output_1.printText)(`profile=${profileId} cloud=started${remoteSuffix}`);
            return;
        }
        // Fall back to a synthetic object when the API response body is null/undefined.
        (0, output_1.printJson)(payload ?? { profileId, started: true });
    };
    // Stop the profile; the response body is not used.
    const stopProfile = async (profileId, options) => {
        const token = await getCloudToken();
        await (0, cloudApi_1.gologinApiRequest)(token, "DELETE", `/browser/${profileId}/web`);
        if (!options.json) {
            (0, output_1.printText)(`profile=${profileId} cloud=stopped`);
            return;
        }
        (0, output_1.printJson)({ profileId, stopped: true });
    };
    root
        .command("start")
        .argument("<profileId>", "Profile ID")
        .option("--json", "Print JSON output")
        .action(startProfile);
    root
        .command("stop")
        .argument("<profileId>", "Profile ID")
        .option("--json", "Print JSON output")
        .action(stopProfile);
    return root;
}
|
|
158
|
+
/**
 * Build the `profile-cookies` command group with two subcommands:
 * `export <profileId>` (GET /browser/<id>/cookies) and
 * `import <profileId> <cookiesJson>` (POST /browser/<id>/cookies).
 *
 * @returns The configured commander command.
 */
function buildProfileCookiesCommand() {
    const root = new commander_1.Command("profile-cookies").description("Export or import cookies from the GoLogin profile database.");
    // Export: write the cookies to --output when given, otherwise print them.
    const exportCookies = async (profileId, options) => {
        const token = await getCloudToken();
        const cookies = await (0, cloudApi_1.gologinApiRequest)(token, "GET", `/browser/${profileId}/cookies`);
        if (options.output) {
            // Only the written file's absolute path is printed in this mode.
            const writtenPath = await writeJsonFile(options.output, cookies);
            (0, output_1.printText)(writtenPath);
            return;
        }
        // --json wraps the cookies with the profile ID; without it the raw payload is printed.
        const printable = options.json ? { profileId, cookies } : cookies;
        (0, output_1.printJson)(printable);
    };
    // Import: the file is read and checked before a token is requested.
    const importCookies = async (profileId, cookiesJson, options) => {
        const cookies = await readJsonFile(cookiesJson);
        if (!Array.isArray(cookies)) {
            throw new errors_1.CliError("Cookie import file must contain a JSON array.");
        }
        const token = await getCloudToken();
        // cleanCookies is sent only when --clean was passed (undefined otherwise).
        const query = { fromUser: true, cleanCookies: options.clean || undefined };
        await (0, cloudApi_1.gologinApiRequest)(token, "POST", `/browser/${profileId}/cookies`, {
            query,
            body: cookies,
        });
        if (!options.json) {
            const cleanSuffix = options.clean ? " clean=true" : "";
            (0, output_1.printText)(`profile=${profileId} importedCookies=${cookies.length}${cleanSuffix}`);
            return;
        }
        (0, output_1.printJson)({ profileId, imported: cookies.length, cleanCookies: options.clean === true });
    };
    root
        .command("export")
        .argument("<profileId>", "Profile ID")
        .option("--output <path>", "Write cookies JSON to a file")
        .option("--json", "Print JSON output with profile metadata")
        .action(exportCookies);
    root
        .command("import")
        .argument("<profileId>", "Profile ID")
        .argument("<cookiesJson>", "Cookie JSON file")
        .option("--clean", "Clear existing cookies before importing")
        .option("--json", "Print JSON output")
        .action(importCookies);
    return root;
}
|
|
198
|
+
/**
 * Build the `profile-fingerprint` command group. Its single subcommand,
 * `refresh <profileIds...>`, sends the IDs to PATCH /browser/fingerprints.
 *
 * @returns The configured commander command.
 */
function buildProfileFingerprintCommand() {
    const root = new commander_1.Command("profile-fingerprint").description("Refresh fingerprints for one or more GoLogin profiles.");
    const refreshFingerprints = async (profileIds, options) => {
        const token = await getCloudToken();
        // The request body key is spelled `browsersIds` in this call.
        const requestOptions = { body: { browsersIds: profileIds } };
        const payload = await (0, cloudApi_1.gologinApiRequest)(token, "PATCH", "/browser/fingerprints", requestOptions);
        if (!options.json) {
            (0, output_1.printText)(`refreshedFingerprints=${profileIds.length} profiles=${profileIds.join(",")}`);
            return;
        }
        (0, output_1.printJson)(payload);
    };
    root
        .command("refresh")
        .argument("<profileIds...>", "Profile IDs")
        .option("--json", "Print JSON output")
        .action(refreshFingerprints);
    return root;
}
|
|
217
|
+
/**
 * Build the `profile-proxy` command group with three subcommands:
 * `list` (GET /proxy/v2), `traffic` (GET /users-proxies/geolocation/traffic)
 * and `add-gologin <profileId>` (POST /users-proxies/mobile-proxy).
 *
 * @returns The configured commander command.
 */
function buildProfileProxyCommand() {
    const root = new commander_1.Command("profile-proxy").description("Manage GoLogin proxies through the REST API.");
    // list: fetch one page of proxies.
    const listProxies = async (options) => {
        const token = await getCloudToken();
        // parsePage is defined elsewhere in this file.
        const payload = await (0, cloudApi_1.gologinApiRequest)(token, "GET", "/proxy/v2", {
            query: { page: parsePage(options.page) },
        });
        if (!options.json) {
            const listing = (0, cloudApi_1.asObjectPayload)(payload);
            // A missing or non-array `proxies` field is reported as zero proxies.
            const count = Array.isArray(listing.proxies) ? listing.proxies.length : 0;
            (0, output_1.printText)(`proxies=${count} hasMore=${listing.hasMore === true}`);
            return;
        }
        (0, output_1.printJson)(payload);
    };
    // traffic: always prints the API response as JSON (there is no --json flag here).
    const showTraffic = async () => {
        const token = await getCloudToken();
        const traffic = await (0, cloudApi_1.gologinApiRequest)(token, "GET", "/users-proxies/geolocation/traffic");
        (0, output_1.printJson)(traffic);
    };
    // add-gologin: create a managed proxy and link it to the given profile.
    const addManagedProxy = async (profileId, options) => {
        const token = await getCloudToken();
        // normalizeCountryCode / normalizeProxyType are defined elsewhere in this file.
        const countryCode = normalizeCountryCode(options.country);
        const proxyType = normalizeProxyType(options.type);
        // Optional fields are added only when present; key order matches the request body layout.
        const body = {
            countryCode,
            profileIdToLink: profileId,
            customName: options.name ?? `gologin-${countryCode}-${profileId.slice(0, 6)}`,
            ...(options.city ? { city: options.city } : {}),
            ...(proxyType.isMobile !== undefined ? { isMobile: proxyType.isMobile } : {}),
            ...(proxyType.isDC !== undefined ? { isDC: proxyType.isDC } : {}),
        };
        const payload = await (0, cloudApi_1.gologinApiRequest)(token, "POST", "/users-proxies/mobile-proxy", { body });
        if (!options.json) {
            (0, output_1.printText)(`profile=${profileId} proxy=gologin:${countryCode} type=${proxyType.label}`);
            return;
        }
        // Fall back to a synthetic object when the API response body is null/undefined.
        (0, output_1.printJson)(payload ?? { profileId, countryCode, type: proxyType.label });
    };
    root
        .command("list")
        .option("--page <page>", "Page number", "1")
        .option("--json", "Print JSON output")
        .action(listProxies);
    root
        .command("traffic")
        .description("Read GoLogin managed proxy traffic balance/usage.")
        .action(showTraffic);
    root
        .command("add-gologin")
        .argument("<profileId>", "Profile ID to link the managed proxy to")
        .requiredOption("--country <cc>", "2-letter country code, for example us")
        .option("--city <city>", "Optional city name")
        .option("--type <type>", "residential, mobile, or dc", "residential")
        .option("--name <name>", "Custom proxy name")
        .option("--json", "Print JSON output")
        .action(addManagedProxy);
    return root;
}
|
|
278
|
+
/**
 * Build the `profile-ua` command group with two subcommands:
 * `latest` (GET /browser/latest-useragent) and
 * `update [profileIds...]` (PATCH /browser/update_ua_to_new_browser_v).
 *
 * @returns The configured commander command.
 */
function buildProfileUaCommand() {
    const root = new commander_1.Command("profile-ua").description("Read latest GoLogin user agent or update profile UA.");
    // latest: the --os value is normalized before a token is requested.
    const showLatest = async (options) => {
        // normalizeOs is defined elsewhere in this file.
        const os = normalizeOs(options.os);
        const token = await getCloudToken();
        const payload = await (0, cloudApi_1.gologinApiRequest)(token, "GET", "/browser/latest-useragent", {
            query: { os },
        });
        if (!options.json) {
            // String responses are printed verbatim; anything else is JSON-encoded.
            const rendered = typeof payload === "string" ? payload : JSON.stringify(payload);
            (0, output_1.printText)(`os=${os} latestUserAgent=${rendered}`);
            return;
        }
        (0, output_1.printJson)(payload);
    };
    // update: requires at least one profile ID unless --all-profiles is set.
    const updateUserAgents = async (profileIds, options) => {
        if (profileIds.length === 0 && !options.allProfiles) {
            throw new errors_1.CliError("Usage: gologin-web-access profile-ua update <profileId...> [--all-profiles] [--workspace <id>] [--json]");
        }
        const token = await getCloudToken();
        const requestOptions = {
            query: { currentWorkspace: options.workspace },
            body: {
                browserIds: profileIds,
                updateUaToNewBrowserV: true,
                updateAllProfiles: options.allProfiles === true,
            },
        };
        const payload = await (0, cloudApi_1.gologinApiRequest)(token, "PATCH", "/browser/update_ua_to_new_browser_v", requestOptions);
        if (!options.json) {
            const summary = options.allProfiles
                ? "updatedUserAgent=allProfiles"
                : `updatedUserAgentProfiles=${profileIds.length} profiles=${profileIds.join(",")}`;
            (0, output_1.printText)(summary);
            return;
        }
        // Fall back to a synthetic object when the API response body is null/undefined.
        (0, output_1.printJson)(payload ?? { updated: true, profileIds, allProfiles: options.allProfiles === true });
    };
    root
        .command("latest")
        .option("--os <os>", "lin, mac, win, android, or android-cloud")
        .option("--json", "Print JSON output")
        .action(showLatest);
    root
        .command("update")
        .argument("[profileIds...]", "Profile IDs")
        .option("--all-profiles", "Update all profiles in the current workspace")
        .option("--workspace <id>", "Current workspace ID")
        .option("--json", "Print JSON output")
        .action(updateUserAgents);
    return root;
}
|
package/dist/commands/map.js
CHANGED
|
@@ -7,7 +7,7 @@ const output_1 = require("../lib/output");
|
|
|
7
7
|
const crawl_1 = require("../lib/crawl");
|
|
8
8
|
function buildMapCommand() {
|
|
9
9
|
return new commander_1.Command("map")
|
|
10
|
-
.description("Discover internal website links through
|
|
10
|
+
.description("Discover internal website links through GoLogin Scraping API.")
|
|
11
11
|
.argument("<url>", "Root website URL to map")
|
|
12
12
|
.option("--limit <count>", "Maximum number of pages to visit", "100")
|
|
13
13
|
.option("--max-depth <depth>", "Maximum link depth from the root URL", "2")
|
package/dist/commands/read.js
CHANGED
|
@@ -12,7 +12,7 @@ function buildReadCommand() {
|
|
|
12
12
|
.description("Read the main content of a docs page or article with automatic fallback to Cloud Browser when needed.")
|
|
13
13
|
.argument("<url>", "URL to read")
|
|
14
14
|
.option("--format <format>", "Output format: html, markdown, or text", "text")
|
|
15
|
-
.option("--source <source>", "Read source: auto,
|
|
15
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
16
16
|
.action(async (url, options) => {
|
|
17
17
|
const config = await (0, config_1.loadConfig)();
|
|
18
18
|
const format = normalizeReadFormat(options.format);
|
package/dist/commands/scrape.js
CHANGED
|
@@ -8,7 +8,7 @@ const output_1 = require("../lib/output");
|
|
|
8
8
|
const unlocker_1 = require("../lib/unlocker");
|
|
9
9
|
function buildScrapeCommand() {
|
|
10
10
|
return (0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape")
|
|
11
|
-
.description("Fetch rendered HTML through
|
|
11
|
+
.description("Fetch rendered HTML through GoLogin Scraping API.")
|
|
12
12
|
.argument("<url>", "URL to scrape")
|
|
13
13
|
.action(async (url, options) => {
|
|
14
14
|
const config = await (0, config_1.loadConfig)();
|
|
@@ -8,7 +8,7 @@ const structuredScrape_1 = require("../lib/structuredScrape");
|
|
|
8
8
|
const shared_1 = require("./shared");
|
|
9
9
|
function buildScrapeJsonCommand() {
|
|
10
10
|
return (0, shared_1.addProfileOption)((0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape-json")
|
|
11
|
-
.description("Fetch a page through
|
|
11
|
+
.description("Fetch a page through Scraping API and print a structured JSON envelope.")
|
|
12
12
|
.argument("<url>", "URL to scrape")
|
|
13
13
|
.option("--fallback <mode>", "none or browser structured fallback for JS-heavy pages", "none")
|
|
14
14
|
.action(async (url, options) => {
|
|
@@ -9,9 +9,9 @@ const shared_1 = require("./shared");
|
|
|
9
9
|
const output_1 = require("../lib/output");
|
|
10
10
|
function buildScrapeMarkdownCommand() {
|
|
11
11
|
return (0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape-markdown")
|
|
12
|
-
.description("Fetch a page through
|
|
12
|
+
.description("Fetch a page through Scraping API and print Markdown.")
|
|
13
13
|
.argument("<url>", "URL to scrape")
|
|
14
|
-
.option("--source <source>", "Read source: auto,
|
|
14
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
15
15
|
.action(async (url, options) => {
|
|
16
16
|
const config = await (0, config_1.loadConfig)();
|
|
17
17
|
const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
|
|
@@ -9,9 +9,9 @@ const shared_1 = require("./shared");
|
|
|
9
9
|
const output_1 = require("../lib/output");
|
|
10
10
|
function buildScrapeTextCommand() {
|
|
11
11
|
return (0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape-text")
|
|
12
|
-
.description("Fetch a page through
|
|
12
|
+
.description("Fetch a page through Scraping API and print plain text.")
|
|
13
13
|
.argument("<url>", "URL to scrape")
|
|
14
|
-
.option("--source <source>", "Read source: auto,
|
|
14
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
15
15
|
.action(async (url, options) => {
|
|
16
16
|
const config = await (0, config_1.loadConfig)();
|
|
17
17
|
const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
|
package/dist/commands/search.js
CHANGED
|
@@ -12,7 +12,7 @@ function buildSearchCommand() {
|
|
|
12
12
|
.option("--limit <count>", "Maximum number of results", "10")
|
|
13
13
|
.option("--country <country>", "Country code for Google search", "us")
|
|
14
14
|
.option("--language <language>", "Language for Google search", "en")
|
|
15
|
-
.option("--source <mode>", "Search path: auto,
|
|
15
|
+
.option("--source <mode>", "Search path: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
16
16
|
.action(async (query, options) => {
|
|
17
17
|
const config = await (0, config_1.loadConfig)();
|
|
18
18
|
const result = await (0, search_1.searchWeb)(query, config, {
|
|
@@ -32,6 +32,9 @@ function normalizeLimit(value) {
|
|
|
32
32
|
return Math.min(Math.floor(parsed), 100);
|
|
33
33
|
}
|
|
34
34
|
function normalizeSource(value) {
|
|
35
|
+
if (value === "scraping" || value === "scraping-api") {
|
|
36
|
+
return "unlocker";
|
|
37
|
+
}
|
|
35
38
|
if (value === "auto" || value === "unlocker" || value === "browser") {
|
|
36
39
|
return value;
|
|
37
40
|
}
|
package/dist/config.js
CHANGED
|
@@ -22,13 +22,13 @@ const LEGACY_CONFIG_DIR = ".gologin-web";
|
|
|
22
22
|
const CONFIG_FILENAME = "config.json";
|
|
23
23
|
exports.DEFAULT_DAEMON_PORT = 4590;
|
|
24
24
|
exports.ENV_NAMES = {
|
|
25
|
-
webUnlockerApiKey: "
|
|
25
|
+
webUnlockerApiKey: "GOLOGIN_SCRAPING_API_KEY",
|
|
26
26
|
cloudToken: "GOLOGIN_TOKEN",
|
|
27
27
|
defaultProfileId: "GOLOGIN_DEFAULT_PROFILE_ID",
|
|
28
28
|
daemonPort: "GOLOGIN_DAEMON_PORT",
|
|
29
29
|
};
|
|
30
30
|
const LEGACY_ENV_NAMES = {
|
|
31
|
-
webUnlockerApiKey: ["GOLOGIN_WEBUNLOCKER_API_KEY"],
|
|
31
|
+
webUnlockerApiKey: ["GOLOGIN_WEB_UNLOCKER_API_KEY", "GOLOGIN_WEBUNLOCKER_API_KEY"],
|
|
32
32
|
cloudToken: ["GOLOGIN_CLOUD_TOKEN"],
|
|
33
33
|
defaultProfileId: ["GOLOGIN_PROFILE_ID"],
|
|
34
34
|
daemonPort: [],
|
|
@@ -100,7 +100,7 @@ function requireWebUnlockerKey(config) {
|
|
|
100
100
|
}
|
|
101
101
|
function requireCloudToken(config) {
|
|
102
102
|
if (!config.cloudToken) {
|
|
103
|
-
throw new errors_1.MissingCredentialError(exports.ENV_NAMES.cloudToken, "browser commands like `gologin-web-access open`");
|
|
103
|
+
throw new errors_1.MissingCredentialError(exports.ENV_NAMES.cloudToken, "browser and GoLogin API commands like `gologin-web-access open` or `gologin-web-access profile-proxy`");
|
|
104
104
|
}
|
|
105
105
|
return config.cloudToken;
|
|
106
106
|
}
|
|
@@ -119,7 +119,7 @@ function getRecommendedCredentialStatus(config) {
|
|
|
119
119
|
return {
|
|
120
120
|
ready: true,
|
|
121
121
|
missing,
|
|
122
|
-
detail: "complete (
|
|
122
|
+
detail: "complete (Scraping API + Cloud Browser configured)",
|
|
123
123
|
};
|
|
124
124
|
}
|
|
125
125
|
return {
|
package/dist/doctor.js
CHANGED
|
@@ -12,7 +12,7 @@ async function runDoctor(options = {}) {
|
|
|
12
12
|
const agentCli = await (0, agentCli_1.inspectAgentCli)();
|
|
13
13
|
const recommended = (0, config_1.getRecommendedCredentialStatus)(config);
|
|
14
14
|
checks.push({
|
|
15
|
-
name: "
|
|
15
|
+
name: "Scraping API key",
|
|
16
16
|
status: config.webUnlockerApiKey ? "ok" : "warn",
|
|
17
17
|
detail: config.webUnlockerApiKey ? `configured via ${config.sources.webUnlockerApiKey}` : "missing",
|
|
18
18
|
});
|
|
@@ -25,7 +25,7 @@ async function runDoctor(options = {}) {
|
|
|
25
25
|
name: "Recommended full setup",
|
|
26
26
|
status: recommended.ready ? "ok" : "warn",
|
|
27
27
|
detail: recommended.ready
|
|
28
|
-
? "both
|
|
28
|
+
? "both GOLOGIN_SCRAPING_API_KEY and GOLOGIN_TOKEN are configured"
|
|
29
29
|
: `missing ${recommended.missing.join(" and ")}`,
|
|
30
30
|
});
|
|
31
31
|
checks.push({
|
package/dist/lib/cloudApi.js
CHANGED
|
@@ -1,9 +1,70 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.gologinApiRequest = gologinApiRequest;
|
|
4
|
+
exports.asObjectPayload = asObjectPayload;
|
|
3
5
|
exports.validateCloudToken = validateCloudToken;
|
|
4
6
|
exports.getProfile = getProfile;
|
|
5
7
|
const errors_1 = require("./errors");
|
|
6
8
|
const GOLOGIN_API_BASE_URL = "https://api.gologin.com";
|
|
9
|
+
/**
 * Build the request URL for a GoLogin API call.
 *
 * A `path` that begins with "http" is used as-is; any other path is appended
 * to GOLOGIN_API_BASE_URL. Query entries whose value is `undefined` are
 * omitted; every other value is converted with String() and set on the URL.
 *
 * @param {string} path - API path or a full URL.
 * @param {Object} [query] - Query parameters keyed by name.
 * @returns {string} The serialized URL.
 */
function buildApiUrl(path, query) {
    const target = path.startsWith("http") ? path : `${GOLOGIN_API_BASE_URL}${path}`;
    const url = new URL(target);
    Object.entries(query ?? {})
        .filter(([, value]) => value !== undefined)
        .forEach(([key, value]) => {
            url.searchParams.set(key, String(value));
        });
    return url.toString();
}
|
|
18
|
+
/**
 * Read a response body and decode it on a best-effort basis.
 *
 * @param response - Object exposing an async `text()` method (e.g. a fetch Response).
 * @returns {Promise<*>} `undefined` for an empty body, the parsed value when
 *   the body is valid JSON, otherwise the raw text.
 */
async function readPayload(response) {
    const raw = await response.text();
    if (!raw) {
        return undefined;
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        // Not JSON: hand the raw text back so callers can still surface it.
        return raw;
    }
    return parsed;
}
|
|
30
|
+
/**
 * Derive a short detail string (at most 500 characters) from an error payload.
 *
 * - Falsy payloads yield `undefined`.
 * - Strings are truncated and returned.
 * - Objects return the first non-empty string found under `message`, `error`
 *   or `reason` (in that order); otherwise their JSON serialization.
 * - Anything else is converted with String().
 *
 * @param {*} payload - Decoded response body.
 * @returns {string|undefined} The truncated detail text, if any.
 */
function formatApiErrorDetail(payload) {
    const clip = (text) => text.slice(0, 500);
    if (!payload) {
        return undefined;
    }
    switch (typeof payload) {
        case "string":
            return clip(payload);
        case "object": {
            const detailKey = ["message", "error", "reason"].find((key) => typeof payload[key] === "string" && payload[key].length > 0);
            return detailKey !== undefined ? clip(payload[detailKey]) : clip(JSON.stringify(payload));
        }
        default:
            return clip(String(payload));
    }
}
|
|
48
|
+
/**
 * Send an authenticated request to the GoLogin API and return the decoded body.
 *
 * The token is sent as a Bearer Authorization header. When `options.body` is
 * defined it is JSON-encoded and a JSON Content-Type header is added.
 *
 * @param {string} token - API token.
 * @param {string} method - HTTP method.
 * @param {string} path - API path (or full URL) passed to buildApiUrl.
 * @param {Object} [options] - Optional `query` and `body`.
 * @returns {Promise<*>} The payload decoded by readPayload.
 * @throws {HttpError} When the response status is not OK; carries the status
 *   and a detail string derived from the response body.
 */
async function gologinApiRequest(token, method, path, options = {}) {
    const url = buildApiUrl(path, options.query);
    const headers = { Authorization: `Bearer ${token}` };
    const init = { method, headers, body: undefined };
    if (options.body !== undefined) {
        headers["Content-Type"] = "application/json";
        init.body = JSON.stringify(options.body);
    }
    const response = await fetch(url, init);
    // The body is read before the status check so error responses can supply a detail message.
    const payload = await readPayload(response);
    if (response.ok) {
        return payload;
    }
    throw new errors_1.HttpError(`GoLogin API ${method} ${path} failed with status ${response.status}.`, response.status, formatApiErrorDetail(payload));
}
|
|
65
|
+
/**
 * Coerce a decoded payload into something safe for property access.
 *
 * @param {*} value - Decoded response body.
 * @returns {Object} `value` itself when it is a non-null object (arrays
 *   included); otherwise a fresh empty object.
 */
function asObjectPayload(value) {
    if (value && typeof value === "object") {
        return value;
    }
    return {};
}
|
|
7
68
|
async function validateCloudToken(token) {
|
|
8
69
|
const response = await fetch(`${GOLOGIN_API_BASE_URL}/browser/v2`, {
|
|
9
70
|
headers: {
|
package/dist/lib/errors.js
CHANGED
|
@@ -26,7 +26,7 @@ class MissingCredentialError extends CliError {
|
|
|
26
26
|
constructor(envName, commandGroup) {
|
|
27
27
|
super(`Missing ${envName}. This is required for ${commandGroup}.`, 1, [
|
|
28
28
|
"This CLI only reads credentials from environment variables or ~/.gologin-web-access/config.json.",
|
|
29
|
-
"Recommended setup: configure both
|
|
29
|
+
"Recommended setup: configure both GOLOGIN_SCRAPING_API_KEY and GOLOGIN_TOKEN up front so agents do not stop to ask again. GOLOGIN_WEB_UNLOCKER_API_KEY is still accepted as a legacy alias.",
|
|
30
30
|
`Set ${envName} in your environment or add it to ~/.gologin-web-access/config.json.`,
|
|
31
31
|
"Helpful commands: gologin-web-access config init, gologin-web-access config show, gologin-web-access doctor.",
|
|
32
32
|
].join("\n"));
|
package/dist/lib/output.js
CHANGED
package/dist/lib/readSource.js
CHANGED
|
@@ -16,6 +16,9 @@ function normalizeReadSourceMode(value, defaultMode = "auto") {
|
|
|
16
16
|
if (!value) {
|
|
17
17
|
return defaultMode;
|
|
18
18
|
}
|
|
19
|
+
if (value === "scraping" || value === "scraping-api") {
|
|
20
|
+
return "unlocker";
|
|
21
|
+
}
|
|
19
22
|
if (value === "auto" || value === "unlocker" || value === "browser") {
|
|
20
23
|
return value;
|
|
21
24
|
}
|
package/dist/lib/search.js
CHANGED
|
@@ -294,7 +294,7 @@ function classifySearchPage(engine, html, results) {
|
|
|
294
294
|
}
|
|
295
295
|
async function searchViaUnlocker(query, config, options, engine) {
|
|
296
296
|
if (!config.webUnlockerApiKey) {
|
|
297
|
-
throw new errors_1.CliError("Missing
|
|
297
|
+
throw new errors_1.CliError("Missing GOLOGIN_SCRAPING_API_KEY for Scraping API search.");
|
|
298
298
|
}
|
|
299
299
|
const searchUrl = buildSearchUrl(engine, query, options);
|
|
300
300
|
const scraped = await (0, unlocker_1.scrapeRenderedHtml)(searchUrl, config.webUnlockerApiKey);
|
package/dist/lib/unlocker.js
CHANGED
|
@@ -44,7 +44,7 @@ class WebUnlockerClient {
|
|
|
44
44
|
});
|
|
45
45
|
if (!response.ok) {
|
|
46
46
|
const body = await safeReadText(response, this.timeoutMs);
|
|
47
|
-
throw new errors_1.HttpError(`
|
|
47
|
+
throw new errors_1.HttpError(`Scraping API request failed with status ${response.status}.`, response.status, body ? truncate(body, 300) : undefined);
|
|
48
48
|
}
|
|
49
49
|
const content = await readResponseTextWithTimeout(response, this.timeoutMs);
|
|
50
50
|
return {
|
|
@@ -132,7 +132,7 @@ async function fetchWithRetry(url, options) {
|
|
|
132
132
|
};
|
|
133
133
|
}
|
|
134
134
|
const body = await safeReadText(response, options.timeoutMs);
|
|
135
|
-
const error = new errors_1.HttpError(`
|
|
135
|
+
const error = new errors_1.HttpError(`Scraping API request failed with status ${response.status}.`, response.status, body ? truncate(body, 300) : undefined);
|
|
136
136
|
const retriable = attempt < options.maxRetries && isRetriableStatus(response.status);
|
|
137
137
|
attempts.push({
|
|
138
138
|
attempt: attempt + 1,
|
|
@@ -154,10 +154,10 @@ async function fetchWithRetry(url, options) {
|
|
|
154
154
|
throw error;
|
|
155
155
|
}
|
|
156
156
|
const normalizedError = error instanceof Error && error.name === "AbortError"
|
|
157
|
-
? new errors_1.HttpError("
|
|
157
|
+
? new errors_1.HttpError("Scraping API request timed out.", 408)
|
|
158
158
|
: error instanceof Error
|
|
159
159
|
? new errors_1.HttpError(error.message, 500)
|
|
160
|
-
: new errors_1.HttpError("
|
|
160
|
+
: new errors_1.HttpError("Scraping API request failed.", 500);
|
|
161
161
|
const retriable = attempt < options.maxRetries;
|
|
162
162
|
attempts.push({
|
|
163
163
|
attempt: attempt + 1,
|
|
@@ -175,11 +175,11 @@ async function fetchWithRetry(url, options) {
|
|
|
175
175
|
throw attachRequestMeta(lastStatusError, attempts);
|
|
176
176
|
}
|
|
177
177
|
if (lastError instanceof Error && lastError.name === "AbortError") {
|
|
178
|
-
throw attachRequestMeta(new errors_1.HttpError("
|
|
178
|
+
throw attachRequestMeta(new errors_1.HttpError("Scraping API request timed out.", 408), attempts);
|
|
179
179
|
}
|
|
180
180
|
throw attachRequestMeta(lastError instanceof Error
|
|
181
181
|
? new errors_1.HttpError(lastError.message, 500)
|
|
182
|
-
: new errors_1.HttpError("
|
|
182
|
+
: new errors_1.HttpError("Scraping API request failed.", 500), attempts);
|
|
183
183
|
}
|
|
184
184
|
function buildScrapeRequestMeta(attempts) {
|
|
185
185
|
return {
|
|
@@ -230,7 +230,7 @@ async function readResponseTextWithTimeout(response, timeoutMs) {
|
|
|
230
230
|
new Promise((_, reject) => {
|
|
231
231
|
timer = setTimeout(() => {
|
|
232
232
|
void response.body?.cancel().catch(() => undefined);
|
|
233
|
-
reject(new errors_1.HttpError("
|
|
233
|
+
reject(new errors_1.HttpError("Scraping API response body timed out.", 408));
|
|
234
234
|
}, timeoutMs);
|
|
235
235
|
}),
|
|
236
236
|
]);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gologin-web-access",
|
|
3
|
-
"version": "0.3.
|
|
4
|
-
"description": "Unified web access CLI for developers and AI agents to read and interact with the web using
|
|
3
|
+
"version": "0.3.4",
|
|
4
|
+
"description": "Unified web access CLI for developers and AI agents to read and interact with the web using the GoLogin Scraping API and Cloud Browser.",
|
|
5
5
|
"main": "dist/cli.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"gologin-web-access": "dist/cli.js"
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
"gologin",
|
|
40
40
|
"web-access",
|
|
41
41
|
"cli",
|
|
42
|
+
"scraping-api",
|
|
42
43
|
"webunlocker",
|
|
43
44
|
"scraping",
|
|
44
45
|
"cloud-browser",
|