gologin-web-access 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +3 -3
- package/README.md +23 -23
- package/dist/cli.js +3 -3
- package/dist/commands/batchExtract.js +1 -1
- package/dist/commands/batchScrape.js +2 -2
- package/dist/commands/configInit.js +13 -7
- package/dist/commands/crawl.js +1 -1
- package/dist/commands/extract.js +1 -1
- package/dist/commands/map.js +1 -1
- package/dist/commands/read.js +1 -1
- package/dist/commands/scrape.js +1 -1
- package/dist/commands/scrapeJson.js +1 -1
- package/dist/commands/scrapeMarkdown.js +2 -2
- package/dist/commands/scrapeText.js +2 -2
- package/dist/commands/search.js +4 -1
- package/dist/config.js +3 -3
- package/dist/doctor.js +2 -2
- package/dist/lib/errors.js +1 -1
- package/dist/lib/readSource.js +3 -0
- package/dist/lib/search.js +1 -1
- package/dist/lib/unlocker.js +7 -7
- package/package.json +3 -2
package/CHANGELOG.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
-
- browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contains both
|
|
5
|
+
- browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contain both Scraping API and Cloud Browser flows
|
|
6
6
|
- doctor now reports the embedded browser runtime source and version
|
|
7
7
|
|
|
8
8
|
## 0.3.2 - 2026-04-03
|
|
@@ -17,9 +17,9 @@ Initial public release of Gologin Web Access.
|
|
|
17
17
|
|
|
18
18
|
Highlights:
|
|
19
19
|
|
|
20
|
-
- Unified CLI entry point for
|
|
20
|
+
- Unified CLI entry point for Gologin Scraping API and Gologin Cloud Browser workflows
|
|
21
21
|
- Scraping commands: `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, `batch-scrape`
|
|
22
22
|
- Browser commands: `open`, `snapshot`, `click`, `type`, `screenshot`, `close`, `sessions`, `current`
|
|
23
|
-
- Clear two-key configuration model with `
|
|
23
|
+
- Clear two-key configuration model with `GOLOGIN_SCRAPING_API_KEY` and `GOLOGIN_TOKEN`
|
|
24
24
|
- `doctor`, `config show`, and `config init` to reduce setup friction
|
|
25
25
|
- Compatibility support for legacy env names used by existing Gologin tools
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Gologin Web Access
|
|
2
2
|
|
|
3
|
-
Gologin Web Access lets developers and AI agents read and interact with the web using
|
|
3
|
+
Gologin Web Access lets developers and AI agents read and interact with the web using Gologin Scraping API and Gologin Cloud Browser.
|
|
4
4
|
|
|
5
5
|
This is a unified web access layer, not just a scraping tool and not just a browser automation tool.
|
|
6
6
|
|
|
@@ -18,7 +18,7 @@ Package name and binary are the same:
|
|
|
18
18
|
|
|
19
19
|
Gologin Web Access combines two existing product surfaces behind one CLI:
|
|
20
20
|
|
|
21
|
-
-
|
|
21
|
+
- Scraping API
|
|
22
22
|
Stateless read and extraction. Best when you want page content quickly without maintaining a browser session.
|
|
23
23
|
- Cloud Browser
|
|
24
24
|
Stateful interaction. Best when you need navigation, clicks, typing, screenshots, or multi-step flows that persist across commands.
|
|
@@ -36,23 +36,23 @@ The point of the unified CLI is that both modes live in one product with one com
|
|
|
36
36
|
|
|
37
37
|
### Scraping / Read
|
|
38
38
|
|
|
39
|
-
These commands use
|
|
39
|
+
These commands use GoLogin Scraping API:
|
|
40
40
|
|
|
41
41
|
- `gologin-web-access scrape <url>`
|
|
42
|
-
- `gologin-web-access read <url> [--format text|markdown|html] [--source auto|
|
|
43
|
-
- `gologin-web-access scrape-markdown <url> [--source auto|
|
|
44
|
-
- `gologin-web-access scrape-text <url> [--source auto|
|
|
42
|
+
- `gologin-web-access read <url> [--format text|markdown|html] [--source auto|scraping|browser]`
|
|
43
|
+
- `gologin-web-access scrape-markdown <url> [--source auto|scraping|browser]`
|
|
44
|
+
- `gologin-web-access scrape-text <url> [--source auto|scraping|browser]`
|
|
45
45
|
- `gologin-web-access scrape-json <url> [--fallback none|browser]`
|
|
46
|
-
- `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|
|
|
47
|
-
- `gologin-web-access batch-extract <url...> --schema <schema.json> [--source auto|
|
|
48
|
-
- `gologin-web-access search <query> [--limit <n>] [--country <cc>] [--language <lang>] [--source auto|
|
|
46
|
+
- `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|scraping|browser] [--only-main-content] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>] [--strict]`
|
|
47
|
+
- `gologin-web-access batch-extract <url...> --schema <schema.json> [--source auto|scraping|browser] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
|
|
48
|
+
- `gologin-web-access search <query> [--limit <n>] [--country <cc>] [--language <lang>] [--source auto|scraping|browser]`
|
|
49
49
|
- `gologin-web-access map <url> [--limit <n>] [--max-depth <n>] [--concurrency <n>] [--strict]`
|
|
50
50
|
- `gologin-web-access crawl <url> [--format html|markdown|text|json] [--limit <n>] [--max-depth <n>] [--only-main-content] [--strict]`
|
|
51
51
|
- `gologin-web-access crawl-start <url> ...`
|
|
52
52
|
- `gologin-web-access crawl-status <jobId>`
|
|
53
53
|
- `gologin-web-access crawl-result <jobId>`
|
|
54
54
|
- `gologin-web-access crawl-errors <jobId>`
|
|
55
|
-
- `gologin-web-access extract <url> --schema <schema.json> [--source auto|
|
|
55
|
+
- `gologin-web-access extract <url> --schema <schema.json> [--source auto|scraping|browser]`
|
|
56
56
|
- `gologin-web-access change-track <url> [--format html|markdown|text|json]`
|
|
57
57
|
- `gologin-web-access batch-change-track <url...> [--format html|markdown|text|json] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
|
|
58
58
|
- `gologin-web-access parse-document <url-or-path>`
|
|
@@ -111,7 +111,7 @@ Use these when you need state, interaction, or multi-step browser flows.
|
|
|
111
111
|
|
|
112
112
|
### GoLogin API Helpers
|
|
113
113
|
|
|
114
|
-
These commands use the GoLogin REST API directly through `GOLOGIN_TOKEN`. They do not require
|
|
114
|
+
These commands use the GoLogin REST API directly through `GOLOGIN_TOKEN`. They do not require Scraping API and do not start the browser daemon:
|
|
115
115
|
|
|
116
116
|
- `gologin-web-access cloud-usage --profile <profileId> | --workspace <workspaceId> [--days <1-30>] [--json]`
|
|
117
117
|
- `gologin-web-access profile-cloud start <profileId> [--json]`
|
|
@@ -185,7 +185,7 @@ If the browser surface grows substantially later, a nested namespace may become
|
|
|
185
185
|
|
|
186
186
|
This CLI uses two different GoLogin credentials on purpose, because the underlying products are different.
|
|
187
187
|
|
|
188
|
-
- `
|
|
188
|
+
- `GOLOGIN_SCRAPING_API_KEY`
|
|
189
189
|
Required for Scraping / Read commands.
|
|
190
190
|
- `GOLOGIN_TOKEN`
|
|
191
191
|
Required for `gologin-web-access open`, GoLogin API helper commands, and profile validation in `gologin-web-access doctor`.
|
|
@@ -194,16 +194,16 @@ This CLI uses two different GoLogin credentials on purpose, because the underlyi
|
|
|
194
194
|
- `GOLOGIN_DAEMON_PORT`
|
|
195
195
|
Optional local daemon port for browser workflows.
|
|
196
196
|
|
|
197
|
-
Recommended full setup for agents is to configure both `
|
|
197
|
+
Recommended full setup for agents is to configure both `GOLOGIN_SCRAPING_API_KEY` and `GOLOGIN_TOKEN` before starting work, even if the current task looks read-only or browser-only.
|
|
198
198
|
|
|
199
199
|
Missing-key errors are command-group specific. Example:
|
|
200
200
|
|
|
201
|
-
`Missing
|
|
201
|
+
`Missing GOLOGIN_SCRAPING_API_KEY. This is required for scraping commands like \`gologin-web-access scrape\`.`
|
|
202
202
|
|
|
203
203
|
Environment variables are the primary configuration mechanism:
|
|
204
204
|
|
|
205
205
|
```bash
|
|
206
|
-
export
|
|
206
|
+
export GOLOGIN_SCRAPING_API_KEY="wu_..."
|
|
207
207
|
export GOLOGIN_TOKEN="gl_..."
|
|
208
208
|
export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
|
|
209
209
|
export GOLOGIN_DAEMON_PORT="4590"
|
|
@@ -218,8 +218,8 @@ gologin-web-access config init
|
|
|
218
218
|
Useful variants:
|
|
219
219
|
|
|
220
220
|
```bash
|
|
221
|
-
gologin-web-access config init --
|
|
222
|
-
gologin-web-access config init --web-unlocker-key wu_... --token gl_...
|
|
221
|
+
gologin-web-access config init --scraping-api-key wu_... --token gl_...
|
|
222
|
+
gologin-web-access config init --web-unlocker-key wu_... --token gl_... # legacy alias
|
|
223
223
|
```
|
|
224
224
|
|
|
225
225
|
That writes `~/.gologin-web-access/config.json` once and the CLI will keep reading it on later runs.
|
|
@@ -266,7 +266,7 @@ npm install -g gologin-web-access
|
|
|
266
266
|
### Read A Page
|
|
267
267
|
|
|
268
268
|
```bash
|
|
269
|
-
export
|
|
269
|
+
export GOLOGIN_SCRAPING_API_KEY="wu_..."
|
|
270
270
|
|
|
271
271
|
gologin-web-access scrape https://example.com
|
|
272
272
|
gologin-web-access read https://docs.browserbase.com/features/stealth-mode
|
|
@@ -332,17 +332,17 @@ gologin-web-access snapshot -i
|
|
|
332
332
|
|
|
333
333
|
## Structured Output And Retry Controls
|
|
334
334
|
|
|
335
|
-
- `scrape-markdown` and `scrape-text` now default to `--source auto`: they start with
|
|
335
|
+
- `scrape-markdown` and `scrape-text` now default to `--source auto`: they start with Scraping API, isolate the most readable content block, and can auto-retry with Cloud Browser when the output still looks like JS-rendered docs chrome.
|
|
336
336
|
- `read` is the shortest path for "look at this docs page" work: it targets the most readable content block and defaults to `--format text --source auto`.
|
|
337
|
-
- `scrape-markdown` and `scrape-text` also accept `--source
|
|
338
|
-
- `extract` now accepts `--source auto|
|
|
337
|
+
- `scrape-markdown` and `scrape-text` also accept `--source scraping` and `--source browser` when you want to force one path. `--source unlocker` remains as a legacy alias.
|
|
338
|
+
- `extract` now accepts `--source auto|scraping|browser` and returns `renderSource`, fallback flags, and request metadata with the extracted JSON.
|
|
339
339
|
- `batch-extract` reuses the same extraction path across many URLs and returns one structured result per URL, including request and fallback metadata. Add `--output <path>` to save the full array directly.
|
|
340
340
|
- `scrape-json` now returns both a flat `headings` array and `headingsByLevel` buckets for `h1` through `h6`.
|
|
341
341
|
- `scrape-json --fallback browser` is available for JS-heavy pages where stateless extraction returns weak heading data.
|
|
342
342
|
- `scrape-json` now also classifies the page outcome as `ok`, `empty`, `incomplete`, `authwall`, `challenge`, `blocked`, or `cookie_wall`, and includes `nextActionHint` when the result is weak or gated.
|
|
343
343
|
- `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, and `batch-scrape` accept `--retry`, `--backoff-ms`, and `--timeout-ms`.
|
|
344
344
|
- `batch-scrape --only-main-content` lets markdown, text, and html batch runs use the same readable-content isolation path as `read`.
|
|
345
|
-
- `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless
|
|
345
|
+
- `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless Scraping API path.
|
|
346
346
|
- `batch-scrape --summary` prints a one-line success/failure summary to `stderr` after the JSON payload.
|
|
347
347
|
- `batch-scrape` now returns exit code `0` on partial success by default and only fails the command when every URL failed. Add `--strict` if any single failed URL should make the whole batch exit non-zero.
|
|
348
348
|
- `batch-scrape --output <path>` writes the full JSON to disk so shells and agent consoles cannot truncate a large payload silently.
|
|
@@ -370,7 +370,7 @@ gologin-web-access jobs
|
|
|
370
370
|
|
|
371
371
|
Gologin Web Access still has two runtime layers:
|
|
372
372
|
|
|
373
|
-
-
|
|
373
|
+
- Scraping API for stateless read and extraction
|
|
374
374
|
- Cloud Browser for stateful interaction
|
|
375
375
|
|
|
376
376
|
But both are now shipped inside the same package and the same repository. One install gives you the full read layer and the full browser/session layer.
|
package/dist/cli.js
CHANGED
|
@@ -68,12 +68,12 @@ const wait_1 = require("./commands/wait");
|
|
|
68
68
|
const doctor_1 = require("./doctor");
|
|
69
69
|
const errors_1 = require("./lib/errors");
|
|
70
70
|
const output_1 = require("./lib/output");
|
|
71
|
-
const CLI_VERSION = "0.3.
|
|
71
|
+
const CLI_VERSION = "0.3.4";
|
|
72
72
|
async function main() {
|
|
73
73
|
const program = new commander_1.Command();
|
|
74
74
|
program
|
|
75
75
|
.name("gologin-web-access")
|
|
76
|
-
.description("Read and interact with the web using
|
|
76
|
+
.description("Read and interact with the web using the GoLogin Scraping API and Cloud Browser.")
|
|
77
77
|
.version(CLI_VERSION)
|
|
78
78
|
.showHelpAfterError()
|
|
79
79
|
.showSuggestionAfterError();
|
|
@@ -174,7 +174,7 @@ Command groups:
|
|
|
174
174
|
Agent: gologin-web-access run|batch|jobs|job
|
|
175
175
|
|
|
176
176
|
Key model:
|
|
177
|
-
${"
|
|
177
|
+
${"GOLOGIN_SCRAPING_API_KEY"} powers scraping commands.
|
|
178
178
|
${"GOLOGIN_TOKEN"} powers browser commands.
|
|
179
179
|
Recommended setup: configure both keys up front, even if the current task only needs one path.
|
|
180
180
|
`);
|
|
package/dist/commands/batchExtract.js
CHANGED
|
@@ -18,7 +18,7 @@ function buildBatchExtractCommand() {
|
|
|
18
18
|
.description("Extract structured data from multiple pages using one selector schema.")
|
|
19
19
|
.argument("<urls...>", "One or more URLs")
|
|
20
20
|
.requiredOption("--schema <path>", "Path to a JSON extraction schema")
|
|
21
|
-
.option("--source <source>", "Read source: auto,
|
|
21
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
22
22
|
.option("--concurrency <count>", "Number of concurrent requests", "4")
|
|
23
23
|
.option("--output <path>", "Write the full batch result JSON to a file")
|
|
24
24
|
.option("--summary", "Print one-line summary stats to stderr after the JSON output")
|
|
package/dist/commands/batchScrape.js
CHANGED
|
@@ -17,12 +17,12 @@ const unlocker_1 = require("../lib/unlocker");
|
|
|
17
17
|
const shared_1 = require("./shared");
|
|
18
18
|
function buildBatchScrapeCommand() {
|
|
19
19
|
return (0, shared_1.addProfileOption)((0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("batch-scrape")
|
|
20
|
-
.description("Fetch multiple pages through
|
|
20
|
+
.description("Fetch multiple pages through Scraping API and print a JSON array of results.")
|
|
21
21
|
.argument("<urls...>", "One or more URLs")
|
|
22
22
|
.option("--format <format>", "html, markdown, text, or json", "html")
|
|
23
23
|
.option("--concurrency <count>", "Number of concurrent requests", "4")
|
|
24
24
|
.option("--fallback <mode>", "Structured scrape fallback: none or browser", "none")
|
|
25
|
-
.option("--source <source>", "Read source for --only-main-content mode: auto,
|
|
25
|
+
.option("--source <source>", "Read source for --only-main-content mode: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
26
26
|
.option("--only-main-content", "For html, markdown, or text formats, isolate the most readable content block per page")
|
|
27
27
|
.option("--output <path>", "Write the full batch result JSON to a file")
|
|
28
28
|
.option("--summary", "Print one-line summary stats to stderr after the JSON output")
|
|
package/dist/commands/configInit.js
CHANGED
|
@@ -8,9 +8,10 @@ const output_1 = require("../lib/output");
|
|
|
8
8
|
const unlocker_1 = require("../lib/unlocker");
|
|
9
9
|
function buildConfigInitCommand() {
|
|
10
10
|
return new commander_1.Command("init")
|
|
11
|
-
.description("Write ~/.gologin-web-access/config.json with current values or placeholders. Recommended: persist both the
|
|
12
|
-
.option("--
|
|
13
|
-
.option("--web-unlocker-key <key>", "
|
|
11
|
+
.description("Write ~/.gologin-web-access/config.json with current values or placeholders. Recommended: persist both the Scraping API key and the GoLogin token.")
|
|
12
|
+
.option("--scraping-api-key <key>", "Persist a Scraping API key")
|
|
13
|
+
.option("--web-unlocker-api-key <key>", "Legacy alias for --scraping-api-key")
|
|
14
|
+
.option("--web-unlocker-key <key>", "Legacy alias for --scraping-api-key")
|
|
14
15
|
.option("--token <token>", "Persist a GoLogin token")
|
|
15
16
|
.option("--cloud-token <token>", "Backward-compatible alias for --token")
|
|
16
17
|
.option("--default-profile-id <id>", "Persist a default Gologin profile ID")
|
|
@@ -18,7 +19,12 @@ function buildConfigInitCommand() {
|
|
|
18
19
|
.option("--no-validate", "Skip live key validation after writing config")
|
|
19
20
|
.option("--force", "Overwrite an existing config file")
|
|
20
21
|
.action(async (options) => {
|
|
21
|
-
const webUnlockerApiKey = options.
|
|
22
|
+
const webUnlockerApiKey = options.scrapingApiKey ??
|
|
23
|
+
options.webUnlockerApiKey ??
|
|
24
|
+
options.webUnlockerKey ??
|
|
25
|
+
process.env[config_1.ENV_NAMES.webUnlockerApiKey] ??
|
|
26
|
+
process.env.GOLOGIN_WEB_UNLOCKER_API_KEY ??
|
|
27
|
+
process.env.GOLOGIN_WEBUNLOCKER_API_KEY;
|
|
22
28
|
const result = await (0, config_1.initConfigFile)({
|
|
23
29
|
webUnlockerApiKey,
|
|
24
30
|
cloudToken: options.token ??
|
|
@@ -37,7 +43,7 @@ function buildConfigInitCommand() {
|
|
|
37
43
|
(0, output_1.printKeyValueRows)([
|
|
38
44
|
{ label: "Config file", value: result.path },
|
|
39
45
|
{
|
|
40
|
-
label: "
|
|
46
|
+
label: "Scraping API key",
|
|
41
47
|
value: result.config.webUnlockerApiKey ? "written" : "left empty",
|
|
42
48
|
},
|
|
43
49
|
{
|
|
@@ -54,7 +60,7 @@ function buildConfigInitCommand() {
|
|
|
54
60
|
},
|
|
55
61
|
]);
|
|
56
62
|
if (!result.config.webUnlockerApiKey || !result.config.cloudToken) {
|
|
57
|
-
(0, output_1.printText)("Recommended next step: configure both
|
|
63
|
+
(0, output_1.printText)("Recommended next step: configure both GOLOGIN_SCRAPING_API_KEY and GOLOGIN_TOKEN so agents can use scraping and browser flows without asking again.");
|
|
58
64
|
}
|
|
59
65
|
if (options.validate === false) {
|
|
60
66
|
return;
|
|
@@ -63,7 +69,7 @@ function buildConfigInitCommand() {
|
|
|
63
69
|
if (result.config.webUnlockerApiKey) {
|
|
64
70
|
const validation = await (0, unlocker_1.validateWebUnlockerKey)(result.config.webUnlockerApiKey);
|
|
65
71
|
validationRows.push({
|
|
66
|
-
label: "
|
|
72
|
+
label: "Scraping API validation",
|
|
67
73
|
value: validation.ok ? "ok" : `failed${validation.status ? ` (${validation.status})` : ""}: ${validation.detail}`,
|
|
68
74
|
});
|
|
69
75
|
}
|
package/dist/commands/crawl.js
CHANGED
|
@@ -7,7 +7,7 @@ const crawl_1 = require("../lib/crawl");
|
|
|
7
7
|
const output_1 = require("../lib/output");
|
|
8
8
|
function buildCrawlCommand() {
|
|
9
9
|
return new commander_1.Command("crawl")
|
|
10
|
-
.description("Crawl a website through
|
|
10
|
+
.description("Crawl a website through GoLogin Scraping API and return per-page extracted content.")
|
|
11
11
|
.argument("<url>", "Root website URL to crawl")
|
|
12
12
|
.option("--format <format>", "html, markdown, text, or json", "markdown")
|
|
13
13
|
.option("--limit <count>", "Maximum number of pages to visit", "25")
|
package/dist/commands/extract.js
CHANGED
|
@@ -18,7 +18,7 @@ function buildExtractCommand() {
|
|
|
18
18
|
.argument("<url>", "Target URL")
|
|
19
19
|
.requiredOption("--schema <path>", "Path to a JSON extraction schema")
|
|
20
20
|
.option("--output <path>", "Write extracted JSON to a file")
|
|
21
|
-
.option("--source <source>", "Read source: auto,
|
|
21
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
22
22
|
.action(async (url, options) => {
|
|
23
23
|
const config = await (0, config_1.loadConfig)();
|
|
24
24
|
const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
|
package/dist/commands/map.js
CHANGED
|
@@ -7,7 +7,7 @@ const output_1 = require("../lib/output");
|
|
|
7
7
|
const crawl_1 = require("../lib/crawl");
|
|
8
8
|
function buildMapCommand() {
|
|
9
9
|
return new commander_1.Command("map")
|
|
10
|
-
.description("Discover internal website links through
|
|
10
|
+
.description("Discover internal website links through GoLogin Scraping API.")
|
|
11
11
|
.argument("<url>", "Root website URL to map")
|
|
12
12
|
.option("--limit <count>", "Maximum number of pages to visit", "100")
|
|
13
13
|
.option("--max-depth <depth>", "Maximum link depth from the root URL", "2")
|
package/dist/commands/read.js
CHANGED
|
@@ -12,7 +12,7 @@ function buildReadCommand() {
|
|
|
12
12
|
.description("Read the main content of a docs page or article with automatic fallback to Cloud Browser when needed.")
|
|
13
13
|
.argument("<url>", "URL to read")
|
|
14
14
|
.option("--format <format>", "Output format: html, markdown, or text", "text")
|
|
15
|
-
.option("--source <source>", "Read source: auto,
|
|
15
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
16
16
|
.action(async (url, options) => {
|
|
17
17
|
const config = await (0, config_1.loadConfig)();
|
|
18
18
|
const format = normalizeReadFormat(options.format);
|
package/dist/commands/scrape.js
CHANGED
|
@@ -8,7 +8,7 @@ const output_1 = require("../lib/output");
|
|
|
8
8
|
const unlocker_1 = require("../lib/unlocker");
|
|
9
9
|
function buildScrapeCommand() {
|
|
10
10
|
return (0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape")
|
|
11
|
-
.description("Fetch rendered HTML through
|
|
11
|
+
.description("Fetch rendered HTML through GoLogin Scraping API.")
|
|
12
12
|
.argument("<url>", "URL to scrape")
|
|
13
13
|
.action(async (url, options) => {
|
|
14
14
|
const config = await (0, config_1.loadConfig)();
|
|
package/dist/commands/scrapeJson.js
CHANGED
|
@@ -8,7 +8,7 @@ const structuredScrape_1 = require("../lib/structuredScrape");
|
|
|
8
8
|
const shared_1 = require("./shared");
|
|
9
9
|
function buildScrapeJsonCommand() {
|
|
10
10
|
return (0, shared_1.addProfileOption)((0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape-json")
|
|
11
|
-
.description("Fetch a page through
|
|
11
|
+
.description("Fetch a page through Scraping API and print a structured JSON envelope.")
|
|
12
12
|
.argument("<url>", "URL to scrape")
|
|
13
13
|
.option("--fallback <mode>", "none or browser structured fallback for JS-heavy pages", "none")
|
|
14
14
|
.action(async (url, options) => {
|
|
package/dist/commands/scrapeMarkdown.js
CHANGED
|
@@ -9,9 +9,9 @@ const shared_1 = require("./shared");
|
|
|
9
9
|
const output_1 = require("../lib/output");
|
|
10
10
|
function buildScrapeMarkdownCommand() {
|
|
11
11
|
return (0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape-markdown")
|
|
12
|
-
.description("Fetch a page through
|
|
12
|
+
.description("Fetch a page through Scraping API and print Markdown.")
|
|
13
13
|
.argument("<url>", "URL to scrape")
|
|
14
|
-
.option("--source <source>", "Read source: auto,
|
|
14
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
15
15
|
.action(async (url, options) => {
|
|
16
16
|
const config = await (0, config_1.loadConfig)();
|
|
17
17
|
const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
|
|
package/dist/commands/scrapeText.js
CHANGED
|
@@ -9,9 +9,9 @@ const shared_1 = require("./shared");
|
|
|
9
9
|
const output_1 = require("../lib/output");
|
|
10
10
|
function buildScrapeTextCommand() {
|
|
11
11
|
return (0, shared_1.addUnlockerRequestOptions)(new commander_1.Command("scrape-text")
|
|
12
|
-
.description("Fetch a page through
|
|
12
|
+
.description("Fetch a page through Scraping API and print plain text.")
|
|
13
13
|
.argument("<url>", "URL to scrape")
|
|
14
|
-
.option("--source <source>", "Read source: auto,
|
|
14
|
+
.option("--source <source>", "Read source: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
15
15
|
.action(async (url, options) => {
|
|
16
16
|
const config = await (0, config_1.loadConfig)();
|
|
17
17
|
const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
|
package/dist/commands/search.js
CHANGED
|
@@ -12,7 +12,7 @@ function buildSearchCommand() {
|
|
|
12
12
|
.option("--limit <count>", "Maximum number of results", "10")
|
|
13
13
|
.option("--country <country>", "Country code for Google search", "us")
|
|
14
14
|
.option("--language <language>", "Language for Google search", "en")
|
|
15
|
-
.option("--source <mode>", "Search path: auto,
|
|
15
|
+
.option("--source <mode>", "Search path: auto, scraping, or browser. Legacy alias: unlocker", "auto")
|
|
16
16
|
.action(async (query, options) => {
|
|
17
17
|
const config = await (0, config_1.loadConfig)();
|
|
18
18
|
const result = await (0, search_1.searchWeb)(query, config, {
|
|
@@ -32,6 +32,9 @@ function normalizeLimit(value) {
|
|
|
32
32
|
return Math.min(Math.floor(parsed), 100);
|
|
33
33
|
}
|
|
34
34
|
function normalizeSource(value) {
|
|
35
|
+
if (value === "scraping" || value === "scraping-api") {
|
|
36
|
+
return "unlocker";
|
|
37
|
+
}
|
|
35
38
|
if (value === "auto" || value === "unlocker" || value === "browser") {
|
|
36
39
|
return value;
|
|
37
40
|
}
|
package/dist/config.js
CHANGED
|
@@ -22,13 +22,13 @@ const LEGACY_CONFIG_DIR = ".gologin-web";
|
|
|
22
22
|
const CONFIG_FILENAME = "config.json";
|
|
23
23
|
exports.DEFAULT_DAEMON_PORT = 4590;
|
|
24
24
|
exports.ENV_NAMES = {
|
|
25
|
-
webUnlockerApiKey: "
|
|
25
|
+
webUnlockerApiKey: "GOLOGIN_SCRAPING_API_KEY",
|
|
26
26
|
cloudToken: "GOLOGIN_TOKEN",
|
|
27
27
|
defaultProfileId: "GOLOGIN_DEFAULT_PROFILE_ID",
|
|
28
28
|
daemonPort: "GOLOGIN_DAEMON_PORT",
|
|
29
29
|
};
|
|
30
30
|
const LEGACY_ENV_NAMES = {
|
|
31
|
-
webUnlockerApiKey: ["GOLOGIN_WEBUNLOCKER_API_KEY"],
|
|
31
|
+
webUnlockerApiKey: ["GOLOGIN_WEB_UNLOCKER_API_KEY", "GOLOGIN_WEBUNLOCKER_API_KEY"],
|
|
32
32
|
cloudToken: ["GOLOGIN_CLOUD_TOKEN"],
|
|
33
33
|
defaultProfileId: ["GOLOGIN_PROFILE_ID"],
|
|
34
34
|
daemonPort: [],
|
|
@@ -119,7 +119,7 @@ function getRecommendedCredentialStatus(config) {
|
|
|
119
119
|
return {
|
|
120
120
|
ready: true,
|
|
121
121
|
missing,
|
|
122
|
-
detail: "complete (
|
|
122
|
+
detail: "complete (Scraping API + Cloud Browser configured)",
|
|
123
123
|
};
|
|
124
124
|
}
|
|
125
125
|
return {
|
package/dist/doctor.js
CHANGED
|
@@ -12,7 +12,7 @@ async function runDoctor(options = {}) {
|
|
|
12
12
|
const agentCli = await (0, agentCli_1.inspectAgentCli)();
|
|
13
13
|
const recommended = (0, config_1.getRecommendedCredentialStatus)(config);
|
|
14
14
|
checks.push({
|
|
15
|
-
name: "
|
|
15
|
+
name: "Scraping API key",
|
|
16
16
|
status: config.webUnlockerApiKey ? "ok" : "warn",
|
|
17
17
|
detail: config.webUnlockerApiKey ? `configured via ${config.sources.webUnlockerApiKey}` : "missing",
|
|
18
18
|
});
|
|
@@ -25,7 +25,7 @@ async function runDoctor(options = {}) {
|
|
|
25
25
|
name: "Recommended full setup",
|
|
26
26
|
status: recommended.ready ? "ok" : "warn",
|
|
27
27
|
detail: recommended.ready
|
|
28
|
-
? "both
|
|
28
|
+
? "both GOLOGIN_SCRAPING_API_KEY and GOLOGIN_TOKEN are configured"
|
|
29
29
|
: `missing ${recommended.missing.join(" and ")}`,
|
|
30
30
|
});
|
|
31
31
|
checks.push({
|
package/dist/lib/errors.js
CHANGED
|
@@ -26,7 +26,7 @@ class MissingCredentialError extends CliError {
|
|
|
26
26
|
constructor(envName, commandGroup) {
|
|
27
27
|
super(`Missing ${envName}. This is required for ${commandGroup}.`, 1, [
|
|
28
28
|
"This CLI only reads credentials from environment variables or ~/.gologin-web-access/config.json.",
|
|
29
|
-
"Recommended setup: configure both
|
|
29
|
+
"Recommended setup: configure both GOLOGIN_SCRAPING_API_KEY and GOLOGIN_TOKEN up front so agents do not stop to ask again. GOLOGIN_WEB_UNLOCKER_API_KEY is still accepted as a legacy alias.",
|
|
30
30
|
`Set ${envName} in your environment or add it to ~/.gologin-web-access/config.json.`,
|
|
31
31
|
"Helpful commands: gologin-web-access config init, gologin-web-access config show, gologin-web-access doctor.",
|
|
32
32
|
].join("\n"));
|
package/dist/lib/readSource.js
CHANGED
|
@@ -16,6 +16,9 @@ function normalizeReadSourceMode(value, defaultMode = "auto") {
|
|
|
16
16
|
if (!value) {
|
|
17
17
|
return defaultMode;
|
|
18
18
|
}
|
|
19
|
+
if (value === "scraping" || value === "scraping-api") {
|
|
20
|
+
return "unlocker";
|
|
21
|
+
}
|
|
19
22
|
if (value === "auto" || value === "unlocker" || value === "browser") {
|
|
20
23
|
return value;
|
|
21
24
|
}
|
package/dist/lib/search.js
CHANGED
|
@@ -294,7 +294,7 @@ function classifySearchPage(engine, html, results) {
|
|
|
294
294
|
}
|
|
295
295
|
async function searchViaUnlocker(query, config, options, engine) {
|
|
296
296
|
if (!config.webUnlockerApiKey) {
|
|
297
|
-
throw new errors_1.CliError("Missing
|
|
297
|
+
throw new errors_1.CliError("Missing GOLOGIN_SCRAPING_API_KEY for Scraping API search.");
|
|
298
298
|
}
|
|
299
299
|
const searchUrl = buildSearchUrl(engine, query, options);
|
|
300
300
|
const scraped = await (0, unlocker_1.scrapeRenderedHtml)(searchUrl, config.webUnlockerApiKey);
|
package/dist/lib/unlocker.js
CHANGED
|
@@ -44,7 +44,7 @@ class WebUnlockerClient {
|
|
|
44
44
|
});
|
|
45
45
|
if (!response.ok) {
|
|
46
46
|
const body = await safeReadText(response, this.timeoutMs);
|
|
47
|
-
throw new errors_1.HttpError(`
|
|
47
|
+
throw new errors_1.HttpError(`Scraping API request failed with status ${response.status}.`, response.status, body ? truncate(body, 300) : undefined);
|
|
48
48
|
}
|
|
49
49
|
const content = await readResponseTextWithTimeout(response, this.timeoutMs);
|
|
50
50
|
return {
|
|
@@ -132,7 +132,7 @@ async function fetchWithRetry(url, options) {
|
|
|
132
132
|
};
|
|
133
133
|
}
|
|
134
134
|
const body = await safeReadText(response, options.timeoutMs);
|
|
135
|
-
const error = new errors_1.HttpError(`
|
|
135
|
+
const error = new errors_1.HttpError(`Scraping API request failed with status ${response.status}.`, response.status, body ? truncate(body, 300) : undefined);
|
|
136
136
|
const retriable = attempt < options.maxRetries && isRetriableStatus(response.status);
|
|
137
137
|
attempts.push({
|
|
138
138
|
attempt: attempt + 1,
|
|
@@ -154,10 +154,10 @@ async function fetchWithRetry(url, options) {
|
|
|
154
154
|
throw error;
|
|
155
155
|
}
|
|
156
156
|
const normalizedError = error instanceof Error && error.name === "AbortError"
|
|
157
|
-
? new errors_1.HttpError("
|
|
157
|
+
? new errors_1.HttpError("Scraping API request timed out.", 408)
|
|
158
158
|
: error instanceof Error
|
|
159
159
|
? new errors_1.HttpError(error.message, 500)
|
|
160
|
-
: new errors_1.HttpError("
|
|
160
|
+
: new errors_1.HttpError("Scraping API request failed.", 500);
|
|
161
161
|
const retriable = attempt < options.maxRetries;
|
|
162
162
|
attempts.push({
|
|
163
163
|
attempt: attempt + 1,
|
|
@@ -175,11 +175,11 @@ async function fetchWithRetry(url, options) {
|
|
|
175
175
|
throw attachRequestMeta(lastStatusError, attempts);
|
|
176
176
|
}
|
|
177
177
|
if (lastError instanceof Error && lastError.name === "AbortError") {
|
|
178
|
-
throw attachRequestMeta(new errors_1.HttpError("
|
|
178
|
+
throw attachRequestMeta(new errors_1.HttpError("Scraping API request timed out.", 408), attempts);
|
|
179
179
|
}
|
|
180
180
|
throw attachRequestMeta(lastError instanceof Error
|
|
181
181
|
? new errors_1.HttpError(lastError.message, 500)
|
|
182
|
-
: new errors_1.HttpError("
|
|
182
|
+
: new errors_1.HttpError("Scraping API request failed.", 500), attempts);
|
|
183
183
|
}
|
|
184
184
|
function buildScrapeRequestMeta(attempts) {
|
|
185
185
|
return {
|
|
@@ -230,7 +230,7 @@ async function readResponseTextWithTimeout(response, timeoutMs) {
|
|
|
230
230
|
new Promise((_, reject) => {
|
|
231
231
|
timer = setTimeout(() => {
|
|
232
232
|
void response.body?.cancel().catch(() => undefined);
|
|
233
|
-
reject(new errors_1.HttpError("
|
|
233
|
+
reject(new errors_1.HttpError("Scraping API response body timed out.", 408));
|
|
234
234
|
}, timeoutMs);
|
|
235
235
|
}),
|
|
236
236
|
]);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gologin-web-access",
|
|
3
|
-
"version": "0.3.
|
|
4
|
-
"description": "Unified web access CLI for developers and AI agents to read and interact with the web using
|
|
3
|
+
"version": "0.3.4",
|
|
4
|
+
"description": "Unified web access CLI for developers and AI agents to read and interact with the web using the GoLogin Scraping API and Cloud Browser.",
|
|
5
5
|
"main": "dist/cli.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"gologin-web-access": "dist/cli.js"
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
"gologin",
|
|
40
40
|
"web-access",
|
|
41
41
|
"cli",
|
|
42
|
+
"scraping-api",
|
|
42
43
|
"webunlocker",
|
|
43
44
|
"scraping",
|
|
44
45
|
"cloud-browser",
|