gologin-web-access 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -1
- package/README.md +25 -10
- package/dist/cli.js +8 -2
- package/dist/commands/batchScrape.js +121 -14
- package/dist/commands/close.js +4 -0
- package/dist/commands/configInit.js +10 -6
- package/dist/commands/read.js +18 -8
- package/dist/commands/scrapeJson.js +9 -0
- package/dist/commands/scrapeMarkdown.js +18 -8
- package/dist/commands/scrapeText.js +18 -8
- package/dist/commands/sessions.js +11 -2
- package/dist/config.js +2 -2
- package/dist/doctor.js +3 -3
- package/dist/internal-agent/commands/close.js +10 -0
- package/dist/internal-agent/commands/sessions.js +19 -1
- package/dist/internal-agent/daemon/server.js +9 -0
- package/dist/internal-agent/daemon/sessionManager.js +93 -24
- package/dist/internal-agent/lib/utils.js +1 -1
- package/dist/lib/browserRead.js +1 -1
- package/dist/lib/browserStructured.js +1 -1
- package/dist/lib/errors.js +30 -2
- package/dist/lib/extractRunner.js +4 -0
- package/dist/lib/output.js +3 -0
- package/dist/lib/pageOutcome.js +192 -0
- package/dist/lib/readSource.js +41 -3
- package/dist/lib/search.js +1 -1
- package/dist/lib/structuredScrape.js +79 -7
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,12 @@
|
|
|
5
5
|
- browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contains both Web Unlocker and Cloud Browser flows
|
|
6
6
|
- doctor now reports the embedded browser runtime source and version
|
|
7
7
|
|
|
8
|
+
## 0.3.2 - 2026-04-03
|
|
9
|
+
|
|
10
|
+
- added unified page outcome classification across `read`, `scrape-json`, and `batch-scrape`
|
|
11
|
+
- structured and readable paths now distinguish `ok`, `empty`, `incomplete`, `authwall`, `challenge`, `blocked`, and `cookie_wall`
|
|
12
|
+
- batch and extract-oriented flows now propagate next-step hints and fallback metadata more consistently for agents
|
|
13
|
+
|
|
8
14
|
## 0.1.0 - 2026-03-10
|
|
9
15
|
|
|
10
16
|
Initial public release of Gologin Web Access.
|
|
@@ -14,6 +20,6 @@ Highlights:
|
|
|
14
20
|
- Unified CLI entry point for Gologin Web Unlocker and Gologin Cloud Browser workflows
|
|
15
21
|
- Scraping commands: `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, `batch-scrape`
|
|
16
22
|
- Browser commands: `open`, `snapshot`, `click`, `type`, `screenshot`, `close`, `sessions`, `current`
|
|
17
|
-
- Clear two-key configuration model with `GOLOGIN_WEB_UNLOCKER_API_KEY` and `
|
|
23
|
+
- Clear two-key configuration model with `GOLOGIN_WEB_UNLOCKER_API_KEY` and `GOLOGIN_TOKEN`
|
|
18
24
|
- `doctor`, `config show`, and `config init` to reduce setup friction
|
|
19
25
|
- Compatibility support for legacy env names used by existing Gologin tools
|
package/README.md
CHANGED
|
@@ -26,6 +26,13 @@ The point of the unified CLI is that both modes live in one product with one com
|
|
|
26
26
|
|
|
27
27
|
## Command Groups
|
|
28
28
|
|
|
29
|
+
### Quick Picks
|
|
30
|
+
|
|
31
|
+
- `read` for "read this docs page/article" or "tell me what is on this page"
|
|
32
|
+
- `scrape-text` for plain text from one known page when you do not need headings/links metadata
|
|
33
|
+
- `scrape-json` for structured title, description, headings, and links from one known page
|
|
34
|
+
- `batch-scrape` for many known URLs at once; add `--output <path>` when the JSON may be large and add `--strict` only if partial success should fail the command
|
|
35
|
+
|
|
29
36
|
### Scraping / Read
|
|
30
37
|
|
|
31
38
|
These commands use Gologin Web Unlocker:
|
|
@@ -35,7 +42,7 @@ These commands use Gologin Web Unlocker:
|
|
|
35
42
|
- `gologin-web-access scrape-markdown <url> [--source auto|unlocker|browser]`
|
|
36
43
|
- `gologin-web-access scrape-text <url> [--source auto|unlocker|browser]`
|
|
37
44
|
- `gologin-web-access scrape-json <url> [--fallback none|browser]`
|
|
38
|
-
- `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|unlocker|browser] [--only-main-content] [--retry <n>] [--backoff-ms <ms>] [--summary]`
|
|
45
|
+
- `gologin-web-access batch-scrape <url...> [--format html|markdown|text|json] [--fallback none|browser] [--source auto|unlocker|browser] [--only-main-content] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>] [--strict]`
|
|
39
46
|
- `gologin-web-access batch-extract <url...> --schema <schema.json> [--source auto|unlocker|browser] [--retry <n>] [--backoff-ms <ms>] [--summary] [--output <path>]`
|
|
40
47
|
- `gologin-web-access search <query> [--limit <n>] [--country <cc>] [--language <lang>] [--source auto|unlocker|browser]`
|
|
41
48
|
- `gologin-web-access map <url> [--limit <n>] [--max-depth <n>] [--concurrency <n>] [--strict]`
|
|
@@ -104,6 +111,9 @@ Use these when you need state, interaction, or multi-step browser flows.
|
|
|
104
111
|
## When To Use `scrape` vs `browser`
|
|
105
112
|
|
|
106
113
|
- Use `scrape` commands when you need page content, extracted text, markdown, or simple structured output.
|
|
114
|
+
- Use `read` as the default for docs and article reading when you want one high-level main-content command rather than choosing HTML/text/markdown yourself.
|
|
115
|
+
- Use `scrape-text` when you already know you want plain text.
|
|
116
|
+
- Use `scrape-json` when you want structured metadata and headings instead of full prose.
|
|
107
117
|
- Use `search` when you need web discovery or SERP results before deciding what to scrape. It now tries multiple search paths automatically, validates that the response is a real SERP, and reuses a short local cache for repeated queries.
|
|
108
118
|
- Use `map` when you need internal link discovery or a site inventory.
|
|
109
119
|
- Use `crawl` when you need multi-page read-only extraction across a site.
|
|
@@ -153,18 +163,18 @@ If the browser surface grows substantially later, a nested namespace may become
|
|
|
153
163
|
|
|
154
164
|
## Credentials And Config
|
|
155
165
|
|
|
156
|
-
This CLI uses two different
|
|
166
|
+
This CLI uses two different GoLogin credentials on purpose, because the underlying products are different.
|
|
157
167
|
|
|
158
168
|
- `GOLOGIN_WEB_UNLOCKER_API_KEY`
|
|
159
169
|
Required for Scraping / Read commands.
|
|
160
|
-
- `
|
|
170
|
+
- `GOLOGIN_TOKEN`
|
|
161
171
|
Required for `gologin-web-access open` and for profile validation in `gologin-web-access doctor`.
|
|
162
172
|
- `GOLOGIN_DEFAULT_PROFILE_ID`
|
|
163
173
|
Optional default profile for browser flows.
|
|
164
174
|
- `GOLOGIN_DAEMON_PORT`
|
|
165
175
|
Optional local daemon port for browser workflows.
|
|
166
176
|
|
|
167
|
-
Recommended full setup for agents is to configure both `GOLOGIN_WEB_UNLOCKER_API_KEY` and `
|
|
177
|
+
Recommended full setup for agents is to configure both `GOLOGIN_WEB_UNLOCKER_API_KEY` and `GOLOGIN_TOKEN` before starting work, even if the current task looks read-only or browser-only.
|
|
168
178
|
|
|
169
179
|
Missing-key errors are command-group specific. Example:
|
|
170
180
|
|
|
@@ -174,7 +184,7 @@ Environment variables are the primary configuration mechanism:
|
|
|
174
184
|
|
|
175
185
|
```bash
|
|
176
186
|
export GOLOGIN_WEB_UNLOCKER_API_KEY="wu_..."
|
|
177
|
-
export
|
|
187
|
+
export GOLOGIN_TOKEN="gl_..."
|
|
178
188
|
export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
|
|
179
189
|
export GOLOGIN_DAEMON_PORT="4590"
|
|
180
190
|
```
|
|
@@ -188,8 +198,8 @@ gologin-web-access config init
|
|
|
188
198
|
Useful variants:
|
|
189
199
|
|
|
190
200
|
```bash
|
|
191
|
-
gologin-web-access config init --web-unlocker-api-key wu_... --
|
|
192
|
-
gologin-web-access config init --web-unlocker-key wu_... --
|
|
201
|
+
gologin-web-access config init --web-unlocker-api-key wu_... --token gl_...
|
|
202
|
+
gologin-web-access config init --web-unlocker-key wu_... --token gl_...
|
|
193
203
|
```
|
|
194
204
|
|
|
195
205
|
That writes `~/.gologin-web-access/config.json` once and the CLI will keep reading it on later runs.
|
|
@@ -211,7 +221,7 @@ Gologin Web Access will also read the older path `~/.gologin-web/config.json` if
|
|
|
211
221
|
Backward-compatible aliases are also accepted for existing setups:
|
|
212
222
|
|
|
213
223
|
- `GOLOGIN_WEBUNLOCKER_API_KEY`
|
|
214
|
-
- `
|
|
224
|
+
- `GOLOGIN_CLOUD_TOKEN`
|
|
215
225
|
- `GOLOGIN_PROFILE_ID`
|
|
216
226
|
|
|
217
227
|
Useful config commands:
|
|
@@ -259,7 +269,7 @@ gologin-web-access parse-document ./example.pdf
|
|
|
259
269
|
### Interact With A Site
|
|
260
270
|
|
|
261
271
|
```bash
|
|
262
|
-
export
|
|
272
|
+
export GOLOGIN_TOKEN="gl_..."
|
|
263
273
|
export GOLOGIN_DEFAULT_PROFILE_ID="profile_123"
|
|
264
274
|
|
|
265
275
|
gologin-web-access open https://example.com
|
|
@@ -280,7 +290,7 @@ gologin-web-access close
|
|
|
280
290
|
### Search In A Real Browser
|
|
281
291
|
|
|
282
292
|
```bash
|
|
283
|
-
export
|
|
293
|
+
export GOLOGIN_TOKEN="gl_..."
|
|
284
294
|
|
|
285
295
|
gologin-web-access search-browser "gologin antidetect browser"
|
|
286
296
|
gologin-web-access snapshot -i
|
|
@@ -295,11 +305,16 @@ gologin-web-access snapshot -i
|
|
|
295
305
|
- `batch-extract` reuses the same extraction path across many URLs and returns one structured result per URL, including request and fallback metadata. Add `--output <path>` to save the full array directly.
|
|
296
306
|
- `scrape-json` now returns both a flat `headings` array and `headingsByLevel` buckets for `h1` through `h6`.
|
|
297
307
|
- `scrape-json --fallback browser` is available for JS-heavy pages where stateless extraction returns weak heading data.
|
|
308
|
+
- `scrape-json` now also classifies the page outcome as `ok`, `empty`, `incomplete`, `authwall`, `challenge`, `blocked`, or `cookie_wall`, and includes `nextActionHint` when the result is weak or gated.
|
|
298
309
|
- `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, and `batch-scrape` accept `--retry`, `--backoff-ms`, and `--timeout-ms`.
|
|
299
310
|
- `batch-scrape --only-main-content` lets markdown, text, and html batch runs use the same readable-content isolation path as `read`.
|
|
300
311
|
- `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless unlocker path.
|
|
301
312
|
- `batch-scrape --summary` prints a one-line success/failure summary to `stderr` after the JSON payload.
|
|
313
|
+
- `batch-scrape` now returns exit code `0` on partial success by default and only fails the command when every URL failed. Add `--strict` if any single failed URL should make the whole batch exit non-zero.
|
|
314
|
+
- `batch-scrape --output <path>` writes the full JSON to disk so shells and agent consoles cannot truncate a large payload silently.
|
|
302
315
|
- `batch-scrape --format json` now returns the same structured scrape envelope as `scrape-json`, including `renderSource`, `fallbackAttempted`, `fallbackUsed`, and `request.attemptCount/retryCount/attempts`.
|
|
316
|
+
- `batch-scrape --only-main-content` now propagates `outcome`, `outcomeReason`, `nextActionHint`, and fallback metadata per URL so agents can tell "weak page" from "gated page" without scraping log text.
|
|
317
|
+
- `scrape-json` now surfaces explicit `BLOCKED_PAGE` failures when structured output clearly matches a challenge or block page, instead of silently looking like a valid empty result.
|
|
303
318
|
- `search` now returns `requestedLimit`, `returnedCount`, `warnings`, `cacheTtlMs`, and per-result `position`.
|
|
304
319
|
- `search` may return fewer results than the requested `--limit` when the upstream SERP contains fewer valid results; inspect `returnedCount`, `warnings`, and `attempts`.
|
|
305
320
|
- `change-track` now accepts `--retry`, `--backoff-ms`, and `--timeout-ms`, and JSON output includes request metadata.
|
package/dist/cli.js
CHANGED
|
@@ -67,7 +67,7 @@ const wait_1 = require("./commands/wait");
|
|
|
67
67
|
const doctor_1 = require("./doctor");
|
|
68
68
|
const errors_1 = require("./lib/errors");
|
|
69
69
|
const output_1 = require("./lib/output");
|
|
70
|
-
const CLI_VERSION = "0.3.
|
|
70
|
+
const CLI_VERSION = "0.3.2";
|
|
71
71
|
async function main() {
|
|
72
72
|
const program = new commander_1.Command();
|
|
73
73
|
program
|
|
@@ -154,6 +154,12 @@ async function main() {
|
|
|
154
154
|
configGroup.addCommand((0, configShow_1.buildConfigShowCommand)());
|
|
155
155
|
configGroup.addCommand((0, configInit_1.buildConfigInitCommand)());
|
|
156
156
|
program.addHelpText("after", `
|
|
157
|
+
Quick picks:
|
|
158
|
+
read Best default for "read this docs page/article" and main-content extraction
|
|
159
|
+
scrape-text Plain text from one known page when you do not need headings/links metadata
|
|
160
|
+
scrape-json Structured title, description, headings, and links from one known page
|
|
161
|
+
batch-scrape Fetch many known URLs at once; add --output <path> for large results and --strict only when partial success should fail the command
|
|
162
|
+
|
|
157
163
|
Command groups:
|
|
158
164
|
Scraping: gologin-web-access scrape|read|scrape-markdown|scrape-text|scrape-json|batch-scrape|batch-extract|search|map|crawl|crawl-start|crawl-status|crawl-result|crawl-errors|extract|change-track|batch-change-track|parse-document
|
|
159
165
|
Browser: gologin-web-access open|search-browser|scrape-screenshot|tabs|tabopen|tabfocus|tabclose|snapshot|click|dblclick|focus|type|fill|hover|select|check|uncheck|press|scroll|scrollintoview|wait|get|back|forward|reload|find|cookies|cookies-import|cookies-clear|storage-export|storage-import|storage-clear|eval|upload|pdf|screenshot|close|sessions|current
|
|
@@ -161,7 +167,7 @@ Command groups:
|
|
|
161
167
|
|
|
162
168
|
Key model:
|
|
163
169
|
${"GOLOGIN_WEB_UNLOCKER_API_KEY"} powers scraping commands.
|
|
164
|
-
${"
|
|
170
|
+
${"GOLOGIN_TOKEN"} powers browser commands.
|
|
165
171
|
Recommended setup: configure both keys up front, even if the current task only needs one path.
|
|
166
172
|
`);
|
|
167
173
|
await program.parseAsync(process.argv);
|
|
package/dist/commands/batchScrape.js
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
6
|
exports.buildBatchScrapeCommand = buildBatchScrapeCommand;
|
|
7
|
+
exports.resolveBatchScrapeExitCode = resolveBatchScrapeExitCode;
|
|
8
|
+
exports.shouldWarnAboutLargeBatchOutput = shouldWarnAboutLargeBatchOutput;
|
|
9
|
+
const fs_1 = require("fs");
|
|
10
|
+
const path_1 = __importDefault(require("path"));
|
|
4
11
|
const commander_1 = require("commander");
|
|
5
12
|
const config_1 = require("../config");
|
|
6
13
|
const output_1 = require("../lib/output");
|
|
@@ -17,7 +24,9 @@ function buildBatchScrapeCommand() {
|
|
|
17
24
|
.option("--fallback <mode>", "Structured scrape fallback: none or browser", "none")
|
|
18
25
|
.option("--source <source>", "Read source for --only-main-content mode: auto, unlocker, or browser", "auto")
|
|
19
26
|
.option("--only-main-content", "For html, markdown, or text formats, isolate the most readable content block per page")
|
|
27
|
+
.option("--output <path>", "Write the full batch result JSON to a file")
|
|
20
28
|
.option("--summary", "Print one-line summary stats to stderr after the JSON output")
|
|
29
|
+
.option("--strict", "Exit non-zero if any URL in the batch fails")
|
|
21
30
|
.action(async (urls, options) => {
|
|
22
31
|
const config = await (0, config_1.loadConfig)();
|
|
23
32
|
const format = normalizeFormat(options.format);
|
|
@@ -38,7 +47,16 @@ function buildBatchScrapeCommand() {
|
|
|
38
47
|
url,
|
|
39
48
|
ok: true,
|
|
40
49
|
format,
|
|
41
|
-
output,
|
|
50
|
+
output: output.output,
|
|
51
|
+
outcome: output.outcome,
|
|
52
|
+
outcomeReason: output.outcomeReason,
|
|
53
|
+
nextActionHint: output.nextActionHint,
|
|
54
|
+
renderSource: output.renderSource,
|
|
55
|
+
fallbackAttempted: output.fallbackAttempted,
|
|
56
|
+
fallbackUsed: output.fallbackUsed,
|
|
57
|
+
fallbackReason: output.fallbackReason,
|
|
58
|
+
warning: output.warning,
|
|
59
|
+
request: output.request,
|
|
42
60
|
};
|
|
43
61
|
}
|
|
44
62
|
catch (error) {
|
|
@@ -48,18 +66,31 @@ function buildBatchScrapeCommand() {
|
|
|
48
66
|
ok: false,
|
|
49
67
|
format,
|
|
50
68
|
error: error instanceof Error ? error.message : "Unknown error",
|
|
69
|
+
code: extractErrorCode(error),
|
|
51
70
|
status: extractStatusCode(error),
|
|
71
|
+
outcome: extractOutcome(error),
|
|
72
|
+
nextActionHint: extractNextActionHint(error),
|
|
52
73
|
request,
|
|
53
74
|
};
|
|
54
75
|
}
|
|
55
76
|
});
|
|
56
|
-
|
|
77
|
+
const payload = `${JSON.stringify(results, null, 2)}\n`;
|
|
78
|
+
if (options.output) {
|
|
79
|
+
const outputPath = (0, shared_1.resolveOutputPath)(options.output);
|
|
80
|
+
await fs_1.promises.mkdir(path_1.default.dirname(outputPath), { recursive: true });
|
|
81
|
+
await fs_1.promises.writeFile(outputPath, payload, "utf8");
|
|
82
|
+
(0, output_1.printText)(outputPath);
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
(0, output_1.printText)(payload);
|
|
86
|
+
if (shouldWarnAboutLargeBatchOutput(payload)) {
|
|
87
|
+
process.stderr.write("Batch output is large. If your shell or agent truncates stdout, rerun with --output <path> to keep the full JSON.\n");
|
|
88
|
+
}
|
|
89
|
+
}
|
|
57
90
|
if (options.summary) {
|
|
58
91
|
process.stderr.write(formatBatchSummary(results) + "\n");
|
|
59
92
|
}
|
|
60
|
-
|
|
61
|
-
process.exitCode = 1;
|
|
62
|
-
}
|
|
93
|
+
process.exitCode = resolveBatchScrapeExitCode(results, Boolean(options.strict));
|
|
63
94
|
})));
|
|
64
95
|
}
|
|
65
96
|
function normalizeFormat(value) {
|
|
@@ -77,31 +108,67 @@ async function formatOutput(url, config, apiKey, format, requestOptions, fallbac
|
|
|
77
108
|
};
|
|
78
109
|
switch (format) {
|
|
79
110
|
case "html":
|
|
80
|
-
return (await (0, readSource_1.readHtmlContent)(url, config, apiKey, readOptions))
|
|
111
|
+
return mapReadableBatchResult(await (0, readSource_1.readHtmlContent)(url, config, apiKey, readOptions));
|
|
81
112
|
case "markdown":
|
|
82
|
-
return (await (0, readSource_1.readMarkdownContent)(url, config, apiKey, readOptions))
|
|
113
|
+
return mapReadableBatchResult(await (0, readSource_1.readMarkdownContent)(url, config, apiKey, readOptions));
|
|
83
114
|
case "text":
|
|
84
|
-
return (await (0, readSource_1.readTextContent)(url, config, apiKey, readOptions))
|
|
115
|
+
return mapReadableBatchResult(await (0, readSource_1.readTextContent)(url, config, apiKey, readOptions));
|
|
85
116
|
default:
|
|
86
117
|
break;
|
|
87
118
|
}
|
|
88
119
|
}
|
|
89
120
|
switch (format) {
|
|
90
121
|
case "html":
|
|
91
|
-
return
|
|
122
|
+
return {
|
|
123
|
+
output: (await (0, unlocker_1.scrapeRenderedHtml)(url, apiKey, requestOptions)).content,
|
|
124
|
+
};
|
|
92
125
|
case "markdown":
|
|
93
|
-
return
|
|
126
|
+
return {
|
|
127
|
+
output: (await (0, unlocker_1.scrapeMarkdown)(url, apiKey, requestOptions)).markdown,
|
|
128
|
+
};
|
|
94
129
|
case "text":
|
|
95
|
-
return
|
|
130
|
+
return {
|
|
131
|
+
output: (await (0, unlocker_1.scrapeText)(url, apiKey, requestOptions)).text,
|
|
132
|
+
};
|
|
96
133
|
case "json":
|
|
97
|
-
return await (0, structuredScrape_1.scrapeStructuredJson)(url, config, apiKey, {
|
|
134
|
+
return mapStructuredBatchResult(await (0, structuredScrape_1.scrapeStructuredJson)(url, config, apiKey, {
|
|
98
135
|
fallback,
|
|
99
136
|
request: requestOptions,
|
|
100
|
-
});
|
|
137
|
+
}));
|
|
101
138
|
default:
|
|
102
|
-
return
|
|
139
|
+
return {
|
|
140
|
+
output: (await (0, unlocker_1.scrapeRenderedHtml)(url, apiKey, requestOptions)).content,
|
|
141
|
+
};
|
|
103
142
|
}
|
|
104
143
|
}
|
|
144
|
+
function mapReadableBatchResult(result) {
|
|
145
|
+
return {
|
|
146
|
+
output: result.content,
|
|
147
|
+
outcome: result.outcome,
|
|
148
|
+
outcomeReason: result.outcomeReason,
|
|
149
|
+
nextActionHint: result.nextActionHint,
|
|
150
|
+
renderSource: result.renderSource,
|
|
151
|
+
fallbackAttempted: result.fallbackAttempted,
|
|
152
|
+
fallbackUsed: result.fallbackUsed,
|
|
153
|
+
fallbackReason: result.fallbackReason,
|
|
154
|
+
warning: result.warning,
|
|
155
|
+
request: result.request,
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
function mapStructuredBatchResult(result) {
|
|
159
|
+
return {
|
|
160
|
+
output: result,
|
|
161
|
+
outcome: result.outcome,
|
|
162
|
+
outcomeReason: result.outcomeReason,
|
|
163
|
+
nextActionHint: result.nextActionHint,
|
|
164
|
+
renderSource: result.renderSource,
|
|
165
|
+
fallbackAttempted: result.fallbackAttempted,
|
|
166
|
+
fallbackUsed: result.fallbackUsed,
|
|
167
|
+
fallbackReason: result.fallbackReason,
|
|
168
|
+
warning: result.warning,
|
|
169
|
+
request: result.request,
|
|
170
|
+
};
|
|
171
|
+
}
|
|
105
172
|
async function mapWithConcurrency(items, concurrency, mapper) {
|
|
106
173
|
const results = new Array(items.length);
|
|
107
174
|
let nextIndex = 0;
|
|
@@ -120,6 +187,19 @@ function formatBatchSummary(results) {
|
|
|
120
187
|
const failed = requested - ok;
|
|
121
188
|
return `Summary: ${requested} requested, ${ok} ok, ${failed} failed.`;
|
|
122
189
|
}
|
|
190
|
+
function resolveBatchScrapeExitCode(results, strict) {
|
|
191
|
+
const okCount = results.filter((result) => result.ok).length;
|
|
192
|
+
if (okCount === 0) {
|
|
193
|
+
return 1;
|
|
194
|
+
}
|
|
195
|
+
if (strict && okCount !== results.length) {
|
|
196
|
+
return 1;
|
|
197
|
+
}
|
|
198
|
+
return 0;
|
|
199
|
+
}
|
|
200
|
+
function shouldWarnAboutLargeBatchOutput(payload) {
|
|
201
|
+
return payload.length >= 100_000;
|
|
202
|
+
}
|
|
123
203
|
function extractStatusCode(error) {
|
|
124
204
|
if (typeof error === "object" &&
|
|
125
205
|
error !== null &&
|
|
@@ -138,3 +218,30 @@ function extractRequestMeta(error) {
|
|
|
138
218
|
}
|
|
139
219
|
return undefined;
|
|
140
220
|
}
|
|
221
|
+
function extractErrorCode(error) {
|
|
222
|
+
if (typeof error === "object" &&
|
|
223
|
+
error !== null &&
|
|
224
|
+
"code" in error &&
|
|
225
|
+
typeof error.code === "string") {
|
|
226
|
+
return error.code;
|
|
227
|
+
}
|
|
228
|
+
return undefined;
|
|
229
|
+
}
|
|
230
|
+
function extractOutcome(error) {
|
|
231
|
+
if (typeof error === "object" &&
|
|
232
|
+
error !== null &&
|
|
233
|
+
"outcome" in error &&
|
|
234
|
+
typeof error.outcome === "string") {
|
|
235
|
+
return error.outcome;
|
|
236
|
+
}
|
|
237
|
+
return undefined;
|
|
238
|
+
}
|
|
239
|
+
function extractNextActionHint(error) {
|
|
240
|
+
if (typeof error === "object" &&
|
|
241
|
+
error !== null &&
|
|
242
|
+
"nextActionHint" in error &&
|
|
243
|
+
typeof error.nextActionHint === "string") {
|
|
244
|
+
return error.nextActionHint;
|
|
245
|
+
}
|
|
246
|
+
return undefined;
|
|
247
|
+
}
|
package/dist/commands/close.js
CHANGED
|
@@ -8,9 +8,13 @@ function buildCloseCommand() {
|
|
|
8
8
|
return new commander_1.Command("close")
|
|
9
9
|
.description("Close the current browser session or a specific session.")
|
|
10
10
|
.option("--session <id>", "Session ID. Defaults to the current session.")
|
|
11
|
+
.option("--all", "Close every tracked browser session in the current daemon.")
|
|
11
12
|
.action(async (options) => {
|
|
12
13
|
const config = await (0, config_1.loadConfig)();
|
|
13
14
|
const args = ["close"];
|
|
15
|
+
if (options.all) {
|
|
16
|
+
args.push("--all");
|
|
17
|
+
}
|
|
14
18
|
if (options.session) {
|
|
15
19
|
args.push("--session", options.session);
|
|
16
20
|
}
|
|
package/dist/commands/configInit.js
CHANGED
|
@@ -8,10 +8,11 @@ const output_1 = require("../lib/output");
|
|
|
8
8
|
const unlocker_1 = require("../lib/unlocker");
|
|
9
9
|
function buildConfigInitCommand() {
|
|
10
10
|
return new commander_1.Command("init")
|
|
11
|
-
.description("Write ~/.gologin-web-access/config.json with current values or placeholders. Recommended: persist both Web Unlocker and
|
|
11
|
+
.description("Write ~/.gologin-web-access/config.json with current values or placeholders. Recommended: persist both the Web Unlocker key and the GoLogin token.")
|
|
12
12
|
.option("--web-unlocker-api-key <key>", "Persist a Web Unlocker API key")
|
|
13
13
|
.option("--web-unlocker-key <key>", "Alias for --web-unlocker-api-key")
|
|
14
|
-
.option("--
|
|
14
|
+
.option("--token <token>", "Persist a GoLogin token")
|
|
15
|
+
.option("--cloud-token <token>", "Backward-compatible alias for --token")
|
|
15
16
|
.option("--default-profile-id <id>", "Persist a default Gologin profile ID")
|
|
16
17
|
.option("--daemon-port <port>", "Persist a daemon port", String(config_1.DEFAULT_DAEMON_PORT))
|
|
17
18
|
.option("--no-validate", "Skip live key validation after writing config")
|
|
@@ -20,7 +21,10 @@ function buildConfigInitCommand() {
|
|
|
20
21
|
const webUnlockerApiKey = options.webUnlockerApiKey ?? options.webUnlockerKey ?? process.env[config_1.ENV_NAMES.webUnlockerApiKey];
|
|
21
22
|
const result = await (0, config_1.initConfigFile)({
|
|
22
23
|
webUnlockerApiKey,
|
|
23
|
-
cloudToken: options.
|
|
24
|
+
cloudToken: options.token ??
|
|
25
|
+
options.cloudToken ??
|
|
26
|
+
process.env[config_1.ENV_NAMES.cloudToken] ??
|
|
27
|
+
process.env.GOLOGIN_CLOUD_TOKEN,
|
|
24
28
|
defaultProfileId: options.defaultProfileId ?? process.env[config_1.ENV_NAMES.defaultProfileId],
|
|
25
29
|
daemonPort: Number(options.daemonPort ?? process.env[config_1.ENV_NAMES.daemonPort] ?? config_1.DEFAULT_DAEMON_PORT),
|
|
26
30
|
}, {
|
|
@@ -37,7 +41,7 @@ function buildConfigInitCommand() {
|
|
|
37
41
|
value: result.config.webUnlockerApiKey ? "written" : "left empty",
|
|
38
42
|
},
|
|
39
43
|
{
|
|
40
|
-
label: "
|
|
44
|
+
label: "GoLogin token",
|
|
41
45
|
value: result.config.cloudToken ? "written" : "left empty",
|
|
42
46
|
},
|
|
43
47
|
{
|
|
@@ -50,7 +54,7 @@ function buildConfigInitCommand() {
|
|
|
50
54
|
},
|
|
51
55
|
]);
|
|
52
56
|
if (!result.config.webUnlockerApiKey || !result.config.cloudToken) {
|
|
53
|
-
(0, output_1.printText)("Recommended next step: configure both GOLOGIN_WEB_UNLOCKER_API_KEY and
|
|
57
|
+
(0, output_1.printText)("Recommended next step: configure both GOLOGIN_WEB_UNLOCKER_API_KEY and GOLOGIN_TOKEN so agents can use scraping and browser flows without asking again.");
|
|
54
58
|
}
|
|
55
59
|
if (options.validate === false) {
|
|
56
60
|
return;
|
|
@@ -66,7 +70,7 @@ function buildConfigInitCommand() {
|
|
|
66
70
|
if (result.config.cloudToken) {
|
|
67
71
|
const validation = await (0, cloudApi_1.validateCloudToken)(result.config.cloudToken);
|
|
68
72
|
validationRows.push({
|
|
69
|
-
label: "
|
|
73
|
+
label: "GoLogin token validation",
|
|
70
74
|
value: validation.ok ? "ok" : `failed${validation.status ? ` (${validation.status})` : ""}: ${validation.detail}`,
|
|
71
75
|
});
|
|
72
76
|
}
|
package/dist/commands/read.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildReadCommand = buildReadCommand;
|
|
4
4
|
const commander_1 = require("commander");
|
|
5
5
|
const config_1 = require("../config");
|
|
6
|
+
const pageOutcome_1 = require("../lib/pageOutcome");
|
|
6
7
|
const readSource_1 = require("../lib/readSource");
|
|
7
8
|
const output_1 = require("../lib/output");
|
|
8
9
|
const shared_1 = require("./shared");
|
|
@@ -27,7 +28,7 @@ function buildReadCommand() {
|
|
|
27
28
|
: format === "markdown"
|
|
28
29
|
? await (0, readSource_1.readMarkdownContent)(url, config, apiKey, readOptions)
|
|
29
30
|
: await (0, readSource_1.readTextContent)(url, config, apiKey, readOptions);
|
|
30
|
-
emitReadNotice(result
|
|
31
|
+
emitReadNotice(result);
|
|
31
32
|
(0, output_1.printText)(result.content);
|
|
32
33
|
})));
|
|
33
34
|
}
|
|
@@ -37,15 +38,24 @@ function normalizeReadFormat(value) {
|
|
|
37
38
|
}
|
|
38
39
|
throw new Error(`Unsupported read format: ${value}`);
|
|
39
40
|
}
|
|
40
|
-
function emitReadNotice(
|
|
41
|
-
if (
|
|
42
|
-
|
|
41
|
+
function emitReadNotice(result) {
|
|
42
|
+
if (result.fallbackAttempted) {
|
|
43
|
+
if (result.fallbackUsed) {
|
|
44
|
+
process.stderr.write(`JS-rendered page detected, retrying with browser. ${result.fallbackReason ?? ""}\n`);
|
|
45
|
+
}
|
|
46
|
+
else if (result.fallbackReason) {
|
|
47
|
+
process.stderr.write(`${result.fallbackReason}\n`);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
if (result.outcome !== "ok") {
|
|
51
|
+
process.stderr.write(`Outcome: ${result.outcome}\n`);
|
|
43
52
|
}
|
|
44
|
-
if (
|
|
45
|
-
process.stderr.write(
|
|
53
|
+
if (result.warning) {
|
|
54
|
+
process.stderr.write(`${result.warning}\n`);
|
|
46
55
|
return;
|
|
47
56
|
}
|
|
48
|
-
|
|
49
|
-
|
|
57
|
+
const hint = (0, pageOutcome_1.describeNextActionHint)(result.nextActionHint);
|
|
58
|
+
if (hint && result.outcome !== "ok") {
|
|
59
|
+
process.stderr.write(`${hint}\n`);
|
|
50
60
|
}
|
|
51
61
|
}
|
|
package/dist/commands/scrapeJson.js
CHANGED
|
@@ -19,6 +19,15 @@ function buildScrapeJsonCommand() {
|
|
|
19
19
|
profile: options.profile,
|
|
20
20
|
request: (0, shared_1.normalizeUnlockerRequestOptions)(options),
|
|
21
21
|
});
|
|
22
|
+
if (envelope.fallbackAttempted) {
|
|
23
|
+
const fallbackStatus = envelope.fallbackUsed
|
|
24
|
+
? "Browser fallback succeeded and replaced the unlocker result."
|
|
25
|
+
: `Browser fallback was attempted but not used. ${envelope.fallbackReason ?? "It did not improve the structured output."}`;
|
|
26
|
+
process.stderr.write(`${fallbackStatus}\n`);
|
|
27
|
+
}
|
|
28
|
+
if (envelope.warning) {
|
|
29
|
+
process.stderr.write(`${envelope.warning}\n`);
|
|
30
|
+
}
|
|
22
31
|
(0, output_1.printJson)(envelope);
|
|
23
32
|
})));
|
|
24
33
|
}
|
|
package/dist/commands/scrapeMarkdown.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildScrapeMarkdownCommand = buildScrapeMarkdownCommand;
|
|
4
4
|
const commander_1 = require("commander");
|
|
5
5
|
const config_1 = require("../config");
|
|
6
|
+
const pageOutcome_1 = require("../lib/pageOutcome");
|
|
6
7
|
const readSource_1 = require("../lib/readSource");
|
|
7
8
|
const shared_1 = require("./shared");
|
|
8
9
|
const output_1 = require("../lib/output");
|
|
@@ -19,19 +20,28 @@ function buildScrapeMarkdownCommand() {
|
|
|
19
20
|
source,
|
|
20
21
|
request: (0, shared_1.normalizeUnlockerRequestOptions)(options),
|
|
21
22
|
});
|
|
22
|
-
emitReadNotice(result
|
|
23
|
+
emitReadNotice(result);
|
|
23
24
|
(0, output_1.printText)(result.content);
|
|
24
25
|
}));
|
|
25
26
|
}
|
|
26
|
-
function emitReadNotice(
|
|
27
|
-
if (
|
|
28
|
-
|
|
27
|
+
function emitReadNotice(result) {
|
|
28
|
+
if (result.fallbackAttempted) {
|
|
29
|
+
if (result.fallbackUsed) {
|
|
30
|
+
process.stderr.write(`JS-rendered page detected, retrying with browser. ${result.fallbackReason ?? ""}\n`);
|
|
31
|
+
}
|
|
32
|
+
else if (result.fallbackReason) {
|
|
33
|
+
process.stderr.write(`${result.fallbackReason}\n`);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
if (result.outcome !== "ok") {
|
|
37
|
+
process.stderr.write(`Outcome: ${result.outcome}\n`);
|
|
29
38
|
}
|
|
30
|
-
if (
|
|
31
|
-
process.stderr.write(
|
|
39
|
+
if (result.warning) {
|
|
40
|
+
process.stderr.write(`${result.warning}\n`);
|
|
32
41
|
return;
|
|
33
42
|
}
|
|
34
|
-
|
|
35
|
-
|
|
43
|
+
const hint = (0, pageOutcome_1.describeNextActionHint)(result.nextActionHint);
|
|
44
|
+
if (hint && result.outcome !== "ok") {
|
|
45
|
+
process.stderr.write(`${hint}\n`);
|
|
36
46
|
}
|
|
37
47
|
}
|
|
package/dist/commands/scrapeText.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildScrapeTextCommand = buildScrapeTextCommand;
|
|
4
4
|
const commander_1 = require("commander");
|
|
5
5
|
const config_1 = require("../config");
|
|
6
|
+
const pageOutcome_1 = require("../lib/pageOutcome");
|
|
6
7
|
const readSource_1 = require("../lib/readSource");
|
|
7
8
|
const shared_1 = require("./shared");
|
|
8
9
|
const output_1 = require("../lib/output");
|
|
@@ -19,19 +20,28 @@ function buildScrapeTextCommand() {
|
|
|
19
20
|
source,
|
|
20
21
|
request: (0, shared_1.normalizeUnlockerRequestOptions)(options),
|
|
21
22
|
});
|
|
22
|
-
emitReadNotice(result
|
|
23
|
+
emitReadNotice(result);
|
|
23
24
|
(0, output_1.printText)(result.content);
|
|
24
25
|
}));
|
|
25
26
|
}
|
|
26
|
-
function emitReadNotice(
|
|
27
|
-
if (
|
|
28
|
-
|
|
27
|
+
function emitReadNotice(result) {
|
|
28
|
+
if (result.fallbackAttempted) {
|
|
29
|
+
if (result.fallbackUsed) {
|
|
30
|
+
process.stderr.write(`JS-rendered page detected, retrying with browser. ${result.fallbackReason ?? ""}\n`);
|
|
31
|
+
}
|
|
32
|
+
else if (result.fallbackReason) {
|
|
33
|
+
process.stderr.write(`${result.fallbackReason}\n`);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
if (result.outcome !== "ok") {
|
|
37
|
+
process.stderr.write(`Outcome: ${result.outcome}\n`);
|
|
29
38
|
}
|
|
30
|
-
if (
|
|
31
|
-
process.stderr.write(
|
|
39
|
+
if (result.warning) {
|
|
40
|
+
process.stderr.write(`${result.warning}\n`);
|
|
32
41
|
return;
|
|
33
42
|
}
|
|
34
|
-
|
|
35
|
-
|
|
43
|
+
const hint = (0, pageOutcome_1.describeNextActionHint)(result.nextActionHint);
|
|
44
|
+
if (hint && result.outcome !== "ok") {
|
|
45
|
+
process.stderr.write(`${hint}\n`);
|
|
36
46
|
}
|
|
37
47
|
}
|
|
package/dist/commands/sessions.js
CHANGED
|
@@ -7,8 +7,17 @@ const agentCli_1 = require("../lib/agentCli");
|
|
|
7
7
|
function buildSessionsCommand() {
|
|
8
8
|
return new commander_1.Command("sessions")
|
|
9
9
|
.description("List active daemon-backed browser sessions.")
|
|
10
|
-
.
|
|
10
|
+
.option("--prune", "Close tracked sessions idle for too long before listing.")
|
|
11
|
+
.option("--older-than-ms <ms>", "Idle threshold used with --prune.")
|
|
12
|
+
.action(async (options) => {
|
|
11
13
|
const config = await (0, config_1.loadConfig)();
|
|
12
|
-
|
|
14
|
+
const args = ["sessions"];
|
|
15
|
+
if (options.prune) {
|
|
16
|
+
args.push("--prune");
|
|
17
|
+
}
|
|
18
|
+
if (options.olderThanMs) {
|
|
19
|
+
args.push("--older-than-ms", options.olderThanMs);
|
|
20
|
+
}
|
|
21
|
+
await (0, agentCli_1.runAgentCommand)(args, config);
|
|
13
22
|
});
|
|
14
23
|
}
|
package/dist/config.js
CHANGED
|
@@ -23,13 +23,13 @@ const CONFIG_FILENAME = "config.json";
|
|
|
23
23
|
exports.DEFAULT_DAEMON_PORT = 4590;
|
|
24
24
|
exports.ENV_NAMES = {
|
|
25
25
|
webUnlockerApiKey: "GOLOGIN_WEB_UNLOCKER_API_KEY",
|
|
26
|
-
cloudToken: "
|
|
26
|
+
cloudToken: "GOLOGIN_TOKEN",
|
|
27
27
|
defaultProfileId: "GOLOGIN_DEFAULT_PROFILE_ID",
|
|
28
28
|
daemonPort: "GOLOGIN_DAEMON_PORT",
|
|
29
29
|
};
|
|
30
30
|
const LEGACY_ENV_NAMES = {
|
|
31
31
|
webUnlockerApiKey: ["GOLOGIN_WEBUNLOCKER_API_KEY"],
|
|
32
|
-
cloudToken: ["
|
|
32
|
+
cloudToken: ["GOLOGIN_CLOUD_TOKEN"],
|
|
33
33
|
defaultProfileId: ["GOLOGIN_PROFILE_ID"],
|
|
34
34
|
daemonPort: [],
|
|
35
35
|
};
|