gologin-web-access 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +2 -0
- package/dist/cli.js +1 -1
- package/dist/commands/batchScrape.js +75 -10
- package/dist/commands/read.js +18 -8
- package/dist/commands/scrapeMarkdown.js +18 -8
- package/dist/commands/scrapeText.js +18 -8
- package/dist/lib/extractRunner.js +4 -0
- package/dist/lib/pageOutcome.js +192 -0
- package/dist/lib/readSource.js +39 -1
- package/dist/lib/structuredScrape.js +45 -56
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,12 @@
|
|
|
5
5
|
- browser automation is now embedded directly in `gologin-web-access`, so one repo and one install contains both Web Unlocker and Cloud Browser flows
|
|
6
6
|
- doctor now reports the embedded browser runtime source and version
|
|
7
7
|
|
|
8
|
+
## 0.3.2 - 2026-04-03
|
|
9
|
+
|
|
10
|
+
- added unified page outcome classification across `read`, `scrape-json`, and `batch-scrape`
|
|
11
|
+
- structured and readable paths now distinguish `ok`, `empty`, `incomplete`, `authwall`, `challenge`, `blocked`, and `cookie_wall`
|
|
12
|
+
- batch and extract-oriented flows now propagate next-step hints and fallback metadata more consistently for agents
|
|
13
|
+
|
|
8
14
|
## 0.1.0 - 2026-03-10
|
|
9
15
|
|
|
10
16
|
Initial public release of Gologin Web Access.
|
package/README.md
CHANGED
|
@@ -305,6 +305,7 @@ gologin-web-access snapshot -i
|
|
|
305
305
|
- `batch-extract` reuses the same extraction path across many URLs and returns one structured result per URL, including request and fallback metadata. Add `--output <path>` to save the full array directly.
|
|
306
306
|
- `scrape-json` now returns both a flat `headings` array and `headingsByLevel` buckets for `h1` through `h6`.
|
|
307
307
|
- `scrape-json --fallback browser` is available for JS-heavy pages where stateless extraction returns weak heading data.
|
|
308
|
+
- `scrape-json` now also classifies the page outcome as `ok`, `empty`, `incomplete`, `authwall`, `challenge`, `blocked`, or `cookie_wall`, and includes `nextActionHint` when the result is weak or gated.
|
|
308
309
|
- `scrape`, `scrape-markdown`, `scrape-text`, `scrape-json`, and `batch-scrape` accept `--retry`, `--backoff-ms`, and `--timeout-ms`.
|
|
309
310
|
- `batch-scrape --only-main-content` lets markdown, text, and html batch runs use the same readable-content isolation path as `read`.
|
|
310
311
|
- `crawl --only-main-content` uses the same readable-fragment extraction strategy for html, markdown, and text crawl output, but stays on the stateless unlocker path.
|
|
@@ -312,6 +313,7 @@ gologin-web-access snapshot -i
|
|
|
312
313
|
- `batch-scrape` now returns exit code `0` on partial success by default and only fails the command when every URL failed. Add `--strict` if any single failed URL should make the whole batch exit non-zero.
|
|
313
314
|
- `batch-scrape --output <path>` writes the full JSON to disk so shells and agent consoles cannot truncate a large payload silently.
|
|
314
315
|
- `batch-scrape --format json` now returns the same structured scrape envelope as `scrape-json`, including `renderSource`, `fallbackAttempted`, `fallbackUsed`, and `request.attemptCount/retryCount/attempts`.
|
|
316
|
+
- `batch-scrape --only-main-content` now propagates `outcome`, `outcomeReason`, `nextActionHint`, and fallback metadata per URL so agents can tell "weak page" from "gated page" without scraping log text.
|
|
315
317
|
- `scrape-json` now surfaces explicit `BLOCKED_PAGE` failures when structured output clearly matches a challenge or block page, instead of silently looking like a valid empty result.
|
|
316
318
|
- `search` now returns `requestedLimit`, `returnedCount`, `warnings`, `cacheTtlMs`, and per-result `position`.
|
|
317
319
|
- `search` may return fewer results than the requested `--limit` when the upstream SERP contains fewer valid results; inspect `returnedCount`, `warnings`, and `attempts`.
|
package/dist/cli.js
CHANGED
|
@@ -67,7 +67,7 @@ const wait_1 = require("./commands/wait");
|
|
|
67
67
|
const doctor_1 = require("./doctor");
|
|
68
68
|
const errors_1 = require("./lib/errors");
|
|
69
69
|
const output_1 = require("./lib/output");
|
|
70
|
-
const CLI_VERSION = "0.3.1";
|
|
70
|
+
const CLI_VERSION = "0.3.2";
|
|
71
71
|
async function main() {
|
|
72
72
|
const program = new commander_1.Command();
|
|
73
73
|
program
|
package/dist/commands/batchScrape.js
CHANGED
|
@@ -47,7 +47,16 @@ function buildBatchScrapeCommand() {
|
|
|
47
47
|
url,
|
|
48
48
|
ok: true,
|
|
49
49
|
format,
|
|
50
|
-
output,
|
|
50
|
+
output: output.output,
|
|
51
|
+
outcome: output.outcome,
|
|
52
|
+
outcomeReason: output.outcomeReason,
|
|
53
|
+
nextActionHint: output.nextActionHint,
|
|
54
|
+
renderSource: output.renderSource,
|
|
55
|
+
fallbackAttempted: output.fallbackAttempted,
|
|
56
|
+
fallbackUsed: output.fallbackUsed,
|
|
57
|
+
fallbackReason: output.fallbackReason,
|
|
58
|
+
warning: output.warning,
|
|
59
|
+
request: output.request,
|
|
51
60
|
};
|
|
52
61
|
}
|
|
53
62
|
catch (error) {
|
|
@@ -59,6 +68,8 @@ function buildBatchScrapeCommand() {
|
|
|
59
68
|
error: error instanceof Error ? error.message : "Unknown error",
|
|
60
69
|
code: extractErrorCode(error),
|
|
61
70
|
status: extractStatusCode(error),
|
|
71
|
+
outcome: extractOutcome(error),
|
|
72
|
+
nextActionHint: extractNextActionHint(error),
|
|
62
73
|
request,
|
|
63
74
|
};
|
|
64
75
|
}
|
|
@@ -97,31 +108,67 @@ async function formatOutput(url, config, apiKey, format, requestOptions, fallbac
|
|
|
97
108
|
};
|
|
98
109
|
switch (format) {
|
|
99
110
|
case "html":
|
|
100
|
-
return (await (0, readSource_1.readHtmlContent)(url, config, apiKey, readOptions))
|
|
111
|
+
return mapReadableBatchResult(await (0, readSource_1.readHtmlContent)(url, config, apiKey, readOptions));
|
|
101
112
|
case "markdown":
|
|
102
|
-
return (await (0, readSource_1.readMarkdownContent)(url, config, apiKey, readOptions))
|
|
113
|
+
return mapReadableBatchResult(await (0, readSource_1.readMarkdownContent)(url, config, apiKey, readOptions));
|
|
103
114
|
case "text":
|
|
104
|
-
return (await (0, readSource_1.readTextContent)(url, config, apiKey, readOptions))
|
|
115
|
+
return mapReadableBatchResult(await (0, readSource_1.readTextContent)(url, config, apiKey, readOptions));
|
|
105
116
|
default:
|
|
106
117
|
break;
|
|
107
118
|
}
|
|
108
119
|
}
|
|
109
120
|
switch (format) {
|
|
110
121
|
case "html":
|
|
111
|
-
return
|
|
122
|
+
return {
|
|
123
|
+
output: (await (0, unlocker_1.scrapeRenderedHtml)(url, apiKey, requestOptions)).content,
|
|
124
|
+
};
|
|
112
125
|
case "markdown":
|
|
113
|
-
return
|
|
126
|
+
return {
|
|
127
|
+
output: (await (0, unlocker_1.scrapeMarkdown)(url, apiKey, requestOptions)).markdown,
|
|
128
|
+
};
|
|
114
129
|
case "text":
|
|
115
|
-
return
|
|
130
|
+
return {
|
|
131
|
+
output: (await (0, unlocker_1.scrapeText)(url, apiKey, requestOptions)).text,
|
|
132
|
+
};
|
|
116
133
|
case "json":
|
|
117
|
-
return await (0, structuredScrape_1.scrapeStructuredJson)(url, config, apiKey, {
|
|
134
|
+
return mapStructuredBatchResult(await (0, structuredScrape_1.scrapeStructuredJson)(url, config, apiKey, {
|
|
118
135
|
fallback,
|
|
119
136
|
request: requestOptions,
|
|
120
|
-
});
|
|
137
|
+
}));
|
|
121
138
|
default:
|
|
122
|
-
return
|
|
139
|
+
return {
|
|
140
|
+
output: (await (0, unlocker_1.scrapeRenderedHtml)(url, apiKey, requestOptions)).content,
|
|
141
|
+
};
|
|
123
142
|
}
|
|
124
143
|
}
|
|
144
|
+
/**
 * Converts a readable-content result into the per-URL batch entry shape.
 *
 * The readable `content` is exposed as `output`; outcome, fallback, warning
 * and request metadata are copied across unchanged (missing fields stay
 * `undefined`).
 *
 * @param {object} result - Readable result carrying `content` plus metadata.
 * @returns {object} Batch entry fields for a single URL.
 */
function mapReadableBatchResult(result) {
    const {
        content,
        outcome,
        outcomeReason,
        nextActionHint,
        renderSource,
        fallbackAttempted,
        fallbackUsed,
        fallbackReason,
        warning,
        request,
    } = result;
    // Key order is kept stable so serialized batch output does not change.
    return {
        output: content,
        outcome,
        outcomeReason,
        nextActionHint,
        renderSource,
        fallbackAttempted,
        fallbackUsed,
        fallbackReason,
        warning,
        request,
    };
}
|
|
158
|
+
/**
 * Converts a structured scrape result into the per-URL batch entry shape.
 *
 * The whole structured result object is exposed as `output`, and its outcome,
 * fallback, warning and request metadata are additionally copied to the top
 * level of the entry.
 *
 * @param {object} result - Structured scrape result.
 * @returns {object} Batch entry fields for a single URL.
 */
function mapStructuredBatchResult(result) {
    // Copied in this order so the entry's key order stays stable.
    const metadataKeys = [
        "outcome",
        "outcomeReason",
        "nextActionHint",
        "renderSource",
        "fallbackAttempted",
        "fallbackUsed",
        "fallbackReason",
        "warning",
        "request",
    ];
    const entry = { output: result };
    for (const key of metadataKeys) {
        entry[key] = result[key];
    }
    return entry;
}
|
|
125
172
|
async function mapWithConcurrency(items, concurrency, mapper) {
|
|
126
173
|
const results = new Array(items.length);
|
|
127
174
|
let nextIndex = 0;
|
|
@@ -180,3 +227,21 @@ function extractErrorCode(error) {
|
|
|
180
227
|
}
|
|
181
228
|
return undefined;
|
|
182
229
|
}
|
|
230
|
+
/**
 * Reads a string `outcome` property off a thrown value, if it has one.
 *
 * @param {unknown} error - Anything caught from a failed scrape.
 * @returns {string | undefined} The outcome label, or `undefined` when the
 *   value is not an object or carries no string `outcome`.
 */
function extractOutcome(error) {
    if (error === null || typeof error !== "object") {
        return undefined;
    }
    if (!("outcome" in error)) {
        return undefined;
    }
    return typeof error.outcome === "string" ? error.outcome : undefined;
}
|
|
239
|
+
/**
 * Reads a string `nextActionHint` property off a thrown value, if it has one.
 *
 * @param {unknown} error - Anything caught from a failed scrape.
 * @returns {string | undefined} The hint identifier, or `undefined` when the
 *   value is not an object or carries no string `nextActionHint`.
 */
function extractNextActionHint(error) {
    const isObject = typeof error === "object" && error !== null;
    if (!isObject || !("nextActionHint" in error)) {
        return undefined;
    }
    const hint = error.nextActionHint;
    if (typeof hint !== "string") {
        return undefined;
    }
    return hint;
}
|
package/dist/commands/read.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildReadCommand = buildReadCommand;
|
|
4
4
|
const commander_1 = require("commander");
|
|
5
5
|
const config_1 = require("../config");
|
|
6
|
+
const pageOutcome_1 = require("../lib/pageOutcome");
|
|
6
7
|
const readSource_1 = require("../lib/readSource");
|
|
7
8
|
const output_1 = require("../lib/output");
|
|
8
9
|
const shared_1 = require("./shared");
|
|
@@ -27,7 +28,7 @@ function buildReadCommand() {
|
|
|
27
28
|
: format === "markdown"
|
|
28
29
|
? await (0, readSource_1.readMarkdownContent)(url, config, apiKey, readOptions)
|
|
29
30
|
: await (0, readSource_1.readTextContent)(url, config, apiKey, readOptions);
|
|
30
|
-
emitReadNotice(result
|
|
31
|
+
emitReadNotice(result);
|
|
31
32
|
(0, output_1.printText)(result.content);
|
|
32
33
|
})));
|
|
33
34
|
}
|
|
@@ -37,15 +38,24 @@ function normalizeReadFormat(value) {
|
|
|
37
38
|
}
|
|
38
39
|
throw new Error(`Unsupported read format: ${value}`);
|
|
39
40
|
}
|
|
40
|
-
function emitReadNotice(
|
|
41
|
-
if (
|
|
42
|
-
|
|
41
|
+
/**
 * Writes diagnostic notices about a read result to stderr; the page content
 * itself is printed separately by the caller.
 *
 * Notices, in order:
 *   1. the browser-fallback notice (when a fallback was attempted),
 *   2. `Outcome: <outcome>` when the outcome is not "ok",
 *   3. the result's `warning`, or, when there is no warning and the outcome
 *      is not "ok", the human-readable next-action hint.
 *
 * Fix over the previous version: when the fallback was used but no
 * `fallbackReason` is present, the notice no longer ends with a stray space
 * before the newline.
 *
 * @param {object} result - Read result with fallback/outcome metadata.
 * @returns {void}
 */
function emitReadNotice(result) {
    const notices = [];
    if (result.fallbackAttempted) {
        if (result.fallbackUsed) {
            const headline = "JS-rendered page detected, retrying with browser.";
            notices.push(result.fallbackReason ? `${headline} ${result.fallbackReason}` : headline);
        }
        else if (result.fallbackReason) {
            notices.push(result.fallbackReason);
        }
    }
    const gated = result.outcome !== "ok";
    if (gated) {
        notices.push(`Outcome: ${result.outcome}`);
    }
    if (result.warning) {
        // The hint is skipped whenever a warning is printed.
        notices.push(result.warning);
    }
    else if (gated) {
        const hint = (0, pageOutcome_1.describeNextActionHint)(result.nextActionHint);
        if (hint) {
            notices.push(hint);
        }
    }
    for (const notice of notices) {
        process.stderr.write(`${notice}\n`);
    }
}
|
package/dist/commands/scrapeMarkdown.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildScrapeMarkdownCommand = buildScrapeMarkdownCommand;
|
|
4
4
|
const commander_1 = require("commander");
|
|
5
5
|
const config_1 = require("../config");
|
|
6
|
+
const pageOutcome_1 = require("../lib/pageOutcome");
|
|
6
7
|
const readSource_1 = require("../lib/readSource");
|
|
7
8
|
const shared_1 = require("./shared");
|
|
8
9
|
const output_1 = require("../lib/output");
|
|
@@ -19,19 +20,28 @@ function buildScrapeMarkdownCommand() {
|
|
|
19
20
|
source,
|
|
20
21
|
request: (0, shared_1.normalizeUnlockerRequestOptions)(options),
|
|
21
22
|
});
|
|
22
|
-
emitReadNotice(result
|
|
23
|
+
emitReadNotice(result);
|
|
23
24
|
(0, output_1.printText)(result.content);
|
|
24
25
|
}));
|
|
25
26
|
}
|
|
26
|
-
function emitReadNotice(
|
|
27
|
-
if (
|
|
28
|
-
|
|
27
|
+
/**
 * Writes diagnostic notices about a markdown scrape result to stderr; the
 * content itself is printed separately by the caller.
 *
 * Notices, in order:
 *   1. the browser-fallback notice (when a fallback was attempted),
 *   2. `Outcome: <outcome>` when the outcome is not "ok",
 *   3. the result's `warning`, or, when there is no warning and the outcome
 *      is not "ok", the human-readable next-action hint.
 *
 * Fix over the previous version: when the fallback was used but no
 * `fallbackReason` is present, the notice no longer ends with a stray space
 * before the newline.
 *
 * @param {object} result - Read result with fallback/outcome metadata.
 * @returns {void}
 */
function emitReadNotice(result) {
    const notices = [];
    if (result.fallbackAttempted) {
        if (result.fallbackUsed) {
            const headline = "JS-rendered page detected, retrying with browser.";
            notices.push(result.fallbackReason ? `${headline} ${result.fallbackReason}` : headline);
        }
        else if (result.fallbackReason) {
            notices.push(result.fallbackReason);
        }
    }
    const gated = result.outcome !== "ok";
    if (gated) {
        notices.push(`Outcome: ${result.outcome}`);
    }
    if (result.warning) {
        // The hint is skipped whenever a warning is printed.
        notices.push(result.warning);
    }
    else if (gated) {
        const hint = (0, pageOutcome_1.describeNextActionHint)(result.nextActionHint);
        if (hint) {
            notices.push(hint);
        }
    }
    for (const notice of notices) {
        process.stderr.write(`${notice}\n`);
    }
}
|
package/dist/commands/scrapeText.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildScrapeTextCommand = buildScrapeTextCommand;
|
|
4
4
|
const commander_1 = require("commander");
|
|
5
5
|
const config_1 = require("../config");
|
|
6
|
+
const pageOutcome_1 = require("../lib/pageOutcome");
|
|
6
7
|
const readSource_1 = require("../lib/readSource");
|
|
7
8
|
const shared_1 = require("./shared");
|
|
8
9
|
const output_1 = require("../lib/output");
|
|
@@ -19,19 +20,28 @@ function buildScrapeTextCommand() {
|
|
|
19
20
|
source,
|
|
20
21
|
request: (0, shared_1.normalizeUnlockerRequestOptions)(options),
|
|
21
22
|
});
|
|
22
|
-
emitReadNotice(result
|
|
23
|
+
emitReadNotice(result);
|
|
23
24
|
(0, output_1.printText)(result.content);
|
|
24
25
|
}));
|
|
25
26
|
}
|
|
26
|
-
function emitReadNotice(
|
|
27
|
-
if (
|
|
28
|
-
|
|
27
|
+
/**
 * Writes diagnostic notices about a text scrape result to stderr; the
 * content itself is printed separately by the caller.
 *
 * Notices, in order:
 *   1. the browser-fallback notice (when a fallback was attempted),
 *   2. `Outcome: <outcome>` when the outcome is not "ok",
 *   3. the result's `warning`, or, when there is no warning and the outcome
 *      is not "ok", the human-readable next-action hint.
 *
 * Fix over the previous version: when the fallback was used but no
 * `fallbackReason` is present, the notice no longer ends with a stray space
 * before the newline.
 *
 * @param {object} result - Read result with fallback/outcome metadata.
 * @returns {void}
 */
function emitReadNotice(result) {
    const notices = [];
    if (result.fallbackAttempted) {
        if (result.fallbackUsed) {
            const headline = "JS-rendered page detected, retrying with browser.";
            notices.push(result.fallbackReason ? `${headline} ${result.fallbackReason}` : headline);
        }
        else if (result.fallbackReason) {
            notices.push(result.fallbackReason);
        }
    }
    const gated = result.outcome !== "ok";
    if (gated) {
        notices.push(`Outcome: ${result.outcome}`);
    }
    if (result.warning) {
        // The hint is skipped whenever a warning is printed.
        notices.push(result.warning);
    }
    else if (gated) {
        const hint = (0, pageOutcome_1.describeNextActionHint)(result.nextActionHint);
        if (hint) {
            notices.push(hint);
        }
    }
    for (const notice of notices) {
        process.stderr.write(`${notice}\n`);
    }
}
|
package/dist/lib/extractRunner.js
CHANGED
|
@@ -16,6 +16,10 @@ async function extractUrlWithSchema(url, config, apiKey, schema, options = {}) {
|
|
|
16
16
|
fallbackAttempted: rendered.fallbackAttempted,
|
|
17
17
|
fallbackUsed: rendered.fallbackUsed,
|
|
18
18
|
fallbackReason: rendered.fallbackReason,
|
|
19
|
+
outcome: rendered.outcome,
|
|
20
|
+
outcomeReason: rendered.outcomeReason,
|
|
21
|
+
nextActionHint: rendered.nextActionHint,
|
|
22
|
+
warning: rendered.warning,
|
|
19
23
|
request: rendered.request,
|
|
20
24
|
extracted: (0, extract_1.extractWithSchema)(rendered.html, schema),
|
|
21
25
|
};
|
package/dist/lib/pageOutcome.js
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.assessStructuredPageOutcome = assessStructuredPageOutcome;
|
|
4
|
+
exports.assessReadablePageOutcome = assessReadablePageOutcome;
|
|
5
|
+
exports.detectStructuredChallengeReason = detectStructuredChallengeReason;
|
|
6
|
+
exports.detectStructuredBlockedReason = detectStructuredBlockedReason;
|
|
7
|
+
exports.detectStructuredAuthwallReason = detectStructuredAuthwallReason;
|
|
8
|
+
exports.detectStructuredCookieWallReason = detectStructuredCookieWallReason;
|
|
9
|
+
exports.describeNextActionHint = describeNextActionHint;
|
|
10
|
+
const cheerio_1 = require("cheerio");
|
|
11
|
+
/**
 * Classifies a structured scrape result into a page outcome.
 *
 * Checks run in priority order and the first hit wins:
 * authwall → challenge → blocked → cookie_wall → empty → incomplete → ok.
 *
 * @param {object} data - Structured scrape data (title, description,
 *   canonical, headings, headingsByLevel, links).
 * @returns {object} Assessment with `outcome` and `browserRecommended`, plus
 *   `reason`, `nextActionHint` and `warning` for every non-"ok" outcome.
 */
function assessStructuredPageOutcome(data) {
    // [outcome, detector, next-action hint], highest priority first.
    const gateChecks = [
        ["authwall", detectStructuredAuthwallReason, "use_logged_in_session"],
        ["challenge", detectStructuredChallengeReason, "use_local_profile"],
        ["blocked", detectStructuredBlockedReason, "use_local_profile"],
        ["cookie_wall", detectStructuredCookieWallReason, "retry_with_browser"],
    ];
    for (const [outcome, detectReason, hint] of gateChecks) {
        const reason = detectReason(data);
        if (reason) {
            return buildAssessment(outcome, reason, hint, true);
        }
    }
    if (isStructuredDataEmpty(data)) {
        return buildAssessment("empty", "Structured output contained almost no readable fields", "retry_with_browser", true);
    }
    if (isStructuredDataLikelyIncomplete(data)) {
        return buildAssessment("incomplete", "Structured output looks incomplete or client-rendered", "retry_with_browser", true);
    }
    return { outcome: "ok", browserRecommended: false };
}
|
|
39
|
+
/**
 * Classifies a readable (html/markdown/text) page into a page outcome.
 *
 * Signals are taken from the raw HTML (title, first h1, canonical link, form
 * and input counts) and from the first 1,500 characters of the
 * whitespace-normalized readable content. Checks run in priority order:
 * canonical authwall path → authwall text → challenge → blocked →
 * cookie wall → empty → incomplete → ok.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} content - Readable content derived from the page.
 * @param {object} [options]
 * @param {boolean} [options.looksIncomplete] - Caller's own verdict that the
 *   content looks incomplete.
 * @param {string} [options.incompleteReason] - Reason reported with that verdict.
 * @returns {object} Assessment with `outcome` and `browserRecommended`, plus
 *   `reason`, `nextActionHint` and `warning` for every non-"ok" outcome.
 */
function assessReadablePageOutcome(html, content, options = {}) {
    const $ = (0, cheerio_1.load)(html);
    const pageTitle = $("title").first().text().trim();
    const leadHeading = $("h1").first().text().trim();
    const canonical = ($("link[rel='canonical']").attr("href") ?? "").trim();
    const normalizedText = normalizeText(content);
    const hasInputs = $("input, textarea, select").length > 0;
    const hasForms = $("form").length > 0;
    if (canonical.includes("/authwall")) {
        return buildAssessment("authwall", "Canonical URL points to an authwall path", "use_logged_in_session", true);
    }
    const candidates = [pageTitle, leadHeading, canonical, normalizedText.slice(0, 1_500)].filter(Boolean);
    // Returns the first reason any candidate produces for the given classifier.
    const firstReason = (classify) => {
        for (const candidate of candidates) {
            const reason = classify(candidate);
            if (reason) {
                return reason;
            }
        }
        return undefined;
    };
    // Authwall phrases only count on form-bearing or short pages.
    const authwallPlausible = hasForms || hasInputs || normalizedText.length < 3_000;
    const authwallReason = authwallPlausible ? firstReason(classifyAuthwallText) : undefined;
    if (authwallReason) {
        return buildAssessment("authwall", authwallReason, "use_logged_in_session", true);
    }
    const challengeReason = firstReason(classifyChallengeText);
    if (challengeReason) {
        return buildAssessment("challenge", challengeReason, "use_local_profile", true);
    }
    const blockedReason = firstReason(classifyBlockedText);
    if (blockedReason) {
        return buildAssessment("blocked", blockedReason, "use_local_profile", true);
    }
    // Cookie phrases only count on form-bearing or short pages.
    const cookieWallPlausible = hasForms || normalizedText.length < 2_500;
    const cookieWallReason = cookieWallPlausible ? firstReason(classifyCookieWallText) : undefined;
    if (cookieWallReason) {
        return buildAssessment("cookie_wall", cookieWallReason, "retry_with_browser", true);
    }
    if (normalizedText.length === 0) {
        return buildAssessment("empty", "Readable output was empty", "retry_with_browser", true);
    }
    if (options.looksIncomplete) {
        return buildAssessment("incomplete", options.incompleteReason ?? "Readable output looks incomplete", "retry_with_browser", true);
    }
    return { outcome: "ok", browserRecommended: false };
}
|
|
91
|
+
/**
 * Looks for challenge/verification markers in a structured scrape result.
 *
 * @param {object} data - Structured scrape data.
 * @returns {string | undefined} The matched reason, or `undefined`.
 */
function detectStructuredChallengeReason(data) {
    const challengeReason = findStructuredReason(data, classifyChallengeText);
    return challengeReason;
}
|
|
94
|
+
/**
 * Looks for blocked-page markers in a structured scrape result.
 *
 * @param {object} data - Structured scrape data.
 * @returns {string | undefined} The matched reason, or `undefined`.
 */
function detectStructuredBlockedReason(data) {
    const blockedReason = findStructuredReason(data, classifyBlockedText);
    return blockedReason;
}
|
|
97
|
+
/**
 * Looks for login/signup-wall signals in a structured scrape result.
 *
 * A canonical URL containing "/authwall" is decisive on its own; otherwise
 * the structured text fields are checked for authwall phrases.
 *
 * @param {object} data - Structured scrape data.
 * @returns {string | undefined} The matched reason, or `undefined`.
 */
function detectStructuredAuthwallReason(data) {
    const canonicalUrl = data.canonical ?? "";
    return canonicalUrl.includes("/authwall")
        ? "Canonical URL points to an authwall path"
        : findStructuredReason(data, classifyAuthwallText);
}
|
|
103
|
+
/**
 * Looks for cookie/consent-wall markers in a structured scrape result.
 *
 * @param {object} data - Structured scrape data.
 * @returns {string | undefined} The matched reason, or `undefined`.
 */
function detectStructuredCookieWallReason(data) {
    const cookieWallReason = findStructuredReason(data, classifyCookieWallText);
    return cookieWallReason;
}
|
|
106
|
+
/**
 * Translates a machine-readable next-action hint into a sentence for users.
 *
 * @param {string | undefined} hint - One of "retry_with_browser",
 *   "use_logged_in_session" or "use_local_profile".
 * @returns {string | undefined} The instruction text, or `undefined` for any
 *   other value.
 */
function describeNextActionHint(hint) {
    // A Map (not a plain object) so inherited keys like "toString" never match.
    const instructions = new Map([
        ["retry_with_browser", "Retry with --source browser or --fallback browser."],
        ["use_logged_in_session", "Use a logged-in browser session or a GoLogin profile with saved cookies."],
        ["use_local_profile", "Switch to gologin-local-agent-browser or another profile-backed browser path."],
    ]);
    return instructions.get(hint);
}
|
|
118
|
+
/**
 * Assembles an assessment object for a non-"ok" outcome.
 *
 * The `warning` sentence has the form "<Outcome> detected: <reason>." and is
 * followed by the human-readable action text when the hint is a known one.
 *
 * @param {string} outcome - Outcome label, e.g. "cookie_wall".
 * @param {string} reason - Why the outcome was chosen.
 * @param {string} nextActionHint - Machine-readable follow-up hint.
 * @param {boolean} browserRecommended - Whether a browser retry is advised.
 * @returns {object} `{ outcome, reason, nextActionHint, browserRecommended, warning }`.
 */
function buildAssessment(outcome, reason, nextActionHint, browserRecommended) {
    const actionText = describeNextActionHint(nextActionHint);
    let warning = `${capitalizeOutcome(outcome)} detected: ${reason}.`;
    if (actionText) {
        warning = `${warning} ${actionText}`;
    }
    return { outcome, reason, nextActionHint, browserRecommended, warning };
}
|
|
128
|
+
/**
 * Turns an outcome label into display text: underscores become spaces and a
 * leading word character is upper-cased ("cookie_wall" → "Cookie wall").
 *
 * @param {string} value - Outcome label.
 * @returns {string} Display form of the label.
 */
function capitalizeOutcome(value) {
    const spaced = value.split("_").join(" ");
    if (!/^\w/.test(spaced)) {
        return spaced;
    }
    return spaced.charAt(0).toUpperCase() + spaced.slice(1);
}
|
|
131
|
+
/**
 * Reports whether a structured result carries essentially nothing: no title,
 * no description, no headings and no links.
 *
 * @param {object} data - Structured scrape data with `headings` and `links` arrays.
 * @returns {boolean} `true` when all four fields are empty.
 */
function isStructuredDataEmpty(data) {
    if (data.title || data.description) {
        return false;
    }
    if (data.headings.length !== 0) {
        return false;
    }
    return data.links.length === 0;
}
|
|
134
|
+
/**
 * Heuristic for "the page probably did not render fully".
 *
 * A result is treated as incomplete when it has no first h1, when that h1
 * looks suspicious (see `looksSuspiciousHeadingText`), or when it has neither
 * a title nor at least two headings.
 *
 * @param {object} data - Structured scrape data with `headings` and
 *   `headingsByLevel.h1`.
 * @returns {boolean} `true` when the result looks incomplete.
 */
function isStructuredDataLikelyIncomplete(data) {
    const primaryHeading = data.headingsByLevel.h1[0];
    const headingUnusable = !primaryHeading || looksSuspiciousHeadingText(primaryHeading);
    if (headingUnusable) {
        return true;
    }
    const untitledAndSparse = !data.title && data.headings.length < 2;
    return untitledAndSparse;
}
|
|
147
|
+
/**
 * Heuristic: does this heading text look like leaked JavaScript rather than
 * a real heading?
 *
 * The previous single pattern was case-insensitive and unanchored, so plain
 * English tripped it: "Violet flowers" (contains "let "), "Let us help",
 * "How to import data" and "Close the window." were all flagged. Each
 * keyword is now anchored on a word boundary, matched case-sensitively, and
 * must be followed by something that only occurs in code.
 *
 * @param {string} value - Heading text.
 * @returns {boolean} `true` when the text contains a code-like fragment or is
 *   longer than 240 characters.
 */
function looksSuspiciousHeadingText(value) {
    const codeFragmentPatterns = [
        // `function (` / `function(`
        /\bfunction\s*\(/,
        // Property access on a browser global, e.g. `window.__STATE__`.
        /\b(?:window|document)\.[A-Za-z_$]/,
        // A declaration: `const x =`, `let x;`, `var a,b`, `const {` or `let [`.
        /\b(?:const|let|var)\s+(?:[{\[]|[A-Za-z_$][\w$]*\s*[=;,])/,
        // Arrow function.
        /=>/,
        // `import "x"`, `import 'x'` or dynamic `import(`.
        /\bimport\s*["'(]/,
        // `import <bindings> from "x"`.
        /\bimport\s+[\w$*{},\s]+?\s+from\s*["']/,
    ];
    if (value.length > 240) {
        return true;
    }
    return codeFragmentPatterns.some((pattern) => pattern.test(value));
}
|
|
150
|
+
/**
 * Runs a text classifier over the key text fields of a structured result and
 * returns the first reason it reports.
 *
 * Fields are tried in this order: title, description, canonical, the first
 * two h1 headings, the first two h2 headings. Blank or missing fields are
 * dropped before the classifier runs.
 *
 * @param {object} data - Structured scrape data with `headingsByLevel.h1/h2`.
 * @param {(text: string) => (string | undefined)} matcher - Classifier that
 *   returns a reason for matching text.
 * @returns {string | undefined} First truthy reason, or `undefined`.
 */
function findStructuredReason(data, matcher) {
    const isNonBlank = (text) => Boolean(text && text.trim());
    const { h1, h2 } = data.headingsByLevel;
    const candidates = [
        data.title,
        data.description,
        data.canonical,
        ...h1.slice(0, 2),
        ...h2.slice(0, 2),
    ].filter(isNonBlank);
    let reason;
    // `some` stops at the first candidate the matcher accepts.
    const matched = candidates.some((candidate) => {
        reason = matcher(candidate);
        return Boolean(reason);
    });
    return matched ? reason : undefined;
}
|
|
166
|
+
/**
 * Collapses every run of whitespace to a single space and strips leading and
 * trailing whitespace.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Single-spaced, trimmed text.
 */
function normalizeText(value) {
    // Splitting on whitespace runs leaves empty edge tokens; drop them.
    const words = value.split(/\s+/).filter((word) => word.length > 0);
    return words.join(" ");
}
|
|
169
|
+
/**
 * Checks text for human-verification / bot-challenge phrases
 * (case-insensitive).
 *
 * @param {string} value - Text to inspect.
 * @returns {string | undefined} A fixed reason string on match, else `undefined`.
 */
function classifyChallengeText(value) {
    const challengeMarkers = /(verify you are human|verify you are a human|are you human|captcha|security check|attention required|just a moment|checking your browser|enable javascript and cookies to continue|one more step|security verification)/i;
    return challengeMarkers.test(value)
        ? "Challenge or verification markers matched the page"
        : undefined;
}
|
|
175
|
+
/**
 * Checks text for access-denied / blocked-request phrases (case-insensitive).
 *
 * @param {string} value - Text to inspect.
 * @returns {string | undefined} A fixed reason string on match, else `undefined`.
 */
function classifyBlockedText(value) {
    const blockedMarkers = /(access denied|forbidden|blocked request|request blocked|request unsuccessful|temporarily blocked|temporarily unavailable|you have been blocked|access to this page has been denied)/i;
    return blockedMarkers.test(value)
        ? "Blocked-page markers matched the page"
        : undefined;
}
|
|
181
|
+
/**
 * Checks text for login-wall / signup-wall phrases (case-insensitive).
 *
 * @param {string} value - Text to inspect.
 * @returns {string | undefined} A fixed reason string on match, else `undefined`.
 */
function classifyAuthwallText(value) {
    const authwallMarkers = /(sign in to view|sign in to continue|log in to continue|join to view|join now|join linkedin|join to continue|sign up to view|create a free account|view full profile|view profile|join to see more)/i;
    return authwallMarkers.test(value)
        ? "Login or signup wall markers matched the page"
        : undefined;
}
|
|
187
|
+
/**
 * Checks text for cookie-banner / consent-wall phrases (case-insensitive).
 *
 * @param {string} value - Text to inspect.
 * @returns {string | undefined} A fixed reason string on match, else `undefined`.
 */
function classifyCookieWallText(value) {
    const cookieWallMarkers = /(cookie preferences|accept cookies|manage cookies|consent preferences|we use cookies|your privacy choices|respects your privacy)/i;
    return cookieWallMarkers.test(value)
        ? "Cookie or consent wall markers matched the page"
        : undefined;
}
|
package/dist/lib/readSource.js
CHANGED
|
@@ -10,6 +10,7 @@ exports.extractReadableSegmentFromHtml = extractReadableSegmentFromHtml;
|
|
|
10
10
|
const cheerio_1 = require("cheerio");
|
|
11
11
|
const config_1 = require("../config");
|
|
12
12
|
const browserRead_1 = require("./browserRead");
|
|
13
|
+
const pageOutcome_1 = require("./pageOutcome");
|
|
13
14
|
const unlocker_1 = require("./unlocker");
|
|
14
15
|
function normalizeReadSourceMode(value, defaultMode = "auto") {
|
|
15
16
|
if (!value) {
|
|
@@ -50,6 +51,7 @@ async function readRenderedHtmlContent(url, config, apiKey, options = {}) {
|
|
|
50
51
|
renderSource: "browser",
|
|
51
52
|
fallbackAttempted: false,
|
|
52
53
|
fallbackUsed: false,
|
|
54
|
+
outcome: "ok",
|
|
53
55
|
};
|
|
54
56
|
}
|
|
55
57
|
const unlocker = await (0, unlocker_1.scrapeRenderedHtml)(url, apiKey, options.request);
|
|
@@ -59,17 +61,26 @@ async function readRenderedHtmlContent(url, config, apiKey, options = {}) {
|
|
|
59
61
|
renderSource: "unlocker",
|
|
60
62
|
fallbackAttempted: false,
|
|
61
63
|
fallbackUsed: false,
|
|
64
|
+
outcome: "ok",
|
|
62
65
|
request: unlocker.request,
|
|
63
66
|
};
|
|
64
67
|
}
|
|
65
68
|
const unlockerText = (0, unlocker_1.htmlToText)(unlocker.content);
|
|
66
69
|
const assessment = assessReadableContent(unlocker.content, unlockerText);
|
|
70
|
+
const outcomeAssessment = (0, pageOutcome_1.assessReadablePageOutcome)(unlocker.content, unlockerText, {
|
|
71
|
+
looksIncomplete: assessment.shouldFallback,
|
|
72
|
+
incompleteReason: assessment.reason,
|
|
73
|
+
});
|
|
67
74
|
if (!assessment.shouldFallback) {
|
|
68
75
|
return {
|
|
69
76
|
html: unlocker.content,
|
|
70
77
|
renderSource: "unlocker",
|
|
71
78
|
fallbackAttempted: false,
|
|
72
79
|
fallbackUsed: false,
|
|
80
|
+
outcome: outcomeAssessment.outcome,
|
|
81
|
+
outcomeReason: outcomeAssessment.reason,
|
|
82
|
+
nextActionHint: outcomeAssessment.nextActionHint,
|
|
83
|
+
warning: outcomeAssessment.warning,
|
|
73
84
|
request: unlocker.request,
|
|
74
85
|
};
|
|
75
86
|
}
|
|
@@ -80,6 +91,10 @@ async function readRenderedHtmlContent(url, config, apiKey, options = {}) {
|
|
|
80
91
|
fallbackAttempted: true,
|
|
81
92
|
fallbackUsed: false,
|
|
82
93
|
fallbackReason: `${assessment.reason}; GOLOGIN_TOKEN is not configured`,
|
|
94
|
+
outcome: outcomeAssessment.outcome,
|
|
95
|
+
outcomeReason: outcomeAssessment.reason,
|
|
96
|
+
nextActionHint: outcomeAssessment.nextActionHint,
|
|
97
|
+
warning: outcomeAssessment.warning,
|
|
83
98
|
request: unlocker.request,
|
|
84
99
|
};
|
|
85
100
|
}
|
|
@@ -92,6 +107,7 @@ async function readRenderedHtmlContent(url, config, apiKey, options = {}) {
|
|
|
92
107
|
fallbackAttempted: true,
|
|
93
108
|
fallbackUsed: true,
|
|
94
109
|
fallbackReason: assessment.reason,
|
|
110
|
+
outcome: "ok",
|
|
95
111
|
request: unlocker.request,
|
|
96
112
|
};
|
|
97
113
|
}
|
|
@@ -150,27 +166,40 @@ async function readReadableContent(url, config, apiKey, options) {
|
|
|
150
166
|
renderSource: "browser",
|
|
151
167
|
fallbackAttempted: false,
|
|
152
168
|
fallbackUsed: false,
|
|
169
|
+
outcome: "ok",
|
|
153
170
|
};
|
|
154
171
|
}
|
|
155
172
|
const unlocker = await (0, unlocker_1.scrapeRenderedHtml)(url, apiKey, options.request);
|
|
156
173
|
const readable = extractReadableSegmentFromHtml(unlocker.content);
|
|
157
174
|
const unlockerContent = formatReadableContent(options.format, readable.html, readable.text);
|
|
175
|
+
const assessment = assessReadableContent(unlocker.content, unlockerContent);
|
|
176
|
+
const outcomeAssessment = (0, pageOutcome_1.assessReadablePageOutcome)(unlocker.content, unlockerContent, {
|
|
177
|
+
looksIncomplete: assessment.shouldFallback,
|
|
178
|
+
incompleteReason: assessment.reason,
|
|
179
|
+
});
|
|
158
180
|
if (source === "unlocker") {
|
|
159
181
|
return {
|
|
160
182
|
content: unlockerContent,
|
|
161
183
|
renderSource: "unlocker",
|
|
162
184
|
fallbackAttempted: false,
|
|
163
185
|
fallbackUsed: false,
|
|
186
|
+
outcome: outcomeAssessment.outcome,
|
|
187
|
+
outcomeReason: outcomeAssessment.reason,
|
|
188
|
+
nextActionHint: outcomeAssessment.nextActionHint,
|
|
189
|
+
warning: outcomeAssessment.warning,
|
|
164
190
|
request: unlocker.request,
|
|
165
191
|
};
|
|
166
192
|
}
|
|
167
|
-
const assessment = assessReadableContent(unlocker.content, unlockerContent);
|
|
168
193
|
if (!assessment.shouldFallback) {
|
|
169
194
|
return {
|
|
170
195
|
content: unlockerContent,
|
|
171
196
|
renderSource: "unlocker",
|
|
172
197
|
fallbackAttempted: false,
|
|
173
198
|
fallbackUsed: false,
|
|
199
|
+
outcome: outcomeAssessment.outcome,
|
|
200
|
+
outcomeReason: outcomeAssessment.reason,
|
|
201
|
+
nextActionHint: outcomeAssessment.nextActionHint,
|
|
202
|
+
warning: outcomeAssessment.warning,
|
|
174
203
|
request: unlocker.request,
|
|
175
204
|
};
|
|
176
205
|
}
|
|
@@ -181,6 +210,10 @@ async function readReadableContent(url, config, apiKey, options) {
|
|
|
181
210
|
fallbackAttempted: true,
|
|
182
211
|
fallbackUsed: false,
|
|
183
212
|
fallbackReason: `${assessment.reason}; GOLOGIN_TOKEN is not configured`,
|
|
213
|
+
outcome: outcomeAssessment.outcome,
|
|
214
|
+
outcomeReason: outcomeAssessment.reason,
|
|
215
|
+
nextActionHint: outcomeAssessment.nextActionHint,
|
|
216
|
+
warning: outcomeAssessment.warning,
|
|
184
217
|
request: unlocker.request,
|
|
185
218
|
};
|
|
186
219
|
}
|
|
@@ -195,6 +228,10 @@ async function readReadableContent(url, config, apiKey, options) {
|
|
|
195
228
|
fallbackAttempted: true,
|
|
196
229
|
fallbackUsed: false,
|
|
197
230
|
fallbackReason: "Browser fallback did not improve readable output",
|
|
231
|
+
outcome: outcomeAssessment.outcome,
|
|
232
|
+
outcomeReason: outcomeAssessment.reason,
|
|
233
|
+
nextActionHint: outcomeAssessment.nextActionHint,
|
|
234
|
+
warning: outcomeAssessment.warning,
|
|
198
235
|
request: unlocker.request,
|
|
199
236
|
};
|
|
200
237
|
}
|
|
@@ -204,6 +241,7 @@ async function readReadableContent(url, config, apiKey, options) {
|
|
|
204
241
|
fallbackAttempted: true,
|
|
205
242
|
fallbackUsed: true,
|
|
206
243
|
fallbackReason: assessment.reason,
|
|
244
|
+
outcome: "ok",
|
|
207
245
|
request: unlocker.request,
|
|
208
246
|
};
|
|
209
247
|
}
|
package/dist/lib/structuredScrape.js
CHANGED
|
@@ -9,21 +9,27 @@ exports.detectStructuredBlockReason = detectStructuredBlockReason;
|
|
|
9
9
|
const config_1 = require("../config");
|
|
10
10
|
const errors_1 = require("./errors");
|
|
11
11
|
const browserStructured_1 = require("./browserStructured");
|
|
12
|
+
const pageOutcome_1 = require("./pageOutcome");
|
|
12
13
|
const unlocker_1 = require("./unlocker");
|
|
13
14
|
class StructuredBlockedPageError extends errors_1.CliError {
|
|
14
15
|
status;
|
|
15
16
|
request;
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
outcome;
|
|
18
|
+
nextActionHint;
|
|
19
|
+
constructor(url, status, request, outcome, reason, nextActionHint, options) {
|
|
20
|
+
super(`Structured scrape returned ${outcome.replace(/_/g, " ")} content for ${url}.`, 1, [
|
|
18
21
|
`Reason: ${reason}.`,
|
|
19
22
|
options.fallbackAttempted
|
|
20
23
|
? options.fallbackUsed
|
|
21
24
|
? "Browser fallback was used but the page still looked blocked."
|
|
22
25
|
: `Browser fallback was attempted but not used. ${options.fallbackReason ?? "It did not improve the result."}`
|
|
23
|
-
:
|
|
24
|
-
|
|
26
|
+
: (0, pageOutcome_1.describeNextActionHint)(nextActionHint) ??
|
|
27
|
+
"Retry with --fallback browser, use read --source browser, or switch to gologin-local-agent-browser for full rendered DOM.",
|
|
28
|
+
].join("\n"), outcomeToErrorCode(outcome));
|
|
25
29
|
this.status = status;
|
|
26
30
|
this.request = request;
|
|
31
|
+
this.outcome = outcome;
|
|
32
|
+
this.nextActionHint = nextActionHint;
|
|
27
33
|
}
|
|
28
34
|
}
|
|
29
35
|
async function scrapeStructuredJson(url, config, apiKey, options = {}) {
|
|
@@ -34,7 +40,7 @@ async function scrapeStructuredJson(url, config, apiKey, options = {}) {
|
|
|
34
40
|
let fallbackAttempted = false;
|
|
35
41
|
let fallbackUsed = false;
|
|
36
42
|
let fallbackReason;
|
|
37
|
-
let { browserRecommended, warning } =
|
|
43
|
+
let { outcome, reason, nextActionHint, browserRecommended, warning } = (0, pageOutcome_1.assessStructuredPageOutcome)(data);
|
|
38
44
|
if (fallbackMode === "browser" && shouldUseBrowserFallback(data)) {
|
|
39
45
|
fallbackAttempted = true;
|
|
40
46
|
(0, config_1.requireCloudToken)(config);
|
|
@@ -46,16 +52,14 @@ async function scrapeStructuredJson(url, config, apiKey, options = {}) {
|
|
|
46
52
|
renderSource = "browser";
|
|
47
53
|
fallbackUsed = true;
|
|
48
54
|
fallbackReason = "unlocker structured data looked incomplete";
|
|
49
|
-
browserRecommended =
|
|
50
|
-
warning = undefined;
|
|
55
|
+
({ outcome, reason, nextActionHint, browserRecommended, warning } = (0, pageOutcome_1.assessStructuredPageOutcome)(data));
|
|
51
56
|
}
|
|
52
57
|
else {
|
|
53
58
|
fallbackReason = "browser fallback did not improve structured output";
|
|
54
59
|
}
|
|
55
60
|
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
throw new StructuredBlockedPageError(url, result.status, result.request, blockedReason, {
|
|
61
|
+
if (outcome === "authwall" || outcome === "challenge" || outcome === "blocked" || outcome === "cookie_wall") {
|
|
62
|
+
throw new StructuredBlockedPageError(url, result.status, result.request, outcome, reason ?? "Outcome matched page markers", nextActionHint, {
|
|
59
63
|
fallbackAttempted,
|
|
60
64
|
fallbackUsed,
|
|
61
65
|
fallbackReason,
|
|
@@ -63,6 +67,9 @@ async function scrapeStructuredJson(url, config, apiKey, options = {}) {
|
|
|
63
67
|
}
|
|
64
68
|
return makeStructuredScrapeEnvelope(url, result, data, {
|
|
65
69
|
renderSource,
|
|
70
|
+
outcome,
|
|
71
|
+
outcomeReason: reason,
|
|
72
|
+
nextActionHint,
|
|
66
73
|
fallbackAttempted,
|
|
67
74
|
fallbackUsed,
|
|
68
75
|
fallbackReason,
|
|
@@ -74,6 +81,9 @@ function makeStructuredScrapeEnvelope(url, result, data, options = {}) {
|
|
|
74
81
|
return {
|
|
75
82
|
url,
|
|
76
83
|
status: result.status,
|
|
84
|
+
outcome: options.outcome ?? "ok",
|
|
85
|
+
outcomeReason: options.outcomeReason,
|
|
86
|
+
nextActionHint: options.nextActionHint,
|
|
77
87
|
renderSource: options.renderSource ?? "unlocker",
|
|
78
88
|
fallbackAttempted: options.fallbackAttempted ?? false,
|
|
79
89
|
fallbackUsed: options.fallbackUsed ?? false,
|
|
@@ -94,37 +104,18 @@ function normalizeStructuredFallbackMode(value) {
|
|
|
94
104
|
throw new Error(`Unsupported scrape-json fallback mode: ${value}`);
|
|
95
105
|
}
|
|
96
106
|
function shouldUseBrowserFallback(data) {
|
|
97
|
-
|
|
98
|
-
return true;
|
|
99
|
-
}
|
|
100
|
-
const firstH1 = data.headingsByLevel.h1[0];
|
|
101
|
-
if (!firstH1) {
|
|
102
|
-
return true;
|
|
103
|
-
}
|
|
104
|
-
return looksSuspiciousHeadingText(firstH1);
|
|
107
|
+
return (0, pageOutcome_1.assessStructuredPageOutcome)(data).outcome !== "ok";
|
|
105
108
|
}
|
|
106
109
|
function buildStructuredFallbackAdvisory(data) {
|
|
107
|
-
const
|
|
108
|
-
if (blockedReason) {
|
|
109
|
-
return {
|
|
110
|
-
browserRecommended: true,
|
|
111
|
-
warning: `Structured output looks blocked or challenge-gated (${blockedReason}). Retry with --fallback browser or use a rendered browser path.`,
|
|
112
|
-
};
|
|
113
|
-
}
|
|
114
|
-
if (!shouldUseBrowserFallback(data)) {
|
|
115
|
-
return { browserRecommended: false };
|
|
116
|
-
}
|
|
110
|
+
const assessment = (0, pageOutcome_1.assessStructuredPageOutcome)(data);
|
|
117
111
|
return {
|
|
118
|
-
browserRecommended:
|
|
119
|
-
warning:
|
|
112
|
+
browserRecommended: assessment.browserRecommended,
|
|
113
|
+
warning: assessment.warning,
|
|
120
114
|
};
|
|
121
115
|
}
|
|
122
|
-
function looksSuspiciousHeadingText(value) {
|
|
123
|
-
return /function\s*\(|window\.|document\.|const\s+|let\s+|var\s+|=>|import\s+/i.test(value) || value.length > 240;
|
|
124
|
-
}
|
|
125
116
|
function isBrowserDataBetter(current, candidate) {
|
|
126
|
-
const currentBlocked = Boolean(
|
|
127
|
-
const candidateBlocked = Boolean(
|
|
117
|
+
const currentBlocked = Boolean((0, pageOutcome_1.detectStructuredBlockedReason)(current));
|
|
118
|
+
const candidateBlocked = Boolean((0, pageOutcome_1.detectStructuredBlockedReason)(candidate));
|
|
128
119
|
if (currentBlocked !== candidateBlocked) {
|
|
129
120
|
return currentBlocked && !candidateBlocked;
|
|
130
121
|
}
|
|
@@ -140,27 +131,25 @@ function isBrowserDataBetter(current, candidate) {
|
|
|
140
131
|
return false;
|
|
141
132
|
}
|
|
142
133
|
function detectStructuredBlockReason(data) {
|
|
143
|
-
|
|
144
|
-
data
|
|
145
|
-
data.description,
|
|
146
|
-
...data.headingsByLevel.h1.slice(0, 2),
|
|
147
|
-
...data.headingsByLevel.h2.slice(0, 2),
|
|
148
|
-
].filter((value) => Boolean(value && value.trim()));
|
|
149
|
-
for (const candidate of candidates) {
|
|
150
|
-
const reason = classifyBlockedText(candidate);
|
|
151
|
-
if (reason) {
|
|
152
|
-
return reason;
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
return undefined;
|
|
134
|
+
return ((0, pageOutcome_1.assessStructuredPageOutcome)(data).reason ??
|
|
135
|
+
(0, pageOutcome_1.detectStructuredBlockedReason)(data));
|
|
156
136
|
}
|
|
157
|
-
function
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
137
|
+
function outcomeToErrorCode(outcome) {
|
|
138
|
+
switch (outcome) {
|
|
139
|
+
case "authwall":
|
|
140
|
+
return "AUTHWALL_PAGE";
|
|
141
|
+
case "challenge":
|
|
142
|
+
return "CHALLENGE_PAGE";
|
|
143
|
+
case "cookie_wall":
|
|
144
|
+
return "COOKIE_WALL_PAGE";
|
|
145
|
+
case "blocked":
|
|
146
|
+
return "BLOCKED_PAGE";
|
|
147
|
+
case "empty":
|
|
148
|
+
return "EMPTY_PAGE";
|
|
149
|
+
case "incomplete":
|
|
150
|
+
return "INCOMPLETE_PAGE";
|
|
151
|
+
case "ok":
|
|
152
|
+
default:
|
|
153
|
+
return "PAGE_OUTCOME";
|
|
164
154
|
}
|
|
165
|
-
return undefined;
|
|
166
155
|
}
|
package/package.json
CHANGED