@aluvia/sdk 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +188 -0
- package/README.md +162 -477
- package/dist/cjs/api/apiUtils.js +4 -1
- package/dist/cjs/client/AluviaClient.js +30 -32
- package/dist/cjs/client/BlockDetection.js +69 -87
- package/dist/cjs/client/rules.js +12 -2
- package/dist/cjs/connect.js +2 -2
- package/dist/cjs/index.js +12 -1
- package/dist/cjs/session/lock.js +40 -4
- package/dist/esm/api/apiUtils.js +4 -1
- package/dist/esm/client/AluviaClient.js +38 -40
- package/dist/esm/client/BlockDetection.js +69 -87
- package/dist/esm/client/rules.js +12 -2
- package/dist/esm/connect.js +2 -2
- package/dist/esm/index.js +6 -4
- package/dist/esm/session/lock.js +40 -4
- package/dist/types/client/AluviaClient.d.ts +2 -2
- package/dist/types/client/BlockDetection.d.ts +4 -4
- package/dist/types/client/types.d.ts +11 -11
- package/dist/types/index.d.ts +9 -7
- package/package.json +15 -23
- package/dist/cjs/bin/account.js +0 -31
- package/dist/cjs/bin/api-helpers.js +0 -58
- package/dist/cjs/bin/cli-adapter.js +0 -16
- package/dist/cjs/bin/cli.js +0 -245
- package/dist/cjs/bin/close.js +0 -120
- package/dist/cjs/bin/geos.js +0 -10
- package/dist/cjs/bin/mcp-helpers.js +0 -57
- package/dist/cjs/bin/mcp-server.js +0 -220
- package/dist/cjs/bin/mcp-tools.js +0 -90
- package/dist/cjs/bin/open.js +0 -293
- package/dist/cjs/bin/session.js +0 -259
- package/dist/cjs/client/PageLoadDetection.js +0 -175
- package/dist/esm/bin/account.js +0 -28
- package/dist/esm/bin/api-helpers.js +0 -53
- package/dist/esm/bin/cli-adapter.js +0 -8
- package/dist/esm/bin/cli.js +0 -242
- package/dist/esm/bin/close.js +0 -117
- package/dist/esm/bin/geos.js +0 -7
- package/dist/esm/bin/mcp-helpers.js +0 -51
- package/dist/esm/bin/mcp-server.js +0 -185
- package/dist/esm/bin/mcp-tools.js +0 -78
- package/dist/esm/bin/open.js +0 -256
- package/dist/esm/bin/session.js +0 -252
- package/dist/esm/client/PageLoadDetection.js +0 -171
- package/dist/types/bin/account.d.ts +0 -1
- package/dist/types/bin/api-helpers.d.ts +0 -20
- package/dist/types/bin/cli-adapter.d.ts +0 -8
- package/dist/types/bin/cli.d.ts +0 -2
- package/dist/types/bin/close.d.ts +0 -1
- package/dist/types/bin/geos.d.ts +0 -1
- package/dist/types/bin/mcp-helpers.d.ts +0 -28
- package/dist/types/bin/mcp-server.d.ts +0 -2
- package/dist/types/bin/mcp-tools.d.ts +0 -46
- package/dist/types/bin/open.d.ts +0 -21
- package/dist/types/bin/session.d.ts +0 -11
- package/dist/types/client/PageLoadDetection.d.ts +0 -93
package/dist/cjs/session/lock.js
CHANGED
|
@@ -47,12 +47,48 @@ const path = __importStar(require("node:path"));
|
|
|
47
47
|
const os = __importStar(require("node:os"));
|
|
48
48
|
const LOCK_DIR = path.join(os.tmpdir(), 'aluvia-sdk');
|
|
49
49
|
const ADJECTIVES = [
|
|
50
|
-
'swift',
|
|
51
|
-
'
|
|
50
|
+
'swift',
|
|
51
|
+
'bold',
|
|
52
|
+
'calm',
|
|
53
|
+
'keen',
|
|
54
|
+
'warm',
|
|
55
|
+
'bright',
|
|
56
|
+
'silent',
|
|
57
|
+
'rapid',
|
|
58
|
+
'steady',
|
|
59
|
+
'clever',
|
|
60
|
+
'vivid',
|
|
61
|
+
'agile',
|
|
62
|
+
'noble',
|
|
63
|
+
'lucid',
|
|
64
|
+
'crisp',
|
|
65
|
+
'gentle',
|
|
66
|
+
'fierce',
|
|
67
|
+
'nimble',
|
|
68
|
+
'sturdy',
|
|
69
|
+
'witty',
|
|
52
70
|
];
|
|
53
71
|
const NOUNS = [
|
|
54
|
-
'falcon',
|
|
55
|
-
'
|
|
72
|
+
'falcon',
|
|
73
|
+
'tiger',
|
|
74
|
+
'river',
|
|
75
|
+
'maple',
|
|
76
|
+
'coral',
|
|
77
|
+
'cedar',
|
|
78
|
+
'orbit',
|
|
79
|
+
'prism',
|
|
80
|
+
'flint',
|
|
81
|
+
'spark',
|
|
82
|
+
'ridge',
|
|
83
|
+
'ember',
|
|
84
|
+
'crane',
|
|
85
|
+
'grove',
|
|
86
|
+
'stone',
|
|
87
|
+
'brook',
|
|
88
|
+
'drift',
|
|
89
|
+
'crest',
|
|
90
|
+
'sage',
|
|
91
|
+
'lynx',
|
|
56
92
|
];
|
|
57
93
|
function lockFileName(sessionName) {
|
|
58
94
|
return `cli-${sessionName ?? 'default'}.lock`;
|
package/dist/esm/api/apiUtils.js
CHANGED
|
@@ -14,7 +14,10 @@ export function asErrorEnvelope(value) {
|
|
|
14
14
|
const message = error['message'];
|
|
15
15
|
if (typeof code !== 'string' || typeof message !== 'string')
|
|
16
16
|
return null;
|
|
17
|
-
return {
|
|
17
|
+
return {
|
|
18
|
+
success: false,
|
|
19
|
+
error: { code, message, details: error['details'] },
|
|
20
|
+
};
|
|
18
21
|
}
|
|
19
22
|
export function formatErrorDetails(details) {
|
|
20
23
|
if (details == null)
|
|
package/dist/esm/client/AluviaClient.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
// AluviaClient - Main public class for Aluvia Client
|
|
2
|
-
import { ConfigManager } from
|
|
3
|
-
import { ProxyServer } from
|
|
4
|
-
import { ApiError, MissingApiKeyError } from
|
|
5
|
-
import { createNodeProxyAgents, createUndiciDispatcher, createUndiciFetch, toAxiosConfig, toGotOptions, toPlaywrightProxySettings, toPuppeteerArgs, toSeleniumArgs, } from
|
|
6
|
-
import { Logger } from
|
|
7
|
-
import { AluviaApi } from
|
|
8
|
-
import { BlockDetection
|
|
9
|
-
import * as net from
|
|
2
|
+
import { ConfigManager } from './ConfigManager.js';
|
|
3
|
+
import { ProxyServer } from './ProxyServer.js';
|
|
4
|
+
import { ApiError, MissingApiKeyError } from '../errors.js';
|
|
5
|
+
import { createNodeProxyAgents, createUndiciDispatcher, createUndiciFetch, toAxiosConfig, toGotOptions, toPlaywrightProxySettings, toPuppeteerArgs, toSeleniumArgs, } from './adapters.js';
|
|
6
|
+
import { Logger } from './logger.js';
|
|
7
|
+
import { AluviaApi } from '../api/AluviaApi.js';
|
|
8
|
+
import { BlockDetection } from './BlockDetection.js';
|
|
9
|
+
import * as net from 'node:net';
|
|
10
10
|
/**
|
|
11
11
|
* AluviaClient is the main entry point for the Aluvia Client.
|
|
12
12
|
*
|
|
@@ -25,9 +25,9 @@ export class AluviaClient {
|
|
|
25
25
|
this.pageStates = new WeakMap();
|
|
26
26
|
/** Promise-based mutex to serialize handleDetectionResult's critical section. */
|
|
27
27
|
this._detectionMutex = Promise.resolve();
|
|
28
|
-
const apiKey = String(options.apiKey ??
|
|
28
|
+
const apiKey = String(options.apiKey ?? '').trim();
|
|
29
29
|
if (!apiKey) {
|
|
30
|
-
throw new MissingApiKeyError(
|
|
30
|
+
throw new MissingApiKeyError('Aluvia apiKey is required');
|
|
31
31
|
}
|
|
32
32
|
const strict = options.strict ?? true;
|
|
33
33
|
this.options = { ...options, apiKey, strict };
|
|
@@ -35,12 +35,12 @@ export class AluviaClient {
|
|
|
35
35
|
if (connectionId !== undefined && !Number.isFinite(connectionId)) {
|
|
36
36
|
throw new Error('connectionId must be a finite number');
|
|
37
37
|
}
|
|
38
|
-
const apiBaseUrl = options.apiBaseUrl ??
|
|
38
|
+
const apiBaseUrl = options.apiBaseUrl ?? 'https://api.aluvia.io/v1';
|
|
39
39
|
const pollIntervalMs = Math.max(options.pollIntervalMs ?? 5000, 1000);
|
|
40
40
|
const timeoutMs = options.timeoutMs;
|
|
41
|
-
const gatewayProtocol = options.gatewayProtocol ??
|
|
42
|
-
const gatewayPort = options.gatewayPort ?? (gatewayProtocol ===
|
|
43
|
-
const logLevel = options.logLevel ??
|
|
41
|
+
const gatewayProtocol = options.gatewayProtocol ?? 'http';
|
|
42
|
+
const gatewayPort = options.gatewayPort ?? (gatewayProtocol === 'https' ? 8443 : 8080);
|
|
43
|
+
const logLevel = options.logLevel ?? 'info';
|
|
44
44
|
this.logger = new Logger(logLevel);
|
|
45
45
|
// Create ConfigManager
|
|
46
46
|
this.configManager = new ConfigManager({
|
|
@@ -62,7 +62,7 @@ export class AluviaClient {
|
|
|
62
62
|
});
|
|
63
63
|
// Initialize block detection if configured
|
|
64
64
|
if (options.blockDetection !== undefined || options.startPlaywright) {
|
|
65
|
-
this.logger.debug(
|
|
65
|
+
this.logger.debug('Initializing block detection');
|
|
66
66
|
const detectionConfig = options.blockDetection ?? { enabled: true };
|
|
67
67
|
this.blockDetection = new BlockDetection(detectionConfig, this.logger);
|
|
68
68
|
}
|
|
@@ -79,10 +79,9 @@ export class AluviaClient {
|
|
|
79
79
|
};
|
|
80
80
|
this.pageStates.set(page, pageState);
|
|
81
81
|
// Capture navigation responses on main frame
|
|
82
|
-
page.on(
|
|
82
|
+
page.on('response', (response) => {
|
|
83
83
|
try {
|
|
84
|
-
if (response.request().isNavigationRequest() &&
|
|
85
|
-
response.request().frame() === page.mainFrame()) {
|
|
84
|
+
if (response.request().isNavigationRequest() && response.request().frame() === page.mainFrame()) {
|
|
86
85
|
pageState.lastResponse = response;
|
|
87
86
|
pageState.skipFullPass = false;
|
|
88
87
|
pageState.fastResult = null;
|
|
@@ -93,7 +92,7 @@ export class AluviaClient {
|
|
|
93
92
|
}
|
|
94
93
|
});
|
|
95
94
|
// Fast pass at domcontentloaded
|
|
96
|
-
page.on(
|
|
95
|
+
page.on('domcontentloaded', async () => {
|
|
97
96
|
if (!this.blockDetection)
|
|
98
97
|
return;
|
|
99
98
|
try {
|
|
@@ -110,13 +109,13 @@ export class AluviaClient {
|
|
|
110
109
|
}
|
|
111
110
|
});
|
|
112
111
|
// Full pass at load
|
|
113
|
-
page.on(
|
|
112
|
+
page.on('load', async () => {
|
|
114
113
|
if (!this.blockDetection || pageState.skipFullPass)
|
|
115
114
|
return;
|
|
116
115
|
try {
|
|
117
116
|
// Wait for networkidle with timeout cap
|
|
118
117
|
try {
|
|
119
|
-
await page.waitForLoadState(
|
|
118
|
+
await page.waitForLoadState('networkidle', {
|
|
120
119
|
timeout: this.blockDetection.getNetworkIdleTimeoutMs(),
|
|
121
120
|
});
|
|
122
121
|
}
|
|
@@ -132,7 +131,7 @@ export class AluviaClient {
|
|
|
132
131
|
}
|
|
133
132
|
});
|
|
134
133
|
// SPA detection via framenavigated
|
|
135
|
-
page.on(
|
|
134
|
+
page.on('framenavigated', async (frame) => {
|
|
136
135
|
if (!this.blockDetection)
|
|
137
136
|
return;
|
|
138
137
|
try {
|
|
@@ -161,14 +160,14 @@ export class AluviaClient {
|
|
|
161
160
|
* Attaches page listeners to all existing and future pages in a context.
|
|
162
161
|
*/
|
|
163
162
|
attachBlockDetectionListener(context) {
|
|
164
|
-
this.logger.debug(
|
|
163
|
+
this.logger.debug('Attaching block detection listener to context');
|
|
165
164
|
// Attach to existing pages
|
|
166
165
|
try {
|
|
167
166
|
const existingPages = context.pages();
|
|
168
167
|
for (const page of existingPages) {
|
|
169
168
|
this.attachPageListeners(page);
|
|
170
169
|
// Check if page has already loaded (not about:blank)
|
|
171
|
-
if (page.url() !==
|
|
170
|
+
if (page.url() !== 'about:blank' && this.blockDetection) {
|
|
172
171
|
this.blockDetection
|
|
173
172
|
.analyzeFull(page, null)
|
|
174
173
|
.then((result) => {
|
|
@@ -184,7 +183,7 @@ export class AluviaClient {
|
|
|
184
183
|
// Ignore errors
|
|
185
184
|
}
|
|
186
185
|
// Attach to future pages
|
|
187
|
-
context.on(
|
|
186
|
+
context.on('page', (page) => {
|
|
188
187
|
this.logger.debug(`New page detected: ${page.url()}`);
|
|
189
188
|
this.attachPageListeners(page);
|
|
190
189
|
});
|
|
@@ -218,15 +217,16 @@ export class AluviaClient {
|
|
|
218
217
|
if (!this.blockDetection.isAutoUnblock())
|
|
219
218
|
return;
|
|
220
219
|
// Check if auto-reload should fire for this blockStatus
|
|
221
|
-
const shouldReload = result.blockStatus ===
|
|
222
|
-
(result.blockStatus ===
|
|
223
|
-
this.blockDetection.isAutoUnblockOnSuspected());
|
|
220
|
+
const shouldReload = result.blockStatus === 'blocked' ||
|
|
221
|
+
(result.blockStatus === 'suspected' && this.blockDetection.isAutoUnblockOnSuspected());
|
|
224
222
|
if (!shouldReload)
|
|
225
223
|
return;
|
|
226
224
|
// Serialize the critical section: persistent-block state, rule updates, reload.
|
|
227
225
|
// This prevents concurrent handlers from reading stale retriedUrls/persistentHostnames.
|
|
228
226
|
let release;
|
|
229
|
-
const gate = new Promise((resolve) => {
|
|
227
|
+
const gate = new Promise((resolve) => {
|
|
228
|
+
release = resolve;
|
|
229
|
+
});
|
|
230
230
|
const acquired = this._detectionMutex;
|
|
231
231
|
this._detectionMutex = this._detectionMutex.then(() => gate);
|
|
232
232
|
await acquired;
|
|
@@ -302,9 +302,7 @@ export class AluviaClient {
|
|
|
302
302
|
this.startPromise = (async () => {
|
|
303
303
|
// Fetch initial configuration (may throw InvalidApiKeyError or ApiError)
|
|
304
304
|
await this.configManager.init();
|
|
305
|
-
const browserInstance = this.options.startPlaywright
|
|
306
|
-
? await this._initPlaywright()
|
|
307
|
-
: undefined;
|
|
305
|
+
const browserInstance = this.options.startPlaywright ? await this._initPlaywright() : undefined;
|
|
308
306
|
// Keep config fresh so routing decisions update without restarting.
|
|
309
307
|
this.configManager.startPolling();
|
|
310
308
|
try {
|
|
@@ -329,10 +327,10 @@ export class AluviaClient {
|
|
|
329
327
|
if (!d)
|
|
330
328
|
return;
|
|
331
329
|
try {
|
|
332
|
-
if (typeof d.close ===
|
|
330
|
+
if (typeof d.close === 'function') {
|
|
333
331
|
await d.close();
|
|
334
332
|
}
|
|
335
|
-
else if (typeof d.destroy ===
|
|
333
|
+
else if (typeof d.destroy === 'function') {
|
|
336
334
|
d.destroy();
|
|
337
335
|
}
|
|
338
336
|
}
|
|
@@ -510,20 +508,20 @@ export class AluviaClient {
|
|
|
510
508
|
*/
|
|
511
509
|
async _initPlaywright() {
|
|
512
510
|
try {
|
|
513
|
-
const pw = await import(
|
|
511
|
+
const pw = await import('playwright');
|
|
514
512
|
// @ts-ignore
|
|
515
513
|
return pw.chromium;
|
|
516
514
|
}
|
|
517
515
|
catch {
|
|
518
516
|
// Playwright not installed — attempt auto-install
|
|
519
|
-
this.logger.info(
|
|
520
|
-
const { execSync } = await import(
|
|
517
|
+
this.logger.info('Playwright not found. Installing playwright...');
|
|
518
|
+
const { execSync } = await import('node:child_process');
|
|
521
519
|
try {
|
|
522
|
-
execSync(
|
|
523
|
-
stdio:
|
|
520
|
+
execSync('npm install playwright', {
|
|
521
|
+
stdio: 'inherit',
|
|
524
522
|
cwd: process.cwd(),
|
|
525
523
|
});
|
|
526
|
-
const pw = await import(
|
|
524
|
+
const pw = await import('playwright');
|
|
527
525
|
// @ts-ignore
|
|
528
526
|
return pw.chromium;
|
|
529
527
|
}
|
|
package/dist/esm/client/BlockDetection.js
CHANGED
|
@@ -1,46 +1,35 @@
|
|
|
1
1
|
// BlockDetection - Website block detection with weighted scoring
|
|
2
2
|
const DEFAULT_CHALLENGE_SELECTORS = [
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
'#challenge-form',
|
|
4
|
+
'#challenge-running',
|
|
5
|
+
'.cf-browser-verification',
|
|
6
6
|
'iframe[src*="recaptcha"]',
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
'.g-recaptcha',
|
|
8
|
+
'#px-captcha',
|
|
9
9
|
'iframe[src*="hcaptcha"]',
|
|
10
|
-
|
|
10
|
+
'.h-captcha',
|
|
11
11
|
];
|
|
12
12
|
const TITLE_KEYWORDS = [
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
];
|
|
20
|
-
const STRONG_TEXT_KEYWORDS = [
|
|
21
|
-
"captcha",
|
|
22
|
-
"access denied",
|
|
23
|
-
"verify you are human",
|
|
24
|
-
"bot detection",
|
|
25
|
-
];
|
|
26
|
-
const WEAK_TEXT_KEYWORDS = [
|
|
27
|
-
"blocked",
|
|
28
|
-
"forbidden",
|
|
29
|
-
"cloudflare",
|
|
30
|
-
"please verify",
|
|
31
|
-
"unusual activity",
|
|
13
|
+
'access denied',
|
|
14
|
+
'blocked',
|
|
15
|
+
'forbidden',
|
|
16
|
+
'security check',
|
|
17
|
+
'attention required',
|
|
18
|
+
'just a moment',
|
|
32
19
|
];
|
|
20
|
+
const STRONG_TEXT_KEYWORDS = ['captcha', 'access denied', 'verify you are human', 'bot detection'];
|
|
21
|
+
const WEAK_TEXT_KEYWORDS = ['blocked', 'forbidden', 'cloudflare', 'please verify', 'unusual activity'];
|
|
33
22
|
function escapeRegex(str) {
|
|
34
|
-
return str.replace(/[.*+?^${}()|[\]\\]/g,
|
|
23
|
+
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
35
24
|
}
|
|
36
25
|
const WEAK_TEXT_REGEXES = WEAK_TEXT_KEYWORDS.map((keyword) => ({
|
|
37
26
|
keyword,
|
|
38
|
-
regex: new RegExp(
|
|
27
|
+
regex: new RegExp('\\b' + escapeRegex(keyword) + '\\b', 'i'),
|
|
39
28
|
}));
|
|
40
29
|
const CHALLENGE_DOMAIN_PATTERNS = [
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
30
|
+
'/cdn-cgi/challenge-platform/',
|
|
31
|
+
'challenges.cloudflare.com',
|
|
32
|
+
'geo.captcha-delivery.com',
|
|
44
33
|
];
|
|
45
34
|
/**
|
|
46
35
|
* BlockDetection handles detection of website blocks, CAPTCHAs, and WAF challenges
|
|
@@ -84,9 +73,9 @@ export class BlockDetection {
|
|
|
84
73
|
// --- Scoring Engine ---
|
|
85
74
|
computeScore(signals) {
|
|
86
75
|
if (signals.length === 0)
|
|
87
|
-
return { score: 0, blockStatus:
|
|
76
|
+
return { score: 0, blockStatus: 'clear' };
|
|
88
77
|
const score = 1 - signals.reduce((product, s) => product * (1 - s.weight), 1);
|
|
89
|
-
const blockStatus = score >= 0.7 ?
|
|
78
|
+
const blockStatus = score >= 0.7 ? 'blocked' : score >= 0.4 ? 'suspected' : 'clear';
|
|
90
79
|
return { score, blockStatus };
|
|
91
80
|
}
|
|
92
81
|
// --- Fast-pass Signal Detectors ---
|
|
@@ -99,15 +88,15 @@ export class BlockDetection {
|
|
|
99
88
|
name: `http_status_${status}`,
|
|
100
89
|
weight: 0.85,
|
|
101
90
|
details: `HTTP ${status} response`,
|
|
102
|
-
source:
|
|
91
|
+
source: 'fast',
|
|
103
92
|
};
|
|
104
93
|
}
|
|
105
94
|
if (status === 503) {
|
|
106
95
|
return {
|
|
107
|
-
name:
|
|
96
|
+
name: 'http_status_503',
|
|
108
97
|
weight: 0.6,
|
|
109
|
-
details:
|
|
110
|
-
source:
|
|
98
|
+
details: 'HTTP 503 response',
|
|
99
|
+
source: 'fast',
|
|
111
100
|
};
|
|
112
101
|
}
|
|
113
102
|
return null;
|
|
@@ -118,23 +107,22 @@ export class BlockDetection {
|
|
|
118
107
|
return signals;
|
|
119
108
|
try {
|
|
120
109
|
const headers = response.headers?.() ?? {};
|
|
121
|
-
const cfMitigated = headers[
|
|
122
|
-
if (cfMitigated &&
|
|
123
|
-
cfMitigated.toLowerCase().includes("challenge")) {
|
|
110
|
+
const cfMitigated = headers['cf-mitigated'];
|
|
111
|
+
if (cfMitigated && cfMitigated.toLowerCase().includes('challenge')) {
|
|
124
112
|
signals.push({
|
|
125
|
-
name:
|
|
113
|
+
name: 'waf_header_cf_mitigated',
|
|
126
114
|
weight: 0.9,
|
|
127
115
|
details: `cf-mitigated: ${cfMitigated}`,
|
|
128
|
-
source:
|
|
116
|
+
source: 'fast',
|
|
129
117
|
});
|
|
130
118
|
}
|
|
131
|
-
const server = headers[
|
|
132
|
-
if (server && server.toLowerCase().includes(
|
|
119
|
+
const server = headers['server'];
|
|
120
|
+
if (server && server.toLowerCase().includes('cloudflare')) {
|
|
133
121
|
signals.push({
|
|
134
|
-
name:
|
|
122
|
+
name: 'waf_header_cloudflare',
|
|
135
123
|
weight: 0.1,
|
|
136
124
|
details: `server: ${server}`,
|
|
137
|
-
source:
|
|
125
|
+
source: 'fast',
|
|
138
126
|
});
|
|
139
127
|
}
|
|
140
128
|
}
|
|
@@ -150,10 +138,10 @@ export class BlockDetection {
|
|
|
150
138
|
for (const keyword of this.allTitleKeywords) {
|
|
151
139
|
if (title.includes(keyword.toLowerCase())) {
|
|
152
140
|
return {
|
|
153
|
-
name:
|
|
141
|
+
name: 'title_keyword',
|
|
154
142
|
weight: 0.8,
|
|
155
143
|
details: `Title contains "${keyword}"`,
|
|
156
|
-
source:
|
|
144
|
+
source: 'full',
|
|
157
145
|
};
|
|
158
146
|
}
|
|
159
147
|
}
|
|
@@ -175,10 +163,10 @@ export class BlockDetection {
|
|
|
175
163
|
}, selectors);
|
|
176
164
|
if (found) {
|
|
177
165
|
return {
|
|
178
|
-
name:
|
|
166
|
+
name: 'challenge_selector',
|
|
179
167
|
weight: 0.8,
|
|
180
168
|
details: `Challenge selector found: ${found}`,
|
|
181
|
-
source:
|
|
169
|
+
source: 'full',
|
|
182
170
|
};
|
|
183
171
|
}
|
|
184
172
|
}
|
|
@@ -191,30 +179,27 @@ export class BlockDetection {
|
|
|
191
179
|
const signals = [];
|
|
192
180
|
try {
|
|
193
181
|
const text = useInnerText
|
|
194
|
-
? await page.evaluate(() => document.body?.innerText ??
|
|
195
|
-
: await page.evaluate(() => document.body?.textContent ??
|
|
182
|
+
? await page.evaluate(() => document.body?.innerText ?? '')
|
|
183
|
+
: await page.evaluate(() => document.body?.textContent ?? '');
|
|
196
184
|
const textLower = text.toLowerCase();
|
|
197
185
|
if (text.length < 50) {
|
|
198
186
|
signals.push({
|
|
199
|
-
name:
|
|
187
|
+
name: 'visible_text_short',
|
|
200
188
|
weight: 0.2,
|
|
201
189
|
details: `Visible text very short (${text.length} chars)`,
|
|
202
|
-
source:
|
|
190
|
+
source: 'full',
|
|
203
191
|
});
|
|
204
192
|
}
|
|
205
193
|
// Strong keywords (substring match, short page < 500 chars)
|
|
206
194
|
if (text.length < 500) {
|
|
207
|
-
const allStrong = [
|
|
208
|
-
...STRONG_TEXT_KEYWORDS,
|
|
209
|
-
...this.config.extraKeywords,
|
|
210
|
-
];
|
|
195
|
+
const allStrong = [...STRONG_TEXT_KEYWORDS, ...this.config.extraKeywords];
|
|
211
196
|
for (const keyword of allStrong) {
|
|
212
197
|
if (textLower.includes(keyword.toLowerCase())) {
|
|
213
198
|
signals.push({
|
|
214
|
-
name:
|
|
199
|
+
name: 'visible_text_keyword_strong',
|
|
215
200
|
weight: 0.6,
|
|
216
201
|
details: `Strong keyword "${keyword}" on short page`,
|
|
217
|
-
source:
|
|
202
|
+
source: 'full',
|
|
218
203
|
});
|
|
219
204
|
break;
|
|
220
205
|
}
|
|
@@ -224,10 +209,10 @@ export class BlockDetection {
|
|
|
224
209
|
for (const { keyword, regex } of WEAK_TEXT_REGEXES) {
|
|
225
210
|
if (regex.test(text)) {
|
|
226
211
|
signals.push({
|
|
227
|
-
name:
|
|
212
|
+
name: 'visible_text_keyword_weak',
|
|
228
213
|
weight: 0.15,
|
|
229
214
|
details: `Weak keyword "${keyword}" found with word boundary`,
|
|
230
|
-
source:
|
|
215
|
+
source: 'full',
|
|
231
216
|
});
|
|
232
217
|
break;
|
|
233
218
|
}
|
|
@@ -241,17 +226,16 @@ export class BlockDetection {
|
|
|
241
226
|
async detectTextToHtmlRatio(page) {
|
|
242
227
|
try {
|
|
243
228
|
const result = await page.evaluate(() => {
|
|
244
|
-
const html = document.documentElement?.outerHTML ??
|
|
245
|
-
const text = document.body?.textContent ??
|
|
229
|
+
const html = document.documentElement?.outerHTML ?? '';
|
|
230
|
+
const text = document.body?.textContent ?? '';
|
|
246
231
|
return { htmlLength: html.length, textLength: text.length };
|
|
247
232
|
});
|
|
248
|
-
if (result.htmlLength >= 1000 &&
|
|
249
|
-
result.textLength / result.htmlLength < 0.03) {
|
|
233
|
+
if (result.htmlLength >= 1000 && result.textLength / result.htmlLength < 0.03) {
|
|
250
234
|
return {
|
|
251
|
-
name:
|
|
235
|
+
name: 'low_text_ratio',
|
|
252
236
|
weight: 0.2,
|
|
253
237
|
details: `Low text/HTML ratio: ${result.textLength}/${result.htmlLength}`,
|
|
254
|
-
source:
|
|
238
|
+
source: 'full',
|
|
255
239
|
};
|
|
256
240
|
}
|
|
257
241
|
}
|
|
@@ -275,7 +259,7 @@ export class BlockDetection {
|
|
|
275
259
|
break;
|
|
276
260
|
const redirectResponse = redirectedFrom.response?.();
|
|
277
261
|
hops.push({
|
|
278
|
-
url: redirectedFrom.url?.() ??
|
|
262
|
+
url: redirectedFrom.url?.() ?? '',
|
|
279
263
|
statusCode: redirectResponse?.status?.() ?? 0,
|
|
280
264
|
});
|
|
281
265
|
req = redirectedFrom;
|
|
@@ -288,24 +272,24 @@ export class BlockDetection {
|
|
|
288
272
|
for (const pattern of CHALLENGE_DOMAIN_PATTERNS) {
|
|
289
273
|
if (hop.url.includes(pattern)) {
|
|
290
274
|
signals.push({
|
|
291
|
-
name:
|
|
275
|
+
name: 'redirect_to_challenge',
|
|
292
276
|
weight: 0.7,
|
|
293
277
|
details: `Redirect through challenge domain: ${hop.url}`,
|
|
294
|
-
source:
|
|
278
|
+
source: 'full',
|
|
295
279
|
});
|
|
296
280
|
return { signals, chain };
|
|
297
281
|
}
|
|
298
282
|
}
|
|
299
283
|
}
|
|
300
284
|
// Also check the final response URL
|
|
301
|
-
const finalUrl = response.url?.() ??
|
|
285
|
+
const finalUrl = response.url?.() ?? '';
|
|
302
286
|
for (const pattern of CHALLENGE_DOMAIN_PATTERNS) {
|
|
303
287
|
if (finalUrl.includes(pattern)) {
|
|
304
288
|
signals.push({
|
|
305
|
-
name:
|
|
289
|
+
name: 'redirect_to_challenge',
|
|
306
290
|
weight: 0.7,
|
|
307
291
|
details: `Final URL is challenge domain: ${finalUrl}`,
|
|
308
|
-
source:
|
|
292
|
+
source: 'full',
|
|
309
293
|
});
|
|
310
294
|
break;
|
|
311
295
|
}
|
|
@@ -322,7 +306,7 @@ export class BlockDetection {
|
|
|
322
306
|
const meta = document.querySelector('meta[http-equiv="refresh"]');
|
|
323
307
|
if (!meta)
|
|
324
308
|
return null;
|
|
325
|
-
const content = meta.getAttribute(
|
|
309
|
+
const content = meta.getAttribute('content') ?? '';
|
|
326
310
|
const match = content.match(/url\s*=\s*(.+)/i);
|
|
327
311
|
return match ? match[1].trim() : null;
|
|
328
312
|
});
|
|
@@ -330,10 +314,10 @@ export class BlockDetection {
|
|
|
330
314
|
for (const pattern of CHALLENGE_DOMAIN_PATTERNS) {
|
|
331
315
|
if (refreshUrl.includes(pattern)) {
|
|
332
316
|
return {
|
|
333
|
-
name:
|
|
317
|
+
name: 'meta_refresh_challenge',
|
|
334
318
|
weight: 0.65,
|
|
335
319
|
details: `Meta refresh to challenge URL: ${refreshUrl}`,
|
|
336
|
-
source:
|
|
320
|
+
source: 'full',
|
|
337
321
|
};
|
|
338
322
|
}
|
|
339
323
|
}
|
|
@@ -353,7 +337,7 @@ export class BlockDetection {
|
|
|
353
337
|
const url = page.url();
|
|
354
338
|
const hostname = this.extractHostname(url);
|
|
355
339
|
if (!this.config.enabled) {
|
|
356
|
-
return this.makeResult(url, hostname, [],
|
|
340
|
+
return this.makeResult(url, hostname, [], 'fast', []);
|
|
357
341
|
}
|
|
358
342
|
const signals = [];
|
|
359
343
|
const statusSignal = this.detectHttpStatus(response);
|
|
@@ -361,7 +345,7 @@ export class BlockDetection {
|
|
|
361
345
|
signals.push(statusSignal);
|
|
362
346
|
const headerSignals = this.detectResponseHeaders(response);
|
|
363
347
|
signals.push(...headerSignals);
|
|
364
|
-
const result = this.makeResult(url, hostname, signals,
|
|
348
|
+
const result = this.makeResult(url, hostname, signals, 'fast', []);
|
|
365
349
|
this.logResult(result);
|
|
366
350
|
return result;
|
|
367
351
|
}
|
|
@@ -396,12 +380,10 @@ export class BlockDetection {
|
|
|
396
380
|
const url = page.url();
|
|
397
381
|
const hostname = this.extractHostname(url);
|
|
398
382
|
if (!this.config.enabled) {
|
|
399
|
-
return this.makeResult(url, hostname, [],
|
|
383
|
+
return this.makeResult(url, hostname, [], 'full', []);
|
|
400
384
|
}
|
|
401
385
|
// Start with fast-pass signals
|
|
402
|
-
const signals = fastResult
|
|
403
|
-
? [...fastResult.signals]
|
|
404
|
-
: [];
|
|
386
|
+
const signals = fastResult ? [...fastResult.signals] : [];
|
|
405
387
|
// If no fast pass was done and we have a response, run fast detectors
|
|
406
388
|
if (!fastResult && response) {
|
|
407
389
|
const statusSignal = this.detectHttpStatus(response);
|
|
@@ -410,7 +392,7 @@ export class BlockDetection {
|
|
|
410
392
|
const headerSignals = this.detectResponseHeaders(response);
|
|
411
393
|
signals.push(...headerSignals);
|
|
412
394
|
}
|
|
413
|
-
signals.push(...await this.runContentDetectors(page));
|
|
395
|
+
signals.push(...(await this.runContentDetectors(page)));
|
|
414
396
|
const { signals: redirectSignals, chain } = this.detectRedirectChain(response);
|
|
415
397
|
signals.push(...redirectSignals);
|
|
416
398
|
return this.reEvaluateIfSuspected(page, url, hostname, signals, chain);
|
|
@@ -422,7 +404,7 @@ export class BlockDetection {
|
|
|
422
404
|
const url = page.url();
|
|
423
405
|
const hostname = this.extractHostname(url);
|
|
424
406
|
if (!this.config.enabled) {
|
|
425
|
-
return this.makeResult(url, hostname, [],
|
|
407
|
+
return this.makeResult(url, hostname, [], 'full', []);
|
|
426
408
|
}
|
|
427
409
|
const signals = await this.runContentDetectors(page);
|
|
428
410
|
return this.reEvaluateIfSuspected(page, url, hostname, signals, []);
|
|
@@ -430,14 +412,14 @@ export class BlockDetection {
|
|
|
430
412
|
async reEvaluateIfSuspected(page, url, hostname, signals, redirectChain) {
|
|
431
413
|
const preliminary = this.computeScore(signals);
|
|
432
414
|
if (preliminary.score >= 0.4 && preliminary.score < 0.7) {
|
|
433
|
-
const nonTextSignals = signals.filter((s) => !s.name.startsWith(
|
|
415
|
+
const nonTextSignals = signals.filter((s) => !s.name.startsWith('visible_text_'));
|
|
434
416
|
const innerTextSignals = await this.detectVisibleText(page, true);
|
|
435
417
|
nonTextSignals.push(...innerTextSignals);
|
|
436
|
-
const result = this.makeResult(url, hostname, nonTextSignals,
|
|
418
|
+
const result = this.makeResult(url, hostname, nonTextSignals, 'full', redirectChain);
|
|
437
419
|
this.logResult(result);
|
|
438
420
|
return result;
|
|
439
421
|
}
|
|
440
|
-
const result = this.makeResult(url, hostname, signals,
|
|
422
|
+
const result = this.makeResult(url, hostname, signals, 'full', redirectChain);
|
|
441
423
|
this.logResult(result);
|
|
442
424
|
return result;
|
|
443
425
|
}
|
package/dist/esm/client/rules.js
CHANGED
|
@@ -56,7 +56,12 @@ export function matchPattern(hostname, pattern) {
|
|
|
56
56
|
*/
|
|
57
57
|
export function normalizeRules(rules) {
|
|
58
58
|
if (!rules || rules.length === 0) {
|
|
59
|
-
return {
|
|
59
|
+
return {
|
|
60
|
+
positiveRules: [],
|
|
61
|
+
negativeRules: [],
|
|
62
|
+
hasCatchAll: false,
|
|
63
|
+
empty: true,
|
|
64
|
+
};
|
|
60
65
|
}
|
|
61
66
|
const trimmed = rules
|
|
62
67
|
.filter((r) => typeof r === 'string')
|
|
@@ -64,7 +69,12 @@ export function normalizeRules(rules) {
|
|
|
64
69
|
.filter((r) => r.length > 0)
|
|
65
70
|
.filter((r) => r !== 'auto');
|
|
66
71
|
if (trimmed.length === 0) {
|
|
67
|
-
return {
|
|
72
|
+
return {
|
|
73
|
+
positiveRules: [],
|
|
74
|
+
negativeRules: [],
|
|
75
|
+
hasCatchAll: false,
|
|
76
|
+
empty: true,
|
|
77
|
+
};
|
|
68
78
|
}
|
|
69
79
|
const negativeRules = [];
|
|
70
80
|
const positiveRules = [];
|
package/dist/esm/connect.js
CHANGED
|
@@ -60,8 +60,8 @@ export async function connect(sessionName) {
|
|
|
60
60
|
let context;
|
|
61
61
|
let page;
|
|
62
62
|
try {
|
|
63
|
-
context = browser.contexts()[0] ?? await browser.newContext();
|
|
64
|
-
page = context.pages()[0] ?? await context.newPage();
|
|
63
|
+
context = browser.contexts()[0] ?? (await browser.newContext());
|
|
64
|
+
page = context.pages()[0] ?? (await context.newPage());
|
|
65
65
|
}
|
|
66
66
|
catch (err) {
|
|
67
67
|
await browser.close().catch(() => { });
|