@paywalls-net/filter 1.3.9 → 1.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/jest.config.js +7 -0
- package/package.json +6 -2
- package/src/index.js +42 -37
- package/src/signal-extraction.js +385 -0
- package/tests/proxy-vai-request.test.js +379 -0
- package/tests/signal-extraction.test.js +624 -0
package/jest.config.js
ADDED
package/package.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"description": "Client SDK for integrating paywalls.net bot filtering and authorization services into your server or CDN.",
|
|
4
4
|
"author": "paywalls.net",
|
|
5
5
|
"license": "MIT",
|
|
6
|
-
"version": "1.3.9",
|
|
6
|
+
"version": "1.3.10",
|
|
7
7
|
"publishConfig": {
|
|
8
8
|
"access": "public"
|
|
9
9
|
},
|
|
@@ -17,9 +17,13 @@
|
|
|
17
17
|
".": "./src/index.js"
|
|
18
18
|
},
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "
|
|
20
|
+
"test": "node --experimental-vm-modules node_modules/.bin/jest --runInBand",
|
|
21
|
+
"test:watch": "node --experimental-vm-modules node_modules/.bin/jest --watch"
|
|
21
22
|
},
|
|
22
23
|
"dependencies": {
|
|
23
24
|
"ua-parser-js": "^2.0.4"
|
|
25
|
+
},
|
|
26
|
+
"devDependencies": {
|
|
27
|
+
"jest": "^30.2.0"
|
|
24
28
|
}
|
|
25
29
|
}
|
package/src/index.js
CHANGED
|
@@ -4,6 +4,11 @@
|
|
|
4
4
|
*/
|
|
5
5
|
const sdk_version = "1.2.x";
|
|
6
6
|
import { classifyUserAgent, loadAgentPatterns } from './user-agent-classification.js';
|
|
7
|
+
import {
|
|
8
|
+
extractAcceptFeatures, extractEncodingFeatures, extractLanguageFeatures,
|
|
9
|
+
extractNetFeatures, extractCHFeatures, extractUAFeatures,
|
|
10
|
+
computeUAHMAC, computeConfidenceToken,
|
|
11
|
+
} from './signal-extraction.js';
|
|
7
12
|
|
|
8
13
|
const PAYWALLS_CLOUD_API_HOST = "https://cloud-api.paywalls.net";
|
|
9
14
|
|
|
@@ -91,27 +96,12 @@ const PROXY_HEADER_MAP = [
|
|
|
91
96
|
];
|
|
92
97
|
|
|
93
98
|
/**
|
|
94
|
-
*
|
|
95
|
-
*
|
|
96
|
-
* from:'headers' — read from incoming request headers (lowercase)
|
|
97
|
-
* from:'cf' — read from request.cf property
|
|
99
|
+
* Set a header only if the value is non-null (extractor returns null for
|
|
100
|
+
* absent inputs → header omitted entirely, not sent as empty string).
|
|
98
101
|
*/
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
{ from: 'headers', src: 'sec-fetch-mode', dest: 'X-PW-Sec-Fetch-Mode' },
|
|
103
|
-
{ from: 'headers', src: 'sec-fetch-site', dest: 'X-PW-Sec-Fetch-Site' },
|
|
104
|
-
// Bundle B: Accept (2 pts)
|
|
105
|
-
{ from: 'headers', src: 'accept', dest: 'X-PW-Accept' },
|
|
106
|
-
{ from: 'headers', src: 'accept-language', dest: 'X-PW-Accept-Language' },
|
|
107
|
-
{ from: 'headers', src: 'accept-encoding', dest: 'X-PW-Accept-Encoding' },
|
|
108
|
-
// Bundle C: Client Hints (2 pts)
|
|
109
|
-
{ from: 'headers', src: 'sec-ch-ua', dest: 'X-PW-Sec-CH-UA' },
|
|
110
|
-
// Bundle D: CF infrastructure (1 pt) — only valid at first-hop CF Worker
|
|
111
|
-
{ from: 'cf', src: 'tlsVersion', dest: 'X-PW-TLS-Version' },
|
|
112
|
-
{ from: 'cf', src: 'httpProtocol', dest: 'X-PW-HTTP-Protocol' },
|
|
113
|
-
{ from: 'cf', src: 'asn', dest: 'X-PW-ASN' },
|
|
114
|
-
];
|
|
102
|
+
function setIfPresent(obj, key, value) {
  // Nullish values (null or undefined) leave the target untouched, so the
  // key is never created. Empty strings and 0 are still written.
  if (value === null || value === undefined) {
    return;
  }
  obj[key] = value;
}
|
|
115
105
|
|
|
116
106
|
/**
|
|
117
107
|
* Proxy VAI requests to the cloud-api service (Spec §7).
|
|
@@ -128,10 +118,9 @@ const SIGNAL_HEADER_MAP = [
|
|
|
128
118
|
* - User-Agent, X-Forwarded-For: standard proxy headers
|
|
129
119
|
* - Authorization: publisher API key (§7.4)
|
|
130
120
|
*
|
|
131
|
-
* Human-confidence signal forwarding (§
|
|
132
|
-
*
|
|
133
|
-
*
|
|
134
|
-
* Simple passthrough: present values forwarded, absent values omitted.
|
|
121
|
+
* Human-confidence signal forwarding (§7.2):
|
|
122
|
+
* Uses signal-extraction module to transform raw browser headers into compact
|
|
123
|
+
* RFC 8941 Structured Field Value strings. Absent inputs → null → header omitted.
|
|
135
124
|
*
|
|
136
125
|
* Response passthrough (§7.3):
|
|
137
126
|
* All response headers from cloud-api are returned unchanged — including
|
|
@@ -155,7 +144,7 @@ async function proxyVAIRequest(cfg, request) {
|
|
|
155
144
|
// Build forwarding headers — include everything cloud-api needs
|
|
156
145
|
// for CORS evaluation, domain auth, and request context.
|
|
157
146
|
const forwardHeaders = {
|
|
158
|
-
'User-Agent':
|
|
147
|
+
'User-Agent': sdkUserAgent,
|
|
159
148
|
'Authorization': `Bearer ${cfg.paywallsAPIKey}`
|
|
160
149
|
};
|
|
161
150
|
|
|
@@ -171,16 +160,29 @@ async function proxyVAIRequest(cfg, request) {
|
|
|
171
160
|
if (headers[src]) forwardHeaders[dest] = headers[src];
|
|
172
161
|
}
|
|
173
162
|
|
|
174
|
-
//
|
|
175
|
-
|
|
163
|
+
// Signal protocol version (§7.1)
|
|
164
|
+
forwardHeaders['X-PW-V'] = '2';
|
|
165
|
+
|
|
176
166
|
const cf = request.cf || {};
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
167
|
+
|
|
168
|
+
// Tier 1: kept raw (§6.1)
|
|
169
|
+
setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Dest', headers['sec-fetch-dest']);
|
|
170
|
+
setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Mode', headers['sec-fetch-mode']);
|
|
171
|
+
setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Site', headers['sec-fetch-site']);
|
|
172
|
+
setIfPresent(forwardHeaders, 'X-PW-TLS-Version', cf.tlsVersion != null ? String(cf.tlsVersion) : null);
|
|
173
|
+
setIfPresent(forwardHeaders, 'X-PW-HTTP-Protocol', cf.httpProtocol != null ? String(cf.httpProtocol) : null);
|
|
174
|
+
|
|
175
|
+
// Tier 2: extract features (§6.2)
|
|
176
|
+
setIfPresent(forwardHeaders, 'X-PW-Accept', extractAcceptFeatures(headers['accept']));
|
|
177
|
+
setIfPresent(forwardHeaders, 'X-PW-Enc', extractEncodingFeatures(headers['accept-encoding']));
|
|
178
|
+
setIfPresent(forwardHeaders, 'X-PW-Lang', extractLanguageFeatures(headers['accept-language']));
|
|
179
|
+
setIfPresent(forwardHeaders, 'X-PW-Net', extractNetFeatures(cf.asn));
|
|
180
|
+
setIfPresent(forwardHeaders, 'X-PW-CH', extractCHFeatures(headers['sec-ch-ua'], headers['user-agent']));
|
|
181
|
+
|
|
182
|
+
// Tier 3: UA features + HMAC (§6.3)
|
|
183
|
+
setIfPresent(forwardHeaders, 'X-PW-UA', extractUAFeatures(headers['user-agent']));
|
|
184
|
+
setIfPresent(forwardHeaders, 'X-PW-UA-HMAC', await computeUAHMAC(headers['user-agent'], cfg.vaiUAHmacKey));
|
|
185
|
+
setIfPresent(forwardHeaders, 'X-PW-CT-FP', await computeConfidenceToken(headers['user-agent'], headers['accept-language'], headers['sec-ch-ua']));
|
|
184
186
|
|
|
185
187
|
// Forward request to cloud-api
|
|
186
188
|
const response = await fetch(`${cfg.paywallsAPIHost}${cloudApiPath}`, {
|
|
@@ -417,7 +419,8 @@ async function cloudflare(config = null) {
|
|
|
417
419
|
paywallsAPIHost: env.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
|
|
418
420
|
paywallsAPIKey: env.PAYWALLS_CLOUD_API_KEY,
|
|
419
421
|
paywallsPublisherId: env.PAYWALLS_PUBLISHER_ID,
|
|
420
|
-
vaiPath: env.PAYWALLS_VAI_PATH || '/pw'
|
|
422
|
+
vaiPath: env.PAYWALLS_VAI_PATH || '/pw',
|
|
423
|
+
vaiUAHmacKey: env.VAI_UA_HMAC_KEY || null,
|
|
421
424
|
};
|
|
422
425
|
|
|
423
426
|
// Check if this is a VAI endpoint request and proxy it
|
|
@@ -449,7 +452,8 @@ async function fastly() {
|
|
|
449
452
|
paywallsAPIHost: config.get('PAYWALLS_CLOUD_API_HOST') || PAYWALLS_CLOUD_API_HOST,
|
|
450
453
|
paywallsAPIKey: config.get('PAYWALLS_API_KEY'),
|
|
451
454
|
paywallsPublisherId: config.get('PAYWALLS_PUBLISHER_ID'),
|
|
452
|
-
vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw'
|
|
455
|
+
vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw',
|
|
456
|
+
vaiUAHmacKey: config.get('VAI_UA_HMAC_KEY') || null,
|
|
453
457
|
};
|
|
454
458
|
|
|
455
459
|
// Check if this is a VAI endpoint request and proxy it
|
|
@@ -531,7 +535,8 @@ async function cloudfront(config) {
|
|
|
531
535
|
paywallsAPIHost: config.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
|
|
532
536
|
paywallsAPIKey: config.PAYWALLS_API_KEY,
|
|
533
537
|
paywallsPublisherId: config.PAYWALLS_PUBLISHER_ID,
|
|
534
|
-
vaiPath: config.PAYWALLS_VAI_PATH || '/pw'
|
|
538
|
+
vaiPath: config.PAYWALLS_VAI_PATH || '/pw',
|
|
539
|
+
vaiUAHmacKey: config.VAI_UA_HMAC_KEY || null,
|
|
535
540
|
};
|
|
536
541
|
await loadAgentPatterns(paywallsConfig);
|
|
537
542
|
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Signal Extraction Module — Tier 2 + Tier 3 feature extractors
|
|
3
|
+
*
|
|
4
|
+
* Transforms raw browser headers into compact RFC 8941 Structured Field
|
|
5
|
+
* Dictionary strings for privacy-preserving VAI signal forwarding.
|
|
6
|
+
*
|
|
7
|
+
* Spec: specs/vai-privacy-v2.spec.md §6.2–§6.4
|
|
8
|
+
*
|
|
9
|
+
* Each function returns an SF-Dictionary string (e.g. "html, wildcard")
|
|
10
|
+
* or null if the input is absent/empty. null means the caller should
|
|
11
|
+
* omit the header entirely (not send an empty value).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// ── §6.2.4 / Appendix A: Data-center ASN set ──────────────────────────────
|
|
15
|
+
// Comprehensive cloud/hosting provider ASNs for DC classification.
|
|
16
|
+
// Kept in sync with cloud-api DC_ASN_LIST (cloudflare/vai.js).
|
|
17
|
+
// Source: public ASN registries (PeeringDB, RIPE, ARIN).
|
|
18
|
+
const DC_ASN_SET = new Set([
  // ── Major IaaS ───────────────────────────────────────────────────────────
  // Amazon AWS (primary + secondary)
  16509,
  14618,
  // Google Cloud + Google infra
  396982,
  36492,
  15169,
  // Microsoft Azure
  8075,
  8069,
  8068,
  // Oracle Cloud
  31898,
  // IBM Cloud / SoftLayer
  36351,
  // Alibaba Cloud
  45102,
  // Tencent Cloud
  132203,

  // ── VPS / Hosting ────────────────────────────────────────────────────────
  // DigitalOcean
  14061,
  // Hetzner (dedicated + cloud)
  24940,
  213230,
  // OVH
  16276,
  // Linode / Akamai Connected Cloud
  63949,
  // Vultr / The Constant Company
  20473,
  // Scaleway
  12876,
  // Contabo
  51167,
  // Leaseweb (NL + global)
  60781,
  28753,
]);
|
|
38
|
+
|
|
39
|
+
// ── §6.2.1 Accept → X-PW-Accept ──────────────────────────────────────────
|
|
40
|
+
/**
 * Extract boolean feature flags from the Accept header.
 *
 * Flags are emitted in a fixed order: html, wildcard, json, image.
 * Matching is case-insensitive because media type names are
 * case-insensitive (e.g. "TEXT/HTML" is equivalent to "text/html").
 *
 * @param {string|null|undefined} accept  Raw Accept header value
 * @returns {string|null} SF-Dictionary string, or null if the header is
 *   absent, empty, not a string, or contains none of the tracked types
 */
export function extractAcceptFeatures(accept) {
  // Non-string input (including null/undefined) is treated as "absent"
  // instead of throwing on the string methods below.
  if (typeof accept !== 'string' || accept === '') return null;

  const normalized = accept.toLowerCase();

  const parts = [];
  if (normalized.includes('text/html')) parts.push('html');
  if (normalized.includes('*/*')) parts.push('wildcard');
  if (normalized.includes('application/json')) parts.push('json');
  if (normalized.includes('image/')) parts.push('image');

  return parts.length > 0 ? parts.join(', ') : null;
}
|
|
57
|
+
|
|
58
|
+
// ── §6.2.2 Accept-Encoding → X-PW-Enc ────────────────────────────────────
|
|
59
|
+
/**
 * Extract boolean feature flags from the Accept-Encoding header.
 *
 * The header is split into comma-separated content-coding tokens (any
 * ";q=..." parameter is ignored) and compared case-insensitively, so an
 * unrelated token that merely contains "br" or "gzip" as a substring does
 * not set a flag. "x-gzip" is accepted as an alias of "gzip".
 *
 * Emits "br" and/or "gzip", plus "modern" when both are present.
 *
 * @param {string|null|undefined} acceptEncoding  Raw Accept-Encoding value
 * @returns {string|null} SF-Dictionary string, or null if the header is
 *   absent, empty, not a string, or lists neither br nor gzip
 */
export function extractEncodingFeatures(acceptEncoding) {
  if (typeof acceptEncoding !== 'string' || acceptEncoding === '') return null;

  // One lowercase coding name per list entry, with parameters stripped.
  const codings = new Set(
    acceptEncoding
      .split(',')
      .map((entry) => entry.split(';')[0].trim().toLowerCase())
      .filter(Boolean)
  );

  const hasBr = codings.has('br');
  const hasGzip = codings.has('gzip') || codings.has('x-gzip');

  const parts = [];
  if (hasBr) parts.push('br');
  if (hasGzip) parts.push('gzip');
  if (hasBr && hasGzip) parts.push('modern');

  return parts.length > 0 ? parts.join(', ') : null;
}
|
|
78
|
+
|
|
79
|
+
// ── §6.2.3 Accept-Language → X-PW-Lang ───────────────────────────────────
|
|
80
|
+
/**
 * Extract presence, primary language family, and locale count from
 * the Accept-Language header.
 *
 * The primary family is the first two characters of the first locale,
 * lowercased. Because the header is client-controlled, the "primary"
 * member is only emitted when that prefix is one or two ASCII letters or
 * the wildcard "*"; any other prefix (digits, quotes, "=", ...) is left
 * out so arbitrary characters are never copied into the output.
 *
 * @param {string|null|undefined} acceptLanguage  Raw Accept-Language value
 * @returns {string|null} SF-Dictionary string, or null if the header is
 *   absent, empty, not a string, or the bare wildcard "*"
 */
export function extractLanguageFeatures(acceptLanguage) {
  if (typeof acceptLanguage !== 'string') return null;

  const trimmed = acceptLanguage.trim();
  if (trimmed === '' || trimmed === '*') return null;

  // Split on comma to count locales, ignoring quality values.
  const locales = trimmed
    .split(',')
    .map((entry) => entry.split(';')[0].trim())
    .filter(Boolean);
  const count = locales.length;
  if (count === 0) return null;

  const primary = locales[0].toLowerCase().slice(0, 2);

  const parts = ['present'];
  if (/^(?:[a-z]{1,2}|\*)$/.test(primary)) parts.push(`primary=${primary}`);
  parts.push(`count=${count}`);
  return parts.join(', ');
}
|
|
105
|
+
|
|
106
|
+
// ── §6.2.4 ASN → X-PW-Net ────────────────────────────────────────────────
|
|
107
|
+
/**
 * Classify an ASN into a named enum category.
 *
 * Returns "asn=cloud" when the ASN is a member of DC_ASN_SET and
 * "asn=consumer" for any other valid ASN.
 *
 * Only non-negative integers are accepted. String input must consist
 * solely of decimal digits (surrounding whitespace allowed), so values
 * such as "16509abc", 1.5 or Infinity are treated as absent rather than
 * being classified.
 *
 * @param {string|number|null|undefined} asn  Numeric ASN value
 * @returns {string|null} SF-Dictionary string or null if absent/invalid
 */
export function extractNetFeatures(asn) {
  if (asn == null || asn === '') return null;

  let num;
  if (typeof asn === 'number') {
    num = asn;
  } else {
    const text = String(asn).trim();
    if (!/^\d+$/.test(text)) return null;
    num = Number.parseInt(text, 10);
  }
  if (!Number.isInteger(num) || num < 0) return null;

  const category = DC_ASN_SET.has(num) ? 'cloud' : 'consumer';
  return `asn=${category}`;
}
|
|
122
|
+
|
|
123
|
+
// ── §6.2.5 Sec-CH-UA → X-PW-CH ───────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * Read the Chrome major version out of a Sec-CH-UA header value.
 *
 * Sec-CH-UA lists brands as `"Brand";v="version"` entries. The first entry
 * whose brand is "Google Chrome" or "Chromium" supplies the version.
 *
 * @param {string} secChUA  Raw Sec-CH-UA header
 * @returns {number|null} Major Chrome version, or null when no such brand is listed
 */
function extractChromeVersionFromCH(secChUA) {
  const found = /"(?:Google Chrome|Chromium)";v="(\d+)"/.exec(secChUA);
  if (found === null) {
    return null;
  }
  return parseInt(found[1], 10);
}
|
|
137
|
+
|
|
138
|
+
/**
 * Read the Chrome major version out of a User-Agent string.
 *
 * Looks for the first `Chrome/<digits>` product token
 * (e.g. `...Chrome/134.0.0.0...`).
 *
 * @param {string} userAgent  Raw User-Agent string
 * @returns {number|null} Major Chrome version, or null when no Chrome token is found
 */
function extractChromeVersionFromUA(userAgent) {
  const found = /Chrome\/(\d+)/.exec(userAgent);
  if (found === null) {
    return null;
  }
  return parseInt(found[1], 10);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Summarize the Sec-CH-UA header as a list of feature flags.
 *
 * Output members, in order:
 *   - `present`       always, once the header is non-empty
 *   - `brands=<n>`    number of comma-separated brand entries
 *   - `grease`        when any entry looks like a "Not ... Brand" placeholder
 *   - `consistent`    when the Chrome major version in Sec-CH-UA equals the
 *                     Chrome major version in the User-Agent
 *
 * @param {string|null|undefined} secChUA    Raw Sec-CH-UA header value
 * @param {string|null|undefined} userAgent  Raw User-Agent string (for consistency check)
 * @returns {string|null} SF-Dictionary string or null if CH absent/empty
 */
export function extractCHFeatures(secChUA, userAgent) {
  const header = secChUA ? secChUA.trim() : '';
  if (header === '') {
    return null;
  }

  // Each brand entry is a quoted name followed by ;v="...", comma-separated.
  const brandEntries = header
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);

  // GREASE detection: Chromium convention includes a "Not" brand.
  const looksLikeGrease = (entry) =>
    /not[^"]*brand/i.test(entry) || /not[:\-_.]/i.test(entry);

  const features = ['present', `brands=${brandEntries.length}`];

  if (brandEntries.some(looksLikeGrease)) {
    features.push('grease');
  }

  // Consistency check: both versions must be found and must be equal.
  if (userAgent) {
    const versionFromCH = extractChromeVersionFromCH(header);
    const versionFromUA = extractChromeVersionFromUA(userAgent);
    const bothKnown = versionFromCH != null && versionFromUA != null;
    if (bothKnown && versionFromCH === versionFromUA) {
      features.push('consistent');
    }
  }

  return features.join(', ');
}
|
|
186
|
+
|
|
187
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
188
|
+
// Tier 3 — Replace User-Agent with derived features (§6.3) + CT (§6.4)
|
|
189
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
190
|
+
|
|
191
|
+
// ── §6.3.3 Automation marker detection ────────────────────────────────────
|
|
192
|
+
// HeadlessChrome triggers 'headless' only (via HEADLESS_MARKERS).
|
|
193
|
+
// Explicit automation tools (Puppeteer, Selenium, etc.) trigger 'automation'.
|
|
194
|
+
const AUTOMATION_MARKERS = [
  // Browser automation frameworks
  /Puppeteer/i,
  /Playwright/i,
  /Selenium/i,
  /WebDriver/i,
  /PhantomJS/i,
  /CasperJS/i,
  // HTTP client libraries and command-line tools
  /python-requests/i,
  /python-urllib/i,
  /Go-http-client/i,
  /okhttp/i,
  /Apache-HttpClient/i,
  /libcurl/i,
  /\bcurl\//i,
  /\bwget\//i,
  /HTTPie/i,
  /node-fetch/i,
  /undici/i,
  /axios\//i,
  /\bgot\//i,
  /superagent/i,
  // Test runners
  /Cypress/i,
  /TestCafe/i,
  /Nightwatch/i,
  /WebdriverIO/i,
];

// User-Agent fragments that mark a headless browser.
const HEADLESS_MARKERS = [
  /HeadlessChrome/i,
  /\bHeadless\b/i,
];
|
|
205
|
+
|
|
206
|
+
// ── §6.3.4 Entropy bucketing ──────────────────────────────────────────────
|
|
207
|
+
/**
 * Bucket a User-Agent string's structural complexity.
 *
 * Rules, applied in order:
 *   1. Missing or shorter than 10 chars                          → 'low'
 *   2. Typical browser shape (all 4 character classes, 60-250
 *      chars, contains a parenthesised group)                    → 'medium'
 *   3. Shorter than 40, longer than 300, or fewer than 3
 *      character classes                                         → 'low'
 *   4. More than 70% of the characters are distinct
 *      (random-looking string)                                   → 'high'
 *   5. Anything else                                             → 'medium'
 *
 * The distinct-character check runs before the generic 'medium' fallback;
 * with the fallback first, every input reaching step 4 would already have
 * returned and 'high' could never be produced.
 *
 * @param {string} userAgent
 * @returns {'low'|'medium'|'high'}
 */
function computeUAEntropy(userAgent) {
  if (!userAgent || userAgent.length < 10) return 'low';

  const hasUpper = /[A-Z]/.test(userAgent);
  const hasLower = /[a-z]/.test(userAgent);
  const hasDigit = /\d/.test(userAgent);
  const hasSpecial = /[\/\.;()\s,_\-]/.test(userAgent);
  const classCount = [hasUpper, hasLower, hasDigit, hasSpecial].filter(Boolean).length;

  const len = userAgent.length;
  const hasParens = /\([^)]+\)/.test(userAgent);

  // Typical browser UA: 60-250 chars, 4 char classes, has parens.
  if (classCount >= 4 && len >= 60 && len <= 250 && hasParens) return 'medium';

  // Very short, very long, or missing structure.
  if (len < 40 || len > 300 || classCount < 3) return 'low';

  // Unusual: high-entropy random strings.
  const uniqueChars = new Set(userAgent).size;
  if (uniqueChars / len > 0.7) return 'high';

  return 'medium';
}
|
|
237
|
+
|
|
238
|
+
// ── §6.3.1 UA dpf/version parsing ─────────────────────────────────────────
|
|
239
|
+
|
|
240
|
+
/**
 * Guess the device class from a User-Agent string.
 *
 * Rules are checked in order and the first hit wins: tablet product names,
 * phone markers, Android without "Mobile" (treated as a tablet), desktop
 * operating systems, then well-known crawler names.
 *
 * @param {string} ua
 * @returns {'desktop'|'mobile'|'tablet'|'server'|'unknown'}
 */
function detectDevice(ua) {
  const rules = [
    ['tablet', () => /\b(iPad|Tablet|PlayBook|Silk|Kindle)\b/i.test(ua)],
    ['mobile', () => /\b(iPhone|iPod|Android.*Mobile|Mobile.*Android|webOS|BlackBerry|Opera Mini|IEMobile|Windows Phone)\b/i.test(ua)],
    ['tablet', () => /\b(Android)\b/i.test(ua) && !/Mobile/i.test(ua)],
    ['desktop', () => /\b(Macintosh|Windows NT|X11|Linux(?!.*Android))\b/i.test(ua)],
    ['server', () => /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot)\b/i.test(ua)],
  ];

  for (const [label, matches] of rules) {
    if (matches()) {
      return label;
    }
  }
  return 'unknown';
}
|
|
249
|
+
|
|
250
|
+
/**
 * Guess the operating-system platform from a User-Agent string.
 *
 * Checked in order: iOS devices, Android, Macintosh, Windows, then
 * Linux/X11. Android is tested before Linux because Android User-Agents
 * also contain "Linux".
 *
 * @param {string} ua
 * @returns {'windows'|'mac'|'ios'|'android'|'linux'|'other'}
 */
function detectPlatform(ua) {
  const rules = [
    ['ios', [/\b(iPhone|iPad|iPod)\b/i]],
    ['android', [/\bAndroid\b/i]],
    ['mac', [/\bMacintosh\b/i]],
    ['windows', [/\bWindows\b/i]],
    ['linux', [/\bLinux\b/i, /\bX11\b/i]],
  ];

  const hit = rules.find(([, patterns]) => patterns.some((re) => re.test(ua)));
  return hit ? hit[0] : 'other';
}
|
|
259
|
+
|
|
260
|
+
/**
 * Guess the browser family from a User-Agent string.
 *
 * Order matters: crawlers first, then Edge (its User-Agent also contains
 * "Chrome"), Firefox, Safari (only when no Chrome/Chromium token is
 * present), and finally Chrome/Chromium including HeadlessChrome.
 *
 * @param {string} ua
 * @returns {'chrome'|'safari'|'firefox'|'edge'|'other'|'bot'}
 */
function detectFamily(ua) {
  const isCrawler = /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot|Slurp|ia_archiver)\b/i.test(ua);
  if (isCrawler) {
    return 'bot';
  }

  const rules = [
    ['edge', () => /\bEdg(?:e|A)?\/\d/i.test(ua)],
    ['firefox', () => /\bFirefox\//i.test(ua)],
    ['safari', () => /\bSafari\//i.test(ua) && !/Chrome|Chromium|HeadlessChrome/i.test(ua)],
    ['chrome', () => /(?:\b|Headless)Chrom(?:e|ium)\//i.test(ua)],
  ];

  for (const [family, matches] of rules) {
    if (matches()) {
      return family;
    }
  }
  return 'other';
}
|
|
271
|
+
|
|
272
|
+
/**
 * Pull the major browser version out of a User-Agent string.
 *
 * Patterns are tried from most to least specific and the first one that
 * matches supplies the number.
 *
 * @param {string} ua
 * @returns {number|null} Major version, or null when no "/<digits>" token exists
 */
function extractMajorVersion(ua) {
  const patterns = [
    /\bEdg(?:e|A)?\/(\d+)/,
    /\bFirefox\/(\d+)/,
    // Chrome / Chromium / HeadlessChrome
    /(?:\b|Headless)Chrom(?:e|ium)\/(\d+)/,
    // Safari: Version/17.x (not the Safari/605 build number)
    /\bVersion\/(\d+)/,
    // Generic: first thing/number pattern
    /\/(\d+)/,
  ];

  for (const pattern of patterns) {
    const found = pattern.exec(ua);
    if (found) {
      return parseInt(found[1], 10);
    }
  }
  return null;
}
|
|
294
|
+
|
|
295
|
+
/**
 * Map a major version number onto a coarse range token.
 *
 * A missing version (null/undefined) falls into the lowest bucket.
 *
 * @param {number|null} ver
 * @returns {string} One of '0-79', '80-99', '100-119', '120-139', '140+'
 */
function bucketVersion(ver) {
  if (ver == null) {
    return '0-79';
  }

  // Exclusive upper bound of each bucket, in ascending order.
  const buckets = [
    [80, '0-79'],
    [100, '80-99'],
    [120, '100-119'],
    [140, '120-139'],
  ];

  for (const [upperBound, label] of buckets) {
    if (ver < upperBound) {
      return label;
    }
  }
  return '140+';
}
|
|
308
|
+
|
|
309
|
+
// ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
|
|
310
|
+
/**
 * Reduce a User-Agent string to a compact list of derived features.
 *
 * Output members, in order:
 *   - `dpf=<device>/<platform>/<family>`
 *   - `ver=<version bucket>`
 *   - `browser`     when the string starts with "Mozilla/"
 *   - `headless`    when a HEADLESS_MARKERS pattern matches
 *   - `automation`  when an AUTOMATION_MARKERS pattern matches
 *   - `entropy=<low|medium|high>`
 *
 * @param {string|null|undefined} userAgent  Raw User-Agent string
 * @returns {string|null} SF-Dictionary string or null if absent/empty
 */
export function extractUAFeatures(userAgent) {
  const ua = userAgent ? userAgent.trim() : '';
  if (ua === '') {
    return null;
  }

  const dpf = [detectDevice(ua), detectPlatform(ua), detectFamily(ua)].join('/');
  const versionBucket = bucketVersion(extractMajorVersion(ua));

  const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(ua));

  const features = [`dpf=${dpf}`, `ver=${versionBucket}`];
  if (/^Mozilla\//i.test(ua)) {
    features.push('browser');
  }
  if (matchesAny(HEADLESS_MARKERS)) {
    features.push('headless');
  }
  if (matchesAny(AUTOMATION_MARKERS)) {
    features.push('automation');
  }
  features.push(`entropy=${computeUAEntropy(ua)}`);

  return features.join(', ');
}
|
|
337
|
+
|
|
338
|
+
// ── §6.3.2 computeUAHMAC ─────────────────────────────────────────────────
|
|
339
|
+
/**
|
|
340
|
+
* Compute HMAC-SHA256 of the raw User-Agent, returned as an RFC 8941
|
|
341
|
+
* Byte Sequence string (:base64:).
|
|
342
|
+
*
|
|
343
|
+
* Uses crypto.subtle — compatible with Cloudflare Workers and modern Node.
|
|
344
|
+
*
|
|
345
|
+
* @param {string} userAgent Raw User-Agent string
|
|
346
|
+
* @param {string} hmacKey HMAC secret key (plain text)
|
|
347
|
+
* @returns {Promise<string|null>} RFC 8941 Byte Sequence or null if inputs missing
|
|
348
|
+
*/
|
|
349
|
+
export async function computeUAHMAC(userAgent, hmacKey) {
|
|
350
|
+
if (!userAgent || !hmacKey) return null;
|
|
351
|
+
|
|
352
|
+
const enc = new TextEncoder();
|
|
353
|
+
const key = await crypto.subtle.importKey(
|
|
354
|
+
'raw', enc.encode(hmacKey),
|
|
355
|
+
{ name: 'HMAC', hash: 'SHA-256' },
|
|
356
|
+
false, ['sign']
|
|
357
|
+
);
|
|
358
|
+
const sig = await crypto.subtle.sign('HMAC', key, enc.encode(userAgent));
|
|
359
|
+
const b64 = btoa(String.fromCharCode(...new Uint8Array(sig)));
|
|
360
|
+
return `:${b64}:`;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// ── §6.4 computeConfidenceToken ───────────────────────────────────────────
|
|
364
|
+
/**
|
|
365
|
+
* Compute the confidence token.
|
|
366
|
+
* ct = SHA-256(userAgent + acceptLanguage + secChUA)[0:8] hex
|
|
367
|
+
*
|
|
368
|
+
* Matches the logic in cloud-api computeConfidenceFingerprint().
|
|
369
|
+
*
|
|
370
|
+
* @param {string|null|undefined} userAgent Raw User-Agent
|
|
371
|
+
* @param {string|null|undefined} acceptLanguage Raw Accept-Language
|
|
372
|
+
* @param {string|null|undefined} secChUA Raw Sec-CH-UA
|
|
373
|
+
* @returns {Promise<string>} 8-char hex token, never null
|
|
374
|
+
*/
|
|
375
|
+
export async function computeConfidenceToken(userAgent, acceptLanguage, secChUA) {
|
|
376
|
+
const ua = userAgent || '';
|
|
377
|
+
const lang = acceptLanguage || '';
|
|
378
|
+
const ch = secChUA || '';
|
|
379
|
+
|
|
380
|
+
const msgBuffer = new TextEncoder().encode(ua + lang + ch);
|
|
381
|
+
const hashBuffer = await crypto.subtle.digest('SHA-256', msgBuffer);
|
|
382
|
+
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
|
383
|
+
const hex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
|
|
384
|
+
return hex.slice(0, 8);
|
|
385
|
+
}
|