@paywalls-net/filter 1.3.8 → 1.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/jest.config.js +7 -0
- package/package.json +6 -2
- package/src/index.js +67 -32
- package/src/signal-extraction.js +385 -0
- package/src/user-agent-classification.js +2 -0
- package/tests/proxy-vai-request.test.js +379 -0
- package/tests/signal-extraction.test.js +624 -0
package/jest.config.js
ADDED
package/package.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"description": "Client SDK for integrating paywalls.net bot filtering and authorization services into your server or CDN.",
|
|
4
4
|
"author": "paywalls.net",
|
|
5
5
|
"license": "MIT",
|
|
6
|
-
"version": "1.3.8",
|
|
6
|
+
"version": "1.3.10",
|
|
7
7
|
"publishConfig": {
|
|
8
8
|
"access": "public"
|
|
9
9
|
},
|
|
@@ -17,9 +17,13 @@
|
|
|
17
17
|
".": "./src/index.js"
|
|
18
18
|
},
|
|
19
19
|
"scripts": {
|
|
20
|
-
"test": "
|
|
20
|
+
"test": "node --experimental-vm-modules node_modules/.bin/jest --runInBand",
|
|
21
|
+
"test:watch": "node --experimental-vm-modules node_modules/.bin/jest --watch"
|
|
21
22
|
},
|
|
22
23
|
"dependencies": {
|
|
23
24
|
"ua-parser-js": "^2.0.4"
|
|
25
|
+
},
|
|
26
|
+
"devDependencies": {
|
|
27
|
+
"jest": "^30.2.0"
|
|
24
28
|
}
|
|
25
29
|
}
|
package/src/index.js
CHANGED
|
@@ -4,6 +4,11 @@
|
|
|
4
4
|
*/
|
|
5
5
|
const sdk_version = "1.2.x";
|
|
6
6
|
import { classifyUserAgent, loadAgentPatterns } from './user-agent-classification.js';
|
|
7
|
+
import {
|
|
8
|
+
extractAcceptFeatures, extractEncodingFeatures, extractLanguageFeatures,
|
|
9
|
+
extractNetFeatures, extractCHFeatures, extractUAFeatures,
|
|
10
|
+
computeUAHMAC, computeConfidenceToken,
|
|
11
|
+
} from './signal-extraction.js';
|
|
7
12
|
|
|
8
13
|
const PAYWALLS_CLOUD_API_HOST = "https://cloud-api.paywalls.net";
|
|
9
14
|
|
|
@@ -77,6 +82,27 @@ function isVAIRequest(request, vaiPath = '/pw') {
|
|
|
77
82
|
}
|
|
78
83
|
}
|
|
79
84
|
|
|
85
|
+
/**
 * Operational proxy headers that are relayed 1:1 to cloud-api (§7.2).
 *
 * Every entry has the shape { src, dest }: `src` is the lowercase name looked
 * up on the incoming request, `dest` is the header name written on the
 * outgoing request. Headers that need fallback logic or are derived from
 * several sources are not listed here; they are handled separately.
 */
const PROXY_HEADER_MAP = [
  ['host', 'X-Original-Host'], // publisher hostname for domain binding
  ['origin', 'X-Forwarded-Origin'], // relay for CORS evaluation (§5, §7.2)
  ['access-control-request-method', 'Access-Control-Request-Method'], // preflight (§5.4)
  ['access-control-request-headers', 'Access-Control-Request-Headers'], // preflight (§5.4)
  ['cookie', 'Cookie'], // session/identity context
].map(([src, dest]) => ({ src, dest }));
|
|
97
|
+
|
|
98
|
+
/**
 * Assign `value` to `obj[key]` unless the value is null or undefined.
 *
 * Extractors return null when their input is absent; skipping the assignment
 * means the corresponding header is omitted entirely rather than being sent
 * as an empty string. Other falsy values ('' or 0) are still assigned.
 *
 * @param {Object} obj   Target object (mutated in place)
 * @param {string} key   Property / header name to set
 * @param {*} value      Value to assign when it is neither null nor undefined
 */
function setIfPresent(obj, key, value) {
  if (value === null || value === undefined) {
    return;
  }
  obj[key] = value;
}
|
|
105
|
+
|
|
80
106
|
/**
|
|
81
107
|
* Proxy VAI requests to the cloud-api service (Spec §7).
|
|
82
108
|
*
|
|
@@ -92,6 +118,10 @@ function isVAIRequest(request, vaiPath = '/pw') {
|
|
|
92
118
|
* - User-Agent, X-Forwarded-For: standard proxy headers
|
|
93
119
|
* - Authorization: publisher API key (§7.4)
|
|
94
120
|
*
|
|
121
|
+
* Human-confidence signal forwarding (§7.2):
|
|
122
|
+
* Uses signal-extraction module to transform raw browser headers into compact
|
|
123
|
+
* RFC 8941 Structured Field Value strings. Absent inputs → null → header omitted.
|
|
124
|
+
*
|
|
95
125
|
* Response passthrough (§7.3):
|
|
96
126
|
* All response headers from cloud-api are returned unchanged — including
|
|
97
127
|
* Access-Control-*, Vary, Cache-Control. The proxy never injects or
|
|
@@ -114,44 +144,46 @@ async function proxyVAIRequest(cfg, request) {
|
|
|
114
144
|
// Build forwarding headers — include everything cloud-api needs
|
|
115
145
|
// for CORS evaluation, domain auth, and request context.
|
|
116
146
|
const forwardHeaders = {
|
|
117
|
-
'User-Agent':
|
|
147
|
+
'User-Agent': sdkUserAgent,
|
|
118
148
|
'Authorization': `Bearer ${cfg.paywallsAPIKey}`
|
|
119
149
|
};
|
|
120
150
|
|
|
121
|
-
// Client IP forwarding
|
|
151
|
+
// Client IP forwarding — dual-source, so handled explicitly
|
|
122
152
|
if (headers['x-forwarded-for']) {
|
|
123
153
|
forwardHeaders['X-Forwarded-For'] = headers['x-forwarded-for'];
|
|
124
154
|
} else if (headers['cf-connecting-ip']) {
|
|
125
155
|
forwardHeaders['X-Forwarded-For'] = headers['cf-connecting-ip'];
|
|
126
156
|
}
|
|
127
|
-
|
|
128
|
-
//
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
forwardHeaders['X-Original-Host'] = headers['host'];
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
// Forward browser Origin via custom header for CORS evaluation (§5, §7.2).
|
|
135
|
-
// Cloudflare Workers runtime controls the outbound Origin header on fetch(),
|
|
136
|
-
// so we relay the browser's Origin via X-Forwarded-Origin. Cloud-api's
|
|
137
|
-
// evaluateCORS() reads this to make the authoritative CORS decision.
|
|
138
|
-
if (headers['origin']) {
|
|
139
|
-
forwardHeaders['X-Forwarded-Origin'] = headers['origin'];
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
// Forward preflight headers so cloud-api can evaluate OPTIONS (§5.4, §7.2)
|
|
143
|
-
if (headers['access-control-request-method']) {
|
|
144
|
-
forwardHeaders['Access-Control-Request-Method'] = headers['access-control-request-method'];
|
|
145
|
-
}
|
|
146
|
-
if (headers['access-control-request-headers']) {
|
|
147
|
-
forwardHeaders['Access-Control-Request-Headers'] = headers['access-control-request-headers'];
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
// Forward cookies for session/identity context (§7.2)
|
|
151
|
-
if (headers['cookie']) {
|
|
152
|
-
forwardHeaders['Cookie'] = headers['cookie'];
|
|
157
|
+
|
|
158
|
+
// Clean 1:1 operational header forwarding (§7.2)
|
|
159
|
+
for (const { src, dest } of PROXY_HEADER_MAP) {
|
|
160
|
+
if (headers[src]) forwardHeaders[dest] = headers[src];
|
|
153
161
|
}
|
|
154
|
-
|
|
162
|
+
|
|
163
|
+
// Signal protocol version (§7.1)
|
|
164
|
+
forwardHeaders['X-PW-V'] = '2';
|
|
165
|
+
|
|
166
|
+
const cf = request.cf || {};
|
|
167
|
+
|
|
168
|
+
// Tier 1: kept raw (§6.1)
|
|
169
|
+
setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Dest', headers['sec-fetch-dest']);
|
|
170
|
+
setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Mode', headers['sec-fetch-mode']);
|
|
171
|
+
setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Site', headers['sec-fetch-site']);
|
|
172
|
+
setIfPresent(forwardHeaders, 'X-PW-TLS-Version', cf.tlsVersion != null ? String(cf.tlsVersion) : null);
|
|
173
|
+
setIfPresent(forwardHeaders, 'X-PW-HTTP-Protocol', cf.httpProtocol != null ? String(cf.httpProtocol) : null);
|
|
174
|
+
|
|
175
|
+
// Tier 2: extract features (§6.2)
|
|
176
|
+
setIfPresent(forwardHeaders, 'X-PW-Accept', extractAcceptFeatures(headers['accept']));
|
|
177
|
+
setIfPresent(forwardHeaders, 'X-PW-Enc', extractEncodingFeatures(headers['accept-encoding']));
|
|
178
|
+
setIfPresent(forwardHeaders, 'X-PW-Lang', extractLanguageFeatures(headers['accept-language']));
|
|
179
|
+
setIfPresent(forwardHeaders, 'X-PW-Net', extractNetFeatures(cf.asn));
|
|
180
|
+
setIfPresent(forwardHeaders, 'X-PW-CH', extractCHFeatures(headers['sec-ch-ua'], headers['user-agent']));
|
|
181
|
+
|
|
182
|
+
// Tier 3: UA features + HMAC (§6.3)
|
|
183
|
+
setIfPresent(forwardHeaders, 'X-PW-UA', extractUAFeatures(headers['user-agent']));
|
|
184
|
+
setIfPresent(forwardHeaders, 'X-PW-UA-HMAC', await computeUAHMAC(headers['user-agent'], cfg.vaiUAHmacKey));
|
|
185
|
+
setIfPresent(forwardHeaders, 'X-PW-CT-FP', await computeConfidenceToken(headers['user-agent'], headers['accept-language'], headers['sec-ch-ua']));
|
|
186
|
+
|
|
155
187
|
// Forward request to cloud-api
|
|
156
188
|
const response = await fetch(`${cfg.paywallsAPIHost}${cloudApiPath}`, {
|
|
157
189
|
method: request.method || 'GET',
|
|
@@ -387,7 +419,8 @@ async function cloudflare(config = null) {
|
|
|
387
419
|
paywallsAPIHost: env.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
|
|
388
420
|
paywallsAPIKey: env.PAYWALLS_CLOUD_API_KEY,
|
|
389
421
|
paywallsPublisherId: env.PAYWALLS_PUBLISHER_ID,
|
|
390
|
-
vaiPath: env.PAYWALLS_VAI_PATH || '/pw'
|
|
422
|
+
vaiPath: env.PAYWALLS_VAI_PATH || '/pw',
|
|
423
|
+
vaiUAHmacKey: env.VAI_UA_HMAC_KEY || null,
|
|
391
424
|
};
|
|
392
425
|
|
|
393
426
|
// Check if this is a VAI endpoint request and proxy it
|
|
@@ -419,7 +452,8 @@ async function fastly() {
|
|
|
419
452
|
paywallsAPIHost: config.get('PAYWALLS_CLOUD_API_HOST') || PAYWALLS_CLOUD_API_HOST,
|
|
420
453
|
paywallsAPIKey: config.get('PAYWALLS_API_KEY'),
|
|
421
454
|
paywallsPublisherId: config.get('PAYWALLS_PUBLISHER_ID'),
|
|
422
|
-
vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw'
|
|
455
|
+
vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw',
|
|
456
|
+
vaiUAHmacKey: config.get('VAI_UA_HMAC_KEY') || null,
|
|
423
457
|
};
|
|
424
458
|
|
|
425
459
|
// Check if this is a VAI endpoint request and proxy it
|
|
@@ -501,7 +535,8 @@ async function cloudfront(config) {
|
|
|
501
535
|
paywallsAPIHost: config.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
|
|
502
536
|
paywallsAPIKey: config.PAYWALLS_API_KEY,
|
|
503
537
|
paywallsPublisherId: config.PAYWALLS_PUBLISHER_ID,
|
|
504
|
-
vaiPath: config.PAYWALLS_VAI_PATH || '/pw'
|
|
538
|
+
vaiPath: config.PAYWALLS_VAI_PATH || '/pw',
|
|
539
|
+
vaiUAHmacKey: config.VAI_UA_HMAC_KEY || null,
|
|
505
540
|
};
|
|
506
541
|
await loadAgentPatterns(paywallsConfig);
|
|
507
542
|
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Signal Extraction Module — Tier 2 + Tier 3 feature extractors
|
|
3
|
+
*
|
|
4
|
+
* Transforms raw browser headers into compact RFC 8941 Structured Field
|
|
5
|
+
* Dictionary strings for privacy-preserving VAI signal forwarding.
|
|
6
|
+
*
|
|
7
|
+
* Spec: specs/vai-privacy-v2.spec.md §6.2–§6.4
|
|
8
|
+
*
|
|
9
|
+
* Each function returns an SF-Dictionary string (e.g. "html, wildcard")
|
|
10
|
+
* or null if the input is absent/empty. null means the caller should
|
|
11
|
+
* omit the header entirely (not send an empty value).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// ── §6.2.4 / Appendix A: Data-center ASN set ──────────────────────────────
// ASNs of cloud and hosting providers, used to classify a request's network
// as data-center ("cloud") traffic.
// Kept in sync with cloud-api DC_ASN_LIST (cloudflare/vai.js).
// Source: public ASN registries (PeeringDB, RIPE, ARIN).
const DC_ASN_SET = new Set([
  // ── Major IaaS ───────────────────────────────────────────────────────────
  16509, // Amazon AWS (primary)
  14618, // Amazon AWS (secondary)
  396982, // Google Cloud
  36492, // Google Cloud / Google infra
  15169, // Google Cloud / Google infra
  8075, // Microsoft Azure
  8069, // Microsoft Azure
  8068, // Microsoft Azure
  31898, // Oracle Cloud
  36351, // IBM Cloud / SoftLayer
  45102, // Alibaba Cloud
  132203, // Tencent Cloud

  // ── VPS / Hosting ────────────────────────────────────────────────────────
  14061, // DigitalOcean
  24940, // Hetzner (dedicated + cloud)
  213230, // Hetzner (dedicated + cloud)
  16276, // OVH
  63949, // Linode / Akamai Connected Cloud
  20473, // Vultr / The Constant Company
  12876, // Scaleway
  51167, // Contabo
  60781, // Leaseweb (NL + global)
  28753, // Leaseweb (NL + global)
]);
|
|
38
|
+
|
|
39
|
+
// ── §6.2.1 Accept → X-PW-Accept ──────────────────────────────────────────
|
|
40
|
+
/**
 * Extract boolean feature flags from the Accept header.
 *
 * Flags are emitted in a fixed order: `html` (text/html), `wildcard`,
 * `json` (application/json), `image` (any image/ type). Media types are
 * case-insensitive, so the header is lower-cased before matching.
 *
 * @param {string|null|undefined} accept  Raw Accept header value
 * @returns {string|null} SF-Dictionary string (e.g. "html, wildcard"), or
 *   null if the header is absent, empty, not a string, or has no known type
 */
export function extractAcceptFeatures(accept) {
  // Non-string input (absent header, unexpected type) → omit the signal.
  if (typeof accept !== 'string' || accept === '') return null;

  const normalized = accept.toLowerCase();

  // [substring to look for, flag to emit] — order defines output order.
  const probes = [
    ['text/html', 'html'],
    ['*/*', 'wildcard'],
    ['application/json', 'json'],
    ['image/', 'image'],
  ];

  const flags = probes
    .filter(([needle]) => normalized.includes(needle))
    .map(([, flag]) => flag);

  return flags.length > 0 ? flags.join(', ') : null;
}
|
|
57
|
+
|
|
58
|
+
// ── §6.2.2 Accept-Encoding → X-PW-Enc ────────────────────────────────────
|
|
59
|
+
/**
 * Extract boolean feature flags from the Accept-Encoding header.
 *
 * The header is split into its comma-separated content codings (quality
 * parameters are dropped) and each coding is compared as a whole token,
 * case-insensitively. This avoids false positives from plain substring
 * matching (e.g. a coding that merely contains the letters "br").
 *
 * Flags: `br`, `gzip` ("x-gzip" is treated as gzip), and `modern` when both
 * are listed.
 *
 * @param {string|null|undefined} acceptEncoding  Raw Accept-Encoding value
 * @returns {string|null} SF-Dictionary string or null if the header is
 *   absent, empty, not a string, or lists neither br nor gzip
 */
export function extractEncodingFeatures(acceptEncoding) {
  if (typeof acceptEncoding !== 'string' || acceptEncoding === '') return null;

  const codings = new Set(
    acceptEncoding
      .split(',')
      .map((item) => item.split(';')[0].trim().toLowerCase())
      .filter(Boolean)
  );

  const hasBr = codings.has('br');
  const hasGzip = codings.has('gzip') || codings.has('x-gzip');

  const parts = [];
  if (hasBr) parts.push('br');
  if (hasGzip) parts.push('gzip');
  if (hasBr && hasGzip) parts.push('modern');

  return parts.length > 0 ? parts.join(', ') : null;
}
|
|
78
|
+
|
|
79
|
+
// ── §6.2.3 Accept-Language → X-PW-Lang ───────────────────────────────────
|
|
80
|
+
/**
 * Summarise the Accept-Language header as three features: a `present`
 * marker, the primary language family, and the number of listed locales.
 *
 * The primary family is the first two characters of the first locale,
 * lower-cased. Quality values (";q=…") are ignored when counting.
 *
 * @param {string|null|undefined} acceptLanguage  Raw Accept-Language value
 * @returns {string|null} SF-Dictionary string such as
 *   "present, primary=en, count=2", or null if the header is absent, empty,
 *   a bare "*", or contains no locale entries
 */
export function extractLanguageFeatures(acceptLanguage) {
  if (!acceptLanguage) return null;

  const header = acceptLanguage.trim();
  if (header === '' || header === '*') return null;

  // Collect the language range of every comma-separated entry, dropping
  // parameters after ';' and skipping entries that end up empty.
  const tags = [];
  for (const entry of header.split(',')) {
    const [range] = entry.trim().split(';');
    const tag = range.trim();
    if (tag) tags.push(tag);
  }
  if (tags.length === 0) return null;

  // slice() returns the whole string when it is shorter than two characters.
  const primary = tags[0].toLowerCase().slice(0, 2);

  return `present, primary=${primary}, count=${tags.length}`;
}
|
|
105
|
+
|
|
106
|
+
// ── §6.2.4 ASN → X-PW-Net ────────────────────────────────────────────────
|
|
107
|
+
/**
 * Classify an ASN into a named enum category.
 *
 * The ASN is `cloud` when it is a member of DC_ASN_SET and `consumer`
 * otherwise. Input is validated strictly: a string must consist of decimal
 * digits only (surrounding whitespace is ignored) and a number must be a
 * non-negative integer. Anything else yields null so that malformed values
 * are not silently reported as `consumer`.
 *
 * @param {string|number|null|undefined} asn  Numeric ASN value
 * @returns {string|null} SF-Dictionary string ("asn=cloud" / "asn=consumer")
 *   or null if the ASN is absent, empty or not a valid non-negative integer
 */
export function extractNetFeatures(asn) {
  if (asn == null || asn === '') return null;

  let num;
  if (typeof asn === 'number') {
    num = asn;
  } else {
    const text = String(asn).trim();
    // Reject partial parses such as "16509abc" that parseInt would accept.
    if (!/^\d+$/.test(text)) return null;
    num = Number(text);
  }

  // Rejects NaN, ±Infinity, fractions and negative values.
  if (!Number.isSafeInteger(num) || num < 0) return null;

  const category = DC_ASN_SET.has(num) ? 'cloud' : 'consumer';
  return `asn=${category}`;
}
|
|
122
|
+
|
|
123
|
+
// ── §6.2.5 Sec-CH-UA → X-PW-CH ───────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * Read the major Chrome version out of a Sec-CH-UA header value.
 *
 * The header is a list of `"Brand";v="version"` entries; the first entry
 * whose brand is "Google Chrome" or "Chromium" supplies the version.
 *
 * @param {string} secChUA  Raw Sec-CH-UA header
 * @returns {number|null} Major Chrome version, or null if no such brand
 */
function extractChromeVersionFromCH(secChUA) {
  const hit = secChUA.match(/"(?:Google Chrome|Chromium)";v="(\d+)"/);
  if (!hit) {
    return null;
  }
  const [, major] = hit;
  return parseInt(major, 10);
}
|
|
137
|
+
|
|
138
|
+
/**
 * Read the major Chrome version out of a User-Agent string by locating the
 * first `Chrome/<digits>` product token (e.g. "...Chrome/134.0.0.0...").
 *
 * @param {string} userAgent  Raw User-Agent string
 * @returns {number|null} Major Chrome version, or null if no Chrome token
 */
function extractChromeVersionFromUA(userAgent) {
  const hit = userAgent.match(/Chrome\/(\d+)/);
  if (!hit) {
    return null;
  }
  const [, major] = hit;
  return parseInt(major, 10);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Derive features from the Sec-CH-UA header and cross-check it against the
 * User-Agent string.
 *
 * Emitted members, in order:
 *   - `present`      always, once the header is non-empty
 *   - `brands=N`     number of comma-separated brand entries
 *   - `grease`       when an entry looks like a Chromium "Not…Brand" entry
 *   - `consistent`   when the Chrome/Chromium major version in the header
 *                    equals the Chrome major version in the User-Agent
 *
 * @param {string|null|undefined} secChUA    Raw Sec-CH-UA header value
 * @param {string|null|undefined} userAgent  Raw User-Agent string (for the consistency check)
 * @returns {string|null} SF-Dictionary string or null if CH absent/empty
 */
export function extractCHFeatures(secChUA, userAgent) {
  if (!secChUA) return null;

  const header = secChUA.trim();
  if (header === '') return null;

  // Each brand entry is a quoted name followed by ;v="..."; entries are
  // separated by commas.
  const entries = header
    .split(',')
    .map((entry) => entry.trim())
    .filter(Boolean);

  const flags = ['present', `brands=${entries.length}`];

  // GREASE detection: Chromium convention includes a "Not" brand.
  const looksLikeGrease = (brand) =>
    /not[^"]*brand/i.test(brand) || /not[:\-_.]/i.test(brand);
  if (entries.some(looksLikeGrease)) {
    flags.push('grease');
  }

  // Consistency: both sources must expose a Chrome major version and the
  // two versions must be equal.
  if (userAgent) {
    const chHit = header.match(/"(?:Google Chrome|Chromium)";v="(\d+)"/);
    const uaHit = userAgent.match(/Chrome\/(\d+)/);
    if (chHit && uaHit && parseInt(chHit[1], 10) === parseInt(uaHit[1], 10)) {
      flags.push('consistent');
    }
  }

  return flags.join(', ');
}
|
|
186
|
+
|
|
187
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
188
|
+
// Tier 3 — Replace User-Agent with derived features (§6.3) + CT (§6.4)
|
|
189
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
190
|
+
|
|
191
|
+
// ── §6.3.3 Automation marker detection ────────────────────────────────────
// Two independent pattern lists are tested against the User-Agent:
//   - HEADLESS_MARKERS   → a match yields the 'headless' flag. HeadlessChrome
//                          is matched here only, not by AUTOMATION_MARKERS.
//   - AUTOMATION_MARKERS → explicit automation tools, HTTP client libraries
//                          and test runners; a match yields 'automation'.
const AUTOMATION_MARKERS = [
  // Browser automation frameworks
  /Puppeteer/i,
  /Playwright/i,
  /Selenium/i,
  /WebDriver/i,
  /PhantomJS/i,
  /CasperJS/i,
  // HTTP client libraries
  /python-requests/i,
  /python-urllib/i,
  /Go-http-client/i,
  /okhttp/i,
  /Apache-HttpClient/i,
  /libcurl/i,
  // Command-line clients
  /\bcurl\//i,
  /\bwget\//i,
  /HTTPie/i,
  // Node.js HTTP clients
  /node-fetch/i,
  /undici/i,
  /axios\//i,
  /\bgot\//i,
  /superagent/i,
  // Test runners
  /Cypress/i,
  /TestCafe/i,
  /Nightwatch/i,
  /WebdriverIO/i,
];

const HEADLESS_MARKERS = [
  /HeadlessChrome/i,
  /\bHeadless\b/i,
];
|
|
205
|
+
|
|
206
|
+
// ── §6.3.4 Entropy bucketing ──────────────────────────────────────────────
|
|
207
|
+
/**
 * Bucket a User-Agent string's structural complexity.
 *
 *   - 'low'    : missing, shorter than 40 or longer than 300 characters, or
 *                fewer than three character classes (uppercase, lowercase,
 *                digit, punctuation/whitespace)
 *   - 'high'   : structurally plausible, but more than 70% of its characters
 *                are distinct — typical of random strings, not browser UAs
 *   - 'medium' : everything else (includes ordinary browser User-Agents)
 *
 * The uniqueness check runs before the 'medium' fallback. Previously every
 * string that passed the length/class guard returned 'medium' first, which
 * made the 'high' bucket unreachable.
 *
 * @param {string} userAgent
 * @returns {'low'|'medium'|'high'}
 */
function computeUAEntropy(userAgent) {
  if (!userAgent || userAgent.length < 10) return 'low';

  const len = userAgent.length;
  const classCount = [
    /[A-Z]/, // uppercase
    /[a-z]/, // lowercase
    /\d/, // digit
    /[\/\.;()\s,_\-]/, // punctuation / whitespace
  ].filter((re) => re.test(userAgent)).length;

  // Very short, very long, or missing structure
  if (len < 40 || len > 300 || classCount < 3) return 'low';

  // Unusual: high-entropy random strings
  const uniqueChars = new Set(userAgent).size;
  if (uniqueChars / len > 0.7) return 'high';

  return 'medium';
}
|
|
237
|
+
|
|
238
|
+
// ── §6.3.1 UA dpf/version parsing ─────────────────────────────────────────
|
|
239
|
+
|
|
240
|
+
/**
 * Classify the device class advertised by a User-Agent string.
 *
 * Rules are evaluated top to bottom and the first match wins: explicit
 * tablet markers, mobile markers, Android without "Mobile" (treated as a
 * tablet), desktop operating systems, then well-known crawler names.
 *
 * @param {string} ua  User-Agent string
 * @returns {'desktop'|'mobile'|'tablet'|'server'|'unknown'}
 */
function detectDevice(ua) {
  const rules = [
    ['tablet', () => /\b(iPad|Tablet|PlayBook|Silk|Kindle)\b/i.test(ua)],
    ['mobile', () => /\b(iPhone|iPod|Android.*Mobile|Mobile.*Android|webOS|BlackBerry|Opera Mini|IEMobile|Windows Phone)\b/i.test(ua)],
    ['tablet', () => /\b(Android)\b/i.test(ua) && !/Mobile/i.test(ua)],
    ['desktop', () => /\b(Macintosh|Windows NT|X11|Linux(?!.*Android))\b/i.test(ua)],
    ['server', () => /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot)\b/i.test(ua)],
  ];

  const matched = rules.find(([, applies]) => applies());
  return matched ? matched[0] : 'unknown';
}
|
|
249
|
+
|
|
250
|
+
/**
 * Classify the operating-system platform advertised by a User-Agent string.
 *
 * Checks run in a fixed order and the first match wins; iOS and Android are
 * tested before the desktop platforms.
 *
 * @param {string} ua  User-Agent string
 * @returns {'windows'|'mac'|'ios'|'android'|'linux'|'other'}
 */
function detectPlatform(ua) {
  // [platform, patterns] — a platform matches when any of its patterns does.
  const table = [
    ['ios', [/\b(iPhone|iPad|iPod)\b/i]],
    ['android', [/\bAndroid\b/i]],
    ['mac', [/\bMacintosh\b/i]],
    ['windows', [/\bWindows\b/i]],
    ['linux', [/\bLinux\b/i, /\bX11\b/i]],
  ];

  for (const [platform, patterns] of table) {
    if (patterns.some((pattern) => pattern.test(ua))) {
      return platform;
    }
  }
  return 'other';
}
|
|
259
|
+
|
|
260
|
+
/**
 * Classify the browser family advertised by a User-Agent string.
 *
 * Evaluation order matters because several families embed each other's
 * tokens: crawlers first, then Edge (its UA also contains "Chrome"), then
 * Firefox, then Safari (only when no Chrome-like token is present), and
 * finally Chrome / Chromium / HeadlessChrome.
 *
 * @param {string} ua  User-Agent string
 * @returns {'chrome'|'safari'|'firefox'|'edge'|'other'|'bot'}
 */
function detectFamily(ua) {
  const isCrawler = /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot|Slurp|ia_archiver)\b/i.test(ua);
  if (isCrawler) {
    return 'bot';
  }

  const isEdge = /\bEdg(?:e|A)?\/\d/i.test(ua);
  if (isEdge) {
    return 'edge';
  }

  const isFirefox = /\bFirefox\//i.test(ua);
  if (isFirefox) {
    return 'firefox';
  }

  // Safari: has "Safari/" but NOT "Chrome", "Chromium" or "HeadlessChrome".
  const hasSafariToken = /\bSafari\//i.test(ua);
  const hasChromeLikeToken = /Chrome|Chromium|HeadlessChrome/i.test(ua);
  if (hasSafariToken && !hasChromeLikeToken) {
    return 'safari';
  }

  const isChrome = /(?:\b|Headless)Chrom(?:e|ium)\//i.test(ua);
  return isChrome ? 'chrome' : 'other';
}
|
|
271
|
+
|
|
272
|
+
/**
 * Extract the major browser version from a User-Agent string.
 *
 * Patterns are tried from most to least specific and the first one that
 * matches supplies the version:
 *   1. Edge (Edg/, Edge/, EdgA/)
 *   2. Firefox
 *   3. Chrome / Chromium / HeadlessChrome
 *   4. Safari's Version/NN token (not the Safari/605 build number)
 *   5. Generic fallback: the first "/<digits>" anywhere in the string
 *
 * @param {string} ua
 * @returns {number|null} Major version, or null if no pattern matches
 */
function extractMajorVersion(ua) {
  const patterns = [
    /\bEdg(?:e|A)?\/(\d+)/,
    /\bFirefox\/(\d+)/,
    /(?:\b|Headless)Chrom(?:e|ium)\/(\d+)/,
    /\bVersion\/(\d+)/,
    /\/(\d+)/,
  ];

  for (const pattern of patterns) {
    const hit = ua.match(pattern);
    if (hit) {
      return parseInt(hit[1], 10);
    }
  }
  return null;
}
|
|
294
|
+
|
|
295
|
+
/**
 * Map a major version number onto a coarse range token.
 *
 * Buckets: '0-79', '80-99', '100-119', '120-139', '140+'. A missing version
 * (null or undefined) is reported in the lowest bucket.
 *
 * @param {number|null} ver
 * @returns {string} Range token
 */
function bucketVersion(ver) {
  if (ver == null) return '0-79';

  // [exclusive upper bound, label], in ascending order.
  const buckets = [
    [80, '0-79'],
    [100, '80-99'],
    [120, '100-119'],
    [140, '120-139'],
  ];

  const bucket = buckets.find(([upperBound]) => ver < upperBound);
  return bucket ? bucket[1] : '140+';
}
|
|
308
|
+
|
|
309
|
+
// ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
|
|
310
|
+
/**
 * Reduce a User-Agent string to an SF-Dictionary of derived features.
 *
 * Members, in output order:
 *   - `dpf=<device>/<platform>/<family>`
 *   - `ver=<version bucket>`
 *   - `browser`      when the string starts with "Mozilla/"
 *   - `headless`     when a HEADLESS_MARKERS pattern matches
 *   - `automation`   when an AUTOMATION_MARKERS pattern matches
 *   - `entropy=<low|medium|high>`
 *
 * @param {string|null|undefined} userAgent  Raw User-Agent string
 * @returns {string|null} SF-Dictionary string or null if absent/empty
 */
export function extractUAFeatures(userAgent) {
  if (!userAgent) return null;

  const ua = userAgent.trim();
  if (ua === '') return null;

  const dpf = [detectDevice(ua), detectPlatform(ua), detectFamily(ua)].join('/');
  const versionBucket = bucketVersion(extractMajorVersion(ua));
  const features = [`dpf=${dpf}`, `ver=${versionBucket}`];

  const matchesAny = (markers) => markers.some((re) => re.test(ua));

  if (/^Mozilla\//i.test(ua)) {
    features.push('browser');
  }
  if (matchesAny(HEADLESS_MARKERS)) {
    features.push('headless');
  }
  if (matchesAny(AUTOMATION_MARKERS)) {
    features.push('automation');
  }

  features.push(`entropy=${computeUAEntropy(ua)}`);

  return features.join(', ');
}
|
|
337
|
+
|
|
338
|
+
// ── §6.3.2 computeUAHMAC ─────────────────────────────────────────────────
|
|
339
|
+
/**
|
|
340
|
+
* Compute HMAC-SHA256 of the raw User-Agent, returned as an RFC 8941
|
|
341
|
+
* Byte Sequence string (:base64:).
|
|
342
|
+
*
|
|
343
|
+
* Uses crypto.subtle — compatible with Cloudflare Workers and modern Node.
|
|
344
|
+
*
|
|
345
|
+
* @param {string} userAgent Raw User-Agent string
|
|
346
|
+
* @param {string} hmacKey HMAC secret key (plain text)
|
|
347
|
+
* @returns {Promise<string|null>} RFC 8941 Byte Sequence or null if inputs missing
|
|
348
|
+
*/
|
|
349
|
+
export async function computeUAHMAC(userAgent, hmacKey) {
|
|
350
|
+
if (!userAgent || !hmacKey) return null;
|
|
351
|
+
|
|
352
|
+
const enc = new TextEncoder();
|
|
353
|
+
const key = await crypto.subtle.importKey(
|
|
354
|
+
'raw', enc.encode(hmacKey),
|
|
355
|
+
{ name: 'HMAC', hash: 'SHA-256' },
|
|
356
|
+
false, ['sign']
|
|
357
|
+
);
|
|
358
|
+
const sig = await crypto.subtle.sign('HMAC', key, enc.encode(userAgent));
|
|
359
|
+
const b64 = btoa(String.fromCharCode(...new Uint8Array(sig)));
|
|
360
|
+
return `:${b64}:`;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// ── §6.4 computeConfidenceToken ───────────────────────────────────────────
|
|
364
|
+
/**
|
|
365
|
+
* Compute the confidence token.
|
|
366
|
+
* ct = SHA-256(userAgent + acceptLanguage + secChUA)[0:8] hex
|
|
367
|
+
*
|
|
368
|
+
* Matches the logic in cloud-api computeConfidenceFingerprint().
|
|
369
|
+
*
|
|
370
|
+
* @param {string|null|undefined} userAgent Raw User-Agent
|
|
371
|
+
* @param {string|null|undefined} acceptLanguage Raw Accept-Language
|
|
372
|
+
* @param {string|null|undefined} secChUA Raw Sec-CH-UA
|
|
373
|
+
* @returns {Promise<string>} 8-char hex token, never null
|
|
374
|
+
*/
|
|
375
|
+
export async function computeConfidenceToken(userAgent, acceptLanguage, secChUA) {
|
|
376
|
+
const ua = userAgent || '';
|
|
377
|
+
const lang = acceptLanguage || '';
|
|
378
|
+
const ch = secChUA || '';
|
|
379
|
+
|
|
380
|
+
const msgBuffer = new TextEncoder().encode(ua + lang + ch);
|
|
381
|
+
const hashBuffer = await crypto.subtle.digest('SHA-256', msgBuffer);
|
|
382
|
+
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
|
383
|
+
const hex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
|
|
384
|
+
return hex.slice(0, 8);
|
|
385
|
+
}
|