@paywalls-net/filter 1.3.9 → 1.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,524 @@
1
+ /**
2
+ * Signal Extraction Module — Tier 2 + Tier 3 feature extractors
3
+ *
4
+ * Transforms raw browser headers into compact RFC 8941 Structured Field
5
+ * Dictionary strings for privacy-preserving VAI signal forwarding.
6
+ *
7
+ * Spec: specs/vai-privacy-v2.spec.md §6.2–§6.4
8
+ *
9
+ * Each function returns an SF-Dictionary string (e.g. "html, wildcard")
10
+ * or null if the input is absent/empty. null means the caller should
11
+ * omit the header entirely (not send an empty value).
12
+ */
13
+
14
+ // ── VAI Metadata: dynamic loading with hardcoded fallbacks ──────────────────
15
+ // (paywalls-site-fc4)
16
+ //
17
+ // These module-level vars are initialized from hardcoded defaults below.
18
+ // When loadVAIMetadata() is called, they are updated from the cloud-api
19
+ // /pw/vai/metadata endpoint. If the fetch fails, the hardcoded defaults
20
+ // remain in effect — no data loss, no crash.
21
+
22
+ // ── Hardcoded defaults (bootstrap / fallback) ──────────────────────────────
23
+
// Bootstrap list of autonomous system numbers classified as datacenter /
// hosting networks. Used until (and unless) remote metadata replaces it.
const DEFAULT_DC_ASNS = [
  // Major IaaS
  16509, 14618,          // Amazon AWS (primary + secondary)
  396982, 36492, 15169,  // Google Cloud + Google infra
  8075, 8069, 8068,      // Microsoft Azure
  31898,                 // Oracle Cloud
  36351,                 // IBM Cloud / SoftLayer
  45102,                 // Alibaba Cloud
  132203,                // Tencent Cloud

  // VPS / Hosting
  14061,                 // DigitalOcean
  24940, 213230,         // Hetzner (dedicated + cloud)
  16276,                 // OVH
  63949,                 // Linode / Akamai Connected Cloud
  20473,                 // Vultr / The Constant Company
  12876,                 // Scaleway
  51167,                 // Contabo
  60781, 28753,          // Leaseweb (NL + global)
];

// Regex sources (compiled case-insensitively) that mark automation tooling
// and non-browser HTTP clients in a User-Agent string.
const DEFAULT_AUTOMATION_PATTERNS = [
  'Puppeteer',
  'Playwright',
  'Selenium',
  'WebDriver',
  'PhantomJS',
  'CasperJS',
  'python-requests',
  'python-urllib',
  'Go-http-client',
  'okhttp',
  'Apache-HttpClient',
  'libcurl',
  '\\bcurl\\/',
  '\\bwget\\/',
  'HTTPie',
  'node-fetch',
  'undici',
  'axios\\/',
  '\\bgot\\/',
  'superagent',
  'Cypress',
  'TestCafe',
  'Nightwatch',
  'WebdriverIO',
  'Scrapy',
  'Java\\/|Java HttpURLConnection',
  'PostmanRuntime\\/',
  '\\bDeno\\/',
  '\\bhttpx\\b|python-httpx',
];

// Regex sources that mark a headless browser.
const DEFAULT_HEADLESS_PATTERNS = [
  'HeadlessChrome',
  '\\bHeadless\\b',
];

// Crawler names; joined into one alternation for bot family detection.
const DEFAULT_BOT_PATTERNS = [
  'Googlebot',
  'bingbot',
  'Baiduspider',
  'YandexBot',
  'DuckDuckBot',
  'Slurp',
  'ia_archiver',
  'GPTBot',
  'ClaudeBot',
  'CCBot',
  'Bytespider',
  'Applebot',
  'PetalBot',
  'SemrushBot',
  'AhrefsBot',
  'DotBot',
];

// ── Mutable state: reassigned by loadVAIMetadata() / _resetVAIMetadata() ────

/** @type {Set<number>} ASNs currently treated as cloud/datacenter networks */
let DC_ASN_SET = new Set(DEFAULT_DC_ASNS);

/** @type {RegExp[]} case-insensitive automation matchers */
let AUTOMATION_MARKERS = DEFAULT_AUTOMATION_PATTERNS.map((source) => new RegExp(source, 'i'));

/** @type {RegExp[]} case-insensitive headless matchers */
let HEADLESS_MARKERS = DEFAULT_HEADLESS_PATTERNS.map((source) => new RegExp(source, 'i'));

/** @type {RegExp} single word-bounded alternation over all bot names */
let BOT_FAMILY_RE = new RegExp(`\\b(${DEFAULT_BOT_PATTERNS.join('|')})\\b`, 'i');

// ── Metadata cache ──────────────────────────────────────────────────────────

// Shape when set: { data, ts }. null means nothing has been fetched yet.
let _vaiMetadataCache = null;

// Cache lifetime in milliseconds (one hour).
const VAI_METADATA_TTL = 1000 * 60 * 60;
85
+
/**
 * Turn a list of regex source strings (as delivered in the metadata JSON)
 * into RegExp objects. Every pattern is compiled with the `i` flag only.
 *
 * Throws (SyntaxError from the RegExp constructor) if a source is malformed.
 *
 * @param {string[]} patterns  Regex source strings
 * @returns {RegExp[]}         One compiled regex per input, in input order
 */
function compilePatterns(patterns) {
  const compiled = [];
  for (const source of patterns) {
    compiled.push(new RegExp(source, 'i'));
  }
  return compiled;
}
95
+
/**
 * Fetch VAI metadata from cloud-api and update the mutable module state
 * (DC_ASN_SET, AUTOMATION_MARKERS, HEADLESS_MARKERS, BOT_FAMILY_RE).
 *
 * A successful fetch is cached for VAI_METADATA_TTL (1 hour). On any failure
 * (network error, non-2xx status, invalid schema, malformed regex pattern)
 * the error is logged, the module state is left exactly as it was before the
 * call, and the next attempt is deferred for 5 minutes. This function never
 * rejects.
 *
 * Pattern: matches loadAgentPatterns() in user-agent-classification.js.
 *
 * @param {Object} cfg  Config with paywallsAPIHost (cloud-api base URL)
 * @returns {Promise<void>}
 */
export async function loadVAIMetadata(cfg) {
  const now = Date.now();

  // Return early if cache (or the failure back-off window) is still valid
  if (_vaiMetadataCache && (now - _vaiMetadataCache.ts) < VAI_METADATA_TTL) {
    return;
  }

  const hasEntries = (value) => Array.isArray(value) && value.length > 0;

  try {
    const response = await fetch(`${cfg.paywallsAPIHost}/pw/vai/metadata`, {
      method: 'GET',
      headers: { 'Accept': 'application/json' },
    });

    if (!response.ok) {
      throw new Error(`VAI metadata fetch failed: ${response.status} ${response.statusText}`);
    }

    const data = await response.json();

    // Validate minimal schema
    if (!data || typeof data.version !== 'number') {
      throw new Error('VAI metadata: invalid schema (missing version)');
    }

    // Stage every replacement before touching module state. new RegExp()
    // throws on a malformed pattern; building everything first keeps the
    // update all-or-nothing instead of leaving a half-applied mix of remote
    // and previous values behind when a later pattern list is bad.
    // Missing or empty lists keep the current value.
    const nextAsnSet = hasEntries(data.dc_asns)
      ? new Set(data.dc_asns)
      : DC_ASN_SET;
    const nextAutomation = hasEntries(data.automation_patterns)
      ? compilePatterns(data.automation_patterns)
      : AUTOMATION_MARKERS;
    const nextHeadless = hasEntries(data.headless_patterns)
      ? compilePatterns(data.headless_patterns)
      : HEADLESS_MARKERS;
    const nextBotFamily = hasEntries(data.bot_patterns)
      ? new RegExp('\\b(' + data.bot_patterns.join('|') + ')\\b', 'i')
      : BOT_FAMILY_RE;

    // Commit: nothing below can throw.
    DC_ASN_SET = nextAsnSet;
    AUTOMATION_MARKERS = nextAutomation;
    HEADLESS_MARKERS = nextHeadless;
    BOT_FAMILY_RE = nextBotFamily;

    _vaiMetadataCache = { data, ts: now };
  } catch (error) {
    console.error('loadVAIMetadata: fetch failed, using hardcoded defaults.', error.message || error);
    // Back-date the timestamp so the entry expires 5 minutes from now:
    // avoids hammering the endpoint without waiting a full TTL to retry.
    _vaiMetadataCache = { data: null, ts: now - VAI_METADATA_TTL + (5 * 60 * 1000) };
  }
}
151
+
/**
 * Restore every piece of mutable metadata state to its hardcoded default
 * and drop the metadata cache, so the next loadVAIMetadata() call fetches.
 * Exposed for testing only.
 */
export function _resetVAIMetadata() {
  const toRegExp = (source) => new RegExp(source, 'i');
  const botAlternation = DEFAULT_BOT_PATTERNS.join('|');

  DC_ASN_SET = new Set(DEFAULT_DC_ASNS);
  AUTOMATION_MARKERS = DEFAULT_AUTOMATION_PATTERNS.map((source) => toRegExp(source));
  HEADLESS_MARKERS = DEFAULT_HEADLESS_PATTERNS.map((source) => toRegExp(source));
  BOT_FAMILY_RE = new RegExp(`\\b(${botAlternation})\\b`, 'i');
  _vaiMetadataCache = null;
}
163
+
164
+ // ── §6.2.1 Accept → X-PW-Accept ──────────────────────────────────────────
/**
 * Extract boolean feature flags from the Accept header.
 *
 * Flags, always emitted in this order when present:
 *   html      — header mentions text/html
 *   wildcard  — header mentions the full wildcard range
 *   json      — header mentions application/json
 *   image     — header mentions any image/ type
 *
 * Matching is case-insensitive because media type and subtype names are
 * case-insensitive (RFC 9110 §8.3.1); "TEXT/HTML" counts as html.
 *
 * @param {string|null|undefined} accept  Raw Accept header value
 * @returns {string|null} SF-Dictionary string or null if absent/empty
 *                        or if none of the flags apply
 */
export function extractAcceptFeatures(accept) {
  if (!accept) return null;

  const normalized = String(accept).toLowerCase();

  // [substring to look for, flag to emit] — order defines output order.
  const flagTable = [
    ['text/html', 'html'],
    ['*/*', 'wildcard'],
    ['application/json', 'json'],
    ['image/', 'image'],
  ];

  const parts = flagTable
    .filter(([needle]) => normalized.includes(needle))
    .map(([, flag]) => flag);

  return parts.length > 0 ? parts.join(', ') : null;
}
182
+
183
+ // ── §6.2.2 Accept-Encoding → X-PW-Enc ────────────────────────────────────
/**
 * Extract boolean feature flags from the Accept-Encoding header.
 *
 * Flags, in output order:
 *   br      — the "br" content coding is listed
 *   gzip    — "gzip" (or its alias "x-gzip") is listed
 *   modern  — both of the above are listed
 *
 * The header is split into comma-separated codings, parameters (";q=…") are
 * dropped, and names are compared case-insensitively as whole tokens. This
 * avoids false positives from unrelated tokens that merely contain "br" and
 * false negatives for upper-case spellings such as "GZIP".
 *
 * @param {string|null|undefined} acceptEncoding  Raw Accept-Encoding value
 * @returns {string|null} SF-Dictionary string or null if absent/empty
 *                        or if neither coding is listed
 */
export function extractEncodingFeatures(acceptEncoding) {
  if (!acceptEncoding) return null;

  const codings = new Set(
    String(acceptEncoding)
      .split(',')
      .map((entry) => entry.split(';')[0].trim().toLowerCase())
      .filter(Boolean)
  );

  const hasBr = codings.has('br');
  // "x-gzip" is kept because substring matching previously accepted it.
  const hasGzip = codings.has('gzip') || codings.has('x-gzip');

  const parts = [];
  if (hasBr) parts.push('br');
  if (hasGzip) parts.push('gzip');
  if (hasBr && hasGzip) parts.push('modern');

  return parts.length > 0 ? parts.join(', ') : null;
}
203
+
204
+ // ── §6.2.3 Accept-Language → X-PW-Lang ───────────────────────────────────
/**
 * Extract presence, primary language family, and locale count from
 * the Accept-Language header.
 *
 * Output keys:
 *   present          — always emitted when the header carries a locale
 *   primary=<xx>     — first two characters of the first locale, lowercased
 *   count=<n>        — number of non-empty comma-separated locales
 *
 * The header is client-controlled, so `primary` is only emitted when it is
 * one or two ASCII letters (or "*"). Anything else (quotes, spaces, "=", …)
 * would produce a malformed Structured Field value, so the key is omitted
 * for such input while `present` and `count` are still reported.
 *
 * @param {string|null|undefined} acceptLanguage  Raw Accept-Language value
 * @returns {string|null} SF-Dictionary string or null if absent/empty/"*"
 */
export function extractLanguageFeatures(acceptLanguage) {
  if (!acceptLanguage) return null;

  const trimmed = acceptLanguage.trim();
  if (trimmed === '' || trimmed === '*') return null;

  // One entry per comma-separated range, with any ";q=…" weight removed.
  const locales = trimmed
    .split(',')
    .map((entry) => entry.split(';')[0].trim())
    .filter(Boolean);
  if (locales.length === 0) return null;

  // Primary language family = first 2 chars of first locale (lowercase)
  const primary = locales[0].toLowerCase().slice(0, 2);

  const parts = ['present'];
  if (/^(?:[a-z]{1,2}|\*)$/.test(primary)) {
    parts.push(`primary=${primary}`);
  }
  parts.push(`count=${locales.length}`);

  return parts.join(', ');
}
230
+
231
+ // ── §6.2.4 ASN → X-PW-Net ────────────────────────────────────────────────
/**
 * Classify an ASN as belonging to a known datacenter network or not.
 *
 * Numbers are used as-is; any other value is parsed as a base-10 integer.
 * Membership is checked against the module-level DC_ASN_SET.
 *
 * @param {string|number|null|undefined} asn  Numeric ASN value
 * @returns {string|null} "asn=cloud" or "asn=consumer"; null when the input
 *                        is null/undefined/empty or does not parse to a number
 */
export function extractNetFeatures(asn) {
  const isMissing = asn == null || asn === '';
  if (isMissing) return null;

  const numericAsn = typeof asn === 'number' ? asn : Number.parseInt(asn, 10);
  if (Number.isNaN(numericAsn)) return null;

  return DC_ASN_SET.has(numericAsn) ? 'asn=cloud' : 'asn=consumer';
}
247
+
248
+ // ── §6.2.5 Sec-CH-UA → X-PW-CH ───────────────────────────────────────────
249
+
/**
 * Pull the major Chrome version out of a Sec-CH-UA header value.
 *
 * Only the "Google Chrome" and "Chromium" brands are considered; the first
 * one found in the header wins. Expected entry shape: "Brand";v="version".
 *
 * @param {string} secChUA  Raw Sec-CH-UA header
 * @returns {number|null}   Major version, or null when neither brand appears
 */
function extractChromeVersionFromCH(secChUA) {
  const chromeBrandEntry = /"(?:Google Chrome|Chromium)";v="(\d+)"/;
  const found = secChUA.match(chromeBrandEntry);
  if (!found) return null;

  const [, majorDigits] = found;
  return parseInt(majorDigits, 10);
}
262
+
/**
 * Pull the major Chrome version out of a User-Agent string by locating the
 * first "Chrome/<digits>" product token (e.g. "...Chrome/134.0.0.0...").
 *
 * @param {string} userAgent  Raw User-Agent string
 * @returns {number|null}     Major version, or null when no such token exists
 */
function extractChromeVersionFromUA(userAgent) {
  const found = userAgent.match(/Chrome\/(\d+)/);
  if (!found) return null;

  const [, majorDigits] = found;
  return parseInt(majorDigits, 10);
}
274
+
/**
 * Derive features from the Sec-CH-UA header, using the User-Agent only to
 * check that both report the same Chrome major version.
 *
 * Output keys, in order:
 *   present      — always emitted when the header is non-blank
 *   brands=<n>   — number of non-empty comma-separated entries
 *   grease       — some entry looks like a "Not…Brand" placeholder brand
 *   consistent   — Chrome major version in the header equals the one in
 *                  the User-Agent (both must be found)
 *
 * @param {string|null|undefined} secChUA    Raw Sec-CH-UA header value
 * @param {string|null|undefined} userAgent  Raw User-Agent string (for consistency check)
 * @returns {string|null} SF-Dictionary string or null if CH absent/empty
 */
export function extractCHFeatures(secChUA, userAgent) {
  const header = secChUA ? secChUA.trim() : '';
  if (header === '') return null;

  // Each entry has the form "Brand";v="version"; entries are comma-separated.
  const brandEntries = header
    .split(',')
    .map((entry) => entry.trim())
    .filter(Boolean);

  const features = ['present', `brands=${brandEntries.length}`];

  // GREASE detection: Chromium convention includes a "Not" brand
  const looksLikeGrease = (entry) =>
    /not[^"]*brand/i.test(entry) || /not[:\-_.]/i.test(entry);
  if (brandEntries.some(looksLikeGrease)) {
    features.push('grease');
  }

  // Without a User-Agent there is nothing to compare against.
  if (userAgent) {
    const versionFromHints = extractChromeVersionFromCH(header);
    const versionFromUA = extractChromeVersionFromUA(userAgent);
    const bothKnown = versionFromHints != null && versionFromUA != null;
    if (bothKnown && versionFromHints === versionFromUA) {
      features.push('consistent');
    }
  }

  return features.join(', ');
}
311
+
312
+ // ═══════════════════════════════════════════════════════════════════════════
313
+ // Tier 3 — Replace User-Agent with derived features (§6.3) + CT (§6.4)
314
+ // ═══════════════════════════════════════════════════════════════════════════
315
+
316
+ // ── §6.3.3 Automation marker detection ────────────────────────────────────
317
+ // HeadlessChrome triggers 'headless' only (via HEADLESS_MARKERS).
318
+ // Explicit automation tools (Puppeteer, Selenium, etc.) trigger 'automation'.
319
+ // AUTOMATION_MARKERS and HEADLESS_MARKERS are now module-level mutable vars
320
+ // initialized from hardcoded defaults (top of file) and updated dynamically
321
+ // by loadVAIMetadata(). See paywalls-site-fc4.
322
+
323
+ // ── §6.3.4 Entropy bucketing ──────────────────────────────────────────────
/**
 * Bucket a User-Agent string's structural complexity.
 *
 * Decision order:
 *   1. missing or shorter than 10 chars                          → 'low'
 *   2. typical browser shape (all 4 character classes, 60–250
 *      chars, contains a parenthesised group)                    → 'medium'
 *   3. shorter than 40, longer than 300, or fewer than 3
 *      character classes                                         → 'low'
 *   4. more than 70% of the characters are distinct
 *      (looks like a random string)                              → 'high'
 *   5. everything else                                           → 'medium'
 *
 * Character classes: upper-case letter, lower-case letter, digit, and
 * "special" (slash, dot, semicolon, parens, whitespace, comma, underscore,
 * hyphen).
 *
 * An earlier revision had a second 'medium' shortcut (3+ classes, 40–300
 * chars) ahead of step 3. It matched every input that could reach steps 4
 * and 5, so 'high' could never be returned. It is removed here; step 5
 * yields the same 'medium' for all non-random inputs it used to catch.
 *
 * @param {string} userAgent
 * @returns {'low'|'medium'|'high'}
 */
function computeUAEntropy(userAgent) {
  if (!userAgent || userAgent.length < 10) return 'low';

  const len = userAgent.length;
  const classCount = [
    /[A-Z]/,               // upper-case
    /[a-z]/,               // lower-case
    /\d/,                  // digit
    /[\/\.;()\s,_\-]/,     // special
  ].filter((charClass) => charClass.test(userAgent)).length;
  const hasParens = /\([^)]+\)/.test(userAgent);

  // Typical browser UA: 60-250 chars, 4 char classes, has parens
  if (classCount >= 4 && len >= 60 && len <= 250 && hasParens) return 'medium';

  // Very short, very long, or missing structure
  if (len < 40 || len > 300 || classCount < 3) return 'low';

  // Unusual: high-entropy random strings
  const uniqueChars = new Set(userAgent).size;
  if (uniqueChars / len > 0.7) return 'high';

  return 'medium';
}
354
+
355
+ // ── §6.3.1 UA dpf/version parsing ─────────────────────────────────────────
356
+
/**
 * Classify the device class advertised by a User-Agent string.
 *
 * Checks run top to bottom and the first hit wins, so ordering is part of
 * the contract: smart TVs are tested before tablet/mobile because some TV
 * UAs also contain "Android", and crawlers are only reported as 'server'
 * when no device/OS marker matched first.
 *
 * @param {string} ua  Trimmed User-Agent string
 * @returns {'desktop'|'mobile'|'tablet'|'smarttv'|'console'|'car'|'wearable'|'vr'|'server'|'unknown'}
 */
function detectDevice(ua) {
  const hit = (pattern) => pattern.test(ua);

  const orderedChecks = [
    // Smart TV
    ['smarttv', () => hit(/SmartTV|SMART-TV|\bTizen\b|\bWebOS\b|\bBRAVIA\b|\bVizio\b|\bRoku\b|\bAppleTV\b|\bFire TV\b|\bAndroidTV\b|\btvOS\b|\bHBBTV\b/i)],
    // Gaming consoles
    ['console', () => hit(/\b(PlayStation|PLAYSTATION|Xbox|Nintendo)\b/i)],
    // VR headsets (Meta Quest / Oculus)
    ['vr', () => hit(/OculusBrowser|\bQuest\b/i)],
    // Wearables
    ['wearable', () => hit(/\bWatch\b|\bwearable\b/i)],
    // Automotive
    ['car', () => hit(/\bTesla\b|\bCarPlay\b/i)],
    // Explicit tablet markers
    ['tablet', () => hit(/\b(iPad|Tablet|PlayBook|Silk|Kindle)\b/i)],
    // Phones
    ['mobile', () => hit(/\b(iPhone|iPod|Android.*Mobile|Mobile.*Android|webOS|BlackBerry|Opera Mini|IEMobile|Windows Phone)\b/i)],
    // Android without a "Mobile" marker is treated as a tablet
    ['tablet', () => hit(/\b(Android)\b/i) && !hit(/Mobile/i)],
    // Desktop operating systems
    ['desktop', () => hit(/\b(Macintosh|Windows NT|X11|Linux(?!.*Android))\b/i)],
    // Search-engine crawlers with no device marker
    ['server', () => hit(/\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot)\b/i)],
  ];

  for (const [device, applies] of orderedChecks) {
    if (applies()) return device;
  }
  return 'unknown';
}
376
+
/**
 * Classify the operating-system platform advertised by a User-Agent string.
 *
 * Rules are evaluated in order and the first match wins. iOS and Android are
 * tested first, and FreeBSD before the Linux/X11 rules, so a UA that carries
 * an X11 token alongside a more specific marker gets the specific platform.
 *
 * @param {string} ua  Trimmed User-Agent string
 * @returns {'windows'|'mac'|'ios'|'android'|'linux'|'chromeos'|'freebsd'|'other'}
 */
function detectPlatform(ua) {
  const orderedRules = [
    ['ios', /\b(iPhone|iPad|iPod)\b/i],
    ['android', /\bAndroid\b/i],
    ['chromeos', /\bCrOS\b/i],
    ['mac', /\bMacintosh\b/i],
    ['windows', /\bWindows\b/i],
    ['freebsd', /\bFreeBSD\b/i],
    ['linux', /\bLinux\b/i],
    ['linux', /\bX11\b/i],
  ];

  const matched = orderedRules.find(([, pattern]) => pattern.test(ua));
  return matched ? matched[0] : 'other';
}
388
+
/**
 * Classify the browser family advertised by a User-Agent string.
 *
 * Evaluation order matters and the first match wins:
 *   bot        — BOT_FAMILY_RE (module state, refreshed by loadVAIMetadata)
 *   ucbrowser  — tested before Chrome
 *   edge       — tested before Chrome because Edge UAs contain "Chrome"
 *   firefox
 *   safari     — has "Safari/" and no Chrome/Chromium marker
 *   chrome     — Chrome, Chromium or HeadlessChrome product token; Opera and
 *                Brave are deliberately left in this family
 *
 * @param {string} ua  Trimmed User-Agent string
 * @returns {'chrome'|'safari'|'firefox'|'edge'|'ucbrowser'|'other'|'bot'}
 */
function detectFamily(ua) {
  const has = (pattern) => pattern.test(ua);

  const orderedFamilies = [
    ['bot', () => has(BOT_FAMILY_RE)],
    ['ucbrowser', () => has(/UCBrowser|UCWEB/i)],
    ['edge', () => has(/\bEdg(?:e|A)?\/\d/i)],
    ['firefox', () => has(/\bFirefox\//i)],
    ['safari', () => has(/\bSafari\//i) && !has(/Chrome|Chromium|HeadlessChrome/i)],
    ['chrome', () => has(/(?:\b|Headless)Chrom(?:e|ium)\//i)],
  ];

  for (const [family, applies] of orderedFamilies) {
    if (applies()) return family;
  }
  return 'other';
}
405
+
/**
 * Extract the major browser version from a User-Agent string.
 *
 * Patterns are tried from most to least specific; the first match wins:
 *   1. Edge            — Edg/, Edge/, EdgA/
 *   2. Firefox         — Firefox/
 *   3. Chrome family   — Chrome/, Chromium/, HeadlessChrome/
 *   4. Safari          — Version/ (not the Safari/605 build number)
 *   5. Generic         — first "/<digits>" anywhere in the string
 *
 * Product tokens are matched case-insensitively, the same way detectFamily()
 * matches them, so a UA that is classified as e.g. 'chrome' also gets its
 * version from the Chrome token rather than from the generic fallback.
 *
 * @param {string} ua
 * @returns {number|null}  Major version, or null when no "/<digits>" exists
 */
function extractMajorVersion(ua) {
  const versionPatterns = [
    /\bEdg(?:e|A)?\/(\d+)/i,
    /\bFirefox\/(\d+)/i,
    /(?:\b|Headless)Chrom(?:e|ium)\/(\d+)/i,
    /\bVersion\/(\d+)/i,
    /\/(\d+)/,
  ];

  for (const pattern of versionPatterns) {
    const found = ua.match(pattern);
    if (found) return Number.parseInt(found[1], 10);
  }
  return null;
}
428
+
/**
 * Map a major version number onto a coarse range token.
 *
 * Everything below 80 (and a missing version) collapses into "0-79".
 * From 80 upward, versions fall into consecutive 20-wide ranges
 * ("80-99", "100-119", …, "400-419"); 420 and above is reported as "420+".
 *
 * @param {number|null} ver  Major version, or null/undefined when unknown
 * @returns {string}         Range token
 */
function bucketVersion(ver) {
  const firstBucketStart = 80;
  const bucketWidth = 20;

  if (ver == null || ver < firstBucketStart) {
    return '0-79';
  }
  if (ver >= 420) {
    return '420+';
  }

  // Number of whole buckets between 80 and this version.
  const bucketsAbove = Math.floor((ver - firstBucketStart) / bucketWidth);
  const rangeStart = firstBucketStart + bucketsAbove * bucketWidth;
  const rangeEnd = rangeStart + bucketWidth - 1;
  return [rangeStart, rangeEnd].join('-');
}
447
+
448
+ // ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
/**
 * Reduce a User-Agent string to an SF-Dictionary of derived features.
 *
 * Output keys, in order:
 *   dpf=<device>/<platform>/<family>  — from detectDevice/Platform/Family
 *   ver=<range>                       — bucketed major version
 *   browser                           — UA starts with "Mozilla/"
 *   headless                          — any HEADLESS_MARKERS regex matches
 *   automation                        — any AUTOMATION_MARKERS regex matches
 *   entropy=<low|medium|high>         — from computeUAEntropy
 *
 * @param {string|null|undefined} userAgent  Raw User-Agent string
 * @returns {string|null} SF-Dictionary string or null if absent/blank
 */
export function extractUAFeatures(userAgent) {
  const ua = userAgent ? userAgent.trim() : '';
  if (ua === '') return null;

  const dpf = [detectDevice(ua), detectPlatform(ua), detectFamily(ua)].join('/');
  const versionRange = bucketVersion(extractMajorVersion(ua));
  const matchesAny = (markers) => markers.some((marker) => marker.test(ua));

  const features = [`dpf=${dpf}`, `ver=${versionRange}`];

  if (/^Mozilla\//i.test(ua)) {
    features.push('browser');
  }
  if (matchesAny(HEADLESS_MARKERS)) {
    features.push('headless');
  }
  if (matchesAny(AUTOMATION_MARKERS)) {
    features.push('automation');
  }

  features.push(`entropy=${computeUAEntropy(ua)}`);

  return features.join(', ');
}
476
+
477
+ // ── §6.3.2 computeUAHMAC ─────────────────────────────────────────────────
478
+ /**
479
+ * Compute HMAC-SHA256 of the raw User-Agent, returned as an RFC 8941
480
+ * Byte Sequence string (:base64:).
481
+ *
482
+ * Uses crypto.subtle — compatible with Cloudflare Workers and modern Node.
483
+ *
484
+ * @param {string} userAgent Raw User-Agent string
485
+ * @param {string} hmacKey HMAC secret key (plain text)
486
+ * @returns {Promise<string|null>} RFC 8941 Byte Sequence or null if inputs missing
487
+ */
488
+ export async function computeUAHMAC(userAgent, hmacKey) {
489
+ if (!userAgent || !hmacKey) return null;
490
+
491
+ const enc = new TextEncoder();
492
+ const key = await crypto.subtle.importKey(
493
+ 'raw', enc.encode(hmacKey),
494
+ { name: 'HMAC', hash: 'SHA-256' },
495
+ false, ['sign']
496
+ );
497
+ const sig = await crypto.subtle.sign('HMAC', key, enc.encode(userAgent));
498
+ const b64 = btoa(String.fromCharCode(...new Uint8Array(sig)));
499
+ return `:${b64}:`;
500
+ }
501
+
502
+ // ── §6.4 computeConfidenceToken ───────────────────────────────────────────
503
+ /**
504
+ * Compute the confidence token.
505
+ * ct = SHA-256(userAgent + acceptLanguage + secChUA)[0:8] hex
506
+ *
507
+ * Matches the logic in cloud-api computeConfidenceFingerprint().
508
+ *
509
+ * @param {string|null|undefined} userAgent Raw User-Agent
510
+ * @param {string|null|undefined} acceptLanguage Raw Accept-Language
511
+ * @param {string|null|undefined} secChUA Raw Sec-CH-UA
512
+ * @returns {Promise<string>} 8-char hex token, never null
513
+ */
514
+ export async function computeConfidenceToken(userAgent, acceptLanguage, secChUA) {
515
+ const ua = userAgent || '';
516
+ const lang = acceptLanguage || '';
517
+ const ch = secChUA || '';
518
+
519
+ const msgBuffer = new TextEncoder().encode(ua + lang + ch);
520
+ const hashBuffer = await crypto.subtle.digest('SHA-256', msgBuffer);
521
+ const hashArray = Array.from(new Uint8Array(hashBuffer));
522
+ const hex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
523
+ return hex.slice(0, 8);
524
+ }