@paywalls-net/filter 1.3.9 → 1.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/jest.config.js ADDED
@@ -0,0 +1,7 @@
1
// Jest configuration for the SDK's test suite.
const jestConfig = {
  // Tests run in a plain Node environment.
  testEnvironment: 'node',
  // Only look for tests under <rootDir>/tests, in files ending with .test.js.
  roots: ['<rootDir>/tests'],
  testMatch: ['**/*.test.js'],
  // Empty transform map; ESM support comes from the --experimental-vm-modules
  // flag passed by the npm test script.
  transform: {},
};

export default jestConfig;
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "description": "Client SDK for integrating paywalls.net bot filtering and authorization services into your server or CDN.",
4
4
  "author": "paywalls.net",
5
5
  "license": "MIT",
6
- "version": "1.3.9",
6
+ "version": "1.3.10",
7
7
  "publishConfig": {
8
8
  "access": "public"
9
9
  },
@@ -17,9 +17,13 @@
17
17
  ".": "./src/index.js"
18
18
  },
19
19
  "scripts": {
20
- "test": "echo \"Error: no test specified\" && exit 1"
20
+ "test": "node --experimental-vm-modules node_modules/.bin/jest --runInBand",
21
+ "test:watch": "node --experimental-vm-modules node_modules/.bin/jest --watch"
21
22
  },
22
23
  "dependencies": {
23
24
  "ua-parser-js": "^2.0.4"
25
+ },
26
+ "devDependencies": {
27
+ "jest": "^30.2.0"
24
28
  }
25
29
  }
package/src/index.js CHANGED
@@ -4,6 +4,11 @@
4
4
  */
5
5
  const sdk_version = "1.2.x";
6
6
  import { classifyUserAgent, loadAgentPatterns } from './user-agent-classification.js';
7
+ import {
8
+ extractAcceptFeatures, extractEncodingFeatures, extractLanguageFeatures,
9
+ extractNetFeatures, extractCHFeatures, extractUAFeatures,
10
+ computeUAHMAC, computeConfidenceToken,
11
+ } from './signal-extraction.js';
7
12
 
8
13
  const PAYWALLS_CLOUD_API_HOST = "https://cloud-api.paywalls.net";
9
14
 
@@ -91,27 +96,12 @@ const PROXY_HEADER_MAP = [
91
96
  ];
92
97
 
93
98
  /**
94
- * Maps browser signal sources → X-PW-* forwarding headers (§5.2).
95
- * Each entry: { from: 'headers'|'cf', src: string, dest: string }
96
- * from:'headers' — read from incoming request headers (lowercase)
97
- * from:'cf' — read from request.cf property
99
+ * Set a header only if the value is non-null (extractor returns null for
100
+ * absent inputs → header omitted entirely, not sent as empty string).
98
101
  */
99
- const SIGNAL_HEADER_MAP = [
100
- // Bundle A: Sec-Fetch (3 pts)
101
- { from: 'headers', src: 'sec-fetch-dest', dest: 'X-PW-Sec-Fetch-Dest' },
102
- { from: 'headers', src: 'sec-fetch-mode', dest: 'X-PW-Sec-Fetch-Mode' },
103
- { from: 'headers', src: 'sec-fetch-site', dest: 'X-PW-Sec-Fetch-Site' },
104
- // Bundle B: Accept (2 pts)
105
- { from: 'headers', src: 'accept', dest: 'X-PW-Accept' },
106
- { from: 'headers', src: 'accept-language', dest: 'X-PW-Accept-Language' },
107
- { from: 'headers', src: 'accept-encoding', dest: 'X-PW-Accept-Encoding' },
108
- // Bundle C: Client Hints (2 pts)
109
- { from: 'headers', src: 'sec-ch-ua', dest: 'X-PW-Sec-CH-UA' },
110
- // Bundle D: CF infrastructure (1 pt) — only valid at first-hop CF Worker
111
- { from: 'cf', src: 'tlsVersion', dest: 'X-PW-TLS-Version' },
112
- { from: 'cf', src: 'httpProtocol', dest: 'X-PW-HTTP-Protocol' },
113
- { from: 'cf', src: 'asn', dest: 'X-PW-ASN' },
114
- ];
102
function setIfPresent(obj, key, value) {
  // null and undefined both mean "absent": leave the target untouched.
  if (value === null || value === undefined) return;
  obj[key] = value;
}
115
105
 
116
106
  /**
117
107
  * Proxy VAI requests to the cloud-api service (Spec §7).
@@ -128,10 +118,9 @@ const SIGNAL_HEADER_MAP = [
128
118
  * - User-Agent, X-Forwarded-For: standard proxy headers
129
119
  * - Authorization: publisher API key (§7.4)
130
120
  *
131
- * Human-confidence signal forwarding (§5.2):
132
- * Driven by SIGNAL_HEADER_MAP — each entry specifies a source ('headers' or 'cf')
133
- * and property name to read, and the X-PW-* destination header to write.
134
- * Simple passthrough: present values forwarded, absent values omitted.
121
+ * Human-confidence signal forwarding (§7.2):
122
+ * Uses signal-extraction module to transform raw browser headers into compact
123
+ * RFC 8941 Structured Field Value strings. Absent inputs → null → header omitted.
135
124
  *
136
125
  * Response passthrough (§7.3):
137
126
  * All response headers from cloud-api are returned unchanged — including
@@ -155,7 +144,7 @@ async function proxyVAIRequest(cfg, request) {
155
144
  // Build forwarding headers — include everything cloud-api needs
156
145
  // for CORS evaluation, domain auth, and request context.
157
146
  const forwardHeaders = {
158
- 'User-Agent': headers['user-agent'] || sdkUserAgent,
147
+ 'User-Agent': sdkUserAgent,
159
148
  'Authorization': `Bearer ${cfg.paywallsAPIKey}`
160
149
  };
161
150
 
@@ -171,16 +160,29 @@ async function proxyVAIRequest(cfg, request) {
171
160
  if (headers[src]) forwardHeaders[dest] = headers[src];
172
161
  }
173
162
 
174
- // Forward browser-provenance signals as X-PW-* headers (§5.2).
175
- // Simple passthrough: forward whatever is present, no cross-request state.
163
+ // Signal protocol version (§7.1)
164
+ forwardHeaders['X-PW-V'] = '2';
165
+
176
166
  const cf = request.cf || {};
177
- const sources = { headers, cf };
178
- for (const { from, src, dest } of SIGNAL_HEADER_MAP) {
179
- const value = sources[from][src];
180
- if (value != null && value !== '') {
181
- forwardHeaders[dest] = String(value);
182
- }
183
- }
167
+
168
+ // Tier 1: kept raw (§6.1)
169
+ setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Dest', headers['sec-fetch-dest']);
170
+ setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Mode', headers['sec-fetch-mode']);
171
+ setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Site', headers['sec-fetch-site']);
172
+ setIfPresent(forwardHeaders, 'X-PW-TLS-Version', cf.tlsVersion != null ? String(cf.tlsVersion) : null);
173
+ setIfPresent(forwardHeaders, 'X-PW-HTTP-Protocol', cf.httpProtocol != null ? String(cf.httpProtocol) : null);
174
+
175
+ // Tier 2: extract features (§6.2)
176
+ setIfPresent(forwardHeaders, 'X-PW-Accept', extractAcceptFeatures(headers['accept']));
177
+ setIfPresent(forwardHeaders, 'X-PW-Enc', extractEncodingFeatures(headers['accept-encoding']));
178
+ setIfPresent(forwardHeaders, 'X-PW-Lang', extractLanguageFeatures(headers['accept-language']));
179
+ setIfPresent(forwardHeaders, 'X-PW-Net', extractNetFeatures(cf.asn));
180
+ setIfPresent(forwardHeaders, 'X-PW-CH', extractCHFeatures(headers['sec-ch-ua'], headers['user-agent']));
181
+
182
+ // Tier 3: UA features + HMAC (§6.3)
183
+ setIfPresent(forwardHeaders, 'X-PW-UA', extractUAFeatures(headers['user-agent']));
184
+ setIfPresent(forwardHeaders, 'X-PW-UA-HMAC', await computeUAHMAC(headers['user-agent'], cfg.vaiUAHmacKey));
185
+ setIfPresent(forwardHeaders, 'X-PW-CT-FP', await computeConfidenceToken(headers['user-agent'], headers['accept-language'], headers['sec-ch-ua']));
184
186
 
185
187
  // Forward request to cloud-api
186
188
  const response = await fetch(`${cfg.paywallsAPIHost}${cloudApiPath}`, {
@@ -417,7 +419,8 @@ async function cloudflare(config = null) {
417
419
  paywallsAPIHost: env.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
418
420
  paywallsAPIKey: env.PAYWALLS_CLOUD_API_KEY,
419
421
  paywallsPublisherId: env.PAYWALLS_PUBLISHER_ID,
420
- vaiPath: env.PAYWALLS_VAI_PATH || '/pw'
422
+ vaiPath: env.PAYWALLS_VAI_PATH || '/pw',
423
+ vaiUAHmacKey: env.VAI_UA_HMAC_KEY || null,
421
424
  };
422
425
 
423
426
  // Check if this is a VAI endpoint request and proxy it
@@ -449,7 +452,8 @@ async function fastly() {
449
452
  paywallsAPIHost: config.get('PAYWALLS_CLOUD_API_HOST') || PAYWALLS_CLOUD_API_HOST,
450
453
  paywallsAPIKey: config.get('PAYWALLS_API_KEY'),
451
454
  paywallsPublisherId: config.get('PAYWALLS_PUBLISHER_ID'),
452
- vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw'
455
+ vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw',
456
+ vaiUAHmacKey: config.get('VAI_UA_HMAC_KEY') || null,
453
457
  };
454
458
 
455
459
  // Check if this is a VAI endpoint request and proxy it
@@ -531,7 +535,8 @@ async function cloudfront(config) {
531
535
  paywallsAPIHost: config.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
532
536
  paywallsAPIKey: config.PAYWALLS_API_KEY,
533
537
  paywallsPublisherId: config.PAYWALLS_PUBLISHER_ID,
534
- vaiPath: config.PAYWALLS_VAI_PATH || '/pw'
538
+ vaiPath: config.PAYWALLS_VAI_PATH || '/pw',
539
+ vaiUAHmacKey: config.VAI_UA_HMAC_KEY || null,
535
540
  };
536
541
  await loadAgentPatterns(paywallsConfig);
537
542
 
@@ -0,0 +1,385 @@
1
+ /**
2
+ * Signal Extraction Module — Tier 2 + Tier 3 feature extractors
3
+ *
4
+ * Transforms raw browser headers into compact RFC 8941 Structured Field
5
+ * Dictionary strings for privacy-preserving VAI signal forwarding.
6
+ *
7
+ * Spec: specs/vai-privacy-v2.spec.md §6.2–§6.4
8
+ *
9
+ * Each function returns an SF-Dictionary string (e.g. "html, wildcard")
10
+ * or null if the input is absent/empty. null means the caller should
11
+ * omit the header entirely (not send an empty value).
12
+ */
13
+
14
// ── §6.2.4 / Appendix A: Data-center ASN set ──────────────────────────────
// ASNs of cloud and hosting providers, used to classify a request's network
// as data-center traffic. Kept in sync with cloud-api DC_ASN_LIST
// (cloudflare/vai.js). Source: public ASN registries (PeeringDB, RIPE, ARIN).
const DC_ASN_SET = new Set([
  // Major IaaS
  16509, // Amazon AWS (primary)
  14618, // Amazon AWS (secondary)
  396982, // Google Cloud + Google infra
  36492, // Google Cloud + Google infra
  15169, // Google Cloud + Google infra
  8075, // Microsoft Azure
  8069, // Microsoft Azure
  8068, // Microsoft Azure
  31898, // Oracle Cloud
  36351, // IBM Cloud / SoftLayer
  45102, // Alibaba Cloud
  132203, // Tencent Cloud

  // VPS / Hosting
  14061, // DigitalOcean
  24940, // Hetzner (dedicated)
  213230, // Hetzner (cloud)
  16276, // OVH
  63949, // Linode / Akamai Connected Cloud
  20473, // Vultr / The Constant Company
  12876, // Scaleway
  51167, // Contabo
  60781, // Leaseweb (NL)
  28753, // Leaseweb (global)
]);
38
+
39
+ // ── §6.2.1 Accept → X-PW-Accept ──────────────────────────────────────────
40
/**
 * Derive boolean feature flags from the Accept header.
 *
 * Flags are emitted in a fixed order: `html` (text/html), `wildcard` (*​/*),
 * `json` (application/json), `image` (any image/ type). Matching is
 * case-insensitive because media type names are case-insensitive.
 *
 * @param {string|null|undefined} accept  Raw Accept header value
 * @returns {string|null} Comma-separated flag list, or null when the header
 *   is absent/empty or none of the flags apply
 */
export function extractAcceptFeatures(accept) {
  if (!accept) return null;

  // Normalise once so "TEXT/HTML" and "text/html" are treated alike.
  const value = String(accept).toLowerCase();

  const flags = [
    ['text/html', 'html'],
    ['*/*', 'wildcard'],
    ['application/json', 'json'],
    ['image/', 'image'],
  ]
    .filter(([needle]) => value.includes(needle))
    .map(([, flag]) => flag);

  return flags.length > 0 ? flags.join(', ') : null;
}
57
+
58
+ // ── §6.2.2 Accept-Encoding → X-PW-Enc ────────────────────────────────────
59
/**
 * Derive boolean feature flags from the Accept-Encoding header.
 *
 * The header is split into content-coding tokens (parameters such as
 * `;q=0.8` are ignored) and compared case-insensitively, so a coding that
 * merely contains the letters "br" or "gzip" is not mistaken for the real
 * one. `x-gzip` is treated as an alias of `gzip`.
 *
 * Flags, in order: `br`, `gzip`, and `modern` when both are offered.
 *
 * @param {string|null|undefined} acceptEncoding  Raw Accept-Encoding value
 * @returns {string|null} Comma-separated flag list, or null when the header
 *   is absent/empty or neither coding is offered
 */
export function extractEncodingFeatures(acceptEncoding) {
  if (!acceptEncoding) return null;

  const codings = new Set(
    String(acceptEncoding)
      .split(',')
      .map((entry) => entry.split(';')[0].trim().toLowerCase())
      .filter(Boolean)
  );

  const hasBr = codings.has('br');
  const hasGzip = codings.has('gzip') || codings.has('x-gzip');

  const flags = [];
  if (hasBr) flags.push('br');
  if (hasGzip) flags.push('gzip');
  if (hasBr && hasGzip) flags.push('modern');

  return flags.length > 0 ? flags.join(', ') : null;
}
78
+
79
+ // ── §6.2.3 Accept-Language → X-PW-Lang ───────────────────────────────────
80
/**
 * Summarise the Accept-Language header as presence, primary language and
 * number of listed locales.
 *
 * The primary language is the lower-cased primary subtag of the first
 * locale when it is a 2–3 letter code followed by "-" or end of tag
 * (e.g. "en-US" → "en", "fil-PH" → "fil"). Anything else falls back to the
 * first two characters of the first locale.
 *
 * @param {string|null|undefined} acceptLanguage  Raw Accept-Language value
 * @returns {string|null} "present, primary=<lang>, count=<n>", or null when
 *   the header is absent, blank, a bare "*", or lists no locales
 */
export function extractLanguageFeatures(acceptLanguage) {
  if (!acceptLanguage) return null;

  const trimmed = String(acceptLanguage).trim();
  if (trimmed === '' || trimmed === '*') return null;

  // One entry per comma-separated locale; quality values (";q=…") are dropped.
  const locales = trimmed
    .split(',')
    .map((entry) => entry.split(';')[0].trim())
    .filter(Boolean);
  if (locales.length === 0) return null;

  const first = locales[0].toLowerCase();
  // Take the whole primary subtag so three-letter languages are not
  // truncated into a different two-letter code.
  const subtag = /^[a-z]{2,3}(?=-|$)/.exec(first);
  const primary = subtag ? subtag[0] : first.slice(0, 2);

  return `present, primary=${primary}, count=${locales.length}`;
}
105
+
106
+ // ── §6.2.4 ASN → X-PW-Net ────────────────────────────────────────────────
107
/**
 * Map an ASN to a coarse network category.
 *
 * Numbers are used as given; any other value is parsed as a base-10 integer.
 * ASNs listed in DC_ASN_SET are reported as `cloud`, all others as `consumer`.
 *
 * @param {string|number|null|undefined} asn  ASN value
 * @returns {string|null} "asn=cloud" or "asn=consumer", or null when the
 *   value is absent, empty, or not parseable as a number
 */
export function extractNetFeatures(asn) {
  if (asn === null || asn === undefined || asn === '') return null;

  const parsed = typeof asn === 'number' ? asn : parseInt(asn, 10);
  if (Number.isNaN(parsed)) return null;

  return DC_ASN_SET.has(parsed) ? 'asn=cloud' : 'asn=consumer';
}
122
+
123
+ // ── §6.2.5 Sec-CH-UA → X-PW-CH ───────────────────────────────────────────
124
+
125
/**
 * Read the major Chrome version out of a Sec-CH-UA header value.
 *
 * Entries have the form `"Brand";v="version"`; the first entry whose brand
 * is "Google Chrome" or "Chromium" supplies the version.
 *
 * @param {string} secChUA  Raw Sec-CH-UA header
 * @returns {number|null} Major version, or null when neither brand is listed
 */
function extractChromeVersionFromCH(secChUA) {
  const [, major] = secChUA.match(/"(?:Google Chrome|Chromium)";v="(\d+)"/) || [];
  if (major === undefined) return null;
  return parseInt(major, 10);
}
137
+
138
/**
 * Read the major Chrome version out of a User-Agent string by locating the
 * first `Chrome/<digits>` product token (e.g. "Chrome/134.0.0.0" → 134).
 *
 * @param {string} userAgent  Raw User-Agent string
 * @returns {number|null} Major version, or null when no such token exists
 */
function extractChromeVersionFromUA(userAgent) {
  const [, major] = userAgent.match(/Chrome\/(\d+)/) || [];
  if (major === undefined) return null;
  return parseInt(major, 10);
}
149
+
150
/**
 * Summarise the Sec-CH-UA header as a list of features.
 *
 * Always emits `present` and `brands=<n>` (number of comma-separated
 * entries). Adds `grease` when any entry looks like a Chromium "Not…Brand"
 * placeholder, and `consistent` when the Chrome major version found in the
 * header equals the one found in the User-Agent.
 *
 * @param {string|null|undefined} secChUA    Raw Sec-CH-UA header value
 * @param {string|null|undefined} userAgent  Raw User-Agent, used only for the
 *   consistency check
 * @returns {string|null} Comma-separated feature list, or null when the
 *   header is absent or blank
 */
export function extractCHFeatures(secChUA, userAgent) {
  if (!secChUA) return null;

  const header = secChUA.trim();
  if (header === '') return null;

  // Each comma-separated entry is one `"Brand";v="…"` pair.
  const entries = header
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry);
  const features = ['present', `brands=${entries.length}`];

  const looksLikeGrease = (entry) =>
    /not[^"]*brand/i.test(entry) || /not[:\-_.]/i.test(entry);
  if (entries.some(looksLikeGrease)) {
    features.push('grease');
  }

  if (userAgent) {
    const versionInCH = extractChromeVersionFromCH(header);
    const versionInUA = extractChromeVersionFromUA(userAgent);
    const versionsAgree =
      versionInCH != null && versionInUA != null && versionInCH === versionInUA;
    if (versionsAgree) {
      features.push('consistent');
    }
  }

  return features.join(', ');
}
186
+
187
+ // ═══════════════════════════════════════════════════════════════════════════
188
+ // Tier 3 — Replace User-Agent with derived features (§6.3) + CT (§6.4)
189
+ // ═══════════════════════════════════════════════════════════════════════════
190
+
191
// ── §6.3.3 Automation marker detection ────────────────────────────────────
// Patterns for explicit automation tools, HTTP client libraries and test
// frameworks; a match on any of them yields the 'automation' flag.
// HeadlessChrome is deliberately absent here: it is covered by
// HEADLESS_MARKERS and so yields 'headless' only.
const AUTOMATION_MARKERS = [
  // Browser automation drivers
  /Puppeteer/i,
  /Playwright/i,
  /Selenium/i,
  /WebDriver/i,
  /PhantomJS/i,
  /CasperJS/i,
  // HTTP client libraries and command-line tools
  /python-requests/i,
  /python-urllib/i,
  /Go-http-client/i,
  /okhttp/i,
  /Apache-HttpClient/i,
  /libcurl/i,
  /\bcurl\//i,
  /\bwget\//i,
  /HTTPie/i,
  /node-fetch/i,
  /undici/i,
  /axios\//i,
  /\bgot\//i,
  /superagent/i,
  // Test frameworks
  /Cypress/i,
  /TestCafe/i,
  /Nightwatch/i,
  /WebdriverIO/i,
];

// Patterns that yield the 'headless' flag.
const HEADLESS_MARKERS = [
  /HeadlessChrome/i,
  /\bHeadless\b/i,
];
205
+
206
+ // ── §6.3.4 Entropy bucketing ──────────────────────────────────────────────
207
/**
 * Bucket a User-Agent string's structural complexity.
 *
 * Decision order:
 *  1. missing or shorter than 10 chars                         → 'low'
 *  2. typical browser shape (all four character classes,
 *     60–250 chars, contains a parenthesised group)            → 'medium'
 *  3. shorter than 40, longer than 300, or fewer than three
 *     character classes                                        → 'low'
 *  4. more than 70% of the characters are distinct             → 'high'
 *  5. anything else                                            → 'medium'
 *
 * Previously an extra catch-all `return 'medium'` sat between steps 2 and 3
 * and covered every input that could reach step 4, so 'high' was never
 * returned. It has been removed so step 4 is reachable.
 *
 * @param {string} userAgent
 * @returns {'low'|'medium'|'high'}
 */
function computeUAEntropy(userAgent) {
  if (!userAgent || userAgent.length < 10) return 'low';

  // Character classes: uppercase, lowercase, digit, common UA punctuation.
  const classPatterns = [/[A-Z]/, /[a-z]/, /\d/, /[\/\.;()\s,_\-]/];
  const classCount = classPatterns.filter((re) => re.test(userAgent)).length;

  const len = userAgent.length;
  const hasParens = /\([^)]+\)/.test(userAgent);

  // Typical browser UA: 60-250 chars, 4 char classes, has parens
  if (classCount >= 4 && len >= 60 && len <= 250 && hasParens) return 'medium';

  // Very short, very long, or missing structure
  if (len < 40 || len > 300 || classCount < 3) return 'low';

  // Unusual: mostly non-repeating characters, as in random strings
  const uniqueChars = new Set(userAgent).size;
  if (uniqueChars / len > 0.7) return 'high';

  return 'medium';
}
237
+
238
+ // ── §6.3.1 UA dpf/version parsing ─────────────────────────────────────────
239
+
240
/**
 * Classify the device class advertised by a User-Agent string.
 * Rules are evaluated top to bottom and the first match wins.
 *
 * @param {string} ua
 * @returns {'desktop'|'mobile'|'tablet'|'server'|'unknown'}
 */
function detectDevice(ua) {
  const rules = [
    // Explicit tablet tokens
    ['tablet', (s) => /\b(iPad|Tablet|PlayBook|Silk|Kindle)\b/i.test(s)],
    // Phone tokens, including Android combined with "Mobile"
    ['mobile', (s) => /\b(iPhone|iPod|Android.*Mobile|Mobile.*Android|webOS|BlackBerry|Opera Mini|IEMobile|Windows Phone)\b/i.test(s)],
    // Android without "Mobile" is treated as a tablet
    ['tablet', (s) => /\b(Android)\b/i.test(s) && !/Mobile/i.test(s)],
    // Desktop operating systems
    ['desktop', (s) => /\b(Macintosh|Windows NT|X11|Linux(?!.*Android))\b/i.test(s)],
    // Named crawlers
    ['server', (s) => /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot)\b/i.test(s)],
  ];

  const hit = rules.find(([, matches]) => matches(ua));
  return hit ? hit[0] : 'unknown';
}
249
+
250
/**
 * Classify the operating-system platform advertised by a User-Agent string.
 * Checks run in order, so iOS and Android are recognised before the
 * desktop platforms.
 *
 * @param {string} ua
 * @returns {'windows'|'mac'|'ios'|'android'|'linux'|'other'}
 */
function detectPlatform(ua) {
  const rules = [
    ['ios', (s) => /\b(iPhone|iPad|iPod)\b/i.test(s)],
    ['android', (s) => /\bAndroid\b/i.test(s)],
    ['mac', (s) => /\bMacintosh\b/i.test(s)],
    ['windows', (s) => /\bWindows\b/i.test(s)],
    ['linux', (s) => /\bLinux\b/i.test(s) || /\bX11\b/i.test(s)],
  ];

  for (const [platform, matches] of rules) {
    if (matches(ua)) return platform;
  }
  return 'other';
}
259
+
260
/**
 * Classify the browser family advertised by a User-Agent string.
 *
 * Order matters: named crawlers first, then Edge (its UA also contains
 * "Chrome"), Firefox, Safari (only when no Chrome token is present), and
 * finally Chrome/Chromium including HeadlessChrome.
 *
 * @param {string} ua
 * @returns {'chrome'|'safari'|'firefox'|'edge'|'other'|'bot'}
 */
function detectFamily(ua) {
  const isCrawler = /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot|Slurp|ia_archiver)\b/i.test(ua);
  if (isCrawler) return 'bot';

  const isEdge = /\bEdg(?:e|A)?\/\d/i.test(ua);
  if (isEdge) return 'edge';

  const isFirefox = /\bFirefox\//i.test(ua);
  if (isFirefox) return 'firefox';

  // "Safari/" also appears in Chrome-based UAs, so require the absence of
  // any Chrome token before calling it Safari.
  const mentionsChrome = /Chrome|Chromium|HeadlessChrome/i.test(ua);
  if (/\bSafari\//i.test(ua) && !mentionsChrome) return 'safari';

  const isChrome = /(?:\b|Headless)Chrom(?:e|ium)\//i.test(ua);
  return isChrome ? 'chrome' : 'other';
}
271
+
272
/**
 * Extract the major browser version from a User-Agent string.
 *
 * Patterns are tried from most to least specific and the first one that
 * matches supplies the number.
 *
 * @param {string} ua
 * @returns {number|null} Major version, or null when no `/<digits>` token exists
 */
function extractMajorVersion(ua) {
  const patterns = [
    // Edge
    /\bEdg(?:e|A)?\/(\d+)/,
    // Firefox
    /\bFirefox\/(\d+)/,
    // Chrome / Chromium / HeadlessChrome
    /(?:\b|Headless)Chrom(?:e|ium)\/(\d+)/,
    // Safari: Version/17.x (not the Safari/605 build number)
    /\bVersion\/(\d+)/,
    // Generic: first "/<digits>" occurrence
    /\/(\d+)/,
  ];

  for (const pattern of patterns) {
    const hit = ua.match(pattern);
    if (hit) return parseInt(hit[1], 10);
  }
  return null;
}
294
+
295
/**
 * Map a major version number onto a coarse range label.
 * A missing version (null/undefined) shares the lowest bucket.
 *
 * @param {number|null} ver
 * @returns {string} One of '0-79', '80-99', '100-119', '120-139', '140+'
 */
function bucketVersion(ver) {
  if (ver == null) return '0-79';

  // Exclusive upper bound of each bucket, in ascending order.
  const ceilings = [
    [80, '0-79'],
    [100, '80-99'],
    [120, '100-119'],
    [140, '120-139'],
  ];

  for (const [limit, label] of ceilings) {
    if (ver < limit) return label;
  }
  return '140+';
}
308
+
309
+ // ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
310
/**
 * Reduce a User-Agent string to a list of derived features.
 *
 * Output, in order: `dpf=<device>/<platform>/<family>`, `ver=<bucket>`,
 * then the optional flags `browser` (UA starts with "Mozilla/"),
 * `headless`, `automation`, and finally `entropy=<bucket>`.
 *
 * @param {string|null|undefined} userAgent  Raw User-Agent string
 * @returns {string|null} Comma-separated feature list, or null when the
 *   User-Agent is absent or blank
 */
export function extractUAFeatures(userAgent) {
  if (!userAgent) return null;

  const ua = userAgent.trim();
  if (ua === '') return null;

  const dpf = [detectDevice(ua), detectPlatform(ua), detectFamily(ua)].join('/');
  const versionBucket = bucketVersion(extractMajorVersion(ua));
  const matchesAny = (markers) => markers.some((re) => re.test(ua));

  const features = [`dpf=${dpf}`, `ver=${versionBucket}`];
  if (/^Mozilla\//i.test(ua)) {
    features.push('browser');
  }
  if (matchesAny(HEADLESS_MARKERS)) {
    features.push('headless');
  }
  if (matchesAny(AUTOMATION_MARKERS)) {
    features.push('automation');
  }
  features.push(`entropy=${computeUAEntropy(ua)}`);

  return features.join(', ');
}
337
+
338
+ // ── §6.3.2 computeUAHMAC ─────────────────────────────────────────────────
339
+ /**
340
+ * Compute HMAC-SHA256 of the raw User-Agent, returned as an RFC 8941
341
+ * Byte Sequence string (:base64:).
342
+ *
343
+ * Uses crypto.subtle — compatible with Cloudflare Workers and modern Node.
344
+ *
345
+ * @param {string} userAgent Raw User-Agent string
346
+ * @param {string} hmacKey HMAC secret key (plain text)
347
+ * @returns {Promise<string|null>} RFC 8941 Byte Sequence or null if inputs missing
348
+ */
349
+ export async function computeUAHMAC(userAgent, hmacKey) {
350
+ if (!userAgent || !hmacKey) return null;
351
+
352
+ const enc = new TextEncoder();
353
+ const key = await crypto.subtle.importKey(
354
+ 'raw', enc.encode(hmacKey),
355
+ { name: 'HMAC', hash: 'SHA-256' },
356
+ false, ['sign']
357
+ );
358
+ const sig = await crypto.subtle.sign('HMAC', key, enc.encode(userAgent));
359
+ const b64 = btoa(String.fromCharCode(...new Uint8Array(sig)));
360
+ return `:${b64}:`;
361
+ }
362
+
363
+ // ── §6.4 computeConfidenceToken ───────────────────────────────────────────
364
+ /**
365
+ * Compute the confidence token.
366
+ * ct = SHA-256(userAgent + acceptLanguage + secChUA)[0:8] hex
367
+ *
368
+ * Matches the logic in cloud-api computeConfidenceFingerprint().
369
+ *
370
+ * @param {string|null|undefined} userAgent Raw User-Agent
371
+ * @param {string|null|undefined} acceptLanguage Raw Accept-Language
372
+ * @param {string|null|undefined} secChUA Raw Sec-CH-UA
373
+ * @returns {Promise<string>} 8-char hex token, never null
374
+ */
375
+ export async function computeConfidenceToken(userAgent, acceptLanguage, secChUA) {
376
+ const ua = userAgent || '';
377
+ const lang = acceptLanguage || '';
378
+ const ch = secChUA || '';
379
+
380
+ const msgBuffer = new TextEncoder().encode(ua + lang + ch);
381
+ const hashBuffer = await crypto.subtle.digest('SHA-256', msgBuffer);
382
+ const hashArray = Array.from(new Uint8Array(hashBuffer));
383
+ const hex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
384
+ return hex.slice(0, 8);
385
+ }