@paywalls-net/filter 1.3.8 → 1.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/jest.config.js ADDED
@@ -0,0 +1,7 @@
1
+ export default {
2
+ testEnvironment: 'node',
3
+ roots: ['<rootDir>/tests'],
4
+ testMatch: ['**/*.test.js'],
5
+ // No transforms: sources are native ES modules, which Jest runs through node --experimental-vm-modules (set in the npm test scripts)
6
+ transform: {},
7
+ };
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "description": "Client SDK for integrating paywalls.net bot filtering and authorization services into your server or CDN.",
4
4
  "author": "paywalls.net",
5
5
  "license": "MIT",
6
- "version": "1.3.8",
6
+ "version": "1.3.10",
7
7
  "publishConfig": {
8
8
  "access": "public"
9
9
  },
@@ -17,9 +17,13 @@
17
17
  ".": "./src/index.js"
18
18
  },
19
19
  "scripts": {
20
- "test": "echo \"Error: no test specified\" && exit 1"
20
+ "test": "node --experimental-vm-modules node_modules/.bin/jest --runInBand",
21
+ "test:watch": "node --experimental-vm-modules node_modules/.bin/jest --watch"
21
22
  },
22
23
  "dependencies": {
23
24
  "ua-parser-js": "^2.0.4"
25
+ },
26
+ "devDependencies": {
27
+ "jest": "^30.2.0"
24
28
  }
25
29
  }
package/src/index.js CHANGED
@@ -4,6 +4,11 @@
4
4
  */
5
5
  const sdk_version = "1.2.x";
6
6
  import { classifyUserAgent, loadAgentPatterns } from './user-agent-classification.js';
7
+ import {
8
+ extractAcceptFeatures, extractEncodingFeatures, extractLanguageFeatures,
9
+ extractNetFeatures, extractCHFeatures, extractUAFeatures,
10
+ computeUAHMAC, computeConfidenceToken,
11
+ } from './signal-extraction.js';
7
12
 
8
13
  const PAYWALLS_CLOUD_API_HOST = "https://cloud-api.paywalls.net";
9
14
 
@@ -77,6 +82,27 @@ function isVAIRequest(request, vaiPath = '/pw') {
77
82
  }
78
83
  }
79
84
 
/**
 * Operational proxy headers that are forwarded 1:1 (§7.2).
 *
 * Every entry has the shape { src, dest }: `src` is the lowercase name of the
 * incoming header and `dest` is the name used on the outgoing request.
 * Headers that need fallback logic or are derived from several sources are
 * not listed here; they are handled separately.
 */
const PROXY_HEADER_MAP = [
  // publisher hostname for domain binding
  ['host', 'X-Original-Host'],
  // relay for CORS evaluation (§5, §7.2)
  ['origin', 'X-Forwarded-Origin'],
  // preflight (§5.4)
  ['access-control-request-method', 'Access-Control-Request-Method'],
  ['access-control-request-headers', 'Access-Control-Request-Headers'],
  // session/identity context
  ['cookie', 'Cookie'],
].map(([src, dest]) => ({ src, dest }));
97
+
/**
 * Assign `value` to `obj[key]` unless the value is null or undefined.
 *
 * Extractors report an absent input as null, so the matching header is left
 * out entirely instead of being sent as an empty string. Falsy values other
 * than null/undefined (e.g. '' or 0) are still assigned.
 *
 * @param {Object} obj   Target object, mutated in place
 * @param {string} key   Property (header) name to set
 * @param {*}      value Candidate value; skipped when null or undefined
 */
function setIfPresent(obj, key, value) {
  if (value === null || value === undefined) {
    return;
  }
  obj[key] = value;
}
105
+
80
106
  /**
81
107
  * Proxy VAI requests to the cloud-api service (Spec §7).
82
108
  *
@@ -92,6 +118,10 @@ function isVAIRequest(request, vaiPath = '/pw') {
92
118
  * - User-Agent, X-Forwarded-For: standard proxy headers
93
119
  * - Authorization: publisher API key (§7.4)
94
120
  *
121
+ * Human-confidence signal forwarding (§7.2):
122
+ * Uses signal-extraction module to transform raw browser headers into compact
123
+ * RFC 8941 Structured Field Value strings. Absent inputs → null → header omitted.
124
+ *
95
125
  * Response passthrough (§7.3):
96
126
  * All response headers from cloud-api are returned unchanged — including
97
127
  * Access-Control-*, Vary, Cache-Control. The proxy never injects or
@@ -114,44 +144,46 @@ async function proxyVAIRequest(cfg, request) {
114
144
  // Build forwarding headers — include everything cloud-api needs
115
145
  // for CORS evaluation, domain auth, and request context.
116
146
  const forwardHeaders = {
117
- 'User-Agent': headers['user-agent'] || sdkUserAgent,
147
+ 'User-Agent': sdkUserAgent,
118
148
  'Authorization': `Bearer ${cfg.paywallsAPIKey}`
119
149
  };
120
150
 
121
- // Client IP forwarding
151
+ // Client IP forwarding — dual-source, so handled explicitly
122
152
  if (headers['x-forwarded-for']) {
123
153
  forwardHeaders['X-Forwarded-For'] = headers['x-forwarded-for'];
124
154
  } else if (headers['cf-connecting-ip']) {
125
155
  forwardHeaders['X-Forwarded-For'] = headers['cf-connecting-ip'];
126
156
  }
127
-
128
- // Publisher hostname for domain binding (§7.2, §4)
129
- // Cloud-api gates reading this on the vai_forwarded_host feature flag.
130
- if (headers['host']) {
131
- forwardHeaders['X-Original-Host'] = headers['host'];
132
- }
133
-
134
- // Forward browser Origin via custom header for CORS evaluation (§5, §7.2).
135
- // Cloudflare Workers runtime controls the outbound Origin header on fetch(),
136
- // so we relay the browser's Origin via X-Forwarded-Origin. Cloud-api's
137
- // evaluateCORS() reads this to make the authoritative CORS decision.
138
- if (headers['origin']) {
139
- forwardHeaders['X-Forwarded-Origin'] = headers['origin'];
140
- }
141
-
142
- // Forward preflight headers so cloud-api can evaluate OPTIONS (§5.4, §7.2)
143
- if (headers['access-control-request-method']) {
144
- forwardHeaders['Access-Control-Request-Method'] = headers['access-control-request-method'];
145
- }
146
- if (headers['access-control-request-headers']) {
147
- forwardHeaders['Access-Control-Request-Headers'] = headers['access-control-request-headers'];
148
- }
149
-
150
- // Forward cookies for session/identity context (§7.2)
151
- if (headers['cookie']) {
152
- forwardHeaders['Cookie'] = headers['cookie'];
157
+
158
+ // Clean 1:1 operational header forwarding (§7.2)
159
+ for (const { src, dest } of PROXY_HEADER_MAP) {
160
+ if (headers[src]) forwardHeaders[dest] = headers[src];
153
161
  }
154
-
162
+
163
+ // Signal protocol version (§7.1)
164
+ forwardHeaders['X-PW-V'] = '2';
165
+
166
+ const cf = request.cf || {};
167
+
168
+ // Tier 1: kept raw (§6.1)
169
+ setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Dest', headers['sec-fetch-dest']);
170
+ setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Mode', headers['sec-fetch-mode']);
171
+ setIfPresent(forwardHeaders, 'X-PW-Sec-Fetch-Site', headers['sec-fetch-site']);
172
+ setIfPresent(forwardHeaders, 'X-PW-TLS-Version', cf.tlsVersion != null ? String(cf.tlsVersion) : null);
173
+ setIfPresent(forwardHeaders, 'X-PW-HTTP-Protocol', cf.httpProtocol != null ? String(cf.httpProtocol) : null);
174
+
175
+ // Tier 2: extract features (§6.2)
176
+ setIfPresent(forwardHeaders, 'X-PW-Accept', extractAcceptFeatures(headers['accept']));
177
+ setIfPresent(forwardHeaders, 'X-PW-Enc', extractEncodingFeatures(headers['accept-encoding']));
178
+ setIfPresent(forwardHeaders, 'X-PW-Lang', extractLanguageFeatures(headers['accept-language']));
179
+ setIfPresent(forwardHeaders, 'X-PW-Net', extractNetFeatures(cf.asn));
180
+ setIfPresent(forwardHeaders, 'X-PW-CH', extractCHFeatures(headers['sec-ch-ua'], headers['user-agent']));
181
+
182
+ // Tier 3: UA features + HMAC (§6.3)
183
+ setIfPresent(forwardHeaders, 'X-PW-UA', extractUAFeatures(headers['user-agent']));
184
+ setIfPresent(forwardHeaders, 'X-PW-UA-HMAC', await computeUAHMAC(headers['user-agent'], cfg.vaiUAHmacKey));
185
+ setIfPresent(forwardHeaders, 'X-PW-CT-FP', await computeConfidenceToken(headers['user-agent'], headers['accept-language'], headers['sec-ch-ua']));
186
+
155
187
  // Forward request to cloud-api
156
188
  const response = await fetch(`${cfg.paywallsAPIHost}${cloudApiPath}`, {
157
189
  method: request.method || 'GET',
@@ -387,7 +419,8 @@ async function cloudflare(config = null) {
387
419
  paywallsAPIHost: env.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
388
420
  paywallsAPIKey: env.PAYWALLS_CLOUD_API_KEY,
389
421
  paywallsPublisherId: env.PAYWALLS_PUBLISHER_ID,
390
- vaiPath: env.PAYWALLS_VAI_PATH || '/pw'
422
+ vaiPath: env.PAYWALLS_VAI_PATH || '/pw',
423
+ vaiUAHmacKey: env.VAI_UA_HMAC_KEY || null,
391
424
  };
392
425
 
393
426
  // Check if this is a VAI endpoint request and proxy it
@@ -419,7 +452,8 @@ async function fastly() {
419
452
  paywallsAPIHost: config.get('PAYWALLS_CLOUD_API_HOST') || PAYWALLS_CLOUD_API_HOST,
420
453
  paywallsAPIKey: config.get('PAYWALLS_API_KEY'),
421
454
  paywallsPublisherId: config.get('PAYWALLS_PUBLISHER_ID'),
422
- vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw'
455
+ vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw',
456
+ vaiUAHmacKey: config.get('VAI_UA_HMAC_KEY') || null,
423
457
  };
424
458
 
425
459
  // Check if this is a VAI endpoint request and proxy it
@@ -501,7 +535,8 @@ async function cloudfront(config) {
501
535
  paywallsAPIHost: config.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
502
536
  paywallsAPIKey: config.PAYWALLS_API_KEY,
503
537
  paywallsPublisherId: config.PAYWALLS_PUBLISHER_ID,
504
- vaiPath: config.PAYWALLS_VAI_PATH || '/pw'
538
+ vaiPath: config.PAYWALLS_VAI_PATH || '/pw',
539
+ vaiUAHmacKey: config.VAI_UA_HMAC_KEY || null,
505
540
  };
506
541
  await loadAgentPatterns(paywallsConfig);
507
542
 
@@ -0,0 +1,385 @@
1
+ /**
2
+ * Signal Extraction Module — Tier 2 + Tier 3 feature extractors
3
+ *
4
+ * Transforms raw browser headers into compact RFC 8941 Structured Field
5
+ * Dictionary strings for privacy-preserving VAI signal forwarding.
6
+ *
7
+ * Spec: specs/vai-privacy-v2.spec.md §6.2–§6.4
8
+ *
9
+ * Each function returns an SF-Dictionary string (e.g. "html, wildcard")
10
+ * or null if the input is absent/empty. null means the caller should
11
+ * omit the header entirely (not send an empty value).
12
+ */
13
+
// ── §6.2.4 / Appendix A: Data-center ASN set ──────────────────────────────
// Cloud/hosting provider ASNs used for data-center (DC) classification.
// Kept in sync with cloud-api DC_ASN_LIST (cloudflare/vai.js).
// Source: public ASN registries (PeeringDB, RIPE, ARIN).

// Major IaaS providers.
const IAAS_ASNS = [
  16509, 14618,          // Amazon AWS (primary + secondary)
  396982, 36492, 15169,  // Google Cloud + Google infra
  8075, 8069, 8068,      // Microsoft Azure
  31898,                 // Oracle Cloud
  36351,                 // IBM Cloud / SoftLayer
  45102,                 // Alibaba Cloud
  132203,                // Tencent Cloud
];

// VPS / hosting providers.
const HOSTING_ASNS = [
  14061,                 // DigitalOcean
  24940, 213230,         // Hetzner (dedicated + cloud)
  16276,                 // OVH
  63949,                 // Linode / Akamai Connected Cloud
  20473,                 // Vultr / The Constant Company
  12876,                 // Scaleway
  51167,                 // Contabo
  60781, 28753,          // Leaseweb (NL + global)
];

const DC_ASN_SET = new Set([...IAAS_ASNS, ...HOSTING_ASNS]);
38
+
39
+ // ── §6.2.1 Accept → X-PW-Accept ──────────────────────────────────────────
/**
 * Derive boolean feature flags from an Accept header.
 *
 * Flags are emitted in a fixed order (html, wildcard, json, image) based on
 * plain substring matches against the raw header value.
 *
 * @param {string|null|undefined} accept  Raw Accept header value
 * @returns {string|null} Comma-separated flag list (SF-Dictionary string), or
 *   null when the header is absent/empty or no known media type is present
 */
export function extractAcceptFeatures(accept) {
  if (!accept) return null;

  // [substring to look for, flag to emit] — order defines output order.
  const flagTable = [
    ['text/html', 'html'],
    ['*/*', 'wildcard'],
    ['application/json', 'json'],
    ['image/', 'image'],
  ];

  const flags = flagTable
    .filter(([needle]) => accept.includes(needle))
    .map(([, flag]) => flag);

  return flags.length === 0 ? null : flags.join(', ');
}
57
+
58
+ // ── §6.2.2 Accept-Encoding → X-PW-Enc ────────────────────────────────────
/**
 * Derive boolean feature flags from an Accept-Encoding header.
 *
 * Detection is a plain substring match for "br" and "gzip". When both are
 * present an additional "modern" flag is appended.
 *
 * @param {string|null|undefined} acceptEncoding  Raw Accept-Encoding value
 * @returns {string|null} Comma-separated flag list (SF-Dictionary string), or
 *   null when the header is absent/empty or neither encoding is present
 */
export function extractEncodingFeatures(acceptEncoding) {
  if (!acceptEncoding) return null;

  const supportsBrotli = acceptEncoding.includes('br');
  const supportsGzip = acceptEncoding.includes('gzip');
  if (!supportsBrotli && !supportsGzip) return null;

  const flags = [];
  if (supportsBrotli) {
    flags.push('br');
  }
  if (supportsGzip) {
    flags.push('gzip');
  }
  if (supportsBrotli && supportsGzip) {
    flags.push('modern');
  }
  return flags.join(', ');
}
78
+
79
+ // ── §6.2.3 Accept-Language → X-PW-Lang ───────────────────────────────────
/**
 * Summarise an Accept-Language header as presence, primary language family
 * and number of listed locales.
 *
 * The primary family is the first two characters of the first locale,
 * lowercased (or the whole locale if it is shorter than two characters).
 * Quality values (";q=...") are ignored and empty entries are not counted.
 *
 * @param {string|null|undefined} acceptLanguage  Raw Accept-Language value
 * @returns {string|null} SF-Dictionary string such as
 *   "present, primary=en, count=3", or null when the header is absent, blank,
 *   exactly "*", or contains no locale entries
 */
export function extractLanguageFeatures(acceptLanguage) {
  if (!acceptLanguage) return null;

  const header = acceptLanguage.trim();
  if (header === '' || header === '*') return null;

  // Collect the language ranges, dropping weights and empty entries.
  const ranges = [];
  for (const entry of header.split(',')) {
    const [range] = entry.trim().split(';');
    const tag = range.trim();
    if (tag) ranges.push(tag);
  }
  if (ranges.length === 0) return null;

  // slice(0, 2) leaves strings shorter than two characters untouched.
  const primary = ranges[0].toLowerCase().slice(0, 2);
  return `present, primary=${primary}, count=${ranges.length}`;
}
105
+
106
+ // ── §6.2.4 ASN → X-PW-Net ────────────────────────────────────────────────
/**
 * Classify an ASN as data-center ("cloud") or not ("consumer").
 *
 * String inputs are parsed as base-10 integers; membership is then checked
 * against DC_ASN_SET.
 *
 * @param {string|number|null|undefined} asn  Numeric ASN value
 * @returns {string|null} "asn=cloud" or "asn=consumer", or null when the ASN
 *   is absent, empty, or not parseable as a number
 */
export function extractNetFeatures(asn) {
  if (asn === null || asn === undefined || asn === '') return null;

  const asnNumber = typeof asn === 'number' ? asn : parseInt(asn, 10);
  if (Number.isNaN(asnNumber)) return null;

  return DC_ASN_SET.has(asnNumber) ? 'asn=cloud' : 'asn=consumer';
}
122
+
123
+ // ── §6.2.5 Sec-CH-UA → X-PW-CH ───────────────────────────────────────────
124
+
/**
 * Read the major Chrome version out of a Sec-CH-UA header value.
 *
 * The header is a list of `"Brand";v="version"` entries; the first entry whose
 * brand is "Google Chrome" or "Chromium" supplies the version.
 *
 * @param {string} secChUA  Raw Sec-CH-UA header
 * @returns {number|null} Major version, or null when neither brand is listed
 */
function extractChromeVersionFromCH(secChUA) {
  const found = secChUA.match(/"(?:Google Chrome|Chromium)";v="(\d+)"/);
  if (!found) {
    return null;
  }
  const [, major] = found;
  return parseInt(major, 10);
}
137
+
/**
 * Read the major Chrome version out of a User-Agent string by locating the
 * first "Chrome/<digits>" token (e.g. "...Chrome/134.0.0.0...").
 *
 * @param {string} userAgent  Raw User-Agent string
 * @returns {number|null} Major version, or null when no such token exists
 */
function extractChromeVersionFromUA(userAgent) {
  const found = userAgent.match(/Chrome\/(\d+)/);
  if (!found) {
    return null;
  }
  const [, major] = found;
  return parseInt(major, 10);
}
149
+
/**
 * Summarise a Sec-CH-UA header as an SF-Dictionary string.
 *
 * Output always starts with "present" and "brands=<n>" (number of non-empty
 * comma-separated entries). "grease" is added when an entry looks like a
 * Chromium GREASE "Not...Brand" placeholder, and "consistent" is added when
 * the Chrome major version in the header equals the one in the User-Agent.
 *
 * @param {string|null|undefined} secChUA    Raw Sec-CH-UA header value
 * @param {string|null|undefined} userAgent  Raw User-Agent string, used only
 *   for the consistency check
 * @returns {string|null} SF-Dictionary string, or null if the header is
 *   absent or blank
 */
export function extractCHFeatures(secChUA, userAgent) {
  if (!secChUA) return null;

  const header = secChUA.trim();
  if (header === '') return null;

  // Each brand entry is a quoted string followed by ;v="..."; entries are
  // comma-separated.
  const entries = header
    .split(',')
    .map((entry) => entry.trim())
    .filter(Boolean);

  const features = ['present', `brands=${entries.length}`];

  // GREASE detection: Chromium convention includes a "Not" brand.
  const looksLikeGrease = (entry) =>
    /not[^"]*brand/i.test(entry) || /not[:\-_.]/i.test(entry);
  if (entries.some(looksLikeGrease)) {
    features.push('grease');
  }

  // Consistency check: both versions must be found and must be equal.
  if (userAgent) {
    const versionFromCH = extractChromeVersionFromCH(header);
    const versionFromUA = extractChromeVersionFromUA(userAgent);
    const versionsAgree =
      versionFromCH != null && versionFromUA != null && versionFromCH === versionFromUA;
    if (versionsAgree) {
      features.push('consistent');
    }
  }

  return features.join(', ');
}
186
+
187
+ // ═══════════════════════════════════════════════════════════════════════════
188
+ // Tier 3 — Replace User-Agent with derived features (§6.3) + CT (§6.4)
189
+ // ═══════════════════════════════════════════════════════════════════════════
190
+
// ── §6.3.3 Automation marker detection ────────────────────────────────────
// Two independent pattern lists, both matched case-insensitively:
//   - HEADLESS_MARKERS   → HeadlessChrome is covered here only.
//   - AUTOMATION_MARKERS → explicit automation tools (Puppeteer, Selenium, ...)
//                          and HTTP client libraries.
const AUTOMATION_MARKERS = [
  // Browser automation frameworks
  /Puppeteer/i,
  /Playwright/i,
  /Selenium/i,
  /WebDriver/i,
  /PhantomJS/i,
  /CasperJS/i,
  // HTTP client libraries
  /python-requests/i,
  /python-urllib/i,
  /Go-http-client/i,
  /okhttp/i,
  /Apache-HttpClient/i,
  /libcurl/i,
  // Command-line clients
  /\bcurl\//i,
  /\bwget\//i,
  /HTTPie/i,
  // JavaScript HTTP clients
  /node-fetch/i,
  /undici/i,
  /axios\//i,
  /\bgot\//i,
  /superagent/i,
  // Test runners
  /Cypress/i,
  /TestCafe/i,
  /Nightwatch/i,
  /WebdriverIO/i,
];

const HEADLESS_MARKERS = [
  /HeadlessChrome/i,
  /\bHeadless\b/i,
];
205
+
206
+ // ── §6.3.4 Entropy bucketing ──────────────────────────────────────────────
/**
 * Bucket a User-Agent string's structural complexity.
 *
 * Rules, in order:
 *   1. Missing, shorter than 40 or longer than 300 characters, or fewer than
 *      three character classes (upper, lower, digit, punctuation/space) → 'low'.
 *   2. Typical browser shape (all four classes, 60-250 characters, contains a
 *      parenthesised section) → 'medium'.
 *   3. Otherwise, a string in which more than 70% of the characters are
 *      distinct looks like random data → 'high'; anything else → 'medium'.
 *
 * The previous implementation returned 'medium' for every string that passed
 * rule 1, which made the 'high' bucket unreachable.
 *
 * @param {string} userAgent
 * @returns {'low'|'medium'|'high'}
 */
function computeUAEntropy(userAgent) {
  if (!userAgent || userAgent.length < 10) return 'low';

  const len = userAgent.length;
  const classCount = [
    /[A-Z]/,             // upper-case letters
    /[a-z]/,             // lower-case letters
    /\d/,                // digits
    /[\/\.;()\s,_\-]/,   // punctuation / whitespace typical of UA strings
  ].filter((re) => re.test(userAgent)).length;

  // Very short, very long, or missing structure.
  if (len < 40 || len > 300 || classCount < 3) return 'low';

  // Typical browser UA: 60-250 chars, 4 char classes, has parens.
  const hasParens = /\([^)]+\)/.test(userAgent);
  if (classCount >= 4 && len >= 60 && len <= 250 && hasParens) return 'medium';

  // Unusual: high-entropy random strings have mostly distinct characters.
  const uniqueChars = new Set(userAgent).size;
  return uniqueChars / len > 0.7 ? 'high' : 'medium';
}
237
+
238
+ // ── §6.3.1 UA dpf/version parsing ─────────────────────────────────────────
239
+
/**
 * Classify the device class implied by a User-Agent string.
 * Rules are evaluated top to bottom; the first match wins.
 *
 * @param {string} ua  User-Agent string
 * @returns {'desktop'|'mobile'|'tablet'|'server'|'unknown'}
 */
function detectDevice(ua) {
  const rules = [
    ['tablet', () => /\b(iPad|Tablet|PlayBook|Silk|Kindle)\b/i.test(ua)],
    ['mobile', () => /\b(iPhone|iPod|Android.*Mobile|Mobile.*Android|webOS|BlackBerry|Opera Mini|IEMobile|Windows Phone)\b/i.test(ua)],
    // Android without a "Mobile" token is treated as a tablet.
    ['tablet', () => /\b(Android)\b/i.test(ua) && !/Mobile/i.test(ua)],
    ['desktop', () => /\b(Macintosh|Windows NT|X11|Linux(?!.*Android))\b/i.test(ua)],
    ['server', () => /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot)\b/i.test(ua)],
  ];

  for (const [device, matches] of rules) {
    if (matches()) return device;
  }
  return 'unknown';
}
249
+
/**
 * Classify the operating-system platform implied by a User-Agent string.
 * Rules are evaluated top to bottom; the first match wins.
 *
 * @param {string} ua  User-Agent string
 * @returns {'windows'|'mac'|'ios'|'android'|'linux'|'other'}
 */
function detectPlatform(ua) {
  const rules = [
    ['ios', () => /\b(iPhone|iPad|iPod)\b/i.test(ua)],
    ['android', () => /\bAndroid\b/i.test(ua)],
    ['mac', () => /\bMacintosh\b/i.test(ua)],
    ['windows', () => /\bWindows\b/i.test(ua)],
    ['linux', () => /\bLinux\b/i.test(ua) || /\bX11\b/i.test(ua)],
  ];

  const hit = rules.find(([, matches]) => matches());
  return hit ? hit[0] : 'other';
}
259
+
/**
 * Classify the browser family implied by a User-Agent string.
 * Rules are evaluated top to bottom; the first match wins.
 *
 * @param {string} ua  User-Agent string
 * @returns {'chrome'|'safari'|'firefox'|'edge'|'other'|'bot'}
 */
function detectFamily(ua) {
  const rules = [
    ['bot', () => /\b(Googlebot|bingbot|Baiduspider|YandexBot|DuckDuckBot|Slurp|ia_archiver)\b/i.test(ua)],
    // Edge must be tested before Chrome because Edge UAs also contain "Chrome".
    ['edge', () => /\bEdg(?:e|A)?\/\d/i.test(ua)],
    ['firefox', () => /\bFirefox\//i.test(ua)],
    // Safari: has "Safari/" but NOT "Chrome", "Chromium" or "HeadlessChrome".
    ['safari', () => /\bSafari\//i.test(ua) && !/Chrome|Chromium|HeadlessChrome/i.test(ua)],
    ['chrome', () => /(?:\b|Headless)Chrom(?:e|ium)\//i.test(ua)],
  ];

  for (const [family, matches] of rules) {
    if (matches()) return family;
  }
  return 'other';
}
271
+
/**
 * Extract the major browser version from a User-Agent string.
 *
 * Patterns are tried from most to least specific and the first one that
 * matches supplies the version.
 *
 * @param {string} ua  User-Agent string
 * @returns {number|null} Major version, or null when no "/<digits>" token exists
 */
function extractMajorVersion(ua) {
  const versionPatterns = [
    /\bEdg(?:e|A)?\/(\d+)/,                  // Edge
    /\bFirefox\/(\d+)/,                      // Firefox
    /(?:\b|Headless)Chrom(?:e|ium)\/(\d+)/,  // Chrome / Chromium / HeadlessChrome
    /\bVersion\/(\d+)/,                      // Safari: Version/17.x (not the Safari/605 build number)
    /\/(\d+)/,                               // Generic: first thing/number pattern
  ];

  for (const pattern of versionPatterns) {
    const hit = ua.match(pattern);
    if (hit) return parseInt(hit[1], 10);
  }
  return null;
}
294
+
/**
 * Map a major version number onto a coarse range token.
 *
 * A missing version (null/undefined) is reported in the lowest bucket.
 *
 * @param {number|null} ver  Major version
 * @returns {string} One of '0-79', '80-99', '100-119', '120-139', '140+'
 */
function bucketVersion(ver) {
  if (ver == null) return '0-79';

  // [exclusive upper bound, bucket label], in ascending order.
  const buckets = [
    [80, '0-79'],
    [100, '80-99'],
    [120, '100-119'],
    [140, '120-139'],
  ];

  const hit = buckets.find(([upperBound]) => ver < upperBound);
  return hit ? hit[1] : '140+';
}
308
+
309
+ // ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
/**
 * Reduce a User-Agent string to an SF-Dictionary of derived features.
 *
 * Output order: `dpf=<device>/<platform>/<family>`, `ver=<bucket>`, then the
 * optional flags `browser` (UA starts with "Mozilla/"), `headless` and
 * `automation`, and finally `entropy=<bucket>`.
 *
 * @param {string|null|undefined} userAgent  Raw User-Agent string
 * @returns {string|null} SF-Dictionary string, or null if absent or blank
 */
export function extractUAFeatures(userAgent) {
  if (!userAgent) return null;

  const ua = userAgent.trim();
  if (ua === '') return null;

  const dpf = [detectDevice(ua), detectPlatform(ua), detectFamily(ua)].join('/');
  const versionBucket = bucketVersion(extractMajorVersion(ua));
  const features = [`dpf=${dpf}`, `ver=${versionBucket}`];

  const matchesAny = (patterns) => patterns.some((re) => re.test(ua));

  if (/^Mozilla\//i.test(ua)) {
    features.push('browser');
  }
  if (matchesAny(HEADLESS_MARKERS)) {
    features.push('headless');
  }
  if (matchesAny(AUTOMATION_MARKERS)) {
    features.push('automation');
  }

  features.push(`entropy=${computeUAEntropy(ua)}`);
  return features.join(', ');
}
337
+
338
+ // ── §6.3.2 computeUAHMAC ─────────────────────────────────────────────────
/**
 * HMAC-SHA256 the raw User-Agent with the given secret and return the result
 * as an RFC 8941 Byte Sequence (base64 wrapped in colons, ":...:").
 *
 * Relies on the Web Crypto API (crypto.subtle), TextEncoder and btoa being
 * available as globals.
 *
 * @param {string} userAgent  Raw User-Agent string
 * @param {string} hmacKey    HMAC secret key (plain text)
 * @returns {Promise<string|null>} RFC 8941 Byte Sequence, or null when either
 *   input is missing/empty
 */
export async function computeUAHMAC(userAgent, hmacKey) {
  if (!userAgent || !hmacKey) return null;

  const encoder = new TextEncoder();
  const algorithm = { name: 'HMAC', hash: 'SHA-256' };

  const signingKey = await crypto.subtle.importKey(
    'raw',
    encoder.encode(hmacKey),
    algorithm,
    false, // not extractable
    ['sign'],
  );
  const signature = await crypto.subtle.sign('HMAC', signingKey, encoder.encode(userAgent));

  // btoa expects a binary string: one character per signature byte.
  let binary = '';
  for (const byte of new Uint8Array(signature)) {
    binary += String.fromCharCode(byte);
  }
  return `:${btoa(binary)}:`;
}
362
+
363
+ // ── §6.4 computeConfidenceToken ───────────────────────────────────────────
/**
 * Compute the confidence token:
 *   ct = first 8 hex characters of SHA-256(userAgent + acceptLanguage + secChUA)
 *
 * Missing inputs are treated as empty strings, so a token is always produced.
 * Intended to match cloud-api computeConfidenceFingerprint().
 *
 * @param {string|null|undefined} userAgent       Raw User-Agent
 * @param {string|null|undefined} acceptLanguage  Raw Accept-Language
 * @param {string|null|undefined} secChUA         Raw Sec-CH-UA
 * @returns {Promise<string>} 8-char lowercase hex token, never null
 */
export async function computeConfidenceToken(userAgent, acceptLanguage, secChUA) {
  const material = (userAgent || '') + (acceptLanguage || '') + (secChUA || '');

  const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(material));

  // 4 bytes → 8 hex characters, i.e. the first 8 characters of the full digest.
  let token = '';
  for (const byte of new Uint8Array(digest).subarray(0, 4)) {
    token += byte.toString(16).padStart(2, '0');
  }
  return token;
}
@@ -142,6 +142,8 @@ export async function classifyUserAgent(cfg, userAgent) {
142
142
  const result = {
143
143
  browser,
144
144
  os,
145
+ purpose: 'other',
146
+ purpose_mode: ['other'],
145
147
  vat: 'HUMAN',
146
148
  act: 'ACT-2', // Unmatched UA with detected browser — medium confidence
147
149
  };