@paywalls-net/filter 1.3.9 → 1.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/jest.config.js +7 -0
- package/package.json +6 -2
- package/src/index.js +58 -40
- package/src/signal-extraction.js +524 -0
- package/tests/proxy-vai-request.test.js +379 -0
- package/tests/signal-extraction.test.js +1002 -0
|
@@ -0,0 +1,1002 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for signal extraction functions (Tier 2 + Tier 3)
|
|
3
|
+
*
|
|
4
|
+
* Spec: specs/vai-privacy-v2.spec.md §6.2–§6.4
|
|
5
|
+
* Issue: paywalls-site-drk, paywalls-site-fc4
|
|
6
|
+
*/
|
|
7
|
+
import {
|
|
8
|
+
extractAcceptFeatures,
|
|
9
|
+
extractEncodingFeatures,
|
|
10
|
+
extractLanguageFeatures,
|
|
11
|
+
extractNetFeatures,
|
|
12
|
+
extractCHFeatures,
|
|
13
|
+
extractUAFeatures,
|
|
14
|
+
computeUAHMAC,
|
|
15
|
+
computeConfidenceToken,
|
|
16
|
+
loadVAIMetadata,
|
|
17
|
+
_resetVAIMetadata,
|
|
18
|
+
} from '../src/signal-extraction.js';
|
|
19
|
+
|
|
20
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
21
|
+
// Helpers: SF-Dictionary format validation (RFC 8941)
|
|
22
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
23
|
+
|
|
24
|
+
/**
 * Checks whether a string is a well-formed SF-Dictionary as this protocol
 * serializes it (a relaxed form of RFC 8941 dictionaries).
 *
 * Members must be separated by exactly ", ". Each member is either a bare
 * key (boolean true) or `key=value`, split at the first "=":
 * - key: a lowercase letter or "*", followed by any mix of lowercase
 *   letters, digits, "_", "-", "." and "*".
 * - value: a letter, digit or "*", followed by any mix of letters, digits,
 *   "/" and the punctuation ! # $ & ' * + . ^ _ | ~ -
 *   This admits compound paths (dpf=desktop/mac/chrome) and version
 *   ranges (ver=120-139, ver=140+).
 *
 * @param {*} str - Candidate serialized dictionary.
 * @returns {boolean} true when every member is well formed; false for
 *   non-strings, the empty string, or any malformed member.
 */
function isValidSFDictionary(str) {
  if (typeof str !== 'string' || str === '') {
    return false;
  }

  const keyPattern = /^[a-z*][a-z0-9_\-.*]*$/;
  // Extended value: starts with alphanumeric/*, allows tchar + / and digits
  const valuePattern = /^[A-Za-z0-9*][A-Za-z0-9!#$&'*+.^_|~\/-]*$/;

  return str.split(', ').every((member) => {
    const separatorAt = member.indexOf('=');
    if (separatorAt < 0) {
      // No "=": the whole member is a bare key (boolean true).
      return keyPattern.test(member);
    }
    const name = member.slice(0, separatorAt);
    const value = member.slice(separatorAt + 1);
    return keyPattern.test(name) && valuePattern.test(value);
  });
}
|
|
57
|
+
|
|
58
|
+
// ── §6.2.1 extractAcceptFeatures ──────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
describe('extractAcceptFeatures', () => {
  // Registers a test asserting that `acceptHeader` yields exactly `features`.
  const expectFeatures = (title, acceptHeader, features) => {
    test(title, () => {
      expect(extractAcceptFeatures(acceptHeader)).toBe(features);
    });
  };

  // Registers a test asserting that `acceptHeader` yields null.
  const expectNull = (title, acceptHeader) => {
    test(title, () => {
      expect(extractAcceptFeatures(acceptHeader)).toBeNull();
    });
  };

  expectFeatures(
    'typical browser Accept → html, wildcard',
    'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'html, wildcard',
  );
  expectFeatures('API client → json', 'application/json', 'json');
  expectFeatures('curl default → wildcard', '*/*', 'wildcard');
  expectFeatures(
    'image request → image',
    'image/webp,image/apng,image/*,*/*;q=0.8',
    'wildcard, image',
  );
  expectFeatures(
    'combined html + json + wildcard',
    'text/html, application/json, */*',
    'html, wildcard, json',
  );

  expectNull('null input → null', null);
  expectNull('undefined input → null', undefined);
  expectNull('empty string → null', '');
  expectNull('unrecognized type only → null', 'application/xml');

  test('output is valid SF-Dictionary', () => {
    const serialized = extractAcceptFeatures('text/html,application/json,*/*;q=0.8');
    expect(isValidSFDictionary(serialized)).toBe(true);
  });
});
|
|
105
|
+
|
|
106
|
+
// ── §6.2.2 extractEncodingFeatures ────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
describe('extractEncodingFeatures', () => {
  // Registers a test asserting that `acceptEncoding` yields exactly `features`.
  const expectFeatures = (title, acceptEncoding, features) => {
    test(title, () => {
      expect(extractEncodingFeatures(acceptEncoding)).toBe(features);
    });
  };

  // Registers a test asserting that `acceptEncoding` yields null.
  const expectNull = (title, acceptEncoding) => {
    test(title, () => {
      expect(extractEncodingFeatures(acceptEncoding)).toBeNull();
    });
  };

  expectFeatures('modern browser → br, gzip, modern', 'gzip, deflate, br, zstd', 'br, gzip, modern');
  expectFeatures('gzip only → gzip (no modern)', 'gzip, deflate', 'gzip');
  expectFeatures('br only → br (no modern)', 'br', 'br');

  expectNull('null → null', null);
  expectNull('empty string → null', '');
  expectNull('deflate only (no br/gzip) → null', 'deflate');
  expectNull('zstd alone (no br/gzip) → null (not yet a tracked feature)', 'zstd');

  test('output is valid SF-Dictionary', () => {
    const serialized = extractEncodingFeatures('gzip, deflate, br');
    expect(isValidSFDictionary(serialized)).toBe(true);
  });
});
|
|
143
|
+
|
|
144
|
+
// ── §6.2.3 extractLanguageFeatures ────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
describe('extractLanguageFeatures', () => {
  // Registers a test asserting that `acceptLanguage` yields exactly `features`.
  const expectFeatures = (title, acceptLanguage, features) => {
    test(title, () => {
      expect(extractLanguageFeatures(acceptLanguage)).toBe(features);
    });
  };

  // Registers a test asserting that `acceptLanguage` yields null.
  const expectNull = (title, acceptLanguage) => {
    test(title, () => {
      expect(extractLanguageFeatures(acceptLanguage)).toBeNull();
    });
  };

  expectFeatures(
    'typical browser → present, primary=en, count=3',
    'en-US,en;q=0.9,fr;q=0.8',
    'present, primary=en, count=3',
  );
  expectFeatures('single locale → count=1', 'ja', 'present, primary=ja, count=1');
  expectFeatures(
    'primary with region → primary extracts 2-char family',
    'fr-FR',
    'present, primary=fr, count=1',
  );

  // NOTE: Test matrix suggests * → present, primary=other, count=1.
  // Current implementation returns null (wildcard is not a useful locale
  // for privacy classification). If spec intent changes, update here.
  expectNull('wildcard only → null (not a real locale)', '*');

  expectNull('null → null', null);
  expectNull('empty string → null', '');
  expectNull('whitespace only → null', ' ');

  expectFeatures(
    'Chinese locale → primary=zh',
    'zh-CN,zh;q=0.9,en;q=0.8',
    'present, primary=zh, count=3',
  );
  expectFeatures(
    'many locales → count reflects total',
    'en-US,en;q=0.9,fr;q=0.8,de;q=0.7,es;q=0.6,pt;q=0.5',
    'present, primary=en, count=6',
  );

  // "tlh" (Klingon) → primary=tl
  expectFeatures(
    'three-letter language code → first 2 chars',
    'tlh',
    'present, primary=tl, count=1',
  );

  test('output is valid SF-Dictionary', () => {
    const serialized = extractLanguageFeatures('en-US,en;q=0.9');
    expect(isValidSFDictionary(serialized)).toBe(true);
  });
});
|
|
201
|
+
|
|
202
|
+
// ── §6.2.4 extractNetFeatures ─────────────────────────────────────────────
|
|
203
|
+
|
|
204
|
+
describe('extractNetFeatures', () => {
  const CLOUD = 'asn=cloud';
  const CONSUMER = 'asn=consumer';

  // Registers a test asserting that `asn` is classified as `category`.
  const expectCategory = (title, asn, category) => {
    test(title, () => {
      expect(extractNetFeatures(asn)).toBe(category);
    });
  };

  // Registers a test asserting that `asn` yields null.
  const expectNull = (title, asn) => {
    test(title, () => {
      expect(extractNetFeatures(asn)).toBeNull();
    });
  };

  // ── Classification boundary ────────────────────────────────────────────
  expectCategory('well-known cloud ASN (AWS) → asn=cloud', '16509', CLOUD);
  expectCategory('well-known consumer ISP (Comcast) → asn=consumer', '7922', CONSUMER);

  // ── Full DC_ASN_SET coverage ───────────────────────────────────────────
  // Rows are [ASN, operator label]; the label only feeds the test title.
  const DATA_CENTER_ASNS = [
    [16509, 'AWS primary'],
    [14618, 'AWS secondary'],
    [396982, 'GCP'],
    [36492, 'GCP secondary'],
    [15169, 'Google infra'],
    [8075, 'Azure'],
    [8069, 'Azure secondary'],
    [8068, 'Azure tertiary'],
    [31898, 'Oracle Cloud'],
    [36351, 'IBM/SoftLayer'],
    [45102, 'Alibaba Cloud'],
    [132203, 'Tencent Cloud'],
    [14061, 'DigitalOcean'],
    [24940, 'Hetzner'],
    [213230, 'Hetzner Cloud'],
    [16276, 'OVH'],
    [63949, 'Linode/Akamai'],
    [20473, 'Vultr'],
    [12876, 'Scaleway'],
    [51167, 'Contabo'],
    [60781, 'Leaseweb NL'],
    [28753, 'Leaseweb global'],
  ];

  test.each(DATA_CENTER_ASNS)('DC ASN %i (%s) → asn=cloud', (asn) => {
    expect(extractNetFeatures(asn)).toBe('asn=cloud');
  });

  // ── Input type handling ────────────────────────────────────────────────
  expectCategory('numeric input (number, not string) → works', 16509, CLOUD);
  expectCategory('string number for consumer → works', '7018', CONSUMER);

  expectNull('null → null', null);
  expectNull('undefined → null', undefined);
  expectNull('empty string → null', '');

  // NOTE: Test matrix suggests non-numeric → asn=unknown.
  // Current implementation returns null (omit header) since a
  // non-numeric ASN is a malformed input, not a valid category.
  expectNull('non-numeric string → null', 'abc');

  expectCategory('zero → asn=consumer (not in cloud set)', 0, CONSUMER);
  expectCategory('negative number → asn=consumer (not in cloud set)', -1, CONSUMER);
  expectCategory('very large ASN → asn=consumer', 999999, CONSUMER);

  test('output is valid SF-Dictionary', () => {
    const serialized = extractNetFeatures('16509');
    expect(isValidSFDictionary(serialized)).toBe(true);
  });
});
|
|
274
|
+
|
|
275
|
+
// ── §6.2.5 extractCHFeatures ──────────────────────────────────────────────
|
|
276
|
+
|
|
277
|
+
describe('extractCHFeatures', () => {
  const CHROME_134_CH = '"Google Chrome";v="134", "Chromium";v="134", "Not:A-Brand";v="24"';
  const CHROME_134_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36';

  // Registers a test asserting the exact output for a (Sec-CH-UA, User-Agent) pair.
  const expectFeatures = (title, clientHints, userAgent, features) => {
    test(title, () => {
      expect(extractCHFeatures(clientHints, userAgent)).toBe(features);
    });
  };

  // Registers a test asserting that the pair yields null.
  const expectNull = (title, clientHints, userAgent) => {
    test(title, () => {
      expect(extractCHFeatures(clientHints, userAgent)).toBeNull();
    });
  };

  expectFeatures(
    'Chrome 134 with matching UA → present, brands=3, grease, consistent',
    CHROME_134_CH,
    CHROME_134_UA,
    'present, brands=3, grease, consistent',
  );

  expectFeatures(
    'version mismatch → no consistent',
    CHROME_134_CH,
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Chrome/120.0.0.0 Safari/537.36',
    'present, brands=3, grease',
  );

  expectFeatures(
    'no User-Agent → present but no consistent',
    CHROME_134_CH,
    null,
    'present, brands=3, grease',
  );

  expectFeatures(
    'CH without GREASE brand',
    '"Google Chrome";v="134", "Chromium";v="134"',
    CHROME_134_UA,
    'present, brands=2, consistent',
  );

  expectNull('null CH → null (Firefox, etc.)', null, CHROME_134_UA);
  expectNull('empty CH → null', '', CHROME_134_UA);
  expectNull('whitespace-only CH → null', ' ', CHROME_134_UA);

  expectFeatures(
    'Edge CH (different brand name) with Chromium match',
    '"Microsoft Edge";v="134", "Chromium";v="134", "Not-A.Brand";v="99"',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0',
    'present, brands=3, grease, consistent',
  );

  // Google Chrome brand matches CH version extractor, and Chrome/100 in UA matches
  expectFeatures(
    'single brand (Google Chrome only) → brands=1, consistent',
    '"Google Chrome";v="100"',
    'Mozilla/5.0 Chrome/100.0.0.0 Safari/537.36',
    'present, brands=1, consistent',
  );

  test('output is valid SF-Dictionary', () => {
    const serialized = extractCHFeatures(CHROME_134_CH, CHROME_134_UA);
    expect(isValidSFDictionary(serialized)).toBe(true);
  });
});
|
|
334
|
+
|
|
335
|
+
// ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
|
|
336
|
+
|
|
337
|
+
describe('extractUAFeatures', () => {
  // Shared User-Agent fixtures.
  const CHROME_MAC = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36';
  const FIREFOX_WIN = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0';
  const SAFARI_IOS = 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1';
  const EDGE_WIN = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0';
  const CURL = 'curl/7.88.1';
  const HEADLESS_CHROME = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/134.0.0.0 Safari/537.36';
  const PUPPETEER = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/134.0.0.0 Safari/537.36 Puppeteer';
  const GOOGLEBOT = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
  const IPAD = 'Mozilla/5.0 (iPad; CPU OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1';
  const ANDROID_TABLET = 'Mozilla/5.0 (Linux; Android 13; SM-X200) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
  const ANDROID_PHONE = 'Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36';
  const SAFARI_MAC = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15';

  // Registers a test asserting that the output for `userAgent` matches `pattern`.
  const expectMatch = (title, userAgent, pattern) => {
    test(title, () => {
      expect(extractUAFeatures(userAgent)).toMatch(pattern);
    });
  };

  // Registers a test asserting that the output for `userAgent` does NOT match `pattern`.
  const expectNoMatch = (title, userAgent, pattern) => {
    test(title, () => {
      expect(extractUAFeatures(userAgent)).not.toMatch(pattern);
    });
  };

  // Registers a test asserting the exact output string for `userAgent`.
  const expectExact = (title, userAgent, features) => {
    test(title, () => {
      expect(extractUAFeatures(userAgent)).toBe(features);
    });
  };

  // Registers a test asserting that `userAgent` yields null.
  const expectNull = (title, userAgent) => {
    test(title, () => {
      expect(extractUAFeatures(userAgent)).toBeNull();
    });
  };

  // ── dpf compound token ─────────────────────────────────────────────────
  expectMatch('Chrome on Mac → desktop/mac/chrome', CHROME_MAC, /^dpf=desktop\/mac\/chrome/);
  expectMatch('Firefox on Windows → desktop/windows/firefox', FIREFOX_WIN, /^dpf=desktop\/windows\/firefox/);
  expectMatch('Safari on iPhone → mobile/ios/safari', SAFARI_IOS, /^dpf=mobile\/ios\/safari/);
  expectMatch('Edge on Windows → desktop/windows/edge', EDGE_WIN, /^dpf=desktop\/windows\/edge/);
  expectMatch('curl → unknown/other/other', CURL, /^dpf=unknown\/other\/other/);
  expectMatch('HeadlessChrome → desktop/linux/chrome', HEADLESS_CHROME, /^dpf=desktop\/linux\/chrome/);
  expectMatch('Googlebot → server/other/bot', GOOGLEBOT, /dpf=server\/other\/bot/);

  // ── new device types ───────────────────────────────────────────────────
  expectMatch(
    'Smart TV (Tizen) → smarttv device',
    'Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/537.36 Chrome/69.0.3497.106 TV Safari/537.36',
    /^dpf=smarttv\//,
  );
  expectMatch(
    'Smart TV (WebOS) → smarttv device',
    'Mozilla/5.0 (Web0S; Linux/SmartTV) AppleWebKit/537.36 WebOS TV/5.0',
    /^dpf=smarttv\//,
  );
  expectMatch(
    'Smart TV (Fire TV) → smarttv device',
    'Mozilla/5.0 (Linux; Android 9; AFTS Build/PS7233) AppleWebKit/537.36 (KHTML, like Gecko) Silk/120.4.1 like Chrome/120.0.0.0 Mobile Safari/537.36 Fire TV',
    /^dpf=smarttv\//,
  );
  expectMatch(
    'PlayStation → console device',
    'Mozilla/5.0 (PlayStation 5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Safari/605.1.15',
    /^dpf=console\//,
  );
  expectMatch(
    'Xbox → console device',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edge/44.18363.8131',
    /^dpf=console\//,
  );
  expectMatch(
    'Nintendo → console device',
    'Mozilla/5.0 (Nintendo Switch; WifiWebAuthApplet) AppleWebKit/606.4 (KHTML, like Gecko) NF/6.0.1.16.10 NintendoBrowser/5.1.0.22474',
    /^dpf=console\//,
  );
  expectMatch(
    'Meta Quest (Oculus) → vr device',
    'Mozilla/5.0 (Linux; Android 12; Quest 3) AppleWebKit/537.36 (KHTML, like Gecko) OculusBrowser/33.0 Chrome/126.0.6478.122 Mobile VR Safari/537.36',
    /^dpf=vr\//,
  );
  expectMatch(
    'Apple Watch → wearable device',
    'Mozilla/5.0 (Watch; CPU Watch OS 10_0 like Mac OS X) AppleWebKit/605.1.15',
    /^dpf=wearable\//,
  );
  expectMatch(
    'Tesla → car device',
    'Mozilla/5.0 (X11; GNU/Linux; Tesla) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    /^dpf=car\//,
  );

  // ── new platforms ──────────────────────────────────────────────────────
  expectMatch(
    'ChromeOS → chromeos platform',
    'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    /dpf=desktop\/chromeos\/chrome/,
  );
  expectMatch(
    'FreeBSD → freebsd platform',
    'Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    /dpf=desktop\/freebsd\/chrome/,
  );

  // ── new browser families ───────────────────────────────────────────────
  expectMatch(
    'UC Browser → ucbrowser family',
    'Mozilla/5.0 (Linux; Android 10; SM-A505F) AppleWebKit/537.36 (KHTML, like Gecko) UCBrowser/16.0.1.3715 Mobile Safari/537.36',
    /dpf=mobile\/android\/ucbrowser/,
  );
  expectMatch('UCWEB variant → ucbrowser family', 'UCWEB/2.0 (Java; U; MIDP-2.0)', /\/ucbrowser/);
  expectMatch(
    'Opera (OPR/) → chrome family (Chromium-based)',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 OPR/120.0.0.0',
    /\/chrome/,
  );
  // Brave UA is intentionally identical to Chrome
  expectMatch(
    'Brave → chrome family (indistinguishable UA)',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    /\/chrome/,
  );

  // ── AI/SEO bot detection ───────────────────────────────────────────────
  expectMatch(
    'GPTBot → bot family',
    'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)',
    /\/bot/,
  );
  expectMatch(
    'ClaudeBot → bot family',
    'Mozilla/5.0 (compatible; ClaudeBot/1.0; +https://anthropic.com)',
    /\/bot/,
  );
  expectMatch('CCBot → bot family', 'CCBot/2.0 (https://commoncrawl.org/faq/)', /\/bot/);
  expectMatch(
    'Bytespider → bot family',
    'Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (compatible; Bytespider; spider-feedback@bytedance.com)',
    /\/bot/,
  );
  expectMatch(
    'Applebot → bot family',
    'Mozilla/5.0 (compatible; Applebot/0.1; +http://www.apple.com/go/applebot)',
    /\/bot/,
  );
  expectMatch(
    'SemrushBot → bot family',
    'Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)',
    /\/bot/,
  );
  expectMatch(
    'AhrefsBot → bot family',
    'Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)',
    /\/bot/,
  );

  // ── new automation markers ─────────────────────────────────────────────
  expectMatch('Scrapy → automation', 'Scrapy/2.11.0 (+https://scrapy.org)', /\bautomation\b/);
  expectMatch('Java HttpURLConnection → automation', 'Java/17.0.2', /\bautomation\b/);
  expectMatch('PostmanRuntime → automation', 'PostmanRuntime/7.36.1', /\bautomation\b/);
  expectMatch('Deno → automation', 'Deno/1.40.0', /\bautomation\b/);
  expectMatch('httpx → automation', 'python-httpx/0.27.0', /\bautomation\b/);

  expectMatch('iPad → tablet/ios/safari', IPAD, /^dpf=tablet\/ios\/safari/);
  expectMatch('Android tablet (no "Mobile") → tablet/android/chrome', ANDROID_TABLET, /^dpf=tablet\/android\/chrome/);
  expectMatch('Android phone → mobile/android/chrome', ANDROID_PHONE, /^dpf=mobile\/android\/chrome/);
  expectMatch('Safari on Mac → desktop/mac/safari', SAFARI_MAC, /^dpf=desktop\/mac\/safari/);

  // ── version bucketing ──────────────────────────────────────────────────
  expectMatch('Chrome 134 → ver=120-139', CHROME_MAC, /ver=120-139/);
  expectMatch('curl/7.88.1 → ver=0-79', CURL, /ver=0-79/);

  // Bucket boundary tests: verify version numbers at edges of each range
  // Math-based 20-version spans starting at 80, capped at 420+.
  const VERSION_BUCKET_BOUNDARIES = [
    ['Chrome/79.0.0.0', '0-79'],
    ['Chrome/80.0.0.0', '80-99'],
    ['Chrome/99.0.0.0', '80-99'],
    ['Chrome/100.0.0.0', '100-119'],
    ['Chrome/119.0.0.0', '100-119'],
    ['Chrome/120.0.0.0', '120-139'],
    ['Chrome/139.0.0.0', '120-139'],
    ['Chrome/140.0.0.0', '140-159'],
    ['Chrome/159.0.0.0', '140-159'],
    ['Chrome/160.0.0.0', '160-179'],
    ['Chrome/200.0.0.0', '200-219'],
    ['Chrome/400.0.0.0', '400-419'],
    ['Chrome/419.0.0.0', '400-419'],
    ['Chrome/420.0.0.0', '420+'],
    ['Chrome/999.0.0.0', '420+'],
  ];

  test.each(VERSION_BUCKET_BOUNDARIES)('version bucket boundary: %s → ver=%s', (chromeToken, expected) => {
    // Wrap in a minimal browser-like UA so detectDevice/detectPlatform work
    const ua = `Mozilla/5.0 (X11; Linux x86_64) ${chromeToken} Safari/537.36`;
    // "+" is a regex quantifier, so escape it before building the pattern.
    const escapedBucket = expected.replace('+', '\\+');
    const bucketPattern = new RegExp(`ver=${escapedBucket}`);
    expect(extractUAFeatures(ua)).toMatch(bucketPattern);
  });

  // ── browser flag ───────────────────────────────────────────────────────
  expectMatch('Mozilla/ prefix → browser flag present', CHROME_MAC, /\bbrowser\b/);
  expectNoMatch('curl → no browser flag', CURL, /\bbrowser\b/);

  // ── headless / automation ──────────────────────────────────────────────
  expectMatch('HeadlessChrome → headless flag', HEADLESS_CHROME, /\bheadless\b/);
  expectNoMatch('HeadlessChrome → no automation flag (headless only)', HEADLESS_CHROME, /\bautomation\b/);

  test('Puppeteer → both headless and automation', () => {
    const features = extractUAFeatures(PUPPETEER);
    expect(features).toMatch(/\bheadless\b/);
    expect(features).toMatch(/\bautomation\b/);
  });

  expectMatch('python-requests → automation', 'python-requests/2.31.0', /\bautomation\b/);
  expectMatch('Selenium → automation', 'Mozilla/5.0 Selenium/4.0', /\bautomation\b/);
  expectMatch('wget → automation', 'wget/1.21.4', /\bautomation\b/);

  test('normal Chrome → no headless or automation', () => {
    const features = extractUAFeatures(CHROME_MAC);
    expect(features).not.toMatch(/\bheadless\b/);
    expect(features).not.toMatch(/\bautomation\b/);
  });

  // ── entropy ────────────────────────────────────────────────────────────
  expectMatch('normal browser UA → entropy=medium', CHROME_MAC, /entropy=medium/);
  expectMatch('curl (short) → entropy=low', CURL, /entropy=low/);
  expectMatch(
    'very long UA (>300 chars) → entropy=low',
    `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ${'A'.repeat(300)}`,
    /entropy=low/,
  );
  expectMatch('minimal short string → entropy=low', 'bot', /entropy=low/);

  // ── null/empty ─────────────────────────────────────────────────────────
  expectNull('null → null', null);
  expectNull('empty string → null', '');
  expectNull('whitespace only → null', ' ');

  // ── spec Appendix B examples (exact output match) ──────────────────────
  expectExact(
    'spec example: Chrome on Mac',
    CHROME_MAC,
    'dpf=desktop/mac/chrome, ver=120-139, browser, entropy=medium',
  );
  expectExact(
    'spec example: HeadlessChrome',
    HEADLESS_CHROME,
    'dpf=desktop/linux/chrome, ver=120-139, browser, headless, entropy=medium',
  );
  expectExact(
    'spec example: Puppeteer',
    PUPPETEER,
    'dpf=desktop/linux/chrome, ver=120-139, browser, headless, automation, entropy=medium',
  );

  test('output is valid SF-Dictionary', () => {
    const serialized = extractUAFeatures(CHROME_MAC);
    expect(isValidSFDictionary(serialized)).toBe(true);
  });

  test('all dpf compound values use slash separators (no spaces)', () => {
    const sampleAgents = [CHROME_MAC, FIREFOX_WIN, SAFARI_IOS, EDGE_WIN, CURL, HEADLESS_CHROME, GOOGLEBOT, IPAD, ANDROID_TABLET];
    sampleAgents.forEach((userAgent) => {
      const features = extractUAFeatures(userAgent);
      // Capture group 1 is the dpf value: everything up to the first comma.
      const [, dpfValue] = features.match(/^dpf=([^,]+)/);
      expect(dpfValue.split('/')).toHaveLength(3);
    });
  });
});
|
|
662
|
+
|
|
663
|
+
// ── §6.3.2 computeUAHMAC ──────────────────────────────────────────────────
|
|
664
|
+
|
|
665
|
+
describe('computeUAHMAC', () => {
  // Fixed inputs shared by every case in this suite.
  const SAMPLE_UA = 'Mozilla/5.0 Chrome/134.0.0.0';
  const SECRET = 'test-hmac-secret-key';

  // Shape the suite expects from a successful call: a base64 payload wrapped
  // in ':' delimiters (RFC 8941 Byte Sequence). No /g flag, so the regex is
  // stateless and safe to reuse across tests.
  const BYTE_SEQUENCE = /^:[A-Za-z0-9+/]+=*:$/;

  test('returns RFC 8941 Byte Sequence format (:base64:)', async () => {
    const digest = await computeUAHMAC(SAMPLE_UA, SECRET);
    expect(digest).toMatch(BYTE_SEQUENCE);
  });

  test('deterministic — same input produces same output', async () => {
    const first = await computeUAHMAC(SAMPLE_UA, SECRET);
    const second = await computeUAHMAC(SAMPLE_UA, SECRET);
    expect(first).toBe(second);
  });

  test('different UA → different HMAC', async () => {
    const browserDigest = await computeUAHMAC(SAMPLE_UA, SECRET);
    const curlDigest = await computeUAHMAC('curl/7.88.1', SECRET);
    expect(browserDigest).not.toBe(curlDigest);
  });

  test('different key → different HMAC', async () => {
    const withSecret = await computeUAHMAC(SAMPLE_UA, SECRET);
    const withOtherKey = await computeUAHMAC(SAMPLE_UA, 'different-key');
    expect(withSecret).not.toBe(withOtherKey);
  });

  // ── missing inputs: the suite expects null rather than a digest ─────────
  test('null UA → null', async () => {
    const digest = await computeUAHMAC(null, SECRET);
    expect(digest).toBeNull();
  });

  test('null key → null', async () => {
    const digest = await computeUAHMAC(SAMPLE_UA, null);
    expect(digest).toBeNull();
  });

  // NOTE: The test matrix suggests an empty UA should still produce an HMAC.
  // The current implementation returns null instead (an empty string is
  // falsy and carries no useful signal to HMAC). If the spec intent changes,
  // update this expectation.
  test('empty UA → null', async () => {
    const digest = await computeUAHMAC('', SECRET);
    expect(digest).toBeNull();
  });

  test('empty key → null', async () => {
    const digest = await computeUAHMAC(SAMPLE_UA, '');
    expect(digest).toBeNull();
  });

  test('very long UA still produces valid HMAC', async () => {
    const oversizedUA = 'Mozilla/5.0 ' + 'X'.repeat(5000);
    const digest = await computeUAHMAC(oversizedUA, SECRET);
    expect(digest).toMatch(BYTE_SEQUENCE);
  });

  test('HMAC length is consistent (44-char base64 = 256-bit digest)', async () => {
    const digest = await computeUAHMAC(SAMPLE_UA, SECRET);
    // SHA-256 → 32 bytes → 44 base64 chars; drop the leading and trailing ':'
    // before measuring.
    expect(digest.slice(1, -1)).toHaveLength(44);
  });
});
|
|
724
|
+
|
|
725
|
+
// ── §6.4 computeConfidenceToken ───────────────────────────────────────────
|
|
726
|
+
|
|
727
|
+
describe('computeConfidenceToken', () => {
  // Baseline header values; individual tests vary one of them at a time.
  const BASE_UA = 'Mozilla/5.0 Chrome/134.0.0.0';
  const BASE_LANG = 'en-US,en;q=0.9';
  const BASE_CH = '"Google Chrome";v="134"';

  // Every token asserted in this suite is exactly eight lowercase hex digits.
  const HEX8 = /^[0-9a-f]{8}$/;

  test('returns 8-char hex string', async () => {
    const token = await computeConfidenceToken(BASE_UA, BASE_LANG, BASE_CH);
    expect(token).toMatch(HEX8);
  });

  test('deterministic — same inputs produce same output', async () => {
    const first = await computeConfidenceToken(BASE_UA, BASE_LANG, BASE_CH);
    const second = await computeConfidenceToken(BASE_UA, BASE_LANG, BASE_CH);
    expect(first).toBe(second);
  });

  // ── sensitivity: changing any single input must change the token ────────
  test('different UA → different token', async () => {
    const browserToken = await computeConfidenceToken(BASE_UA, BASE_LANG, BASE_CH);
    const curlToken = await computeConfidenceToken('curl/7.88.1', BASE_LANG, BASE_CH);
    expect(browserToken).not.toBe(curlToken);
  });

  test('different language → different token', async () => {
    const english = await computeConfidenceToken(BASE_UA, 'en-US', BASE_CH);
    const french = await computeConfidenceToken(BASE_UA, 'fr-FR', BASE_CH);
    expect(english).not.toBe(french);
  });

  test('different CH → different token', async () => {
    const v134 = await computeConfidenceToken(BASE_UA, BASE_LANG, '"Chrome";v="134"');
    const v120 = await computeConfidenceToken(BASE_UA, BASE_LANG, '"Chrome";v="120"');
    expect(v134).not.toBe(v120);
  });

  // ── missing inputs still yield a well-formed token ──────────────────────
  test('null inputs treated as empty strings (still produces token)', async () => {
    const token = await computeConfidenceToken(null, null, null);
    expect(token).toMatch(HEX8);
  });

  test('partial inputs work (missing lang/ch)', async () => {
    const token = await computeConfidenceToken(BASE_UA, null, null);
    expect(token).toMatch(HEX8);
  });

  test('order of concatenation matters (UA+lang+CH ≠ lang+UA+CH)', async () => {
    // Exchanging the first two arguments must not yield the same token.
    const original = await computeConfidenceToken('A', 'B', 'C');
    const swapped = await computeConfidenceToken('B', 'A', 'C');
    expect(original).not.toBe(swapped);
  });
});
|
|
778
|
+
|
|
779
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
780
|
+
// loadVAIMetadata — Dynamic metadata loading (paywalls-site-fc4)
|
|
781
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
782
|
+
|
|
783
|
+
/**
 * Build a stand-in for `fetch` that always resolves with `response`.
 *
 * The returned async function also carries bookkeeping for assertions:
 *   - `callCount()` — how many times it has been invoked so far
 *   - `_lastUrl`    — first argument of the most recent call (null before any call)
 *   - `_lastOpts`   — second argument of the most recent call (null before any call)
 *
 * @param {*} response - value every call resolves with (returned as-is, not copied)
 * @returns {Function} async `(url, opts)` function with the tracking members above
 */
function mockFetchWith(response) {
  let invocations = 0;

  async function fakeFetch(url, opts) {
    invocations += 1;
    // Only the latest call is remembered; earlier arguments are overwritten.
    Object.assign(fakeFetch, { _lastUrl: url, _lastOpts: opts });
    return response;
  }

  return Object.assign(fakeFetch, {
    callCount: () => invocations,
    _lastUrl: null,
    _lastOpts: null,
  });
}
|
|
797
|
+
|
|
798
|
+
/**
 * Build a stand-in for `fetch` whose every call rejects with `error`.
 *
 * Exposes the same tracking surface as `mockFetchWith`, so a test can confirm
 * that a fetch was actually attempted before it failed:
 *   - `callCount()` — how many times it has been invoked so far
 *   - `_lastUrl`    — first argument of the most recent call (null before any call)
 *   - `_lastOpts`   — second argument of the most recent call (null before any call)
 *
 * @param {*} error - value every call rejects with (thrown as-is)
 * @returns {Function} async `(url, opts)` function that always rejects
 */
function mockFetchReject(error) {
  let calls = 0;
  const fn = async (url, opts) => {
    // Record the attempt first so it is visible even though the call fails.
    calls++;
    fn._lastUrl = url;
    fn._lastOpts = opts;
    throw error;
  };
  fn.callCount = () => calls;
  fn._lastUrl = null;
  fn._lastOpts = null;
  return fn;
}
|
|
801
|
+
|
|
802
|
+
describe('loadVAIMetadata', () => {
  // Globals that individual tests overwrite; captured once so afterEach can
  // put them back.
  const savedFetch = globalThis.fetch;
  const savedConsoleError = console.error;
  const cfg = { paywallsAPIHost: 'https://cloud-api.example.com' };

  // UA used wherever a test probes the automation marker.
  const PUPPETEER_UA = 'Mozilla/5.0 Puppeteer/1.0';

  /**
   * Successful fetch response whose JSON body is a version-1 metadata
   * document. Fields not named in `overrides` keep the baseline values
   * below; the body object is rebuilt on every json() call.
   */
  const okMetadata = (overrides = {}) => ({
    ok: true,
    json: async () => ({
      version: 1,
      dc_asns: [16509],
      automation_patterns: ['Puppeteer'],
      headless_patterns: ['HeadlessChrome'],
      bot_patterns: ['Googlebot'],
      ...overrides,
    }),
  });

  /** Swap the global fetch for `fetchImpl` and hand it back for assertions. */
  const installFetch = (fetchImpl) => {
    globalThis.fetch = fetchImpl;
    return fetchImpl;
  };

  /** Silence console.error for tests that expect a failure to be logged. */
  const muteConsoleError = () => {
    console.error = () => {};
  };

  afterEach(() => {
    globalThis.fetch = savedFetch;
    console.error = savedConsoleError;
    _resetVAIMetadata();
  });

  test('updates DC_ASN_SET from fetched metadata', async () => {
    // Baseline: the built-in defaults classify 16509 (AWS) as cloud.
    expect(extractNetFeatures(16509)).toBe('asn=cloud');
    expect(extractNetFeatures(99999)).toBe('asn=consumer');

    // Serve metadata in which 99999 is the only cloud ASN.
    const fetchSpy = installFetch(mockFetchWith(okMetadata({ dc_asns: [99999] })));

    await loadVAIMetadata(cfg);

    // Classification flips: 99999 becomes cloud, 16509 does not.
    expect(extractNetFeatures(99999)).toBe('asn=cloud');
    expect(extractNetFeatures(16509)).toBe('asn=consumer');
    expect(fetchSpy.callCount()).toBe(1);
    expect(fetchSpy._lastUrl).toBe('https://cloud-api.example.com/pw/vai/metadata');
    expect(fetchSpy._lastOpts.method).toBe('GET');
  });

  test('updates automation markers from fetched metadata', async () => {
    // Baseline: the built-in list flags Puppeteer.
    const before = extractUAFeatures(PUPPETEER_UA);
    expect(before).toContain('automation');

    // Serve an automation list that omits Puppeteer.
    installFetch(mockFetchWith(okMetadata({ automation_patterns: ['CustomBot'] })));

    await loadVAIMetadata(cfg);

    // Puppeteer is no longer flagged ...
    const after = extractUAFeatures(PUPPETEER_UA);
    expect(after).not.toContain('automation');

    // ... while the newly listed pattern is.
    const custom = extractUAFeatures('Mozilla/5.0 CustomBot/2.0');
    expect(custom).toContain('automation');
  });

  test('updates bot patterns from fetched metadata', async () => {
    // Baseline: the built-in list recognises Googlebot.
    const before = extractUAFeatures('Googlebot/2.1');
    expect(before).toContain('bot');

    // Serve a bot list containing only a different crawler.
    installFetch(mockFetchWith(okMetadata({ bot_patterns: ['NewAIBot'] })));

    await loadVAIMetadata(cfg);

    // Googlebot drops out of the bot family ...
    const afterGoogle = extractUAFeatures('Googlebot/2.1');
    expect(afterGoogle).not.toContain('dpf=server/other/bot');

    // ... and the new crawler is picked up.
    const afterNew = extractUAFeatures('NewAIBot/1.0');
    expect(afterNew).toContain('bot');
  });

  test('falls back to defaults when fetch fails (network error)', async () => {
    installFetch(mockFetchReject(new Error('Network error')));
    muteConsoleError(); // the failure is expected to be logged

    await loadVAIMetadata(cfg);

    // Built-in ASN set is still in force: 16509 is cloud.
    expect(extractNetFeatures(16509)).toBe('asn=cloud');
    // Built-in automation markers are still in force too.
    const ua = extractUAFeatures(PUPPETEER_UA);
    expect(ua).toContain('automation');
  });

  test('falls back to defaults when fetch returns non-OK', async () => {
    installFetch(
      mockFetchWith({
        ok: false,
        status: 500,
        statusText: 'Internal Server Error',
      }),
    );
    muteConsoleError(); // the failure is expected to be logged

    await loadVAIMetadata(cfg);

    // Built-in ASN set is still in force.
    expect(extractNetFeatures(16509)).toBe('asn=cloud');
  });

  test('falls back to defaults when response has invalid schema', async () => {
    installFetch(
      mockFetchWith({
        ok: true,
        json: async () => ({ invalid: true }), // no `version` field
      }),
    );
    muteConsoleError(); // the failure is expected to be logged

    await loadVAIMetadata(cfg);

    // Built-in ASN set is still in force.
    expect(extractNetFeatures(16509)).toBe('asn=cloud');
  });

  test('caches metadata and does not re-fetch within TTL', async () => {
    const fetchSpy = installFetch(mockFetchWith(okMetadata()));

    await loadVAIMetadata(cfg);
    expect(fetchSpy.callCount()).toBe(1);

    // An immediate second load (within 1 hour) must not hit fetch again.
    await loadVAIMetadata(cfg);
    expect(fetchSpy.callCount()).toBe(1);
  });

  test('_resetVAIMetadata restores hardcoded defaults', async () => {
    // Replace every list with a custom single-entry one.
    installFetch(
      mockFetchWith(
        okMetadata({
          dc_asns: [99999],
          automation_patterns: ['OnlyThisOne'],
          headless_patterns: ['OnlyHeadless'],
          bot_patterns: ['OnlyBot'],
        }),
      ),
    );

    await loadVAIMetadata(cfg);
    expect(extractNetFeatures(99999)).toBe('asn=cloud');
    expect(extractNetFeatures(16509)).toBe('asn=consumer');

    // Discard the loaded metadata.
    _resetVAIMetadata();

    // Classification is back to the built-in behaviour.
    expect(extractNetFeatures(16509)).toBe('asn=cloud');
    expect(extractNetFeatures(99999)).toBe('asn=consumer');
  });

  test('ignores empty arrays in metadata (keeps defaults)', async () => {
    installFetch(
      mockFetchWith(
        okMetadata({
          dc_asns: [],
          automation_patterns: [],
          headless_patterns: [],
          bot_patterns: [],
        }),
      ),
    );

    await loadVAIMetadata(cfg);

    // Empty lists must not wipe out the built-in ones.
    expect(extractNetFeatures(16509)).toBe('asn=cloud');
    const ua = extractUAFeatures(PUPPETEER_UA);
    expect(ua).toContain('automation');
  });
});
|