@paywalls-net/filter 1.3.8 → 1.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/jest.config.js +7 -0
- package/package.json +6 -2
- package/src/index.js +67 -32
- package/src/signal-extraction.js +385 -0
- package/src/user-agent-classification.js +2 -0
- package/tests/proxy-vai-request.test.js +379 -0
- package/tests/signal-extraction.test.js +624 -0
|
@@ -0,0 +1,624 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for signal extraction functions (Tier 2 + Tier 3)
|
|
3
|
+
*
|
|
4
|
+
* Spec: specs/vai-privacy-v2.spec.md §6.2–§6.4
|
|
5
|
+
* Issue: paywalls-site-drk
|
|
6
|
+
*/
|
|
7
|
+
import {
|
|
8
|
+
extractAcceptFeatures,
|
|
9
|
+
extractEncodingFeatures,
|
|
10
|
+
extractLanguageFeatures,
|
|
11
|
+
extractNetFeatures,
|
|
12
|
+
extractCHFeatures,
|
|
13
|
+
extractUAFeatures,
|
|
14
|
+
computeUAHMAC,
|
|
15
|
+
computeConfidenceToken,
|
|
16
|
+
} from '../src/signal-extraction.js';
|
|
17
|
+
|
|
18
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
19
|
+
// Helpers: SF-Dictionary format validation (RFC 8941)
|
|
20
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
21
|
+
|
|
22
|
+
/**
 * Report whether `str` is a well-formed SF-Dictionary in the dialect this
 * protocol emits (RFC 8941 plus two extensions).
 *
 * Extensions over strict RFC 8941 values:
 *   - compound path values, e.g. dpf=desktop/mac/chrome
 *   - version ranges, e.g. ver=120-139 or ver=140+
 *
 * Members must be separated by exactly ", ". Each member is either a bare
 * key (boolean true) or a key=value pair:
 *   - key:   ( lcalpha / "*" ) *( lcalpha / DIGIT / "_" / "-" / "." / "*" )
 *   - value: starts with an alphanumeric or "*", followed by token
 *            characters, "/" or "-"
 *
 * @param {*} str - Candidate header value; non-strings are rejected.
 * @returns {boolean} true when every member is valid, false otherwise
 *   (including for the empty string).
 */
function isValidSFDictionary(str) {
  if (typeof str !== 'string' || str === '') {
    return false;
  }

  const keyPattern = /^[a-z*][a-z0-9_\-.*]*$/;
  // Extended value grammar: alphanumeric/* first, then tchar subset plus "/".
  const valuePattern = /^[A-Za-z0-9*][A-Za-z0-9!#$&'*+.^_|~\/-]*$/;

  return str.split(', ').every((member) => {
    const separatorAt = member.indexOf('=');

    // No "=" means a bare key, i.e. boolean true.
    if (separatorAt < 0) {
      return keyPattern.test(member);
    }

    const name = member.slice(0, separatorAt);
    const payload = member.slice(separatorAt + 1);
    return keyPattern.test(name) && valuePattern.test(payload);
  });
}
|
|
55
|
+
|
|
56
|
+
// ── §6.2.1 extractAcceptFeatures ──────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
describe('extractAcceptFeatures', () => {
  // Headers containing at least one recognized media type produce a feature list.
  test('typical browser Accept → html, wildcard', () => {
    const acceptHeader = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    const features = extractAcceptFeatures(acceptHeader);
    expect(features).toBe('html, wildcard');
  });

  test('API client → json', () => {
    const features = extractAcceptFeatures('application/json');
    expect(features).toBe('json');
  });

  test('curl default → wildcard', () => {
    const features = extractAcceptFeatures('*/*');
    expect(features).toBe('wildcard');
  });

  test('image request → image', () => {
    const acceptHeader = 'image/webp,image/apng,image/*,*/*;q=0.8';
    const features = extractAcceptFeatures(acceptHeader);
    expect(features).toBe('wildcard, image');
  });

  test('combined html + json + wildcard', () => {
    const acceptHeader = 'text/html, application/json, */*';
    const features = extractAcceptFeatures(acceptHeader);
    expect(features).toBe('html, wildcard, json');
  });

  // Absent headers, and headers with no recognized type, produce null.
  test.each([
    ['null input → null', null],
    ['undefined input → null', undefined],
    ['empty string → null', ''],
    ['unrecognized type only → null', 'application/xml'],
  ])('%s', (_title, acceptHeader) => {
    expect(extractAcceptFeatures(acceptHeader)).toBeNull();
  });

  test('output is valid SF-Dictionary', () => {
    const features = extractAcceptFeatures('text/html,application/json,*/*;q=0.8');
    const wellFormed = isValidSFDictionary(features);
    expect(wellFormed).toBe(true);
  });
});
|
|
103
|
+
|
|
104
|
+
// ── §6.2.2 extractEncodingFeatures ────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
describe('extractEncodingFeatures', () => {
  // Headers that list br and/or gzip produce a feature list.
  test('modern browser → br, gzip, modern', () => {
    const encodingHeader = 'gzip, deflate, br, zstd';
    const features = extractEncodingFeatures(encodingHeader);
    expect(features).toBe('br, gzip, modern');
  });

  test('gzip only → gzip (no modern)', () => {
    const features = extractEncodingFeatures('gzip, deflate');
    expect(features).toBe('gzip');
  });

  test('br only → br (no modern)', () => {
    const features = extractEncodingFeatures('br');
    expect(features).toBe('br');
  });

  // Absent headers, and headers with neither br nor gzip, produce null.
  test.each([
    ['null → null', null],
    ['empty string → null', ''],
    ['deflate only (no br/gzip) → null', 'deflate'],
    ['zstd alone (no br/gzip) → null (not yet a tracked feature)', 'zstd'],
  ])('%s', (_title, encodingHeader) => {
    expect(extractEncodingFeatures(encodingHeader)).toBeNull();
  });

  test('output is valid SF-Dictionary', () => {
    const features = extractEncodingFeatures('gzip, deflate, br');
    const wellFormed = isValidSFDictionary(features);
    expect(wellFormed).toBe(true);
  });
});
|
|
141
|
+
|
|
142
|
+
// ── §6.2.3 extractLanguageFeatures ────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
describe('extractLanguageFeatures', () => {
  test('typical browser → present, primary=en, count=3', () => {
    const languageHeader = 'en-US,en;q=0.9,fr;q=0.8';
    const features = extractLanguageFeatures(languageHeader);
    expect(features).toBe('present, primary=en, count=3');
  });

  test('single locale → count=1', () => {
    const features = extractLanguageFeatures('ja');
    expect(features).toBe('present, primary=ja, count=1');
  });

  test('primary with region → primary extracts 2-char family', () => {
    const features = extractLanguageFeatures('fr-FR');
    expect(features).toBe('present, primary=fr, count=1');
  });

  // NOTE: the test matrix suggests "*" → present, primary=other, count=1.
  // The implementation currently returns null instead, on the grounds that a
  // wildcard is not a useful locale for privacy classification. Revisit this
  // expectation if the spec intent changes.
  test('wildcard only → null (not a real locale)', () => {
    const features = extractLanguageFeatures('*');
    expect(features).toBeNull();
  });

  // Absent or blank headers produce null.
  test.each([
    ['null → null', null],
    ['empty string → null', ''],
    ['whitespace only → null', ' '],
  ])('%s', (_title, languageHeader) => {
    expect(extractLanguageFeatures(languageHeader)).toBeNull();
  });

  test('Chinese locale → primary=zh', () => {
    const languageHeader = 'zh-CN,zh;q=0.9,en;q=0.8';
    const features = extractLanguageFeatures(languageHeader);
    expect(features).toBe('present, primary=zh, count=3');
  });

  test('many locales → count reflects total', () => {
    const languageHeader = 'en-US,en;q=0.9,fr;q=0.8,de;q=0.7,es;q=0.6,pt;q=0.5';
    const features = extractLanguageFeatures(languageHeader);
    expect(features).toBe('present, primary=en, count=6');
  });

  test('three-letter language code → first 2 chars', () => {
    // "tlh" (Klingon) is expected to be reported as primary=tl.
    const features = extractLanguageFeatures('tlh');
    expect(features).toBe('present, primary=tl, count=1');
  });

  test('output is valid SF-Dictionary', () => {
    const features = extractLanguageFeatures('en-US,en;q=0.9');
    const wellFormed = isValidSFDictionary(features);
    expect(wellFormed).toBe(true);
  });
});
|
|
199
|
+
|
|
200
|
+
// ── §6.2.4 extractNetFeatures ─────────────────────────────────────────────
|
|
201
|
+
|
|
202
|
+
describe('extractNetFeatures', () => {
  // ── Classification boundary ────────────────────────────────────────────
  test('well-known cloud ASN (AWS) → asn=cloud', () => {
    const features = extractNetFeatures('16509');
    expect(features).toBe('asn=cloud');
  });

  test('well-known consumer ISP (Comcast) → asn=consumer', () => {
    const features = extractNetFeatures('7922');
    expect(features).toBe('asn=consumer');
  });

  // ── Input type handling ────────────────────────────────────────────────
  test('numeric input (number, not string) → works', () => {
    const features = extractNetFeatures(16509);
    expect(features).toBe('asn=cloud');
  });

  test('string number for consumer → works', () => {
    const features = extractNetFeatures('7018');
    expect(features).toBe('asn=consumer');
  });

  // Absent ASN values produce null.
  test.each([
    ['null → null', null],
    ['undefined → null', undefined],
    ['empty string → null', ''],
  ])('%s', (_title, asn) => {
    expect(extractNetFeatures(asn)).toBeNull();
  });

  // NOTE: the test matrix suggests non-numeric → asn=unknown. The
  // implementation currently returns null (header omitted) because a
  // non-numeric ASN is treated as malformed input, not as a valid category.
  test('non-numeric string → null', () => {
    const features = extractNetFeatures('abc');
    expect(features).toBeNull();
  });

  test('zero → asn=consumer (not in cloud set)', () => {
    const features = extractNetFeatures(0);
    expect(features).toBe('asn=consumer');
  });

  test('negative number → asn=consumer (not in cloud set)', () => {
    const features = extractNetFeatures(-1);
    expect(features).toBe('asn=consumer');
  });

  test('very large ASN → asn=consumer', () => {
    const features = extractNetFeatures(999999);
    expect(features).toBe('asn=consumer');
  });

  test('output is valid SF-Dictionary', () => {
    const features = extractNetFeatures('16509');
    const wellFormed = isValidSFDictionary(features);
    expect(wellFormed).toBe(true);
  });
});
|
|
257
|
+
|
|
258
|
+
// ── §6.2.5 extractCHFeatures ──────────────────────────────────────────────
|
|
259
|
+
|
|
260
|
+
describe('extractCHFeatures', () => {
  // Shared fixtures: a three-brand Chrome 134 client-hint value and a
  // User-Agent string that reports the same Chrome major version.
  const chrome134Hints = '"Google Chrome";v="134", "Chromium";v="134", "Not:A-Brand";v="24"';
  const chrome134Agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36';

  test('Chrome 134 with matching UA → present, brands=3, grease, consistent', () => {
    const features = extractCHFeatures(chrome134Hints, chrome134Agent);
    expect(features).toBe('present, brands=3, grease, consistent');
  });

  test('version mismatch → no consistent', () => {
    const chrome120Agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Chrome/120.0.0.0 Safari/537.36';
    const features = extractCHFeatures(chrome134Hints, chrome120Agent);
    expect(features).toBe('present, brands=3, grease');
  });

  test('no User-Agent → present but no consistent', () => {
    const features = extractCHFeatures(chrome134Hints, null);
    expect(features).toBe('present, brands=3, grease');
  });

  test('CH without GREASE brand', () => {
    const hintsWithoutGrease = '"Google Chrome";v="134", "Chromium";v="134"';
    const features = extractCHFeatures(hintsWithoutGrease, chrome134Agent);
    expect(features).toBe('present, brands=2, consistent');
  });

  // An absent or blank client-hint value produces null even with a valid UA.
  test.each([
    ['null CH → null (Firefox, etc.)', null],
    ['empty CH → null', ''],
    ['whitespace-only CH → null', ' '],
  ])('%s', (_title, hints) => {
    expect(extractCHFeatures(hints, chrome134Agent)).toBeNull();
  });

  test('Edge CH (different brand name) with Chromium match', () => {
    const edgeHints = '"Microsoft Edge";v="134", "Chromium";v="134", "Not-A.Brand";v="99"';
    const edgeAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0';
    const features = extractCHFeatures(edgeHints, edgeAgent);
    expect(features).toBe('present, brands=3, grease, consistent');
  });

  test('single brand (Google Chrome only) → brands=1, consistent', () => {
    // The lone brand reports v=100 and the UA carries Chrome/100, so the
    // two versions are expected to be reported as consistent.
    const loneBrandHints = '"Google Chrome";v="100"';
    const chrome100Agent = 'Mozilla/5.0 Chrome/100.0.0.0 Safari/537.36';
    const features = extractCHFeatures(loneBrandHints, chrome100Agent);
    expect(features).toBe('present, brands=1, consistent');
  });

  test('output is valid SF-Dictionary', () => {
    const features = extractCHFeatures(chrome134Hints, chrome134Agent);
    const wellFormed = isValidSFDictionary(features);
    expect(wellFormed).toBe(true);
  });
});
|
|
317
|
+
|
|
318
|
+
// ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
|
|
319
|
+
|
|
320
|
+
describe('extractUAFeatures', () => {
  // Fixture User-Agent strings shared by the tests in this suite.
  const CHROME_MAC = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36';
  const FIREFOX_WIN = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0';
  const SAFARI_IOS = 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1';
  const EDGE_WIN = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0';
  const CURL = 'curl/7.88.1';
  const HEADLESS_CHROME = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/134.0.0.0 Safari/537.36';
  const PUPPETEER = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/134.0.0.0 Safari/537.36 Puppeteer';
  const GOOGLEBOT = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
  const IPAD = 'Mozilla/5.0 (iPad; CPU OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1';
  const ANDROID_TABLET = 'Mozilla/5.0 (Linux; Android 13; SM-X200) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
  const ANDROID_PHONE = 'Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36';
  const SAFARI_MAC = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15';

  // ── dpf compound token ─────────────────────────────────────────────────
  // Every assertion in this group is anchored with ^ so that dpf must be the
  // first member of the output.
  test('Chrome on Mac → desktop/mac/chrome', () => {
    expect(extractUAFeatures(CHROME_MAC)).toMatch(/^dpf=desktop\/mac\/chrome/);
  });

  test('Firefox on Windows → desktop/windows/firefox', () => {
    expect(extractUAFeatures(FIREFOX_WIN)).toMatch(/^dpf=desktop\/windows\/firefox/);
  });

  test('Safari on iPhone → mobile/ios/safari', () => {
    expect(extractUAFeatures(SAFARI_IOS)).toMatch(/^dpf=mobile\/ios\/safari/);
  });

  test('Edge on Windows → desktop/windows/edge', () => {
    expect(extractUAFeatures(EDGE_WIN)).toMatch(/^dpf=desktop\/windows\/edge/);
  });

  test('curl → unknown/other/other', () => {
    expect(extractUAFeatures(CURL)).toMatch(/^dpf=unknown\/other\/other/);
  });

  test('HeadlessChrome → desktop/linux/chrome', () => {
    expect(extractUAFeatures(HEADLESS_CHROME)).toMatch(/^dpf=desktop\/linux\/chrome/);
  });

  test('Googlebot → server/other/bot', () => {
    // Anchored like its siblings; the slash-separator test below already
    // depends on the Googlebot output starting with "dpf=".
    expect(extractUAFeatures(GOOGLEBOT)).toMatch(/^dpf=server\/other\/bot/);
  });

  test('iPad → tablet/ios/safari', () => {
    expect(extractUAFeatures(IPAD)).toMatch(/^dpf=tablet\/ios\/safari/);
  });

  test('Android tablet (no "Mobile") → tablet/android/chrome', () => {
    expect(extractUAFeatures(ANDROID_TABLET)).toMatch(/^dpf=tablet\/android\/chrome/);
  });

  test('Android phone → mobile/android/chrome', () => {
    expect(extractUAFeatures(ANDROID_PHONE)).toMatch(/^dpf=mobile\/android\/chrome/);
  });

  test('Safari on Mac → desktop/mac/safari', () => {
    expect(extractUAFeatures(SAFARI_MAC)).toMatch(/^dpf=desktop\/mac\/safari/);
  });

  // ── version bucketing ──────────────────────────────────────────────────
  test('Chrome 134 → ver=120-139', () => {
    expect(extractUAFeatures(CHROME_MAC)).toMatch(/ver=120-139/);
  });

  test('curl/7.88.1 → ver=0-79', () => {
    expect(extractUAFeatures(CURL)).toMatch(/ver=0-79/);
  });

  // Bucket boundary tests: verify version numbers at the edges of each range.
  test.each([
    ['Chrome/79.0.0.0', '0-79'],
    ['Chrome/80.0.0.0', '80-99'],
    ['Chrome/99.0.0.0', '80-99'],
    ['Chrome/100.0.0.0', '100-119'],
    ['Chrome/119.0.0.0', '100-119'],
    ['Chrome/120.0.0.0', '120-139'],
    ['Chrome/139.0.0.0', '120-139'],
    ['Chrome/140.0.0.0', '140+'],
    ['Chrome/999.0.0.0', '140+'],
  ])('version bucket boundary: %s → ver=%s', (chromeToken, expected) => {
    // Wrap in a minimal browser-like UA so device/platform detection has
    // something to work with.
    const ua = `Mozilla/5.0 (X11; Linux x86_64) ${chromeToken} Safari/537.36`;
    // "+" is a regex quantifier, so escape it before building the pattern.
    expect(extractUAFeatures(ua)).toMatch(new RegExp(`ver=${expected.replace('+', '\\+')}`));
  });

  // ── browser flag ───────────────────────────────────────────────────────
  test('Mozilla/ prefix → browser flag present', () => {
    expect(extractUAFeatures(CHROME_MAC)).toMatch(/\bbrowser\b/);
  });

  test('curl → no browser flag', () => {
    expect(extractUAFeatures(CURL)).not.toMatch(/\bbrowser\b/);
  });

  // ── headless / automation ──────────────────────────────────────────────
  test('HeadlessChrome → headless flag', () => {
    expect(extractUAFeatures(HEADLESS_CHROME)).toMatch(/\bheadless\b/);
  });

  test('HeadlessChrome → no automation flag (headless only)', () => {
    expect(extractUAFeatures(HEADLESS_CHROME)).not.toMatch(/\bautomation\b/);
  });

  test('Puppeteer → both headless and automation', () => {
    const result = extractUAFeatures(PUPPETEER);
    expect(result).toMatch(/\bheadless\b/);
    expect(result).toMatch(/\bautomation\b/);
  });

  test('python-requests → automation', () => {
    expect(extractUAFeatures('python-requests/2.31.0')).toMatch(/\bautomation\b/);
  });

  test('Selenium → automation', () => {
    expect(extractUAFeatures('Mozilla/5.0 Selenium/4.0')).toMatch(/\bautomation\b/);
  });

  test('wget → automation', () => {
    expect(extractUAFeatures('wget/1.21.4')).toMatch(/\bautomation\b/);
  });

  test('normal Chrome → no headless or automation', () => {
    const result = extractUAFeatures(CHROME_MAC);
    expect(result).not.toMatch(/\bheadless\b/);
    expect(result).not.toMatch(/\bautomation\b/);
  });

  // ── entropy ────────────────────────────────────────────────────────────
  test('normal browser UA → entropy=medium', () => {
    expect(extractUAFeatures(CHROME_MAC)).toMatch(/entropy=medium/);
  });

  test('curl (short) → entropy=low', () => {
    expect(extractUAFeatures(CURL)).toMatch(/entropy=low/);
  });

  test('very long UA (>300 chars) → entropy=low', () => {
    const longUA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ' + 'A'.repeat(300);
    expect(extractUAFeatures(longUA)).toMatch(/entropy=low/);
  });

  test('minimal short string → entropy=low', () => {
    expect(extractUAFeatures('bot')).toMatch(/entropy=low/);
  });

  // ── null/empty ─────────────────────────────────────────────────────────
  test('null → null', () => {
    expect(extractUAFeatures(null)).toBeNull();
  });

  test('empty string → null', () => {
    expect(extractUAFeatures('')).toBeNull();
  });

  test('whitespace only → null', () => {
    expect(extractUAFeatures(' ')).toBeNull();
  });

  // ── spec Appendix B examples (exact output match) ──────────────────────
  test('spec example: Chrome on Mac', () => {
    expect(extractUAFeatures(CHROME_MAC))
      .toBe('dpf=desktop/mac/chrome, ver=120-139, browser, entropy=medium');
  });

  test('spec example: HeadlessChrome', () => {
    expect(extractUAFeatures(HEADLESS_CHROME))
      .toBe('dpf=desktop/linux/chrome, ver=120-139, browser, headless, entropy=medium');
  });

  test('spec example: Puppeteer', () => {
    expect(extractUAFeatures(PUPPETEER))
      .toBe('dpf=desktop/linux/chrome, ver=120-139, browser, headless, automation, entropy=medium');
  });

  test('output is valid SF-Dictionary', () => {
    const result = extractUAFeatures(CHROME_MAC);
    expect(isValidSFDictionary(result)).toBe(true);
  });

  test('all dpf compound values use slash separators (no spaces)', () => {
    // Covers every fixture declared at the top of this suite.
    const uas = [
      CHROME_MAC,
      FIREFOX_WIN,
      SAFARI_IOS,
      EDGE_WIN,
      CURL,
      HEADLESS_CHROME,
      PUPPETEER,
      GOOGLEBOT,
      IPAD,
      ANDROID_TABLET,
      ANDROID_PHONE,
      SAFARI_MAC,
    ];
    for (const ua of uas) {
      const result = extractUAFeatures(ua);
      // Assert the shape first so a missing dpf member fails as an
      // expectation instead of a TypeError on a null match.
      expect(result).toMatch(/^dpf=[^,]+/);
      const [, dpf] = result.match(/^dpf=([^,]+)/);
      // The "no spaces" half of the test title.
      expect(dpf).not.toMatch(/\s/);
      // The "slash separators" half: device/platform/family.
      expect(dpf.split('/')).toHaveLength(3);
    }
  });
});
|
|
509
|
+
|
|
510
|
+
// ── §6.3.2 computeUAHMAC ──────────────────────────────────────────────────
|
|
511
|
+
|
|
512
|
+
describe('computeUAHMAC', () => {
  // Inputs shared by the tests below.
  const sampleUA = 'Mozilla/5.0 Chrome/134.0.0.0';
  const sampleKey = 'test-hmac-secret-key';
  // RFC 8941 Byte Sequence: base64 payload wrapped in ":" delimiters.
  const byteSequencePattern = /^:[A-Za-z0-9+/]+=*:$/;

  test('returns RFC 8941 Byte Sequence format (:base64:)', async () => {
    const mac = await computeUAHMAC(sampleUA, sampleKey);
    expect(mac).toMatch(byteSequencePattern);
  });

  test('deterministic — same input produces same output', async () => {
    const first = await computeUAHMAC(sampleUA, sampleKey);
    const second = await computeUAHMAC(sampleUA, sampleKey);
    expect(first).toBe(second);
  });

  test('different UA → different HMAC', async () => {
    const forBrowser = await computeUAHMAC(sampleUA, sampleKey);
    const forCurl = await computeUAHMAC('curl/7.88.1', sampleKey);
    expect(forBrowser).not.toBe(forCurl);
  });

  test('different key → different HMAC', async () => {
    const withSampleKey = await computeUAHMAC(sampleUA, sampleKey);
    const withOtherKey = await computeUAHMAC(sampleUA, 'different-key');
    expect(withSampleKey).not.toBe(withOtherKey);
  });

  test('null UA → null', async () => {
    const mac = await computeUAHMAC(null, sampleKey);
    expect(mac).toBeNull();
  });

  test('null key → null', async () => {
    const mac = await computeUAHMAC(sampleUA, null);
    expect(mac).toBeNull();
  });

  // NOTE: the test matrix suggests an empty UA should still produce an HMAC.
  // The implementation currently returns null (an empty string is falsy and
  // carries no useful signal to HMAC). Revisit if the spec intent changes.
  test('empty UA → null', async () => {
    const mac = await computeUAHMAC('', sampleKey);
    expect(mac).toBeNull();
  });

  test('empty key → null', async () => {
    const mac = await computeUAHMAC(sampleUA, '');
    expect(mac).toBeNull();
  });

  test('very long UA still produces valid HMAC', async () => {
    const oversizedUA = 'Mozilla/5.0 ' + 'X'.repeat(5000);
    const mac = await computeUAHMAC(oversizedUA, sampleKey);
    expect(mac).toMatch(byteSequencePattern);
  });

  test('HMAC length is consistent (44-char base64 = 256-bit digest)', async () => {
    const mac = await computeUAHMAC(sampleUA, sampleKey);
    // SHA-256 → 32 bytes → 44 base64 chars once the ":" delimiters are removed.
    const payload = mac.slice(1, -1);
    expect(payload).toHaveLength(44);
  });
});
|
|
571
|
+
|
|
572
|
+
// ── §6.4 computeConfidenceToken ───────────────────────────────────────────
|
|
573
|
+
|
|
574
|
+
describe('computeConfidenceToken', () => {
  // Inputs shared by the tests below.
  const sampleUA = 'Mozilla/5.0 Chrome/134.0.0.0';
  const sampleLang = 'en-US,en;q=0.9';
  const sampleCH = '"Google Chrome";v="134"';
  // Expected token shape: exactly eight lowercase hex digits.
  const tokenPattern = /^[0-9a-f]{8}$/;

  test('returns 8-char hex string', async () => {
    const token = await computeConfidenceToken(sampleUA, sampleLang, sampleCH);
    expect(token).toMatch(tokenPattern);
  });

  test('deterministic — same inputs produce same output', async () => {
    const first = await computeConfidenceToken(sampleUA, sampleLang, sampleCH);
    const second = await computeConfidenceToken(sampleUA, sampleLang, sampleCH);
    expect(first).toBe(second);
  });

  test('different UA → different token', async () => {
    const forBrowser = await computeConfidenceToken(sampleUA, sampleLang, sampleCH);
    const forCurl = await computeConfidenceToken('curl/7.88.1', sampleLang, sampleCH);
    expect(forBrowser).not.toBe(forCurl);
  });

  test('different language → different token', async () => {
    const forEnglish = await computeConfidenceToken(sampleUA, 'en-US', sampleCH);
    const forFrench = await computeConfidenceToken(sampleUA, 'fr-FR', sampleCH);
    expect(forEnglish).not.toBe(forFrench);
  });

  test('different CH → different token', async () => {
    const forV134 = await computeConfidenceToken(sampleUA, sampleLang, '"Chrome";v="134"');
    const forV120 = await computeConfidenceToken(sampleUA, sampleLang, '"Chrome";v="120"');
    expect(forV134).not.toBe(forV120);
  });

  test('null inputs treated as empty strings (still produces token)', async () => {
    const token = await computeConfidenceToken(null, null, null);
    expect(token).toMatch(tokenPattern);
  });

  test('partial inputs work (missing lang/ch)', async () => {
    const token = await computeConfidenceToken(sampleUA, null, null);
    expect(token).toMatch(tokenPattern);
  });

  test('order of concatenation matters (UA+lang+CH ≠ lang+UA+CH)', async () => {
    // Swapping the first two arguments should yield a different token.
    const inOrder = await computeConfidenceToken('A', 'B', 'C');
    const swapped = await computeConfidenceToken('B', 'A', 'C');
    expect(inOrder).not.toBe(swapped);
  });
});
|