@paywalls-net/filter 1.3.12 → 1.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +12 -1
- package/src/signal-extraction.js +50 -0
- package/tests/bot-signal-extraction.test.js +263 -0
- package/tests/proxy-vai-request.test.js +50 -0
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -185,7 +185,18 @@ async function proxyVAIRequest(cfg, request) {
|
|
|
185
185
|
cf.country ? `co=${cf.country}, re=${cf.region || ''}, ci=${cf.city || ''}, asn=${cf.asn || ''}` : null);
|
|
186
186
|
|
|
187
187
|
// Tier 3: UA features + HMAC (§6.3)
|
|
188
|
-
|
|
188
|
+
let uaFeatures = extractUAFeatures(headers['user-agent']);
|
|
189
|
+
|
|
190
|
+
// Sec-Fetch context mismatch detection (paywalls-site-dz23):
|
|
191
|
+
// vai.json is fetched via sync XHR — browsers set Sec-Fetch-Dest: empty,
|
|
192
|
+
// Mode: cors. If we see document/navigate, the headless browser is leaking
|
|
193
|
+
// page-level Sec-Fetch onto the XHR. Emit marker so cloud-api can classify.
|
|
194
|
+
if (uaFeatures && cloudApiPath.endsWith('/vai.json') &&
|
|
195
|
+
headers['sec-fetch-dest'] === 'document' && headers['sec-fetch-mode'] === 'navigate') {
|
|
196
|
+
uaFeatures += ', sec-fetch-mismatch';
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
setIfPresent(forwardHeaders, 'X-PW-UA', uaFeatures);
|
|
189
200
|
setIfPresent(forwardHeaders, 'X-PW-UA-HMAC', await computeUAHMAC(headers['user-agent'], cfg.vaiUAHmacKey));
|
|
190
201
|
setIfPresent(forwardHeaders, 'X-PW-CT-FP', await computeConfidenceToken(headers['user-agent'], headers['accept-language'], headers['sec-ch-ua']));
|
|
191
202
|
|
package/src/signal-extraction.js
CHANGED
|
@@ -446,6 +446,50 @@ function bucketVersion(ver) {
|
|
|
446
446
|
}
|
|
447
447
|
|
|
448
448
|
// ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
|
|
449
|
+
|
|
450
|
+
/**
 * Detect structurally impossible or fabricated browser version strings.
 *
 * Three independent rules are applied; the UA is reported as fabricated as
 * soon as any one of them matches.
 *
 * 1. Chrome frozen UA policy (since Chrome 107, late 2022):
 *    Real Chrome reports Chrome/[major].0.0.0 — minor, build, and patch are
 *    always zero. Any Chrome / HeadlessChrome / Chromium token with
 *    major >= 107 and a non-zero minor, build, or patch is flagged.
 *    NOTE(review): clients that deliberately advertise a full Chrome version
 *    (e.g. crawler UAs embedding Chrome/145.0.7632.116) also match this rule —
 *    confirm the consumer of the marker accounts for that.
 *
 * 2. Over-long Chrome patch (e.g. Chrome/48.0.1025.1402):
 *    The fourth version component is expected to be 1-3 digits. A patch of
 *    4 or more digits is flagged regardless of the major version.
 *
 * 3. Fabricated Edge (e.g. Edge/18.19582):
 *    For an `Edge/` token with major <= 18 (EdgeHTML era), a minor component
 *    of 5 or more digits is flagged.
 *    NOTE(review): genuine EdgeHTML UAs appear to have carried the Windows
 *    build number as the minor (e.g. Edge/18.17763), which this rule would
 *    also flag — confirm that is intended.
 *
 * Only the first Chrome-family token and the first `Edge/` token in the
 * string are inspected. Chromium-based Edge (`Edg/`) is not matched by the
 * Edge rule.
 *
 * @param {string} ua - Raw User-Agent header value (must be a string).
 * @returns {boolean} true when any rule above matches, false otherwise.
 */
function isFabricatedVersion(ua) {
  // Chrome / HeadlessChrome / Chromium: full four-part version parse.
  // `\b` cannot match inside "HeadlessChrome", hence the explicit alternative.
  const chromeMatch = ua.match(/(?:\b|Headless)Chrom(?:e|ium)\/(\d+)\.(\d+)\.(\d+)\.(\d+)/);
  if (chromeMatch) {
    const [, majorText, minorText, buildText, patchText] = chromeMatch;
    const major = parseInt(majorText, 10);

    // Frozen UA policy: Chrome >= 107 must be major.0.0.0, so every trailing
    // component (minor included) has to be numerically zero.
    const trailingAllZero = [minorText, buildText, patchText]
      .every((part) => parseInt(part, 10) === 0);
    if (major >= 107 && !trailingAllZero) return true;

    // A patch component of 4+ digits is treated as structurally impossible
    // on any Chrome version. Length is checked on the raw text, so leading
    // zeros count as digits.
    if (patchText.length >= 4) return true;
  }

  // EdgeHTML-era (Edge/12-18): a minor of 5+ digits is flagged.
  const edgeMatch = ua.match(/\bEdge\/(\d+)\.(\d+)/);
  if (edgeMatch) {
    const major = parseInt(edgeMatch[1], 10);
    if (major <= 18 && edgeMatch[2].length >= 5) return true;
  }

  return false;
}
|
|
492
|
+
|
|
449
493
|
/**
|
|
450
494
|
* Parse a User-Agent string into an SF-Dictionary of derived features.
|
|
451
495
|
*
|
|
@@ -468,6 +512,12 @@ export function extractUAFeatures(userAgent) {
|
|
|
468
512
|
|
|
469
513
|
if (HEADLESS_MARKERS.some(re => re.test(ua))) parts.push('headless');
|
|
470
514
|
if (AUTOMATION_MARKERS.some(re => re.test(ua))) parts.push('automation');
|
|
515
|
+
if (isFabricatedVersion(ua)) parts.push('fabricated');
|
|
516
|
+
|
|
517
|
+
// Stale version: Chrome family with ver=0-79 is 6+ years old (pre-2020)
|
|
518
|
+
if (family === 'chrome' && extractMajorVersion(ua) !== null && extractMajorVersion(ua) < 80) {
|
|
519
|
+
parts.push('stale');
|
|
520
|
+
}
|
|
471
521
|
|
|
472
522
|
parts.push(`entropy=${computeUAEntropy(ua)}`);
|
|
473
523
|
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for bot/automation signal extraction via extractUAFeatures
|
|
3
|
+
*
|
|
4
|
+
* Tests the Tier 3 UA feature extractor against real-world bot signals
|
|
5
|
+
* discovered from Cloudflare production logs (2026-03-14).
|
|
6
|
+
*
|
|
7
|
+
* Fixture: paywalls-site/tests/fixtures/cloudflare-prod-paywalls-2026-03-14.csv
|
|
8
|
+
* Issue: (to be assigned)
|
|
9
|
+
*
|
|
10
|
+
* These tests verify that extractUAFeatures correctly identifies:
|
|
11
|
+
* - headless markers (HeadlessChrome)
|
|
12
|
+
* - automation markers (Puppeteer, Selenium, etc.)
|
|
13
|
+
* - bot family detection (Googlebot, Applebot, Bytespider, etc.)
|
|
14
|
+
* - device/platform/family parsing for suspicious UAs
|
|
15
|
+
* - fabricated version patterns
|
|
16
|
+
*/
|
|
17
|
+
import {
|
|
18
|
+
extractUAFeatures,
|
|
19
|
+
_resetVAIMetadata,
|
|
20
|
+
} from '../src/signal-extraction.js';
|
|
21
|
+
|
|
22
|
+
beforeEach(() => {
|
|
23
|
+
_resetVAIMetadata();
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
// ── 1. HeadlessChrome detection ────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
describe('HeadlessChrome signal extraction', () => {
|
|
29
|
+
test('HeadlessChrome/145 should have headless marker', () => {
|
|
30
|
+
const result = extractUAFeatures(
|
|
31
|
+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/145.0.0.0 Safari/537.36'
|
|
32
|
+
);
|
|
33
|
+
expect(result).toMatch(/\bheadless\b/);
|
|
34
|
+
expect(result).toMatch(/dpf=desktop\/linux\/chrome/);
|
|
35
|
+
expect(result).toMatch(/browser/);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test('HeadlessChrome/143 should have headless and fabricated markers', () => {
|
|
39
|
+
const result = extractUAFeatures(
|
|
40
|
+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/143.0.7499.4 Safari/537.36'
|
|
41
|
+
);
|
|
42
|
+
expect(result).toMatch(/\bheadless\b/);
|
|
43
|
+
expect(result).toMatch(/dpf=desktop\/linux\/chrome/);
|
|
44
|
+
expect(result).toMatch(/\bfabricated\b/);
|
|
45
|
+
});
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
// ── 2. Self-identified bots ────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
describe('Self-identified bot signal extraction', () => {
|
|
51
|
+
test('Applebot should be detected as bot family', () => {
|
|
52
|
+
const result = extractUAFeatures(
|
|
53
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)'
|
|
54
|
+
);
|
|
55
|
+
expect(result).toMatch(/\/bot/);
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test('Googlebot mobile should be detected as bot family', () => {
|
|
59
|
+
const result = extractUAFeatures(
|
|
60
|
+
'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.7632.116 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
|
|
61
|
+
);
|
|
62
|
+
expect(result).toMatch(/\/bot/);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
test('Googlebot desktop should be detected as bot family', () => {
|
|
66
|
+
const result = extractUAFeatures(
|
|
67
|
+
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/145.0.7632.116 Safari/537.36'
|
|
68
|
+
);
|
|
69
|
+
expect(result).toMatch(/\/bot/);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
test('Bytespider should be detected as bot family', () => {
|
|
73
|
+
const result = extractUAFeatures(
|
|
74
|
+
'Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider; https://zhanzhang.toutiao.com/)'
|
|
75
|
+
);
|
|
76
|
+
expect(result).toMatch(/\/bot/);
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test('amazon-Quick non-browser UA should have low entropy', () => {
|
|
80
|
+
const result = extractUAFeatures('amazon-Quick-on-behalf-of-20e61c5a');
|
|
81
|
+
expect(result).not.toMatch(/browser/);
|
|
82
|
+
expect(result).toMatch(/entropy=low/);
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
// ── 3. Fabricated Chrome versions (4-digit patch) ──────────────────────────
|
|
87
|
+
|
|
88
|
+
describe('Fabricated Chrome version UAs — signal extraction', () => {
|
|
89
|
+
// These have impossible 4-digit patch numbers. extractUAFeatures must flag
|
|
90
|
+
// them with the fabricated marker, and it should additionally:
|
|
91
|
+
// - Parse the very old major version into the low bucket (0-79)
|
|
92
|
+
// - Have medium entropy (looks like a browser UA)
|
|
93
|
+
|
|
94
|
+
const fabricatedUAs = [
|
|
95
|
+
{ ua: 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.1025.1402 Mobile Safari/537.36', ver: '0-79' },
|
|
96
|
+
{ ua: 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.5596.1136 Mobile Safari/537.36', ver: '0-79' },
|
|
97
|
+
{ ua: 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2714.1709 Mobile Safari/537.36', ver: '0-79' },
|
|
98
|
+
{ ua: 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.5974.1013 Mobile Safari/537.36', ver: '0-79' },
|
|
99
|
+
{ ua: 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.1957.1646 Mobile Safari/537.36', ver: '0-79' },
|
|
100
|
+
{ ua: 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.4130.1795 Mobile Safari/537.36', ver: '0-79' },
|
|
101
|
+
{ ua: 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.7842.1119 Mobile Safari/537.36', ver: '0-79' },
|
|
102
|
+
];
|
|
103
|
+
|
|
104
|
+
test.each(fabricatedUAs)('$ver Chrome with 4-digit patch → fabricated marker', ({ ua, ver }) => {
|
|
105
|
+
const result = extractUAFeatures(ua);
|
|
106
|
+
expect(result).toMatch(new RegExp(`ver=${ver}`));
|
|
107
|
+
expect(result).toMatch(/browser/);
|
|
108
|
+
expect(result).toMatch(/\bfabricated\b/);
|
|
109
|
+
});
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
// ── 4. Fabricated Edge version ─────────────────────────────────────────────
|
|
113
|
+
|
|
114
|
+
describe('Fabricated Edge version — signal extraction', () => {
|
|
115
|
+
test('Edge/18.19582 should parse as edge family and be fabricated', () => {
|
|
116
|
+
const result = extractUAFeatures(
|
|
117
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.7680.71 Safari/537.36 Edge/18.19582'
|
|
118
|
+
);
|
|
119
|
+
expect(result).toMatch(/\/edge/);
|
|
120
|
+
expect(result).toMatch(/dpf=desktop\/windows\/edge/);
|
|
121
|
+
expect(result).toMatch(/\bfabricated\b/);
|
|
122
|
+
});
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
// ── 5. Outdated browser UAs from bot farm ──────────────────────────────────
|
|
126
|
+
|
|
127
|
+
describe('Outdated browser UAs from bot farm — version bucketing', () => {
|
|
128
|
+
test('Chrome/59 (2017) should bucket to 0-79', () => {
|
|
129
|
+
const result = extractUAFeatures(
|
|
130
|
+
'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36'
|
|
131
|
+
);
|
|
132
|
+
expect(result).toMatch(/ver=0-79/);
|
|
133
|
+
expect(result).toMatch(/dpf=mobile\/android\/chrome/);
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
test('Chrome/117 with non-zero build (frozen UA violation) → fabricated', () => {
|
|
137
|
+
const result = extractUAFeatures(
|
|
138
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36'
|
|
139
|
+
);
|
|
140
|
+
expect(result).toMatch(/ver=100-119/);
|
|
141
|
+
expect(result).toMatch(/\bfabricated\b/);
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
test('Chrome/83 should bucket to 80-99', () => {
|
|
145
|
+
const result = extractUAFeatures(
|
|
146
|
+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
|
|
147
|
+
);
|
|
148
|
+
expect(result).toMatch(/ver=80-99/);
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
test('Chrome/79 should bucket to 0-79', () => {
|
|
152
|
+
const result = extractUAFeatures(
|
|
153
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'
|
|
154
|
+
);
|
|
155
|
+
expect(result).toMatch(/ver=0-79/);
|
|
156
|
+
});
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
// ── 6. Legitimate browser UAs ──────────────────────────────────────────────
|
|
160
|
+
|
|
161
|
+
describe('Legitimate browser UAs — correct feature extraction', () => {
|
|
162
|
+
test('Chrome/145 on macOS → desktop/mac/chrome, current version bucket', () => {
|
|
163
|
+
const result = extractUAFeatures(
|
|
164
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36'
|
|
165
|
+
);
|
|
166
|
+
expect(result).toMatch(/dpf=desktop\/mac\/chrome/);
|
|
167
|
+
expect(result).toMatch(/ver=140-159/);
|
|
168
|
+
expect(result).toMatch(/browser/);
|
|
169
|
+
expect(result).not.toMatch(/headless/);
|
|
170
|
+
expect(result).not.toMatch(/automation/);
|
|
171
|
+
expect(result).not.toMatch(/fabricated/);
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
test('Chrome/146 on Windows → desktop/windows/chrome, current version bucket', () => {
|
|
175
|
+
const result = extractUAFeatures(
|
|
176
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36'
|
|
177
|
+
);
|
|
178
|
+
expect(result).toMatch(/dpf=desktop\/windows\/chrome/);
|
|
179
|
+
expect(result).toMatch(/ver=140-159/);
|
|
180
|
+
expect(result).not.toMatch(/headless/);
|
|
181
|
+
expect(result).not.toMatch(/automation/);
|
|
182
|
+
expect(result).not.toMatch(/fabricated/);
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
test('Safari/17.4.1 on macOS → desktop/mac/safari', () => {
|
|
186
|
+
const result = extractUAFeatures(
|
|
187
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15'
|
|
188
|
+
);
|
|
189
|
+
expect(result).toMatch(/dpf=desktop\/mac\/safari/);
|
|
190
|
+
expect(result).not.toMatch(/headless/);
|
|
191
|
+
expect(result).not.toMatch(/automation/);
|
|
192
|
+
expect(result).not.toMatch(/fabricated/);
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
test('Edge/122 on Windows → desktop/windows/edge', () => {
|
|
196
|
+
const result = extractUAFeatures(
|
|
197
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0'
|
|
198
|
+
);
|
|
199
|
+
expect(result).toMatch(/dpf=desktop\/windows\/edge/);
|
|
200
|
+
expect(result).toMatch(/ver=120-139/);
|
|
201
|
+
expect(result).not.toMatch(/headless/);
|
|
202
|
+
expect(result).not.toMatch(/fabricated/);
|
|
203
|
+
expect(result).not.toMatch(/stale/);
|
|
204
|
+
});
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
// ── 7. Stale browser version marker ───────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
describe('Stale browser version — signal extraction', () => {
|
|
210
|
+
test('Chrome/59 (2017, ver=0-79) should have stale marker', () => {
|
|
211
|
+
const result = extractUAFeatures(
|
|
212
|
+
'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36'
|
|
213
|
+
);
|
|
214
|
+
expect(result).toMatch(/ver=0-79/);
|
|
215
|
+
expect(result).toMatch(/\bstale\b/);
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
test('Chrome/79 (2019, ver=0-79) should have stale marker', () => {
|
|
219
|
+
const result = extractUAFeatures(
|
|
220
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'
|
|
221
|
+
);
|
|
222
|
+
expect(result).toMatch(/ver=0-79/);
|
|
223
|
+
expect(result).toMatch(/\bstale\b/);
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
test('Chrome/83 (80-99 bucket, borderline old) should NOT have stale marker', () => {
|
|
227
|
+
const result = extractUAFeatures(
|
|
228
|
+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
|
|
229
|
+
);
|
|
230
|
+
expect(result).toMatch(/ver=80-99/);
|
|
231
|
+
expect(result).not.toMatch(/\bstale\b/);
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
test('Chrome/145 (current) should NOT have stale marker', () => {
|
|
235
|
+
const result = extractUAFeatures(
|
|
236
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36'
|
|
237
|
+
);
|
|
238
|
+
expect(result).toMatch(/ver=140-159/);
|
|
239
|
+
expect(result).not.toMatch(/\bstale\b/);
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
test('Safari/17 should NOT have stale marker', () => {
|
|
243
|
+
const result = extractUAFeatures(
|
|
244
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15'
|
|
245
|
+
);
|
|
246
|
+
expect(result).not.toMatch(/\bstale\b/);
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
test('Firefox/130 should NOT have stale marker', () => {
|
|
250
|
+
const result = extractUAFeatures(
|
|
251
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0'
|
|
252
|
+
);
|
|
253
|
+
expect(result).not.toMatch(/\bstale\b/);
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
test('Fabricated Chrome/48.0.1025.1402 also gets stale (both markers)', () => {
|
|
257
|
+
const result = extractUAFeatures(
|
|
258
|
+
'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.1025.1402 Mobile Safari/537.36'
|
|
259
|
+
);
|
|
260
|
+
expect(result).toMatch(/\bfabricated\b/);
|
|
261
|
+
expect(result).toMatch(/\bstale\b/);
|
|
262
|
+
});
|
|
263
|
+
});
|
|
@@ -291,6 +291,56 @@ describe('proxyVAIRequest — signal extraction pipeline', () => {
|
|
|
291
291
|
});
|
|
292
292
|
});
|
|
293
293
|
|
|
294
|
+
// ── sec-fetch-mismatch CDN marker emission ──────────────────────────────────
|
|
295
|
+
//
|
|
296
|
+
// When the CDN sees sec-fetch-dest: document + sec-fetch-mode: navigate on a
|
|
297
|
+
// vai.json request, it appends sec-fetch-mismatch to X-PW-UA. This marker is
|
|
298
|
+
// used by the cloud-api to reclassify the request as OTHER without inspecting
|
|
299
|
+
// raw Sec-Fetch headers in the classification path.
|
|
300
|
+
|
|
301
|
+
describe('CDN sec-fetch-mismatch marker emission', () => {
|
|
302
|
+
let handler;
|
|
303
|
+
|
|
304
|
+
beforeAll(async () => {
|
|
305
|
+
handler = await init('cloudflare');
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
async function proxyVAI(url, headerMap, cf) {
|
|
309
|
+
const request = makeRequest(url, headerMap, cf);
|
|
310
|
+
capturedFetchArgs = null;
|
|
311
|
+
await handler(request, ENV, CTX);
|
|
312
|
+
return capturedFetchArgs;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
test('document/navigate on vai.json → appends sec-fetch-mismatch', async () => {
|
|
316
|
+
const mismatchHeaders = {
|
|
317
|
+
...CHROME_MAC_HEADERS,
|
|
318
|
+
'sec-fetch-dest': 'document',
|
|
319
|
+
'sec-fetch-mode': 'navigate',
|
|
320
|
+
};
|
|
321
|
+
const args = await proxyVAI('https://pub.example.com/pw/vai.json', mismatchHeaders, CF_PROPS);
|
|
322
|
+
expect(args.headers['X-PW-UA']).toContain('sec-fetch-mismatch');
|
|
323
|
+
});
|
|
324
|
+
|
|
325
|
+
test('empty/cors on vai.json (normal XHR) → no sec-fetch-mismatch', async () => {
|
|
326
|
+
const args = await proxyVAI('https://pub.example.com/pw/vai.json', CHROME_MAC_HEADERS, CF_PROPS);
|
|
327
|
+
expect(args.headers['X-PW-UA']).not.toContain('sec-fetch-mismatch');
|
|
328
|
+
});
|
|
329
|
+
|
|
330
|
+
test('document/navigate on non-vai path → no sec-fetch-mismatch', async () => {
|
|
331
|
+
const mismatchHeaders = {
|
|
332
|
+
...CHROME_MAC_HEADERS,
|
|
333
|
+
'sec-fetch-dest': 'document',
|
|
334
|
+
'sec-fetch-mode': 'navigate',
|
|
335
|
+
};
|
|
336
|
+
const args = await proxyVAI('https://pub.example.com/pw/access/check', mismatchHeaders, CF_PROPS);
|
|
337
|
+
// sec-fetch-mismatch only applies to vai.json requests
|
|
338
|
+
if (args && args.headers) {
|
|
339
|
+
expect(args.headers['X-PW-UA'] || '').not.toContain('sec-fetch-mismatch');
|
|
340
|
+
}
|
|
341
|
+
});
|
|
342
|
+
});
|
|
343
|
+
|
|
294
344
|
// ── logAccess — non-VAI path raw header forwarding ───────────────────────────
|
|
295
345
|
//
|
|
296
346
|
// Verifies that logAccess() does NOT transform headers like proxyVAIRequest().
|