@paywalls-net/filter 1.3.8 → 1.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,379 @@
1
+ /**
2
+ * Tests for proxyVAIRequest() signal extraction pipeline (§7).
3
+ *
4
+ * Verifies that VAI proxy requests emit compact SF headers (v2 protocol)
5
+ * instead of raw header passthrough.
6
+ *
7
+ * Issue: paywalls-site-qiw
8
+ * Spec: specs/vai-privacy-v2.spec.md §7, §8.1.2
9
+ */
10
+ import { init } from '../src/index.js';
11
+
12
+ // ── Test helpers ───────────────────────────────────────────────────────────
13
+
14
/**
 * Arguments of the most recent stubbed `fetch` call, stored as
 * `{ url, ...opts }`. Stays `null` until a stubbed fetch has been made.
 */
let capturedFetchArgs = null;

/**
 * Canned response object handed back by the stubbed `fetch`.
 * It is a plain object carrying only the fields listed here, not a real `Response`.
 */
const MOCK_RESPONSE = {
  ok: true,
  status: 200,
  statusText: 'OK',
  body: 'mock-body',
  headers: new Headers({ 'content-type': 'application/json' }),
};
25
+
26
/**
 * Build a minimal Request-like stub for the handler under test.
 *
 * The result is a plain object exposing only `url`, `method`, `headers` and
 * `cf` (the Cloudflare Workers request shape the tests rely on); it is not a
 * real `Request` instance.
 *
 * @param {string} url - Request URL, stored on the stub as given.
 * @param {Object<string, string>} [headerMap={}] - Header name/value pairs, wrapped in a `Headers` instance.
 * @param {Object} [cf={}] - Object exposed unchanged as the stub's `cf` property.
 * @returns {{url: string, method: string, headers: Headers, cf: Object}} The stub; `method` is always `'GET'`.
 */
function makeRequest(url, headerMap = {}, cf = {}) {
  return {
    url,
    method: 'GET',
    headers: new Headers(headerMap),
    cf,
  };
}
39
+
40
// ── Setup/teardown ─────────────────────────────────────────────────────────

/** The real global `fetch`, saved so it can be put back after the suite. */
const originalFetch = globalThis.fetch;

// Before every test: clear the previous capture and install a stub `fetch`
// that records its arguments and resolves to the canned MOCK_RESPONSE.
beforeEach(() => {
  capturedFetchArgs = null;
  globalThis.fetch = async (url, opts) => {
    capturedFetchArgs = { url, ...opts };
    return MOCK_RESPONSE;
  };
});

// After the whole file has run, restore the real `fetch`.
afterAll(() => {
  globalThis.fetch = originalFetch;
});
55
+
56
// ── Env config ─────────────────────────────────────────────────────────────

/** Environment bindings passed to the handler as its second argument. */
const ENV = {
  PAYWALLS_CLOUD_API_HOST: 'https://test-cloud-api.example.com',
  PAYWALLS_CLOUD_API_KEY: 'test-key-123',
  PAYWALLS_PUBLISHER_ID: 'pub-123',
  VAI_UA_HMAC_KEY: 'test-hmac-secret',
};

/** Execution-context stub whose `waitUntil` discards whatever it is given. */
const CTX = { waitUntil: () => {} };

/** Request headers of a typical browser request: Chrome 134 on macOS. */
const CHROME_MAC_HEADERS = {
  'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
  'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'accept-encoding': 'gzip, deflate, br, zstd',
  'accept-language': 'en-US,en;q=0.9,fr;q=0.8',
  'sec-ch-ua': '"Google Chrome";v="134", "Chromium";v="134", "Not:A-Brand";v="24"',
  'sec-fetch-dest': 'empty',
  'sec-fetch-mode': 'cors',
  'sec-fetch-site': 'same-origin',
  'host': 'publisher.example.com',
  'origin': 'https://publisher.example.com',
  'cookie': 'session=abc123',
};

/** Value used as the request's `cf` property in the full-browser scenarios. */
const CF_PROPS = {
  tlsVersion: 'TLSv1.3',
  httpProtocol: 'HTTP/2',
  asn: 7922, // Comcast — consumer
};
87
+
88
// ── Tests ──────────────────────────────────────────────────────────────────

// Suite for the VAI proxy path: each test sends a request for /pw/vai.json
// through the handler and inspects the arguments of the outbound (stubbed)
// fetch call.
describe('proxyVAIRequest — signal extraction pipeline', () => {
  /** URL used by every test that does not need a query string. */
  const VAI_URL = 'https://pub.example.com/pw/vai.json';

  let handler;

  beforeAll(async () => {
    handler = await init('cloudflare');
  });

  /**
   * Send one stub request through the handler and return what the stubbed
   * `fetch` captured for it.
   *
   * The capture is cleared first so that a call which never reaches `fetch`
   * cannot hand back the result of an earlier call made in the same test;
   * the explicit assertion turns that situation into a readable failure
   * instead of a TypeError on `null.headers`.
   *
   * @param {string} url - Request URL.
   * @param {Object<string, string>} headerMap - Inbound request headers.
   * @param {Object} cf - Value for the request's `cf` property.
   * @returns {Promise<Object>} The captured `{ url, ...opts }` of the outbound fetch.
   */
  async function proxyVAI(url, headerMap, cf) {
    capturedFetchArgs = null;
    await handler(makeRequest(url, headerMap, cf), ENV, CTX);
    expect(capturedFetchArgs).not.toBeNull();
    return capturedFetchArgs;
  }

  // ── Protocol version ─────────────────────────────────────────────────

  test('emits X-PW-V: 2', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-V']).toBe('2');
  });

  // ── User-Agent replacement ───────────────────────────────────────────

  test('User-Agent is SDK identifier, not browser UA', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['User-Agent']).toMatch(/^pw-filter-sdk\//);
    expect(headers['User-Agent']).not.toContain('Chrome');
  });

  // ── Tier 1: raw headers ──────────────────────────────────────────────

  test('Tier 1: Sec-Fetch headers forwarded raw', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-Sec-Fetch-Dest']).toBe('empty');
    expect(headers['X-PW-Sec-Fetch-Mode']).toBe('cors');
    expect(headers['X-PW-Sec-Fetch-Site']).toBe('same-origin');
  });

  test('Tier 1: TLS and HTTP protocol from cf object', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-TLS-Version']).toBe('TLSv1.3');
    expect(headers['X-PW-HTTP-Protocol']).toBe('HTTP/2');
  });

  // ── Tier 2: extracted features ───────────────────────────────────────

  test('X-PW-Accept is SF-Dictionary (not raw Accept header)', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-Accept']).toBe('html, wildcard');
  });

  test('X-PW-Enc is SF-Dictionary', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-Enc']).toBe('br, gzip, modern');
  });

  test('X-PW-Lang is SF-Dictionary', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-Lang']).toBe('present, primary=en, count=3');
  });

  test('X-PW-Net classifies consumer ASN', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-Net']).toBe('asn=consumer');
  });

  test('X-PW-Net classifies cloud ASN (AWS)', async () => {
    const cloudCf = { ...CF_PROPS, asn: 16509 };
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, cloudCf);
    expect(headers['X-PW-Net']).toBe('asn=cloud');
  });

  test('X-PW-CH is SF-Dictionary with consistency check', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-CH']).toBe('present, brands=3, grease, consistent');
  });

  // ── Tier 3: UA features + HMAC ──────────────────────────────────────

  test('X-PW-UA is SF-Dictionary', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-UA']).toBe('dpf=desktop/mac/chrome, ver=120-139, browser, entropy=medium');
  });

  test('X-PW-UA-HMAC is RFC 8941 Byte Sequence', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-UA-HMAC']).toMatch(/^:[A-Za-z0-9+/]+=*:$/);
  });

  test('X-PW-CT-FP is 8-char hex confidence token', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-PW-CT-FP']).toMatch(/^[0-9a-f]{8}$/);
  });

  // ── Old headers NOT present ──────────────────────────────────────────

  test('old raw headers are NOT forwarded', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    // v1 header names that should no longer appear
    expect(headers['X-PW-Accept-Language']).toBeUndefined();
    expect(headers['X-PW-Accept-Encoding']).toBeUndefined();
    expect(headers['X-PW-Sec-CH-UA']).toBeUndefined();
    expect(headers['X-PW-ASN']).toBeUndefined();
  });

  // ── Operational headers still present ────────────────────────────────

  test('operational headers forwarded (Host, Origin, Cookie)', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['X-Original-Host']).toBe('publisher.example.com');
    expect(headers['X-Forwarded-Origin']).toBe('https://publisher.example.com');
    expect(headers['Cookie']).toBe('session=abc123');
  });

  test('Authorization header present', async () => {
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    expect(headers['Authorization']).toBe('Bearer test-key-123');
  });

  // ── Absent inputs → headers omitted ──────────────────────────────────

  test('missing Accept-Language → X-PW-Lang omitted', async () => {
    const { 'accept-language': _, ...noLang } = CHROME_MAC_HEADERS;
    const { headers } = await proxyVAI(VAI_URL, noLang, CF_PROPS);
    expect(headers['X-PW-Lang']).toBeUndefined();
  });

  test('missing Sec-CH-UA → X-PW-CH omitted', async () => {
    const { 'sec-ch-ua': _, ...noCH } = CHROME_MAC_HEADERS;
    const { headers } = await proxyVAI(VAI_URL, noCH, CF_PROPS);
    expect(headers['X-PW-CH']).toBeUndefined();
  });

  test('missing cf.asn → X-PW-Net omitted', async () => {
    const noCf = {};
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, noCf);
    expect(headers['X-PW-Net']).toBeUndefined();
  });

  test('no HMAC key → X-PW-UA-HMAC omitted', async () => {
    // Work on a copy of ENV with the HMAC key unset; the shared ENV object
    // is left untouched. A separately initialised handler is used for this
    // request, so the proxyVAI helper (bound to ENV) is bypassed.
    const noHmacEnv = { ...ENV, VAI_UA_HMAC_KEY: undefined };
    const noHmacHandler = await init('cloudflare');
    const request = makeRequest(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    await noHmacHandler(request, noHmacEnv, CTX);
    expect(capturedFetchArgs).not.toBeNull();
    expect(capturedFetchArgs.headers['X-PW-UA-HMAC']).toBeUndefined();
  });

  // ── curl (minimal headers) ──────────────────────────────────────────

  test('curl request — minimal headers, automation detected', async () => {
    const curlHeaders = {
      'user-agent': 'curl/7.88.1',
      'accept': '*/*',
      'host': 'publisher.example.com',
    };
    const { headers } = await proxyVAI(VAI_URL, curlHeaders, {});
    expect(headers['X-PW-V']).toBe('2');
    expect(headers['X-PW-Accept']).toBe('wildcard');
    expect(headers['X-PW-UA']).toMatch(/automation/);
    expect(headers['X-PW-UA']).toMatch(/entropy=low/);
    // No language, encoding, CH, net
    expect(headers['X-PW-Lang']).toBeUndefined();
    expect(headers['X-PW-Enc']).toBeUndefined();
    expect(headers['X-PW-CH']).toBeUndefined();
    expect(headers['X-PW-Net']).toBeUndefined();
  });

  // ── Proxies to correct URL ──────────────────────────────────────────

  test('proxies to cloud-api host with original path', async () => {
    const args = await proxyVAI(`${VAI_URL}?v=2`, CHROME_MAC_HEADERS, CF_PROPS);
    expect(args.url).toBe('https://test-cloud-api.example.com/pw/vai.json?v=2');
  });

  // ── Deterministic HMAC and CT ────────────────────────────────────────

  test('HMAC is deterministic for same UA + key', async () => {
    const first = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    const second = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    // Guard against a vacuous pass: undefined === undefined would satisfy
    // the equality check if the header were missing from both requests.
    expect(first.headers['X-PW-UA-HMAC']).toBeDefined();
    expect(second.headers['X-PW-UA-HMAC']).toBe(first.headers['X-PW-UA-HMAC']);
  });

  test('confidence token is deterministic for same inputs', async () => {
    const first = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    const second = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    // Same guard as above: the token must exist before equality means anything.
    expect(first.headers['X-PW-CT-FP']).toBeDefined();
    expect(second.headers['X-PW-CT-FP']).toBe(first.headers['X-PW-CT-FP']);
  });

  // ── All 14 signal headers present (Chrome full) ──────────────────────────

  test('Chrome full request — all 14 X-PW signal headers emitted', async () => {
    const expectedSignalHeaders = [
      'X-PW-V',
      'X-PW-Sec-Fetch-Dest', 'X-PW-Sec-Fetch-Mode', 'X-PW-Sec-Fetch-Site',
      'X-PW-TLS-Version', 'X-PW-HTTP-Protocol',
      'X-PW-Accept', 'X-PW-Enc', 'X-PW-Lang', 'X-PW-Net', 'X-PW-CH',
      'X-PW-UA', 'X-PW-UA-HMAC', 'X-PW-CT-FP',
    ];
    const { headers } = await proxyVAI(VAI_URL, CHROME_MAC_HEADERS, CF_PROPS);
    const xpwHeaders = Object.keys(headers).filter((k) => k.startsWith('X-PW-'));
    // Comparing sorted lists checks "exactly these 14, no more, no fewer"
    // and names the missing or unexpected header when it fails.
    expect([...xpwHeaders].sort()).toEqual([...expectedSignalHeaders].sort());
  });
});
293
+
294
// ── logAccess — non-VAI path raw header forwarding ───────────────────────────
//
// Verifies that logAccess() does NOT transform headers like proxyVAIRequest().
// The access-log body must contain the raw browser User-Agent (not the SDK
// sentinel) and must NOT contain any X-PW-* signal headers.

describe('logAccess — non-VAI path forwards raw user-agent in body', () => {
  let handler;
  /** Every stubbed fetch call made during the current test, as `{ url, ...opts }`. */
  let fetchCalls;

  beforeAll(async () => {
    handler = await init('cloudflare');
  });

  // Replace the file-level fetch stub with one that records every call and
  // answers according to the endpoint being requested.
  beforeEach(() => {
    fetchCalls = [];
    globalThis.fetch = async (url, opts) => {
      fetchCalls.push({ url, ...opts });
      const target = String(url);
      if (target.includes('/agents/metadata')) {
        // loadAgentPatterns — return empty pattern list
        return {
          ok: true,
          json: async () => ({ version: 2, patterns: [] }),
        };
      }
      if (target.includes('/agents/auth')) {
        // checkAgentStatus — allow the bot through so logAccess is called
        return {
          ok: true,
          json: async () => ({ access: 'allow', reason: 'known_bot', response: { code: 200, headers: {} } }),
        };
      }
      // logAccess POST /access/logs
      return { ok: true, status: 200, statusText: 'OK' };
    };
  });

  afterAll(() => {
    globalThis.fetch = originalFetch;
  });

  /**
   * Run one request through the handler and return the parsed JSON body of
   * the resulting POST to `/access/logs`.
   *
   * Every promise handed to `ctx.waitUntil` is collected and awaited. Keeping
   * only the most recent one would leave earlier background work (possibly
   * the log POST itself) unfinished when the assertions run.
   *
   * @param {string} url - Request URL; the tests add `?user-agent=testbot` to take the bot path.
   * @returns {Promise<Object>} Parsed body of the access-log fetch call.
   */
  async function captureAccessLogBody(url) {
    const request = makeRequest(url, CHROME_MAC_HEADERS, CF_PROPS);
    const background = [];
    const ctx = { waitUntil: (p) => { background.push(p); } };

    await handler(request, ENV, ctx);
    await Promise.all(background);

    const logCall = fetchCalls.find((c) => String(c.url).includes('/access/logs'));
    expect(logCall).toBeDefined();
    return JSON.parse(logCall.body);
  }

  test('logAccess body contains raw user-agent, not SDK identifier', async () => {
    const browserUA = CHROME_MAC_HEADERS['user-agent'];
    // ?user-agent=testbot triggers isTestBot() → bot path → checkAgentStatus + logAccess
    const body = await captureAccessLogBody('https://pub.example.com/article/1?user-agent=testbot');

    expect(body.user_agent).toBe(browserUA);
    expect(body.user_agent).not.toMatch(/^pw-filter-sdk\//);
  });

  test('logAccess body headers contain no X-PW-* signal headers', async () => {
    const body = await captureAccessLogBody('https://pub.example.com/article/2?user-agent=testbot');

    const xpwKeysInLog = Object.keys(body.headers ?? {}).filter(
      (k) => k.toLowerCase().startsWith('x-pw-'),
    );
    // Comparing against an empty list names the offending keys on failure.
    expect(xpwKeysInLog).toEqual([]);
  });
});