@paywalls-net/filter 1.3.11 → 1.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "description": "Client SDK for integrating paywalls.net bot filtering and authorization services into your server or CDN.",
4
4
  "author": "paywalls.net",
5
5
  "license": "MIT",
6
- "version": "1.3.11",
6
+ "version": "1.3.13",
7
7
  "publishConfig": {
8
8
  "access": "public"
9
9
  },
package/src/index.js CHANGED
@@ -180,6 +180,10 @@ async function proxyVAIRequest(cfg, request) {
180
180
  setIfPresent(forwardHeaders, 'X-PW-Net', extractNetFeatures(cf.asn));
181
181
  setIfPresent(forwardHeaders, 'X-PW-CH', extractCHFeatures(headers['sec-ch-ua'], headers['user-agent']));
182
182
 
183
+ // Geo context: forward CF edge geo data for logging/investigation (paywalls-site-60rp)
184
+ setIfPresent(forwardHeaders, 'X-PW-Geo',
185
+ cf.country ? `co=${cf.country}, re=${cf.region || ''}, ci=${cf.city || ''}, asn=${cf.asn || ''}` : null);
186
+
183
187
  // Tier 3: UA features + HMAC (§6.3)
184
188
  setIfPresent(forwardHeaders, 'X-PW-UA', extractUAFeatures(headers['user-agent']));
185
189
  setIfPresent(forwardHeaders, 'X-PW-UA-HMAC', await computeUAHMAC(headers['user-agent'], cfg.vaiUAHmacKey));
@@ -446,6 +446,50 @@ function bucketVersion(ver) {
446
446
  }
447
447
 
448
448
  // ── §6.3.1 extractUAFeatures ──────────────────────────────────────────────
449
+
450
/**
 * Detect structurally impossible or fabricated browser version strings.
 *
 * Rules applied, in order:
 *
 * 1. Chrome frozen UA policy (User-Agent Reduction, enforced here from
 *    Chrome 107): a reduced UA reports Chrome/[major].0.0.0, so for any
 *    major >= 107 a non-zero minor, build, or patch is treated as fabricated.
 *    NOTE(review): UAs that legitimately carry a full Chrome version (for
 *    example crawler UAs such as "Chrome/145.0.7632.116 ... Googlebot/2.1")
 *    are also flagged by this rule — confirm that is the intended signal.
 *
 * 2. Over-long Chrome patch (e.g. Chrome/48.0.1025.1402): a patch component
 *    of four or more digits is treated as fabricated on any Chrome version.
 *    (Four digits is normal for the *build* component, not for the patch.)
 *
 * 3. Inconsistent EdgeHTML token (Edge/12-18): genuine EdgeHTML UAs use the
 *    five-digit Windows build number as the minor version (e.g.
 *    Edge/18.17763, Edge/18.19582) and are paired with a Chrome token no
 *    newer than Chrome/70. A five-digit minor is therefore NOT a fabrication
 *    signal on its own; instead the UA is flagged when the minor has more
 *    than five digits, or when the EdgeHTML token is combined with a Chrome
 *    major above 70.
 *
 * @param {string} ua - Raw User-Agent header value.
 * @returns {boolean} true when any rule above matches; false otherwise,
 *   including when `ua` is not a non-empty string.
 */
function isFabricatedVersion(ua) {
  // Guard: String.prototype.match would throw on null/undefined input.
  if (typeof ua !== 'string' || ua === '') return false;

  const FROZEN_UA_MIN_MAJOR = 107;
  const MAX_CHROME_PATCH_DIGITS = 3;
  const EDGEHTML_MAX_MAJOR = 18;
  const EDGEHTML_MAX_MINOR_DIGITS = 5;
  const EDGEHTML_MAX_CHROME_MAJOR = 70;

  // Chrome / HeadlessChrome / Chromium: full four-part version parse.
  // "Headless" is spelled out because \b cannot match inside "HeadlessChrome".
  const chromeMatch = ua.match(/(?:\b|Headless)Chrom(?:e|ium)\/(\d+)\.(\d+)\.(\d+)\.(\d+)/);
  if (chromeMatch) {
    const [, majorText, minorText, buildText, patchText] = chromeMatch;
    const major = Number.parseInt(majorText, 10);

    // Frozen UA policy: Chrome >= 107 must be exactly major.0.0.0.
    const hasNonZeroTail = [minorText, buildText, patchText]
      .some((part) => Number.parseInt(part, 10) !== 0);
    if (major >= FROZEN_UA_MIN_MAJOR && hasNonZeroTail) return true;

    // Digit count (not numeric value) is checked, so "0000" is flagged too.
    if (patchText.length > MAX_CHROME_PATCH_DIGITS) return true;
  }

  // EdgeHTML-era token ("Edge/", not Chromium Edge's "Edg/").
  const edgeMatch = ua.match(/\bEdge\/(\d+)\.(\d+)/);
  if (edgeMatch && Number.parseInt(edgeMatch[1], 10) <= EDGEHTML_MAX_MAJOR) {
    // Windows build numbers used as the minor never exceed five digits.
    if (edgeMatch[2].length > EDGEHTML_MAX_MINOR_DIGITS) return true;

    // EdgeHTML never claimed a Chrome token newer than Chrome/70.
    const claimedChrome = ua.match(/\bChrome\/(\d+)/);
    if (claimedChrome && Number.parseInt(claimedChrome[1], 10) > EDGEHTML_MAX_CHROME_MAJOR) {
      return true;
    }
  }

  return false;
}
492
+
449
493
  /**
450
494
  * Parse a User-Agent string into an SF-Dictionary of derived features.
451
495
  *
@@ -468,6 +512,7 @@ export function extractUAFeatures(userAgent) {
468
512
 
469
513
  if (HEADLESS_MARKERS.some(re => re.test(ua))) parts.push('headless');
470
514
  if (AUTOMATION_MARKERS.some(re => re.test(ua))) parts.push('automation');
515
+ if (isFabricatedVersion(ua)) parts.push('fabricated');
471
516
 
472
517
  parts.push(`entropy=${computeUAEntropy(ua)}`);
473
518
 
@@ -0,0 +1,204 @@
1
+ /**
2
+ * Unit tests for bot/automation signal extraction via extractUAFeatures
3
+ *
4
+ * Tests the Tier 3 UA feature extractor against real-world bot signals
5
+ * discovered from Cloudflare production logs (2026-03-14).
6
+ *
7
+ * Fixture: paywalls-site/tests/fixtures/cloudflare-prod-paywalls-2026-03-14.csv
8
+ * Issue: (to be assigned)
9
+ *
10
+ * These tests verify that extractUAFeatures correctly identifies:
11
+ * - headless markers (HeadlessChrome)
12
+ * - automation markers (Puppeteer, Selenium, etc.)
13
+ * - bot family detection (Googlebot, Applebot, Bytespider, etc.)
14
+ * - device/platform/family parsing for suspicious UAs
15
+ * - fabricated version patterns
16
+ */
17
+ import {
18
+ extractUAFeatures,
19
+ _resetVAIMetadata,
20
+ } from '../src/signal-extraction.js';
21
+
22
+ beforeEach(() => {
23
+ _resetVAIMetadata();
24
+ });
25
+
26
+ // ── 1. HeadlessChrome detection ────────────────────────────────────────────
27
+
28
+ describe('HeadlessChrome signal extraction', () => {
29
+ test('HeadlessChrome/145 should have headless marker', () => {
30
+ const result = extractUAFeatures(
31
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/145.0.0.0 Safari/537.36'
32
+ );
33
+ expect(result).toMatch(/\bheadless\b/);
34
+ expect(result).toMatch(/dpf=desktop\/linux\/chrome/);
35
+ expect(result).toMatch(/browser/);
36
+ });
37
+
38
+ test('HeadlessChrome/143 should have headless and fabricated markers', () => {
39
+ const result = extractUAFeatures(
40
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/143.0.7499.4 Safari/537.36'
41
+ );
42
+ expect(result).toMatch(/\bheadless\b/);
43
+ expect(result).toMatch(/dpf=desktop\/linux\/chrome/);
44
+ expect(result).toMatch(/\bfabricated\b/);
45
+ });
46
+ });
47
+
48
+ // ── 2. Self-identified bots ────────────────────────────────────────────────
49
+
50
+ describe('Self-identified bot signal extraction', () => {
51
+ test('Applebot should be detected as bot family', () => {
52
+ const result = extractUAFeatures(
53
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)'
54
+ );
55
+ expect(result).toMatch(/\/bot/);
56
+ });
57
+
58
+ test('Googlebot mobile should be detected as bot family', () => {
59
+ const result = extractUAFeatures(
60
+ 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.7632.116 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
61
+ );
62
+ expect(result).toMatch(/\/bot/);
63
+ });
64
+
65
+ test('Googlebot desktop should be detected as bot family', () => {
66
+ const result = extractUAFeatures(
67
+ 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/145.0.7632.116 Safari/537.36'
68
+ );
69
+ expect(result).toMatch(/\/bot/);
70
+ });
71
+
72
+ test('Bytespider should be detected as bot family', () => {
73
+ const result = extractUAFeatures(
74
+ 'Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider; https://zhanzhang.toutiao.com/)'
75
+ );
76
+ expect(result).toMatch(/\/bot/);
77
+ });
78
+
79
+ test('amazon-Quick non-browser UA should have low entropy', () => {
80
+ const result = extractUAFeatures('amazon-Quick-on-behalf-of-20e61c5a');
81
+ expect(result).not.toMatch(/browser/);
82
+ expect(result).toMatch(/entropy=low/);
83
+ });
84
+ });
85
+
86
+ // ── 3. Fabricated Chrome versions (4-digit patch) ──────────────────────────
87
+
88
+ describe('Fabricated Chrome version UAs — signal extraction', () => {
89
+ // These UAs carry impossible 4-digit patch numbers. For each one, the test
89
+ // below expects extractUAFeatures to:
90
+ // - Parse the very old major version into the low bucket (0-79)
91
+ // - Classify the UA as a browser and emit the `fabricated` marker
93
+
94
+ const fabricatedUAs = [
95
+ { ua: 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.1025.1402 Mobile Safari/537.36', ver: '0-79' },
96
+ { ua: 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.5596.1136 Mobile Safari/537.36', ver: '0-79' },
97
+ { ua: 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2714.1709 Mobile Safari/537.36', ver: '0-79' },
98
+ { ua: 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.5974.1013 Mobile Safari/537.36', ver: '0-79' },
99
+ { ua: 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.1957.1646 Mobile Safari/537.36', ver: '0-79' },
100
+ { ua: 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.4130.1795 Mobile Safari/537.36', ver: '0-79' },
101
+ { ua: 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.7842.1119 Mobile Safari/537.36', ver: '0-79' },
102
+ ];
103
+
104
+ test.each(fabricatedUAs)('$ver Chrome with 4-digit patch → fabricated marker', ({ ua, ver }) => {
105
+ const result = extractUAFeatures(ua);
106
+ expect(result).toMatch(new RegExp(`ver=${ver}`));
107
+ expect(result).toMatch(/browser/);
108
+ expect(result).toMatch(/\bfabricated\b/);
109
+ });
110
+ });
111
+
112
+ // ── 4. Fabricated Edge version ─────────────────────────────────────────────
113
+
114
+ describe('Fabricated Edge version — signal extraction', () => {
115
+ test('Edge/18.19582 should parse as edge family and be fabricated', () => {
116
+ const result = extractUAFeatures(
117
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.7680.71 Safari/537.36 Edge/18.19582'
118
+ );
119
+ expect(result).toMatch(/\/edge/);
120
+ expect(result).toMatch(/dpf=desktop\/windows\/edge/);
121
+ expect(result).toMatch(/\bfabricated\b/);
122
+ });
123
+ });
124
+
125
+ // ── 5. Outdated browser UAs from bot farm ──────────────────────────────────
126
+
127
+ describe('Outdated browser UAs from bot farm — version bucketing', () => {
128
+ test('Chrome/59 (2017) should bucket to 0-79', () => {
129
+ const result = extractUAFeatures(
130
+ 'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36'
131
+ );
132
+ expect(result).toMatch(/ver=0-79/);
133
+ expect(result).toMatch(/dpf=mobile\/android\/chrome/);
134
+ });
135
+
136
+ test('Chrome/117 with non-zero build (frozen UA violation) → fabricated', () => {
137
+ const result = extractUAFeatures(
138
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36'
139
+ );
140
+ expect(result).toMatch(/ver=100-119/);
141
+ expect(result).toMatch(/\bfabricated\b/);
142
+ });
143
+
144
+ test('Chrome/83 should bucket to 80-99', () => {
145
+ const result = extractUAFeatures(
146
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
147
+ );
148
+ expect(result).toMatch(/ver=80-99/);
149
+ });
150
+
151
+ test('Chrome/79 should bucket to 0-79', () => {
152
+ const result = extractUAFeatures(
153
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'
154
+ );
155
+ expect(result).toMatch(/ver=0-79/);
156
+ });
157
+ });
158
+
159
+ // ── 6. Legitimate browser UAs ──────────────────────────────────────────────
160
+
161
+ describe('Legitimate browser UAs — correct feature extraction', () => {
162
+ test('Chrome/145 on macOS → desktop/mac/chrome, current version bucket', () => {
163
+ const result = extractUAFeatures(
164
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36'
165
+ );
166
+ expect(result).toMatch(/dpf=desktop\/mac\/chrome/);
167
+ expect(result).toMatch(/ver=140-159/);
168
+ expect(result).toMatch(/browser/);
169
+ expect(result).not.toMatch(/headless/);
170
+ expect(result).not.toMatch(/automation/);
171
+ expect(result).not.toMatch(/fabricated/);
172
+ });
173
+
174
+ test('Chrome/146 on Windows → desktop/windows/chrome, current version bucket', () => {
175
+ const result = extractUAFeatures(
176
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36'
177
+ );
178
+ expect(result).toMatch(/dpf=desktop\/windows\/chrome/);
179
+ expect(result).toMatch(/ver=140-159/);
180
+ expect(result).not.toMatch(/headless/);
181
+ expect(result).not.toMatch(/automation/);
182
+ expect(result).not.toMatch(/fabricated/);
183
+ });
184
+
185
+ test('Safari/17.4.1 on macOS → desktop/mac/safari', () => {
186
+ const result = extractUAFeatures(
187
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15'
188
+ );
189
+ expect(result).toMatch(/dpf=desktop\/mac\/safari/);
190
+ expect(result).not.toMatch(/headless/);
191
+ expect(result).not.toMatch(/automation/);
192
+ expect(result).not.toMatch(/fabricated/);
193
+ });
194
+
195
+ test('Edge/122 on Windows → desktop/windows/edge', () => {
196
+ const result = extractUAFeatures(
197
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0'
198
+ );
199
+ expect(result).toMatch(/dpf=desktop\/windows\/edge/);
200
+ expect(result).toMatch(/ver=120-139/);
201
+ expect(result).not.toMatch(/headless/);
202
+ expect(result).not.toMatch(/fabricated/);
203
+ });
204
+ });