@paywalls-net/filter 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "description": "Client SDK for integrating paywalls.net bot filtering and authorization services into your server or CDN.",
4
4
  "author": "paywalls.net",
5
5
  "license": "MIT",
6
- "version": "1.3.0",
6
+ "version": "1.3.2",
7
7
  "publishConfig": {
8
8
  "access": "public"
9
9
  },
package/src/index.js CHANGED
@@ -51,6 +51,71 @@ function getAllHeaders(request) {
51
51
  return headers;
52
52
  }
53
53
 
54
/**
 * Check if the request is for a VAI endpoint (vai.json or vai.js).
 *
 * The request path must equal `<vaiPath>/vai.json` or `<vaiPath>/vai.js`
 * exactly; query strings are ignored because only the URL pathname is compared.
 *
 * The configured prefix is normalized before the comparison so that a
 * trailing slash ('/pw/'), a missing leading slash ('pw') or a bare '/'
 * do not silently disable the endpoints.
 *
 * @param {Request} request - The incoming request. `request.url` is used when
 *   set; otherwise `request.uri` is appended to a placeholder origin.
 * @param {string} vaiPath - The path prefix for VAI endpoints (default: '/pw')
 * @returns {boolean} - True if this is a VAI endpoint request; false for any
 *   other path, a non-string prefix, or a request whose URL cannot be parsed.
 */
function isVAIRequest(request, vaiPath = '/pw') {
  // A non-string prefix can never equal a URL pathname.
  if (typeof vaiPath !== 'string') {
    return false;
  }

  // Strip trailing slashes so '/pw/' behaves like '/pw' (and '/' like '').
  let end = vaiPath.length;
  while (end > 0 && vaiPath[end - 1] === '/') {
    end -= 1;
  }
  const trimmed = vaiPath.slice(0, end);

  // URL pathnames always start with '/', so a prefix without one could never match.
  const prefix = trimmed === '' || trimmed.startsWith('/') ? trimmed : `/${trimmed}`;

  try {
    const { pathname } = new URL(request.url || `http://host${request.uri || ''}`);
    return pathname === `${prefix}/vai.json` || pathname === `${prefix}/vai.js`;
  } catch (err) {
    // Missing request object or unparseable URL: treat as "not a VAI request".
    return false;
  }
}
69
+
70
/**
 * Forward a VAI endpoint request to the cloud-api service and hand back
 * whatever that service answers.
 *
 * The upstream path is always '/pw/vai.json' or '/pw/vai.js', chosen by the
 * suffix of the incoming pathname; the incoming query string is not forwarded.
 * Non-2xx upstream responses are logged but still returned to the caller.
 *
 * @param {Object} cfg - Configuration object with paywallsAPIHost and paywallsAPIKey
 * @param {Request} request - The incoming request
 * @returns {Promise<Response>} - The upstream response, or a plain 500
 *   response when building or sending the upstream request throws.
 */
async function proxyVAIRequest(cfg, request) {
  try {
    const { pathname } = new URL(request.url || `http://host${request.uri || ''}`);

    // Anything that is not vai.json is served from the vai.js endpoint.
    let cloudApiPath = '/pw/vai.js';
    if (pathname.endsWith('/vai.json')) {
      cloudApiPath = '/pw/vai.json';
    }

    const incoming = getAllHeaders(request);

    // Prefer an existing X-Forwarded-For value; otherwise use cf-connecting-ip.
    const clientAddress = incoming['x-forwarded-for'] || incoming['cf-connecting-ip'];
    const originalHost = incoming['host'];

    const forwardHeaders = {
      'User-Agent': incoming['user-agent'] || sdkUserAgent,
      'Authorization': `Bearer ${cfg.paywallsAPIKey}`,
      ...(clientAddress ? { 'X-Forwarded-For': clientAddress } : {}),
      ...(originalHost ? { 'X-Original-Host': originalHost } : {}),
    };

    const upstream = await fetch(`${cfg.paywallsAPIHost}${cloudApiPath}`, {
      method: 'GET',
      headers: forwardHeaders
    });

    if (!upstream.ok) {
      console.error(`VAI proxy error: ${upstream.status} ${upstream.statusText}`);
    }

    return upstream;
  } catch (err) {
    console.error(`Error proxying VAI request: ${err.message}`);
    return new Response('Internal Server Error', { status: 500 });
  }
}
118
+
54
119
  async function logAccess(cfg, request, access) {
55
120
  // Separate html from the status in the access object.
56
121
  const { response, ...status } = access;
@@ -263,8 +328,15 @@ async function cloudflare(config = null) {
263
328
  const paywallsConfig = {
264
329
  paywallsAPIHost: env.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
265
330
  paywallsAPIKey: env.PAYWALLS_CLOUD_API_KEY,
266
- paywallsPublisherId: env.PAYWALLS_PUBLISHER_ID
331
+ paywallsPublisherId: env.PAYWALLS_PUBLISHER_ID,
332
+ vaiPath: env.PAYWALLS_VAI_PATH || '/pw'
267
333
  };
334
+
335
+ // Check if this is a VAI endpoint request and proxy it
336
+ if (isVAIRequest(request, paywallsConfig.vaiPath)) {
337
+ return await proxyVAIRequest(paywallsConfig, request);
338
+ }
339
+
268
340
  await loadAgentPatterns(paywallsConfig);
269
341
 
270
342
  if (await isRecognizedBot(paywallsConfig, request)) {
@@ -288,8 +360,14 @@ async function fastly() {
288
360
  const paywallsConfig = {
289
361
  paywallsAPIHost: config.get('PAYWALLS_CLOUD_API_HOST') || PAYWALLS_CLOUD_API_HOST,
290
362
  paywallsAPIKey: config.get('PAYWALLS_API_KEY'),
291
- paywallsPublisherId: config.get('PAYWALLS_PUBLISHER_ID')
363
+ paywallsPublisherId: config.get('PAYWALLS_PUBLISHER_ID'),
364
+ vaiPath: config.get('PAYWALLS_VAI_PATH') || '/pw'
292
365
  };
366
+
367
+ // Check if this is a VAI endpoint request and proxy it
368
+ if (isVAIRequest(request, paywallsConfig.vaiPath)) {
369
+ return await proxyVAIRequest(paywallsConfig, request);
370
+ }
293
371
 
294
372
  await loadAgentPatterns(paywallsConfig);
295
373
 
@@ -304,6 +382,34 @@ async function fastly() {
304
382
  }
305
383
  };
306
384
  }
385
/**
 * Convert a standard Response to CloudFront format.
 *
 * Headers are grouped by lower-cased name, and repeated names (for example
 * several Set-Cookie headers) are kept as multiple entries instead of the
 * last one overwriting the others.
 *
 * Headers that describe the transfer rather than the content are not copied:
 * the body is taken from `response.text()`, so the upstream
 * content-encoding / content-length no longer describe it, and hop-by-hop
 * headers belong to the upstream connection only.
 * NOTE(review): Lambda@Edge also rejects some of these on generated
 * responses — confirm the list against the AWS header restrictions for the
 * event type this handler is attached to.
 *
 * @param {Response} response - Standard fetch Response object
 * @returns {Promise<Object>} - CloudFront-formatted response with `status`,
 *   `statusDescription`, `headers` and `body`
 */
async function responseToCloudFront(response) {
  const omittedHeaders = new Set([
    'connection',
    'content-encoding',
    'content-length',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'proxy-connection',
    'trailer',
    'transfer-encoding',
    'upgrade'
  ]);

  // Group in a Map first so that no header name is treated as a special
  // object property while the groups are being built.
  const grouped = new Map();
  for (const [key, value] of response.headers.entries()) {
    const name = key.toLowerCase();
    if (omittedHeaders.has(name)) {
      continue;
    }
    const entry = { key: key, value: value };
    const existing = grouped.get(name);
    if (existing) {
      existing.push(entry);
    } else {
      grouped.set(name, [entry]);
    }
  }

  const body = await response.text();

  return {
    status: response.status,
    // Falls back to 'OK' when the response carries no status text.
    statusDescription: response.statusText || 'OK',
    headers: Object.fromEntries(grouped),
    body: body
  };
}
412
+
307
413
  /**
308
414
  * Adapt to CloudFront format
309
415
  * Lambda@Edge events see https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/lambda-event-structure.html#lambda-event-structure-request
@@ -336,13 +442,21 @@ async function cloudfront(config) {
336
442
  const paywallsConfig = {
337
443
  paywallsAPIHost: config.PAYWALLS_CLOUD_API_HOST || PAYWALLS_CLOUD_API_HOST,
338
444
  paywallsAPIKey: config.PAYWALLS_API_KEY,
339
- paywallsPublisherId: config.PAYWALLS_PUBLISHER_ID
445
+ paywallsPublisherId: config.PAYWALLS_PUBLISHER_ID,
446
+ vaiPath: config.PAYWALLS_VAI_PATH || '/pw'
340
447
  };
341
448
  await loadAgentPatterns(paywallsConfig);
342
449
 
343
450
  return async function handle(event, ctx) {
344
451
  let request = event.Records[0].cf.request;
345
452
  request = requestShim(request);
453
+
454
+ // Check if this is a VAI endpoint request and proxy it
455
+ if (isVAIRequest(request, paywallsConfig.vaiPath)) {
456
+ const response = await proxyVAIRequest(paywallsConfig, request);
457
+ return await responseToCloudFront(response);
458
+ }
459
+
346
460
  if (await isRecognizedBot(paywallsConfig, request)) {
347
461
  const authz = await checkAgentStatus(paywallsConfig, request);
348
462
 
@@ -5,6 +5,39 @@ let cachedUserAgentPatterns = null;
5
5
  let cacheTimestamp = null;
6
6
  const CACHE_DURATION = 60 * 60 * 1000; // 1 hour
7
7
 
8
+ // Cache for user agent classifications
9
+ //
10
+ // CACHE STRATEGY CONSIDERATIONS:
11
+ //
12
+ // Current approach: Raw user-agent string as cache key
13
+ // - Pro: No parsing overhead before cache lookup
14
+ // - Pro: Exact matches are very fast
15
+ // - Con: User-agents with minor version differences create separate cache entries
16
+ // - Con: Cache could grow large with many unique UAs (especially browser traffic)
17
+ //
18
+ // Alternative approaches to consider:
19
+ // 1. Normalized keys (e.g., browser name + major version + OS)
20
+ // - Would improve hit rate and reduce memory
21
+ // - But adds parsing cost before every cache check
22
+ // - Risk: Might miss pattern-specific matches if patterns are version-sensitive
23
+ //
24
+ // 2. LRU cache with size limit
25
+ // - Bounds memory usage
26
+ // - Evicts least-recently-used entries
27
+ // - Good if traffic patterns are consistent
28
+ //
29
+ // 3. Separate caches for bots vs browsers
30
+ // - Bot UAs are typically more stable (better cache hit rate)
31
+ // - Browser UAs change frequently with versions (lower hit rate)
32
+ // - Could optimize each differently
33
+ //
34
+ // Decision: Start with raw UA keys until we have production metrics showing:
35
+ // - Actual cache size growth
36
+ // - Cache hit rates
37
+ // - Memory pressure
38
+ // Then optimize based on data rather than speculation.
39
+ let classificationCache = new Map();
40
+
8
41
  /**
9
42
  * Fetch user agent patterns from the API and cache them.
10
43
  * @returns {Promise<Array>} The user agent patterns.
@@ -39,6 +72,10 @@ export async function loadAgentPatterns(cfg) {
39
72
  }));
40
73
 
41
74
  cacheTimestamp = now;
75
+
76
+ // Clear classification cache when patterns are refreshed
77
+ classificationCache.clear();
78
+
42
79
  return cachedUserAgentPatterns;
43
80
  } catch (error) {
44
81
  console.error('Error loading agent patterns:', error);
@@ -53,6 +90,14 @@ export async function loadAgentPatterns(cfg) {
53
90
  * @returns {Promise<Object>} An object containing the browser, OS, operator, usage, and user_initiated status.
54
91
  */
55
92
  export async function classifyUserAgent(cfg, userAgent) {
93
+ // Check classification cache first (single lookup is more efficient than has + get)
94
+ const cached = classificationCache.get(userAgent);
95
+ if (cached !== undefined) {
96
+ console.log(`User agent classification cache hit for: ${userAgent}`);
97
+ return cached;
98
+ }
99
+ console.log(`User agent classification cache miss for: ${userAgent}`);
100
+
56
101
  const parsedUA = new UAParser(userAgent).getResult();
57
102
 
58
103
  const browser = parsedUA.browser.name || 'Unknown';
@@ -64,7 +109,7 @@ export async function classifyUserAgent(cfg, userAgent) {
64
109
  if (!config.patterns) continue;
65
110
  for (const pattern of config.patterns) {
66
111
  if (new RegExp(pattern).test(userAgent)) {
67
- return {
112
+ const result = {
68
113
  operator: config.operator,
69
114
  agent: config.agent || browser,
70
115
  usage: config.usage,
@@ -72,12 +117,18 @@ export async function classifyUserAgent(cfg, userAgent) {
72
117
  browser,
73
118
  os,
74
119
  };
120
+ // Cache the classification result
121
+ classificationCache.set(userAgent, result);
122
+ return result;
75
123
  }
76
124
  }
77
125
  }
78
126
 
79
- return {
127
+ const result = {
80
128
  browser,
81
129
  os
82
130
  };
131
+ // Cache the default classification
132
+ classificationCache.set(userAgent, result);
133
+ return result;
83
134
  }