@paywalls-net/filter 1.3.1 → 1.3.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "description": "Client SDK for integrating paywalls.net bot filtering and authorization services into your server or CDN.",
4
4
  "author": "paywalls.net",
5
5
  "license": "MIT",
6
- "version": "1.3.1",
6
+ "version": "1.3.2",
7
7
  "publishConfig": {
8
8
  "access": "public"
9
9
  },
@@ -5,6 +5,39 @@ let cachedUserAgentPatterns = null;
5
5
  let cacheTimestamp = null;
6
6
  const CACHE_DURATION = 60 * 60 * 1000; // 1 hour
7
7
 
8
+ // Cache for user agent classifications
9
+ //
10
+ // CACHE STRATEGY CONSIDERATIONS:
11
+ //
12
+ // Current approach: Raw user-agent string as cache key
13
+ // - Pro: No parsing overhead before cache lookup
14
+ // - Pro: Exact matches are very fast
15
+ // - Con: User agents that differ only in minor version create separate cache entries
16
+ // - Con: Cache could grow large with many unique UAs (especially browser traffic)
17
+ //
18
+ // Alternative approaches to consider:
19
+ // 1. Normalized keys (e.g., browser name + major version + OS)
20
+ // - Would improve hit rate and reduce memory
21
+ // - But adds parsing cost before every cache check
22
+ // - Risk: Might miss pattern-specific matches if patterns are version-sensitive
23
+ //
24
+ // 2. LRU cache with size limit
25
+ // - Bounds memory usage
26
+ // - Evicts least-recently-used entries
27
+ // - Good if traffic patterns are consistent
28
+ //
29
+ // 3. Separate caches for bots vs browsers
30
+ // - Bot UAs are typically more stable (better cache hit rate)
31
+ // - Browser UAs change frequently with versions (lower hit rate)
32
+ // - Could optimize each differently
33
+ //
34
+ // Decision: Start with raw UA keys until we have production metrics showing:
35
+ // - Actual cache size growth
36
+ // - Cache hit rates
37
+ // - Memory pressure
38
+ // Then optimize based on data rather than speculation.
39
+ let classificationCache = new Map();
40
+
8
41
  /**
9
42
  * Fetch user agent patterns from the API and cache them.
10
43
  * @returns {Promise<Array>} The user agent patterns.
@@ -39,6 +72,10 @@ export async function loadAgentPatterns(cfg) {
39
72
  }));
40
73
 
41
74
  cacheTimestamp = now;
75
+
76
+ // Clear classification cache when patterns are refreshed
77
+ classificationCache.clear();
78
+
42
79
  return cachedUserAgentPatterns;
43
80
  } catch (error) {
44
81
  console.error('Error loading agent patterns:', error);
@@ -53,6 +90,14 @@ export async function loadAgentPatterns(cfg) {
53
90
  * @returns {Promise<Object>} An object containing the browser, OS, operator, usage, and user_initiated status.
54
91
  */
55
92
  export async function classifyUserAgent(cfg, userAgent) {
93
+ // Check classification cache first (single lookup is more efficient than has + get)
94
+ const cached = classificationCache.get(userAgent);
95
+ if (cached !== undefined) {
96
+ console.log(`User agent classification cache hit for: ${userAgent}`);
97
+ return cached;
98
+ }
99
+ console.log(`User agent classification cache miss for: ${userAgent}`);
100
+
56
101
  const parsedUA = new UAParser(userAgent).getResult();
57
102
 
58
103
  const browser = parsedUA.browser.name || 'Unknown';
@@ -64,7 +109,7 @@ export async function classifyUserAgent(cfg, userAgent) {
64
109
  if (!config.patterns) continue;
65
110
  for (const pattern of config.patterns) {
66
111
  if (new RegExp(pattern).test(userAgent)) {
67
- return {
112
+ const result = {
68
113
  operator: config.operator,
69
114
  agent: config.agent || browser,
70
115
  usage: config.usage,
@@ -72,12 +117,18 @@ export async function classifyUserAgent(cfg, userAgent) {
72
117
  browser,
73
118
  os,
74
119
  };
120
+ // Cache the classification result
121
+ classificationCache.set(userAgent, result);
122
+ return result;
75
123
  }
76
124
  }
77
125
  }
78
126
 
79
- return {
127
+ const result = {
80
128
  browser,
81
129
  os
82
130
  };
131
+ // Cache the default classification
132
+ classificationCache.set(userAgent, result);
133
+ return result;
83
134
  }