@paywalls-net/filter 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +3 -4
- package/src/user-agent-classification.js +20 -3
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -193,12 +193,10 @@ async function checkAgentStatus(cfg, request) {
|
|
|
193
193
|
};
|
|
194
194
|
}
|
|
195
195
|
let headers = getAllHeaders(request);
|
|
196
|
-
const agentInfo = await classifyUserAgent(cfg, userAgent);
|
|
197
196
|
|
|
198
197
|
const body = JSON.stringify({
|
|
199
198
|
account_id: cfg.paywallsPublisherId,
|
|
200
|
-
|
|
201
|
-
agent: agentInfo.agent,
|
|
199
|
+
user_agent: userAgent,
|
|
202
200
|
token: token,
|
|
203
201
|
headers: headers
|
|
204
202
|
});
|
|
@@ -262,7 +260,8 @@ function isTestBot(request) {
|
|
|
262
260
|
async function isPaywallsKnownBot(cfg, request) {
|
|
263
261
|
const userAgent = request.headers.get("User-Agent");
|
|
264
262
|
const uaClassification = await classifyUserAgent(cfg, userAgent);
|
|
265
|
-
|
|
263
|
+
// Classified as non-human by pattern match, or has known operator/agent
|
|
264
|
+
return (uaClassification.vat && uaClassification.vat !== 'HUMAN') || (uaClassification.operator && uaClassification.agent);
|
|
266
265
|
}
|
|
267
266
|
|
|
268
267
|
async function isRecognizedBot(cfg, request) {
|
|
package/src/user-agent-classification.js
CHANGED
|
@@ -63,12 +63,23 @@ export async function loadAgentPatterns(cfg) {
|
|
|
63
63
|
throw new Error(`Failed to fetch agent patterns: ${response.status} ${response.statusText}`);
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
const serializedPatterns = await response.json();
|
|
66
|
+
const data = await response.json();
|
|
67
|
+
|
|
68
|
+
// Handle v2 envelope ({ version: 2, patterns: [...] }) or v1 flat array
|
|
69
|
+
const serializedPatterns = (data && data.version === 2 && Array.isArray(data.patterns))
|
|
70
|
+
? data.patterns
|
|
71
|
+
: Array.isArray(data) ? data : [];
|
|
67
72
|
|
|
68
73
|
// Deserialize RegExp strings back into RegExp objects
|
|
74
|
+
// Format: "/pattern/flags" — extract pattern and flags separately
|
|
69
75
|
cachedUserAgentPatterns = serializedPatterns.map((pattern) => ({
|
|
70
76
|
...pattern,
|
|
71
|
-
patterns: pattern.patterns.map((regexString) =>
|
|
77
|
+
patterns: pattern.patterns.map((regexString) => {
|
|
78
|
+
const lastSlash = regexString.lastIndexOf('/');
|
|
79
|
+
const pattern = regexString.slice(1, lastSlash);
|
|
80
|
+
const flags = regexString.slice(lastSlash + 1);
|
|
81
|
+
return new RegExp(pattern, flags);
|
|
82
|
+
})
|
|
72
83
|
}));
|
|
73
84
|
|
|
74
85
|
cacheTimestamp = now;
|
|
@@ -114,6 +125,10 @@ export async function classifyUserAgent(cfg, userAgent) {
|
|
|
114
125
|
agent: config.agent || browser,
|
|
115
126
|
usage: config.usage,
|
|
116
127
|
user_initiated: config.user_initiated,
|
|
128
|
+
purpose: config.purpose,
|
|
129
|
+
purpose_mode: config.purpose_mode,
|
|
130
|
+
vat: config.vat,
|
|
131
|
+
act: config.act,
|
|
117
132
|
browser,
|
|
118
133
|
os,
|
|
119
134
|
};
|
|
@@ -126,7 +141,9 @@ export async function classifyUserAgent(cfg, userAgent) {
|
|
|
126
141
|
|
|
127
142
|
const result = {
|
|
128
143
|
browser,
|
|
129
|
-
os
|
|
144
|
+
os,
|
|
145
|
+
vat: 'HUMAN',
|
|
146
|
+
act: 'ACT-2', // Unmatched UA with detected browser — medium confidence
|
|
130
147
|
};
|
|
131
148
|
// Cache the default classification
|
|
132
149
|
classificationCache.set(userAgent, result);
|