@paywalls-net/filter 1.3.3 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +4 -1
- package/src/user-agent-classification.js +20 -3
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -199,6 +199,8 @@ async function checkAgentStatus(cfg, request) {
|
|
|
199
199
|
account_id: cfg.paywallsPublisherId,
|
|
200
200
|
operator: agentInfo.operator,
|
|
201
201
|
agent: agentInfo.agent,
|
|
202
|
+
vat: agentInfo.vat,
|
|
203
|
+
act: agentInfo.act,
|
|
202
204
|
token: token,
|
|
203
205
|
headers: headers
|
|
204
206
|
});
|
|
@@ -262,7 +264,8 @@ function isTestBot(request) {
|
|
|
262
264
|
async function isPaywallsKnownBot(cfg, request) {
|
|
263
265
|
const userAgent = request.headers.get("User-Agent");
|
|
264
266
|
const uaClassification = await classifyUserAgent(cfg, userAgent);
|
|
265
|
-
|
|
267
|
+
// Classified as non-human by pattern match, or has known operator/agent
|
|
268
|
+
return (uaClassification.vat && uaClassification.vat !== 'HUMAN') || (uaClassification.operator && uaClassification.agent);
|
|
266
269
|
}
|
|
267
270
|
|
|
268
271
|
async function isRecognizedBot(cfg, request) {
|
|
@@ -63,12 +63,23 @@ export async function loadAgentPatterns(cfg) {
|
|
|
63
63
|
throw new Error(`Failed to fetch agent patterns: ${response.status} ${response.statusText}`);
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
const serializedPatterns = await response.json();
|
|
66
|
+
const data = await response.json();
|
|
67
|
+
|
|
68
|
+
// Handle v2 envelope ({ version: 2, patterns: [...] }) or v1 flat array
|
|
69
|
+
const serializedPatterns = (data && data.version === 2 && Array.isArray(data.patterns))
|
|
70
|
+
? data.patterns
|
|
71
|
+
: Array.isArray(data) ? data : [];
|
|
67
72
|
|
|
68
73
|
// Deserialize RegExp strings back into RegExp objects
|
|
74
|
+
// Format: "/pattern/flags" — extract pattern and flags separately
|
|
69
75
|
cachedUserAgentPatterns = serializedPatterns.map((pattern) => ({
|
|
70
76
|
...pattern,
|
|
71
|
-
patterns: pattern.patterns.map((regexString) =>
|
|
77
|
+
patterns: pattern.patterns.map((regexString) => {
|
|
78
|
+
const lastSlash = regexString.lastIndexOf('/');
|
|
79
|
+
const pattern = regexString.slice(1, lastSlash);
|
|
80
|
+
const flags = regexString.slice(lastSlash + 1);
|
|
81
|
+
return new RegExp(pattern, flags);
|
|
82
|
+
})
|
|
72
83
|
}));
|
|
73
84
|
|
|
74
85
|
cacheTimestamp = now;
|
|
@@ -114,6 +125,10 @@ export async function classifyUserAgent(cfg, userAgent) {
|
|
|
114
125
|
agent: config.agent || browser,
|
|
115
126
|
usage: config.usage,
|
|
116
127
|
user_initiated: config.user_initiated,
|
|
128
|
+
purpose: config.purpose,
|
|
129
|
+
purpose_mode: config.purpose_mode,
|
|
130
|
+
vat: config.vat,
|
|
131
|
+
act: config.act,
|
|
117
132
|
browser,
|
|
118
133
|
os,
|
|
119
134
|
};
|
|
@@ -126,7 +141,9 @@ export async function classifyUserAgent(cfg, userAgent) {
|
|
|
126
141
|
|
|
127
142
|
const result = {
|
|
128
143
|
browser,
|
|
129
|
-
os
|
|
144
|
+
os,
|
|
145
|
+
vat: 'HUMAN',
|
|
146
|
+
act: 'ACT-3',
|
|
130
147
|
};
|
|
131
148
|
// Cache the default classification
|
|
132
149
|
classificationCache.set(userAgent, result);
|