@paywalls-net/filter 1.3.2 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -0
- package/package.json +1 -1
- package/src/index.js +23 -6
- package/src/user-agent-classification.js +20 -3
package/README.md
CHANGED
|
@@ -12,6 +12,24 @@ npm install @paywalls-net/filter
|
|
|
12
12
|
- `PAYWALLS_PUBLISHER_ID`: The unique identifier for the publisher using paywalls.net services.
|
|
13
13
|
- `PAYWALLS_CLOUD_API_KEY`: The API key for accessing paywalls.net services. NOTE: This key should be treated like a password and kept secret and stored in a secure secrets vault or environment variable.
|
|
14
14
|
|
|
15
|
+
## Architecture: Path Prefix Ownership
|
|
16
|
+
|
|
17
|
+
The SDK uses a **path prefix ownership strategy** for VAI (Validated Actor Inventory) endpoints. All requests to `/pw/*` are automatically proxied to the paywalls.net cloud-api service with API key authentication.
|
|
18
|
+
|
|
19
|
+
### Benefits
|
|
20
|
+
- **Version Independent**: New VAI endpoints work automatically without SDK updates
|
|
21
|
+
- **Reduced Publisher Friction**: Publishers don't need to update client code when new features are added
|
|
22
|
+
- **Future Proof**: Supports nested paths like `/pw/v2/*` or `/pw/analytics/*`
|
|
23
|
+
|
|
24
|
+
### Proxied Endpoints
|
|
25
|
+
Any request matching `/pw/*` is proxied with authentication:
|
|
26
|
+
- `/pw/vai.json` - VAI classification (JSON)
|
|
27
|
+
- `/pw/vai.js` - VAI classification (JavaScript)
|
|
28
|
+
- `/pw/jwks.json` - JSON Web Key Set for signature verification
|
|
29
|
+
- Future endpoints are supported automatically
|
|
30
|
+
|
|
31
|
+
This strategy minimizes version coupling between the client SDK and the paywalls.net platform.
|
|
32
|
+
|
|
15
33
|
## Usage
|
|
16
34
|
The following is an example of using the SDK with Cloudflare Workers:
|
|
17
35
|
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -52,23 +52,36 @@ function getAllHeaders(request) {
|
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
/**
|
|
55
|
-
* Check if the request is for a VAI endpoint (vai.json or vai.js)
|
|
55
|
+
* Check if the request falls under the VAI path prefix (e.g. vai.json, vai.js, or jwks.json)
|
|
56
56
|
* @param {Request} request - The incoming request
|
|
57
57
|
* @param {string} vaiPath - The path prefix for VAI endpoints (default: '/pw')
|
|
58
58
|
* @returns {boolean} - True if this is a VAI endpoint request
|
|
59
59
|
*/
|
|
60
|
+
/**
|
|
61
|
+
* Check if request is for a VAI endpoint.
|
|
62
|
+
* Uses path prefix matching to proxy all /pw/* requests without hardcoding specific endpoints.
|
|
63
|
+
* This makes the SDK future-proof - new VAI endpoints work automatically without SDK updates.
|
|
64
|
+
*
|
|
65
|
+
* @param {Request} request - The incoming request
|
|
66
|
+
* @param {string} vaiPath - VAI path prefix (default: '/pw')
|
|
67
|
+
* @returns {boolean} - True if request should be proxied to cloud-api
|
|
68
|
+
*/
|
|
60
69
|
function isVAIRequest(request, vaiPath = '/pw') {
|
|
61
70
|
try {
|
|
62
71
|
const url = new URL(request.url || `http://host${request.uri || ''}`);
|
|
63
72
|
const pathname = url.pathname;
|
|
64
|
-
|
|
73
|
+
// Proxy everything under the VAI path prefix
|
|
74
|
+
return pathname.startsWith(`${vaiPath}/`);
|
|
65
75
|
} catch (err) {
|
|
66
76
|
return false;
|
|
67
77
|
}
|
|
68
78
|
}
|
|
69
79
|
|
|
70
80
|
/**
|
|
71
|
-
* Proxy VAI requests to the cloud-api service
|
|
81
|
+
* Proxy VAI requests to the cloud-api service.
|
|
82
|
+
* Proxies the entire request path without endpoint-specific logic,
|
|
83
|
+
* allowing new VAI endpoints to work automatically.
|
|
84
|
+
*
|
|
72
85
|
* @param {Object} cfg - Configuration object with paywallsAPIHost and paywallsAPIKey
|
|
73
86
|
* @param {Request} request - The incoming request
|
|
74
87
|
* @returns {Promise<Response>} - The proxied response from cloud-api
|
|
@@ -76,8 +89,9 @@ function isVAIRequest(request, vaiPath = '/pw') {
|
|
|
76
89
|
async function proxyVAIRequest(cfg, request) {
|
|
77
90
|
try {
|
|
78
91
|
const url = new URL(request.url || `http://host${request.uri || ''}`);
|
|
79
|
-
|
|
80
|
-
|
|
92
|
+
|
|
93
|
+
// Proxy the entire path as-is (path prefix ownership strategy)
|
|
94
|
+
const cloudApiPath = url.pathname + url.search;
|
|
81
95
|
|
|
82
96
|
// Get all request headers
|
|
83
97
|
const headers = getAllHeaders(request);
|
|
@@ -185,6 +199,8 @@ async function checkAgentStatus(cfg, request) {
|
|
|
185
199
|
account_id: cfg.paywallsPublisherId,
|
|
186
200
|
operator: agentInfo.operator,
|
|
187
201
|
agent: agentInfo.agent,
|
|
202
|
+
vat: agentInfo.vat,
|
|
203
|
+
act: agentInfo.act,
|
|
188
204
|
token: token,
|
|
189
205
|
headers: headers
|
|
190
206
|
});
|
|
@@ -248,7 +264,8 @@ function isTestBot(request) {
|
|
|
248
264
|
async function isPaywallsKnownBot(cfg, request) {
|
|
249
265
|
const userAgent = request.headers.get("User-Agent");
|
|
250
266
|
const uaClassification = await classifyUserAgent(cfg, userAgent);
|
|
251
|
-
|
|
267
|
+
// Classified as non-human by pattern match, or has known operator/agent
|
|
268
|
+
return (uaClassification.vat && uaClassification.vat !== 'HUMAN') || (uaClassification.operator && uaClassification.agent);
|
|
252
269
|
}
|
|
253
270
|
|
|
254
271
|
async function isRecognizedBot(cfg, request) {
|
|
@@ -63,12 +63,23 @@ export async function loadAgentPatterns(cfg) {
|
|
|
63
63
|
throw new Error(`Failed to fetch agent patterns: ${response.status} ${response.statusText}`);
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
const serializedPatterns = await response.json();
|
|
66
|
+
const data = await response.json();
|
|
67
|
+
|
|
68
|
+
// Handle v2 envelope ({ version: 2, patterns: [...] }) or v1 flat array
|
|
69
|
+
const serializedPatterns = (data && data.version === 2 && Array.isArray(data.patterns))
|
|
70
|
+
? data.patterns
|
|
71
|
+
: Array.isArray(data) ? data : [];
|
|
67
72
|
|
|
68
73
|
// Deserialize RegExp strings back into RegExp objects
|
|
74
|
+
// Format: "/pattern/flags" — extract pattern and flags separately
|
|
69
75
|
cachedUserAgentPatterns = serializedPatterns.map((pattern) => ({
|
|
70
76
|
...pattern,
|
|
71
|
-
patterns: pattern.patterns.map((regexString) =>
|
|
77
|
+
patterns: pattern.patterns.map((regexString) => {
|
|
78
|
+
const lastSlash = regexString.lastIndexOf('/');
|
|
79
|
+
const pattern = regexString.slice(1, lastSlash);
|
|
80
|
+
const flags = regexString.slice(lastSlash + 1);
|
|
81
|
+
return new RegExp(pattern, flags);
|
|
82
|
+
})
|
|
72
83
|
}));
|
|
73
84
|
|
|
74
85
|
cacheTimestamp = now;
|
|
@@ -114,6 +125,10 @@ export async function classifyUserAgent(cfg, userAgent) {
|
|
|
114
125
|
agent: config.agent || browser,
|
|
115
126
|
usage: config.usage,
|
|
116
127
|
user_initiated: config.user_initiated,
|
|
128
|
+
purpose: config.purpose,
|
|
129
|
+
purpose_mode: config.purpose_mode,
|
|
130
|
+
vat: config.vat,
|
|
131
|
+
act: config.act,
|
|
117
132
|
browser,
|
|
118
133
|
os,
|
|
119
134
|
};
|
|
@@ -126,7 +141,9 @@ export async function classifyUserAgent(cfg, userAgent) {
|
|
|
126
141
|
|
|
127
142
|
const result = {
|
|
128
143
|
browser,
|
|
129
|
-
os
|
|
144
|
+
os,
|
|
145
|
+
vat: 'HUMAN',
|
|
146
|
+
act: 'ACT-3',
|
|
130
147
|
};
|
|
131
148
|
// Cache the default classification
|
|
132
149
|
classificationCache.set(userAgent, result);
|