webpeel 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +415 -0
- package/dist/cli.d.ts +16 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +140 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/fetcher.d.ts +32 -0
- package/dist/core/fetcher.d.ts.map +1 -0
- package/dist/core/fetcher.js +479 -0
- package/dist/core/fetcher.js.map +1 -0
- package/dist/core/markdown.d.ts +17 -0
- package/dist/core/markdown.d.ts.map +1 -0
- package/dist/core/markdown.js +143 -0
- package/dist/core/markdown.js.map +1 -0
- package/dist/core/metadata.d.ts +17 -0
- package/dist/core/metadata.d.ts.map +1 -0
- package/dist/core/metadata.js +159 -0
- package/dist/core/metadata.js.map +1 -0
- package/dist/core/strategies.d.ts +30 -0
- package/dist/core/strategies.d.ts.map +1 -0
- package/dist/core/strategies.js +67 -0
- package/dist/core/strategies.js.map +1 -0
- package/dist/index.d.ts +31 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +81 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +248 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/server/app.d.ts +13 -0
- package/dist/server/app.d.ts.map +1 -0
- package/dist/server/app.js +89 -0
- package/dist/server/app.js.map +1 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.d.ts.map +1 -0
- package/dist/server/auth-store.js +87 -0
- package/dist/server/auth-store.js.map +1 -0
- package/dist/server/middleware/auth.d.ts +18 -0
- package/dist/server/middleware/auth.d.ts.map +1 -0
- package/dist/server/middleware/auth.js +55 -0
- package/dist/server/middleware/auth.js.map +1 -0
- package/dist/server/middleware/rate-limit.d.ts +23 -0
- package/dist/server/middleware/rate-limit.d.ts.map +1 -0
- package/dist/server/middleware/rate-limit.js +85 -0
- package/dist/server/middleware/rate-limit.js.map +1 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.d.ts.map +1 -0
- package/dist/server/routes/fetch.js +127 -0
- package/dist/server/routes/fetch.js.map +1 -0
- package/dist/server/routes/health.d.ts +6 -0
- package/dist/server/routes/health.d.ts.map +1 -0
- package/dist/server/routes/health.js +19 -0
- package/dist/server/routes/health.js.map +1 -0
- package/dist/server/routes/search.d.ts +7 -0
- package/dist/server/routes/search.d.ts.map +1 -0
- package/dist/server/routes/search.js +124 -0
- package/dist/server/routes/search.js.map +1 -0
- package/dist/types.d.ts +59 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +30 -0
- package/dist/types.js.map +1 -0
- package/llms.txt +60 -0
- package/package.json +80 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sliding window rate limiting middleware
|
|
3
|
+
*/
|
|
4
|
+
/**
 * In-memory sliding-window rate limiter.
 *
 * For every identifier it keeps the timestamps (ms since epoch) of the
 * requests that were allowed during the last `windowMs` milliseconds.
 */
export class RateLimiter {
    /** Map of identifier -> `{ timestamps: number[] }` (oldest first). */
    store = new Map();
    /** Length of the sliding window in milliseconds. */
    windowMs;
    /**
     * @param {number} [windowMs=60000] - Sliding window length in milliseconds.
     */
    constructor(windowMs = 60000) {
        this.windowMs = windowMs;
    }
    /**
     * Check whether a request is allowed under the rate limit and, if so,
     * record it.
     *
     * @param {string} identifier - Key the request is counted against.
     * @param {number} limit - Maximum number of requests allowed per window.
     * @returns {{allowed: boolean, remaining: number, retryAfter?: number}}
     *   `retryAfter` (whole seconds) is only present when `allowed` is false.
     */
    checkLimit(identifier, limit) {
        const now = Date.now();
        const windowStart = now - this.windowMs;
        // Get or create entry
        let entry = this.store.get(identifier);
        if (!entry) {
            entry = { timestamps: [] };
            this.store.set(identifier, entry);
        }
        // Remove timestamps outside the window
        entry.timestamps = entry.timestamps.filter((ts) => ts > windowStart);
        // Check if limit exceeded
        if (entry.timestamps.length >= limit) {
            // With a non-positive limit the window can be empty; fall back to
            // `now` so retryAfter is one full window instead of NaN.
            const oldestTimestamp = entry.timestamps[0] ?? now;
            const retryAfter = Math.ceil((oldestTimestamp + this.windowMs - now) / 1000);
            return {
                allowed: false,
                remaining: 0,
                retryAfter,
            };
        }
        // Add current timestamp
        entry.timestamps.push(now);
        return {
            allowed: true,
            remaining: limit - entry.timestamps.length,
        };
    }
    /**
     * Drop expired timestamps and delete identifiers that have none left.
     * Nothing in this class calls it automatically; call it periodically to
     * keep the store from growing.
     */
    cleanup() {
        const now = Date.now();
        const windowStart = now - this.windowMs;
        for (const [identifier, entry] of this.store.entries()) {
            entry.timestamps = entry.timestamps.filter((ts) => ts > windowStart);
            if (entry.timestamps.length === 0) {
                this.store.delete(identifier);
            }
        }
    }
}
|
|
55
|
+
/**
 * Create an Express-style middleware that enforces `limiter` on every request.
 *
 * The request is counted against `req.auth.keyInfo.key` when present,
 * otherwise `req.ip`, otherwise the shared bucket `'unknown'`. The limit is
 * `req.auth.rateLimit`, falling back to 10 when that is missing or falsy.
 *
 * @param {{checkLimit: Function}} limiter - Object exposing
 *   `checkLimit(identifier, limit)` returning `{ allowed, remaining, retryAfter? }`.
 * @returns {Function} Middleware `(req, res, next)`.
 */
export function createRateLimitMiddleware(limiter) {
    return (req, res, next) => {
        try {
            // Use API key or IP address as identifier
            const identifier = req.auth?.keyInfo?.key || req.ip || 'unknown';
            const limit = req.auth?.rateLimit || 10;
            const result = limiter.checkLimit(identifier, limit);
            // Set rate limit headers
            res.setHeader('X-RateLimit-Limit', limit.toString());
            res.setHeader('X-RateLimit-Remaining', result.remaining.toString());
            if (!result.allowed) {
                res.setHeader('Retry-After', result.retryAfter.toString());
                res.status(429).json({
                    error: 'rate_limited',
                    message: 'Rate limit exceeded',
                    retryAfter: result.retryAfter,
                });
                return;
            }
            next();
        }
        catch (error) {
            // SECURITY: keep internal error details out of the response; the
            // full error is logged server-side only. This also avoids reading
            // `.message` from a thrown value that may be null/undefined.
            console.error('Rate limit error:', error);
            res.status(500).json({
                error: 'rate_limit_error',
                message: 'Rate limiting failed',
            });
        }
    };
}
|
|
85
|
+
//# sourceMappingURL=rate-limit.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rate-limit.js","sourceRoot":"","sources":["../../../src/server/middleware/rate-limit.ts"],"names":[],"mappings":"AAAA;;GAEG;AAQH,MAAM,OAAO,WAAW;IACd,KAAK,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC1C,QAAQ,CAAS;IAEzB,YAAY,WAAmB,KAAK;QAClC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,UAAkB,EAAE,KAAa;QAK1C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,WAAW,GAAG,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC;QAExC,sBAAsB;QACtB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,KAAK,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC;QAED,uCAAuC;QACvC,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,WAAW,CAAC,CAAC;QAEnE,0BAA0B;QAC1B,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,KAAK,EAAE,CAAC;YACrC,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,eAAe,GAAG,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;YAE7E,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,CAAC;gBACZ,UAAU;aACX,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3B,OAAO;YACL,OAAO,EAAE,IAAI;YACb,SAAS,EAAE,KAAK,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM;SAC3C,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,OAAO;QACL,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,WAAW,GAAG,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC;QAExC,KAAK,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YACvD,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,WAAW,CAAC,CAAC;YACnE,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAClC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAED,MAAM,UAAU,yBAAyB,CAAC,OAAoB;IAC5D,OAAO,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB,EAAE,EAAE;QACzD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,IAAI,GAAG,CAAC,EAAE,IAAI,SAAS,CAAC;YACjE,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAC;YAExC,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,U
AAU,EAAE,KAAK,CAAC,CAAC;YAErD,yBAAyB;YACzB,GAAG,CAAC,SAAS,CAAC,mBAAmB,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;YACrD,GAAG,CAAC,SAAS,CAAC,uBAAuB,EAAE,MAAM,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEpE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,GAAG,CAAC,SAAS,CAAC,aAAa,EAAE,MAAM,CAAC,UAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAC5D,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,cAAc;oBACrB,OAAO,EAAE,qBAAqB;oBAC9B,UAAU,EAAE,MAAM,CAAC,UAAU;iBAC9B,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,IAAI,EAAE,CAAC;QACT,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAc,CAAC;YAC3B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;gBACnB,KAAK,EAAE,kBAAkB;gBACzB,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,sBAAsB;aAC/C,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../../../src/server/routes/fetch.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAIpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAO7C,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM,CAsI9D"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fetch endpoint with caching
|
|
3
|
+
*/
|
|
4
|
+
import { Router } from 'express';
|
|
5
|
+
import { peel } from '../../index.js';
|
|
6
|
+
import { LRUCache } from 'lru-cache';
|
|
7
|
+
/**
 * Build the router that serves `GET /v1/fetch`.
 *
 * Query parameters: `url` (required, max 2048 chars), `render` (`'true'` to
 * enable), `wait` (0-60000 ms) and `format` (`markdown` | `text` | `html`,
 * default `markdown`). Successful results are cached in memory, keyed by the
 * normalized URL plus the raw option values.
 *
 * @param authStore - Object exposing `trackUsage(key, credits)`; called with
 *   1 credit after each uncached fetch made with an API key.
 * @returns The configured Express router.
 */
export function createFetchRouter(authStore) {
    const router = Router();
    // LRU cache: 5 minute TTL, max 1000 entries, 100MB total size
    const cache = new LRUCache({
        max: 1000,
        ttl: 5 * 60 * 1000, // 5 minutes
        maxSize: 100 * 1024 * 1024, // 100MB
        // Entry size is approximated by the length of its JSON serialization.
        sizeCalculation: (entry) => {
            return JSON.stringify(entry).length;
        },
    });
    router.get('/v1/fetch', async (req, res) => {
        try {
            const { url, render, wait, format } = req.query;
            // Validate URL parameter
            if (!url || typeof url !== 'string') {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Missing or invalid "url" parameter',
                });
                return;
            }
            // SECURITY: Validate URL format and length
            if (url.length > 2048) {
                res.status(400).json({
                    error: 'invalid_url',
                    message: 'URL too long (max 2048 characters)',
                });
                return;
            }
            // Normalize the URL so equivalent spellings (host case, missing
            // trailing slash, ...) share one cache entry.
            // NOTE(review): no scheme restriction is applied here; confirm
            // that `peel` rejects non-HTTP(S) URLs.
            let normalizedUrl;
            try {
                normalizedUrl = new URL(url).href;
            }
            catch {
                res.status(400).json({
                    error: 'invalid_url',
                    message: 'Invalid URL format',
                });
                return;
            }
            // Build cache key
            const cacheKey = `fetch:${normalizedUrl}:${render}:${wait}:${format}`;
            // Check cache
            const cached = cache.get(cacheKey);
            if (cached) {
                res.setHeader('X-Cache', 'HIT');
                res.setHeader('X-Cache-Age', Math.floor((Date.now() - cached.timestamp) / 1000).toString());
                res.json(cached.result);
                return;
            }
            // Parse options
            const options = {
                render: render === 'true',
                wait: wait ? parseInt(wait, 10) : undefined,
                format: format || 'markdown',
            };
            // Validate wait parameter
            if (options.wait !== undefined && (isNaN(options.wait) || options.wait < 0 || options.wait > 60000)) {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Invalid "wait" parameter: must be between 0 and 60000ms',
                });
                return;
            }
            // Validate format parameter
            if (!['markdown', 'text', 'html'].includes(options.format || '')) {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Invalid "format" parameter: must be "markdown", "text", or "html"',
                });
                return;
            }
            // Fetch content
            const startTime = Date.now();
            const result = await peel(normalizedUrl, options);
            const elapsed = Date.now() - startTime;
            // Track usage (1 credit per fetch)
            if (req.auth?.keyInfo?.key) {
                await authStore.trackUsage(req.auth.keyInfo.key, 1);
            }
            // Cache result
            cache.set(cacheKey, {
                result,
                timestamp: Date.now(),
            });
            // Add usage headers
            res.setHeader('X-Cache', 'MISS');
            res.setHeader('X-Credits-Used', '1');
            res.setHeader('X-Processing-Time', elapsed.toString());
            res.json(result);
        }
        catch (error) {
            const err = error;
            // SECURITY: Sanitize error messages to prevent information disclosure
            if (err?.code) {
                // Any thrown value with a truthy `code` is treated as a coded
                // library error and its message is exposed after stripping
                // HTML-significant characters. String() guards against a
                // missing or non-string message.
                const safeMessage = String(err.message ?? '').replace(/[<>"']/g, '');
                res.status(500).json({
                    error: err.code,
                    message: safeMessage,
                });
            }
            else {
                // Unexpected error - generic message only
                console.error('Fetch error:', err); // Log full error server-side
                res.status(500).json({
                    error: 'internal_error',
                    message: 'An unexpected error occurred while fetching the URL',
                });
            }
        }
    });
    return router;
}
|
|
127
|
+
//# sourceMappingURL=fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.js","sourceRoot":"","sources":["../../../src/server/routes/fetch.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAQrC,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,8DAA8D;IAC9D,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAqB;QAC7C,GAAG,EAAE,IAAI;QACT,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,IAAI,EAAE,YAAY;QAChC,OAAO,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,QAAQ;QACpC,eAAe,EAAE,CAAC,KAAK,EAAE,EAAE;YACzB,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACtC,CAAC;KACF,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QAC5D,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC;YAEhD,yBAAyB;YACzB,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;gBACpC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,oCAAoC;iBAC9C,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,2CAA2C;YAC3C,IAAI,GAAG,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;gBACtB,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,aAAa;oBACpB,OAAO,EAAE,oCAAoC;iBAC9C,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC5B,uCAAuC;gBACvC,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC;gBAElC,mCAAmC;gBACnC,IAAI,aAAa,KAAK,GAAG,EAAE,CAAC;oBAC1B,yCAAyC;gBAC3C,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,aAAa;oBACpB,OAAO,EAAE,oBAAoB;iBAC9B,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,kBAAkB;YAClB,MAAM,QAAQ,GAAG,SAAS,GAAG,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;YAE5D,cAAc;YACd,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;gBAChC,GAAG,CAAC,SAAS,CAAC,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAC5F,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBACxB,OAAO;YACT,CAAC;YAED,gBAAgB
;YAChB,MAAM,OAAO,GAAgB;gBAC3B,MAAM,EAAE,MAAM,KAAK,MAAM;gBACzB,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBACrD,MAAM,EAAG,MAAuC,IAAI,UAAU;aAC/D,CAAC;YAEF,0BAA0B;YAC1B,IAAI,OAAO,CAAC,IAAI,KAAK,SAAS,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;gBACpG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,yDAAyD;iBACnE,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,4BAA4B;YAC5B,IAAI,CAAC,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC;gBACjE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,mEAAmE;iBAC7E,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,gBAAgB;YAChB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACxC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEvC,mCAAmC;YACnC,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;gBAC3B,MAAM,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACtD,CAAC;YAED,eAAe;YACf,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE;gBAClB,MAAM;gBACN,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC,CAAC;YAEH,oBAAoB;YACpB,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACjC,GAAG,CAAC,SAAS,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC;YACrC,GAAG,CAAC,SAAS,CAAC,mBAAmB,EAAE,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEvD,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,GAAG,GAAG,KAAY,CAAC;YAEzB,sEAAsE;YACtE,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;gBACb,kDAAkD;gBAClD,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,oBAAoB;gBAC5E,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,GAAG,CAAC,IAAI;oBACf,OAAO,EAAE,WAAW;iBACrB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,0CAA0C;gBAC1C,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;gBACjE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,gBAAgB;oBACvB,OAAO,EAAE,qDAAqD;iBAC/D,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,C
AAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"health.d.ts","sourceRoot":"","sources":["../../../src/server/routes/health.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAIpD,wBAAgB,kBAAkB,IAAI,MAAM,CAe3C"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Health check endpoint
|
|
3
|
+
*/
|
|
4
|
+
import { Router } from 'express';
|
|
5
|
+
// Captured once when this module is loaded; uptime is measured from here.
const startTime = Date.now();
/**
 * Build the router that serves `GET /health`.
 *
 * The response reports a fixed `healthy` status, the version taken from
 * `npm_package_version` (or `'1.0.0'` when that variable is unset/empty),
 * the uptime in whole seconds since module load, and the current time in
 * ISO 8601 form.
 *
 * @returns The configured Express router.
 */
export function createHealthRouter() {
    const router = Router();
    router.get('/health', (_req, res) => {
        const elapsedMs = Date.now() - startTime;
        const payload = {
            status: 'healthy',
            version: process.env.npm_package_version || '1.0.0',
            uptime: Math.floor(elapsedMs / 1000),
            timestamp: new Date().toISOString(),
        };
        res.json(payload);
    });
    return router;
}
|
|
19
|
+
//# sourceMappingURL=health.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"health.js","sourceRoot":"","sources":["../../../src/server/routes/health.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAEpD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;AAE7B,MAAM,UAAU,kBAAkB;IAChC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,EAAE;QACrD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC;QAE3D,GAAG,CAAC,IAAI,CAAC;YACP,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,OAAO;YACnD,MAAM;YACN,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../../src/server/routes/search.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAIpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAa7C,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM,CAsI/D"}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search endpoint with caching
|
|
3
|
+
*/
|
|
4
|
+
import { Router } from 'express';
|
|
5
|
+
import { fetch as undiciFetch } from 'undici';
|
|
6
|
+
import { load } from 'cheerio';
|
|
7
|
+
import { LRUCache } from 'lru-cache';
|
|
8
|
+
/**
 * Build the router that serves `GET /v1/search`.
 *
 * Query parameters: `q` (required search string) and `count` (1-10,
 * default 5). Results are scraped from the DuckDuckGo HTML endpoint and
 * cached in memory per `(q, count)` pair.
 *
 * @param authStore - Object exposing `trackUsage(key, credits)`; called with
 *   1 credit after each uncached search made with an API key.
 * @returns The configured Express router.
 */
export function createSearchRouter(authStore) {
    const router = Router();
    // LRU cache: 15 minute TTL, max 500 entries, 50MB total size
    const cache = new LRUCache({
        max: 500,
        ttl: 15 * 60 * 1000, // 15 minutes
        maxSize: 50 * 1024 * 1024, // 50MB
        // Entry size is approximated by the length of its JSON serialization.
        sizeCalculation: (entry) => {
            return JSON.stringify(entry).length;
        },
    });
    router.get('/v1/search', async (req, res) => {
        try {
            const { q, count } = req.query;
            // Validate query parameter
            if (!q || typeof q !== 'string') {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Missing or invalid "q" parameter',
                });
                return;
            }
            // Parse and validate count
            const resultCount = count ? parseInt(count, 10) : 5;
            if (isNaN(resultCount) || resultCount < 1 || resultCount > 10) {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Invalid "count" parameter: must be between 1 and 10',
                });
                return;
            }
            // Build cache key
            const cacheKey = `search:${q}:${resultCount}`;
            // Check cache
            const cached = cache.get(cacheKey);
            if (cached) {
                res.setHeader('X-Cache', 'HIT');
                res.setHeader('X-Cache-Age', Math.floor((Date.now() - cached.timestamp) / 1000).toString());
                res.json({
                    query: q,
                    count: cached.results.length,
                    results: cached.results,
                });
                return;
            }
            // Perform search
            const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(q)}`;
            const startTime = Date.now();
            const response = await undiciFetch(searchUrl, {
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
                },
            });
            if (!response.ok) {
                throw new Error(`Search failed: HTTP ${response.status}`);
            }
            const html = await response.text();
            const $ = load(html);
            const results = [];
            $('.result').each((_i, elem) => {
                // Returning false stops the cheerio loop; a bare `return`
                // would only skip this element and keep scanning the rest.
                if (results.length >= resultCount)
                    return false;
                const $result = $(elem);
                let title = $result.find('.result__title').text().trim();
                let url = $result.find('.result__url').attr('href') || '';
                let snippet = $result.find('.result__snippet').text().trim();
                // SECURITY: Validate and sanitize results
                if (!title || !url)
                    return;
                // Only allow HTTP/HTTPS URLs
                try {
                    const parsed = new URL(url);
                    if (!['http:', 'https:'].includes(parsed.protocol)) {
                        return;
                    }
                }
                catch {
                    return;
                }
                // Limit text lengths to prevent bloat
                title = title.slice(0, 200);
                snippet = snippet.slice(0, 500);
                results.push({ title, url, snippet });
            });
            const elapsed = Date.now() - startTime;
            // Track usage (1 credit per search)
            if (req.auth?.keyInfo?.key) {
                await authStore.trackUsage(req.auth.keyInfo.key, 1);
            }
            // Cache results
            cache.set(cacheKey, {
                results,
                timestamp: Date.now(),
            });
            // Add headers
            res.setHeader('X-Cache', 'MISS');
            res.setHeader('X-Credits-Used', '1');
            res.setHeader('X-Processing-Time', elapsed.toString());
            res.json({
                query: q,
                count: results.length,
                results,
            });
        }
        catch (error) {
            // SECURITY: Generic error message to prevent information disclosure
            console.error('Search error:', error); // Log full error server-side
            res.status(500).json({
                error: 'search_failed',
                message: 'Search request failed. Please try again.',
            });
        }
    });
    return router;
}
|
|
124
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../../src/server/routes/search.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AACpD,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAcrC,MAAM,UAAU,kBAAkB,CAAC,SAAoB;IACrD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,6DAA6D;IAC7D,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAqB;QAC7C,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,EAAE,GAAG,EAAE,GAAG,IAAI,EAAE,aAAa;QAClC,OAAO,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,OAAO;QAClC,eAAe,EAAE,CAAC,KAAK,EAAE,EAAE;YACzB,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACtC,CAAC;KACF,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QAC7D,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC;YAE/B,2BAA2B;YAC3B,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAChC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,kCAAkC;iBAC5C,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,2BAA2B;YAC3B,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAe,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9D,IAAI,KAAK,CAAC,WAAW,CAAC,IAAI,WAAW,GAAG,CAAC,IAAI,WAAW,GAAG,EAAE,EAAE,CAAC;gBAC9D,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,qDAAqD;iBAC/D,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,kBAAkB;YAClB,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,WAAW,EAAE,CAAC;YAE9C,cAAc;YACd,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;gBAChC,GAAG,CAAC,SAAS,CAAC,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAC5F,GAAG,CAAC,IAAI,CAAC;oBACP,KAAK,EAAE,CAAC;oBACR,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM;oBAC5B,OAAO,EAAE,MAAM,CAAC,OAAO;iBACxB,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,iBAAiB;YACjB,MAAM,SAAS,GAAG,uCAAuC,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;YACjF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAE7B,MAAM,QAAQ,GAAG,MA
AM,WAAW,CAAC,SAAS,EAAE;gBAC5C,OAAO,EAAE;oBACP,YAAY,EAAE,oEAAoE;iBACnF;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAC5D,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;YAErB,MAAM,OAAO,GAAmB,EAAE,CAAC;YAEnC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE;gBAC7B,IAAI,OAAO,CAAC,MAAM,IAAI,WAAW;oBAAE,OAAO;gBAE1C,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;gBACxB,IAAI,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACzD,IAAI,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC1D,IAAI,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAE7D,0CAA0C;gBAC1C,IAAI,CAAC,KAAK,IAAI,CAAC,GAAG;oBAAE,OAAO;gBAE3B,6BAA6B;gBAC7B,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;oBAC5B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;wBACnD,OAAO;oBACT,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO;gBACT,CAAC;gBAED,sCAAsC;gBACtC,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAC5B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAEhC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;YACxC,CAAC,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEvC,oCAAoC;YACpC,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;gBAC3B,MAAM,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACtD,CAAC;YAED,gBAAgB;YAChB,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE;gBAClB,OAAO;gBACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC,CAAC;YAEH,cAAc;YACd,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACjC,GAAG,CAAC,SAAS,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC;YACrC,GAAG,CAAC,SAAS,CAAC,mBAAmB,EAAE,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEvD,GAAG,CAAC,IAAI,CAAC;gBACP,KAAK,EAAE,CAAC;gBACR,KAAK,EAAE,OAAO,CAAC,MAAM;gBACrB,OAAO;aACR,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAc,CAAC;YAC3B,oEAAoE;YACpE
,OAAO,CAAC,KAAK,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;YAClE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;gBACnB,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE,0CAA0C;aACpD,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core types for WebPeel
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Options accepted when peeling a page. Every field is optional.
 */
export interface PeelOptions {
    /** When true, use a headless browser rather than a simple HTTP fetch. */
    render?: boolean;
    /** Milliseconds to wait after page load; only applies when `render` is true. */
    wait?: number;
    /** Format of the returned content. */
    format?: 'markdown' | 'text' | 'html';
    /** Request timeout in milliseconds (documented default: 30000). */
    timeout?: number;
    /** Custom User-Agent string. */
    userAgent?: string;
}
|
|
16
|
+
/**
 * Result of peeling a single page.
 */
export interface PeelResult {
    /** URL the content was finally served from, i.e. after redirects. */
    url: string;
    /** Title of the page. */
    title: string;
    /** Page content rendered in the requested format. */
    content: string;
    /** Metadata extracted from the page. */
    metadata: PageMetadata;
    /** Links found on the page, as absolute URLs with duplicates removed. */
    links: string[];
    /** Rough token estimate, computed as `content.length / 4`. */
    tokens: number;
    /** Which fetch method produced the result. */
    method: 'simple' | 'browser';
    /** Elapsed time in milliseconds. */
    elapsed: number;
}
|
|
34
|
+
/**
 * Optional metadata extracted from a page; any field may be absent.
 */
export interface PageMetadata {
    /** Content of the page's meta description. */
    description?: string;
    /** Name of the author. */
    author?: string;
    /** Publication date in ISO 8601 form. */
    published?: string;
    /** URL of the Open Graph image. */
    image?: string;
    /** Canonical URL of the page. */
    canonical?: string;
}
|
|
46
|
+
/**
 * Base error class for WebPeel failures; carries an optional string `code`.
 */
export declare class WebPeelError extends Error {
    /** Error code passed to the constructor, if any. */
    code?: string | undefined;
    /**
     * @param message - Human-readable error message.
     * @param code - Optional error code.
     */
    constructor(message: string, code?: string | undefined);
}
|
|
50
|
+
/**
 * `WebPeelError` subclass for timeouts; constructed from a message only.
 */
export declare class TimeoutError extends WebPeelError {
    /** @param message - Human-readable error message. */
    constructor(message: string);
}
|
|
53
|
+
/**
 * `WebPeelError` subclass for blocked requests; constructed from a message only.
 */
export declare class BlockedError extends WebPeelError {
    /** @param message - Human-readable error message. */
    constructor(message: string);
}
|
|
56
|
+
/**
 * `WebPeelError` subclass for network failures; constructed from a message only.
 */
export declare class NetworkError extends WebPeelError {
    /** @param message - Human-readable error message. */
    constructor(message: string);
}
|
|
59
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,WAAW;IAC1B,wDAAwD;IACxD,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wEAAwE;IACxE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB;IACpB,MAAM,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;IACtC,uDAAuD;IACvD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,UAAU;IACzB,kCAAkC;IAClC,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,QAAQ,EAAE,YAAY,CAAC;IACvB,gEAAgE;IAChE,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;IACf,wCAAwC;IACxC,MAAM,EAAE,QAAQ,GAAG,SAAS,CAAC;IAC7B,mCAAmC;IACnC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,uBAAuB;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,kBAAkB;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,YAAa,SAAQ,KAAK;IACD,IAAI,CAAC,EAAE,MAAM;gBAArC,OAAO,EAAE,MAAM,EAAS,IAAI,CAAC,EAAE,MAAM,YAAA;CAIlD;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core types for WebPeel
|
|
3
|
+
*/
|
|
4
|
+
export class WebPeelError extends Error { // Base error class for this module; adds an optional `code` on top of Error.
|
|
5
|
+
code; // Holds the constructor's `code` argument; undefined when the caller omits it.
|
|
6
|
+
constructor(message, code) { // message: forwarded to Error; code: stored on the instance as-is.
|
|
7
|
+
super(message);
|
|
8
|
+
this.code = code;
|
|
9
|
+
this.name = 'WebPeelError'; // Replaces the inherited Error name on the instance.
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
export class TimeoutError extends WebPeelError { // WebPeelError with the code fixed to 'TIMEOUT'.
|
|
13
|
+
constructor(message) { // message: forwarded unchanged to WebPeelError.
|
|
14
|
+
super(message, 'TIMEOUT');
|
|
15
|
+
this.name = 'TimeoutError'; // Set after super(), so it overrides the name assigned by WebPeelError.
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
export class BlockedError extends WebPeelError { // WebPeelError with the code fixed to 'BLOCKED'.
|
|
19
|
+
constructor(message) { // message: forwarded unchanged to WebPeelError.
|
|
20
|
+
super(message, 'BLOCKED');
|
|
21
|
+
this.name = 'BlockedError'; // Set after super(), so it overrides the name assigned by WebPeelError.
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
export class NetworkError extends WebPeelError { // WebPeelError with the code fixed to 'NETWORK'.
|
|
25
|
+
constructor(message) { // message: forwarded unchanged to WebPeelError.
|
|
26
|
+
super(message, 'NETWORK');
|
|
27
|
+
this.name = 'NetworkError'; // Set after super(), so it overrides the name assigned by WebPeelError.
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AA+CH,MAAM,OAAO,YAAa,SAAQ,KAAK;IACD;IAApC,YAAY,OAAe,EAAS,IAAa;QAC/C,KAAK,CAAC,OAAO,CAAC,CAAC;QADmB,SAAI,GAAJ,IAAI,CAAS;QAE/C,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF"}
|
package/llms.txt
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# WebPeel
|
|
2
|
+
|
|
3
|
+
> Fetch any web page as clean, AI-ready markdown.
|
|
4
|
+
|
|
5
|
+
WebPeel is an open-source web fetcher designed for AI agents. It converts web pages to clean markdown with smart escalation: tries simple HTTP first (~200ms), automatically escalates to a headless browser when blocked.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# CLI (zero install)
|
|
11
|
+
npx webpeel https://example.com
|
|
12
|
+
|
|
13
|
+
# Library (JavaScript/TypeScript — not a shell command)
|
|
14
|
+
import { peel } from 'webpeel';
|
|
15
|
+
const result = await peel('https://example.com');
|
|
16
|
+
|
|
17
|
+
# MCP Server (Claude Desktop / Cursor / VS Code)
|
|
18
|
+
npx webpeel mcp
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## MCP Tools
|
|
22
|
+
|
|
23
|
+
- `webpeel_fetch` — Fetch a URL, return clean markdown. Params: url (required), render (boolean), wait (ms), format (markdown|text|html)
|
|
24
|
+
- `webpeel_search` — Search the web via DuckDuckGo. Params: query (required), count (1-10)
|
|
25
|
+
|
|
26
|
+
## MCP Configuration
|
|
27
|
+
|
|
28
|
+
```json
|
|
29
|
+
{
|
|
30
|
+
"mcpServers": {
|
|
31
|
+
"webpeel": {
|
|
32
|
+
"command": "npx",
|
|
33
|
+
"args": ["-y", "webpeel", "mcp"]
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Key Features
|
|
40
|
+
|
|
41
|
+
- Smart escalation: HTTP → Playwright headless browser (only when needed)
|
|
42
|
+
- Anti-bot bypass: handles Cloudflare challenges, JavaScript walls, 403s
|
|
43
|
+
- Token-optimized: strips navigation, ads, scripts, cookie banners
|
|
44
|
+
- Metadata extraction: title, description, author, published date, links
|
|
45
|
+
- Search: built-in DuckDuckGo integration
|
|
46
|
+
- Local-first: runs on your machine, no API key required
|
|
47
|
+
|
|
48
|
+
## Hosted API
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
curl "https://api.webpeel.dev/v1/fetch?url=https://example.com"
|
|
52
|
+
curl "https://api.webpeel.dev/v1/search?q=your+query"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Links
|
|
56
|
+
|
|
57
|
+
- Website: https://webpeel.dev
|
|
58
|
+
- GitHub: https://github.com/JakeLiuMe/webpeel
|
|
59
|
+
- npm: https://www.npmjs.com/package/webpeel
|
|
60
|
+
- API Docs: https://webpeel.dev/docs
|
package/package.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "webpeel",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Fast web fetcher for AI agents - smart escalation from simple HTTP to headless browser",
|
|
5
|
+
"author": "Jake Liu",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"main": "./dist/index.js",
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"bin": {
|
|
11
|
+
"webpeel": "./dist/cli.js"
|
|
12
|
+
},
|
|
13
|
+
"exports": {
|
|
14
|
+
".": {
|
|
15
|
+
"import": "./dist/index.js",
|
|
16
|
+
"types": "./dist/index.d.ts"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"files": [
|
|
20
|
+
"dist",
|
|
21
|
+
"!dist/tests",
|
|
22
|
+
"README.md",
|
|
23
|
+
"LICENSE",
|
|
24
|
+
"llms.txt"
|
|
25
|
+
],
|
|
26
|
+
"scripts": {
|
|
27
|
+
"build": "tsc",
|
|
28
|
+
"dev": "tsc --watch",
|
|
29
|
+
"test": "vitest run",
|
|
30
|
+
"test:watch": "vitest",
|
|
31
|
+
"lint": "tsc --noEmit",
|
|
32
|
+
"prepublishOnly": "npm run build",
|
|
33
|
+
"serve": "node dist/server/app.js",
|
|
34
|
+
"mcp": "node dist/mcp/server.js"
|
|
35
|
+
},
|
|
36
|
+
"repository": {
|
|
37
|
+
"type": "git",
|
|
38
|
+
"url": "https://github.com/JakeLiuMe/webpeel.git"
|
|
39
|
+
},
|
|
40
|
+
"bugs": {
|
|
41
|
+
"url": "https://github.com/JakeLiuMe/webpeel/issues"
|
|
42
|
+
},
|
|
43
|
+
"homepage": "https://webpeel.dev",
|
|
44
|
+
"keywords": [
|
|
45
|
+
"web-scraper",
|
|
46
|
+
"ai-agent",
|
|
47
|
+
"mcp-server",
|
|
48
|
+
"mcp",
|
|
49
|
+
"playwright",
|
|
50
|
+
"markdown",
|
|
51
|
+
"fetcher",
|
|
52
|
+
"web-fetcher",
|
|
53
|
+
"claude",
|
|
54
|
+
"cursor",
|
|
55
|
+
"codex"
|
|
56
|
+
],
|
|
57
|
+
"dependencies": {
|
|
58
|
+
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
59
|
+
"cheerio": "^1.0.0",
|
|
60
|
+
"commander": "^12.0.0",
|
|
61
|
+
"cors": "^2.8.5",
|
|
62
|
+
"express": "^4.21.2",
|
|
63
|
+
"lru-cache": "^11.0.2",
|
|
64
|
+
"ora": "^8.0.1",
|
|
65
|
+
"playwright": "^1.48.0",
|
|
66
|
+
"turndown": "^7.2.0",
|
|
67
|
+
"undici": "^7.2.0"
|
|
68
|
+
},
|
|
69
|
+
"devDependencies": {
|
|
70
|
+
"@types/cors": "^2.8.17",
|
|
71
|
+
"@types/express": "^5.0.0",
|
|
72
|
+
"@types/node": "^22.0.0",
|
|
73
|
+
"@types/turndown": "^5.0.5",
|
|
74
|
+
"typescript": "^5.6.0",
|
|
75
|
+
"vitest": "^2.1.0"
|
|
76
|
+
},
|
|
77
|
+
"engines": {
|
|
78
|
+
"node": ">=20.0.0"
|
|
79
|
+
}
|
|
80
|
+
}
|