webpeel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +415 -0
  3. package/dist/cli.d.ts +16 -0
  4. package/dist/cli.d.ts.map +1 -0
  5. package/dist/cli.js +140 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/core/fetcher.d.ts +32 -0
  8. package/dist/core/fetcher.d.ts.map +1 -0
  9. package/dist/core/fetcher.js +479 -0
  10. package/dist/core/fetcher.js.map +1 -0
  11. package/dist/core/markdown.d.ts +17 -0
  12. package/dist/core/markdown.d.ts.map +1 -0
  13. package/dist/core/markdown.js +143 -0
  14. package/dist/core/markdown.js.map +1 -0
  15. package/dist/core/metadata.d.ts +17 -0
  16. package/dist/core/metadata.d.ts.map +1 -0
  17. package/dist/core/metadata.js +159 -0
  18. package/dist/core/metadata.js.map +1 -0
  19. package/dist/core/strategies.d.ts +30 -0
  20. package/dist/core/strategies.d.ts.map +1 -0
  21. package/dist/core/strategies.js +67 -0
  22. package/dist/core/strategies.js.map +1 -0
  23. package/dist/index.d.ts +31 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +81 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/mcp/server.d.ts +7 -0
  28. package/dist/mcp/server.d.ts.map +1 -0
  29. package/dist/mcp/server.js +248 -0
  30. package/dist/mcp/server.js.map +1 -0
  31. package/dist/server/app.d.ts +13 -0
  32. package/dist/server/app.d.ts.map +1 -0
  33. package/dist/server/app.js +89 -0
  34. package/dist/server/app.js.map +1 -0
  35. package/dist/server/auth-store.d.ts +28 -0
  36. package/dist/server/auth-store.d.ts.map +1 -0
  37. package/dist/server/auth-store.js +87 -0
  38. package/dist/server/auth-store.js.map +1 -0
  39. package/dist/server/middleware/auth.d.ts +18 -0
  40. package/dist/server/middleware/auth.d.ts.map +1 -0
  41. package/dist/server/middleware/auth.js +55 -0
  42. package/dist/server/middleware/auth.js.map +1 -0
  43. package/dist/server/middleware/rate-limit.d.ts +23 -0
  44. package/dist/server/middleware/rate-limit.d.ts.map +1 -0
  45. package/dist/server/middleware/rate-limit.js +85 -0
  46. package/dist/server/middleware/rate-limit.js.map +1 -0
  47. package/dist/server/routes/fetch.d.ts +7 -0
  48. package/dist/server/routes/fetch.d.ts.map +1 -0
  49. package/dist/server/routes/fetch.js +127 -0
  50. package/dist/server/routes/fetch.js.map +1 -0
  51. package/dist/server/routes/health.d.ts +6 -0
  52. package/dist/server/routes/health.d.ts.map +1 -0
  53. package/dist/server/routes/health.js +19 -0
  54. package/dist/server/routes/health.js.map +1 -0
  55. package/dist/server/routes/search.d.ts +7 -0
  56. package/dist/server/routes/search.d.ts.map +1 -0
  57. package/dist/server/routes/search.js +124 -0
  58. package/dist/server/routes/search.js.map +1 -0
  59. package/dist/types.d.ts +59 -0
  60. package/dist/types.d.ts.map +1 -0
  61. package/dist/types.js +30 -0
  62. package/dist/types.js.map +1 -0
  63. package/llms.txt +60 -0
  64. package/package.json +80 -0
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Sliding window rate limiting middleware
3
+ */
4
/**
 * In-memory sliding-window rate limiter.
 *
 * For each identifier the limiter remembers the timestamps (milliseconds
 * since the epoch) of the requests it allowed during the last `windowMs`
 * milliseconds. A request is allowed while fewer than `limit` timestamps
 * remain inside the window.
 */
export class RateLimiter {
    /** Maps identifier -> `{ timestamps: number[] }` (allowed requests only). */
    store = new Map();
    /** Length of the sliding window in milliseconds. */
    windowMs;
    /**
     * @param {number} [windowMs=60000] Length of the sliding window in milliseconds.
     */
    constructor(windowMs = 60000) {
        this.windowMs = windowMs;
    }
    /**
     * Check if a request is allowed under the rate limit and, if so, record it.
     *
     * @param {string} identifier Key the request is counted against.
     * @param {number} limit Maximum number of allowed requests per window.
     * @returns {{allowed: boolean, remaining: number, retryAfter?: number}}
     *   `retryAfter` (whole seconds) is only present when `allowed` is false.
     */
    checkLimit(identifier, limit) {
        const now = Date.now();
        const windowStart = now - this.windowMs;
        // Get or create entry
        let entry = this.store.get(identifier);
        if (!entry) {
            entry = { timestamps: [] };
            this.store.set(identifier, entry);
        }
        // Remove timestamps outside the window
        entry.timestamps = entry.timestamps.filter((ts) => ts > windowStart);
        // Check if limit exceeded
        if (entry.timestamps.length >= limit) {
            // The window can be empty here when `limit` is 0 or negative. Fall
            // back to "now" so the caller gets a full-window retry hint rather
            // than NaN (undefined + number).
            const oldestTimestamp = entry.timestamps[0] ?? now;
            const retryAfter = Math.ceil((oldestTimestamp + this.windowMs - now) / 1000);
            return {
                allowed: false,
                remaining: 0,
                retryAfter,
            };
        }
        // Record the current request; rejected requests are never recorded.
        entry.timestamps.push(now);
        return {
            allowed: true,
            remaining: limit - entry.timestamps.length,
        };
    }
    /**
     * Clean up old entries (call periodically).
     *
     * Drops expired timestamps and removes identifiers that have none left,
     * so the map does not grow without bound.
     */
    cleanup() {
        const windowStart = Date.now() - this.windowMs;
        for (const [identifier, entry] of this.store.entries()) {
            entry.timestamps = entry.timestamps.filter((ts) => ts > windowStart);
            if (entry.timestamps.length === 0) {
                this.store.delete(identifier);
            }
        }
    }
}
55
/**
 * Builds an Express-style middleware that enforces `limiter` per caller.
 *
 * The caller is identified by `req.auth.keyInfo.key` when present, otherwise
 * by `req.ip`, otherwise by the literal `'unknown'`. The per-window limit is
 * `req.auth.rateLimit`, defaulting to 10 when it is missing or falsy.
 *
 * Every handled request gets `X-RateLimit-Limit` and `X-RateLimit-Remaining`
 * headers. Rejected requests additionally get `Retry-After` and a 429 JSON body.
 *
 * @param limiter Object exposing `checkLimit(identifier, limit)` that returns
 *   `{ allowed, remaining, retryAfter? }`.
 * @returns {(req: object, res: object, next: Function) => void}
 */
export function createRateLimitMiddleware(limiter) {
    return (req, res, next) => {
        let allowed = false;
        try {
            // Use API key or IP address as identifier
            const identifier = req.auth?.keyInfo?.key || req.ip || 'unknown';
            const limit = req.auth?.rateLimit || 10;
            const result = limiter.checkLimit(identifier, limit);
            // Set rate limit headers
            res.setHeader('X-RateLimit-Limit', limit.toString());
            res.setHeader('X-RateLimit-Remaining', result.remaining.toString());
            allowed = result.allowed;
            if (!allowed) {
                res.setHeader('Retry-After', result.retryAfter.toString());
                res.status(429).json({
                    error: 'rate_limited',
                    message: 'Rate limit exceeded',
                    retryAfter: result.retryAfter,
                });
            }
        }
        catch (error) {
            // SECURITY: keep internal details out of the response; log them
            // server-side instead.
            console.error('Rate limit error:', error);
            res.status(500).json({
                error: 'rate_limit_error',
                message: 'Rate limiting failed',
            });
            return;
        }
        // Called outside the try block so an exception thrown by a downstream
        // handler is not misreported as a rate limiting failure.
        if (allowed) {
            next();
        }
    };
}
85
+ //# sourceMappingURL=rate-limit.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rate-limit.js","sourceRoot":"","sources":["../../../src/server/middleware/rate-limit.ts"],"names":[],"mappings":"AAAA;;GAEG;AAQH,MAAM,OAAO,WAAW;IACd,KAAK,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC1C,QAAQ,CAAS;IAEzB,YAAY,WAAmB,KAAK;QAClC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,UAAkB,EAAE,KAAa;QAK1C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,WAAW,GAAG,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC;QAExC,sBAAsB;QACtB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,KAAK,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC;QAED,uCAAuC;QACvC,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,WAAW,CAAC,CAAC;QAEnE,0BAA0B;QAC1B,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,KAAK,EAAE,CAAC;YACrC,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,eAAe,GAAG,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;YAE7E,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,CAAC;gBACZ,UAAU;aACX,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3B,OAAO;YACL,OAAO,EAAE,IAAI;YACb,SAAS,EAAE,KAAK,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM;SAC3C,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,OAAO;QACL,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,WAAW,GAAG,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC;QAExC,KAAK,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YACvD,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,WAAW,CAAC,CAAC;YACnE,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAClC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAED,MAAM,UAAU,yBAAyB,CAAC,OAAoB;IAC5D,OAAO,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB,EAAE,EAAE;QACzD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,IAAI,GAAG,CAAC,EAAE,IAAI,SAAS,CAAC;YACjE,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAC;YAExC,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC
,UAAU,EAAE,KAAK,CAAC,CAAC;YAErD,yBAAyB;YACzB,GAAG,CAAC,SAAS,CAAC,mBAAmB,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;YACrD,GAAG,CAAC,SAAS,CAAC,uBAAuB,EAAE,MAAM,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEpE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,GAAG,CAAC,SAAS,CAAC,aAAa,EAAE,MAAM,CAAC,UAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAC5D,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,cAAc;oBACrB,OAAO,EAAE,qBAAqB;oBAC9B,UAAU,EAAE,MAAM,CAAC,UAAU;iBAC9B,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,IAAI,EAAE,CAAC;QACT,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAc,CAAC;YAC3B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;gBACnB,KAAK,EAAE,kBAAkB;gBACzB,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,sBAAsB;aAC/C,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Fetch endpoint with caching
3
+ */
4
+ import { Router } from 'express';
5
+ import { AuthStore } from '../auth-store.js';
6
/**
 * Type declaration for the fetch route factory.
 *
 * The implementation in fetch.js registers `GET /v1/fetch` on a new Router
 * and calls `authStore.trackUsage(key, 1)` for authenticated cache misses.
 *
 * @param authStore Store used to record credit usage per API key.
 * @returns The router carrying the fetch endpoint.
 */
+ export declare function createFetchRouter(authStore: AuthStore): Router;
7
+ //# sourceMappingURL=fetch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../../../src/server/routes/fetch.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAIpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAO7C,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM,CAsI9D"}
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Fetch endpoint with caching
3
+ */
4
+ import { Router } from 'express';
5
+ import { peel } from '../../index.js';
6
+ import { LRUCache } from 'lru-cache';
7
/**
 * Creates the Express router that serves `GET /v1/fetch`.
 *
 * Query parameters:
 *   - `url`    (required) absolute http(s) URL, at most 2048 characters
 *   - `render` `'true'` requests browser rendering; anything else means false
 *   - `wait`   integer milliseconds between 0 and 60000
 *   - `format` `'markdown'` (default), `'text'` or `'html'`
 *
 * Successful results are kept in an in-memory LRU cache and every success
 * response is flagged with `X-Cache: HIT` or `X-Cache: MISS`. Only cache
 * misses call `peel`; they are billed through `authStore.trackUsage`
 * (1 credit) when the request carries an API key.
 *
 * @param authStore Store whose `trackUsage(key, credits)` records consumption.
 * @returns The configured router.
 */
export function createFetchRouter(authStore) {
    const router = Router();
    // LRU cache: 5 minute TTL, max 1000 entries, total size budget of
    // 100 * 1024 * 1024 size units.
    const cache = new LRUCache({
        max: 1000,
        ttl: 5 * 60 * 1000, // 5 minutes
        maxSize: 100 * 1024 * 1024,
        // An entry's size is the length of its JSON serialization. That counts
        // UTF-16 code units, so it only approximates bytes.
        sizeCalculation: (entry) => JSON.stringify(entry).length,
    });
    router.get('/v1/fetch', async (req, res) => {
        try {
            const { url, render, wait, format } = req.query;
            // Validate URL parameter
            if (!url || typeof url !== 'string') {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Missing or invalid "url" parameter',
                });
                return;
            }
            // SECURITY: Validate URL format and length
            if (url.length > 2048) {
                res.status(400).json({
                    error: 'invalid_url',
                    message: 'URL too long (max 2048 characters)',
                });
                return;
            }
            let parsed;
            try {
                parsed = new URL(url);
            }
            catch {
                res.status(400).json({
                    error: 'invalid_url',
                    message: 'Invalid URL format',
                });
                return;
            }
            // SECURITY: the URL is untrusted and may be opened by a headless
            // browser, so schemes such as file: or data: are rejected here.
            if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
                res.status(400).json({
                    error: 'invalid_url',
                    message: 'Only HTTP and HTTPS URLs are supported',
                });
                return;
            }
            // Normalized form, so equivalent spellings of a URL share one
            // cache entry.
            const normalizedUrl = parsed.href;
            // Parse options. Query values may be arrays or objects when a
            // parameter is repeated; those are treated as invalid, not coerced.
            let waitMs;
            if (wait) {
                waitMs = typeof wait === 'string' ? Number.parseInt(wait, 10) : Number.NaN;
            }
            const options = {
                render: render === 'true',
                wait: waitMs,
                format: format || 'markdown',
            };
            // Validate wait parameter
            if (options.wait !== undefined && (Number.isNaN(options.wait) || options.wait < 0 || options.wait > 60000)) {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Invalid "wait" parameter: must be between 0 and 60000ms',
                });
                return;
            }
            // Validate format parameter
            if (!['markdown', 'text', 'html'].includes(options.format)) {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Invalid "format" parameter: must be "markdown", "text", or "html"',
                });
                return;
            }
            // Build the cache key from validated, normalized values only.
            const cacheKey = `fetch:${normalizedUrl}:${options.render}:${options.wait}:${options.format}`;
            // Check cache
            const cached = cache.get(cacheKey);
            if (cached) {
                res.setHeader('X-Cache', 'HIT');
                res.setHeader('X-Cache-Age', Math.floor((Date.now() - cached.timestamp) / 1000).toString());
                res.json(cached.result);
                return;
            }
            // Fetch content
            const startTime = Date.now();
            const result = await peel(normalizedUrl, options);
            const elapsed = Date.now() - startTime;
            // Track usage (1 credit per fetch)
            if (req.auth?.keyInfo?.key) {
                await authStore.trackUsage(req.auth.keyInfo.key, 1);
            }
            // Cache result
            cache.set(cacheKey, {
                result,
                timestamp: Date.now(),
            });
            // Add usage headers
            res.setHeader('X-Cache', 'MISS');
            res.setHeader('X-Credits-Used', '1');
            res.setHeader('X-Processing-Time', elapsed.toString());
            res.json(result);
        }
        catch (error) {
            const err = error;
            // SECURITY: Sanitize error messages to prevent information disclosure
            if (err?.code) {
                // Intended for WebPeelError from the core library.
                // NOTE(review): Node/undici system errors also carry `code`, so
                // this branch may expose more than WebPeelError - confirm.
                const safeMessage = String(err.message ?? '').replace(/[<>"']/g, ''); // Remove HTML chars
                res.status(500).json({
                    error: err.code,
                    message: safeMessage,
                });
            }
            else {
                // Unexpected error - generic message only
                console.error('Fetch error:', err); // Log full error server-side
                res.status(500).json({
                    error: 'internal_error',
                    message: 'An unexpected error occurred while fetching the URL',
                });
            }
        }
    });
    return router;
}
127
+ //# sourceMappingURL=fetch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch.js","sourceRoot":"","sources":["../../../src/server/routes/fetch.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAQrC,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,8DAA8D;IAC9D,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAqB;QAC7C,GAAG,EAAE,IAAI;QACT,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,IAAI,EAAE,YAAY;QAChC,OAAO,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,QAAQ;QACpC,eAAe,EAAE,CAAC,KAAK,EAAE,EAAE;YACzB,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACtC,CAAC;KACF,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QAC5D,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC;YAEhD,yBAAyB;YACzB,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;gBACpC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,oCAAoC;iBAC9C,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,2CAA2C;YAC3C,IAAI,GAAG,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;gBACtB,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,aAAa;oBACpB,OAAO,EAAE,oCAAoC;iBAC9C,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC5B,uCAAuC;gBACvC,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC;gBAElC,mCAAmC;gBACnC,IAAI,aAAa,KAAK,GAAG,EAAE,CAAC;oBAC1B,yCAAyC;gBAC3C,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,aAAa;oBACpB,OAAO,EAAE,oBAAoB;iBAC9B,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,kBAAkB;YAClB,MAAM,QAAQ,GAAG,SAAS,GAAG,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;YAE5D,cAAc;YACd,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;gBAChC,GAAG,CAAC,SAAS,CAAC,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAC5F,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBACxB,OAAO;YACT,CAAC;YAED,gBAA
gB;YAChB,MAAM,OAAO,GAAgB;gBAC3B,MAAM,EAAE,MAAM,KAAK,MAAM;gBACzB,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBACrD,MAAM,EAAG,MAAuC,IAAI,UAAU;aAC/D,CAAC;YAEF,0BAA0B;YAC1B,IAAI,OAAO,CAAC,IAAI,KAAK,SAAS,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;gBACpG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,yDAAyD;iBACnE,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,4BAA4B;YAC5B,IAAI,CAAC,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC;gBACjE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,mEAAmE;iBAC7E,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,gBAAgB;YAChB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACxC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEvC,mCAAmC;YACnC,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;gBAC3B,MAAM,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACtD,CAAC;YAED,eAAe;YACf,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE;gBAClB,MAAM;gBACN,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC,CAAC;YAEH,oBAAoB;YACpB,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACjC,GAAG,CAAC,SAAS,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC;YACrC,GAAG,CAAC,SAAS,CAAC,mBAAmB,EAAE,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEvD,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,GAAG,GAAG,KAAY,CAAC;YAEzB,sEAAsE;YACtE,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;gBACb,kDAAkD;gBAClD,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,oBAAoB;gBAC5E,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,GAAG,CAAC,IAAI;oBACf,OAAO,EAAE,WAAW;iBACrB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,0CAA0C;gBAC1C,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;gBACjE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,gBAAgB;oBACvB,OAAO,EAAE,qDAAqD;iBAC/D,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH
,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Health check endpoint
3
+ */
4
+ import { Router } from 'express';
5
/**
 * Type declaration for the health route factory.
 *
 * The implementation in health.js registers `GET /health`, which responds
 * with `status`, `version`, `uptime` (seconds) and an ISO `timestamp`.
 *
 * @returns The router carrying the health endpoint.
 */
+ export declare function createHealthRouter(): Router;
6
+ //# sourceMappingURL=health.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"health.d.ts","sourceRoot":"","sources":["../../../src/server/routes/health.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAIpD,wBAAgB,kBAAkB,IAAI,MAAM,CAe3C"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Health check endpoint
3
+ */
4
+ import { Router } from 'express';
5
// Captured once when this module is evaluated; the baseline for `uptime`.
const startTime = Date.now();
/**
 * Creates the router exposing `GET /health`.
 *
 * The endpoint replies with a JSON document holding a fixed `status` of
 * `'healthy'`, the version from `npm_package_version` (or `'1.0.0'` when that
 * variable is unset or empty), the whole seconds elapsed since this module
 * was loaded, and the current time as an ISO 8601 string.
 *
 * @returns The configured router.
 */
export function createHealthRouter() {
    const healthRouter = Router();
    const reportHealth = (_req, res) => {
        const elapsedMs = Date.now() - startTime;
        const payload = {
            status: 'healthy',
            version: process.env.npm_package_version || '1.0.0',
            uptime: Math.floor(elapsedMs / 1000),
            timestamp: new Date().toISOString(),
        };
        res.json(payload);
    };
    healthRouter.get('/health', reportHealth);
    return healthRouter;
}
19
+ //# sourceMappingURL=health.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"health.js","sourceRoot":"","sources":["../../../src/server/routes/health.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAEpD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;AAE7B,MAAM,UAAU,kBAAkB;IAChC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,EAAE;QACrD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC;QAE3D,GAAG,CAAC,IAAI,CAAC;YACP,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,OAAO;YACnD,MAAM;YACN,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Search endpoint with caching
3
+ */
4
+ import { Router } from 'express';
5
+ import { AuthStore } from '../auth-store.js';
6
/**
 * Type declaration for the search route factory.
 *
 * The implementation in search.js registers `GET /v1/search` on a new Router
 * and calls `authStore.trackUsage(key, 1)` for authenticated cache misses.
 *
 * @param authStore Store used to record credit usage per API key.
 * @returns The router carrying the search endpoint.
 */
+ export declare function createSearchRouter(authStore: AuthStore): Router;
7
+ //# sourceMappingURL=search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../../src/server/routes/search.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AAIpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAa7C,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM,CAsI/D"}
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Search endpoint with caching
3
+ */
4
+ import { Router } from 'express';
5
+ import { fetch as undiciFetch } from 'undici';
6
+ import { load } from 'cheerio';
7
+ import { LRUCache } from 'lru-cache';
8
/**
 * Turns the href of a search result into an absolute http(s) URL.
 *
 * Handles protocol-relative links and DuckDuckGo redirect links of the form
 * `//duckduckgo.com/l/?uddg=<encoded target>`, which are unwrapped to their
 * target. Returns `null` for anything that is not a valid http(s) URL.
 */
function resolveResultUrl(href) {
    try {
        // `new URL` needs a scheme; protocol-relative links get https.
        const link = new URL(href.startsWith('//') ? `https:${href}` : href);
        const isDuckDuckGo = link.hostname === 'duckduckgo.com' || link.hostname.endsWith('.duckduckgo.com');
        // An absent `uddg` yields `new URL('')`, which throws and drops the result.
        const target = isDuckDuckGo && link.pathname === '/l/'
            ? new URL(link.searchParams.get('uddg') ?? '')
            : link;
        return ['http:', 'https:'].includes(target.protocol) ? target.href : null;
    }
    catch {
        return null;
    }
}
/**
 * Creates the Express router that serves `GET /v1/search`.
 *
 * Query parameters:
 *   - `q`     (required) search query string
 *   - `count` integer between 1 and 10 (default 5)
 *
 * Results are scraped from DuckDuckGo's HTML endpoint and kept in an
 * in-memory LRU cache; every success response is flagged with
 * `X-Cache: HIT` or `X-Cache: MISS`. Only cache misses query DuckDuckGo;
 * they are billed through `authStore.trackUsage` (1 credit) when the request
 * carries an API key.
 *
 * @param authStore Store whose `trackUsage(key, credits)` records consumption.
 * @returns The configured router.
 */
export function createSearchRouter(authStore) {
    const router = Router();
    // LRU cache: 15 minute TTL, max 500 entries, total size budget of
    // 50 * 1024 * 1024 size units.
    const cache = new LRUCache({
        max: 500,
        ttl: 15 * 60 * 1000, // 15 minutes
        maxSize: 50 * 1024 * 1024,
        // An entry's size is the length of its JSON serialization. That counts
        // UTF-16 code units, so it only approximates bytes.
        sizeCalculation: (entry) => JSON.stringify(entry).length,
    });
    router.get('/v1/search', async (req, res) => {
        try {
            const { q, count } = req.query;
            // Validate query parameter
            if (!q || typeof q !== 'string') {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Missing or invalid "q" parameter',
                });
                return;
            }
            // Parse and validate count. A repeated `count` parameter arrives
            // as an array; it is rejected instead of being coerced by parseInt.
            let resultCount = 5;
            if (count) {
                resultCount = typeof count === 'string' ? Number.parseInt(count, 10) : Number.NaN;
            }
            if (Number.isNaN(resultCount) || resultCount < 1 || resultCount > 10) {
                res.status(400).json({
                    error: 'invalid_request',
                    message: 'Invalid "count" parameter: must be between 1 and 10',
                });
                return;
            }
            // Build cache key
            const cacheKey = `search:${q}:${resultCount}`;
            // Check cache
            const cached = cache.get(cacheKey);
            if (cached) {
                res.setHeader('X-Cache', 'HIT');
                res.setHeader('X-Cache-Age', Math.floor((Date.now() - cached.timestamp) / 1000).toString());
                res.json({
                    query: q,
                    count: cached.results.length,
                    results: cached.results,
                });
                return;
            }
            // Perform search
            const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(q)}`;
            const startTime = Date.now();
            const response = await undiciFetch(searchUrl, {
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
                },
            });
            if (!response.ok) {
                throw new Error(`Search failed: HTTP ${response.status}`);
            }
            const html = await response.text();
            const $ = load(html);
            const results = [];
            $('.result').each((_i, elem) => {
                // Returning false stops cheerio's iteration once enough
                // results have been collected.
                if (results.length >= resultCount) {
                    return false;
                }
                const $result = $(elem);
                const title = $result.find('.result__title').text().trim();
                const href = $result.find('.result__url').attr('href') || '';
                const snippet = $result.find('.result__snippet').text().trim();
                // SECURITY: Validate and sanitize results
                if (!title || !href) {
                    return undefined;
                }
                // Only allow HTTP/HTTPS URLs (redirect wrappers are unwrapped)
                const url = resolveResultUrl(href);
                if (url === null) {
                    return undefined;
                }
                // Limit text lengths to prevent bloat
                results.push({
                    title: title.slice(0, 200),
                    url,
                    snippet: snippet.slice(0, 500),
                });
                return undefined;
            });
            const elapsed = Date.now() - startTime;
            // Track usage (1 credit per search)
            if (req.auth?.keyInfo?.key) {
                await authStore.trackUsage(req.auth.keyInfo.key, 1);
            }
            // Cache results
            cache.set(cacheKey, {
                results,
                timestamp: Date.now(),
            });
            // Add headers
            res.setHeader('X-Cache', 'MISS');
            res.setHeader('X-Credits-Used', '1');
            res.setHeader('X-Processing-Time', elapsed.toString());
            res.json({
                query: q,
                count: results.length,
                results,
            });
        }
        catch (error) {
            // SECURITY: Generic error message to prevent information disclosure
            console.error('Search error:', error); // Log full error server-side
            res.status(500).json({
                error: 'search_failed',
                message: 'Search request failed. Please try again.',
            });
        }
    });
    return router;
}
124
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../../../src/server/routes/search.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAqB,MAAM,SAAS,CAAC;AACpD,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAcrC,MAAM,UAAU,kBAAkB,CAAC,SAAoB;IACrD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC;IAExB,6DAA6D;IAC7D,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAqB;QAC7C,GAAG,EAAE,GAAG;QACR,GAAG,EAAE,EAAE,GAAG,EAAE,GAAG,IAAI,EAAE,aAAa;QAClC,OAAO,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,OAAO;QAClC,eAAe,EAAE,CAAC,KAAK,EAAE,EAAE;YACzB,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACtC,CAAC;KACF,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,KAAK,EAAE,GAAY,EAAE,GAAa,EAAE,EAAE;QAC7D,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC;YAE/B,2BAA2B;YAC3B,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAChC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,kCAAkC;iBAC5C,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,2BAA2B;YAC3B,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAe,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9D,IAAI,KAAK,CAAC,WAAW,CAAC,IAAI,WAAW,GAAG,CAAC,IAAI,WAAW,GAAG,EAAE,EAAE,CAAC;gBAC9D,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,iBAAiB;oBACxB,OAAO,EAAE,qDAAqD;iBAC/D,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,kBAAkB;YAClB,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,WAAW,EAAE,CAAC;YAE9C,cAAc;YACd,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;gBAChC,GAAG,CAAC,SAAS,CAAC,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;gBAC5F,GAAG,CAAC,IAAI,CAAC;oBACP,KAAK,EAAE,CAAC;oBACR,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM;oBAC5B,OAAO,EAAE,MAAM,CAAC,OAAO;iBACxB,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,iBAAiB;YACjB,MAAM,SAAS,GAAG,uCAAuC,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;YACjF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAE7B,MAAM,QAAQ,GAAG,
MAAM,WAAW,CAAC,SAAS,EAAE;gBAC5C,OAAO,EAAE;oBACP,YAAY,EAAE,oEAAoE;iBACnF;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAC5D,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;YAErB,MAAM,OAAO,GAAmB,EAAE,CAAC;YAEnC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE;gBAC7B,IAAI,OAAO,CAAC,MAAM,IAAI,WAAW;oBAAE,OAAO;gBAE1C,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;gBACxB,IAAI,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACzD,IAAI,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC1D,IAAI,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAE7D,0CAA0C;gBAC1C,IAAI,CAAC,KAAK,IAAI,CAAC,GAAG;oBAAE,OAAO;gBAE3B,6BAA6B;gBAC7B,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;oBAC5B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;wBACnD,OAAO;oBACT,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO;gBACT,CAAC;gBAED,sCAAsC;gBACtC,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAC5B,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAEhC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;YACxC,CAAC,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAEvC,oCAAoC;YACpC,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;gBAC3B,MAAM,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACtD,CAAC;YAED,gBAAgB;YAChB,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE;gBAClB,OAAO;gBACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC,CAAC;YAEH,cAAc;YACd,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACjC,GAAG,CAAC,SAAS,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC;YACrC,GAAG,CAAC,SAAS,CAAC,mBAAmB,EAAE,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEvD,GAAG,CAAC,IAAI,CAAC;gBACP,KAAK,EAAE,CAAC;gBACR,KAAK,EAAE,OAAO,CAAC,MAAM;gBACrB,OAAO;aACR,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,GAAG,GAAG,KAAc,CAAC;YAC3B,oEAAoE;YAC
pE,OAAO,CAAC,KAAK,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;YAClE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;gBACnB,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE,0CAA0C;aACpD,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Core types for WebPeel
3
+ */
4
/**
 * Options describing how a page should be fetched and in which format its
 * content should be returned. Every field is optional.
 */
+ export interface PeelOptions {
5
+ /** Use headless browser instead of simple HTTP fetch */
6
+ render?: boolean;
7
+ /** Wait time in milliseconds after page load (only with render=true) */
8
+ wait?: number;
9
+ /** Output format */
10
+ format?: 'markdown' | 'text' | 'html';
11
+ /** Request timeout in milliseconds (default: 30000) */
12
+ timeout?: number;
13
+ /** Custom user agent */
14
+ userAgent?: string;
15
+ }
16
/**
 * Result of fetching one page: its content in the requested format plus the
 * title, metadata, links and bookkeeping values described on each field.
 */
+ export interface PeelResult {
17
+ /** Final URL (after redirects) */
18
+ url: string;
19
+ /** Page title */
20
+ title: string;
21
+ /** Page content in requested format */
22
+ content: string;
23
+ /** Extracted metadata */
24
+ metadata: PageMetadata;
25
+ /** All links found on the page (absolute URLs, deduplicated) */
26
+ links: string[];
27
+ /** Estimated token count (rough: content.length / 4) */
28
+ tokens: number;
29
+ /** Method used: 'simple' | 'browser' */
30
+ method: 'simple' | 'browser';
31
+ /** Time elapsed in milliseconds */
32
+ elapsed: number;
33
+ }
34
/**
 * Optional descriptive metadata extracted from a page. A field is absent
 * when the corresponding value is not available.
 */
+ export interface PageMetadata {
35
+ /** Meta description */
36
+ description?: string;
37
+ /** Author name */
38
+ author?: string;
39
+ /** Published date (ISO 8601) */
40
+ published?: string;
41
+ /** Open Graph image URL */
42
+ image?: string;
43
+ /** Canonical URL */
44
+ canonical?: string;
45
+ }
46
/**
 * Base error class of the library. `code` is an optional machine-readable
 * identifier supplied by the thrower; the subclasses in types.js pass
 * 'TIMEOUT', 'BLOCKED' and 'NETWORK'.
 */
+ export declare class WebPeelError extends Error {
47
+ code?: string | undefined;
48
+ constructor(message: string, code?: string | undefined);
49
+ }
50
/** WebPeelError whose `code` is 'TIMEOUT' (set by the constructor in types.js). */
+ export declare class TimeoutError extends WebPeelError {
51
+ constructor(message: string);
52
+ }
53
/** WebPeelError whose `code` is 'BLOCKED' (set by the constructor in types.js). */
+ export declare class BlockedError extends WebPeelError {
54
+ constructor(message: string);
55
+ }
56
/** WebPeelError whose `code` is 'NETWORK' (set by the constructor in types.js). */
+ export declare class NetworkError extends WebPeelError {
57
+ constructor(message: string);
58
+ }
59
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,WAAW;IAC1B,wDAAwD;IACxD,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wEAAwE;IACxE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB;IACpB,MAAM,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;IACtC,uDAAuD;IACvD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,UAAU;IACzB,kCAAkC;IAClC,GAAG,EAAE,MAAM,CAAC;IACZ,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,QAAQ,EAAE,YAAY,CAAC;IACvB,gEAAgE;IAChE,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;IACf,wCAAwC;IACxC,MAAM,EAAE,QAAQ,GAAG,SAAS,CAAC;IAC7B,mCAAmC;IACnC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,uBAAuB;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,kBAAkB;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,YAAa,SAAQ,KAAK;IACD,IAAI,CAAC,EAAE,MAAM;gBAArC,OAAO,EAAE,MAAM,EAAS,IAAI,CAAC,EAAE,MAAM,YAAA;CAIlD;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,YAAa,SAAQ,YAAY;gBAChC,OAAO,EAAE,MAAM;CAI5B"}
package/dist/types.js ADDED
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Core types for WebPeel
3
+ */
4
/**
 * Root of the library's error hierarchy.
 *
 * Carries an optional machine-readable `code` next to the usual message and
 * reports itself under the name 'WebPeelError'.
 */
export class WebPeelError extends Error {
    code;
    name = 'WebPeelError';
    /**
     * @param {string} message Human-readable description.
     * @param {string} [code] Machine-readable identifier of the failure.
     */
    constructor(message, code) {
        super(message);
        this.code = code;
    }
}
/** Error with the fixed code 'TIMEOUT'. */
export class TimeoutError extends WebPeelError {
    name = 'TimeoutError';
    /** @param {string} message Human-readable description. */
    constructor(message) {
        super(message, 'TIMEOUT');
    }
}
/** Error with the fixed code 'BLOCKED'. */
export class BlockedError extends WebPeelError {
    name = 'BlockedError';
    /** @param {string} message Human-readable description. */
    constructor(message) {
        super(message, 'BLOCKED');
    }
}
/** Error with the fixed code 'NETWORK'. */
export class NetworkError extends WebPeelError {
    name = 'NetworkError';
    /** @param {string} message Human-readable description. */
    constructor(message) {
        super(message, 'NETWORK');
    }
}
30
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AA+CH,MAAM,OAAO,YAAa,SAAQ,KAAK;IACD;IAApC,YAAY,OAAe,EAAS,IAAa;QAC/C,KAAK,CAAC,OAAO,CAAC,CAAC;QADmB,SAAI,GAAJ,IAAI,CAAS;QAE/C,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,YAAY;IAC5C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF"}
package/llms.txt ADDED
@@ -0,0 +1,60 @@
1
+ # WebPeel
2
+
3
+ > Fetch any web page as clean, AI-ready markdown.
4
+
5
+ WebPeel is an open-source web fetcher designed for AI agents. It converts web pages to clean markdown using smart escalation: it tries a simple HTTP request first (~200ms) and automatically escalates to a headless browser when blocked.
6
+
7
+ ## Quick Start
8
+
9
+ ```bash
10
+ # CLI (zero install)
11
+ npx webpeel https://example.com
12
+
13
+ # Library (JavaScript, ES module)
14
+ import { peel } from 'webpeel';
15
+ const result = await peel('https://example.com');
16
+
17
+ # MCP Server (Claude Desktop / Cursor / VS Code)
18
+ npx webpeel mcp
19
+ ```
20
+
21
+ ## MCP Tools
22
+
23
+ - `webpeel_fetch` — Fetch a URL, return clean markdown. Params: url (required), render (boolean), wait (ms), format (markdown|text|html)
24
+ - `webpeel_search` — Search the web via DuckDuckGo. Params: query (required), count (1-10)
25
+
26
+ ## MCP Configuration
27
+
28
+ ```json
29
+ {
30
+ "mcpServers": {
31
+ "webpeel": {
32
+ "command": "npx",
33
+ "args": ["-y", "webpeel", "mcp"]
34
+ }
35
+ }
36
+ }
37
+ ```
38
+
39
+ ## Key Features
40
+
41
+ - Smart escalation: HTTP → Playwright headless browser (only when needed)
42
+ - Anti-bot bypass: handles Cloudflare challenges, JavaScript walls, 403s
43
+ - Token-optimized: strips navigation, ads, scripts, cookie banners
44
+ - Metadata extraction: title, description, author, published date, links
45
+ - Search: built-in DuckDuckGo integration
46
+ - Local-first: runs on your machine, no API key required
47
+
48
+ ## Hosted API
49
+
50
+ ```bash
51
+ curl "https://api.webpeel.dev/v1/fetch?url=https://example.com"
52
+ curl "https://api.webpeel.dev/v1/search?q=your+query"
53
+ ```
54
+
55
+ ## Links
56
+
57
+ - Website: https://webpeel.dev
58
+ - GitHub: https://github.com/JakeLiuMe/webpeel
59
+ - npm: https://www.npmjs.com/package/webpeel
60
+ - API Docs: https://webpeel.dev/docs
package/package.json ADDED
@@ -0,0 +1,80 @@
1
+ {
2
+ "name": "webpeel",
3
+ "version": "0.1.0",
4
+ "description": "Fast web fetcher for AI agents - smart escalation from simple HTTP to headless browser",
5
+ "author": "Jake Liu",
6
+ "license": "MIT",
7
+ "type": "module",
8
+ "main": "./dist/index.js",
9
+ "types": "./dist/index.d.ts",
10
+ "bin": {
11
+ "webpeel": "./dist/cli.js"
12
+ },
13
+ "exports": {
14
+ ".": {
15
+ "import": "./dist/index.js",
16
+ "types": "./dist/index.d.ts"
17
+ }
18
+ },
19
+ "files": [
20
+ "dist",
21
+ "!dist/tests",
22
+ "README.md",
23
+ "LICENSE",
24
+ "llms.txt"
25
+ ],
26
+ "scripts": {
27
+ "build": "tsc",
28
+ "dev": "tsc --watch",
29
+ "test": "vitest run",
30
+ "test:watch": "vitest",
31
+ "lint": "tsc --noEmit",
32
+ "prepublishOnly": "npm run build",
33
+ "serve": "node dist/server/app.js",
34
+ "mcp": "node dist/mcp/server.js"
35
+ },
36
+ "repository": {
37
+ "type": "git",
38
+ "url": "https://github.com/JakeLiuMe/webpeel.git"
39
+ },
40
+ "bugs": {
41
+ "url": "https://github.com/JakeLiuMe/webpeel/issues"
42
+ },
43
+ "homepage": "https://webpeel.dev",
44
+ "keywords": [
45
+ "web-scraper",
46
+ "ai-agent",
47
+ "mcp-server",
48
+ "mcp",
49
+ "playwright",
50
+ "markdown",
51
+ "fetcher",
52
+ "web-fetcher",
53
+ "claude",
54
+ "cursor",
55
+ "codex"
56
+ ],
57
+ "dependencies": {
58
+ "@modelcontextprotocol/sdk": "^1.0.4",
59
+ "cheerio": "^1.0.0",
60
+ "commander": "^12.0.0",
61
+ "cors": "^2.8.5",
62
+ "express": "^4.21.2",
63
+ "lru-cache": "^11.0.2",
64
+ "ora": "^8.0.1",
65
+ "playwright": "^1.48.0",
66
+ "turndown": "^7.2.0",
67
+ "undici": "^7.2.0"
68
+ },
69
+ "devDependencies": {
70
+ "@types/cors": "^2.8.17",
71
+ "@types/express": "^5.0.0",
72
+ "@types/node": "^22.0.0",
73
+ "@types/turndown": "^5.0.5",
74
+ "typescript": "^5.6.0",
75
+ "vitest": "^2.1.0"
76
+ },
77
+ "engines": {
78
+ "node": ">=20.0.0"
79
+ }
80
+ }