webpeel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +415 -0
  3. package/dist/cli.d.ts +16 -0
  4. package/dist/cli.d.ts.map +1 -0
  5. package/dist/cli.js +140 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/core/fetcher.d.ts +32 -0
  8. package/dist/core/fetcher.d.ts.map +1 -0
  9. package/dist/core/fetcher.js +479 -0
  10. package/dist/core/fetcher.js.map +1 -0
  11. package/dist/core/markdown.d.ts +17 -0
  12. package/dist/core/markdown.d.ts.map +1 -0
  13. package/dist/core/markdown.js +143 -0
  14. package/dist/core/markdown.js.map +1 -0
  15. package/dist/core/metadata.d.ts +17 -0
  16. package/dist/core/metadata.d.ts.map +1 -0
  17. package/dist/core/metadata.js +159 -0
  18. package/dist/core/metadata.js.map +1 -0
  19. package/dist/core/strategies.d.ts +30 -0
  20. package/dist/core/strategies.d.ts.map +1 -0
  21. package/dist/core/strategies.js +67 -0
  22. package/dist/core/strategies.js.map +1 -0
  23. package/dist/index.d.ts +31 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +81 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/mcp/server.d.ts +7 -0
  28. package/dist/mcp/server.d.ts.map +1 -0
  29. package/dist/mcp/server.js +248 -0
  30. package/dist/mcp/server.js.map +1 -0
  31. package/dist/server/app.d.ts +13 -0
  32. package/dist/server/app.d.ts.map +1 -0
  33. package/dist/server/app.js +89 -0
  34. package/dist/server/app.js.map +1 -0
  35. package/dist/server/auth-store.d.ts +28 -0
  36. package/dist/server/auth-store.d.ts.map +1 -0
  37. package/dist/server/auth-store.js +87 -0
  38. package/dist/server/auth-store.js.map +1 -0
  39. package/dist/server/middleware/auth.d.ts +18 -0
  40. package/dist/server/middleware/auth.d.ts.map +1 -0
  41. package/dist/server/middleware/auth.js +55 -0
  42. package/dist/server/middleware/auth.js.map +1 -0
  43. package/dist/server/middleware/rate-limit.d.ts +23 -0
  44. package/dist/server/middleware/rate-limit.d.ts.map +1 -0
  45. package/dist/server/middleware/rate-limit.js +85 -0
  46. package/dist/server/middleware/rate-limit.js.map +1 -0
  47. package/dist/server/routes/fetch.d.ts +7 -0
  48. package/dist/server/routes/fetch.d.ts.map +1 -0
  49. package/dist/server/routes/fetch.js +127 -0
  50. package/dist/server/routes/fetch.js.map +1 -0
  51. package/dist/server/routes/health.d.ts +6 -0
  52. package/dist/server/routes/health.d.ts.map +1 -0
  53. package/dist/server/routes/health.js +19 -0
  54. package/dist/server/routes/health.js.map +1 -0
  55. package/dist/server/routes/search.d.ts +7 -0
  56. package/dist/server/routes/search.d.ts.map +1 -0
  57. package/dist/server/routes/search.js +124 -0
  58. package/dist/server/routes/search.js.map +1 -0
  59. package/dist/types.d.ts +59 -0
  60. package/dist/types.d.ts.map +1 -0
  61. package/dist/types.js +30 -0
  62. package/dist/types.js.map +1 -0
  63. package/llms.txt +60 -0
  64. package/package.json +80 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/core/markdown.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,cAAc,GAAG;IACrB,QAAQ;IACR,OAAO;IACP,KAAK;IACL,QAAQ;IACR,oBAAoB;IACpB,OAAO;IACP,UAAU;IACV,gBAAgB;IAChB,KAAK;IACL,gBAAgB;IAChB,gBAAgB;IAChB,oBAAoB;IACpB,eAAe;IACf,gBAAgB;IAChB,WAAW;IACX,WAAW;IACX,iBAAiB;IACjB,mBAAmB;IACnB,gBAAgB;IAChB,mBAAmB;IACnB,kBAAkB;IAClB,kBAAkB;CACnB,CAAC;AAEF;;;GAGG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,2CAA2C;IAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,OAAO;QAC3C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,cAAc,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QAClC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,mCAAmC;IACnC,CAAC,CAAC,oBAAoB,CAAC,CAAC,MAAM,EAAE,CAAC;IAEjC,uCAAuC;IACvC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACtB,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,GAAG;QACrB,WAAW,EAAE,GAAG;QAChB,eAAe,EAAE,IAAI;KACtB,CAAC,CAAC;IAEH,kBAAkB;IAClB,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAE7D,kDAAkD;IAClD,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE;QACzB,MAAM,EAAE,KAAK;QACb,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;YAC7B,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC,CAAC;IAEH,oCAAoC;IACpC,QAAQ,CAAC,OAAO,CAAC,YAAY,E
AAE;QAC7B,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACf,OAAO,IAAI,CAAC,QAAQ,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,KAAK,MAAM,CAAC;QACzE,CAAC;QACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAiB,CAAC;YACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9D,OAAO,SAAS,GAAG,QAAQ,GAAG,IAAI,GAAG,QAAQ,CAAC,WAAW,GAAG,WAAW,CAAC;QAC1E,CAAC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE9C,kEAAkE;IAClE,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,yBAAyB;QAC5D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,CAAC;IAC5C,CAAC;IAED,8DAA8D;IAC9D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE;QAC3D,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,SAAS,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QACrC,IAAI,SAAS,IAAI,SAAS;YAAE,OAAO,GAAG,CAAC;QACvC,OAAO,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3B,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,qCAAqC;IACrC,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAE3B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEpC,8CAA8C;IAC9C,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,CAAC,CAAC,+BAA+B,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,IAAI,OAAO,GAAG,MAAM,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,yDAAyD;IACzD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAED,gCAAgC;IAChC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IACvC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAA
C,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Extract structured metadata from HTML
3
+ */
4
+ import type { PageMetadata } from '../types.js';
5
+ /**
6
+ * Extract every `<a href>` link from the page HTML
7
+ * Returns absolute http(s) URLs resolved against baseUrl, deduplicated and sorted
8
+ */
9
+ export declare function extractLinks(html: string, baseUrl: string): string[];
10
+ /**
11
+ * Extract the page title plus description, author, published date, image and canonical URL from HTML
12
+ */
13
+ export declare function extractMetadata(html: string, _url: string): {
14
+ title: string;
15
+ metadata: PageMetadata;
16
+ };
17
+ //# sourceMappingURL=metadata.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsHhD;;;GAGG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CA4BpE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,YAAY,CAAA;CAAE,CAarG"}
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Extract structured metadata from HTML
3
+ */
4
+ import * as cheerio from 'cheerio';
/**
 * Extract the page title using a fallback chain:
 * og:title → twitter:title → title tag → first h1.
 *
 * A candidate that is missing, empty, or whitespace-only is skipped so the
 * next source in the chain is consulted.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string} The trimmed title, or '' when no source yields any text.
 */
function extractTitle($) {
  // Ordered from most to least preferred; each reader is evaluated lazily.
  const readers = [
    () => $('meta[property="og:title"]').attr('content'),
    () => $('meta[name="twitter:title"]').attr('content'),
    () => $('title').text(),
    () => $('h1').first().text(),
  ];

  for (const read of readers) {
    const raw = read();
    // Trim BEFORE testing: a whitespace-only value must not end the chain.
    const title = typeof raw === 'string' ? raw.trim() : '';
    if (title) {
      return title;
    }
  }

  return '';
}
/**
 * Extract the page description using a fallback chain:
 * og:description → twitter:description → meta description.
 *
 * A `content` attribute that is missing, empty, or whitespace-only is skipped
 * so the next source in the chain is consulted.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string|undefined} The trimmed description, or undefined when none is found.
 */
function extractDescription($) {
  const selectors = [
    'meta[property="og:description"]',
    'meta[name="twitter:description"]',
    'meta[name="description"]',
  ];

  for (const selector of selectors) {
    const raw = $(selector).attr('content');
    // Trim BEFORE testing so a blank attribute does not shadow a real value.
    const desc = typeof raw === 'string' ? raw.trim() : '';
    if (desc) {
      return desc;
    }
  }

  return undefined;
}
/**
 * Extract the author from meta tags: article:author first, then the plain
 * `author` meta tag.
 *
 * A `content` attribute that is missing, empty, or whitespace-only is skipped
 * so the next source is consulted.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string|undefined} The trimmed author, or undefined when none is found.
 */
function extractAuthor($) {
  const selectors = ['meta[property="article:author"]', 'meta[name="author"]'];

  for (const selector of selectors) {
    const raw = $(selector).attr('content');
    // Trim BEFORE testing so a blank attribute does not shadow a real value.
    const author = typeof raw === 'string' ? raw.trim() : '';
    if (author) {
      return author;
    }
  }

  return undefined;
}
/**
 * Extract the published date from meta tags: article:published_time first,
 * then the schema.org `datePublished` itemprop.
 *
 * A value that does not parse to a valid date is ignored and the next source
 * is tried.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string|undefined} ISO 8601 date string, or undefined when no
 *   source holds a parseable date.
 */
function extractPublished($) {
  const selectors = [
    'meta[property="article:published_time"]',
    'meta[itemprop="datePublished"]',
  ];

  for (const selector of selectors) {
    const raw = $(selector).attr('content');
    if (!raw) {
      continue;
    }

    // Surrounding whitespace is stripped so a well-formed ISO string is not
    // handed to the engine's implementation-defined fallback parser.
    const parsed = new Date(raw.trim());

    // An invalid Date has a NaN time value; checking it explicitly avoids
    // relying on toISOString() throwing RangeError as control flow.
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toISOString();
    }
  }

  return undefined;
}
/**
 * Extract the preview image URL: og:image first, then twitter:image.
 *
 * A `content` attribute that is missing, empty, or whitespace-only is skipped
 * so the next source is consulted. The value is returned as written in the
 * page; it is not resolved against any base URL.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string|undefined} The trimmed image URL, or undefined when none is found.
 */
function extractImage($) {
  const selectors = ['meta[property="og:image"]', 'meta[name="twitter:image"]'];

  for (const selector of selectors) {
    const raw = $(selector).attr('content');
    // Trim BEFORE testing so a blank attribute does not shadow a real value.
    const image = typeof raw === 'string' ? raw.trim() : '';
    if (image) {
      return image;
    }
  }

  return undefined;
}
/**
 * Extract the canonical URL: `<link rel="canonical">` first, then og:url.
 *
 * An attribute that is missing, empty, or whitespace-only is skipped so the
 * fallback is consulted.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string|undefined} The trimmed canonical URL, or undefined when none is found.
 */
function extractCanonical($) {
  // Each source reads a different attribute, so pair selector with attribute.
  const sources = [
    ['link[rel="canonical"]', 'href'],
    ['meta[property="og:url"]', 'content'],
  ];

  for (const [selector, attribute] of sources) {
    const raw = $(selector).attr(attribute);
    // Trim BEFORE testing so a blank attribute does not shadow the fallback.
    const canonical = typeof raw === 'string' ? raw.trim() : '';
    if (canonical) {
      return canonical;
    }
  }

  return undefined;
}
/**
 * Extract all links from the page.
 *
 * Every `<a href>` is resolved against `baseUrl`. Only http(s) targets are
 * kept, links that merely point at a fragment of the page itself are dropped,
 * and the result is deduplicated and sorted.
 *
 * @param {string} html - Page HTML.
 * @param {string} baseUrl - URL the page was fetched from; used to resolve
 *   relative hrefs and to recognise same-page anchors.
 * @returns {string[]} Sorted, unique absolute URLs.
 */
export function extractLinks(html, baseUrl) {
  const $ = cheerio.load(html);
  const links = new Set();

  // Normalised URL of the page itself with any fragment removed. Comparing
  // against this (instead of the raw baseUrl string) lets same-page anchors be
  // recognised even when baseUrl lacks a trailing slash, differs in host
  // casing, or already carries its own fragment.
  let pageUrl = null;
  try {
    const base = new URL(baseUrl);
    base.hash = '';
    pageUrl = base.href;
  } catch {
    // baseUrl is not an absolute URL: no link can be a same-page anchor, and
    // only hrefs that are absolute on their own will resolve below.
  }

  $('a[href]').each((_, elem) => {
    const href = $(elem).attr('href');
    if (!href) {
      return;
    }

    let absoluteUrl;
    try {
      absoluteUrl = new URL(href, baseUrl);
    } catch {
      // Invalid URL, skip
      return;
    }

    // SECURITY: Only allow HTTP and HTTPS protocols
    if (!['http:', 'https:'].includes(absoluteUrl.protocol)) {
      return;
    }

    // Skip anchor links: same page as baseUrl, differing only by a fragment
    // (a bare trailing "#" counts as a fragment here).
    const withoutFragment = new URL(absoluteUrl.href);
    withoutFragment.hash = '';
    const hasFragment = withoutFragment.href !== absoluteUrl.href;
    if (hasFragment && withoutFragment.href === pageUrl) {
      return;
    }

    links.add(absoluteUrl.href);
  });

  return Array.from(links).sort();
}
/**
 * Extract the title and all structured metadata from HTML.
 *
 * The document is parsed once and the same handle is passed to each extractor.
 *
 * @param {string} html - Page HTML.
 * @param {string} _url - Page URL (currently unused).
 * @returns {{ title: string, metadata: object }} The title together with the
 *   description, author, published, image and canonical fields.
 */
export function extractMetadata(html, _url) {
  const $ = cheerio.load(html);

  const title = extractTitle($);
  const description = extractDescription($);
  const author = extractAuthor($);
  const published = extractPublished($);
  const image = extractImage($);
  const canonical = extractCanonical($);

  return {
    title,
    metadata: { description, author, published, image, canonical },
  };
}
159
+ //# sourceMappingURL=metadata.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC;;;GAGG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,uBAAuB;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,gBAAgB;IAChB,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,uBAAuB;IACvB,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CAAC,CAAqB;IAC/C,6BAA6B;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,0BAA0B;IAC1B,IAAI,GAAG,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7D,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,gCAAgC;IAChC,IAAI,GAAG,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;IAE7B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,CAAqB;IAC1C,qBAAqB;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClE,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,sBAAsB;IACtB,MAAM,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IAEjC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,6BAA6B;IAC7B,IAAI,SAAS,GAAG,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7E,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,+BAA+B;IAC/B,SAAS,GAAG,CAAC,CAAC,gCAAgC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAChE,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,IAAI,CAAC,SAAS,C
AAC,CAAC,WAAW,EAAE,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,CAAqB;IACzC,eAAe;IACf,IAAI,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,oBAAoB;IACpB,KAAK,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,CAAqB;IAC7C,MAAM,SAAS,GAAG,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;IAEvC,qBAAqB;IACrB,MAAM,KAAK,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC3D,IAAI,KAAK;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IAE/B,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,OAAe;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAC5B,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAE3C,gDAAgD;YAChD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,oBAAoB;YACpB,IAAI,WAAW,CAAC,IAAI,IAAI,WAAW,CAAC,IAAI,KAAK,OAAO,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC;gBACxE,OAAO;YACT,CAAC;YAED,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,IAAY;IACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAiB;QAC7B,WAAW,EAAE,kBAAkB,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC;QACxB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;QAC9B,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC;QACtB,SAAS,EAAE,gBAAgB,CAAC,CAAC,CAAC;KAC/B,CAAC;IAEF,OAAO,EAAE,KAA
K,EAAE,QAAQ,EAAE,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Smart escalation strategy: try simple fetch first, escalate to browser if needed
3
+ */
4
+ import { type FetchResult } from './fetcher.js';
5
+ export interface StrategyOptions {
6
+ /** Force browser mode (skip simple fetch); smartFetch defaults this to false */
7
+ forceBrowser?: boolean;
8
+ /** Wait time after page load in browser mode (ms); smartFetch defaults this to 0 */
9
+ waitMs?: number;
10
+ /** Custom user agent, passed to both the simple and the browser fetch */
11
+ userAgent?: string;
12
+ /** Request timeout (ms); smartFetch defaults this to 30000 */
13
+ timeoutMs?: number;
14
+ }
15
+ export interface StrategyResult extends FetchResult {
16
+ /** Which strategy produced this result: 'simple' (plain HTTP fetch) or 'browser' */
17
+ method: 'simple' | 'browser';
18
+ }
19
+ /**
20
+ * Smart fetch with automatic escalation
21
+ *
22
+ * Strategy:
23
+ * 1. Try simple HTTP fetch first (fast, ~200ms), unless forceBrowser is set
24
+ * 2. If that fails with a BlockedError (403, 503, Cloudflare, empty body) → try browser; other errors are re-thrown
25
+ * 3. If browser fails with a NetworkError mentioning Cloudflare → retry once with a 5s wait
26
+ *
27
+ * Returns the result along with which method worked ('simple' or 'browser')
28
+ */
29
+ export declare function smartFetch(url: string, options?: StrategyOptions): Promise<StrategyResult>;
30
+ //# sourceMappingURL=strategies.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAGvF,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,qDAAqD;IACrD,MAAM,EAAE,QAAQ,GAAG,SAAS,CAAC;CAC9B;AAED;;;;;;;;;GASG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,cAAc,CAAC,CAuDpG"}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Smart escalation strategy: try simple fetch first, escalate to browser if needed
3
+ */
4
+ import { simpleFetch, browserFetch, retryFetch } from './fetcher.js';
5
+ import { BlockedError, NetworkError } from '../types.js';
/**
 * Smart fetch with automatic escalation
 *
 * Strategy:
 * 1. Try simple HTTP fetch first (fast, ~200ms), unless `forceBrowser` is set
 * 2. If that fails with a BlockedError → try browser; any other error is re-thrown
 * 3. If the browser fails with a NetworkError mentioning Cloudflare → retry
 *    once, waiting at least 5s (or the caller's `waitMs` if that is longer)
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options] - Strategy options.
 * @param {boolean} [options.forceBrowser=false] - Skip the simple fetch.
 * @param {number} [options.waitMs=0] - Wait after page load in browser mode (ms).
 * @param {string} [options.userAgent] - Custom user agent.
 * @param {number} [options.timeoutMs=30000] - Request timeout (ms).
 * @returns {Promise<object>} The fetch result plus `method`: 'simple' | 'browser'.
 * @throws Re-throws any non-BlockedError from the simple fetch, and any
 *   browser error that is not a Cloudflare NetworkError (or that recurs on retry).
 */
export async function smartFetch(url, options = {}) {
  // Minimum wait granted to a Cloudflare challenge on the retry attempt.
  const cloudflareWaitMs = 5000;
  const { forceBrowser = false, waitMs = 0, userAgent, timeoutMs = 30000 } = options;

  // Strategy 1: Simple fetch (unless browser is forced)
  if (!forceBrowser) {
    try {
      const result = await retryFetch(() => simpleFetch(url, userAgent, timeoutMs), 3);
      return { ...result, method: 'simple' };
    } catch (error) {
      // Only a block escalates to the browser; timeouts and network errors
      // are surfaced to the caller unchanged.
      if (!(error instanceof BlockedError)) {
        throw error;
      }
    }
  }

  // Strategy 2: Browser fetch
  try {
    const result = await browserFetch(url, { userAgent, waitMs, timeoutMs });
    return { ...result, method: 'browser' };
  } catch (error) {
    const hitCloudflare =
      error instanceof NetworkError && error.message.toLowerCase().includes('cloudflare');
    if (!hitCloudflare) {
      throw error;
    }

    // Retry with extra wait time. Never wait LESS than the caller already
    // asked for, otherwise the "extra wait" retry could be shorter than the
    // attempt that just failed.
    const result = await browserFetch(url, {
      userAgent,
      waitMs: Math.max(waitMs, cloudflareWaitMs),
      timeoutMs,
    });
    return { ...result, method: 'browser' };
  }
}
67
+ //# sourceMappingURL=strategies.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"strategies.js","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAC;AACvF,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAkBzD;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,UAA2B,EAAE;IACzE,MAAM,EAAE,YAAY,GAAG,KAAK,EAAE,MAAM,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC;IAEnF,sDAAsD;IACtD,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAC7B,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,CAAC,EAC5C,CAAC,CACF,CAAC;YACF,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,QAAQ;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,8CAA8C;YAC9C,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;gBAClC,mCAAmC;YACrC,CAAC;iBAAM,CAAC;gBACN,kDAAkD;gBAClD,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,4BAA4B;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;YACrC,SAAS;YACT,MAAM;YACN,SAAS;SACV,CAAC,CAAC;QACH,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,SAAS;SAClB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,+DAA+D;QAC/D,IACE,KAAK,YAAY,YAAY;YAC7B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAClD,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;gBACrC,SAAS;gBACT,MAAM,EAAE,IAAI,EAAE,mCAAmC;gBACjD,SAAS;aACV,CAAC,CAAC;YACH,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,SAAS;aAClB,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * WebPeel - Fast web fetcher for AI agents
3
+ *
4
+ * Main library export
5
+ */
6
+ import { cleanup } from './core/fetcher.js';
7
+ import type { PeelOptions, PeelResult } from './types.js';
8
+ export * from './types.js';
9
+ /**
10
+ * Fetch and extract content from a URL
11
+ *
12
+ * @param url - URL to fetch
13
+ * @param options - Fetch options (render, wait, format, timeout, userAgent)
14
+ * @returns Extracted content plus title, metadata, links, token estimate, fetch method and elapsed ms
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * import { peel } from 'webpeel';
19
+ *
20
+ * const result = await peel('https://example.com');
21
+ * console.log(result.content); // Markdown content
22
+ * console.log(result.metadata); // Structured metadata
23
+ * ```
24
+ */
25
+ export declare function peel(url: string, options?: PeelOptions): Promise<PeelResult>;
26
+ /**
27
+ * Clean up any browser resources
28
+ * Call this when you're done using WebPeel
29
+ */
30
+ export { cleanup };
31
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAC5C,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE1D,cAAc,YAAY,CAAC;AAE3B;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CA4DtF;AAED;;;GAGG;AACH,OAAO,EAAE,OAAO,EAAE,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,81 @@
1
+ /**
2
+ * WebPeel - Fast web fetcher for AI agents
3
+ *
4
+ * Main library export
5
+ */
6
+ import { smartFetch } from './core/strategies.js';
7
+ import { htmlToMarkdown, htmlToText, estimateTokens } from './core/markdown.js';
8
+ import { extractMetadata, extractLinks } from './core/metadata.js';
9
+ import { cleanup } from './core/fetcher.js';
10
+ export * from './types.js';
/**
 * Fetch and extract content from a URL
 *
 * @param {string} url - URL to fetch
 * @param {object} [options] - Fetch options
 * @param {boolean} [options.render=false] - Force browser rendering.
 * @param {number} [options.wait=0] - Wait after page load in browser mode (ms).
 * @param {string} [options.format='markdown'] - 'html', 'text' or 'markdown';
 *   any other value is treated as 'markdown'.
 * @param {number} [options.timeout=30000] - Request timeout (ms).
 * @param {string} [options.userAgent] - Custom user agent.
 * @returns {Promise<object>} url, title, content, metadata, links, tokens,
 *   method and elapsed (ms).
 * @throws Re-throws whatever the fetch or extraction step threw, after a
 *   best-effort cleanup of browser resources.
 *
 * @example
 * ```typescript
 * import { peel } from 'webpeel';
 *
 * const result = await peel('https://example.com');
 * console.log(result.content); // Markdown content
 * console.log(result.metadata); // Structured metadata
 * ```
 */
export async function peel(url, options = {}) {
  const startTime = Date.now();
  const { render = false, wait = 0, format = 'markdown', timeout = 30000, userAgent } = options;

  try {
    // Fetch the page
    const fetchResult = await smartFetch(url, {
      forceBrowser: render,
      waitMs: wait,
      userAgent,
      timeoutMs: timeout,
    });

    // Extract metadata and title
    const { title, metadata } = extractMetadata(fetchResult.html, fetchResult.url);

    // Extract links
    const links = extractLinks(fetchResult.html, fetchResult.url);

    // Convert content to requested format
    let content;
    switch (format) {
      case 'html':
        content = fetchResult.html;
        break;
      case 'text':
        content = htmlToText(fetchResult.html);
        break;
      case 'markdown':
      default:
        content = htmlToMarkdown(fetchResult.html);
        break;
    }

    // Calculate elapsed time and token estimate
    const elapsed = Date.now() - startTime;
    const tokens = estimateTokens(content);

    return {
      url: fetchResult.url,
      title,
      content,
      metadata,
      links,
      tokens,
      method: fetchResult.method,
      elapsed,
    };
  } catch (error) {
    // Clean up browser resources on error. Cleanup is best-effort here: if it
    // fails too, the caller must still see the ORIGINAL failure, not the
    // cleanup one.
    try {
      await cleanup();
    } catch {
      // Intentionally ignored so `error` below is what propagates.
    }
    throw error;
  }
}
+ /**
77
+ * Clean up any browser resources
78
+ * Call this when you're done using WebPeel
79
+ */
80
+ export { cleanup };
81
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAChF,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAG5C,cAAc,YAAY,CAAC;AAE3B;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,GAAW,EAAE,UAAuB,EAAE;IAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,MAAM,EACJ,MAAM,GAAG,KAAK,EACd,IAAI,GAAG,CAAC,EACR,MAAM,GAAG,UAAU,EACnB,OAAO,GAAG,KAAK,EACf,SAAS,GACV,GAAG,OAAO,CAAC;IAEZ,IAAI,CAAC;QACH,iBAAiB;QACjB,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,GAAG,EAAE;YACxC,YAAY,EAAE,MAAM;YACpB,MAAM,EAAE,IAAI;YACZ,SAAS;YACT,SAAS,EAAE,OAAO;SACnB,CAAC,CAAC;QAEH,6BAA6B;QAC7B,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,IAAI,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC;QAE/E,gBAAgB;QAChB,MAAM,KAAK,GAAG,YAAY,CAAC,WAAW,CAAC,IAAI,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC;QAE9D,sCAAsC;QACtC,IAAI,OAAe,CAAC;QACpB,QAAQ,MAAM,EAAE,CAAC;YACf,KAAK,MAAM;gBACT,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC;gBAC3B,MAAM;YACR,KAAK,MAAM;gBACT,OAAO,GAAG,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;gBACvC,MAAM;YACR,KAAK,UAAU,CAAC;YAChB;gBACE,OAAO,GAAG,cAAc,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;gBAC3C,MAAM;QACV,CAAC;QAED,4CAA4C;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QACvC,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QAEvC,OAAO;YACL,GAAG,EAAE,WAAW,CAAC,GAAG;YACpB,KAAK;YACL,OAAO;YACP,QAAQ;YACR,KAAK;YACL,MAAM;YACN,MAAM,EAAE,WAAW,CAAC,MAAM;YAC1B,OAAO;SACR,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sCAAsC;QACtC,MAAM,OAAO,EAAE,CAAC;QAChB,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,OAAO,EAAE,OAAO,EAAE,CAAC"}
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * MCP Server for WebPeel
4
+ * Provides webpeel_fetch and webpeel_search tools for Claude Desktop / Cursor
5
+ */
6
+ export {};
7
+ //# sourceMappingURL=server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":";AAEA;;;GAGG"}