webpeel 0.3.6 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -4
- package/dist/cli-auth.d.ts +0 -16
- package/dist/cli-auth.d.ts.map +1 -1
- package/dist/cli-auth.js +0 -76
- package/dist/cli-auth.js.map +1 -1
- package/dist/cli.js +146 -4
- package/dist/cli.js.map +1 -1
- package/dist/core/actions.d.ts +15 -0
- package/dist/core/actions.d.ts.map +1 -0
- package/dist/core/actions.js +67 -0
- package/dist/core/actions.js.map +1 -0
- package/dist/core/crawler.d.ts +19 -0
- package/dist/core/crawler.d.ts.map +1 -1
- package/dist/core/crawler.js +68 -4
- package/dist/core/crawler.js.map +1 -1
- package/dist/core/extract.d.ts +10 -0
- package/dist/core/extract.d.ts.map +1 -0
- package/dist/core/extract.js +127 -0
- package/dist/core/extract.js.map +1 -0
- package/dist/core/fetcher.d.ts +9 -0
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +12 -4
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/map.d.ts +30 -0
- package/dist/core/map.d.ts.map +1 -0
- package/dist/core/map.js +61 -0
- package/dist/core/map.js.map +1 -0
- package/dist/core/markdown.d.ts +5 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +40 -0
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/pdf.d.ts +9 -0
- package/dist/core/pdf.d.ts.map +1 -0
- package/dist/core/pdf.js +24 -0
- package/dist/core/pdf.js.map +1 -0
- package/dist/core/sitemap.d.ts +24 -0
- package/dist/core/sitemap.d.ts.map +1 -0
- package/dist/core/sitemap.js +106 -0
- package/dist/core/sitemap.js.map +1 -0
- package/dist/core/strategies.d.ts +10 -0
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +4 -1
- package/dist/core/strategies.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +28 -2
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +255 -4
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +31 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +4 -3
package/dist/core/map.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain URL mapping
|
|
3
|
+
* Combines sitemap discovery with link crawling to discover all URLs on a domain
|
|
4
|
+
*/
|
|
5
|
+
import { discoverSitemap } from './sitemap.js';
|
|
6
|
+
import { peel } from '../index.js';
|
|
7
|
+
/**
 * Discover URLs on the domain of `startUrl`.
 *
 * URLs are collected in two steps: entries reported by `discoverSitemap`, then links
 * found on the page at `startUrl` (fetched with `peel`) whose hostname equals the start
 * URL's hostname. Collection stops once `maxUrls` unique URLs have been gathered.
 *
 * @param {string} startUrl - Absolute URL; its hostname defines the domain to map.
 * @param {object} [options]
 * @param {boolean} [options.useSitemap=true] - Run sitemap discovery.
 * @param {boolean} [options.crawlHomepage=true] - Fetch `startUrl` and collect same-host links.
 * @param {number} [options.maxUrls=5000] - Upper bound on the number of URLs returned.
 * @param {number} [options.timeout=10000] - Passed through to `discoverSitemap` and `peel`.
 * @param {Array<string|RegExp>} [options.includePatterns=[]] - When non-empty, a URL must match at least one.
 * @param {Array<string|RegExp>} [options.excludePatterns=[]] - A URL matching any of these is dropped.
 * @returns {Promise<{urls: string[], sitemapUrls: string[], total: number, elapsed: number}>}
 *   Sorted unique URLs, the `sitemapUrls` value reported by `discoverSitemap`, the URL
 *   count, and the elapsed time in milliseconds.
 * @throws {TypeError} If `startUrl` is not a valid absolute URL.
 * @throws {SyntaxError} If a string pattern is not a valid regular expression.
 */
export async function mapDomain(startUrl, options = {}) {
    const startTime = Date.now();
    const {
        useSitemap = true,
        crawlHomepage = true,
        maxUrls = 5000,
        timeout = 10000,
        includePatterns = [],
        excludePatterns = [],
    } = options;
    const urlObj = new URL(startUrl);
    const domain = urlObj.hostname;
    const allUrls = new Set();
    let sitemapUrls = [];

    // Compile filter patterns. RegExp inputs are rebuilt without the `g`/`y` flags: with
    // those flags test() advances lastIndex between calls, so a pattern reused across
    // URLs would silently skip some of them.
    const compilePattern = (pattern) => (pattern instanceof RegExp
        ? new RegExp(pattern.source, pattern.flags.replace(/[gy]/g, ''))
        : new RegExp(pattern));
    const includeRegexes = includePatterns.map((p) => compilePattern(p));
    const excludeRegexes = excludePatterns.map((p) => compilePattern(p));

    // Exclusions win over inclusions; an empty include list accepts everything.
    const shouldInclude = (url) => {
        if (excludeRegexes.some((r) => r.test(url))) {
            return false;
        }
        return includeRegexes.length === 0 || includeRegexes.some((r) => r.test(url));
    };

    // Step 1: Sitemap discovery
    if (useSitemap) {
        const sitemap = await discoverSitemap(domain, { timeout, maxUrls });
        sitemapUrls = sitemap.sitemapUrls;
        for (const entry of sitemap.urls) {
            if (allUrls.size >= maxUrls) {
                break;
            }
            if (shouldInclude(entry.url)) {
                allUrls.add(entry.url);
            }
        }
    }

    // Step 2: Crawl homepage for additional links
    if (crawlHomepage && allUrls.size < maxUrls) {
        try {
            const result = await peel(startUrl, { timeout });
            for (const link of result.links) {
                if (allUrls.size >= maxUrls) {
                    break;
                }
                try {
                    const linkUrl = new URL(link);
                    if (linkUrl.hostname === domain && shouldInclude(link)) {
                        allUrls.add(link);
                    }
                }
                catch { /* skip invalid URLs */ }
            }
        }
        catch { /* best effort: a failed homepage fetch still returns the sitemap results */ }
    }

    return {
        urls: Array.from(allUrls).sort(),
        sitemapUrls,
        total: allUrls.size,
        elapsed: Date.now() - startTime,
    };
}
|
|
61
|
+
//# sourceMappingURL=map.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"map.js","sourceRoot":"","sources":["../../src/core/map.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AA4BnC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB,EAAE,UAAsB,EAAE;IACxE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,EACJ,UAAU,GAAG,IAAI,EACjB,aAAa,GAAG,IAAI,EACpB,OAAO,GAAG,IAAI,EACd,OAAO,GAAG,KAAK,EACf,eAAe,GAAG,EAAE,EACpB,eAAe,GAAG,EAAE,GACrB,GAAG,OAAO,CAAC;IAEZ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACjC,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC;IAC/B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,IAAI,WAAW,GAAa,EAAE,CAAC;IAE/B,0BAA0B;IAC1B,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,cAAc,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/D,SAAS,aAAa,CAAC,GAAW;QAChC,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QACxD,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QACtF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,4BAA4B;IAC5B,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;QACpE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QAClC,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjC,IAAI,OAAO,CAAC,IAAI,IAAI,OAAO;gBAAE,MAAM;YACnC,IAAI,aAAa,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,IAAI,aAAa,IAAI,OAAO,CAAC,IAAI,GAAG,OAAO,EAAE,CAAC;QAC5C,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YACjD,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;gBAChC,IAAI,OAAO,CAAC,IAAI,IAAI,OAAO;oBAAE,MAAM;gBACnC,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;oBAC9B,IAAI,OAAO,CAAC,QAAQ,KAAK,MAAM,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;wBACvD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;oBACpB,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC,CAAC,uBAAuB
,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAC,gCAAgC,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO;QACL,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE;QAChC,WAAW;QACX,KAAK,EAAE,OAAO,CAAC,IAAI;QACnB,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;KAChC,CAAC;AACJ,CAAC"}
|
package/dist/core/markdown.d.ts
CHANGED
|
@@ -35,4 +35,9 @@ export declare function htmlToText(html: string): string;
|
|
|
35
35
|
* Rule of thumb: 1 token ≈ 4 characters for English text
|
|
36
36
|
*/
|
|
37
37
|
export declare function estimateTokens(text: string): number;
|
|
38
|
+
/**
|
|
39
|
+
* Truncate content to fit within a token budget
|
|
40
|
+
* Intelligently preserves structure (headings, first paragraph)
|
|
41
|
+
*/
|
|
42
|
+
export declare function truncateToTokenBudget(content: string, maxTokens: number): string;
|
|
38
43
|
//# sourceMappingURL=markdown.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/core/markdown.ts"],"names":[],"mappings":"AAAA;;GAEG;AAuCH;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBxF;AAmDD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,OAAO,CAAA;CAAE,CAqCnF;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM,CAqC9E;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,OAAO,CAAA;CAAE,GAAG,MAAM,CA4DjF;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAuB/C;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD"}
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/core/markdown.ts"],"names":[],"mappings":"AAAA;;GAEG;AAuCH;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBxF;AAmDD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,OAAO,CAAA;CAAE,CAqCnF;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM,CAqC9E;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,OAAO,CAAA;CAAE,GAAG,MAAM,CA4DjF;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAuB/C;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CA4ChF"}
|
package/dist/core/markdown.js
CHANGED
|
@@ -256,4 +256,44 @@ export function htmlToText(html) {
|
|
|
256
256
|
export function estimateTokens(text) {
|
|
257
257
|
return Math.ceil(text.length / 4);
|
|
258
258
|
}
|
|
259
|
+
/**
 * Truncate content to fit within a token budget.
 *
 * Content under the budget is returned unchanged. Otherwise lines are kept in order
 * until the next line would exceed `maxTokens` (token counts come from `estimateTokens`),
 * and a truncation notice is appended; the notice is not counted against the budget.
 * The first Markdown heading encountered is kept even when it does not fit.
 * If the cut happens before any non-blank body line was kept (for example the content
 * is one very long line), a prefix of that line sized to the remaining budget is kept
 * so the result is never just the notice.
 *
 * @param {string} content - Text (typically Markdown) to truncate.
 * @param {number} maxTokens - Token budget for the kept content.
 * @returns {string} The original content, or the kept lines followed by a blank line
 *   and `[Content truncated to ~N tokens]`.
 */
export function truncateToTokenBudget(content, maxTokens) {
    // If under budget, return as-is
    if (estimateTokens(content) <= maxTokens) {
        return content;
    }
    const result = [];
    let currentTokenCount = 0;
    let foundFirstHeading = false;
    let hasBodyText = false;
    for (const line of content.split('\n')) {
        const lineTokens = estimateTokens(line);
        // Always include the first heading, regardless of budget
        if (!foundFirstHeading && /^#{1,6}\s/.test(line)) {
            result.push(line);
            currentTokenCount += lineTokens;
            foundFirstHeading = true;
            continue;
        }
        if (currentTokenCount + lineTokens > maxTokens) {
            // estimateTokens uses 4 characters per token, so this many characters still fit.
            const remainingChars = (maxTokens - currentTokenCount) * 4;
            if (!hasBodyText && remainingChars > 0) {
                let partial = line.slice(0, remainingChars);
                // Do not leave a dangling high surrogate from a split astral character.
                if (/[\uD800-\uDBFF]$/.test(partial)) {
                    partial = partial.slice(0, -1);
                }
                if (partial.trim() !== '') {
                    result.push(partial);
                }
            }
            break;
        }
        result.push(line);
        currentTokenCount += lineTokens;
        if (line.trim() !== '') {
            hasBodyText = true;
        }
    }
    // Add truncation notice
    result.push('');
    result.push(`[Content truncated to ~${maxTokens} tokens]`);
    return result.join('\n');
}
|
|
259
299
|
//# sourceMappingURL=markdown.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/core/markdown.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,cAAc,GAAG;IACrB,4BAA4B;IAC5B,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,wBAAwB;IACjE,yBAAyB;IACzB,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO;IAClC,qBAAqB,EAAE,iBAAiB,EAAE,sBAAsB;IAChE,wBAAwB,EAAE,iBAAiB;IAC3C,UAAU,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,EAAE,WAAW;IAC3D,aAAa,EAAE,cAAc,EAAE,uBAAuB;IACtD,aAAa,EAAE,uBAAuB;IACtC,iBAAiB;IACjB,gBAAgB,EAAE,KAAK,EAAE,gBAAgB,EAAE,aAAa;IACxD,mBAAmB,EAAE,oBAAoB,EAAE,kBAAkB;IAC7D,mBAAmB;IACnB,gBAAgB,EAAE,gBAAgB,EAAE,iBAAiB;IACrD,mBAAmB,EAAE,gBAAgB;IACrC,oBAAoB,EAAE,iBAAiB;IACvC,0BAA0B;IAC1B,mBAAmB,EAAE,kBAAkB,EAAE,kBAAkB;IAC3D,oBAAoB,EAAE,6BAA6B;IACnD,mBAAmB;IACnB,eAAe,EAAE,kBAAkB,EAAE,mBAAmB;IACxD,mBAAmB;IACnB,oBAAoB,EAAE,uBAAuB,EAAE,sBAAsB;IACrE,gBAAgB,EAAE,2BAA2B,EAAE,mBAAmB;IAClE,kBAAkB;IAClB,gBAAgB,EAAE,oBAAoB,EAAE,wBAAwB;IAChE,yBAAyB,EAAE,yBAAyB;IACpD,WAAW;IACX,WAAW,EAAE,WAAW,EAAE,oBAAoB;IAC9C,+CAA+C;IAC/C,kBAAkB;CACnB,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,QAAgB,EAAE,OAAkB;IAC9E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QACpB,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,CAAC;IAED,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC7B,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,oDAAoD;QACpD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0CAA0C;IAC1C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC9D,CAAC;AAED;;;GAGG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,2CAA2C;IAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,OAAO;QAC3C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,cAAc,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QAClC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;IACvB,CAAC,
CAAC,CAAC;IAEH,mCAAmC;IACnC,CAAC,CAAC,oBAAoB,CAAC,CAAC,MAAM,EAAE,CAAC;IAEjC,uCAAuC;IACvC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACtB,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,sBAAsB,GAAG;IAC7B,sBAAsB;IACtB,cAAc;IACd,uBAAuB;IACvB,SAAS;IACT,eAAe;IACf,MAAM;IACN,eAAe,EAAE,kBAAkB,EAAE,eAAe,EAAE,gBAAgB;IACtE,YAAY,EAAE,aAAa,EAAE,eAAe;IAC5C,UAAU,EAAE,eAAe,EAAE,UAAU,EAAE,OAAO;IAChD,UAAU,EAAE,eAAe;CAC5B,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,KAAK,MAAM,QAAQ,IAAI,sBAAsB,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClB,kEAAkE;YAClE,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;gBACvB,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtD,CAAC;QACH,CAAC;IACH,CAAC;IAED,wEAAwE;IACxE,IAAI,MAAM,GAAgC,IAAI,CAAC;IAC/C,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjC,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,sEAAsE;QACtE,IAAI,IAAI,CAAC,MAAM,GAAG,OAAO,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YAChD,6CAA6C;YAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC9B,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9E,MAAM,GAAG,KAAK,CAAC;gBACf,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,IAAI,MAAM,IAAI,OAAO,GAAG,GAAG,EAAE,CAAC;QAC5B,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;I
AClD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AACnC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,YAAoB;IACpE,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,CAAC,CAAC;IAE9C,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAClC,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC;IAEpC,uFAAuF;IACvF,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACxE,iEAAiE;IACjE,MAAM,gBAAgB,GAAG,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACtD,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEtC,wEAAwE;IACxE,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACrE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;IAEpD,4DAA4D;IAC5D,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IACzD,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjE,wCAAwC;IACxC,MAAM,WAAW,GAAG,UAAU,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACzC,UAAU,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACxB,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjC,mBAAmB;IACnB,MAAM,OAAO,GAAG,CACd,gBAAgB,GAAG,GAAG;QACtB,YAAY,GAAG,GAAG;QAClB,CAAC,WAAW,GAAG,aAAa,CAAC,GAAG,GAAG;QACnC,WAAW,GAAG,GAAG,CAClB,CAAC;IAEF,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AACzC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAA4B;IACvE,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,GAAG;QACrB,WAAW,EAAE,GAAG;QAChB,eAAe,EAAE,IAAI;KACtB,CAAC,CAAC;IAEH,kBAAkB;IAClB,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAE7D,kDAAkD;IAClD,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE;QACzB,MAAM,E
AAE,KAAK;QACb,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;YAC7B,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC,CAAC;IAEH,oCAAoC;IACpC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE;QAC7B,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACf,OAAO,IAAI,CAAC,QAAQ,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,KAAK,MAAM,CAAC;QACzE,CAAC;QACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAiB,CAAC;YACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9D,OAAO,SAAS,GAAG,QAAQ,GAAG,IAAI,GAAG,QAAQ,CAAC,WAAW,GAAG,WAAW,CAAC;QAC1E,CAAC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE9C,kEAAkE;IAClE,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,yBAAyB;QAC5D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,CAAC;IAC5C,CAAC;IAED,8DAA8D;IAC9D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE;QAC3D,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,SAAS,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QACrC,IAAI,SAAS,IAAI,SAAS;YAAE,OAAO,GAAG,CAAC;QACvC,OAAO,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3B,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,qCAAqC;IACrC,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAE3B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEpC,8CAA8C;IAC9C,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,CAAC,CAAC,+BAA+B,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,IAAI,OAAO,GAAG,MAAM,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,yDAAyD;IACzD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,IAAI,GAAG,
CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAED,gCAAgC;IAChC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IACvC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
|
|
1
|
+
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/core/markdown.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,cAAc,GAAG;IACrB,4BAA4B;IAC5B,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,wBAAwB;IACjE,yBAAyB;IACzB,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO;IAClC,qBAAqB,EAAE,iBAAiB,EAAE,sBAAsB;IAChE,wBAAwB,EAAE,iBAAiB;IAC3C,UAAU,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,EAAE,WAAW;IAC3D,aAAa,EAAE,cAAc,EAAE,uBAAuB;IACtD,aAAa,EAAE,uBAAuB;IACtC,iBAAiB;IACjB,gBAAgB,EAAE,KAAK,EAAE,gBAAgB,EAAE,aAAa;IACxD,mBAAmB,EAAE,oBAAoB,EAAE,kBAAkB;IAC7D,mBAAmB;IACnB,gBAAgB,EAAE,gBAAgB,EAAE,iBAAiB;IACrD,mBAAmB,EAAE,gBAAgB;IACrC,oBAAoB,EAAE,iBAAiB;IACvC,0BAA0B;IAC1B,mBAAmB,EAAE,kBAAkB,EAAE,kBAAkB;IAC3D,oBAAoB,EAAE,6BAA6B;IACnD,mBAAmB;IACnB,eAAe,EAAE,kBAAkB,EAAE,mBAAmB;IACxD,mBAAmB;IACnB,oBAAoB,EAAE,uBAAuB,EAAE,sBAAsB;IACrE,gBAAgB,EAAE,2BAA2B,EAAE,mBAAmB;IAClE,kBAAkB;IAClB,gBAAgB,EAAE,oBAAoB,EAAE,wBAAwB;IAChE,yBAAyB,EAAE,yBAAyB;IACpD,WAAW;IACX,WAAW,EAAE,WAAW,EAAE,oBAAoB;IAC9C,+CAA+C;IAC/C,kBAAkB;CACnB,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,QAAgB,EAAE,OAAkB;IAC9E,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QACpB,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,CAAC;IAED,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC7B,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,oDAAoD;QACpD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0CAA0C;IAC1C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC9D,CAAC;AAED;;;GAGG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,2CAA2C;IAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,OAAO;QAC3C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,uBAAuB;IACvB,cAAc,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QAClC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;IACvB,CAAC,
CAAC,CAAC;IAEH,mCAAmC;IACnC,CAAC,CAAC,oBAAoB,CAAC,CAAC,MAAM,EAAE,CAAC;IAEjC,uCAAuC;IACvC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACtB,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,sBAAsB,GAAG;IAC7B,sBAAsB;IACtB,cAAc;IACd,uBAAuB;IACvB,SAAS;IACT,eAAe;IACf,MAAM;IACN,eAAe,EAAE,kBAAkB,EAAE,eAAe,EAAE,gBAAgB;IACtE,YAAY,EAAE,aAAa,EAAE,eAAe;IAC5C,UAAU,EAAE,eAAe,EAAE,UAAU,EAAE,OAAO;IAChD,UAAU,EAAE,eAAe;CAC5B,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,KAAK,MAAM,QAAQ,IAAI,sBAAsB,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClB,kEAAkE;YAClE,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;gBACvB,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtD,CAAC;QACH,CAAC;IACH,CAAC;IAED,wEAAwE;IACxE,IAAI,MAAM,GAAgC,IAAI,CAAC;IAC/C,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjC,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,sEAAsE;QACtE,IAAI,IAAI,CAAC,MAAM,GAAG,OAAO,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YAChD,6CAA6C;YAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC9B,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9E,MAAM,GAAG,KAAK,CAAC;gBACf,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,IAAI,MAAM,IAAI,OAAO,GAAG,GAAG,EAAE,CAAC;QAC5B,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;I
AClD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AACnC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,YAAoB;IACpE,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,CAAC,CAAC;IAE9C,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAClC,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,CAAC;IAEpC,uFAAuF;IACvF,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACxE,iEAAiE;IACjE,MAAM,gBAAgB,GAAG,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACtD,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAC/B,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEtC,wEAAwE;IACxE,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACrE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;IAEpD,4DAA4D;IAC5D,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IACzD,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjE,wCAAwC;IACxC,MAAM,WAAW,GAAG,UAAU,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACzC,UAAU,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACxB,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjC,mBAAmB;IACnB,MAAM,OAAO,GAAG,CACd,gBAAgB,GAAG,GAAG;QACtB,YAAY,GAAG,GAAG;QAClB,CAAC,WAAW,GAAG,aAAa,CAAC,GAAG,GAAG;QACnC,WAAW,GAAG,GAAG,CAClB,CAAC;IAEF,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AACzC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAA4B;IACvE,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,GAAG;QACrB,WAAW,EAAE,GAAG;QAChB,eAAe,EAAE,IAAI;KACtB,CAAC,CAAC;IAEH,kBAAkB;IAClB,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAE7D,kDAAkD;IAClD,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE;QACzB,MAAM,E
AAE,KAAK;QACb,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,MAAM,GAAG,GAAI,IAAY,CAAC,GAAG,CAAC;YAC9B,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;YAC7B,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC,CAAC;IAEH,oCAAoC;IACpC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE;QAC7B,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACf,OAAO,IAAI,CAAC,QAAQ,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,KAAK,MAAM,CAAC;QACzE,CAAC;QACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAiB,CAAC;YACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9D,OAAO,SAAS,GAAG,QAAQ,GAAG,IAAI,GAAG,QAAQ,CAAC,WAAW,GAAG,WAAW,CAAC;QAC1E,CAAC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE9C,kEAAkE;IAClE,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,yBAAyB;QAC5D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,CAAC;IAC5C,CAAC;IAED,8DAA8D;IAC9D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE;QAC3D,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,SAAS,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;QACrC,IAAI,SAAS,IAAI,SAAS;YAAE,OAAO,GAAG,CAAC;QACvC,OAAO,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3B,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,qCAAqC;IACrC,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAE3B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEpC,8CAA8C;IAC9C,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,CAAC,CAAC,+BAA+B,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,IAAI,OAAO,GAAG,MAAM,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,yDAAyD;IACzD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,IAAI,GAAG,
CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAED,gCAAgC;IAChC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IACvC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,OAAe,EAAE,SAAiB;IACtE,MAAM,aAAa,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IAE9C,gCAAgC;IAChC,IAAI,aAAa,IAAI,SAAS,EAAE,CAAC;QAC/B,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,mBAAmB;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,0BAA0B;IAC1B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,IAAI,iBAAiB,GAAG,KAAK,CAAC;IAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEzC,mCAAmC;QACnC,IAAI,CAAC,iBAAiB,IAAI,SAAS,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,iBAAiB,IAAI,UAAU,CAAC;YAChC,iBAAiB,GAAG,IAAI,CAAC;YACzB,SAAS;QACX,CAAC;QAED,gDAAgD;QAChD,IAAI,iBAAiB,GAAG,UAAU,GAAG,SAAS,EAAE,CAAC;YAC/C,YAAY;YACZ,MAAM;QACR,CAAC;QAED,eAAe;QACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClB,iBAAiB,IAAI,UAAU,CAAC;IAClC,CAAC;IAED,wBAAwB;IACxB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,MAAM,CAAC,IAAI,CAAC,0BAA0B,SAAS,UAAU,CAAC,CAAC;IAE3D,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC3B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/core/pdf.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,wBAAsB,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAkBxH"}
|
package/dist/core/pdf.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF extraction using pdf-parse
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Extract text and document metadata from a PDF using the `pdf-parse` package,
 * which is loaded with a dynamic import on each call.
 *
 * @param {Buffer} buffer - PDF file contents; handed unchanged to pdf-parse
 *   (presumably a Buffer — confirm against callers).
 * @returns {Promise<{text: string, metadata: object, pages: number}>} The parsed text,
 *   the Title/Author/Creator/Producer/CreationDate info fields (empty string when
 *   absent), and the page count reported by pdf-parse.
 * @throws {Error} "PDF parsing failed: …" with the underlying error as `cause`. The
 *   install hint is appended only when loading pdf-parse itself failed.
 */
export async function extractPdf(buffer) {
    const describe = (error) => (error instanceof Error ? error.message : 'Unknown error');

    let pdfParse;
    try {
        pdfParse = (await import('pdf-parse')).default;
    }
    catch (error) {
        // Only a failed import means the package is missing, so only here suggest installing it.
        throw new Error(`PDF parsing failed: ${describe(error)}. Install pdf-parse: npm install pdf-parse`, { cause: error });
    }

    try {
        const data = await pdfParse(buffer);
        return {
            text: data.text,
            metadata: {
                title: data.info?.Title || '',
                author: data.info?.Author || '',
                creator: data.info?.Creator || '',
                producer: data.info?.Producer || '',
                creationDate: data.info?.CreationDate || '',
            },
            pages: data.numpages,
        };
    }
    catch (error) {
        // The package loaded; the document itself could not be parsed.
        throw new Error(`PDF parsing failed: ${describe(error)}`, { cause: error });
    }
}
|
|
24
|
+
//# sourceMappingURL=pdf.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/core/pdf.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,MAAc;IAC7C,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC;QACrD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,QAAQ,EAAE;gBACR,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,KAAK,IAAI,EAAE;gBAC7B,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,IAAI,EAAE;gBAC/B,OAAO,EAAE,IAAI,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE;gBACjC,QAAQ,EAAE,IAAI,CAAC,IAAI,EAAE,QAAQ,IAAI,EAAE;gBACnC,YAAY,EAAE,IAAI,CAAC,IAAI,EAAE,YAAY,IAAI,EAAE;aAC5C;YACD,KAAK,EAAE,IAAI,CAAC,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,4CAA4C,CAAC,CAAC;IAC/I,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sitemap discovery and parsing
|
|
3
|
+
* Discovers URLs from sitemap.xml files
|
|
4
|
+
*/
|
|
5
|
+
/**
 * One URL entry discovered from a sitemap.
 * Only `url` is required; the other fields are optional.
 * NOTE(review): `lastmod`, `changefreq` and `priority` presumably mirror the sitemap
 * protocol elements of the same names — confirm against the parser in sitemap.js.
 */
export interface SitemapUrl {
|
|
6
|
+
url: string;
|
|
7
|
+
lastmod?: string;
|
|
8
|
+
changefreq?: string;
|
|
9
|
+
priority?: number;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Result type of `discoverSitemap` (see its declared return type below).
 * `urls` holds the discovered entries and `sitemapUrls` is a list of strings.
 * NOTE(review): `sitemapUrls` presumably lists the sitemap files that were read and
 * `elapsed` is presumably milliseconds — confirm against sitemap.js.
 */
export interface SitemapResult {
|
|
12
|
+
urls: SitemapUrl[];
|
|
13
|
+
sitemapUrls: string[];
|
|
14
|
+
elapsed: number;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Discover all URLs from a domain's sitemap.xml
|
|
18
|
+
* Handles sitemap index files (recursive), gzip compression, and common locations
|
|
19
|
+
*/
|
|
20
|
+
export declare function discoverSitemap(domain: string, options?: {
|
|
21
|
+
timeout?: number;
|
|
22
|
+
maxUrls?: number;
|
|
23
|
+
}): Promise<SitemapResult>;
|
|
24
|
+
//# sourceMappingURL=sitemap.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap.d.ts","sourceRoot":"","sources":["../../src/core/sitemap.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,UAAU,EAAE,CAAC;IACnB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAsB,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,aAAa,CAAC,CAiG9H"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sitemap discovery and parsing
|
|
3
|
+
* Discovers URLs from sitemap.xml files
|
|
4
|
+
*/
|
|
5
|
+
import { fetch as undiciFetch } from 'undici';
|
|
6
|
+
/**
 * Discover the URLs a site advertises through its sitemaps.
 *
 * Candidate sitemaps come from two sources:
 *  1. `Sitemap:` directives in `https://<domain>/robots.txt`. Every declared
 *     sitemap is read, in the order it is listed.
 *  2. Well-known fallback paths (`/sitemap.xml`, `/sitemap_index.xml`,
 *     `/sitemap/sitemap.xml`, `/wp-sitemap.xml`). These are guesses, so they
 *     are probed only while nothing has been collected, and probing stops at
 *     the first one that yields URLs.
 *
 * Sitemap index files are followed recursively; a `visited` set prevents
 * fetching the same sitemap twice. Response bodies are read with
 * `resp.text()`; there is no explicit gunzip step for `.xml.gz` files.
 * Fetch, HTTP and parse failures are swallowed on purpose (best-effort
 * discovery): a sitemap that cannot be read contributes no entries.
 *
 * @param {string} domain - Host name, interpolated into `https://${domain}/...`.
 * @param {{ timeout?: number, maxUrls?: number }} [options]
 *   `timeout`: per-request timeout in ms (default 10000).
 *   `maxUrls`: maximum number of entries to collect (default 10000).
 * @returns {Promise<{ urls: Array<{ url: string, lastmod?: string, changefreq?: string, priority?: number }>, sitemapUrls: string[], elapsed: number }>}
 *   `urls`: collected entries; `sitemapUrls`: sitemap documents fetched with
 *   an OK response, in fetch order; `elapsed`: total duration in ms.
 */
export async function discoverSitemap(domain, options) {
    const startTime = Date.now();
    const maxUrls = options?.maxUrls || 10000;
    const timeout = options?.timeout || 10000;
    const headers = { 'User-Agent': 'WebPeel/0.4.0 (+https://webpeel.dev)' };
    const allUrls = [];
    const sitemapUrls = [];
    const visited = new Set();

    // Sitemaps explicitly declared by the site in robots.txt, in declaration order.
    const declaredSitemaps = [];
    try {
        const robotsResp = await undiciFetch(`https://${domain}/robots.txt`, {
            signal: AbortSignal.timeout(timeout),
            headers,
        });
        if (robotsResp.ok) {
            const robotsText = await robotsResp.text();
            // Anchored to the start of a line (case-insensitive) so that comment
            // lines such as "# Sitemap: ..." are not mistaken for directives.
            for (const [, rawUrl] of robotsText.matchAll(/^[ \t]*sitemap:[ \t]*(.+)/gim)) {
                const url = rawUrl.trim();
                if (url && !declaredSitemaps.includes(url)) {
                    declaredSitemaps.push(url);
                }
            }
        }
    }
    catch { /* robots.txt is optional: ignore fetch/read errors */ }

    // Guessed locations, minus anything robots.txt already declared.
    const fallbackSitemaps = [
        `https://${domain}/sitemap.xml`,
        `https://${domain}/sitemap_index.xml`,
        `https://${domain}/sitemap/sitemap.xml`,
        `https://${domain}/wp-sitemap.xml`,
    ].filter((url) => !declaredSitemaps.includes(url));

    // Fetch one sitemap document and either recurse into it (index file) or
    // append its <url> entries to allUrls.
    async function parseSitemap(sitemapUrl) {
        if (visited.has(sitemapUrl) || allUrls.length >= maxUrls) {
            return;
        }
        visited.add(sitemapUrl);
        try {
            const resp = await undiciFetch(sitemapUrl, {
                signal: AbortSignal.timeout(timeout),
                headers,
            });
            if (!resp.ok) {
                return;
            }
            const xml = await resp.text();
            // Recorded as soon as the document was fetched, before parsing.
            sitemapUrls.push(sitemapUrl);

            // cheerio is loaded lazily and used in XML mode.
            const { load } = await import('cheerio');
            const $ = load(xml, { xml: true });

            // A sitemap index lists child sitemaps instead of page URLs.
            const sitemapIndexEntries = $('sitemapindex > sitemap > loc');
            if (sitemapIndexEntries.length > 0) {
                for (let i = 0; i < sitemapIndexEntries.length && allUrls.length < maxUrls; i++) {
                    const childUrl = $(sitemapIndexEntries[i]).text().trim();
                    if (childUrl) {
                        await parseSitemap(childUrl);
                    }
                }
                return;
            }

            $('urlset > url').each((_, el) => {
                if (allUrls.length >= maxUrls) {
                    return false; // returning false stops cheerio's .each()
                }
                const loc = $(el).find('loc').text().trim();
                if (!loc) {
                    return undefined;
                }
                const entry = { url: loc };
                const lastmod = $(el).find('lastmod').text().trim();
                const changefreq = $(el).find('changefreq').text().trim();
                const priority = Number.parseFloat($(el).find('priority').text().trim());
                if (lastmod) {
                    entry.lastmod = lastmod;
                }
                if (changefreq) {
                    entry.changefreq = changefreq;
                }
                // Skip missing or non-numeric priorities instead of storing NaN.
                if (Number.isFinite(priority)) {
                    entry.priority = priority;
                }
                allUrls.push(entry);
                return undefined;
            });
        }
        catch { /* skip sitemaps that fail to download or parse */ }
    }

    // robots.txt declarations are authoritative: read all of them (up to maxUrls).
    for (const sitemapUrl of declaredSitemaps) {
        if (allUrls.length >= maxUrls) {
            break;
        }
        await parseSitemap(sitemapUrl);
    }

    // Fallback guesses: only while nothing was found; the first hit wins.
    for (const sitemapUrl of fallbackSitemaps) {
        if (allUrls.length > 0) {
            break;
        }
        await parseSitemap(sitemapUrl);
    }

    return {
        urls: allUrls,
        sitemapUrls,
        elapsed: Date.now() - startTime,
    };
}
|
|
106
|
+
//# sourceMappingURL=sitemap.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap.js","sourceRoot":"","sources":["../../src/core/sitemap.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAe9C;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAc,EAAE,OAAgD;IACpG,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,KAAK,CAAC;IAC1C,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,KAAK,CAAC;IAC1C,MAAM,OAAO,GAAiB,EAAE,CAAC;IACjC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,+BAA+B;IAC/B,MAAM,gBAAgB,GAAG;QACvB,WAAW,MAAM,cAAc;QAC/B,WAAW,MAAM,oBAAoB;QACrC,WAAW,MAAM,sBAAsB;QACvC,WAAW,MAAM,iBAAiB;KACnC,CAAC;IAEF,+CAA+C;IAC/C,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,MAAM,WAAW,CAAC,WAAW,MAAM,aAAa,EAAE;YACnE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC;YACpC,OAAO,EAAE,EAAE,YAAY,EAAE,sCAAsC,EAAE;SAClE,CAAC,CAAC;QACH,IAAI,UAAU,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,CAAC;YAC3C,MAAM,cAAc,GAAG,UAAU,CAAC,KAAK,CAAC,mBAAmB,CAAC,IAAI,EAAE,CAAC;YACnE,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;gBACnC,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBACrD,IAAI,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC3C,gBAAgB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,iCAAiC;gBAClE,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAC,8BAA8B,CAAC,CAAC;IAE1C,KAAK,UAAU,YAAY,CAAC,UAAkB;QAC5C,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,MAAM,IAAI,OAAO;YAAE,OAAO;QACjE,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAExB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,UAAU,EAAE;gBACzC,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC;gBACpC,OAAO,EAAE,EAAE,YAAY,EAAE,sCAAsC,EAAE;aAClE,CAAC,CAAC;YACH,IAAI,CAAC,IAAI,CAAC,EAAE;gBAAE,OAAO;YAErB,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAE7B,8BAA8B;YAC9B,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YACzC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;YAEnC,mCAAmC;YACnC,MAAM,mBAAmB,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC;YAC9D,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAA
C,EAAE,CAAC;gBACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;oBAChF,MAAM,QAAQ,GAAG,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;oBACzD,IAAI,QAAQ,EAAE,CAAC;wBACb,MAAM,YAAY,CAAC,QAAQ,CAAC,CAAC;oBAC/B,CAAC;gBACH,CAAC;gBACD,OAAO;YACT,CAAC;YAED,oBAAoB;YACpB,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;gBAC/B,IAAI,OAAO,CAAC,MAAM,IAAI,OAAO;oBAAE,OAAO,KAAK,CAAC;gBAC5C,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,CAAC,GAAG;oBAAE,OAAO,SAAS,CAAC;gBAE3B,MAAM,KAAK,GAAe,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;gBACvC,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC1D,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAEtD,IAAI,OAAO;oBAAE,KAAK,CAAC,OAAO,GAAG,OAAO,CAAC;gBACrC,IAAI,UAAU;oBAAE,KAAK,CAAC,UAAU,GAAG,UAAU,CAAC;gBAC9C,IAAI,QAAQ;oBAAE,KAAK,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;gBAEpD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACpB,OAAO,SAAS,CAAC;YACnB,CAAC,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC,CAAC,0BAA0B,CAAC,CAAC;IACxC,CAAC;IAED,4BAA4B;IAC5B,KAAK,MAAM,UAAU,IAAI,gBAAgB,EAAE,CAAC;QAC1C,IAAI,OAAO,CAAC,MAAM,IAAI,OAAO;YAAE,MAAM;QACrC,MAAM,YAAY,CAAC,UAAU,CAAC,CAAC;QAC/B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,MAAM,CAAC,8CAA8C;IAC/E,CAAC;IAED,OAAO;QACL,IAAI,EAAE,OAAO;QACb,WAAW;QACX,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;KAChC,CAAC;AACJ,CAAC"}
|
|
@@ -21,6 +21,16 @@ export interface StrategyOptions {
|
|
|
21
21
|
headers?: Record<string, string>;
|
|
22
22
|
/** Cookies to set (key=value pairs) */
|
|
23
23
|
cookies?: string[];
|
|
24
|
+
/** Page actions to execute before extraction */
|
|
25
|
+
actions?: Array<{
|
|
26
|
+
type: 'wait' | 'click' | 'scroll' | 'type' | 'fill' | 'select' | 'press' | 'hover' | 'waitForSelector' | 'screenshot';
|
|
27
|
+
selector?: string;
|
|
28
|
+
value?: string;
|
|
29
|
+
key?: string;
|
|
30
|
+
ms?: number;
|
|
31
|
+
to?: 'top' | 'bottom' | number;
|
|
32
|
+
timeout?: number;
|
|
33
|
+
}>;
|
|
24
34
|
}
|
|
25
35
|
export interface StrategyResult extends FetchResult {
|
|
26
36
|
/** Which strategy succeeded: 'simple' | 'browser' | 'stealth' */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAGvF,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAGvF,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gDAAgD;IAChD,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,iEAAiE;IACjE,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;CAC1C;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,cAAc,CAAC,CAyGpG"}
|
package/dist/core/strategies.js
CHANGED
|
@@ -15,7 +15,7 @@ import { BlockedError, NetworkError } from '../types.js';
|
|
|
15
15
|
* Returns the result along with which method worked
|
|
16
16
|
*/
|
|
17
17
|
export async function smartFetch(url, options = {}) {
|
|
18
|
-
const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies } = options;
|
|
18
|
+
const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, actions } = options;
|
|
19
19
|
// If stealth is requested, force browser mode (stealth requires browser)
|
|
20
20
|
const shouldUseBrowser = forceBrowser || screenshot || stealth;
|
|
21
21
|
// Strategy 1: Simple fetch (unless browser is forced or screenshot is requested)
|
|
@@ -49,6 +49,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
49
49
|
headers,
|
|
50
50
|
cookies,
|
|
51
51
|
stealth,
|
|
52
|
+
actions,
|
|
52
53
|
});
|
|
53
54
|
return {
|
|
54
55
|
...result,
|
|
@@ -68,6 +69,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
68
69
|
headers,
|
|
69
70
|
cookies,
|
|
70
71
|
stealth: true, // Escalate to stealth mode
|
|
72
|
+
actions,
|
|
71
73
|
});
|
|
72
74
|
return {
|
|
73
75
|
...result,
|
|
@@ -91,6 +93,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
91
93
|
headers,
|
|
92
94
|
cookies,
|
|
93
95
|
stealth, // Keep stealth setting
|
|
96
|
+
actions,
|
|
94
97
|
});
|
|
95
98
|
return {
|
|
96
99
|
...result,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"strategies.js","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAC;AACvF,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"strategies.js","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAC;AACvF,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsCzD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,UAA2B,EAAE;IACzE,MAAM,EACJ,YAAY,GAAG,KAAK,EACpB,OAAO,GAAG,KAAK,EACf,MAAM,GAAG,CAAC,EACV,SAAS,EACT,SAAS,GAAG,KAAK,EACjB,UAAU,GAAG,KAAK,EAClB,kBAAkB,GAAG,KAAK,EAC1B,OAAO,EACP,OAAO,EACP,OAAO,EACR,GAAG,OAAO,CAAC;IAEZ,yEAAyE;IACzE,MAAM,gBAAgB,GAAG,YAAY,IAAI,UAAU,IAAI,OAAO,CAAC;IAE/D,iFAAiF;IACjF,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAC7B,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,CAAC,EACrD,CAAC,CACF,CAAC;YACF,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,QAAQ;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,8CAA8C;YAC9C,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;gBAClC,mCAAmC;YACrC,CAAC;iBAAM,CAAC;gBACN,kDAAkD;gBAClD,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,sDAAsD;IACtD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;YACrC,SAAS;YACT,MAAM;YACN,SAAS;YACT,UAAU;YACV,kBAAkB;YAClB,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;SACR,CAAC,CAAC;QACH,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,mGAAmG;QACnG,IAAI,CAAC,OAAO,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;YAC9C,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;oBACrC,SAAS;oBACT,MAAM;oBACN,SAAS;oBACT,UAAU;oBACV,kBAAkB;oBAClB,OAAO;oBACP,OAAO;oBACP,OAAO,EAAE,IAAI,EAAE,2BAA2B;oBAC1C,OAAO;iBACR,CAAC,CAAC;gBACH,OAAO;oBACL,GAAG,MAAM;oBACT,MAAM,EAAE,SAAS;iBAClB,CAAC;YACJ,CAAC;YAAC,OAAO,YAAY,EAAE,CAAC;gBACtB,kDAAkD;gBAClD,MAAM,YAAY,CAAC;YACrB,CAAC;QACH,CAAC;QAED,+DAA+D;QAC/D,IACE,KAAK,YAAY,YAAY;YAC7B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAClD,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;gBACrC,SAAS;gBACT,MAAM,EAAE,IAAI,EAAE,mCAAmC;gBACjD,SAAS;gBACT,UAAU;gBACV,kBAAkB;gBAClB,OAAO;gBACP,OAAO;gBA
CP,OAAO,EAAE,uBAAuB;gBAChC,OAAO;aACR,CAAC,CAAC;YACH,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;aACxC,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -6,7 +6,9 @@
|
|
|
6
6
|
import { cleanup } from './core/fetcher.js';
|
|
7
7
|
import type { PeelOptions, PeelResult } from './types.js';
|
|
8
8
|
export * from './types.js';
|
|
9
|
-
export { crawl, type CrawlOptions, type CrawlResult } from './core/crawler.js';
|
|
9
|
+
export { crawl, type CrawlOptions, type CrawlResult, type CrawlProgress } from './core/crawler.js';
|
|
10
|
+
export { discoverSitemap, type SitemapUrl, type SitemapResult } from './core/sitemap.js';
|
|
11
|
+
export { mapDomain, type MapOptions, type MapResult } from './core/map.js';
|
|
10
12
|
/**
|
|
11
13
|
* Fetch and extract content from a URL
|
|
12
14
|
*
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAE5C,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE1D,cAAc,YAAY,CAAC;AAC3B,OAAO,EAAE,KAAK,EAAE,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACnG,OAAO,EAAE,eAAe,EAAE,KAAK,UAAU,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,KAAK,UAAU,EAAE,KAAK,SAAS,EAAE,MAAM,eAAe,CAAC;AAE3E;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CAkNtF;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,WAAW,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GACnD,OAAO,CAAC,CAAC,UAAU,GAAG;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,EAAE,CAAC,CAwB1D;AAED;;;GAGG;AACH,OAAO,EAAE,OAAO,EAAE,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -5,11 +5,14 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { createHash } from 'crypto';
|
|
7
7
|
import { smartFetch } from './core/strategies.js';
|
|
8
|
-
import { htmlToMarkdown, htmlToText, estimateTokens, selectContent, detectMainContent, calculateQuality } from './core/markdown.js';
|
|
8
|
+
import { htmlToMarkdown, htmlToText, estimateTokens, selectContent, detectMainContent, calculateQuality, truncateToTokenBudget } from './core/markdown.js';
|
|
9
9
|
import { extractMetadata, extractLinks } from './core/metadata.js';
|
|
10
10
|
import { cleanup } from './core/fetcher.js';
|
|
11
|
+
import { extractStructured } from './core/extract.js';
|
|
11
12
|
export * from './types.js';
|
|
12
13
|
export { crawl } from './core/crawler.js';
|
|
14
|
+
export { discoverSitemap } from './core/sitemap.js';
|
|
15
|
+
export { mapDomain } from './core/map.js';
|
|
13
16
|
/**
|
|
14
17
|
* Fetch and extract content from a URL
|
|
15
18
|
*
|
|
@@ -28,7 +31,7 @@ export { crawl } from './core/crawler.js';
|
|
|
28
31
|
*/
|
|
29
32
|
export async function peel(url, options = {}) {
|
|
30
33
|
const startTime = Date.now();
|
|
31
|
-
let { render = false, stealth = false, wait = 0, format = 'markdown', timeout = 30000, userAgent, screenshot = false, screenshotFullPage = false, selector, exclude, headers, cookies, raw = false, } = options;
|
|
34
|
+
let { render = false, stealth = false, wait = 0, format = 'markdown', timeout = 30000, userAgent, screenshot = false, screenshotFullPage = false, selector, exclude, headers, cookies, raw = false, actions, extract, maxTokens, } = options;
|
|
32
35
|
// Detect PDF URLs and force browser rendering
|
|
33
36
|
const isPdf = url.toLowerCase().endsWith('.pdf');
|
|
34
37
|
if (isPdf) {
|
|
@@ -42,6 +45,10 @@ export async function peel(url, options = {}) {
|
|
|
42
45
|
if (stealth) {
|
|
43
46
|
render = true;
|
|
44
47
|
}
|
|
48
|
+
// If actions are provided, force render mode
|
|
49
|
+
if (actions && actions.length > 0) {
|
|
50
|
+
render = true;
|
|
51
|
+
}
|
|
45
52
|
try {
|
|
46
53
|
// Fetch the page
|
|
47
54
|
const fetchResult = await smartFetch(url, {
|
|
@@ -54,6 +61,7 @@ export async function peel(url, options = {}) {
|
|
|
54
61
|
screenshotFullPage,
|
|
55
62
|
headers,
|
|
56
63
|
cookies,
|
|
64
|
+
actions,
|
|
57
65
|
});
|
|
58
66
|
// Detect content type from the response
|
|
59
67
|
const ct = (fetchResult.contentType || '').toLowerCase();
|
|
@@ -157,6 +165,23 @@ export async function peel(url, options = {}) {
|
|
|
157
165
|
links = [...new Set(found)];
|
|
158
166
|
quality = 1.0;
|
|
159
167
|
}
|
|
168
|
+
// Extract structured data if requested
|
|
169
|
+
let extracted;
|
|
170
|
+
if (extract && isHTML) {
|
|
171
|
+
if (extract.llmApiKey && (extract.prompt || extract.schema)) {
|
|
172
|
+
// LLM-powered extraction
|
|
173
|
+
const { extractWithLLM } = await import('./core/extract.js');
|
|
174
|
+
extracted = await extractWithLLM(content, extract);
|
|
175
|
+
}
|
|
176
|
+
else if (extract.selectors || extract.schema) {
|
|
177
|
+
// CSS-based extraction (existing)
|
|
178
|
+
extracted = extractStructured(fetchResult.html, extract);
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
// Truncate to token budget if requested
|
|
182
|
+
if (maxTokens && maxTokens > 0) {
|
|
183
|
+
content = truncateToTokenBudget(content, maxTokens);
|
|
184
|
+
}
|
|
160
185
|
// Calculate elapsed time, tokens, and fingerprint
|
|
161
186
|
const elapsed = Date.now() - startTime;
|
|
162
187
|
const tokens = estimateTokens(content);
|
|
@@ -176,6 +201,7 @@ export async function peel(url, options = {}) {
|
|
|
176
201
|
contentType: detectedType,
|
|
177
202
|
quality,
|
|
178
203
|
fingerprint,
|
|
204
|
+
extracted,
|
|
179
205
|
};
|
|
180
206
|
}
|
|
181
207
|
catch (error) {
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,cAAc,EAAE,aAAa,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,cAAc,EAAE,aAAa,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC3J,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAC5C,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAGtD,cAAc,YAAY,CAAC;AAC3B,OAAO,EAAE,KAAK,EAA2D,MAAM,mBAAmB,CAAC;AACnG,OAAO,EAAE,eAAe,EAAuC,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAmC,MAAM,eAAe,CAAC;AAE3E;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,GAAW,EAAE,UAAuB,EAAE;IAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,IAAI,EACF,MAAM,GAAG,KAAK,EACd,OAAO,GAAG,KAAK,EACf,IAAI,GAAG,CAAC,EACR,MAAM,GAAG,UAAU,EACnB,OAAO,GAAG,KAAK,EACf,SAAS,EACT,UAAU,GAAG,KAAK,EAClB,kBAAkB,GAAG,KAAK,EAC1B,QAAQ,EACR,OAAO,EACP,OAAO,EACP,OAAO,EACP,GAAG,GAAG,KAAK,EACX,OAAO,EACP,OAAO,EACP,SAAS,GACV,GAAG,OAAO,CAAC;IAEZ,8CAA8C;IAC9C,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IACjD,IAAI,KAAK,EAAE,CAAC;QACV,MAAM,GAAG,IAAI,CAAC;IAChB,CAAC;IAED,gDAAgD;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,GAAG,IAAI,CAAC;IAChB,CAAC;IAED,6CAA6C;IAC7C,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,GAAG,IAAI,CAAC;IAChB,CAAC;IAED,6CAA6C;IAC7C,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,GAAG,IAAI,CAAC;IAChB,CAAC;IAED,IAAI,CAAC;QACH,iBAAiB;QACjB,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,GAAG,EAAE;YACxC,YAAY,EAAE,MAAM;YACpB,OAAO;YACP,MAAM,EAAE,IAAI;YACZ,SAAS;YACT,SAAS,EAAE,OAAO;YAClB,UAAU;YACV,kBAAkB;YAClB,OAAO;YACP,OAAO;YACP,OAAO;SACR,CAAC,CAAC;QAEH,wCAAwC;QACxC,MAAM,EAAE,GAAG,CAAC,WAAW,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACzD,MAAM,MAAM,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,WAAW,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QACpH,MAAM,MAAM,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnC,MAAM,KAAK,GAAG,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,EA
AE,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC9E,MAAM,WAAW,GAAG,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;QAEjK,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAEvG,IAAI,OAAe,CAAC;QACpB,IAAI,KAAK,GAAG,EAAE,CAAC;QACf,IAAI,QAAQ,GAAQ,EAAE,CAAC;QACvB,IAAI,KAAK,GAAa,EAAE,CAAC;QACzB,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,IAAI,MAAM,EAAE,CAAC;YACX,yBAAyB;YACzB,IAAI,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC;YAC5B,IAAI,QAAQ,EAAE,CAAC;gBACb,IAAI,GAAG,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAChD,CAAC;YAED,4CAA4C;YAC5C,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC;YACpD,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;YACnB,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;YACzB,KAAK,GAAG,YAAY,CAAC,IAAI,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC;YAE5C,kEAAkE;YAClE,IAAI,WAAW,GAAG,IAAI,CAAC;YACvB,IAAI,CAAC,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACtB,MAAM,QAAQ,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;gBACzC,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;oBACtB,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC;gBAC9B,CAAC;YACH,CAAC;YAED,QAAQ,MAAM,EAAE,CAAC;gBACf,KAAK,MAAM;oBACT,OAAO,GAAG,WAAW,CAAC;oBACtB,MAAM;gBACR,KAAK,MAAM;oBACT,OAAO,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;oBAClC,MAAM;gBACR,KAAK,UAAU,CAAC;gBAChB;oBACE,OAAO,GAAG,cAAc,CAAC,WAAW,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;oBAC/C,MAAM;YACV,CAAC;YAED,OAAO,GAAG,gBAAgB,CAAC,OAAO,EAAE,WAAW,CAAC,IAAI,CAAC,CAAC;QACxD,CAAC;aAAM,IAAI,MAAM,EAAE,CAAC;YAClB,+BAA+B;YAC/B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;gBAC5C,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;gBAC1C,KAAK,GAAG,eAAe,CAAC;gBAExB,uCAAuC;gBACvC,MAAM,QAAQ,GAAG,4BAA4B,CAAC;gBAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;gBAC5C,KAAK,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;YAC9B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,GAAG,WAAW,CAA
C,IAAI,CAAC;gBAC3B,KAAK,GAAG,2BAA2B,CAAC;YACtC,CAAC;YACD,OAAO,GAAG,GAAG,CAAC,CAAC,qCAAqC;QACtD,CAAC;aAAM,IAAI,KAAK,EAAE,CAAC;YACjB,4CAA4C;YAC5C,IAAI,CAAC;gBACH,MAAM,CAAC,GAAG,CAAC,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;gBAE1E,yBAAyB;gBACzB,MAAM,KAAK,GAAG,CAAC,CAAC,aAAa,CAAC,CAAC;gBAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACrB,KAAK,GAAG,CAAC,CAAC,+BAA+B,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,eAAe,CAAC;oBAC7E,MAAM,SAAS,GAAa,EAAE,CAAC;oBAC/B,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;wBACnB,MAAM,SAAS,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;wBACrD,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;wBACpG,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;wBAC1F,SAAS,CAAC,IAAI,CAAC,MAAM,SAAS,KAAK,QAAQ,KAAK,QAAQ,EAAE,CAAC,CAAC;wBAC5D,IAAI,QAAQ;4BAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBACrC,CAAC,CAAC,CAAC;oBACH,OAAO,GAAG,KAAK,KAAK,OAAO,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC;gBAC7D,CAAC;qBAAM,CAAC;oBACN,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC;oBAC3B,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,cAAc,CAAC;gBACtD,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC;gBAC3B,KAAK,GAAG,cAAc,CAAC;YACzB,CAAC;YACD,OAAO,GAAG,GAAG,CAAC;QAChB,CAAC;aAAM,CAAC;YACN,0CAA0C;YAC1C,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC;YAC3B,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,eAAe,CAAC;YAE5D,+BAA+B;YAC/B,MAAM,QAAQ,GAAG,4BAA4B,CAAC;YAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC5C,KAAK,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;YAC5B,OAAO,GAAG,GAAG,CAAC;QAChB,CAAC;QAED,uCAAuC;QACvC,IAAI,SAA0C,CAAC;QAC/C,IAAI,OAAO,IAAI,MAAM,EAAE,CAAC;YACtB,IAAI,OAAO,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,OAAO,
CAAC,MAAM,CAAC,EAAE,CAAC;gBAC5D,yBAAyB;gBACzB,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;gBAC7D,SAAS,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YACrD,CAAC;iBAAM,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBAC/C,kCAAkC;gBAClC,SAAS,GAAG,iBAAiB,CAAC,WAAW,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;QAED,wCAAwC;QACxC,IAAI,SAAS,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAC/B,OAAO,GAAG,qBAAqB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QACtD,CAAC;QAED,kDAAkD;QAClD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QACvC,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEpF,iDAAiD;QACjD,MAAM,gBAAgB,GAAG,WAAW,CAAC,UAAU,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAEpE,OAAO;YACL,GAAG,EAAE,WAAW,CAAC,GAAG;YACpB,KAAK;YACL,OAAO;YACP,QAAQ;YACR,KAAK;YACL,MAAM;YACN,MAAM,EAAE,WAAW,CAAC,MAAM;YAC1B,OAAO;YACP,UAAU,EAAE,gBAAgB;YAC5B,WAAW,EAAE,YAAY;YACzB,OAAO;YACP,WAAW;YACX,SAAS;SACV,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sCAAsC;QACtC,MAAM,OAAO,EAAE,CAAC;QAChB,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,IAAc,EACd,UAAkD,EAAE;IAEpD,MAAM,EAAE,WAAW,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE,GAAG,OAAO,CAAC;IACjD,MAAM,OAAO,GAAoD,EAAE,CAAC;IAEpE,qBAAqB;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;QAC7C,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,UAAU,CAC3C,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC,CACtC,CAAC;QAEF,YAAY,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACjC,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;oBACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,IAAI,eAAe;iBACjD,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;GAGG;AACH,OAAO,EAAE,O
AAO,EAAE,CAAC"}
|