@memvid/maw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -0
- package/dist/bin/maw.d.ts +6 -0
- package/dist/bin/maw.d.ts.map +1 -0
- package/dist/bin/maw.js +275 -0
- package/dist/bin/maw.js.map +1 -0
- package/dist/src/crawler/index.d.ts +71 -0
- package/dist/src/crawler/index.d.ts.map +1 -0
- package/dist/src/crawler/index.js +249 -0
- package/dist/src/crawler/index.js.map +1 -0
- package/dist/src/crawler/robots.d.ts +26 -0
- package/dist/src/crawler/robots.d.ts.map +1 -0
- package/dist/src/crawler/robots.js +179 -0
- package/dist/src/crawler/robots.js.map +1 -0
- package/dist/src/crawler/sitemap.d.ts +36 -0
- package/dist/src/crawler/sitemap.d.ts.map +1 -0
- package/dist/src/crawler/sitemap.js +209 -0
- package/dist/src/crawler/sitemap.js.map +1 -0
- package/dist/src/engine/detector.d.ts +18 -0
- package/dist/src/engine/detector.d.ts.map +1 -0
- package/dist/src/engine/detector.js +155 -0
- package/dist/src/engine/detector.js.map +1 -0
- package/dist/src/engine/fetch.d.ts +18 -0
- package/dist/src/engine/fetch.d.ts.map +1 -0
- package/dist/src/engine/fetch.js +53 -0
- package/dist/src/engine/fetch.js.map +1 -0
- package/dist/src/engine/index.d.ts +39 -0
- package/dist/src/engine/index.d.ts.map +1 -0
- package/dist/src/engine/index.js +116 -0
- package/dist/src/engine/index.js.map +1 -0
- package/dist/src/engine/playwright.d.ts +23 -0
- package/dist/src/engine/playwright.d.ts.map +1 -0
- package/dist/src/engine/playwright.js +88 -0
- package/dist/src/engine/playwright.js.map +1 -0
- package/dist/src/engine/rebrowser.d.ts +22 -0
- package/dist/src/engine/rebrowser.d.ts.map +1 -0
- package/dist/src/engine/rebrowser.js +142 -0
- package/dist/src/engine/rebrowser.js.map +1 -0
- package/dist/src/extractor/cleaner.d.ts +13 -0
- package/dist/src/extractor/cleaner.d.ts.map +1 -0
- package/dist/src/extractor/cleaner.js +122 -0
- package/dist/src/extractor/cleaner.js.map +1 -0
- package/dist/src/extractor/index.d.ts +29 -0
- package/dist/src/extractor/index.d.ts.map +1 -0
- package/dist/src/extractor/index.js +162 -0
- package/dist/src/extractor/index.js.map +1 -0
- package/dist/src/extractor/links.d.ts +22 -0
- package/dist/src/extractor/links.d.ts.map +1 -0
- package/dist/src/extractor/links.js +92 -0
- package/dist/src/extractor/links.js.map +1 -0
- package/dist/src/extractor/markdown.d.ts +13 -0
- package/dist/src/extractor/markdown.d.ts.map +1 -0
- package/dist/src/extractor/markdown.js +94 -0
- package/dist/src/extractor/markdown.js.map +1 -0
- package/dist/src/git/index.d.ts +40 -0
- package/dist/src/git/index.d.ts.map +1 -0
- package/dist/src/git/index.js +303 -0
- package/dist/src/git/index.js.map +1 -0
- package/dist/src/index.d.ts +103 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +229 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/ingestor/index.d.ts +95 -0
- package/dist/src/ingestor/index.d.ts.map +1 -0
- package/dist/src/ingestor/index.js +471 -0
- package/dist/src/ingestor/index.js.map +1 -0
- package/dist/src/utils/dedup.d.ts +66 -0
- package/dist/src/utils/dedup.d.ts.map +1 -0
- package/dist/src/utils/dedup.js +296 -0
- package/dist/src/utils/dedup.js.map +1 -0
- package/dist/src/utils/index.d.ts +3 -0
- package/dist/src/utils/index.d.ts.map +1 -0
- package/dist/src/utils/index.js +3 -0
- package/dist/src/utils/index.js.map +1 -0
- package/dist/src/utils/logger.d.ts +12 -0
- package/dist/src/utils/logger.d.ts.map +1 -0
- package/dist/src/utils/logger.js +49 -0
- package/dist/src/utils/logger.js.map +1 -0
- package/dist/src/utils/ui.d.ts +126 -0
- package/dist/src/utils/ui.d.ts.map +1 -0
- package/dist/src/utils/ui.js +357 -0
- package/dist/src/utils/ui.js.map +1 -0
- package/dist/src/utils/url.d.ts +21 -0
- package/dist/src/utils/url.d.ts.map +1 -0
- package/dist/src/utils/url.js +107 -0
- package/dist/src/utils/url.js.map +1 -0
- package/package.json +71 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Playwright engine for JavaScript rendering
|
|
3
|
+
* Lazy-loaded only when needed
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Check if playwright is installed
|
|
7
|
+
*/
|
|
8
|
+
export declare function isPlaywrightInstalled(): Promise<boolean>;
|
|
9
|
+
export interface PlaywrightOptions {
|
|
10
|
+
timeout?: number;
|
|
11
|
+
userAgent?: string;
|
|
12
|
+
}
|
|
13
|
+
export declare class PlaywrightEngine {
|
|
14
|
+
private browser;
|
|
15
|
+
private context;
|
|
16
|
+
fetch(url: string, options?: PlaywrightOptions): Promise<{
|
|
17
|
+
html: string;
|
|
18
|
+
statusCode: number;
|
|
19
|
+
finalUrl: string;
|
|
20
|
+
}>;
|
|
21
|
+
close(): Promise<void>;
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=playwright.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.d.ts","sourceRoot":"","sources":["../../../src/engine/playwright.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH;;GAEG;AACH,wBAAsB,qBAAqB,IAAI,OAAO,CAAC,OAAO,CAAC,CAO9D;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAExC,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC;QACjE,IAAI,EAAE,MAAM,CAAC;QACb,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;IA+DI,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAU7B"}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Playwright engine for JavaScript rendering
|
|
3
|
+
* Lazy-loaded only when needed
|
|
4
|
+
*/
|
|
5
|
+
let playwrightModule = null;
/**
 * Report whether the optional `playwright` package can be imported.
 *
 * On success the imported module is kept in the module-level
 * `playwrightModule` variable, which `PlaywrightEngine.fetch` checks before
 * importing again. Any import failure is reported as `false` instead of
 * being thrown.
 *
 * @returns `true` when `import('playwright')` succeeds, otherwise `false`.
 */
export async function isPlaywrightInstalled() {
    let available = false;
    try {
        playwrightModule = await import('playwright');
        available = true;
    }
    catch {
        // The import failed; treat the dependency as not installed.
    }
    return available;
}
|
|
18
|
+
export class PlaywrightEngine {
|
|
19
|
+
browser = null;
|
|
20
|
+
context = null;
|
|
21
|
+
async fetch(url, options = {}) {
|
|
22
|
+
if (!playwrightModule) {
|
|
23
|
+
playwrightModule = await import('playwright');
|
|
24
|
+
}
|
|
25
|
+
if (!this.browser) {
|
|
26
|
+
this.browser = await playwrightModule.chromium.launch({
|
|
27
|
+
headless: true,
|
|
28
|
+
args: [
|
|
29
|
+
'--disable-blink-features=AutomationControlled',
|
|
30
|
+
'--disable-dev-shm-usage',
|
|
31
|
+
'--no-sandbox',
|
|
32
|
+
'--disable-setuid-sandbox',
|
|
33
|
+
'--disable-infobars',
|
|
34
|
+
'--window-position=0,0',
|
|
35
|
+
'--ignore-certifcate-errors',
|
|
36
|
+
'--ignore-certifcate-errors-spki-list',
|
|
37
|
+
],
|
|
38
|
+
});
|
|
39
|
+
this.context = await this.browser.newContext({
|
|
40
|
+
userAgent: options.userAgent ||
|
|
41
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
42
|
+
viewport: { width: 1920, height: 1080 },
|
|
43
|
+
locale: 'en-US',
|
|
44
|
+
timezoneId: 'America/New_York',
|
|
45
|
+
deviceScaleFactor: 1,
|
|
46
|
+
hasTouch: false,
|
|
47
|
+
isMobile: false,
|
|
48
|
+
});
|
|
49
|
+
// Block unnecessary resources for speed
|
|
50
|
+
await this.context.route('**/*', (route) => {
|
|
51
|
+
const type = route.request().resourceType();
|
|
52
|
+
if (['image', 'stylesheet', 'font', 'media'].includes(type)) {
|
|
53
|
+
route.abort();
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
route.continue();
|
|
57
|
+
}
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
const page = await this.context.newPage();
|
|
61
|
+
try {
|
|
62
|
+
const response = await page.goto(url, {
|
|
63
|
+
waitUntil: 'networkidle',
|
|
64
|
+
timeout: options.timeout || 15000,
|
|
65
|
+
});
|
|
66
|
+
// Wait a bit for dynamic content
|
|
67
|
+
await page.waitForTimeout(1000);
|
|
68
|
+
const html = await page.content();
|
|
69
|
+
const statusCode = response?.status() || 200;
|
|
70
|
+
const finalUrl = page.url();
|
|
71
|
+
return { html, statusCode, finalUrl };
|
|
72
|
+
}
|
|
73
|
+
finally {
|
|
74
|
+
await page.close();
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
async close() {
|
|
78
|
+
if (this.context) {
|
|
79
|
+
await this.context.close();
|
|
80
|
+
this.context = null;
|
|
81
|
+
}
|
|
82
|
+
if (this.browser) {
|
|
83
|
+
await this.browser.close();
|
|
84
|
+
this.browser = null;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
//# sourceMappingURL=playwright.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright.js","sourceRoot":"","sources":["../../../src/engine/playwright.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,IAAI,gBAAgB,GAAuC,IAAI,CAAC;AAEhE;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB;IACzC,IAAI,CAAC;QACH,gBAAgB,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAC9C,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAOD,MAAM,OAAO,gBAAgB;IACnB,OAAO,GAAmB,IAAI,CAAC;IAC/B,OAAO,GAA0B,IAAI,CAAC;IAE9C,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAA6B,EAAE;QAKtD,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,gBAAgB,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAChD,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,IAAI,CAAC,OAAO,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,MAAM,CAAC;gBACpD,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE;oBACJ,+CAA+C;oBAC/C,yBAAyB;oBACzB,cAAc;oBACd,0BAA0B;oBAC1B,oBAAoB;oBACpB,uBAAuB;oBACvB,4BAA4B;oBAC5B,sCAAsC;iBACvC;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;gBAC3C,SAAS,EAAE,OAAO,CAAC,SAAS;oBAC1B,uHAAuH;gBACzH,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,kBAAkB;gBAC9B,iBAAiB,EAAE,CAAC;gBACpB,QAAQ,EAAE,KAAK;gBACf,QAAQ,EAAE,KAAK;aAChB,CAAC,CAAC;YAEH,wCAAwC;YACxC,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;gBACzC,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC,YAAY,EAAE,CAAC;gBAC5C,IAAI,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC5D,KAAK,CAAC,KAAK,EAAE,CAAC;gBAChB,CAAC;qBAAM,CAAC;oBACN,KAAK,CAAC,QAAQ,EAAE,CAAC;gBACnB,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,OAAO,EAAE,CAAC;QAE3C,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;aAClC,CAAC,CAAC;YAEH,iCAAiC;YACjC,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;YAEhC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,MAAM,UAAU,GAAG,QAAQ,EAAE,MAAM,EAAE,IAAI,GAAG,CAAC;YAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAE5B,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;QACxC,CAAC;gBAAS,CA
AC;YACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACrB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stealth engine for anti-bot bypass
|
|
3
|
+
* Uses playwright with stealth patches and human-like behavior
|
|
4
|
+
* Lazy-loaded only when needed
|
|
5
|
+
*/
|
|
6
|
+
export interface RebrowserOptions {
|
|
7
|
+
timeout?: number;
|
|
8
|
+
userAgent?: string;
|
|
9
|
+
}
|
|
10
|
+
export declare class RebrowserEngine {
|
|
11
|
+
private browser;
|
|
12
|
+
private context;
|
|
13
|
+
fetch(url: string, options?: RebrowserOptions): Promise<{
|
|
14
|
+
html: string;
|
|
15
|
+
statusCode: number;
|
|
16
|
+
finalUrl: string;
|
|
17
|
+
}>;
|
|
18
|
+
private humanize;
|
|
19
|
+
private waitForChallenge;
|
|
20
|
+
close(): Promise<void>;
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=rebrowser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rebrowser.d.ts","sourceRoot":"","sources":["../../../src/engine/rebrowser.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAExC,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,OAAO,CAAC;QAChE,IAAI,EAAE,MAAM,CAAC;QACb,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;YAsGY,QAAQ;YAWR,gBAAgB;IA4BxB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAU7B"}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stealth engine for anti-bot bypass
|
|
3
|
+
* Uses playwright with stealth patches and human-like behavior
|
|
4
|
+
* Lazy-loaded only when needed
|
|
5
|
+
*/
|
|
6
|
+
let playwrightModule = null;
|
|
7
|
+
export class RebrowserEngine {
|
|
8
|
+
browser = null;
|
|
9
|
+
context = null;
|
|
10
|
+
async fetch(url, options = {}) {
|
|
11
|
+
if (!playwrightModule) {
|
|
12
|
+
playwrightModule = await import('playwright');
|
|
13
|
+
}
|
|
14
|
+
if (!this.browser) {
|
|
15
|
+
// Use full browser with stealth settings
|
|
16
|
+
this.browser = await playwrightModule.chromium.launch({
|
|
17
|
+
headless: true,
|
|
18
|
+
args: [
|
|
19
|
+
'--disable-blink-features=AutomationControlled',
|
|
20
|
+
'--disable-dev-shm-usage',
|
|
21
|
+
'--no-sandbox',
|
|
22
|
+
'--disable-setuid-sandbox',
|
|
23
|
+
'--disable-web-security',
|
|
24
|
+
'--disable-features=IsolateOrigins,site-per-process',
|
|
25
|
+
'--disable-infobars',
|
|
26
|
+
'--window-position=0,0',
|
|
27
|
+
'--ignore-certifcate-errors',
|
|
28
|
+
],
|
|
29
|
+
});
|
|
30
|
+
this.context = await this.browser.newContext({
|
|
31
|
+
userAgent: options.userAgent ||
|
|
32
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
33
|
+
viewport: { width: 1920, height: 1080 },
|
|
34
|
+
locale: 'en-US',
|
|
35
|
+
timezoneId: 'America/New_York',
|
|
36
|
+
geolocation: { latitude: 40.7128, longitude: -74.0060 },
|
|
37
|
+
permissions: ['geolocation'],
|
|
38
|
+
deviceScaleFactor: 1,
|
|
39
|
+
hasTouch: false,
|
|
40
|
+
isMobile: false,
|
|
41
|
+
});
|
|
42
|
+
// Add stealth scripts
|
|
43
|
+
await this.context.addInitScript(() => {
|
|
44
|
+
// Hide webdriver
|
|
45
|
+
Object.defineProperty(navigator, 'webdriver', { get: () => false });
|
|
46
|
+
// Fake plugins
|
|
47
|
+
Object.defineProperty(navigator, 'plugins', {
|
|
48
|
+
get: () => [1, 2, 3, 4, 5],
|
|
49
|
+
});
|
|
50
|
+
// Fake languages
|
|
51
|
+
Object.defineProperty(navigator, 'languages', {
|
|
52
|
+
get: () => ['en-US', 'en'],
|
|
53
|
+
});
|
|
54
|
+
// Override permissions
|
|
55
|
+
const originalQuery = window.navigator.permissions.query;
|
|
56
|
+
// @ts-ignore
|
|
57
|
+
window.navigator.permissions.query = (parameters) => parameters.name === 'notifications'
|
|
58
|
+
? Promise.resolve({ state: Notification.permission })
|
|
59
|
+
: originalQuery(parameters);
|
|
60
|
+
// Hide automation indicators
|
|
61
|
+
// @ts-ignore
|
|
62
|
+
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;
|
|
63
|
+
// @ts-ignore
|
|
64
|
+
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;
|
|
65
|
+
// @ts-ignore
|
|
66
|
+
delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;
|
|
67
|
+
});
|
|
68
|
+
// Block heavy resources
|
|
69
|
+
await this.context.route('**/*', (route) => {
|
|
70
|
+
const type = route.request().resourceType();
|
|
71
|
+
if (['image', 'stylesheet', 'font', 'media'].includes(type)) {
|
|
72
|
+
route.abort();
|
|
73
|
+
}
|
|
74
|
+
else {
|
|
75
|
+
route.continue();
|
|
76
|
+
}
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
const page = await this.context.newPage();
|
|
80
|
+
try {
|
|
81
|
+
// Simulate human-like behavior
|
|
82
|
+
await this.humanize(page);
|
|
83
|
+
const response = await page.goto(url, {
|
|
84
|
+
waitUntil: 'networkidle',
|
|
85
|
+
timeout: options.timeout || 30000,
|
|
86
|
+
});
|
|
87
|
+
// Wait for Cloudflare challenge if present
|
|
88
|
+
await this.waitForChallenge(page);
|
|
89
|
+
const html = await page.content();
|
|
90
|
+
const statusCode = response?.status() || 200;
|
|
91
|
+
const finalUrl = page.url();
|
|
92
|
+
return { html, statusCode, finalUrl };
|
|
93
|
+
}
|
|
94
|
+
finally {
|
|
95
|
+
await page.close();
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
async humanize(page) {
|
|
99
|
+
// Random mouse movements
|
|
100
|
+
await page.mouse.move(100 + Math.random() * 500, 100 + Math.random() * 300);
|
|
101
|
+
// Random delay
|
|
102
|
+
await page.waitForTimeout(500 + Math.random() * 1000);
|
|
103
|
+
}
|
|
104
|
+
async waitForChallenge(page) {
|
|
105
|
+
// Wait for Cloudflare/other challenge to complete
|
|
106
|
+
const challengeSelectors = [
|
|
107
|
+
'#challenge-running',
|
|
108
|
+
'.cf-browser-verification',
|
|
109
|
+
'#challenge-form',
|
|
110
|
+
'[data-testid="challenge-running"]',
|
|
111
|
+
'.challenge-running',
|
|
112
|
+
];
|
|
113
|
+
for (const selector of challengeSelectors) {
|
|
114
|
+
try {
|
|
115
|
+
const element = await page.$(selector);
|
|
116
|
+
if (element) {
|
|
117
|
+
// Challenge detected, wait for it to resolve
|
|
118
|
+
await page.waitForSelector(selector, {
|
|
119
|
+
state: 'detached',
|
|
120
|
+
timeout: 15000,
|
|
121
|
+
});
|
|
122
|
+
await page.waitForTimeout(2000);
|
|
123
|
+
break;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
catch {
|
|
127
|
+
// Selector not found, continue
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
async close() {
|
|
132
|
+
if (this.context) {
|
|
133
|
+
await this.context.close();
|
|
134
|
+
this.context = null;
|
|
135
|
+
}
|
|
136
|
+
if (this.browser) {
|
|
137
|
+
await this.browser.close();
|
|
138
|
+
this.browser = null;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
//# sourceMappingURL=rebrowser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rebrowser.js","sourceRoot":"","sources":["../../../src/engine/rebrowser.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,IAAI,gBAAgB,GAAuC,IAAI,CAAC;AAOhE,MAAM,OAAO,eAAe;IAClB,OAAO,GAAmB,IAAI,CAAC;IAC/B,OAAO,GAA0B,IAAI,CAAC;IAE9C,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAA4B,EAAE;QAKrD,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,gBAAgB,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAChD,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,yCAAyC;YACzC,IAAI,CAAC,OAAO,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,MAAM,CAAC;gBACpD,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE;oBACJ,+CAA+C;oBAC/C,yBAAyB;oBACzB,cAAc;oBACd,0BAA0B;oBAC1B,wBAAwB;oBACxB,oDAAoD;oBACpD,oBAAoB;oBACpB,uBAAuB;oBACvB,4BAA4B;iBAC7B;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;gBAC3C,SAAS,EAAE,OAAO,CAAC,SAAS;oBAC1B,uHAAuH;gBACzH,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,kBAAkB;gBAC9B,WAAW,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,OAAO,EAAE;gBACvD,WAAW,EAAE,CAAC,aAAa,CAAC;gBAC5B,iBAAiB,EAAE,CAAC;gBACpB,QAAQ,EAAE,KAAK;gBACf,QAAQ,EAAE,KAAK;aAChB,CAAC,CAAC;YAEH,sBAAsB;YACtB,MAAM,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE;gBACpC,iBAAiB;gBACjB,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC,CAAC;gBAEpE,eAAe;gBACf,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,SAAS,EAAE;oBAC1C,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;iBAC3B,CAAC,CAAC;gBAEH,iBAAiB;gBACjB,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,WAAW,EAAE;oBAC5C,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC;iBAC3B,CAAC,CAAC;gBAEH,uBAAuB;gBACvB,MAAM,aAAa,GAAG,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,KAAK,CAAC;gBACzD,aAAa;gBACb,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,KAAK,GAAG,CAAC,UAAe,EAAE,EAAE,CACvD,UAAU,CAAC,IAAI,KAAK,eAAe;oBACjC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,KAAK,EAAE,YAAY,CAAC,UAAU,EAAsB,CAAC;oBACzE,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;gBAEhC,6BAA6B;gBAC7B,aAAa;gBACb,OAAO,MAAM,CAAC,gCAAgC,CAAC;gBAC/C,aAAa;gBACb,OAAO,MAAM,CAAC,kCAAkC,CAAC;gBACjD,aAAa;gBACb,OAAO,MAAM,CAAC,iCAAiC,CAAC;YAC
lD,CAAC,CAAC,CAAC;YAEH,wBAAwB;YACxB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;gBACzC,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC,YAAY,EAAE,CAAC;gBAC5C,IAAI,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC5D,KAAK,CAAC,KAAK,EAAE,CAAC;gBAChB,CAAC;qBAAM,CAAC;oBACN,KAAK,CAAC,QAAQ,EAAE,CAAC;gBACnB,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,OAAO,EAAE,CAAC;QAE3C,IAAI,CAAC;YACH,+BAA+B;YAC/B,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAE1B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;aAClC,CAAC,CAAC;YAEH,2CAA2C;YAC3C,MAAM,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;YAElC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,MAAM,UAAU,GAAG,QAAQ,EAAE,MAAM,EAAE,IAAI,GAAG,CAAC;YAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAE5B,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;QACxC,CAAC;gBAAS,CAAC;YACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACrB,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,QAAQ,CAAC,IAAU;QAC/B,yBAAyB;QACzB,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CACnB,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,EACzB,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAC1B,CAAC;QAEF,eAAe;QACf,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC;IACxD,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,IAAU;QACvC,kDAAkD;QAClD,MAAM,kBAAkB,GAAG;YACzB,oBAAoB;YACpB,0BAA0B;YAC1B,iBAAiB;YACjB,mCAAmC;YACnC,oBAAoB;SACrB,CAAC;QAEF,KAAK,MAAM,QAAQ,IAAI,kBAAkB,EAAE,CAAC;YAC1C,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;gBACvC,IAAI,OAAO,EAAE,CAAC;oBACZ,6CAA6C;oBAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE;wBACnC,KAAK,EAAE,UAAU;wBACjB,OAAO,EAAE,KAAK;qBACf,CAAC,CAAC;oBACH,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;oBAChC,MAAM;gBACR,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,+BAA+B;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,EAAE,C
AAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML cleaner - removes unwanted elements before extraction
|
|
3
|
+
*/
|
|
4
|
+
import type { CheerioAPI } from 'cheerio';
|
|
5
|
+
/**
|
|
6
|
+
* Clean HTML by removing unwanted elements
|
|
7
|
+
*/
|
|
8
|
+
export declare function cleanHtml($: CheerioAPI): void;
|
|
9
|
+
/**
|
|
10
|
+
* Extract the main content area if identifiable
|
|
11
|
+
*/
|
|
12
|
+
export declare function findMainContent($: CheerioAPI): string | null;
|
|
13
|
+
//# sourceMappingURL=cleaner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cleaner.d.ts","sourceRoot":"","sources":["../../../src/extractor/cleaner.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AA4D1C;;GAEG;AACH,wBAAgB,SAAS,CAAC,CAAC,EAAE,UAAU,GAAG,IAAI,CA+C7C;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,IAAI,CAyB5D"}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML cleaner - removes unwanted elements before extraction
|
|
3
|
+
*/
|
|
4
|
+
// CSS selectors for elements that cleanHtml() strips before extraction.
// The entries are joined with ', ' into one selector list, so each entry
// must be a complete selector on its own.
const REMOVE_SELECTORS = [
    // Scripts and styles
    'script', 'style', 'noscript', 'iframe', 'svg', 'canvas',
    // Navigation and layout
    'nav', 'header', 'footer', 'aside',
    '[role="banner"]', '[role="navigation"]', '[role="contentinfo"]',
    '[role="complementary"]', '[role="menu"]', '[role="menubar"]',
    // Common class patterns for navigation/layout
    '.nav', '.navbar', '.navigation', '.header', '.footer', '.sidebar',
    '.menu', '.top-bar', '.bottom-bar', '.site-header', '.site-footer',
    '.page-header', '.page-footer', '.masthead',
    // Ads and tracking
    '.ad', '.ads', '.advertisement', '.advert', '.sponsored',
    // "ad-" / "ads-" are matched only at the start of the class attribute,
    // after a space, or after a hyphen. A bare substring match
    // ([class*="ad-"]) also hits content classes such as "read-more",
    // "load-more", "thread-title" or "downloads-list".
    '[class^="ad-"]', '[class*=" ad-"]', '[class*="-ad-"]',
    '[class^="ads-"]', '[class*=" ads-"]', '[class*="-ads-"]',
    '[id*="google_ads"]',
    '.tracking', '.analytics',
    // Popups and overlays
    '.popup', '.modal', '.overlay', '.lightbox', '.dialog',
    '.cookie-banner', '.cookie-consent', '.cookie-notice',
    '.gdpr', '.consent-banner',
    // Social and sharing
    '.social-share', '.share-buttons', '.share-links', '.social-links',
    '.follow-us', '.social-icons',
    // Comments
    '.comments', '.comment-section', '.comment-form', '#comments', '#disqus',
    '.discuss', '.discussion',
    // Related/recommended content
    '.related-posts', '.related-articles', '.recommended', '.suggestions',
    '.more-from', '.you-may-like', '.also-read',
    // Newsletter and subscriptions
    '.newsletter', '.subscribe', '.subscription', '.signup-form',
    '.email-signup', '.mailing-list',
    // Breadcrumbs and pagination
    '.breadcrumb', '.breadcrumbs', '.pagination', '.pager',
    // Author bio (usually after article)
    '.author-bio', '.author-box', '.about-author',
    // Hidden elements
    '[aria-hidden="true"]', '[hidden]', '.hidden', '.visually-hidden',
    '.screen-reader-text', '.sr-only',
    // Forms (except search)
    'form:not([role="search"])',
    // Print-only elements
    '.print-only', '.no-screen',
];
|
|
48
|
+
/**
 * Strip noise from a loaded document in place.
 *
 * Steps, in order:
 *  1. remove every element matching `REMOVE_SELECTORS`;
 *  2. remove elements that have no child elements and no non-whitespace
 *     text, walking bottom-up so a wrapper that only contained empty
 *     elements is removed as well;
 *  3. remove `data-*` attributes (except `data-language` / `data-lang`) and
 *     attributes whose name starts with `on`;
 *  4. remove `style` attributes.
 *
 * `class` attributes are intentionally kept.
 *
 * @param $ - Cheerio instance for the document; it is mutated.
 */
export function cleanHtml($) {
    $(REMOVE_SELECTORS.join(', ')).remove();
    // Elements that are legitimately empty and must survive step 2.
    const preserveTags = new Set(['br', 'hr', 'img', 'input', 'meta', 'link', 'area', 'base', 'col', 'embed', 'source', 'track', 'wbr']);
    // Document order lists a parent before its descendants. Reversing it
    // visits children first, so a parent emptied by this pass is still seen.
    const elements = $('*').get().reverse();
    for (const el of elements) {
        if (el.type !== 'tag')
            continue;
        const tagName = el.tagName?.toLowerCase();
        if (preserveTags.has(tagName))
            continue;
        const $el = $(el);
        if (!$el.children().length && !$el.text().trim()) {
            $el.remove();
        }
    }
    // One traversal handles both kinds of attribute noise.
    $('*').each((_, el) => {
        if (el.type !== 'tag')
            return;
        const $el = $(el);
        for (const attr of Object.keys(el.attribs || {})) {
            const isNoisyData = attr.startsWith('data-') && attr !== 'data-language' && attr !== 'data-lang';
            const isEventHandler = attr.startsWith('on');
            if (isNoisyData || isEventHandler) {
                $el.removeAttr(attr);
            }
        }
    });
    $('[style]').removeAttr('style');
}
|
|
95
|
+
/**
|
|
96
|
+
* Extract the main content area if identifiable
|
|
97
|
+
*/
|
|
98
|
+
export function findMainContent($) {
|
|
99
|
+
// Priority order for main content
|
|
100
|
+
const selectors = [
|
|
101
|
+
'main',
|
|
102
|
+
'article',
|
|
103
|
+
'[role="main"]',
|
|
104
|
+
'#main-content',
|
|
105
|
+
'#content',
|
|
106
|
+
'.main-content',
|
|
107
|
+
'.content',
|
|
108
|
+
'.post-content',
|
|
109
|
+
'.article-content',
|
|
110
|
+
'.entry-content',
|
|
111
|
+
'.post-body',
|
|
112
|
+
'.article-body',
|
|
113
|
+
];
|
|
114
|
+
for (const selector of selectors) {
|
|
115
|
+
const $el = $(selector).first();
|
|
116
|
+
if ($el.length && $el.text().trim().length > 200) {
|
|
117
|
+
return $el.html() || null;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
return null;
|
|
121
|
+
}
|
|
122
|
+
//# sourceMappingURL=cleaner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cleaner.js","sourceRoot":"","sources":["../../../src/extractor/cleaner.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,uCAAuC;AACvC,MAAM,gBAAgB,GAAG;IACvB,qBAAqB;IACrB,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ;IAExD,wBAAwB;IACxB,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO;IAClC,iBAAiB,EAAE,qBAAqB,EAAE,sBAAsB;IAChE,wBAAwB,EAAE,eAAe,EAAE,kBAAkB;IAE7D,8CAA8C;IAC9C,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU;IAClE,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,cAAc,EAAE,cAAc;IAClE,cAAc,EAAE,cAAc,EAAE,WAAW;IAE3C,mBAAmB;IACnB,KAAK,EAAE,MAAM,EAAE,gBAAgB,EAAE,SAAS,EAAE,YAAY;IACxD,gBAAgB,EAAE,iBAAiB,EAAE,oBAAoB;IACzD,WAAW,EAAE,YAAY;IAEzB,sBAAsB;IACtB,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS;IACtD,gBAAgB,EAAE,iBAAiB,EAAE,gBAAgB;IACrD,OAAO,EAAE,iBAAiB;IAE1B,qBAAqB;IACrB,eAAe,EAAE,gBAAgB,EAAE,cAAc,EAAE,eAAe;IAClE,YAAY,EAAE,eAAe;IAE7B,WAAW;IACX,WAAW,EAAE,kBAAkB,EAAE,eAAe,EAAE,WAAW,EAAE,SAAS;IACxE,UAAU,EAAE,aAAa;IAEzB,8BAA8B;IAC9B,gBAAgB,EAAE,mBAAmB,EAAE,cAAc,EAAE,cAAc;IACrE,YAAY,EAAE,eAAe,EAAE,YAAY;IAE3C,+BAA+B;IAC/B,aAAa,EAAE,YAAY,EAAE,eAAe,EAAE,cAAc;IAC5D,eAAe,EAAE,eAAe;IAEhC,6BAA6B;IAC7B,aAAa,EAAE,cAAc,EAAE,aAAa,EAAE,QAAQ;IAEtD,qCAAqC;IACrC,aAAa,EAAE,aAAa,EAAE,eAAe;IAE7C,kBAAkB;IAClB,sBAAsB,EAAE,UAAU,EAAE,SAAS,EAAE,kBAAkB;IACjE,qBAAqB,EAAE,UAAU;IAEjC,wBAAwB;IACxB,2BAA2B;IAE3B,sBAAsB;IACtB,aAAa,EAAE,YAAY;CAC5B,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,CAAa;IACrC,2BAA2B;IAC3B,CAAC,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IAExC,yDAAyD;IACzD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;IAErI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACpB,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK;YAAE,OAAO;QAC9B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,CAAC;QAE1C,IAAI,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC;YAAE,OAAO;QAEtC,4CAA4C;QAC5C,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,MAAM,IAAI,
CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YACjD,GAAG,CAAC,MAAM,EAAE,CAAC;QACf,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,gFAAgF;IAChF,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACpB,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK;YAAE,OAAO;QAC9B,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,IAAI,EAAE,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAClC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,IAAI,KAAK,eAAe,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACjF,CAAC,CAAC,EAAE,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;YACzB,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,+BAA+B;IAC/B,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACpB,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK;YAAE,OAAO;QAC9B,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,IAAI,EAAE,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAClC,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1B,CAAC,CAAC,EAAE,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;YACzB,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,0BAA0B;IAC1B,CAAC,CAAC,SAAS,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IAEjC,6DAA6D;IAC7D,oCAAoC;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,CAAa;IAC3C,kCAAkC;IAClC,MAAM,SAAS,GAAG;QAChB,MAAM;QACN,SAAS;QACT,eAAe;QACf,eAAe;QACf,UAAU;QACV,eAAe;QACf,UAAU;QACV,eAAe;QACf,kBAAkB;QAClB,gBAAgB;QAChB,YAAY;QACZ,eAAe;KAChB,CAAC;IAEF,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;QAChC,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACjD,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content extraction pipeline
|
|
3
|
+
* Uses Readability for main content extraction + node-html-markdown for conversion
|
|
4
|
+
*/
|
|
5
|
+
export interface ExtractResult {
|
|
6
|
+
title: string;
|
|
7
|
+
markdown: string;
|
|
8
|
+
description: string;
|
|
9
|
+
author: string | null;
|
|
10
|
+
publishedDate: string | null;
|
|
11
|
+
links: string[];
|
|
12
|
+
wordCount: number;
|
|
13
|
+
byteSize: number;
|
|
14
|
+
}
|
|
15
|
+
export interface ExtractOptions {
|
|
16
|
+
includeLinks?: boolean;
|
|
17
|
+
maxContentLength?: number;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Content extractor: `extract` takes an HTML string and the page URL and returns
 * the page's Markdown content together with title, description, author,
 * published date, links, word count and byte size.
 * The private members are the per-field metadata helpers used by `extract`.
 */
export declare class Extractor {
|
|
20
|
+
extract(html: string, url: string, options?: ExtractOptions): ExtractResult;
|
|
21
|
+
private extractTitle;
|
|
22
|
+
private extractDescription;
|
|
23
|
+
private extractAuthor;
|
|
24
|
+
private extractDate;
|
|
25
|
+
}
|
|
26
|
+
export { cleanHtml, findMainContent } from './cleaner.js';
|
|
27
|
+
export { extractLinks, extractInternalLinks, extractLinksWithMeta } from './links.js';
|
|
28
|
+
export { htmlToMarkdown, markdownToPlainText } from './markdown.js';
|
|
29
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/extractor/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AASH,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,cAAc;IAC7B,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAOD,qBAAa,SAAS;IACpB,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,cAAmB,GAAG,aAAa;IAkE/E,OAAO,CAAC,YAAY;IAkCpB,OAAO,CAAC,kBAAkB;IAgB1B,OAAO,CAAC,aAAa;IAmBrB,OAAO,CAAC,WAAW;CAuBpB;AAED,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AACtF,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content extraction pipeline
|
|
3
|
+
* Uses Readability for main content extraction + node-html-markdown for conversion
|
|
4
|
+
*/
|
|
5
|
+
import { Readability } from '@mozilla/readability';
|
|
6
|
+
import { JSDOM } from 'jsdom';
|
|
7
|
+
import * as cheerio from 'cheerio';
|
|
8
|
+
import { cleanHtml, findMainContent } from './cleaner.js';
|
|
9
|
+
import { extractLinks } from './links.js';
|
|
10
|
+
import { htmlToMarkdown } from './markdown.js';
|
|
11
|
+
// Defaults merged underneath the caller-supplied options in Extractor.extract.
const DEFAULT_OPTIONS = {
|
|
12
|
+
includeLinks: true,
|
|
13
|
+
maxContentLength: 500000, // 500,000 characters: compared against markdown.length, not a byte count
|
|
14
|
+
};
|
|
15
|
+
export class Extractor {
|
|
16
|
+
extract(html, url, options = {}) {
|
|
17
|
+
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
18
|
+
const $ = cheerio.load(html);
|
|
19
|
+
// Extract metadata before cleaning
|
|
20
|
+
const title = this.extractTitle($, url);
|
|
21
|
+
const description = this.extractDescription($);
|
|
22
|
+
const author = this.extractAuthor($);
|
|
23
|
+
const publishedDate = this.extractDate($);
|
|
24
|
+
const links = opts.includeLinks ? extractLinks($, url) : [];
|
|
25
|
+
// Clean HTML
|
|
26
|
+
cleanHtml($);
|
|
27
|
+
// Try Readability first for article extraction
|
|
28
|
+
let markdown;
|
|
29
|
+
try {
|
|
30
|
+
const cleanedHtml = $.html();
|
|
31
|
+
const dom = new JSDOM(cleanedHtml, { url });
|
|
32
|
+
const reader = new Readability(dom.window.document, {
|
|
33
|
+
charThreshold: 50,
|
|
34
|
+
});
|
|
35
|
+
const article = reader.parse();
|
|
36
|
+
if (article?.content && article.textContent && article.textContent.length > 100) {
|
|
37
|
+
// Use node-html-markdown (faster than Turndown)
|
|
38
|
+
markdown = htmlToMarkdown(article.content);
|
|
39
|
+
// Use Readability's title if better
|
|
40
|
+
if (article.title && article.title.length > title.length) {
|
|
41
|
+
// title = article.title; // Uncomment if preferred
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
// Fallback: try to find main content area
|
|
46
|
+
const mainContent = findMainContent($);
|
|
47
|
+
if (mainContent) {
|
|
48
|
+
markdown = htmlToMarkdown(mainContent);
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
// Last resort: convert body
|
|
52
|
+
markdown = htmlToMarkdown($('body').html() || cleanedHtml);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
catch {
|
|
57
|
+
// Fallback if Readability fails
|
|
58
|
+
markdown = htmlToMarkdown($.html());
|
|
59
|
+
}
|
|
60
|
+
// Truncate if too long
|
|
61
|
+
if (opts.maxContentLength && markdown.length > opts.maxContentLength) {
|
|
62
|
+
markdown = markdown.slice(0, opts.maxContentLength) + '\n\n[Content truncated...]';
|
|
63
|
+
}
|
|
64
|
+
const wordCount = markdown.split(/\s+/).filter(Boolean).length;
|
|
65
|
+
return {
|
|
66
|
+
title,
|
|
67
|
+
markdown,
|
|
68
|
+
description,
|
|
69
|
+
author,
|
|
70
|
+
publishedDate,
|
|
71
|
+
links,
|
|
72
|
+
wordCount,
|
|
73
|
+
byteSize: Buffer.byteLength(markdown, 'utf8'),
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
extractTitle($, url) {
|
|
77
|
+
const sources = [
|
|
78
|
+
$('meta[property="og:title"]').attr('content'),
|
|
79
|
+
$('meta[name="twitter:title"]').attr('content'),
|
|
80
|
+
$('title').text(),
|
|
81
|
+
$('h1').first().text(),
|
|
82
|
+
];
|
|
83
|
+
for (const source of sources) {
|
|
84
|
+
if (source?.trim()) {
|
|
85
|
+
// Remove site name suffix (e.g., "Page Title | Site Name")
|
|
86
|
+
const cleaned = source.split(/\s*[|\-–—]\s*/)[0].trim();
|
|
87
|
+
if (cleaned.length > 0) {
|
|
88
|
+
return cleaned;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
// Fallback to URL path
|
|
93
|
+
try {
|
|
94
|
+
const pathname = new URL(url).pathname;
|
|
95
|
+
if (pathname && pathname !== '/') {
|
|
96
|
+
return pathname
|
|
97
|
+
.replace(/\/$/, '')
|
|
98
|
+
.split('/')
|
|
99
|
+
.pop()
|
|
100
|
+
.replace(/[-_]/g, ' ')
|
|
101
|
+
.replace(/\.\w+$/, '');
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
catch { }
|
|
105
|
+
return url;
|
|
106
|
+
}
|
|
107
|
+
extractDescription($) {
|
|
108
|
+
const sources = [
|
|
109
|
+
$('meta[property="og:description"]').attr('content'),
|
|
110
|
+
$('meta[name="description"]').attr('content'),
|
|
111
|
+
$('meta[name="twitter:description"]').attr('content'),
|
|
112
|
+
];
|
|
113
|
+
for (const source of sources) {
|
|
114
|
+
if (source?.trim()) {
|
|
115
|
+
return source.slice(0, 300);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
return '';
|
|
119
|
+
}
|
|
120
|
+
extractAuthor($) {
|
|
121
|
+
const sources = [
|
|
122
|
+
$('meta[name="author"]').attr('content'),
|
|
123
|
+
$('meta[property="article:author"]').attr('content'),
|
|
124
|
+
$('[rel="author"]').first().text(),
|
|
125
|
+
$('[itemprop="author"]').first().text(),
|
|
126
|
+
$('.author').first().text(),
|
|
127
|
+
$('[class*="author-name"]').first().text(),
|
|
128
|
+
];
|
|
129
|
+
for (const source of sources) {
|
|
130
|
+
if (source?.trim()) {
|
|
131
|
+
return source.trim().slice(0, 100);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
return null;
|
|
135
|
+
}
|
|
136
|
+
extractDate($) {
|
|
137
|
+
const sources = [
|
|
138
|
+
$('meta[property="article:published_time"]').attr('content'),
|
|
139
|
+
$('meta[name="date"]').attr('content'),
|
|
140
|
+
$('meta[name="publish-date"]').attr('content'),
|
|
141
|
+
$('time[datetime]').attr('datetime'),
|
|
142
|
+
$('[itemprop="datePublished"]').attr('content'),
|
|
143
|
+
$('[itemprop="datePublished"]').attr('datetime'),
|
|
144
|
+
];
|
|
145
|
+
for (const source of sources) {
|
|
146
|
+
if (source?.trim()) {
|
|
147
|
+
try {
|
|
148
|
+
const date = new Date(source);
|
|
149
|
+
if (!isNaN(date.getTime())) {
|
|
150
|
+
return date.toISOString();
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
catch { }
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
return null;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
export { cleanHtml, findMainContent } from './cleaner.js';
|
|
160
|
+
export { extractLinks, extractInternalLinks, extractLinksWithMeta } from './links.js';
|
|
161
|
+
export { htmlToMarkdown, markdownToPlainText } from './markdown.js';
|
|
162
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/extractor/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAkB/C,MAAM,eAAe,GAAmB;IACtC,YAAY,EAAE,IAAI;IAClB,gBAAgB,EAAE,MAAM,EAAE,oBAAoB;CAC/C,CAAC;AAEF,MAAM,OAAO,SAAS;IACpB,OAAO,CAAC,IAAY,EAAE,GAAW,EAAE,UAA0B,EAAE;QAC7D,MAAM,IAAI,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;QAChD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE7B,mCAAmC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACxC,MAAM,WAAW,GAAG,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC;QAC/C,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE5D,aAAa;QACb,SAAS,CAAC,CAAC,CAAC,CAAC;QAEb,+CAA+C;QAC/C,IAAI,QAAgB,CAAC;QACrB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAC7B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAC5C,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE;gBAClD,aAAa,EAAE,EAAE;aAClB,CAAC,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;YAE/B,IAAI,OAAO,EAAE,OAAO,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAChF,gDAAgD;gBAChD,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;gBAE3C,oCAAoC;gBACpC,IAAI,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;oBACzD,mDAAmD;gBACrD,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,0CAA0C;gBAC1C,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;gBACvC,IAAI,WAAW,EAAE,CAAC;oBAChB,QAAQ,GAAG,cAAc,CAAC,WAAW,CAAC,CAAC;gBACzC,CAAC;qBAAM,CAAC;oBACN,4BAA4B;oBAC5B,QAAQ,GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,WAAW,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,gCAAgC;YAChC,QAAQ,GAAG,cAA
c,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACtC,CAAC;QAED,uBAAuB;QACvB,IAAI,IAAI,CAAC,gBAAgB,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACrE,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,gBAAgB,CAAC,GAAG,4BAA4B,CAAC;QACrF,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAE/D,OAAO;YACL,KAAK;YACL,QAAQ;YACR,WAAW;YACX,MAAM;YACN,aAAa;YACb,KAAK;YACL,SAAS;YACT,QAAQ,EAAE,MAAM,CAAC,UAAU,CAAC,QAAQ,EAAE,MAAM,CAAC;SAC9C,CAAC;IACJ,CAAC;IAEO,YAAY,CAAC,CAAqB,EAAE,GAAW;QACrD,MAAM,OAAO,GAAG;YACd,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YAC9C,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YAC/C,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE;YACjB,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE;SACvB,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;gBACnB,2DAA2D;gBAC3D,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBACxD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACvB,OAAO,OAAO,CAAC;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,IAAI,QAAQ,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;gBACjC,OAAO,QAAQ;qBACZ,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;qBAClB,KAAK,CAAC,GAAG,CAAC;qBACV,GAAG,EAAG;qBACN,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;qBACrB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QAEV,OAAO,GAAG,CAAC;IACb,CAAC;IAEO,kBAAkB,CAAC,CAAqB;QAC9C,MAAM,OAAO,GAAG;YACd,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YACpD,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YAC7C,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;SACtD,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;gBACnB,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,aAAa,CAAC,CAAqB;QACzC,MAAM,OAAO,GAAG;YACd,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YACxC,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YACpD,CAAC,CAAC,gBAAgB,CAAC,CAAC,KAA
K,EAAE,CAAC,IAAI,EAAE;YAClC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE;YACvC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE;YAC3B,CAAC,CAAC,wBAAwB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE;SAC3C,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;gBACnB,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,WAAW,CAAC,CAAqB;QACvC,MAAM,OAAO,GAAG;YACd,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YAC5D,CAAC,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YACtC,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YAC9C,CAAC,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC;YACpC,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;YAC/C,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC;SACjD,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;gBACnB,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,CAAC;oBAC9B,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC;wBAC3B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;oBAC5B,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC,CAAA,CAAC;YACZ,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAED,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AACtF,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC"}
|