@memvid/maw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -0
- package/dist/bin/maw.d.ts +6 -0
- package/dist/bin/maw.d.ts.map +1 -0
- package/dist/bin/maw.js +275 -0
- package/dist/bin/maw.js.map +1 -0
- package/dist/src/crawler/index.d.ts +71 -0
- package/dist/src/crawler/index.d.ts.map +1 -0
- package/dist/src/crawler/index.js +249 -0
- package/dist/src/crawler/index.js.map +1 -0
- package/dist/src/crawler/robots.d.ts +26 -0
- package/dist/src/crawler/robots.d.ts.map +1 -0
- package/dist/src/crawler/robots.js +179 -0
- package/dist/src/crawler/robots.js.map +1 -0
- package/dist/src/crawler/sitemap.d.ts +36 -0
- package/dist/src/crawler/sitemap.d.ts.map +1 -0
- package/dist/src/crawler/sitemap.js +209 -0
- package/dist/src/crawler/sitemap.js.map +1 -0
- package/dist/src/engine/detector.d.ts +18 -0
- package/dist/src/engine/detector.d.ts.map +1 -0
- package/dist/src/engine/detector.js +155 -0
- package/dist/src/engine/detector.js.map +1 -0
- package/dist/src/engine/fetch.d.ts +18 -0
- package/dist/src/engine/fetch.d.ts.map +1 -0
- package/dist/src/engine/fetch.js +53 -0
- package/dist/src/engine/fetch.js.map +1 -0
- package/dist/src/engine/index.d.ts +39 -0
- package/dist/src/engine/index.d.ts.map +1 -0
- package/dist/src/engine/index.js +116 -0
- package/dist/src/engine/index.js.map +1 -0
- package/dist/src/engine/playwright.d.ts +23 -0
- package/dist/src/engine/playwright.d.ts.map +1 -0
- package/dist/src/engine/playwright.js +88 -0
- package/dist/src/engine/playwright.js.map +1 -0
- package/dist/src/engine/rebrowser.d.ts +22 -0
- package/dist/src/engine/rebrowser.d.ts.map +1 -0
- package/dist/src/engine/rebrowser.js +142 -0
- package/dist/src/engine/rebrowser.js.map +1 -0
- package/dist/src/extractor/cleaner.d.ts +13 -0
- package/dist/src/extractor/cleaner.d.ts.map +1 -0
- package/dist/src/extractor/cleaner.js +122 -0
- package/dist/src/extractor/cleaner.js.map +1 -0
- package/dist/src/extractor/index.d.ts +29 -0
- package/dist/src/extractor/index.d.ts.map +1 -0
- package/dist/src/extractor/index.js +162 -0
- package/dist/src/extractor/index.js.map +1 -0
- package/dist/src/extractor/links.d.ts +22 -0
- package/dist/src/extractor/links.d.ts.map +1 -0
- package/dist/src/extractor/links.js +92 -0
- package/dist/src/extractor/links.js.map +1 -0
- package/dist/src/extractor/markdown.d.ts +13 -0
- package/dist/src/extractor/markdown.d.ts.map +1 -0
- package/dist/src/extractor/markdown.js +94 -0
- package/dist/src/extractor/markdown.js.map +1 -0
- package/dist/src/git/index.d.ts +40 -0
- package/dist/src/git/index.d.ts.map +1 -0
- package/dist/src/git/index.js +303 -0
- package/dist/src/git/index.js.map +1 -0
- package/dist/src/index.d.ts +103 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +229 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/ingestor/index.d.ts +95 -0
- package/dist/src/ingestor/index.d.ts.map +1 -0
- package/dist/src/ingestor/index.js +471 -0
- package/dist/src/ingestor/index.js.map +1 -0
- package/dist/src/utils/dedup.d.ts +66 -0
- package/dist/src/utils/dedup.d.ts.map +1 -0
- package/dist/src/utils/dedup.js +296 -0
- package/dist/src/utils/dedup.js.map +1 -0
- package/dist/src/utils/index.d.ts +3 -0
- package/dist/src/utils/index.d.ts.map +1 -0
- package/dist/src/utils/index.js +3 -0
- package/dist/src/utils/index.js.map +1 -0
- package/dist/src/utils/logger.d.ts +12 -0
- package/dist/src/utils/logger.d.ts.map +1 -0
- package/dist/src/utils/logger.js +49 -0
- package/dist/src/utils/logger.js.map +1 -0
- package/dist/src/utils/ui.d.ts +126 -0
- package/dist/src/utils/ui.d.ts.map +1 -0
- package/dist/src/utils/ui.js +357 -0
- package/dist/src/utils/ui.js.map +1 -0
- package/dist/src/utils/url.d.ts +21 -0
- package/dist/src/utils/url.d.ts.map +1 -0
- package/dist/src/utils/url.js +107 -0
- package/dist/src/utils/url.js.map +1 -0
- package/package.json +71 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sitemap.xml parser using sitemapper
|
|
3
|
+
*/
|
|
4
|
+
import * as cheerio from 'cheerio';
|
|
5
|
+
import Sitemapper from 'sitemapper';
|
|
6
|
+
/**
 * Discovers and parses sitemap files for a site.
 *
 * Two entry points are offered:
 *  - `parse()` / `parseUrl()` return plain URL strings (capped at 200) using
 *    a direct fetch plus cheerio XML parsing.
 *  - `parseWithMetadata()` returns `{ loc, lastmod?, changefreq?, priority? }`
 *    entries (capped at 1000), trying news sitemaps first and falling back to
 *    the `sitemapper` client.
 *
 * All public methods are best-effort: any failure yields an empty array
 * instead of throwing.
 */
export class SitemapParser {
    /** Cache of URL lists produced by `parse()`, keyed by URL origin. */
    cache = new Map();
    /** `sitemapper` client used as the fallback in `parseWithMetadata()`. */
    sitemapper;
    constructor() {
        this.sitemapper = new Sitemapper({
            timeout: 10000,
            requestHeaders: {
                'User-Agent': 'maw/1.0 (sitemap crawler)',
            },
        });
    }
    /**
     * Locate a sitemap for the site that `url` belongs to and return its URLs.
     *
     * Tries `/sitemap.xml`, `/sitemap_index.xml` and `/sitemap/sitemap.xml`
     * in that order and stops at the first location that yields any URL.
     * The result (even when empty) is cached per origin.
     *
     * @param {string} url - Any absolute URL on the target site.
     * @returns {Promise<string[]>} Up to 200 URLs; `[]` when `url` is invalid
     *   or no sitemap could be read.
     */
    async parse(url) {
        try {
            const parsedUrl = new URL(url);
            // Key by origin (scheme + host + port), not hostname: the sitemap
            // locations below are origin-specific, so two ports or schemes on
            // the same host must not share a cache entry.
            const cacheKey = parsedUrl.origin;
            if (this.cache.has(cacheKey)) {
                return this.cache.get(cacheKey);
            }
            const urls = [];
            // Common sitemap locations, most likely first.
            const sitemapUrls = [
                `${parsedUrl.origin}/sitemap.xml`,
                `${parsedUrl.origin}/sitemap_index.xml`,
                `${parsedUrl.origin}/sitemap/sitemap.xml`,
            ];
            for (const sitemapUrl of sitemapUrls) {
                try {
                    const sitemapContent = await this.fetchSitemap(sitemapUrl);
                    if (sitemapContent) {
                        const parsed = await this.parseSitemapContent(sitemapContent, parsedUrl.origin);
                        urls.push(...parsed);
                        if (urls.length > 0) {
                            break;
                        }
                    }
                }
                catch {
                    // Best-effort: try the next location.
                }
            }
            this.cache.set(cacheKey, urls);
            return urls;
        }
        catch {
            // Invalid URL or unexpected failure: report "no sitemap".
            return [];
        }
    }
    /**
     * Parse the sitemap (or sitemap index) at an explicit location.
     *
     * @param {string} sitemapUrl - Absolute URL of the sitemap file.
     * @returns {Promise<string[]>} Up to 200 URLs; `[]` on any failure.
     */
    async parseUrl(sitemapUrl) {
        try {
            const content = await this.fetchSitemap(sitemapUrl);
            if (!content) {
                return [];
            }
            const origin = new URL(sitemapUrl).origin;
            return this.parseSitemapContent(content, origin);
        }
        catch {
            return [];
        }
    }
    /**
     * Download a sitemap document as text.
     *
     * @param {string} url - Absolute URL to fetch.
     * @returns {Promise<string|null>} The response body, or `null` on a
     *   non-OK status, network error or timeout.
     */
    async fetchSitemap(url) {
        try {
            const response = await fetch(url, {
                headers: {
                    'User-Agent': 'maw/1.0',
                    'Accept': 'application/xml, text/xml, */*',
                },
                signal: AbortSignal.timeout(5000), // 5s timeout - fail fast
            });
            if (!response.ok) {
                return null;
            }
            return await response.text();
        }
        catch {
            return null;
        }
    }
    /**
     * Extract page URLs from sitemap XML, following a sitemap index one level.
     *
     * For a `<sitemapindex>` only the first two child sitemaps are fetched
     * (speed over completeness) and each is read as a plain `<urlset>`.
     *
     * @param {string} content - Sitemap XML.
     * @param {string} origin - Origin of the site; currently unused.
     * @returns {Promise<string[]>} At most 200 URLs.
     */
    async parseSitemapContent(content, origin) {
        const urls = [];
        const MAX_URLS = 200; // Don't parse more than we need
        if (content.includes('<sitemapindex')) {
            const indexUrls = this.parseSitemapIndex(content);
            for (const indexUrl of indexUrls.slice(0, 2)) {
                if (urls.length >= MAX_URLS) {
                    break; // Early exit
                }
                try {
                    const subContent = await this.fetchSitemap(indexUrl);
                    if (subContent) {
                        const subUrls = this.parseUrlset(subContent);
                        urls.push(...subUrls.slice(0, MAX_URLS - urls.length));
                    }
                }
                catch {
                    // Skip failed sitemaps
                }
            }
        }
        else {
            // Regular sitemap - limit to MAX_URLS
            const parsed = this.parseUrlset(content);
            urls.push(...parsed.slice(0, MAX_URLS));
        }
        return urls.slice(0, MAX_URLS);
    }
    /**
     * Collect the `<loc>` values of every `<sitemap>` entry in an index.
     *
     * @param {string} content - Sitemap index XML.
     * @returns {string[]} Child sitemap URLs in document order.
     */
    parseSitemapIndex(content) {
        const $ = cheerio.load(content, { xmlMode: true });
        const urls = [];
        $('sitemap loc').each((_, el) => {
            const loc = $(el).text().trim();
            if (loc) {
                urls.push(loc);
            }
        });
        return urls;
    }
    /**
     * Collect the `<loc>` values of every `<url>` entry in a urlset.
     *
     * @param {string} content - Sitemap XML.
     * @returns {string[]} Page URLs in document order.
     */
    parseUrlset(content) {
        const $ = cheerio.load(content, { xmlMode: true });
        const urls = [];
        $('url loc').each((_, el) => {
            const loc = $(el).text().trim();
            if (loc) {
                urls.push(loc);
            }
        });
        return urls;
    }
    /**
     * Return sitemap entries with metadata for the site `url` belongs to.
     *
     * News sitemaps are tried first (more recent content). If none yields
     * entries, falls back to `sitemapper` on `/sitemap.xml` and then
     * `/sitemap_index.xml`; those fallback entries carry only `loc`.
     *
     * @param {string} url - Any absolute URL on the target site.
     * @returns {Promise<Array<{loc: string, lastmod?: string, changefreq?: string, priority?: number}>>}
     *   Up to 1000 entries; `[]` on any failure.
     */
    async parseWithMetadata(url) {
        try {
            const parsedUrl = new URL(url);
            const origin = parsedUrl.origin;
            const newsSitemapUrls = [
                `${origin}/sitemap/news.xml`,
                `${origin}/news-sitemap.xml`,
                `${origin}/sitemap-news.xml`,
                `${origin}/sitemaps/news.xml`,
            ];
            for (const sitemapUrl of newsSitemapUrls) {
                try {
                    const content = await this.fetchSitemap(sitemapUrl);
                    if (content && content.includes('<url')) {
                        const urls = this.parseWithMeta(content);
                        if (urls.length > 0) {
                            return urls.slice(0, 1000);
                        }
                    }
                }
                catch {
                    // Try next
                }
            }
            // Fall back to sitemapper for regular sitemaps
            const result = await this.sitemapper.fetch(`${origin}/sitemap.xml`);
            if (result.sites && result.sites.length > 0) {
                return result.sites.slice(0, 1000).map(loc => ({ loc }));
            }
            // Try sitemap index
            const indexResult = await this.sitemapper.fetch(`${origin}/sitemap_index.xml`);
            if (indexResult.sites && indexResult.sites.length > 0) {
                return indexResult.sites.slice(0, 1000).map(loc => ({ loc }));
            }
            return [];
        }
        catch {
            return [];
        }
    }
    /**
     * Parse every `<url>` entry of a sitemap into an object with metadata.
     *
     * Entries without a `<loc>` are skipped. Optional fields are set only
     * when present; `priority` is set only when it parses to a finite number,
     * so a malformed value never surfaces as `NaN`.
     *
     * @param {string} content - Sitemap XML.
     * @returns {Array<{loc: string, lastmod?: string, changefreq?: string, priority?: number}>}
     */
    parseWithMeta(content) {
        const $ = cheerio.load(content, { xmlMode: true });
        const urls = [];
        $('url').each((_, el) => {
            const $el = $(el);
            const loc = $el.find('loc').text().trim();
            if (!loc) {
                return;
            }
            const entry = { loc };
            const lastmod = $el.find('lastmod').text().trim();
            if (lastmod) {
                entry.lastmod = lastmod;
            }
            const changefreq = $el.find('changefreq').text().trim();
            if (changefreq) {
                entry.changefreq = changefreq;
            }
            const priority = Number.parseFloat($el.find('priority').text().trim());
            if (Number.isFinite(priority)) {
                entry.priority = priority;
            }
            urls.push(entry);
        });
        return urls;
    }
}
|
|
209
|
+
//# sourceMappingURL=sitemap.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap.js","sourceRoot":"","sources":["../../../src/crawler/sitemap.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,UAAU,MAAM,YAAY,CAAC;AASpC,MAAM,OAAO,aAAa;IAChB,KAAK,GAA0B,IAAI,GAAG,EAAE,CAAC;IACzC,UAAU,CAAa;IAE/B;QACE,IAAI,CAAC,UAAU,GAAG,IAAI,UAAU,CAAC;YAC/B,OAAO,EAAE,KAAK;YACd,cAAc,EAAE;gBACd,YAAY,EAAE,2BAA2B;aAC1C;SACF,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,GAAW;QACrB,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAC/B,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;YAEhC,cAAc;YACd,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;YAC/B,CAAC;YAED,MAAM,IAAI,GAAa,EAAE,CAAC;YAE1B,+BAA+B;YAC/B,MAAM,WAAW,GAAG;gBAClB,GAAG,SAAS,CAAC,MAAM,cAAc;gBACjC,GAAG,SAAS,CAAC,MAAM,oBAAoB;gBACvC,GAAG,SAAS,CAAC,MAAM,sBAAsB;aAC1C,CAAC;YAEF,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;gBACrC,IAAI,CAAC;oBACH,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;oBAC3D,IAAI,cAAc,EAAE,CAAC;wBACnB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,cAAc,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;wBAChF,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;wBACrB,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;4BAAE,MAAM;oBAC7B,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,oBAAoB;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YAC3B,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,UAAkB;QAC/B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;YACpD,IAAI,CAAC,OAAO;gBAAE,OAAO,EAAE,CAAC;YAExB,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;YAC1C,OAAO,IAAI,CAAC,mBAAmB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,GAAW;QACpC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,YAAY,EAAE,SAAS;oBACvB,QAAQ,EAAE,gCAAgC;iBAC3C;gBACD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,yBAAyB;aAC7D,CAAC,CAAC;YAEH,IAAI,
CAAC,QAAQ,CAAC,EAAE;gBAAE,OAAO,IAAI,CAAC;YAE9B,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,mBAAmB,CAAC,OAAe,EAAE,MAAc;QAC/D,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,gCAAgC;QAEtD,gCAAgC;QAChC,IAAI,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;YACtC,MAAM,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;YAElD,4DAA4D;YAC5D,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC7C,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ;oBAAE,MAAM,CAAC,aAAa;gBACjD,IAAI,CAAC;oBACH,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;oBACrD,IAAI,UAAU,EAAE,CAAC;wBACf,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;wBAC7C,IAAI,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;oBACzD,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,uBAAuB;gBACzB,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,sCAAsC;YACtC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC1C,CAAC;QAED,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACjC,CAAC;IAEO,iBAAiB,CAAC,OAAe;QACvC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QACnD,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,CAAC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC9B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAChC,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,WAAW,CAAC,OAAe;QACjC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QACnD,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC1B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAChC,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,iBAAiB,CAAC,GAAW;QACjC,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAA
G,CAAC,CAAC;YAC/B,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;YAEhC,gDAAgD;YAChD,MAAM,eAAe,GAAG;gBACtB,GAAG,MAAM,mBAAmB;gBAC5B,GAAG,MAAM,mBAAmB;gBAC5B,GAAG,MAAM,mBAAmB;gBAC5B,GAAG,MAAM,oBAAoB;aAC9B,CAAC;YAEF,0BAA0B;YAC1B,KAAK,MAAM,UAAU,IAAI,eAAe,EAAE,CAAC;gBACzC,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;oBACpD,IAAI,OAAO,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBACxC,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;wBACzC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;4BACpB,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;wBAC7B,CAAC;oBACH,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,WAAW;gBACb,CAAC;YACH,CAAC;YAED,+CAA+C;YAC/C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,MAAM,cAAc,CAAC,CAAC;YACpE,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5C,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;YAC3D,CAAC;YAED,oBAAoB;YACpB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,MAAM,oBAAoB,CAAC,CAAC;YAC/E,IAAI,WAAW,CAAC,KAAK,IAAI,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtD,OAAO,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;YAChE,CAAC;YAED,OAAO,EAAE,CAAC;QACZ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,OAAe;QAC3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QACnD,MAAM,IAAI,GAAiB,EAAE,CAAC;QAE9B,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;YAClB,MAAM,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAE1C,IAAI,GAAG,EAAE,CAAC;gBACR,MAAM,KAAK,GAAe,EAAE,GAAG,EAAE,CAAC;gBAElC,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAClD,IAAI,OAAO;oBAAE,KAAK,CAAC,OAAO,GAAG,OAAO,CAAC;gBAErC,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACxD,IAAI,U
AAU;oBAAE,KAAK,CAAC,UAAU,GAAG,UAAU,CAAC;gBAE9C,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACpD,IAAI,QAAQ;oBAAE,KAAK,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;gBAEpD,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Block detection heuristics for anti-bot systems
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Machine-readable reason attached to a positive block detection.
 */
export type BlockReason = 'cloudflare' | 'datadome' | 'captcha' | 'rate_limit' | 'forbidden' | 'empty_body' | 'javascript_required' | 'access_denied';
|
|
5
|
+
/**
 * Result of a block-detection check on an HTTP response.
 */
export interface BlockCheck {
    /** True when the response looks like an anti-bot block or challenge page. */
    blocked: boolean;
    /** Which heuristic matched; absent when `blocked` is false. */
    reason?: BlockReason;
    /** Heuristic confidence; 0 when not blocked, higher for stronger signals. */
    confidence: number;
}
|
|
10
|
+
/**
 * Check if the response indicates blocking.
 *
 * @param html - Response body text.
 * @param statusCode - HTTP status code of the response.
 * @param url - URL that was requested.
 * @returns Whether the response looks blocked, with a reason and confidence.
 */
export declare function isBlocked(html: string, statusCode: number, url: string): BlockCheck;
|
|
14
|
+
/**
 * Quick pre-check based on known protected domains.
 *
 * @param url - Absolute URL to check.
 * @returns True when the URL's host is a known protected domain or one of
 *   its subdomains; false otherwise, including when `url` cannot be parsed.
 */
export declare function needsBrowser(url: string): boolean;
|
|
18
|
+
//# sourceMappingURL=detector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../../../src/engine/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,MAAM,WAAW,GACnB,YAAY,GACZ,UAAU,GACV,SAAS,GACT,YAAY,GACZ,WAAW,GACX,YAAY,GACZ,qBAAqB,GACrB,eAAe,CAAC;AAEpB,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;AA8DD;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,UAAU,CAuEnF;AAgBD;;GAEG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAoBjD"}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Block detection heuristics for anti-bot systems
|
|
3
|
+
*/
|
|
4
|
+
// Cloudflare challenge patterns
const CLOUDFLARE_PATTERNS = [
    /Checking your browser/i,
    /cf-browser-verification/i,
    /cloudflare/i,
    /_cf_chl_opt/,
    /challenge-platform/i,
    /Just a moment\.\.\./i,
    /ray id:/i,
    /cf-turnstile/i,
];
// DataDome patterns
const DATADOME_PATTERNS = [
    /datadome/i,
    /dd\.js/,
    /captcha-delivery\.com/,
    /geo\.captcha-delivery\.com/,
];
// PerimeterX / HUMAN patterns
const PERIMETERX_PATTERNS = [
    /perimeterx/i,
    /px-captcha/i,
    /_pxhd/,
    /human challenge/i,
];
// Akamai Bot Manager patterns
const AKAMAI_PATTERNS = [
    /akamai/i,
    /ak_bmsc/,
    /_abck/,
];
// Generic bot detection patterns
const BOT_DETECTION_PATTERNS = [
    /access denied/i,
    /please verify you are human/i,
    /enable javascript/i,
    /browser.*not supported/i,
    /automated access/i,
    /bot detected/i,
    /please complete the security check/i,
    /unusual traffic/i,
    /blocked/i,
    /forbidden/i,
    /not allowed/i,
];
// Captcha patterns
const CAPTCHA_PATTERNS = [
    /recaptcha/i,
    /hcaptcha/i,
    /g-recaptcha/,
    /h-captcha/,
    /captcha/i,
    /turnstile/i,
];
/**
 * Check if the response indicates blocking.
 *
 * Heuristics run from strongest to weakest signal and the first match wins:
 * status code, then vendor-specific markers (Cloudflare, DataDome,
 * PerimeterX, Akamai), then captcha markers, then generic phrases. Most
 * content checks only fire on short pages, because real block pages are
 * small while long pages mentioning the same words are usually content.
 *
 * @param {string} html - Response body. A missing or non-string body is
 *   treated as empty rather than throwing.
 * @param {number} statusCode - HTTP status code of the response.
 * @param {string} url - Requested URL (currently unused by the heuristics).
 * @returns {{blocked: boolean, reason?: string, confidence: number}}
 *   `blocked: false` with confidence 0 when nothing matched.
 */
export function isBlocked(html, statusCode, url) {
    // Normalize once so every later `.length` / `.includes` call is safe.
    const body = typeof html === 'string' ? html : '';
    const matchCount = (patterns) => patterns.filter((p) => p.test(body)).length;
    const matchesAny = (patterns) => patterns.some((p) => p.test(body));
    // Status code checks
    if (statusCode === 403) {
        return { blocked: true, reason: 'forbidden', confidence: 0.9 };
    }
    if (statusCode === 429) {
        return { blocked: true, reason: 'rate_limit', confidence: 0.95 };
    }
    // A 503 could be a Cloudflare challenge or an actual server error.
    if (statusCode === 503 && matchesAny(CLOUDFLARE_PATTERNS)) {
        return { blocked: true, reason: 'cloudflare', confidence: 0.95 };
    }
    // Minimal body: some pages are legitimately small, so require a tell-tale word.
    if (body.length < 500 && (body.includes('challenge') || body.includes('captcha'))) {
        return { blocked: true, reason: 'empty_body', confidence: 0.7 };
    }
    // Cloudflare
    if (matchCount(CLOUDFLARE_PATTERNS) >= 2) {
        return { blocked: true, reason: 'cloudflare', confidence: 0.9 };
    }
    // DataDome - only flag if page is SHORT (actual block pages are small)
    if (matchCount(DATADOME_PATTERNS) >= 2 && body.length < 50000) {
        return { blocked: true, reason: 'datadome', confidence: 0.9 };
    }
    // PerimeterX - only flag if page is short
    if (matchesAny(PERIMETERX_PATTERNS) && body.length < 50000) {
        return { blocked: true, reason: 'access_denied', confidence: 0.85 };
    }
    // Akamai - only flag if page is short
    if (matchCount(AKAMAI_PATTERNS) >= 2 && body.length < 50000) {
        return { blocked: true, reason: 'access_denied', confidence: 0.8 };
    }
    // Captcha - long pages with captcha are probably just forms, not block pages
    if (matchCount(CAPTCHA_PATTERNS) >= 2 && body.length < 10000) {
        return { blocked: true, reason: 'captcha', confidence: 0.85 };
    }
    // Generic bot detection - only if page is very short
    if (matchesAny(BOT_DETECTION_PATTERNS) && body.length < 3000) {
        return { blocked: true, reason: 'access_denied', confidence: 0.7 };
    }
    // JavaScript required (no actual content). Matched case-insensitively so
    // the usual "enable JavaScript" capitalisation is recognised too.
    const jsRequiredPhrase = /enable javascript|javascript is required/i;
    if (body.includes('noscript') &&
        jsRequiredPhrase.test(body) &&
        !body.includes('<article') &&
        !body.includes('<main') &&
        body.length < 5000) {
        return { blocked: true, reason: 'javascript_required', confidence: 0.6 };
    }
    return { blocked: false, confidence: 0 };
}
|
|
120
|
+
// Known protected domains that typically need browser
const KNOWN_PROTECTED_DOMAINS = new Set([
    'linkedin.com',
    'instagram.com',
    'facebook.com',
    'twitter.com',
    'x.com',
    'tiktok.com',
    'indeed.com',
    'glassdoor.com',
    'zillow.com',
    'yelp.com',
]);
/**
 * Quick pre-check based on known protected domains.
 *
 * @param {string} url - Absolute URL to check.
 * @returns {boolean} True when the URL's host is one of the known protected
 *   domains or any subdomain of one; false otherwise, including when `url`
 *   cannot be parsed.
 */
export function needsBrowser(url) {
    try {
        // Strip only a LEADING "www." - an unanchored replace would also
        // rewrite hosts such as "mywww.linkedin.com" into "mylinkedin.com".
        const hostname = new URL(url).hostname.replace(/^www\./, '');
        // Check exact match
        if (KNOWN_PROTECTED_DOMAINS.has(hostname)) {
            return true;
        }
        // Check if subdomain of protected domain
        for (const domain of KNOWN_PROTECTED_DOMAINS) {
            if (hostname.endsWith(`.${domain}`)) {
                return true;
            }
        }
        return false;
    }
    catch {
        // Unparseable URL: no opinion, let the default engine handle it.
        return false;
    }
}
|
|
155
|
+
//# sourceMappingURL=detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.js","sourceRoot":"","sources":["../../../src/engine/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAkBH,gCAAgC;AAChC,MAAM,mBAAmB,GAAG;IAC1B,wBAAwB;IACxB,0BAA0B;IAC1B,aAAa;IACb,aAAa;IACb,qBAAqB;IACrB,sBAAsB;IACtB,UAAU;IACV,eAAe;CAChB,CAAC;AAEF,oBAAoB;AACpB,MAAM,iBAAiB,GAAG;IACxB,WAAW;IACX,QAAQ;IACR,uBAAuB;IACvB,4BAA4B;CAC7B,CAAC;AAEF,8BAA8B;AAC9B,MAAM,mBAAmB,GAAG;IAC1B,aAAa;IACb,aAAa;IACb,OAAO;IACP,kBAAkB;CACnB,CAAC;AAEF,8BAA8B;AAC9B,MAAM,eAAe,GAAG;IACtB,SAAS;IACT,SAAS;IACT,OAAO;CACR,CAAC;AAEF,iCAAiC;AACjC,MAAM,sBAAsB,GAAG;IAC7B,gBAAgB;IAChB,8BAA8B;IAC9B,oBAAoB;IACpB,yBAAyB;IACzB,mBAAmB;IACnB,eAAe;IACf,qCAAqC;IACrC,kBAAkB;IAClB,UAAU;IACV,YAAY;IACZ,cAAc;CACf,CAAC;AAEF,mBAAmB;AACnB,MAAM,gBAAgB,GAAG;IACvB,YAAY;IACZ,WAAW;IACX,aAAa;IACb,WAAW;IACX,UAAU;IACV,YAAY;CACb,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,UAAkB,EAAE,GAAW;IACrE,qBAAqB;IACrB,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QACvB,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IACjE,CAAC;IAED,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QACvB,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACnE,CAAC;IAED,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;QACvB,uDAAuD;QACvD,IAAI,mBAAmB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YAChD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;QACnE,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QAC/B,+DAA+D;QAC/D,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;YACrE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;QAClE,CAAC;IACH,CAAC;IAED,aAAa;IACb,MAAM,SAAS,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAChE,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IAClE,CAAC;IAED,uEAAuE;IACvE,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,
CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QACnF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IAChE,CAAC;IAED,0CAA0C;IAC1C,IAAI,mBAAmB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QACvE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACtE,CAAC;IAED,sCAAsC;IACtC,IAAI,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QACjF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IACrE,CAAC;IAED,oEAAoE;IACpE,mEAAmE;IACnE,MAAM,cAAc,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;IACzE,IAAI,cAAc,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QAC/C,yEAAyE;QACzE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IAChE,CAAC;IAED,qDAAqD;IACrD,IAAI,sBAAsB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;QACzE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IACrE,CAAC;IAED,0CAA0C;IAC1C,IACE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzB,CAAC,IAAI,CAAC,QAAQ,CAAC,mBAAmB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,wBAAwB,CAAC,CAAC;QAC/E,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC1B,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,IAAI,EAClB,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,qBAAqB,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;IAC3E,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;AAC3C,CAAC;AAED,sDAAsD;AACtD,MAAM,uBAAuB,GAAG,IAAI,GAAG,CAAC;IACtC,cAAc;IACd,eAAe;IACf,cAAc;IACd,aAAa;IACb,OAAO;IACP,YAAY;IACZ,YAAY;IACZ,eAAe;IACf,YAAY;IACZ,UAAU;CACX,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAE3D,oBAAoB;QACpB,IAAI,uBAAuB,CAAC,GAAG,CAAC,QAAQ,
CAAC,EAAE,CAAC;YAC1C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,yCAAyC;QACzC,KAAK,MAAM,MAAM,IAAI,uBAAuB,EAAE,CAAC;YAC7C,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,MAAM,EAAE,CAAC,EAAE,CAAC;gBACpC,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fast HTTP fetch engine - default for most sites
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Optional settings for a single `FetchEngine.fetch` call.
 */
export interface FetchOptions {
    /** Request timeout in milliseconds; the engine falls back to 5000 when unset. */
    timeout?: number;
    /** `User-Agent` header value; the engine falls back to a desktop Chrome string when unset. */
    userAgent?: string;
    /** Extra request headers, merged over the engine's default headers. */
    headers?: Record<string, string>;
}
|
|
9
|
+
/**
 * Outcome of a completed `FetchEngine.fetch` call.
 */
export interface FetchResult {
    /** Response body read as text. */
    html: string;
    /** HTTP status code of the response. */
    statusCode: number;
    /** Response headers as a plain name-to-value object. */
    headers: Record<string, string>;
    /** URL reported by the response (redirects are followed). */
    finalUrl: string;
}
|
|
15
|
+
/**
 * Fast HTTP fetch engine built on the global `fetch`.
 */
export declare class FetchEngine {
    /**
     * Fetch `url` and return its body, status, headers and final URL.
     * Rejects with an `Error` whose message is `Timeout after <n>ms` when the
     * request is aborted by the timeout; other fetch errors are rethrown.
     */
    fetch(url: string, options?: FetchOptions): Promise<FetchResult>;
}
|
|
18
|
+
//# sourceMappingURL=fetch.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../../../src/engine/fetch.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,YAAY;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,QAAQ,EAAE,MAAM,CAAC;CAClB;AAqBD,qBAAa,WAAW;IAChB,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;CAoC3E"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fast HTTP fetch engine - default for most sites
|
|
3
|
+
*/
|
|
4
|
+
// Desktop Chrome identity sent when the caller does not supply a user agent.
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
// Browser-like request headers; caller-supplied headers are merged over these.
const DEFAULT_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Cache-Control': 'no-cache',
    'Pragma': 'no-cache',
    'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"macOS"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
};
/**
 * Fast HTTP fetch engine built on the global `fetch`.
 */
export class FetchEngine {
    /**
     * Fetch a page and return its body together with response metadata.
     *
     * The timeout covers the whole exchange - connecting, receiving headers
     * AND reading the body - so a server that stalls mid-body is aborted too.
     *
     * @param {string} url - Absolute URL to request.
     * @param {{timeout?: number, userAgent?: string, headers?: Record<string, string>}} [options]
     *   `timeout` in milliseconds (default 5000), `userAgent` override, and
     *   extra `headers` merged over the defaults.
     * @returns {Promise<{html: string, statusCode: number, headers: Record<string, string>, finalUrl: string}>}
     * @throws {Error} `Timeout after <n>ms` when the timeout aborts the
     *   request; any other fetch error is rethrown unchanged.
     */
    async fetch(url, options = {}) {
        const controller = new AbortController();
        const timeout = options.timeout || 5000;
        const timeoutId = setTimeout(() => controller.abort(), timeout);
        try {
            const response = await fetch(url, {
                headers: {
                    'User-Agent': options.userAgent || DEFAULT_USER_AGENT,
                    ...DEFAULT_HEADERS,
                    ...options.headers,
                },
                redirect: 'follow',
                signal: controller.signal,
            });
            // Keep the timer armed while the body streams in.
            const html = await response.text();
            return {
                html,
                statusCode: response.status,
                headers: Object.fromEntries(response.headers.entries()),
                finalUrl: response.url,
            };
        }
        catch (error) {
            if (error?.name === 'AbortError') {
                throw new Error(`Timeout after ${timeout}ms`);
            }
            throw error;
        }
        finally {
            // Single cleanup point for success, timeout and failure.
            clearTimeout(timeoutId);
        }
    }
}
|
|
53
|
+
//# sourceMappingURL=fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.js","sourceRoot":"","sources":["../../../src/engine/fetch.ts"],"names":[],"mappings":"AAAA;;GAEG;AAeH,MAAM,kBAAkB,GACtB,uHAAuH,CAAC;AAE1H,MAAM,eAAe,GAAG;IACtB,QAAQ,EAAE,kGAAkG;IAC5G,iBAAiB,EAAE,gBAAgB;IACnC,iBAAiB,EAAE,mBAAmB;IACtC,eAAe,EAAE,UAAU;IAC3B,QAAQ,EAAE,UAAU;IACpB,WAAW,EAAE,kEAAkE;IAC/E,kBAAkB,EAAE,IAAI;IACxB,oBAAoB,EAAE,SAAS;IAC/B,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,MAAM;IACxB,gBAAgB,EAAE,IAAI;IACtB,2BAA2B,EAAE,GAAG;CACjC,CAAC;AAEF,MAAM,OAAO,WAAW;IACtB,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAAwB,EAAE;QACjD,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC;QACxC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;QAEhE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,YAAY,EAAE,OAAO,CAAC,SAAS,IAAI,kBAAkB;oBACrD,GAAG,eAAe;oBAClB,GAAG,OAAO,CAAC,OAAO;iBACnB;gBACD,QAAQ,EAAE,QAAQ;gBAClB,MAAM,EAAE,UAAU,CAAC,MAAM;aAC1B,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAEnC,OAAO;gBACL,IAAI;gBACJ,UAAU,EAAE,QAAQ,CAAC,MAAM;gBAC3B,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;gBACvD,QAAQ,EAAE,QAAQ,CAAC,GAAG;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBAChC,MAAM,IAAI,KAAK,CAAC,iBAAiB,OAAO,IAAI,CAAC,CAAC;YAChD,CAAC;YAED,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine Waterfall - orchestrates fetch engines
|
|
3
|
+
* Tries fast fetch first, escalates to browser only when blocked
|
|
4
|
+
*/
|
|
5
|
+
import { type BlockReason } from './detector.js';
|
|
6
|
+
/**
 * Outcome of fetching one URL through a single engine.
 * As built by EngineWaterfall.fetchWithEngine in index.js: `html`,
 * `statusCode` and `finalUrl` are copied from the engine's raw result,
 * `engine` is the name of the engine that produced it, and `blocked` /
 * `blockReason` come from the isBlocked() check on that result.
 */
export interface EngineResult {
|
|
7
|
+
html: string;
|
|
8
|
+
statusCode: number;
|
|
9
|
+
engine: 'fetch' | 'playwright' | 'rebrowser';
|
|
10
|
+
blocked: boolean;
|
|
11
|
+
blockReason?: BlockReason;
|
|
12
|
+
finalUrl: string;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Optional per-request settings accepted by EngineWaterfall.fetch.
 * In index.js, a truthy `timeout` replaces each engine's default timeout,
 * and `forceEngine` sends the request to that one engine instead of trying
 * the engines in order. The whole object is forwarded to the engine's own
 * fetch().
 */
export interface EngineOptions {
|
|
15
|
+
timeout?: number;
|
|
16
|
+
userAgent?: string;
|
|
17
|
+
headers?: Record<string, string>;
|
|
18
|
+
forceEngine?: 'fetch' | 'playwright' | 'rebrowser';
|
|
19
|
+
}
|
|
20
|
+
/**
 * Counters reported by EngineWaterfall.getStats().
 * In index.js, the per-engine counters are incremented when that engine
 * returns an unblocked result during the waterfall; `blocked` is incremented
 * once per URL for which every engine tried was blocked or threw.
 */
export interface EngineStats {
|
|
21
|
+
fetch: number;
|
|
22
|
+
playwright: number;
|
|
23
|
+
rebrowser: number;
|
|
24
|
+
blocked: number;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Type declaration for the engine orchestrator.
 * Public surface: `fetch` (resolves to an EngineResult), `getStats`
 * (returns an EngineStats value) and `close`. The remaining members are
 * private implementation details.
 */
export declare class EngineWaterfall {
|
|
27
|
+
private engines;
|
|
28
|
+
private stats;
|
|
29
|
+
private browserInstallPromise;
|
|
30
|
+
fetch(url: string, options?: EngineOptions): Promise<EngineResult>;
|
|
31
|
+
private fetchWithEngine;
|
|
32
|
+
private ensureBrowserInstalled;
|
|
33
|
+
private installBrowser;
|
|
34
|
+
getStats(): EngineStats;
|
|
35
|
+
close(): Promise<void>;
|
|
36
|
+
}
|
|
37
|
+
export { isBlocked, needsBrowser } from './detector.js';
|
|
38
|
+
export type { BlockReason, BlockCheck } from './detector.js';
|
|
39
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/engine/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,OAAO,EAA2B,KAAK,WAAW,EAAE,MAAM,eAAe,CAAC;AAG1E,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,GAAG,YAAY,GAAG,WAAW,CAAC;IAC7C,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,WAAW,CAAC,EAAE,OAAO,GAAG,YAAY,GAAG,WAAW,CAAC;CACpD;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AAmBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,OAAO,CAA4E;IAC3F,OAAO,CAAC,KAAK,CAAsE;IACnF,OAAO,CAAC,qBAAqB,CAAiC;IAExD,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;YAmC9D,eAAe;YAqCf,sBAAsB;YAatB,cAAc;IAc5B,QAAQ,IAAI,WAAW;IAIjB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAQ7B;AAED,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AACxD,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine Waterfall - orchestrates fetch engines
|
|
3
|
+
* Tries fast fetch first, escalates to browser only when blocked
|
|
4
|
+
*/
|
|
5
|
+
import { FetchEngine } from './fetch.js';
|
|
6
|
+
import { PlaywrightEngine, isPlaywrightInstalled } from './playwright.js';
|
|
7
|
+
import { RebrowserEngine } from './rebrowser.js';
|
|
8
|
+
import { isBlocked, needsBrowser } from './detector.js';
|
|
9
|
+
import { createLogger } from '../utils/logger.js';
|
|
10
|
+
const log = createLogger();
|
|
11
|
+
// Engine priority order
// Each entry holds the engine's name, the default timeout used when the
// caller passes no options.timeout, and a factory that fetchWithEngine calls
// the first time that engine is needed. EngineWaterfall.fetch walks this list
// from the top, starting at index 1 when needsBrowser(url) is true.
|
|
12
|
+
const ENGINES = [
|
|
13
|
+
{ name: 'fetch', timeout: 5000, create: () => new FetchEngine() },
|
|
14
|
+
{ name: 'playwright', timeout: 15000, create: () => new PlaywrightEngine() },
|
|
15
|
+
{ name: 'rebrowser', timeout: 30000, create: () => new RebrowserEngine() },
|
|
16
|
+
];
|
|
17
|
+
/**
 * Orchestrates the fetch engines: tries them in ENGINES order and returns the
 * first result that is not detected as blocked.
 */
export class EngineWaterfall {
    /** Engine instances created so far, keyed by engine name. */
    engines = new Map();
    /** Per-engine success counters plus a count of URLs where every engine failed. */
    stats = { fetch: 0, playwright: 0, rebrowser: 0, blocked: 0 };
    /** Promise of the one-time browser install; null until an install is first attempted. */
    browserInstallPromise = null;
    /**
     * Fetch a URL, escalating through the engines until one is not blocked.
     *
     * With `options.forceEngine` the request goes to that engine only and its
     * result is returned as-is (even when blocked); forced requests are not
     * counted in the stats.
     *
     * @param {string} url - Address to fetch.
     * @param {object} [options] - See EngineOptions; a truthy `timeout`
     *   overrides every engine's default timeout.
     * @returns {Promise<object>} The first unblocked engine result.
     * @throws {Error} `All engines failed for <url>` when every engine was
     *   blocked or threw; `cause` carries the most recent engine error, if any.
     */
    async fetch(url, options = {}) {
        // Force specific engine if requested
        if (options.forceEngine) {
            return this.fetchWithEngine(url, options.forceEngine, options);
        }
        // Skip fetch for known protected sites
        const startIndex = needsBrowser(url) ? 1 : 0;
        const enginesToTry = ENGINES.slice(startIndex);
        // Remember the most recent thrown error so the final failure is diagnosable.
        let lastError;
        // Try engines in order until success
        for (const engineConfig of enginesToTry) {
            try {
                const result = await this.fetchWithEngine(url, engineConfig.name, {
                    ...options,
                    timeout: options.timeout || engineConfig.timeout,
                });
                // Check if response looks blocked
                if (!result.blocked) {
                    this.stats[engineConfig.name]++;
                    return result;
                }
                log.dim(` ${engineConfig.name} blocked: ${result.blockReason}`);
            }
            catch (error) {
                lastError = error;
                log.dim(` ${engineConfig.name} failed: ${error.message}`);
            }
        }
        // All engines failed
        this.stats.blocked++;
        throw new Error(`All engines failed for ${url}`, lastError === undefined ? undefined : { cause: lastError });
    }
    /**
     * Run one named engine against a URL and annotate the result with the
     * outcome of the isBlocked() check.
     *
     * @param {string} url - Address to fetch.
     * @param {string} engineName - One of the names listed in ENGINES.
     * @param {object} options - Forwarded unchanged to the engine's fetch().
     * @returns {Promise<object>} html, statusCode, engine, blocked, blockReason, finalUrl.
     * @throws {Error} `Browser not available` when a browser engine is
     *   requested but the browser cannot be installed; `Unknown engine: <name>`
     *   for a name that is not in ENGINES; plus anything the engine throws.
     */
    async fetchWithEngine(url, engineName, options) {
        // Browser engines need the browser binaries in place first
        if (engineName === 'playwright' || engineName === 'rebrowser') {
            if (!await this.ensureBrowserInstalled()) {
                throw new Error('Browser not available');
            }
        }
        // Get or create engine instance
        let engine = this.engines.get(engineName);
        if (!engine) {
            const config = ENGINES.find(e => e.name === engineName);
            if (!config) {
                throw new Error(`Unknown engine: ${engineName}`);
            }
            engine = config.create();
            this.engines.set(engineName, engine);
        }
        // Fetch
        const result = await engine.fetch(url, options);
        // Detect blocking
        const blockCheck = isBlocked(result.html, result.statusCode, url);
        return {
            html: result.html,
            statusCode: result.statusCode,
            engine: engineName,
            blocked: blockCheck.blocked,
            blockReason: blockCheck.reason,
            finalUrl: result.finalUrl,
        };
    }
    /**
     * Resolve to true when Playwright reports itself installed; otherwise
     * start (or reuse) a single install attempt and resolve to its outcome.
     *
     * @returns {Promise<boolean>}
     */
    async ensureBrowserInstalled() {
        if (await isPlaywrightInstalled()) {
            return true;
        }
        // Only show install prompt once: every caller shares the same promise,
        // including a failed (false) outcome.
        if (!this.browserInstallPromise) {
            this.browserInstallPromise = this.installBrowser();
        }
        return this.browserInstallPromise;
    }
    /**
     * Install Chromium through `npx playwright install chromium`, running
     * synchronously with inherited stdio.
     *
     * @returns {Promise<boolean>} true on success, false if the command fails.
     */
    async installBrowser() {
        log.info('\n Protected site detected. Installing browser...');
        try {
            const { execSync } = await import('child_process');
            execSync('npx playwright install chromium', { stdio: 'inherit' });
            log.success(' Browser installed successfully.\n');
            return true;
        }
        catch {
            log.error(' Failed to install browser. Some sites may not work.');
            return false;
        }
    }
    /**
     * @returns {{fetch: number, playwright: number, rebrowser: number, blocked: number}}
     *   A copy of the counters, so callers cannot mutate internal state.
     */
    getStats() {
        return { ...this.stats };
    }
    /**
     * Close every engine that exposes a close() method and forget all engine
     * instances.
     *
     * A failing close() no longer prevents the remaining engines from being
     * closed or the instance map from being cleared.
     *
     * @returns {Promise<void>}
     * @throws The single close() error, or an AggregateError when several
     *   engines failed to close.
     */
    async close() {
        const failures = [];
        for (const engine of this.engines.values()) {
            if ('close' in engine && typeof engine.close === 'function') {
                try {
                    await engine.close();
                }
                catch (error) {
                    failures.push(error);
                }
            }
        }
        this.engines.clear();
        if (failures.length === 1) {
            throw failures[0];
        }
        if (failures.length > 1) {
            throw new AggregateError(failures, 'Failed to close one or more engines');
        }
    }
}
|
|
115
|
+
export { isBlocked, needsBrowser } from './detector.js';
|
|
116
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/engine/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAC;AAC1E,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,SAAS,EAAE,YAAY,EAAoB,MAAM,eAAe,CAAC;AAC1E,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAyBlD,MAAM,GAAG,GAAG,YAAY,EAAE,CAAC;AAU3B,wBAAwB;AACxB,MAAM,OAAO,GAAmB;IAC9B,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,IAAI,WAAW,EAAE,EAAE;IACjE,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,IAAI,gBAAgB,EAAE,EAAE;IAC5E,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,IAAI,eAAe,EAAE,EAAE;CAC3E,CAAC;AAEF,MAAM,OAAO,eAAe;IAClB,OAAO,GAAkE,IAAI,GAAG,EAAE,CAAC;IACnF,KAAK,GAAgB,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IAC3E,qBAAqB,GAA4B,IAAI,CAAC;IAE9D,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,UAAyB,EAAE;QAClD,qCAAqC;QACrC,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC,eAAe,CAAC,GAAG,EAAE,OAAO,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QACjE,CAAC;QAED,uCAAuC;QACvC,MAAM,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QAE/C,qCAAqC;QACrC,KAAK,MAAM,YAAY,IAAI,YAAY,EAAE,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,GAAG,EAAE,YAAY,CAAC,IAAI,EAAE;oBAChE,GAAG,OAAO;oBACV,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,YAAY,CAAC,OAAO;iBACjD,CAAC,CAAC;gBAEH,kCAAkC;gBAClC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;oBACpB,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC;oBAChC,OAAO,MAAM,CAAC;gBAChB,CAAC;gBAED,GAAG,CAAC,GAAG,CAAC,KAAK,YAAY,CAAC,IAAI,aAAa,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC;YACnE,CAAC;YAAC,OAAO,KAAU,EAAE,CAAC;gBACpB,GAAG,CAAC,GAAG,CAAC,KAAK,YAAY,CAAC,IAAI,YAAY,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QAED,qBAAqB;QACrB,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,EAAE,CAAC,CAAC;IACnD,CAAC;IAEO,KAAK,CAAC,eAAe,CAC
3B,GAAW,EACX,UAAsB,EACtB,OAAsB;QAEtB,4BAA4B;QAC5B,IAAI,UAAU,KAAK,YAAY,IAAI,UAAU,KAAK,WAAW,EAAE,CAAC;YAC9D,IAAI,CAAC,MAAM,IAAI,CAAC,sBAAsB,EAAE,EAAE,CAAC;gBACzC,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC;YACxD,IAAI,CAAC,MAAM;gBAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,UAAU,EAAE,CAAC,CAAC;YAC9D,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;YACzB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QACvC,CAAC;QAED,QAAQ;QACR,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAEhD,kBAAkB;QAClB,MAAM,UAAU,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAElE,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,MAAM,EAAE,UAAU;YAClB,OAAO,EAAE,UAAU,CAAC,OAAO;YAC3B,WAAW,EAAE,UAAU,CAAC,MAAM;YAC9B,QAAQ,EAAE,MAAM,CAAC,QAAQ;SAC1B,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,sBAAsB;QAClC,IAAI,MAAM,qBAAqB,EAAE,EAAE,CAAC;YAClC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,gCAAgC;QAChC,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,CAAC;YAChC,IAAI,CAAC,qBAAqB,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QACrD,CAAC;QAED,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACpC,CAAC;IAEO,KAAK,CAAC,cAAc;QAC1B,GAAG,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;QAE/D,IAAI,CAAC;YACH,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACnD,QAAQ,CAAC,iCAAiC,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;YAClE,GAAG,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC;YACnD,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,GAAG,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;YACnE,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,QAAQ;QACN,OAAO,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,KAAK;QACT,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,IAAI,OAAO,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;gBAC5D,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;YACvB,CAAC;QACH,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;CACF;AAED,OAAO,EAAE,SAAS,EAAE,YAAY,EA
AE,MAAM,eAAe,CAAC"}
|