@tyroneross/blog-scraper 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +254 -279
  3. package/dist/lib/circuit-breaker.d.ts +29 -0
  4. package/dist/lib/circuit-breaker.d.ts.map +1 -0
  5. package/dist/lib/circuit-breaker.js +89 -0
  6. package/dist/lib/circuit-breaker.js.map +1 -0
  7. package/dist/lib/content-extractor.d.ts +13 -0
  8. package/dist/lib/content-extractor.d.ts.map +1 -0
  9. package/dist/lib/content-extractor.js +75 -0
  10. package/dist/lib/content-extractor.js.map +1 -0
  11. package/dist/lib/formatters/html-to-markdown.d.ts +21 -0
  12. package/dist/lib/formatters/html-to-markdown.d.ts.map +1 -0
  13. package/dist/lib/formatters/html-to-markdown.js +146 -0
  14. package/dist/lib/formatters/html-to-markdown.js.map +1 -0
  15. package/dist/lib/formatters/text-cleaner.d.ts +44 -0
  16. package/dist/lib/formatters/text-cleaner.d.ts.map +1 -0
  17. package/dist/lib/formatters/text-cleaner.js +143 -0
  18. package/dist/lib/formatters/text-cleaner.js.map +1 -0
  19. package/dist/lib/index.d.ts +96 -0
  20. package/dist/lib/index.d.ts.map +1 -0
  21. package/dist/lib/index.js +184 -0
  22. package/dist/lib/index.js.map +1 -0
  23. package/dist/lib/quality-scorer.d.ts +83 -0
  24. package/dist/lib/quality-scorer.d.ts.map +1 -0
  25. package/dist/lib/quality-scorer.js +376 -0
  26. package/dist/lib/quality-scorer.js.map +1 -0
  27. package/dist/lib/rss-utils.d.ts +31 -0
  28. package/dist/lib/rss-utils.d.ts.map +1 -0
  29. package/dist/lib/rss-utils.js +175 -0
  30. package/dist/lib/rss-utils.js.map +1 -0
  31. package/dist/lib/scraping-rate-limiter.d.ts +52 -0
  32. package/dist/lib/scraping-rate-limiter.d.ts.map +1 -0
  33. package/dist/lib/scraping-rate-limiter.js +238 -0
  34. package/dist/lib/scraping-rate-limiter.js.map +1 -0
  35. package/dist/lib/source-orchestrator.d.ts +306 -0
  36. package/dist/lib/source-orchestrator.d.ts.map +1 -0
  37. package/dist/lib/source-orchestrator.js +840 -0
  38. package/dist/lib/source-orchestrator.js.map +1 -0
  39. package/dist/lib/types.d.ts +143 -0
  40. package/dist/lib/types.d.ts.map +1 -0
  41. package/dist/lib/types.js +7 -0
  42. package/dist/lib/types.js.map +1 -0
  43. package/dist/lib/web-scrapers/content-extractor.d.ts +62 -0
  44. package/dist/lib/web-scrapers/content-extractor.d.ts.map +1 -0
  45. package/dist/lib/web-scrapers/content-extractor.js +531 -0
  46. package/dist/lib/web-scrapers/content-extractor.js.map +1 -0
  47. package/dist/lib/web-scrapers/html-scraper.d.ts +74 -0
  48. package/dist/lib/web-scrapers/html-scraper.d.ts.map +1 -0
  49. package/dist/lib/web-scrapers/html-scraper.js +598 -0
  50. package/dist/lib/web-scrapers/html-scraper.js.map +1 -0
  51. package/dist/lib/web-scrapers/playwright-scraper.d.ts +57 -0
  52. package/dist/lib/web-scrapers/playwright-scraper.d.ts.map +1 -0
  53. package/dist/lib/web-scrapers/playwright-scraper.js +355 -0
  54. package/dist/lib/web-scrapers/playwright-scraper.js.map +1 -0
  55. package/dist/lib/web-scrapers/robots-checker.d.ts +42 -0
  56. package/dist/lib/web-scrapers/robots-checker.d.ts.map +1 -0
  57. package/dist/lib/web-scrapers/robots-checker.js +285 -0
  58. package/dist/lib/web-scrapers/robots-checker.js.map +1 -0
  59. package/dist/lib/web-scrapers/rss-discovery.d.ts +62 -0
  60. package/dist/lib/web-scrapers/rss-discovery.d.ts.map +1 -0
  61. package/dist/lib/web-scrapers/rss-discovery.js +384 -0
  62. package/dist/lib/web-scrapers/rss-discovery.js.map +1 -0
  63. package/dist/lib/web-scrapers/sitemap-parser.d.ts +65 -0
  64. package/dist/lib/web-scrapers/sitemap-parser.d.ts.map +1 -0
  65. package/dist/lib/web-scrapers/sitemap-parser.js +430 -0
  66. package/dist/lib/web-scrapers/sitemap-parser.js.map +1 -0
  67. package/package.json +54 -33
  68. package/dist/index.d.mts +0 -949
  69. package/dist/index.d.ts +0 -949
  70. package/dist/index.js +0 -3236
  71. package/dist/index.mjs +0 -3165
@@ -0,0 +1,598 @@
1
+ "use strict";
2
// Compiler-emitted interop helper: exposes property `k` of module object `m`
// on target `o` under the name `k2` (`k2` falls back to `k` when undefined).
// An already-installed `this.__createBinding` is reused if one exists.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Swap in an enumerable getter (so reads go through m[k] each time) when there
// is no descriptor, when an accessor lives on a non-__esModule object, or when
// a data property is writable or configurable; otherwise reuse the descriptor.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
// Fallback used when Object.create is unavailable: copy the current value.
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Compiler-emitted interop helper: stores `v` as the "default" property of `o`.
// The defineProperty variant makes it enumerable and passes no writable or
// configurable flag; the fallback branch is a plain assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Compiler-emitted interop helper backing `import * as x from "..."`.
// Returns `mod` untouched when it is flagged `__esModule`; otherwise builds a
// new object that re-exposes every own key of `mod` except "default" via
// __createBinding and stores `mod` itself under "default".
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in + hasOwnProperty loop when that function is missing.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
// `mod != null` lets null/undefined through: no keys are copied for them.
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.globalHTMLScraper = exports.HTMLScraper = void 0;
37
+ const cheerio = __importStar(require("cheerio"));
38
+ const scraping_rate_limiter_1 = require("../scraping-rate-limiter");
39
+ const robots_checker_1 = require("./robots-checker");
40
+ // Optional Perplexity integration - users need to provide their own API key
41
+ // Set PERPLEXITY_API_KEY environment variable to enable
42
// Model identifiers for the Perplexity chat-completions request. SONAR is the
// value HTMLScraper.extractWithPerplexity sends when the caller's config does
// not name a model; SONAR_PRO is not referenced in the visible part of this file.
// NOTE(review): these "llama-3.1-sonar-*" names may have been retired by the
// provider - confirm against the current Perplexity model list.
+ const PERPLEXITY_MODELS = {
43
+ SONAR: 'llama-3.1-sonar-small-128k-online',
44
+ SONAR_PRO: 'llama-3.1-sonar-large-128k-online'
45
+ };
46
+ class HTMLScraper {
47
+ constructor() {
48
+ this.userAgent = 'Mozilla/5.0 (compatible; AtomizeNews/1.0; +https://atomize-news.vercel.app)';
49
+ this.timeout = 10000; // 10 seconds
50
+ this.defaultConfig = {
51
+ selectors: {
52
+ articleLinks: [
53
+ 'article a[href]',
54
+ '.article a[href]',
55
+ '.post a[href]',
56
+ '.story a[href]',
57
+ '.news-item a[href]',
58
+ '.content-item a[href]',
59
+ 'h1 a[href]',
60
+ 'h2 a[href]',
61
+ 'h3 a[href]',
62
+ '.headline a[href]',
63
+ '.title a[href]'
64
+ ],
65
+ titleSelectors: [
66
+ 'h1',
67
+ 'h2',
68
+ 'h3',
69
+ '.headline',
70
+ '.title',
71
+ '.article-title',
72
+ '.post-title',
73
+ '.story-title'
74
+ ],
75
+ dateSelectors: [
76
+ 'time[datetime]',
77
+ '.date',
78
+ '.published',
79
+ '.timestamp',
80
+ '.publish-date',
81
+ '.article-date'
82
+ ],
83
+ excludeSelectors: [
84
+ '.advertisement',
85
+ '.ads',
86
+ '.sidebar',
87
+ '.footer',
88
+ '.navigation',
89
+ '.menu',
90
+ '.comments',
91
+ '.related'
92
+ ]
93
+ },
94
+ filters: {
95
+ minTitleLength: 10,
96
+ maxTitleLength: 200,
97
+ includePatterns: [
98
+ /\/article\//i,
99
+ /\/post\//i,
100
+ /\/story\//i,
101
+ /\/news\//i,
102
+ /\/blog\//i,
103
+ /\/\d{4}\/\d{2}\/\d{2}\//, // Date patterns
104
+ /\/\d{4}\/\d{2}\//
105
+ ],
106
+ excludePatterns: [
107
+ /\/(tag|category|author|search|archive)\//i,
108
+ /\/(login|register|contact|about)\//i,
109
+ /\.(pdf|jpg|jpeg|png|gif|mp4|zip|doc)$/i,
110
+ /#/, // Skip hash links
111
+ /javascript:/i,
112
+ /mailto:/i
113
+ ]
114
+ },
115
+ limits: {
116
+ maxLinksPerPage: 100,
117
+ maxDepth: 3
118
+ }
119
+ };
120
+ }
121
+ /**
122
+ * Extract article links from a webpage
123
+ */
124
+ async extractArticleLinks(url, config = {}) {
125
+ console.log(`📰 [HTMLScraper] Starting to extract articles from ${url}`);
126
+ try {
127
+ // Check robots.txt compliance
128
+ const robotsCheck = await robots_checker_1.globalRobotsChecker.isAllowed(url);
129
+ if (!robotsCheck.allowed) {
130
+ console.warn(`🤖 [HTMLScraper] URL blocked by robots.txt: ${url} - ${robotsCheck.reason}`);
131
+ // Try Perplexity fallback if enabled for robots.txt blocks
132
+ if (config.perplexityFallback?.enabled && config.perplexityFallback?.useForRobotsBlocked) {
133
+ console.log(`🔄 [HTMLScraper] Attempting Perplexity fallback for robots-blocked URL`);
134
+ return await this.extractWithPerplexity(url, config);
135
+ }
136
+ return [];
137
+ }
138
+ const html = await this.fetchPage(url);
139
+ if (!html) {
140
+ // Try Perplexity fallback if fetch failed
141
+ if (config.perplexityFallback?.enabled && config.perplexityFallback?.useForParseFailed) {
142
+ console.log(`🔄 [HTMLScraper] Attempting Perplexity fallback for failed fetch`);
143
+ return await this.extractWithPerplexity(url, config);
144
+ }
145
+ return [];
146
+ }
147
+ const mergedConfig = this.mergeConfig(this.defaultConfig, config);
148
+ const articles = this.parseArticleLinks(html, url, mergedConfig);
149
+ // If no articles found and Perplexity fallback is enabled
150
+ if (articles.length === 0 && config.perplexityFallback?.enabled && config.perplexityFallback?.useForParseFailed) {
151
+ console.log(`🔄 [HTMLScraper] No articles found, attempting Perplexity fallback`);
152
+ return await this.extractWithPerplexity(url, config);
153
+ }
154
+ console.log(`📰 [HTMLScraper] Extracted ${articles.length} article links from ${url}`);
155
+ return articles;
156
+ }
157
+ catch (error) {
158
+ console.error(`❌ [HTMLScraper] Error extracting articles from ${url}:`, error);
159
+ // Try Perplexity fallback on error
160
+ if (config.perplexityFallback?.enabled) {
161
+ console.log(`🔄 [HTMLScraper] Attempting Perplexity fallback after error`);
162
+ return await this.extractWithPerplexity(url, config);
163
+ }
164
+ return [];
165
+ }
166
+ }
167
+ /**
168
+ * Extract articles from multiple pages with pagination support
169
+ */
170
+ async extractFromMultiplePages(startUrl, config = {}, options = {}) {
171
+ const maxPages = options.maxPages || 5;
172
+ const allArticles = [];
173
+ const visitedUrls = new Set();
174
+ const urlsToVisit = [startUrl];
175
+ let pageCount = 0;
176
+ while (urlsToVisit.length > 0 && pageCount < maxPages) {
177
+ const currentUrl = urlsToVisit.shift();
178
+ if (visitedUrls.has(currentUrl)) {
179
+ continue;
180
+ }
181
+ visitedUrls.add(currentUrl);
182
+ pageCount++;
183
+ console.log(`📰 [HTMLScraper] Processing page ${pageCount}/${maxPages}: ${currentUrl}`);
184
+ try {
185
+ const articles = await this.extractArticleLinks(currentUrl, config);
186
+ allArticles.push(...articles);
187
+ // Look for next page links if we haven't hit the limit
188
+ if (pageCount < maxPages) {
189
+ const nextPageUrls = await this.findNextPageUrls(currentUrl, options);
190
+ for (const nextUrl of nextPageUrls) {
191
+ if (!visitedUrls.has(nextUrl)) {
192
+ urlsToVisit.push(nextUrl);
193
+ }
194
+ }
195
+ }
196
+ }
197
+ catch (error) {
198
+ console.warn(`⚠️ [HTMLScraper] Error processing page ${currentUrl}:`, error);
199
+ continue;
200
+ }
201
+ }
202
+ // Remove duplicates and sort by confidence
203
+ const uniqueArticles = this.deduplicateArticles(allArticles);
204
+ uniqueArticles.sort((a, b) => b.confidence - a.confidence);
205
+ console.log(`📰 [HTMLScraper] Total extracted ${uniqueArticles.length} unique articles from ${pageCount} pages`);
206
+ return uniqueArticles;
207
+ }
208
+ async fetchPage(url) {
209
+ try {
210
+ return await scraping_rate_limiter_1.globalRateLimiter.execute(url, async () => {
211
+ const controller = new AbortController();
212
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
213
+ try {
214
+ const response = await fetch(url, {
215
+ headers: {
216
+ 'User-Agent': this.userAgent,
217
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
218
+ },
219
+ signal: controller.signal,
220
+ });
221
+ clearTimeout(timeoutId);
222
+ if (!response.ok) {
223
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
224
+ }
225
+ const contentType = response.headers.get('content-type') || '';
226
+ if (!contentType.includes('text/html')) {
227
+ throw new Error(`Not HTML content: ${contentType}`);
228
+ }
229
+ return await response.text();
230
+ }
231
+ catch (error) {
232
+ clearTimeout(timeoutId);
233
+ throw error;
234
+ }
235
+ });
236
+ }
237
+ catch (error) {
238
+ console.error(`❌ [HTMLScraper] Error fetching page ${url}:`, error);
239
+ return null;
240
+ }
241
+ }
242
+ parseArticleLinks(html, baseUrl, config) {
243
+ const articles = [];
244
+ try {
245
+ const $ = cheerio.load(html);
246
+ const seenUrls = new Set();
247
+ // Remove excluded sections first
248
+ config.selectors?.excludeSelectors?.forEach(selector => {
249
+ $(selector).remove();
250
+ });
251
+ // Extract links using configured selectors
252
+ config.selectors?.articleLinks?.forEach(selector => {
253
+ $(selector).each((_, element) => {
254
+ const $link = $(element);
255
+ const href = $link.attr('href');
256
+ if (!href)
257
+ return;
258
+ const absoluteUrl = this.resolveUrl(href, baseUrl);
259
+ if (!absoluteUrl || seenUrls.has(absoluteUrl)) {
260
+ return;
261
+ }
262
+ // Apply filters
263
+ if (!this.passesFilters(absoluteUrl, config.filters)) {
264
+ return;
265
+ }
266
+ seenUrls.add(absoluteUrl);
267
+ // Extract article information
268
+ const article = this.extractArticleInfo($link, $, absoluteUrl);
269
+ if (article && articles.length < (config.limits?.maxLinksPerPage || 100)) {
270
+ articles.push(article);
271
+ }
272
+ });
273
+ });
274
+ // Look for structured data (JSON-LD, microdata)
275
+ const structuredArticles = this.extractStructuredData($, baseUrl);
276
+ structuredArticles.forEach(article => {
277
+ if (!seenUrls.has(article.url)) {
278
+ seenUrls.add(article.url);
279
+ articles.push(article);
280
+ }
281
+ });
282
+ }
283
+ catch (error) {
284
+ console.error(`❌ [HTMLScraper] Error parsing HTML:`, error);
285
+ }
286
+ return articles;
287
+ }
288
+ extractArticleInfo($link, $, url) {
289
+ let title = $link.text().trim();
290
+ let confidence = 0.5;
291
+ let publishedDate;
292
+ let description;
293
+ // Try to find better title from parent elements
294
+ if (!title || title.length < 5) {
295
+ const $parent = $link.closest('article, .article, .post, .story, .news-item');
296
+ if ($parent.length > 0) {
297
+ const betterTitle = $parent.find('h1, h2, h3, .headline, .title').first().text().trim();
298
+ if (betterTitle && betterTitle.length > title.length) {
299
+ title = betterTitle;
300
+ confidence += 0.2;
301
+ }
302
+ }
303
+ }
304
+ // Extract date information
305
+ const $dateElement = $link.closest('article, .article, .post').find('time[datetime], .date, .published').first();
306
+ if ($dateElement.length > 0) {
307
+ const dateText = $dateElement.attr('datetime') || $dateElement.text().trim();
308
+ if (dateText) {
309
+ const date = this.parseDate(dateText);
310
+ if (date) {
311
+ publishedDate = date;
312
+ confidence += 0.1;
313
+ }
314
+ }
315
+ }
316
+ // Extract description
317
+ const $parent = $link.closest('article, .article, .post, .story');
318
+ if ($parent.length > 0) {
319
+ description = $parent.find('.excerpt, .summary, p').first().text().trim();
320
+ if (description && description.length > 50) {
321
+ description = description.substring(0, 300) + '...';
322
+ confidence += 0.1;
323
+ }
324
+ }
325
+ // Boost confidence based on URL patterns
326
+ if (this.isLikelyArticleUrl(url)) {
327
+ confidence += 0.2;
328
+ }
329
+ // Boost confidence based on title quality
330
+ if (title && title.length >= 20 && title.length <= 120) {
331
+ confidence += 0.1;
332
+ }
333
+ if (!title || title.length < 10) {
334
+ return null;
335
+ }
336
+ return {
337
+ url,
338
+ title,
339
+ publishedDate,
340
+ description,
341
+ confidence: Math.min(confidence, 1.0),
342
+ source: 'link-text'
343
+ };
344
+ }
345
+ extractStructuredData($, baseUrl) {
346
+ const articles = [];
347
+ // Extract JSON-LD structured data
348
+ $('script[type="application/ld+json"]').each((_, element) => {
349
+ try {
350
+ const jsonText = $(element).html();
351
+ if (!jsonText)
352
+ return;
353
+ const data = JSON.parse(jsonText);
354
+ const items = Array.isArray(data) ? data : [data];
355
+ for (const item of items) {
356
+ if (item['@type'] === 'Article' || item['@type'] === 'NewsArticle') {
357
+ const url = item.url || item.mainEntityOfPage?.['@id'];
358
+ if (url) {
359
+ const absoluteUrl = this.resolveUrl(url, baseUrl);
360
+ if (absoluteUrl) {
361
+ articles.push({
362
+ url: absoluteUrl,
363
+ title: item.headline || item.name,
364
+ publishedDate: item.datePublished ? new Date(item.datePublished) : undefined,
365
+ description: item.description,
366
+ confidence: 0.9,
367
+ source: 'structured-data'
368
+ });
369
+ }
370
+ }
371
+ }
372
+ }
373
+ }
374
+ catch (error) {
375
+ // Skip malformed JSON-LD
376
+ }
377
+ });
378
+ return articles;
379
+ }
380
+ async findNextPageUrls(currentUrl, options) {
381
+ try {
382
+ const html = await this.fetchPage(currentUrl);
383
+ if (!html)
384
+ return [];
385
+ const $ = cheerio.load(html);
386
+ const nextUrls = [];
387
+ // Look for pagination links
388
+ const paginationSelector = options.paginationSelector ||
389
+ 'a[rel="next"], .pagination a, .next a, .pager a, [class*="next"] a';
390
+ $(paginationSelector).each((_, element) => {
391
+ const $link = $(element);
392
+ const href = $link.attr('href');
393
+ const text = $link.text().toLowerCase().trim();
394
+ if (href && (text.includes('next') || text.includes('→') || text === '>')) {
395
+ const absoluteUrl = this.resolveUrl(href, currentUrl);
396
+ if (absoluteUrl) {
397
+ nextUrls.push(absoluteUrl);
398
+ }
399
+ }
400
+ });
401
+ return Array.from(new Set(nextUrls)); // Remove duplicates
402
+ }
403
+ catch (error) {
404
+ console.warn(`⚠️ [HTMLScraper] Error finding next page URLs:`, error);
405
+ return [];
406
+ }
407
+ }
408
+ deduplicateArticles(articles) {
409
+ const seen = new Map();
410
+ for (const article of articles) {
411
+ const existing = seen.get(article.url);
412
+ if (!existing || article.confidence > existing.confidence) {
413
+ seen.set(article.url, article);
414
+ }
415
+ }
416
+ return Array.from(seen.values());
417
+ }
418
+ passesFilters(url, filters) {
419
+ if (!filters)
420
+ return true;
421
+ const urlLower = url.toLowerCase();
422
+ // Check exclude patterns first
423
+ if (filters.excludePatterns?.some(pattern => pattern.test(url))) {
424
+ return false;
425
+ }
426
+ // Check include patterns
427
+ if (filters.includePatterns?.length &&
428
+ !filters.includePatterns.some(pattern => pattern.test(url))) {
429
+ return false;
430
+ }
431
+ // Check allowed domains
432
+ if (filters.allowedDomains?.length) {
433
+ try {
434
+ const urlObj = new URL(url);
435
+ const domain = urlObj.hostname.toLowerCase();
436
+ if (!filters.allowedDomains.some(allowed => domain === allowed.toLowerCase() || domain.endsWith('.' + allowed.toLowerCase()))) {
437
+ return false;
438
+ }
439
+ }
440
+ catch {
441
+ return false;
442
+ }
443
+ }
444
+ return true;
445
+ }
446
+ isLikelyArticleUrl(url) {
447
+ const urlLower = url.toLowerCase();
448
+ const articlePatterns = [
449
+ /\/article[s]?\//,
450
+ /\/post[s]?\//,
451
+ /\/story\//,
452
+ /\/stories\//,
453
+ /\/news\//,
454
+ /\/blog\//,
455
+ /\/\d{4}\/\d{2}\/\d{2}\//, // Date-based URLs
456
+ /\/\d{4}\/\d{2}\//
457
+ ];
458
+ return articlePatterns.some(pattern => pattern.test(urlLower));
459
+ }
460
+ parseDate(dateString) {
461
+ try {
462
+ const date = new Date(dateString);
463
+ if (isNaN(date.getTime())) {
464
+ // Try common date formats
465
+ const formats = [
466
+ /(\d{4})-(\d{2})-(\d{2})/, // YYYY-MM-DD
467
+ /(\d{2})\/(\d{2})\/(\d{4})/, // MM/DD/YYYY
468
+ /(\d{2})\.(\d{2})\.(\d{4})/ // DD.MM.YYYY
469
+ ];
470
+ for (const format of formats) {
471
+ const match = dateString.match(format);
472
+ if (match) {
473
+ const [, p1, p2, p3] = match;
474
+ // Assume first format is YYYY-MM-DD
475
+ const testDate = new Date(`${p1}-${p2}-${p3}`);
476
+ if (!isNaN(testDate.getTime())) {
477
+ return testDate;
478
+ }
479
+ }
480
+ }
481
+ return null;
482
+ }
483
+ return date;
484
+ }
485
+ catch {
486
+ return null;
487
+ }
488
+ }
489
+ resolveUrl(url, baseUrl) {
490
+ try {
491
+ return new URL(url, baseUrl).toString();
492
+ }
493
+ catch {
494
+ return null;
495
+ }
496
+ }
497
+ mergeConfig(defaultConfig, userConfig) {
498
+ return {
499
+ selectors: {
500
+ ...defaultConfig.selectors,
501
+ ...userConfig.selectors,
502
+ articleLinks: [
503
+ ...(defaultConfig.selectors?.articleLinks || []),
504
+ ...(userConfig.selectors?.articleLinks || [])
505
+ ]
506
+ },
507
+ filters: {
508
+ ...defaultConfig.filters,
509
+ ...userConfig.filters,
510
+ includePatterns: [
511
+ ...(defaultConfig.filters?.includePatterns || []),
512
+ ...(userConfig.filters?.includePatterns || [])
513
+ ],
514
+ excludePatterns: [
515
+ ...(defaultConfig.filters?.excludePatterns || []),
516
+ ...(userConfig.filters?.excludePatterns || [])
517
+ ]
518
+ },
519
+ limits: {
520
+ ...defaultConfig.limits,
521
+ ...userConfig.limits
522
+ },
523
+ perplexityFallback: {
524
+ ...defaultConfig.perplexityFallback,
525
+ ...userConfig.perplexityFallback
526
+ }
527
+ };
528
+ }
529
+ /**
530
+ * Use Perplexity API to extract articles when traditional scraping fails
531
+ * Requires PERPLEXITY_API_KEY environment variable to be set
532
+ */
533
+ async extractWithPerplexity(url, config) {
534
+ try {
535
+ // Check if Perplexity API key is available
536
+ if (!process.env.PERPLEXITY_API_KEY) {
537
+ console.warn(`⚠️ [HTMLScraper] Perplexity API key not configured - set PERPLEXITY_API_KEY env variable`);
538
+ return [];
539
+ }
540
+ const domain = new URL(url).hostname;
541
+ const query = `Find recent news articles and stories from ${domain}. List article titles and URLs.`;
542
+ console.log(`🔍 [HTMLScraper] Using Perplexity to find articles from ${domain}`);
543
+ // Direct Perplexity API call
544
+ const response = await fetch('https://api.perplexity.ai/chat/completions', {
545
+ method: 'POST',
546
+ headers: {
547
+ 'Content-Type': 'application/json',
548
+ 'Authorization': `Bearer ${process.env.PERPLEXITY_API_KEY}`
549
+ },
550
+ body: JSON.stringify({
551
+ model: config.perplexityFallback?.model || PERPLEXITY_MODELS.SONAR,
552
+ messages: [{ role: 'user', content: query }],
553
+ max_tokens: 1000,
554
+ return_citations: true,
555
+ search_recency_filter: config.perplexityFallback?.searchRecency || 'day'
556
+ })
557
+ });
558
+ if (!response.ok) {
559
+ throw new Error(`Perplexity API error: ${response.status} ${response.statusText}`);
560
+ }
561
+ const data = await response.json();
562
+ const articles = [];
563
+ // Extract from citations if available
564
+ if (data.citations && Array.isArray(data.citations)) {
565
+ for (const citation of data.citations) {
566
+ try {
567
+ const citationUrl = citation;
568
+ const citationDomain = new URL(citationUrl).hostname;
569
+ if (citationDomain === domain || citationDomain.includes(domain.split('.')[0])) {
570
+ articles.push({
571
+ url: citationUrl,
572
+ title: citationUrl.split('/').pop() || domain,
573
+ confidence: 0.7,
574
+ source: 'meta-data'
575
+ });
576
+ }
577
+ }
578
+ catch {
579
+ continue;
580
+ }
581
+ }
582
+ }
583
+ // Apply limits
584
+ const maxLinks = config.limits?.maxLinksPerPage || 100;
585
+ const limitedArticles = articles.slice(0, maxLinks);
586
+ console.log(`✨ [HTMLScraper] Perplexity found ${limitedArticles.length} articles`);
587
+ return limitedArticles;
588
+ }
589
+ catch (error) {
590
+ console.error(`❌ [HTMLScraper] Perplexity fallback failed:`, error);
591
+ return [];
592
+ }
593
+ }
594
+ }
595
// Public CommonJS exports of this module: the class itself and one instance
// created when the module is first loaded. The constructor above only assigns
// fields, so building this instance performs no network or file access.
+ exports.HTMLScraper = HTMLScraper;
596
+ // Default global instance
597
+ exports.globalHTMLScraper = new HTMLScraper();
598
+ //# sourceMappingURL=html-scraper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-scraper.js","sourceRoot":"","sources":["../../../lib/web-scrapers/html-scraper.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,iDAAmC;AACnC,oEAA6D;AAC7D,qDAAuD;AAEvD,4EAA4E;AAC5E,wDAAwD;AACxD,MAAM,iBAAiB,GAAG;IACxB,KAAK,EAAE,mCAAmC;IAC1C,SAAS,EAAE,mCAAmC;CACtC,CAAC;AAsCX,MAAa,WAAW;IAAxB;QACmB,cAAS,GAAG,6EAA6E,CAAC;QAC1F,YAAO,GAAG,KAAK,CAAC,CAAC,aAAa;QAC9B,kBAAa,GAAmB;YAC/C,SAAS,EAAE;gBACT,YAAY,EAAE;oBACZ,iBAAiB;oBACjB,kBAAkB;oBAClB,eAAe;oBACf,gBAAgB;oBAChB,oBAAoB;oBACpB,uBAAuB;oBACvB,YAAY;oBACZ,YAAY;oBACZ,YAAY;oBACZ,mBAAmB;oBACnB,gBAAgB;iBACjB;gBACD,cAAc,EAAE;oBACd,IAAI;oBACJ,IAAI;oBACJ,IAAI;oBACJ,WAAW;oBACX,QAAQ;oBACR,gBAAgB;oBAChB,aAAa;oBACb,cAAc;iBACf;gBACD,aAAa,EAAE;oBACb,gBAAgB;oBAChB,OAAO;oBACP,YAAY;oBACZ,YAAY;oBACZ,eAAe;oBACf,eAAe;iBAChB;gBACD,gBAAgB,EAAE;oBAChB,gBAAgB;oBAChB,MAAM;oBACN,UAAU;oBACV,SAAS;oBACT,aAAa;oBACb,OAAO;oBACP,WAAW;oBACX,UAAU;iBACX;aACF;YACD,OAAO,EAAE;gBACP,cAAc,EAAE,EAAE;gBAClB,cAAc,EAAE,GAAG;gBACnB,eAAe,EAAE;oBACf,cAAc;oBACd,WAAW;oBACX,YAAY;oBACZ,WAAW;oBACX,WAAW;oBACX,yBAAyB,EAAG,gBAAgB;oBAC5C,kBAAkB;iBACnB;gBACD,eAAe,EAAE;oBACf,2CAA2C;oBAC3C,qCAAqC;oBACrC,wCAAwC;oBACxC,GAAG,EAAG,kBAAkB;oBACxB,cAAc;oBACd,UAAU;iBACX;aACF;YACD,MAAM,EAAE;gBACN,eAAe,EAAE,GAAG;gBACpB,QAAQ,EAAE,CAAC;aACZ;SACF,CAAC;IAikBJ,CAAC;IA/jBC;;OAEG;IACH,KAAK,CAAC,mBAAmB,CACvB,GAAW,EACX,SAAyB,EAAE;QAE3B,OAAO,CAAC,GAAG,CAAC,sDAAsD,GAAG,EAAE,CAAC,CAAC;QAEzE,IAAI,CAAC;YACH,8BAA8B;YAC9B,MAAM,WAAW,GAAG,MAAM,oCAAmB,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAC7D,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;gBACzB,OAAO,CAAC,IAAI,CAAC,+CAA+C,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC;gBAE3F,2DAA2D;gBAC3D,IAAI,MAAM,CAAC,kBAAkB,EAAE,OAAO,IAAI,MAAM,CAAC,kBAAkB,EAAE,mBAAmB,EAAE,CAAC;oBACzF,OAAO,CAAC,GAAG,CAAC,wEAAwE,CAAC,CAAC;oBACtF,OAAO,MAAM,IAAI,CAAC,qBAAqB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;gBACvD,CAAC;gBAED,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACvC,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,0CAA0C;gBAC1C,IAAI,MAAM,CAAC,kBAA
kB,EAAE,OAAO,IAAI,MAAM,CAAC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;oBACvF,OAAO,CAAC,GAAG,CAAC,kEAAkE,CAAC,CAAC;oBAChF,OAAO,MAAM,IAAI,CAAC,qBAAqB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;gBACvD,CAAC;gBACD,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAClE,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,GAAG,EAAE,YAAY,CAAC,CAAC;YAEjE,0DAA0D;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,kBAAkB,EAAE,OAAO,IAAI,MAAM,CAAC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;gBAChH,OAAO,CAAC,GAAG,CAAC,oEAAoE,CAAC,CAAC;gBAClF,OAAO,MAAM,IAAI,CAAC,qBAAqB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YACvD,CAAC;YAED,OAAO,CAAC,GAAG,CAAC,8BAA8B,QAAQ,CAAC,MAAM,uBAAuB,GAAG,EAAE,CAAC,CAAC;YACvF,OAAO,QAAQ,CAAC;QAElB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,kDAAkD,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;YAE/E,mCAAmC;YACnC,IAAI,MAAM,CAAC,kBAAkB,EAAE,OAAO,EAAE,CAAC;gBACvC,OAAO,CAAC,GAAG,CAAC,6DAA6D,CAAC,CAAC;gBAC3E,OAAO,MAAM,IAAI,CAAC,qBAAqB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YACvD,CAAC;YAED,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,wBAAwB,CAC5B,QAAgB,EAChB,SAAyB,EAAE,EAC3B,UAII,EAAE;QAEN,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC;QACvC,MAAM,WAAW,GAAuB,EAAE,CAAC;QAC3C,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;QACtC,MAAM,WAAW,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE/B,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,OAAO,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,GAAG,QAAQ,EAAE,CAAC;YACtD,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,EAAG,CAAC;YAExC,IAAI,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBAChC,SAAS;YACX,CAAC;YAED,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YAC5B,SAAS,EAAE,CAAC;YAEZ,OAAO,CAAC,GAAG,CAAC,oCAAoC,SAAS,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC,CAAC;YAExF,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;gBACpE,WAAW,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,CAAC;gBAE9B,uDAAuD;gBACvD,IAAI,SAAS,GAAG,QAAQ,EAAE,CAAC;oBACzB,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;oBACtE,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;wBACnC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC9B,WAAW,CAAC,IAAI,CAAC,OAAO
,CAAC,CAAC;wBAC5B,CAAC;oBACH,CAAC;gBACH,CAAC;YAEH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC,0CAA0C,UAAU,GAAG,EAAE,KAAK,CAAC,CAAC;gBAC7E,SAAS;YACX,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAAC,WAAW,CAAC,CAAC;QAC7D,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;QAE3D,OAAO,CAAC,GAAG,CAAC,oCAAoC,cAAc,CAAC,MAAM,yBAAyB,SAAS,QAAQ,CAAC,CAAC;QACjH,OAAO,cAAc,CAAC;IACxB,CAAC;IAEO,KAAK,CAAC,SAAS,CAAC,GAAW;QACjC,IAAI,CAAC;YACH,OAAO,MAAM,yCAAiB,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,IAAI,EAAE;gBACrD,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;gBAErE,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;wBAChC,OAAO,EAAE;4BACP,YAAY,EAAE,IAAI,CAAC,SAAS;4BAC5B,QAAQ,EAAE,iEAAiE;yBAC5E;wBACD,MAAM,EAAE,UAAU,CAAC,MAAM;qBAC1B,CAAC,CAAC;oBAEH,YAAY,CAAC,SAAS,CAAC,CAAC;oBAExB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;wBACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;oBACrE,CAAC;oBAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;oBAC/D,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;wBACvC,MAAM,IAAI,KAAK,CAAC,qBAAqB,WAAW,EAAE,CAAC,CAAC;oBACtD,CAAC;oBAED,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBAE/B,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,YAAY,CAAC,SAAS,CAAC,CAAC;oBACxB,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,uCAAuC,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,iBAAiB,CACvB,IAAY,EACZ,OAAe,EACf,MAAsB;QAEtB,MAAM,QAAQ,GAAuB,EAAE,CAAC;QAExC,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;YAEnC,iCAAiC;YACjC,MAAM,CAAC,SAAS,EAAE,gBAAgB,EAAE,OAAO,CAAC,QAAQ,CAAC,EAAE;gBACrD,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;YACvB,CAAC,CAAC,CAAC;YAEH,2CAA2C;YAC3C,MAAM,CAAC,SAAS,EAAE,YAAY,EAAE,OAAO,CAAC,QAAQ,CAAC,EAAE;gBACjD,CAAC,CAAC,QAAQ,CAAC,CAAC,I
AAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;oBAC9B,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;oBACzB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBAEhC,IAAI,CAAC,IAAI;wBAAE,OAAO;oBAElB,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;oBACnD,IAAI,CAAC,WAAW,IAAI,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;wBAC9C,OAAO;oBACT,CAAC;oBAED,gBAAgB;oBAChB,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,WAAW,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;wBACrD,OAAO;oBACT,CAAC;oBAED,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;oBAE1B,8BAA8B;oBAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,kBAAkB,CAAC,KAAK,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;oBAC/D,IAAI,OAAO,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,eAAe,IAAI,GAAG,CAAC,EAAE,CAAC;wBACzE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACzB,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;YAEH,gDAAgD;YAChD,MAAM,kBAAkB,GAAG,IAAI,CAAC,qBAAqB,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAClE,kBAAkB,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;gBACnC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC/B,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;oBAC1B,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC,CAAC,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,qCAAqC,EAAE,KAAK,CAAC,CAAC;QAC9D,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,kBAAkB,CACxB,KAA2B,EAC3B,CAAqB,EACrB,GAAW;QAEX,IAAI,KAAK,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAChC,IAAI,UAAU,GAAG,GAAG,CAAC;QACrB,IAAI,aAA+B,CAAC;QACpC,IAAI,WAA+B,CAAC;QAEpC,gDAAgD;QAChD,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC;YAC9E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACxF,IAAI,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;oBACrD,KAAK,GAAG,WAAW,CAAC;oBACpB,UAAU,IAAI,GAAG,CAAC;gBACpB,CAAC;YACH,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,MAAM,YAAY,GAAG,KAAK,CAAC,OAAO,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC,KAAK,EAAE,CAAC;QACjH,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,
MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAC7E,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;gBACtC,IAAI,IAAI,EAAE,CAAC;oBACT,aAAa,GAAG,IAAI,CAAC;oBACrB,UAAU,IAAI,GAAG,CAAC;gBACpB,CAAC;YACH,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,kCAAkC,CAAC,CAAC;QAClE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAC1E,IAAI,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBAC3C,WAAW,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC;gBACpD,UAAU,IAAI,GAAG,CAAC;YACpB,CAAC;QACH,CAAC;QAED,yCAAyC;QACzC,IAAI,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC;YACjC,UAAU,IAAI,GAAG,CAAC;QACpB,CAAC;QAED,0CAA0C;QAC1C,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,IAAI,EAAE,IAAI,KAAK,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YACvD,UAAU,IAAI,GAAG,CAAC;QACpB,CAAC;QAED,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YAChC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO;YACL,GAAG;YACH,KAAK;YACL,aAAa;YACb,WAAW;YACX,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC;YACrC,MAAM,EAAE,WAAW;SACpB,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAAC,CAAqB,EAAE,OAAe;QAClE,MAAM,QAAQ,GAAuB,EAAE,CAAC;QAExC,kCAAkC;QAClC,CAAC,CAAC,oCAAoC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;YAC1D,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;gBACnC,IAAI,CAAC,QAAQ;oBAAE,OAAO;gBAEtB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBAClC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBAElD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,IAAI,CAAC,OAAO,CAAC,KAAK,SAAS,IAAI,IAAI,CAAC,OAAO,CAAC,KAAK,aAAa,EAAE,CAAC;wBACnE,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC,KAAK,CAAC,CAAC;wBACvD,IAAI,GAAG,EAAE,CAAC;4BACR,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;4BAClD,IAAI,WAAW,EAAE,CAAC;gCAChB,QAAQ,CAAC,IAAI,CAAC;oCACZ,GAAG,EAAE,WAAW;oCAChB,KAAK,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAA
C,IAAI;oCACjC,aAAa,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,SAAS;oCAC5E,WAAW,EAAE,IAAI,CAAC,WAAW;oCAC7B,UAAU,EAAE,GAAG;oCACf,MAAM,EAAE,iBAAiB;iCAC1B,CAAC,CAAC;4BACL,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,yBAAyB;YAC3B,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAC5B,UAAkB,EAClB,OAGC;QAED,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;YAC9C,IAAI,CAAC,IAAI;gBAAE,OAAO,EAAE,CAAC;YAErB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7B,MAAM,QAAQ,GAAa,EAAE,CAAC;YAE9B,4BAA4B;YAC5B,MAAM,kBAAkB,GAAG,OAAO,CAAC,kBAAkB;gBACnD,oEAAoE,CAAC;YAEvE,CAAC,CAAC,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;gBACxC,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;gBACzB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAChC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;gBAE/C,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;oBAC1E,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;oBACtD,IAAI,WAAW,EAAE,CAAC;wBAChB,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;oBAC7B,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,oBAAoB;QAE5D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,gDAAgD,EAAE,KAAK,CAAC,CAAC;YACtE,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,QAA4B;QACtD,MAAM,IAAI,GAAG,IAAI,GAAG,EAA4B,CAAC;QAEjD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvC,IAAI,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,GAAG,QAAQ,CAAC,UAAU,EAAE,CAAC;gBAC1D,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IACnC,CAAC;IAEO,aAAa,CAAC,GAAW,EAAE,OAAmC;QACpE,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAEnC,+BAA+B;QAC/B,IAAI,OAAO,CAAC,eAAe,EA
AE,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAChE,OAAO,KAAK,CAAC;QACf,CAAC;QAED,yBAAyB;QACzB,IAAI,OAAO,CAAC,eAAe,EAAE,MAAM;YAC/B,CAAC,OAAO,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAChE,OAAO,KAAK,CAAC;QACf,CAAC;QAED,wBAAwB;QACxB,IAAI,OAAO,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;YACnC,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC5B,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;gBAC7C,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CACzC,MAAM,KAAK,OAAO,CAAC,WAAW,EAAE,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CACjF,EAAE,CAAC;oBACF,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,kBAAkB,CAAC,GAAW;QACpC,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAEnC,MAAM,eAAe,GAAG;YACtB,iBAAiB;YACjB,cAAc;YACd,WAAW;YACX,aAAa;YACb,UAAU;YACV,UAAU;YACV,yBAAyB,EAAG,kBAAkB;YAC9C,kBAAkB;SACnB,CAAC;QAEF,OAAO,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;IACjE,CAAC;IAEO,SAAS,CAAC,UAAkB;QAClC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC;YAClC,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC;gBAC1B,0BAA0B;gBAC1B,MAAM,OAAO,GAAG;oBACd,yBAAyB,EAAG,aAAa;oBACzC,2BAA2B,EAAG,aAAa;oBAC3C,2BAA2B,CAAG,aAAa;iBAC5C,CAAC;gBAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;oBAC7B,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;oBACvC,IAAI,KAAK,EAAE,CAAC;wBACV,MAAM,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC;wBAC7B,oCAAoC;wBACpC,MAAM,QAAQ,GAAG,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;wBAC/C,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC;4BAC/B,OAAO,QAAQ,CAAC;wBAClB,CAAC;oBACH,CAAC;gBACH,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,UAAU,CAAC,GAAW,EAAE,OAAe;QAC7C,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,QAAQ,EAAE,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;
YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,WAAW,CAAC,aAA6B,EAAE,UAA0B;QAC3E,OAAO;YACL,SAAS,EAAE;gBACT,GAAG,aAAa,CAAC,SAAS;gBAC1B,GAAG,UAAU,CAAC,SAAS;gBACvB,YAAY,EAAE;oBACZ,GAAG,CAAC,aAAa,CAAC,SAAS,EAAE,YAAY,IAAI,EAAE,CAAC;oBAChD,GAAG,CAAC,UAAU,CAAC,SAAS,EAAE,YAAY,IAAI,EAAE,CAAC;iBAC9C;aACF;YACD,OAAO,EAAE;gBACP,GAAG,aAAa,CAAC,OAAO;gBACxB,GAAG,UAAU,CAAC,OAAO;gBACrB,eAAe,EAAE;oBACf,GAAG,CAAC,aAAa,CAAC,OAAO,EAAE,eAAe,IAAI,EAAE,CAAC;oBACjD,GAAG,CAAC,UAAU,CAAC,OAAO,EAAE,eAAe,IAAI,EAAE,CAAC;iBAC/C;gBACD,eAAe,EAAE;oBACf,GAAG,CAAC,aAAa,CAAC,OAAO,EAAE,eAAe,IAAI,EAAE,CAAC;oBACjD,GAAG,CAAC,UAAU,CAAC,OAAO,EAAE,eAAe,IAAI,EAAE,CAAC;iBAC/C;aACF;YACD,MAAM,EAAE;gBACN,GAAG,aAAa,CAAC,MAAM;gBACvB,GAAG,UAAU,CAAC,MAAM;aACrB;YACD,kBAAkB,EAAE;gBAClB,GAAG,aAAa,CAAC,kBAAkB;gBACnC,GAAG,UAAU,CAAC,kBAAkB;aACjC;SACF,CAAC;IACJ,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,qBAAqB,CACjC,GAAW,EACX,MAAsB;QAEtB,IAAI,CAAC;YACH,2CAA2C;YAC3C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC;gBACpC,OAAO,CAAC,IAAI,CAAC,0FAA0F,CAAC,CAAC;gBACzG,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACrC,MAAM,KAAK,GAAG,8CAA8C,MAAM,iCAAiC,CAAC;YAEpG,OAAO,CAAC,GAAG,CAAC,2DAA2D,MAAM,EAAE,CAAC,CAAC;YAEjF,6BAA6B;YAC7B,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,4CAA4C,EAAE;gBACzE,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,cAAc,EAAE,kBAAkB;oBAClC,eAAe,EAAE,UAAU,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE;iBAC5D;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,MAAM,CAAC,kBAAkB,EAAE,KAAK,IAAI,iBAAiB,CAAC,KAAK;oBAClE,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;oBAC5C,UAAU,EAAE,IAAI;oBAChB,gBAAgB,EAAE,IAAI;oBACtB,qBAAqB,EAAE,MAAM,CAAC,kBAAkB,EAAE,aAAa,IAAI,KAAK;iBACzE,CAAC;aACH,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YACrF,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,QAAQ,GAAuB,EAAE,CAAC;YAExC,sCAAsC;YACtC,IAAI,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;gBACpD,KAAK,MAAM,Q
AAQ,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oBACtC,IAAI,CAAC;wBACH,MAAM,WAAW,GAAG,QAAkB,CAAC;wBACvC,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC;wBAErD,IAAI,cAAc,KAAK,MAAM,IAAI,cAAc,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC/E,QAAQ,CAAC,IAAI,CAAC;gCACZ,GAAG,EAAE,WAAW;gCAChB,KAAK,EAAE,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,MAAM;gCAC7C,UAAU,EAAE,GAAG;gCACf,MAAM,EAAE,WAAoB;6BAC7B,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,SAAS;oBACX,CAAC;gBACH,CAAC;YACH,CAAC;YAED,eAAe;YACf,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,EAAE,eAAe,IAAI,GAAG,CAAC;YACvD,MAAM,eAAe,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;YAEpD,OAAO,CAAC,GAAG,CAAC,oCAAoC,eAAe,CAAC,MAAM,WAAW,CAAC,CAAC;YAEnF,OAAO,eAAe,CAAC;QAEzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,6CAA6C,EAAE,KAAK,CAAC,CAAC;YACpE,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;CACF;AAzoBD,kCAyoBC;AAED,0BAA0B;AACb,QAAA,iBAAiB,GAAG,IAAI,WAAW,EAAE,CAAC"}