@tyroneross/blog-scraper 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +254 -279
  3. package/dist/lib/circuit-breaker.d.ts +29 -0
  4. package/dist/lib/circuit-breaker.d.ts.map +1 -0
  5. package/dist/lib/circuit-breaker.js +89 -0
  6. package/dist/lib/circuit-breaker.js.map +1 -0
  7. package/dist/lib/content-extractor.d.ts +13 -0
  8. package/dist/lib/content-extractor.d.ts.map +1 -0
  9. package/dist/lib/content-extractor.js +75 -0
  10. package/dist/lib/content-extractor.js.map +1 -0
  11. package/dist/lib/formatters/html-to-markdown.d.ts +21 -0
  12. package/dist/lib/formatters/html-to-markdown.d.ts.map +1 -0
  13. package/dist/lib/formatters/html-to-markdown.js +146 -0
  14. package/dist/lib/formatters/html-to-markdown.js.map +1 -0
  15. package/dist/lib/formatters/text-cleaner.d.ts +44 -0
  16. package/dist/lib/formatters/text-cleaner.d.ts.map +1 -0
  17. package/dist/lib/formatters/text-cleaner.js +143 -0
  18. package/dist/lib/formatters/text-cleaner.js.map +1 -0
  19. package/dist/lib/index.d.ts +96 -0
  20. package/dist/lib/index.d.ts.map +1 -0
  21. package/dist/lib/index.js +184 -0
  22. package/dist/lib/index.js.map +1 -0
  23. package/dist/lib/quality-scorer.d.ts +83 -0
  24. package/dist/lib/quality-scorer.d.ts.map +1 -0
  25. package/dist/lib/quality-scorer.js +376 -0
  26. package/dist/lib/quality-scorer.js.map +1 -0
  27. package/dist/lib/rss-utils.d.ts +31 -0
  28. package/dist/lib/rss-utils.d.ts.map +1 -0
  29. package/dist/lib/rss-utils.js +175 -0
  30. package/dist/lib/rss-utils.js.map +1 -0
  31. package/dist/lib/scraping-rate-limiter.d.ts +52 -0
  32. package/dist/lib/scraping-rate-limiter.d.ts.map +1 -0
  33. package/dist/lib/scraping-rate-limiter.js +238 -0
  34. package/dist/lib/scraping-rate-limiter.js.map +1 -0
  35. package/dist/lib/source-orchestrator.d.ts +306 -0
  36. package/dist/lib/source-orchestrator.d.ts.map +1 -0
  37. package/dist/lib/source-orchestrator.js +840 -0
  38. package/dist/lib/source-orchestrator.js.map +1 -0
  39. package/dist/lib/types.d.ts +143 -0
  40. package/dist/lib/types.d.ts.map +1 -0
  41. package/dist/lib/types.js +7 -0
  42. package/dist/lib/types.js.map +1 -0
  43. package/dist/lib/web-scrapers/content-extractor.d.ts +62 -0
  44. package/dist/lib/web-scrapers/content-extractor.d.ts.map +1 -0
  45. package/dist/lib/web-scrapers/content-extractor.js +531 -0
  46. package/dist/lib/web-scrapers/content-extractor.js.map +1 -0
  47. package/dist/lib/web-scrapers/html-scraper.d.ts +74 -0
  48. package/dist/lib/web-scrapers/html-scraper.d.ts.map +1 -0
  49. package/dist/lib/web-scrapers/html-scraper.js +598 -0
  50. package/dist/lib/web-scrapers/html-scraper.js.map +1 -0
  51. package/dist/lib/web-scrapers/playwright-scraper.d.ts +57 -0
  52. package/dist/lib/web-scrapers/playwright-scraper.d.ts.map +1 -0
  53. package/dist/lib/web-scrapers/playwright-scraper.js +355 -0
  54. package/dist/lib/web-scrapers/playwright-scraper.js.map +1 -0
  55. package/dist/lib/web-scrapers/robots-checker.d.ts +42 -0
  56. package/dist/lib/web-scrapers/robots-checker.d.ts.map +1 -0
  57. package/dist/lib/web-scrapers/robots-checker.js +285 -0
  58. package/dist/lib/web-scrapers/robots-checker.js.map +1 -0
  59. package/dist/lib/web-scrapers/rss-discovery.d.ts +62 -0
  60. package/dist/lib/web-scrapers/rss-discovery.d.ts.map +1 -0
  61. package/dist/lib/web-scrapers/rss-discovery.js +384 -0
  62. package/dist/lib/web-scrapers/rss-discovery.js.map +1 -0
  63. package/dist/lib/web-scrapers/sitemap-parser.d.ts +65 -0
  64. package/dist/lib/web-scrapers/sitemap-parser.d.ts.map +1 -0
  65. package/dist/lib/web-scrapers/sitemap-parser.js +430 -0
  66. package/dist/lib/web-scrapers/sitemap-parser.js.map +1 -0
  67. package/package.json +54 -33
  68. package/dist/index.d.mts +0 -949
  69. package/dist/index.d.ts +0 -949
  70. package/dist/index.js +0 -3236
  71. package/dist/index.mjs +0 -3165
@@ -0,0 +1,89 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.circuitBreakers = exports.CircuitBreaker = void 0;
/**
 * Minimal circuit breaker around an asynchronous operation.
 *
 * States:
 *  - CLOSED:    operations run normally; consecutive failures are counted.
 *  - OPEN:      calls are rejected immediately until `resetTimeout` ms have
 *               elapsed since the last recorded failure.
 *  - HALF_OPEN: the next call is let through as a trial. Success closes the
 *               circuit; failure re-opens it (the failure counter is only
 *               reset on success, so it is still at/above the threshold).
 *
 * Expected `options`: `name`, `failureThreshold`, `timeout` (ms per
 * operation) and `resetTimeout` (ms).
 */
class CircuitBreaker {
    constructor(options) {
        this.failures = 0;
        this.lastFailureTime = 0;
        this.state = 'CLOSED';
        this.options = options;
    }

    /**
     * Run `operation` under the breaker.
     *
     * @param {Function} operation - zero-argument function; may return a
     *   promise or a plain value.
     * @returns {Promise<*>} the operation's result.
     * @throws {Error} immediately when the circuit is OPEN, on timeout, or
     *   whatever the operation itself throws/rejects with.
     */
    async execute(operation) {
        if (this.state === 'OPEN') {
            const sinceLastFailure = Date.now() - this.lastFailureTime;
            if (sinceLastFailure < this.options.resetTimeout) {
                throw new Error(`[CircuitBreaker:${this.options.name}] Circuit is OPEN - preventing request`);
            }
            this.state = 'HALF_OPEN';
            console.log(`🔄 [CircuitBreaker:${this.options.name}] Circuit moving to HALF_OPEN state`);
        }
        try {
            const result = await this.executeWithTimeout(operation);
            this.onSuccess();
            return result;
        }
        catch (error) {
            this.onFailure();
            throw error;
        }
    }

    /**
     * Race `operation` against a timer of `options.timeout` ms.
     *
     * The timer is always cleared in `finally`, including when `operation`
     * throws synchronously, so no stray timer keeps the process alive.
     */
    async executeWithTimeout(operation) {
        let timer;
        const timeoutReached = new Promise((_resolve, reject) => {
            timer = setTimeout(() => {
                reject(new Error(`[CircuitBreaker:${this.options.name}] Operation timeout after ${this.options.timeout}ms`));
            }, this.options.timeout);
        });
        try {
            // The executor converts a synchronous throw into a rejection and
            // adopts either a promise or a plain return value.
            const pending = new Promise((resolve) => resolve(operation()));
            return await Promise.race([pending, timeoutReached]);
        }
        finally {
            clearTimeout(timer);
        }
    }

    /** Reset the failure counter and close the circuit. */
    onSuccess() {
        this.failures = 0;
        this.state = 'CLOSED';
    }

    /** Record a failure and open the circuit once the threshold is reached. */
    onFailure() {
        this.failures++;
        this.lastFailureTime = Date.now();
        if (this.failures >= this.options.failureThreshold) {
            this.state = 'OPEN';
            console.error(`❌ [CircuitBreaker:${this.options.name}] Circuit opened after ${this.failures} failures`);
        }
    }

    /** @returns {{state: string, failures: number, lastFailureTime: number}} snapshot of the breaker. */
    getState() {
        return {
            state: this.state,
            failures: this.failures,
            lastFailureTime: this.lastFailureTime
        };
    }
}
67
+ exports.CircuitBreaker = CircuitBreaker;
// Pre-configured circuit breakers for common services
// All durations are in milliseconds. `timeout` bounds a single operation;
// `resetTimeout` is how long an OPEN breaker keeps rejecting calls before it
// lets a trial request through. These are shared module-level instances, so
// failure counts accumulate across every caller that uses the same entry.
exports.circuitBreakers = {
    // RSS work: opens after 3 counted failures.
    rss: new CircuitBreaker({
        name: 'RSS',
        failureThreshold: 3,
        timeout: 15000, // 15 seconds
        resetTimeout: 30000 // 30 seconds
    }),
    // Scraping work: tolerates more failures (5) with a shorter per-call timeout.
    scraping: new CircuitBreaker({
        name: 'Scraping',
        failureThreshold: 5,
        timeout: 10000, // 10 seconds
        resetTimeout: 30000 // 30 seconds
    }),
    // Scraping test runs: much longer per-call timeout and a longer cool-down.
    scrapingTest: new CircuitBreaker({
        name: 'ScrapingTest',
        failureThreshold: 3,
        timeout: 120000, // 120 seconds to accommodate large sites with many sitemaps
        resetTimeout: 60000 // 1 minute
    })
};
89
+ //# sourceMappingURL=circuit-breaker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"circuit-breaker.js","sourceRoot":"","sources":["../../lib/circuit-breaker.ts"],"names":[],"mappings":";;;AAOA,MAAa,cAAc;IAMzB,YAAY,OAA8B;QALlC,aAAQ,GAAG,CAAC,CAAC;QACb,oBAAe,GAAG,CAAC,CAAC;QACpB,UAAK,GAAoC,QAAQ,CAAC;QAIxD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,OAAO,CAAI,SAA2B;QAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;YAC1B,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;gBAClE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,CAAC,OAAO,CAAC,IAAI,wCAAwC,CAAC,CAAC;YAChG,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,KAAK,GAAG,WAAW,CAAC;gBACzB,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,CAAC,OAAO,CAAC,IAAI,qCAAqC,CAAC,CAAC;YAC5F,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,CAAC;YACxD,IAAI,CAAC,SAAS,EAAE,CAAC;YACjB,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,SAAS,EAAE,CAAC;YACjB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAAI,SAA2B;QAC7D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;gBAC5B,MAAM,CAAC,IAAI,KAAK,CAAC,mBAAmB,IAAI,CAAC,OAAO,CAAC,IAAI,6BAA6B,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC;YAC/G,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAEzB,SAAS,EAAE;iBACR,IAAI,CAAC,MAAM,CAAC,EAAE;gBACb,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC,CAAC;iBACD,KAAK,CAAC,KAAK,CAAC,EAAE;gBACb,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,MAAM,CAAC,KAAK,CAAC,CAAC;YAChB,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,SAAS;QACf,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;QAClB,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC;IACxB,CAAC;IAEO,SAAS;QACf,IAAI,CAAC,QAAQ,EAAE,CAAC;QAChB,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAElC,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,CAAC;YACnD,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,qBAAqB,IAAI,CAAC,OAAO,CAAC,IAAI,0BAA0B,IAAI,CAAC,QAAQ,WAAW,CAAC,CAAC;QAC1G,CAAC;IACH,CAAC;IAED,QAAQ;QACN,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,eAAe,EAAE,IAAI,CAAC,e
AAe;SACtC,CAAC;IACJ,CAAC;CACF;AAtED,wCAsEC;AAED,sDAAsD;AACzC,QAAA,eAAe,GAAG;IAC7B,GAAG,EAAE,IAAI,cAAc,CAAC;QACtB,IAAI,EAAE,KAAK;QACX,gBAAgB,EAAE,CAAC;QACnB,OAAO,EAAE,KAAK,EAAE,aAAa;QAC7B,YAAY,EAAE,KAAK,CAAC,aAAa;KAClC,CAAC;IAEF,QAAQ,EAAE,IAAI,cAAc,CAAC;QAC3B,IAAI,EAAE,UAAU;QAChB,gBAAgB,EAAE,CAAC;QACnB,OAAO,EAAE,KAAK,EAAE,aAAa;QAC7B,YAAY,EAAE,KAAK,CAAC,aAAa;KAClC,CAAC;IAEF,YAAY,EAAE,IAAI,cAAc,CAAC;QAC/B,IAAI,EAAE,cAAc;QACpB,gBAAgB,EAAE,CAAC;QACnB,OAAO,EAAE,MAAM,EAAE,4DAA4D;QAC7E,YAAY,EAAE,KAAK,CAAC,WAAW;KAChC,CAAC;CACH,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Simplified content extractor for standalone app
3
+ * Uses Mozilla Readability for article extraction
4
+ */
5
+ import { ExtractedContent } from './types';
6
+ export declare class ContentExtractor {
7
+ extractContent(url: string): Promise<ExtractedContent | null>;
8
+ /**
9
+ * Extract published time from document metadata
10
+ */
11
+ private extractPublishedTime;
12
+ }
13
+ //# sourceMappingURL=content-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-extractor.d.ts","sourceRoot":"","sources":["../../lib/content-extractor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,qBAAa,gBAAgB;IACrB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IA6CnE;;OAEG;IACH,OAAO,CAAC,oBAAoB;CAqB7B"}
@@ -0,0 +1,75 @@
1
+ "use strict";
2
+ /**
3
+ * Simplified content extractor for standalone app
4
+ * Uses Mozilla Readability for article extraction
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.ContentExtractor = void 0;
8
+ const readability_1 = require("@mozilla/readability");
9
+ const jsdom_1 = require("jsdom");
/**
 * Fetches a page and extracts its main article using Mozilla Readability
 * on a jsdom document.
 */
class ContentExtractor {
    /**
     * Download `url` and extract the article it contains.
     *
     * @param {string} url - page to fetch; also passed to jsdom as the
     *   document URL.
     * @returns {Promise<object|null>} an object with `title`, `byline`,
     *   `content`, `textContent`, `length`, `excerpt`, `siteName`,
     *   `publishedTime` and `readingTime` (minutes at 200 words/minute), or
     *   `null` when the response is not OK, Readability finds no article, or
     *   any step throws (the error is logged, not rethrown).
     */
    async extractContent(url) {
        try {
            const response = await fetch(url, {
                headers: {
                    'User-Agent': 'Mozilla/5.0 (compatible; ScraperApp/1.0)',
                },
            });
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            const html = await response.text();
            const dom = new jsdom_1.JSDOM(html, { url });
            const reader = new readability_1.Readability(dom.window.document);
            const article = reader.parse();
            if (!article) {
                return null;
            }
            // Calculate reading time (200 words per minute).
            // Splitting on whitespace yields empty tokens for empty text and
            // for leading/trailing whitespace; drop them so they are not
            // counted as words.
            const wordCount = article.textContent?.split(/\s+/).filter(Boolean).length ?? 0;
            const readingTime = Math.ceil(wordCount / 200);
            // Extract publish time from metadata
            const publishedTime = this.extractPublishedTime(dom.window.document);
            return {
                title: article.title ?? undefined,
                byline: article.byline ?? undefined,
                content: article.content ?? undefined,
                textContent: article.textContent ?? undefined,
                length: article.length ?? undefined,
                excerpt: article.excerpt ?? undefined,
                siteName: article.siteName ?? undefined,
                publishedTime,
                readingTime,
            };
        }
        catch (error) {
            console.error(`[ContentExtractor] Failed to extract from ${url}:`, error);
            return null;
        }
    }

    /**
     * Extract published time from document metadata.
     *
     * Tries each selector in order and returns the first non-empty
     * `content` (or, failing that, `datetime`) attribute found.
     *
     * @param {Document} doc - document to query.
     * @returns {string|undefined} the raw attribute value, unparsed.
     */
    extractPublishedTime(doc) {
        const selectors = [
            'meta[property="article:published_time"]',
            'meta[name="article:published_time"]',
            'meta[property="og:published_time"]',
            'meta[name="published_time"]',
            'meta[name="date"]',
            'time[datetime]',
        ];
        for (const selector of selectors) {
            const element = doc.querySelector(selector);
            if (element) {
                const content = element.getAttribute('content') || element.getAttribute('datetime');
                if (content)
                    return content;
            }
        }
        return undefined;
    }
}
74
+ exports.ContentExtractor = ContentExtractor;
75
+ //# sourceMappingURL=content-extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content-extractor.js","sourceRoot":"","sources":["../../lib/content-extractor.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,sDAAmD;AACnD,iCAA8B;AAG9B,MAAa,gBAAgB;IAC3B,KAAK,CAAC,cAAc,CAAC,GAAW;QAC9B,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,YAAY,EAAE,0CAA0C;iBACzD;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YACrE,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,GAAG,GAAG,IAAI,aAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YACrC,MAAM,MAAM,GAAG,IAAI,yBAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YACpD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;YAE/B,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO,IAAI,CAAC;YACd,CAAC;YAED,gDAAgD;YAChD,MAAM,SAAS,GAAG,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;YAE/C,qCAAqC;YACrC,MAAM,aAAa,GAAG,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAErE,OAAO;gBACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;gBACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;gBACrC,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,SAAS;gBAC7C,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;gBACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;gBACvC,aAAa;gBACb,WAAW;aACZ,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,6CAA6C,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,GAAa;QACxC,+BAA+B;QAC/B,MAAM,SAAS,GAAG;YAChB,yCAAyC;YACzC,qCAAqC;YACrC,oCAAoC;YACpC,6BAA6B;YAC7B,mBAAmB;YACnB,gBAAgB;SACjB,CAAC;QAEF,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YAC5C,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,OAAO,GAAG,OAAO,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,OAAO,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;gBACpF,IAAI,OAAO;oBAAE,OAAO,OAAO,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;CACF;AAtED,4CAsEC"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Convert HTML to clean Markdown
3
+ * - Preserves headings, bold, lists, links, code blocks
4
+ * - Strips navigation, forms, UI elements
5
+ * - Smart paragraph detection
6
+ */
7
+ export declare function htmlToMarkdown(html: string): string;
8
+ /**
9
+ * Strip non-article content from HTML before conversion
10
+ * Removes navigation, forms, UI elements
11
+ */
12
+ export declare function stripNonArticleContent(html: string): string;
13
+ /**
14
+ * Convert HTML to Markdown with full cleaning
15
+ * This is the main function developers should use
16
+ */
17
+ export declare function convertToMarkdown(html: string, options?: {
18
+ cleanNonArticle?: boolean;
19
+ smartParagraphs?: boolean;
20
+ }): string;
21
+ //# sourceMappingURL=html-to-markdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-to-markdown.d.ts","sourceRoot":"","sources":["../../../lib/formatters/html-to-markdown.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAgDnD;AA0DD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAyB3D;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE;IACvD,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;CACtB,GAAG,MAAM,CAiBd"}
@@ -0,0 +1,146 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.htmlToMarkdown = htmlToMarkdown;
7
+ exports.stripNonArticleContent = stripNonArticleContent;
8
+ exports.convertToMarkdown = convertToMarkdown;
9
+ const turndown_1 = __importDefault(require("turndown"));
/**
 * Convert HTML to clean Markdown
 * - Preserves headings, bold, lists, links, code blocks
 * - Strips navigation, forms, UI elements
 * - Smart paragraph detection
 *
 * @param {string} html - HTML fragment or document; falsy input yields ''.
 * @returns {string} Markdown after paragraph spacing and whitespace cleanup.
 */
function htmlToMarkdown(html) {
    if (!html)
        return '';
    // Create Turndown service with custom rules
    const turndownService = new turndown_1.default({
        headingStyle: 'atx', // Use # for headings
        codeBlockStyle: 'fenced', // Use ``` for code blocks
        bulletListMarker: '-', // Use - for lists
        emDelimiter: '*', // Use * for emphasis
        strongDelimiter: '**', // Use ** for strong
    });
    // Remove unwanted elements before conversion
    turndownService.remove([
        'script',
        'style',
        'nav',
        'header',
        'footer',
        'aside',
        'form',
        'button',
        'input',
        'select',
        'textarea',
        'iframe',
        'noscript',
    ]);
    // Custom rule: unwrap generic containers.
    // Rules added with addRule take precedence over Turndown's built-in
    // paragraph/block handling, so block-level wrappers must re-emit the
    // blank-line separation themselves; returning bare content would glue
    // adjacent paragraphs together ("<p>a</p><p>b</p>" -> "ab").
    turndownService.addRule('cleanAttributes', {
        filter: ['div', 'span', 'p', 'section', 'article'],
        replacement: (content, node) => {
            const isInline = String(node.nodeName).toLowerCase() === 'span';
            return isInline ? content : `\n\n${content}\n\n`;
        },
    });
    // Convert HTML to Markdown
    let markdown = turndownService.turndown(html);
    // Post-processing: Smart paragraph detection
    markdown = smartParagraphDetection(markdown);
    // Clean up excessive whitespace
    markdown = normalizeWhitespace(markdown);
    return markdown;
}
/**
 * Insert blank lines around structural Markdown elements.
 *
 * For every line, the line itself is emitted, followed by an extra empty
 * line for each of these conditions that holds:
 *  - the line is a heading and the next line is non-empty and not a heading;
 *  - the next line is a heading, the current line is non-empty and not a
 *    heading, and the previous line is not empty;
 *  - the line is a list item and the next line is non-empty and not a
 *    list item.
 *
 * @param {string} markdown
 * @returns {string}
 */
function smartParagraphDetection(markdown) {
    const headingPattern = /^#{1,6}\s/;
    const listItemPattern = /^[-*+]\s/;
    const rows = markdown.split('\n');
    const spaced = rows.flatMap((current, idx) => {
        const before = idx === 0 ? '' : rows[idx - 1];
        const after = idx + 1 < rows.length ? rows[idx + 1] : '';
        const currentIsHeading = headingPattern.test(current);
        const afterIsHeading = headingPattern.test(after);
        const emitted = [current];
        // Blank line after a heading
        if (currentIsHeading && after && !afterIsHeading) {
            emitted.push('');
        }
        // Blank line before a heading
        if (afterIsHeading && current && !currentIsHeading && before !== '') {
            emitted.push('');
        }
        // Blank line after the end of a list
        if (listItemPattern.test(current) && after && !listItemPattern.test(after)) {
            emitted.push('');
        }
        return emitted;
    });
    return spaced.join('\n');
}
/**
 * Normalize whitespace
 * - Trim lines
 * - Remove excessive line breaks (more than 2)
 *
 * Lines are trimmed before runs of line breaks are collapsed, so lines that
 * contained only spaces cannot leave three or more consecutive newlines.
 *
 * NOTE(review): trimming every line also removes leading indentation, which
 * presumably affects nested lists and fenced code content — confirm whether
 * that is acceptable for callers.
 *
 * @param {string} markdown
 * @returns {string}
 */
function normalizeWhitespace(markdown) {
    return markdown
        .split('\n')
        .map(line => line.trim())
        .join('\n')
        // Replace 3+ consecutive line breaks with just 2
        .replace(/\n{3,}/g, '\n\n')
        // Remove leading/trailing whitespace
        .trim();
}
/**
 * Strip non-article content from HTML before conversion
 * Removes navigation, forms, UI elements
 *
 * This is a regex-based best-effort pass, not an HTML parser: nested
 * elements of the same tag are cut at the first closing tag.
 *
 * @param {string} html - raw HTML; falsy input yields ''.
 * @returns {string} HTML with the matched blocks removed and double-quoted
 *   `class`, `id` and `data-*` attributes stripped.
 */
function stripNonArticleContent(html) {
    if (!html)
        return '';
    // `[\s\S]*?` instead of `.*?` so that blocks spanning several lines
    // (the normal case in real pages) are matched too.
    const nonArticlePatterns = [
        /<nav\b[^>]*>[\s\S]*?<\/nav>/gi,
        /<header\b[^>]*>[\s\S]*?<\/header>/gi,
        /<footer\b[^>]*>[\s\S]*?<\/footer>/gi,
        /<aside\b[^>]*>[\s\S]*?<\/aside>/gi,
        /<form\b[^>]*>[\s\S]*?<\/form>/gi,
        /<div[^>]*class="[^"]*(?:nav|menu|sidebar|advertisement|ads|social|share|comment|popup|modal)[^"]*"[^>]*>[\s\S]*?<\/div>/gi,
        /<div[^>]*id="[^"]*(?:nav|menu|sidebar|advertisement|ads|social|share|comment|popup|modal)[^"]*"[^>]*>[\s\S]*?<\/div>/gi,
    ];
    let cleaned = html;
    for (const pattern of nonArticlePatterns) {
        cleaned = cleaned.replace(pattern, '');
    }
    // Remove all class, id, and data attributes in a single pass.
    // Requiring leading whitespace keeps `id=` from matching the tail of
    // `data-id=`, and `[^\s=]*` keeps a valueless `data-x` from swallowing
    // the attribute that follows it.
    cleaned = cleaned.replace(/\s+(?:class|id|data-[^\s=]*)="[^"]*"/gi, '');
    return cleaned;
}
/**
 * One-call HTML → Markdown conversion with optional pre-cleaning.
 * This is the main function developers should use.
 *
 * @param {string} html - HTML to convert.
 * @param {{cleanNonArticle?: boolean, smartParagraphs?: boolean}} [options]
 *   `cleanNonArticle` (default true) runs stripNonArticleContent first.
 *   `smartParagraphs` is accepted for API compatibility but is not consulted
 *   by this function.
 * @returns {string} Markdown produced by htmlToMarkdown.
 */
function convertToMarkdown(html, options = {}) {
    const { cleanNonArticle: shouldStrip = true } = options;
    // Step 1: optionally strip non-article markup; Step 2: convert.
    const source = shouldStrip ? stripNonArticleContent(html) : html;
    return htmlToMarkdown(source);
}
146
+ //# sourceMappingURL=html-to-markdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-to-markdown.js","sourceRoot":"","sources":["../../../lib/formatters/html-to-markdown.ts"],"names":[],"mappings":";;;;;AAQA,wCAgDC;AA8DD,wDAyBC;AAMD,8CAoBC;AAzKD,wDAAuC;AAEvC;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,4CAA4C;IAC5C,MAAM,eAAe,GAAG,IAAI,kBAAe,CAAC;QAC1C,YAAY,EAAE,KAAK,EAAE,qBAAqB;QAC1C,cAAc,EAAE,QAAQ,EAAE,0BAA0B;QACpD,gBAAgB,EAAE,GAAG,EAAE,kBAAkB;QACzC,WAAW,EAAE,GAAG,EAAE,qBAAqB;QACvC,eAAe,EAAE,IAAI,EAAE,oBAAoB;KAC5C,CAAC,CAAC;IAEH,6CAA6C;IAC7C,eAAe,CAAC,MAAM,CAAC;QACrB,QAAQ;QACR,OAAO;QACP,KAAK;QACL,QAAQ;QACR,QAAQ;QACR,OAAO;QACP,MAAM;QACN,QAAQ;QACR,OAAO;QACP,QAAQ;QACR,UAAU;QACV,QAAQ;QACR,UAAU;KACX,CAAC,CAAC;IAEH,iDAAiD;IACjD,eAAe,CAAC,OAAO,CAAC,iBAAiB,EAAE;QACzC,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,SAAS,CAAC;QAClD,WAAW,EAAE,CAAC,OAAO,EAAE,EAAE;YACvB,6CAA6C;YAC7C,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,2BAA2B;IAC3B,IAAI,QAAQ,GAAG,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAE9C,6CAA6C;IAC7C,QAAQ,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IAE7C,gCAAgC;IAChC,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IAEzC,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,SAAS,uBAAuB,CAAC,QAAgB;IAC/C,mBAAmB;IACnB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE1D,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAElB,sCAAsC;QACtC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,QAAQ,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;YACxE,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;QAED,uCAAuC;QACvC,IAAI,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7F,MAAM,C
AAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;QAED,6BAA6B;QAC7B,IAAI,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,QAAQ,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/F,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC3B,CAAC;AAED;;;;GAIG;AACH,SAAS,mBAAmB,CAAC,QAAgB;IAC3C,iDAAiD;IACjD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,iBAAiB;IACjB,QAAQ,GAAG,QAAQ;SAChB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SACxB,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,qCAAqC;IACrC,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAE3B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,SAAgB,sBAAsB,CAAC,IAAY;IACjD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,8EAA8E;IAC9E,MAAM,kBAAkB,GAAG;QACzB,0BAA0B;QAC1B,gCAAgC;QAChC,gCAAgC;QAChC,8BAA8B;QAC9B,4BAA4B;QAC5B,sHAAsH;QACtH,mHAAmH;KACpH,CAAC;IAEF,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,KAAK,MAAM,OAAO,IAAI,kBAAkB,EAAE,CAAC;QACzC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACzC,CAAC;IAED,4CAA4C;IAC5C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC;IACpD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC;IACjD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC;IAEzD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;GAGG;AACH,SAAgB,iBAAiB,CAAC,IAAY,EAAE,UAG5C,EAAE;IACJ,MAAM,EACJ,eAAe,GAAG,IAAI,EACtB,eAAe,GAAG,IAAI,GACvB,GAAG,OAAO,CAAC;IAEZ,IAAI,aAAa,GAAG,IAAI,CAAC;IAEzB,iDAAiD;IACjD,IAAI,eAAe,EAAE,CAAC;QACpB,aAAa,GAAG,sBAAsB,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC;IAED,8BAA8B;IAC9B,MAAM,QAAQ,GAAG,cAAc,CAAC,aAAa,CAAC,CAAC;IAE/C,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Text cleanup utilities
3
+ * Normalize whitespace, remove excessive line breaks, clean HTML entities
4
+ */
5
+ /**
6
+ * Clean text content
7
+ * - Normalize whitespace between paragraphs
8
+ * - Remove excessive line breaks
9
+ * - Decode HTML entities
10
+ * - Trim redundant spaces
11
+ */
12
+ export declare function cleanText(text: string): string;
13
+ /**
14
+ * Decode HTML entities (&nbsp;, &amp;, etc.)
15
+ */
16
+ export declare function decodeHTMLEntities(text: string): string;
17
+ /**
18
+ * Normalize whitespace
19
+ * - Replace multiple spaces with single space
20
+ * - Replace tabs with spaces
21
+ * - Remove trailing/leading whitespace from lines
22
+ */
23
+ export declare function normalizeWhitespace(text: string): string;
24
+ /**
25
+ * Detect paragraph boundaries and add proper spacing
26
+ * Looks for sentence endings followed by capital letters
27
+ */
28
+ export declare function detectParagraphs(text: string): string;
29
+ /**
30
+ * Remove URLs from text
31
+ * Useful for cleaning up citations or references
32
+ */
33
+ export declare function removeUrls(text: string): string;
34
+ /**
35
+ * Truncate text to a maximum length
36
+ * Breaks at word boundaries and adds ellipsis
37
+ */
38
+ export declare function truncateText(text: string, maxLength: number): string;
39
+ /**
40
+ * Extract plain text from HTML
41
+ * Quick and dirty HTML stripping
42
+ */
43
+ export declare function stripHTML(html: string): string;
44
+ //# sourceMappingURL=text-cleaner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text-cleaner.d.ts","sourceRoot":"","sources":["../../../lib/formatters/text-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAkB9C;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAgCvD;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAiBxD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA2BrD;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE/C;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAYpE;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAO9C"}
@@ -0,0 +1,143 @@
1
+ "use strict";
2
+ /**
3
+ * Text cleanup utilities
4
+ * Normalize whitespace, remove excessive line breaks, clean HTML entities
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.cleanText = cleanText;
8
+ exports.decodeHTMLEntities = decodeHTMLEntities;
9
+ exports.normalizeWhitespace = normalizeWhitespace;
10
+ exports.detectParagraphs = detectParagraphs;
11
+ exports.removeUrls = removeUrls;
12
+ exports.truncateText = truncateText;
13
+ exports.stripHTML = stripHTML;
/**
 * Clean text content by running it through the cleanup pipeline:
 *  1. decode HTML entities
 *  2. normalize whitespace
 *  3. detect paragraph boundaries
 *  4. trim the final result
 *
 * @param {string} text - raw text; falsy input yields ''.
 * @returns {string}
 */
function cleanText(text) {
    if (!text) {
        return '';
    }
    const stages = [decodeHTMLEntities, normalizeWhitespace, detectParagraphs];
    const processed = stages.reduce((current, stage) => stage(current), text);
    return processed.trim();
}
/**
 * Decode HTML entities (&nbsp;, &amp;, etc.)
 *
 * Named, decimal (&#123;) and hexadecimal (&#x1a2b;) entities are decoded in
 * a single pass, so the output of one replacement is never decoded again
 * ("&amp;lt;" becomes "&lt;", not "<"). Unknown named entities and numeric
 * references beyond U+10FFFF are left untouched.
 *
 * @param {string} text
 * @returns {string}
 */
function decodeHTMLEntities(text) {
    const namedEntities = new Map([
        ['nbsp', ' '],
        ['amp', '&'],
        ['lt', '<'],
        ['gt', '>'],
        ['quot', '"'],
        ['apos', "'"],
        ['ndash', '–'],
        ['mdash', '—'],
        ['hellip', '…'],
        ['ldquo', '"'],
        ['rdquo', '"'],
        ['lsquo', '\u2018'],
        ['rsquo', '\u2019'],
    ]);
    return text.replace(/&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi, (match, dec, hex, name) => {
        if (name !== undefined) {
            // Lookup is case-sensitive: only the lower-case names above decode.
            return namedEntities.get(name) ?? match;
        }
        const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
        // fromCodePoint handles astral characters (emoji etc.) and throws
        // above U+10FFFF, so out-of-range references are kept verbatim.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    });
}
/**
 * Normalize whitespace in plain text.
 *
 * Tabs become single spaces, runs of two or more spaces collapse to one
 * (line breaks are preserved), every line is trimmed, and finally three or
 * more consecutive line breaks are reduced to two.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeWhitespace(text) {
    const trimmedRows = text
        .replace(/\t/g, ' ')
        .replace(/ {2,}/g, ' ')
        .split('\n')
        .map((row) => row.trim());
    return trimmedRows.join('\n').replace(/\n{3,}/g, '\n\n');
}
/**
 * Re-derive paragraph breaks from sentence boundaries.
 *
 * Blank (whitespace-only) lines are dropped first. A single empty line is
 * then inserted after a line when all of the following hold:
 *  - the line ends with '.', '!' or '?';
 *  - the next line starts with an upper-case ASCII letter or a digit;
 *  - the line is longer than 40 characters (short lines are likely titles);
 *  - the next line is longer than 20 characters.
 *
 * @param {string} text
 * @returns {string}
 */
function detectParagraphs(text) {
    const rows = text.split('\n').filter((row) => row.trim().length > 0);
    const output = [];
    rows.forEach((row, idx) => {
        output.push(row);
        const following = idx + 1 < rows.length ? rows[idx + 1] : '';
        const endsSentence = /[.!?]$/.test(row);
        const startsNewSentence = /^[A-Z0-9]/.test(following);
        const bothLongEnough = row.length > 40 && following.length > 20;
        if (endsSentence && startsNewSentence && bothLongEnough) {
            output.push(''); // paragraph break
        }
    });
    return output.join('\n');
}
/**
 * Delete every http:// or https:// URL from the text.
 * A URL is taken to run up to the next whitespace character; surrounding
 * whitespace is left in place.
 *
 * @param {string} text
 * @returns {string}
 */
function removeUrls(text) {
    const urlPattern = /https?:\/\/[^\s]+/g;
    const withoutUrls = text.replace(urlPattern, '');
    return withoutUrls;
}
/**
 * Truncate text to a maximum length
 * Breaks at word boundaries and adds ellipsis
 *
 * Text of at most `maxLength` UTF-16 code units is returned unchanged.
 * Otherwise the text is cut at the last space within the first `maxLength`
 * units (or hard-cut when there is no usable space) and '…' is appended,
 * so the result may be one character longer than the cut itself.
 *
 * @param {string} text
 * @param {number} maxLength - limit in UTF-16 code units.
 * @returns {string}
 */
function truncateText(text, maxLength) {
    if (text.length <= maxLength)
        return text;
    // Find the last space before maxLength
    let truncated = text.substring(0, maxLength);
    const lastSpace = truncated.lastIndexOf(' ');
    if (lastSpace > 0) {
        return `${truncated.substring(0, lastSpace)}…`;
    }
    // A hard cut may land between the two halves of a surrogate pair; drop
    // the dangling high surrogate so no broken character is emitted.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        truncated = truncated.slice(0, -1);
    }
    return `${truncated}…`;
}
/**
 * Reduce HTML to plain text (quick, regex-based).
 *
 * Removes <script> and <style> elements together with their contents,
 * deletes all remaining tags, collapses every whitespace run to a single
 * space and trims the result.
 *
 * @param {string} html
 * @returns {string}
 */
function stripHTML(html) {
    const withoutScripts = html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
    const withoutStyles = withoutScripts.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
    const withoutTags = withoutStyles.replace(/<[^>]+>/g, '');
    const singleSpaced = withoutTags.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
143
+ //# sourceMappingURL=text-cleaner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text-cleaner.js","sourceRoot":"","sources":["../../../lib/formatters/text-cleaner.ts"],"names":[],"mappings":";AAAA;;;GAGG;;AASH,8BAkBC;AAKD,gDAgCC;AAQD,kDAiBC;AAMD,4CA2BC;AAMD,gCAEC;AAMD,oCAYC;AAMD,8BAOC;AA/JD;;;;;;GAMG;AACH,SAAgB,SAAS,CAAC,IAAY;IACpC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,IAAI,OAAO,GAAG,IAAI,CAAC;IAEnB,+BAA+B;IAC/B,OAAO,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAEtC,+BAA+B;IAC/B,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAEvC,oCAAoC;IACpC,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAEpC,eAAe;IACf,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAEzB,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,IAAY;IAC7C,MAAM,QAAQ,GAA2B;QACvC,QAAQ,EAAE,GAAG;QACb,OAAO,EAAE,GAAG;QACZ,MAAM,EAAE,GAAG;QACX,MAAM,EAAE,GAAG;QACX,QAAQ,EAAE,GAAG;QACb,QAAQ,EAAE,GAAG;QACb,QAAQ,EAAE,GAAG;QACb,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,GAAG;QACd,UAAU,EAAE,GAAG;QACf,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,QAAQ;QACnB,SAAS,EAAE,QAAQ;KACpB,CAAC;IAEF,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,KAAK,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QACtD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;IAC3D,CAAC;IAED,6CAA6C;IAC7C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CACjD,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CACxC,CAAC;IACF,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CACzD,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CACxC,CAAC;IAEF,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;GAKG;AACH,SAAgB,mBAAmB,CAAC,IAAY;IAC9C,2BAA2B;IAC3B,IAAI,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAE1C,uEAAuE;IACvE,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAE/C,iBAAiB;IACjB,UAAU,GAAG,UAAU;SACpB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SACxB,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,iDAAiD;IACjD,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAEnD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;GAGG;AACH,SAAgB,gBAAgB,CAAC,IAAY;IAC3
C,gCAAgC;IAChC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEtE,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,QAAQ,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE1D,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAElB,0BAA0B;QAC1B,gEAAgE;QAChE,oDAAoD;QACpD,kDAAkD;QAClD,IACE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC;YACpB,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC;YAC3B,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,mCAAmC;YACvD,QAAQ,CAAC,MAAM,GAAG,EAAE,EACpB,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,qCAAqC;QACxD,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC3B,CAAC;AAED;;;GAGG;AACH,SAAgB,UAAU,CAAC,IAAY;IACrC,OAAO,IAAI,CAAC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC;AAChD,CAAC;AAED;;;GAGG;AACH,SAAgB,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS;QAAE,OAAO,IAAI,CAAC;IAE1C,uCAAuC;IACvC,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC/C,MAAM,SAAS,GAAG,SAAS,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAE7C,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,SAAS,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,GAAG,CAAC;IACjD,CAAC;IAED,OAAO,SAAS,GAAG,GAAG,CAAC;AACzB,CAAC;AAED;;;GAGG;AACH,SAAgB,SAAS,CAAC,IAAY;IACpC,OAAO,IAAI;SACR,OAAO,CAAC,qDAAqD,EAAE,EAAE,CAAC;SAClE,OAAO,CAAC,kDAAkD,EAAE,EAAE,CAAC;SAC/D,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC"}