@j0hanz/superfetch 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/dist/errors/app-error.d.ts +0 -25
  2. package/dist/errors/app-error.d.ts.map +1 -1
  3. package/dist/errors/app-error.js +0 -34
  4. package/dist/errors/app-error.js.map +1 -1
  5. package/dist/index.js +86 -21
  6. package/dist/index.js.map +1 -1
  7. package/dist/middleware/error-handler.d.ts.map +1 -1
  8. package/dist/middleware/error-handler.js.map +1 -1
  9. package/dist/middleware/rate-limiter.d.ts +1 -0
  10. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  11. package/dist/middleware/rate-limiter.js +29 -10
  12. package/dist/middleware/rate-limiter.js.map +1 -1
  13. package/dist/services/cache.d.ts +9 -5
  14. package/dist/services/cache.d.ts.map +1 -1
  15. package/dist/services/cache.js +67 -28
  16. package/dist/services/cache.js.map +1 -1
  17. package/dist/services/extractor.d.ts +8 -18
  18. package/dist/services/extractor.d.ts.map +1 -1
  19. package/dist/services/extractor.js +28 -31
  20. package/dist/services/extractor.js.map +1 -1
  21. package/dist/services/fetcher.d.ts +6 -2
  22. package/dist/services/fetcher.d.ts.map +1 -1
  23. package/dist/services/fetcher.js +86 -17
  24. package/dist/services/fetcher.js.map +1 -1
  25. package/dist/services/logger.d.ts +1 -1
  26. package/dist/services/logger.d.ts.map +1 -1
  27. package/dist/services/logger.js +14 -4
  28. package/dist/services/logger.js.map +1 -1
  29. package/dist/services/parser.d.ts +2 -0
  30. package/dist/services/parser.d.ts.map +1 -1
  31. package/dist/services/parser.js +42 -11
  32. package/dist/services/parser.js.map +1 -1
  33. package/dist/tools/handlers/fetch-links.tool.d.ts +8 -4
  34. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  35. package/dist/tools/handlers/fetch-links.tool.js +40 -21
  36. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  37. package/dist/tools/handlers/fetch-markdown.tool.d.ts +7 -8
  38. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  39. package/dist/tools/handlers/fetch-markdown.tool.js +48 -36
  40. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  41. package/dist/tools/handlers/fetch-url.tool.d.ts +9 -8
  42. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  43. package/dist/tools/handlers/fetch-url.tool.js +55 -39
  44. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  45. package/dist/tools/index.d.ts.map +1 -1
  46. package/dist/tools/index.js +42 -0
  47. package/dist/tools/index.js.map +1 -1
  48. package/dist/transformers/jsonl.transformer.d.ts +0 -1
  49. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  50. package/dist/transformers/jsonl.transformer.js +27 -22
  51. package/dist/transformers/jsonl.transformer.js.map +1 -1
  52. package/dist/transformers/markdown.transformer.d.ts +1 -2
  53. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  54. package/dist/transformers/markdown.transformer.js +20 -63
  55. package/dist/transformers/markdown.transformer.js.map +1 -1
  56. package/dist/types/content.types.d.ts +1 -1
  57. package/dist/types/content.types.d.ts.map +1 -1
  58. package/dist/utils/sanitizer.d.ts +6 -2
  59. package/dist/utils/sanitizer.d.ts.map +1 -1
  60. package/dist/utils/sanitizer.js +13 -5
  61. package/dist/utils/sanitizer.js.map +1 -1
  62. package/dist/utils/tool-error-handler.d.ts +18 -0
  63. package/dist/utils/tool-error-handler.d.ts.map +1 -0
  64. package/dist/utils/tool-error-handler.js +27 -0
  65. package/dist/utils/tool-error-handler.js.map +1 -0
  66. package/dist/utils/url-validator.d.ts +1 -0
  67. package/dist/utils/url-validator.d.ts.map +1 -1
  68. package/dist/utils/url-validator.js +39 -6
  69. package/dist/utils/url-validator.js.map +1 -1
  70. package/package.json +79 -80
@@ -1,32 +1,22 @@
1
1
  import type { ExtractedArticle } from '../types/index.js';
2
- /**
3
- * Metadata extracted from HTML document
4
- */
5
- export interface ExtractedMetadata {
2
+ /** Metadata extracted from HTML document (internal) */
3
+ interface ExtractedMetadata {
6
4
  title?: string;
7
5
  description?: string;
8
6
  author?: string;
9
7
  }
10
- /**
11
- * Combined extraction result from a single JSDOM parse
12
- */
13
- export interface ExtractionResult {
8
+ /** Combined extraction result (internal) */
9
+ interface ExtractionResult {
14
10
  article: ExtractedArticle | null;
15
11
  metadata: ExtractedMetadata;
16
12
  }
17
13
  /**
18
14
  * Extracts both article content and metadata from HTML in a single JSDOM parse.
19
15
  * This is more efficient than calling extractArticle and extractMetadata separately.
16
+ * @param html - HTML string to extract content from
17
+ * @param url - URL of the page (used for resolving relative links)
18
+ * @returns Extraction result with article and metadata
20
19
  */
21
20
  export declare function extractContent(html: string, url: string): ExtractionResult;
22
- /**
23
- * Extracts main article content using Mozilla Readability
24
- * @deprecated Use extractContent() for better performance when you need both article and metadata
25
- */
26
- export declare function extractArticle(html: string, url: string): ExtractedArticle | null;
27
- /**
28
- * Extracts metadata from HTML
29
- * @deprecated Use extractContent() for better performance when you need both article and metadata
30
- */
31
- export declare function extractMetadata(html: string): ExtractedMetadata;
21
+ export {};
32
22
  //# sourceMappingURL=extractor.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAG1D;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AAuDD;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,gBAAgB,CAgB1E;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAQjF;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,CAQ/D"}
1
+ {"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAM1D,uDAAuD;AACvD,UAAU,iBAAiB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,4CAA4C;AAC5C,UAAU,gBAAgB;IACxB,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AA8DD;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,gBAAgB,CAwC1E"}
@@ -1,6 +1,8 @@
1
1
  import { JSDOM } from 'jsdom';
2
2
  import { Readability } from '@mozilla/readability';
3
- import { logError } from './logger.js';
3
+ import { logError, logWarn } from './logger.js';
4
+ // Maximum HTML size to process (10MB)
5
+ const MAX_HTML_SIZE = 10 * 1024 * 1024;
4
6
  function getMetaContent(document, selectors) {
5
7
  for (const selector of selectors) {
6
8
  const content = document.querySelector(selector)?.getAttribute('content');
@@ -16,7 +18,9 @@ function extractMetadataFromDocument(document) {
16
18
  const title = getMetaContent(document, [
17
19
  'meta[property="og:title"]',
18
20
  'meta[name="twitter:title"]',
19
- ]) ?? document.querySelector('title')?.textContent ?? undefined;
21
+ ]) ??
22
+ document.querySelector('title')?.textContent ??
23
+ undefined;
20
24
  const description = getMetaContent(document, [
21
25
  'meta[property="og:description"]',
22
26
  'meta[name="twitter:description"]',
@@ -50,10 +54,31 @@ function extractArticleFromDocument(document) {
50
54
  /**
51
55
  * Extracts both article content and metadata from HTML in a single JSDOM parse.
52
56
  * This is more efficient than calling extractArticle and extractMetadata separately.
57
+ * @param html - HTML string to extract content from
58
+ * @param url - URL of the page (used for resolving relative links)
59
+ * @returns Extraction result with article and metadata
53
60
  */
54
61
  export function extractContent(html, url) {
62
+ // Input validation
63
+ if (!html || typeof html !== 'string') {
64
+ logWarn('extractContent called with invalid HTML input');
65
+ return { article: null, metadata: {} };
66
+ }
67
+ if (!url || typeof url !== 'string') {
68
+ logWarn('extractContent called with invalid URL');
69
+ return { article: null, metadata: {} };
70
+ }
71
+ // Size validation to prevent memory issues
72
+ let processedHtml = html;
73
+ if (html.length > MAX_HTML_SIZE) {
74
+ logWarn('HTML content exceeds maximum size for extraction, truncating', {
75
+ size: html.length,
76
+ maxSize: MAX_HTML_SIZE,
77
+ });
78
+ processedHtml = html.substring(0, MAX_HTML_SIZE);
79
+ }
55
80
  try {
56
- const dom = new JSDOM(html, { url });
81
+ const dom = new JSDOM(processedHtml, { url });
57
82
  const document = dom.window.document;
58
83
  // Extract metadata first (non-destructive)
59
84
  const metadata = extractMetadataFromDocument(document);
@@ -66,32 +91,4 @@ export function extractContent(html, url) {
66
91
  return { article: null, metadata: {} };
67
92
  }
68
93
  }
69
- /**
70
- * Extracts main article content using Mozilla Readability
71
- * @deprecated Use extractContent() for better performance when you need both article and metadata
72
- */
73
- export function extractArticle(html, url) {
74
- try {
75
- const dom = new JSDOM(html, { url });
76
- return extractArticleFromDocument(dom.window.document);
77
- }
78
- catch (error) {
79
- logError('Failed to extract article', error instanceof Error ? error : undefined);
80
- return null;
81
- }
82
- }
83
- /**
84
- * Extracts metadata from HTML
85
- * @deprecated Use extractContent() for better performance when you need both article and metadata
86
- */
87
- export function extractMetadata(html) {
88
- try {
89
- const { document } = new JSDOM(html).window;
90
- return extractMetadataFromDocument(document);
91
- }
92
- catch (error) {
93
- logError('Failed to extract metadata', error instanceof Error ? error : undefined);
94
- return {};
95
- }
96
- }
97
94
  //# sourceMappingURL=extractor.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAmBvC,SAAS,cAAc,CAAC,QAAkB,EAAE,SAAmB;IAC7D,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1E,IAAI,OAAO;YAAE,OAAO,OAAO,CAAC;IAC9B,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,2BAA2B,CAAC,QAAkB;IACrD,MAAM,KAAK,GACT,cAAc,CAAC,QAAQ,EAAE;QACvB,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,IAAI,SAAS,CAAC;IAElE,MAAM,WAAW,GAAG,cAAc,CAAC,QAAQ,EAAE;QAC3C,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE;QACtC,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CAAC,QAAkB;IACpD,kDAAkD;IAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAa,CAAC;IACvD,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,SAAS,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;IAE/B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,OAAO;QACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;QACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;QACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;QAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;QACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;QACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;KACxC,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,GAAW;IACtD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QAErC,2CAA2C;QAC3C,MAAM,QAAQ,GAAG,2BAA2B,CAAC,QAAQ,CAAC,CAAC;QAEvD,mEAAmE;QACnE,MAAM,OAAO,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;QAErD,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CAAC,2BAA2B,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAClF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,GAAW;IACtD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QACrC,OAAO,0BAA0B,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACzD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CAAC,2BAA2B,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAClF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;QAC5C,OAAO,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CAAC,4BAA4B,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACnF,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,sCAAsC;AACtC,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAevC,SAAS,cAAc,CACrB,QAAkB,EAClB,SAAmB;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1E,IAAI,OAAO;YAAE,OAAO,OAAO,CAAC;IAC9B,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,2BAA2B,CAAC,QAAkB;IACrD,MAAM,KAAK,GACT,cAAc,CAAC,QAAQ,EAAE;QACvB,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC;QACF,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW;QAC5C,SAAS,CAAC;IAEZ,MAAM,WAAW,GAAG,cAAc,CAAC,QAAQ,EAAE;QAC3C,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE;QACtC,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CACjC,QAAkB;IAElB,kDAAkD;IAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAa,CAAC;IACvD,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,SAAS,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;IAE/B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,OAAO;QACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;QACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;QACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;QAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;QACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;QACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;KACxC,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,GAAW;IACtD,mBAAmB;IACnB,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,2CAA2C;IAC3C,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,8DAA8D,EAAE;YACtE,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,aAAa,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QAErC,2CAA2C;QAC3C,MAAM,QAAQ,GAAG,2BAA2B,CAAC,QAAQ,CAAC,CAAC;QAEvD,mEAAmE;QACnE,MAAM,OAAO,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;QAErD,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
@@ -1,9 +1,13 @@
1
1
  /**
2
- * Fetches HTML content from a URL
2
+ * Destroys HTTP agents and closes all sockets
3
+ * Should be called during graceful shutdown
3
4
  */
4
- export declare function fetchUrl(url: string, customHeaders?: Record<string, string>): Promise<string>;
5
+ export declare function destroyAgents(): void;
5
6
  /**
6
7
  * Fetches URL with exponential backoff retry logic
8
+ * @param url - URL to fetch
9
+ * @param customHeaders - Optional custom headers
10
+ * @param maxRetries - Maximum retry attempts (1-10, defaults to 3)
7
11
  */
8
12
  export declare function fetchUrlWithRetry(url: string, customHeaders?: Record<string, string>, maxRetries?: number): Promise<string>;
9
13
  //# sourceMappingURL=fetcher.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAgDA;;GAEG;AACH,wBAAsB,QAAQ,CAC5B,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACrC,OAAO,CAAC,MAAM,CAAC,CAyCjB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACtC,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,CAAC,CA4BjB"}
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAyCA;;;GAGG;AACH,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AAqID;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACtC,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,CAAC,CA8BjB"}
@@ -1,6 +1,9 @@
1
1
  import axios from 'axios';
2
+ import http from 'http';
3
+ import https from 'https';
2
4
  import { config } from '../config/index.js';
3
5
  import { FetchError, TimeoutError } from '../errors/app-error.js';
6
+ import { logDebug, logError } from './logger.js';
4
7
  const BLOCKED_HEADERS = new Set([
5
8
  'host',
6
9
  'authorization',
@@ -25,10 +28,23 @@ function calculateBackoff(attempt, maxDelay = 10000) {
25
28
  const jitter = baseDelay * 0.25 * (Math.random() * 2 - 1);
26
29
  return Math.round(baseDelay + jitter);
27
30
  }
31
+ // HTTP/HTTPS agents with connection pooling for better performance
32
+ const httpAgent = new http.Agent({ keepAlive: true, maxSockets: 25 });
33
+ const httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 25 });
34
+ /**
35
+ * Destroys HTTP agents and closes all sockets
36
+ * Should be called during graceful shutdown
37
+ */
38
+ export function destroyAgents() {
39
+ httpAgent.destroy();
40
+ httpsAgent.destroy();
41
+ }
28
42
  const client = axios.create({
29
43
  timeout: config.fetcher.timeout,
30
44
  maxRedirects: config.fetcher.maxRedirects,
31
45
  maxContentLength: config.fetcher.maxContentLength,
46
+ httpAgent,
47
+ httpsAgent,
32
48
  headers: {
33
49
  'User-Agent': config.fetcher.userAgent,
34
50
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -38,10 +54,51 @@ const client = axios.create({
38
54
  },
39
55
  validateStatus: (status) => status >= 200 && status < 300,
40
56
  });
57
+ // Request interceptor for logging and request enhancement
58
+ client.interceptors.request.use((requestConfig) => {
59
+ logDebug('HTTP Request', {
60
+ method: requestConfig.method?.toUpperCase(),
61
+ url: requestConfig.url,
62
+ });
63
+ return requestConfig;
64
+ }, (error) => {
65
+ logError('HTTP Request Error', error);
66
+ return Promise.reject(error);
67
+ });
68
+ // Response interceptor for logging and consistent error transformation
69
+ client.interceptors.response.use((response) => {
70
+ logDebug('HTTP Response', {
71
+ status: response.status,
72
+ url: response.config.url,
73
+ contentType: response.headers['content-type'],
74
+ });
75
+ return response;
76
+ }, (error) => {
77
+ const url = error.config?.url ?? 'unknown';
78
+ // Transform Axios errors to application errors
79
+ if (error.code === 'ECONNABORTED' || error.code === 'ETIMEDOUT') {
80
+ logError('HTTP Timeout', { url, timeout: config.fetcher.timeout });
81
+ return Promise.reject(new TimeoutError(config.fetcher.timeout, true));
82
+ }
83
+ if (error.response) {
84
+ const status = error.response.status;
85
+ const statusText = error.response.statusText;
86
+ logError('HTTP Error Response', { url, status, statusText });
87
+ return Promise.reject(new FetchError(`HTTP ${status}: ${statusText}`, url, status));
88
+ }
89
+ if (error.request) {
90
+ logError('HTTP Network Error', { url, code: error.code });
91
+ return Promise.reject(new FetchError(`Network error: Could not reach ${url}`, url));
92
+ }
93
+ logError('HTTP Unknown Error', { url, message: error.message });
94
+ return Promise.reject(new FetchError(error.message, url));
95
+ });
41
96
  /**
42
- * Fetches HTML content from a URL
97
+ * Fetches HTML content from a URL (internal - use fetchUrlWithRetry for retry logic)
98
+ * @throws {FetchError} if request fails or returns non-HTML content
99
+ * @throws {TimeoutError} if request times out
43
100
  */
44
- export async function fetchUrl(url, customHeaders) {
101
+ async function fetchUrl(url, customHeaders) {
45
102
  const requestConfig = {
46
103
  method: 'GET',
47
104
  url,
@@ -53,30 +110,42 @@ export async function fetchUrl(url, customHeaders) {
53
110
  }
54
111
  try {
55
112
  const response = await client.request(requestConfig);
113
+ // Validate content type is HTML/text
114
+ const contentType = response.headers['content-type'];
115
+ if (contentType && !isHtmlContentType(contentType)) {
116
+ throw new FetchError(`Unexpected content type: ${contentType}. Expected HTML content.`, url);
117
+ }
56
118
  return response.data;
57
119
  }
58
120
  catch (error) {
59
- if (!axios.isAxiosError(error)) {
60
- throw new FetchError(`Unexpected error: ${error instanceof Error ? error.message : 'Unknown'}`, url);
61
- }
62
- if (error.code === 'ECONNABORTED' || error.code === 'ETIMEDOUT') {
63
- throw new TimeoutError(config.fetcher.timeout, true);
64
- }
65
- if (error.response) {
66
- throw new FetchError(`HTTP ${error.response.status}: ${error.response.statusText}`, url, error.response.status);
67
- }
68
- if (error.request) {
69
- throw new FetchError(`Network error: Could not reach ${url}`, url);
121
+ // Re-throw our custom errors (from interceptors or content-type check)
122
+ if (error instanceof FetchError || error instanceof TimeoutError) {
123
+ throw error;
70
124
  }
71
- throw new FetchError(error.message, url);
125
+ // Handle any unexpected errors
126
+ throw new FetchError(`Unexpected error: ${error instanceof Error ? error.message : 'Unknown'}`, url);
72
127
  }
73
128
  }
129
+ /**
130
+ * Checks if content type indicates HTML content
131
+ */
132
+ function isHtmlContentType(contentType) {
133
+ const normalized = contentType.toLowerCase();
134
+ return (normalized.includes('text/html') ||
135
+ normalized.includes('application/xhtml') ||
136
+ normalized.includes('text/plain'));
137
+ }
74
138
  /**
75
139
  * Fetches URL with exponential backoff retry logic
140
+ * @param url - URL to fetch
141
+ * @param customHeaders - Optional custom headers
142
+ * @param maxRetries - Maximum retry attempts (1-10, defaults to 3)
76
143
  */
77
144
  export async function fetchUrlWithRetry(url, customHeaders, maxRetries = 3) {
145
+ // Validate maxRetries within bounds
146
+ const retries = Math.min(Math.max(1, maxRetries), 10);
78
147
  let lastError;
79
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
148
+ for (let attempt = 1; attempt <= retries; attempt++) {
80
149
  try {
81
150
  return await fetchUrl(url, customHeaders);
82
151
  }
@@ -89,12 +158,12 @@ export async function fetchUrlWithRetry(url, customHeaders, maxRetries = 3) {
89
158
  throw error;
90
159
  }
91
160
  }
92
- if (attempt < maxRetries) {
161
+ if (attempt < retries) {
93
162
  const delay = calculateBackoff(attempt);
94
163
  await new Promise((resolve) => setTimeout(resolve, delay));
95
164
  }
96
165
  }
97
166
  }
98
- throw new FetchError(`Failed after ${maxRetries} attempts: ${lastError?.message ?? 'Unknown error'}`, url);
167
+ throw new FetchError(`Failed after ${retries} attempts: ${lastError?.message ?? 'Unknown error'}`, url);
99
168
  }
100
169
  //# sourceMappingURL=fetcher.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAA6B,MAAM,OAAO,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAElE,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,MAAM;IACN,eAAe;IACf,QAAQ;IACR,iBAAiB;IACjB,WAAW;IACX,qBAAqB;CACtB,CAAC,CAAC;AAEH,SAAS,eAAe,CACtB,OAAgC;IAEhC,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAEpE,MAAM,SAAS,GAA2B,EAAE,CAAC;IAC7C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACnD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC5C,SAAS,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;AACnE,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAQ,GAAG,KAAK;IACzD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACtE,MAAM,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;AACxC,CAAC;AAED,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IAC1B,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO;IAC/B,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,YAAY;IACzC,gBAAgB,EAAE,MAAM,CAAC,OAAO,CAAC,gBAAgB;IACjD,OAAO,EAAE;QACP,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS;QACtC,MAAM,EAAE,4EAA4E;QACpF,iBAAiB,EAAE,gBAAgB;QACnC,iBAAiB,EAAE,mBAAmB;QACtC,UAAU,EAAE,YAAY;KACzB;IACD,cAAc,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG;CAC1D,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,GAAW,EACX,aAAsC;IAEtC,MAAM,aAAa,GAAuB;QACxC,MAAM,EAAE,KAAK;QACb,GAAG;QACH,YAAY,EAAE,MAAM;KACrB,CAAC;IAEF,MAAM,SAAS,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;IACjD,IAAI,SAAS,EAAE,CAAC;QACd,aAAa,CAAC,OAAO,GAAG,EAAE,GAAG,aAAa,CAAC,OAAO,EAAE,GAAG,SAAS,EAAE,CAAC;IACrE,CAAC;IAED,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAS,aAAa,CAAC,CAAC;QAC7D,OAAO,QAAQ,CAAC,IAAI,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,UAAU,CAClB,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE,EACzE,GAAG,CACJ,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAChE,MAAM,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,IAAI,UAAU,CAClB,QAAQ,KAAK,CAAC,QAAQ,CAAC,MAAM,KAAK,KAAK,CAAC,QAAQ,CAAC,UAAU,EAAE,EAC7D,GAAG,EACH,KAAK,CAAC,QAAQ,CAAC,MAAM,CACtB,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAClB,MAAM,IAAI,UAAU,CAAC,kCAAkC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;QACrE,CAAC;QAED,MAAM,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC3C,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,aAAsC,EACtC,UAAU,GAAG,CAAC;IAEd,IAAI,SAA4B,CAAC;IAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;QACvD,IAAI,CAAC;YACH,OAAO,MAAM,QAAQ,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;YAExE,+DAA+D;YAC/D,IAAI,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;gBACpD,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC;gBAChC,IAAI,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;oBACpD,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAED,IAAI,OAAO,GAAG,UAAU,EAAE,CAAC;gBACzB,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;gBACxC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,IAAI,UAAU,CAClB,gBAAgB,UAAU,cAAc,SAAS,EAAE,OAAO,IAAI,eAAe,EAAE,EAC/E,GAAG,CACJ,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAyC,MAAM,OAAO,CAAC;AAC9D,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEjD,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,MAAM;IACN,eAAe;IACf,QAAQ;IACR,iBAAiB;IACjB,WAAW;IACX,qBAAqB;CACtB,CAAC,CAAC;AAEH,SAAS,eAAe,CACtB,OAAgC;IAEhC,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAEpE,MAAM,SAAS,GAA2B,EAAE,CAAC;IAC7C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACnD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC5C,SAAS,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;AACnE,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAQ,GAAG,KAAK;IACzD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACtE,MAAM,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;AACxC,CAAC;AAED,mEAAmE;AACnE,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;AACtE,MAAM,UAAU,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;AAExE;;;GAGG;AACH,MAAM,UAAU,aAAa;IAC3B,SAAS,CAAC,OAAO,EAAE,CAAC;IACpB,UAAU,CAAC,OAAO,EAAE,CAAC;AACvB,CAAC;AAED,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IAC1B,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO;IAC/B,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,YAAY;IACzC,gBAAgB,EAAE,MAAM,CAAC,OAAO,CAAC,gBAAgB;IACjD,SAAS;IACT,UAAU;IACV,OAAO,EAAE;QACP,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS;QACtC,MAAM,EACJ,4EAA4E;QAC9E,iBAAiB,EAAE,gBAAgB;QACnC,iBAAiB,EAAE,mBAAmB;QACtC,UAAU,EAAE,YAAY;KACzB;IACD,cAAc,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG;CAC1D,CAAC,CAAC;AAEH,0DAA0D;AAC1D,MAAM,CAAC,YAAY,CAAC,OAAO,CAAC,GAAG,CAC7B,CAAC,aAAa,EAAE,EAAE;IAChB,QAAQ,CAAC,cAAc,EAAE;QACvB,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,WAAW,EAAE;QAC3C,GAAG,EAAE,aAAa,CAAC,GAAG;KACvB,CAAC,CAAC;IACH,OAAO,aAAa,CAAC;AACvB,CAAC,EACD,CAAC,KAAiB,EAAE,EAAE;IACpB,QAAQ,CAAC,oBAAoB,EAAE,KAAK,CAAC,CAAC;IACtC,OAAO,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC/B,CAAC,CACF,CAAC;AAEF,uEAAuE;AACvE,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAC9B,CAAC,QAAQ,EAAE,EAAE;IACX,QAAQ,CAAC,eAAe,EAAE;QACxB,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,GAAG,EAAE,QAAQ,CAAC,MAAM,CAAC,GAAG;QACxB,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC;KAC9C,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC;AAClB,CAAC,EACD,CAAC,KAAiB,EAAE,EAAE;IACpB,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC;IAE3C,+CAA+C;IAC/C,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;QAChE,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QACnE,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QACnB,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrC,MAAM,UAAU,GAAG,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC7C,QAAQ,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QAC7D,OAAO,OAAO,CAAC,MAAM,CACnB,IAAI,UAAU,CAAC,QAAQ,MAAM,KAAK,UAAU,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,CAC7D,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QAClB,QAAQ,CAAC,oBAAoB,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1D,OAAO,OAAO,CAAC,MAAM,CACnB,IAAI,UAAU,CAAC,kCAAkC,GAAG,EAAE,EAAE,GAAG,CAAC,CAC7D,CAAC;IACJ,CAAC;IAED,QAAQ,CAAC,oBAAoB,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;IAChE,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;AAC5D,CAAC,CACF,CAAC;AAEF;;;;GAIG;AACH,KAAK,UAAU,QAAQ,CACrB,GAAW,EACX,aAAsC;IAEtC,MAAM,aAAa,GAAuB;QACxC,MAAM,EAAE,KAAK;QACb,GAAG;QACH,YAAY,EAAE,MAAM;KACrB,CAAC;IAEF,MAAM,SAAS,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;IACjD,IAAI,SAAS,EAAE,CAAC;QACd,aAAa,CAAC,OAAO,GAAG,EAAE,GAAG,aAAa,CAAC,OAAO,EAAE,GAAG,SAAS,EAAE,CAAC;IACrE,CAAC;IAED,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAS,aAAa,CAAC,CAAC;QAE7D,qCAAqC;QACrC,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAuB,CAAC;QAC3E,IAAI,WAAW,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,UAAU,CAClB,4BAA4B,WAAW,0BAA0B,EACjE,GAAG,CACJ,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uEAAuE;QACvE,IAAI,KAAK,YAAY,UAAU,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;YACjE,MAAM,KAAK,CAAC;QACd,CAAC;QAED,+BAA+B;QAC/B,MAAM,IAAI,UAAU,CAClB,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE,EACzE,GAAG,CACJ,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAC7C,OAAO,CACL,UAAU,CAAC,QAAQ,CAAC,WAAW,CAAC;QAChC,UAAU,CAAC,QAAQ,CAAC,mBAAmB,CAAC;QACxC,UAAU,CAAC,QAAQ,CAAC,YAAY,CAAC,CAClC,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,aAAsC,EACtC,UAAU,GAAG,CAAC;IAEd,oCAAoC;IACpC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC,CAAC;IACtD,IAAI,SAA4B,CAAC;IAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,CAAC;YACH,OAAO,MAAM,QAAQ,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;YAExE,+DAA+D;YAC/D,IAAI,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;gBACpD,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC;gBAChC,IAAI,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;oBACpD,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAED,IAAI,OAAO,GAAG,OAAO,EAAE,CAAC;gBACtB,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;gBACxC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,IAAI,UAAU,CAClB,gBAAgB,OAAO,cAAc,SAAS,EAAE,OAAO,IAAI,eAAe,EAAE,EAC5E,GAAG,CACJ,CAAC;AACJ,CAAC"}
@@ -1,5 +1,5 @@
1
1
  export declare function logInfo(message: string, meta?: Record<string, unknown>): void;
2
- export declare function logWarn(message: string, meta?: Record<string, unknown>): void;
3
2
  export declare function logDebug(message: string, meta?: Record<string, unknown>): void;
3
+ export declare function logWarn(message: string, meta?: Record<string, unknown>): void;
4
4
  export declare function logError(message: string, error?: Error | Record<string, unknown>): void;
5
5
  //# sourceMappingURL=logger.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAyCA,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE9E;AAED,wBAAgB,QAAQ,CACtB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACtC,IAAI,CAQN"}
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAmDA,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,QAAQ,CACtB,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC7B,IAAI,CAEN;AAED,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,QAAQ,CACtB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACtC,IAAI,CAQN"}
@@ -1,7 +1,17 @@
1
1
  import winston from 'winston';
2
+ import fs from 'fs';
2
3
  import { config } from '../config/index.js';
3
4
  import path from 'path';
4
5
  const logsDir = path.join(process.cwd(), 'logs');
6
+ // Ensure logs directory exists
7
+ try {
8
+ if (!fs.existsSync(logsDir)) {
9
+ fs.mkdirSync(logsDir, { recursive: true });
10
+ }
11
+ }
12
+ catch {
13
+ // If we can't create logs dir, file transports will fail gracefully
14
+ }
5
15
  const logger = winston.createLogger({
6
16
  level: config.logging.level,
7
17
  format: winston.format.combine(winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), winston.format.errors({ stack: true }), winston.format.splat(), winston.format.json()),
@@ -29,14 +39,14 @@ export function logInfo(message, meta) {
29
39
  if (config.logging.enabled)
30
40
  logger.info(message, meta);
31
41
  }
32
- export function logWarn(message, meta) {
33
- if (config.logging.enabled)
34
- logger.warn(message, meta);
35
- }
36
42
  export function logDebug(message, meta) {
37
43
  if (config.logging.enabled)
38
44
  logger.debug(message, meta);
39
45
  }
46
+ export function logWarn(message, meta) {
47
+ if (config.logging.enabled)
48
+ logger.warn(message, meta);
49
+ }
40
50
  export function logError(message, error) {
41
51
  if (!config.logging.enabled)
42
52
  return;
@@ -1 +1 @@
1
- {"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,OAAe,EAAE,IAA8B;IACtE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,QAAQ,CACtB,OAAe,EACf,KAAuC;IAEvC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,OAAO;IAEpC,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;QACpB,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE;QAC9C,CAAC,CAAC,KAAK,CAAC;IACZ,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AACnC,CAAC"}
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,+BAA+B;AAC/B,IAAI,CAAC;IACH,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,CAAC;AACH,CAAC;AAAC,MAAM,CAAC;IACP,oEAAoE;AACtE,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CACtB,OAAe,EACf,IAA8B;IAE9B,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CACtB,OAAe,EACf,KAAuC;IAEvC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,OAAO;IAEpC,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;QACpB,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE;QAC9C,CAAC,CAAC,KAAK,CAAC;IACZ,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AACnC,CAAC"}
@@ -1,6 +1,8 @@
1
1
  import type { ContentBlockUnion } from '../types/index.js';
2
2
  /**
3
3
  * Parses HTML content and extracts semantic blocks
4
+ * @param html - HTML string to parse
5
+ * @returns Array of content blocks (empty array if parsing fails)
4
6
  */
5
7
  export declare function parseHtml(html: string): ContentBlockUnion[];
6
8
  //# sourceMappingURL=parser.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAOV,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AAyJ3B;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAc3D"}
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAOV,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AA+J3B;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAwC3D"}
@@ -1,6 +1,9 @@
1
1
  import * as cheerio from 'cheerio';
2
2
  import { sanitizeText } from '../utils/sanitizer.js';
3
3
  import { config } from '../config/index.js';
4
+ import { logWarn } from './logger.js';
5
+ // Maximum HTML size to parse (10MB)
6
+ const MAX_HTML_SIZE = 10 * 1024 * 1024;
4
7
  function parseHeading($, element) {
5
8
  const text = sanitizeText($(element).text());
6
9
  if (!text)
@@ -135,18 +138,46 @@ function filterBlocks(blocks) {
135
138
  }
136
139
  /**
137
140
  * Parses HTML content and extracts semantic blocks
141
+ * @param html - HTML string to parse
142
+ * @returns Array of content blocks (empty array if parsing fails)
138
143
  */
139
144
  export function parseHtml(html) {
140
- const $ = cheerio.load(html);
141
- const blocks = [];
142
- $('script, style, noscript, iframe, svg').remove();
143
- $('body')
144
- .find('h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code, table, img')
145
- .each((_, element) => {
146
- const block = parseElement($, element);
147
- if (block)
148
- blocks.push(block);
149
- });
150
- return filterBlocks(blocks);
145
+ // Input validation
146
+ if (!html || typeof html !== 'string') {
147
+ return [];
148
+ }
149
+ // Size validation to prevent memory issues
150
+ if (html.length > MAX_HTML_SIZE) {
151
+ logWarn('HTML content exceeds maximum size, truncating', {
152
+ size: html.length,
153
+ maxSize: MAX_HTML_SIZE,
154
+ });
155
+ html = html.substring(0, MAX_HTML_SIZE);
156
+ }
157
+ try {
158
+ const $ = cheerio.load(html);
159
+ const blocks = [];
160
+ $('script, style, noscript, iframe, svg').remove();
161
+ $('body')
162
+ .find('h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code, table, img')
163
+ .each((_, element) => {
164
+ try {
165
+ const block = parseElement($, element);
166
+ if (block)
167
+ blocks.push(block);
168
+ }
169
+ catch {
170
+ // Skip individual element parsing errors
171
+ }
172
+ });
173
+ return filterBlocks(blocks);
174
+ }
175
+ catch (error) {
176
+ logWarn('Failed to parse HTML', {
177
+ error: error instanceof Error ? error.message : 'Unknown error',
178
+ htmlLength: html.length,
179
+ });
180
+ return [];
181
+ }
151
182
  }
152
183
  //# sourceMappingURL=parser.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAW5C,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,OAAO;QACL,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,CAAa,EAAE,OAAgB;IACrD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAE7E,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,CAAC,CAAC,OAAO,CAAC;SACP,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACd,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACxC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEL,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI;QAC/C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAExD,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;QAC5B,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE1B,0CAA0C;IAC1C,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjD,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM;aACH,IAAI,CAAC,IAAI,CAAC;aACV,KAAK,EAAE;aACP,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;IACP,CAAC;IAED,oBAAoB;IACpB,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,cAAc,CAAC;IACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QACxC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,CAAC,CAAC,GAAG,CAAC;aACH,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QACL,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,OAAO;QACL,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,OAAO;QACb,GAAG;QACH,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,SAAS;KACzC,CAAC;AACJ,CAAC;AAED,MAAM,eAAe,GAAG;IACtB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,CAAC,EAAE,cAAc;IACjB,EAAE,EAAE,SAAS;IACb,EAAE,EAAE,SAAS;IACb,GAAG,EAAE,SAAS;IACd,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,UAAU;IACjB,GAAG,EAAE,UAAU;CAIhB,CAAC;AAIF,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG,IAAI,eAAe,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,CAAa,EAAE,IAAa;IAChD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE1E,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,YAAY,CAAC,MAA2B;IAC/C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC7B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,WAAW,CAAC;YACjB,KAAK,SAAS,CAAC;YACf,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,MAAM,GAAwB,EAAE,CAAC;IAEvC,CAAC,CAAC,sCAAsC,CAAC,CAAC,MAAM,EAAE,CAAC;IAEnD,CAAC,CAAC,MAAM,CAAC;SACN,IAAI,CAAC,0DAA0D,CAAC;SAChE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QACnB,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACvC,IAAI,KAAK;YAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEL,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;AAC9B,CAAC"}
1
+ {"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAWtC,oCAAoC;AACpC,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAEvC,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,OAAO;QACL,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CACrB,CAAa,EACb,OAAgB;IAEhB,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAE7E,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,CAAC,CAAC,OAAO,CAAC;SACP,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACd,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACxC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEL,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI;QAC/C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAExD,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;QAC5B,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE1B,0CAA0C;IAC1C,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjD,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM;aACH,IAAI,CAAC,IAAI,CAAC;aACV,KAAK,EAAE;aACP,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;IACP,CAAC;IAED,oBAAoB;IACpB,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,cAAc,CAAC;IACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QACxC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,CAAC,CAAC,GAAG,CAAC;aACH,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QACL,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,OAAO;QACL,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,OAAO;QACb,GAAG;QACH,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,SAAS;KACzC,CAAC;AACJ,CAAC;AAED,MAAM,eAAe,GAAG;IACtB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,CAAC,EAAE,cAAc;IACjB,EAAE,EAAE,SAAS;IACb,EAAE,EAAE,SAAS;IACb,GAAG,EAAE,SAAS;IACd,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,UAAU;IACjB,GAAG,EAAE,UAAU;CAIhB,CAAC;AAIF,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG,IAAI,eAAe,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,CAAa,EAAE,IAAa;IAChD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE1E,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,YAAY,CAAC,MAA2B;IAC/C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC7B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,WAAW,CAAC;YACjB,KAAK,SAAS,CAAC;YACf,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,mBAAmB;IACnB,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,2CAA2C;IAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,+CAA+C,EAAE;YACvD,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IAC1C,CAAC;IAED,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAwB,EAAE,CAAC;QAEvC,CAAC,CAAC,sCAAsC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEnD,CAAC,CAAC,MAAM,CAAC;aACN,IAAI,CAAC,0DAA0D,CAAC;aAChE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;YACnB,IAAI,CAAC;gBACH,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBACvC,IAAI,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC;YAAC,MAAM,CAAC;gBACP,yCAAyC;YAC3C,CAAC;QACH,CAAC,CAAC,CAAC;QAEL,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,sBAAsB,EAAE;YAC9B,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;YAC/D,UAAU,EAAE,IAAI,CAAC,MAAM;SACxB,CAAC,CAAC;QACH,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC"}
@@ -1,20 +1,24 @@
1
- import type { FetchLinksInput } from '../../types/index.js';
1
+ import type { FetchLinksInput, ExtractedLink } from '../../types/index.js';
2
2
  export declare const FETCH_LINKS_TOOL_NAME = "fetch-links";
3
3
  export declare const FETCH_LINKS_TOOL_DESCRIPTION = "Extracts all hyperlinks from a webpage with anchor text and type classification";
4
4
  /**
5
5
  * Tool handler for extracting links from a URL
6
6
  */
7
- export declare function fetchLinksToolHandler(input: FetchLinksInput): Promise<{
7
+ export declare function fetchLinksToolHandler(input: FetchLinksInput): Promise<import("../../utils/tool-error-handler.js").ToolErrorResponse | {
8
8
  content: {
9
9
  type: "text";
10
10
  text: string;
11
11
  }[];
12
- isError?: undefined;
12
+ structuredContent: {
13
+ url: string;
14
+ linkCount: number;
15
+ links: ExtractedLink[];
16
+ };
13
17
  } | {
14
18
  content: {
15
19
  type: "text";
16
20
  text: string;
17
21
  }[];
18
- isError: boolean;
22
+ structuredContent?: undefined;
19
23
  }>;
20
24
  //# sourceMappingURL=fetch-links.tool.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,eAAe,EAAiB,MAAM,sBAAsB,CAAC;AAE3E,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,oFAC0C,CAAC;AAmDpF;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,eAAe;;;;;;;;;;;;GA+CjE"}
1
+ {"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAE3E,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,oFAC0C,CAAC;AAqDpF;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,eAAe;;;;;;aAiBjD,MAAM;mBACA,MAAM;eACV,aAAa,EAAE;;;;;;;;GAuDjC"}