@j0hanz/superfetch 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/errors/app-error.d.ts +0 -25
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +0 -34
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +86 -21
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +1 -0
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +29 -10
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/services/cache.d.ts +9 -5
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +67 -28
- package/dist/services/cache.js.map +1 -1
- package/dist/services/extractor.d.ts +8 -18
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +28 -31
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +6 -2
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +86 -17
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +14 -4
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +2 -0
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +42 -11
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +8 -4
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +40 -21
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +7 -8
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +48 -36
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +9 -8
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +55 -39
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +42 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/transformers/jsonl.transformer.d.ts +0 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +27 -22
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -2
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +20 -63
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +1 -1
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/utils/sanitizer.d.ts +6 -2
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +13 -5
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +18 -0
- package/dist/utils/tool-error-handler.d.ts.map +1 -0
- package/dist/utils/tool-error-handler.js +27 -0
- package/dist/utils/tool-error-handler.js.map +1 -0
- package/dist/utils/url-validator.d.ts +1 -0
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +39 -6
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +79 -80
|
@@ -1,32 +1,22 @@
|
|
|
1
1
|
import type { ExtractedArticle } from '../types/index.js';
|
|
2
|
-
/**
|
|
3
|
-
|
|
4
|
-
*/
|
|
5
|
-
export interface ExtractedMetadata {
|
|
2
|
+
/** Metadata extracted from HTML document (internal) */
|
|
3
|
+
interface ExtractedMetadata {
|
|
6
4
|
title?: string;
|
|
7
5
|
description?: string;
|
|
8
6
|
author?: string;
|
|
9
7
|
}
|
|
10
|
-
/**
|
|
11
|
-
|
|
12
|
-
*/
|
|
13
|
-
export interface ExtractionResult {
|
|
8
|
+
/** Combined extraction result (internal) */
|
|
9
|
+
interface ExtractionResult {
|
|
14
10
|
article: ExtractedArticle | null;
|
|
15
11
|
metadata: ExtractedMetadata;
|
|
16
12
|
}
|
|
17
13
|
/**
|
|
18
14
|
* Extracts both article content and metadata from HTML in a single JSDOM parse.
|
|
19
15
|
* This is more efficient than calling extractArticle and extractMetadata separately.
|
|
16
|
+
* @param html - HTML string to extract content from
|
|
17
|
+
* @param url - URL of the page (used for resolving relative links)
|
|
18
|
+
* @returns Extraction result with article and metadata
|
|
20
19
|
*/
|
|
21
20
|
export declare function extractContent(html: string, url: string): ExtractionResult;
|
|
22
|
-
|
|
23
|
-
* Extracts main article content using Mozilla Readability
|
|
24
|
-
* @deprecated Use extractContent() for better performance when you need both article and metadata
|
|
25
|
-
*/
|
|
26
|
-
export declare function extractArticle(html: string, url: string): ExtractedArticle | null;
|
|
27
|
-
/**
|
|
28
|
-
* Extracts metadata from HTML
|
|
29
|
-
* @deprecated Use extractContent() for better performance when you need both article and metadata
|
|
30
|
-
*/
|
|
31
|
-
export declare function extractMetadata(html: string): ExtractedMetadata;
|
|
21
|
+
export {};
|
|
32
22
|
//# sourceMappingURL=extractor.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAM1D,uDAAuD;AACvD,UAAU,iBAAiB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,4CAA4C;AAC5C,UAAU,gBAAgB;IACxB,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AA8DD;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,gBAAgB,CAwC1E"}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { JSDOM } from 'jsdom';
|
|
2
2
|
import { Readability } from '@mozilla/readability';
|
|
3
|
-
import { logError } from './logger.js';
|
|
3
|
+
import { logError, logWarn } from './logger.js';
|
|
4
|
+
// Maximum HTML size to process (10MB)
|
|
5
|
+
const MAX_HTML_SIZE = 10 * 1024 * 1024;
|
|
4
6
|
function getMetaContent(document, selectors) {
|
|
5
7
|
for (const selector of selectors) {
|
|
6
8
|
const content = document.querySelector(selector)?.getAttribute('content');
|
|
@@ -16,7 +18,9 @@ function extractMetadataFromDocument(document) {
|
|
|
16
18
|
const title = getMetaContent(document, [
|
|
17
19
|
'meta[property="og:title"]',
|
|
18
20
|
'meta[name="twitter:title"]',
|
|
19
|
-
]) ??
|
|
21
|
+
]) ??
|
|
22
|
+
document.querySelector('title')?.textContent ??
|
|
23
|
+
undefined;
|
|
20
24
|
const description = getMetaContent(document, [
|
|
21
25
|
'meta[property="og:description"]',
|
|
22
26
|
'meta[name="twitter:description"]',
|
|
@@ -50,10 +54,31 @@ function extractArticleFromDocument(document) {
|
|
|
50
54
|
/**
|
|
51
55
|
* Extracts both article content and metadata from HTML in a single JSDOM parse.
|
|
52
56
|
* This is more efficient than calling extractArticle and extractMetadata separately.
|
|
57
|
+
* @param html - HTML string to extract content from
|
|
58
|
+
* @param url - URL of the page (used for resolving relative links)
|
|
59
|
+
* @returns Extraction result with article and metadata
|
|
53
60
|
*/
|
|
54
61
|
export function extractContent(html, url) {
|
|
62
|
+
// Input validation
|
|
63
|
+
if (!html || typeof html !== 'string') {
|
|
64
|
+
logWarn('extractContent called with invalid HTML input');
|
|
65
|
+
return { article: null, metadata: {} };
|
|
66
|
+
}
|
|
67
|
+
if (!url || typeof url !== 'string') {
|
|
68
|
+
logWarn('extractContent called with invalid URL');
|
|
69
|
+
return { article: null, metadata: {} };
|
|
70
|
+
}
|
|
71
|
+
// Size validation to prevent memory issues
|
|
72
|
+
let processedHtml = html;
|
|
73
|
+
if (html.length > MAX_HTML_SIZE) {
|
|
74
|
+
logWarn('HTML content exceeds maximum size for extraction, truncating', {
|
|
75
|
+
size: html.length,
|
|
76
|
+
maxSize: MAX_HTML_SIZE,
|
|
77
|
+
});
|
|
78
|
+
processedHtml = html.substring(0, MAX_HTML_SIZE);
|
|
79
|
+
}
|
|
55
80
|
try {
|
|
56
|
-
const dom = new JSDOM(
|
|
81
|
+
const dom = new JSDOM(processedHtml, { url });
|
|
57
82
|
const document = dom.window.document;
|
|
58
83
|
// Extract metadata first (non-destructive)
|
|
59
84
|
const metadata = extractMetadataFromDocument(document);
|
|
@@ -66,32 +91,4 @@ export function extractContent(html, url) {
|
|
|
66
91
|
return { article: null, metadata: {} };
|
|
67
92
|
}
|
|
68
93
|
}
|
|
69
|
-
/**
|
|
70
|
-
* Extracts main article content using Mozilla Readability
|
|
71
|
-
* @deprecated Use extractContent() for better performance when you need both article and metadata
|
|
72
|
-
*/
|
|
73
|
-
export function extractArticle(html, url) {
|
|
74
|
-
try {
|
|
75
|
-
const dom = new JSDOM(html, { url });
|
|
76
|
-
return extractArticleFromDocument(dom.window.document);
|
|
77
|
-
}
|
|
78
|
-
catch (error) {
|
|
79
|
-
logError('Failed to extract article', error instanceof Error ? error : undefined);
|
|
80
|
-
return null;
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
/**
|
|
84
|
-
* Extracts metadata from HTML
|
|
85
|
-
* @deprecated Use extractContent() for better performance when you need both article and metadata
|
|
86
|
-
*/
|
|
87
|
-
export function extractMetadata(html) {
|
|
88
|
-
try {
|
|
89
|
-
const { document } = new JSDOM(html).window;
|
|
90
|
-
return extractMetadataFromDocument(document);
|
|
91
|
-
}
|
|
92
|
-
catch (error) {
|
|
93
|
-
logError('Failed to extract metadata', error instanceof Error ? error : undefined);
|
|
94
|
-
return {};
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
94
|
//# sourceMappingURL=extractor.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,sCAAsC;AACtC,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAevC,SAAS,cAAc,CACrB,QAAkB,EAClB,SAAmB;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1E,IAAI,OAAO;YAAE,OAAO,OAAO,CAAC;IAC9B,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,2BAA2B,CAAC,QAAkB;IACrD,MAAM,KAAK,GACT,cAAc,CAAC,QAAQ,EAAE;QACvB,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC;QACF,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW;QAC5C,SAAS,CAAC;IAEZ,MAAM,WAAW,GAAG,cAAc,CAAC,QAAQ,EAAE;QAC3C,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE;QACtC,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CACjC,QAAkB;IAElB,kDAAkD;IAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAa,CAAC;IACvD,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,SAAS,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;IAE/B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,OAAO;QACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;QACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;QACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;QAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;QACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;QACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;KACxC,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,GAAW;IACtD,mBAAmB;IACnB,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,2CAA2C;IAC3C,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,8DAA8D,EAAE;YACtE,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,aAAa,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QAErC,2CAA2C;QAC3C,MAAM,QAAQ,GAAG,2BAA2B,CAAC,QAAQ,CAAC,CAAC;QAEvD,mEAAmE;QACnE,MAAM,OAAO,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;QAErD,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Destroys HTTP agents and closes all sockets
|
|
3
|
+
* Should be called during graceful shutdown
|
|
3
4
|
*/
|
|
4
|
-
export declare function
|
|
5
|
+
export declare function destroyAgents(): void;
|
|
5
6
|
/**
|
|
6
7
|
* Fetches URL with exponential backoff retry logic
|
|
8
|
+
* @param url - URL to fetch
|
|
9
|
+
* @param customHeaders - Optional custom headers
|
|
10
|
+
* @param maxRetries - Maximum retry attempts (1-10, defaults to 3)
|
|
7
11
|
*/
|
|
8
12
|
export declare function fetchUrlWithRetry(url: string, customHeaders?: Record<string, string>, maxRetries?: number): Promise<string>;
|
|
9
13
|
//# sourceMappingURL=fetcher.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAyCA;;;GAGG;AACH,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AAqID;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACtC,UAAU,SAAI,GACb,OAAO,CAAC,MAAM,CAAC,CA8BjB"}
|
package/dist/services/fetcher.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
|
+
import http from 'http';
|
|
3
|
+
import https from 'https';
|
|
2
4
|
import { config } from '../config/index.js';
|
|
3
5
|
import { FetchError, TimeoutError } from '../errors/app-error.js';
|
|
6
|
+
import { logDebug, logError } from './logger.js';
|
|
4
7
|
const BLOCKED_HEADERS = new Set([
|
|
5
8
|
'host',
|
|
6
9
|
'authorization',
|
|
@@ -25,10 +28,23 @@ function calculateBackoff(attempt, maxDelay = 10000) {
|
|
|
25
28
|
const jitter = baseDelay * 0.25 * (Math.random() * 2 - 1);
|
|
26
29
|
return Math.round(baseDelay + jitter);
|
|
27
30
|
}
|
|
31
|
+
// HTTP/HTTPS agents with connection pooling for better performance
|
|
32
|
+
const httpAgent = new http.Agent({ keepAlive: true, maxSockets: 25 });
|
|
33
|
+
const httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 25 });
|
|
34
|
+
/**
|
|
35
|
+
* Destroys HTTP agents and closes all sockets
|
|
36
|
+
* Should be called during graceful shutdown
|
|
37
|
+
*/
|
|
38
|
+
export function destroyAgents() {
|
|
39
|
+
httpAgent.destroy();
|
|
40
|
+
httpsAgent.destroy();
|
|
41
|
+
}
|
|
28
42
|
const client = axios.create({
|
|
29
43
|
timeout: config.fetcher.timeout,
|
|
30
44
|
maxRedirects: config.fetcher.maxRedirects,
|
|
31
45
|
maxContentLength: config.fetcher.maxContentLength,
|
|
46
|
+
httpAgent,
|
|
47
|
+
httpsAgent,
|
|
32
48
|
headers: {
|
|
33
49
|
'User-Agent': config.fetcher.userAgent,
|
|
34
50
|
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
@@ -38,10 +54,51 @@ const client = axios.create({
|
|
|
38
54
|
},
|
|
39
55
|
validateStatus: (status) => status >= 200 && status < 300,
|
|
40
56
|
});
|
|
57
|
+
// Request interceptor for logging and request enhancement
|
|
58
|
+
client.interceptors.request.use((requestConfig) => {
|
|
59
|
+
logDebug('HTTP Request', {
|
|
60
|
+
method: requestConfig.method?.toUpperCase(),
|
|
61
|
+
url: requestConfig.url,
|
|
62
|
+
});
|
|
63
|
+
return requestConfig;
|
|
64
|
+
}, (error) => {
|
|
65
|
+
logError('HTTP Request Error', error);
|
|
66
|
+
return Promise.reject(error);
|
|
67
|
+
});
|
|
68
|
+
// Response interceptor for logging and consistent error transformation
|
|
69
|
+
client.interceptors.response.use((response) => {
|
|
70
|
+
logDebug('HTTP Response', {
|
|
71
|
+
status: response.status,
|
|
72
|
+
url: response.config.url,
|
|
73
|
+
contentType: response.headers['content-type'],
|
|
74
|
+
});
|
|
75
|
+
return response;
|
|
76
|
+
}, (error) => {
|
|
77
|
+
const url = error.config?.url ?? 'unknown';
|
|
78
|
+
// Transform Axios errors to application errors
|
|
79
|
+
if (error.code === 'ECONNABORTED' || error.code === 'ETIMEDOUT') {
|
|
80
|
+
logError('HTTP Timeout', { url, timeout: config.fetcher.timeout });
|
|
81
|
+
return Promise.reject(new TimeoutError(config.fetcher.timeout, true));
|
|
82
|
+
}
|
|
83
|
+
if (error.response) {
|
|
84
|
+
const status = error.response.status;
|
|
85
|
+
const statusText = error.response.statusText;
|
|
86
|
+
logError('HTTP Error Response', { url, status, statusText });
|
|
87
|
+
return Promise.reject(new FetchError(`HTTP ${status}: ${statusText}`, url, status));
|
|
88
|
+
}
|
|
89
|
+
if (error.request) {
|
|
90
|
+
logError('HTTP Network Error', { url, code: error.code });
|
|
91
|
+
return Promise.reject(new FetchError(`Network error: Could not reach ${url}`, url));
|
|
92
|
+
}
|
|
93
|
+
logError('HTTP Unknown Error', { url, message: error.message });
|
|
94
|
+
return Promise.reject(new FetchError(error.message, url));
|
|
95
|
+
});
|
|
41
96
|
/**
|
|
42
|
-
* Fetches HTML content from a URL
|
|
97
|
+
* Fetches HTML content from a URL (internal - use fetchUrlWithRetry for retry logic)
|
|
98
|
+
* @throws {FetchError} if request fails or returns non-HTML content
|
|
99
|
+
* @throws {TimeoutError} if request times out
|
|
43
100
|
*/
|
|
44
|
-
|
|
101
|
+
async function fetchUrl(url, customHeaders) {
|
|
45
102
|
const requestConfig = {
|
|
46
103
|
method: 'GET',
|
|
47
104
|
url,
|
|
@@ -53,30 +110,42 @@ export async function fetchUrl(url, customHeaders) {
|
|
|
53
110
|
}
|
|
54
111
|
try {
|
|
55
112
|
const response = await client.request(requestConfig);
|
|
113
|
+
// Validate content type is HTML/text
|
|
114
|
+
const contentType = response.headers['content-type'];
|
|
115
|
+
if (contentType && !isHtmlContentType(contentType)) {
|
|
116
|
+
throw new FetchError(`Unexpected content type: ${contentType}. Expected HTML content.`, url);
|
|
117
|
+
}
|
|
56
118
|
return response.data;
|
|
57
119
|
}
|
|
58
120
|
catch (error) {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if (error.code === 'ECONNABORTED' || error.code === 'ETIMEDOUT') {
|
|
63
|
-
throw new TimeoutError(config.fetcher.timeout, true);
|
|
64
|
-
}
|
|
65
|
-
if (error.response) {
|
|
66
|
-
throw new FetchError(`HTTP ${error.response.status}: ${error.response.statusText}`, url, error.response.status);
|
|
67
|
-
}
|
|
68
|
-
if (error.request) {
|
|
69
|
-
throw new FetchError(`Network error: Could not reach ${url}`, url);
|
|
121
|
+
// Re-throw our custom errors (from interceptors or content-type check)
|
|
122
|
+
if (error instanceof FetchError || error instanceof TimeoutError) {
|
|
123
|
+
throw error;
|
|
70
124
|
}
|
|
71
|
-
|
|
125
|
+
// Handle any unexpected errors
|
|
126
|
+
throw new FetchError(`Unexpected error: ${error instanceof Error ? error.message : 'Unknown'}`, url);
|
|
72
127
|
}
|
|
73
128
|
}
|
|
129
|
+
/**
|
|
130
|
+
* Checks if content type indicates HTML content
|
|
131
|
+
*/
|
|
132
|
+
function isHtmlContentType(contentType) {
|
|
133
|
+
const normalized = contentType.toLowerCase();
|
|
134
|
+
return (normalized.includes('text/html') ||
|
|
135
|
+
normalized.includes('application/xhtml') ||
|
|
136
|
+
normalized.includes('text/plain'));
|
|
137
|
+
}
|
|
74
138
|
/**
|
|
75
139
|
* Fetches URL with exponential backoff retry logic
|
|
140
|
+
* @param url - URL to fetch
|
|
141
|
+
* @param customHeaders - Optional custom headers
|
|
142
|
+
* @param maxRetries - Maximum retry attempts (1-10, defaults to 3)
|
|
76
143
|
*/
|
|
77
144
|
export async function fetchUrlWithRetry(url, customHeaders, maxRetries = 3) {
|
|
145
|
+
// Validate maxRetries within bounds
|
|
146
|
+
const retries = Math.min(Math.max(1, maxRetries), 10);
|
|
78
147
|
let lastError;
|
|
79
|
-
for (let attempt = 1; attempt <=
|
|
148
|
+
for (let attempt = 1; attempt <= retries; attempt++) {
|
|
80
149
|
try {
|
|
81
150
|
return await fetchUrl(url, customHeaders);
|
|
82
151
|
}
|
|
@@ -89,12 +158,12 @@ export async function fetchUrlWithRetry(url, customHeaders, maxRetries = 3) {
|
|
|
89
158
|
throw error;
|
|
90
159
|
}
|
|
91
160
|
}
|
|
92
|
-
if (attempt <
|
|
161
|
+
if (attempt < retries) {
|
|
93
162
|
const delay = calculateBackoff(attempt);
|
|
94
163
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
95
164
|
}
|
|
96
165
|
}
|
|
97
166
|
}
|
|
98
|
-
throw new FetchError(`Failed after ${
|
|
167
|
+
throw new FetchError(`Failed after ${retries} attempts: ${lastError?.message ?? 'Unknown error'}`, url);
|
|
99
168
|
}
|
|
100
169
|
//# sourceMappingURL=fetcher.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAyC,MAAM,OAAO,CAAC;AAC9D,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAClE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEjD,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,MAAM;IACN,eAAe;IACf,QAAQ;IACR,iBAAiB;IACjB,WAAW;IACX,qBAAqB;CACtB,CAAC,CAAC;AAEH,SAAS,eAAe,CACtB,OAAgC;IAEhC,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAEpE,MAAM,SAAS,GAA2B,EAAE,CAAC;IAC7C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACnD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC5C,SAAS,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;AACnE,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAQ,GAAG,KAAK;IACzD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACtE,MAAM,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;AACxC,CAAC;AAED,mEAAmE;AACnE,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;AACtE,MAAM,UAAU,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;AAExE;;;GAGG;AACH,MAAM,UAAU,aAAa;IAC3B,SAAS,CAAC,OAAO,EAAE,CAAC;IACpB,UAAU,CAAC,OAAO,EAAE,CAAC;AACvB,CAAC;AAED,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IAC1B,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO;IAC/B,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,YAAY;IACzC,gBAAgB,EAAE,MAAM,CAAC,OAAO,CAAC,gBAAgB;IACjD,SAAS;IACT,UAAU;IACV,OAAO,EAAE;QACP,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS;QACtC,MAAM,EACJ,4EAA4E;QAC9E,iBAAiB,EAAE,gBAAgB;QACnC,iBAAiB,EAAE,mBAAmB;QACtC,UAAU,EAAE,YAAY;KACzB;IACD,cAAc,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG;CAC1D,CAAC,CAAC;AAEH,0DAA0D;AAC1D,MAAM,CAAC,YAAY,CAAC,OAAO,CAAC,GAAG,CAC7B,CAAC,aAAa,EAAE,EAAE;IAChB,QAAQ,CAAC,cAAc,EAAE;QACvB,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,WAAW,EAAE;QAC3C,GAAG,EAAE,aAAa,CAAC,GAAG;KACvB,CAAC,CAAC;IACH,OAAO,aAAa,CAAC;AACvB,CAAC,EACD,CAAC,KAAiB,EAAE,EAAE;IACpB,QAAQ,CAAC,oBAAoB,EAAE,KAAK,CAAC,CAAC;IACtC,OAAO,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC/B,CAAC,CACF,CAAC;AAEF,uEAAuE;AACvE,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAC9B,CAAC,QAAQ,EAAE,EAAE;IACX,QAAQ,CAAC,eAAe,EAAE;QACxB,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,GAAG,EAAE,QAAQ,CAAC,MAAM,CAAC,GAAG;QACxB,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC;KAC9C,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC;AAClB,CAAC,EACD,CAAC,KAAiB,EAAE,EAAE;IACpB,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC;IAE3C,+CAA+C;IAC/C,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;QAChE,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QACnE,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QACnB,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrC,MAAM,UAAU,GAAG,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC7C,QAAQ,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QAC7D,OAAO,OAAO,CAAC,MAAM,CACnB,IAAI,UAAU,CAAC,QAAQ,MAAM,KAAK,UAAU,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,CAC7D,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QAClB,QAAQ,CAAC,oBAAoB,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1D,OAAO,OAAO,CAAC,MAAM,CACnB,IAAI,UAAU,CAAC,kCAAkC,GAAG,EAAE,EAAE,GAAG,CAAC,CAC7D,CAAC;IACJ,CAAC;IAED,QAAQ,CAAC,oBAAoB,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;IAChE,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;AAC5D,CAAC,CACF,CAAC;AAEF;;;;GAIG;AACH,KAAK,UAAU,QAAQ,CACrB,GAAW,EACX,aAAsC;IAEtC,MAAM,aAAa,GAAuB;QACxC,MAAM,EAAE,KAAK;QACb,GAAG;QACH,YAAY,EAAE,MAAM;KACrB,CAAC;IAEF,MAAM,SAAS,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;IACjD,IAAI,SAAS,EAAE,CAAC;QACd,aAAa,CAAC,OAAO,GAAG,EAAE,GAAG,aAAa,CAAC,OAAO,EAAE,GAAG,SAAS,EAAE,CAAC;IACrE,CAAC;IAED,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,CAAS,aAAa,CAAC,CAAC;QAE7D,qCAAqC;QACrC,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAuB,CAAC;QAC3E,IAAI,WAAW,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,UAAU,CAClB,4BAA4B,WAAW,0BAA0B,EACjE,GAAG,CACJ,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,CAAC;IACvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uEAAuE;QACvE,IAAI,KAAK,YAAY,UAAU,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;YACjE,MAAM,KAAK,CAAC;QACd,CAAC;QAED,+BAA+B;QAC/B,MAAM,IAAI,UAAU,CAClB,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE,EACzE,GAAG,CACJ,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAC7C,OAAO,CACL,UAAU,CAAC,QAAQ,CAAC,WAAW,CAAC;QAChC,UAAU,CAAC,QAAQ,CAAC,mBAAmB,CAAC;QACxC,UAAU,CAAC,QAAQ,CAAC,YAAY,CAAC,CAClC,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,aAAsC,EACtC,UAAU,GAAG,CAAC;IAEd,oCAAoC;IACpC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC,CAAC;IACtD,IAAI,SAA4B,CAAC;IAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,CAAC;YACH,OAAO,MAAM,QAAQ,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;YAExE,+DAA+D;YAC/D,IAAI,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;gBACpD,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC;gBAChC,IAAI,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;oBACpD,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAED,IAAI,OAAO,GAAG,OAAO,EAAE,CAAC;gBACtB,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;gBACxC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,IAAI,UAAU,CAClB,gBAAgB,OAAO,cAAc,SAAS,EAAE,OAAO,IAAI,eAAe,EAAE,EAC5E,GAAG,CACJ,CAAC;AACJ,CAAC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare function logInfo(message: string, meta?: Record<string, unknown>): void;
|
|
2
|
-
export declare function logWarn(message: string, meta?: Record<string, unknown>): void;
|
|
3
2
|
export declare function logDebug(message: string, meta?: Record<string, unknown>): void;
|
|
3
|
+
export declare function logWarn(message: string, meta?: Record<string, unknown>): void;
|
|
4
4
|
export declare function logError(message: string, error?: Error | Record<string, unknown>): void;
|
|
5
5
|
//# sourceMappingURL=logger.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAmDA,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,QAAQ,CACtB,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC7B,IAAI,CAEN;AAED,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,QAAQ,CACtB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACtC,IAAI,CAQN"}
|
package/dist/services/logger.js
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
import winston from 'winston';
|
|
2
|
+
import fs from 'fs';
|
|
2
3
|
import { config } from '../config/index.js';
|
|
3
4
|
import path from 'path';
|
|
4
5
|
const logsDir = path.join(process.cwd(), 'logs');
|
|
6
|
+
// Ensure logs directory exists
|
|
7
|
+
try {
|
|
8
|
+
if (!fs.existsSync(logsDir)) {
|
|
9
|
+
fs.mkdirSync(logsDir, { recursive: true });
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
catch {
|
|
13
|
+
// If we can't create logs dir, file transports will fail gracefully
|
|
14
|
+
}
|
|
5
15
|
const logger = winston.createLogger({
|
|
6
16
|
level: config.logging.level,
|
|
7
17
|
format: winston.format.combine(winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), winston.format.errors({ stack: true }), winston.format.splat(), winston.format.json()),
|
|
@@ -29,14 +39,14 @@ export function logInfo(message, meta) {
|
|
|
29
39
|
if (config.logging.enabled)
|
|
30
40
|
logger.info(message, meta);
|
|
31
41
|
}
|
|
32
|
-
export function logWarn(message, meta) {
|
|
33
|
-
if (config.logging.enabled)
|
|
34
|
-
logger.warn(message, meta);
|
|
35
|
-
}
|
|
36
42
|
export function logDebug(message, meta) {
|
|
37
43
|
if (config.logging.enabled)
|
|
38
44
|
logger.debug(message, meta);
|
|
39
45
|
}
|
|
46
|
+
export function logWarn(message, meta) {
|
|
47
|
+
if (config.logging.enabled)
|
|
48
|
+
logger.warn(message, meta);
|
|
49
|
+
}
|
|
40
50
|
export function logError(message, error) {
|
|
41
51
|
if (!config.logging.enabled)
|
|
42
52
|
return;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,+BAA+B;AAC/B,IAAI,CAAC;IACH,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,CAAC;AACH,CAAC;AAAC,MAAM,CAAC;IACP,oEAAoE;AACtE,CAAC;AAED,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CACtB,OAAe,EACf,IAA8B;IAE9B,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CACtB,OAAe,EACf,KAAuC;IAEvC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,OAAO;IAEpC,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;QACpB,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE;QAC9C,CAAC,CAAC,KAAK,CAAC;IACZ,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AACnC,CAAC"}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import type { ContentBlockUnion } from '../types/index.js';
|
|
2
2
|
/**
|
|
3
3
|
* Parses HTML content and extracts semantic blocks
|
|
4
|
+
* @param html - HTML string to parse
|
|
5
|
+
* @returns Array of content blocks (empty array if parsing fails)
|
|
4
6
|
*/
|
|
5
7
|
export declare function parseHtml(html: string): ContentBlockUnion[];
|
|
6
8
|
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAOV,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AA+J3B;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAwC3D"}
|
package/dist/services/parser.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import * as cheerio from 'cheerio';
|
|
2
2
|
import { sanitizeText } from '../utils/sanitizer.js';
|
|
3
3
|
import { config } from '../config/index.js';
|
|
4
|
+
import { logWarn } from './logger.js';
|
|
5
|
+
// Maximum HTML size to parse (10MB)
|
|
6
|
+
const MAX_HTML_SIZE = 10 * 1024 * 1024;
|
|
4
7
|
function parseHeading($, element) {
|
|
5
8
|
const text = sanitizeText($(element).text());
|
|
6
9
|
if (!text)
|
|
@@ -135,18 +138,46 @@ function filterBlocks(blocks) {
|
|
|
135
138
|
}
|
|
136
139
|
/**
|
|
137
140
|
* Parses HTML content and extracts semantic blocks
|
|
141
|
+
* @param html - HTML string to parse
|
|
142
|
+
* @returns Array of content blocks (empty array if parsing fails)
|
|
138
143
|
*/
|
|
139
144
|
export function parseHtml(html) {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
145
|
+
// Input validation
|
|
146
|
+
if (!html || typeof html !== 'string') {
|
|
147
|
+
return [];
|
|
148
|
+
}
|
|
149
|
+
// Size validation to prevent memory issues
|
|
150
|
+
if (html.length > MAX_HTML_SIZE) {
|
|
151
|
+
logWarn('HTML content exceeds maximum size, truncating', {
|
|
152
|
+
size: html.length,
|
|
153
|
+
maxSize: MAX_HTML_SIZE,
|
|
154
|
+
});
|
|
155
|
+
html = html.substring(0, MAX_HTML_SIZE);
|
|
156
|
+
}
|
|
157
|
+
try {
|
|
158
|
+
const $ = cheerio.load(html);
|
|
159
|
+
const blocks = [];
|
|
160
|
+
$('script, style, noscript, iframe, svg').remove();
|
|
161
|
+
$('body')
|
|
162
|
+
.find('h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code, table, img')
|
|
163
|
+
.each((_, element) => {
|
|
164
|
+
try {
|
|
165
|
+
const block = parseElement($, element);
|
|
166
|
+
if (block)
|
|
167
|
+
blocks.push(block);
|
|
168
|
+
}
|
|
169
|
+
catch {
|
|
170
|
+
// Skip individual element parsing errors
|
|
171
|
+
}
|
|
172
|
+
});
|
|
173
|
+
return filterBlocks(blocks);
|
|
174
|
+
}
|
|
175
|
+
catch (error) {
|
|
176
|
+
logWarn('Failed to parse HTML', {
|
|
177
|
+
error: error instanceof Error ? error.message : 'Unknown error',
|
|
178
|
+
htmlLength: html.length,
|
|
179
|
+
});
|
|
180
|
+
return [];
|
|
181
|
+
}
|
|
151
182
|
}
|
|
152
183
|
//# sourceMappingURL=parser.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAWtC,oCAAoC;AACpC,MAAM,aAAa,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;AAEvC,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,OAAO;QACL,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CACrB,CAAa,EACb,OAAgB;IAEhB,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAE7E,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,CAAC,CAAC,OAAO,CAAC;SACP,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACd,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACxC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEL,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI;QAC/C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAExD,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;QAC5B,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE1B,0CAA0C;IAC1C,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjD,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM;aACH,IAAI,CAAC,IAAI,CAAC;aACV,KAAK,EAAE;aACP,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;IACP,CAAC;IAED,oBAAoB;IACpB,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,cAAc,CAAC;IACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QACxC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,CAAC,CAAC,GAAG,CAAC;aACH,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QACL,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,OAAO;QACL,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,OAAO;QACb,GAAG;QACH,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,SAAS;KACzC,CAAC;AACJ,CAAC;AAED,MAAM,eAAe,GAAG;IACtB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,CAAC,EAAE,cAAc;IACjB,EAAE,EAAE,SAAS;IACb,EAAE,EAAE,SAAS;IACb,GAAG,EAAE,SAAS;IACd,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,UAAU;IACjB,GAAG,EAAE,UAAU;CAIhB,CAAC;AAIF,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG,IAAI,eAAe,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,CAAa,EAAE,IAAa;IAChD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE1E,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,YAAY,CAAC,MAA2B;IAC/C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC7B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,WAAW,CAAC;YACjB,KAAK,SAAS,CAAC;YACf,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,mBAAmB;IACnB,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,2CAA2C;IAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa,EAAE,CAAC;QAChC,OAAO,CAAC,+CAA+C,EAAE;YACvD,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;QACH,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IAC1C,CAAC;IAED,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAwB,EAAE,CAAC;QAEvC,CAAC,CAAC,sCAAsC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEnD,CAAC,CAAC,MAAM,CAAC;aACN,IAAI,CAAC,0DAA0D,CAAC;aAChE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;YACnB,IAAI,CAAC;gBACH,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBACvC,IAAI,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChC,CAAC;YAAC,MAAM,CAAC;gBACP,yCAAyC;YAC3C,CAAC;QACH,CAAC,CAAC,CAAC;QAEL,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,sBAAsB,EAAE;YAC9B,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;YAC/D,UAAU,EAAE,IAAI,CAAC,MAAM;SACxB,CAAC,CAAC;QACH,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC"}
|
|
@@ -1,20 +1,24 @@
|
|
|
1
|
-
import type { FetchLinksInput } from '../../types/index.js';
|
|
1
|
+
import type { FetchLinksInput, ExtractedLink } from '../../types/index.js';
|
|
2
2
|
export declare const FETCH_LINKS_TOOL_NAME = "fetch-links";
|
|
3
3
|
export declare const FETCH_LINKS_TOOL_DESCRIPTION = "Extracts all hyperlinks from a webpage with anchor text and type classification";
|
|
4
4
|
/**
|
|
5
5
|
* Tool handler for extracting links from a URL
|
|
6
6
|
*/
|
|
7
|
-
export declare function fetchLinksToolHandler(input: FetchLinksInput): Promise<{
|
|
7
|
+
export declare function fetchLinksToolHandler(input: FetchLinksInput): Promise<import("../../utils/tool-error-handler.js").ToolErrorResponse | {
|
|
8
8
|
content: {
|
|
9
9
|
type: "text";
|
|
10
10
|
text: string;
|
|
11
11
|
}[];
|
|
12
|
-
|
|
12
|
+
structuredContent: {
|
|
13
|
+
url: string;
|
|
14
|
+
linkCount: number;
|
|
15
|
+
links: ExtractedLink[];
|
|
16
|
+
};
|
|
13
17
|
} | {
|
|
14
18
|
content: {
|
|
15
19
|
type: "text";
|
|
16
20
|
text: string;
|
|
17
21
|
}[];
|
|
18
|
-
|
|
22
|
+
structuredContent?: undefined;
|
|
19
23
|
}>;
|
|
20
24
|
//# sourceMappingURL=fetch-links.tool.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-links.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-links.tool.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAE3E,eAAO,MAAM,qBAAqB,gBAAgB,CAAC;AACnD,eAAO,MAAM,4BAA4B,oFAC0C,CAAC;AAqDpF;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,eAAe;;;;;;aAiBjD,MAAM;mBACA,MAAM;eACV,aAAa,EAAE;;;;;;;;GAuDjC"}
|