@j0hanz/superfetch 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +327 -0
- package/dist/config/index.d.ts +30 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +42 -0
- package/dist/config/index.js.map +1 -0
- package/dist/errors/app-error.d.ts +71 -0
- package/dist/errors/app-error.d.ts.map +1 -0
- package/dist/errors/app-error.js +103 -0
- package/dist/errors/app-error.js.map +1 -0
- package/dist/errors/index.d.ts +2 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +2 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +179 -0
- package/dist/index.js.map +1 -0
- package/dist/middleware/error-handler.d.ts +7 -0
- package/dist/middleware/error-handler.d.ts.map +1 -0
- package/dist/middleware/error-handler.js +37 -0
- package/dist/middleware/error-handler.js.map +1 -0
- package/dist/middleware/rate-limiter.d.ts +33 -0
- package/dist/middleware/rate-limiter.d.ts.map +1 -0
- package/dist/middleware/rate-limiter.js +100 -0
- package/dist/middleware/rate-limiter.js.map +1 -0
- package/dist/prompts/index.d.ts +6 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +81 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/resources/index.d.ts +6 -0
- package/dist/resources/index.d.ts.map +1 -0
- package/dist/resources/index.js +44 -0
- package/dist/resources/index.js.map +1 -0
- package/dist/server.d.ts +8 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +39 -0
- package/dist/server.js.map +1 -0
- package/dist/services/cache.d.ts +16 -0
- package/dist/services/cache.d.ts.map +1 -0
- package/dist/services/cache.js +63 -0
- package/dist/services/cache.js.map +1 -0
- package/dist/services/cache.service.d.ts +52 -0
- package/dist/services/cache.service.d.ts.map +1 -0
- package/dist/services/cache.service.js +113 -0
- package/dist/services/cache.service.js.map +1 -0
- package/dist/services/extractor.d.ts +32 -0
- package/dist/services/extractor.d.ts.map +1 -0
- package/dist/services/extractor.js +97 -0
- package/dist/services/extractor.js.map +1 -0
- package/dist/services/extractor.service.d.ts +18 -0
- package/dist/services/extractor.service.d.ts.map +1 -0
- package/dist/services/extractor.service.js +75 -0
- package/dist/services/extractor.service.js.map +1 -0
- package/dist/services/fetcher.d.ts +9 -0
- package/dist/services/fetcher.d.ts.map +1 -0
- package/dist/services/fetcher.js +100 -0
- package/dist/services/fetcher.js.map +1 -0
- package/dist/services/fetcher.service.d.ts +18 -0
- package/dist/services/fetcher.service.d.ts.map +1 -0
- package/dist/services/fetcher.service.js +122 -0
- package/dist/services/fetcher.service.js.map +1 -0
- package/dist/services/logger.d.ts +5 -0
- package/dist/services/logger.d.ts.map +1 -0
- package/dist/services/logger.js +48 -0
- package/dist/services/logger.js.map +1 -0
- package/dist/services/logger.service.d.ts +5 -0
- package/dist/services/logger.service.d.ts.map +1 -0
- package/dist/services/logger.service.js +57 -0
- package/dist/services/logger.service.js.map +1 -0
- package/dist/services/parser.d.ts +6 -0
- package/dist/services/parser.d.ts.map +1 -0
- package/dist/services/parser.js +152 -0
- package/dist/services/parser.js.map +1 -0
- package/dist/services/parser.service.d.ts +42 -0
- package/dist/services/parser.service.d.ts.map +1 -0
- package/dist/services/parser.service.js +209 -0
- package/dist/services/parser.service.js.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts +20 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.js +91 -0
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +17 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-markdown.tool.js +99 -0
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -0
- package/dist/tools/handlers/fetch-url.tool.d.ts +17 -0
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-url.tool.js +103 -0
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -0
- package/dist/tools/index.d.ts +7 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +83 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +4 -0
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -0
- package/dist/transformers/jsonl.transformer.js +42 -0
- package/dist/transformers/jsonl.transformer.js.map +1 -0
- package/dist/transformers/markdown.transformer.d.ts +4 -0
- package/dist/transformers/markdown.transformer.d.ts.map +1 -0
- package/dist/transformers/markdown.transformer.js +104 -0
- package/dist/transformers/markdown.transformer.js.map +1 -0
- package/dist/types/content.types.d.ts +63 -0
- package/dist/types/content.types.d.ts.map +1 -0
- package/dist/types/content.types.js +2 -0
- package/dist/types/content.types.js.map +1 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +3 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/schemas.d.ts +22 -0
- package/dist/types/schemas.d.ts.map +1 -0
- package/dist/types/schemas.js +5 -0
- package/dist/types/schemas.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +9 -0
- package/dist/utils/sanitizer.d.ts.map +1 -0
- package/dist/utils/sanitizer.js +19 -0
- package/dist/utils/sanitizer.js.map +1 -0
- package/dist/utils/url-validator.d.ts +10 -0
- package/dist/utils/url-validator.d.ts.map +1 -0
- package/dist/utils/url-validator.js +69 -0
- package/dist/utils/url-validator.js.map +1 -0
- package/package.json +80 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { config } from '../config/index.js';
|
|
3
|
+
import { FetchError, TimeoutError } from '../errors/app-error.js';
|
|
4
|
+
// Header names that callers may not supply through custom headers (security).
// Entries are lower-case; candidates are lower-cased before the lookup.
const BLOCKED_HEADERS = new Set([
    'host',
    'authorization',
    'cookie',
    'x-forwarded-for',
    'x-real-ip',
    'proxy-authorization',
]);
/**
 * Removes blocked headers from a caller-supplied header map.
 *
 * @param headers Optional map of header name to header value.
 * @returns A new object containing only the permitted headers, or
 *   `undefined` when no headers were given or none of them survived.
 */
function sanitizeHeaders(headers) {
    if (!headers) {
        return undefined;
    }
    const allowed = {};
    Object.entries(headers).forEach(([name, headerValue]) => {
        // Matching is case-insensitive: "Cookie" and "COOKIE" are both dropped.
        if (BLOCKED_HEADERS.has(name.toLowerCase())) {
            return;
        }
        allowed[name] = headerValue;
    });
    // An empty result is reported as "no custom headers" rather than {}.
    const hasAllowedHeaders = Object.keys(allowed).length > 0;
    return hasAllowedHeaders ? allowed : undefined;
}
|
|
28
|
+
/**
 * Computes the wait time before the next retry: exponential growth from one
 * second, capped at `maxDelay`, then randomised by up to ±25%.
 *
 * @param attempt 1-based number of the attempt that just failed.
 * @param maxDelay Upper bound in milliseconds applied before jitter is added.
 * @returns Delay in milliseconds, rounded to the nearest integer. Because
 *   jitter is applied after the cap, the result can exceed `maxDelay` by up
 *   to 25%.
 */
function calculateBackoff(attempt, maxDelay = 10000) {
    const exponential = 1000 * 2 ** (attempt - 1);
    const capped = Math.min(exponential, maxDelay);
    // Random factor in the range [-1, 1).
    const direction = Math.random() * 2 - 1;
    const jitter = capped * 0.25 * direction;
    return Math.round(capped + jitter);
}
|
|
37
|
+
/**
 * Fetches page content over HTTP through a pre-configured axios instance and
 * translates axios failures into the application's error types.
 */
class FetcherService {
    // axios instance created once in the constructor and reused for every request.
    client;
    constructor() {
        this.client = axios.create({
            timeout: config.fetcher.timeout,
            maxRedirects: config.fetcher.maxRedirects,
            maxContentLength: config.fetcher.maxContentLength,
            headers: {
                'User-Agent': config.fetcher.userAgent,
                Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate, br',
                Connection: 'keep-alive',
            },
            // Validate status - only 2xx responses are valid
            validateStatus: (status) => status >= 200 && status < 300,
        });
    }
    /**
     * Fetches the body of a URL as text with a single GET request.
     *
     * @param url Address to fetch.
     * @param customHeaders Optional extra request headers; blocked headers are
     *   removed by `sanitizeHeaders` before the request is sent.
     * @returns The response body.
     * @throws {FetchError} on network or HTTP errors
     * @throws {TimeoutError} on request timeout
     */
    async fetchUrl(url, customHeaders) {
        const requestConfig = {
            method: 'GET',
            url,
            responseType: 'text',
        };
        // Add sanitized custom headers if provided
        const sanitized = sanitizeHeaders(customHeaders);
        if (sanitized) {
            requestConfig.headers = { ...requestConfig.headers, ...sanitized };
        }
        try {
            const response = await this.client.request(requestConfig);
            return response.data;
        }
        catch (error) {
            if (!axios.isAxiosError(error)) {
                throw new FetchError(`Unexpected error: ${error instanceof Error ? error.message : 'Unknown'}`, url);
            }
            const axiosError = error;
            // Timeouts are checked first so they surface as TimeoutError.
            if (axiosError.code === 'ECONNABORTED' || axiosError.code === 'ETIMEDOUT') {
                throw new TimeoutError(config.fetcher.timeout, true);
            }
            // A response arrived but its status failed validateStatus.
            if (axiosError.response) {
                throw new FetchError(`HTTP ${axiosError.response.status}: ${axiosError.response.statusText}`, url, axiosError.response.status);
            }
            // The request was sent but no response was received.
            if (axiosError.request) {
                throw new FetchError(`Network error: Could not reach ${url}`, url);
            }
            throw new FetchError(axiosError.message, url);
        }
    }
    /**
     * Fetches a URL, retrying failed attempts with exponential backoff.
     *
     * Client errors (4xx) other than 429 are rethrown immediately without a
     * retry. Every other failure is retried until `maxRetries` attempts have
     * been made in total.
     *
     * @param url Address to fetch.
     * @param customHeaders Optional extra request headers, passed to `fetchUrl`.
     * @param maxRetries Total number of attempts, including the first one.
     * @returns The response body of the first successful attempt.
     * @throws {FetchError} after all retries exhausted; it carries the HTTP
     *   status of the last failed attempt when that attempt had one.
     */
    async fetchUrlWithRetry(url, customHeaders, maxRetries = 3) {
        let lastError;
        for (let attempt = 1; attempt <= maxRetries; attempt++) {
            try {
                return await this.fetchUrl(url, customHeaders);
            }
            catch (error) {
                lastError = error instanceof Error ? error : new Error('Unknown error');
                // Don't retry on client errors (4xx) except 429 (rate limited)
                if (error instanceof FetchError && error.httpStatus) {
                    const status = error.httpStatus;
                    if (status >= 400 && status < 500 && status !== 429) {
                        throw error;
                    }
                }
                // No point sleeping after the final attempt.
                if (attempt < maxRetries) {
                    const delay = calculateBackoff(attempt);
                    await new Promise((resolve) => setTimeout(resolve, delay));
                }
            }
        }
        // Keep the last HTTP status (e.g. 503 or 429) on the final error so
        // callers can still tell an HTTP failure from a network failure.
        const lastStatus = lastError instanceof FetchError ? lastError.httpStatus : undefined;
        throw new FetchError(`Failed after ${maxRetries} attempts: ${lastError?.message ?? 'Unknown error'}`, url, lastStatus);
    }
}
// Singleton instance
export const fetcherService = new FetcherService();
|
|
122
|
+
//# sourceMappingURL=fetcher.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetcher.service.js","sourceRoot":"","sources":["../../src/services/fetcher.service.ts"],"names":[],"mappings":"AAAA,OAAO,KAAwD,MAAM,OAAO,CAAC;AAC7E,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAElE,qEAAqE;AACrE,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,MAAM;IACN,eAAe;IACf,QAAQ;IACR,iBAAiB;IACjB,WAAW;IACX,qBAAqB;CACtB,CAAC,CAAC;AAEH;;GAEG;AACH,SAAS,eAAe,CACtB,OAAgC;IAEhC,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,SAAS,GAA2B,EAAE,CAAC;IAE7C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACnD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC5C,SAAS,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;AACnE,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAQ,GAAG,KAAK;IACzD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACtE,oBAAoB;IACpB,MAAM,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;AACxC,CAAC;AAED,MAAM,cAAc;IACD,MAAM,CAAgB;IAEvC;QACE,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO;YAC/B,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,YAAY;YACzC,gBAAgB,EAAE,MAAM,CAAC,OAAO,CAAC,gBAAgB;YACjD,OAAO,EAAE;gBACP,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS;gBACtC,MAAM,EACJ,4EAA4E;gBAC9E,iBAAiB,EAAE,gBAAgB;gBACnC,iBAAiB,EAAE,mBAAmB;gBACtC,UAAU,EAAE,YAAY;aACzB;YACD,iDAAiD;YACjD,cAAc,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG;SAC1D,CAAC,CAAC;IACL,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,QAAQ,CACZ,GAAW,EACX,aAAsC;QAEtC,MAAM,aAAa,GAAuB;YACxC,MAAM,EAAE,KAAK;YACb,GAAG;YACH,YAAY,EAAE,MAAM;SACrB,CAAC;QAEF,2CAA2C;QAC3C,MAAM,SAAS,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;QACjD,IAAI,SAAS,EAAE,CAAC;YACd,aAAa,CAAC,OAAO,GAAG,EA
AE,GAAG,aAAa,CAAC,OAAO,EAAE,GAAG,SAAS,EAAE,CAAC;QACrE,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAS,aAAa,CAAC,CAAC;YAClE,OAAO,QAAQ,CAAC,IAAI,CAAC;QACvB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,UAAU,CAClB,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE,EACzE,GAAG,CACJ,CAAC;YACJ,CAAC;YAED,MAAM,UAAU,GAAG,KAAmB,CAAC;YAEvC,IAAI,UAAU,CAAC,IAAI,KAAK,cAAc,IAAI,UAAU,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC1E,MAAM,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YACvD,CAAC;YAED,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;gBACxB,MAAM,IAAI,UAAU,CAClB,QAAQ,UAAU,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,CAAC,QAAQ,CAAC,UAAU,EAAE,EACvE,GAAG,EACH,UAAU,CAAC,QAAQ,CAAC,MAAM,CAC3B,CAAC;YACJ,CAAC;YAED,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;gBACvB,MAAM,IAAI,UAAU,CAAC,kCAAkC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;YACrE,CAAC;YAED,MAAM,IAAI,UAAU,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,iBAAiB,CACrB,GAAW,EACX,aAAsC,EACtC,UAAU,GAAG,CAAC;QAEd,IAAI,SAA4B,CAAC;QAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACjD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;gBAExE,+DAA+D;gBAC/D,IAAI,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;oBACpD,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC;oBAChC,IAAI,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;wBACpD,MAAM,KAAK,CAAC;oBACd,CAAC;gBACH,CAAC;gBAED,IAAI,OAAO,GAAG,UAAU,EAAE,CAAC;oBACzB,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;oBACxC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,IAAI,UAAU,CAClB,gBAAgB,UAAU,cAAc,SAAS,EAAE,OAAO,IAAI,eAAe,EAAE,EAC/E,GAAG,CACJ,CAAC;IACJ,CAAC;CACF;AAED,qBAAqB;AACrB,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export declare function logInfo(message: string, meta?: Record<string, unknown>): void;
|
|
2
|
+
export declare function logWarn(message: string, meta?: Record<string, unknown>): void;
|
|
3
|
+
export declare function logDebug(message: string, meta?: Record<string, unknown>): void;
|
|
4
|
+
export declare function logError(message: string, error?: Error | Record<string, unknown>): void;
|
|
5
|
+
//# sourceMappingURL=logger.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAyCA,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE9E;AAED,wBAAgB,QAAQ,CACtB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACtC,IAAI,CAQN"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import winston from 'winston';
|
|
2
|
+
import { config } from '../config/index.js';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
// Log files are written to a "logs" folder under the current working directory.
const logsDir = path.join(process.cwd(), 'logs');
// Shared winston logger: JSON entries with a timestamp, error stacks and
// splat interpolation, written to two size-limited files.
const logger = winston.createLogger({
    level: config.logging.level,
    format: winston.format.combine(
        winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
        winston.format.errors({ stack: true }),
        winston.format.splat(),
        winston.format.json(),
    ),
    defaultMeta: { service: 'superfetch' },
    transports: [
        // combined.log has no level of its own; error.log is limited to 'error'.
        { file: 'combined.log' },
        { file: 'error.log', level: 'error' },
    ].map(({ file, ...levelOptions }) => new winston.transports.File({
        filename: path.join(logsDir, file),
        ...levelOptions,
        maxsize: 5242880, // 5 MB
        maxFiles: 5,
    })),
});
// Outside production, also print colourised single-line entries to the console.
if (process.env.NODE_ENV !== 'production') {
    const consoleFormat = winston.format.combine(winston.format.colorize(), winston.format.simple());
    logger.add(new winston.transports.Console({ format: consoleFormat }));
}
|
|
28
|
+
/**
 * Writes an info-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param meta Optional structured data passed through to the logger.
 */
export function logInfo(message, meta) {
    if (!config.logging.enabled) {
        return;
    }
    logger.info(message, meta);
}
|
|
32
|
+
/**
 * Writes a warn-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param meta Optional structured data passed through to the logger.
 */
export function logWarn(message, meta) {
    if (!config.logging.enabled) {
        return;
    }
    logger.warn(message, meta);
}
|
|
36
|
+
/**
 * Writes a debug-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param meta Optional structured data passed through to the logger.
 */
export function logDebug(message, meta) {
    if (!config.logging.enabled) {
        return;
    }
    logger.debug(message, meta);
}
|
|
40
|
+
/**
 * Writes an error-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param error Optional `Error` or plain metadata object. An `Error` is
 *   reduced to `{ error: message, stack }`; anything else is passed through
 *   unchanged.
 */
export function logError(message, error) {
    if (config.logging.enabled) {
        let details = error;
        if (error instanceof Error) {
            details = { error: error.message, stack: error.stack };
        }
        logger.error(message, details);
    }
}
|
|
48
|
+
//# sourceMappingURL=logger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,OAAe,EAAE,IAA8B;IACtE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,QAAQ,CACtB,OAAe,EACf,KAAuC;IAEvC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,OAAO;IAEpC,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;QACpB,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE;QAC9C,CAAC,CAAC,KAAK,CAAC;IACZ,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AACnC,CAA
C"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export declare const logInfo: (message: string, meta?: Record<string, unknown>) => void;
|
|
2
|
+
export declare const logWarn: (message: string, meta?: Record<string, unknown>) => void;
|
|
3
|
+
export declare const logDebug: (message: string, meta?: Record<string, unknown>) => void;
|
|
4
|
+
export declare const logError: (message: string, error?: Error | Record<string, unknown>) => void;
|
|
5
|
+
//# sourceMappingURL=logger.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.service.d.ts","sourceRoot":"","sources":["../../src/services/logger.service.ts"],"names":[],"mappings":"AA+CA,eAAO,MAAM,OAAO,GAAI,SAAS,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SAItE,CAAC;AAEF,eAAO,MAAM,OAAO,GAAI,SAAS,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SAItE,CAAC;AAEF,eAAO,MAAM,QAAQ,GAAI,SAAS,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SAIvE,CAAC;AAEF,eAAO,MAAM,QAAQ,GACnB,SAAS,MAAM,EACf,QAAQ,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SASxC,CAAC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import winston from 'winston';
|
|
2
|
+
import { config } from '../config/index.js';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
// Log files are written to a "logs" folder under the current working directory.
const logsDir = path.join(process.cwd(), 'logs');
// Size and count limits shared by both file transports.
const sharedFileOptions = {
    maxsize: 5242880, // 5MB
    maxFiles: 5,
};
// Shared winston logger: JSON entries with a timestamp, error stacks and
// splat interpolation.
const logger = winston.createLogger({
    level: config.logging.level,
    format: winston.format.combine(
        winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
        winston.format.errors({ stack: true }),
        winston.format.splat(),
        winston.format.json(),
    ),
    defaultMeta: { service: 'superfetch' },
    transports: [
        // Every entry the logger accepts goes to combined.log
        new winston.transports.File({
            filename: path.join(logsDir, 'combined.log'),
            ...sharedFileOptions,
        }),
        // Only error-level entries go to error.log
        new winston.transports.File({
            filename: path.join(logsDir, 'error.log'),
            level: 'error',
            ...sharedFileOptions,
        }),
    ],
});
// Outside production, also print colourised single-line entries to the console.
if (process.env.NODE_ENV !== 'production') {
    const consoleFormat = winston.format.combine(winston.format.colorize(), winston.format.simple());
    logger.add(new winston.transports.Console({ format: consoleFormat }));
}
|
|
33
|
+
// Export convenience methods
|
|
34
|
+
/**
 * Writes an info-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param meta Optional structured data passed through to the logger.
 */
export const logInfo = (message, meta) => {
    if (!config.logging.enabled) {
        return;
    }
    logger.info(message, meta);
};
|
|
39
|
+
/**
 * Writes a warn-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param meta Optional structured data passed through to the logger.
 */
export const logWarn = (message, meta) => {
    if (!config.logging.enabled) {
        return;
    }
    logger.warn(message, meta);
};
|
|
44
|
+
/**
 * Writes a debug-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param meta Optional structured data passed through to the logger.
 */
export const logDebug = (message, meta) => {
    if (!config.logging.enabled) {
        return;
    }
    logger.debug(message, meta);
};
|
|
49
|
+
/**
 * Writes an error-level entry, unless logging is disabled in the config.
 *
 * @param message Text of the log entry.
 * @param error Optional `Error` or plain metadata object. An `Error` is
 *   reduced to `{ error: message, stack }`; anything else is passed through
 *   unchanged.
 */
export const logError = (message, error) => {
    if (!config.logging.enabled) {
        return;
    }
    let details = error;
    if (error instanceof Error) {
        details = { error: error.message, stack: error.stack };
    }
    logger.error(message, details);
};
|
|
57
|
+
//# sourceMappingURL=logger.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.service.js","sourceRoot":"","sources":["../../src/services/logger.service.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,6BAA6B;AAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,yBAAyB;AACzB,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,iCAAiC;QACjC,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO,EAAE,MAAM;YACxB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,gCAAgC;QAChC,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO,EAAE,MAAM;YACxB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,uCAAuC;AACvC,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,6BAA6B;AAC7B,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,OAAe,EAAE,IAA8B,EAAE,EAAE;IACzE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,OAAe,EAAE,IAA8B,EAAE,EAAE;IACzE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,QAAQ,GAAG,CAAC,OAAe,EAAE,IAA8B,EAAE,EAAE;IAC1E,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAC9B,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,QAAQ,GAAG,CACtB,OAAe,EA
Cf,KAAuC,EACvC,EAAE;IACF,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;YACpB,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE;YAC9C,CAAC,CAAC,KAAK,CAAC;QACZ,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IACnC,CAAC;AACH,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAOV,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AAyJ3B;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAc3D"}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
import { sanitizeText } from '../utils/sanitizer.js';
|
|
3
|
+
import { config } from '../config/index.js';
|
|
4
|
+
/**
 * Builds a heading block from a heading element.
 *
 * @param $ Cheerio instance for the document.
 * @param element Heading element; its level is parsed from the characters
 *   after the first character of the tag name (for example "h2" gives 2).
 * @returns `{ type: 'heading', level, text }`, or `null` when the sanitized
 *   text is empty.
 */
function parseHeading($, element) {
    const headingText = sanitizeText($(element).text());
    if (headingText) {
        const level = parseInt(element.tagName.substring(1), 10);
        return { type: 'heading', level, text: headingText };
    }
    return null;
}
|
|
14
|
+
/**
 * Builds a paragraph block from an element's text.
 *
 * @param $ Cheerio instance for the document.
 * @param element Element whose text becomes the paragraph.
 * @returns `{ type: 'paragraph', text }`, or `null` when the sanitized text is
 *   empty or shorter than `config.extraction.minParagraphLength`.
 */
function parseParagraph($, element) {
    const body = sanitizeText($(element).text());
    if (!body) {
        return null;
    }
    const tooShort = body.length < config.extraction.minParagraphLength;
    return tooShort ? null : { type: 'paragraph', text: body };
}
|
|
20
|
+
/**
 * Builds a list block from a list element.
 *
 * Every descendant `<li>` contributes one item, so items of nested lists are
 * included as well.
 *
 * @param $ Cheerio instance for the document.
 * @param element List element; the list is ordered when its tag is `ol`.
 * @returns `{ type: 'list', ordered, items }`, or `null` when no item has
 *   text after sanitizing.
 */
function parseList($, element) {
    const items = $(element)
        .find('li')
        .toArray()
        .map((li) => sanitizeText($(li).text()))
        .filter((itemText) => itemText);
    if (items.length === 0) {
        return null;
    }
    const ordered = element.tagName.toLowerCase() === 'ol';
    return { type: 'list', ordered, items };
}
|
|
37
|
+
/**
 * Builds a code block from a `<pre>` or `<code>` element.
 *
 * Both tags are routed to this parser, so a `<pre><code>…</code></pre>` pair
 * would otherwise be reported twice. A `<code>` that sits inside a `<pre>` is
 * therefore skipped and the enclosing `<pre>` produces the single block.
 *
 * @param $ Cheerio instance for the document.
 * @param element The `<pre>` or `<code>` element.
 * @returns `{ type: 'code', language, text }` with the text trimmed but not
 *   otherwise sanitized, or `null` when the text is empty or the element is a
 *   `<code>` nested in a `<pre>`. `language` is taken from a `language-xxx`
 *   class and is `undefined` when there is none.
 */
function parseCode($, element) {
    const $el = $(element);
    const isCodeTag = element.tagName.toLowerCase() === 'code';
    // The enclosing <pre> already yields this content; avoid a duplicate block.
    if (isCodeTag && $el.closest('pre').length > 0) {
        return null;
    }
    const text = $el.text().trim();
    if (!text)
        return null;
    // Highlighters usually put the language class on the inner <code>, so a
    // <pre> without its own class falls back to its first nested <code>.
    const ownClass = $el.attr('class');
    const nestedClass = isCodeTag ? undefined : $el.find('code').first().attr('class');
    const className = ownClass || nestedClass || '';
    const languageMatch = className.match(/language-(\w+)/);
    return {
        type: 'code',
        language: languageMatch?.[1],
        text,
    };
}
|
|
49
|
+
/**
 * Builds a table block from a `<table>` element.
 *
 * Headers come from the cells inside `<thead>`. When there are none and the
 * table has more than one row, the first row is used as the header row and
 * left out of the data rows. Previously that first row was returned both as
 * `headers` and as the first entry of `rows`, because the `tbody tr` part of
 * the row selector still matched it.
 *
 * @param $ Cheerio instance for the document.
 * @param element The `<table>` element.
 * @returns `{ type: 'table', headers, rows }`, where `headers` is `undefined`
 *   when no header cells were found, or `null` when the table has no data
 *   rows with cells.
 */
function parseTable($, element) {
    const $table = $(element);
    // Sanitized text of every cell matched by `selector` under `$scope`.
    const cellTexts = ($scope, selector) => {
        const texts = [];
        $scope.find(selector).each((_, cell) => {
            texts.push(sanitizeText($(cell).text()));
        });
        return texts;
    };
    const $allRows = $table.find('tr');
    let headers = cellTexts($table, 'thead th, thead td');
    let $bodyRows;
    if (headers.length > 0) {
        // Header cells came from <thead>; data rows are all rows outside it.
        $bodyRows = $allRows.filter((_, row) => $(row).closest('thead').length === 0);
    }
    else if ($allRows.length > 1) {
        // No <thead>: promote the first row to headers and skip it below.
        headers = cellTexts($allRows.first(), 'th, td');
        $bodyRows = $allRows.slice(1);
    }
    else {
        // A single row is kept as data instead of being used up as headers.
        $bodyRows = $allRows;
    }
    const rows = [];
    $bodyRows.each((_, row) => {
        const cells = cellTexts($(row), 'td, th');
        if (cells.length > 0)
            rows.push(cells);
    });
    if (rows.length === 0)
        return null;
    return {
        type: 'table',
        headers: headers.length > 0 ? headers : undefined,
        rows,
    };
}
|
|
86
|
+
/**
 * Builds an image block from an element's `src` and `alt` attributes.
 *
 * @param $ Cheerio instance for the document.
 * @param element Image element.
 * @returns `{ type: 'image', src, alt }`, or `null` when `src` is missing or
 *   empty. An empty or missing `alt` becomes `undefined`.
 */
function parseImage($, element) {
    const $img = $(element);
    const src = $img.attr('src');
    if (src) {
        return {
            type: 'image',
            src,
            alt: $img.attr('alt') || undefined,
        };
    }
    return null;
}
|
|
96
|
+
// Dispatch table from lower-case tag name to the parser that handles it.
const ELEMENT_PARSERS = {
    h1: parseHeading,
    h2: parseHeading,
    h3: parseHeading,
    h4: parseHeading,
    h5: parseHeading,
    h6: parseHeading,
    p: parseParagraph,
    ul: parseList,
    ol: parseList,
    pre: parseCode,
    code: parseCode,
    table: parseTable,
    img: parseImage,
};
/**
 * Reports whether a tag name has a parser registered in `ELEMENT_PARSERS`.
 *
 * Only the table's own keys count. The `in` operator would also accept names
 * inherited from `Object.prototype`, such as "constructor", and the caller
 * would then invoke something that is not a parser.
 *
 * @param tag Lower-case tag name.
 * @returns `true` when `ELEMENT_PARSERS` has an own entry for `tag`.
 */
function isParseableTag(tag) {
    return Object.prototype.hasOwnProperty.call(ELEMENT_PARSERS, tag);
}
|
|
114
|
+
/**
 * Routes a DOM node to the parser registered for its tag.
 *
 * @param $ Cheerio instance for the document.
 * @param node Node to parse.
 * @returns The block produced by the matching parser, or `null` when the node
 *   has no string `tagName`, the tag has no parser, or the parser itself
 *   returned `null`.
 */
function parseElement($, node) {
    const rawTag = 'tagName' in node ? node.tagName : undefined;
    if (typeof rawTag !== 'string') {
        return null;
    }
    const tagName = rawTag.toLowerCase();
    return isParseableTag(tagName) ? ELEMENT_PARSERS[tagName]($, node) : null;
}
|
|
122
|
+
/**
 * Drops blocks that carry no content.
 *
 * Paragraph, heading and code blocks need non-empty `text`; list blocks need
 * at least one item; every other block type is kept as is.
 *
 * @param blocks Parsed content blocks.
 * @returns A new array with the empty blocks removed, in the original order.
 */
function filterBlocks(blocks) {
    return blocks.filter((block) => {
        const kind = block.type;
        if (kind === 'list') {
            return block.items.length > 0;
        }
        if (kind === 'paragraph' || kind === 'heading' || kind === 'code') {
            return block.text.length > 0;
        }
        return true;
    });
}
|
|
136
|
+
/**
 * Parses an HTML string into a flat list of content blocks.
 *
 * `script`, `style`, `noscript`, `iframe` and `svg` elements are removed
 * first. Every heading, paragraph, list, `pre`, `code`, table and image under
 * `<body>` is then passed to `parseElement`, and the resulting blocks are run
 * through `filterBlocks`.
 *
 * @param html Raw HTML markup.
 * @returns The content blocks that remain after filtering.
 */
export function parseHtml(html) {
    const $ = cheerio.load(html);
    $('script, style, noscript, iframe, svg').remove();
    const candidates = $('body')
        .find('h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code, table, img')
        .toArray();
    const parsed = [];
    for (const candidate of candidates) {
        const block = parseElement($, candidate);
        if (block) {
            parsed.push(block);
        }
    }
    return filterBlocks(parsed);
}
|
|
152
|
+
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAW5C,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,OAAO;QACL,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,CAAa,EAAE,OAAgB;IACrD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAE7E,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,CAAC,CAAC,OAAO,CAAC;SACP,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACd,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACxC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEL,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI;QAC/C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAExD,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;QAC5B,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE1B,0CAA0C;IAC1C,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjD,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,
CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM;aACH,IAAI,CAAC,IAAI,CAAC;aACV,KAAK,EAAE;aACP,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;IACP,CAAC;IAED,oBAAoB;IACpB,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,cAAc,CAAC;IACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QACxC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,CAAC,CAAC,GAAG,CAAC;aACH,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QACL,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,OAAO;QACL,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,OAAO;QACb,GAAG;QACH,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,SAAS;KACzC,CAAC;AACJ,CAAC;AAED,MAAM,eAAe,GAAG;IACtB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,CAAC,EAAE,cAAc;IACjB,EAAE,EAAE,SAAS;IACb,EAAE,EAAE,SAAS;IACb,GAAG,EAAE,SAAS;IACd,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,UAAU;IACjB,GAAG,EAAE,UAAU;CAIhB,CAAC;AAIF,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG,IAAI,eAAe,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,CAAa,EAAE,IAAa;IAChD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE1E,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,YAAY,
CAAC,MAA2B;IAC/C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC7B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,WAAW,CAAC;YACjB,KAAK,SAAS,CAAC;YACf,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,MAAM,GAAwB,EAAE,CAAC;IAEvC,CAAC,CAAC,sCAAsC,CAAC,CAAC,MAAM,EAAE,CAAC;IAEnD,CAAC,CAAC,MAAM,CAAC;SACN,IAAI,CAAC,0DAA0D,CAAC;SAChE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QACnB,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACvC,IAAI,KAAK;YAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEL,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { ContentBlockUnion } from '../types/index.js';
|
|
2
|
+
declare class ParserService {
|
|
3
|
+
/**
|
|
4
|
+
* Parses HTML content and extracts semantic blocks
|
|
5
|
+
*/
|
|
6
|
+
parseHtml(html: string): ContentBlockUnion[];
|
|
7
|
+
/**
|
|
8
|
+
* Parses a single element into a content block
|
|
9
|
+
*/
|
|
10
|
+
private parseElement;
|
|
11
|
+
/**
|
|
12
|
+
* Parses a heading element
|
|
13
|
+
*/
|
|
14
|
+
private parseHeading;
|
|
15
|
+
/**
|
|
16
|
+
* Parses a paragraph element
|
|
17
|
+
*/
|
|
18
|
+
private parseParagraph;
|
|
19
|
+
/**
|
|
20
|
+
* Parses a list element
|
|
21
|
+
*/
|
|
22
|
+
private parseList;
|
|
23
|
+
/**
|
|
24
|
+
* Parses a code element
|
|
25
|
+
*/
|
|
26
|
+
private parseCode;
|
|
27
|
+
/**
|
|
28
|
+
* Parses a table element
|
|
29
|
+
*/
|
|
30
|
+
private parseTable;
|
|
31
|
+
/**
|
|
32
|
+
* Parses an image element
|
|
33
|
+
*/
|
|
34
|
+
private parseImage;
|
|
35
|
+
/**
|
|
36
|
+
* Filters out empty or invalid blocks
|
|
37
|
+
*/
|
|
38
|
+
private filterBlocks;
|
|
39
|
+
}
|
|
40
|
+
export declare const parserService: ParserService;
|
|
41
|
+
export {};
|
|
42
|
+
//# sourceMappingURL=parser.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.service.d.ts","sourceRoot":"","sources":["../../src/services/parser.service.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAOV,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AAE3B,cAAM,aAAa;IACjB;;OAEG;IACH,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAoB5C;;OAEG;IACH,OAAO,CAAC,YAAY;IAuCpB;;OAEG;IACH,OAAO,CAAC,YAAY;IAepB;;OAEG;IACH,OAAO,CAAC,cAAc;IAgBtB;;OAEG;IACH,OAAO,CAAC,SAAS;IAwBjB;;OAEG;IACH,OAAO,CAAC,SAAS;IAkBjB;;OAEG;IACH,OAAO,CAAC,UAAU;IA+ClB;;OAEG;IACH,OAAO,CAAC,UAAU;IAelB;;OAEG;IACH,OAAO,CAAC,YAAY;CAiBrB;AAGD,eAAO,MAAM,aAAa,eAAsB,CAAC"}
|