@j0hanz/superfetch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/README.md +327 -0
  2. package/dist/config/index.d.ts +30 -0
  3. package/dist/config/index.d.ts.map +1 -0
  4. package/dist/config/index.js +42 -0
  5. package/dist/config/index.js.map +1 -0
  6. package/dist/errors/app-error.d.ts +71 -0
  7. package/dist/errors/app-error.d.ts.map +1 -0
  8. package/dist/errors/app-error.js +103 -0
  9. package/dist/errors/app-error.js.map +1 -0
  10. package/dist/errors/index.d.ts +2 -0
  11. package/dist/errors/index.d.ts.map +1 -0
  12. package/dist/errors/index.js +2 -0
  13. package/dist/errors/index.js.map +1 -0
  14. package/dist/index.d.ts +3 -0
  15. package/dist/index.d.ts.map +1 -0
  16. package/dist/index.js +179 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/middleware/error-handler.d.ts +7 -0
  19. package/dist/middleware/error-handler.d.ts.map +1 -0
  20. package/dist/middleware/error-handler.js +37 -0
  21. package/dist/middleware/error-handler.js.map +1 -0
  22. package/dist/middleware/rate-limiter.d.ts +33 -0
  23. package/dist/middleware/rate-limiter.d.ts.map +1 -0
  24. package/dist/middleware/rate-limiter.js +100 -0
  25. package/dist/middleware/rate-limiter.js.map +1 -0
  26. package/dist/prompts/index.d.ts +6 -0
  27. package/dist/prompts/index.d.ts.map +1 -0
  28. package/dist/prompts/index.js +81 -0
  29. package/dist/prompts/index.js.map +1 -0
  30. package/dist/resources/index.d.ts +6 -0
  31. package/dist/resources/index.d.ts.map +1 -0
  32. package/dist/resources/index.js +44 -0
  33. package/dist/resources/index.js.map +1 -0
  34. package/dist/server.d.ts +8 -0
  35. package/dist/server.d.ts.map +1 -0
  36. package/dist/server.js +39 -0
  37. package/dist/server.js.map +1 -0
  38. package/dist/services/cache.d.ts +16 -0
  39. package/dist/services/cache.d.ts.map +1 -0
  40. package/dist/services/cache.js +63 -0
  41. package/dist/services/cache.js.map +1 -0
  42. package/dist/services/cache.service.d.ts +52 -0
  43. package/dist/services/cache.service.d.ts.map +1 -0
  44. package/dist/services/cache.service.js +113 -0
  45. package/dist/services/cache.service.js.map +1 -0
  46. package/dist/services/extractor.d.ts +32 -0
  47. package/dist/services/extractor.d.ts.map +1 -0
  48. package/dist/services/extractor.js +97 -0
  49. package/dist/services/extractor.js.map +1 -0
  50. package/dist/services/extractor.service.d.ts +18 -0
  51. package/dist/services/extractor.service.d.ts.map +1 -0
  52. package/dist/services/extractor.service.js +75 -0
  53. package/dist/services/extractor.service.js.map +1 -0
  54. package/dist/services/fetcher.d.ts +9 -0
  55. package/dist/services/fetcher.d.ts.map +1 -0
  56. package/dist/services/fetcher.js +100 -0
  57. package/dist/services/fetcher.js.map +1 -0
  58. package/dist/services/fetcher.service.d.ts +18 -0
  59. package/dist/services/fetcher.service.d.ts.map +1 -0
  60. package/dist/services/fetcher.service.js +122 -0
  61. package/dist/services/fetcher.service.js.map +1 -0
  62. package/dist/services/logger.d.ts +5 -0
  63. package/dist/services/logger.d.ts.map +1 -0
  64. package/dist/services/logger.js +48 -0
  65. package/dist/services/logger.js.map +1 -0
  66. package/dist/services/logger.service.d.ts +5 -0
  67. package/dist/services/logger.service.d.ts.map +1 -0
  68. package/dist/services/logger.service.js +57 -0
  69. package/dist/services/logger.service.js.map +1 -0
  70. package/dist/services/parser.d.ts +6 -0
  71. package/dist/services/parser.d.ts.map +1 -0
  72. package/dist/services/parser.js +152 -0
  73. package/dist/services/parser.js.map +1 -0
  74. package/dist/services/parser.service.d.ts +42 -0
  75. package/dist/services/parser.service.d.ts.map +1 -0
  76. package/dist/services/parser.service.js +209 -0
  77. package/dist/services/parser.service.js.map +1 -0
  78. package/dist/tools/handlers/fetch-links.tool.d.ts +20 -0
  79. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -0
  80. package/dist/tools/handlers/fetch-links.tool.js +91 -0
  81. package/dist/tools/handlers/fetch-links.tool.js.map +1 -0
  82. package/dist/tools/handlers/fetch-markdown.tool.d.ts +17 -0
  83. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -0
  84. package/dist/tools/handlers/fetch-markdown.tool.js +99 -0
  85. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -0
  86. package/dist/tools/handlers/fetch-url.tool.d.ts +17 -0
  87. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -0
  88. package/dist/tools/handlers/fetch-url.tool.js +103 -0
  89. package/dist/tools/handlers/fetch-url.tool.js.map +1 -0
  90. package/dist/tools/index.d.ts +7 -0
  91. package/dist/tools/index.d.ts.map +1 -0
  92. package/dist/tools/index.js +83 -0
  93. package/dist/tools/index.js.map +1 -0
  94. package/dist/transformers/jsonl.transformer.d.ts +4 -0
  95. package/dist/transformers/jsonl.transformer.d.ts.map +1 -0
  96. package/dist/transformers/jsonl.transformer.js +42 -0
  97. package/dist/transformers/jsonl.transformer.js.map +1 -0
  98. package/dist/transformers/markdown.transformer.d.ts +4 -0
  99. package/dist/transformers/markdown.transformer.d.ts.map +1 -0
  100. package/dist/transformers/markdown.transformer.js +104 -0
  101. package/dist/transformers/markdown.transformer.js.map +1 -0
  102. package/dist/types/content.types.d.ts +63 -0
  103. package/dist/types/content.types.d.ts.map +1 -0
  104. package/dist/types/content.types.js +2 -0
  105. package/dist/types/content.types.js.map +1 -0
  106. package/dist/types/index.d.ts +3 -0
  107. package/dist/types/index.d.ts.map +1 -0
  108. package/dist/types/index.js +3 -0
  109. package/dist/types/index.js.map +1 -0
  110. package/dist/types/schemas.d.ts +22 -0
  111. package/dist/types/schemas.d.ts.map +1 -0
  112. package/dist/types/schemas.js +5 -0
  113. package/dist/types/schemas.js.map +1 -0
  114. package/dist/utils/sanitizer.d.ts +9 -0
  115. package/dist/utils/sanitizer.d.ts.map +1 -0
  116. package/dist/utils/sanitizer.js +19 -0
  117. package/dist/utils/sanitizer.js.map +1 -0
  118. package/dist/utils/url-validator.d.ts +10 -0
  119. package/dist/utils/url-validator.d.ts.map +1 -0
  120. package/dist/utils/url-validator.js +69 -0
  121. package/dist/utils/url-validator.js.map +1 -0
  122. package/package.json +80 -0
@@ -0,0 +1,122 @@
1
+ import axios from 'axios';
2
+ import { config } from '../config/index.js';
3
+ import { FetchError, TimeoutError } from '../errors/app-error.js';
4
// Headers that should not be overridden by custom headers (security).
// Entries are lower-case; candidates must be lower-cased before lookup.
const BLOCKED_HEADERS = new Set([
    'host',
    'authorization',
    'cookie',
    'x-forwarded-for',
    'x-real-ip',
    'proxy-authorization',
]);
// RFC 7230 "token" grammar: the only characters allowed in a header field name.
const HEADER_NAME_PATTERN = /^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$/;
/**
 * Sanitize custom headers to prevent injection of sensitive headers.
 *
 * A header is dropped when:
 *  - its name (after trimming surrounding whitespace) is not a valid HTTP
 *    token, so padded or malformed names cannot slip past the block list;
 *  - its lower-cased name is in BLOCKED_HEADERS;
 *  - its value is a string containing CR, LF or NUL (header/response splitting).
 *
 * @param headers Caller-supplied header map; may be undefined or empty.
 * @returns A new object with the surviving headers (names trimmed), or
 *          `undefined` when nothing is left to send.
 */
function sanitizeHeaders(headers) {
    if (!headers || typeof headers !== 'object') {
        return undefined;
    }
    const sanitized = {};
    for (const [rawName, value] of Object.entries(headers)) {
        const name = rawName.trim();
        if (!HEADER_NAME_PATTERN.test(name)) {
            continue;
        }
        if (BLOCKED_HEADERS.has(name.toLowerCase())) {
            continue;
        }
        if (typeof value === 'string' && /[\r\n\0]/.test(value)) {
            continue;
        }
        sanitized[name] = value;
    }
    return Object.keys(sanitized).length > 0 ? sanitized : undefined;
}
28
/**
 * Calculates exponential backoff delay with jitter.
 *
 * The base delay is 1000ms doubled for every attempt after the first and
 * capped at `maxDelay`; ±25% random jitter is then applied.
 *
 * @param attempt  1-based attempt number. Values below 1 (or NaN) are treated
 *                 as the first attempt so the delay never drops under the base.
 * @param maxDelay Upper bound in milliseconds for the returned delay.
 * @returns Delay in whole milliseconds, in the range [0, maxDelay].
 */
function calculateBackoff(attempt, maxDelay = 10000) {
    const exponent = attempt >= 1 ? attempt - 1 : 0;
    const baseDelay = Math.min(1000 * Math.pow(2, exponent), maxDelay);
    // Add jitter (±25%)
    const jitter = baseDelay * 0.25 * (Math.random() * 2 - 1);
    // Clamp after jitter: positive jitter on a capped base must not exceed maxDelay.
    return Math.min(maxDelay, Math.max(0, Math.round(baseDelay + jitter)));
}
37
/**
 * HTTP fetching service backed by a single pre-configured axios client.
 *
 * The client reads its timeout, redirect limit, maximum content length and
 * User-Agent from `config.fetcher` and accepts only 2xx statuses as success.
 */
class FetcherService {
    client;
    constructor() {
        this.client = axios.create({
            timeout: config.fetcher.timeout,
            maxRedirects: config.fetcher.maxRedirects,
            maxContentLength: config.fetcher.maxContentLength,
            headers: {
                'User-Agent': config.fetcher.userAgent,
                Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate, br',
                Connection: 'keep-alive',
            },
            // Validate status - only 2xx responses are valid
            validateStatus: (status) => status >= 200 && status < 300,
        });
    }
    /**
     * Fetches HTML content from a URL
     *
     * @param url URL to request with GET; the body is returned as text.
     * @param customHeaders Optional extra headers; passed through
     *        sanitizeHeaders before being attached to the request.
     * @returns The response body.
     * @throws {FetchError} on network or HTTP errors
     * @throws {TimeoutError} on request timeout
     */
    async fetchUrl(url, customHeaders) {
        const requestConfig = {
            method: 'GET',
            url,
            responseType: 'text',
        };
        // Add sanitized custom headers if provided
        const sanitized = sanitizeHeaders(customHeaders);
        if (sanitized) {
            requestConfig.headers = { ...requestConfig.headers, ...sanitized };
        }
        try {
            const response = await this.client.request(requestConfig);
            return response.data;
        }
        catch (error) {
            if (!axios.isAxiosError(error)) {
                throw new FetchError(`Unexpected error: ${error instanceof Error ? error.message : 'Unknown'}`, url);
            }
            const axiosError = error;
            // Timeouts are checked first so they surface as TimeoutError.
            if (axiosError.code === 'ECONNABORTED' || axiosError.code === 'ETIMEDOUT') {
                throw new TimeoutError(config.fetcher.timeout, true);
            }
            // A response arrived but its status failed validateStatus.
            if (axiosError.response) {
                throw new FetchError(`HTTP ${axiosError.response.status}: ${axiosError.response.statusText}`, url, axiosError.response.status);
            }
            // The request was sent but no response was received.
            if (axiosError.request) {
                throw new FetchError(`Network error: Could not reach ${url}`, url);
            }
            throw new FetchError(axiosError.message, url);
        }
    }
    /**
     * Fetches URL with exponential backoff retry logic
     *
     * Client errors (4xx) other than 429 are rethrown immediately without
     * retrying. Between attempts the method sleeps for calculateBackoff(attempt).
     *
     * @param url URL to fetch.
     * @param customHeaders Optional extra headers forwarded to fetchUrl.
     * @param maxRetries Total number of attempts. Values below 1 (or NaN) are
     *        treated as 1 so that at least one request is always made.
     * @returns The response body of the first successful attempt.
     * @throws {FetchError} after all retries exhausted; carries the HTTP status
     *         of the last failure when that failure had one.
     */
    async fetchUrlWithRetry(url, customHeaders, maxRetries = 3) {
        const attempts = Math.max(1, Math.floor(maxRetries) || 1);
        let lastError;
        for (let attempt = 1; attempt <= attempts; attempt++) {
            try {
                return await this.fetchUrl(url, customHeaders);
            }
            catch (error) {
                lastError = error instanceof Error ? error : new Error('Unknown error');
                // Don't retry on client errors (4xx) except 429 (rate limited)
                if (error instanceof FetchError && error.httpStatus) {
                    const status = error.httpStatus;
                    if (status >= 400 && status < 500 && status !== 429) {
                        throw error;
                    }
                }
                if (attempt < attempts) {
                    const delay = calculateBackoff(attempt);
                    await new Promise((resolve) => setTimeout(resolve, delay));
                }
            }
        }
        // Keep the last HTTP status (e.g. 429 or 5xx) visible to callers.
        const lastStatus = lastError instanceof FetchError ? lastError.httpStatus : undefined;
        throw new FetchError(`Failed after ${attempts} attempts: ${lastError?.message ?? 'Unknown error'}`, url, lastStatus);
    }
}
// Singleton instance
export const fetcherService = new FetcherService();
122
+ //# sourceMappingURL=fetcher.service.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetcher.service.js","sourceRoot":"","sources":["../../src/services/fetcher.service.ts"],"names":[],"mappings":"AAAA,OAAO,KAAwD,MAAM,OAAO,CAAC;AAC7E,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAElE,qEAAqE;AACrE,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,MAAM;IACN,eAAe;IACf,QAAQ;IACR,iBAAiB;IACjB,WAAW;IACX,qBAAqB;CACtB,CAAC,CAAC;AAEH;;GAEG;AACH,SAAS,eAAe,CACtB,OAAgC;IAEhC,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,SAAS,GAA2B,EAAE,CAAC;IAE7C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACnD,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC5C,SAAS,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;AACnE,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAQ,GAAG,KAAK;IACzD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IACtE,oBAAoB;IACpB,MAAM,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;AACxC,CAAC;AAED,MAAM,cAAc;IACD,MAAM,CAAgB;IAEvC;QACE,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO;YAC/B,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,YAAY;YACzC,gBAAgB,EAAE,MAAM,CAAC,OAAO,CAAC,gBAAgB;YACjD,OAAO,EAAE;gBACP,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS;gBACtC,MAAM,EACJ,4EAA4E;gBAC9E,iBAAiB,EAAE,gBAAgB;gBACnC,iBAAiB,EAAE,mBAAmB;gBACtC,UAAU,EAAE,YAAY;aACzB;YACD,iDAAiD;YACjD,cAAc,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG;SAC1D,CAAC,CAAC;IACL,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,QAAQ,CACZ,GAAW,EACX,aAAsC;QAEtC,MAAM,aAAa,GAAuB;YACxC,MAAM,EAAE,KAAK;YACb,GAAG;YACH,YAAY,EAAE,MAAM;SACrB,CAAC;QAEF,2CAA2C;QAC3C,MAAM,SAAS,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;QACjD,IAAI,SAAS,EAAE,CAAC;YACd,aAAa,CAAC,OAAO,GAAG,
EAAE,GAAG,aAAa,CAAC,OAAO,EAAE,GAAG,SAAS,EAAE,CAAC;QACrE,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAS,aAAa,CAAC,CAAC;YAClE,OAAO,QAAQ,CAAC,IAAI,CAAC;QACvB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,UAAU,CAClB,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE,EACzE,GAAG,CACJ,CAAC;YACJ,CAAC;YAED,MAAM,UAAU,GAAG,KAAmB,CAAC;YAEvC,IAAI,UAAU,CAAC,IAAI,KAAK,cAAc,IAAI,UAAU,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC1E,MAAM,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YACvD,CAAC;YAED,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;gBACxB,MAAM,IAAI,UAAU,CAClB,QAAQ,UAAU,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,CAAC,QAAQ,CAAC,UAAU,EAAE,EACvE,GAAG,EACH,UAAU,CAAC,QAAQ,CAAC,MAAM,CAC3B,CAAC;YACJ,CAAC;YAED,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;gBACvB,MAAM,IAAI,UAAU,CAAC,kCAAkC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;YACrE,CAAC;YAED,MAAM,IAAI,UAAU,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,iBAAiB,CACrB,GAAW,EACX,aAAsC,EACtC,UAAU,GAAG,CAAC;QAEd,IAAI,SAA4B,CAAC;QAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YACjD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;gBAExE,+DAA+D;gBAC/D,IAAI,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;oBACpD,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC;oBAChC,IAAI,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;wBACpD,MAAM,KAAK,CAAC;oBACd,CAAC;gBACH,CAAC;gBAED,IAAI,OAAO,GAAG,UAAU,EAAE,CAAC;oBACzB,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;oBACxC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,IAAI,UAAU,CAClB,gBAAgB,UAAU,cAAc,SAAS,EAAE,OAAO,IAAI,eAAe,EAAE,EAC/E,GAAG,CACJ,CAAC;IACJ,CAAC;CACF;AAED,qBAAqB;AACrB,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC"}
@@ -0,0 +1,5 @@
1
+ /** Logs `message` at info level with optional `meta`; does nothing when `config.logging.enabled` is falsy. */
+ export declare function logInfo(message: string, meta?: Record<string, unknown>): void;
2
+ /** Logs `message` at warn level with optional `meta`; does nothing when `config.logging.enabled` is falsy. */
+ export declare function logWarn(message: string, meta?: Record<string, unknown>): void;
3
+ /** Logs `message` at debug level with optional `meta`; does nothing when `config.logging.enabled` is falsy. */
+ export declare function logDebug(message: string, meta?: Record<string, unknown>): void;
4
+ /** Logs `message` at error level; an `Error` argument is reduced to `{ error: message, stack }`, any other value is passed through as metadata. No-op when logging is disabled. */
+ export declare function logError(message: string, error?: Error | Record<string, unknown>): void;
5
+ //# sourceMappingURL=logger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAyCA,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE7E;AAED,wBAAgB,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAE9E;AAED,wBAAgB,QAAQ,CACtB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACtC,IAAI,CAQN"}
@@ -0,0 +1,48 @@
1
+ import winston from 'winston';
2
+ import { config } from '../config/index.js';
3
+ import path from 'path';
4
// Log files live in a "logs" folder under the process working directory.
const logsDir = path.join(process.cwd(), 'logs');
// Settings shared by both file transports: 5242880-byte size limit, 5 files.
const fileLimits = { maxsize: 5242880, maxFiles: 5 };
// Timestamped JSON records with error stacks and splat interpolation.
const fileFormat = winston.format.combine(winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), winston.format.errors({ stack: true }), winston.format.splat(), winston.format.json());
const logger = winston.createLogger({
    level: config.logging.level,
    format: fileFormat,
    defaultMeta: { service: 'superfetch' },
    transports: [
        // Every record at or above the configured level.
        new winston.transports.File({
            filename: path.join(logsDir, 'combined.log'),
            ...fileLimits,
        }),
        // Error-level records only.
        new winston.transports.File({
            filename: path.join(logsDir, 'error.log'),
            level: 'error',
            ...fileLimits,
        }),
    ],
});
// Outside production, also log to the console in a colorized simple format.
const isProduction = process.env.NODE_ENV === 'production';
if (!isProduction) {
    const consoleFormat = winston.format.combine(winston.format.colorize(), winston.format.simple());
    logger.add(new winston.transports.Console({ format: consoleFormat }));
}
28
/**
 * Writes an info-level record through the shared logger.
 * Does nothing when `config.logging.enabled` is falsy.
 */
export function logInfo(message, meta) {
    if (!config.logging.enabled) {
        return;
    }
    logger.info(message, meta);
}
32
/**
 * Writes a warn-level record through the shared logger.
 * Does nothing when `config.logging.enabled` is falsy.
 */
export function logWarn(message, meta) {
    if (!config.logging.enabled) {
        return;
    }
    logger.warn(message, meta);
}
36
/**
 * Writes a debug-level record through the shared logger.
 * Does nothing when `config.logging.enabled` is falsy.
 */
export function logDebug(message, meta) {
    if (!config.logging.enabled) {
        return;
    }
    logger.debug(message, meta);
}
40
/**
 * Writes an error-level record through the shared logger.
 *
 * An `Error` instance is flattened to `{ error: <message>, stack }`; any other
 * value is handed to the logger unchanged as metadata. Does nothing when
 * `config.logging.enabled` is falsy.
 */
export function logError(message, error) {
    if (config.logging.enabled) {
        let details = error;
        if (error instanceof Error) {
            details = { error: error.message, stack: error.stack };
        }
        logger.error(message, details);
    }
}
48
+ //# sourceMappingURL=logger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/services/logger.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO;YAChB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,OAAe,EAAE,IAA8B;IACrE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,OAAe,EAAE,IAA8B;IACtE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,QAAQ,CACtB,OAAe,EACf,KAAuC;IAEvC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO;QAAE,OAAO;IAEpC,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;QACpB,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE;QAC9C,CAAC,CAAC,KAAK,CAAC;IACZ,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AACnC,C
AAC"}
@@ -0,0 +1,5 @@
1
+ /** Logs `message` at info level with optional `meta`; does nothing when `config.logging.enabled` is falsy. */
+ export declare const logInfo: (message: string, meta?: Record<string, unknown>) => void;
2
+ /** Logs `message` at warn level with optional `meta`; does nothing when `config.logging.enabled` is falsy. */
+ export declare const logWarn: (message: string, meta?: Record<string, unknown>) => void;
3
+ /** Logs `message` at debug level with optional `meta`; does nothing when `config.logging.enabled` is falsy. */
+ export declare const logDebug: (message: string, meta?: Record<string, unknown>) => void;
4
+ /** Logs `message` at error level; an `Error` argument is reduced to `{ error: message, stack }`, any other value is passed through as metadata. No-op when logging is disabled. */
+ export declare const logError: (message: string, error?: Error | Record<string, unknown>) => void;
5
+ //# sourceMappingURL=logger.service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.service.d.ts","sourceRoot":"","sources":["../../src/services/logger.service.ts"],"names":[],"mappings":"AA+CA,eAAO,MAAM,OAAO,GAAI,SAAS,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SAItE,CAAC;AAEF,eAAO,MAAM,OAAO,GAAI,SAAS,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SAItE,CAAC;AAEF,eAAO,MAAM,QAAQ,GAAI,SAAS,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SAIvE,CAAC;AAEF,eAAO,MAAM,QAAQ,GACnB,SAAS,MAAM,EACf,QAAQ,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,SASxC,CAAC"}
@@ -0,0 +1,57 @@
1
+ import winston from 'winston';
2
+ import { config } from '../config/index.js';
3
+ import path from 'path';
4
// Log files live in a "logs" folder under the process working directory.
const logsDir = path.join(process.cwd(), 'logs');
// Settings shared by both file transports.
const fileLimits = {
    maxsize: 5242880, // 5MB
    maxFiles: 5,
};
// Timestamped JSON records with error stacks and splat interpolation.
const fileFormat = winston.format.combine(winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), winston.format.errors({ stack: true }), winston.format.splat(), winston.format.json());
// Shared logger instance used by the exported helpers.
const logger = winston.createLogger({
    level: config.logging.level,
    format: fileFormat,
    defaultMeta: { service: 'superfetch' },
    transports: [
        // combined.log receives all logs
        new winston.transports.File({
            filename: path.join(logsDir, 'combined.log'),
            ...fileLimits,
        }),
        // error.log receives error logs only
        new winston.transports.File({
            filename: path.join(logsDir, 'error.log'),
            level: 'error',
            ...fileLimits,
        }),
    ],
});
// Console output is added whenever NODE_ENV is not "production".
const isProduction = process.env.NODE_ENV === 'production';
if (!isProduction) {
    const consoleFormat = winston.format.combine(winston.format.colorize(), winston.format.simple());
    logger.add(new winston.transports.Console({ format: consoleFormat }));
}
33
// Convenience wrappers around the shared logger
/**
 * Writes an info-level record; a no-op when `config.logging.enabled` is falsy.
 */
export const logInfo = (message, meta) => {
    if (!config.logging.enabled) {
        return;
    }
    logger.info(message, meta);
};
39
/**
 * Writes a warn-level record; a no-op when `config.logging.enabled` is falsy.
 */
export const logWarn = (message, meta) => {
    if (!config.logging.enabled) {
        return;
    }
    logger.warn(message, meta);
};
44
/**
 * Writes a debug-level record; a no-op when `config.logging.enabled` is falsy.
 */
export const logDebug = (message, meta) => {
    if (!config.logging.enabled) {
        return;
    }
    logger.debug(message, meta);
};
49
/**
 * Writes an error-level record; a no-op when `config.logging.enabled` is falsy.
 *
 * An `Error` instance is flattened to `{ error: <message>, stack }`; any other
 * value is handed to the logger unchanged as metadata.
 */
export const logError = (message, error) => {
    if (!config.logging.enabled) {
        return;
    }
    let details = error;
    if (error instanceof Error) {
        details = { error: error.message, stack: error.stack };
    }
    logger.error(message, details);
};
57
+ //# sourceMappingURL=logger.service.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.service.js","sourceRoot":"","sources":["../../src/services/logger.service.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,6BAA6B;AAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;AAEjD,yBAAyB;AACzB,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK;IAC3B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC,EAC3D,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EACtC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EACtB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACtB;IACD,WAAW,EAAE,EAAE,OAAO,EAAE,YAAY,EAAE;IACtC,UAAU,EAAE;QACV,iCAAiC;QACjC,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC;YAC5C,OAAO,EAAE,OAAO,EAAE,MAAM;YACxB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,gCAAgC;QAChC,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC;YACzC,KAAK,EAAE,OAAO;YACd,OAAO,EAAE,OAAO,EAAE,MAAM;YACxB,QAAQ,EAAE,CAAC;SACZ,CAAC;KACH;CACF,CAAC,CAAC;AAEH,uCAAuC;AACvC,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;IAC1C,MAAM,CAAC,GAAG,CACR,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EACzB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CACxB;KACF,CAAC,CACH,CAAC;AACJ,CAAC;AAED,6BAA6B;AAC7B,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,OAAe,EAAE,IAA8B,EAAE,EAAE;IACzE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,OAAe,EAAE,IAA8B,EAAE,EAAE;IACzE,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,QAAQ,GAAG,CAAC,OAAe,EAAE,IAA8B,EAAE,EAAE;IAC1E,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAC9B,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,QAAQ,GAAG,CACtB,OAAe,
EACf,KAAuC,EACvC,EAAE;IACF,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC3B,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;YACpB,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE;YAC9C,CAAC,CAAC,KAAK,CAAC;QACZ,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IACnC,CAAC;AACH,CAAC,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type { ContentBlockUnion } from '../types/index.js';
2
+ /**
3
+ * Parses HTML content and extracts semantic blocks
+ *
+ * Loads `html` with cheerio, removes `script`, `style`, `noscript`, `iframe`
+ * and `svg` elements, then converts the headings, paragraphs, lists,
+ * `pre`/`code` elements, tables and images found under `<body>` into content
+ * blocks. Text and list blocks that end up empty are dropped.
+ *
+ * @param html - Raw HTML markup to parse.
+ * @returns The extracted blocks, in the order the traversal visited them.
4
+ */
5
+ export declare function parseHtml(html: string): ContentBlockUnion[];
6
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAOV,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AAyJ3B;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAc3D"}
@@ -0,0 +1,152 @@
1
+ import * as cheerio from 'cheerio';
2
+ import { sanitizeText } from '../utils/sanitizer.js';
3
+ import { config } from '../config/index.js';
4
/**
 * Converts an h1-h6 element into a heading block.
 *
 * The level is the number that follows the first character of the tag name.
 * Returns null when the sanitized text is empty.
 */
function parseHeading($, element) {
    const headingText = sanitizeText($(element).text());
    if (headingText) {
        const level = parseInt(element.tagName.substring(1), 10);
        return { type: 'heading', level, text: headingText };
    }
    return null;
}
14
/**
 * Converts a paragraph element into a paragraph block.
 *
 * Returns null when the sanitized text is empty or shorter than
 * `config.extraction.minParagraphLength`.
 */
function parseParagraph($, element) {
    const content = sanitizeText($(element).text());
    const tooShort = !content || content.length < config.extraction.minParagraphLength;
    return tooShort ? null : { type: 'paragraph', text: content };
}
20
/**
 * Converts a ul/ol element into a list block.
 *
 * Only the list's direct `li` children become items. The text of a nested
 * list is already part of its parent item's text, so also collecting nested
 * `li` elements would repeat that text as extra items.
 *
 * @param $ cheerio instance bound to the document.
 * @param element The `ul` or `ol` element.
 * @returns `{ type: 'list', ordered, items }`, or null when no item has text.
 */
function parseList($, element) {
    const items = [];
    $(element)
        .children('li')
        .each((_, li) => {
        const text = sanitizeText($(li).text());
        if (text)
            items.push(text);
    });
    if (items.length === 0)
        return null;
    return {
        type: 'list',
        ordered: element.tagName.toLowerCase() === 'ol',
        items,
    };
}
37
+ function parseCode($, element) {
38
+ const text = $(element).text().trim();
39
+ if (!text)
40
+ return null;
41
+ const className = $(element).attr('class') || '';
42
+ const languageMatch = className.match(/language-(\w+)/);
43
+ return {
44
+ type: 'code',
45
+ language: languageMatch?.[1],
46
+ text,
47
+ };
48
+ }
49
/**
 * Converts a table element into a table block.
 *
 * Headers are taken from the cells inside `thead`; when there are none, the
 * cells of the first row are used instead. The row that supplied the headers,
 * and every row that sits directly in a `thead`, is left out of `rows`. This
 * keeps the header row from being repeated as a body row when the rows are
 * wrapped in a `tbody`.
 *
 * @param $ cheerio instance bound to the document.
 * @param element The `table` element.
 * @returns `{ type: 'table', headers, rows }` (headers undefined when no
 *          header cells were found), or null when there are no body rows.
 */
function parseTable($, element) {
    const $table = $(element);
    // Sanitized text of every cell in a cheerio selection.
    const readCells = ($cells) => {
        const texts = [];
        $cells.each((_, cell) => {
            texts.push(sanitizeText($(cell).text()));
        });
        return texts;
    };
    const $rows = $table.find('tr');
    // Extract headers from thead or first row
    let headers = readCells($table.find('thead th, thead td'));
    let headerRow = null;
    if (headers.length === 0 && $rows.length > 0) {
        const firstRow = $rows.get(0);
        headers = readCells($(firstRow).find('th, td'));
        if (headers.length > 0)
            headerRow = firstRow;
    }
    // Extract body rows
    const rows = [];
    $rows.each((_, row) => {
        const $row = $(row);
        if (row === headerRow || $row.parent().is('thead'))
            return;
        const cells = readCells($row.find('td, th'));
        if (cells.length > 0)
            rows.push(cells);
    });
    if (rows.length === 0)
        return null;
    return {
        type: 'table',
        headers: headers.length > 0 ? headers : undefined,
        rows,
    };
}
86
+ function parseImage($, element) {
87
+ const src = $(element).attr('src');
88
+ if (!src)
89
+ return null;
90
+ return {
91
+ type: 'image',
92
+ src,
93
+ alt: $(element).attr('alt') || undefined,
94
+ };
95
+ }
96
// Dispatch table: lower-case tag name -> function that turns that element
// into a content block (or null).
const ELEMENT_PARSERS = {
    // h1..h6 all share the heading parser.
    ...Object.fromEntries(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].map((tag) => [tag, parseHeading])),
    p: parseParagraph,
    // Ordered and unordered lists share one parser.
    ul: parseList,
    ol: parseList,
    // Both pre and code are parsed as code blocks.
    pre: parseCode,
    code: parseCode,
    table: parseTable,
    img: parseImage,
};
111
/**
 * Reports whether ELEMENT_PARSERS has a parser registered for `tag`.
 *
 * Uses an own-property check rather than `in`, so names inherited from
 * Object.prototype (for example "constructor") are not treated as parseable.
 *
 * @param tag Lower-case tag name.
 * @returns true when `tag` is an own key of ELEMENT_PARSERS.
 */
function isParseableTag(tag) {
    return Object.prototype.hasOwnProperty.call(ELEMENT_PARSERS, tag);
}
114
/**
 * Dispatches a node to the parser registered for its tag.
 *
 * Returns null for nodes without a string `tagName` and for tags that have no
 * entry in ELEMENT_PARSERS; otherwise returns whatever the parser returns.
 */
function parseElement($, node) {
    const rawTag = 'tagName' in node ? node.tagName : undefined;
    if (typeof rawTag !== 'string') {
        return null;
    }
    const tag = rawTag.toLowerCase();
    return isParseableTag(tag) ? ELEMENT_PARSERS[tag]($, node) : null;
}
122
/**
 * Returns a new array without empty blocks.
 *
 * paragraph, heading and code blocks are kept only when `text` is non-empty;
 * list blocks only when `items` is non-empty; every other block type is kept.
 */
function filterBlocks(blocks) {
    const hasContent = (block) => {
        if (block.type === 'paragraph' || block.type === 'heading' || block.type === 'code') {
            return block.text.length > 0;
        }
        if (block.type === 'list') {
            return block.items.length > 0;
        }
        return true;
    };
    return blocks.filter((block) => hasContent(block));
}
136
/**
 * Parses HTML content and extracts semantic blocks
 *
 * `script`, `style`, `noscript`, `iframe` and `svg` elements are removed, then
 * every heading, paragraph, list, pre/code, table and image under `<body>` is
 * handed to parseElement and the results are passed through filterBlocks.
 *
 * A `<pre>` whose only content is a single `<code>` child would otherwise
 * produce two identical code blocks. Only one of the pair is emitted: the
 * `<pre>` when its own class names a language, otherwise the inner `<code>`,
 * so no language hint is lost.
 *
 * @param html Raw HTML markup.
 * @returns The extracted blocks in traversal order.
 */
export function parseHtml(html) {
    const $ = cheerio.load(html);
    const blocks = [];
    $('script, style, noscript, iframe, svg').remove();
    // True when the class attribute carries a "language-" token.
    const hasLanguageClass = ($el) => /language-/.test($el.attr('class') || '');
    // True when the <pre> holds exactly one <code> child with the same text.
    const isSoleCodeWrapper = ($pre) => {
        const $code = $pre.children('code');
        return $code.length === 1 && $code.text().trim() === $pre.text().trim();
    };
    $('body')
        .find('h1, h2, h3, h4, h5, h6, p, ul, ol, pre, code, table, img')
        .each((_, element) => {
        const tag = typeof element.tagName === 'string' ? element.tagName.toLowerCase() : '';
        const $el = $(element);
        if (tag === 'pre' && isSoleCodeWrapper($el) && !hasLanguageClass($el)) {
            return; // the inner <code> will represent this snippet
        }
        if (tag === 'code') {
            const $parent = $el.parent();
            if ($parent.is('pre') && isSoleCodeWrapper($parent) && hasLanguageClass($parent)) {
                return; // the enclosing <pre> already represents this snippet
            }
        }
        const block = parseElement($, element);
        if (block)
            blocks.push(block);
    });
    return filterBlocks(blocks);
}
152
+ //# sourceMappingURL=parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/services/parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAW5C,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,OAAO;QACL,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,CAAa,EAAE,OAAgB;IACrD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAE7E,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,CAAC,CAAC,OAAO,CAAC;SACP,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACd,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACxC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEL,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI;QAC/C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,CAAa,EAAE,OAAgB;IAChD,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAExD,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;QAC5B,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;IAE1B,0CAA0C;IAC1C,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;QACjD,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAA
C,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM;aACH,IAAI,CAAC,IAAI,CAAC;aACV,KAAK,EAAE;aACP,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;IACP,CAAC;IAED,oBAAoB;IACpB,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,cAAc,CAAC;IACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QACxC,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,CAAC,CAAC,GAAG,CAAC;aACH,IAAI,CAAC,QAAQ,CAAC;aACd,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QACL,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,OAAO;QACL,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;QACjD,IAAI;KACL,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,CAAa,EAAE,OAAgB;IACjD,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,IAAI,EAAE,OAAO;QACb,GAAG;QACH,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,SAAS;KACzC,CAAC;AACJ,CAAC;AAED,MAAM,eAAe,GAAG;IACtB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,YAAY;IAChB,CAAC,EAAE,cAAc;IACjB,EAAE,EAAE,SAAS;IACb,EAAE,EAAE,SAAS;IACb,GAAG,EAAE,SAAS;IACd,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,UAAU;IACjB,GAAG,EAAE,UAAU;CAIhB,CAAC;AAIF,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG,IAAI,eAAe,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,CAAa,EAAE,IAAa;IAChD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE1E,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1C,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,YAA
Y,CAAC,MAA2B;IAC/C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC7B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,WAAW,CAAC;YACjB,KAAK,SAAS,CAAC;YACf,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC/B,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,MAAM,GAAwB,EAAE,CAAC;IAEvC,CAAC,CAAC,sCAAsC,CAAC,CAAC,MAAM,EAAE,CAAC;IAEnD,CAAC,CAAC,MAAM,CAAC;SACN,IAAI,CAAC,0DAA0D,CAAC;SAChE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QACnB,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACvC,IAAI,KAAK;YAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEL,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;AAC9B,CAAC"}
@@ -0,0 +1,42 @@
1
+ import type { ContentBlockUnion } from '../types/index.js';
2
+ /** HTML parsing service. `parseHtml` is the only public member; the remaining members are private helpers. */
+ declare class ParserService {
3
+ /**
4
+ * Parses HTML content and extracts semantic blocks
5
+ */
6
+ parseHtml(html: string): ContentBlockUnion[];
7
+ /**
8
+ * Parses a single element into a content block
9
+ */
10
+ private parseElement;
11
+ /**
12
+ * Parses a heading element
13
+ */
14
+ private parseHeading;
15
+ /**
16
+ * Parses a paragraph element
17
+ */
18
+ private parseParagraph;
19
+ /**
20
+ * Parses a list element
21
+ */
22
+ private parseList;
23
+ /**
24
+ * Parses a code element
25
+ */
26
+ private parseCode;
27
+ /**
28
+ * Parses a table element
29
+ */
30
+ private parseTable;
31
+ /**
32
+ * Parses an image element
33
+ */
34
+ private parseImage;
35
+ /**
36
+ * Filters out empty or invalid blocks
37
+ */
38
+ private filterBlocks;
39
+ }
40
+ /** Exported ParserService instance. */
+ export declare const parserService: ParserService;
41
+ export {};
42
+ //# sourceMappingURL=parser.service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.service.d.ts","sourceRoot":"","sources":["../../src/services/parser.service.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAOV,iBAAiB,EAClB,MAAM,mBAAmB,CAAC;AAE3B,cAAM,aAAa;IACjB;;OAEG;IACH,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAoB5C;;OAEG;IACH,OAAO,CAAC,YAAY;IAuCpB;;OAEG;IACH,OAAO,CAAC,YAAY;IAepB;;OAEG;IACH,OAAO,CAAC,cAAc;IAgBtB;;OAEG;IACH,OAAO,CAAC,SAAS;IAwBjB;;OAEG;IACH,OAAO,CAAC,SAAS;IAkBjB;;OAEG;IACH,OAAO,CAAC,UAAU;IA+ClB;;OAEG;IACH,OAAO,CAAC,UAAU;IAelB;;OAEG;IACH,OAAO,CAAC,YAAY;CAiBrB;AAGD,eAAO,MAAM,aAAa,eAAsB,CAAC"}