@j0hanz/superfetch 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +590 -327
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +251 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +2 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +0 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +13 -47
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +1 -11
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +11 -44
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +0 -3
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +1 -4
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +2 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +9 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +71 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +187 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +6 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +53 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +4 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +30 -36
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +64 -47
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +5 -12
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +7 -4
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +84 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +8 -6
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +5 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +147 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +37 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +116 -2
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +2 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +25 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +240 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +3 -11
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +1 -1
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +4 -3
package/dist/tools/index.js
CHANGED
|
@@ -1,8 +1,28 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
2
|
+
import { FETCH_LINKS_TOOL_DESCRIPTION, FETCH_LINKS_TOOL_NAME, fetchLinksToolHandler, } from './handlers/fetch-links.tool.js';
|
|
3
|
+
import { FETCH_MARKDOWN_TOOL_DESCRIPTION, FETCH_MARKDOWN_TOOL_NAME, fetchMarkdownToolHandler, } from './handlers/fetch-markdown.tool.js';
|
|
4
|
+
import { FETCH_URL_TOOL_DESCRIPTION, FETCH_URL_TOOL_NAME, fetchUrlToolHandler, } from './handlers/fetch-url.tool.js';
|
|
5
|
+
import { FETCH_URLS_TOOL_DESCRIPTION, FETCH_URLS_TOOL_NAME, fetchUrlsToolHandler, } from './handlers/fetch-urls.tool.js';
|
|
5
6
|
// Zod schemas for runtime validation - single source of truth
|
|
7
|
+
// Common request options shared across tools.
// Declared once and spread into the tool input schemas so every tool
// validates these fields the same way.
const RequestOptionsSchema = {
    customHeaders: z
        .record(z.string())
        .optional()
        .describe('Custom HTTP headers for the request'),
    timeout: z
        .number()
        .min(1000)
        .max(60000)
        .optional()
        .describe('Request timeout in milliseconds (1000-60000)'),
    // A retry count is a whole number: reject fractional input such as 2.5
    // at the schema boundary rather than handing it to the fetch layer.
    retries: z
        .number()
        .int()
        .min(1)
        .max(10)
        .optional()
        .describe('Number of retry attempts (1-10)'),
};
|
|
6
26
|
// Input schemas
|
|
7
27
|
const FetchUrlInputSchema = {
|
|
8
28
|
url: z.string().min(1).describe('The URL to fetch'),
|
|
@@ -26,10 +46,7 @@ const FetchUrlInputSchema = {
|
|
|
26
46
|
.optional()
|
|
27
47
|
.default('jsonl')
|
|
28
48
|
.describe('Output format'),
|
|
29
|
-
|
|
30
|
-
.record(z.string())
|
|
31
|
-
.optional()
|
|
32
|
-
.describe('Custom HTTP headers for the request'),
|
|
49
|
+
...RequestOptionsSchema,
|
|
33
50
|
};
|
|
34
51
|
const FetchLinksInputSchema = {
|
|
35
52
|
url: z.string().min(1).describe('The URL to extract links from'),
|
|
@@ -43,6 +60,22 @@ const FetchLinksInputSchema = {
|
|
|
43
60
|
.optional()
|
|
44
61
|
.default(true)
|
|
45
62
|
.describe('Include internal links'),
|
|
63
|
+
maxLinks: z
|
|
64
|
+
.number()
|
|
65
|
+
.positive()
|
|
66
|
+
.max(1000)
|
|
67
|
+
.optional()
|
|
68
|
+
.describe('Maximum number of links to return (1-1000)'),
|
|
69
|
+
filterPattern: z
|
|
70
|
+
.string()
|
|
71
|
+
.optional()
|
|
72
|
+
.describe('Regex pattern to filter links (matches against href)'),
|
|
73
|
+
includeImages: z
|
|
74
|
+
.boolean()
|
|
75
|
+
.optional()
|
|
76
|
+
.default(false)
|
|
77
|
+
.describe('Include image links (img src attributes)'),
|
|
78
|
+
...RequestOptionsSchema,
|
|
46
79
|
};
|
|
47
80
|
const FetchMarkdownInputSchema = {
|
|
48
81
|
url: z.string().min(1).describe('The URL to fetch'),
|
|
@@ -56,6 +89,57 @@ const FetchMarkdownInputSchema = {
|
|
|
56
89
|
.optional()
|
|
57
90
|
.default(true)
|
|
58
91
|
.describe('Include YAML frontmatter metadata'),
|
|
92
|
+
maxContentLength: z
|
|
93
|
+
.number()
|
|
94
|
+
.positive()
|
|
95
|
+
.optional()
|
|
96
|
+
.describe('Maximum content length in characters'),
|
|
97
|
+
generateToc: z
|
|
98
|
+
.boolean()
|
|
99
|
+
.optional()
|
|
100
|
+
.default(false)
|
|
101
|
+
.describe('Generate table of contents from headings'),
|
|
102
|
+
...RequestOptionsSchema,
|
|
103
|
+
};
|
|
104
|
+
/**
 * Input shape for the batch fetch tool: a list of URLs plus the extraction,
 * formatting and batching options applied to all of them.
 */
const FetchUrlsInputSchema = {
    urls: z
        .array(z.string().min(1))
        .min(1)
        .max(10)
        .describe('Array of URLs to fetch (1-10 URLs)'),
    extractMainContent: z
        .boolean()
        .optional()
        .default(true)
        .describe('Use Readability to extract main article content'),
    includeMetadata: z
        .boolean()
        .optional()
        .default(true)
        .describe('Include page metadata (title, description, etc.)'),
    maxContentLength: z
        .number()
        .positive()
        .optional()
        .describe('Maximum content length per URL in characters'),
    format: z
        .enum(['jsonl', 'markdown'])
        .optional()
        .default('jsonl')
        .describe('Output format for all URLs'),
    // Concurrency is a count of simultaneous requests, so it must be a
    // whole number; fractional values are rejected during validation.
    concurrency: z
        .number()
        .int()
        .min(1)
        .max(5)
        .optional()
        .default(3)
        .describe('Maximum concurrent requests (1-5)'),
    continueOnError: z
        .boolean()
        .optional()
        .default(true)
        .describe('Continue processing if some URLs fail'),
    ...RequestOptionsSchema,
};
|
|
60
144
|
// Output schemas for structured content validation
|
|
61
145
|
const FetchUrlOutputSchema = {
|
|
@@ -78,9 +162,17 @@ const FetchLinksOutputSchema = {
|
|
|
78
162
|
.array(z.object({
|
|
79
163
|
href: z.string().describe('The link URL'),
|
|
80
164
|
text: z.string().describe('The link anchor text'),
|
|
81
|
-
type: z.enum(['internal', 'external']).describe('Link type'),
|
|
165
|
+
type: z.enum(['internal', 'external', 'image']).describe('Link type'),
|
|
82
166
|
}))
|
|
83
167
|
.describe('Array of extracted links'),
|
|
168
|
+
filtered: z
|
|
169
|
+
.number()
|
|
170
|
+
.optional()
|
|
171
|
+
.describe('Number of links filtered out by pattern'),
|
|
172
|
+
truncated: z
|
|
173
|
+
.boolean()
|
|
174
|
+
.optional()
|
|
175
|
+
.describe('Whether results were truncated by maxLinks'),
|
|
84
176
|
error: z.string().optional().describe('Error message if the request failed'),
|
|
85
177
|
errorCode: z.string().optional().describe('Error code if the request failed'),
|
|
86
178
|
};
|
|
@@ -91,35 +183,73 @@ const FetchMarkdownOutputSchema = {
|
|
|
91
183
|
.string()
|
|
92
184
|
.describe('ISO timestamp of when the content was fetched'),
|
|
93
185
|
markdown: z.string().describe('The extracted content in Markdown format'),
|
|
186
|
+
toc: z
|
|
187
|
+
.array(z.object({
|
|
188
|
+
level: z.number().describe('Heading level (1-6)'),
|
|
189
|
+
text: z.string().describe('Heading text'),
|
|
190
|
+
slug: z.string().describe('URL-friendly anchor slug'),
|
|
191
|
+
}))
|
|
192
|
+
.optional()
|
|
193
|
+
.describe('Table of contents (if generateToc is true)'),
|
|
94
194
|
cached: z.boolean().describe('Whether the result was served from cache'),
|
|
195
|
+
truncated: z
|
|
196
|
+
.boolean()
|
|
197
|
+
.optional()
|
|
198
|
+
.describe('Whether content was truncated by maxContentLength'),
|
|
95
199
|
error: z.string().optional().describe('Error message if the request failed'),
|
|
96
200
|
errorCode: z.string().optional().describe('Error code if the request failed'),
|
|
97
201
|
};
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
202
|
+
/**
 * Structured output of the batch fetch tool: one entry per URL, aggregate
 * counters, and the batch completion timestamp.
 */
const FetchUrlsOutputSchema = {
    // Per-URL outcome. Only `url` and `success` are required; every other
    // field is optional.
    results: z
        .array(
            z.object({
                url: z.string().describe('The fetched URL'),
                success: z.boolean().describe('Whether the fetch was successful'),
                title: z.string().optional().describe('Page title'),
                content: z.string().optional().describe('The extracted content'),
                contentBlocks: z.number().optional().describe('Number of content blocks (JSONL only)'),
                cached: z.boolean().optional().describe('Whether served from cache'),
                error: z.string().optional().describe('Error message if failed'),
                errorCode: z.string().optional().describe('Error code if failed'),
            }),
        )
        .describe('Array of results for each URL'),
    // Aggregate counters for the whole batch.
    summary: z
        .object({
            total: z.number().describe('Total URLs processed'),
            successful: z.number().describe('Number of successful fetches'),
            failed: z.number().describe('Number of failed fetches'),
            cached: z.number().describe('Number served from cache'),
            totalContentBlocks: z.number().describe('Total content blocks extracted'),
        })
        .describe('Summary statistics'),
    fetchedAt: z.string().describe('ISO timestamp of batch completion'),
};
|
|
102
229
|
/**
 * Registers the four fetch tools on the given server.
 *
 * Tools are registered in a fixed order (fetch-url, fetch-links,
 * fetch-markdown, fetch-urls). Each registration passes the tool name, a
 * config object (title, description, inputSchema, outputSchema) and an async
 * callback that forwards the call arguments to the tool's handler.
 *
 * @param server - Object exposing `registerTool(name, config, callback)`.
 */
export function registerTools(server) {
    const toolDefinitions = [
        {
            name: FETCH_URL_TOOL_NAME,
            title: 'Fetch URL',
            description: FETCH_URL_TOOL_DESCRIPTION,
            inputSchema: FetchUrlInputSchema,
            outputSchema: FetchUrlOutputSchema,
            handler: fetchUrlToolHandler,
        },
        {
            name: FETCH_LINKS_TOOL_NAME,
            title: 'Fetch Links',
            description: FETCH_LINKS_TOOL_DESCRIPTION,
            inputSchema: FetchLinksInputSchema,
            outputSchema: FetchLinksOutputSchema,
            handler: fetchLinksToolHandler,
        },
        {
            name: FETCH_MARKDOWN_TOOL_NAME,
            title: 'Fetch Markdown',
            description: FETCH_MARKDOWN_TOOL_DESCRIPTION,
            inputSchema: FetchMarkdownInputSchema,
            outputSchema: FetchMarkdownOutputSchema,
            handler: fetchMarkdownToolHandler,
        },
        {
            name: FETCH_URLS_TOOL_NAME,
            title: 'Fetch URLs (Batch)',
            description: FETCH_URLS_TOOL_DESCRIPTION,
            inputSchema: FetchUrlsInputSchema,
            outputSchema: FetchUrlsOutputSchema,
            handler: fetchUrlsToolHandler,
        },
    ];
    // Peel off the name and handler; what remains is exactly the config
    // object (title, description, inputSchema, outputSchema) for the server.
    for (const { name, handler, ...toolConfig } of toolDefinitions) {
        server.registerTool(name, toolConfig, async (args) => handler(args));
    }
}
|
|
125
255
|
//# sourceMappingURL=index.js.map
|
package/dist/tools/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tools/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,OAAO,EACL,4BAA4B,EAC5B,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EACL,+BAA+B,EAC/B,wBAAwB,EACxB,wBAAwB,GACzB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,0BAA0B,EAC1B,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,8BAA8B,CAAC;AACtC,OAAO,EACL,2BAA2B,EAC3B,oBAAoB,EACpB,oBAAoB,GACrB,MAAM,+BAA+B,CAAC;AAEvC,8DAA8D;AAE9D,6CAA6C;AAC7C,MAAM,oBAAoB,GAAG;IAC3B,aAAa,EAAE,CAAC;SACb,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SAClB,QAAQ,EAAE;SACV,QAAQ,CAAC,qCAAqC,CAAC;IAClD,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,IAAI,CAAC;SACT,GAAG,CAAC,KAAK,CAAC;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,8CAA8C,CAAC;IAC3D,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,QAAQ,EAAE;SACV,QAAQ,CAAC,iCAAiC,CAAC;CAC/C,CAAC;AAEF,gBAAgB;AAChB,MAAM,mBAAmB,GAAG;IAC1B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACnD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,iDAAiD,CAAC;IAC9D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,kDAAkD,CAAC;IAC/D,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,sCAAsC,CAAC;IACnD,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;SAC3B,QAAQ,EAAE;SACV,OAAO,CAAC,OAAO,CAAC;SAChB,QAAQ,CAAC,eAAe,CAAC;IAC5B,GAAG,oBAAoB;CACxB,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,+BAA+B,CAAC;IAChE,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,wBAAwB,CAAC;IACrC,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,wBAAwB,CAAC;IACrC,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,GAAG,CAAC,IAAI,CAAC;SACT,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,aAAa,EAAE,CAAC;SACb,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,sDAAsD,CAAC;IACnE,aAAa,EAAE,CAAC;SACb,OAAO,EAAE;SACT,
QAAQ,EAAE;SACV,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,0CAA0C,CAAC;IACvD,GAAG,oBAAoB;CACxB,CAAC;AAEF,MAAM,wBAAwB,GAAG;IAC/B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACnD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,gDAAgD,CAAC;IAC7D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,mCAAmC,CAAC;IAChD,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,sCAAsC,CAAC;IACnD,WAAW,EAAE,CAAC;SACX,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,0CAA0C,CAAC;IACvD,GAAG,oBAAoB;CACxB,CAAC;AAEF,MAAM,oBAAoB,GAAG;IAC3B,IAAI,EAAE,CAAC;SACJ,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACxB,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,QAAQ,CAAC,oCAAoC,CAAC;IACjD,kBAAkB,EAAE,CAAC;SAClB,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,iDAAiD,CAAC;IAC9D,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,kDAAkD,CAAC;IAC/D,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,EAAE;SACV,QAAQ,CAAC,8CAA8C,CAAC;IAC3D,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;SAC3B,QAAQ,EAAE;SACV,OAAO,CAAC,OAAO,CAAC;SAChB,QAAQ,CAAC,4BAA4B,CAAC;IACzC,WAAW,EAAE,CAAC;SACX,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,mCAAmC,CAAC;IAChD,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,uCAAuC,CAAC;IACpD,GAAG,oBAAoB;CACxB,CAAC;AAEF,mDAAmD;AACnD,MAAM,oBAAoB,GAAG;IAC3B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IAC3C,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;IACnD,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,oCAAoC,CAAC;IACxE,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,QAAQ,CAAC,+CAA+C,CAAC;IAC5D,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,oBAAoB,CAAC;IACpE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uCAAuC,
CAAC;IACrE,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IACxE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;CAC9E,CAAC;AAEF,MAAM,sBAAsB,GAAG;IAC7B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC;IAC1C,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;IACjE,KAAK,EAAE,CAAC;SACL,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC;QACzC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC;QACjD,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC;KACtE,CAAC,CACH;SACA,QAAQ,CAAC,0BAA0B,CAAC;IACvC,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,yCAAyC,CAAC;IACtD,SAAS,EAAE,CAAC;SACT,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;CAC9E,CAAC;AAEF,MAAM,yBAAyB,GAAG;IAChC,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IAC3C,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;IACnD,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,QAAQ,CAAC,+CAA+C,CAAC;IAC5D,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IACzE,GAAG,EAAE,CAAC;SACH,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qBAAqB,CAAC;QACjD,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC;QACzC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;KACtD,CAAC,CACH;SACA,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,0CAA0C,CAAC;IACxE,SAAS,EAAE,CAAC;SACT,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,QAAQ,CAAC,mDAAmD,CAAC;IAChE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;IAC5E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;CAC9E,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,OAAO,EAAE,CAAC;SACP,KAAK,CACJ,CAAC,CAAC,MAAM,CAA
C;QACP,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;QAC3C,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;QACjE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;QACnD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;QAChE,aAAa,EAAE,CAAC;aACb,MAAM,EAAE;aACR,QAAQ,EAAE;aACV,QAAQ,CAAC,uCAAuC,CAAC;QACpD,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,2BAA2B,CAAC;QACpE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC;QAChE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC;KAClE,CAAC,CACH;SACA,QAAQ,CAAC,+BAA+B,CAAC;IAC5C,OAAO,EAAE,CAAC;SACP,MAAM,CAAC;QACN,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC;QAClD,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,8BAA8B,CAAC;QAC/D,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;QACvD,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;QACvD,kBAAkB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;KAC1E,CAAC;SACD,QAAQ,CAAC,oBAAoB,CAAC;IACjC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACpE,CAAC;AAEF,MAAM,UAAU,aAAa,CAAC,MAAiB;IAC7C,MAAM,CAAC,YAAY,CACjB,mBAAmB,EACnB;QACE,KAAK,EAAE,WAAW;QAClB,WAAW,EAAE,0BAA0B;QACvC,WAAW,EAAE,mBAAmB;QAChC,YAAY,EAAE,oBAAoB;KACnC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAC1C,CAAC;IAEF,MAAM,CAAC,YAAY,CACjB,qBAAqB,EACrB;QACE,KAAK,EAAE,aAAa;QACpB,WAAW,EAAE,4BAA4B;QACzC,WAAW,EAAE,qBAAqB;QAClC,YAAY,EAAE,sBAAsB;KACrC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAC5C,CAAC;IAEF,MAAM,CAAC,YAAY,CACjB,wBAAwB,EACxB;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EAAE,+BAA+B;QAC5C,WAAW,EAAE,wBAAwB;QACrC,YAAY,EAAE,yBAAyB;KACxC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAC/C,CAAC;IAEF,MAAM,CAAC,YAAY,CACjB,oBAAoB,EACpB;QACE,KAAK,EAAE,oBAAoB;QAC3B,WAAW,EAAE,2BAA2B;QACxC,WAAW,EAAE,oBAAoB;QACjC,YAAY,EAAE,qBAAqB;KACpC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAC3C,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ExtractedArticle, ExtractedMetadata, MetadataBlock } from '../../config/types.js';
|
|
2
|
+
export declare function shouldUseArticle(extractMainContent: boolean, article: ExtractedArticle | null): article is ExtractedArticle;
|
|
3
|
+
export declare function buildMetadata(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, useArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
|
|
4
|
+
export declare function truncateContent(content: string, maxLength?: number): {
|
|
5
|
+
content: string;
|
|
6
|
+
truncated: boolean;
|
|
7
|
+
};
|
|
8
|
+
//# sourceMappingURL=common.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACd,MAAM,uBAAuB,CAAC;AAE/B,wBAAgB,gBAAgB,CAC9B,kBAAkB,EAAE,OAAO,EAC3B,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAI7B;AAED,wBAAgB,aAAa,CAC3B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,UAAU,EAAE,OAAO,EACnB,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAmB3B;AAED,wBAAgB,eAAe,CAC7B,OAAO,EAAE,MAAM,EACf,SAAS,CAAC,EAAE,MAAM,GACjB;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,OAAO,CAAA;CAAE,CAQzC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { config } from '../../config/index.js';
|
|
2
|
+
/**
 * Decides whether the extracted article should be used as the content source.
 *
 * All three conditions must hold: the caller asked for main-content
 * extraction, `config.extraction.extractMainContent` is enabled, and an
 * article was actually produced.
 *
 * @param extractMainContent - Per-request flag.
 * @param article - Extracted article, or null when extraction produced none.
 * @returns Truthy when the article should be used, falsy otherwise.
 */
export function shouldUseArticle(extractMainContent, article) {
    const articleAvailable = Boolean(article);
    return extractMainContent && config.extraction.extractMainContent && articleAvailable;
}
|
|
5
|
+
/**
 * Builds the metadata block attached to fetched content.
 *
 * @param url - URL recorded in the block.
 * @param article - Extracted article, or null.
 * @param extractedMeta - Metadata extracted from the page (title, description, author).
 * @param useArticle - Whether the article is the chosen content source.
 * @param includeMetadata - Per-request flag.
 * @returns `undefined` when metadata is disabled by the request or by
 *   `config.extraction.includeMetadata`. Otherwise a `type: 'metadata'`
 *   object stamped with the current time, built from the article (title and
 *   byline) when it is in use, or from the page metadata when it is not.
 */
export function buildMetadata(url, article, extractedMeta, useArticle, includeMetadata) {
    const metadataEnabled = includeMetadata && config.extraction.includeMetadata;
    if (!metadataEnabled) {
        return undefined;
    }
    const fetchedAt = new Date().toISOString();
    if (useArticle && article) {
        // The article branch carries no description field.
        return {
            type: 'metadata',
            title: article.title,
            author: article.byline,
            url,
            fetchedAt,
        };
    }
    return {
        type: 'metadata',
        title: extractedMeta.title,
        description: extractedMeta.description,
        author: extractedMeta.author,
        url,
        fetchedAt,
    };
}
|
|
26
|
+
/**
 * Truncates content to at most `maxLength` UTF-16 code units and appends a
 * truncation marker.
 *
 * @param {string} content - Text to truncate.
 * @param {number} [maxLength] - Maximum length to keep. When missing, zero or
 *   negative, or when the content already fits, the content is returned
 *   unchanged.
 * @returns {{ content: string, truncated: boolean }} The (possibly shortened)
 *   content and whether truncation happened. When truncated, the marker
 *   `'\n...[truncated]'` is appended after the kept text.
 */
export function truncateContent(content, maxLength) {
    if (!maxLength || maxLength <= 0 || content.length <= maxLength) {
        return { content, truncated: false };
    }
    // substring() truncates fractional indices; do it explicitly so the
    // surrogate check below looks at the same boundary.
    let cut = Math.trunc(maxLength);
    // If the boundary falls between the two halves of a surrogate pair, step
    // back one unit so no lone high surrogate (invalid text) is emitted.
    if (cut > 0) {
        const before = content.charCodeAt(cut - 1);
        const after = content.charCodeAt(cut);
        const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
        if (splitsPair) {
            cut -= 1;
        }
    }
    return {
        content: content.substring(0, cut) + '\n...[truncated]',
        truncated: true,
    };
}
|
|
35
|
+
//# sourceMappingURL=common.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAO/C,MAAM,UAAU,gBAAgB,CAC9B,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,CACL,kBAAkB,IAAI,MAAM,CAAC,UAAU,CAAC,kBAAkB,IAAI,CAAC,CAAC,OAAO,CACxE,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,UAAmB,EACnB,eAAwB;IAExB,IAAI,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IAC7E,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,UAAU,IAAI,OAAO;QAC1B,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB;IAElB,IAAI,CAAC,SAAS,IAAI,SAAS,IAAI,CAAC,IAAI,OAAO,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAChE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,kBAAkB;QAC7D,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,oBAAoB,EACpB,cAAc,EACf,MAAM,uBAAuB,CAAC;AAQ/B,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAiD5B"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import * as cache from '../../services/cache.js';
|
|
2
|
+
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
|
+
import { logDebug } from '../../services/logger.js';
|
|
4
|
+
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
5
|
+
/**
 * Runs the shared fetch pipeline: validate the URL, serve from cache when
 * possible, otherwise fetch, transform and cache the result.
 *
 * @param options - Pipeline options:
 *   - `url`: URL to fetch; validated and normalized before use.
 *   - `cacheNamespace`: namespace used to build the cache key.
 *   - `customHeaders`, `retries`: forwarded to `fetchUrlWithRetry`.
 *   - `transform(html, normalizedUrl)`: converts fetched HTML into the result data.
 *   - `serialize` / `deserialize`: cache (de)serialization; default to
 *     `JSON.stringify` / `JSON.parse`.
 * @returns An object with `data`, `fromCache`, the normalized `url` and
 *   `fetchedAt` (the cached timestamp on a hit, the current time otherwise).
 * @throws Whatever URL validation, the fetch, `transform` or `serialize` throw.
 */
export async function executeFetchPipeline(options) {
    const {
        url,
        cacheNamespace,
        customHeaders,
        retries,
        transform,
        serialize = JSON.stringify,
        deserialize = (cached) => JSON.parse(cached),
    } = options;
    const normalizedUrl = validateAndNormalizeUrl(url);
    const cacheKey = cache.createCacheKey(cacheNamespace, normalizedUrl);
    if (cacheKey) {
        const cached = cache.get(cacheKey);
        if (cached) {
            logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
            try {
                const data = deserialize(cached.content);
                return {
                    data,
                    fromCache: true,
                    url: normalizedUrl,
                    fetchedAt: cached.fetchedAt,
                };
            }
            catch (error) {
                // An unreadable cache entry must not fail the request: fall
                // through to a fresh fetch, which also overwrites the entry.
                logDebug('Cache entry unreadable, refetching', {
                    namespace: cacheNamespace,
                    url: normalizedUrl,
                    error: error instanceof Error ? error.message : String(error),
                });
            }
        }
    }
    logDebug('Fetching URL', { url: normalizedUrl, retries });
    const fetchResult = await fetchUrlWithRetry(normalizedUrl, customHeaders, retries);
    const html = fetchResult.html;
    const data = transform(html, normalizedUrl);
    if (cacheKey) {
        const serialized = serialize(data);
        cache.set(cacheKey, serialized);
    }
    return {
        data,
        fromCache: false,
        url: normalizedUrl,
        fetchedAt: new Date().toISOString(),
    };
}
|
|
37
|
+
//# sourceMappingURL=fetch-pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAEpD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,EACJ,GAAG,EACH,cAAc,EACd,aAAa,EACb,OAAO,EACP,SAAS,EACT,SAAS,GAAG,IAAI,CAAC,SAAS,EAC1B,WAAW,GAAG,CAAC,MAAc,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAM,GAC1D,GAAG,OAAO,CAAC;IAEZ,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAErE,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,MAAM,EAAE,CAAC;YACX,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;YACzE,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAEzC,OAAO;gBACL,IAAI;gBACJ,SAAS,EAAE,IAAI;gBACf,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;IACH,CAAC;IAED,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC,CAAC;IAC1D,MAAM,WAAW,GAAG,MAAM,iBAAiB,CACzC,aAAa,EACb,aAAa,EACb,OAAO,CACR,CAAC;IACF,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC;IAC9B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IAE5C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACnC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,KAAK;QAChB,GAAG,EAAE,aAAa;QAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { executeFetchPipeline } from './fetch-pipeline.js';
|
|
2
|
+
export type { FetchPipelineOptions, PipelineResult } from './fetch-pipeline.js';
|
|
3
|
+
export { createSuccessResponse, createCachedResponse, createBatchResponse, } from './response-builder.js';
|
|
4
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,YAAY,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAE3D,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"response-builder.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/response-builder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,oBAAoB,EAEpB,cAAc,EACd,YAAY,EACb,MAAM,uBAAuB,CAAC;AAE/B,wBAAgB,mBAAmB,CACjC,OAAO,EAAE,cAAc,EAAE,GACxB,YAAY,CAAC,oBAAoB,CAAC,CA2BpC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export function createBatchResponse(results) {
|
|
2
|
+
const summary = {
|
|
3
|
+
total: results.length,
|
|
4
|
+
successful: results.filter((r) => r.success).length,
|
|
5
|
+
failed: results.filter((r) => !r.success).length,
|
|
6
|
+
cached: results.filter((r) => r.cached).length,
|
|
7
|
+
totalContentBlocks: results.reduce((sum, r) => sum + (r.contentBlocks ?? 0), 0),
|
|
8
|
+
};
|
|
9
|
+
const structuredContent = {
|
|
10
|
+
results,
|
|
11
|
+
summary,
|
|
12
|
+
fetchedAt: new Date().toISOString(),
|
|
13
|
+
};
|
|
14
|
+
return {
|
|
15
|
+
content: [
|
|
16
|
+
{
|
|
17
|
+
type: 'text',
|
|
18
|
+
text: JSON.stringify(structuredContent, null, 2),
|
|
19
|
+
},
|
|
20
|
+
],
|
|
21
|
+
structuredContent,
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=response-builder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"response-builder.js","sourceRoot":"","sources":["../../../src/tools/utils/response-builder.ts"],"names":[],"mappings":"AAOA,MAAM,UAAU,mBAAmB,CACjC,OAAyB;IAEzB,MAAM,OAAO,GAAiB;QAC5B,KAAK,EAAE,OAAO,CAAC,MAAM;QACrB,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM;QACnD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM;QAChD,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM;QAC9C,kBAAkB,EAAE,OAAO,CAAC,MAAM,CAChC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,CAAC,EACxC,CAAC,CACF;KACF,CAAC;IAEF,MAAM,iBAAiB,GAAyB;QAC9C,OAAO;QACP,OAAO;QACP,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IAEF,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAe;gBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE,IAAI,EAAE,CAAC,CAAC;aACjD;SACF;QACD,iBAAiB;KAClB,CAAC;AACJ,CAAC"}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type { ContentBlockUnion, MetadataBlock } from '../types
|
|
1
|
+
import type { ContentBlockUnion, MetadataBlock } from '../config/types.js';
|
|
2
2
|
export declare function toJsonl(blocks: ContentBlockUnion[], metadata?: MetadataBlock): string;
|
|
3
3
|
//# sourceMappingURL=jsonl.transformer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AA6B3E,wBAAgB,OAAO,CACrB,MAAM,EAAE,iBAAiB,EAAE,EAC3B,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CA0BR"}
|
|
@@ -5,7 +5,8 @@ function truncateBlock(block) {
|
|
|
5
5
|
switch (block.type) {
|
|
6
6
|
case 'paragraph':
|
|
7
7
|
case 'heading':
|
|
8
|
-
case 'code':
|
|
8
|
+
case 'code':
|
|
9
|
+
case 'blockquote': {
|
|
9
10
|
const truncated = truncateText(block.text, maxLength);
|
|
10
11
|
return truncated === block.text ? block : { ...block, text: truncated };
|
|
11
12
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,WAAW,CAAC;QACjB,KAAK,SAAS,CAAC;QACf,KAAK,MAAM,CAAC;QACZ,KAAK,YAAY,CAAC,CAAC,CAAC;YAClB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1E,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;YACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CACrC,CAAC;YACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAClE,CAAC;QACD;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAA2B,EAC3B,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,oDAAoD;IACpD,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,CAAC;YACH,MAAM,OAAO,GAAG;gBACd,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;QACtC,CAAC;QAAC,MAAM,CAAC;YACP,wBAAwB;QAC1B,CAAC;IACH,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,qCAAqC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAyLxD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAsB7E"}
|
|
@@ -1,4 +1,89 @@
|
|
|
1
1
|
import TurndownService from 'turndown';
|
|
2
|
+
import { detectLanguage } from '../utils/language-detector.js';
|
|
3
|
+
// Patterns for standalone noise lines to remove from markdown
|
|
4
|
+
const NOISE_LINE_PATTERNS = [
|
|
5
|
+
// Timestamps - various formats
|
|
6
|
+
/^\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago$/i,
|
|
7
|
+
/^(updated|modified|edited|created|published|posted)\s+\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago$/i,
|
|
8
|
+
/^(just now|recently|today|yesterday)$/i,
|
|
9
|
+
/^(updated|modified|edited|created|published)\s*:?\s*$/i,
|
|
10
|
+
/^last\s+updated\s*:?$/i,
|
|
11
|
+
/^(last\s+)?(updated|modified|edited)\s*:?\s*\d/i,
|
|
12
|
+
// Single letters or panel labels (from splitter examples, etc.)
|
|
13
|
+
/^[A-Z]$/,
|
|
14
|
+
/^Panel\s+[A-Z]$/i,
|
|
15
|
+
/^[A-Z]\s*$/,
|
|
16
|
+
// Button/action labels
|
|
17
|
+
/^(share|copy|like|follow|subscribe|download|print|save|bookmark)$/i,
|
|
18
|
+
/^(copy to clipboard|copied!?|copy code|copy link)$/i,
|
|
19
|
+
/^(click to copy|expand|collapse|show more|show less|load more)$/i,
|
|
20
|
+
/^(view more|read more|see more|see all|view all)$/i,
|
|
21
|
+
/^(try it|run|execute|play|preview|demo|live demo)$/i,
|
|
22
|
+
/^(edit|delete|remove|add|cancel|confirm|submit|reset|clear)$/i,
|
|
23
|
+
// Navigation
|
|
24
|
+
/^(next|previous|prev|back|forward|home|menu|close|open)$/i,
|
|
25
|
+
/^(scroll to top|back to top|top)$/i,
|
|
26
|
+
// Interactive prompts
|
|
27
|
+
/^(drag|click|tap|swipe|hover)\s+(to|the|here)/i,
|
|
28
|
+
/^(drag the|move the|resize the)/i,
|
|
29
|
+
// Empty structural elements
|
|
30
|
+
/^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$/,
|
|
31
|
+
/^[,;:\-–—]+$/,
|
|
32
|
+
/^\[\d+\]$/,
|
|
33
|
+
/^\(\d+\)$/,
|
|
34
|
+
];
|
|
35
|
+
/**
|
|
36
|
+
* Check if a line is noise that should be removed
|
|
37
|
+
*/
|
|
38
|
+
function isNoiseLine(line) {
|
|
39
|
+
const trimmed = line.trim();
|
|
40
|
+
// Empty lines are fine
|
|
41
|
+
if (!trimmed)
|
|
42
|
+
return false;
|
|
43
|
+
// Don't filter lines inside code blocks, headings, or lists
|
|
44
|
+
if (trimmed.startsWith('#') ||
|
|
45
|
+
trimmed.startsWith('-') ||
|
|
46
|
+
trimmed.startsWith('*') ||
|
|
47
|
+
trimmed.startsWith('`') ||
|
|
48
|
+
trimmed.startsWith('>') ||
|
|
49
|
+
trimmed.startsWith('|')) {
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
// Check against noise patterns
|
|
53
|
+
for (const pattern of NOISE_LINE_PATTERNS) {
|
|
54
|
+
if (pattern.test(trimmed)) {
|
|
55
|
+
return true;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
return false;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Post-process markdown to remove noise lines
|
|
62
|
+
*/
|
|
63
|
+
function cleanMarkdownContent(markdown) {
|
|
64
|
+
// Split by lines but preserve code blocks
|
|
65
|
+
const lines = markdown.split('\n');
|
|
66
|
+
const cleanedLines = [];
|
|
67
|
+
let inCodeBlock = false;
|
|
68
|
+
for (const line of lines) {
|
|
69
|
+
// Track code block boundaries
|
|
70
|
+
if (line.trim().startsWith('```')) {
|
|
71
|
+
inCodeBlock = !inCodeBlock;
|
|
72
|
+
cleanedLines.push(line);
|
|
73
|
+
continue;
|
|
74
|
+
}
|
|
75
|
+
// Don't filter inside code blocks
|
|
76
|
+
if (inCodeBlock) {
|
|
77
|
+
cleanedLines.push(line);
|
|
78
|
+
continue;
|
|
79
|
+
}
|
|
80
|
+
// Filter noise lines outside code blocks
|
|
81
|
+
if (!isNoiseLine(line)) {
|
|
82
|
+
cleanedLines.push(line);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
return cleanedLines.join('\n');
|
|
86
|
+
}
|
|
2
87
|
const turndown = new TurndownService({
|
|
3
88
|
headingStyle: 'atx',
|
|
4
89
|
codeBlockStyle: 'fenced',
|
|
@@ -10,13 +95,37 @@ turndown.addRule('removeNoise', {
|
|
|
10
95
|
filter: ['script', 'style', 'noscript', 'nav', 'footer', 'aside', 'iframe'],
|
|
11
96
|
replacement: () => '',
|
|
12
97
|
});
|
|
98
|
+
// Enhanced code block handling with language detection
|
|
99
|
+
turndown.addRule('fencedCodeBlockWithLanguage', {
|
|
100
|
+
filter: (node, options) => {
|
|
101
|
+
return (options.codeBlockStyle === 'fenced' &&
|
|
102
|
+
node.nodeName === 'PRE' &&
|
|
103
|
+
node.firstChild !== null &&
|
|
104
|
+
node.firstChild.nodeName === 'CODE');
|
|
105
|
+
},
|
|
106
|
+
replacement: (_content, node) => {
|
|
107
|
+
const codeNode = node.firstChild;
|
|
108
|
+
const code = codeNode.textContent || '';
|
|
109
|
+
// Try to get language from class
|
|
110
|
+
const className = codeNode.getAttribute('class') ?? '';
|
|
111
|
+
const dataLang = codeNode.getAttribute('data-language') ?? '';
|
|
112
|
+
const languageMatch = /language-(\w+)/.exec(className) ??
|
|
113
|
+
/lang-(\w+)/.exec(className) ??
|
|
114
|
+
/highlight-(\w+)/.exec(className) ??
|
|
115
|
+
/^(\w+)$/.exec(dataLang);
|
|
116
|
+
// Use detected language from class, or try to detect from content
|
|
117
|
+
const language = languageMatch?.[1] ?? detectLanguage(code) ?? '';
|
|
118
|
+
return `\n\n\`\`\`${language}\n${code.replace(/\n$/, '')}\n\`\`\`\n\n`;
|
|
119
|
+
},
|
|
120
|
+
});
|
|
13
121
|
// Pre-compiled regex patterns
|
|
14
|
-
const YAML_SPECIAL_CHARS = /[:[\]{}"\n\r'|>&*!?,#]/;
|
|
122
|
+
const YAML_SPECIAL_CHARS = /[:[\]{}"\n\r\t'|>&*!?,#]/;
|
|
15
123
|
const YAML_NUMERIC = /^[\d.]+$/;
|
|
16
124
|
const YAML_RESERVED_WORDS = /^(true|false|null|yes|no|on|off)$/i;
|
|
17
125
|
const ESCAPE_BACKSLASH = /\\/g;
|
|
18
126
|
const ESCAPE_QUOTE = /"/g;
|
|
19
127
|
const ESCAPE_NEWLINE = /\n/g;
|
|
128
|
+
const ESCAPE_TAB = /\t/g;
|
|
20
129
|
const MULTIPLE_NEWLINES = /\n{3,}/g;
|
|
21
130
|
function escapeYamlValue(value) {
|
|
22
131
|
const needsQuoting = YAML_SPECIAL_CHARS.test(value) ||
|
|
@@ -30,7 +139,8 @@ function escapeYamlValue(value) {
|
|
|
30
139
|
return `"${value
|
|
31
140
|
.replace(ESCAPE_BACKSLASH, '\\\\')
|
|
32
141
|
.replace(ESCAPE_QUOTE, '\\"')
|
|
33
|
-
.replace(ESCAPE_NEWLINE, '\\n')
|
|
142
|
+
.replace(ESCAPE_NEWLINE, '\\n')
|
|
143
|
+
.replace(ESCAPE_TAB, '\\t')}"`;
|
|
34
144
|
}
|
|
35
145
|
function createFrontmatter(metadata) {
|
|
36
146
|
const lines = ['---'];
|
|
@@ -49,6 +159,10 @@ export function htmlToMarkdown(html, metadata) {
|
|
|
49
159
|
try {
|
|
50
160
|
content = turndown.turndown(html);
|
|
51
161
|
content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
|
|
162
|
+
// Clean up noise lines from the markdown
|
|
163
|
+
content = cleanMarkdownContent(content);
|
|
164
|
+
// Final cleanup of multiple newlines after removing noise
|
|
165
|
+
content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
|
|
52
166
|
}
|
|
53
167
|
catch {
|
|
54
168
|
return metadata ? createFrontmatter(metadata) + '\n\n' : '';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAIvC,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,8DAA8D;AAC9D,MAAM,mBAAmB,GAAa;IACpC,+BAA+B;IAC/B,uEAAuE;IACvE,4HAA4H;IAC5H,wCAAwC;IACxC,wDAAwD;IACxD,wBAAwB;IACxB,iDAAiD;IAEjD,gEAAgE;IAChE,SAAS;IACT,kBAAkB;IAClB,YAAY;IAEZ,uBAAuB;IACvB,oEAAoE;IACpE,qDAAqD;IACrD,kEAAkE;IAClE,oDAAoD;IACpD,qDAAqD;IACrD,+DAA+D;IAE/D,aAAa;IACb,2DAA2D;IAC3D,oCAAoC;IAEpC,sBAAsB;IACtB,gDAAgD;IAChD,kCAAkC;IAElC,4BAA4B;IAC5B,8BAA8B;IAC9B,cAAc;IACd,WAAW;IACX,WAAW;CACZ,CAAC;AAEF;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,uBAAuB;IACvB,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAE3B,4DAA4D;IAC5D,IACE,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EACvB,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,+BAA+B;IAC/B,KAAK,MAAM,OAAO,IAAI,mBAAmB,EAAE,CAAC;QAC1C,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,0CAA0C;IAC1C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,WAAW,GAAG,KAAK,CAAC;IAExB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,8BAA8B;QAC9B,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAClC,WAAW,GAAG,CAAC,WAAW,CAAC;YAC3B,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,IAAI,WAAW,EAAE,CAAC;YAChB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,yCAAyC;QACzC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;YACvB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,WAAW,EAAE,GAAG;IAChB,gBAAgB,EAAE,GAAG;CACtB,C
AAC,CAAC;AAEH,wBAAwB;AACxB,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE;IAC9B,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC;IAC3E,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;CACtB,CAAC,CAAC;AAEH,uDAAuD;AACvD,QAAQ,CAAC,OAAO,CAAC,6BAA6B,EAAE;IAC9C,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;QACxB,OAAO,CACL,OAAO,CAAC,cAAc,KAAK,QAAQ;YACnC,IAAI,CAAC,QAAQ,KAAK,KAAK;YACvB,IAAI,CAAC,UAAU,KAAK,IAAI;YACxB,IAAI,CAAC,UAAU,CAAC,QAAQ,KAAK,MAAM,CACpC,CAAC;IACJ,CAAC;IACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAyB,CAAC;QAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,WAAW,IAAI,EAAE,CAAC;QAExC,iCAAiC;QACjC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACvD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAE9D,MAAM,aAAa,GACjB,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC;YAChC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;YAC5B,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE3B,kEAAkE;QAClE,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC,CAAC,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAElE,OAAO,aAAa,QAAQ,KAAK,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,cAAc,CAAC;IACzE,CAAC;CACF,CAAC,CAAC;AAEH,8BAA8B;AAC9B,MAAM,kBAAkB,GAAG,0BAA0B,CAAC;AACtD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AACjE,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,MAAM,cAAc,GAAG,KAAK,CAAC;AAC7B,MAAM,UAAU,GAAG,KAAK,CAAC;AACzB,MAAM,iBAAiB,GAAG,SAAS,CAAC;AAEpC,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,YAAY,GAChB,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QAC9B,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QACnB,KAAK,KAAK,EAAE;QACZ,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QACxB,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,CAAC,YAAY;QAAE,OAAO,KAAK,CAAC;IAEhC,OAAO,IAAI,KAAK;SACb,OAAO,CAAC,gBAAgB,EAAE,MAAM,CAAC;SACjC,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC;SAC5B,OAAO,CAAC,cAAc,EAAE,KAAK,CAAC;SAC9B,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC;AACnC,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC;IACtB,IAAI,QAAQ,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,
QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5E,IAAI,QAAQ,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACzE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,CAAC;QACH,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,yCAAyC;QACzC,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;QACxC,0DAA0D;QAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,iBAAiB,CAAC,QAAQ,CAAC,GAAG,MAAM,GAAG,OAAO,CAAC;IACxD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|