@j0hanz/superfetch 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +615 -590
- package/dist/config/types.d.ts +5 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/errors/app-error.d.ts +4 -0
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +7 -0
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +62 -15
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +3 -1
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +11 -3
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +2 -7
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/cached-content.d.ts +5 -0
- package/dist/resources/cached-content.d.ts.map +1 -0
- package/dist/resources/cached-content.js +93 -0
- package/dist/resources/cached-content.js.map +1 -0
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +39 -1
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +9 -0
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +11 -0
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +63 -6
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts.map +1 -1
- package/dist/services/card-extractor.js +11 -4
- package/dist/services/card-extractor.js.map +1 -1
- package/dist/services/extractor.d.ts +7 -1
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +12 -5
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +10 -1
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +131 -36
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +38 -25
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +5 -10
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -12
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +1 -2
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +4 -12
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +8 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.js +52 -15
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
- package/dist/tools/utils/common.js +1 -1
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +54 -13
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +7 -27
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/concurrency.d.ts +5 -1
- package/dist/utils/concurrency.d.ts.map +1 -1
- package/dist/utils/concurrency.js +15 -2
- package/dist/utils/concurrency.js.map +1 -1
- package/dist/utils/content-cleaner.d.ts.map +1 -1
- package/dist/utils/content-cleaner.js +106 -108
- package/dist/utils/content-cleaner.js.map +1 -1
- package/dist/utils/language-detector.d.ts +1 -1
- package/dist/utils/language-detector.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +1 -1
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +34 -6
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/package.json +81 -80
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
import type { FetchUrlsInput } from '../../config/types.js';
|
|
2
2
|
export declare const FETCH_URLS_TOOL_NAME = "fetch-urls";
|
|
3
3
|
export declare const FETCH_URLS_TOOL_DESCRIPTION = "Fetches multiple URLs in parallel and converts them to AI-readable format (JSONL or Markdown). Supports concurrency control and continues on individual failures.";
|
|
4
|
-
export declare function fetchUrlsToolHandler(input: FetchUrlsInput): Promise<
|
|
4
|
+
export declare function fetchUrlsToolHandler(input: FetchUrlsInput): Promise<{
|
|
5
|
+
content: {
|
|
6
|
+
type: 'text';
|
|
7
|
+
text: string;
|
|
8
|
+
}[];
|
|
9
|
+
structuredContent?: Record<string, unknown>;
|
|
10
|
+
isError?: boolean;
|
|
11
|
+
}>;
|
|
5
12
|
//# sourceMappingURL=fetch-urls.tool.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-urls.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-urls.tool.d.ts","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAEV,cAAc,EAGf,MAAM,uBAAuB,CAAC;AAwB/B,eAAO,MAAM,oBAAoB,eAAe,CAAC;AACjD,eAAO,MAAM,2BAA2B,sKAC6H,CAAC;AA6HtK,wBAAsB,oBAAoB,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC;IACzE,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC1C,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC,CAsHD"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
1
2
|
import * as cache from '../../services/cache.js';
|
|
2
|
-
import { extractContent } from '../../services/extractor.js';
|
|
3
|
+
import { extractContent, extractMetadataWithCheerio, } from '../../services/extractor.js';
|
|
3
4
|
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
4
5
|
import { logDebug, logError, logWarn } from '../../services/logger.js';
|
|
5
6
|
import { parseHtml } from '../../services/parser.js';
|
|
@@ -32,14 +33,36 @@ async function processSingleUrl(url, options) {
|
|
|
32
33
|
}
|
|
33
34
|
}
|
|
34
35
|
const fetchResult = await fetchUrlWithRetry(normalizedUrl);
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
let sourceHtml;
|
|
37
|
+
let title;
|
|
38
|
+
let metadata;
|
|
39
|
+
// Fast path: Skip JSDOM entirely when extractMainContent is false
|
|
40
|
+
if (!options.extractMainContent) {
|
|
41
|
+
sourceHtml = fetchResult.html;
|
|
42
|
+
const $ = cheerio.load(fetchResult.html);
|
|
43
|
+
const extractedMeta = extractMetadataWithCheerio($);
|
|
44
|
+
({ title } = extractedMeta);
|
|
45
|
+
if (options.includeMetadata) {
|
|
46
|
+
metadata = {
|
|
47
|
+
type: 'metadata',
|
|
48
|
+
url: normalizedUrl,
|
|
49
|
+
fetchedAt: new Date().toISOString(),
|
|
50
|
+
title: extractedMeta.title,
|
|
51
|
+
description: extractedMeta.description,
|
|
52
|
+
author: extractedMeta.author,
|
|
53
|
+
};
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
// Slow path: Use JSDOM only when article extraction is needed
|
|
58
|
+
const { article, metadata: extractedMeta } = extractContent(fetchResult.html, normalizedUrl, {
|
|
59
|
+
extractArticle: true,
|
|
60
|
+
});
|
|
61
|
+
const useArticle = shouldUseArticle(true, article);
|
|
62
|
+
metadata = buildMetadata(normalizedUrl, article, extractedMeta, useArticle, options.includeMetadata);
|
|
63
|
+
sourceHtml = useArticle ? article.content : fetchResult.html;
|
|
64
|
+
title = useArticle ? article.title : extractedMeta.title;
|
|
65
|
+
}
|
|
43
66
|
let content;
|
|
44
67
|
let contentBlocks;
|
|
45
68
|
if (options.format === 'markdown') {
|
|
@@ -111,7 +134,22 @@ export async function fetchUrlsToolHandler(input) {
|
|
|
111
134
|
format,
|
|
112
135
|
}));
|
|
113
136
|
// Execute with concurrency control
|
|
114
|
-
const settledResults = await runWithConcurrency(concurrency, tasks
|
|
137
|
+
const settledResults = await runWithConcurrency(concurrency, tasks, {
|
|
138
|
+
onProgress: (completed, total) => {
|
|
139
|
+
logDebug('Batch progress', {
|
|
140
|
+
completed,
|
|
141
|
+
total,
|
|
142
|
+
percentage: Math.round((completed / total) * 100),
|
|
143
|
+
});
|
|
144
|
+
},
|
|
145
|
+
});
|
|
146
|
+
// Helper to safely extract error message from rejected promise
|
|
147
|
+
const getErrorMessage = ({ reason }) => {
|
|
148
|
+
const typedReason = reason;
|
|
149
|
+
return typedReason instanceof Error
|
|
150
|
+
? typedReason.message
|
|
151
|
+
: String(typedReason);
|
|
152
|
+
};
|
|
115
153
|
// Process results
|
|
116
154
|
const results = settledResults.map((result, index) => {
|
|
117
155
|
if (result.status === 'fulfilled') {
|
|
@@ -119,13 +157,11 @@ export async function fetchUrlsToolHandler(input) {
|
|
|
119
157
|
}
|
|
120
158
|
else {
|
|
121
159
|
// Promise rejection (shouldn't happen as processSingleUrl catches errors)
|
|
122
|
-
const reason = result.reason;
|
|
123
|
-
const errorMessage = reason instanceof Error ? reason.message : String(reason);
|
|
124
160
|
return {
|
|
125
161
|
url: validUrls[index] ?? 'unknown',
|
|
126
162
|
success: false,
|
|
127
163
|
cached: false,
|
|
128
|
-
error:
|
|
164
|
+
error: getErrorMessage(result),
|
|
129
165
|
errorCode: 'PROMISE_REJECTED',
|
|
130
166
|
};
|
|
131
167
|
}
|
|
@@ -133,8 +169,9 @@ export async function fetchUrlsToolHandler(input) {
|
|
|
133
169
|
// Check if we should fail fast on errors
|
|
134
170
|
if (!continueOnError) {
|
|
135
171
|
const firstError = results.find((r) => !r.success);
|
|
136
|
-
if (firstError) {
|
|
137
|
-
|
|
172
|
+
if (firstError && !firstError.success) {
|
|
173
|
+
const errorMsg = firstError.error ?? 'Unknown error';
|
|
174
|
+
return createToolErrorResponse(`Batch failed: ${errorMsg}`, firstError.url, firstError.errorCode ?? 'BATCH_ERROR');
|
|
138
175
|
}
|
|
139
176
|
}
|
|
140
177
|
return createBatchResponse(results);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-urls.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetch-urls.tool.js","sourceRoot":"","sources":["../../../src/tools/handlers/fetch-urls.tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AASnC,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EACL,cAAc,EACd,0BAA0B,GAC3B,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AACvE,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mCAAmC,CAAC;AAC5E,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,eAAe,GAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AAEnE,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,MAAM,CAAC,MAAM,oBAAoB,GAAG,YAAY,CAAC;AACjD,MAAM,CAAC,MAAM,2BAA2B,GACtC,mKAAmK,CAAC;AAEtK,MAAM,QAAQ,GAAG,EAAE,CAAC;AACpB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAS9B,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,OAAuB;IAEvB,IAAI,CAAC;QACH,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;QACnD,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1E,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;QAErE,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnC,IAAI,MAAM,EAAE,CAAC;gBACX,QAAQ,CAAC,iBAAiB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;gBACpD,OAAO;oBACL,GAAG,EAAE,aAAa;oBAClB,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,MAAM,EAAE,IAAI;iBACb,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,aAAa,CAAC,CAAC;QAE3D,IAAI,UAAkB,CAAC;QACvB,IAAI,KAAyB,CAAC;QAC9B,IAAI,QAAmC,CAAC;QAExC,kEAAkE;QAClE,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,CAAC;YAChC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC;YAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YACzC,MAAM,aAAa,GAAG,0BAA0B,CAAC,CAAC,CAAC,CAAC;YACpD,CAAC,EAAE,KAAK,EAAE,GAAG,aAAa,CAAC,CAAC;YAE5B,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,QAAQ,GAAG;oBACT,IAAI,EAAE,UAAmB;oBACzB,GAAG,EAAE,aAAa;oBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;oBACnC,KAAK,EAAE,aAAa,CAAC,KAAK;oBAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;oBACtC,MAAM,EAAE,aAAa,CAAC,MAAM;iBAC7B,CAAC;YACJ,CAAC;QACH,CAAC;aAAM,CAAC;YACN,8DAA8D;YAC9D,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CACzD,WAAW,CAAC,IAAI,EAChB,aAAa,EACb;gBACE,cAAc,EAAE,IAAI;aACrB,CACF,CAAC;YACF,MAAM,UAAU,GAAG,gBAAgB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACnD,QAAQ,GAAG,aAAa,CACtB,aAAa,EACb,OAAO,EACP,aAAa,EACb,UAAU,EACV,OAAO,CAAC,eAAe,CACxB,CAAC;YACF,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC;YAC7D,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;QAC3D,CAAC;QAED,IAAI,OAAe,CAAC;QACpB,IAAI,aAAiC,CAAC;QAEtC,IAAI,OAAO,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAClC,OAAO,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;YACrC,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC;YAC9B,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QACtC,CAAC;QAED,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,GAAG,eAAe,CACnD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;QACF,OAAO,GAAG,gBAAgB,CAAC;QAC3B,IAAI,QAAQ;YAAE,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE3C,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,IAAI;YACb,KAAK;YACL,OAAO;YACP,aAAa;YACb,MAAM,EAAE,KAAK;SACd,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QAC3D,MAAM,SAAS,GACb,KAAK,YAAY,KAAK;YACtB,MAAM,IAAI,KAAK;YACf,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ;YAC5B,CAAC,CAAC,KAAK,CAAC,IAAI;YACZ,CAAC,CAAC,aAAa,CAAC;QAEpB,OAAO,CAAC,6BAA6B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC;QACrE,OAAO;YACL,GAAG;YACH,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,KAAK;YACb,KAAK,EAAE,YAAY;YACnB,SAAS;SACV,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAqB;IAK9D,IAAI,CAAC;QACH,8EAA8E;QAC9E,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,uBAAuB,CAC5B,8BAA8B,EAC9B,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,yBAAyB;QACzB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;YACjC,OAAO,uBAAuB,CAC5B,WAAW,QAAQ,yBAAyB,EAC5C,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CACjC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAC1D,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,uBAAuB,CAC5B,wBAAwB,EACxB,EAAE,EACF,kBAAkB,CACnB,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,WAAW,IAAI,mBAAmB,CAAC,EACrD,CAAC,CACF,CAAC;QACF,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC;QACtD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC;QAEvC,QAAQ,CAAC,0BAA0B,EAAE;YACnC,QAAQ,EAAE,SAAS,CAAC,MAAM;YAC1B,WAAW;YACX,MAAM;SACP,CAAC,CAAC;QAEH,4BAA4B;QAC5B,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CACzB,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE,CAClB,gBAAgB,CAAC,GAAG,EAAE;YACpB,kBAAkB,EAAE,KAAK,CAAC,kBAAkB,IAAI,IAAI;YACpD,eAAe,EAAE,KAAK,CAAC,eAAe,IAAI,IAAI;YAC9C,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;YACxC,MAAM;SACP,CAAC,CACL,CAAC;QAEF,mCAAmC;QACnC,MAAM,cAAc,GAAG,MAAM,kBAAkB,CAAC,WAAW,EAAE,KAAK,EAAE;YAClE,UAAU,EAAE,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;gBAC/B,QAAQ,CAAC,gBAAgB,EAAE;oBACzB,SAAS;oBACT,KAAK;oBACL,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;iBAClD,CAAC,CAAC;YACL,CAAC;SACF,CAAC,CAAC;QAEH,+DAA+D;QAC/D,MAAM,eAAe,GAAG,CAAC,EAAE,MAAM,EAAyB,EAAU,EAAE;YACpE,MAAM,WAAW,GAAY,MAAM,CAAC;YACpC,OAAO,WAAW,YAAY,KAAK;gBACjC,CAAC,CAAC,WAAW,CAAC,OAAO;gBACrB,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAC1B,CAAC,CAAC;QAEF,kBAAkB;QAClB,MAAM,OAAO,GAAqB,cAAc,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YACrE,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClC,OAAO,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC;iBAAM,CAAC;gBACN,0EAA0E;gBAC1E,OAAO;oBACL,GAAG,EAAE,SAAS,CAAC,KAAK,CAAC,IAAI,SAAS;oBAClC,OAAO,EAAE,KAAc;oBACvB,MAAM,EAAE,KAAc;oBACtB,KAAK,EAAE,eAAe,CAAC,MAAM,CAAC;oBAC9B,SAAS,EAAE,kBAAkB;iBAC9B,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,yCAAyC;QACzC,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACnD,IAAI,UAAU,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;gBACtC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;gBACrD,OAAO,uBAAuB,CAC5B,iBAAiB,QAAQ,EAAE,EAC3B,UAAU,CAAC,GAAG,EACd,UAAU,CAAC,SAAS,IAAI,aAAa,CACtC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,mBAAmB,CAAC,OAAO,CAAC,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,uBAAuB,EACvB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QAEF,OAAO,uBAAuB,CAC5B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB,EAC/D,EAAE,EACF,aAAa,CACd,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -28,7 +28,7 @@ export function truncateContent(content, maxLength) {
|
|
|
28
28
|
return { content, truncated: false };
|
|
29
29
|
}
|
|
30
30
|
return {
|
|
31
|
-
content: content.substring(0, maxLength)
|
|
31
|
+
content: `${content.substring(0, maxLength)}\n...[truncated]`,
|
|
32
32
|
truncated: true,
|
|
33
33
|
};
|
|
34
34
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAO/C,MAAM,UAAU,gBAAgB,CAC9B,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,CACL,kBAAkB,IAAI,MAAM,CAAC,UAAU,CAAC,kBAAkB,IAAI,CAAC,CAAC,OAAO,CACxE,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,UAAmB,EACnB,eAAwB;IAExB,IAAI,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IAC7E,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,UAAU,IAAI,OAAO;QAC1B,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB;IAElB,IAAI,CAAC,SAAS,IAAI,SAAS,IAAI,CAAC,IAAI,OAAO,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAChE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,
|
|
1
|
+
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAO/C,MAAM,UAAU,gBAAgB,CAC9B,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,CACL,kBAAkB,IAAI,MAAM,CAAC,UAAU,CAAC,kBAAkB,IAAI,CAAC,CAAC,OAAO,CACxE,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,UAAmB,EACnB,eAAwB;IAExB,IAAI,CAAC,eAAe,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IAC7E,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,UAAU,IAAI,OAAO;QAC1B,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB;IAElB,IAAI,CAAC,SAAS,IAAI,SAAS,IAAI,CAAC,IAAI,OAAO,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAChE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IACD,OAAO;QACL,OAAO,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,kBAAkB;QAC7D,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,oBAAoB,EACpB,cAAc,EACf,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,oBAAoB,EACpB,cAAc,EACf,MAAM,uBAAuB,CAAC;AA+B/B,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAiF5B"}
|
|
@@ -2,10 +2,24 @@ import * as cache from '../../services/cache.js';
|
|
|
2
2
|
import { fetchUrlWithRetry } from '../../services/fetcher.js';
|
|
3
3
|
import { logDebug } from '../../services/logger.js';
|
|
4
4
|
import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
|
|
5
|
+
const pendingRequests = new Map();
|
|
6
|
+
const DEDUPLICATION_TIMEOUT = 60000; // 1 minute TTL
|
|
7
|
+
// Cleanup stale pending requests every 30 seconds to prevent memory leak
|
|
8
|
+
const cleanupInterval = setInterval(() => {
|
|
9
|
+
const now = Date.now();
|
|
10
|
+
for (const [key, value] of pendingRequests.entries()) {
|
|
11
|
+
if (now - value.timestamp > DEDUPLICATION_TIMEOUT) {
|
|
12
|
+
pendingRequests.delete(key);
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
}, 30000);
|
|
16
|
+
// Allow Node.js to exit if this is the only active timer
|
|
17
|
+
cleanupInterval.unref();
|
|
5
18
|
export async function executeFetchPipeline(options) {
|
|
6
|
-
const { url, cacheNamespace, customHeaders, retries, transform, serialize = JSON.stringify, deserialize = (cached) => JSON.parse(cached), } = options;
|
|
19
|
+
const { url, cacheNamespace, customHeaders, retries, signal, timeout, transform, serialize = JSON.stringify, deserialize = (cached) => JSON.parse(cached), } = options;
|
|
7
20
|
const normalizedUrl = validateAndNormalizeUrl(url);
|
|
8
21
|
const cacheKey = cache.createCacheKey(cacheNamespace, normalizedUrl);
|
|
22
|
+
// Check cache first
|
|
9
23
|
if (cacheKey) {
|
|
10
24
|
const cached = cache.get(cacheKey);
|
|
11
25
|
if (cached) {
|
|
@@ -19,19 +33,46 @@ export async function executeFetchPipeline(options) {
|
|
|
19
33
|
};
|
|
20
34
|
}
|
|
21
35
|
}
|
|
22
|
-
|
|
23
|
-
const
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
cache.set(cacheKey, serialized);
|
|
36
|
+
// Check for pending request to prevent duplicate fetches
|
|
37
|
+
const dedupeKey = `${cacheNamespace}:${normalizedUrl}`;
|
|
38
|
+
const pending = pendingRequests.get(dedupeKey);
|
|
39
|
+
if (pending) {
|
|
40
|
+
logDebug('Request deduplication hit', { url: normalizedUrl });
|
|
41
|
+
return pending.promise;
|
|
29
42
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
43
|
+
// Build fetch options
|
|
44
|
+
const fetchOptions = {
|
|
45
|
+
customHeaders,
|
|
46
|
+
signal,
|
|
47
|
+
timeout,
|
|
35
48
|
};
|
|
49
|
+
// Create new request
|
|
50
|
+
const request = (async () => {
|
|
51
|
+
try {
|
|
52
|
+
logDebug('Fetching URL', { url: normalizedUrl, retries });
|
|
53
|
+
const fetchResult = await fetchUrlWithRetry(normalizedUrl, fetchOptions, retries);
|
|
54
|
+
const { html } = fetchResult;
|
|
55
|
+
const data = transform(html, normalizedUrl);
|
|
56
|
+
if (cacheKey) {
|
|
57
|
+
const serialized = serialize(data);
|
|
58
|
+
cache.set(cacheKey, serialized);
|
|
59
|
+
}
|
|
60
|
+
return {
|
|
61
|
+
data,
|
|
62
|
+
fromCache: false,
|
|
63
|
+
url: normalizedUrl,
|
|
64
|
+
fetchedAt: new Date().toISOString(),
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
finally {
|
|
68
|
+
// Clean up pending request
|
|
69
|
+
pendingRequests.delete(dedupeKey);
|
|
70
|
+
}
|
|
71
|
+
})();
|
|
72
|
+
pendingRequests.set(dedupeKey, {
|
|
73
|
+
promise: request,
|
|
74
|
+
timestamp: Date.now(),
|
|
75
|
+
});
|
|
76
|
+
return request;
|
|
36
77
|
}
|
|
37
78
|
//# sourceMappingURL=fetch-pipeline.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AAEjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAEpD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAQvE,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAC1D,MAAM,qBAAqB,GAAG,KAAK,CAAC,CAAC,eAAe;AAEpD,yEAAyE;AACzE,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;IACvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,eAAe,CAAC,OAAO,EAAE,EAAE,CAAC;QACrD,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,GAAG,qBAAqB,EAAE,CAAC;YAClD,eAAe,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;AACH,CAAC,EAAE,KAAK,CAAC,CAAC;AAEV,yDAAyD;AACzD,eAAe,CAAC,KAAK,EAAE,CAAC;AAExB,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,EACJ,GAAG,EACH,cAAc,EACd,aAAa,EACb,OAAO,EACP,MAAM,EACN,OAAO,EACP,SAAS,EACT,SAAS,GAAG,IAAI,CAAC,SAAS,EAC1B,WAAW,GAAG,CAAC,MAAc,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAM,GAC1D,GAAG,OAAO,CAAC;IAEZ,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAErE,oBAAoB;IACpB,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,MAAM,EAAE,CAAC;YACX,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;YACzE,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAEzC,OAAO;gBACL,IAAI;gBACJ,SAAS,EAAE,IAAI;gBACf,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;IACH,CAAC;IAED,yDAAyD;IACzD,MAAM,SAAS,GAAG,GAAG,cAAc,IAAI,aAAa,EAAE,CAAC;IACvD,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC/C,IAAI,OAAO,EAAE,CAAC;QACZ,QAAQ,CAAC,2BAA2B,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,OAAO,OAAO,CAAC,OAAqC,CAAC;IACvD,CAAC;IAED,sBAAsB;IACtB,MAAM,YAAY,GAAiB;QACjC,aAAa;QACb,MAAM;QACN,OAAO;KACR,CAAC;IAEF,qBAAqB;IACrB,MAAM,OAAO,GAAG,CAAC,KAAK,IAAI,EAAE;QAC1B,IAAI,CAAC;YACH,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC,CAAC;YAC1D,MAAM,WAAW,GAAG,MAAM,iBAAiB,CACzC,aAAa,EACb,YAAY,EACZ,OAAO,CACR,CAAC;YACF,MAAM,EAAE,IAAI,EAAE,GAAG,WAAW,CAAC;YAC7B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;YAE5C,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;gBACnC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YAClC,CAAC;YAED,OAAO;gBACL,IAAI;gBACJ,SAAS,EAAE,KAAK;gBAChB,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,2BAA2B;YAC3B,eAAe,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE;QAC7B,OAAO,EAAE,OAA2C;QACpD,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;KACtB,CAAC,CAAC;IACH,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAiKxD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAsB7E"}
|
|
@@ -1,32 +1,12 @@
|
|
|
1
1
|
import TurndownService from 'turndown';
|
|
2
2
|
import { detectLanguage } from '../utils/language-detector.js';
|
|
3
|
-
//
|
|
3
|
+
// Markdown-specific noise patterns (minimal set - content-cleaner.ts handles most filtering)
|
|
4
|
+
// Only patterns that commonly appear as standalone lines in markdown output
|
|
4
5
|
const NOISE_LINE_PATTERNS = [
|
|
5
|
-
//
|
|
6
|
-
/^\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago$/i,
|
|
7
|
-
/^(updated|modified|edited|created|published|posted)\s+\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago$/i,
|
|
8
|
-
/^(just now|recently|today|yesterday)$/i,
|
|
9
|
-
/^(updated|modified|edited|created|published)\s*:?\s*$/i,
|
|
10
|
-
/^last\s+updated\s*:?$/i,
|
|
11
|
-
/^(last\s+)?(updated|modified|edited)\s*:?\s*\d/i,
|
|
12
|
-
// Single letters or panel labels (from splitter examples, etc.)
|
|
6
|
+
// Single letters or panel labels (common in code examples)
|
|
13
7
|
/^[A-Z]$/,
|
|
14
8
|
/^Panel\s+[A-Z]$/i,
|
|
15
|
-
|
|
16
|
-
// Button/action labels
|
|
17
|
-
/^(share|copy|like|follow|subscribe|download|print|save|bookmark)$/i,
|
|
18
|
-
/^(copy to clipboard|copied!?|copy code|copy link)$/i,
|
|
19
|
-
/^(click to copy|expand|collapse|show more|show less|load more)$/i,
|
|
20
|
-
/^(view more|read more|see more|see all|view all)$/i,
|
|
21
|
-
/^(try it|run|execute|play|preview|demo|live demo)$/i,
|
|
22
|
-
/^(edit|delete|remove|add|cancel|confirm|submit|reset|clear)$/i,
|
|
23
|
-
// Navigation
|
|
24
|
-
/^(next|previous|prev|back|forward|home|menu|close|open)$/i,
|
|
25
|
-
/^(scroll to top|back to top|top)$/i,
|
|
26
|
-
// Interactive prompts
|
|
27
|
-
/^(drag|click|tap|swipe|hover)\s+(to|the|here)/i,
|
|
28
|
-
/^(drag the|move the|resize the)/i,
|
|
29
|
-
// Empty structural elements
|
|
9
|
+
// Empty structural elements that survive HTML->Markdown conversion
|
|
30
10
|
/^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$/,
|
|
31
11
|
/^[,;:\-–—]+$/,
|
|
32
12
|
/^\[\d+\]$/,
|
|
@@ -153,7 +133,7 @@ function createFrontmatter(metadata) {
|
|
|
153
133
|
}
|
|
154
134
|
export function htmlToMarkdown(html, metadata) {
|
|
155
135
|
if (!html || typeof html !== 'string') {
|
|
156
|
-
return metadata ? createFrontmatter(metadata)
|
|
136
|
+
return metadata ? `${createFrontmatter(metadata)}\n\n` : '';
|
|
157
137
|
}
|
|
158
138
|
let content = '';
|
|
159
139
|
try {
|
|
@@ -165,10 +145,10 @@ export function htmlToMarkdown(html, metadata) {
|
|
|
165
145
|
content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
|
|
166
146
|
}
|
|
167
147
|
catch {
|
|
168
|
-
return metadata ? createFrontmatter(metadata)
|
|
148
|
+
return metadata ? `${createFrontmatter(metadata)}\n\n` : '';
|
|
169
149
|
}
|
|
170
150
|
if (metadata) {
|
|
171
|
-
return createFrontmatter(metadata)
|
|
151
|
+
return `${createFrontmatter(metadata)}\n\n${content}`;
|
|
172
152
|
}
|
|
173
153
|
return content;
|
|
174
154
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAIvC,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,
|
|
1
|
+
{"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAIvC,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,6FAA6F;AAC7F,4EAA4E;AAC5E,MAAM,mBAAmB,GAAsB;IAC7C,2DAA2D;IAC3D,SAAS;IACT,kBAAkB;IAElB,mEAAmE;IACnE,8BAA8B;IAC9B,cAAc;IACd,WAAW;IACX,WAAW;CACH,CAAC;AAEX;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,uBAAuB;IACvB,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAE3B,4DAA4D;IAC5D,IACE,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EACvB,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,+BAA+B;IAC/B,KAAK,MAAM,OAAO,IAAI,mBAAmB,EAAE,CAAC;QAC1C,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,0CAA0C;IAC1C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,WAAW,GAAG,KAAK,CAAC;IAExB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,8BAA8B;QAC9B,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAClC,WAAW,GAAG,CAAC,WAAW,CAAC;YAC3B,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,IAAI,WAAW,EAAE,CAAC;YAChB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,yCAAyC;QACzC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;YACvB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,WAAW,EAAE,GAAG;IAChB,gBAAgB,EAAE,GAAG;CACtB,CAAC,CAAC;AAEH,wBAAwB;AACxB,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE;IAC9B,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC;IAC3E,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;CACtB,CAAC,CAAC;AAEH,uDAAuD;AACvD,QAAQ,CAAC,OAAO,CAAC,6BAA6B,EAAE;IAC9C,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;QACxB,OAAO,CACL,OAAO,CAAC,cAAc,KAAK,QAAQ;YACnC,IAAI,CAAC,QAAQ,KAAK,KAAK;YACvB,IAAI,CAAC,UAAU,KAAK,IAAI;YACxB,IAAI,CAAC,UAAU,CAAC,QAAQ,KAAK,MAAM,CACpC,CAAC;IACJ,CAAC;IACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAyB,CAAC;QAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,WAAW,IAAI,EAAE,CAAC;QAExC,iCAAiC;QACjC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACvD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAE9D,MAAM,aAAa,GACjB,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC;YAChC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;YAC5B,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE3B,kEAAkE;QAClE,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC,CAAC,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAElE,OAAO,aAAa,QAAQ,KAAK,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,cAAc,CAAC;IACzE,CAAC;CACF,CAAC,CAAC;AAEH,8BAA8B;AAC9B,MAAM,kBAAkB,GAAG,0BAA0B,CAAC;AACtD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AACjE,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,MAAM,cAAc,GAAG,KAAK,CAAC;AAC7B,MAAM,UAAU,GAAG,KAAK,CAAC;AACzB,MAAM,iBAAiB,GAAG,SAAS,CAAC;AAEpC,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,YAAY,GAChB,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QAC9B,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QACnB,KAAK,KAAK,EAAE;QACZ,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QACxB,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,CAAC,YAAY;QAAE,OAAO,KAAK,CAAC;IAEhC,OAAO,IAAI,KAAK;SACb,OAAO,CAAC,gBAAgB,EAAE,MAAM,CAAC;SACjC,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC;SAC5B,OAAO,CAAC,cAAc,EAAE,KAAK,CAAC;SAC9B,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC;AACnC,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC;IACtB,IAAI,QAAQ,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5E,IAAI,QAAQ,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACzE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,QAAQ,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,CAAC;QACH,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,yCAAyC;QACzC,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;QACxC,0DAA0D;QAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,QAAQ,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,GAAG,iBAAiB,CAAC,QAAQ,CAAC,OAAO,OAAO,EAAE,CAAC;IACxD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -1,2 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
interface ConcurrencyOptions {
|
|
2
|
+
onProgress?: (completed: number, total: number) => void;
|
|
3
|
+
}
|
|
4
|
+
export declare function runWithConcurrency<T>(limit: number, tasks: (() => Promise<T>)[], options?: ConcurrencyOptions): Promise<PromiseSettledResult<T>[]>;
|
|
5
|
+
export {};
|
|
2
6
|
//# sourceMappingURL=concurrency.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAEA,UAAU,kBAAkB;IAC1B,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACzD;AAsBD,wBAAsB,kBAAkB,CAAC,CAAC,EACxC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,EAC3B,OAAO,CAAC,EAAE,kBAAkB,GAC3B,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,CAAC,CAiBpC"}
|
|
@@ -18,8 +18,21 @@ function createConcurrencyLimiter(limit) {
|
|
|
18
18
|
}
|
|
19
19
|
};
|
|
20
20
|
}
|
|
21
|
-
export async function runWithConcurrency(limit, tasks) {
|
|
21
|
+
export async function runWithConcurrency(limit, tasks, options) {
|
|
22
22
|
const limiter = createConcurrencyLimiter(limit);
|
|
23
|
-
|
|
23
|
+
const total = tasks.length;
|
|
24
|
+
let completed = 0;
|
|
25
|
+
const wrappedTasks = tasks.map((task) => async () => {
|
|
26
|
+
try {
|
|
27
|
+
return await limiter(task);
|
|
28
|
+
}
|
|
29
|
+
finally {
|
|
30
|
+
completed++;
|
|
31
|
+
if (options?.onProgress) {
|
|
32
|
+
options.onProgress(completed, total);
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
});
|
|
36
|
+
return Promise.allSettled(wrappedTasks.map(async (task) => task()));
|
|
24
37
|
}
|
|
25
38
|
//# sourceMappingURL=concurrency.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAMA,SAAS,wBAAwB,CAAC,KAAa;IAC7C,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;IACxD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,KAAK,GAAmB,EAAE,CAAC;IAEjC,OAAO,KAAK,EAAK,EAAoB,EAAc,EAAE;QACnD,OAAO,MAAM,IAAI,cAAc,EAAE,CAAC;YAChC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,EAAE,CAAC;QACT,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;gBAAS,CAAC;YACT,MAAM,EAAE,CAAC;YACT,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;QACnB,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AACD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,KAAa,EACb,KAA2B,EAC3B,OAA4B;IAE5B,MAAM,OAAO,GAAG,wBAAwB,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;IAC3B,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE;QAClD,IAAI,CAAC;YACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;gBAAS,CAAC;YACT,SAAS,EAAE,CAAC;YACZ,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;gBACxB,OAAO,CAAC,UAAU,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;AACtE,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content-cleaner.d.ts","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;
|
|
1
|
+
{"version":3,"file":"content-cleaner.d.ts","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAsKH;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAsB1D;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAuBxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAc1D;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAsB3D"}
|