@j0hanz/superfetch 1.0.3 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +615 -590
- package/dist/config/index.d.ts +5 -0
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +5 -0
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +5 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/errors/app-error.d.ts +4 -0
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +7 -0
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +94 -17
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +4 -2
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +46 -13
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +2 -7
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/cached-content.d.ts +4 -0
- package/dist/resources/cached-content.d.ts.map +1 -0
- package/dist/resources/cached-content.js +68 -0
- package/dist/resources/cached-content.js.map +1 -0
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +39 -1
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +10 -0
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +11 -0
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +72 -8
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +0 -4
- package/dist/services/card-extractor.d.ts.map +1 -1
- package/dist/services/card-extractor.js +17 -5
- package/dist/services/card-extractor.js.map +1 -1
- package/dist/services/extractor.d.ts +7 -1
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +16 -9
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +10 -1
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +162 -36
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +41 -29
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +5 -10
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +4 -0
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -12
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +1 -2
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +4 -12
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +8 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.js +67 -16
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
- package/dist/tools/utils/common.js +1 -1
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +90 -20
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +8 -28
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/concurrency.d.ts +5 -1
- package/dist/utils/concurrency.d.ts.map +1 -1
- package/dist/utils/concurrency.js +15 -2
- package/dist/utils/concurrency.js.map +1 -1
- package/dist/utils/content-cleaner.d.ts.map +1 -1
- package/dist/utils/content-cleaner.js +124 -108
- package/dist/utils/content-cleaner.js.map +1 -1
- package/dist/utils/language-detector.d.ts +1 -1
- package/dist/utils/language-detector.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +1 -1
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +36 -6
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +10 -0
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +43 -5
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +83 -80
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import * as cache from '../services/cache.js';
|
|
3
|
+
export function registerCachedContentResource(server) {
|
|
4
|
+
server.registerResource('cached-content', new ResourceTemplate('superfetch://cache/{namespace}/{urlHash}', {
|
|
5
|
+
list: undefined,
|
|
6
|
+
}), {
|
|
7
|
+
title: 'Cached Content',
|
|
8
|
+
description: 'Access previously fetched web content from cache. Namespace: url, links, markdown. UrlHash: SHA-256 hash of the URL.',
|
|
9
|
+
mimeType: 'application/json',
|
|
10
|
+
}, (uri, params) => {
|
|
11
|
+
const namespace = params.namespace;
|
|
12
|
+
const urlHash = params.urlHash;
|
|
13
|
+
if (!namespace || !urlHash) {
|
|
14
|
+
throw new Error('Both namespace and urlHash parameters are required');
|
|
15
|
+
}
|
|
16
|
+
const cacheKey = `${namespace}:${urlHash}`;
|
|
17
|
+
const cached = cache.get(cacheKey);
|
|
18
|
+
if (!cached) {
|
|
19
|
+
throw new Error(`Content not found in cache for key: ${cacheKey}. Use superfetch://stats to see available cache entries.`);
|
|
20
|
+
}
|
|
21
|
+
return {
|
|
22
|
+
contents: [
|
|
23
|
+
{
|
|
24
|
+
uri: uri.href,
|
|
25
|
+
mimeType: 'application/json',
|
|
26
|
+
text: cached.content,
|
|
27
|
+
},
|
|
28
|
+
],
|
|
29
|
+
};
|
|
30
|
+
});
|
|
31
|
+
// Helper resource to list cached URLs
|
|
32
|
+
server.registerResource('cached-urls', 'superfetch://cache/list', {
|
|
33
|
+
title: 'Cached URLs List',
|
|
34
|
+
description: 'List all URLs currently in cache with their namespaces',
|
|
35
|
+
mimeType: 'application/json',
|
|
36
|
+
}, (uri) => {
|
|
37
|
+
const stats = cache.getStats();
|
|
38
|
+
const cacheList = {
|
|
39
|
+
totalEntries: stats.size + stats.htmlCacheSize,
|
|
40
|
+
entries: cache.keys().map((key) => {
|
|
41
|
+
const parts = key.split(':');
|
|
42
|
+
const namespace = parts[0] ?? 'unknown';
|
|
43
|
+
const urlHash = parts.slice(1).join(':') || 'unknown';
|
|
44
|
+
return {
|
|
45
|
+
namespace,
|
|
46
|
+
urlHash,
|
|
47
|
+
resourceUri: `superfetch://cache/${namespace}/${urlHash}`,
|
|
48
|
+
};
|
|
49
|
+
}),
|
|
50
|
+
timestamp: new Date().toISOString(),
|
|
51
|
+
};
|
|
52
|
+
return {
|
|
53
|
+
contents: [
|
|
54
|
+
{
|
|
55
|
+
uri: uri.href,
|
|
56
|
+
mimeType: 'application/json',
|
|
57
|
+
text: JSON.stringify(cacheList, null, 2),
|
|
58
|
+
},
|
|
59
|
+
],
|
|
60
|
+
};
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
// Subscription notifications - placeholder until MCP SDK fully supports sendResourceUpdated
|
|
64
|
+
export function setupCacheSubscriptions() {
|
|
65
|
+
// No-op: SDK doesn't support resource update notifications yet
|
|
66
|
+
// When it does, listen to cache.onUpdate() and call server.sendResourceUpdated()
|
|
67
|
+
}
|
|
68
|
+
//# sourceMappingURL=cached-content.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cached-content.js","sourceRoot":"","sources":["../../src/resources/cached-content.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,yCAAyC,CAAC;AAG3E,OAAO,KAAK,KAAK,MAAM,sBAAsB,CAAC;AAE9C,MAAM,UAAU,6BAA6B,CAAC,MAAiB;IAC7D,MAAM,CAAC,gBAAgB,CACrB,gBAAgB,EAChB,IAAI,gBAAgB,CAAC,0CAA0C,EAAE;QAC/D,IAAI,EAAE,SAAS;KAChB,CAAC,EACF;QACE,KAAK,EAAE,gBAAgB;QACvB,WAAW,EACT,sHAAsH;QACxH,QAAQ,EAAE,kBAAkB;KAC7B,EACD,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;QACd,MAAM,SAAS,GAAG,MAAM,CAAC,SAAmB,CAAC;QAC7C,MAAM,OAAO,GAAG,MAAM,CAAC,OAAiB,CAAC;QAEzC,IAAI,CAAC,SAAS,IAAI,CAAC,OAAO,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;QACxE,CAAC;QAED,MAAM,QAAQ,GAAG,GAAG,SAAS,IAAI,OAAO,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAEnC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CACb,uCAAuC,QAAQ,0DAA0D,CAC1G,CAAC;QACJ,CAAC;QAED,OAAO;YACL,QAAQ,EAAE;gBACR;oBACE,GAAG,EAAE,GAAG,CAAC,IAAI;oBACb,QAAQ,EAAE,kBAAkB;oBAC5B,IAAI,EAAE,MAAM,CAAC,OAAO;iBACrB;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,sCAAsC;IACtC,MAAM,CAAC,gBAAgB,CACrB,aAAa,EACb,yBAAyB,EACzB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,wDAAwD;QACrE,QAAQ,EAAE,kBAAkB;KAC7B,EACD,CAAC,GAAG,EAAE,EAAE;QACN,MAAM,KAAK,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG;YAChB,YAAY,EAAE,KAAK,CAAC,IAAI,GAAG,KAAK,CAAC,aAAa;YAC9C,OAAO,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,GAAW,EAAE,EAAE;gBACxC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAC7B,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;gBACxC,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,SAAS,CAAC;gBACtD,OAAO;oBACL,SAAS;oBACT,OAAO;oBACP,WAAW,EAAE,sBAAsB,SAAS,IAAI,OAAO,EAAE;iBAC1D,CAAC;YACJ,CAAC,CAAC;YACF,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE;gBACR;oBACE,GAAG,EAAE,GAAG,CAAC,IAAI;oBACb,QAAQ,EAAE,kBAAkB;oBAC5B,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;iBACzC;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED,4FAA4F;AAC5F,MAAM,UAAU,uBAAuB;IACrC,+DAA+D;IAC/D,iFAAiF;AACnF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/resources/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/resources/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAWzE,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CA0FzD"}
|
package/dist/resources/index.js
CHANGED
|
@@ -1,12 +1,50 @@
|
|
|
1
1
|
import { config } from '../config/index.js';
|
|
2
2
|
import * as cache from '../services/cache.js';
|
|
3
|
+
import { registerCachedContentResource, setupCacheSubscriptions, } from './cached-content.js';
|
|
3
4
|
export function registerResources(server) {
|
|
5
|
+
// Register dynamic cached content resources
|
|
6
|
+
registerCachedContentResource(server);
|
|
7
|
+
// Setup cache update subscriptions
|
|
8
|
+
setupCacheSubscriptions();
|
|
9
|
+
// Register health check resource
|
|
10
|
+
server.registerResource('health', 'superfetch://health', {
|
|
11
|
+
title: 'Server Health',
|
|
12
|
+
description: 'Real-time server health and dependency status',
|
|
13
|
+
mimeType: 'application/json',
|
|
14
|
+
}, (uri) => {
|
|
15
|
+
const memUsage = process.memoryUsage();
|
|
16
|
+
const heapUsedMB = Math.round(memUsage.heapUsed / 1024 / 1024);
|
|
17
|
+
const heapTotalMB = Math.round(memUsage.heapTotal / 1024 / 1024);
|
|
18
|
+
const health = {
|
|
19
|
+
status: 'healthy',
|
|
20
|
+
uptime: process.uptime(),
|
|
21
|
+
checks: {
|
|
22
|
+
cache: config.cache.enabled,
|
|
23
|
+
memory: {
|
|
24
|
+
heapUsed: heapUsedMB,
|
|
25
|
+
heapTotal: heapTotalMB,
|
|
26
|
+
percentage: Math.round((heapUsedMB / heapTotalMB) * 100),
|
|
27
|
+
healthy: heapUsedMB < 400, // Flag if using > 400MB
|
|
28
|
+
},
|
|
29
|
+
},
|
|
30
|
+
timestamp: new Date().toISOString(),
|
|
31
|
+
};
|
|
32
|
+
return {
|
|
33
|
+
contents: [
|
|
34
|
+
{
|
|
35
|
+
uri: uri.href,
|
|
36
|
+
mimeType: 'application/json',
|
|
37
|
+
text: JSON.stringify(health, null, 2),
|
|
38
|
+
},
|
|
39
|
+
],
|
|
40
|
+
};
|
|
41
|
+
});
|
|
4
42
|
// Register server statistics resource
|
|
5
43
|
server.registerResource('stats', 'superfetch://stats', {
|
|
6
44
|
title: 'Server Statistics',
|
|
7
45
|
description: 'Fetch statistics and cache performance metrics',
|
|
8
46
|
mimeType: 'application/json',
|
|
9
|
-
},
|
|
47
|
+
}, (uri) => {
|
|
10
48
|
const stats = {
|
|
11
49
|
server: {
|
|
12
50
|
name: config.server.name,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/resources/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,OAAO,KAAK,KAAK,MAAM,sBAAsB,CAAC;AAE9C,MAAM,UAAU,iBAAiB,CAAC,MAAiB;IACjD,sCAAsC;IACtC,MAAM,CAAC,gBAAgB,CACrB,OAAO,EACP,oBAAoB,EACpB;QACE,KAAK,EAAE,mBAAmB;QAC1B,WAAW,EAAE,gDAAgD;QAC7D,QAAQ,EAAE,kBAAkB;KAC7B,EACD,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/resources/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,OAAO,KAAK,KAAK,MAAM,sBAAsB,CAAC;AAE9C,OAAO,EACL,6BAA6B,EAC7B,uBAAuB,GACxB,MAAM,qBAAqB,CAAC;AAE7B,MAAM,UAAU,iBAAiB,CAAC,MAAiB;IACjD,4CAA4C;IAC5C,6BAA6B,CAAC,MAAM,CAAC,CAAC;IAEtC,mCAAmC;IACnC,uBAAuB,EAAE,CAAC;IAE1B,iCAAiC;IACjC,MAAM,CAAC,gBAAgB,CACrB,QAAQ,EACR,qBAAqB,EACrB;QACE,KAAK,EAAE,eAAe;QACtB,WAAW,EAAE,+CAA+C;QAC5D,QAAQ,EAAE,kBAAkB;KAC7B,EACD,CAAC,GAAG,EAAE,EAAE;QACN,MAAM,QAAQ,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QACvC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;QAC/D,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;QAEjE,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;YACxB,MAAM,EAAE;gBACN,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO;gBAC3B,MAAM,EAAE;oBACN,QAAQ,EAAE,UAAU;oBACpB,SAAS,EAAE,WAAW;oBACtB,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC;oBACxD,OAAO,EAAE,UAAU,GAAG,GAAG,EAAE,wBAAwB;iBACpD;aACF;YACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE;gBACR;oBACE,GAAG,EAAE,GAAG,CAAC,IAAI;oBACb,QAAQ,EAAE,kBAAkB;oBAC5B,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;iBACtC;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,sCAAsC;IACtC,MAAM,CAAC,gBAAgB,CACrB,OAAO,EACP,oBAAoB,EACpB;QACE,KAAK,EAAE,mBAAmB;QAC1B,WAAW,EAAE,gDAAgD;QAC7D,QAAQ,EAAE,kBAAkB;KAC7B,EACD,CAAC,GAAG,EAAE,EAAE;QACN,MAAM,KAAK,GAAG;YACZ,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI;gBACxB,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO;gBAC9B,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;gBACxB,WAAW,EAAE,OAAO,CAAC,OAAO;gBAC5B,WAAW,EAAE,OAAO,CAAC,WAAW,EAAE;aACnC;YACD,KAAK,EAAE,KAAK,CAAC,QAAQ,EAAE;YACvB,MAAM,EAAE;gBACN,OAAO,EAAE;oBACP,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO;oBAC/B,YAAY,EAAE,MAAM,CAAC,OAAO,CAAC,YAAY;iBAC1C;gBACD,UAAU,EAAE;oBACV,kBAAkB,EAAE,MAAM,CAAC,UAAU,CAAC,kBAAkB;oBACxD,eAAe,EAAE,MAAM,CAAC,UAAU,CAAC,eAAe;iBACnD;aACF;SACF,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE;gBACR;oBACE,GAAG,EAAE,GAAG,CAAC,IAAI;oBACb,QAAQ,EAAE,kBAAkB;oBAC5B,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;iBACrC;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;AACJ,CAAC"}
|
package/dist/server.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAcpE,wBAAgB,eAAe,IAAI,SAAS,CAuB3C;AAGD,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAkBtD"}
|
package/dist/server.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
2
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
3
3
|
import { config } from './config/index.js';
|
|
4
|
+
import { destroyAgents } from './services/fetcher.js';
|
|
4
5
|
import { logError, logInfo } from './services/logger.js';
|
|
5
6
|
import { registerTools } from './tools/index.js';
|
|
6
7
|
import { registerPrompts } from './prompts/index.js';
|
|
@@ -9,6 +10,14 @@ export function createMcpServer() {
|
|
|
9
10
|
const server = new McpServer({
|
|
10
11
|
name: config.server.name,
|
|
11
12
|
version: config.server.version,
|
|
13
|
+
}, {
|
|
14
|
+
capabilities: {
|
|
15
|
+
tools: { listChanged: false },
|
|
16
|
+
resources: { listChanged: true, subscribe: true },
|
|
17
|
+
prompts: { listChanged: false },
|
|
18
|
+
logging: {},
|
|
19
|
+
},
|
|
20
|
+
instructions: `superFetch MCP server v${config.server.version} - AI-optimized web content fetching with JSONL/Markdown output. Provides tools for fetching, parsing, and transforming web content into structured formats suitable for LLM consumption. Supports resource subscriptions for cache updates.`,
|
|
12
21
|
});
|
|
13
22
|
// Register all features using the modern API
|
|
14
23
|
registerTools(server);
|
|
@@ -26,6 +35,7 @@ export async function startStdioServer() {
|
|
|
26
35
|
};
|
|
27
36
|
process.on('SIGINT', async () => {
|
|
28
37
|
process.stdout.write('\nShutting down superFetch MCP server...\n');
|
|
38
|
+
destroyAgents(); // Clean up HTTP connection pools
|
|
29
39
|
await server.close();
|
|
30
40
|
process.exit(0);
|
|
31
41
|
});
|
package/dist/server.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.js","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAEjF,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEjD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAEzD,MAAM,UAAU,eAAe;IAC7B,MAAM,MAAM,GAAG,IAAI,SAAS,
|
|
1
|
+
{"version":3,"file":"server.js","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AAEjF,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEjD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAEzD,MAAM,UAAU,eAAe;IAC7B,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI;QACxB,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO;KAC/B,EACD;QACE,YAAY,EAAE;YACZ,KAAK,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE;YAC7B,SAAS,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE;YACjD,OAAO,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE;YAC/B,OAAO,EAAE,EAAE;SACZ;QACD,YAAY,EAAE,0BAA0B,MAAM,CAAC,MAAM,CAAC,OAAO,8OAA8O;KAC5S,CACF,CAAC;IAEF,6CAA6C;IAC7C,aAAa,CAAC,MAAM,CAAC,CAAC;IACtB,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC1B,eAAe,CAAC,MAAM,CAAC,CAAC;IAExB,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,uDAAuD;AACvD,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACpC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAE7C,iBAAiB;IACjB,MAAM,CAAC,MAAM,CAAC,OAAO,GAAG,CAAC,KAAK,EAAE,EAAE;QAChC,QAAQ,CAAC,aAAa,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IACtE,CAAC,CAAC;IAEF,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,KAAK,IAAI,EAAE;QAC9B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,4CAA4C,CAAC,CAAC;QACnE,aAAa,EAAE,CAAC,CAAC,iCAAiC;QAClD,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,wCAAwC,CAAC,CAAC;AACpD,CAAC"}
|
package/dist/services/cache.d.ts
CHANGED
|
@@ -4,6 +4,9 @@ export declare function get(cacheKey: string | null): CacheEntry | undefined;
|
|
|
4
4
|
export declare function set(cacheKey: string | null, content: string): void;
|
|
5
5
|
export declare function getHtml(url: string): string | undefined;
|
|
6
6
|
export declare function setHtml(url: string, html: string): void;
|
|
7
|
+
export declare function keys(): string[];
|
|
8
|
+
type CacheUpdateCallback = (key: string, namespace: string) => void;
|
|
9
|
+
export declare function onUpdate(callback: CacheUpdateCallback): () => void;
|
|
7
10
|
export declare function getStats(): {
|
|
8
11
|
size: number;
|
|
9
12
|
maxKeys: number;
|
|
@@ -12,12 +15,20 @@ export declare function getStats(): {
|
|
|
12
15
|
misses: number;
|
|
13
16
|
sets: number;
|
|
14
17
|
errors: number;
|
|
18
|
+
evictions: number;
|
|
15
19
|
hitRate: string;
|
|
16
20
|
htmlCacheSize: number;
|
|
17
21
|
htmlCacheMaxKeys: number;
|
|
18
22
|
htmlCacheTtl: number;
|
|
19
23
|
htmlHits: number;
|
|
20
24
|
htmlMisses: number;
|
|
25
|
+
htmlEvictions: number;
|
|
21
26
|
htmlHitRate: string;
|
|
27
|
+
efficiency: {
|
|
28
|
+
hitRate: string;
|
|
29
|
+
missRate: string;
|
|
30
|
+
errorRate: string;
|
|
31
|
+
};
|
|
22
32
|
};
|
|
33
|
+
export {};
|
|
23
34
|
//# sourceMappingURL=cache.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/services/cache.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/services/cache.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AA6CrD,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAY5E;AAED,wBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,GAAG,UAAU,GAAG,SAAS,CAqBnE;AAED,wBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,EAAE,OAAO,EAAE,MAAM,GAAG,IAAI,CAkClE;AAED,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CA4BvD;AAED,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,CAavD;AAED,wBAAgB,IAAI,IAAI,MAAM,EAAE,CAE/B;AAED,KAAK,mBAAmB,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,KAAK,IAAI,CAAC;AAGpE,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,mBAAmB,GAAG,MAAM,IAAI,CAUlE;AAgBD,wBAAgB,QAAQ,IAAI;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE;QACV,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;CACH,CAoCA"}
|
package/dist/services/cache.js
CHANGED
|
@@ -16,13 +16,23 @@ const htmlCache = new NodeCache({
|
|
|
16
16
|
useClones: false,
|
|
17
17
|
maxKeys: HTML_CACHE_MAX_KEYS,
|
|
18
18
|
});
|
|
19
|
+
// Track cache evictions
|
|
20
|
+
contentCache.on('del', (key) => {
|
|
21
|
+
stats.evictions++;
|
|
22
|
+
logDebug('Cache eviction', { key: String(key).substring(0, 100) });
|
|
23
|
+
});
|
|
24
|
+
htmlCache.on('del', () => {
|
|
25
|
+
stats.htmlEvictions++;
|
|
26
|
+
});
|
|
19
27
|
const stats = {
|
|
20
28
|
hits: 0,
|
|
21
29
|
misses: 0,
|
|
22
30
|
sets: 0,
|
|
23
31
|
errors: 0,
|
|
32
|
+
evictions: 0,
|
|
24
33
|
htmlHits: 0,
|
|
25
34
|
htmlMisses: 0,
|
|
35
|
+
htmlEvictions: 0,
|
|
26
36
|
};
|
|
27
37
|
const MAX_CONTENT_SIZE = 5242880;
|
|
28
38
|
const MAX_HTML_SIZE = 10485760;
|
|
@@ -33,12 +43,12 @@ export function createCacheKey(namespace, url) {
|
|
|
33
43
|
const key = `${namespace}:${url}`;
|
|
34
44
|
if (key.length <= MAX_KEY_LENGTH)
|
|
35
45
|
return key;
|
|
36
|
-
//
|
|
46
|
+
// SHA-256 hash for long URLs (consistent with cached-content.ts)
|
|
37
47
|
const hash = crypto
|
|
38
48
|
.createHash('sha256')
|
|
39
49
|
.update(url)
|
|
40
50
|
.digest('hex')
|
|
41
|
-
.substring(0,
|
|
51
|
+
.substring(0, 64);
|
|
42
52
|
return `${namespace}:hash:${hash}`;
|
|
43
53
|
}
|
|
44
54
|
export function get(cacheKey) {
|
|
@@ -89,6 +99,8 @@ export function set(cacheKey, content) {
|
|
|
89
99
|
};
|
|
90
100
|
contentCache.set(cacheKey, entry);
|
|
91
101
|
stats.sets++;
|
|
102
|
+
// Notify subscribers of cache update
|
|
103
|
+
notifyUpdate(cacheKey);
|
|
92
104
|
}
|
|
93
105
|
catch (error) {
|
|
94
106
|
stats.errors++;
|
|
@@ -104,14 +116,27 @@ export function getHtml(url) {
|
|
|
104
116
|
try {
|
|
105
117
|
const html = htmlCache.get(url);
|
|
106
118
|
if (html) {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
119
|
+
// Validate cached HTML is within limits
|
|
120
|
+
if (typeof html === 'string' && html.length <= MAX_HTML_SIZE) {
|
|
121
|
+
stats.htmlHits++;
|
|
122
|
+
logDebug('HTML cache hit', { url: url.substring(0, 100) });
|
|
123
|
+
return html;
|
|
124
|
+
}
|
|
125
|
+
// Invalid cache entry, remove it
|
|
126
|
+
htmlCache.del(url);
|
|
127
|
+
logWarn('Removed oversized HTML from cache', {
|
|
128
|
+
url: url.substring(0, 100),
|
|
129
|
+
size: html.length,
|
|
130
|
+
});
|
|
110
131
|
}
|
|
111
132
|
stats.htmlMisses++;
|
|
112
133
|
return undefined;
|
|
113
134
|
}
|
|
114
|
-
catch {
|
|
135
|
+
catch (error) {
|
|
136
|
+
logDebug('HTML cache get error (non-critical)', {
|
|
137
|
+
url: url.substring(0, 100),
|
|
138
|
+
error: error instanceof Error ? error.message : 'Unknown',
|
|
139
|
+
});
|
|
115
140
|
return undefined;
|
|
116
141
|
}
|
|
117
142
|
}
|
|
@@ -124,15 +149,47 @@ export function setHtml(url, html) {
|
|
|
124
149
|
htmlCache.set(url, html);
|
|
125
150
|
logDebug('HTML cached', { url: url.substring(0, 100), size: html.length });
|
|
126
151
|
}
|
|
127
|
-
catch {
|
|
128
|
-
|
|
152
|
+
catch (error) {
|
|
153
|
+
logDebug('HTML cache set error (non-critical)', {
|
|
154
|
+
url: url.substring(0, 100),
|
|
155
|
+
error: error instanceof Error ? error.message : 'Unknown',
|
|
156
|
+
});
|
|
129
157
|
}
|
|
130
158
|
}
|
|
159
|
+
export function keys() {
|
|
160
|
+
return [...contentCache.keys(), ...htmlCache.keys()];
|
|
161
|
+
}
|
|
162
|
+
const updateCallbacks = [];
|
|
163
|
+
export function onUpdate(callback) {
|
|
164
|
+
updateCallbacks.push(callback);
|
|
165
|
+
// Return unsubscribe function
|
|
166
|
+
return () => {
|
|
167
|
+
const index = updateCallbacks.indexOf(callback);
|
|
168
|
+
if (index > -1) {
|
|
169
|
+
updateCallbacks.splice(index, 1);
|
|
170
|
+
}
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
// Notify callbacks when cache is updated
|
|
174
|
+
function notifyUpdate(key) {
|
|
175
|
+
const parts = key.split(':');
|
|
176
|
+
const namespace = parts[0] ?? 'unknown';
|
|
177
|
+
updateCallbacks.forEach((callback) => {
|
|
178
|
+
try {
|
|
179
|
+
callback(key, namespace);
|
|
180
|
+
}
|
|
181
|
+
catch {
|
|
182
|
+
// Silently ignore callback errors
|
|
183
|
+
}
|
|
184
|
+
});
|
|
185
|
+
}
|
|
131
186
|
export function getStats() {
|
|
132
187
|
const total = stats.hits + stats.misses;
|
|
133
188
|
const hitRate = total > 0 ? ((stats.hits / total) * 100).toFixed(2) : '0.00';
|
|
134
189
|
const htmlTotal = stats.htmlHits + stats.htmlMisses;
|
|
135
190
|
const htmlHitRate = htmlTotal > 0 ? ((stats.htmlHits / htmlTotal) * 100).toFixed(2) : '0.00';
|
|
191
|
+
const missRate = total > 0 ? ((stats.misses / total) * 100).toFixed(2) : '0.00';
|
|
192
|
+
const errorRate = stats.sets > 0 ? ((stats.errors / stats.sets) * 100).toFixed(2) : '0.00';
|
|
136
193
|
return {
|
|
137
194
|
size: contentCache.keys().length,
|
|
138
195
|
maxKeys: config.cache.maxKeys,
|
|
@@ -141,13 +198,20 @@ export function getStats() {
|
|
|
141
198
|
misses: stats.misses,
|
|
142
199
|
sets: stats.sets,
|
|
143
200
|
errors: stats.errors,
|
|
201
|
+
evictions: stats.evictions,
|
|
144
202
|
hitRate: `${hitRate}%`,
|
|
145
203
|
htmlCacheSize: htmlCache.keys().length,
|
|
146
204
|
htmlCacheMaxKeys: HTML_CACHE_MAX_KEYS,
|
|
147
205
|
htmlCacheTtl: HTML_CACHE_TTL,
|
|
148
206
|
htmlHits: stats.htmlHits,
|
|
149
207
|
htmlMisses: stats.htmlMisses,
|
|
208
|
+
htmlEvictions: stats.htmlEvictions,
|
|
150
209
|
htmlHitRate: `${htmlHitRate}%`,
|
|
210
|
+
efficiency: {
|
|
211
|
+
hitRate: `${hitRate}%`,
|
|
212
|
+
missRate: `${missRate}%`,
|
|
213
|
+
errorRate: `${errorRate}%`,
|
|
214
|
+
},
|
|
151
215
|
};
|
|
152
216
|
}
|
|
153
217
|
//# sourceMappingURL=cache.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/services/cache.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,SAAS,MAAM,YAAY,CAAC;AAEnC,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,YAAY,GAAG,IAAI,SAAS,CAAC;IACjC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG;IACxB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,EAAE,CAAC;IAC9C,SAAS,EAAE,KAAK;IAChB,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO;CAC9B,CAAC,CAAC;AAEH,MAAM,cAAc,GAAG,EAAE,CAAC;AAC1B,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAC/B,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC;IAC9B,MAAM,EAAE,cAAc;IACtB,WAAW,EAAE,EAAE;IACf,SAAS,EAAE,KAAK;IAChB,OAAO,EAAE,mBAAmB;CAC7B,CAAC,CAAC;AAEH,MAAM,KAAK,GAAG;IACZ,IAAI,EAAE,CAAC;IACP,MAAM,EAAE,CAAC;IACT,IAAI,EAAE,CAAC;IACP,MAAM,EAAE,CAAC;IACT,QAAQ,EAAE,CAAC;IACX,UAAU,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/services/cache.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,SAAS,MAAM,YAAY,CAAC;AAEnC,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,YAAY,GAAG,IAAI,SAAS,CAAC;IACjC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG;IACxB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,EAAE,CAAC;IAC9C,SAAS,EAAE,KAAK;IAChB,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO;CAC9B,CAAC,CAAC;AAEH,MAAM,cAAc,GAAG,EAAE,CAAC;AAC1B,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAC/B,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC;IAC9B,MAAM,EAAE,cAAc;IACtB,WAAW,EAAE,EAAE;IACf,SAAS,EAAE,KAAK;IAChB,OAAO,EAAE,mBAAmB;CAC7B,CAAC,CAAC;AAEH,wBAAwB;AACxB,YAAY,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,GAAG,EAAE,EAAE;IAC7B,KAAK,CAAC,SAAS,EAAE,CAAC;IAClB,QAAQ,CAAC,gBAAgB,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;AACrE,CAAC,CAAC,CAAC;AAEH,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;IACvB,KAAK,CAAC,aAAa,EAAE,CAAC;AACxB,CAAC,CAAC,CAAC;AAEH,MAAM,KAAK,GAAG;IACZ,IAAI,EAAE,CAAC;IACP,MAAM,EAAE,CAAC;IACT,IAAI,EAAE,CAAC;IACP,MAAM,EAAE,CAAC;IACT,SAAS,EAAE,CAAC;IACZ,QAAQ,EAAE,CAAC;IACX,UAAU,EAAE,CAAC;IACb,aAAa,EAAE,CAAC;CACjB,CAAC;AAEF,MAAM,gBAAgB,GAAG,OAAO,CAAC;AACjC,MAAM,aAAa,GAAG,QAAQ,CAAC;AAC/B,MAAM,cAAc,GAAG,GAAG,CAAC;AAE3B,MAAM,UAAU,cAAc,CAAC,SAAiB,EAAE,GAAW;IAC3D,IAAI,CAAC,SAAS,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACpC,MAAM,GAAG,GAAG,GAAG,SAAS,IAAI,GAAG,EAAE,CAAC;IAClC,IAAI,GAAG,CAAC,MAAM,IAAI,cAAc;QAAE,OAAO,GAAG,CAAC;IAE7C,iEAAiE;IACjE,MAAM,IAAI,GAAG,MAAM;SAChB,UAAU,CAAC,QAAQ,CAAC;SACpB,MAAM,CAAC,GAAG,CAAC;SACX,MAAM,CAAC,KAAK,CAAC;SACb,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACpB,OAAO,GAAG,SAAS,SAAS,IAAI,EAAE,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,GAAG,CAAC,QAAuB;IACzC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAC5C,IAAI,CAAC,QAAQ;QAAE,OAAO,SAAS,CAAC;IAEhC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAa,QAAQ,CAAC,CAAC;QACrD,IAAI,KAAK,EAAE,CAAC;YACV,KAAK,CAAC,IAAI,EAAE,CAAC;YACb,OAAO,KAAK,CAAC;QACf,CAAC;QAED,KAAK,CAAC,MAAM,EAAE,CAAC;QACf,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,KAAK,CAAC,MAAM,EAAE,CAAC;QACf,OAAO,CAAC,iBAAiB,EAAE;YACzB,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;YAC/B,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;SAChE,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,GAAG,CAAC,QAAuB,EAAE,OAAe;IAC1D,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO;QAAE,OAAO;IAClC,IAAI,CAAC,QAAQ;QAAE,OAAO;IACtB,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO;IACpD,IAAI,OAAO,CAAC,MAAM,GAAG,gBAAgB,EAAE,CAAC;QACtC,OAAO,CAAC,sCAAsC,EAAE;YAC9C,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;YAC/B,IAAI,EAAE,OAAO,CAAC,MAAM;YACpB,OAAO,EAAE,gBAAgB;SAC1B,CAAC,CAAC;QACH,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,KAAK,GAAe;YACxB,GAAG,EAAE,QAAQ;YACb,OAAO;YACP,SAAS,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE;YACxC,SAAS,EAAE,IAAI,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,WAAW,EAAE;SACnE,CAAC;QAEF,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAClC,KAAK,CAAC,IAAI,EAAE,CAAC;QAEb,qCAAqC;QACrC,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,KAAK,CAAC,MAAM,EAAE,CAAC;QACf,OAAO,CAAC,iBAAiB,EAAE;YACzB,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;YAC/B,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;SAChE,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,GAAW;IACjC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAS,GAAG,CAAC,CAAC;QACxC,IAAI,IAAI,EAAE,CAAC;YACT,wCAAwC;YACxC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,IAAI,aAAa,EAAE,CAAC;gBAC7D,KAAK,CAAC,QAAQ,EAAE,CAAC;gBACjB,QAAQ,CAAC,gBAAgB,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC3D,OAAO,IAAI,CAAC;YACd,CAAC;YACD,iCAAiC;YACjC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACnB,OAAO,CAAC,mCAAmC,EAAE;gBAC3C,GAAG,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;gBAC1B,IAAI,EAAE,IAAI,CAAC,MAAM;aAClB,CAAC,CAAC;QACL,CAAC;QACD,KAAK,CAAC,UAAU,EAAE,CAAC;QACnB,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CAAC,qCAAqC,EAAE;YAC9C,GAAG,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;YAC1B,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;SAC1D,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,GAAW,EAAE,IAAY;IAC/C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO;QAAE,OAAO;IAClC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,aAAa;QAAE,OAAO;IAEjD,IAAI,CAAC;QACH,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QACzB,QAAQ,CAAC,aAAa,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7E,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CAAC,qCAAqC,EAAE;YAC9C,GAAG,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;YAC1B,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;SAC1D,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED,MAAM,UAAU,IAAI;IAClB,OAAO,CAAC,GAAG,YAAY,CAAC,IAAI,EAAE,EAAE,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;AACvD,CAAC;AAGD,MAAM,eAAe,GAA0B,EAAE,CAAC;AAElD,MAAM,UAAU,QAAQ,CAAC,QAA6B;IACpD,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAE/B,8BAA8B;IAC9B,OAAO,GAAG,EAAE;QACV,MAAM,KAAK,GAAG,eAAe,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,CAAC,CAAC,EAAE,CAAC;YACf,eAAe,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AAED,yCAAyC;AACzC,SAAS,YAAY,CAAC,GAAW;IAC/B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7B,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;IAExC,eAAe,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QACnC,IAAI,CAAC;YACH,QAAQ,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAC3B,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;QACpC,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,QAAQ;IAuBtB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC;IACxC,MAAM,OAAO,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAE7E,MAAM,SAAS,GAAG,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,UAAU,CAAC;IACpD,MAAM,WAAW,GACf,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAE3E,MAAM,QAAQ,GACZ,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACjE,MAAM,SAAS,GACb,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAE3E,OAAO;QACL,IAAI,EAAE,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM;QAChC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO;QAC7B,GAAG,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG;QACrB,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,MAAM,EAAE,KAAK,CAAC,MAAM;QACpB,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,MAAM,EAAE,KAAK,CAAC,MAAM;QACpB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,OAAO,EAAE,GAAG,OAAO,GAAG;QACtB,aAAa,EAAE,SAAS,CAAC,IAAI,EAAE,CAAC,MAAM;QACtC,gBAAgB,EAAE,mBAAmB;QACrC,YAAY,EAAE,cAAc;QAC5B,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,UAAU,EAAE,KAAK,CAAC,UAAU;QAC5B,aAAa,EAAE,KAAK,CAAC,aAAa;QAClC,WAAW,EAAE,GAAG,WAAW,GAAG;QAC9B,UAAU,EAAE;YACV,OAAO,EAAE,GAAG,OAAO,GAAG;YACtB,QAAQ,EAAE,GAAG,QAAQ,GAAG;YACxB,SAAS,EAAE,GAAG,SAAS,GAAG;SAC3B;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Card link extraction utilities for preserving card-style navigation
|
|
3
|
-
* from documentation sites before Readability strips them.
|
|
4
|
-
*/
|
|
5
1
|
/**
|
|
6
2
|
* Pre-process HTML to preserve card links that Readability might strip.
|
|
7
3
|
* Converts card-like elements into simple link lists.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"card-extractor.d.ts","sourceRoot":"","sources":["../../src/services/card-extractor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"card-extractor.d.ts","sourceRoot":"","sources":["../../src/services/card-extractor.ts"],"names":[],"mappings":"AAkOA;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAI1D"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { logDebug } from './logger.js';
|
|
1
2
|
/**
|
|
2
3
|
* Card link extraction utilities for preserving card-style navigation
|
|
3
4
|
* from documentation sites before Readability strips them.
|
|
@@ -8,7 +9,9 @@ const NOISE_SELECTORS = 'style, svg, [class*="icon"], [aria-hidden="true"]';
|
|
|
8
9
|
*/
|
|
9
10
|
function cleanElement(element) {
|
|
10
11
|
const clone = element.cloneNode(true);
|
|
11
|
-
clone.querySelectorAll(NOISE_SELECTORS).forEach((el) =>
|
|
12
|
+
clone.querySelectorAll(NOISE_SELECTORS).forEach((el) => {
|
|
13
|
+
el.remove();
|
|
14
|
+
});
|
|
12
15
|
return clone;
|
|
13
16
|
}
|
|
14
17
|
/**
|
|
@@ -103,18 +106,23 @@ function processCustomCards(document) {
|
|
|
103
106
|
if (list.children.length > 0) {
|
|
104
107
|
const firstCard = customCards[0];
|
|
105
108
|
firstCard?.parentNode?.insertBefore(list, firstCard);
|
|
106
|
-
customCards.forEach((card) =>
|
|
109
|
+
customCards.forEach((card) => {
|
|
110
|
+
card.remove();
|
|
111
|
+
});
|
|
107
112
|
}
|
|
108
113
|
}
|
|
109
114
|
/**
|
|
110
115
|
* Process CSS grid card containers
|
|
116
|
+
* Optimized to use more specific selectors to reduce iteration overhead
|
|
111
117
|
*/
|
|
112
118
|
function processCardGrids(document) {
|
|
119
|
+
// Use querySelectorAll on all divs but filter early with direct child selector
|
|
113
120
|
for (const div of document.querySelectorAll('div')) {
|
|
114
|
-
|
|
121
|
+
// Use :scope > a[href] for direct child links only (more efficient than Array.from + filter)
|
|
122
|
+
const childLinks = div.querySelectorAll(':scope > a[href]');
|
|
115
123
|
if (childLinks.length < 2)
|
|
116
124
|
continue;
|
|
117
|
-
const looksLikeCards = childLinks.every((link) => {
|
|
125
|
+
const looksLikeCards = Array.from(childLinks).every((link) => {
|
|
118
126
|
const hasStructuredContent = link.querySelector('svg, div, p, span');
|
|
119
127
|
const hasReasonableText = link.textContent.trim().length > 3;
|
|
120
128
|
return hasStructuredContent && hasReasonableText;
|
|
@@ -170,8 +178,12 @@ function processSemanticCards(document) {
|
|
|
170
178
|
}
|
|
171
179
|
}
|
|
172
180
|
}
|
|
173
|
-
catch {
|
|
181
|
+
catch (error) {
|
|
174
182
|
// Selector might be invalid, skip it
|
|
183
|
+
logDebug('Card selector processing failed (non-critical)', {
|
|
184
|
+
selector,
|
|
185
|
+
error: error instanceof Error ? error.message : 'Unknown',
|
|
186
|
+
});
|
|
175
187
|
}
|
|
176
188
|
}
|
|
177
189
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"card-extractor.js","sourceRoot":"","sources":["../../src/services/card-extractor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,eAAe,GAAG,mDAAmD,CAAC;AAE5E;;GAEG;AACH,SAAS,YAAY,CAAC,OAAgB;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,IAAI,CAAY,CAAC;IACjD,KAAK,CAAC,gBAAgB,CAAC,eAAe,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE,
|
|
1
|
+
{"version":3,"file":"card-extractor.js","sourceRoot":"","sources":["../../src/services/card-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC;;;GAGG;AAEH,MAAM,eAAe,GAAG,mDAAmD,CAAC;AAE5E;;GAEG;AACH,SAAS,YAAY,CAAC,OAAgB;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC,IAAI,CAAY,CAAC;IACjD,KAAK,CAAC,gBAAgB,CAAC,eAAe,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;QACrD,EAAE,CAAC,MAAM,EAAE,CAAC;IACd,CAAC,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAa;IACrC,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAEjC,kFAAkF;IAClF,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,gBAAgB,CAAC,KAAK,CAAC,EAAE,CAAC;QAChD,IAAI,GAAG,CAAC,aAAa,CAAC,KAAK,CAAC;YAAE,SAAS,CAAC,sBAAsB;QAE9D,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACpC,IACE,IAAI,CAAC,MAAM,GAAG,CAAC;YACf,IAAI,CAAC,MAAM,GAAG,EAAE;YAChB,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACxB,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACtB,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CACjC,0CAA0C,CAC3C,CAAC;IACF,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,KAAK,CAAC;IAC3D,CAAC;IAED,6CAA6C;IAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;IACtD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAEzD,MAAM,SAAS,GAAG,IAAI;SACnB,KAAK,CAAC,OAAO,CAAC;SACd,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;QACjC,EAAE,IAAI,EAAE,CAAC;IACX,OAAO,SAAS,IAAI,IAAI,CAAC;AAC3B,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,IAAa;IAC3C,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAEjC,MAAM,MAAM,GAAG,KAAK,CAAC,aAAa,CAChC,6CAA6C,CAC9C,CAAC;IACF,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAC;IACxD,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,4BAA4B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1D,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IAE/D,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CACzB,QAAkB,EAClB,IAAY,EACZ,KAAa,EACb,WAA2B;IAE3B,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;IACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAChC,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IACzB,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAErB,IAAI,WAAW,IAAI,WAAW,KAAK,KAAK,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QACzE,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,MAAM,WAAW,EAAE,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,QAAkB;IAC5C,MAAM,WAAW,GAAG,QAAQ,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CAAC;IACzE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAErC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;IAElD,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAEpE,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;YAClB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,WAAW,CAAC,IAAI,EAAE,CAAC;YACzD,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QACjC,SAAS,EAAE,UAAU,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QACrD,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3B,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,QAAkB;IAC1C,+EAA+E;IAC/E,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,gBAAgB,CAAC,KAAK,CAAC,EAAE,CAAC;QACnD,6FAA6F;QAC7F,MAAM,UAAU,GAAG,GAAG,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;QAE5D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEpC,MAAM,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3D,MAAM,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,mBAAmB,CAAC,CAAC;YACrE,MAAM,iBAAiB,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;YAC7D,OAAO,oBAAoB,IAAI,iBAAiB,CAAC;QACnD,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc;YAAE,SAAS;QAE9B,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC9C,OAAO,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAE1C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,IAAI,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;YAE1C,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;gBAClB,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC1B,GAAG,CAAC,UAAU,EAAE,YAAY,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAkB;IAC9C,MAAM,aAAa,GAAG;QACpB,uBAAuB;QACvB,sBAAsB;QACtB,kBAAkB;QAClB,cAAc;QACd,sBAAsB;QACtB,yBAAyB;KAC1B,CAAC;IAEF,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;QACrC,IAAI,CAAC;YACH,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5D,MAAM,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;gBACpD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBAEjC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;gBAC1C,IAAI,CAAC,YAAY,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;gBAElD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;oBACvC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;oBAErC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;wBAClB,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;oBAC9D,CAAC;gBACH,CAAC;gBAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7B,SAAS,CAAC,UAAU,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBACtD,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,qCAAqC;YACrC,QAAQ,CAAC,gDAAgD,EAAE;gBACzD,QAAQ;gBACR,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;aAC1D,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAkB;IAClD,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC7B,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC3B,oBAAoB,CAAC,QAAQ,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { CheerioAPI } from 'cheerio';
|
|
2
|
+
import type { ExtractedMetadata, ExtractionResult } from '../config/types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Extract metadata using Cheerio (fast, no full DOM)
|
|
5
|
+
* This avoids JSDOM overhead for simple meta tag extraction
|
|
6
|
+
*/
|
|
7
|
+
export declare function extractMetadataWithCheerio($: CheerioAPI): ExtractedMetadata;
|
|
2
8
|
/**
|
|
3
9
|
* Main extraction function - uses Cheerio for metadata (fast)
|
|
4
10
|
* and lazy-loads JSDOM only when article extraction is needed
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAM1C,OAAO,KAAK,EAEV,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,oBAAoB,CAAC;AAc5B;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,CAAC,EAAE,UAAU,GAAG,iBAAiB,CA4B3E;AAsCD;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAA6B,GAC/D,gBAAgB,CAsClB"}
|
|
@@ -1,14 +1,22 @@
|
|
|
1
1
|
import * as cheerio from 'cheerio';
|
|
2
2
|
import { JSDOM, VirtualConsole } from 'jsdom';
|
|
3
3
|
import { Readability } from '@mozilla/readability';
|
|
4
|
+
import { config } from '../config/index.js';
|
|
4
5
|
import { preserveCardLinks } from './card-extractor.js';
|
|
5
6
|
import { logError, logWarn } from './logger.js';
|
|
6
|
-
|
|
7
|
+
// Shared VirtualConsole to suppress JSDOM warnings/errors
|
|
8
|
+
const sharedVirtualConsole = new VirtualConsole();
|
|
9
|
+
sharedVirtualConsole.on('error', () => {
|
|
10
|
+
/* suppress JSDOM errors */
|
|
11
|
+
});
|
|
12
|
+
sharedVirtualConsole.on('warn', () => {
|
|
13
|
+
/* suppress JSDOM warnings */
|
|
14
|
+
});
|
|
7
15
|
/**
|
|
8
16
|
* Extract metadata using Cheerio (fast, no full DOM)
|
|
9
17
|
* This avoids JSDOM overhead for simple meta tag extraction
|
|
10
18
|
*/
|
|
11
|
-
function extractMetadataWithCheerio($) {
|
|
19
|
+
export function extractMetadataWithCheerio($) {
|
|
12
20
|
const getMetaContent = (selectors) => {
|
|
13
21
|
for (const selector of selectors) {
|
|
14
22
|
const content = $(selector).attr('content');
|
|
@@ -39,10 +47,9 @@ function extractMetadataWithCheerio($) {
|
|
|
39
47
|
*/
|
|
40
48
|
function extractArticleWithJsdom(html, url) {
|
|
41
49
|
try {
|
|
42
|
-
//
|
|
43
|
-
const
|
|
44
|
-
const
|
|
45
|
-
const document = dom.window.document;
|
|
50
|
+
// Use shared VirtualConsole to reduce per-parse overhead
|
|
51
|
+
const dom = new JSDOM(html, { url, virtualConsole: sharedVirtualConsole });
|
|
52
|
+
const { document } = dom.window;
|
|
46
53
|
preserveCardLinks(document);
|
|
47
54
|
const reader = new Readability(document);
|
|
48
55
|
const article = reader.parse();
|
|
@@ -76,12 +83,12 @@ export function extractContent(html, url, options = { extractArticle: true }) {
|
|
|
76
83
|
return { article: null, metadata: {} };
|
|
77
84
|
}
|
|
78
85
|
let processedHtml = html;
|
|
79
|
-
if (html.length >
|
|
86
|
+
if (html.length > config.constants.maxHtmlSize) {
|
|
80
87
|
logWarn('HTML content exceeds maximum size for extraction, truncating', {
|
|
81
88
|
size: html.length,
|
|
82
|
-
maxSize:
|
|
89
|
+
maxSize: config.constants.maxHtmlSize,
|
|
83
90
|
});
|
|
84
|
-
processedHtml = html.substring(0,
|
|
91
|
+
processedHtml = html.substring(0, config.constants.maxHtmlSize);
|
|
85
92
|
}
|
|
86
93
|
try {
|
|
87
94
|
// Fast path: Extract metadata with Cheerio (no full DOM parsing)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,OAAO,CAAC;AAE9C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/services/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,OAAO,CAAC;AAE9C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAO5C,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAEhD,0DAA0D;AAC1D,MAAM,oBAAoB,GAAG,IAAI,cAAc,EAAE,CAAC;AAClD,oBAAoB,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;IACpC,2BAA2B;AAC7B,CAAC,CAAC,CAAC;AACH,oBAAoB,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;IACnC,6BAA6B;AAC/B,CAAC,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,UAAU,0BAA0B,CAAC,CAAa;IACtD,MAAM,cAAc,GAAG,CAAC,SAAmB,EAAsB,EAAE;QACjE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC5C,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC;QAC9B,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC;IAEF,MAAM,KAAK,GACT,cAAc,CAAC;QACb,2BAA2B;QAC3B,4BAA4B;KAC7B,CAAC;QACF,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC,CAAC;IAEnC,MAAM,WAAW,GAAG,cAAc,CAAC;QACjC,iCAAiC;QACjC,kCAAkC;QAClC,0BAA0B;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC;QAC5B,qBAAqB;QACrB,iCAAiC;KAClC,CAAC,CAAC;IAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;AACxC,CAAC;AAED;;;GAGG;AACH,SAAS,uBAAuB,CAC9B,IAAY,EACZ,GAAW;IAEX,IAAI,CAAC;QACH,yDAAyD;QACzD,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,cAAc,EAAE,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,EAAE,QAAQ,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC;QAEhC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC5B,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAE/B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS;YACjC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;YACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;YAC9B,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;YACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;YACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,sCAAsC,EACtC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,GAAW,EACX,UAAwC,EAAE,cAAc,EAAE,IAAI,EAAE;IAEhE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,+CAA+C,CAAC,CAAC;QACzD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,wCAAwC,CAAC,CAAC;QAClD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,aAAa,GAAG,IAAI,CAAC;IACzB,IAAI,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QAC/C,OAAO,CAAC,8DAA8D,EAAE;YACtE,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC,WAAW;SACtC,CAAC,CAAC;QACH,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAClE,CAAC;IAED,IAAI,CAAC;QACH,iEAAiE;QACjE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,0BAA0B,CAAC,CAAC,CAAC,CAAC;QAE/C,iEAAiE;QACjE,MAAM,OAAO,GAAG,OAAO,CAAC,cAAc;YACpC,CAAC,CAAC,uBAAuB,CAAC,aAAa,EAAE,GAAG,CAAC;YAC7C,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,QAAQ,CACN,2BAA2B,EAC3B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;AACH,CAAC"}
|
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
/** Options for fetch operations */
|
|
2
|
+
export interface FetchOptions {
|
|
3
|
+
/** Custom HTTP headers to include in the request */
|
|
4
|
+
customHeaders?: Record<string, string>;
|
|
5
|
+
/** AbortSignal for request cancellation */
|
|
6
|
+
signal?: AbortSignal;
|
|
7
|
+
/** Per-request timeout override in milliseconds */
|
|
8
|
+
timeout?: number;
|
|
9
|
+
}
|
|
1
10
|
export declare function destroyAgents(): void;
|
|
2
|
-
export declare function fetchUrlWithRetry(url: string,
|
|
11
|
+
export declare function fetchUrlWithRetry(url: string, options?: FetchOptions, maxRetries?: number, skipCache?: boolean): Promise<{
|
|
3
12
|
html: string;
|
|
4
13
|
fromHtmlCache: boolean;
|
|
5
14
|
}>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/services/fetcher.ts"],"names":[],"mappings":"AAwBA,mCAAmC;AACnC,MAAM,WAAW,YAAY;IAC3B,oDAAoD;IACpD,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,mDAAmD;IACnD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAwED,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AA+MD,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,YAAY,EACtB,UAAU,SAAI,EACd,SAAS,UAAQ,GAChB,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,OAAO,CAAA;CAAE,CAAC,CAoEnD"}
|