@j0hanz/superfetch 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/README.md +139 -46
  2. package/dist/cache.d.ts +42 -0
  3. package/dist/cache.js +565 -0
  4. package/dist/config/env-parsers.d.ts +1 -0
  5. package/dist/config/env-parsers.js +12 -0
  6. package/dist/config/index.d.ts +7 -0
  7. package/dist/config/index.js +20 -8
  8. package/dist/config/types/content.d.ts +1 -0
  9. package/dist/config.d.ts +77 -0
  10. package/dist/config.js +261 -0
  11. package/dist/crypto.d.ts +2 -0
  12. package/dist/crypto.js +32 -0
  13. package/dist/errors.d.ts +10 -0
  14. package/dist/errors.js +28 -0
  15. package/dist/fetch.d.ts +40 -0
  16. package/dist/fetch.js +910 -0
  17. package/dist/http/auth.js +161 -2
  18. package/dist/http/base-middleware.d.ts +7 -0
  19. package/dist/http/base-middleware.js +143 -0
  20. package/dist/http/cors.d.ts +0 -5
  21. package/dist/http/cors.js +0 -6
  22. package/dist/http/download-routes.js +6 -2
  23. package/dist/http/error-handler.d.ts +2 -0
  24. package/dist/http/error-handler.js +55 -0
  25. package/dist/http/host-allowlist.d.ts +3 -0
  26. package/dist/http/host-allowlist.js +117 -0
  27. package/dist/http/mcp-routes.d.ts +8 -2
  28. package/dist/http/mcp-routes.js +101 -8
  29. package/dist/http/mcp-session-eviction.d.ts +3 -0
  30. package/dist/http/mcp-session-eviction.js +24 -0
  31. package/dist/http/mcp-session-init.d.ts +7 -0
  32. package/dist/http/mcp-session-init.js +94 -0
  33. package/dist/http/mcp-session-slots.d.ts +17 -0
  34. package/dist/http/mcp-session-slots.js +55 -0
  35. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  36. package/dist/http/mcp-session-transport-init.js +41 -0
  37. package/dist/http/mcp-session-types.d.ts +5 -0
  38. package/dist/http/mcp-session-types.js +1 -0
  39. package/dist/http/mcp-session.d.ts +9 -9
  40. package/dist/http/mcp-session.js +5 -114
  41. package/dist/http/mcp-sessions.d.ts +41 -0
  42. package/dist/http/mcp-sessions.js +392 -0
  43. package/dist/http/rate-limit.js +2 -2
  44. package/dist/http/server-middleware.d.ts +6 -1
  45. package/dist/http/server-middleware.js +3 -117
  46. package/dist/http/server-shutdown.js +1 -1
  47. package/dist/http/server-tuning.d.ts +9 -0
  48. package/dist/http/server-tuning.js +45 -0
  49. package/dist/http/server.js +206 -9
  50. package/dist/http/session-cleanup.js +8 -5
  51. package/dist/http.d.ts +78 -0
  52. package/dist/http.js +1437 -0
  53. package/dist/index.js +3 -3
  54. package/dist/mcp.d.ts +3 -0
  55. package/dist/mcp.js +94 -0
  56. package/dist/middleware/error-handler.d.ts +1 -1
  57. package/dist/middleware/error-handler.js +31 -30
  58. package/dist/observability.d.ts +16 -0
  59. package/dist/observability.js +78 -0
  60. package/dist/resources/cached-content-params.d.ts +5 -0
  61. package/dist/resources/cached-content-params.js +36 -0
  62. package/dist/resources/cached-content.js +33 -33
  63. package/dist/server.js +21 -6
  64. package/dist/services/cache-events.d.ts +8 -0
  65. package/dist/services/cache-events.js +19 -0
  66. package/dist/services/cache.d.ts +5 -4
  67. package/dist/services/cache.js +49 -45
  68. package/dist/services/context.d.ts +2 -0
  69. package/dist/services/context.js +3 -0
  70. package/dist/services/extractor.d.ts +1 -0
  71. package/dist/services/extractor.js +77 -40
  72. package/dist/services/fetcher/agents.js +1 -1
  73. package/dist/services/fetcher/dns-selection.js +1 -1
  74. package/dist/services/fetcher/interceptors.js +29 -60
  75. package/dist/services/fetcher/redirects.js +12 -4
  76. package/dist/services/fetcher/response.js +18 -8
  77. package/dist/services/fetcher.d.ts +23 -0
  78. package/dist/services/fetcher.js +553 -13
  79. package/dist/services/logger.js +4 -1
  80. package/dist/services/telemetry.d.ts +19 -0
  81. package/dist/services/telemetry.js +43 -0
  82. package/dist/services/transform-worker-pool.d.ts +10 -3
  83. package/dist/services/transform-worker-pool.js +213 -184
  84. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -3
  85. package/dist/tools/handlers/fetch-single.shared.js +131 -2
  86. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -0
  87. package/dist/tools/handlers/fetch-url.tool.js +56 -12
  88. package/dist/tools/index.d.ts +1 -0
  89. package/dist/tools/index.js +13 -1
  90. package/dist/tools/schemas.d.ts +2 -0
  91. package/dist/tools/schemas.js +8 -0
  92. package/dist/tools/utils/content-shaping.js +19 -4
  93. package/dist/tools/utils/content-transform-core.d.ts +5 -0
  94. package/dist/tools/utils/content-transform-core.js +180 -0
  95. package/dist/tools/utils/content-transform-workers.d.ts +1 -0
  96. package/dist/tools/utils/content-transform-workers.js +1 -0
  97. package/dist/tools/utils/content-transform.d.ts +2 -1
  98. package/dist/tools/utils/content-transform.js +37 -136
  99. package/dist/tools/utils/fetch-pipeline.js +47 -56
  100. package/dist/tools/utils/frontmatter.d.ts +3 -0
  101. package/dist/tools/utils/frontmatter.js +73 -0
  102. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  103. package/dist/tools/utils/markdown-heuristics.js +19 -0
  104. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  105. package/dist/tools/utils/markdown-signals.js +19 -0
  106. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  107. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  108. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  109. package/dist/tools/utils/raw-markdown.js +149 -0
  110. package/dist/tools.d.ts +104 -0
  111. package/dist/tools.js +421 -0
  112. package/dist/transform.d.ts +69 -0
  113. package/dist/transform.js +1509 -0
  114. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  115. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  116. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  117. package/dist/transformers/markdown/frontmatter.js +45 -0
  118. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  119. package/dist/transformers/markdown/noise-rule.js +80 -0
  120. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  121. package/dist/transformers/markdown/turndown-instance.js +19 -0
  122. package/dist/transformers/markdown.d.ts +5 -0
  123. package/dist/transformers/markdown.js +314 -0
  124. package/dist/transformers/markdown.transformer.js +2 -189
  125. package/dist/utils/cancellation.d.ts +1 -0
  126. package/dist/utils/cancellation.js +18 -0
  127. package/dist/utils/code-language-bash.d.ts +1 -0
  128. package/dist/utils/code-language-bash.js +48 -0
  129. package/dist/utils/code-language-core.d.ts +2 -0
  130. package/dist/utils/code-language-core.js +13 -0
  131. package/dist/utils/code-language-detectors.d.ts +5 -0
  132. package/dist/utils/code-language-detectors.js +142 -0
  133. package/dist/utils/code-language-helpers.d.ts +5 -0
  134. package/dist/utils/code-language-helpers.js +62 -0
  135. package/dist/utils/code-language-parsing.d.ts +5 -0
  136. package/dist/utils/code-language-parsing.js +62 -0
  137. package/dist/utils/code-language.js +250 -46
  138. package/dist/utils/error-details.d.ts +3 -0
  139. package/dist/utils/error-details.js +12 -0
  140. package/dist/utils/filename-generator.js +14 -3
  141. package/dist/utils/host-normalizer.d.ts +1 -0
  142. package/dist/utils/host-normalizer.js +37 -0
  143. package/dist/utils/ip-address.d.ts +4 -0
  144. package/dist/utils/ip-address.js +6 -0
  145. package/dist/utils/tool-error-handler.js +12 -17
  146. package/dist/utils/url-redactor.d.ts +1 -0
  147. package/dist/utils/url-redactor.js +13 -0
  148. package/dist/utils/url-validator.js +35 -20
  149. package/dist/workers/transform-worker.js +82 -38
  150. package/package.json +13 -10
package/dist/workers/transform-worker.js CHANGED
@@ -1,50 +1,94 @@
1
1
  import { parentPort } from 'node:worker_threads';
2
- import { transformHtmlToJsonl, transformHtmlToMarkdown, transformHtmlToMarkdownWithBlocks, } from '../tools/utils/content-transform.js';
3
- function isTransformJob(value) {
4
- if (!value || typeof value !== 'object')
5
- return false;
6
- const record = value;
7
- return (typeof record.id === 'number' &&
8
- typeof record.mode === 'string' &&
9
- typeof record.html === 'string' &&
10
- typeof record.url === 'string');
2
+ import { FetchError, getErrorMessage } from '../errors.js';
3
+ import { transformHtmlToMarkdownInProcess } from '../transform.js';
4
+ const controllers = new Map();
5
+ function isRecord(value) {
6
+ return typeof value === 'object' && value !== null;
11
7
  }
12
- function resolveTransform(job) {
13
- if (job.mode === 'markdown') {
14
- return transformHtmlToMarkdown(job.html, job.url, job.options);
8
+ function post(message) {
9
+ parentPort?.postMessage(message);
10
+ }
11
+ function handleTransform(message) {
12
+ const controller = new AbortController();
13
+ controllers.set(message.id, controller);
14
+ try {
15
+ const result = transformHtmlToMarkdownInProcess(message.html, message.url, {
16
+ includeMetadata: message.includeMetadata,
17
+ signal: controller.signal,
18
+ });
19
+ post({
20
+ type: 'result',
21
+ id: message.id,
22
+ result: {
23
+ markdown: result.markdown,
24
+ ...(result.title === undefined ? {} : { title: result.title }),
25
+ truncated: result.truncated,
26
+ },
27
+ });
15
28
  }
16
- if (job.mode === 'markdown-blocks') {
17
- return transformHtmlToMarkdownWithBlocks(job.html, job.url, {
18
- ...job.options,
19
- includeContentBlocks: job.options.includeContentBlocks ?? true,
29
+ catch (error) {
30
+ if (error instanceof FetchError) {
31
+ post({
32
+ type: 'error',
33
+ id: message.id,
34
+ error: {
35
+ name: error.name,
36
+ message: error.message,
37
+ url: error.url,
38
+ statusCode: error.statusCode,
39
+ details: { ...error.details },
40
+ },
41
+ });
42
+ return;
43
+ }
44
+ post({
45
+ type: 'error',
46
+ id: message.id,
47
+ error: {
48
+ name: error instanceof Error ? error.name : 'Error',
49
+ message: getErrorMessage(error),
50
+ url: message.url,
51
+ },
20
52
  });
21
53
  }
22
- return transformHtmlToJsonl(job.html, job.url, job.options);
54
+ finally {
55
+ controllers.delete(message.id);
56
+ }
23
57
  }
24
- function sendResponse(response) {
25
- if (!parentPort)
58
+ function handleCancel(message) {
59
+ const controller = controllers.get(message.id);
60
+ if (!controller)
26
61
  return;
27
- parentPort.postMessage(response);
62
+ controller.abort(new Error('Canceled'));
28
63
  }
29
- function handleMessage(message) {
30
- if (!isTransformJob(message)) {
31
- sendResponse({
32
- id: -1,
33
- ok: false,
34
- error: 'Invalid transform job payload',
35
- });
64
+ if (!parentPort) {
65
+ throw new Error('transform-worker started without parentPort');
66
+ }
67
+ parentPort.on('message', (raw) => {
68
+ if (!isRecord(raw))
69
+ return;
70
+ const { type } = raw;
71
+ if (type === 'cancel') {
72
+ if (typeof raw.id !== 'string')
73
+ return;
74
+ handleCancel({ type: 'cancel', id: raw.id });
36
75
  return;
37
76
  }
38
- try {
39
- const result = resolveTransform(message);
40
- sendResponse({ id: message.id, ok: true, result });
41
- }
42
- catch (error) {
43
- sendResponse({
44
- id: message.id,
45
- ok: false,
46
- error: error instanceof Error ? error.message : String(error),
77
+ if (type === 'transform') {
78
+ if (typeof raw.id !== 'string')
79
+ return;
80
+ if (typeof raw.html !== 'string')
81
+ return;
82
+ if (typeof raw.url !== 'string')
83
+ return;
84
+ if (typeof raw.includeMetadata !== 'boolean')
85
+ return;
86
+ handleTransform({
87
+ type: 'transform',
88
+ id: raw.id,
89
+ html: raw.html,
90
+ url: raw.url,
91
+ includeMetadata: raw.includeMetadata,
47
92
  });
48
93
  }
49
- }
50
- parentPort?.on('message', handleMessage);
94
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/superfetch",
3
- "version": "2.0.0",
3
+ "version": "2.1.0",
4
4
  "mcpName": "io.github.j0hanz/superfetch",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",
@@ -40,6 +40,8 @@
40
40
  "start": "node dist/index.js",
41
41
  "format": "prettier --write .",
42
42
  "type-check": "tsc --noEmit",
43
+ "type-check:diagnostics": "tsc --noEmit --extendedDiagnostics",
44
+ "type-check:trace": "node -e \"require('fs').rmSync('.ts-trace',{recursive:true,force:true})\" && tsc --noEmit --generateTrace .ts-trace",
43
45
  "lint": "eslint .",
44
46
  "lint:fix": "eslint . --fix",
45
47
  "test": "npm run build --silent && node --test --experimental-transform-types",
@@ -50,31 +52,32 @@
50
52
  "prepublishOnly": "npm run lint && npm run type-check && npm run build"
51
53
  },
52
54
  "dependencies": {
53
- "@modelcontextprotocol/sdk": "^1.25.1",
55
+ "@modelcontextprotocol/sdk": "^1.25.2",
54
56
  "@mozilla/readability": "^0.6.0",
55
57
  "express": "^5.2.1",
56
58
  "linkedom": "^0.18.12",
57
- "turndown": "^7.2.2",
58
- "undici": "^6.23.0",
59
+ "node-html-markdown": "^2.0.0",
60
+ "undici": "^7.18.2",
59
61
  "zod": "^4.3.5"
60
62
  },
61
63
  "devDependencies": {
62
64
  "@eslint/js": "^9.39.2",
63
- "@trivago/prettier-plugin-sort-imports": "^6.0.1",
65
+ "@trivago/prettier-plugin-sort-imports": "^6.0.2",
64
66
  "@types/express": "^5.0.6",
65
- "@types/node": "^22.19.3",
66
- "@types/turndown": "^5.0.6",
67
+ "@types/node": "^22.19.5",
67
68
  "eslint": "^9.23.2",
68
69
  "eslint-config-prettier": "^10.1.8",
70
+ "eslint-plugin-de-morgan": "^2.0.0",
71
+ "eslint-plugin-depend": "^1.4.0",
72
+ "eslint-plugin-sonarjs": "^3.0.5",
69
73
  "eslint-plugin-unused-imports": "^4.3.0",
70
- "knip": "^5.80.0",
74
+ "knip": "^5.80.2",
71
75
  "prettier": "^3.7.4",
72
- "shx": "^0.4.0",
73
76
  "tsx": "^4.21.0",
74
77
  "typescript": "^5.9.3",
75
78
  "typescript-eslint": "^8.52.0"
76
79
  },
77
80
  "engines": {
78
- "node": ">=20.12.0"
81
+ "node": ">=20.18.1"
79
82
  }
80
83
  }