@j0hanz/superfetch 2.7.0 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cache.js CHANGED
@@ -41,14 +41,6 @@ export function parseCachedPayload(raw) {
41
41
  export function resolveCachedPayloadContent(payload) {
42
42
  return payload.markdown ?? payload.content ?? null;
43
43
  }
44
- function stableStringify(value) {
45
- try {
46
- return stableJsonStringify(value);
47
- }
48
- catch {
49
- return null;
50
- }
51
- }
52
44
  function createHashFragment(input, length) {
53
45
  return sha256Hex(input).substring(0, length);
54
46
  }
@@ -63,9 +55,18 @@ export function createCacheKey(namespace, url, vary) {
63
55
  const urlHash = createHashFragment(url, CACHE_CONSTANTS.URL_HASH_LENGTH);
64
56
  let varyHash;
65
57
  if (vary) {
66
- const varyString = typeof vary === 'string' ? vary : stableStringify(vary);
67
- if (varyString === null)
68
- return null;
58
+ let varyString;
59
+ if (typeof vary === 'string') {
60
+ varyString = vary;
61
+ }
62
+ else {
63
+ try {
64
+ varyString = stableJsonStringify(vary);
65
+ }
66
+ catch {
67
+ return null;
68
+ }
69
+ }
69
70
  if (varyString) {
70
71
  varyHash = createHashFragment(varyString, CACHE_CONSTANTS.VARY_HASH_LENGTH);
71
72
  }
@@ -22,7 +22,6 @@ const REGEX = {
22
22
  SPACING_ADJ_COMBINED: /(?:\]\([^)]+\)|`[^`]+`)(?=[A-Za-z0-9])/g,
23
23
  SPACING_CODE_DASH: /(`[^`]+`)\s*\\-\s*/g,
24
24
  SPACING_ESCAPES: /\\([[\].])/g,
25
- SPACING_URL_ENC: /\]\([^)]*%5[Ff][^)]*\)/g,
26
25
  SPACING_LIST_NUM_COMBINED: /^((?![-*+] |\d+\. |[ \t]).+)\n((?:[-*+]|\d+\.) )/gm,
27
26
  TYPEDOC: /(`+)(?:(?!\1)[\s\S])*?\1|\s?\/\\?\*[\s\S]*?\\?\*\//g,
28
27
  };
@@ -203,7 +202,6 @@ function applyGlobalRegexes(text) {
203
202
  .replace(REGEX.SPACING_ADJ_COMBINED, '$& ')
204
203
  .replace(REGEX.SPACING_CODE_DASH, '$1 - ')
205
204
  .replace(REGEX.SPACING_ESCAPES, '$1')
206
- .replace(REGEX.SPACING_URL_ENC, (m) => m.replace(/%5[Ff]/g, '_'))
207
205
  .replace(REGEX.SPACING_LIST_NUM_COMBINED, '$1\n\n$2')
208
206
  .replace(REGEX.DOUBLE_NEWLINE_REDUCER, '\n\n');
209
207
  // fixProperties
package/dist/resources.js CHANGED
@@ -8,30 +8,18 @@ const REDACTED = '<REDACTED>';
8
8
  const CONFIG_RESOURCE_NAME = 'config';
9
9
  const CONFIG_RESOURCE_URI = 'internal://config';
10
10
  const JSON_MIME = 'application/json';
11
- function redactIfPresent(value) {
12
- return value ? REDACTED : undefined;
13
- }
14
- function redactArray(values) {
15
- return values.map(() => REDACTED);
16
- }
17
- function scrubAuth(auth) {
18
- return {
19
- ...auth,
20
- clientSecret: redactIfPresent(auth.clientSecret),
21
- staticTokens: redactArray(auth.staticTokens),
22
- };
23
- }
24
- function scrubSecurity(security) {
25
- return {
26
- ...security,
27
- apiKey: redactIfPresent(security.apiKey),
28
- };
29
- }
30
11
  function scrubConfig(source) {
31
12
  return {
32
13
  ...source,
33
- auth: scrubAuth(source.auth),
34
- security: scrubSecurity(source.security),
14
+ auth: {
15
+ ...source.auth,
16
+ clientSecret: source.auth.clientSecret ? REDACTED : undefined,
17
+ staticTokens: source.auth.staticTokens.map(() => REDACTED),
18
+ },
19
+ security: {
20
+ ...source.security,
21
+ apiKey: source.security.apiKey ? REDACTED : undefined,
22
+ },
35
23
  };
36
24
  }
37
25
  export function registerConfigResource(server) {
package/dist/transform.js CHANGED
@@ -1,7 +1,6 @@
1
1
  import { AsyncLocalStorage, AsyncResource } from 'node:async_hooks';
2
2
  import { Buffer } from 'node:buffer';
3
3
  import { fork } from 'node:child_process';
4
- import { randomUUID } from 'node:crypto';
5
4
  import diagnosticsChannel from 'node:diagnostics_channel';
6
5
  import { availableParallelism } from 'node:os';
7
6
  import { performance } from 'node:perf_hooks';
@@ -202,7 +201,11 @@ function truncateHtml(html) {
202
201
  const byteLength = Buffer.byteLength(html, 'utf8');
203
202
  if (byteLength <= maxSize)
204
203
  return { html, truncated: false };
205
- const htmlBuffer = Buffer.from(html, 'utf8');
204
+ const sliced = html.slice(0, maxSize);
205
+ if (Buffer.byteLength(sliced, 'utf8') <= maxSize) {
206
+ return { html: sliced, truncated: true };
207
+ }
208
+ const htmlBuffer = Buffer.from(sliced, 'utf8');
206
209
  let content = trimUtf8Buffer(htmlBuffer, maxSize).toString('utf8');
207
210
  // Avoid truncating inside tags.
208
211
  const lastOpen = content.lastIndexOf('<');
@@ -224,8 +227,11 @@ function willTruncate(html) {
224
227
  const HEAD_END_PATTERN = /<\/head\s*>|<body\b/i;
225
228
  const MAX_HEAD_SCAN_LENGTH = 50_000;
226
229
  function extractHeadSection(html) {
227
- const searchLimit = Math.min(html.length, MAX_HEAD_SCAN_LENGTH);
228
- const searchText = html.substring(0, searchLimit);
230
+ if (html.length <= MAX_HEAD_SCAN_LENGTH) {
231
+ const match = HEAD_END_PATTERN.exec(html);
232
+ return match ? html.substring(0, match.index) : null;
233
+ }
234
+ const searchText = html.substring(0, MAX_HEAD_SCAN_LENGTH);
229
235
  const match = HEAD_END_PATTERN.exec(searchText);
230
236
  if (!match)
231
237
  return null;
@@ -875,9 +881,22 @@ function createCustomTranslators() {
875
881
  sup: () => ({
876
882
  postprocess: ({ content }) => `^${content}^`,
877
883
  }),
878
- section: () => ({
879
- postprocess: ({ content }) => `\n\n${content}\n\n`,
880
- }),
884
+ section: (ctx) => {
885
+ if (isObject(ctx) && isObject(ctx.node)) {
886
+ const { node } = ctx;
887
+ const getAttribute = hasGetAttribute(node)
888
+ ? node.getAttribute.bind(node)
889
+ : undefined;
890
+ if (getAttribute?.('class')?.includes('tsd-member')) {
891
+ return {
892
+ postprocess: ({ content }) => `\n\n&nbsp;\n\n${content}\n\n`,
893
+ };
894
+ }
895
+ }
896
+ return {
897
+ postprocess: ({ content }) => `\n\n${content}\n\n`,
898
+ };
899
+ },
881
900
  details: () => ({
882
901
  postprocess: ({ content }) => {
883
902
  const trimmed = content.trim();
@@ -948,48 +967,6 @@ function containsWhitespace(value) {
948
967
  }
949
968
  return false;
950
969
  }
951
- function extractClassAttribute(openTag) {
952
- const lower = openTag.toLowerCase();
953
- const classIndex = lower.indexOf('class');
954
- if (classIndex === -1)
955
- return null;
956
- let i = classIndex + 5;
957
- while (i < lower.length && isWhitespaceChar(lower.charCodeAt(i)))
958
- i += 1;
959
- if (lower[i] !== '=')
960
- return null;
961
- i += 1;
962
- while (i < lower.length && isWhitespaceChar(lower.charCodeAt(i)))
963
- i += 1;
964
- const quote = openTag[i];
965
- if (quote !== '"' && quote !== "'")
966
- return null;
967
- i += 1;
968
- const end = openTag.indexOf(quote, i);
969
- if (end === -1)
970
- return null;
971
- return openTag.slice(i, end);
972
- }
973
- function skipWhitespace(text, start) {
974
- let index = start;
975
- while (index < text.length && isWhitespaceChar(text.charCodeAt(index))) {
976
- index += 1;
977
- }
978
- return index;
979
- }
980
- function isTsdMemberSectionTag(openTag) {
981
- const classValue = extractClassAttribute(openTag);
982
- return classValue ? classValue.toLowerCase().includes('tsd-member') : false;
983
- }
984
- function findTsdMemberSectionStart(html, scan) {
985
- if (scan >= html.length || !html.startsWith('<section', scan))
986
- return null;
987
- const tagEnd = html.indexOf('>', scan);
988
- if (tagEnd === -1)
989
- return null;
990
- const openTag = html.slice(scan, tagEnd + 1);
991
- return isTsdMemberSectionTag(openTag) ? scan : null;
992
- }
993
970
  function resolveRelativeHref(href, baseUrl, origin) {
994
971
  const trimmedHref = href.trim();
995
972
  if (!trimmedHref || containsWhitespace(trimmedHref))
@@ -1045,26 +1022,6 @@ function findInlineLink(markdown, start) {
1045
1022
  }
1046
1023
  return null;
1047
1024
  }
1048
- function preprocessPropertySections(html) {
1049
- const closeTag = '</section>';
1050
- let cursor = 0;
1051
- let output = '';
1052
- for (let closeIndex = html.indexOf(closeTag, cursor); closeIndex !== -1; closeIndex = html.indexOf(closeTag, cursor)) {
1053
- const afterClose = closeIndex + closeTag.length;
1054
- output += html.slice(cursor, afterClose);
1055
- const scan = skipWhitespace(html, afterClose);
1056
- const sectionStart = findTsdMemberSectionStart(html, scan);
1057
- if (sectionStart !== null) {
1058
- output += '<p>&nbsp;</p>';
1059
- cursor = sectionStart;
1060
- continue;
1061
- }
1062
- output += html.slice(afterClose, scan);
1063
- cursor = scan;
1064
- }
1065
- output += html.slice(cursor);
1066
- return output;
1067
- }
1068
1025
  function isAbsoluteOrSpecialUrl(href) {
1069
1026
  const trimmedHref = href.trim();
1070
1027
  if (!trimmedHref)
@@ -1102,8 +1059,7 @@ function translateHtmlToMarkdown(params) {
1102
1059
  ? html
1103
1060
  : stageTracker.run(url, 'markdown:noise', () => removeNoiseFromHtml(html, document, url));
1104
1061
  abortPolicy.throwIfAborted(signal, url, 'markdown:cleaned');
1105
- const preprocessedHtml = stageTracker.run(url, 'markdown:preprocess', () => preprocessPropertySections(cleanedHtml));
1106
- const content = stageTracker.run(url, 'markdown:translate', () => translateHtmlFragmentToMarkdown(preprocessedHtml));
1062
+ const content = stageTracker.run(url, 'markdown:translate', () => translateHtmlFragmentToMarkdown(cleanedHtml));
1107
1063
  abortPolicy.throwIfAborted(signal, url, 'markdown:translated');
1108
1064
  const cleaned = cleanupMarkdownArtifacts(content);
1109
1065
  return url ? resolveRelativeUrls(cleaned, url) : cleaned;
@@ -1673,6 +1629,7 @@ class WorkerPool {
1673
1629
  queueMax;
1674
1630
  spawnWorkerImpl;
1675
1631
  closed = false;
1632
+ taskIdSeq = 0;
1676
1633
  constructor(size, timeoutMs, spawnWorker) {
1677
1634
  if (size === 0) {
1678
1635
  this.capacity = 0;
@@ -1752,7 +1709,7 @@ class WorkerPool {
1752
1709
  throw new Error(WorkerPool.CLOSED_MESSAGE);
1753
1710
  }
1754
1711
  createPendingTask(htmlOrBuffer, url, options, resolve, reject) {
1755
- const id = randomUUID();
1712
+ const id = (this.taskIdSeq++).toString(36);
1756
1713
  // Preserve request context for resolve/reject even when callbacks fire
1757
1714
  // from worker thread events.
1758
1715
  const context = createTaskContext();
@@ -2153,10 +2110,26 @@ async function transformWithWorkerPool(htmlOrBuffer, url, options) {
2153
2110
  });
2154
2111
  }
2155
2112
  function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
2113
+ const isQueueFull = error instanceof FetchError && error.details.reason === 'queue_full';
2114
+ if (isQueueFull) {
2115
+ logWarn('Transform worker queue full; falling back to in-process', {
2116
+ url: redactUrl(url),
2117
+ });
2118
+ return transformHtmlToMarkdownInProcess(decodeUtf8Input(htmlOrBuffer), url, options);
2119
+ }
2156
2120
  if (error instanceof FetchError)
2157
2121
  throw error;
2158
2122
  abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
2159
- return transformHtmlToMarkdownInProcess(decodeUtf8Input(htmlOrBuffer), url, options);
2123
+ const message = getErrorMessage(error);
2124
+ logWarn('Transform worker failed; refusing in-process fallback', {
2125
+ url: redactUrl(url),
2126
+ error: message,
2127
+ });
2128
+ throw new FetchError('Transform worker failed', url, 503, {
2129
+ reason: 'worker_failed',
2130
+ stage: 'transform:worker',
2131
+ error: message,
2132
+ });
2160
2133
  }
2161
2134
  async function transformInputToMarkdown(htmlOrBuffer, url, options) {
2162
2135
  const totalStage = stageTracker.start(url, 'transform:total');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/superfetch",
3
- "version": "2.7.0",
3
+ "version": "2.7.1",
4
4
  "mcpName": "io.github.j0hanz/superfetch",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",