@j0hanz/superfetch 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +243 -494
  2. package/dist/cache.d.ts +2 -3
  3. package/dist/cache.js +51 -241
  4. package/dist/config.d.ts +6 -1
  5. package/dist/config.js +29 -34
  6. package/dist/crypto.d.ts +0 -1
  7. package/dist/crypto.js +0 -1
  8. package/dist/dom-noise-removal.d.ts +5 -0
  9. package/dist/dom-noise-removal.js +485 -0
  10. package/dist/errors.d.ts +0 -1
  11. package/dist/errors.js +8 -6
  12. package/dist/fetch.d.ts +0 -1
  13. package/dist/fetch.js +71 -61
  14. package/dist/host-normalization.d.ts +1 -0
  15. package/dist/host-normalization.js +47 -0
  16. package/dist/http-native.d.ts +5 -0
  17. package/dist/http-native.js +693 -0
  18. package/dist/index.d.ts +0 -1
  19. package/dist/index.js +1 -2
  20. package/dist/instructions.md +22 -20
  21. package/dist/json.d.ts +1 -0
  22. package/dist/json.js +29 -0
  23. package/dist/language-detection.d.ts +12 -0
  24. package/dist/language-detection.js +291 -0
  25. package/dist/markdown-cleanup.d.ts +18 -0
  26. package/dist/markdown-cleanup.js +283 -0
  27. package/dist/mcp-validator.d.ts +14 -0
  28. package/dist/mcp-validator.js +22 -0
  29. package/dist/mcp.d.ts +0 -1
  30. package/dist/mcp.js +0 -1
  31. package/dist/observability.d.ts +1 -1
  32. package/dist/observability.js +15 -3
  33. package/dist/server-tuning.d.ts +9 -0
  34. package/dist/server-tuning.js +30 -0
  35. package/dist/session.d.ts +36 -0
  36. package/dist/session.js +159 -0
  37. package/dist/tools.d.ts +0 -1
  38. package/dist/tools.js +23 -33
  39. package/dist/transform-types.d.ts +80 -0
  40. package/dist/transform-types.js +5 -0
  41. package/dist/transform.d.ts +7 -53
  42. package/dist/transform.js +434 -856
  43. package/dist/type-guards.d.ts +1 -2
  44. package/dist/type-guards.js +1 -2
  45. package/dist/workers/transform-worker.d.ts +0 -1
  46. package/dist/workers/transform-worker.js +52 -43
  47. package/package.json +11 -12
  48. package/dist/cache.d.ts.map +0 -1
  49. package/dist/cache.js.map +0 -1
  50. package/dist/config.d.ts.map +0 -1
  51. package/dist/config.js.map +0 -1
  52. package/dist/crypto.d.ts.map +0 -1
  53. package/dist/crypto.js.map +0 -1
  54. package/dist/errors.d.ts.map +0 -1
  55. package/dist/errors.js.map +0 -1
  56. package/dist/fetch.d.ts.map +0 -1
  57. package/dist/fetch.js.map +0 -1
  58. package/dist/http.d.ts +0 -90
  59. package/dist/http.d.ts.map +0 -1
  60. package/dist/http.js +0 -1576
  61. package/dist/http.js.map +0 -1
  62. package/dist/index.d.ts.map +0 -1
  63. package/dist/index.js.map +0 -1
  64. package/dist/mcp.d.ts.map +0 -1
  65. package/dist/mcp.js.map +0 -1
  66. package/dist/observability.d.ts.map +0 -1
  67. package/dist/observability.js.map +0 -1
  68. package/dist/tools.d.ts.map +0 -1
  69. package/dist/tools.js.map +0 -1
  70. package/dist/transform.d.ts.map +0 -1
  71. package/dist/transform.js.map +0 -1
  72. package/dist/type-guards.d.ts.map +0 -1
  73. package/dist/type-guards.js.map +0 -1
  74. package/dist/workers/transform-worker.d.ts.map +0 -1
  75. package/dist/workers/transform-worker.js.map +0 -1
package/dist/tools.js CHANGED
@@ -6,7 +6,7 @@ import { FetchError, getErrorMessage, isSystemError } from './errors.js';
6
6
  import { fetchNormalizedUrl, normalizeUrl, transformToRawUrl, } from './fetch.js';
7
7
  import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
8
8
  import { transformHtmlToMarkdown, } from './transform.js';
9
- import { isRecord } from './type-guards.js';
9
+ import { isObject } from './type-guards.js';
10
10
  const TRUNCATION_MARKER = '...[truncated]';
11
11
  const FETCH_PROGRESS_TOTAL = 4;
12
12
  const fetchUrlInputSchema = z.strictObject({
@@ -106,23 +106,16 @@ function buildEmbeddedResource(content, url, title) {
106
106
  },
107
107
  };
108
108
  }
109
- function resolveContentToEmbed(inlineResult, fullContent, useInlineInHttpMode) {
110
- if (useInlineInHttpMode) {
111
- return inlineResult.content;
112
- }
113
- return fullContent ?? inlineResult.content;
114
- }
115
- function maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title) {
116
- if (!contentToEmbed)
117
- return;
118
- if (!url)
119
- return;
120
- const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
121
- if (embeddedResource) {
122
- blocks.push(embeddedResource);
109
+ function appendResourceBlocks({ blocks, inlineResult, resourceName, url, title, fullContent, }) {
110
+ const contentToEmbed = config.runtime.httpMode
111
+ ? inlineResult.content
112
+ : (fullContent ?? inlineResult.content);
113
+ if (contentToEmbed && url) {
114
+ const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
115
+ if (embeddedResource) {
116
+ blocks.push(embeddedResource);
117
+ }
123
118
  }
124
- }
125
- function maybeAppendResourceLink(blocks, inlineResult, resourceName) {
126
119
  const resourceLink = buildResourceLink(inlineResult, resourceName);
127
120
  if (resourceLink) {
128
121
  blocks.push(resourceLink);
@@ -136,9 +129,14 @@ function buildTextBlock(structuredContent) {
136
129
  }
137
130
  function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, url, title) {
138
131
  const blocks = [buildTextBlock(structuredContent)];
139
- const contentToEmbed = resolveContentToEmbed(inlineResult, fullContent, config.runtime.httpMode);
140
- maybeAppendEmbeddedResource(blocks, contentToEmbed, url, title);
141
- maybeAppendResourceLink(blocks, inlineResult, resourceName);
132
+ appendResourceBlocks({
133
+ blocks,
134
+ inlineResult,
135
+ resourceName,
136
+ url,
137
+ title,
138
+ fullContent,
139
+ });
142
140
  return blocks;
143
141
  }
144
142
  function applyInlineContentLimit(content, cacheKey) {
@@ -246,7 +244,7 @@ function persistCache({ cacheKey, data, serialize, normalizedUrl, }) {
246
244
  cache.set(cacheKey, serializer(data), metadata);
247
245
  }
248
246
  function extractTitle(value) {
249
- if (!isRecord(value))
247
+ if (!isObject(value))
250
248
  return undefined;
251
249
  const { title } = value;
252
250
  return typeof title === 'string' ? title : undefined;
@@ -266,14 +264,6 @@ function logRawUrlTransformation(resolvedUrl) {
266
264
  original: resolvedUrl.originalUrl,
267
265
  });
268
266
  }
269
- function applyOptionalPipelineSerialization(pipelineOptions, options) {
270
- if (options.serialize !== undefined) {
271
- pipelineOptions.serialize = options.serialize;
272
- }
273
- if (options.deserialize !== undefined) {
274
- pipelineOptions.deserialize = options.deserialize;
275
- }
276
- }
277
267
  export async function performSharedFetch(options, deps = {}) {
278
268
  const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
279
269
  const pipelineOptions = {
@@ -281,8 +271,9 @@ export async function performSharedFetch(options, deps = {}) {
281
271
  cacheNamespace: 'markdown',
282
272
  ...(options.signal === undefined ? {} : { signal: options.signal }),
283
273
  transform: options.transform,
274
+ ...(options.serialize ? { serialize: options.serialize } : {}),
275
+ ...(options.deserialize ? { deserialize: options.deserialize } : {}),
284
276
  };
285
- applyOptionalPipelineSerialization(pipelineOptions, options);
286
277
  const pipeline = await executePipeline(pipelineOptions);
287
278
  const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
288
279
  return { pipeline, inlineResult };
@@ -319,7 +310,7 @@ function resolveToolErrorMessage(error, fallbackMessage) {
319
310
  function parseJsonRecord(input) {
320
311
  try {
321
312
  const parsed = JSON.parse(input);
322
- return isRecord(parsed) ? parsed : undefined;
313
+ return isObject(parsed) ? parsed : undefined;
323
314
  }
324
315
  catch {
325
316
  return undefined;
@@ -467,7 +458,7 @@ export function withRequestContextIfMissing(handler) {
467
458
  };
468
459
  }
469
460
  function resolveRequestIdFromExtra(extra) {
470
- if (!isRecord(extra))
461
+ if (!isObject(extra))
471
462
  return undefined;
472
463
  const { requestId } = extra;
473
464
  if (typeof requestId === 'string')
@@ -485,4 +476,3 @@ export function registerTools(server) {
485
476
  annotations: TOOL_DEFINITION.annotations,
486
477
  }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
487
478
  }
488
- //# sourceMappingURL=tools.js.map
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Shared types for the transform pipeline.
3
+ * Extracted to avoid circular dependencies between transform modules.
4
+ */
5
+ /**
6
+ * Metadata block for attaching source information to markdown output.
7
+ */
8
+ export interface MetadataBlock {
9
+ type: 'metadata';
10
+ title?: string;
11
+ description?: string;
12
+ author?: string;
13
+ url: string;
14
+ fetchedAt: string;
15
+ }
16
+ /**
17
+ * Article extracted by Readability.
18
+ */
19
+ export interface ExtractedArticle {
20
+ title?: string;
21
+ byline?: string;
22
+ content: string;
23
+ textContent: string;
24
+ excerpt?: string;
25
+ siteName?: string;
26
+ }
27
+ /**
28
+ * Metadata extracted from HTML meta tags.
29
+ */
30
+ export interface ExtractedMetadata {
31
+ title?: string;
32
+ description?: string;
33
+ author?: string;
34
+ image?: string;
35
+ publishedAt?: string;
36
+ modifiedAt?: string;
37
+ }
38
+ /**
39
+ * Result of content extraction (article + metadata).
40
+ */
41
+ export interface ExtractionResult {
42
+ article: ExtractedArticle | null;
43
+ metadata: ExtractedMetadata;
44
+ }
45
+ /**
46
+ * Result of HTML to markdown transformation.
47
+ */
48
+ export interface MarkdownTransformResult {
49
+ markdown: string;
50
+ title: string | undefined;
51
+ truncated: boolean;
52
+ }
53
+ /**
54
+ * Options for transform operations.
55
+ */
56
+ export interface TransformOptions {
57
+ includeMetadata: boolean;
58
+ signal?: AbortSignal;
59
+ }
60
+ /**
61
+ * Telemetry event emitted during transform stages.
62
+ */
63
+ export interface TransformStageEvent {
64
+ v: 1;
65
+ type: 'stage';
66
+ stage: string;
67
+ durationMs: number;
68
+ url: string;
69
+ requestId?: string;
70
+ operationId?: string;
71
+ truncated?: boolean;
72
+ }
73
+ /**
74
+ * Context for tracking transform stage timing.
75
+ */
76
+ export interface TransformStageContext {
77
+ readonly stage: string;
78
+ readonly startTime: number;
79
+ readonly url: string;
80
+ }
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Shared types for the transform pipeline.
3
+ * Extracted to avoid circular dependencies between transform modules.
4
+ */
5
+ export {};
@@ -1,52 +1,8 @@
1
- export interface MetadataBlock {
2
- type: 'metadata';
3
- title?: string;
4
- description?: string;
5
- author?: string;
6
- url: string;
7
- fetchedAt: string;
8
- }
9
- export interface ExtractedArticle {
10
- title?: string;
11
- byline?: string;
12
- content: string;
13
- textContent: string;
14
- excerpt?: string;
15
- siteName?: string;
16
- }
17
- export interface ExtractedMetadata {
18
- title?: string;
19
- description?: string;
20
- author?: string;
21
- }
22
- export interface ExtractionResult {
23
- article: ExtractedArticle | null;
24
- metadata: ExtractedMetadata;
25
- }
26
- export interface MarkdownTransformResult {
27
- markdown: string;
28
- title: string | undefined;
29
- truncated: boolean;
30
- }
31
- export interface TransformOptions {
32
- includeMetadata: boolean;
33
- signal?: AbortSignal;
34
- }
35
- export interface TransformStageEvent {
36
- v: 1;
37
- type: 'stage';
38
- stage: string;
39
- durationMs: number;
40
- url: string;
41
- requestId?: string;
42
- operationId?: string;
43
- truncated?: boolean;
44
- }
45
- export interface TransformStageContext {
46
- readonly stage: string;
47
- readonly startTime: number;
48
- readonly url: string;
49
- }
1
+ import type { ExtractedArticle, ExtractedMetadata, ExtractionResult, MarkdownTransformResult, MetadataBlock, TransformOptions, TransformStageContext } from './transform-types.js';
2
+ export { detectLanguageFromCode, resolveLanguageFromAttributes, } from './language-detection.js';
3
+ export { cleanupMarkdownArtifacts, promoteOrphanHeadings, } from './markdown-cleanup.js';
4
+ export { removeNoiseFromHtml } from './dom-noise-removal.js';
5
+ export type { MetadataBlock, ExtractedArticle, ExtractedMetadata, ExtractionResult, MarkdownTransformResult, TransformOptions, TransformStageEvent, TransformStageContext, } from './transform-types.js';
50
6
  export declare function startTransformStage(url: string, stage: string): TransformStageContext | null;
51
7
  export declare function endTransformStage(context: TransformStageContext | null, options?: {
52
8
  truncated?: boolean;
@@ -55,17 +11,15 @@ export declare function extractContent(html: string, url: string, options?: {
55
11
  extractArticle?: boolean;
56
12
  signal?: AbortSignal;
57
13
  }): ExtractionResult;
58
- export declare function detectLanguageFromCode(code: string): string | undefined;
59
- export declare function resolveLanguageFromAttributes(className: string, dataLang: string): string | undefined;
60
14
  export declare function htmlToMarkdown(html: string, metadata?: MetadataBlock, options?: {
61
15
  url?: string;
62
16
  signal?: AbortSignal;
63
17
  document?: Document;
18
+ skipNoiseRemoval?: boolean;
64
19
  }): string;
65
- export declare function isExtractionSufficient(article: ExtractedArticle | null, originalHtml: string): boolean;
20
+ export declare function isExtractionSufficient(article: ExtractedArticle | null, originalHtmlOrDocument: string | Document): boolean;
66
21
  export declare function determineContentExtractionSource(article: ExtractedArticle | null): article is ExtractedArticle;
67
22
  export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
68
23
  export declare function transformHtmlToMarkdownInProcess(html: string, url: string, options: TransformOptions): MarkdownTransformResult;
69
24
  export declare function shutdownTransformWorkerPool(): Promise<void>;
70
25
  export declare function transformHtmlToMarkdown(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;
71
- //# sourceMappingURL=transform.d.ts.map