feedsweep 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +29 -17
  2. package/dist/common.d.ts +10 -6
  3. package/dist/common.js +99 -28
  4. package/dist/defaults.d.ts +2 -1
  5. package/dist/defaults.js +46 -17
  6. package/dist/embeds/youtube.js +2 -2
  7. package/dist/index.d.ts +16 -10
  8. package/dist/index.js +23 -13
  9. package/dist/transforms/dom/convertBreaksToParagraphs.d.ts +6 -0
  10. package/dist/transforms/dom/convertBreaksToParagraphs.js +80 -0
  11. package/dist/transforms/dom/decodeDoubleEncodedTags.d.ts +6 -0
  12. package/dist/transforms/dom/decodeDoubleEncodedTags.js +30 -0
  13. package/dist/transforms/dom/enrichEmbedPlaceholders.d.ts +6 -0
  14. package/dist/transforms/dom/enrichEmbedPlaceholders.js +32 -0
  15. package/dist/transforms/dom/fixLazyImages.js +37 -13
  16. package/dist/transforms/dom/highlightCode.js +3 -2
  17. package/dist/transforms/dom/injectEnclosures.d.ts +6 -0
  18. package/dist/transforms/dom/injectEnclosures.js +66 -0
  19. package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +1 -1
  20. package/dist/transforms/dom/mergeFragmentedLists.d.ts +6 -0
  21. package/dist/transforms/dom/mergeFragmentedLists.js +84 -0
  22. package/dist/transforms/dom/proxyAssetUrls.d.ts +6 -0
  23. package/dist/transforms/dom/proxyAssetUrls.js +64 -0
  24. package/dist/transforms/dom/removeTrackingPixels.js +22 -25
  25. package/dist/transforms/dom/replaceEmbedsWithPlaceholders.js +24 -25
  26. package/dist/transforms/dom/replacePreLineBreaks.js +3 -4
  27. package/dist/transforms/dom/resolveRelativeUrls.js +44 -30
  28. package/dist/transforms/dom/stripComments.js +5 -15
  29. package/dist/transforms/dom/stripDeadAnchors.d.ts +6 -0
  30. package/dist/transforms/dom/stripDeadAnchors.js +20 -0
  31. package/dist/transforms/dom/stripDuplicateTitleHeading.d.ts +6 -0
  32. package/dist/transforms/dom/stripDuplicateTitleHeading.js +31 -0
  33. package/dist/transforms/dom/stripEmptyTags.d.ts +6 -0
  34. package/dist/transforms/dom/stripEmptyTags.js +53 -0
  35. package/dist/transforms/dom/stripInterBlockBreaks.js +28 -8
  36. package/dist/transforms/dom/stripParagraphBoundaryBreaks.js +26 -6
  37. package/dist/transforms/dom/stripTrackingParams.js +7 -6
  38. package/dist/transforms/dom/trimPreWhitespace.js +4 -3
  39. package/dist/transforms/dom/unwrapDoublyNestedLists.d.ts +6 -0
  40. package/dist/transforms/dom/unwrapDoublyNestedLists.js +41 -0
  41. package/dist/transforms/dom/unwrapRedirectUrls.js +4 -2
  42. package/dist/transforms/dom/unwrapWrappers.d.ts +6 -0
  43. package/dist/transforms/dom/unwrapWrappers.js +30 -0
  44. package/dist/transforms/string/paragraphizePlainText.js +1 -1
  45. package/dist/transforms/string/unwrapCdataComments.d.ts +6 -0
  46. package/dist/transforms/string/unwrapCdataComments.js +10 -0
  47. package/dist/types.d.ts +35 -6
  48. package/dist/unwraps/google.js +1 -1
  49. package/dist/unwraps/googleNewsModern.js +7 -3
  50. package/package.json +2 -2
  51. package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.d.ts +0 -6
  52. package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.js +0 -33
  53. package/dist/transforms/dom/simplifyFigures.d.ts +0 -6
  54. package/dist/transforms/dom/simplifyFigures.js +0 -27
  55. package/dist/transforms/string/decodeDoubleEncodedTags.d.ts +0 -6
  56. package/dist/transforms/string/decodeDoubleEncodedTags.js +0 -23
  57. package/dist/transforms/string/stripEmptyTags.d.ts +0 -6
  58. package/dist/transforms/string/stripEmptyTags.js +0 -25
  59. package/dist/transforms/string/stripOrphanedClosingTags.d.ts +0 -6
  60. package/dist/transforms/string/stripOrphanedClosingTags.js +0 -28
  61. package/dist/transforms/string/unwrapWrappers.d.ts +0 -6
  62. package/dist/transforms/string/unwrapWrappers.js +0 -10
package/README.md CHANGED
@@ -19,7 +19,7 @@ npm install feedsweep
19
19
  ```typescript
20
20
  import { transformContent } from 'feedsweep'
21
21
 
22
- const result = transformContent('<p>Check <img data-src="photo.jpg"> and visit /about</p>', {
22
+ const result = await transformContent('<p>Check <img data-src="photo.jpg"> and visit /about</p>', {
23
23
  baseUrl: 'https://example.com/post/1',
24
24
  })
25
25
  ```
@@ -30,27 +30,33 @@ Inventory of every transform exported from the package. Most are enabled by defa
30
30
 
31
31
  | Transform | Description |
32
32
  | --- | --- |
33
- | `stripOrphanedClosingTags` | Remove unmatched `</p>` / `</div>` close tags |
34
33
  | `decodeDoubleEncodedTags` | Decode `&lt;tag&gt;` back to `<tag>` in mixed content |
35
- | `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
36
- | `paragraphizePlainText` | Wrap plain text in `<p>` tags |
37
- | `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
38
- | `stripComments` | Remove HTML `<!-- comments -->` |
39
34
  | `fixLazyImages` | Move `data-src` / `data-original` to real `src` |
40
- | `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
41
- | `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
42
- | `stripTrackingParams` | Remove UTM and other tracking parameters |
43
- | `removeTrackingPixels` | Strip 1×1 tracking pixel images |
44
- | `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
45
- | `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
46
- | `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
47
35
  | `mergeConsecutiveOneLinerPres` | Merge consecutive single-line `<pre>` tags |
48
36
  | `replacePreLineBreaks` | Replace `<br>` with `\n` inside `<pre>` |
49
- | `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
50
- | `linkifyUrls` | Wrap bare URLs in `<a>` tags |
37
+ | `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
38
+ | `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
39
+ | `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
40
+ | `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
41
+ | `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
42
+ | `removeTrackingPixels` | Strip 1×1 tracking pixel images |
43
+ | `stripTrackingParams` | Remove UTM and other tracking parameters |
44
+ | `convertBreaksToParagraphs` | Convert `<br><br>` runs into semantic `<p>` blocks |
45
+ | `injectEnclosures` | Inject feed enclosures into content as native `<audio>`/`<video>` or iframe placeholders |
51
46
  | `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
52
- | `injectEnclosureEmbedPlaceholders` | Add audio/video enclosures to content |
53
- | `simplifyFigures` | Unwrap `<figure>` when the figcaption is empty or redundant |
47
+ | `enrichEmbedPlaceholders` | Populate placeholder metadata (`title`, `description`, `duration`, etc.) via a caller-supplied async fn. Opt-in; not in defaults |
48
+ | `proxyAssetUrls` | Rewrite image, video, and audio URLs through a caller-supplied proxy |
49
+ | `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
50
+ | `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
51
+ | `unwrapDoublyNestedLists` | Unwrap `<ul>`/`<ol>` that wrap a single `<li>` containing a same-type list |
52
+ | `mergeFragmentedLists` | Merge consecutive sibling `<ul>` / `<ol>` lists with matching attributes |
53
+ | `paragraphizePlainText` | Wrap plain text in `<p>` tags |
54
+ | `linkifyUrls` | Wrap bare URLs in `<a>` tags |
55
+ | `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
56
+ | `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
57
+ | `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
58
+ | `stripComments` | Remove HTML `<!-- comments -->` |
59
+ | `unwrapCdataComments` | Strip malformed `<!--[CDATA[ … ]]-->` wrappers before parsing so the wrapped article reaches the DOM as real HTML |
54
60
 
55
61
  ## Options
56
62
 
@@ -62,6 +68,12 @@ const result = transformContent(html, {
62
68
  baseUrl: 'https://example.com/post/1',
63
69
  // Feed item enclosures (audio/video).
64
70
  enclosures: [{ url: 'https://example.com/audio.mp3', type: 'audio/mpeg' }],
71
+ // Route image/video/audio URLs through a proxy. Return `undefined` to leave a URL untouched.
72
+ assetProxyFn: (url, type) => `https://proxy.example.com/?type=${type}&url=${encodeURIComponent(url)}`,
73
+ // Populate embed placeholder metadata from a remote source (e.g. YouTube oEmbed).
74
+ enrichEmbedFn: async (embeds) => {
75
+ return new Map(embeds.map(({ provider, id }) => [`${provider}:${id}`, { title: '…' }]))
76
+ },
65
77
  // Run a custom DOM transform pipeline (omit to use defaults).
66
78
  domTransforms: [fixLazyImages, resolveRelativeUrls],
67
79
  })
package/dist/common.d.ts CHANGED
@@ -1,11 +1,15 @@
1
- import { EmbedResolverResult } from "./types.js";
1
+ import { EmbedResolverResult, MaybePromise } from "./types.js";
2
2
 
3
3
  //#region src/common.d.ts
4
4
  declare const stripOversizedBase64Sources: (html: string, maxSize: number) => string;
5
+ declare const expandSvgSelfClose: (html: string) => string;
5
6
  declare const parseFragment: (html: string) => Document;
6
- declare const transformHtml: (html: string, transform: (document: Document) => void) => string;
7
- declare const applyDomTransforms: (html: string, transforms: Array<(document: Document) => void>) => string;
8
- declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => string>) => string;
9
- declare const createEmbedPlaceholder: (document: Document, src: string, type: "video" | "audio" | "iframe", metadata?: Partial<EmbedResolverResult>) => HTMLElement;
7
+ declare const transformHtml: (html: string, transform: (document: Document) => MaybePromise<void>) => Promise<string>;
8
+ declare const applyDomTransforms: (html: string, transforms: Array<(document: Document) => MaybePromise<void>>) => Promise<string>;
9
+ declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => MaybePromise<string>>) => Promise<string>;
10
+ declare const applyEmbedMetadata: (element: HTMLElement, metadata: Partial<EmbedResolverResult>, options?: {
11
+ setIfMissing?: boolean;
12
+ }) => void;
13
+ declare const createEmbedPlaceholder: (document: Document, src: string, metadata?: Partial<EmbedResolverResult>) => HTMLElement;
10
14
  //#endregion
11
- export { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml };
15
+ export { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml };
package/dist/common.js CHANGED
@@ -1,3 +1,4 @@
1
+ import { coerceNumber } from "./utils.js";
1
2
  import { resolveUrl } from "feedcanon";
2
3
  import { parseHTML } from "linkedom";
3
4
  //#region src/common.ts
@@ -6,6 +7,11 @@ const Node = {
6
7
  TEXT_NODE: 3,
7
8
  COMMENT_NODE: 8
8
9
  };
10
+ const NodeFilter = {
11
+ SHOW_ELEMENT: 1,
12
+ SHOW_TEXT: 4,
13
+ SHOW_COMMENT: 128
14
+ };
9
15
  const base64SrcRegex = /((?:src|srcset|poster)=["'])data:[^"']*;base64,[^"']*(["'])/g;
10
16
  const safeThumbnailDataUrlRegex = /^data:image\/(png|jpe?g|gif|webp|avif);/i;
11
17
  const isSafeThumbnailUrl = (url) => {
@@ -17,23 +23,53 @@ const stripOversizedBase64Sources = (html, maxSize) => {
17
23
  return `${prefix}${suffix}`;
18
24
  });
19
25
  };
26
+ const normalizeAttributeCase = (document) => {
27
+ for (const element of document.querySelectorAll("*")) {
28
+ const original = Array.from(element.attributes).map((attribute) => ({
29
+ name: attribute.name,
30
+ value: attribute.value
31
+ }));
32
+ const final = /* @__PURE__ */ new Map();
33
+ let needsRewrite = false;
34
+ for (const { name, value } of original) {
35
+ const lower = name.toLowerCase();
36
+ if (lower !== name) needsRewrite = true;
37
+ if (final.has(lower)) {
38
+ needsRewrite = true;
39
+ continue;
40
+ }
41
+ final.set(lower, value);
42
+ }
43
+ if (!needsRewrite) continue;
44
+ for (const { name } of original) element.removeAttribute(name);
45
+ for (const [name, value] of final) element.setAttribute(name, value);
46
+ }
47
+ };
48
+ const svgRegionRegex = /<svg\b[^>]*>[\s\S]*?<\/svg>/gi;
49
+ const svgSelfCloseRegex = /<([a-z][a-z0-9-]*)((?:\s[^>]*)?)\s*\/>/gi;
50
+ const expandSvgSelfClose = (html) => {
51
+ return html.replace(svgRegionRegex, (svgBlock) => {
52
+ return svgBlock.replace(svgSelfCloseRegex, "<$1$2></$1>");
53
+ });
54
+ };
20
55
  const parseFragment = (html) => {
21
- const { document } = parseHTML(`<!doctype html><html><head></head><body>${html}</body></html>`);
56
+ const { document } = parseHTML(`<!doctype html><html><head></head><body>${expandSvgSelfClose(html)}</body></html>`);
57
+ normalizeAttributeCase(document);
22
58
  return document;
23
59
  };
24
- const transformHtml = (html, transform) => {
60
+ const transformHtml = async (html, transform) => {
25
61
  const document = parseFragment(html);
26
- transform(document);
62
+ await transform(document);
27
63
  return document.body.innerHTML;
28
64
  };
29
- const applyDomTransforms = (html, transforms) => {
65
+ const applyDomTransforms = async (html, transforms) => {
30
66
  const document = parseFragment(stripOversizedBase64Sources(html, 50 * 1024));
31
- for (const transform of transforms) transform(document);
67
+ for (const transform of transforms) await transform(document);
32
68
  return document.body.innerHTML;
33
69
  };
34
- const applyStringTransforms = (html, transforms) => {
70
+ const applyStringTransforms = async (html, transforms) => {
35
71
  let output = html;
36
- for (const transform of transforms) output = transform(output);
72
+ for (const transform of transforms) output = await transform(output);
37
73
  return output;
38
74
  };
39
75
  const blockElements = new Set([
@@ -71,10 +107,10 @@ const blockElements = new Set([
71
107
  "ul"
72
108
  ]);
73
109
  const isWhitespaceText = (node) => {
74
- return node.nodeType === Node.TEXT_NODE && !(node.textContent ?? "").trim();
110
+ return node.nodeType === Node.TEXT_NODE && !node.textContent?.trim();
75
111
  };
76
112
  const isBr = (node) => {
77
- return node.nodeType === Node.ELEMENT_NODE && node.tagName.toLowerCase() === "br";
113
+ return node.nodeType === Node.ELEMENT_NODE && node.localName === "br";
78
114
  };
79
115
  const isComment = (node) => {
80
116
  return node.nodeType === Node.COMMENT_NODE;
@@ -83,28 +119,63 @@ const isSkippable = (node) => {
83
119
  return isWhitespaceText(node) || isBr(node) || isComment(node);
84
120
  };
85
121
  const isBlockElement = (node) => {
86
- return node.nodeType === Node.ELEMENT_NODE && blockElements.has(node.tagName.toLowerCase());
87
- };
88
- const unwrapOuterTag = (html, pattern) => {
89
- let result = html.trim();
90
- let match = pattern.exec(result);
91
- while (match) {
92
- result = match[3].trim();
93
- match = pattern.exec(result);
122
+ return node.nodeType === Node.ELEMENT_NODE && blockElements.has(node.localName);
123
+ };
124
+ const hasAncestorWithTagName = (node, tagSet, stopAt) => {
125
+ let ancestor = node.parentNode;
126
+ while (ancestor !== null && ancestor !== stopAt) {
127
+ if (ancestor.nodeType === Node.ELEMENT_NODE && tagSet.has(ancestor.localName)) return true;
128
+ ancestor = ancestor.parentNode;
94
129
  }
95
- return result;
130
+ return false;
131
+ };
132
+ const styleWidthRegex = /(?:^|;)\s*width\s*:\s*([0-9]*\.?[0-9]+)\s*(?:px)?\s*(?:;|$)/i;
133
+ const styleHeightRegex = /(?:^|;)\s*height\s*:\s*([0-9]*\.?[0-9]+)\s*(?:px)?\s*(?:;|$)/i;
134
+ const getDimensions = (element) => {
135
+ const width = coerceNumber(element.getAttribute("width"));
136
+ const height = coerceNumber(element.getAttribute("height"));
137
+ if (width !== void 0 && height !== void 0) return {
138
+ width,
139
+ height
140
+ };
141
+ const style = element.getAttribute("style");
142
+ if (!style) return {
143
+ width,
144
+ height
145
+ };
146
+ const fromStyle = (regex) => {
147
+ const match = regex.exec(style);
148
+ return match ? coerceNumber(match[1]) : void 0;
149
+ };
150
+ return {
151
+ width: width ?? fromStyle(styleWidthRegex),
152
+ height: height ?? fromStyle(styleHeightRegex)
153
+ };
154
+ };
155
+ const applyEmbedMetadata = (element, metadata, options) => {
156
+ const setIfMissing = options?.setIfMissing ?? false;
157
+ const set = (name, value) => {
158
+ if (setIfMissing && element.hasAttribute(name)) return;
159
+ element.setAttribute(name, value);
160
+ };
161
+ if (metadata.provider) set("data-embed-provider", metadata.provider);
162
+ if (metadata.id) set("data-embed-id", metadata.id);
163
+ if (metadata.src) set("data-embed-src", metadata.src);
164
+ if (metadata.url) set("data-embed-url", metadata.url);
165
+ if (metadata.thumbnail && isSafeThumbnailUrl(metadata.thumbnail)) set("data-embed-thumbnail", metadata.thumbnail);
166
+ if (metadata.width) set("data-embed-width", String(metadata.width));
167
+ if (metadata.height) set("data-embed-height", String(metadata.height));
168
+ if (metadata.title) set("data-embed-title", metadata.title);
169
+ if (metadata.description) set("data-embed-description", metadata.description);
170
+ if (metadata.author) set("data-embed-author", metadata.author);
171
+ if (metadata.avatar && isSafeThumbnailUrl(metadata.avatar)) set("data-embed-avatar", metadata.avatar);
172
+ if (metadata.duration) set("data-embed-duration", String(metadata.duration));
96
173
  };
97
- const createEmbedPlaceholder = (document, src, type, metadata) => {
174
+ const createEmbedPlaceholder = (document, src, metadata) => {
98
175
  const element = document.createElement("div");
99
- element.setAttribute("data-embed", metadata?.type ?? type);
176
+ element.setAttribute("data-embed", "iframe");
100
177
  element.setAttribute("data-embed-src", metadata?.src ?? src);
101
- if (metadata?.provider) element.setAttribute("data-embed-provider", metadata.provider);
102
- if (metadata?.url) element.setAttribute("data-embed-url", metadata.url);
103
- if (metadata?.thumbnail && isSafeThumbnailUrl(metadata.thumbnail)) element.setAttribute("data-embed-thumbnail", metadata.thumbnail);
104
- if (metadata?.width) element.setAttribute("data-embed-width", String(metadata.width));
105
- if (metadata?.height) element.setAttribute("data-embed-height", String(metadata.height));
106
- if (metadata?.author) element.setAttribute("data-embed-author", metadata.author);
107
- if (metadata?.text) element.setAttribute("data-embed-text", metadata.text);
178
+ if (metadata) applyEmbedMetadata(element, metadata);
108
179
  const fallbackUrl = metadata?.url ?? metadata?.src ?? src;
109
180
  const link = document.createElement("a");
110
181
  link.setAttribute("href", fallbackUrl);
@@ -113,4 +184,4 @@ const createEmbedPlaceholder = (document, src, type, metadata) => {
113
184
  return element;
114
185
  };
115
186
  //#endregion
116
- export { Node, applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, isBlockElement, isBr, isSkippable, parseFragment, stripOversizedBase64Sources, transformHtml, unwrapOuterTag };
187
+ export { Node, NodeFilter, applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isSkippable, isWhitespaceText, normalizeAttributeCase, parseFragment, stripOversizedBase64Sources, transformHtml };
package/dist/defaults.d.ts CHANGED
@@ -7,8 +7,9 @@ declare const defaultFinalStringTransforms: Array<StringTransform>;
7
7
  declare const defaultEmbedResolvers: Array<EmbedResolver>;
8
8
  declare const defaultResolveUrlFn: ResolveUrlFn;
9
9
  declare const defaultLazySrcAttributes: string[];
10
+ declare const defaultLazySrcsetAttributes: string[];
10
11
  declare const defaultTrackingHosts: string[];
11
12
  declare const defaultTrackingPathSegments: string[];
12
13
  declare const defaultUrlUnwrappers: Array<UrlUnwrapper>;
13
14
  //#endregion
14
- export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
15
+ export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
package/dist/defaults.js CHANGED
@@ -1,24 +1,30 @@
1
1
  import { youtubeEmbedResolver } from "./embeds/youtube.js";
2
+ import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
3
+ import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
2
4
  import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
3
5
  import { highlightCode } from "./transforms/dom/highlightCode.js";
4
- import { injectEnclosureEmbedPlaceholders } from "./transforms/dom/injectEnclosureEmbedPlaceholders.js";
6
+ import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
5
7
  import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
6
8
  import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
9
+ import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
10
+ import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
7
11
  import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
8
12
  import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
9
13
  import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
10
14
  import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
11
15
  import { stripComments } from "./transforms/dom/stripComments.js";
16
+ import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
17
+ import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
18
+ import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
12
19
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
13
20
  import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
14
21
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
15
22
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
23
+ import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
16
24
  import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
17
- import { decodeDoubleEncodedTags } from "./transforms/string/decodeDoubleEncodedTags.js";
25
+ import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
18
26
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
19
- import { stripEmptyTags } from "./transforms/string/stripEmptyTags.js";
20
- import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
21
- import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
27
+ import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
22
28
  import { unwrapBing } from "./unwraps/bing.js";
23
29
  import { unwrapFacebookShim } from "./unwraps/facebook.js";
24
30
  import { unwrapGoogle } from "./unwraps/google.js";
@@ -33,31 +39,34 @@ import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
33
39
  import { unwrapYouTube } from "./unwraps/youtube.js";
34
40
  import { resolveUrl } from "feedcanon";
35
41
  //#region src/defaults.ts
36
- const defaultStringTransforms = [
37
- stripOrphanedClosingTags,
38
- decodeDoubleEncodedTags,
39
- unwrapWrappers,
40
- paragraphizePlainText,
41
- stripEmptyTags
42
- ];
42
+ const defaultStringTransforms = [unwrapCdataComments, paragraphizePlainText];
43
43
  const defaultDomTransforms = [
44
+ decodeDoubleEncodedTags,
44
45
  stripComments,
46
+ unwrapDoublyNestedLists,
47
+ stripDuplicateTitleHeading,
45
48
  fixLazyImages,
46
49
  resolveRelativeUrls,
47
50
  unwrapRedirectUrls,
51
+ stripDeadAnchors,
48
52
  stripTrackingParams,
49
53
  removeTrackingPixels,
54
+ convertBreaksToParagraphs,
50
55
  stripInterBlockBreaks,
51
56
  stripParagraphBoundaryBreaks,
57
+ mergeFragmentedLists,
52
58
  highlightCode,
53
59
  mergeConsecutiveOneLinerPres,
54
60
  replacePreLineBreaks,
55
61
  trimPreWhitespace,
56
62
  linkifyUrls,
57
63
  replaceEmbedsWithPlaceholders,
58
- injectEnclosureEmbedPlaceholders
64
+ injectEnclosures,
65
+ proxyAssetUrls,
66
+ unwrapWrappers,
67
+ stripEmptyTags
59
68
  ];
60
- const defaultFinalStringTransforms = [stripEmptyTags];
69
+ const defaultFinalStringTransforms = [];
61
70
  const defaultEmbedResolvers = [youtubeEmbedResolver];
62
71
  const defaultResolveUrlFn = (url, baseUrl) => resolveUrl(url, baseUrl);
63
72
  const defaultLazySrcAttributes = [
@@ -75,9 +84,27 @@ const defaultLazySrcAttributes = [
75
84
  "data-image-src",
76
85
  "data-canonical-src",
77
86
  "data-img-url",
87
+ "nitro-lazy-src",
78
88
  "data-orig",
79
89
  "data-runner-src"
80
90
  ];
91
+ const defaultLazySrcsetAttributes = [
92
+ "data-srcset",
93
+ "data-tf-srcset",
94
+ "data-lazy-srcset",
95
+ "data-image-srcset",
96
+ "data-modal-srcset",
97
+ "data-splide-lazy-srcset",
98
+ "data-alt-srcset",
99
+ "fifu-data-srcset",
100
+ "data-thumb-srcset",
101
+ "data-vp-popup-img-srcset",
102
+ "data-original-srcset",
103
+ "data-pswp-srcset",
104
+ "data-nectar-img-srcset",
105
+ "nitro-lazy-srcset",
106
+ "data-flickity-lazyload-srcset"
107
+ ];
81
108
  const defaultTrackingHosts = [
82
109
  "feedsportal.com",
83
110
  "stats.wordpress.com",
@@ -102,12 +129,14 @@ const defaultTrackingHosts = [
102
129
  "quantserve.com",
103
130
  "chartbeat.com",
104
131
  "moatads.com",
105
- "sentry.io"
132
+ "sentry.io",
133
+ "hubspot.com"
106
134
  ];
107
135
  const defaultTrackingPathSegments = [
108
136
  "pixel",
109
137
  "beacon",
110
- "count"
138
+ "count",
139
+ "impression"
111
140
  ];
112
141
  const defaultUrlUnwrappers = [
113
142
  unwrapBing,
@@ -124,4 +153,4 @@ const defaultUrlUnwrappers = [
124
153
  unwrapRedditOut
125
154
  ];
126
155
  //#endregion
127
- export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
156
+ export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
package/dist/embeds/youtube.js CHANGED
@@ -32,10 +32,10 @@ const youtubeResolveEmbed = (url) => {
32
32
  if (!videoId) return;
33
33
  return {
34
34
  provider: "youtube",
35
+ id: videoId,
35
36
  src: `https://www.youtube-nocookie.com/embed/${videoId}`,
36
37
  url: `https://www.youtube.com/watch?v=${videoId}`,
37
- thumbnail: composeThumbnailUrl(videoId),
38
- type: "iframe"
38
+ thumbnail: composeThumbnailUrl(videoId)
39
39
  };
40
40
  };
41
41
  const youtubeEmbedResolver = {
package/dist/index.d.ts CHANGED
@@ -1,28 +1,34 @@
1
- import { DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
1
+ import { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
2
2
  import { defaultResolveUrlFn } from "./defaults.js";
3
- import { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
3
+ import { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
4
4
  import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
5
+ import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
6
+ import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
7
+ import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
5
8
  import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
6
9
  import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
7
- import { injectEnclosureEmbedPlaceholders } from "./transforms/dom/injectEnclosureEmbedPlaceholders.js";
10
+ import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
8
11
  import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
9
12
  import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
13
+ import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
14
+ import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
10
15
  import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
11
16
  import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
12
17
  import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
13
18
  import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
14
- import { simplifyFigures } from "./transforms/dom/simplifyFigures.js";
15
19
  import { stripComments } from "./transforms/dom/stripComments.js";
20
+ import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
21
+ import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
22
+ import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
16
23
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
17
24
  import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
18
25
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
19
26
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
27
+ import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
20
28
  import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
21
- import { decodeDoubleEncodedTags } from "./transforms/string/decodeDoubleEncodedTags.js";
29
+ import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
22
30
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
23
- import { stripEmptyTags } from "./transforms/string/stripEmptyTags.js";
24
- import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
25
- import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
31
+ import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
26
32
  import { unwrapAceml } from "./unwraps/aceml.js";
27
33
  import { unwrapAdjust } from "./unwraps/adjust.js";
28
34
  import { unwrapAmazonAffiliate } from "./unwraps/amazonAffiliate.js";
@@ -100,6 +106,6 @@ import { unwrapZhihu } from "./unwraps/zhihu.js";
100
106
  import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
101
107
 
102
108
  //#region src/index.d.ts
103
- declare const transformContent: (html: string, options?: TransformContentOptions) => string;
109
+ declare const transformContent: (html: string, options?: TransformContentOptions) => Promise<string>;
104
110
  //#endregion
105
- export { type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type ParamExtractorConfig, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosureEmbedPlaceholders, linkifyUrls, mergeConsecutiveOneLinerPres, paragraphizePlainText, parseFragment, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, simplifyFigures, stripComments, stripEmptyTags, stripInterBlockBreaks, stripOrphanedClosingTags, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, 
unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
111
+ export { type AssetProxyFn, type AssetType, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyEmbedMetadata, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBreaksToParagraphs, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, enrichEmbedPlaceholders, expandSvgSelfClose, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosures, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, paragraphizePlainText, parseFragment, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, 
unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
package/dist/index.js CHANGED
@@ -1,26 +1,32 @@
1
- import { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
1
+ import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
2
+ import { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
2
3
  import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
4
+ import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
5
+ import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
3
6
  import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
4
7
  import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
5
- import { injectEnclosureEmbedPlaceholders } from "./transforms/dom/injectEnclosureEmbedPlaceholders.js";
8
+ import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
6
9
  import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
7
10
  import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
11
+ import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
12
+ import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
8
13
  import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
9
- import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
10
14
  import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
11
15
  import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
12
16
  import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
13
17
  import { stripComments } from "./transforms/dom/stripComments.js";
18
+ import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
19
+ import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
20
+ import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
14
21
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
15
22
  import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
16
23
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
17
24
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
25
+ import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
18
26
  import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
19
- import { decodeDoubleEncodedTags } from "./transforms/string/decodeDoubleEncodedTags.js";
27
+ import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
20
28
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
21
- import { stripEmptyTags } from "./transforms/string/stripEmptyTags.js";
22
- import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
23
- import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
29
+ import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
24
30
  import { unwrapBing } from "./unwraps/bing.js";
25
31
  import { unwrapFacebookShim } from "./unwraps/facebook.js";
26
32
  import { unwrapGoogle } from "./unwraps/google.js";
@@ -33,8 +39,8 @@ import { unwrapRedditOut } from "./unwraps/redditOut.js";
33
39
  import { unwrapVkAway } from "./unwraps/vkAway.js";
34
40
  import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
35
41
  import { unwrapYouTube } from "./unwraps/youtube.js";
36
- import { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
37
- import { simplifyFigures } from "./transforms/dom/simplifyFigures.js";
42
+ import { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
43
+ import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
38
44
  import { unwrapAceml } from "./unwraps/aceml.js";
39
45
  import { unwrapAdjust } from "./unwraps/adjust.js";
40
46
  import { unwrapAmazonAffiliate } from "./unwraps/amazonAffiliate.js";
@@ -98,21 +104,25 @@ import { unwrapWebArchive } from "./unwraps/webArchive.js";
98
104
  import { unwrapYandexTurbo } from "./unwraps/yandexTurbo.js";
99
105
  import { unwrapZhihu } from "./unwraps/zhihu.js";
100
106
  //#region src/index.ts
101
- const transformContent = (html, options = {}) => {
107
+ const transformContent = async (html, options = {}) => {
102
108
  const context = {
103
109
  baseUrl: options.baseUrl,
104
110
  enclosures: options.enclosures,
105
111
  embedResolvers: options.embedResolvers ?? defaultEmbedResolvers,
106
112
  lazySrcAttributes: options.lazySrcAttributes ?? defaultLazySrcAttributes,
113
+ lazySrcsetAttributes: options.lazySrcsetAttributes ?? defaultLazySrcsetAttributes,
107
114
  trackingHosts: options.trackingHosts ?? defaultTrackingHosts,
108
115
  trackingPathSegments: options.trackingPathSegments ?? defaultTrackingPathSegments,
109
116
  urlUnwrappers: options.urlUnwrappers ?? defaultUrlUnwrappers,
110
- resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn
117
+ resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn,
118
+ assetProxyFn: options.assetProxyFn,
119
+ enrichEmbedFn: options.enrichEmbedFn,
120
+ articleTitle: options.articleTitle
111
121
  };
112
122
  const stringFns = options.stringTransforms ?? defaultStringTransforms;
113
123
  const domFns = options.domTransforms ?? defaultDomTransforms;
114
124
  const finalFns = options.finalStringTransforms ?? defaultFinalStringTransforms;
115
- return applyStringTransforms(applyDomTransforms(applyStringTransforms(html, stringFns.map((transform) => transform(context))), domFns.map((transform) => transform(context))), finalFns.map((transform) => transform(context)));
125
+ return await applyStringTransforms(await applyDomTransforms(await applyStringTransforms(html, stringFns.map((transform) => transform(context))), domFns.map((transform) => transform(context))), finalFns.map((transform) => transform(context)));
116
126
  };
117
127
  //#endregion
118
- export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosureEmbedPlaceholders, linkifyUrls, mergeConsecutiveOneLinerPres, paragraphizePlainText, parseFragment, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, simplifyFigures, stripComments, stripEmptyTags, stripInterBlockBreaks, stripOrphanedClosingTags, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, 
unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
128
+ export { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBreaksToParagraphs, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, enrichEmbedPlaceholders, expandSvgSelfClose, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosures, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, paragraphizePlainText, parseFragment, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, 
unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/convertBreaksToParagraphs.d.ts
4
+ declare const convertBreaksToParagraphs: DomTransform;
5
+ //#endregion
6
+ export { convertBreaksToParagraphs };