feedsweep 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -17
- package/dist/common.d.ts +10 -6
- package/dist/common.js +99 -28
- package/dist/defaults.d.ts +2 -1
- package/dist/defaults.js +46 -17
- package/dist/embeds/youtube.js +2 -2
- package/dist/index.d.ts +16 -10
- package/dist/index.js +23 -13
- package/dist/transforms/dom/convertBreaksToParagraphs.d.ts +6 -0
- package/dist/transforms/dom/convertBreaksToParagraphs.js +80 -0
- package/dist/transforms/dom/decodeDoubleEncodedTags.d.ts +6 -0
- package/dist/transforms/dom/decodeDoubleEncodedTags.js +30 -0
- package/dist/transforms/dom/enrichEmbedPlaceholders.d.ts +6 -0
- package/dist/transforms/dom/enrichEmbedPlaceholders.js +32 -0
- package/dist/transforms/dom/fixLazyImages.js +37 -13
- package/dist/transforms/dom/highlightCode.js +3 -2
- package/dist/transforms/dom/injectEnclosures.d.ts +6 -0
- package/dist/transforms/dom/injectEnclosures.js +66 -0
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +1 -1
- package/dist/transforms/dom/mergeFragmentedLists.d.ts +6 -0
- package/dist/transforms/dom/mergeFragmentedLists.js +84 -0
- package/dist/transforms/dom/proxyAssetUrls.d.ts +6 -0
- package/dist/transforms/dom/proxyAssetUrls.js +64 -0
- package/dist/transforms/dom/removeTrackingPixels.js +22 -25
- package/dist/transforms/dom/replaceEmbedsWithPlaceholders.js +24 -25
- package/dist/transforms/dom/replacePreLineBreaks.js +3 -4
- package/dist/transforms/dom/resolveRelativeUrls.js +44 -30
- package/dist/transforms/dom/stripComments.js +5 -15
- package/dist/transforms/dom/stripDeadAnchors.d.ts +6 -0
- package/dist/transforms/dom/stripDeadAnchors.js +20 -0
- package/dist/transforms/dom/stripDuplicateTitleHeading.d.ts +6 -0
- package/dist/transforms/dom/stripDuplicateTitleHeading.js +31 -0
- package/dist/transforms/dom/stripEmptyTags.d.ts +6 -0
- package/dist/transforms/dom/stripEmptyTags.js +53 -0
- package/dist/transforms/dom/stripInterBlockBreaks.js +28 -8
- package/dist/transforms/dom/stripParagraphBoundaryBreaks.js +26 -6
- package/dist/transforms/dom/stripTrackingParams.js +7 -6
- package/dist/transforms/dom/trimPreWhitespace.js +4 -3
- package/dist/transforms/dom/unwrapDoublyNestedLists.d.ts +6 -0
- package/dist/transforms/dom/unwrapDoublyNestedLists.js +41 -0
- package/dist/transforms/dom/unwrapRedirectUrls.js +4 -2
- package/dist/transforms/dom/unwrapWrappers.d.ts +6 -0
- package/dist/transforms/dom/unwrapWrappers.js +30 -0
- package/dist/transforms/string/paragraphizePlainText.js +1 -1
- package/dist/transforms/string/unwrapCdataComments.d.ts +6 -0
- package/dist/transforms/string/unwrapCdataComments.js +10 -0
- package/dist/types.d.ts +35 -6
- package/dist/unwraps/google.js +1 -1
- package/dist/unwraps/googleNewsModern.js +7 -3
- package/package.json +2 -2
- package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.d.ts +0 -6
- package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.js +0 -33
- package/dist/transforms/dom/simplifyFigures.d.ts +0 -6
- package/dist/transforms/dom/simplifyFigures.js +0 -27
- package/dist/transforms/string/decodeDoubleEncodedTags.d.ts +0 -6
- package/dist/transforms/string/decodeDoubleEncodedTags.js +0 -23
- package/dist/transforms/string/stripEmptyTags.d.ts +0 -6
- package/dist/transforms/string/stripEmptyTags.js +0 -25
- package/dist/transforms/string/stripOrphanedClosingTags.d.ts +0 -6
- package/dist/transforms/string/stripOrphanedClosingTags.js +0 -28
- package/dist/transforms/string/unwrapWrappers.d.ts +0 -6
- package/dist/transforms/string/unwrapWrappers.js +0 -10
package/README.md
CHANGED
|
@@ -19,7 +19,7 @@ npm install feedsweep
|
|
|
19
19
|
```typescript
|
|
20
20
|
import { transformContent } from 'feedsweep'
|
|
21
21
|
|
|
22
|
-
const result = transformContent('<p>Check <img data-src="photo.jpg"> and visit /about</p>', {
|
|
22
|
+
const result = await transformContent('<p>Check <img data-src="photo.jpg"> and visit /about</p>', {
|
|
23
23
|
baseUrl: 'https://example.com/post/1',
|
|
24
24
|
})
|
|
25
25
|
```
|
|
@@ -30,27 +30,33 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
30
30
|
|
|
31
31
|
| Transform | Description |
|
|
32
32
|
| --- | --- |
|
|
33
|
-
| `stripOrphanedClosingTags` | Remove unmatched `</p>` / `</div>` close tags |
|
|
34
33
|
| `decodeDoubleEncodedTags` | Decode `&lt;tag&gt;` back to `<tag>` in mixed content |
|
|
35
|
-
| `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
|
|
36
|
-
| `paragraphizePlainText` | Wrap plain text in `<p>` tags |
|
|
37
|
-
| `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
|
|
38
|
-
| `stripComments` | Remove HTML `<!-- comments -->` |
|
|
39
34
|
| `fixLazyImages` | Move `data-src` / `data-original` to real `src` |
|
|
40
|
-
| `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
|
|
41
|
-
| `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
|
|
42
|
-
| `stripTrackingParams` | Remove UTM and other tracking parameters |
|
|
43
|
-
| `removeTrackingPixels` | Strip 1×1 tracking pixel images |
|
|
44
|
-
| `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
|
|
45
|
-
| `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
|
|
46
|
-
| `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
|
|
47
35
|
| `mergeConsecutiveOneLinerPres` | Merge consecutive single-line `<pre>` tags |
|
|
48
36
|
| `replacePreLineBreaks` | Replace `<br>` with `\n` inside `<pre>` |
|
|
49
|
-
| `
|
|
50
|
-
| `
|
|
37
|
+
| `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
|
|
38
|
+
| `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
|
|
39
|
+
| `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
|
|
40
|
+
| `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
|
|
41
|
+
| `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
|
|
42
|
+
| `removeTrackingPixels` | Strip 1×1 tracking pixel images |
|
|
43
|
+
| `stripTrackingParams` | Remove UTM and other tracking parameters |
|
|
44
|
+
| `convertBreaksToParagraphs` | Convert `<br><br>` runs into semantic `<p>` blocks |
|
|
45
|
+
| `injectEnclosures` | Inject feed enclosures into content as native `<audio>`/`<video>` or iframe placeholders |
|
|
51
46
|
| `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
|
|
52
|
-
| `
|
|
53
|
-
| `
|
|
47
|
+
| `enrichEmbedPlaceholders` | Populate placeholder metadata (`title`, `description`, `duration`, etc.) via a caller-supplied async fn. Opt-in; not in defaults |
|
|
48
|
+
| `proxyAssetUrls` | Rewrite image, video, and audio URLs through a caller-supplied proxy |
|
|
49
|
+
| `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
|
|
50
|
+
| `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
|
|
51
|
+
| `unwrapDoublyNestedLists` | Unwrap `<ul>`/`<ol>` that wrap a single `<li>` containing a same-type list |
|
|
52
|
+
| `mergeFragmentedLists` | Merge consecutive sibling `<ul>` / `<ol>` lists with matching attributes |
|
|
53
|
+
| `paragraphizePlainText` | Wrap plain text in `<p>` tags |
|
|
54
|
+
| `linkifyUrls` | Wrap bare URLs in `<a>` tags |
|
|
55
|
+
| `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
|
|
56
|
+
| `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
|
|
57
|
+
| `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
|
|
58
|
+
| `stripComments` | Remove HTML `<!-- comments -->` |
|
|
59
|
+
| `unwrapCdataComments` | Strip malformed `<!--[CDATA[ … ]]-->` wrappers before parsing so the wrapped article reaches the DOM as real HTML |
|
|
54
60
|
|
|
55
61
|
## Options
|
|
56
62
|
|
|
@@ -62,6 +68,12 @@ const result = transformContent(html, {
|
|
|
62
68
|
baseUrl: 'https://example.com/post/1',
|
|
63
69
|
// Feed item enclosures (audio/video).
|
|
64
70
|
enclosures: [{ url: 'https://example.com/audio.mp3', type: 'audio/mpeg' }],
|
|
71
|
+
// Route image/video/audio URLs through a proxy. Return `undefined` to leave a URL untouched.
|
|
72
|
+
assetProxyFn: (url, type) => `https://proxy.example.com/?type=${type}&url=${encodeURIComponent(url)}`,
|
|
73
|
+
// Populate embed placeholder metadata from a remote source (e.g. YouTube oEmbed).
|
|
74
|
+
enrichEmbedFn: async (embeds) => {
|
|
75
|
+
return new Map(embeds.map(({ provider, id }) => [`${provider}:${id}`, { title: '…' }]))
|
|
76
|
+
},
|
|
65
77
|
// Run a custom DOM transform pipeline (omit to use defaults).
|
|
66
78
|
domTransforms: [fixLazyImages, resolveRelativeUrls],
|
|
67
79
|
})
|
package/dist/common.d.ts
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
|
-
import { EmbedResolverResult } from "./types.js";
|
|
1
|
+
import { EmbedResolverResult, MaybePromise } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/common.d.ts
|
|
4
4
|
declare const stripOversizedBase64Sources: (html: string, maxSize: number) => string;
|
|
5
|
+
declare const expandSvgSelfClose: (html: string) => string;
|
|
5
6
|
declare const parseFragment: (html: string) => Document;
|
|
6
|
-
declare const transformHtml: (html: string, transform: (document: Document) => void) => string
|
|
7
|
-
declare const applyDomTransforms: (html: string, transforms: Array<(document: Document) => void
|
|
8
|
-
declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => string
|
|
9
|
-
declare const
|
|
7
|
+
declare const transformHtml: (html: string, transform: (document: Document) => MaybePromise<void>) => Promise<string>;
|
|
8
|
+
declare const applyDomTransforms: (html: string, transforms: Array<(document: Document) => MaybePromise<void>>) => Promise<string>;
|
|
9
|
+
declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => MaybePromise<string>>) => Promise<string>;
|
|
10
|
+
declare const applyEmbedMetadata: (element: HTMLElement, metadata: Partial<EmbedResolverResult>, options?: {
|
|
11
|
+
setIfMissing?: boolean;
|
|
12
|
+
}) => void;
|
|
13
|
+
declare const createEmbedPlaceholder: (document: Document, src: string, metadata?: Partial<EmbedResolverResult>) => HTMLElement;
|
|
10
14
|
//#endregion
|
|
11
|
-
export { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml };
|
|
15
|
+
export { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml };
|
package/dist/common.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { coerceNumber } from "./utils.js";
|
|
1
2
|
import { resolveUrl } from "feedcanon";
|
|
2
3
|
import { parseHTML } from "linkedom";
|
|
3
4
|
//#region src/common.ts
|
|
@@ -6,6 +7,11 @@ const Node = {
|
|
|
6
7
|
TEXT_NODE: 3,
|
|
7
8
|
COMMENT_NODE: 8
|
|
8
9
|
};
|
|
10
|
+
const NodeFilter = {
|
|
11
|
+
SHOW_ELEMENT: 1,
|
|
12
|
+
SHOW_TEXT: 4,
|
|
13
|
+
SHOW_COMMENT: 128
|
|
14
|
+
};
|
|
9
15
|
const base64SrcRegex = /((?:src|srcset|poster)=["'])data:[^"']*;base64,[^"']*(["'])/g;
|
|
10
16
|
const safeThumbnailDataUrlRegex = /^data:image\/(png|jpe?g|gif|webp|avif);/i;
|
|
11
17
|
const isSafeThumbnailUrl = (url) => {
|
|
@@ -17,23 +23,53 @@ const stripOversizedBase64Sources = (html, maxSize) => {
|
|
|
17
23
|
return `${prefix}${suffix}`;
|
|
18
24
|
});
|
|
19
25
|
};
|
|
26
|
+
const normalizeAttributeCase = (document) => {
|
|
27
|
+
for (const element of document.querySelectorAll("*")) {
|
|
28
|
+
const original = Array.from(element.attributes).map((attribute) => ({
|
|
29
|
+
name: attribute.name,
|
|
30
|
+
value: attribute.value
|
|
31
|
+
}));
|
|
32
|
+
const final = /* @__PURE__ */ new Map();
|
|
33
|
+
let needsRewrite = false;
|
|
34
|
+
for (const { name, value } of original) {
|
|
35
|
+
const lower = name.toLowerCase();
|
|
36
|
+
if (lower !== name) needsRewrite = true;
|
|
37
|
+
if (final.has(lower)) {
|
|
38
|
+
needsRewrite = true;
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
final.set(lower, value);
|
|
42
|
+
}
|
|
43
|
+
if (!needsRewrite) continue;
|
|
44
|
+
for (const { name } of original) element.removeAttribute(name);
|
|
45
|
+
for (const [name, value] of final) element.setAttribute(name, value);
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
const svgRegionRegex = /<svg\b[^>]*>[\s\S]*?<\/svg>/gi;
|
|
49
|
+
const svgSelfCloseRegex = /<([a-z][a-z0-9-]*)((?:\s[^>]*)?)\s*\/>/gi;
|
|
50
|
+
const expandSvgSelfClose = (html) => {
|
|
51
|
+
return html.replace(svgRegionRegex, (svgBlock) => {
|
|
52
|
+
return svgBlock.replace(svgSelfCloseRegex, "<$1$2></$1>");
|
|
53
|
+
});
|
|
54
|
+
};
|
|
20
55
|
const parseFragment = (html) => {
|
|
21
|
-
const { document } = parseHTML(`<!doctype html><html><head></head><body>${html}</body></html>`);
|
|
56
|
+
const { document } = parseHTML(`<!doctype html><html><head></head><body>${expandSvgSelfClose(html)}</body></html>`);
|
|
57
|
+
normalizeAttributeCase(document);
|
|
22
58
|
return document;
|
|
23
59
|
};
|
|
24
|
-
const transformHtml = (html, transform) => {
|
|
60
|
+
const transformHtml = async (html, transform) => {
|
|
25
61
|
const document = parseFragment(html);
|
|
26
|
-
transform(document);
|
|
62
|
+
await transform(document);
|
|
27
63
|
return document.body.innerHTML;
|
|
28
64
|
};
|
|
29
|
-
const applyDomTransforms = (html, transforms) => {
|
|
65
|
+
const applyDomTransforms = async (html, transforms) => {
|
|
30
66
|
const document = parseFragment(stripOversizedBase64Sources(html, 50 * 1024));
|
|
31
|
-
for (const transform of transforms) transform(document);
|
|
67
|
+
for (const transform of transforms) await transform(document);
|
|
32
68
|
return document.body.innerHTML;
|
|
33
69
|
};
|
|
34
|
-
const applyStringTransforms = (html, transforms) => {
|
|
70
|
+
const applyStringTransforms = async (html, transforms) => {
|
|
35
71
|
let output = html;
|
|
36
|
-
for (const transform of transforms) output = transform(output);
|
|
72
|
+
for (const transform of transforms) output = await transform(output);
|
|
37
73
|
return output;
|
|
38
74
|
};
|
|
39
75
|
const blockElements = new Set([
|
|
@@ -71,10 +107,10 @@ const blockElements = new Set([
|
|
|
71
107
|
"ul"
|
|
72
108
|
]);
|
|
73
109
|
const isWhitespaceText = (node) => {
|
|
74
|
-
return node.nodeType === Node.TEXT_NODE && !
|
|
110
|
+
return node.nodeType === Node.TEXT_NODE && !node.textContent?.trim();
|
|
75
111
|
};
|
|
76
112
|
const isBr = (node) => {
|
|
77
|
-
return node.nodeType === Node.ELEMENT_NODE && node.
|
|
113
|
+
return node.nodeType === Node.ELEMENT_NODE && node.localName === "br";
|
|
78
114
|
};
|
|
79
115
|
const isComment = (node) => {
|
|
80
116
|
return node.nodeType === Node.COMMENT_NODE;
|
|
@@ -83,28 +119,63 @@ const isSkippable = (node) => {
|
|
|
83
119
|
return isWhitespaceText(node) || isBr(node) || isComment(node);
|
|
84
120
|
};
|
|
85
121
|
const isBlockElement = (node) => {
|
|
86
|
-
return node.nodeType === Node.ELEMENT_NODE && blockElements.has(node.
|
|
87
|
-
};
|
|
88
|
-
const
|
|
89
|
-
let
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
match = pattern.exec(result);
|
|
122
|
+
return node.nodeType === Node.ELEMENT_NODE && blockElements.has(node.localName);
|
|
123
|
+
};
|
|
124
|
+
const hasAncestorWithTagName = (node, tagSet, stopAt) => {
|
|
125
|
+
let ancestor = node.parentNode;
|
|
126
|
+
while (ancestor !== null && ancestor !== stopAt) {
|
|
127
|
+
if (ancestor.nodeType === Node.ELEMENT_NODE && tagSet.has(ancestor.localName)) return true;
|
|
128
|
+
ancestor = ancestor.parentNode;
|
|
94
129
|
}
|
|
95
|
-
return
|
|
130
|
+
return false;
|
|
131
|
+
};
|
|
132
|
+
const styleWidthRegex = /(?:^|;)\s*width\s*:\s*([0-9]*\.?[0-9]+)\s*(?:px)?\s*(?:;|$)/i;
|
|
133
|
+
const styleHeightRegex = /(?:^|;)\s*height\s*:\s*([0-9]*\.?[0-9]+)\s*(?:px)?\s*(?:;|$)/i;
|
|
134
|
+
const getDimensions = (element) => {
|
|
135
|
+
const width = coerceNumber(element.getAttribute("width"));
|
|
136
|
+
const height = coerceNumber(element.getAttribute("height"));
|
|
137
|
+
if (width !== void 0 && height !== void 0) return {
|
|
138
|
+
width,
|
|
139
|
+
height
|
|
140
|
+
};
|
|
141
|
+
const style = element.getAttribute("style");
|
|
142
|
+
if (!style) return {
|
|
143
|
+
width,
|
|
144
|
+
height
|
|
145
|
+
};
|
|
146
|
+
const fromStyle = (regex) => {
|
|
147
|
+
const match = regex.exec(style);
|
|
148
|
+
return match ? coerceNumber(match[1]) : void 0;
|
|
149
|
+
};
|
|
150
|
+
return {
|
|
151
|
+
width: width ?? fromStyle(styleWidthRegex),
|
|
152
|
+
height: height ?? fromStyle(styleHeightRegex)
|
|
153
|
+
};
|
|
154
|
+
};
|
|
155
|
+
const applyEmbedMetadata = (element, metadata, options) => {
|
|
156
|
+
const setIfMissing = options?.setIfMissing ?? false;
|
|
157
|
+
const set = (name, value) => {
|
|
158
|
+
if (setIfMissing && element.hasAttribute(name)) return;
|
|
159
|
+
element.setAttribute(name, value);
|
|
160
|
+
};
|
|
161
|
+
if (metadata.provider) set("data-embed-provider", metadata.provider);
|
|
162
|
+
if (metadata.id) set("data-embed-id", metadata.id);
|
|
163
|
+
if (metadata.src) set("data-embed-src", metadata.src);
|
|
164
|
+
if (metadata.url) set("data-embed-url", metadata.url);
|
|
165
|
+
if (metadata.thumbnail && isSafeThumbnailUrl(metadata.thumbnail)) set("data-embed-thumbnail", metadata.thumbnail);
|
|
166
|
+
if (metadata.width) set("data-embed-width", String(metadata.width));
|
|
167
|
+
if (metadata.height) set("data-embed-height", String(metadata.height));
|
|
168
|
+
if (metadata.title) set("data-embed-title", metadata.title);
|
|
169
|
+
if (metadata.description) set("data-embed-description", metadata.description);
|
|
170
|
+
if (metadata.author) set("data-embed-author", metadata.author);
|
|
171
|
+
if (metadata.avatar && isSafeThumbnailUrl(metadata.avatar)) set("data-embed-avatar", metadata.avatar);
|
|
172
|
+
if (metadata.duration) set("data-embed-duration", String(metadata.duration));
|
|
96
173
|
};
|
|
97
|
-
const createEmbedPlaceholder = (document, src,
|
|
174
|
+
const createEmbedPlaceholder = (document, src, metadata) => {
|
|
98
175
|
const element = document.createElement("div");
|
|
99
|
-
element.setAttribute("data-embed",
|
|
176
|
+
element.setAttribute("data-embed", "iframe");
|
|
100
177
|
element.setAttribute("data-embed-src", metadata?.src ?? src);
|
|
101
|
-
if (metadata
|
|
102
|
-
if (metadata?.url) element.setAttribute("data-embed-url", metadata.url);
|
|
103
|
-
if (metadata?.thumbnail && isSafeThumbnailUrl(metadata.thumbnail)) element.setAttribute("data-embed-thumbnail", metadata.thumbnail);
|
|
104
|
-
if (metadata?.width) element.setAttribute("data-embed-width", String(metadata.width));
|
|
105
|
-
if (metadata?.height) element.setAttribute("data-embed-height", String(metadata.height));
|
|
106
|
-
if (metadata?.author) element.setAttribute("data-embed-author", metadata.author);
|
|
107
|
-
if (metadata?.text) element.setAttribute("data-embed-text", metadata.text);
|
|
178
|
+
if (metadata) applyEmbedMetadata(element, metadata);
|
|
108
179
|
const fallbackUrl = metadata?.url ?? metadata?.src ?? src;
|
|
109
180
|
const link = document.createElement("a");
|
|
110
181
|
link.setAttribute("href", fallbackUrl);
|
|
@@ -113,4 +184,4 @@ const createEmbedPlaceholder = (document, src, type, metadata) => {
|
|
|
113
184
|
return element;
|
|
114
185
|
};
|
|
115
186
|
//#endregion
|
|
116
|
-
export { Node, applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, isBlockElement, isBr, isSkippable, parseFragment, stripOversizedBase64Sources, transformHtml
|
|
187
|
+
export { Node, NodeFilter, applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isSkippable, isWhitespaceText, normalizeAttributeCase, parseFragment, stripOversizedBase64Sources, transformHtml };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -7,8 +7,9 @@ declare const defaultFinalStringTransforms: Array<StringTransform>;
|
|
|
7
7
|
declare const defaultEmbedResolvers: Array<EmbedResolver>;
|
|
8
8
|
declare const defaultResolveUrlFn: ResolveUrlFn;
|
|
9
9
|
declare const defaultLazySrcAttributes: string[];
|
|
10
|
+
declare const defaultLazySrcsetAttributes: string[];
|
|
10
11
|
declare const defaultTrackingHosts: string[];
|
|
11
12
|
declare const defaultTrackingPathSegments: string[];
|
|
12
13
|
declare const defaultUrlUnwrappers: Array<UrlUnwrapper>;
|
|
13
14
|
//#endregion
|
|
14
|
-
export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
|
15
|
+
export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
package/dist/defaults.js
CHANGED
|
@@ -1,24 +1,30 @@
|
|
|
1
1
|
import { youtubeEmbedResolver } from "./embeds/youtube.js";
|
|
2
|
+
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
3
|
+
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
2
4
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
3
5
|
import { highlightCode } from "./transforms/dom/highlightCode.js";
|
|
4
|
-
import {
|
|
6
|
+
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
5
7
|
import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
|
|
6
8
|
import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
|
|
9
|
+
import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
|
|
10
|
+
import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
|
|
7
11
|
import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
|
|
8
12
|
import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
|
|
9
13
|
import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
|
|
10
14
|
import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
|
|
11
15
|
import { stripComments } from "./transforms/dom/stripComments.js";
|
|
16
|
+
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
17
|
+
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
18
|
+
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
12
19
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
13
20
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
14
21
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
15
22
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
23
|
+
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
16
24
|
import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
17
|
-
import {
|
|
25
|
+
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
18
26
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
19
|
-
import {
|
|
20
|
-
import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
|
|
21
|
-
import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
|
|
27
|
+
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
22
28
|
import { unwrapBing } from "./unwraps/bing.js";
|
|
23
29
|
import { unwrapFacebookShim } from "./unwraps/facebook.js";
|
|
24
30
|
import { unwrapGoogle } from "./unwraps/google.js";
|
|
@@ -33,31 +39,34 @@ import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
|
|
|
33
39
|
import { unwrapYouTube } from "./unwraps/youtube.js";
|
|
34
40
|
import { resolveUrl } from "feedcanon";
|
|
35
41
|
//#region src/defaults.ts
|
|
36
|
-
const defaultStringTransforms = [
|
|
37
|
-
stripOrphanedClosingTags,
|
|
38
|
-
decodeDoubleEncodedTags,
|
|
39
|
-
unwrapWrappers,
|
|
40
|
-
paragraphizePlainText,
|
|
41
|
-
stripEmptyTags
|
|
42
|
-
];
|
|
42
|
+
const defaultStringTransforms = [unwrapCdataComments, paragraphizePlainText];
|
|
43
43
|
const defaultDomTransforms = [
|
|
44
|
+
decodeDoubleEncodedTags,
|
|
44
45
|
stripComments,
|
|
46
|
+
unwrapDoublyNestedLists,
|
|
47
|
+
stripDuplicateTitleHeading,
|
|
45
48
|
fixLazyImages,
|
|
46
49
|
resolveRelativeUrls,
|
|
47
50
|
unwrapRedirectUrls,
|
|
51
|
+
stripDeadAnchors,
|
|
48
52
|
stripTrackingParams,
|
|
49
53
|
removeTrackingPixels,
|
|
54
|
+
convertBreaksToParagraphs,
|
|
50
55
|
stripInterBlockBreaks,
|
|
51
56
|
stripParagraphBoundaryBreaks,
|
|
57
|
+
mergeFragmentedLists,
|
|
52
58
|
highlightCode,
|
|
53
59
|
mergeConsecutiveOneLinerPres,
|
|
54
60
|
replacePreLineBreaks,
|
|
55
61
|
trimPreWhitespace,
|
|
56
62
|
linkifyUrls,
|
|
57
63
|
replaceEmbedsWithPlaceholders,
|
|
58
|
-
|
|
64
|
+
injectEnclosures,
|
|
65
|
+
proxyAssetUrls,
|
|
66
|
+
unwrapWrappers,
|
|
67
|
+
stripEmptyTags
|
|
59
68
|
];
|
|
60
|
-
const defaultFinalStringTransforms = [
|
|
69
|
+
const defaultFinalStringTransforms = [];
|
|
61
70
|
const defaultEmbedResolvers = [youtubeEmbedResolver];
|
|
62
71
|
const defaultResolveUrlFn = (url, baseUrl) => resolveUrl(url, baseUrl);
|
|
63
72
|
const defaultLazySrcAttributes = [
|
|
@@ -75,9 +84,27 @@ const defaultLazySrcAttributes = [
|
|
|
75
84
|
"data-image-src",
|
|
76
85
|
"data-canonical-src",
|
|
77
86
|
"data-img-url",
|
|
87
|
+
"nitro-lazy-src",
|
|
78
88
|
"data-orig",
|
|
79
89
|
"data-runner-src"
|
|
80
90
|
];
|
|
91
|
+
const defaultLazySrcsetAttributes = [
|
|
92
|
+
"data-srcset",
|
|
93
|
+
"data-tf-srcset",
|
|
94
|
+
"data-lazy-srcset",
|
|
95
|
+
"data-image-srcset",
|
|
96
|
+
"data-modal-srcset",
|
|
97
|
+
"data-splide-lazy-srcset",
|
|
98
|
+
"data-alt-srcset",
|
|
99
|
+
"fifu-data-srcset",
|
|
100
|
+
"data-thumb-srcset",
|
|
101
|
+
"data-vp-popup-img-srcset",
|
|
102
|
+
"data-original-srcset",
|
|
103
|
+
"data-pswp-srcset",
|
|
104
|
+
"data-nectar-img-srcset",
|
|
105
|
+
"nitro-lazy-srcset",
|
|
106
|
+
"data-flickity-lazyload-srcset"
|
|
107
|
+
];
|
|
81
108
|
const defaultTrackingHosts = [
|
|
82
109
|
"feedsportal.com",
|
|
83
110
|
"stats.wordpress.com",
|
|
@@ -102,12 +129,14 @@ const defaultTrackingHosts = [
|
|
|
102
129
|
"quantserve.com",
|
|
103
130
|
"chartbeat.com",
|
|
104
131
|
"moatads.com",
|
|
105
|
-
"sentry.io"
|
|
132
|
+
"sentry.io",
|
|
133
|
+
"hubspot.com"
|
|
106
134
|
];
|
|
107
135
|
const defaultTrackingPathSegments = [
|
|
108
136
|
"pixel",
|
|
109
137
|
"beacon",
|
|
110
|
-
"count"
|
|
138
|
+
"count",
|
|
139
|
+
"impression"
|
|
111
140
|
];
|
|
112
141
|
const defaultUrlUnwrappers = [
|
|
113
142
|
unwrapBing,
|
|
@@ -124,4 +153,4 @@ const defaultUrlUnwrappers = [
|
|
|
124
153
|
unwrapRedditOut
|
|
125
154
|
];
|
|
126
155
|
//#endregion
|
|
127
|
-
export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
|
156
|
+
export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
package/dist/embeds/youtube.js
CHANGED
|
@@ -32,10 +32,10 @@ const youtubeResolveEmbed = (url) => {
|
|
|
32
32
|
if (!videoId) return;
|
|
33
33
|
return {
|
|
34
34
|
provider: "youtube",
|
|
35
|
+
id: videoId,
|
|
35
36
|
src: `https://www.youtube-nocookie.com/embed/${videoId}`,
|
|
36
37
|
url: `https://www.youtube.com/watch?v=${videoId}`,
|
|
37
|
-
thumbnail: composeThumbnailUrl(videoId)
|
|
38
|
-
type: "iframe"
|
|
38
|
+
thumbnail: composeThumbnailUrl(videoId)
|
|
39
39
|
};
|
|
40
40
|
};
|
|
41
41
|
const youtubeEmbedResolver = {
|
package/dist/index.d.ts
CHANGED
|
@@ -1,28 +1,34 @@
|
|
|
1
|
-
import { DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
|
|
1
|
+
import { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
|
|
2
2
|
import { defaultResolveUrlFn } from "./defaults.js";
|
|
3
|
-
import { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
|
|
3
|
+
import { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
|
|
4
4
|
import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
|
|
5
|
+
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
6
|
+
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
7
|
+
import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
|
|
5
8
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
6
9
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
7
|
-
import {
|
|
10
|
+
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
8
11
|
import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
|
|
9
12
|
import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
|
|
13
|
+
import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
|
|
14
|
+
import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
|
|
10
15
|
import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
|
|
11
16
|
import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
|
|
12
17
|
import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
|
|
13
18
|
import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
|
|
14
|
-
import { simplifyFigures } from "./transforms/dom/simplifyFigures.js";
|
|
15
19
|
import { stripComments } from "./transforms/dom/stripComments.js";
|
|
20
|
+
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
21
|
+
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
22
|
+
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
16
23
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
17
24
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
18
25
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
19
26
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
27
|
+
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
20
28
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
21
|
-
import {
|
|
29
|
+
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
22
30
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
23
|
-
import {
|
|
24
|
-
import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
|
|
25
|
-
import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
|
|
31
|
+
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
26
32
|
import { unwrapAceml } from "./unwraps/aceml.js";
|
|
27
33
|
import { unwrapAdjust } from "./unwraps/adjust.js";
|
|
28
34
|
import { unwrapAmazonAffiliate } from "./unwraps/amazonAffiliate.js";
|
|
@@ -100,6 +106,6 @@ import { unwrapZhihu } from "./unwraps/zhihu.js";
|
|
|
100
106
|
import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
|
|
101
107
|
|
|
102
108
|
//#region src/index.d.ts
|
|
103
|
-
declare const transformContent: (html: string, options?: TransformContentOptions) => string
|
|
109
|
+
declare const transformContent: (html: string, options?: TransformContentOptions) => Promise<string>;
|
|
104
110
|
//#endregion
|
|
105
|
-
export { type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type ParamExtractorConfig, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode,
|
|
111
|
+
export { type AssetProxyFn, type AssetType, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyEmbedMetadata, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBreaksToParagraphs, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, enrichEmbedPlaceholders, expandSvgSelfClose, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosures, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, paragraphizePlainText, parseFragment, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, 
unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
|
package/dist/index.js
CHANGED
|
@@ -1,26 +1,32 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
|
|
2
|
+
import { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
|
|
2
3
|
import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
|
|
4
|
+
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
5
|
+
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
3
6
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
4
7
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
5
|
-
import {
|
|
8
|
+
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
6
9
|
import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
|
|
7
10
|
import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
|
|
11
|
+
import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
|
|
12
|
+
import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
|
|
8
13
|
import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
|
|
9
|
-
import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
|
|
10
14
|
import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
|
|
11
15
|
import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
|
|
12
16
|
import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
|
|
13
17
|
import { stripComments } from "./transforms/dom/stripComments.js";
|
|
18
|
+
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
19
|
+
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
20
|
+
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
14
21
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
15
22
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
16
23
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
17
24
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
25
|
+
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
18
26
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
19
|
-
import {
|
|
27
|
+
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
20
28
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
21
|
-
import {
|
|
22
|
-
import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
|
|
23
|
-
import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
|
|
29
|
+
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
24
30
|
import { unwrapBing } from "./unwraps/bing.js";
|
|
25
31
|
import { unwrapFacebookShim } from "./unwraps/facebook.js";
|
|
26
32
|
import { unwrapGoogle } from "./unwraps/google.js";
|
|
@@ -33,8 +39,8 @@ import { unwrapRedditOut } from "./unwraps/redditOut.js";
|
|
|
33
39
|
import { unwrapVkAway } from "./unwraps/vkAway.js";
|
|
34
40
|
import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
|
|
35
41
|
import { unwrapYouTube } from "./unwraps/youtube.js";
|
|
36
|
-
import { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
|
|
37
|
-
import {
|
|
42
|
+
import { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
|
|
43
|
+
import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
|
|
38
44
|
import { unwrapAceml } from "./unwraps/aceml.js";
|
|
39
45
|
import { unwrapAdjust } from "./unwraps/adjust.js";
|
|
40
46
|
import { unwrapAmazonAffiliate } from "./unwraps/amazonAffiliate.js";
|
|
@@ -98,21 +104,25 @@ import { unwrapWebArchive } from "./unwraps/webArchive.js";
|
|
|
98
104
|
import { unwrapYandexTurbo } from "./unwraps/yandexTurbo.js";
|
|
99
105
|
import { unwrapZhihu } from "./unwraps/zhihu.js";
|
|
100
106
|
//#region src/index.ts
|
|
101
|
-
const transformContent = (html, options = {}) => {
|
|
107
|
+
const transformContent = async (html, options = {}) => {
|
|
102
108
|
const context = {
|
|
103
109
|
baseUrl: options.baseUrl,
|
|
104
110
|
enclosures: options.enclosures,
|
|
105
111
|
embedResolvers: options.embedResolvers ?? defaultEmbedResolvers,
|
|
106
112
|
lazySrcAttributes: options.lazySrcAttributes ?? defaultLazySrcAttributes,
|
|
113
|
+
lazySrcsetAttributes: options.lazySrcsetAttributes ?? defaultLazySrcsetAttributes,
|
|
107
114
|
trackingHosts: options.trackingHosts ?? defaultTrackingHosts,
|
|
108
115
|
trackingPathSegments: options.trackingPathSegments ?? defaultTrackingPathSegments,
|
|
109
116
|
urlUnwrappers: options.urlUnwrappers ?? defaultUrlUnwrappers,
|
|
110
|
-
resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn
|
|
117
|
+
resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn,
|
|
118
|
+
assetProxyFn: options.assetProxyFn,
|
|
119
|
+
enrichEmbedFn: options.enrichEmbedFn,
|
|
120
|
+
articleTitle: options.articleTitle
|
|
111
121
|
};
|
|
112
122
|
const stringFns = options.stringTransforms ?? defaultStringTransforms;
|
|
113
123
|
const domFns = options.domTransforms ?? defaultDomTransforms;
|
|
114
124
|
const finalFns = options.finalStringTransforms ?? defaultFinalStringTransforms;
|
|
115
|
-
return applyStringTransforms(applyDomTransforms(applyStringTransforms(html, stringFns.map((transform) => transform(context))), domFns.map((transform) => transform(context))), finalFns.map((transform) => transform(context)));
|
|
125
|
+
return await applyStringTransforms(await applyDomTransforms(await applyStringTransforms(html, stringFns.map((transform) => transform(context))), domFns.map((transform) => transform(context))), finalFns.map((transform) => transform(context)));
|
|
116
126
|
};
|
|
117
127
|
//#endregion
|
|
118
|
-
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode,
|
|
128
|
+
export { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBreaksToParagraphs, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, enrichEmbedPlaceholders, expandSvgSelfClose, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosures, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, paragraphizePlainText, parseFragment, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, 
unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
|