feedsweep 1.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/dist/bookmarks/ghost.d.ts +6 -0
- package/dist/bookmarks/ghost.js +21 -0
- package/dist/bookmarks/substack.d.ts +6 -0
- package/dist/bookmarks/substack.js +26 -0
- package/dist/common.d.ts +7 -5
- package/dist/common.js +49 -24
- package/dist/defaults.d.ts +6 -2
- package/dist/defaults.js +48 -1
- package/dist/index.d.ts +10 -3
- package/dist/index.js +14 -3
- package/dist/transforms/dom/convertBookmarkCards.d.ts +6 -0
- package/dist/transforms/dom/convertBookmarkCards.js +14 -0
- package/dist/transforms/dom/demoteHeadings.d.ts +6 -0
- package/dist/transforms/dom/demoteHeadings.js +20 -0
- package/dist/transforms/dom/enrichEmbedPlaceholders.js +2 -2
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +10 -1
- package/dist/transforms/dom/proxyAssetUrls.js +15 -1
- package/dist/transforms/dom/stripInertElements.d.ts +6 -0
- package/dist/transforms/dom/stripInertElements.js +11 -0
- package/dist/transforms/dom/unwrapEmojiImages.d.ts +6 -0
- package/dist/transforms/dom/unwrapEmojiImages.js +21 -0
- package/dist/transforms/dom/unwrapWrappers.js +7 -3
- package/dist/transforms/string/stripControlChars.d.ts +6 -0
- package/dist/transforms/string/stripControlChars.js +21 -0
- package/dist/types.d.ts +23 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -41,13 +41,17 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
41
41
|
| `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
|
|
42
42
|
| `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
|
|
43
43
|
| `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
|
|
44
|
+
| `demoteHeadings` | Shift every heading down by one level (`<h1>`→`<h2>`, …, `<h5>`→`<h6>`) when the body contains an `<h1>`, so it sits below the reader's own page title |
|
|
44
45
|
| `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
|
|
45
46
|
| `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
|
|
47
|
+
| `stripInertElements` | Remove platform chrome and dead placeholders — subscribe widgets, share buttons, related-posts widgets, ad slots (AdSense / AdThrive), author bio blocks, email preheaders, Substack image controls, and Drupal `<drupal-render-placeholder>` tags. Pass `inertSelectors` to extend or replace |
|
|
46
48
|
| `removeTrackingPixels` | Strip 1×1 tracking pixel images |
|
|
49
|
+
| `unwrapEmojiImages` | Replace WordPress/Facebook/Twitter/GitHub emoji `<img>` tags with their alt-text glyph |
|
|
47
50
|
| `stripTrackingParams` | Remove UTM and other tracking parameters |
|
|
48
51
|
| `convertBreaksToParagraphs` | Convert `<br><br>` runs into semantic `<p>` blocks |
|
|
49
52
|
| `injectEnclosures` | Inject feed enclosures into content as native `<audio>`/`<video>` or iframe placeholders |
|
|
50
53
|
| `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
|
|
54
|
+
| `convertBookmarkCards` | Convert link-preview cards into `data-bookmark-*` placeholders via a registry of per-provider `BookmarkResolver`s (`defaultBookmarkResolvers`: Ghost `kg-bookmark-card`, Substack `embedded-publication-wrap`). Extend via `bookmarkResolvers` |
|
|
51
55
|
| `enrichEmbedPlaceholders` | Populate placeholder metadata (`title`, `description`, `duration`, etc.) via a caller-supplied async fn. Opt-in; not in defaults |
|
|
52
56
|
| `proxyAssetUrls` | Rewrite image, video, and audio URLs through a caller-supplied proxy |
|
|
53
57
|
| `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
|
|
@@ -62,6 +66,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
62
66
|
| `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
|
|
63
67
|
| `stripComments` | Remove HTML `<!-- comments -->` |
|
|
64
68
|
| `unwrapCdataComments` | Strip malformed `<!--[CDATA[ … ]]-->` wrappers before parsing so the wrapped article reaches the DOM as real HTML |
|
|
69
|
+
| `stripControlChars` | Strip rendering-hostile control characters (NUL, BEL, ESC, DEL, C1 range) before parsing. Preserves tab / LF / CR |
|
|
65
70
|
|
|
66
71
|
## Options
|
|
67
72
|
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/bookmarks/ghost.ts
|
|
2
|
+
const ghostBookmarkResolver = {
|
|
3
|
+
selector: ".kg-bookmark-card",
|
|
4
|
+
extract: (element) => {
|
|
5
|
+
const url = element.querySelector("a.kg-bookmark-container")?.getAttribute("href") ?? void 0;
|
|
6
|
+
const title = element.querySelector(".kg-bookmark-title")?.textContent?.trim();
|
|
7
|
+
if (!url || !title) return;
|
|
8
|
+
return {
|
|
9
|
+
provider: "ghost",
|
|
10
|
+
url,
|
|
11
|
+
title,
|
|
12
|
+
description: element.querySelector(".kg-bookmark-description")?.textContent?.trim(),
|
|
13
|
+
author: element.querySelector(".kg-bookmark-author")?.textContent?.trim(),
|
|
14
|
+
publisher: element.querySelector(".kg-bookmark-publisher")?.textContent?.trim(),
|
|
15
|
+
icon: element.querySelector("img.kg-bookmark-icon")?.getAttribute("src") ?? void 0,
|
|
16
|
+
thumbnail: element.querySelector(".kg-bookmark-thumbnail img")?.getAttribute("src") ?? void 0
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
};
|
|
20
|
+
//#endregion
|
|
21
|
+
export { ghostBookmarkResolver };
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
//#region src/bookmarks/substack.ts
|
|
2
|
+
const parsePublicationAttrs = (raw) => {
|
|
3
|
+
if (!raw) return;
|
|
4
|
+
try {
|
|
5
|
+
return JSON.parse(raw);
|
|
6
|
+
} catch {}
|
|
7
|
+
};
|
|
8
|
+
const substackBookmarkResolver = {
|
|
9
|
+
selector: ".embedded-publication-wrap",
|
|
10
|
+
extract: (element) => {
|
|
11
|
+
const attrs = parsePublicationAttrs(element.getAttribute("data-attrs"));
|
|
12
|
+
const url = attrs?.base_url;
|
|
13
|
+
const title = attrs?.name?.trim();
|
|
14
|
+
if (!url || !title) return;
|
|
15
|
+
return {
|
|
16
|
+
provider: "substack",
|
|
17
|
+
url,
|
|
18
|
+
title,
|
|
19
|
+
description: attrs.hero_text?.trim(),
|
|
20
|
+
author: attrs.author_name?.trim(),
|
|
21
|
+
icon: attrs.logo_url
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
};
|
|
25
|
+
//#endregion
|
|
26
|
+
export { substackBookmarkResolver };
|
package/dist/common.d.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
import { EmbedResolverResult, MaybePromise } from "./types.js";
|
|
1
|
+
import { BookmarkResolverResult, EmbedResolverResult, MaybePromise } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/common.d.ts
|
|
4
|
+
declare const isSafeThumbnailUrl: (url: string) => boolean;
|
|
4
5
|
declare const applyDomTransforms: (document: Document, transforms: Array<(document: Document) => MaybePromise<void>>) => Promise<string>;
|
|
5
6
|
declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => MaybePromise<string>>) => Promise<string>;
|
|
6
|
-
declare const
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
declare const createPlaceholder: <Type extends object>(document: Document, type: string, fields: Type) => HTMLElement;
|
|
8
|
+
declare const normalizeEmbedFields: (metadata: Partial<EmbedResolverResult>) => Record<string, string | undefined>;
|
|
9
|
+
declare const updateEmbedPlaceholder: (element: HTMLElement, metadata: Partial<EmbedResolverResult>) => void;
|
|
9
10
|
declare const createEmbedPlaceholder: (document: Document, src: string, metadata?: Partial<EmbedResolverResult>) => HTMLElement;
|
|
11
|
+
declare const createBookmarkPlaceholder: (document: Document, result: BookmarkResolverResult) => HTMLElement;
|
|
10
12
|
//#endregion
|
|
11
|
-
export { applyDomTransforms,
|
|
13
|
+
export { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder };
|
package/dist/common.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { coerceNumber } from "./utils.js";
|
|
2
|
-
import { resolveUrl } from "feedcanon";
|
|
2
|
+
import { resolveUrl, upgradeProtocol } from "feedcanon";
|
|
3
3
|
//#region src/common.ts
|
|
4
4
|
const Node = {
|
|
5
5
|
ELEMENT_NODE: 1,
|
|
@@ -104,36 +104,61 @@ const getDimensions = (element) => {
|
|
|
104
104
|
height: height ?? fromStyle(styleHeightRegex)
|
|
105
105
|
};
|
|
106
106
|
};
|
|
107
|
-
const
|
|
108
|
-
const
|
|
109
|
-
const
|
|
110
|
-
|
|
111
|
-
|
|
107
|
+
const createPlaceholder = (document, type, fields) => {
|
|
108
|
+
const element = document.createElement("div");
|
|
109
|
+
for (const [key, value] of Object.entries(fields)) if (value) element.setAttribute(`data-${type}-${key}`, value);
|
|
110
|
+
return element;
|
|
111
|
+
};
|
|
112
|
+
const normalizeEmbedFields = (metadata) => {
|
|
113
|
+
return {
|
|
114
|
+
src: metadata.src ? upgradeProtocol(metadata.src) : void 0,
|
|
115
|
+
provider: metadata.provider,
|
|
116
|
+
id: metadata.id,
|
|
117
|
+
url: metadata.url ? upgradeProtocol(metadata.url) : void 0,
|
|
118
|
+
thumbnail: metadata.thumbnail && isSafeThumbnailUrl(metadata.thumbnail) ? metadata.thumbnail : void 0,
|
|
119
|
+
width: metadata.width ? String(metadata.width) : void 0,
|
|
120
|
+
height: metadata.height ? String(metadata.height) : void 0,
|
|
121
|
+
title: metadata.title,
|
|
122
|
+
description: metadata.description,
|
|
123
|
+
author: metadata.author,
|
|
124
|
+
avatar: metadata.avatar && isSafeThumbnailUrl(metadata.avatar) ? metadata.avatar : void 0,
|
|
125
|
+
duration: metadata.duration ? String(metadata.duration) : void 0
|
|
112
126
|
};
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
if (metadata.height) set("data-embed-height", String(metadata.height));
|
|
120
|
-
if (metadata.title) set("data-embed-title", metadata.title);
|
|
121
|
-
if (metadata.description) set("data-embed-description", metadata.description);
|
|
122
|
-
if (metadata.author) set("data-embed-author", metadata.author);
|
|
123
|
-
if (metadata.avatar && isSafeThumbnailUrl(metadata.avatar)) set("data-embed-avatar", metadata.avatar);
|
|
124
|
-
if (metadata.duration) set("data-embed-duration", String(metadata.duration));
|
|
127
|
+
};
|
|
128
|
+
const updateEmbedPlaceholder = (element, metadata) => {
|
|
129
|
+
for (const [key, value] of Object.entries(normalizeEmbedFields(metadata))) {
|
|
130
|
+
const name = `data-embed-${key}`;
|
|
131
|
+
if (value && !element.hasAttribute(name)) element.setAttribute(name, value);
|
|
132
|
+
}
|
|
125
133
|
};
|
|
126
134
|
const createEmbedPlaceholder = (document, src, metadata) => {
|
|
127
|
-
const element = document
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const fallbackUrl = metadata?.url ?? metadata?.src ?? src;
|
|
135
|
+
const element = createPlaceholder(document, "embed", normalizeEmbedFields({
|
|
136
|
+
...metadata,
|
|
137
|
+
src: metadata?.src ?? src
|
|
138
|
+
}));
|
|
139
|
+
const fallbackUrl = upgradeProtocol(metadata?.url ?? metadata?.src ?? src);
|
|
132
140
|
const link = document.createElement("a");
|
|
133
141
|
link.setAttribute("href", fallbackUrl);
|
|
134
142
|
link.textContent = fallbackUrl;
|
|
135
143
|
element.appendChild(link);
|
|
136
144
|
return element;
|
|
137
145
|
};
|
|
146
|
+
const createBookmarkPlaceholder = (document, result) => {
|
|
147
|
+
const { provider, title, url, icon, thumbnail, ...rest } = result;
|
|
148
|
+
const safeUrl = upgradeProtocol(url);
|
|
149
|
+
const element = createPlaceholder(document, "bookmark", {
|
|
150
|
+
provider,
|
|
151
|
+
...rest,
|
|
152
|
+
url: safeUrl,
|
|
153
|
+
title,
|
|
154
|
+
icon: icon && isSafeThumbnailUrl(icon) ? upgradeProtocol(icon) : void 0,
|
|
155
|
+
thumbnail: thumbnail && isSafeThumbnailUrl(thumbnail) ? upgradeProtocol(thumbnail) : void 0
|
|
156
|
+
});
|
|
157
|
+
const link = document.createElement("a");
|
|
158
|
+
link.setAttribute("href", safeUrl);
|
|
159
|
+
link.textContent = title;
|
|
160
|
+
element.appendChild(link);
|
|
161
|
+
return element;
|
|
162
|
+
};
|
|
138
163
|
//#endregion
|
|
139
|
-
export { Node, NodeFilter, applyDomTransforms,
|
|
164
|
+
export { Node, NodeFilter, applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isSafeThumbnailUrl, isSkippable, isWhitespaceText, normalizeEmbedFields, updateEmbedPlaceholder };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
|
-
import { DomTransform, EmbedResolver, ResolveUrlFn, StringTransform, UrlUnwrapper } from "./types.js";
|
|
1
|
+
import { BookmarkResolver, DomTransform, EmbedResolver, ResolveUrlFn, StringTransform, UrlUnwrapper } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultStringTransforms: Array<StringTransform>;
|
|
5
5
|
declare const defaultDomTransforms: Array<DomTransform>;
|
|
6
6
|
declare const defaultEmbedResolvers: Array<EmbedResolver>;
|
|
7
|
+
declare const defaultBookmarkResolvers: Array<BookmarkResolver>;
|
|
7
8
|
declare const defaultResolveUrlFn: ResolveUrlFn;
|
|
8
9
|
declare const defaultLazySrcAttributes: string[];
|
|
9
10
|
declare const defaultLazySrcsetAttributes: string[];
|
|
10
11
|
declare const defaultTrackingHosts: string[];
|
|
11
12
|
declare const defaultTrackingPathSegments: string[];
|
|
13
|
+
declare const defaultEmojiImageHosts: string[];
|
|
14
|
+
declare const defaultPreservedPreClasses: string[];
|
|
15
|
+
declare const defaultInertSelectors: string[];
|
|
12
16
|
declare const defaultUrlUnwrappers: Array<UrlUnwrapper>;
|
|
13
17
|
//#endregion
|
|
14
|
-
export { defaultDomTransforms, defaultEmbedResolvers, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
|
18
|
+
export { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
package/dist/defaults.js
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
+
import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
|
|
2
|
+
import { substackBookmarkResolver } from "./bookmarks/substack.js";
|
|
1
3
|
import { youtubeEmbedResolver } from "./embeds/youtube.js";
|
|
4
|
+
import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
|
|
2
5
|
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
3
6
|
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
7
|
+
import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
|
|
4
8
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
5
9
|
import { highlightCode } from "./transforms/dom/highlightCode.js";
|
|
6
10
|
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
@@ -16,14 +20,17 @@ import { stripComments } from "./transforms/dom/stripComments.js";
|
|
|
16
20
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
17
21
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
18
22
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
23
|
+
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
19
24
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
20
25
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
21
26
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
22
27
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
23
28
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
29
|
+
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
24
30
|
import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
25
31
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
26
32
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
33
|
+
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
27
34
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
28
35
|
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
29
36
|
import { unwrapBing } from "./unwraps/bing.js";
|
|
@@ -41,6 +48,7 @@ import { unwrapYouTube } from "./unwraps/youtube.js";
|
|
|
41
48
|
import { resolveUrl } from "feedcanon";
|
|
42
49
|
//#region src/defaults.ts
|
|
43
50
|
const defaultStringTransforms = [
|
|
51
|
+
stripControlChars,
|
|
44
52
|
stripOversizedBase64Sources,
|
|
45
53
|
unwrapCdataComments,
|
|
46
54
|
paragraphizePlainText
|
|
@@ -50,12 +58,16 @@ const defaultDomTransforms = [
|
|
|
50
58
|
stripComments,
|
|
51
59
|
unwrapDoublyNestedLists,
|
|
52
60
|
stripDuplicateTitleHeading,
|
|
61
|
+
demoteHeadings,
|
|
53
62
|
fixLazyImages,
|
|
63
|
+
stripInertElements,
|
|
54
64
|
resolveRelativeUrls,
|
|
55
65
|
unwrapRedirectUrls,
|
|
56
66
|
stripDeadAnchors,
|
|
57
67
|
stripTrackingParams,
|
|
68
|
+
convertBookmarkCards,
|
|
58
69
|
removeTrackingPixels,
|
|
70
|
+
unwrapEmojiImages,
|
|
59
71
|
convertBreaksToParagraphs,
|
|
60
72
|
stripInterBlockBreaks,
|
|
61
73
|
stripParagraphBoundaryBreaks,
|
|
@@ -72,6 +84,7 @@ const defaultDomTransforms = [
|
|
|
72
84
|
stripEmptyTags
|
|
73
85
|
];
|
|
74
86
|
const defaultEmbedResolvers = [youtubeEmbedResolver];
|
|
87
|
+
const defaultBookmarkResolvers = [ghostBookmarkResolver, substackBookmarkResolver];
|
|
75
88
|
const defaultResolveUrlFn = (url, baseUrl) => resolveUrl(url, baseUrl);
|
|
76
89
|
const defaultLazySrcAttributes = [
|
|
77
90
|
"data-src",
|
|
@@ -142,6 +155,40 @@ const defaultTrackingPathSegments = [
|
|
|
142
155
|
"count",
|
|
143
156
|
"impression"
|
|
144
157
|
];
|
|
158
|
+
const defaultEmojiImageHosts = [
|
|
159
|
+
"s.w.org/images/core/emoji/",
|
|
160
|
+
"s0.wp.com/wp-content/mu-plugins/wpcom-smileys/",
|
|
161
|
+
"fbcdn.net/images/emoji.php/",
|
|
162
|
+
"abs.twimg.com/emoji/",
|
|
163
|
+
"githubassets.com/images/icons/emoji/"
|
|
164
|
+
];
|
|
165
|
+
const defaultPreservedPreClasses = ["wp-block-verse", "wp-block-preformatted"];
|
|
166
|
+
const defaultInertSelectors = [
|
|
167
|
+
".image-link-expand",
|
|
168
|
+
"[data-component-name=\"SubscribeWidget\"]",
|
|
169
|
+
".subscription-widget-wrap-editor",
|
|
170
|
+
"drupal-render-placeholder",
|
|
171
|
+
".adsbygoogle",
|
|
172
|
+
".embedded-publication-wrap",
|
|
173
|
+
".yarpp-related",
|
|
174
|
+
".sharethis-inline-share-buttons",
|
|
175
|
+
".sharedaddy",
|
|
176
|
+
".wp-block-jetpack-subscriptions",
|
|
177
|
+
".wp-block-post-author",
|
|
178
|
+
".kg-signup-card",
|
|
179
|
+
".mc4wp-form",
|
|
180
|
+
".formkit-form",
|
|
181
|
+
".mcnPreviewText",
|
|
182
|
+
".saboxplugin-wrap",
|
|
183
|
+
".addtoany_share_save_container",
|
|
184
|
+
"iframe[src*=\"embeds.beehiiv.com\"]",
|
|
185
|
+
".jp-relatedposts",
|
|
186
|
+
".adthrive-ad",
|
|
187
|
+
".jetpack_subscription_widget",
|
|
188
|
+
".crp_related",
|
|
189
|
+
"form[action*=\"buttondown.email\"]",
|
|
190
|
+
".sqs-block-newsletter"
|
|
191
|
+
];
|
|
145
192
|
const defaultUrlUnwrappers = [
|
|
146
193
|
unwrapBing,
|
|
147
194
|
unwrapGoogle,
|
|
@@ -157,4 +204,4 @@ const defaultUrlUnwrappers = [
|
|
|
157
204
|
unwrapRedditOut
|
|
158
205
|
];
|
|
159
206
|
//#endregion
|
|
160
|
-
export { defaultDomTransforms, defaultEmbedResolvers, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
|
207
|
+
export { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
-
import { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
|
|
1
|
+
import { AssetProxyFn, AssetType, BookmarkResolver, BookmarkResolverResult, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
|
|
2
2
|
import { defaultResolveUrlFn } from "./defaults.js";
|
|
3
|
-
import {
|
|
3
|
+
import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
|
|
4
|
+
import { substackBookmarkResolver } from "./bookmarks/substack.js";
|
|
5
|
+
import { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder } from "./common.js";
|
|
4
6
|
import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
|
|
7
|
+
import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
|
|
5
8
|
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
6
9
|
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
10
|
+
import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
|
|
7
11
|
import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
|
|
8
12
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
9
13
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
@@ -20,14 +24,17 @@ import { stripComments } from "./transforms/dom/stripComments.js";
|
|
|
20
24
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
21
25
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
22
26
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
27
|
+
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
23
28
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
24
29
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
25
30
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
26
31
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
27
32
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
33
|
+
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
28
34
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
29
35
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
30
36
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
37
|
+
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
31
38
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
32
39
|
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
33
40
|
import { unwrapAceml } from "./unwraps/aceml.js";
|
|
@@ -109,4 +116,4 @@ import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor
|
|
|
109
116
|
//#region src/index.d.ts
|
|
110
117
|
declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
|
|
111
118
|
//#endregion
|
|
112
|
-
export { type AssetProxyFn, type AssetType, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms,
|
|
119
|
+
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, 
unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
|
|
2
|
-
import { applyDomTransforms,
|
|
2
|
+
import { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder } from "./common.js";
|
|
3
|
+
import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
|
|
4
|
+
import { substackBookmarkResolver } from "./bookmarks/substack.js";
|
|
3
5
|
import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
|
|
6
|
+
import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
|
|
4
7
|
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
5
8
|
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
9
|
+
import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
|
|
6
10
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
7
11
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
8
12
|
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
@@ -18,14 +22,17 @@ import { stripComments } from "./transforms/dom/stripComments.js";
|
|
|
18
22
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
19
23
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
20
24
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
25
|
+
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
21
26
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
22
27
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
23
28
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
24
29
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
25
30
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
31
|
+
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
26
32
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
27
33
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
28
34
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
35
|
+
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
29
36
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
30
37
|
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
31
38
|
import { unwrapBing } from "./unwraps/bing.js";
|
|
@@ -40,7 +47,7 @@ import { unwrapRedditOut } from "./unwraps/redditOut.js";
|
|
|
40
47
|
import { unwrapVkAway } from "./unwraps/vkAway.js";
|
|
41
48
|
import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
|
|
42
49
|
import { unwrapYouTube } from "./unwraps/youtube.js";
|
|
43
|
-
import { defaultDomTransforms, defaultEmbedResolvers, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
|
|
50
|
+
import { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
|
|
44
51
|
import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
|
|
45
52
|
import { unwrapAceml } from "./unwraps/aceml.js";
|
|
46
53
|
import { unwrapAdjust } from "./unwraps/adjust.js";
|
|
@@ -110,10 +117,14 @@ const transformContent = async (html, options) => {
|
|
|
110
117
|
baseUrl: options.baseUrl,
|
|
111
118
|
enclosures: options.enclosures,
|
|
112
119
|
embedResolvers: options.embedResolvers ?? defaultEmbedResolvers,
|
|
120
|
+
bookmarkResolvers: options.bookmarkResolvers ?? defaultBookmarkResolvers,
|
|
113
121
|
lazySrcAttributes: options.lazySrcAttributes ?? defaultLazySrcAttributes,
|
|
114
122
|
lazySrcsetAttributes: options.lazySrcsetAttributes ?? defaultLazySrcsetAttributes,
|
|
115
123
|
trackingHosts: options.trackingHosts ?? defaultTrackingHosts,
|
|
116
124
|
trackingPathSegments: options.trackingPathSegments ?? defaultTrackingPathSegments,
|
|
125
|
+
emojiImageHosts: options.emojiImageHosts ?? defaultEmojiImageHosts,
|
|
126
|
+
inertSelectors: options.inertSelectors ?? defaultInertSelectors,
|
|
127
|
+
preservedPreClasses: options.preservedPreClasses ?? defaultPreservedPreClasses,
|
|
117
128
|
urlUnwrappers: options.urlUnwrappers ?? defaultUrlUnwrappers,
|
|
118
129
|
resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn,
|
|
119
130
|
assetProxyFn: options.assetProxyFn,
|
|
@@ -126,4 +137,4 @@ const transformContent = async (html, options) => {
|
|
|
126
137
|
return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
|
|
127
138
|
};
|
|
128
139
|
//#endregion
|
|
129
|
-
export { applyDomTransforms,
|
|
140
|
+
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, 
unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { createBookmarkPlaceholder } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/convertBookmarkCards.ts
|
|
3
|
+
const convertBookmarkCards = (context) => {
|
|
4
|
+
const { bookmarkResolvers } = context;
|
|
5
|
+
return async (document) => {
|
|
6
|
+
for (const resolver of bookmarkResolvers) for (const element of document.querySelectorAll(resolver.selector)) {
|
|
7
|
+
const result = await resolver.extract(element);
|
|
8
|
+
if (!result) continue;
|
|
9
|
+
element.replaceWith(createBookmarkPlaceholder(document, result));
|
|
10
|
+
}
|
|
11
|
+
};
|
|
12
|
+
};
|
|
13
|
+
//#endregion
|
|
14
|
+
export { convertBookmarkCards };
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
//#region src/transforms/dom/demoteHeadings.ts
|
|
2
|
+
// Heading levels that demoteHeadings rewrites. h6 is not listed; presumably because each
// match is renamed to h{n+1} and HTML defines no h7 — confirm against the source repo.
const headingSelector = "h1, h2, h3, h4, h5";
|
|
3
|
+
const demoteHeadings = () => {
|
|
4
|
+
return (document) => {
|
|
5
|
+
if (!document.querySelector("h1")) return;
|
|
6
|
+
const headings = document.querySelectorAll(headingSelector);
|
|
7
|
+
for (const heading of headings) {
|
|
8
|
+
const nextTagName = `h${Number(heading.tagName.slice(1)) + 1}`;
|
|
9
|
+
const replacement = document.createElement(nextTagName);
|
|
10
|
+
for (const name of heading.getAttributeNames().reverse()) {
|
|
11
|
+
const value = heading.getAttribute(name);
|
|
12
|
+
if (value !== null) replacement.setAttribute(name, value);
|
|
13
|
+
}
|
|
14
|
+
while (heading.firstChild) replacement.appendChild(heading.firstChild);
|
|
15
|
+
heading.replaceWith(replacement);
|
|
16
|
+
}
|
|
17
|
+
};
|
|
18
|
+
};
|
|
19
|
+
//#endregion
|
|
20
|
+
export { demoteHeadings };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { updateEmbedPlaceholder } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/enrichEmbedPlaceholders.ts
|
|
3
3
|
const enrichEmbedPlaceholders = (context) => {
|
|
4
4
|
const enrichEmbedFn = context.enrichEmbedFn;
|
|
@@ -24,7 +24,7 @@ const enrichEmbedPlaceholders = (context) => {
|
|
|
24
24
|
for (let i = 0; i < count; i++) {
|
|
25
25
|
const embed = embeds[i];
|
|
26
26
|
const data = enriched.get(`${embed.provider}:${embed.id}`);
|
|
27
|
-
if (data)
|
|
27
|
+
if (data) updateEmbedPlaceholder(placeholders[i], data);
|
|
28
28
|
}
|
|
29
29
|
};
|
|
30
30
|
};
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
//#region src/transforms/dom/mergeConsecutiveOneLinerPres.ts
|
|
2
2
|
const trailingBrRegex = /<br\s*\/?>\s*$/i;
|
|
3
3
|
const surroundingNewlinesRegex = /^\n+|\n+$/g;
|
|
4
|
-
const
|
|
4
|
+
const classTokenSeparator = /\s+/;
|
|
5
|
+
const mergeConsecutiveOneLinerPres = ({ preservedPreClasses }) => {
|
|
6
|
+
const preservedSet = new Set(preservedPreClasses);
|
|
7
|
+
const isPreserved = (element) => {
|
|
8
|
+
const classAttribute = element.getAttribute("class");
|
|
9
|
+
if (!classAttribute) return false;
|
|
10
|
+
for (const token of classAttribute.split(classTokenSeparator)) if (preservedSet.has(token)) return true;
|
|
11
|
+
return false;
|
|
12
|
+
};
|
|
5
13
|
return (document) => {
|
|
6
14
|
const pres = document.querySelectorAll("pre");
|
|
7
15
|
for (const pre of pres) {
|
|
@@ -20,6 +28,7 @@ const mergeConsecutiveOneLinerPres = () => {
|
|
|
20
28
|
sibling = sibling.nextSibling;
|
|
21
29
|
}
|
|
22
30
|
if (run.length < 2) continue;
|
|
31
|
+
if (run.some(isPreserved)) continue;
|
|
23
32
|
const isSingleLine = (element) => {
|
|
24
33
|
return !element.innerHTML.replace(surroundingNewlinesRegex, "").includes("\n");
|
|
25
34
|
};
|
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
import { parseSrcset, stringifySrcset } from "srcset";
|
|
2
2
|
//#region src/transforms/dom/proxyAssetUrls.ts
|
|
3
|
+
const proxyableSelectors = [
|
|
4
|
+
"img",
|
|
5
|
+
"video",
|
|
6
|
+
"audio",
|
|
7
|
+
"source",
|
|
8
|
+
"track",
|
|
9
|
+
"image",
|
|
10
|
+
"[data-embed-thumbnail]",
|
|
11
|
+
"[data-embed-avatar]",
|
|
12
|
+
"[data-bookmark-icon]",
|
|
13
|
+
"[data-bookmark-thumbnail]"
|
|
14
|
+
];
|
|
3
15
|
const sourceTypeFromParent = (element) => {
|
|
4
16
|
const parent = element.parentElement?.localName;
|
|
5
17
|
if (parent === "video") return "video";
|
|
@@ -30,7 +42,7 @@ const proxySrcset = (element, type, assetProxyFn) => {
|
|
|
30
42
|
const proxyAssetUrls = ({ assetProxyFn }) => {
|
|
31
43
|
if (!assetProxyFn) return () => {};
|
|
32
44
|
return (document) => {
|
|
33
|
-
const elements = document.querySelectorAll("
|
|
45
|
+
const elements = document.querySelectorAll(proxyableSelectors.join(", "));
|
|
34
46
|
for (const element of elements) {
|
|
35
47
|
switch (element.localName) {
|
|
36
48
|
case "img":
|
|
@@ -57,6 +69,8 @@ const proxyAssetUrls = ({ assetProxyFn }) => {
|
|
|
57
69
|
}
|
|
58
70
|
if (element.hasAttribute("data-embed-thumbnail")) proxyAttribute(element, "data-embed-thumbnail", "image", assetProxyFn);
|
|
59
71
|
if (element.hasAttribute("data-embed-avatar")) proxyAttribute(element, "data-embed-avatar", "image", assetProxyFn);
|
|
72
|
+
if (element.hasAttribute("data-bookmark-icon")) proxyAttribute(element, "data-bookmark-icon", "image", assetProxyFn);
|
|
73
|
+
if (element.hasAttribute("data-bookmark-thumbnail")) proxyAttribute(element, "data-bookmark-thumbnail", "image", assetProxyFn);
|
|
60
74
|
}
|
|
61
75
|
};
|
|
62
76
|
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
//#region src/transforms/dom/stripInertElements.ts
|
|
2
|
+
const stripInertElements = ({ inertSelectors }) => {
|
|
3
|
+
const selector = inertSelectors.join(",");
|
|
4
|
+
return (document) => {
|
|
5
|
+
if (!selector) return;
|
|
6
|
+
const elements = document.querySelectorAll(selector);
|
|
7
|
+
for (const element of elements) element.remove();
|
|
8
|
+
};
|
|
9
|
+
};
|
|
10
|
+
//#endregion
|
|
11
|
+
export { stripInertElements };
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/transforms/dom/unwrapEmojiImages.ts
|
|
2
|
+
// Matches any UTF-16 code unit outside the 7-bit ASCII range (0x80 and above, including
// both halves of a surrogate pair). Spelled with escapes rather than raw characters so the
// range endpoints cannot be dropped by tooling that strips control or non-character code
// points, which would leave a class that only matches a literal "-".
const nonAsciiRegex = /[^\x00-\x7F]/;
|
|
3
|
+
const asciiLetterRegex = /[a-zA-Z]/;
|
|
4
|
+
const isEmojiShapedAlt = (alt) => {
|
|
5
|
+
return nonAsciiRegex.test(alt) && !asciiLetterRegex.test(alt);
|
|
6
|
+
};
|
|
7
|
+
const unwrapEmojiImages = (context) => {
|
|
8
|
+
const selector = [
|
|
9
|
+
"img.wp-smiley[alt]",
|
|
10
|
+
"img.emoji[alt]",
|
|
11
|
+
...context.emojiImageHosts.map((host) => `img[alt][src*="${host}"]`)
|
|
12
|
+
].join(", ");
|
|
13
|
+
return (document) => {
|
|
14
|
+
for (const image of document.querySelectorAll(selector)) {
|
|
15
|
+
const alt = image.getAttribute("alt");
|
|
16
|
+
if (alt && isEmojiShapedAlt(alt)) image.replaceWith(document.createTextNode(alt));
|
|
17
|
+
}
|
|
18
|
+
};
|
|
19
|
+
};
|
|
20
|
+
//#endregion
|
|
21
|
+
export { unwrapEmojiImages };
|
|
@@ -7,9 +7,13 @@ const wrapperTags = new Set([
|
|
|
7
7
|
"header",
|
|
8
8
|
"footer"
|
|
9
9
|
]);
|
|
10
|
-
const
|
|
10
|
+
const preservedPrefixes = ["data-embed", "data-bookmark"];
|
|
11
|
+
const hasPreservedAttribute = (element) => {
|
|
11
12
|
const attributes = element.attributes;
|
|
12
|
-
for (let i = 0, n = attributes.length; i < n; i++)
|
|
13
|
+
for (let i = 0, n = attributes.length; i < n; i++) {
|
|
14
|
+
const name = attributes[i].name;
|
|
15
|
+
for (const prefix of preservedPrefixes) if (name.startsWith(prefix)) return true;
|
|
16
|
+
}
|
|
13
17
|
return false;
|
|
14
18
|
};
|
|
15
19
|
const unwrapWrappers = () => {
|
|
@@ -20,7 +24,7 @@ const unwrapWrappers = () => {
|
|
|
20
24
|
if (!wrapperTags.has(element.localName)) continue;
|
|
21
25
|
const parent = element.parentNode;
|
|
22
26
|
if (!parent) continue;
|
|
23
|
-
if (
|
|
27
|
+
if (hasPreservedAttribute(element)) continue;
|
|
24
28
|
while (element.firstChild) parent.insertBefore(element.firstChild, element);
|
|
25
29
|
element.remove();
|
|
26
30
|
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/transforms/string/stripControlChars.ts
|
|
2
|
+
const ranges = [
|
|
3
|
+
"\\x00-\\x08",
|
|
4
|
+
"\\x0B\\x0C",
|
|
5
|
+
"\\x0E-\\x1F",
|
|
6
|
+
"\\x7F-\\x9F",
|
|
7
|
+
"\\uFDD0-\\uFDEF",
|
|
8
|
+
"\\uFFFE\\uFFFF",
|
|
9
|
+
...Array.from({ length: 16 }, (_, index) => {
|
|
10
|
+
const plane = (index + 1).toString(16).toUpperCase();
|
|
11
|
+
return `\\u{${plane}FFFE}\\u{${plane}FFFF}`;
|
|
12
|
+
})
|
|
13
|
+
];
|
|
14
|
+
const controlCharRegex = new RegExp(`[${ranges.join("")}]`, "gu");
|
|
15
|
+
const stripControlChars = () => {
|
|
16
|
+
return (html) => {
|
|
17
|
+
return html.replace(controlCharRegex, "");
|
|
18
|
+
};
|
|
19
|
+
};
|
|
20
|
+
//#endregion
|
|
21
|
+
export { stripControlChars };
|
package/dist/types.d.ts
CHANGED
|
@@ -41,6 +41,20 @@ type EmbedResolver = {
|
|
|
41
41
|
selector: string;
|
|
42
42
|
extract: (element: Element) => MaybePromise<EmbedResolverResult | undefined>;
|
|
43
43
|
};
|
|
44
|
+
type BookmarkResolverResult = {
|
|
45
|
+
provider: string;
|
|
46
|
+
url: string;
|
|
47
|
+
title: string;
|
|
48
|
+
description?: string;
|
|
49
|
+
author?: string;
|
|
50
|
+
publisher?: string;
|
|
51
|
+
icon?: string;
|
|
52
|
+
thumbnail?: string;
|
|
53
|
+
};
|
|
54
|
+
type BookmarkResolver = {
|
|
55
|
+
selector: string;
|
|
56
|
+
extract: (element: Element) => MaybePromise<BookmarkResolverResult | undefined>;
|
|
57
|
+
};
|
|
44
58
|
type UrlUnwrapper = (url: URL) => string | undefined;
|
|
45
59
|
type AssetType = 'image' | 'video' | 'audio';
|
|
46
60
|
type AssetProxyFn = (url: string, type: AssetType) => string | undefined;
|
|
@@ -48,10 +62,14 @@ type TransformContext = {
|
|
|
48
62
|
baseUrl?: string;
|
|
49
63
|
enclosures?: Array<Enclosure>;
|
|
50
64
|
embedResolvers: Array<EmbedResolver>;
|
|
65
|
+
bookmarkResolvers: Array<BookmarkResolver>;
|
|
51
66
|
lazySrcAttributes: Array<string>;
|
|
52
67
|
lazySrcsetAttributes: Array<string>;
|
|
53
68
|
trackingHosts: Array<string>;
|
|
54
69
|
trackingPathSegments: Array<string>;
|
|
70
|
+
emojiImageHosts: Array<string>;
|
|
71
|
+
inertSelectors: Array<string>;
|
|
72
|
+
preservedPreClasses: Array<string>;
|
|
55
73
|
urlUnwrappers: Array<UrlUnwrapper>;
|
|
56
74
|
resolveUrlFn: ResolveUrlFn;
|
|
57
75
|
assetProxyFn?: AssetProxyFn;
|
|
@@ -66,10 +84,14 @@ type TransformContentOptions = {
|
|
|
66
84
|
baseUrl?: string;
|
|
67
85
|
enclosures?: Array<Enclosure>;
|
|
68
86
|
embedResolvers?: Array<EmbedResolver>;
|
|
87
|
+
bookmarkResolvers?: Array<BookmarkResolver>;
|
|
69
88
|
lazySrcAttributes?: Array<string>;
|
|
70
89
|
lazySrcsetAttributes?: Array<string>;
|
|
71
90
|
trackingHosts?: Array<string>;
|
|
72
91
|
trackingPathSegments?: Array<string>;
|
|
92
|
+
emojiImageHosts?: Array<string>;
|
|
93
|
+
inertSelectors?: Array<string>;
|
|
94
|
+
preservedPreClasses?: Array<string>;
|
|
73
95
|
urlUnwrappers?: Array<UrlUnwrapper>;
|
|
74
96
|
resolveUrlFn?: ResolveUrlFn;
|
|
75
97
|
assetProxyFn?: AssetProxyFn;
|
|
@@ -79,4 +101,4 @@ type TransformContentOptions = {
|
|
|
79
101
|
domTransforms?: Array<DomTransform>;
|
|
80
102
|
};
|
|
81
103
|
//#endregion
|
|
82
|
-
export { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext, UrlUnwrapper };
|
|
104
|
+
export { AssetProxyFn, AssetType, BookmarkResolver, BookmarkResolverResult, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext, UrlUnwrapper };
|
package/package.json
CHANGED
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
"srcset": "^5.0.3"
|
|
50
50
|
},
|
|
51
51
|
"peerDependencies": {
|
|
52
|
-
"feedcanon": "^2.0.0-next.
|
|
52
|
+
"feedcanon": "^2.0.0-next.4",
|
|
53
53
|
"feedscout": "^2.0.0-next.2",
|
|
54
54
|
"linkedom": "^0.18.12"
|
|
55
55
|
},
|
|
@@ -64,5 +64,5 @@
|
|
|
64
64
|
"linkedom": "^0.18.12",
|
|
65
65
|
"tsdown": "^0.22.0"
|
|
66
66
|
},
|
|
67
|
-
"version": "1.2.0"
|
|
67
|
+
"version": "2.0.0"
|
|
68
68
|
}
|