feedsweep 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -2
- package/dist/bookmarks/ghost.d.ts +6 -0
- package/dist/bookmarks/ghost.js +21 -0
- package/dist/bookmarks/substack.d.ts +6 -0
- package/dist/bookmarks/substack.js +26 -0
- package/dist/common.d.ts +8 -10
- package/dist/common.js +50 -73
- package/dist/defaults.d.ts +6 -3
- package/dist/defaults.js +54 -3
- package/dist/index.d.ts +12 -4
- package/dist/index.js +18 -6
- package/dist/parsers/linkedom.d.ts +4 -0
- package/dist/parsers/linkedom.js +38 -0
- package/dist/transforms/dom/convertBookmarkCards.d.ts +6 -0
- package/dist/transforms/dom/convertBookmarkCards.js +14 -0
- package/dist/transforms/dom/demoteHeadings.d.ts +6 -0
- package/dist/transforms/dom/demoteHeadings.js +20 -0
- package/dist/transforms/dom/enrichEmbedPlaceholders.js +2 -2
- package/dist/transforms/dom/fixLazyImages.js +0 -4
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +10 -1
- package/dist/transforms/dom/proxyAssetUrls.js +15 -1
- package/dist/transforms/dom/stripInertElements.d.ts +6 -0
- package/dist/transforms/dom/stripInertElements.js +11 -0
- package/dist/transforms/dom/unwrapEmojiImages.d.ts +6 -0
- package/dist/transforms/dom/unwrapEmojiImages.js +21 -0
- package/dist/transforms/dom/unwrapWrappers.js +7 -3
- package/dist/transforms/string/stripControlChars.d.ts +6 -0
- package/dist/transforms/string/stripControlChars.js +21 -0
- package/dist/transforms/string/stripOversizedBase64Sources.d.ts +6 -0
- package/dist/transforms/string/stripOversizedBase64Sources.js +13 -0
- package/dist/types.d.ts +25 -2
- package/package.json +15 -5
package/README.md
CHANGED
|
@@ -11,15 +11,19 @@ Feedsweep takes raw feed item HTML and runs it through a pipeline that genuinely
|
|
|
11
11
|
## Installation
|
|
12
12
|
|
|
13
13
|
```bash
|
|
14
|
-
npm install feedsweep
|
|
14
|
+
npm install feedsweep linkedom
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
+
`linkedom` is an optional peer dependency. You only need it if you use the bundled `parseHtml` helper — see [DOM library](#dom-library) for jsdom / happy-dom / browser-native alternatives.
|
|
18
|
+
|
|
17
19
|
## Quick Start
|
|
18
20
|
|
|
19
21
|
```typescript
|
|
20
22
|
import { transformContent } from 'feedsweep'
|
|
23
|
+
import { parseHtml } from 'feedsweep/linkedom'
|
|
21
24
|
|
|
22
25
|
const result = await transformContent('<p>Check <img data-src="photo.jpg"> and visit /about</p>', {
|
|
26
|
+
parseHtmlFn: parseHtml,
|
|
23
27
|
baseUrl: 'https://example.com/post/1',
|
|
24
28
|
})
|
|
25
29
|
```
|
|
@@ -37,13 +41,17 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
37
41
|
| `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
|
|
38
42
|
| `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
|
|
39
43
|
| `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
|
|
44
|
+
| `demoteHeadings` | Shift every heading down by one level (`<h1>`→`<h2>`, …, `<h5>`→`<h6>`) when the body contains an `<h1>`, so it sits below the reader's own page title |
|
|
40
45
|
| `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
|
|
41
46
|
| `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
|
|
47
|
+
| `stripInertElements` | Remove platform chrome and dead placeholders — subscribe widgets, share buttons, related-posts widgets, ad slots (AdSense / AdThrive), author bio blocks, email preheaders, Substack image controls, and Drupal `<drupal-render-placeholder>` tags. Pass `inertSelectors` to extend or replace |
|
|
42
48
|
| `removeTrackingPixels` | Strip 1×1 tracking pixel images |
|
|
49
|
+
| `unwrapEmojiImages` | Replace WordPress/Facebook/Twitter/GitHub emoji `<img>` tags with their alt-text glyph |
|
|
43
50
|
| `stripTrackingParams` | Remove UTM and other tracking parameters |
|
|
44
51
|
| `convertBreaksToParagraphs` | Convert `<br><br>` runs into semantic `<p>` blocks |
|
|
45
52
|
| `injectEnclosures` | Inject feed enclosures into content as native `<audio>`/`<video>` or iframe placeholders |
|
|
46
53
|
| `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
|
|
54
|
+
| `convertBookmarkCards` | Convert link-preview cards into `data-bookmark-*` placeholders via a registry of per-provider `BookmarkResolver`s (`defaultBookmarkResolvers`: Ghost `kg-bookmark-card`, Substack `embedded-publication-wrap`). Extend via `bookmarkResolvers` |
|
|
47
55
|
| `enrichEmbedPlaceholders` | Populate placeholder metadata (`title`, `description`, `duration`, etc.) via a caller-supplied async fn. Opt-in; not in defaults |
|
|
48
56
|
| `proxyAssetUrls` | Rewrite image, video, and audio URLs through a caller-supplied proxy |
|
|
49
57
|
| `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
|
|
@@ -51,19 +59,24 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
51
59
|
| `unwrapDoublyNestedLists` | Unwrap `<ul>`/`<ol>` that wrap a single `<li>` containing a same-type list |
|
|
52
60
|
| `mergeFragmentedLists` | Merge consecutive sibling `<ul>` / `<ol>` lists with matching attributes |
|
|
53
61
|
| `paragraphizePlainText` | Wrap plain text in `<p>` tags |
|
|
62
|
+
| `stripOversizedBase64Sources` | Drop base64 `src`/`srcset`/`poster` payloads larger than 50 KB before parsing |
|
|
54
63
|
| `linkifyUrls` | Wrap bare URLs in `<a>` tags |
|
|
55
64
|
| `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
|
|
56
65
|
| `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
|
|
57
66
|
| `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
|
|
58
67
|
| `stripComments` | Remove HTML `<!-- comments -->` |
|
|
59
68
|
| `unwrapCdataComments` | Strip malformed `<!--[CDATA[ … ]]-->` wrappers before parsing so the wrapped article reaches the DOM as real HTML |
|
|
69
|
+
| `stripControlChars` | Strip rendering-hostile control characters (NUL, BEL, ESC, DEL, C1 range) before parsing. Preserves tab / LF / CR |
|
|
60
70
|
|
|
61
71
|
## Options
|
|
62
72
|
|
|
63
73
|
```typescript
|
|
64
74
|
import { fixLazyImages, resolveRelativeUrls, transformContent } from 'feedsweep'
|
|
75
|
+
import { parseHtml } from 'feedsweep/linkedom'
|
|
65
76
|
|
|
66
77
|
const result = await transformContent(html, {
|
|
78
|
+
// Required: function that turns an HTML string into a `Document`. See "DOM library".
|
|
79
|
+
parseHtmlFn: parseHtml,
|
|
67
80
|
// Base URL for resolving relative URLs.
|
|
68
81
|
baseUrl: 'https://example.com/post/1',
|
|
69
82
|
// Feed item enclosures (audio/video).
|
|
@@ -79,4 +92,40 @@ const result = transformContent(html, {
|
|
|
79
92
|
})
|
|
80
93
|
```
|
|
81
94
|
|
|
82
|
-
The `stringTransforms
|
|
95
|
+
The `stringTransforms` and `domTransforms` options each fully replace the corresponding default phase when provided. Every transform is also exported individually from `feedsweep`, so you can compose any pipeline — list them explicitly to build from scratch, or spread `defaultDomTransforms` (etc.) from `feedsweep/defaults` to extend or filter the defaults.
|
|
96
|
+
|
|
97
|
+
## DOM library
|
|
98
|
+
|
|
99
|
+
Feedsweep is parser-agnostic. You provide `parseHtmlFn` — a function that turns an HTML string into a `Document`. Use any DOM library that produces a standards-compliant `Document`.
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
// linkedom (recommended default)
|
|
103
|
+
import { transformContent } from 'feedsweep'
|
|
104
|
+
import { parseHtml } from 'feedsweep/linkedom'
|
|
105
|
+
|
|
106
|
+
await transformContent(html, { parseHtmlFn: parseHtml, baseUrl })
|
|
107
|
+
|
|
108
|
+
// jsdom
|
|
109
|
+
import { transformContent } from 'feedsweep'
|
|
110
|
+
import { JSDOM } from 'jsdom'
|
|
111
|
+
|
|
112
|
+
await transformContent(html, {
|
|
113
|
+
parseHtmlFn: (raw) => new JSDOM(`<!doctype html><body>${raw}</body>`).window.document,
|
|
114
|
+
baseUrl,
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
// happy-dom
|
|
118
|
+
import { transformContent } from 'feedsweep'
|
|
119
|
+
import { Window } from 'happy-dom'
|
|
120
|
+
|
|
121
|
+
await transformContent(html, {
|
|
122
|
+
parseHtmlFn: (raw) => {
|
|
123
|
+
const window = new Window()
|
|
124
|
+
window.document.body.innerHTML = raw
|
|
125
|
+
return window.document
|
|
126
|
+
},
|
|
127
|
+
baseUrl,
|
|
128
|
+
})
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
The bundled `feedsweep/linkedom` parser bakes in two workarounds for linkedom-specific spec violations (attribute case-folding and SVG XML mode). jsdom and happy-dom do not need them.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/bookmarks/ghost.ts
|
|
2
|
+
/**
 * Bookmark resolver for Ghost link-preview cards (`.kg-bookmark-card`).
 *
 * `extract` reads the card's link, title and optional metadata from its
 * descendants. It returns `undefined` when the card has no link href or no
 * non-empty title, so callers can skip cards that cannot be represented.
 */
const ghostBookmarkResolver = {
  selector: ".kg-bookmark-card",
  extract: (element) => {
    // Trimmed text of the first descendant matching `selector`, if any.
    const textOf = (selector) => element.querySelector(selector)?.textContent?.trim();
    // Attribute value of the first matching descendant; a missing node or attribute yields undefined.
    const attributeOf = (selector, attribute) => {
      return element.querySelector(selector)?.getAttribute(attribute) ?? undefined;
    };

    const href = attributeOf("a.kg-bookmark-container", "href");
    const heading = textOf(".kg-bookmark-title");
    if (!(href && heading)) {
      return undefined;
    }

    return {
      provider: "ghost",
      url: href,
      title: heading,
      description: textOf(".kg-bookmark-description"),
      author: textOf(".kg-bookmark-author"),
      publisher: textOf(".kg-bookmark-publisher"),
      icon: attributeOf("img.kg-bookmark-icon", "src"),
      thumbnail: attributeOf(".kg-bookmark-thumbnail img", "src")
    };
  }
};
|
|
20
|
+
//#endregion
|
|
21
|
+
export { ghostBookmarkResolver };
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
//#region src/bookmarks/substack.ts
|
|
2
|
+
/**
 * Parses the JSON payload read from a card's `data-attrs` attribute.
 *
 * @param {string | null | undefined} raw - Raw attribute value.
 * @returns {Record<string, unknown> | undefined} The decoded object, or
 *   `undefined` when the attribute is missing/empty, is not valid JSON, or
 *   decodes to something other than a plain object (null, array, primitive).
 */
const parsePublicationAttrs = (raw) => {
  if (!raw) return;
  try {
    const parsed = JSON.parse(raw);
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) return parsed;
  } catch {
    // Malformed JSON in feed markup is an expected input; the card is simply unresolvable.
  }
};
/**
 * Bookmark resolver for Substack publication embeds (`.embedded-publication-wrap`).
 *
 * `extract` decodes the element's `data-attrs` JSON and maps it to a bookmark
 * result. The payload comes from feed content, so every field is type-checked:
 * non-string values are ignored instead of being trimmed or forwarded as URLs.
 * Returns `undefined` when there is no usable `base_url` or non-empty `name`.
 */
const substackBookmarkResolver = {
  selector: ".embedded-publication-wrap",
  extract: (element) => {
    const attrs = parsePublicationAttrs(element.getAttribute("data-attrs"));
    if (!attrs) return;
    // Only strings are meaningful here; calling .trim() on a number or object would throw.
    const trimmedString = (value) => (typeof value === "string" ? value.trim() : undefined);
    const plainString = (value) => (typeof value === "string" ? value : undefined);
    const url = plainString(attrs.base_url);
    const title = trimmedString(attrs.name);
    if (!url || !title) return;
    return {
      provider: "substack",
      url,
      title,
      description: trimmedString(attrs.hero_text),
      author: trimmedString(attrs.author_name),
      icon: plainString(attrs.logo_url)
    };
  }
};
|
|
25
|
+
//#endregion
|
|
26
|
+
export { substackBookmarkResolver };
|
package/dist/common.d.ts
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
|
-
import { EmbedResolverResult, MaybePromise } from "./types.js";
|
|
1
|
+
import { BookmarkResolverResult, EmbedResolverResult, MaybePromise } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/common.d.ts
|
|
4
|
-
declare const
|
|
5
|
-
declare const
|
|
6
|
-
declare const parseFragment: (html: string) => Document;
|
|
7
|
-
declare const transformHtml: (html: string, transform: (document: Document) => MaybePromise<void>) => Promise<string>;
|
|
8
|
-
declare const applyDomTransforms: (html: string, transforms: Array<(document: Document) => MaybePromise<void>>) => Promise<string>;
|
|
4
|
+
declare const isSafeThumbnailUrl: (url: string) => boolean;
|
|
5
|
+
declare const applyDomTransforms: (document: Document, transforms: Array<(document: Document) => MaybePromise<void>>) => Promise<string>;
|
|
9
6
|
declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => MaybePromise<string>>) => Promise<string>;
|
|
10
|
-
declare const
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
declare const createPlaceholder: <Type extends object>(document: Document, type: string, fields: Type) => HTMLElement;
|
|
8
|
+
declare const normalizeEmbedFields: (metadata: Partial<EmbedResolverResult>) => Record<string, string | undefined>;
|
|
9
|
+
declare const updateEmbedPlaceholder: (element: HTMLElement, metadata: Partial<EmbedResolverResult>) => void;
|
|
13
10
|
declare const createEmbedPlaceholder: (document: Document, src: string, metadata?: Partial<EmbedResolverResult>) => HTMLElement;
|
|
11
|
+
declare const createBookmarkPlaceholder: (document: Document, result: BookmarkResolverResult) => HTMLElement;
|
|
14
12
|
//#endregion
|
|
15
|
-
export { applyDomTransforms,
|
|
13
|
+
export { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder };
|
package/dist/common.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { coerceNumber } from "./utils.js";
|
|
2
|
-
import { resolveUrl } from "feedcanon";
|
|
3
|
-
import { parseHTML } from "linkedom";
|
|
2
|
+
import { resolveUrl, upgradeProtocol } from "feedcanon";
|
|
4
3
|
//#region src/common.ts
|
|
5
4
|
const Node = {
|
|
6
5
|
ELEMENT_NODE: 1,
|
|
@@ -12,58 +11,11 @@ const NodeFilter = {
|
|
|
12
11
|
SHOW_TEXT: 4,
|
|
13
12
|
SHOW_COMMENT: 128
|
|
14
13
|
};
|
|
15
|
-
const base64SrcRegex = /((?:src|srcset|poster)=["'])data:[^"']*;base64,[^"']*(["'])/g;
|
|
16
14
|
const safeThumbnailDataUrlRegex = /^data:image\/(png|jpe?g|gif|webp|avif);/i;
|
|
17
15
|
/**
 * Decides whether a URL may be used as a thumbnail/avatar/icon source.
 *
 * A URL is accepted when `resolveUrl` (from feedcanon) returns something other
 * than `undefined` for it, or when it matches `safeThumbnailDataUrlRegex`
 * (a `data:image/...` URL of type png, jpeg, gif, webp or avif).
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} `true` when the URL passes either check.
 */
const isSafeThumbnailUrl = (url) => {
  if (resolveUrl(url) !== undefined) {
    return true;
  }
  return safeThumbnailDataUrlRegex.test(url);
};
|
|
20
|
-
const
|
|
21
|
-
return html.replace(base64SrcRegex, (match, prefix, suffix) => {
|
|
22
|
-
if (match.length < maxSize) return match;
|
|
23
|
-
return `${prefix}${suffix}`;
|
|
24
|
-
});
|
|
25
|
-
};
|
|
26
|
-
const normalizeAttributeCase = (document) => {
|
|
27
|
-
for (const element of document.querySelectorAll("*")) {
|
|
28
|
-
const original = Array.from(element.attributes).map((attribute) => ({
|
|
29
|
-
name: attribute.name,
|
|
30
|
-
value: attribute.value
|
|
31
|
-
}));
|
|
32
|
-
const final = /* @__PURE__ */ new Map();
|
|
33
|
-
let needsRewrite = false;
|
|
34
|
-
for (const { name, value } of original) {
|
|
35
|
-
const lower = name.toLowerCase();
|
|
36
|
-
if (lower !== name) needsRewrite = true;
|
|
37
|
-
if (final.has(lower)) {
|
|
38
|
-
needsRewrite = true;
|
|
39
|
-
continue;
|
|
40
|
-
}
|
|
41
|
-
final.set(lower, value);
|
|
42
|
-
}
|
|
43
|
-
if (!needsRewrite) continue;
|
|
44
|
-
for (const { name } of original) element.removeAttribute(name);
|
|
45
|
-
for (const [name, value] of final) element.setAttribute(name, value);
|
|
46
|
-
}
|
|
47
|
-
};
|
|
48
|
-
const svgRegionRegex = /<svg\b[^>]*>[\s\S]*?<\/svg>/gi;
|
|
49
|
-
const svgSelfCloseRegex = /<([a-z][a-z0-9-]*)((?:\s[^>]*)?)\s*\/>/gi;
|
|
50
|
-
const expandSvgSelfClose = (html) => {
|
|
51
|
-
return html.replace(svgRegionRegex, (svgBlock) => {
|
|
52
|
-
return svgBlock.replace(svgSelfCloseRegex, "<$1$2></$1>");
|
|
53
|
-
});
|
|
54
|
-
};
|
|
55
|
-
const parseFragment = (html) => {
|
|
56
|
-
const { document } = parseHTML(`<!doctype html><html><head></head><body>${expandSvgSelfClose(html)}</body></html>`);
|
|
57
|
-
normalizeAttributeCase(document);
|
|
58
|
-
return document;
|
|
59
|
-
};
|
|
60
|
-
const transformHtml = async (html, transform) => {
|
|
61
|
-
const document = parseFragment(html);
|
|
62
|
-
await transform(document);
|
|
63
|
-
return document.body.innerHTML;
|
|
64
|
-
};
|
|
65
|
-
const applyDomTransforms = async (html, transforms) => {
|
|
66
|
-
const document = parseFragment(stripOversizedBase64Sources(html, 50 * 1024));
|
|
18
|
+
/**
 * Runs DOM transforms against an already-parsed document and serializes it.
 *
 * Transforms are awaited one at a time, in array order, so each one sees the
 * mutations made by the previous one.
 *
 * @param {Document} document - Parsed document to mutate in place.
 * @param {Array<(document: Document) => (void | Promise<void>)>} transforms - Transforms to apply.
 * @returns {Promise<string>} `document.body.innerHTML` after all transforms ran.
 */
const applyDomTransforms = async (document, transforms) => {
  for (const runTransform of transforms) {
    await runTransform(document);
  }
  const { body } = document;
  return body.innerHTML;
};
|
|
@@ -152,36 +104,61 @@ const getDimensions = (element) => {
|
|
|
152
104
|
height: height ?? fromStyle(styleHeightRegex)
|
|
153
105
|
};
|
|
154
106
|
};
|
|
155
|
-
const
|
|
156
|
-
const
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
107
|
+
/**
 * Builds a `<div>` placeholder whose data attributes carry the given fields.
 *
 * Each truthy field becomes a `data-<type>-<key>` attribute; falsy values
 * (undefined, empty string, 0, ...) are skipped.
 *
 * @param {Document} document - Document used to create the element.
 * @param {string} type - Placeholder kind, used as the attribute name segment.
 * @param {object} fields - Key/value pairs to expose as data attributes.
 * @returns {HTMLElement} The newly created, unattached element.
 */
const createPlaceholder = (document, type, fields) => {
  const placeholder = document.createElement("div");
  Object.entries(fields).forEach(([field, content]) => {
    if (!content) return;
    placeholder.setAttribute(`data-${type}-${field}`, content);
  });
  return placeholder;
};
|
|
112
|
+
const normalizeEmbedFields = (metadata) => {
|
|
113
|
+
return {
|
|
114
|
+
src: metadata.src ? upgradeProtocol(metadata.src) : void 0,
|
|
115
|
+
provider: metadata.provider,
|
|
116
|
+
id: metadata.id,
|
|
117
|
+
url: metadata.url ? upgradeProtocol(metadata.url) : void 0,
|
|
118
|
+
thumbnail: metadata.thumbnail && isSafeThumbnailUrl(metadata.thumbnail) ? metadata.thumbnail : void 0,
|
|
119
|
+
width: metadata.width ? String(metadata.width) : void 0,
|
|
120
|
+
height: metadata.height ? String(metadata.height) : void 0,
|
|
121
|
+
title: metadata.title,
|
|
122
|
+
description: metadata.description,
|
|
123
|
+
author: metadata.author,
|
|
124
|
+
avatar: metadata.avatar && isSafeThumbnailUrl(metadata.avatar) ? metadata.avatar : void 0,
|
|
125
|
+
duration: metadata.duration ? String(metadata.duration) : void 0
|
|
160
126
|
};
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
if (metadata.height) set("data-embed-height", String(metadata.height));
|
|
168
|
-
if (metadata.title) set("data-embed-title", metadata.title);
|
|
169
|
-
if (metadata.description) set("data-embed-description", metadata.description);
|
|
170
|
-
if (metadata.author) set("data-embed-author", metadata.author);
|
|
171
|
-
if (metadata.avatar && isSafeThumbnailUrl(metadata.avatar)) set("data-embed-avatar", metadata.avatar);
|
|
172
|
-
if (metadata.duration) set("data-embed-duration", String(metadata.duration));
|
|
127
|
+
};
|
|
128
|
+
/**
 * Fills in missing `data-embed-*` attributes on an existing placeholder.
 *
 * The metadata is first passed through `normalizeEmbedFields`; each resulting
 * truthy value is written only when the element does not already have that
 * attribute, so existing attributes are never overwritten.
 *
 * @param {HTMLElement} element - Placeholder element to update in place.
 * @param {object} metadata - Partial embed metadata.
 * @returns {void}
 */
const updateEmbedPlaceholder = (element, metadata) => {
  const normalized = normalizeEmbedFields(metadata);
  Object.keys(normalized).forEach((field) => {
    const content = normalized[field];
    const attribute = `data-embed-${field}`;
    if (!content || element.hasAttribute(attribute)) return;
    element.setAttribute(attribute, content);
  });
};
|
|
174
134
|
const createEmbedPlaceholder = (document, src, metadata) => {
|
|
175
|
-
const element = document
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
const fallbackUrl = metadata?.url ?? metadata?.src ?? src;
|
|
135
|
+
const element = createPlaceholder(document, "embed", normalizeEmbedFields({
|
|
136
|
+
...metadata,
|
|
137
|
+
src: metadata?.src ?? src
|
|
138
|
+
}));
|
|
139
|
+
const fallbackUrl = upgradeProtocol(metadata?.url ?? metadata?.src ?? src);
|
|
180
140
|
const link = document.createElement("a");
|
|
181
141
|
link.setAttribute("href", fallbackUrl);
|
|
182
142
|
link.textContent = fallbackUrl;
|
|
183
143
|
element.appendChild(link);
|
|
184
144
|
return element;
|
|
185
145
|
};
|
|
146
|
+
/**
 * Builds a bookmark placeholder element from a resolver result.
 *
 * The element is created via `createPlaceholder(document, "bookmark", ...)`
 * and contains a single `<a>` whose href is the bookmark URL and whose text
 * is the title. The URL goes through `upgradeProtocol` (from feedcanon).
 * `icon` and `thumbnail` are kept only when `isSafeThumbnailUrl` accepts
 * them, and are then also passed through `upgradeProtocol`.
 *
 * @param {Document} document - Document used to create elements.
 * @param {object} result - Bookmark resolver result (`provider`, `url`, `title`, optional metadata).
 * @returns {HTMLElement} The placeholder element with its fallback link appended.
 */
const createBookmarkPlaceholder = (document, result) => {
  const { provider, title, url, icon, thumbnail, ...rest } = result;
  const safeUrl = upgradeProtocol(url);
  // Image URLs are dropped unless they pass the thumbnail safety check.
  const sanitizeImage = (candidate) => {
    return candidate && isSafeThumbnailUrl(candidate) ? upgradeProtocol(candidate) : undefined;
  };
  const fields = {
    provider,
    ...rest,
    url: safeUrl,
    title,
    icon: sanitizeImage(icon),
    thumbnail: sanitizeImage(thumbnail)
  };
  const placeholder = createPlaceholder(document, "bookmark", fields);
  const anchor = document.createElement("a");
  anchor.setAttribute("href", safeUrl);
  anchor.textContent = title;
  placeholder.appendChild(anchor);
  return placeholder;
};
|
|
186
163
|
//#endregion
|
|
187
|
-
export { Node, NodeFilter, applyDomTransforms,
|
|
164
|
+
export { Node, NodeFilter, applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isSafeThumbnailUrl, isSkippable, isWhitespaceText, normalizeEmbedFields, updateEmbedPlaceholder };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
import { DomTransform, EmbedResolver, ResolveUrlFn, StringTransform, UrlUnwrapper } from "./types.js";
|
|
1
|
+
import { BookmarkResolver, DomTransform, EmbedResolver, ResolveUrlFn, StringTransform, UrlUnwrapper } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultStringTransforms: Array<StringTransform>;
|
|
5
5
|
declare const defaultDomTransforms: Array<DomTransform>;
|
|
6
|
-
declare const defaultFinalStringTransforms: Array<StringTransform>;
|
|
7
6
|
declare const defaultEmbedResolvers: Array<EmbedResolver>;
|
|
7
|
+
declare const defaultBookmarkResolvers: Array<BookmarkResolver>;
|
|
8
8
|
declare const defaultResolveUrlFn: ResolveUrlFn;
|
|
9
9
|
declare const defaultLazySrcAttributes: string[];
|
|
10
10
|
declare const defaultLazySrcsetAttributes: string[];
|
|
11
11
|
declare const defaultTrackingHosts: string[];
|
|
12
12
|
declare const defaultTrackingPathSegments: string[];
|
|
13
|
+
declare const defaultEmojiImageHosts: string[];
|
|
14
|
+
declare const defaultPreservedPreClasses: string[];
|
|
15
|
+
declare const defaultInertSelectors: string[];
|
|
13
16
|
declare const defaultUrlUnwrappers: Array<UrlUnwrapper>;
|
|
14
17
|
//#endregion
|
|
15
|
-
export { defaultDomTransforms, defaultEmbedResolvers,
|
|
18
|
+
export { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
package/dist/defaults.js
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
+
import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
|
|
2
|
+
import { substackBookmarkResolver } from "./bookmarks/substack.js";
|
|
1
3
|
import { youtubeEmbedResolver } from "./embeds/youtube.js";
|
|
4
|
+
import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
|
|
2
5
|
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
3
6
|
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
7
|
+
import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
|
|
4
8
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
5
9
|
import { highlightCode } from "./transforms/dom/highlightCode.js";
|
|
6
10
|
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
@@ -16,14 +20,18 @@ import { stripComments } from "./transforms/dom/stripComments.js";
|
|
|
16
20
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
17
21
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
18
22
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
23
|
+
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
19
24
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
20
25
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
21
26
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
22
27
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
23
28
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
29
|
+
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
24
30
|
import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
25
31
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
26
32
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
33
|
+
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
34
|
+
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
27
35
|
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
28
36
|
import { unwrapBing } from "./unwraps/bing.js";
|
|
29
37
|
import { unwrapFacebookShim } from "./unwraps/facebook.js";
|
|
@@ -39,18 +47,27 @@ import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
|
|
|
39
47
|
import { unwrapYouTube } from "./unwraps/youtube.js";
|
|
40
48
|
import { resolveUrl } from "feedcanon";
|
|
41
49
|
//#region src/defaults.ts
|
|
42
|
-
const defaultStringTransforms = [
|
|
50
|
+
const defaultStringTransforms = [
|
|
51
|
+
stripControlChars,
|
|
52
|
+
stripOversizedBase64Sources,
|
|
53
|
+
unwrapCdataComments,
|
|
54
|
+
paragraphizePlainText
|
|
55
|
+
];
|
|
43
56
|
const defaultDomTransforms = [
|
|
44
57
|
decodeDoubleEncodedTags,
|
|
45
58
|
stripComments,
|
|
46
59
|
unwrapDoublyNestedLists,
|
|
47
60
|
stripDuplicateTitleHeading,
|
|
61
|
+
demoteHeadings,
|
|
48
62
|
fixLazyImages,
|
|
63
|
+
stripInertElements,
|
|
49
64
|
resolveRelativeUrls,
|
|
50
65
|
unwrapRedirectUrls,
|
|
51
66
|
stripDeadAnchors,
|
|
52
67
|
stripTrackingParams,
|
|
68
|
+
convertBookmarkCards,
|
|
53
69
|
removeTrackingPixels,
|
|
70
|
+
unwrapEmojiImages,
|
|
54
71
|
convertBreaksToParagraphs,
|
|
55
72
|
stripInterBlockBreaks,
|
|
56
73
|
stripParagraphBoundaryBreaks,
|
|
@@ -66,8 +83,8 @@ const defaultDomTransforms = [
|
|
|
66
83
|
unwrapWrappers,
|
|
67
84
|
stripEmptyTags
|
|
68
85
|
];
|
|
69
|
-
const defaultFinalStringTransforms = [];
|
|
70
86
|
const defaultEmbedResolvers = [youtubeEmbedResolver];
|
|
87
|
+
const defaultBookmarkResolvers = [ghostBookmarkResolver, substackBookmarkResolver];
|
|
71
88
|
const defaultResolveUrlFn = (url, baseUrl) => resolveUrl(url, baseUrl);
|
|
72
89
|
const defaultLazySrcAttributes = [
|
|
73
90
|
"data-src",
|
|
@@ -138,6 +155,40 @@ const defaultTrackingPathSegments = [
|
|
|
138
155
|
"count",
|
|
139
156
|
"impression"
|
|
140
157
|
];
|
|
158
|
+
const defaultEmojiImageHosts = [
|
|
159
|
+
"s.w.org/images/core/emoji/",
|
|
160
|
+
"s0.wp.com/wp-content/mu-plugins/wpcom-smileys/",
|
|
161
|
+
"fbcdn.net/images/emoji.php/",
|
|
162
|
+
"abs.twimg.com/emoji/",
|
|
163
|
+
"githubassets.com/images/icons/emoji/"
|
|
164
|
+
];
|
|
165
|
+
const defaultPreservedPreClasses = ["wp-block-verse", "wp-block-preformatted"];
|
|
166
|
+
// Default CSS selectors for elements treated as inert platform chrome (subscribe
// widgets, share buttons, related-posts widgets, ad slots, author bio blocks, ...).
const defaultInertSelectors = [
|
|
167
|
+
".image-link-expand",
|
|
168
|
+
"[data-component-name=\"SubscribeWidget\"]",
|
|
169
|
+
".subscription-widget-wrap-editor",
|
|
170
|
+
"drupal-render-placeholder",
|
|
171
|
+
".adsbygoogle",
|
|
172
|
+
// NOTE(review): this is the same selector substackBookmarkResolver matches, and
// defaultDomTransforms lists stripInertElements before convertBookmarkCards.
// If stripInertElements removes every match, Substack cards may be gone before
// they can be converted to bookmark placeholders — confirm intended order.
".embedded-publication-wrap",
|
|
173
|
+
".yarpp-related",
|
|
174
|
+
".sharethis-inline-share-buttons",
|
|
175
|
+
".sharedaddy",
|
|
176
|
+
".wp-block-jetpack-subscriptions",
|
|
177
|
+
".wp-block-post-author",
|
|
178
|
+
".kg-signup-card",
|
|
179
|
+
".mc4wp-form",
|
|
180
|
+
".formkit-form",
|
|
181
|
+
".mcnPreviewText",
|
|
182
|
+
".saboxplugin-wrap",
|
|
183
|
+
".addtoany_share_save_container",
|
|
184
|
+
"iframe[src*=\"embeds.beehiiv.com\"]",
|
|
185
|
+
".jp-relatedposts",
|
|
186
|
+
".adthrive-ad",
|
|
187
|
+
".jetpack_subscription_widget",
|
|
188
|
+
".crp_related",
|
|
189
|
+
"form[action*=\"buttondown.email\"]",
|
|
190
|
+
".sqs-block-newsletter"
|
|
191
|
+
];
|
|
141
192
|
const defaultUrlUnwrappers = [
|
|
142
193
|
unwrapBing,
|
|
143
194
|
unwrapGoogle,
|
|
@@ -153,4 +204,4 @@ const defaultUrlUnwrappers = [
|
|
|
153
204
|
unwrapRedditOut
|
|
154
205
|
];
|
|
155
206
|
//#endregion
|
|
156
|
-
export { defaultDomTransforms, defaultEmbedResolvers,
|
|
207
|
+
export { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
-
import { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
|
|
1
|
+
import { AssetProxyFn, AssetType, BookmarkResolver, BookmarkResolverResult, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
|
|
2
2
|
import { defaultResolveUrlFn } from "./defaults.js";
|
|
3
|
-
import {
|
|
3
|
+
import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
|
|
4
|
+
import { substackBookmarkResolver } from "./bookmarks/substack.js";
|
|
5
|
+
import { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder } from "./common.js";
|
|
4
6
|
import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
|
|
7
|
+
import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
|
|
5
8
|
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
6
9
|
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
10
|
+
import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
|
|
7
11
|
import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
|
|
8
12
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
9
13
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
@@ -20,14 +24,18 @@ import { stripComments } from "./transforms/dom/stripComments.js";
|
|
|
20
24
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
21
25
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
22
26
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
27
|
+
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
23
28
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
24
29
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
25
30
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
26
31
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
27
32
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
33
|
+
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
28
34
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
29
35
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
30
36
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
37
|
+
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
38
|
+
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
31
39
|
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
32
40
|
import { unwrapAceml } from "./unwraps/aceml.js";
|
|
33
41
|
import { unwrapAdjust } from "./unwraps/adjust.js";
|
|
@@ -106,6 +114,6 @@ import { unwrapZhihu } from "./unwraps/zhihu.js";
|
|
|
106
114
|
import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
|
|
107
115
|
|
|
108
116
|
//#region src/index.d.ts
|
|
109
|
-
declare const transformContent: (html: string, options
|
|
117
|
+
declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
|
|
110
118
|
//#endregion
|
|
111
|
-
export { type AssetProxyFn, type AssetType, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms,
|
|
119
|
+
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, 
unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
|
|
2
|
-
import { applyDomTransforms,
|
|
2
|
+
import { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder } from "./common.js";
|
|
3
|
+
import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
|
|
4
|
+
import { substackBookmarkResolver } from "./bookmarks/substack.js";
|
|
3
5
|
import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
|
|
6
|
+
import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
|
|
4
7
|
import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
|
|
5
8
|
import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
|
|
9
|
+
import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
|
|
6
10
|
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
7
11
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
8
12
|
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
@@ -18,14 +22,18 @@ import { stripComments } from "./transforms/dom/stripComments.js";
|
|
|
18
22
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
19
23
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
20
24
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
25
|
+
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
21
26
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
22
27
|
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
23
28
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
24
29
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
25
30
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
31
|
+
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
26
32
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
27
33
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
28
34
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
35
|
+
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
36
|
+
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
29
37
|
import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
|
|
30
38
|
import { unwrapBing } from "./unwraps/bing.js";
|
|
31
39
|
import { unwrapFacebookShim } from "./unwraps/facebook.js";
|
|
@@ -39,7 +47,7 @@ import { unwrapRedditOut } from "./unwraps/redditOut.js";
|
|
|
39
47
|
import { unwrapVkAway } from "./unwraps/vkAway.js";
|
|
40
48
|
import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
|
|
41
49
|
import { unwrapYouTube } from "./unwraps/youtube.js";
|
|
42
|
-
import { defaultDomTransforms, defaultEmbedResolvers,
|
|
50
|
+
import { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
|
|
43
51
|
import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
|
|
44
52
|
import { unwrapAceml } from "./unwraps/aceml.js";
|
|
45
53
|
import { unwrapAdjust } from "./unwraps/adjust.js";
|
|
@@ -104,15 +112,19 @@ import { unwrapWebArchive } from "./unwraps/webArchive.js";
|
|
|
104
112
|
import { unwrapYandexTurbo } from "./unwraps/yandexTurbo.js";
|
|
105
113
|
import { unwrapZhihu } from "./unwraps/zhihu.js";
|
|
106
114
|
//#region src/index.ts
|
|
107
|
-
const transformContent = async (html, options
|
|
115
|
+
const transformContent = async (html, options) => {
|
|
108
116
|
const context = {
|
|
109
117
|
baseUrl: options.baseUrl,
|
|
110
118
|
enclosures: options.enclosures,
|
|
111
119
|
embedResolvers: options.embedResolvers ?? defaultEmbedResolvers,
|
|
120
|
+
bookmarkResolvers: options.bookmarkResolvers ?? defaultBookmarkResolvers,
|
|
112
121
|
lazySrcAttributes: options.lazySrcAttributes ?? defaultLazySrcAttributes,
|
|
113
122
|
lazySrcsetAttributes: options.lazySrcsetAttributes ?? defaultLazySrcsetAttributes,
|
|
114
123
|
trackingHosts: options.trackingHosts ?? defaultTrackingHosts,
|
|
115
124
|
trackingPathSegments: options.trackingPathSegments ?? defaultTrackingPathSegments,
|
|
125
|
+
emojiImageHosts: options.emojiImageHosts ?? defaultEmojiImageHosts,
|
|
126
|
+
inertSelectors: options.inertSelectors ?? defaultInertSelectors,
|
|
127
|
+
preservedPreClasses: options.preservedPreClasses ?? defaultPreservedPreClasses,
|
|
116
128
|
urlUnwrappers: options.urlUnwrappers ?? defaultUrlUnwrappers,
|
|
117
129
|
resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn,
|
|
118
130
|
assetProxyFn: options.assetProxyFn,
|
|
@@ -121,8 +133,8 @@ const transformContent = async (html, options = {}) => {
|
|
|
121
133
|
};
|
|
122
134
|
const stringFns = options.stringTransforms ?? defaultStringTransforms;
|
|
123
135
|
const domFns = options.domTransforms ?? defaultDomTransforms;
|
|
124
|
-
const
|
|
125
|
-
return await
|
|
136
|
+
const afterString = await applyStringTransforms(html, stringFns.map((transform) => transform(context)));
|
|
137
|
+
return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
|
|
126
138
|
};
|
|
127
139
|
//#endregion
|
|
128
|
-
export { applyDomTransforms,
|
|
140
|
+
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, 
unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { parseHTML } from "linkedom";
|
|
2
|
+
//#region src/parsers/linkedom.ts
|
|
3
|
+
/**
 * Lower-cases every attribute name on every element of `document`.
 *
 * When several attributes of one element collapse to the same lower-cased
 * name, the first one in attribute order wins and the later ones are dropped.
 * Elements whose attribute names are already lower-case and unique are left
 * untouched (no remove/set calls are made on them).
 *
 * @param {Document} document - Parsed document, mutated in place.
 * @returns {void}
 */
const normalizeAttributeCase = (document) => {
  for (const node of document.querySelectorAll("*")) {
    // Snapshot name/value pairs up front: the attributes are removed and
    // re-added below, so the element's own collection cannot be iterated.
    const snapshot = Array.from(node.attributes, ({ name, value }) => ({ name, value }));
    const lowered = new Map();
    let dirty = false;
    for (const entry of snapshot) {
      const key = entry.name.toLowerCase();
      if (lowered.has(key)) {
        // Case-insensitive duplicate: keep the earlier value, drop this one.
        dirty = true;
      } else {
        lowered.set(key, entry.value);
        if (key !== entry.name) dirty = true;
      }
    }
    if (dirty) {
      for (const entry of snapshot) node.removeAttribute(entry.name);
      for (const [key, value] of lowered) node.setAttribute(key, value);
    }
  }
};
|
|
25
|
+
// Matches one whole `<svg ...> ... </svg>` region (non-greedy, case-insensitive).
const svgRegionRegex = /<svg\b[^>]*>[\s\S]*?<\/svg>/gi;
// Matches a self-closing tag; group 1 is the tag name, group 2 its attribute text.
const svgSelfCloseRegex = /<([a-z][a-z0-9-]*)((?:\s[^>]*)?)\s*\/>/gi;
/**
 * Rewrites self-closing tags (`<path ... />`) found inside `<svg>...</svg>`
 * regions into explicit open/close pairs (`<path ...></path>`). Markup outside
 * SVG regions is returned unchanged.
 *
 * NOTE(review): presumably needed because the HTML parser used afterwards does
 * not treat `/>` as closing an SVG child element — confirm.
 *
 * @param {string} html - Raw HTML string.
 * @returns {string} HTML with SVG self-closing tags expanded.
 */
const expandSvgSelfClose = (html) =>
  html.replace(svgRegionRegex, (region) =>
    region.replace(svgSelfCloseRegex, (_whole, tag, attrs) => `<${tag}${attrs}></${tag}>`),
  );
|
|
32
|
+
/**
 * Parses an HTML fragment into a document using linkedom's `parseHTML`.
 *
 * The fragment first has its SVG self-closing tags expanded, is then wrapped
 * in a minimal `<!doctype html><html><head></head><body>...</body></html>`
 * skeleton for parsing, and finally has its attribute names lower-cased.
 *
 * @param {string} html - HTML fragment to parse.
 * @returns {Document} The parsed, normalized document.
 */
const parseHtml = (html) => {
  const bodyMarkup = expandSvgSelfClose(html);
  const { document: parsedDocument } = parseHTML(
    `<!doctype html><html><head></head><body>${bodyMarkup}</body></html>`,
  );
  normalizeAttributeCase(parsedDocument);
  return parsedDocument;
};
|
|
37
|
+
//#endregion
|
|
38
|
+
export { parseHtml };
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { createBookmarkPlaceholder } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/convertBookmarkCards.ts
|
|
3
|
+
/**
 * DOM transform that replaces bookmark cards with bookmark placeholders.
 *
 * For each resolver in `context.bookmarkResolvers` (in order), every element
 * matching `resolver.selector` is passed to `resolver.extract`. When that
 * yields a truthy result, the element is replaced by the node returned from
 * `createBookmarkPlaceholder(document, result)`; otherwise it is left alone.
 * Extraction is awaited one element at a time.
 *
 * @param {{ bookmarkResolvers: Array<{ selector: string, extract: Function }> }} context
 * @returns {(document: Document) => Promise<void>}
 */
const convertBookmarkCards = ({ bookmarkResolvers }) => {
  return async (document) => {
    for (const resolver of bookmarkResolvers) {
      const cards = document.querySelectorAll(resolver.selector);
      for (const card of cards) {
        const extracted = await resolver.extract(card);
        if (extracted) {
          card.replaceWith(createBookmarkPlaceholder(document, extracted));
        }
      }
    }
  };
};
|
|
13
|
+
//#endregion
|
|
14
|
+
export { convertBookmarkCards };
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
//#region src/transforms/dom/demoteHeadings.ts
|
|
2
|
+
// h6 is intentionally absent: it has no lower level to move to.
const headingSelector = "h1, h2, h3, h4, h5";
/**
 * DOM transform that pushes every heading one level down (h1→h2 … h5→h6).
 *
 * It does nothing unless the document contains at least one `h1`. Each
 * matched heading is replaced by a newly created element of the next level
 * that receives the heading's attributes and all of its child nodes.
 *
 * @returns {(document: Document) => void}
 */
const demoteHeadings = () => {
  const demote = (document, heading) => {
    const level = Number(heading.tagName.slice(1));
    const demoted = document.createElement(`h${level + 1}`);
    // Attributes are copied last-to-first.
    // NOTE(review): presumably compensates for the DOM implementation's
    // attribute insertion order so the serialized order is kept — confirm.
    const names = heading.getAttributeNames();
    for (let index = names.length - 1; index >= 0; index -= 1) {
      const value = heading.getAttribute(names[index]);
      if (value !== null) demoted.setAttribute(names[index], value);
    }
    // Move (not clone) the children across, one at a time.
    for (let child = heading.firstChild; child; child = heading.firstChild) {
      demoted.appendChild(child);
    }
    heading.replaceWith(demoted);
  };
  return (document) => {
    const topLevelHeading = document.querySelector("h1");
    if (!topLevelHeading) return;
    for (const heading of document.querySelectorAll(headingSelector)) {
      demote(document, heading);
    }
  };
};
|
|
19
|
+
//#endregion
|
|
20
|
+
export { demoteHeadings };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { updateEmbedPlaceholder } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/enrichEmbedPlaceholders.ts
|
|
3
3
|
const enrichEmbedPlaceholders = (context) => {
|
|
4
4
|
const enrichEmbedFn = context.enrichEmbedFn;
|
|
@@ -24,7 +24,7 @@ const enrichEmbedPlaceholders = (context) => {
|
|
|
24
24
|
for (let i = 0; i < count; i++) {
|
|
25
25
|
const embed = embeds[i];
|
|
26
26
|
const data = enriched.get(`${embed.provider}:${embed.id}`);
|
|
27
|
-
if (data)
|
|
27
|
+
if (data) updateEmbedPlaceholder(placeholders[i], data);
|
|
28
28
|
}
|
|
29
29
|
};
|
|
30
30
|
};
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { normalizeAttributeCase } from "../../common.js";
|
|
2
1
|
//#region src/transforms/dom/fixLazyImages.ts
|
|
3
2
|
const imgPattern = /<img\s/i;
|
|
4
3
|
const urlShapeRegex = /[:/.]/;
|
|
@@ -45,7 +44,6 @@ const fixLazyImages = (context) => {
|
|
|
45
44
|
}
|
|
46
45
|
}
|
|
47
46
|
const noscripts = document.querySelectorAll("noscript");
|
|
48
|
-
let replacedNoscript = false;
|
|
49
47
|
for (const noscript of noscripts) {
|
|
50
48
|
const sibling = noscript.previousElementSibling;
|
|
51
49
|
if (sibling?.localName !== "img") continue;
|
|
@@ -53,9 +51,7 @@ const fixLazyImages = (context) => {
|
|
|
53
51
|
if (!imgPattern.test(inner)) continue;
|
|
54
52
|
sibling.remove();
|
|
55
53
|
noscript.outerHTML = inner;
|
|
56
|
-
replacedNoscript = true;
|
|
57
54
|
}
|
|
58
|
-
if (replacedNoscript) normalizeAttributeCase(document);
|
|
59
55
|
};
|
|
60
56
|
};
|
|
61
57
|
//#endregion
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
//#region src/transforms/dom/mergeConsecutiveOneLinerPres.ts
|
|
2
2
|
const trailingBrRegex = /<br\s*\/?>\s*$/i;
|
|
3
3
|
const surroundingNewlinesRegex = /^\n+|\n+$/g;
|
|
4
|
-
const
|
|
4
|
+
const classTokenSeparator = /\s+/;
|
|
5
|
+
const mergeConsecutiveOneLinerPres = ({ preservedPreClasses }) => {
|
|
6
|
+
const preservedSet = new Set(preservedPreClasses);
|
|
7
|
+
const isPreserved = (element) => {
|
|
8
|
+
const classAttribute = element.getAttribute("class");
|
|
9
|
+
if (!classAttribute) return false;
|
|
10
|
+
for (const token of classAttribute.split(classTokenSeparator)) if (preservedSet.has(token)) return true;
|
|
11
|
+
return false;
|
|
12
|
+
};
|
|
5
13
|
return (document) => {
|
|
6
14
|
const pres = document.querySelectorAll("pre");
|
|
7
15
|
for (const pre of pres) {
|
|
@@ -20,6 +28,7 @@ const mergeConsecutiveOneLinerPres = () => {
|
|
|
20
28
|
sibling = sibling.nextSibling;
|
|
21
29
|
}
|
|
22
30
|
if (run.length < 2) continue;
|
|
31
|
+
if (run.some(isPreserved)) continue;
|
|
23
32
|
const isSingleLine = (element) => {
|
|
24
33
|
return !element.innerHTML.replace(surroundingNewlinesRegex, "").includes("\n");
|
|
25
34
|
};
|
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
import { parseSrcset, stringifySrcset } from "srcset";
|
|
2
2
|
//#region src/transforms/dom/proxyAssetUrls.ts
|
|
3
|
+
const proxyableSelectors = [
|
|
4
|
+
"img",
|
|
5
|
+
"video",
|
|
6
|
+
"audio",
|
|
7
|
+
"source",
|
|
8
|
+
"track",
|
|
9
|
+
"image",
|
|
10
|
+
"[data-embed-thumbnail]",
|
|
11
|
+
"[data-embed-avatar]",
|
|
12
|
+
"[data-bookmark-icon]",
|
|
13
|
+
"[data-bookmark-thumbnail]"
|
|
14
|
+
];
|
|
3
15
|
const sourceTypeFromParent = (element) => {
|
|
4
16
|
const parent = element.parentElement?.localName;
|
|
5
17
|
if (parent === "video") return "video";
|
|
@@ -30,7 +42,7 @@ const proxySrcset = (element, type, assetProxyFn) => {
|
|
|
30
42
|
const proxyAssetUrls = ({ assetProxyFn }) => {
|
|
31
43
|
if (!assetProxyFn) return () => {};
|
|
32
44
|
return (document) => {
|
|
33
|
-
const elements = document.querySelectorAll("
|
|
45
|
+
const elements = document.querySelectorAll(proxyableSelectors.join(", "));
|
|
34
46
|
for (const element of elements) {
|
|
35
47
|
switch (element.localName) {
|
|
36
48
|
case "img":
|
|
@@ -57,6 +69,8 @@ const proxyAssetUrls = ({ assetProxyFn }) => {
|
|
|
57
69
|
}
|
|
58
70
|
if (element.hasAttribute("data-embed-thumbnail")) proxyAttribute(element, "data-embed-thumbnail", "image", assetProxyFn);
|
|
59
71
|
if (element.hasAttribute("data-embed-avatar")) proxyAttribute(element, "data-embed-avatar", "image", assetProxyFn);
|
|
72
|
+
if (element.hasAttribute("data-bookmark-icon")) proxyAttribute(element, "data-bookmark-icon", "image", assetProxyFn);
|
|
73
|
+
if (element.hasAttribute("data-bookmark-thumbnail")) proxyAttribute(element, "data-bookmark-thumbnail", "image", assetProxyFn);
|
|
60
74
|
}
|
|
61
75
|
};
|
|
62
76
|
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
//#region src/transforms/dom/stripInertElements.ts
|
|
2
|
+
/**
 * DOM transform that removes every element matching any of the configured
 * inert selectors.
 *
 * The selectors are combined once into a single comma-separated selector
 * list. When that combined selector is empty the transform does nothing and
 * never queries the document.
 *
 * @param {{ inertSelectors: Array<string> }} context
 * @returns {(document: Document) => void}
 */
const stripInertElements = (context) => {
  const combinedSelector = context.inertSelectors.join(",");
  return (document) => {
    if (combinedSelector) {
      for (const match of document.querySelectorAll(combinedSelector)) {
        match.remove();
      }
    }
  };
};
|
|
10
|
+
//#endregion
|
|
11
|
+
export { stripInertElements };
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/transforms/dom/unwrapEmojiImages.ts
|
|
2
|
+
// Matches any character outside the 7-bit ASCII range. The previous pattern,
// `/[-]/`, matched only a literal hyphen, so alts such as "-" were treated as
// emoji-shaped while real emoji were not.
const nonAsciiRegex = /[^\x00-\x7F]/;
// Matches any ASCII letter; its presence means the alt is ordinary text.
const asciiLetterRegex = /[a-zA-Z]/;
/**
 * Tells whether an image `alt` text looks like an emoji: it contains at least
 * one non-ASCII character and no ASCII letters.
 *
 * @param {string} alt - The image's alt text.
 * @returns {boolean} True when the alt is emoji-shaped.
 */
const isEmojiShapedAlt = (alt) => {
  return nonAsciiRegex.test(alt) && !asciiLetterRegex.test(alt);
};
|
|
7
|
+
/**
 * DOM transform that replaces emoji images with their `alt` text.
 *
 * Candidate images are `img.wp-smiley[alt]`, `img.emoji[alt]`, and any
 * `img[alt]` whose `src` contains one of `context.emojiImageHosts`. A
 * candidate is swapped for a text node holding its alt only when the alt is
 * non-empty and `isEmojiShapedAlt` accepts it; other candidates are kept.
 *
 * @param {{ emojiImageHosts: Array<string> }} context
 * @returns {(document: Document) => void}
 */
const unwrapEmojiImages = (context) => {
  const hostSelectors = context.emojiImageHosts.map((host) => `img[alt][src*="${host}"]`);
  const selector = ["img.wp-smiley[alt]", "img.emoji[alt]"].concat(hostSelectors).join(", ");
  return (document) => {
    for (const image of document.querySelectorAll(selector)) {
      const alt = image.getAttribute("alt");
      if (!alt || !isEmojiShapedAlt(alt)) continue;
      image.replaceWith(document.createTextNode(alt));
    }
  };
};
|
|
20
|
+
//#endregion
|
|
21
|
+
export { unwrapEmojiImages };
|
|
@@ -7,9 +7,13 @@ const wrapperTags = new Set([
|
|
|
7
7
|
"header",
|
|
8
8
|
"footer"
|
|
9
9
|
]);
|
|
10
|
-
const
|
|
10
|
+
const preservedPrefixes = ["data-embed", "data-bookmark"];
|
|
11
|
+
const hasPreservedAttribute = (element) => {
|
|
11
12
|
const attributes = element.attributes;
|
|
12
|
-
for (let i = 0, n = attributes.length; i < n; i++)
|
|
13
|
+
for (let i = 0, n = attributes.length; i < n; i++) {
|
|
14
|
+
const name = attributes[i].name;
|
|
15
|
+
for (const prefix of preservedPrefixes) if (name.startsWith(prefix)) return true;
|
|
16
|
+
}
|
|
13
17
|
return false;
|
|
14
18
|
};
|
|
15
19
|
const unwrapWrappers = () => {
|
|
@@ -20,7 +24,7 @@ const unwrapWrappers = () => {
|
|
|
20
24
|
if (!wrapperTags.has(element.localName)) continue;
|
|
21
25
|
const parent = element.parentNode;
|
|
22
26
|
if (!parent) continue;
|
|
23
|
-
if (
|
|
27
|
+
if (hasPreservedAttribute(element)) continue;
|
|
24
28
|
while (element.firstChild) parent.insertBefore(element.firstChild, element);
|
|
25
29
|
element.remove();
|
|
26
30
|
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/transforms/string/stripControlChars.ts
|
|
2
|
+
// Character-class fragments (regex source text) for everything to strip:
//   - control characters U+0000–U+001F except tab, line feed, carriage return
//   - U+007F–U+009F
//   - U+FDD0–U+FDEF, U+FFFE, U+FFFF
//   - the last two code points (xFFFE, xFFFF) of planes 1 through 16
const ranges = [
  "\\x00-\\x08",
  "\\x0B\\x0C",
  "\\x0E-\\x1F",
  "\\x7F-\\x9F",
  "\\uFDD0-\\uFDEF",
  "\\uFFFE\\uFFFF",
];
for (let planeNumber = 1; planeNumber <= 16; planeNumber += 1) {
  const plane = planeNumber.toString(16).toUpperCase();
  ranges.push(`\\u{${plane}FFFE}\\u{${plane}FFFF}`);
}
// The `u` flag is required for the `\u{...}` escapes above.
const controlCharRegex = new RegExp(`[${ranges.join("")}]`, "gu");
/**
 * String transform that deletes control and non-character code points from
 * the HTML, leaving tab, line feed and carriage return in place.
 *
 * @returns {(html: string) => string}
 */
const stripControlChars = () => (html) => html.replace(controlCharRegex, "");
|
|
20
|
+
//#endregion
|
|
21
|
+
export { stripControlChars };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
//#region src/transforms/string/stripOversizedBase64Sources.ts
|
|
2
|
+
// Matches a quoted `src`, `srcset` or `poster` attribute whose value is a
// base64 `data:` URI. Group 1 is the attribute name plus opening quote, group 2
// the closing quote. The `i` flag is needed because HTML attribute names and
// the `data:` scheme are case-insensitive and this runs on the raw string, so
// `SRC="DATA:...;BASE64,..."` must be caught as well.
const base64SrcRegex = /((?:src|srcset|poster)=["'])data:[^"']*;base64,[^"']*(["'])/gi;
// Matches (attribute name, quotes and payload together) of this many
// characters or more are emptied.
const maxBase64Size = 50 * 1024;
/**
 * String transform that blanks oversized inline base64 sources.
 *
 * Attributes whose whole match is shorter than `maxBase64Size` characters are
 * kept verbatim; longer ones keep the attribute but get an empty value
 * (`src=""`), so the markup stays well-formed.
 *
 * @returns {(html: string) => string}
 */
const stripOversizedBase64Sources = () => {
  return (html) => {
    return html.replace(base64SrcRegex, (match, prefix, suffix) => {
      if (match.length < maxBase64Size) return match;
      return `${prefix}${suffix}`;
    });
  };
};
|
|
12
|
+
//#endregion
|
|
13
|
+
export { stripOversizedBase64Sources };
|
package/dist/types.d.ts
CHANGED
|
@@ -41,6 +41,20 @@ type EmbedResolver = {
|
|
|
41
41
|
selector: string;
|
|
42
42
|
extract: (element: Element) => MaybePromise<EmbedResolverResult | undefined>;
|
|
43
43
|
};
|
|
44
|
+
type BookmarkResolverResult = {
|
|
45
|
+
provider: string;
|
|
46
|
+
url: string;
|
|
47
|
+
title: string;
|
|
48
|
+
description?: string;
|
|
49
|
+
author?: string;
|
|
50
|
+
publisher?: string;
|
|
51
|
+
icon?: string;
|
|
52
|
+
thumbnail?: string;
|
|
53
|
+
};
|
|
54
|
+
type BookmarkResolver = {
|
|
55
|
+
selector: string;
|
|
56
|
+
extract: (element: Element) => MaybePromise<BookmarkResolverResult | undefined>;
|
|
57
|
+
};
|
|
44
58
|
type UrlUnwrapper = (url: URL) => string | undefined;
|
|
45
59
|
type AssetType = 'image' | 'video' | 'audio';
|
|
46
60
|
type AssetProxyFn = (url: string, type: AssetType) => string | undefined;
|
|
@@ -48,10 +62,14 @@ type TransformContext = {
|
|
|
48
62
|
baseUrl?: string;
|
|
49
63
|
enclosures?: Array<Enclosure>;
|
|
50
64
|
embedResolvers: Array<EmbedResolver>;
|
|
65
|
+
bookmarkResolvers: Array<BookmarkResolver>;
|
|
51
66
|
lazySrcAttributes: Array<string>;
|
|
52
67
|
lazySrcsetAttributes: Array<string>;
|
|
53
68
|
trackingHosts: Array<string>;
|
|
54
69
|
trackingPathSegments: Array<string>;
|
|
70
|
+
emojiImageHosts: Array<string>;
|
|
71
|
+
inertSelectors: Array<string>;
|
|
72
|
+
preservedPreClasses: Array<string>;
|
|
55
73
|
urlUnwrappers: Array<UrlUnwrapper>;
|
|
56
74
|
resolveUrlFn: ResolveUrlFn;
|
|
57
75
|
assetProxyFn?: AssetProxyFn;
|
|
@@ -60,14 +78,20 @@ type TransformContext = {
|
|
|
60
78
|
};
|
|
61
79
|
type DomTransform = (context: TransformContext) => (document: Document) => MaybePromise<void>;
|
|
62
80
|
type StringTransform = (context: TransformContext) => (html: string) => MaybePromise<string>;
|
|
81
|
+
type ParseHtmlFn = (html: string) => MaybePromise<Document>;
|
|
63
82
|
type TransformContentOptions = {
|
|
83
|
+
parseHtmlFn: ParseHtmlFn;
|
|
64
84
|
baseUrl?: string;
|
|
65
85
|
enclosures?: Array<Enclosure>;
|
|
66
86
|
embedResolvers?: Array<EmbedResolver>;
|
|
87
|
+
bookmarkResolvers?: Array<BookmarkResolver>;
|
|
67
88
|
lazySrcAttributes?: Array<string>;
|
|
68
89
|
lazySrcsetAttributes?: Array<string>;
|
|
69
90
|
trackingHosts?: Array<string>;
|
|
70
91
|
trackingPathSegments?: Array<string>;
|
|
92
|
+
emojiImageHosts?: Array<string>;
|
|
93
|
+
inertSelectors?: Array<string>;
|
|
94
|
+
preservedPreClasses?: Array<string>;
|
|
71
95
|
urlUnwrappers?: Array<UrlUnwrapper>;
|
|
72
96
|
resolveUrlFn?: ResolveUrlFn;
|
|
73
97
|
assetProxyFn?: AssetProxyFn;
|
|
@@ -75,7 +99,6 @@ type TransformContentOptions = {
|
|
|
75
99
|
articleTitle?: string;
|
|
76
100
|
stringTransforms?: Array<StringTransform>;
|
|
77
101
|
domTransforms?: Array<DomTransform>;
|
|
78
|
-
finalStringTransforms?: Array<StringTransform>;
|
|
79
102
|
};
|
|
80
103
|
//#endregion
|
|
81
|
-
export { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext, UrlUnwrapper };
|
|
104
|
+
export { AssetProxyFn, AssetType, BookmarkResolver, BookmarkResolverResult, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext, UrlUnwrapper };
|
package/package.json
CHANGED
|
@@ -29,6 +29,10 @@
|
|
|
29
29
|
"./defaults": {
|
|
30
30
|
"types": "./dist/defaults.d.ts",
|
|
31
31
|
"default": "./dist/defaults.js"
|
|
32
|
+
},
|
|
33
|
+
"./linkedom": {
|
|
34
|
+
"types": "./dist/parsers/linkedom.d.ts",
|
|
35
|
+
"default": "./dist/parsers/linkedom.js"
|
|
32
36
|
}
|
|
33
37
|
},
|
|
34
38
|
"files": [
|
|
@@ -36,23 +40,29 @@
|
|
|
36
40
|
],
|
|
37
41
|
"scripts": {
|
|
38
42
|
"prepare": "lefthook install",
|
|
39
|
-
"build": "tsdown src/index.ts src/defaults.ts --format esm --dts --clean --unbundle --no-fixed-extension"
|
|
43
|
+
"build": "tsdown src/index.ts src/defaults.ts src/parsers/linkedom.ts --format esm --dts --clean --unbundle --no-fixed-extension"
|
|
40
44
|
},
|
|
41
45
|
"dependencies": {
|
|
42
46
|
"@wordpress/autop": "^4.46.0",
|
|
43
47
|
"highlight.js": "^11.11.1",
|
|
44
|
-
"linkedom": "^0.18.12",
|
|
45
48
|
"linkifyjs": "^4.3.2",
|
|
46
49
|
"srcset": "^5.0.3"
|
|
47
50
|
},
|
|
48
51
|
"peerDependencies": {
|
|
49
|
-
"feedcanon": "^2.0.0-next.
|
|
50
|
-
"feedscout": "^2.0.0-next.2"
|
|
52
|
+
"feedcanon": "^2.0.0-next.4",
|
|
53
|
+
"feedscout": "^2.0.0-next.2",
|
|
54
|
+
"linkedom": "^0.18.12"
|
|
55
|
+
},
|
|
56
|
+
"peerDependenciesMeta": {
|
|
57
|
+
"linkedom": {
|
|
58
|
+
"optional": true
|
|
59
|
+
}
|
|
51
60
|
},
|
|
52
61
|
"devDependencies": {
|
|
53
62
|
"@types/bun": "^1.3.13",
|
|
54
63
|
"kvalita": "^1.13.0",
|
|
64
|
+
"linkedom": "^0.18.12",
|
|
55
65
|
"tsdown": "^0.22.0"
|
|
56
66
|
},
|
|
57
|
-
"version": "
|
|
67
|
+
"version": "2.0.0"
|
|
58
68
|
}
|