feedsweep 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +51 -2
  2. package/dist/bookmarks/ghost.d.ts +6 -0
  3. package/dist/bookmarks/ghost.js +21 -0
  4. package/dist/bookmarks/substack.d.ts +6 -0
  5. package/dist/bookmarks/substack.js +26 -0
  6. package/dist/common.d.ts +8 -10
  7. package/dist/common.js +50 -73
  8. package/dist/defaults.d.ts +6 -3
  9. package/dist/defaults.js +54 -3
  10. package/dist/index.d.ts +12 -4
  11. package/dist/index.js +18 -6
  12. package/dist/parsers/linkedom.d.ts +4 -0
  13. package/dist/parsers/linkedom.js +38 -0
  14. package/dist/transforms/dom/convertBookmarkCards.d.ts +6 -0
  15. package/dist/transforms/dom/convertBookmarkCards.js +14 -0
  16. package/dist/transforms/dom/demoteHeadings.d.ts +6 -0
  17. package/dist/transforms/dom/demoteHeadings.js +20 -0
  18. package/dist/transforms/dom/enrichEmbedPlaceholders.js +2 -2
  19. package/dist/transforms/dom/fixLazyImages.js +0 -4
  20. package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +10 -1
  21. package/dist/transforms/dom/proxyAssetUrls.js +15 -1
  22. package/dist/transforms/dom/stripInertElements.d.ts +6 -0
  23. package/dist/transforms/dom/stripInertElements.js +11 -0
  24. package/dist/transforms/dom/unwrapEmojiImages.d.ts +6 -0
  25. package/dist/transforms/dom/unwrapEmojiImages.js +21 -0
  26. package/dist/transforms/dom/unwrapWrappers.js +7 -3
  27. package/dist/transforms/string/stripControlChars.d.ts +6 -0
  28. package/dist/transforms/string/stripControlChars.js +21 -0
  29. package/dist/transforms/string/stripOversizedBase64Sources.d.ts +6 -0
  30. package/dist/transforms/string/stripOversizedBase64Sources.js +13 -0
  31. package/dist/types.d.ts +25 -2
  32. package/package.json +15 -5
package/README.md CHANGED
@@ -11,15 +11,19 @@ Feedsweep takes raw feed item HTML and runs it through a pipeline that genuinely
11
11
  ## Installation
12
12
 
13
13
  ```bash
14
- npm install feedsweep
14
+ npm install feedsweep linkedom
15
15
  ```
16
16
 
17
+ `linkedom` is an optional peer dependency. You only need it if you use the bundled `parseHtml` helper — see [DOM library](#dom-library) for jsdom / happy-dom / browser-native alternatives.
18
+
17
19
  ## Quick Start
18
20
 
19
21
  ```typescript
20
22
  import { transformContent } from 'feedsweep'
23
+ import { parseHtml } from 'feedsweep/linkedom'
21
24
 
22
25
  const result = await transformContent('<p>Check <img data-src="photo.jpg"> and visit /about</p>', {
26
+ parseHtmlFn: parseHtml,
23
27
  baseUrl: 'https://example.com/post/1',
24
28
  })
25
29
  ```
@@ -37,13 +41,17 @@ Inventory of every transform exported from the package. Most are enabled by defa
37
41
  | `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
38
42
  | `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
39
43
  | `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
44
+ | `demoteHeadings` | Shift every heading down by one level (`<h1>`→`<h2>`, …, `<h5>`→`<h6>`) when the body contains an `<h1>`, so it sits below the reader's own page title |
40
45
  | `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
41
46
  | `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
47
+ | `stripInertElements` | Remove platform chrome and dead placeholders — subscribe widgets, share buttons, related-posts widgets, ad slots (AdSense / AdThrive), author bio blocks, email preheaders, Substack image controls, and Drupal `<drupal-render-placeholder>` tags. Pass `inertSelectors` to extend or replace |
42
48
  | `removeTrackingPixels` | Strip 1×1 tracking pixel images |
49
+ | `unwrapEmojiImages` | Replace WordPress/Facebook/Twitter/GitHub emoji `<img>` tags with their alt-text glyph |
43
50
  | `stripTrackingParams` | Remove UTM and other tracking parameters |
44
51
  | `convertBreaksToParagraphs` | Convert `<br><br>` runs into semantic `<p>` blocks |
45
52
  | `injectEnclosures` | Inject feed enclosures into content as native `<audio>`/`<video>` or iframe placeholders |
46
53
  | `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
54
+ | `convertBookmarkCards` | Convert link-preview cards into `data-bookmark-*` placeholders via a registry of per-provider `BookmarkResolver`s (`defaultBookmarkResolvers`: Ghost `kg-bookmark-card`, Substack `embedded-publication-wrap`). Extend via `bookmarkResolvers` |
47
55
  | `enrichEmbedPlaceholders` | Populate placeholder metadata (`title`, `description`, `duration`, etc.) via a caller-supplied async fn. Opt-in; not in defaults |
48
56
  | `proxyAssetUrls` | Rewrite image, video, and audio URLs through a caller-supplied proxy |
49
57
  | `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
@@ -51,19 +59,24 @@ Inventory of every transform exported from the package. Most are enabled by defa
51
59
  | `unwrapDoublyNestedLists` | Unwrap `<ul>`/`<ol>` that wrap a single `<li>` containing a same-type list |
52
60
  | `mergeFragmentedLists` | Merge consecutive sibling `<ul>` / `<ol>` lists with matching attributes |
53
61
  | `paragraphizePlainText` | Wrap plain text in `<p>` tags |
62
+ | `stripOversizedBase64Sources` | Drop base64 `src`/`srcset`/`poster` payloads larger than 50 KB before parsing |
54
63
  | `linkifyUrls` | Wrap bare URLs in `<a>` tags |
55
64
  | `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
56
65
  | `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
57
66
  | `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
58
67
  | `stripComments` | Remove HTML `<!-- comments -->` |
59
68
  | `unwrapCdataComments` | Strip malformed `<!--[CDATA[ … ]]-->` wrappers before parsing so the wrapped article reaches the DOM as real HTML |
69
+ | `stripControlChars` | Strip rendering-hostile control characters (NUL, BEL, ESC, DEL, C1 range) before parsing. Preserves tab / LF / CR |
60
70
 
61
71
  ## Options
62
72
 
63
73
  ```typescript
64
74
  import { fixLazyImages, resolveRelativeUrls, transformContent } from 'feedsweep'
75
+ import { parseHtml } from 'feedsweep/linkedom'
65
76
 
66
77
  const result = transformContent(html, {
78
+ // Required: function that turns an HTML string into a `Document`. See "DOM library".
79
+ parseHtmlFn: parseHtml,
67
80
  // Base URL for resolving relative URLs.
68
81
  baseUrl: 'https://example.com/post/1',
69
82
  // Feed item enclosures (audio/video).
@@ -79,4 +92,40 @@ const result = transformContent(html, {
79
92
  })
80
93
  ```
81
94
 
82
- The `stringTransforms`, `domTransforms`, and `finalStringTransforms` options each fully replace the corresponding default phase when provided. Every transform is also exported individually from `feedsweep`, so you can compose any pipeline — list them explicitly to build from scratch, or spread `defaultDomTransforms` (etc.) from `feedsweep/defaults` to extend or filter the defaults.
95
+ The `stringTransforms` and `domTransforms` options each fully replace the corresponding default phase when provided. Every transform is also exported individually from `feedsweep`, so you can compose any pipeline — list them explicitly to build from scratch, or spread `defaultDomTransforms` (etc.) from `feedsweep/defaults` to extend or filter the defaults.
96
+
97
+ ## DOM library
98
+
99
+ Feedsweep is parser-agnostic. You provide `parseHtmlFn` — a function that turns an HTML string into a `Document`. Use any DOM library that produces a standards-compliant `Document`.
100
+
101
+ ```typescript
102
+ // linkedom (recommended default)
103
+ import { transformContent } from 'feedsweep'
104
+ import { parseHtml } from 'feedsweep/linkedom'
105
+
106
+ await transformContent(html, { parseHtmlFn: parseHtml, baseUrl })
107
+
108
+ // jsdom
109
+ import { transformContent } from 'feedsweep'
110
+ import { JSDOM } from 'jsdom'
111
+
112
+ await transformContent(html, {
113
+ parseHtmlFn: (raw) => new JSDOM(`<!doctype html><body>${raw}</body>`).window.document,
114
+ baseUrl,
115
+ })
116
+
117
+ // happy-dom
118
+ import { transformContent } from 'feedsweep'
119
+ import { Window } from 'happy-dom'
120
+
121
+ await transformContent(html, {
122
+ parseHtmlFn: (raw) => {
123
+ const window = new Window()
124
+ window.document.body.innerHTML = raw
125
+ return window.document
126
+ },
127
+ baseUrl,
128
+ })
129
+ ```
130
+
131
+ The bundled `feedsweep/linkedom` parser bakes in two workarounds for linkedom-specific spec violations (attribute case-folding and SVG XML mode). jsdom and happy-dom do not need them.
package/dist/bookmarks/ghost.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ import { BookmarkResolver } from "../types.js";
2
+
3
+ //#region src/bookmarks/ghost.d.ts
4
+ declare const ghostBookmarkResolver: BookmarkResolver;
5
+ //#endregion
6
+ export { ghostBookmarkResolver };
package/dist/bookmarks/ghost.js ADDED
@@ -0,0 +1,21 @@
1
+ //#region src/bookmarks/ghost.ts
2
+ const ghostBookmarkResolver = {
3
+ selector: ".kg-bookmark-card",
4
+ extract: (element) => {
5
+ const url = element.querySelector("a.kg-bookmark-container")?.getAttribute("href") ?? void 0;
6
+ const title = element.querySelector(".kg-bookmark-title")?.textContent?.trim();
7
+ if (!url || !title) return;
8
+ return {
9
+ provider: "ghost",
10
+ url,
11
+ title,
12
+ description: element.querySelector(".kg-bookmark-description")?.textContent?.trim(),
13
+ author: element.querySelector(".kg-bookmark-author")?.textContent?.trim(),
14
+ publisher: element.querySelector(".kg-bookmark-publisher")?.textContent?.trim(),
15
+ icon: element.querySelector("img.kg-bookmark-icon")?.getAttribute("src") ?? void 0,
16
+ thumbnail: element.querySelector(".kg-bookmark-thumbnail img")?.getAttribute("src") ?? void 0
17
+ };
18
+ }
19
+ };
20
+ //#endregion
21
+ export { ghostBookmarkResolver };
package/dist/bookmarks/substack.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ import { BookmarkResolver } from "../types.js";
2
+
3
+ //#region src/bookmarks/substack.d.ts
4
+ declare const substackBookmarkResolver: BookmarkResolver;
5
+ //#endregion
6
+ export { substackBookmarkResolver };
package/dist/bookmarks/substack.js ADDED
@@ -0,0 +1,26 @@
1
+ //#region src/bookmarks/substack.ts
2
+ const parsePublicationAttrs = (raw) => {
3
+ if (!raw) return;
4
+ try {
5
+ return JSON.parse(raw);
6
+ } catch {}
7
+ };
8
+ const substackBookmarkResolver = {
9
+ selector: ".embedded-publication-wrap",
10
+ extract: (element) => {
11
+ const attrs = parsePublicationAttrs(element.getAttribute("data-attrs"));
12
+ const url = attrs?.base_url;
13
+ const title = attrs?.name?.trim();
14
+ if (!url || !title) return;
15
+ return {
16
+ provider: "substack",
17
+ url,
18
+ title,
19
+ description: attrs.hero_text?.trim(),
20
+ author: attrs.author_name?.trim(),
21
+ icon: attrs.logo_url
22
+ };
23
+ }
24
+ };
25
+ //#endregion
26
+ export { substackBookmarkResolver };
package/dist/common.d.ts CHANGED
@@ -1,15 +1,13 @@
1
- import { EmbedResolverResult, MaybePromise } from "./types.js";
1
+ import { BookmarkResolverResult, EmbedResolverResult, MaybePromise } from "./types.js";
2
2
 
3
3
  //#region src/common.d.ts
4
- declare const stripOversizedBase64Sources: (html: string, maxSize: number) => string;
5
- declare const expandSvgSelfClose: (html: string) => string;
6
- declare const parseFragment: (html: string) => Document;
7
- declare const transformHtml: (html: string, transform: (document: Document) => MaybePromise<void>) => Promise<string>;
8
- declare const applyDomTransforms: (html: string, transforms: Array<(document: Document) => MaybePromise<void>>) => Promise<string>;
4
+ declare const isSafeThumbnailUrl: (url: string) => boolean;
5
+ declare const applyDomTransforms: (document: Document, transforms: Array<(document: Document) => MaybePromise<void>>) => Promise<string>;
9
6
  declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => MaybePromise<string>>) => Promise<string>;
10
- declare const applyEmbedMetadata: (element: HTMLElement, metadata: Partial<EmbedResolverResult>, options?: {
11
- setIfMissing?: boolean;
12
- }) => void;
7
+ declare const createPlaceholder: <Type extends object>(document: Document, type: string, fields: Type) => HTMLElement;
8
+ declare const normalizeEmbedFields: (metadata: Partial<EmbedResolverResult>) => Record<string, string | undefined>;
9
+ declare const updateEmbedPlaceholder: (element: HTMLElement, metadata: Partial<EmbedResolverResult>) => void;
13
10
  declare const createEmbedPlaceholder: (document: Document, src: string, metadata?: Partial<EmbedResolverResult>) => HTMLElement;
11
+ declare const createBookmarkPlaceholder: (document: Document, result: BookmarkResolverResult) => HTMLElement;
14
12
  //#endregion
15
- export { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml };
13
+ export { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder };
package/dist/common.js CHANGED
@@ -1,6 +1,5 @@
1
1
  import { coerceNumber } from "./utils.js";
2
- import { resolveUrl } from "feedcanon";
3
- import { parseHTML } from "linkedom";
2
+ import { resolveUrl, upgradeProtocol } from "feedcanon";
4
3
  //#region src/common.ts
5
4
  const Node = {
6
5
  ELEMENT_NODE: 1,
@@ -12,58 +11,11 @@ const NodeFilter = {
12
11
  SHOW_TEXT: 4,
13
12
  SHOW_COMMENT: 128
14
13
  };
15
- const base64SrcRegex = /((?:src|srcset|poster)=["'])data:[^"']*;base64,[^"']*(["'])/g;
16
14
  const safeThumbnailDataUrlRegex = /^data:image\/(png|jpe?g|gif|webp|avif);/i;
17
15
  const isSafeThumbnailUrl = (url) => {
18
16
  return resolveUrl(url) !== void 0 || safeThumbnailDataUrlRegex.test(url);
19
17
  };
20
- const stripOversizedBase64Sources = (html, maxSize) => {
21
- return html.replace(base64SrcRegex, (match, prefix, suffix) => {
22
- if (match.length < maxSize) return match;
23
- return `${prefix}${suffix}`;
24
- });
25
- };
26
- const normalizeAttributeCase = (document) => {
27
- for (const element of document.querySelectorAll("*")) {
28
- const original = Array.from(element.attributes).map((attribute) => ({
29
- name: attribute.name,
30
- value: attribute.value
31
- }));
32
- const final = /* @__PURE__ */ new Map();
33
- let needsRewrite = false;
34
- for (const { name, value } of original) {
35
- const lower = name.toLowerCase();
36
- if (lower !== name) needsRewrite = true;
37
- if (final.has(lower)) {
38
- needsRewrite = true;
39
- continue;
40
- }
41
- final.set(lower, value);
42
- }
43
- if (!needsRewrite) continue;
44
- for (const { name } of original) element.removeAttribute(name);
45
- for (const [name, value] of final) element.setAttribute(name, value);
46
- }
47
- };
48
- const svgRegionRegex = /<svg\b[^>]*>[\s\S]*?<\/svg>/gi;
49
- const svgSelfCloseRegex = /<([a-z][a-z0-9-]*)((?:\s[^>]*)?)\s*\/>/gi;
50
- const expandSvgSelfClose = (html) => {
51
- return html.replace(svgRegionRegex, (svgBlock) => {
52
- return svgBlock.replace(svgSelfCloseRegex, "<$1$2></$1>");
53
- });
54
- };
55
- const parseFragment = (html) => {
56
- const { document } = parseHTML(`<!doctype html><html><head></head><body>${expandSvgSelfClose(html)}</body></html>`);
57
- normalizeAttributeCase(document);
58
- return document;
59
- };
60
- const transformHtml = async (html, transform) => {
61
- const document = parseFragment(html);
62
- await transform(document);
63
- return document.body.innerHTML;
64
- };
65
- const applyDomTransforms = async (html, transforms) => {
66
- const document = parseFragment(stripOversizedBase64Sources(html, 50 * 1024));
18
+ const applyDomTransforms = async (document, transforms) => {
67
19
  for (const transform of transforms) await transform(document);
68
20
  return document.body.innerHTML;
69
21
  };
@@ -152,36 +104,61 @@ const getDimensions = (element) => {
152
104
  height: height ?? fromStyle(styleHeightRegex)
153
105
  };
154
106
  };
155
- const applyEmbedMetadata = (element, metadata, options) => {
156
- const setIfMissing = options?.setIfMissing ?? false;
157
- const set = (name, value) => {
158
- if (setIfMissing && element.hasAttribute(name)) return;
159
- element.setAttribute(name, value);
107
+ const createPlaceholder = (document, type, fields) => {
108
+ const element = document.createElement("div");
109
+ for (const [key, value] of Object.entries(fields)) if (value) element.setAttribute(`data-${type}-${key}`, value);
110
+ return element;
111
+ };
112
+ const normalizeEmbedFields = (metadata) => {
113
+ return {
114
+ src: metadata.src ? upgradeProtocol(metadata.src) : void 0,
115
+ provider: metadata.provider,
116
+ id: metadata.id,
117
+ url: metadata.url ? upgradeProtocol(metadata.url) : void 0,
118
+ thumbnail: metadata.thumbnail && isSafeThumbnailUrl(metadata.thumbnail) ? metadata.thumbnail : void 0,
119
+ width: metadata.width ? String(metadata.width) : void 0,
120
+ height: metadata.height ? String(metadata.height) : void 0,
121
+ title: metadata.title,
122
+ description: metadata.description,
123
+ author: metadata.author,
124
+ avatar: metadata.avatar && isSafeThumbnailUrl(metadata.avatar) ? metadata.avatar : void 0,
125
+ duration: metadata.duration ? String(metadata.duration) : void 0
160
126
  };
161
- if (metadata.provider) set("data-embed-provider", metadata.provider);
162
- if (metadata.id) set("data-embed-id", metadata.id);
163
- if (metadata.src) set("data-embed-src", metadata.src);
164
- if (metadata.url) set("data-embed-url", metadata.url);
165
- if (metadata.thumbnail && isSafeThumbnailUrl(metadata.thumbnail)) set("data-embed-thumbnail", metadata.thumbnail);
166
- if (metadata.width) set("data-embed-width", String(metadata.width));
167
- if (metadata.height) set("data-embed-height", String(metadata.height));
168
- if (metadata.title) set("data-embed-title", metadata.title);
169
- if (metadata.description) set("data-embed-description", metadata.description);
170
- if (metadata.author) set("data-embed-author", metadata.author);
171
- if (metadata.avatar && isSafeThumbnailUrl(metadata.avatar)) set("data-embed-avatar", metadata.avatar);
172
- if (metadata.duration) set("data-embed-duration", String(metadata.duration));
127
+ };
128
+ const updateEmbedPlaceholder = (element, metadata) => {
129
+ for (const [key, value] of Object.entries(normalizeEmbedFields(metadata))) {
130
+ const name = `data-embed-${key}`;
131
+ if (value && !element.hasAttribute(name)) element.setAttribute(name, value);
132
+ }
173
133
  };
174
134
  const createEmbedPlaceholder = (document, src, metadata) => {
175
- const element = document.createElement("div");
176
- element.setAttribute("data-embed", "iframe");
177
- element.setAttribute("data-embed-src", metadata?.src ?? src);
178
- if (metadata) applyEmbedMetadata(element, metadata);
179
- const fallbackUrl = metadata?.url ?? metadata?.src ?? src;
135
+ const element = createPlaceholder(document, "embed", normalizeEmbedFields({
136
+ ...metadata,
137
+ src: metadata?.src ?? src
138
+ }));
139
+ const fallbackUrl = upgradeProtocol(metadata?.url ?? metadata?.src ?? src);
180
140
  const link = document.createElement("a");
181
141
  link.setAttribute("href", fallbackUrl);
182
142
  link.textContent = fallbackUrl;
183
143
  element.appendChild(link);
184
144
  return element;
185
145
  };
146
+ const createBookmarkPlaceholder = (document, result) => {
147
+ const { provider, title, url, icon, thumbnail, ...rest } = result;
148
+ const safeUrl = upgradeProtocol(url);
149
+ const element = createPlaceholder(document, "bookmark", {
150
+ provider,
151
+ ...rest,
152
+ url: safeUrl,
153
+ title,
154
+ icon: icon && isSafeThumbnailUrl(icon) ? upgradeProtocol(icon) : void 0,
155
+ thumbnail: thumbnail && isSafeThumbnailUrl(thumbnail) ? upgradeProtocol(thumbnail) : void 0
156
+ });
157
+ const link = document.createElement("a");
158
+ link.setAttribute("href", safeUrl);
159
+ link.textContent = title;
160
+ element.appendChild(link);
161
+ return element;
162
+ };
186
163
  //#endregion
187
- export { Node, NodeFilter, applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isSkippable, isWhitespaceText, normalizeAttributeCase, parseFragment, stripOversizedBase64Sources, transformHtml };
164
+ export { Node, NodeFilter, applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isSafeThumbnailUrl, isSkippable, isWhitespaceText, normalizeEmbedFields, updateEmbedPlaceholder };
package/dist/defaults.d.ts CHANGED
@@ -1,15 +1,18 @@
1
- import { DomTransform, EmbedResolver, ResolveUrlFn, StringTransform, UrlUnwrapper } from "./types.js";
1
+ import { BookmarkResolver, DomTransform, EmbedResolver, ResolveUrlFn, StringTransform, UrlUnwrapper } from "./types.js";
2
2
 
3
3
  //#region src/defaults.d.ts
4
4
  declare const defaultStringTransforms: Array<StringTransform>;
5
5
  declare const defaultDomTransforms: Array<DomTransform>;
6
- declare const defaultFinalStringTransforms: Array<StringTransform>;
7
6
  declare const defaultEmbedResolvers: Array<EmbedResolver>;
7
+ declare const defaultBookmarkResolvers: Array<BookmarkResolver>;
8
8
  declare const defaultResolveUrlFn: ResolveUrlFn;
9
9
  declare const defaultLazySrcAttributes: string[];
10
10
  declare const defaultLazySrcsetAttributes: string[];
11
11
  declare const defaultTrackingHosts: string[];
12
12
  declare const defaultTrackingPathSegments: string[];
13
+ declare const defaultEmojiImageHosts: string[];
14
+ declare const defaultPreservedPreClasses: string[];
15
+ declare const defaultInertSelectors: string[];
13
16
  declare const defaultUrlUnwrappers: Array<UrlUnwrapper>;
14
17
  //#endregion
15
- export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
18
+ export { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
package/dist/defaults.js CHANGED
@@ -1,6 +1,10 @@
1
+ import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
2
+ import { substackBookmarkResolver } from "./bookmarks/substack.js";
1
3
  import { youtubeEmbedResolver } from "./embeds/youtube.js";
4
+ import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
2
5
  import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
3
6
  import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
7
+ import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
4
8
  import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
5
9
  import { highlightCode } from "./transforms/dom/highlightCode.js";
6
10
  import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
@@ -16,14 +20,18 @@ import { stripComments } from "./transforms/dom/stripComments.js";
16
20
  import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
17
21
  import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
18
22
  import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
23
+ import { stripInertElements } from "./transforms/dom/stripInertElements.js";
19
24
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
20
25
  import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
21
26
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
22
27
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
23
28
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
29
+ import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
24
30
  import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
25
31
  import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
26
32
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
33
+ import { stripControlChars } from "./transforms/string/stripControlChars.js";
34
+ import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
27
35
  import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
28
36
  import { unwrapBing } from "./unwraps/bing.js";
29
37
  import { unwrapFacebookShim } from "./unwraps/facebook.js";
@@ -39,18 +47,27 @@ import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
39
47
  import { unwrapYouTube } from "./unwraps/youtube.js";
40
48
  import { resolveUrl } from "feedcanon";
41
49
  //#region src/defaults.ts
42
- const defaultStringTransforms = [unwrapCdataComments, paragraphizePlainText];
50
+ const defaultStringTransforms = [
51
+ stripControlChars,
52
+ stripOversizedBase64Sources,
53
+ unwrapCdataComments,
54
+ paragraphizePlainText
55
+ ];
43
56
  const defaultDomTransforms = [
44
57
  decodeDoubleEncodedTags,
45
58
  stripComments,
46
59
  unwrapDoublyNestedLists,
47
60
  stripDuplicateTitleHeading,
61
+ demoteHeadings,
48
62
  fixLazyImages,
63
+ stripInertElements,
49
64
  resolveRelativeUrls,
50
65
  unwrapRedirectUrls,
51
66
  stripDeadAnchors,
52
67
  stripTrackingParams,
68
+ convertBookmarkCards,
53
69
  removeTrackingPixels,
70
+ unwrapEmojiImages,
54
71
  convertBreaksToParagraphs,
55
72
  stripInterBlockBreaks,
56
73
  stripParagraphBoundaryBreaks,
@@ -66,8 +83,8 @@ const defaultDomTransforms = [
66
83
  unwrapWrappers,
67
84
  stripEmptyTags
68
85
  ];
69
- const defaultFinalStringTransforms = [];
70
86
  const defaultEmbedResolvers = [youtubeEmbedResolver];
87
+ const defaultBookmarkResolvers = [ghostBookmarkResolver, substackBookmarkResolver];
71
88
  const defaultResolveUrlFn = (url, baseUrl) => resolveUrl(url, baseUrl);
72
89
  const defaultLazySrcAttributes = [
73
90
  "data-src",
@@ -138,6 +155,40 @@ const defaultTrackingPathSegments = [
138
155
  "count",
139
156
  "impression"
140
157
  ];
158
+ const defaultEmojiImageHosts = [
159
+ "s.w.org/images/core/emoji/",
160
+ "s0.wp.com/wp-content/mu-plugins/wpcom-smileys/",
161
+ "fbcdn.net/images/emoji.php/",
162
+ "abs.twimg.com/emoji/",
163
+ "githubassets.com/images/icons/emoji/"
164
+ ];
165
+ const defaultPreservedPreClasses = ["wp-block-verse", "wp-block-preformatted"];
166
+ const defaultInertSelectors = [
167
+ ".image-link-expand",
168
+ "[data-component-name=\"SubscribeWidget\"]",
169
+ ".subscription-widget-wrap-editor",
170
+ "drupal-render-placeholder",
171
+ ".adsbygoogle",
172
+ ".embedded-publication-wrap",
173
+ ".yarpp-related",
174
+ ".sharethis-inline-share-buttons",
175
+ ".sharedaddy",
176
+ ".wp-block-jetpack-subscriptions",
177
+ ".wp-block-post-author",
178
+ ".kg-signup-card",
179
+ ".mc4wp-form",
180
+ ".formkit-form",
181
+ ".mcnPreviewText",
182
+ ".saboxplugin-wrap",
183
+ ".addtoany_share_save_container",
184
+ "iframe[src*=\"embeds.beehiiv.com\"]",
185
+ ".jp-relatedposts",
186
+ ".adthrive-ad",
187
+ ".jetpack_subscription_widget",
188
+ ".crp_related",
189
+ "form[action*=\"buttondown.email\"]",
190
+ ".sqs-block-newsletter"
191
+ ];
141
192
  const defaultUrlUnwrappers = [
142
193
  unwrapBing,
143
194
  unwrapGoogle,
@@ -153,4 +204,4 @@ const defaultUrlUnwrappers = [
153
204
  unwrapRedditOut
154
205
  ];
155
206
  //#endregion
156
- export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
207
+ export { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
package/dist/index.d.ts CHANGED
@@ -1,9 +1,13 @@
1
- import { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
1
+ import { AssetProxyFn, AssetType, BookmarkResolver, BookmarkResolverResult, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
2
2
  import { defaultResolveUrlFn } from "./defaults.js";
3
- import { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
3
+ import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
4
+ import { substackBookmarkResolver } from "./bookmarks/substack.js";
5
+ import { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder } from "./common.js";
4
6
  import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
7
+ import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
5
8
  import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
6
9
  import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
10
+ import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
7
11
  import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
8
12
  import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
9
13
  import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
@@ -20,14 +24,18 @@ import { stripComments } from "./transforms/dom/stripComments.js";
20
24
  import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
21
25
  import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
22
26
  import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
27
+ import { stripInertElements } from "./transforms/dom/stripInertElements.js";
23
28
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
24
29
  import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
25
30
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
26
31
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
27
32
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
33
+ import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
28
34
  import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
29
35
  import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
30
36
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
37
+ import { stripControlChars } from "./transforms/string/stripControlChars.js";
38
+ import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
31
39
  import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
32
40
  import { unwrapAceml } from "./unwraps/aceml.js";
33
41
  import { unwrapAdjust } from "./unwraps/adjust.js";
@@ -106,6 +114,6 @@ import { unwrapZhihu } from "./unwraps/zhihu.js";
106
114
  import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
107
115
 
108
116
  //#region src/index.d.ts
109
- declare const transformContent: (html: string, options?: TransformContentOptions) => Promise<string>;
117
+ declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
110
118
  //#endregion
111
- export { type AssetProxyFn, type AssetType, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyEmbedMetadata, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBreaksToParagraphs, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, enrichEmbedPlaceholders, expandSvgSelfClose, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosures, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, paragraphizePlainText, parseFragment, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, 
unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
119
+ export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, 
unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
package/dist/index.js CHANGED
@@ -1,8 +1,12 @@
1
1
  import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
2
- import { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, createEmbedPlaceholder, expandSvgSelfClose, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
2
+ import { applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, isSafeThumbnailUrl, normalizeEmbedFields, updateEmbedPlaceholder } from "./common.js";
3
+ import { ghostBookmarkResolver } from "./bookmarks/ghost.js";
4
+ import { substackBookmarkResolver } from "./bookmarks/substack.js";
3
5
  import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
6
+ import { convertBookmarkCards } from "./transforms/dom/convertBookmarkCards.js";
4
7
  import { convertBreaksToParagraphs } from "./transforms/dom/convertBreaksToParagraphs.js";
5
8
  import { decodeDoubleEncodedTags } from "./transforms/dom/decodeDoubleEncodedTags.js";
9
+ import { demoteHeadings } from "./transforms/dom/demoteHeadings.js";
6
10
  import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
7
11
  import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
8
12
  import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
@@ -18,14 +22,18 @@ import { stripComments } from "./transforms/dom/stripComments.js";
18
22
  import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
19
23
  import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
20
24
  import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
25
+ import { stripInertElements } from "./transforms/dom/stripInertElements.js";
21
26
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
22
27
  import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
23
28
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
24
29
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
25
30
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
31
+ import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
26
32
  import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
27
33
  import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
28
34
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
35
+ import { stripControlChars } from "./transforms/string/stripControlChars.js";
36
+ import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
29
37
  import { unwrapCdataComments } from "./transforms/string/unwrapCdataComments.js";
30
38
  import { unwrapBing } from "./unwraps/bing.js";
31
39
  import { unwrapFacebookShim } from "./unwraps/facebook.js";
@@ -39,7 +47,7 @@ import { unwrapRedditOut } from "./unwraps/redditOut.js";
39
47
  import { unwrapVkAway } from "./unwraps/vkAway.js";
40
48
  import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
41
49
  import { unwrapYouTube } from "./unwraps/youtube.js";
42
- import { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
50
+ import { defaultBookmarkResolvers, defaultDomTransforms, defaultEmbedResolvers, defaultEmojiImageHosts, defaultInertSelectors, defaultLazySrcAttributes, defaultLazySrcsetAttributes, defaultPreservedPreClasses, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
43
51
  import { enrichEmbedPlaceholders } from "./transforms/dom/enrichEmbedPlaceholders.js";
44
52
  import { unwrapAceml } from "./unwraps/aceml.js";
45
53
  import { unwrapAdjust } from "./unwraps/adjust.js";
@@ -104,15 +112,19 @@ import { unwrapWebArchive } from "./unwraps/webArchive.js";
104
112
  import { unwrapYandexTurbo } from "./unwraps/yandexTurbo.js";
105
113
  import { unwrapZhihu } from "./unwraps/zhihu.js";
106
114
  //#region src/index.ts
107
- const transformContent = async (html, options = {}) => {
115
+ const transformContent = async (html, options) => {
108
116
  const context = {
109
117
  baseUrl: options.baseUrl,
110
118
  enclosures: options.enclosures,
111
119
  embedResolvers: options.embedResolvers ?? defaultEmbedResolvers,
120
+ bookmarkResolvers: options.bookmarkResolvers ?? defaultBookmarkResolvers,
112
121
  lazySrcAttributes: options.lazySrcAttributes ?? defaultLazySrcAttributes,
113
122
  lazySrcsetAttributes: options.lazySrcsetAttributes ?? defaultLazySrcsetAttributes,
114
123
  trackingHosts: options.trackingHosts ?? defaultTrackingHosts,
115
124
  trackingPathSegments: options.trackingPathSegments ?? defaultTrackingPathSegments,
125
+ emojiImageHosts: options.emojiImageHosts ?? defaultEmojiImageHosts,
126
+ inertSelectors: options.inertSelectors ?? defaultInertSelectors,
127
+ preservedPreClasses: options.preservedPreClasses ?? defaultPreservedPreClasses,
116
128
  urlUnwrappers: options.urlUnwrappers ?? defaultUrlUnwrappers,
117
129
  resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn,
118
130
  assetProxyFn: options.assetProxyFn,
@@ -121,8 +133,8 @@ const transformContent = async (html, options = {}) => {
121
133
  };
122
134
  const stringFns = options.stringTransforms ?? defaultStringTransforms;
123
135
  const domFns = options.domTransforms ?? defaultDomTransforms;
124
- const finalFns = options.finalStringTransforms ?? defaultFinalStringTransforms;
125
- return await applyStringTransforms(await applyDomTransforms(await applyStringTransforms(html, stringFns.map((transform) => transform(context))), domFns.map((transform) => transform(context))), finalFns.map((transform) => transform(context)));
136
+ const afterString = await applyStringTransforms(html, stringFns.map((transform) => transform(context)));
137
+ return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
126
138
  };
127
139
  //#endregion
128
- export { applyDomTransforms, applyEmbedMetadata, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBreaksToParagraphs, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, enrichEmbedPlaceholders, expandSvgSelfClose, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosures, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, paragraphizePlainText, parseFragment, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, 
unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
140
+ export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, 
unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
@@ -0,0 +1,4 @@
1
+ //#region src/parsers/linkedom.d.ts
2
+ declare const parseHtml: (html: string) => Document;
3
+ //#endregion
4
+ export { parseHtml };
@@ -0,0 +1,38 @@
1
+ import { parseHTML } from "linkedom";
2
+ //#region src/parsers/linkedom.ts
3
+ const normalizeAttributeCase = (document) => {
4
+ for (const element of document.querySelectorAll("*")) {
5
+ const original = Array.from(element.attributes).map((attribute) => ({
6
+ name: attribute.name,
7
+ value: attribute.value
8
+ }));
9
+ const final = /* @__PURE__ */ new Map();
10
+ let needsRewrite = false;
11
+ for (const { name, value } of original) {
12
+ const lower = name.toLowerCase();
13
+ if (lower !== name) needsRewrite = true;
14
+ if (final.has(lower)) {
15
+ needsRewrite = true;
16
+ continue;
17
+ }
18
+ final.set(lower, value);
19
+ }
20
+ if (!needsRewrite) continue;
21
+ for (const { name } of original) element.removeAttribute(name);
22
+ for (const [name, value] of final) element.setAttribute(name, value);
23
+ }
24
+ };
25
// One `<svg ...> ... </svg>` region (non-greedy, case-insensitive).
const svgRegionRegex = /<svg\b[^>]*>[\s\S]*?<\/svg>/gi;
// A self-closing tag: group 1 is the tag name, group 2 the raw attribute text.
const svgSelfCloseRegex = /<([a-z][a-z0-9-]*)((?:\s[^>]*)?)\s*\/>/gi;
/**
 * Rewrites self-closing tags inside `<svg>…</svg>` regions as explicit
 * open/close pairs (`<path d="…"/>` becomes `<path d="…"></path>`).
 * Markup outside of svg regions is returned unchanged.
 *
 * NOTE(review): presumably needed because the HTML parser does not honour
 * self-closing syntax for these elements; confirm against the parser in use.
 *
 * @param html Raw HTML string.
 * @returns The HTML with svg-internal self-closing tags expanded.
 */
const expandSvgSelfClose = (html) =>
  html.replace(svgRegionRegex, (region) =>
    region.replace(
      svgSelfCloseRegex,
      (_whole, tagName, attributeText) => `<${tagName}${attributeText}></${tagName}>`
    )
  );
32
/**
 * Parses an HTML fragment into a document using linkedom's `parseHTML`.
 *
 * The fragment is first passed through `expandSvgSelfClose`, then placed in
 * the body of a minimal HTML document skeleton before parsing. Attribute
 * names of the parsed document are lower-cased via `normalizeAttributeCase`.
 *
 * @param html HTML fragment to parse.
 * @returns The parsed document.
 */
const parseHtml = (html) => {
  const markup = `<!doctype html><html><head></head><body>${expandSvgSelfClose(html)}</body></html>`;
  const parsed = parseHTML(markup);
  normalizeAttributeCase(parsed.document);
  return parsed.document;
};
37
+ //#endregion
38
+ export { parseHtml };
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/convertBookmarkCards.d.ts
4
+ declare const convertBookmarkCards: DomTransform;
5
+ //#endregion
6
+ export { convertBookmarkCards };
@@ -0,0 +1,14 @@
1
+ import { createBookmarkPlaceholder } from "../../common.js";
2
+ //#region src/transforms/dom/convertBookmarkCards.ts
3
/**
 * DOM transform that replaces bookmark cards with bookmark placeholders.
 *
 * For each resolver in `context.bookmarkResolvers`, every element matching
 * `resolver.selector` is handed to `resolver.extract`. When extraction yields
 * a result, the element is replaced by the node built by
 * `createBookmarkPlaceholder(document, result)`; otherwise it is left as is.
 * Extraction results are awaited one element at a time.
 *
 * @param context Transform context providing `bookmarkResolvers`.
 * @returns An async function that rewrites the given document in place.
 */
const convertBookmarkCards = (context) => {
  const resolvers = context.bookmarkResolvers;
  return async (document) => {
    for (const resolver of resolvers) {
      const candidates = document.querySelectorAll(resolver.selector);
      for (const card of candidates) {
        const extracted = await resolver.extract(card);
        if (extracted) {
          card.replaceWith(createBookmarkPlaceholder(document, extracted));
        }
      }
    }
  };
};
13
+ //#endregion
14
+ export { convertBookmarkCards };
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/demoteHeadings.d.ts
4
+ declare const demoteHeadings: DomTransform;
5
+ //#endregion
6
+ export { demoteHeadings };
@@ -0,0 +1,20 @@
1
+ //#region src/transforms/dom/demoteHeadings.ts
2
+ const headingSelector = "h1, h2, h3, h4, h5";
3
+ const demoteHeadings = () => {
4
+ return (document) => {
5
+ if (!document.querySelector("h1")) return;
6
+ const headings = document.querySelectorAll(headingSelector);
7
+ for (const heading of headings) {
8
+ const nextTagName = `h${Number(heading.tagName.slice(1)) + 1}`;
9
+ const replacement = document.createElement(nextTagName);
10
+ for (const name of heading.getAttributeNames().reverse()) {
11
+ const value = heading.getAttribute(name);
12
+ if (value !== null) replacement.setAttribute(name, value);
13
+ }
14
+ while (heading.firstChild) replacement.appendChild(heading.firstChild);
15
+ heading.replaceWith(replacement);
16
+ }
17
+ };
18
+ };
19
+ //#endregion
20
+ export { demoteHeadings };
@@ -1,4 +1,4 @@
1
- import { applyEmbedMetadata } from "../../common.js";
1
+ import { updateEmbedPlaceholder } from "../../common.js";
2
2
  //#region src/transforms/dom/enrichEmbedPlaceholders.ts
3
3
  const enrichEmbedPlaceholders = (context) => {
4
4
  const enrichEmbedFn = context.enrichEmbedFn;
@@ -24,7 +24,7 @@ const enrichEmbedPlaceholders = (context) => {
24
24
  for (let i = 0; i < count; i++) {
25
25
  const embed = embeds[i];
26
26
  const data = enriched.get(`${embed.provider}:${embed.id}`);
27
- if (data) applyEmbedMetadata(placeholders[i], data, { setIfMissing: true });
27
+ if (data) updateEmbedPlaceholder(placeholders[i], data);
28
28
  }
29
29
  };
30
30
  };
@@ -1,4 +1,3 @@
1
- import { normalizeAttributeCase } from "../../common.js";
2
1
  //#region src/transforms/dom/fixLazyImages.ts
3
2
  const imgPattern = /<img\s/i;
4
3
  const urlShapeRegex = /[:/.]/;
@@ -45,7 +44,6 @@ const fixLazyImages = (context) => {
45
44
  }
46
45
  }
47
46
  const noscripts = document.querySelectorAll("noscript");
48
- let replacedNoscript = false;
49
47
  for (const noscript of noscripts) {
50
48
  const sibling = noscript.previousElementSibling;
51
49
  if (sibling?.localName !== "img") continue;
@@ -53,9 +51,7 @@ const fixLazyImages = (context) => {
53
51
  if (!imgPattern.test(inner)) continue;
54
52
  sibling.remove();
55
53
  noscript.outerHTML = inner;
56
- replacedNoscript = true;
57
54
  }
58
- if (replacedNoscript) normalizeAttributeCase(document);
59
55
  };
60
56
  };
61
57
  //#endregion
@@ -1,7 +1,15 @@
1
1
  //#region src/transforms/dom/mergeConsecutiveOneLinerPres.ts
2
2
  const trailingBrRegex = /<br\s*\/?>\s*$/i;
3
3
  const surroundingNewlinesRegex = /^\n+|\n+$/g;
4
- const mergeConsecutiveOneLinerPres = () => {
4
+ const classTokenSeparator = /\s+/;
5
+ const mergeConsecutiveOneLinerPres = ({ preservedPreClasses }) => {
6
+ const preservedSet = new Set(preservedPreClasses);
7
+ const isPreserved = (element) => {
8
+ const classAttribute = element.getAttribute("class");
9
+ if (!classAttribute) return false;
10
+ for (const token of classAttribute.split(classTokenSeparator)) if (preservedSet.has(token)) return true;
11
+ return false;
12
+ };
5
13
  return (document) => {
6
14
  const pres = document.querySelectorAll("pre");
7
15
  for (const pre of pres) {
@@ -20,6 +28,7 @@ const mergeConsecutiveOneLinerPres = () => {
20
28
  sibling = sibling.nextSibling;
21
29
  }
22
30
  if (run.length < 2) continue;
31
+ if (run.some(isPreserved)) continue;
23
32
  const isSingleLine = (element) => {
24
33
  return !element.innerHTML.replace(surroundingNewlinesRegex, "").includes("\n");
25
34
  };
@@ -1,5 +1,17 @@
1
1
  import { parseSrcset, stringifySrcset } from "srcset";
2
2
  //#region src/transforms/dom/proxyAssetUrls.ts
3
+ const proxyableSelectors = [
4
+ "img",
5
+ "video",
6
+ "audio",
7
+ "source",
8
+ "track",
9
+ "image",
10
+ "[data-embed-thumbnail]",
11
+ "[data-embed-avatar]",
12
+ "[data-bookmark-icon]",
13
+ "[data-bookmark-thumbnail]"
14
+ ];
3
15
  const sourceTypeFromParent = (element) => {
4
16
  const parent = element.parentElement?.localName;
5
17
  if (parent === "video") return "video";
@@ -30,7 +42,7 @@ const proxySrcset = (element, type, assetProxyFn) => {
30
42
  const proxyAssetUrls = ({ assetProxyFn }) => {
31
43
  if (!assetProxyFn) return () => {};
32
44
  return (document) => {
33
- const elements = document.querySelectorAll("img, video, audio, source, track, image, [data-embed-thumbnail], [data-embed-avatar]");
45
+ const elements = document.querySelectorAll(proxyableSelectors.join(", "));
34
46
  for (const element of elements) {
35
47
  switch (element.localName) {
36
48
  case "img":
@@ -57,6 +69,8 @@ const proxyAssetUrls = ({ assetProxyFn }) => {
57
69
  }
58
70
  if (element.hasAttribute("data-embed-thumbnail")) proxyAttribute(element, "data-embed-thumbnail", "image", assetProxyFn);
59
71
  if (element.hasAttribute("data-embed-avatar")) proxyAttribute(element, "data-embed-avatar", "image", assetProxyFn);
72
+ if (element.hasAttribute("data-bookmark-icon")) proxyAttribute(element, "data-bookmark-icon", "image", assetProxyFn);
73
+ if (element.hasAttribute("data-bookmark-thumbnail")) proxyAttribute(element, "data-bookmark-thumbnail", "image", assetProxyFn);
60
74
  }
61
75
  };
62
76
  };
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/stripInertElements.d.ts
4
+ declare const stripInertElements: DomTransform;
5
+ //#endregion
6
+ export { stripInertElements };
@@ -0,0 +1,11 @@
1
+ //#region src/transforms/dom/stripInertElements.ts
2
+ const stripInertElements = ({ inertSelectors }) => {
3
+ const selector = inertSelectors.join(",");
4
+ return (document) => {
5
+ if (!selector) return;
6
+ const elements = document.querySelectorAll(selector);
7
+ for (const element of elements) element.remove();
8
+ };
9
+ };
10
+ //#endregion
11
+ export { stripInertElements };
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/unwrapEmojiImages.d.ts
4
+ declare const unwrapEmojiImages: DomTransform;
5
+ //#endregion
6
+ export { unwrapEmojiImages };
@@ -0,0 +1,21 @@
1
+ //#region src/transforms/dom/unwrapEmojiImages.ts
2
+ const nonAsciiRegex = /[€-￿]/;
3
+ const asciiLetterRegex = /[a-zA-Z]/;
4
+ const isEmojiShapedAlt = (alt) => {
5
+ return nonAsciiRegex.test(alt) && !asciiLetterRegex.test(alt);
6
+ };
7
+ const unwrapEmojiImages = (context) => {
8
+ const selector = [
9
+ "img.wp-smiley[alt]",
10
+ "img.emoji[alt]",
11
+ ...context.emojiImageHosts.map((host) => `img[alt][src*="${host}"]`)
12
+ ].join(", ");
13
+ return (document) => {
14
+ for (const image of document.querySelectorAll(selector)) {
15
+ const alt = image.getAttribute("alt");
16
+ if (alt && isEmojiShapedAlt(alt)) image.replaceWith(document.createTextNode(alt));
17
+ }
18
+ };
19
+ };
20
+ //#endregion
21
+ export { unwrapEmojiImages };
@@ -7,9 +7,13 @@ const wrapperTags = new Set([
7
7
  "header",
8
8
  "footer"
9
9
  ]);
10
- const hasEmbedAttribute = (element) => {
10
+ const preservedPrefixes = ["data-embed", "data-bookmark"];
11
+ const hasPreservedAttribute = (element) => {
11
12
  const attributes = element.attributes;
12
- for (let i = 0, n = attributes.length; i < n; i++) if (attributes[i].name.startsWith("data-embed")) return true;
13
+ for (let i = 0, n = attributes.length; i < n; i++) {
14
+ const name = attributes[i].name;
15
+ for (const prefix of preservedPrefixes) if (name.startsWith(prefix)) return true;
16
+ }
13
17
  return false;
14
18
  };
15
19
  const unwrapWrappers = () => {
@@ -20,7 +24,7 @@ const unwrapWrappers = () => {
20
24
  if (!wrapperTags.has(element.localName)) continue;
21
25
  const parent = element.parentNode;
22
26
  if (!parent) continue;
23
- if (hasEmbedAttribute(element)) continue;
27
+ if (hasPreservedAttribute(element)) continue;
24
28
  while (element.firstChild) parent.insertBefore(element.firstChild, element);
25
29
  element.remove();
26
30
  }
@@ -0,0 +1,6 @@
1
+ import { StringTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/string/stripControlChars.d.ts
4
+ declare const stripControlChars: StringTransform;
5
+ //#endregion
6
+ export { stripControlChars };
@@ -0,0 +1,21 @@
1
+ //#region src/transforms/string/stripControlChars.ts
2
// The last two code points (xFFFE, xFFFF) of each supplementary plane 1–16.
const supplementaryPlaneFragments = [];
for (let plane = 1; plane <= 16; plane += 1) {
  const planeHex = plane.toString(16).toUpperCase();
  supplementaryPlaneFragments.push(`\\u{${planeHex}FFFE}\\u{${planeHex}FFFF}`);
}
// Character-class fragments for everything that gets removed:
// control characters other than tab, line feed and carriage return,
// DEL through U+009F, and the BMP noncharacter code points.
const ranges = [
  "\\x00-\\x08",
  "\\x0B\\x0C",
  "\\x0E-\\x1F",
  "\\x7F-\\x9F",
  "\\uFDD0-\\uFDEF",
  "\\uFFFE\\uFFFF"
].concat(supplementaryPlaneFragments);
const controlCharRegex = new RegExp("[" + ranges.join("") + "]", "gu");
/**
 * String transform that deletes control characters and Unicode noncharacter
 * code points from the input. Tab (U+0009), line feed (U+000A) and carriage
 * return (U+000D) are preserved.
 *
 * @returns A function mapping an HTML string to its cleaned form.
 */
const stripControlChars = () => (html) => html.replace(controlCharRegex, "");
20
+ //#endregion
21
+ export { stripControlChars };
@@ -0,0 +1,6 @@
1
+ import { StringTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/string/stripOversizedBase64Sources.d.ts
4
+ declare const stripOversizedBase64Sources: StringTransform;
5
+ //#endregion
6
+ export { stripOversizedBase64Sources };
@@ -0,0 +1,13 @@
1
+ //#region src/transforms/string/stripOversizedBase64Sources.ts
2
// A quoted `src`, `srcset` or `poster` assignment whose value is a base64
// `data:` URL. Group 1 is the attribute name plus opening quote, group 2 the
// closing quote. Matching is case-insensitive because this runs on the raw
// string, where markup such as `SRC="DATA:...;BASE64,..."` is still possible.
const base64SrcRegex = /((?:src|srcset|poster)=["'])data:[^"']*;base64,[^"']*(["'])/gi;
// Size limit, compared against the length of the whole matched assignment
// (attribute name, quotes and URL) in UTF-16 code units.
const maxBase64Size = 50 * 1024;
/**
 * String transform that empties oversized inline base64 sources.
 *
 * Every `src`/`srcset`/`poster` assignment holding a base64 `data:` URL is
 * inspected; when the matched text is at least `maxBase64Size` characters
 * long, the value is replaced with an empty string while the attribute and
 * its quotes are kept. Smaller values are returned unchanged.
 *
 * @returns A function mapping an HTML string to its reduced form.
 */
const stripOversizedBase64Sources = () => {
  return (html) => {
    return html.replace(base64SrcRegex, (match, prefix, suffix) => {
      if (match.length < maxBase64Size) return match;
      return `${prefix}${suffix}`;
    });
  };
};
12
+ //#endregion
13
+ export { stripOversizedBase64Sources };
package/dist/types.d.ts CHANGED
@@ -41,6 +41,20 @@ type EmbedResolver = {
41
41
  selector: string;
42
42
  extract: (element: Element) => MaybePromise<EmbedResolverResult | undefined>;
43
43
  };
44
+ type BookmarkResolverResult = {
45
+ provider: string;
46
+ url: string;
47
+ title: string;
48
+ description?: string;
49
+ author?: string;
50
+ publisher?: string;
51
+ icon?: string;
52
+ thumbnail?: string;
53
+ };
54
+ type BookmarkResolver = {
55
+ selector: string;
56
+ extract: (element: Element) => MaybePromise<BookmarkResolverResult | undefined>;
57
+ };
44
58
  type UrlUnwrapper = (url: URL) => string | undefined;
45
59
  type AssetType = 'image' | 'video' | 'audio';
46
60
  type AssetProxyFn = (url: string, type: AssetType) => string | undefined;
@@ -48,10 +62,14 @@ type TransformContext = {
48
62
  baseUrl?: string;
49
63
  enclosures?: Array<Enclosure>;
50
64
  embedResolvers: Array<EmbedResolver>;
65
+ bookmarkResolvers: Array<BookmarkResolver>;
51
66
  lazySrcAttributes: Array<string>;
52
67
  lazySrcsetAttributes: Array<string>;
53
68
  trackingHosts: Array<string>;
54
69
  trackingPathSegments: Array<string>;
70
+ emojiImageHosts: Array<string>;
71
+ inertSelectors: Array<string>;
72
+ preservedPreClasses: Array<string>;
55
73
  urlUnwrappers: Array<UrlUnwrapper>;
56
74
  resolveUrlFn: ResolveUrlFn;
57
75
  assetProxyFn?: AssetProxyFn;
@@ -60,14 +78,20 @@ type TransformContext = {
60
78
  };
61
79
  type DomTransform = (context: TransformContext) => (document: Document) => MaybePromise<void>;
62
80
  type StringTransform = (context: TransformContext) => (html: string) => MaybePromise<string>;
81
+ type ParseHtmlFn = (html: string) => MaybePromise<Document>;
63
82
  type TransformContentOptions = {
83
+ parseHtmlFn: ParseHtmlFn;
64
84
  baseUrl?: string;
65
85
  enclosures?: Array<Enclosure>;
66
86
  embedResolvers?: Array<EmbedResolver>;
87
+ bookmarkResolvers?: Array<BookmarkResolver>;
67
88
  lazySrcAttributes?: Array<string>;
68
89
  lazySrcsetAttributes?: Array<string>;
69
90
  trackingHosts?: Array<string>;
70
91
  trackingPathSegments?: Array<string>;
92
+ emojiImageHosts?: Array<string>;
93
+ inertSelectors?: Array<string>;
94
+ preservedPreClasses?: Array<string>;
71
95
  urlUnwrappers?: Array<UrlUnwrapper>;
72
96
  resolveUrlFn?: ResolveUrlFn;
73
97
  assetProxyFn?: AssetProxyFn;
@@ -75,7 +99,6 @@ type TransformContentOptions = {
75
99
  articleTitle?: string;
76
100
  stringTransforms?: Array<StringTransform>;
77
101
  domTransforms?: Array<DomTransform>;
78
- finalStringTransforms?: Array<StringTransform>;
79
102
  };
80
103
  //#endregion
81
- export { AssetProxyFn, AssetType, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext, UrlUnwrapper };
104
+ export { AssetProxyFn, AssetType, BookmarkResolver, BookmarkResolverResult, DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, EnrichEmbedFn, MaybePromise, ParseHtmlFn, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext, UrlUnwrapper };
package/package.json CHANGED
@@ -29,6 +29,10 @@
29
29
  "./defaults": {
30
30
  "types": "./dist/defaults.d.ts",
31
31
  "default": "./dist/defaults.js"
32
+ },
33
+ "./linkedom": {
34
+ "types": "./dist/parsers/linkedom.d.ts",
35
+ "default": "./dist/parsers/linkedom.js"
32
36
  }
33
37
  },
34
38
  "files": [
@@ -36,23 +40,29 @@
36
40
  ],
37
41
  "scripts": {
38
42
  "prepare": "lefthook install",
39
- "build": "tsdown src/index.ts src/defaults.ts --format esm --dts --clean --unbundle --no-fixed-extension"
43
+ "build": "tsdown src/index.ts src/defaults.ts src/parsers/linkedom.ts --format esm --dts --clean --unbundle --no-fixed-extension"
40
44
  },
41
45
  "dependencies": {
42
46
  "@wordpress/autop": "^4.46.0",
43
47
  "highlight.js": "^11.11.1",
44
- "linkedom": "^0.18.12",
45
48
  "linkifyjs": "^4.3.2",
46
49
  "srcset": "^5.0.3"
47
50
  },
48
51
  "peerDependencies": {
49
- "feedcanon": "^2.0.0-next.3",
50
- "feedscout": "^2.0.0-next.2"
52
+ "feedcanon": "^2.0.0-next.4",
53
+ "feedscout": "^2.0.0-next.2",
54
+ "linkedom": "^0.18.12"
55
+ },
56
+ "peerDependenciesMeta": {
57
+ "linkedom": {
58
+ "optional": true
59
+ }
51
60
  },
52
61
  "devDependencies": {
53
62
  "@types/bun": "^1.3.13",
54
63
  "kvalita": "^1.13.0",
64
+ "linkedom": "^0.18.12",
55
65
  "tsdown": "^0.22.0"
56
66
  },
57
- "version": "1.1.0"
67
+ "version": "2.0.0"
58
68
  }