feedsweep 2.0.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/common.js +14 -8
- package/dist/defaults.js +9 -3
- package/dist/index.d.ts +5 -2
- package/dist/index.js +5 -2
- package/dist/transforms/dom/convertBreaksToParagraphs.js +3 -4
- package/dist/transforms/dom/decodeDoubleEncodedTags.js +5 -5
- package/dist/transforms/dom/linkifyUrls.js +4 -4
- package/dist/transforms/dom/markTimestamps.d.ts +7 -0
- package/dist/transforms/dom/markTimestamps.js +64 -0
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +3 -2
- package/dist/transforms/dom/mergeFragmentedLists.js +9 -12
- package/dist/transforms/dom/stripBoundaryBreaks.d.ts +6 -0
- package/dist/transforms/dom/{stripParagraphBoundaryBreaks.js → stripBoundaryBreaks.js} +25 -9
- package/dist/transforms/dom/stripEmptyTags.js +3 -4
- package/dist/transforms/dom/unwrapDoublyNestedLists.js +3 -3
- package/dist/transforms/dom/unwrapHeadingBold.d.ts +6 -0
- package/dist/transforms/dom/unwrapHeadingBold.js +28 -0
- package/dist/transforms/dom/unwrapWrappers.js +5 -1
- package/dist/transforms/dom/wrapTablesForScroll.d.ts +6 -0
- package/dist/transforms/dom/wrapTablesForScroll.js +20 -0
- package/package.json +1 -1
- package/dist/transforms/dom/stripParagraphBoundaryBreaks.d.ts +0 -6
package/README.md
CHANGED
|
@@ -39,9 +39,10 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
39
39
|
| `mergeConsecutiveOneLinerPres` | Merge consecutive single-line `<pre>` tags |
|
|
40
40
|
| `replacePreLineBreaks` | Replace `<br>` with `\n` inside `<pre>` |
|
|
41
41
|
| `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
|
|
42
|
-
| `
|
|
42
|
+
| `stripBoundaryBreaks` | Remove `<br>` tags adjacent to block-element boundaries (paragraphs, headings, divs, list items, blockquotes, …) |
|
|
43
43
|
| `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
|
|
44
44
|
| `demoteHeadings` | Shift every heading down by one level (`<h1>`→`<h2>`, …, `<h5>`→`<h6>`) when the body contains an `<h1>`, so it sits below the reader's own page title |
|
|
45
|
+
| `unwrapHeadingBold` | Unwrap `<b>`/`<strong>` that wraps the entire content of a heading (redundant — headings are already bold) |
|
|
45
46
|
| `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
|
|
46
47
|
| `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
|
|
47
48
|
| `stripInertElements` | Remove platform chrome and dead placeholders — subscribe widgets, share buttons, related-posts widgets, ad slots (AdSense / AdThrive), author bio blocks, email preheaders, Substack image controls, and Drupal `<drupal-render-placeholder>` tags. Pass `inertSelectors` to extend or replace |
|
|
@@ -57,10 +58,12 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
57
58
|
| `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
|
|
58
59
|
| `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
|
|
59
60
|
| `unwrapDoublyNestedLists` | Unwrap `<ul>`/`<ol>` that wrap a single `<li>` containing a same-type list |
|
|
61
|
+
| `wrapTablesForScroll` | Wrap each top-level `<table>` in a `<div data-table>` as a horizontal-scroll container |
|
|
60
62
|
| `mergeFragmentedLists` | Merge consecutive sibling `<ul>` / `<ol>` lists with matching attributes |
|
|
61
63
|
| `paragraphizePlainText` | Wrap plain text in `<p>` tags |
|
|
62
64
|
| `stripOversizedBase64Sources` | Drop base64 `src`/`srcset`/`poster` payloads larger than 50 KB before parsing |
|
|
63
65
|
| `linkifyUrls` | Wrap bare URLs in `<a>` tags |
|
|
66
|
+
| `markTimestamps` | Wrap line-leading timestamps (`MM:SS` / `HH:MM:SS`) in `<span data-timestamp="seconds">` so a player can be seeked to that point |
|
|
64
67
|
| `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
|
|
65
68
|
| `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
|
|
66
69
|
| `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
|
package/dist/common.js
CHANGED
|
@@ -58,25 +58,31 @@ const blockElements = new Set([
|
|
|
58
58
|
"table",
|
|
59
59
|
"ul"
|
|
60
60
|
]);
|
|
61
|
-
const
|
|
62
|
-
return node
|
|
61
|
+
const isElement = (node) => {
|
|
62
|
+
return node?.nodeType === Node.ELEMENT_NODE;
|
|
63
63
|
};
|
|
64
|
-
const
|
|
65
|
-
return node
|
|
64
|
+
const isText = (node) => {
|
|
65
|
+
return node?.nodeType === Node.TEXT_NODE;
|
|
66
66
|
};
|
|
67
67
|
const isComment = (node) => {
|
|
68
|
-
return node
|
|
68
|
+
return node?.nodeType === Node.COMMENT_NODE;
|
|
69
|
+
};
|
|
70
|
+
const isWhitespaceText = (node) => {
|
|
71
|
+
return isText(node) && !node.textContent?.trim();
|
|
72
|
+
};
|
|
73
|
+
const isBr = (node) => {
|
|
74
|
+
return isElement(node) && node.localName === "br";
|
|
69
75
|
};
|
|
70
76
|
const isSkippable = (node) => {
|
|
71
77
|
return isWhitespaceText(node) || isBr(node) || isComment(node);
|
|
72
78
|
};
|
|
73
79
|
const isBlockElement = (node) => {
|
|
74
|
-
return node
|
|
80
|
+
return isElement(node) && blockElements.has(node.localName);
|
|
75
81
|
};
|
|
76
82
|
const hasAncestorWithTagName = (node, tagSet, stopAt) => {
|
|
77
83
|
let ancestor = node.parentNode;
|
|
78
84
|
while (ancestor !== null && ancestor !== stopAt) {
|
|
79
|
-
if (ancestor
|
|
85
|
+
if (isElement(ancestor) && tagSet.has(ancestor.localName)) return true;
|
|
80
86
|
ancestor = ancestor.parentNode;
|
|
81
87
|
}
|
|
82
88
|
return false;
|
|
@@ -161,4 +167,4 @@ const createBookmarkPlaceholder = (document, result) => {
|
|
|
161
167
|
return element;
|
|
162
168
|
};
|
|
163
169
|
//#endregion
|
|
164
|
-
export {
|
|
170
|
+
export { NodeFilter, applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isComment, isElement, isSafeThumbnailUrl, isSkippable, isText, isWhitespaceText, normalizeEmbedFields, updateEmbedPlaceholder };
|
package/dist/defaults.js
CHANGED
|
@@ -9,6 +9,7 @@ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
|
9
9
|
import { highlightCode } from "./transforms/dom/highlightCode.js";
|
|
10
10
|
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
11
11
|
import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
|
|
12
|
+
import { markTimestamps } from "./transforms/dom/markTimestamps.js";
|
|
12
13
|
import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
|
|
13
14
|
import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
|
|
14
15
|
import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
|
|
@@ -16,19 +17,21 @@ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
|
|
|
16
17
|
import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
|
|
17
18
|
import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
|
|
18
19
|
import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
|
|
20
|
+
import { stripBoundaryBreaks } from "./transforms/dom/stripBoundaryBreaks.js";
|
|
19
21
|
import { stripComments } from "./transforms/dom/stripComments.js";
|
|
20
22
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
21
23
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
22
24
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
23
25
|
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
24
26
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
25
|
-
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
26
27
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
27
28
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
28
29
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
29
30
|
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
31
|
+
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
30
32
|
import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
31
33
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
34
|
+
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
32
35
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
33
36
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
34
37
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
@@ -59,6 +62,7 @@ const defaultDomTransforms = [
|
|
|
59
62
|
unwrapDoublyNestedLists,
|
|
60
63
|
stripDuplicateTitleHeading,
|
|
61
64
|
demoteHeadings,
|
|
65
|
+
unwrapHeadingBold,
|
|
62
66
|
fixLazyImages,
|
|
63
67
|
stripInertElements,
|
|
64
68
|
resolveRelativeUrls,
|
|
@@ -70,18 +74,20 @@ const defaultDomTransforms = [
|
|
|
70
74
|
unwrapEmojiImages,
|
|
71
75
|
convertBreaksToParagraphs,
|
|
72
76
|
stripInterBlockBreaks,
|
|
73
|
-
|
|
77
|
+
stripBoundaryBreaks,
|
|
74
78
|
mergeFragmentedLists,
|
|
75
79
|
highlightCode,
|
|
76
80
|
mergeConsecutiveOneLinerPres,
|
|
77
81
|
replacePreLineBreaks,
|
|
78
82
|
trimPreWhitespace,
|
|
79
83
|
linkifyUrls,
|
|
84
|
+
markTimestamps,
|
|
80
85
|
replaceEmbedsWithPlaceholders,
|
|
81
86
|
injectEnclosures,
|
|
82
87
|
proxyAssetUrls,
|
|
83
88
|
unwrapWrappers,
|
|
84
|
-
stripEmptyTags
|
|
89
|
+
stripEmptyTags,
|
|
90
|
+
wrapTablesForScroll
|
|
85
91
|
];
|
|
86
92
|
const defaultEmbedResolvers = [youtubeEmbedResolver];
|
|
87
93
|
const defaultBookmarkResolvers = [ghostBookmarkResolver, substackBookmarkResolver];
|
package/dist/index.d.ts
CHANGED
|
@@ -13,6 +13,7 @@ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
|
13
13
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
14
14
|
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
15
15
|
import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
|
|
16
|
+
import { markTimestamps, parseTimestampSeconds } from "./transforms/dom/markTimestamps.js";
|
|
16
17
|
import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
|
|
17
18
|
import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
|
|
18
19
|
import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
|
|
@@ -20,19 +21,21 @@ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
|
|
|
20
21
|
import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
|
|
21
22
|
import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
|
|
22
23
|
import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
|
|
24
|
+
import { stripBoundaryBreaks } from "./transforms/dom/stripBoundaryBreaks.js";
|
|
23
25
|
import { stripComments } from "./transforms/dom/stripComments.js";
|
|
24
26
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
25
27
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
26
28
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
27
29
|
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
28
30
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
29
|
-
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
30
31
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
31
32
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
32
33
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
33
34
|
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
35
|
+
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
34
36
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
35
37
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
38
|
+
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
36
39
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
37
40
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
38
41
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
@@ -116,4 +119,4 @@ import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor
|
|
|
116
119
|
//#region src/index.d.ts
|
|
117
120
|
declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
|
|
118
121
|
//#endregion
|
|
119
|
-
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources,
|
|
122
|
+
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
package/dist/index.js
CHANGED
|
@@ -11,6 +11,7 @@ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
|
11
11
|
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
12
12
|
import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
|
|
13
13
|
import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
|
|
14
|
+
import { markTimestamps, parseTimestampSeconds } from "./transforms/dom/markTimestamps.js";
|
|
14
15
|
import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
|
|
15
16
|
import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
|
|
16
17
|
import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
|
|
@@ -18,19 +19,21 @@ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
|
|
|
18
19
|
import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
|
|
19
20
|
import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
|
|
20
21
|
import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
|
|
22
|
+
import { stripBoundaryBreaks } from "./transforms/dom/stripBoundaryBreaks.js";
|
|
21
23
|
import { stripComments } from "./transforms/dom/stripComments.js";
|
|
22
24
|
import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
|
|
23
25
|
import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
|
|
24
26
|
import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
|
|
25
27
|
import { stripInertElements } from "./transforms/dom/stripInertElements.js";
|
|
26
28
|
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
27
|
-
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
28
29
|
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
29
30
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
30
31
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
31
32
|
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
33
|
+
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
32
34
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
33
35
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
36
|
+
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
34
37
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
35
38
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
36
39
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
@@ -137,4 +140,4 @@ const transformContent = async (html, options) => {
|
|
|
137
140
|
return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
|
|
138
141
|
};
|
|
139
142
|
//#endregion
|
|
140
|
-
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources,
|
|
143
|
+
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { hasAncestorWithTagName, isBlockElement, isBr, isElement, isText, isWhitespaceText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/convertBreaksToParagraphs.ts
|
|
3
3
|
const processContainersSelector = "body, div, blockquote, td, li, article, section, main, header, footer, aside";
|
|
4
4
|
const preOrCodeTags = new Set(["pre", "code"]);
|
|
@@ -49,11 +49,10 @@ const convertBreaksToParagraphs = () => {
|
|
|
49
49
|
i++;
|
|
50
50
|
}
|
|
51
51
|
} else {
|
|
52
|
-
|
|
53
|
-
if (nodeType === Node.ELEMENT_NODE) {
|
|
52
|
+
if (isElement(child)) {
|
|
54
53
|
current.hasContent = true;
|
|
55
54
|
if (isBlockElement(child)) current.hasBlock = true;
|
|
56
|
-
} else if (
|
|
55
|
+
} else if (isText(child)) {
|
|
57
56
|
if (!current.hasContent && child.textContent?.trim()) current.hasContent = true;
|
|
58
57
|
}
|
|
59
58
|
i++;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { NodeFilter, hasAncestorWithTagName } from "../../common.js";
|
|
1
|
+
import { NodeFilter, hasAncestorWithTagName, isText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/decodeDoubleEncodedTags.ts
|
|
3
3
|
const opaqueTags = new Set([
|
|
4
4
|
"code",
|
|
@@ -16,13 +16,13 @@ const decodeDoubleEncodedTags = () => {
|
|
|
16
16
|
const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
|
|
17
17
|
let tempDiv = null;
|
|
18
18
|
for (let node = walker.nextNode(); node !== null; node = walker.nextNode()) {
|
|
19
|
-
|
|
20
|
-
const data =
|
|
19
|
+
if (!isText(node)) continue;
|
|
20
|
+
const data = node.data;
|
|
21
21
|
if (!data.includes("<") || !tagInTextRegex.test(data)) continue;
|
|
22
|
-
if (hasAncestorWithTagName(
|
|
22
|
+
if (hasAncestorWithTagName(node, opaqueTags)) continue;
|
|
23
23
|
if (tempDiv === null) tempDiv = document.createElement("div");
|
|
24
24
|
tempDiv.innerHTML = data;
|
|
25
|
-
|
|
25
|
+
node.replaceWith(...tempDiv.childNodes);
|
|
26
26
|
}
|
|
27
27
|
};
|
|
28
28
|
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { isElement, isText } from "../../common.js";
|
|
2
2
|
import { find } from "linkifyjs";
|
|
3
3
|
//#region src/transforms/dom/linkifyUrls.ts
|
|
4
4
|
const urlProtocolRegex = /^https?:\/\//i;
|
|
@@ -13,9 +13,9 @@ const linkifyIgnoreTags = new Set([
|
|
|
13
13
|
"style"
|
|
14
14
|
]);
|
|
15
15
|
const collectTextNodes = (node, result = []) => {
|
|
16
|
-
if (node
|
|
17
|
-
for (const child of node.childNodes) if (child
|
|
18
|
-
else if (child
|
|
16
|
+
if (isElement(node) && linkifyIgnoreTags.has(node.tagName.toLowerCase())) return result;
|
|
17
|
+
for (const child of node.childNodes) if (isText(child)) result.push(child);
|
|
18
|
+
else if (isElement(child) && !linkifyIgnoreTags.has(child.tagName.toLowerCase())) collectTextNodes(child, result);
|
|
19
19
|
return result;
|
|
20
20
|
};
|
|
21
21
|
const linkifyUrls = () => {
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { DomTransform } from "../../types.js";
|
|
2
|
+
|
|
3
|
+
//#region src/transforms/dom/markTimestamps.d.ts
|
|
4
|
+
declare const parseTimestampSeconds: (timestamp: string) => number | undefined;
|
|
5
|
+
declare const markTimestamps: DomTransform;
|
|
6
|
+
//#endregion
|
|
7
|
+
export { markTimestamps, parseTimestampSeconds };
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { isElement, isText } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/markTimestamps.ts
|
|
3
|
+
const timestampIgnoreTags = new Set([
|
|
4
|
+
"a",
|
|
5
|
+
"pre",
|
|
6
|
+
"code",
|
|
7
|
+
"kbd",
|
|
8
|
+
"samp",
|
|
9
|
+
"var",
|
|
10
|
+
"script",
|
|
11
|
+
"style"
|
|
12
|
+
]);
|
|
13
|
+
const lineLeadingTimestampRegex = /(^|\n)([ \t]*)((?:\d{1,2}:)?\d{1,2}:\d{2})/gm;
|
|
14
|
+
const numericPartRegex = /^\d+$/;
|
|
15
|
+
const parseTimestampSeconds = (timestamp) => {
|
|
16
|
+
const parts = timestamp.split(":");
|
|
17
|
+
if (!parts.every((part) => numericPartRegex.test(part))) return;
|
|
18
|
+
if (parts.length === 2) {
|
|
19
|
+
const [minutes, seconds] = parts.map(Number);
|
|
20
|
+
if (seconds > 59) return;
|
|
21
|
+
return minutes * 60 + seconds;
|
|
22
|
+
}
|
|
23
|
+
if (parts.length === 3) {
|
|
24
|
+
const [hours, minutes, seconds] = parts.map(Number);
|
|
25
|
+
if (minutes > 59 || seconds > 59) return;
|
|
26
|
+
return hours * 3600 + minutes * 60 + seconds;
|
|
27
|
+
}
|
|
28
|
+
};
|
|
29
|
+
const shouldSkipElement = (element) => {
|
|
30
|
+
return timestampIgnoreTags.has(element.tagName.toLowerCase()) || element.hasAttribute("data-timestamp");
|
|
31
|
+
};
|
|
32
|
+
const collectTextNodes = (node, result = []) => {
|
|
33
|
+
for (const child of node.childNodes) if (isText(child)) result.push(child);
|
|
34
|
+
else if (isElement(child) && !shouldSkipElement(child)) collectTextNodes(child, result);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
const markTimestamps = () => {
|
|
38
|
+
return (document) => {
|
|
39
|
+
const textNodes = collectTextNodes(document);
|
|
40
|
+
for (const node of textNodes) {
|
|
41
|
+
const text = node.textContent;
|
|
42
|
+
if (!text?.includes(":")) continue;
|
|
43
|
+
const parts = [];
|
|
44
|
+
let lastIndex = 0;
|
|
45
|
+
for (const match of text.matchAll(lineLeadingTimestampRegex)) {
|
|
46
|
+
const [, lineStart, leading, token] = match;
|
|
47
|
+
const seconds = parseTimestampSeconds(token);
|
|
48
|
+
if (seconds === void 0) continue;
|
|
49
|
+
const tokenStart = (match.index ?? 0) + lineStart.length + leading.length;
|
|
50
|
+
if (tokenStart > lastIndex) parts.push(document.createTextNode(text.slice(lastIndex, tokenStart)));
|
|
51
|
+
const span = document.createElement("span");
|
|
52
|
+
span.setAttribute("data-timestamp", String(seconds));
|
|
53
|
+
span.textContent = token;
|
|
54
|
+
parts.push(span);
|
|
55
|
+
lastIndex = tokenStart + token.length;
|
|
56
|
+
}
|
|
57
|
+
if (parts.length === 0) continue;
|
|
58
|
+
if (lastIndex < text.length) parts.push(document.createTextNode(text.slice(lastIndex)));
|
|
59
|
+
node.replaceWith(...parts);
|
|
60
|
+
}
|
|
61
|
+
};
|
|
62
|
+
};
|
|
63
|
+
//#endregion
|
|
64
|
+
export { markTimestamps, parseTimestampSeconds };
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { isElement, isText } from "../../common.js";
|
|
1
2
|
//#region src/transforms/dom/mergeConsecutiveOneLinerPres.ts
|
|
2
3
|
const trailingBrRegex = /<br\s*\/?>\s*$/i;
|
|
3
4
|
const surroundingNewlinesRegex = /^\n+|\n+$/g;
|
|
@@ -17,8 +18,8 @@ const mergeConsecutiveOneLinerPres = ({ preservedPreClasses }) => {
|
|
|
17
18
|
const run = [pre];
|
|
18
19
|
let sibling = pre.nextSibling;
|
|
19
20
|
while (sibling) {
|
|
20
|
-
if (sibling
|
|
21
|
-
if (sibling
|
|
21
|
+
if (!isElement(sibling) && !isText(sibling)) break;
|
|
22
|
+
if (isText(sibling)) {
|
|
22
23
|
if (sibling.textContent?.trim() !== "") break;
|
|
23
24
|
sibling = sibling.nextSibling;
|
|
24
25
|
continue;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { isComment, isElement, isText, isWhitespaceText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/mergeFragmentedLists.ts
|
|
3
3
|
const mergeFragmentedLists = () => {
|
|
4
4
|
return (document) => {
|
|
@@ -24,9 +24,8 @@ const mergeFragmentedLists = () => {
|
|
|
24
24
|
let between = target.nextSibling;
|
|
25
25
|
while (between && between !== extra) {
|
|
26
26
|
const next = between.nextSibling;
|
|
27
|
-
|
|
28
|
-
if (
|
|
29
|
-
else if (type === Node.TEXT_NODE) target.appendChild(between);
|
|
27
|
+
if (isComment(between)) between.parentNode?.removeChild(between);
|
|
28
|
+
else if (isText(between)) target.appendChild(between);
|
|
30
29
|
between = next;
|
|
31
30
|
}
|
|
32
31
|
while (extra.firstChild) target.appendChild(extra.firstChild);
|
|
@@ -38,14 +37,13 @@ const mergeFragmentedLists = () => {
|
|
|
38
37
|
const nextMergeableSibling = (from, localName) => {
|
|
39
38
|
let sibling = from.nextSibling;
|
|
40
39
|
while (sibling) {
|
|
41
|
-
|
|
42
|
-
if (
|
|
43
|
-
if (type === Node.TEXT_NODE) {
|
|
40
|
+
if (isElement(sibling)) return sibling.localName === localName ? sibling : void 0;
|
|
41
|
+
if (isText(sibling)) {
|
|
44
42
|
if (!isWhitespaceText(sibling)) return;
|
|
45
43
|
sibling = sibling.nextSibling;
|
|
46
44
|
continue;
|
|
47
45
|
}
|
|
48
|
-
if (
|
|
46
|
+
if (isComment(sibling)) {
|
|
49
47
|
sibling = sibling.nextSibling;
|
|
50
48
|
continue;
|
|
51
49
|
}
|
|
@@ -54,16 +52,15 @@ const nextMergeableSibling = (from, localName) => {
|
|
|
54
52
|
};
|
|
55
53
|
const hasOnlyListItemChildren = (list) => {
|
|
56
54
|
for (let child = list.firstChild; child; child = child.nextSibling) {
|
|
57
|
-
|
|
58
|
-
if (type === Node.ELEMENT_NODE) {
|
|
55
|
+
if (isElement(child)) {
|
|
59
56
|
if (child.localName !== "li") return false;
|
|
60
57
|
continue;
|
|
61
58
|
}
|
|
62
|
-
if (
|
|
59
|
+
if (isText(child)) {
|
|
63
60
|
if (!isWhitespaceText(child)) return false;
|
|
64
61
|
continue;
|
|
65
62
|
}
|
|
66
|
-
if (
|
|
63
|
+
if (!isComment(child)) return false;
|
|
67
64
|
}
|
|
68
65
|
return true;
|
|
69
66
|
};
|
|
@@ -1,10 +1,26 @@
|
|
|
1
1
|
import { isBr, isSkippable } from "../../common.js";
|
|
2
|
-
//#region src/transforms/dom/
|
|
3
|
-
const
|
|
2
|
+
//#region src/transforms/dom/stripBoundaryBreaks.ts
|
|
3
|
+
const boundaryBreakSelectors = [
|
|
4
|
+
"p",
|
|
5
|
+
"h1",
|
|
6
|
+
"h2",
|
|
7
|
+
"h3",
|
|
8
|
+
"h4",
|
|
9
|
+
"h5",
|
|
10
|
+
"h6",
|
|
11
|
+
"div",
|
|
12
|
+
"blockquote",
|
|
13
|
+
"li",
|
|
14
|
+
"ul",
|
|
15
|
+
"ol",
|
|
16
|
+
"figcaption",
|
|
17
|
+
"section"
|
|
18
|
+
];
|
|
19
|
+
const stripBoundaryBreaks = () => {
|
|
4
20
|
return (document) => {
|
|
5
|
-
const
|
|
6
|
-
for (const
|
|
7
|
-
let cursor =
|
|
21
|
+
const elements = document.querySelectorAll(boundaryBreakSelectors.join(", "));
|
|
22
|
+
for (const element of elements) {
|
|
23
|
+
let cursor = element.firstChild;
|
|
8
24
|
let leadingHasBr = false;
|
|
9
25
|
let leadingEnd = null;
|
|
10
26
|
while (cursor && isSkippable(cursor)) {
|
|
@@ -13,7 +29,7 @@ const stripParagraphBoundaryBreaks = () => {
|
|
|
13
29
|
cursor = cursor.nextSibling;
|
|
14
30
|
}
|
|
15
31
|
if (leadingHasBr) {
|
|
16
|
-
let node =
|
|
32
|
+
let node = element.firstChild;
|
|
17
33
|
while (node) {
|
|
18
34
|
const next = node.nextSibling;
|
|
19
35
|
node.remove();
|
|
@@ -21,7 +37,7 @@ const stripParagraphBoundaryBreaks = () => {
|
|
|
21
37
|
node = next;
|
|
22
38
|
}
|
|
23
39
|
}
|
|
24
|
-
cursor =
|
|
40
|
+
cursor = element.lastChild;
|
|
25
41
|
let trailingHasBr = false;
|
|
26
42
|
let trailingEnd = null;
|
|
27
43
|
while (cursor && isSkippable(cursor)) {
|
|
@@ -30,7 +46,7 @@ const stripParagraphBoundaryBreaks = () => {
|
|
|
30
46
|
cursor = cursor.previousSibling;
|
|
31
47
|
}
|
|
32
48
|
if (trailingHasBr) {
|
|
33
|
-
let node =
|
|
49
|
+
let node = element.lastChild;
|
|
34
50
|
while (node) {
|
|
35
51
|
const prev = node.previousSibling;
|
|
36
52
|
node.remove();
|
|
@@ -42,4 +58,4 @@ const stripParagraphBoundaryBreaks = () => {
|
|
|
42
58
|
};
|
|
43
59
|
};
|
|
44
60
|
//#endregion
|
|
45
|
-
export {
|
|
61
|
+
export { stripBoundaryBreaks };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { isElement, isText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/stripEmptyTags.ts
|
|
3
3
|
const preserveWhenEmpty = new Set([
|
|
4
4
|
"iframe",
|
|
@@ -33,12 +33,11 @@ const stripEmptyTags = () => {
|
|
|
33
33
|
let hasContent = false;
|
|
34
34
|
for (let j = 0; j < childCount; j++) {
|
|
35
35
|
const child = childNodes[j];
|
|
36
|
-
|
|
37
|
-
if (nodeType === Node.ELEMENT_NODE) {
|
|
36
|
+
if (isElement(child)) {
|
|
38
37
|
hasContent = true;
|
|
39
38
|
break;
|
|
40
39
|
}
|
|
41
|
-
if (
|
|
40
|
+
if (isText(child) && child.data.trim().length > 0) {
|
|
42
41
|
hasContent = true;
|
|
43
42
|
break;
|
|
44
43
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { isText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/unwrapDoublyNestedLists.ts
|
|
3
3
|
const unwrapDoublyNestedLists = () => {
|
|
4
4
|
return (document) => {
|
|
@@ -21,7 +21,7 @@ const unwrapDoublyNestedLists = () => {
|
|
|
21
21
|
}
|
|
22
22
|
if (elementDisqualified || inner === null) continue;
|
|
23
23
|
let textDisqualified = false;
|
|
24
|
-
for (let node = wrapper.firstChild; node !== null; node = node.nextSibling) if (node
|
|
24
|
+
for (let node = wrapper.firstChild; node !== null; node = node.nextSibling) if (isText(node) && node.textContent?.trim()) {
|
|
25
25
|
textDisqualified = true;
|
|
26
26
|
break;
|
|
27
27
|
}
|
|
@@ -30,7 +30,7 @@ const unwrapDoublyNestedLists = () => {
|
|
|
30
30
|
if (parent === null) continue;
|
|
31
31
|
for (let node = wrapper.firstChild; node !== null;) {
|
|
32
32
|
const next = node.nextSibling;
|
|
33
|
-
if (node
|
|
33
|
+
if (isText(node) || node === inner) parent.insertBefore(node, outer);
|
|
34
34
|
node = next;
|
|
35
35
|
}
|
|
36
36
|
outer.remove();
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { isComment, isElement, isWhitespaceText } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/unwrapHeadingBold.ts
|
|
3
|
+
const headingSelector = "h1, h2, h3, h4, h5, h6";
|
|
4
|
+
const boldTags = new Set(["b", "strong"]);
|
|
5
|
+
const soleContentElement = (heading) => {
|
|
6
|
+
let found = null;
|
|
7
|
+
for (const child of heading.childNodes) {
|
|
8
|
+
if (isWhitespaceText(child) || isComment(child)) continue;
|
|
9
|
+
if (found || !isElement(child)) return null;
|
|
10
|
+
found = child;
|
|
11
|
+
}
|
|
12
|
+
return found;
|
|
13
|
+
};
|
|
14
|
+
const unwrapHeadingBold = () => {
|
|
15
|
+
return (document) => {
|
|
16
|
+
const headings = document.querySelectorAll(headingSelector);
|
|
17
|
+
for (const heading of headings) {
|
|
18
|
+
let bold = soleContentElement(heading);
|
|
19
|
+
while (bold && boldTags.has(bold.localName)) {
|
|
20
|
+
while (bold.firstChild) heading.insertBefore(bold.firstChild, bold);
|
|
21
|
+
bold.remove();
|
|
22
|
+
bold = soleContentElement(heading);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
};
|
|
27
|
+
//#endregion
|
|
28
|
+
export { unwrapHeadingBold };
|
|
@@ -7,7 +7,11 @@ const wrapperTags = new Set([
|
|
|
7
7
|
"header",
|
|
8
8
|
"footer"
|
|
9
9
|
]);
|
|
10
|
-
const preservedPrefixes = [
|
|
10
|
+
const preservedPrefixes = [
|
|
11
|
+
"data-embed",
|
|
12
|
+
"data-bookmark",
|
|
13
|
+
"data-table"
|
|
14
|
+
];
|
|
11
15
|
const hasPreservedAttribute = (element) => {
|
|
12
16
|
const attributes = element.attributes;
|
|
13
17
|
for (let i = 0, n = attributes.length; i < n; i++) {
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { hasAncestorWithTagName } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/wrapTablesForScroll.ts
|
|
3
|
+
const tableTags = new Set(["table"]);
|
|
4
|
+
const wrapTablesForScroll = () => {
|
|
5
|
+
return (document) => {
|
|
6
|
+
const tables = document.querySelectorAll("table");
|
|
7
|
+
for (const table of tables) {
|
|
8
|
+
const parent = table.parentNode;
|
|
9
|
+
if (!parent) continue;
|
|
10
|
+
if (hasAncestorWithTagName(table, tableTags)) continue;
|
|
11
|
+
if (table.parentElement?.hasAttribute("data-table")) continue;
|
|
12
|
+
const wrapper = document.createElement("div");
|
|
13
|
+
wrapper.setAttribute("data-table", "");
|
|
14
|
+
parent.insertBefore(wrapper, table);
|
|
15
|
+
wrapper.appendChild(table);
|
|
16
|
+
}
|
|
17
|
+
};
|
|
18
|
+
};
|
|
19
|
+
//#endregion
|
|
20
|
+
export { wrapTablesForScroll };
|
package/package.json
CHANGED