feedsweep 2.1.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/defaults.js +7 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.js +4 -1
- package/dist/transforms/dom/stripBoundaryBreaks.js +50 -37
- package/dist/transforms/dom/stripEmptyTags.js +12 -3
- package/dist/transforms/dom/unwrapHeadingBold.d.ts +6 -0
- package/dist/transforms/dom/unwrapHeadingBold.js +28 -0
- package/dist/transforms/dom/unwrapWrappers.js +5 -1
- package/dist/transforms/dom/wrapBareInlineInParagraphs.d.ts +6 -0
- package/dist/transforms/dom/wrapBareInlineInParagraphs.js +62 -0
- package/dist/transforms/dom/wrapTablesForScroll.d.ts +6 -0
- package/dist/transforms/dom/wrapTablesForScroll.js +20 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -42,6 +42,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
42
42
|
| `stripBoundaryBreaks` | Remove `<br>` tags adjacent to block-element boundaries (paragraphs, headings, divs, list items, blockquotes, …) |
|
|
43
43
|
| `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
|
|
44
44
|
| `demoteHeadings` | Shift every heading down by one level (`<h1>`→`<h2>`, …, `<h5>`→`<h6>`) when the body contains an `<h1>`, so it sits below the reader's own page title |
|
|
45
|
+
| `unwrapHeadingBold` | Unwrap `<b>`/`<strong>` that wraps the entire content of a heading (redundant — headings are already bold) |
|
|
45
46
|
| `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
|
|
46
47
|
| `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
|
|
47
48
|
| `stripInertElements` | Remove platform chrome and dead placeholders — subscribe widgets, share buttons, related-posts widgets, ad slots (AdSense / AdThrive), author bio blocks, email preheaders, Substack image controls, and Drupal `<drupal-render-placeholder>` tags. Pass `inertSelectors` to extend or replace |
|
|
@@ -49,6 +50,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
49
50
|
| `unwrapEmojiImages` | Replace WordPress/Facebook/Twitter/GitHub emoji `<img>` tags with their alt-text glyph |
|
|
50
51
|
| `stripTrackingParams` | Remove UTM and other tracking parameters |
|
|
51
52
|
| `convertBreaksToParagraphs` | Convert `<br><br>` runs into semantic `<p>` blocks |
|
|
53
|
+
| `wrapBareInlineInParagraphs` | Wrap bare inline runs (delimited by block-level children) in semantic `<p>` blocks |
|
|
52
54
|
| `injectEnclosures` | Inject feed enclosures into content as native `<audio>`/`<video>` or iframe placeholders |
|
|
53
55
|
| `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
|
|
54
56
|
| `convertBookmarkCards` | Convert link-preview cards into `data-bookmark-*` placeholders via a registry of per-provider `BookmarkResolver`s (`defaultBookmarkResolvers`: Ghost `kg-bookmark-card`, Substack `embedded-publication-wrap`). Extend via `bookmarkResolvers` |
|
|
@@ -57,6 +59,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
57
59
|
| `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
|
|
58
60
|
| `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
|
|
59
61
|
| `unwrapDoublyNestedLists` | Unwrap `<ul>`/`<ol>` that wrap a single `<li>` containing a same-type list |
|
|
62
|
+
| `wrapTablesForScroll` | Wrap each top-level `<table>` in a `<div data-table>` as a horizontal-scroll container |
|
|
60
63
|
| `mergeFragmentedLists` | Merge consecutive sibling `<ul>` / `<ol>` lists with matching attributes |
|
|
61
64
|
| `paragraphizePlainText` | Wrap plain text in `<p>` tags |
|
|
62
65
|
| `stripOversizedBase64Sources` | Drop base64 `src`/`srcset`/`poster` payloads larger than 50 KB before parsing |
|
package/dist/defaults.js
CHANGED
|
@@ -28,8 +28,11 @@ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
|
28
28
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
29
29
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
30
30
|
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
31
|
+
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
31
32
|
import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
32
33
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
34
|
+
import { wrapBareInlineInParagraphs } from "./transforms/dom/wrapBareInlineInParagraphs.js";
|
|
35
|
+
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
33
36
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
34
37
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
35
38
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
@@ -60,6 +63,7 @@ const defaultDomTransforms = [
|
|
|
60
63
|
unwrapDoublyNestedLists,
|
|
61
64
|
stripDuplicateTitleHeading,
|
|
62
65
|
demoteHeadings,
|
|
66
|
+
unwrapHeadingBold,
|
|
63
67
|
fixLazyImages,
|
|
64
68
|
stripInertElements,
|
|
65
69
|
resolveRelativeUrls,
|
|
@@ -70,6 +74,7 @@ const defaultDomTransforms = [
|
|
|
70
74
|
removeTrackingPixels,
|
|
71
75
|
unwrapEmojiImages,
|
|
72
76
|
convertBreaksToParagraphs,
|
|
77
|
+
wrapBareInlineInParagraphs,
|
|
73
78
|
stripInterBlockBreaks,
|
|
74
79
|
stripBoundaryBreaks,
|
|
75
80
|
mergeFragmentedLists,
|
|
@@ -82,8 +87,9 @@ const defaultDomTransforms = [
|
|
|
82
87
|
replaceEmbedsWithPlaceholders,
|
|
83
88
|
injectEnclosures,
|
|
84
89
|
proxyAssetUrls,
|
|
90
|
+
stripEmptyTags,
|
|
85
91
|
unwrapWrappers,
|
|
86
|
-
|
|
92
|
+
wrapTablesForScroll
|
|
87
93
|
];
|
|
88
94
|
const defaultEmbedResolvers = [youtubeEmbedResolver];
|
|
89
95
|
const defaultBookmarkResolvers = [ghostBookmarkResolver, substackBookmarkResolver];
|
package/dist/index.d.ts
CHANGED
|
@@ -32,8 +32,11 @@ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
|
32
32
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
33
33
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
34
34
|
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
35
|
+
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
35
36
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
36
37
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
38
|
+
import { wrapBareInlineInParagraphs } from "./transforms/dom/wrapBareInlineInParagraphs.js";
|
|
39
|
+
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
37
40
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
38
41
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
39
42
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
@@ -117,4 +120,4 @@ import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor
|
|
|
117
120
|
//#region src/index.d.ts
|
|
118
121
|
declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
|
|
119
122
|
//#endregion
|
|
120
|
-
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, 
unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
123
|
+
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, 
unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapBareInlineInParagraphs, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
package/dist/index.js
CHANGED
|
@@ -30,8 +30,11 @@ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
|
30
30
|
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
31
31
|
import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
|
|
32
32
|
import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
33
|
+
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
33
34
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
34
35
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
36
|
+
import { wrapBareInlineInParagraphs } from "./transforms/dom/wrapBareInlineInParagraphs.js";
|
|
37
|
+
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
35
38
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
36
39
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
37
40
|
import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
|
|
@@ -138,4 +141,4 @@ const transformContent = async (html, options) => {
|
|
|
138
141
|
return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
|
|
139
142
|
};
|
|
140
143
|
//#endregion
|
|
141
|
-
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, 
unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
144
|
+
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, 
unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapBareInlineInParagraphs, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isBr,
|
|
1
|
+
import { isBlockElement, isBr, isComment, isElement, isWhitespaceText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/stripBoundaryBreaks.ts
|
|
3
3
|
const boundaryBreakSelectors = [
|
|
4
4
|
"p",
|
|
@@ -16,44 +16,57 @@ const boundaryBreakSelectors = [
|
|
|
16
16
|
"figcaption",
|
|
17
17
|
"section"
|
|
18
18
|
];
|
|
19
|
+
/**
 * Tells whether `node` is an element that is neither block-level nor a `<br>`.
 * `stripEdge` descends into such nodes to trim breaks at their own edges.
 *
 * @param {Node} node - Node to classify.
 * @returns {boolean} True for non-block, non-`<br>` elements.
 */
const isInlineWrapper = (node) => {
  if (!isElement(node)) return false;
  return !(isBlockElement(node) || isBr(node));
};
|
|
22
|
+
// Elements that render content of their own even when they have no child nodes.
const replacedContentTags = new Set([
  "img",
  "video",
  "audio",
  "iframe",
  "embed",
  "object",
  "canvas",
  "svg",
  "input",
]);

/**
 * Reports whether an element shows nothing to the reader.
 *
 * An element counts as empty when every child is whitespace-only text, a
 * comment, or a `<br>`. Replaced elements such as `<img>` are never empty,
 * even though they have no children: treating them as empty would let the
 * edge walk continue past an image and delete a `<br>` that separates the
 * image from the text after it.
 *
 * @param {Element} node - Element to inspect.
 * @returns {boolean} True when the element has no visible content.
 */
const isVisuallyEmpty = (node) => {
  if (replacedContentTags.has(node.localName)) return false;
  for (let child = node.firstChild; child; child = child.nextSibling) {
    if (isWhitespaceText(child) || isComment(child) || isBr(child)) continue;
    return false;
  }
  return true;
};
|
|
29
|
+
/**
 * Trims `<br>` elements (plus the whitespace text and comments around them)
 * from one edge of `container`.
 *
 * The walk starts at the first child (or the last child when `trailing` is
 * true) and collects whitespace text, comments and `<br>` nodes. The collected
 * nodes are removed only if at least one of them was a `<br>`. Inline wrappers
 * met on the way are trimmed recursively; the walk goes on past a wrapper only
 * when that wrapper ends up visually empty.
 *
 * @param {Node} container - Node whose edge is trimmed.
 * @param {boolean} trailing - True to trim the end, false to trim the start.
 * @returns {void}
 */
const stripEdge = (container, trailing) => {
  const advance = (from) => (trailing ? from.previousSibling : from.nextSibling);
  let candidates = [];
  let breakSeen = false;
  const dropCandidates = () => {
    if (!breakSeen) return;
    candidates.forEach((candidate) => candidate.remove());
  };

  let current = trailing ? container.lastChild : container.firstChild;
  while (current) {
    // Capture the neighbour first: later steps may detach nodes.
    const upcoming = advance(current);
    const ignorable = isWhitespaceText(current) || isComment(current);
    if (ignorable || isBr(current)) {
      if (!ignorable) breakSeen = true;
      candidates.push(current);
    } else if (isInlineWrapper(current)) {
      // Everything gathered so far sits outside the wrapper; settle it now.
      dropCandidates();
      candidates = [];
      breakSeen = false;
      stripEdge(current, trailing);
      // A wrapper with remaining content is the real edge: stop here.
      if (!isVisuallyEmpty(current)) return;
    } else {
      // Real content (text or a block element) ends the walk.
      break;
    }
    current = upcoming;
  }
  dropCandidates();
};
|
|
19
65
|
const stripBoundaryBreaks = () => {
|
|
20
66
|
return (document) => {
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
let leadingHasBr = false;
|
|
25
|
-
let leadingEnd = null;
|
|
26
|
-
while (cursor && isSkippable(cursor)) {
|
|
27
|
-
if (!leadingHasBr && isBr(cursor)) leadingHasBr = true;
|
|
28
|
-
leadingEnd = cursor;
|
|
29
|
-
cursor = cursor.nextSibling;
|
|
30
|
-
}
|
|
31
|
-
if (leadingHasBr) {
|
|
32
|
-
let node = element.firstChild;
|
|
33
|
-
while (node) {
|
|
34
|
-
const next = node.nextSibling;
|
|
35
|
-
node.remove();
|
|
36
|
-
if (node === leadingEnd) break;
|
|
37
|
-
node = next;
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
cursor = element.lastChild;
|
|
41
|
-
let trailingHasBr = false;
|
|
42
|
-
let trailingEnd = null;
|
|
43
|
-
while (cursor && isSkippable(cursor)) {
|
|
44
|
-
if (!trailingHasBr && isBr(cursor)) trailingHasBr = true;
|
|
45
|
-
trailingEnd = cursor;
|
|
46
|
-
cursor = cursor.previousSibling;
|
|
47
|
-
}
|
|
48
|
-
if (trailingHasBr) {
|
|
49
|
-
let node = element.lastChild;
|
|
50
|
-
while (node) {
|
|
51
|
-
const prev = node.previousSibling;
|
|
52
|
-
node.remove();
|
|
53
|
-
if (node === trailingEnd) break;
|
|
54
|
-
node = prev;
|
|
55
|
-
}
|
|
56
|
-
}
|
|
67
|
+
for (const element of document.querySelectorAll(boundaryBreakSelectors.join(", "))) {
|
|
68
|
+
stripEdge(element, false);
|
|
69
|
+
stripEdge(element, true);
|
|
57
70
|
}
|
|
58
71
|
};
|
|
59
72
|
};
|
|
@@ -1,5 +1,12 @@
|
|
|
1
|
-
import { isElement, isText } from "../../common.js";
|
|
1
|
+
import { isBlockElement, isElement, isText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/stripEmptyTags.ts
|
|
3
|
+
// Table row/cell and description-list tags. stripEmptyTags consults this set
// for content-less elements that still have child nodes: block elements in the
// set are replaced by a single space instead of being removed.
const structuralTags = new Set(["td", "th", "tr", "dt", "dd"]);
|
|
3
10
|
const preserveWhenEmpty = new Set([
|
|
4
11
|
"iframe",
|
|
5
12
|
"video",
|
|
@@ -28,6 +35,7 @@ const stripEmptyTags = () => {
|
|
|
28
35
|
const tagName = element.localName;
|
|
29
36
|
if (preserveWhenEmpty.has(tagName)) continue;
|
|
30
37
|
if (tagName.includes("-")) continue;
|
|
38
|
+
if (element.hasAttribute("id") || element.hasAttribute("name")) continue;
|
|
31
39
|
const childNodes = element.childNodes;
|
|
32
40
|
const childCount = childNodes.length;
|
|
33
41
|
let hasContent = false;
|
|
@@ -43,8 +51,9 @@ const stripEmptyTags = () => {
|
|
|
43
51
|
}
|
|
44
52
|
}
|
|
45
53
|
if (hasContent) continue;
|
|
46
|
-
if (childCount
|
|
47
|
-
else element.remove();
|
|
54
|
+
if (childCount === 0) element.remove();
|
|
55
|
+
else if (isBlockElement(element) && !structuralTags.has(tagName)) element.remove();
|
|
56
|
+
else element.replaceWith(" ");
|
|
48
57
|
}
|
|
49
58
|
};
|
|
50
59
|
};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { isComment, isElement, isWhitespaceText } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/unwrapHeadingBold.ts
|
|
3
|
+
// Selector matching all six heading levels.
const headingSelector = "h1, h2, h3, h4, h5, h6";

// Local names of the elements treated as bold wrappers.
const boldTags = new Set(["b", "strong"]);
|
|
5
|
+
/**
 * Finds the single element that makes up the whole content of `heading`.
 *
 * Whitespace-only text and comments are ignored. The result is null when the
 * heading has no remaining child, more than one, or one that is not an element.
 *
 * @param {Element} heading - Heading element to inspect.
 * @returns {Element|null} The sole content element, or null.
 */
const soleContentElement = (heading) => {
  const significant = [...heading.childNodes].filter(
    (child) => !(isWhitespaceText(child) || isComment(child)),
  );
  if (significant.length !== 1) return null;
  const [only] = significant;
  return isElement(only) ? only : null;
};
|
|
14
|
+
/**
 * Creates a DOM transform that unwraps `<b>`/`<strong>` elements forming the
 * entire content of a heading. Nested bold wrappers are peeled one at a time
 * until the heading's sole content is no longer a bold element.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const unwrapHeadingBold = () => {
  // Moves the wrapper's children in front of it, then drops the empty wrapper.
  const dissolve = (wrapper, heading) => {
    for (let inner = wrapper.firstChild; inner; inner = wrapper.firstChild) {
      heading.insertBefore(inner, wrapper);
    }
    wrapper.remove();
  };

  return (document) => {
    for (const heading of document.querySelectorAll(headingSelector)) {
      for (
        let wrapper = soleContentElement(heading);
        wrapper && boldTags.has(wrapper.localName);
        wrapper = soleContentElement(heading)
      ) {
        dissolve(wrapper, heading);
      }
    }
  };
};
|
|
27
|
+
//#endregion
|
|
28
|
+
export { unwrapHeadingBold };
|
|
@@ -7,7 +7,11 @@ const wrapperTags = new Set([
|
|
|
7
7
|
"header",
|
|
8
8
|
"footer"
|
|
9
9
|
]);
|
|
10
|
-
const preservedPrefixes = [
|
|
10
|
+
// Attribute-name prefixes for placeholder/container markers.
// NOTE(review): presumably matched by hasPreservedAttribute so that elements
// carrying them are not unwrapped — its body is outside this hunk; confirm.
const preservedPrefixes = ["data-embed", "data-bookmark", "data-table"];
|
|
11
15
|
const hasPreservedAttribute = (element) => {
|
|
12
16
|
const attributes = element.attributes;
|
|
13
17
|
for (let i = 0, n = attributes.length; i < n; i++) {
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { hasAncestorWithTagName, isBlockElement } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/wrapBareInlineInParagraphs.ts
|
|
3
|
+
// Containers whose direct children are scanned for bare inline runs.
const processContainersSelector = "body, div, blockquote, td, li, article, section, main, header, footer, aside";

// A container nested inside any of these tags is left untouched.
const inlineHostTags = new Set([
  "pre", "code", "figure", "figcaption", "a", "picture", "caption", "summary",
  "h1", "h2", "h3", "h4", "h5", "h6",
]);

// Like <body>, containers with these tags are processed even when they have
// no block-level child.
const dissolvingTags = new Set(["div", "article", "section", "main", "header", "footer"]);
|
|
28
|
+
/**
 * Creates a DOM transform that wraps runs of non-block children in `<p>`.
 *
 * For every container matched by `processContainersSelector` (and not nested
 * in an inline-host tag), consecutive non-block children delimited by
 * block-level children are moved into a new paragraph, provided the run holds
 * visible text. `<body>` and the dissolving tags are always processed; other
 * containers only when they have at least one block-level child.
 *
 * Comment nodes never count as visible text: their `textContent` is the
 * comment body, so a run made only of a comment (e.g. `<!-- more -->`) would
 * otherwise be wrapped into an empty paragraph.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const wrapBareInlineInParagraphs = () => {
  // Node.COMMENT_NODE, spelled out because no global `Node` is guaranteed here.
  const COMMENT_NODE = 8;
  const hasVisibleText = (node) => node.nodeType !== COMMENT_NODE && Boolean(node.textContent?.trim());

  return (document) => {
    for (const container of document.querySelectorAll(processContainersSelector)) {
      if (hasAncestorWithTagName(container, inlineHostTags)) continue;

      // Snapshot the children first: wrapping re-parents nodes.
      const children = [];
      let hasBlockChild = false;
      for (let node = container.firstChild; node; node = node.nextSibling) {
        children.push(node);
        if (isBlockElement(node)) hasBlockChild = true;
      }

      const alwaysProcessed = container.localName === "body" || dissolvingTags.has(container.localName);
      if (!alwaysProcessed && !hasBlockChild) continue;

      const newChildren = [];
      let buffer = [];
      let wrapped = false;
      // Emits the pending run: wrapped in <p> when it has visible text, as-is otherwise.
      const flush = () => {
        if (buffer.length === 0) return;
        if (buffer.some((node) => hasVisibleText(node))) {
          const paragraph = document.createElement("p");
          for (const node of buffer) paragraph.appendChild(node);
          newChildren.push(paragraph);
          wrapped = true;
        } else {
          for (const node of buffer) newChildren.push(node);
        }
        buffer = [];
      };

      for (const child of children) {
        if (isBlockElement(child)) {
          flush();
          newChildren.push(child);
        } else {
          buffer.push(child);
        }
      }
      flush();

      // Leave the container alone unless a paragraph was actually created.
      if (wrapped) container.replaceChildren(...newChildren);
    }
  };
};
|
|
61
|
+
//#endregion
|
|
62
|
+
export { wrapBareInlineInParagraphs };
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { hasAncestorWithTagName } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/wrapTablesForScroll.ts
|
|
3
|
+
// Ancestor tags that mark a table as nested inside another table.
const tableTags = new Set(["table"]);

/**
 * Creates a DOM transform that puts each table inside a `<div data-table>`
 * wrapper. Tables nested in another table, and tables whose parent element
 * already carries `data-table`, are left as they are.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const wrapTablesForScroll = () => (document) => {
  for (const table of document.querySelectorAll("table")) {
    const host = table.parentNode;
    const skip =
      !host ||
      hasAncestorWithTagName(table, tableTags) ||
      table.parentElement?.hasAttribute("data-table");
    if (skip) continue;

    const scroller = document.createElement("div");
    scroller.setAttribute("data-table", "");
    // Insert the wrapper at the table's position, then move the table into it.
    host.insertBefore(scroller, table);
    scroller.appendChild(table);
  }
};
|
|
19
|
+
//#endregion
|
|
20
|
+
export { wrapTablesForScroll };
|
package/package.json
CHANGED