feedsweep 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/defaults.js +3 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -1
- package/dist/transforms/dom/stripBoundaryBreaks.js +50 -37
- package/dist/transforms/dom/stripEmptyTags.js +12 -3
- package/dist/transforms/dom/wrapBareInlineInParagraphs.d.ts +6 -0
- package/dist/transforms/dom/wrapBareInlineInParagraphs.js +62 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -50,6 +50,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
|
|
|
50
50
|
| `unwrapEmojiImages` | Replace WordPress/Facebook/Twitter/GitHub emoji `<img>` tags with their alt-text glyph |
|
|
51
51
|
| `stripTrackingParams` | Remove UTM and other tracking parameters |
|
|
52
52
|
| `convertBreaksToParagraphs` | Convert `<br><br>` runs into semantic `<p>` blocks |
|
|
53
|
+
| `wrapBareInlineInParagraphs` | Wrap bare inline runs (delimited by block-level children) in semantic `<p>` blocks |
|
|
53
54
|
| `injectEnclosures` | Inject feed enclosures into content as native `<audio>`/`<video>` or iframe placeholders |
|
|
54
55
|
| `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
|
|
55
56
|
| `convertBookmarkCards` | Convert link-preview cards into `data-bookmark-*` placeholders via a registry of per-provider `BookmarkResolver`s (`defaultBookmarkResolvers`: Ghost `kg-bookmark-card`, Substack `embedded-publication-wrap`). Extend via `bookmarkResolvers` |
|
package/dist/defaults.js
CHANGED
|
@@ -31,6 +31,7 @@ import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
|
31
31
|
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
32
32
|
import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
33
33
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
34
|
+
import { wrapBareInlineInParagraphs } from "./transforms/dom/wrapBareInlineInParagraphs.js";
|
|
34
35
|
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
35
36
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
36
37
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
@@ -73,6 +74,7 @@ const defaultDomTransforms = [
|
|
|
73
74
|
removeTrackingPixels,
|
|
74
75
|
unwrapEmojiImages,
|
|
75
76
|
convertBreaksToParagraphs,
|
|
77
|
+
wrapBareInlineInParagraphs,
|
|
76
78
|
stripInterBlockBreaks,
|
|
77
79
|
stripBoundaryBreaks,
|
|
78
80
|
mergeFragmentedLists,
|
|
@@ -85,8 +87,8 @@ const defaultDomTransforms = [
|
|
|
85
87
|
replaceEmbedsWithPlaceholders,
|
|
86
88
|
injectEnclosures,
|
|
87
89
|
proxyAssetUrls,
|
|
88
|
-
unwrapWrappers,
|
|
89
90
|
stripEmptyTags,
|
|
91
|
+
unwrapWrappers,
|
|
90
92
|
wrapTablesForScroll
|
|
91
93
|
];
|
|
92
94
|
const defaultEmbedResolvers = [youtubeEmbedResolver];
|
package/dist/index.d.ts
CHANGED
|
@@ -35,6 +35,7 @@ import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
|
35
35
|
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
36
36
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
37
37
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
38
|
+
import { wrapBareInlineInParagraphs } from "./transforms/dom/wrapBareInlineInParagraphs.js";
|
|
38
39
|
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
39
40
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
40
41
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
@@ -119,4 +120,4 @@ import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor
|
|
|
119
120
|
//#region src/index.d.ts
|
|
120
121
|
declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
|
|
121
122
|
//#endregion
|
|
122
|
-
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, 
unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
123
|
+
export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, 
unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapBareInlineInParagraphs, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
package/dist/index.js
CHANGED
|
@@ -33,6 +33,7 @@ import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
|
|
|
33
33
|
import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
|
|
34
34
|
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
35
35
|
import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
|
|
36
|
+
import { wrapBareInlineInParagraphs } from "./transforms/dom/wrapBareInlineInParagraphs.js";
|
|
36
37
|
import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
|
|
37
38
|
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
38
39
|
import { stripControlChars } from "./transforms/string/stripControlChars.js";
|
|
@@ -140,4 +141,4 @@ const transformContent = async (html, options) => {
|
|
|
140
141
|
return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
|
|
141
142
|
};
|
|
142
143
|
//#endregion
|
|
143
|
-
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, 
unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
144
|
+
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, 
unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapBareInlineInParagraphs, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isBr,
|
|
1
|
+
import { isBlockElement, isBr, isComment, isElement, isWhitespaceText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/stripBoundaryBreaks.ts
|
|
3
3
|
const boundaryBreakSelectors = [
|
|
4
4
|
"p",
|
|
@@ -16,44 +16,57 @@ const boundaryBreakSelectors = [
|
|
|
16
16
|
"figcaption",
|
|
17
17
|
"section"
|
|
18
18
|
];
|
|
19
|
+
const isInlineWrapper = (node) => {
|
|
20
|
+
return isElement(node) && !isBlockElement(node) && !isBr(node);
|
|
21
|
+
};
|
|
22
|
+
const isVisuallyEmpty = (node) => {
|
|
23
|
+
for (let child = node.firstChild; child; child = child.nextSibling) {
|
|
24
|
+
if (isWhitespaceText(child) || isComment(child) || isBr(child)) continue;
|
|
25
|
+
return false;
|
|
26
|
+
}
|
|
27
|
+
return true;
|
|
28
|
+
};
|
|
29
|
+
const stripEdge = (container, trailing) => {
|
|
30
|
+
let node = trailing ? container.lastChild : container.firstChild;
|
|
31
|
+
let sawBr = false;
|
|
32
|
+
let pending = [];
|
|
33
|
+
const removePending = () => {
|
|
34
|
+
if (sawBr) for (const item of pending) item.remove();
|
|
35
|
+
};
|
|
36
|
+
while (node) {
|
|
37
|
+
const next = trailing ? node.previousSibling : node.nextSibling;
|
|
38
|
+
if (isWhitespaceText(node) || isComment(node)) {
|
|
39
|
+
pending.push(node);
|
|
40
|
+
node = next;
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
if (isBr(node)) {
|
|
44
|
+
sawBr = true;
|
|
45
|
+
pending.push(node);
|
|
46
|
+
node = next;
|
|
47
|
+
continue;
|
|
48
|
+
}
|
|
49
|
+
if (isInlineWrapper(node)) {
|
|
50
|
+
removePending();
|
|
51
|
+
pending = [];
|
|
52
|
+
sawBr = false;
|
|
53
|
+
stripEdge(node, trailing);
|
|
54
|
+
if (isVisuallyEmpty(node)) {
|
|
55
|
+
node = next;
|
|
56
|
+
continue;
|
|
57
|
+
}
|
|
58
|
+
return;
|
|
59
|
+
}
|
|
60
|
+
removePending();
|
|
61
|
+
return;
|
|
62
|
+
}
|
|
63
|
+
removePending();
|
|
64
|
+
};
|
|
19
65
|
const stripBoundaryBreaks = () => {
|
|
20
66
|
return (document) => {
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
let leadingHasBr = false;
|
|
25
|
-
let leadingEnd = null;
|
|
26
|
-
while (cursor && isSkippable(cursor)) {
|
|
27
|
-
if (!leadingHasBr && isBr(cursor)) leadingHasBr = true;
|
|
28
|
-
leadingEnd = cursor;
|
|
29
|
-
cursor = cursor.nextSibling;
|
|
30
|
-
}
|
|
31
|
-
if (leadingHasBr) {
|
|
32
|
-
let node = element.firstChild;
|
|
33
|
-
while (node) {
|
|
34
|
-
const next = node.nextSibling;
|
|
35
|
-
node.remove();
|
|
36
|
-
if (node === leadingEnd) break;
|
|
37
|
-
node = next;
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
cursor = element.lastChild;
|
|
41
|
-
let trailingHasBr = false;
|
|
42
|
-
let trailingEnd = null;
|
|
43
|
-
while (cursor && isSkippable(cursor)) {
|
|
44
|
-
if (!trailingHasBr && isBr(cursor)) trailingHasBr = true;
|
|
45
|
-
trailingEnd = cursor;
|
|
46
|
-
cursor = cursor.previousSibling;
|
|
47
|
-
}
|
|
48
|
-
if (trailingHasBr) {
|
|
49
|
-
let node = element.lastChild;
|
|
50
|
-
while (node) {
|
|
51
|
-
const prev = node.previousSibling;
|
|
52
|
-
node.remove();
|
|
53
|
-
if (node === trailingEnd) break;
|
|
54
|
-
node = prev;
|
|
55
|
-
}
|
|
56
|
-
}
|
|
67
|
+
for (const element of document.querySelectorAll(boundaryBreakSelectors.join(", "))) {
|
|
68
|
+
stripEdge(element, false);
|
|
69
|
+
stripEdge(element, true);
|
|
57
70
|
}
|
|
58
71
|
};
|
|
59
72
|
};
|
|
@@ -1,5 +1,12 @@
|
|
|
1
|
-
import { isElement, isText } from "../../common.js";
|
|
1
|
+
import { isBlockElement, isElement, isText } from "../../common.js";
|
|
2
2
|
//#region src/transforms/dom/stripEmptyTags.ts
|
|
3
|
+
/**
 * Table row/cell and definition-list item tag names. `stripEmptyTags`
 * consults this set so that these elements are excluded from the
 * "remove content-less block element" branch.
 */
const structuralTags = new Set(["td", "th", "tr", "dt", "dd"]);
|
|
3
10
|
const preserveWhenEmpty = new Set([
|
|
4
11
|
"iframe",
|
|
5
12
|
"video",
|
|
@@ -28,6 +35,7 @@ const stripEmptyTags = () => {
|
|
|
28
35
|
const tagName = element.localName;
|
|
29
36
|
if (preserveWhenEmpty.has(tagName)) continue;
|
|
30
37
|
if (tagName.includes("-")) continue;
|
|
38
|
+
if (element.hasAttribute("id") || element.hasAttribute("name")) continue;
|
|
31
39
|
const childNodes = element.childNodes;
|
|
32
40
|
const childCount = childNodes.length;
|
|
33
41
|
let hasContent = false;
|
|
@@ -43,8 +51,9 @@ const stripEmptyTags = () => {
|
|
|
43
51
|
}
|
|
44
52
|
}
|
|
45
53
|
if (hasContent) continue;
|
|
46
|
-
if (childCount
|
|
47
|
-
else element.remove();
|
|
54
|
+
if (childCount === 0) element.remove();
|
|
55
|
+
else if (isBlockElement(element) && !structuralTags.has(tagName)) element.remove();
|
|
56
|
+
else element.replaceWith(" ");
|
|
48
57
|
}
|
|
49
58
|
};
|
|
50
59
|
};
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { hasAncestorWithTagName, isBlockElement } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/wrapBareInlineInParagraphs.ts
|
|
3
|
+
/** Selector for the container elements whose direct children are scanned for bare inline runs. */
const processContainersSelector = [
  "body",
  "div",
  "blockquote",
  "td",
  "li",
  "article",
  "section",
  "main",
  "header",
  "footer",
  "aside"
].join(", ");
/** Tag names passed to `hasAncestorWithTagName`; containers with such an ancestor are skipped. */
const inlineHostTags = new Set([
  "pre",
  "code",
  "figure",
  "figcaption",
  "a",
  "picture",
  "caption",
  "summary",
  // h1 … h6
  ...Array.from({ length: 6 }, (_, index) => `h${index + 1}`)
]);
/** Containers that are processed even when they have no block-level child. */
const dissolvingTags = new Set(["div", "article", "section", "main", "header", "footer"]);
|
|
28
|
+
const wrapBareInlineInParagraphs = () => {
|
|
29
|
+
return (document) => {
|
|
30
|
+
for (const container of document.querySelectorAll(processContainersSelector)) {
|
|
31
|
+
if (hasAncestorWithTagName(container, inlineHostTags)) continue;
|
|
32
|
+
const children = [];
|
|
33
|
+
let hasBlockChild = false;
|
|
34
|
+
for (let node = container.firstChild; node; node = node.nextSibling) {
|
|
35
|
+
children.push(node);
|
|
36
|
+
if (isBlockElement(node)) hasBlockChild = true;
|
|
37
|
+
}
|
|
38
|
+
if (!(container.localName === "body" || dissolvingTags.has(container.localName) || hasBlockChild)) continue;
|
|
39
|
+
const newChildren = [];
|
|
40
|
+
let buffer = [];
|
|
41
|
+
let wrapped = false;
|
|
42
|
+
const flush = () => {
|
|
43
|
+
if (buffer.length === 0) return;
|
|
44
|
+
if (buffer.some((node) => node.textContent?.trim())) {
|
|
45
|
+
const paragraph = document.createElement("p");
|
|
46
|
+
for (const node of buffer) paragraph.appendChild(node);
|
|
47
|
+
newChildren.push(paragraph);
|
|
48
|
+
wrapped = true;
|
|
49
|
+
} else for (const node of buffer) newChildren.push(node);
|
|
50
|
+
buffer = [];
|
|
51
|
+
};
|
|
52
|
+
for (const child of children) if (isBlockElement(child)) {
|
|
53
|
+
flush();
|
|
54
|
+
newChildren.push(child);
|
|
55
|
+
} else buffer.push(child);
|
|
56
|
+
flush();
|
|
57
|
+
if (wrapped) container.replaceChildren(...newChildren);
|
|
58
|
+
}
|
|
59
|
+
};
|
|
60
|
+
};
|
|
61
|
+
//#endregion
|
|
62
|
+
export { wrapBareInlineInParagraphs };
|
package/package.json
CHANGED