feedsweep 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +70 -0
- package/dist/common.d.ts +11 -0
- package/dist/common.js +116 -0
- package/dist/defaults.d.ts +14 -0
- package/dist/defaults.js +127 -0
- package/dist/embeds/youtube.d.ts +9 -0
- package/dist/embeds/youtube.js +50 -0
- package/dist/index.d.ts +105 -0
- package/dist/index.js +118 -0
- package/dist/transforms/dom/fixLazyImages.d.ts +6 -0
- package/dist/transforms/dom/fixLazyImages.js +38 -0
- package/dist/transforms/dom/highlightCode.d.ts +7 -0
- package/dist/transforms/dom/highlightCode.js +30 -0
- package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.d.ts +6 -0
- package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.js +33 -0
- package/dist/transforms/dom/linkifyUrls.d.ts +6 -0
- package/dist/transforms/dom/linkifyUrls.js +45 -0
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.d.ts +6 -0
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +33 -0
- package/dist/transforms/dom/removeTrackingPixels.d.ts +6 -0
- package/dist/transforms/dom/removeTrackingPixels.js +59 -0
- package/dist/transforms/dom/replaceEmbedsWithPlaceholders.d.ts +6 -0
- package/dist/transforms/dom/replaceEmbedsWithPlaceholders.js +36 -0
- package/dist/transforms/dom/replacePreLineBreaks.d.ts +6 -0
- package/dist/transforms/dom/replacePreLineBreaks.js +13 -0
- package/dist/transforms/dom/resolveRelativeUrls.d.ts +6 -0
- package/dist/transforms/dom/resolveRelativeUrls.js +42 -0
- package/dist/transforms/dom/simplifyFigures.d.ts +6 -0
- package/dist/transforms/dom/simplifyFigures.js +27 -0
- package/dist/transforms/dom/stripComments.d.ts +6 -0
- package/dist/transforms/dom/stripComments.js +23 -0
- package/dist/transforms/dom/stripInterBlockBreaks.d.ts +6 -0
- package/dist/transforms/dom/stripInterBlockBreaks.js +18 -0
- package/dist/transforms/dom/stripParagraphBoundaryBreaks.d.ts +6 -0
- package/dist/transforms/dom/stripParagraphBoundaryBreaks.js +25 -0
- package/dist/transforms/dom/stripTrackingParams.d.ts +6 -0
- package/dist/transforms/dom/stripTrackingParams.js +22 -0
- package/dist/transforms/dom/trimPreWhitespace.d.ts +6 -0
- package/dist/transforms/dom/trimPreWhitespace.js +20 -0
- package/dist/transforms/dom/unwrapRedirectUrls.d.ts +7 -0
- package/dist/transforms/dom/unwrapRedirectUrls.js +28 -0
- package/dist/transforms/string/decodeDoubleEncodedTags.d.ts +6 -0
- package/dist/transforms/string/decodeDoubleEncodedTags.js +23 -0
- package/dist/transforms/string/paragraphizePlainText.d.ts +6 -0
- package/dist/transforms/string/paragraphizePlainText.js +10 -0
- package/dist/transforms/string/stripEmptyTags.d.ts +6 -0
- package/dist/transforms/string/stripEmptyTags.js +25 -0
- package/dist/transforms/string/stripOrphanedClosingTags.d.ts +6 -0
- package/dist/transforms/string/stripOrphanedClosingTags.js +28 -0
- package/dist/transforms/string/unwrapWrappers.d.ts +6 -0
- package/dist/transforms/string/unwrapWrappers.js +10 -0
- package/dist/types.d.ts +52 -0
- package/dist/unwraps/aceml.d.ts +6 -0
- package/dist/unwraps/aceml.js +17 -0
- package/dist/unwraps/adjust.d.ts +6 -0
- package/dist/unwraps/adjust.js +9 -0
- package/dist/unwraps/amazonAffiliate.d.ts +6 -0
- package/dist/unwraps/amazonAffiliate.js +9 -0
- package/dist/unwraps/ampCache.d.ts +6 -0
- package/dist/unwraps/ampCache.js +13 -0
- package/dist/unwraps/awin.d.ts +6 -0
- package/dist/unwraps/awin.js +9 -0
- package/dist/unwraps/bing.d.ts +6 -0
- package/dist/unwraps/bing.js +15 -0
- package/dist/unwraps/cjNetwork.d.ts +6 -0
- package/dist/unwraps/cjNetwork.js +17 -0
- package/dist/unwraps/digidip.d.ts +6 -0
- package/dist/unwraps/digidip.js +8 -0
- package/dist/unwraps/disqus.d.ts +6 -0
- package/dist/unwraps/disqus.js +8 -0
- package/dist/unwraps/douban.d.ts +6 -0
- package/dist/unwraps/douban.js +9 -0
- package/dist/unwraps/duckduckgo.d.ts +6 -0
- package/dist/unwraps/duckduckgo.js +9 -0
- package/dist/unwraps/ebayRover.d.ts +6 -0
- package/dist/unwraps/ebayRover.js +8 -0
- package/dist/unwraps/effiliation.d.ts +6 -0
- package/dist/unwraps/effiliation.js +8 -0
- package/dist/unwraps/embedly.d.ts +6 -0
- package/dist/unwraps/embedly.js +8 -0
- package/dist/unwraps/facebook.d.ts +6 -0
- package/dist/unwraps/facebook.js +9 -0
- package/dist/unwraps/feedsportal.d.ts +6 -0
- package/dist/unwraps/feedsportal.js +44 -0
- package/dist/unwraps/firebaseDynamicLinks.d.ts +6 -0
- package/dist/unwraps/firebaseDynamicLinks.js +8 -0
- package/dist/unwraps/flipboard.d.ts +6 -0
- package/dist/unwraps/flipboard.js +9 -0
- package/dist/unwraps/gateSc.d.ts +6 -0
- package/dist/unwraps/gateSc.js +8 -0
- package/dist/unwraps/georiot.d.ts +6 -0
- package/dist/unwraps/georiot.js +8 -0
- package/dist/unwraps/gitee.d.ts +6 -0
- package/dist/unwraps/gitee.js +9 -0
- package/dist/unwraps/google.d.ts +6 -0
- package/dist/unwraps/google.js +8 -0
- package/dist/unwraps/googleAmpViewer.d.ts +6 -0
- package/dist/unwraps/googleAmpViewer.js +13 -0
- package/dist/unwraps/googleNews.d.ts +6 -0
- package/dist/unwraps/googleNews.js +8 -0
- package/dist/unwraps/googleNewsModern.d.ts +6 -0
- package/dist/unwraps/googleNewsModern.js +11 -0
- package/dist/unwraps/googleScholar.d.ts +6 -0
- package/dist/unwraps/googleScholar.js +8 -0
- package/dist/unwraps/googleTranslate.d.ts +6 -0
- package/dist/unwraps/googleTranslate.js +8 -0
- package/dist/unwraps/hashnode.d.ts +6 -0
- package/dist/unwraps/hashnode.js +9 -0
- package/dist/unwraps/icptrack.d.ts +6 -0
- package/dist/unwraps/icptrack.js +9 -0
- package/dist/unwraps/idealoPartner.d.ts +6 -0
- package/dist/unwraps/idealoPartner.js +8 -0
- package/dist/unwraps/instagram.d.ts +6 -0
- package/dist/unwraps/instagram.js +8 -0
- package/dist/unwraps/jianshuGo.d.ts +6 -0
- package/dist/unwraps/jianshuGo.js +9 -0
- package/dist/unwraps/juejin.d.ts +6 -0
- package/dist/unwraps/juejin.js +8 -0
- package/dist/unwraps/leverAnalytics.d.ts +6 -0
- package/dist/unwraps/leverAnalytics.js +8 -0
- package/dist/unwraps/linksynergy.d.ts +6 -0
- package/dist/unwraps/linksynergy.js +9 -0
- package/dist/unwraps/mailchimp.d.ts +6 -0
- package/dist/unwraps/mailchimp.js +9 -0
- package/dist/unwraps/mailpanion.d.ts +6 -0
- package/dist/unwraps/mailpanion.js +8 -0
- package/dist/unwraps/mailpgn.d.ts +6 -0
- package/dist/unwraps/mailpgn.js +8 -0
- package/dist/unwraps/mailtrack.d.ts +6 -0
- package/dist/unwraps/mailtrack.js +8 -0
- package/dist/unwraps/medium.d.ts +6 -0
- package/dist/unwraps/medium.js +9 -0
- package/dist/unwraps/mimecast.d.ts +6 -0
- package/dist/unwraps/mimecast.js +11 -0
- package/dist/unwraps/mozillaOutgoing.d.ts +6 -0
- package/dist/unwraps/mozillaOutgoing.js +13 -0
- package/dist/unwraps/narrativ.d.ts +6 -0
- package/dist/unwraps/narrativ.js +8 -0
- package/dist/unwraps/nicoMs.d.ts +6 -0
- package/dist/unwraps/nicoMs.js +12 -0
- package/dist/unwraps/outlookSafelinks.d.ts +6 -0
- package/dist/unwraps/outlookSafelinks.js +8 -0
- package/dist/unwraps/partnerAds.d.ts +6 -0
- package/dist/unwraps/partnerAds.js +8 -0
- package/dist/unwraps/pocket.d.ts +6 -0
- package/dist/unwraps/pocket.js +9 -0
- package/dist/unwraps/postmark.d.ts +6 -0
- package/dist/unwraps/postmark.js +12 -0
- package/dist/unwraps/proofpointV1.d.ts +6 -0
- package/dist/unwraps/proofpointV1.js +16 -0
- package/dist/unwraps/proofpointV2.d.ts +6 -0
- package/dist/unwraps/proofpointV2.js +16 -0
- package/dist/unwraps/proofpointV3.d.ts +6 -0
- package/dist/unwraps/proofpointV3.js +78 -0
- package/dist/unwraps/pxf.d.ts +6 -0
- package/dist/unwraps/pxf.js +8 -0
- package/dist/unwraps/recruitics.d.ts +6 -0
- package/dist/unwraps/recruitics.js +8 -0
- package/dist/unwraps/redditOut.d.ts +6 -0
- package/dist/unwraps/redditOut.js +8 -0
- package/dist/unwraps/redirectingat.d.ts +6 -0
- package/dist/unwraps/redirectingat.js +8 -0
- package/dist/unwraps/segmentfault.d.ts +6 -0
- package/dist/unwraps/segmentfault.js +16 -0
- package/dist/unwraps/shareasale.d.ts +6 -0
- package/dist/unwraps/shareasale.js +9 -0
- package/dist/unwraps/sjv.d.ts +6 -0
- package/dist/unwraps/sjv.js +8 -0
- package/dist/unwraps/skimlinks.d.ts +6 -0
- package/dist/unwraps/skimlinks.js +8 -0
- package/dist/unwraps/slack.d.ts +6 -0
- package/dist/unwraps/slack.js +9 -0
- package/dist/unwraps/smartredirect.d.ts +6 -0
- package/dist/unwraps/smartredirect.js +8 -0
- package/dist/unwraps/sspai.d.ts +6 -0
- package/dist/unwraps/sspai.js +9 -0
- package/dist/unwraps/steamLinkfilter.d.ts +6 -0
- package/dist/unwraps/steamLinkfilter.js +9 -0
- package/dist/unwraps/telegramIv.d.ts +6 -0
- package/dist/unwraps/telegramIv.js +9 -0
- package/dist/unwraps/tradedoubler.d.ts +6 -0
- package/dist/unwraps/tradedoubler.js +9 -0
- package/dist/unwraps/tumblr.d.ts +6 -0
- package/dist/unwraps/tumblr.js +9 -0
- package/dist/unwraps/valuecommerce.d.ts +6 -0
- package/dist/unwraps/valuecommerce.js +9 -0
- package/dist/unwraps/viglink.d.ts +6 -0
- package/dist/unwraps/viglink.js +8 -0
- package/dist/unwraps/vkAway.d.ts +6 -0
- package/dist/unwraps/vkAway.js +9 -0
- package/dist/unwraps/webArchive.d.ts +6 -0
- package/dist/unwraps/webArchive.js +12 -0
- package/dist/unwraps/yahooSearch.d.ts +6 -0
- package/dist/unwraps/yahooSearch.js +12 -0
- package/dist/unwraps/yandexTurbo.d.ts +6 -0
- package/dist/unwraps/yandexTurbo.js +12 -0
- package/dist/unwraps/youtube.d.ts +6 -0
- package/dist/unwraps/youtube.js +9 -0
- package/dist/unwraps/zhihu.d.ts +6 -0
- package/dist/unwraps/zhihu.js +8 -0
- package/dist/utils.d.ts +13 -0
- package/dist/utils.js +31 -0
- package/package.json +58 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
|
|
2
|
+
import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
|
|
3
|
+
import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
|
|
4
|
+
import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
|
|
5
|
+
import { injectEnclosureEmbedPlaceholders } from "./transforms/dom/injectEnclosureEmbedPlaceholders.js";
|
|
6
|
+
import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
|
|
7
|
+
import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
|
|
8
|
+
import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
|
|
9
|
+
import { chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
|
|
10
|
+
import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
|
|
11
|
+
import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
|
|
12
|
+
import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
|
|
13
|
+
import { stripComments } from "./transforms/dom/stripComments.js";
|
|
14
|
+
import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
|
|
15
|
+
import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
|
|
16
|
+
import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
|
|
17
|
+
import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
|
|
18
|
+
import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
|
|
19
|
+
import { decodeDoubleEncodedTags } from "./transforms/string/decodeDoubleEncodedTags.js";
|
|
20
|
+
import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
|
|
21
|
+
import { stripEmptyTags } from "./transforms/string/stripEmptyTags.js";
|
|
22
|
+
import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
|
|
23
|
+
import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
|
|
24
|
+
import { unwrapBing } from "./unwraps/bing.js";
|
|
25
|
+
import { unwrapFacebookShim } from "./unwraps/facebook.js";
|
|
26
|
+
import { unwrapGoogle } from "./unwraps/google.js";
|
|
27
|
+
import { unwrapGoogleAmpViewer } from "./unwraps/googleAmpViewer.js";
|
|
28
|
+
import { unwrapGoogleNews } from "./unwraps/googleNews.js";
|
|
29
|
+
import { unwrapGoogleNewsModern } from "./unwraps/googleNewsModern.js";
|
|
30
|
+
import { unwrapGoogleScholar } from "./unwraps/googleScholar.js";
|
|
31
|
+
import { unwrapInstagramShim } from "./unwraps/instagram.js";
|
|
32
|
+
import { unwrapRedditOut } from "./unwraps/redditOut.js";
|
|
33
|
+
import { unwrapVkAway } from "./unwraps/vkAway.js";
|
|
34
|
+
import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
|
|
35
|
+
import { unwrapYouTube } from "./unwraps/youtube.js";
|
|
36
|
+
import { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers } from "./defaults.js";
|
|
37
|
+
import { simplifyFigures } from "./transforms/dom/simplifyFigures.js";
|
|
38
|
+
import { unwrapAceml } from "./unwraps/aceml.js";
|
|
39
|
+
import { unwrapAdjust } from "./unwraps/adjust.js";
|
|
40
|
+
import { unwrapAmazonAffiliate } from "./unwraps/amazonAffiliate.js";
|
|
41
|
+
import { unwrapAmpCache } from "./unwraps/ampCache.js";
|
|
42
|
+
import { unwrapAwin } from "./unwraps/awin.js";
|
|
43
|
+
import { unwrapCjNetwork } from "./unwraps/cjNetwork.js";
|
|
44
|
+
import { unwrapDigidip } from "./unwraps/digidip.js";
|
|
45
|
+
import { unwrapDisqus } from "./unwraps/disqus.js";
|
|
46
|
+
import { unwrapDouban } from "./unwraps/douban.js";
|
|
47
|
+
import { unwrapDuckduckgo } from "./unwraps/duckduckgo.js";
|
|
48
|
+
import { unwrapEbayRover } from "./unwraps/ebayRover.js";
|
|
49
|
+
import { unwrapEffiliation } from "./unwraps/effiliation.js";
|
|
50
|
+
import { unwrapEmbedly } from "./unwraps/embedly.js";
|
|
51
|
+
import { unwrapFeedsportal } from "./unwraps/feedsportal.js";
|
|
52
|
+
import { unwrapFirebaseDynamicLinks } from "./unwraps/firebaseDynamicLinks.js";
|
|
53
|
+
import { unwrapFlipboard } from "./unwraps/flipboard.js";
|
|
54
|
+
import { unwrapGateSc } from "./unwraps/gateSc.js";
|
|
55
|
+
import { unwrapGeoriot } from "./unwraps/georiot.js";
|
|
56
|
+
import { unwrapGitee } from "./unwraps/gitee.js";
|
|
57
|
+
import { unwrapGoogleTranslate } from "./unwraps/googleTranslate.js";
|
|
58
|
+
import { unwrapHashnode } from "./unwraps/hashnode.js";
|
|
59
|
+
import { unwrapIcptrack } from "./unwraps/icptrack.js";
|
|
60
|
+
import { unwrapIdealoPartner } from "./unwraps/idealoPartner.js";
|
|
61
|
+
import { unwrapJianshuGo } from "./unwraps/jianshuGo.js";
|
|
62
|
+
import { unwrapJuejin } from "./unwraps/juejin.js";
|
|
63
|
+
import { unwrapLeverAnalytics } from "./unwraps/leverAnalytics.js";
|
|
64
|
+
import { unwrapLinksynergy } from "./unwraps/linksynergy.js";
|
|
65
|
+
import { unwrapMailchimp } from "./unwraps/mailchimp.js";
|
|
66
|
+
import { unwrapMailpanion } from "./unwraps/mailpanion.js";
|
|
67
|
+
import { unwrapMailpgn } from "./unwraps/mailpgn.js";
|
|
68
|
+
import { unwrapMailtrack } from "./unwraps/mailtrack.js";
|
|
69
|
+
import { unwrapMedium } from "./unwraps/medium.js";
|
|
70
|
+
import { unwrapMimecast } from "./unwraps/mimecast.js";
|
|
71
|
+
import { unwrapMozillaOutgoing } from "./unwraps/mozillaOutgoing.js";
|
|
72
|
+
import { unwrapNarrativ } from "./unwraps/narrativ.js";
|
|
73
|
+
import { unwrapNicoMs } from "./unwraps/nicoMs.js";
|
|
74
|
+
import { unwrapOutlookSafelinks } from "./unwraps/outlookSafelinks.js";
|
|
75
|
+
import { unwrapPartnerAds } from "./unwraps/partnerAds.js";
|
|
76
|
+
import { unwrapPocket } from "./unwraps/pocket.js";
|
|
77
|
+
import { unwrapPostmark } from "./unwraps/postmark.js";
|
|
78
|
+
import { unwrapProofpointV1 } from "./unwraps/proofpointV1.js";
|
|
79
|
+
import { unwrapProofpointV2 } from "./unwraps/proofpointV2.js";
|
|
80
|
+
import { unwrapProofpointV3 } from "./unwraps/proofpointV3.js";
|
|
81
|
+
import { unwrapPxf } from "./unwraps/pxf.js";
|
|
82
|
+
import { unwrapRecruitics } from "./unwraps/recruitics.js";
|
|
83
|
+
import { unwrapRedirectingat } from "./unwraps/redirectingat.js";
|
|
84
|
+
import { unwrapSegmentfault } from "./unwraps/segmentfault.js";
|
|
85
|
+
import { unwrapShareasale } from "./unwraps/shareasale.js";
|
|
86
|
+
import { unwrapSjv } from "./unwraps/sjv.js";
|
|
87
|
+
import { unwrapSkimlinks } from "./unwraps/skimlinks.js";
|
|
88
|
+
import { unwrapSlack } from "./unwraps/slack.js";
|
|
89
|
+
import { unwrapSmartredirect } from "./unwraps/smartredirect.js";
|
|
90
|
+
import { unwrapSspai } from "./unwraps/sspai.js";
|
|
91
|
+
import { unwrapSteamLinkfilter } from "./unwraps/steamLinkfilter.js";
|
|
92
|
+
import { unwrapTelegramIv } from "./unwraps/telegramIv.js";
|
|
93
|
+
import { unwrapTradedoubler } from "./unwraps/tradedoubler.js";
|
|
94
|
+
import { unwrapTumblr } from "./unwraps/tumblr.js";
|
|
95
|
+
import { unwrapValuecommerce } from "./unwraps/valuecommerce.js";
|
|
96
|
+
import { unwrapViglink } from "./unwraps/viglink.js";
|
|
97
|
+
import { unwrapWebArchive } from "./unwraps/webArchive.js";
|
|
98
|
+
import { unwrapYandexTurbo } from "./unwraps/yandexTurbo.js";
|
|
99
|
+
import { unwrapZhihu } from "./unwraps/zhihu.js";
|
|
100
|
+
//#region src/index.ts
|
|
101
|
+
/**
 * Runs an HTML string through the three-stage pipeline: string transforms,
 * DOM transforms, then final string transforms.
 *
 * Every transform is a factory that receives the shared context and returns
 * the function actually applied. Any option left nullish falls back to the
 * corresponding default imported from ./defaults.js.
 *
 * @param {string} html - Input handed to the first string stage.
 * @param {object} [options] - Optional overrides for context values and stages.
 * @returns {*} Whatever the final `applyStringTransforms` call returns.
 */
const transformContent = (html, options = {}) => {
  const context = {
    baseUrl: options.baseUrl,
    enclosures: options.enclosures,
    embedResolvers: options.embedResolvers ?? defaultEmbedResolvers,
    lazySrcAttributes: options.lazySrcAttributes ?? defaultLazySrcAttributes,
    trackingHosts: options.trackingHosts ?? defaultTrackingHosts,
    trackingPathSegments: options.trackingPathSegments ?? defaultTrackingPathSegments,
    urlUnwrappers: options.urlUnwrappers ?? defaultUrlUnwrappers,
    resolveUrlFn: options.resolveUrlFn ?? defaultResolveUrlFn,
  };

  const stringFactories = options.stringTransforms ?? defaultStringTransforms;
  const domFactories = options.domTransforms ?? defaultDomTransforms;
  const finalFactories = options.finalStringTransforms ?? defaultFinalStringTransforms;

  // Factories are bound to the context right before their stage runs.
  const bind = (factories) => factories.map((factory) => factory(context));

  const prepared = applyStringTransforms(html, bind(stringFactories));
  const processed = applyDomTransforms(prepared, bind(domFactories));
  return applyStringTransforms(processed, bind(finalFactories));
};
|
|
117
|
+
//#endregion
|
|
118
|
+
export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosureEmbedPlaceholders, linkifyUrls, mergeConsecutiveOneLinerPres, paragraphizePlainText, parseFragment, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, simplifyFigures, stripComments, stripEmptyTags, stripInterBlockBreaks, stripOrphanedClosingTags, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, 
unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
//#region src/transforms/dom/fixLazyImages.ts
|
|
2
|
+
// Matches the opening of an <img> tag inside a string of markup.
const imgPattern = /<img\s/i;

// A value only counts as URL-like when it contains ':', '/' or '.'.
const urlShapeRegex = /[:/.]/;

/**
 * Tells whether an attribute value looks like a URL rather than a
 * JSON-style payload.
 *
 * @param {string} value - Raw attribute value.
 * @returns {boolean} False for values starting with '{' or '[', otherwise
 *   whether the value contains one of ':', '/' or '.'.
 */
const isUrlShaped = (value) => {
  if (value.startsWith("{") || value.startsWith("[")) {
    return false;
  }
  return urlShapeRegex.test(value);
};
|
|
7
|
+
/**
 * DOM transform factory that turns lazy-loaded images into regular ones.
 *
 * For every <img>: the first URL-shaped value among
 * `context.lazySrcAttributes` becomes `src`, all of those attributes are
 * removed, and a non-empty `data-srcset` is moved to `srcset`.
 * For every <noscript> directly preceded by an <img> element and whose text
 * contains an <img> tag: the preceding image is removed and the <noscript>
 * is replaced by its own text content as markup.
 *
 * @param {object} context - Needs `lazySrcAttributes` (iterable of attribute names).
 * @returns {(document: Document) => void} The transform to run on a document.
 */
const fixLazyImages = (context) => {
  // Promote the first usable lazy attribute to `src`; drop every lazy attribute.
  const promoteLazySource = (image) => {
    let promoted = false;
    for (const name of context.lazySrcAttributes) {
      const candidate = image.getAttribute(name);
      const usable = !promoted && Boolean(candidate) && isUrlShaped(candidate);
      if (usable) {
        image.setAttribute("src", candidate);
        promoted = true;
      }
      image.removeAttribute(name);
    }
  };

  // Move a non-empty `data-srcset` over to `srcset`.
  const promoteLazySrcset = (image) => {
    const lazySrcset = image.getAttribute("data-srcset");
    if (!lazySrcset) return;
    image.setAttribute("srcset", lazySrcset);
    image.removeAttribute("data-srcset");
  };

  // Swap an <img> + <noscript> pair for the markup held in the <noscript>.
  const restoreNoscriptFallback = (noscript) => {
    const previous = noscript.previousElementSibling;
    if (previous?.tagName !== "IMG") return;
    const fallbackMarkup = noscript.textContent ?? "";
    if (!imgPattern.test(fallbackMarkup)) return;
    previous.remove();
    noscript.outerHTML = fallbackMarkup;
  };

  return (document) => {
    for (const image of document.querySelectorAll("img")) {
      promoteLazySource(image);
      promoteLazySrcset(image);
    }
    for (const noscript of document.querySelectorAll("noscript")) {
      restoreNoscriptFallback(noscript);
    }
  };
};
|
|
37
|
+
//#endregion
|
|
38
|
+
export { fixLazyImages };
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { DomTransform } from "../../types.js";
|
|
2
|
+
|
|
3
|
+
//#region src/transforms/dom/highlightCode.d.ts
|
|
4
|
+
declare const detectLanguage: (pre: Element, code: Element | null) => string | undefined;
|
|
5
|
+
declare const highlightCode: DomTransform;
|
|
6
|
+
//#endregion
|
|
7
|
+
export { detectLanguage, highlightCode };
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import hljs from "highlight.js/lib/common";
|
|
2
|
+
//#region src/transforms/dom/highlightCode.ts
|
|
3
|
+
// Captures the name that follows a `language-` or `lang-` class prefix.
const languagePattern = /(?:language|lang)-(\S+)/;

// Data attributes consulted when no class name carries a language.
const languageAttributes = ["data-language", "data-lang"];

/**
 * Works out the language declared on a code block.
 *
 * Class names are checked first (the <code> element, then the <pre>); if
 * neither matches, the data attributes are checked (the <pre>, then the
 * <code> element).
 *
 * @param {Element} pre - The <pre> element.
 * @param {Element | null} code - The nested <code> element, if any.
 * @returns {string | undefined} The declared language, or undefined.
 */
const detectLanguage = (pre, code) => {
  const languageFromClass = (element) => {
    if (element == null) return undefined;
    return element.className.match(languagePattern)?.[1];
  };

  const declaredByClass = languageFromClass(code) || languageFromClass(pre);
  if (declaredByClass) return declaredByClass;

  for (const element of [pre, code]) {
    if (element == null) continue;
    for (const attribute of languageAttributes) {
      const declared = element.getAttribute(attribute);
      if (declared) return declared;
    }
  }
  return undefined;
};
|
|
15
|
+
/**
 * DOM transform factory that syntax-highlights `<pre><code>` blocks with
 * highlight.js.
 *
 * Blocks without a <code> child or with whitespace-only text are left
 * alone. When the detected language is known to highlight.js it is used
 * explicitly; otherwise auto-detection is used. The <code> element receives
 * the generated markup and the `hljs` class.
 *
 * @returns {(document: Document) => void} The transform to run on a document.
 */
const highlightCode = () => {
  const render = (source, language) => {
    if (language && hljs.getLanguage(language)) {
      return hljs.highlight(source, { language });
    }
    return hljs.highlightAuto(source);
  };

  return (document) => {
    for (const pre of document.querySelectorAll("pre")) {
      const code = pre.querySelector("code");
      if (!code) continue;

      const source = code.textContent ?? "";
      if (source.trim() === "") continue;

      const language = detectLanguage(pre, code);
      code.innerHTML = render(source, language).value;
      code.classList.add("hljs");
    }
  };
};
|
|
29
|
+
//#endregion
|
|
30
|
+
export { detectLanguage, highlightCode };
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { createEmbedPlaceholder } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/injectEnclosureEmbedPlaceholders.ts
|
|
3
|
+
/**
 * Tells whether an enclosure describes audio content.
 *
 * An enclosure qualifies when its `medium` is exactly "audio" or its MIME
 * `type` starts with "audio/". MIME type names are case-insensitive, so the
 * type is trimmed and lower-cased before the prefix check; a missing or
 * non-string `type` simply does not match.
 *
 * @param {{ medium?: string, type?: string }} enclosure - Feed enclosure.
 * @returns {boolean} True when the enclosure is audio.
 */
const isAudioEnclosure = (enclosure) => {
  if (enclosure.medium === "audio") return true;
  const { type } = enclosure;
  return typeof type === "string" && type.trim().toLowerCase().startsWith("audio/");
};
|
|
6
|
+
/**
 * Tells whether an enclosure describes video content.
 *
 * An enclosure qualifies when its `medium` is exactly "video" or its MIME
 * `type` starts with "video/". MIME type names are case-insensitive, so the
 * type is trimmed and lower-cased before the prefix check; a missing or
 * non-string `type` simply does not match.
 *
 * @param {{ medium?: string, type?: string }} enclosure - Feed enclosure.
 * @returns {boolean} True when the enclosure is video.
 */
const isVideoEnclosure = (enclosure) => {
  if (enclosure.medium === "video") return true;
  const { type } = enclosure;
  return typeof type === "string" && type.trim().toLowerCase().startsWith("video/");
};
|
|
9
|
+
/**
 * Runs an enclosure URL through the embed resolvers by wrapping it in a
 * throwaway <iframe src="..."> element.
 *
 * @param {string} url - Enclosure URL placed in the probe's `src`.
 * @param {Iterable<{selector: string, extract: Function}>} resolvers - Resolvers tried in order.
 * @param {Document} document - Used only to create the probe element.
 * @returns {object | undefined} The first truthy result of `extract` from a
 *   resolver whose selector matches the probe, or undefined if none does.
 */
const resolveEnclosure = (url, resolvers, document) => {
  const iframeProbe = document.createElement("iframe");
  iframeProbe.setAttribute("src", url);

  for (const { selector, extract } of Array.from(resolvers, (resolver) => ({
    selector: resolver.selector,
    extract: (element) => resolver.extract(element),
  }))) {
    if (!iframeProbe.matches(selector)) continue;
    const metadata = extract(iframeProbe);
    if (metadata) return metadata;
  }
  return undefined;
};
|
|
17
|
+
/**
 * DOM transform factory that adds embed placeholders for feed enclosures
 * that are not already referenced in the document.
 *
 * An enclosure is skipped when its URL already appears in the serialized
 * document, when it repeats the URL of an enclosure already injected in this
 * run, when `context.resolveUrlFn` rejects the URL, or when no embed
 * resolver recognises it and it is neither audio nor video.
 *
 * Placeholders are inserted at the top of <body> in the same order as the
 * enclosures. They are collected first and prepended in one call, because
 * prepending one by one would reverse their order.
 *
 * @param {object} context - Needs `enclosures`, `embedResolvers`, `resolveUrlFn`, `baseUrl`.
 * @returns {(document: Document) => void} The transform to run on a document.
 */
const injectEnclosureEmbedPlaceholders = (context) => {
  return (document) => {
    const { enclosures } = context;
    if (!enclosures?.length) return;

    // Snapshot taken once: it only reflects what the content itself references.
    const html = document.toString();
    // Tracks URLs injected during this run, which the snapshot cannot see.
    const injectedUrls = new Set();
    const placeholders = [];

    for (const enclosure of enclosures) {
      const { url } = enclosure;
      if (injectedUrls.has(url) || html.includes(url)) continue;
      if (!context.resolveUrlFn(url, context.baseUrl)) continue;

      const resolved = resolveEnclosure(url, context.embedResolvers, document);
      const isAudio = isAudioEnclosure(enclosure);
      if (!resolved && !isAudio && !isVideoEnclosure(enclosure)) continue;

      const type = resolved?.type ?? (isAudio ? "audio" : "video");
      injectedUrls.add(url);
      placeholders.push(createEmbedPlaceholder(document, url, type, resolved));
    }

    if (placeholders.length > 0) document.body.prepend(...placeholders);
  };
};
|
|
32
|
+
//#endregion
|
|
33
|
+
export { injectEnclosureEmbedPlaceholders };
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { Node } from "../../common.js";
|
|
2
|
+
import { find } from "linkifyjs";
|
|
3
|
+
//#region src/transforms/dom/linkifyUrls.ts
|
|
4
|
+
// Only links that start with http:// or https:// are turned into anchors.
const urlProtocolRegex = /^https?:\/\//i;

// Elements whose text must never be linkified (compared in lower case).
const linkifyIgnoreTags = new Set(["a", "pre", "code", "kbd", "samp", "var", "script", "style"]);

/**
 * Gathers, in document order, every text node under `node` that is not
 * inside one of the ignored elements.
 *
 * @param {Node} node - Root of the subtree to walk.
 * @param {Node[]} [result] - Accumulator; text nodes are appended to it.
 * @returns {Node[]} The accumulator.
 */
const collectTextNodes = (node, result = []) => {
  const isIgnoredElement = (candidate) =>
    candidate.nodeType === Node.ELEMENT_NODE && linkifyIgnoreTags.has(candidate.tagName.toLowerCase());

  if (isIgnoredElement(node)) return result;

  for (const child of node.childNodes) {
    if (child.nodeType === Node.TEXT_NODE) {
      result.push(child);
      continue;
    }
    // Anything that is not an element (comments, etc.) is skipped.
    if (child.nodeType !== Node.ELEMENT_NODE || isIgnoredElement(child)) continue;
    collectTextNodes(child, result);
  }
  return result;
};
|
|
21
|
+
/**
 * DOM transform factory that wraps bare http(s) URLs found in text nodes in
 * <a href="..."> elements.
 *
 * Text nodes come from `collectTextNodes`. A node is skipped when it is
 * blank, contains no "://", or linkifyjs finds no link of type "url" with
 * an http/https protocol. Otherwise the node is replaced by alternating
 * text nodes and anchors.
 *
 * @returns {(document: Document) => void} The transform to run on a document.
 */
const linkifyUrls = () => {
  return (document) => {
    const toAnchor = (link) => {
      const anchor = document.createElement("a");
      anchor.setAttribute("href", link.href);
      anchor.textContent = link.value;
      return anchor;
    };
    const isHttpUrl = (link) => link.type === "url" && urlProtocolRegex.test(link.value);

    for (const node of collectTextNodes(document)) {
      const text = node.textContent;
      if (!(text?.trim() && text?.includes("://"))) continue;

      const links = find(text).filter(isHttpUrl);
      if (links.length === 0) continue;

      const replacement = [];
      let cursor = 0;
      for (const link of links) {
        // Keep the plain text sitting between the previous link and this one.
        if (link.start > cursor) {
          replacement.push(document.createTextNode(text.slice(cursor, link.start)));
        }
        replacement.push(toAnchor(link));
        cursor = link.end;
      }
      if (cursor < text.length) {
        replacement.push(document.createTextNode(text.slice(cursor)));
      }
      node.replaceWith(...replacement);
    }
  };
};
|
|
44
|
+
//#endregion
|
|
45
|
+
export { linkifyUrls };
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
//#region src/transforms/dom/mergeConsecutiveOneLinerPres.ts
|
|
2
|
+
// A <br> (optionally self-closed) followed only by whitespace at the end.
const trailingBrRegex = /<br\s*\/?>\s*$/i;

// Newlines at the very start or very end of the markup.
const surroundingNewlinesRegex = /^\n+|\n+$/g;

// Markup of a <pre> without its leading/trailing newlines.
const trimmedPreMarkup = (element) => element.innerHTML.replace(surroundingNewlinesRegex, "");

// A <pre> is a one-liner when no newline remains inside its trimmed markup.
const isOneLinerPre = (element) => !trimmedPreMarkup(element).includes("\n");

// Collects `first` plus the <pre> siblings that follow it, separated only by
// whitespace-only text nodes. Any other node ends the run.
const collectPreRun = (first) => {
  const run = [first];
  for (let node = first.nextSibling; node; node = node.nextSibling) {
    if (node.nodeType === 3) {
      if (node.textContent?.trim() !== "") break;
      continue;
    }
    if (node.nodeType !== 1 || node.tagName !== "PRE") break;
    run.push(node);
  }
  return run;
};

/**
 * DOM transform factory that folds runs of adjacent single-line <pre>
 * elements into the first <pre> of the run, one line per original element.
 *
 * A run is merged only when it holds at least two elements and every one of
 * them is a one-liner. A trailing <br> on each line is dropped, the lines
 * are joined with "\n", and the remaining elements of the run are removed.
 * Elements already detached (no parentNode) are skipped.
 *
 * @returns {(document: Document) => void} The transform to run on a document.
 */
const mergeConsecutiveOneLinerPres = () => {
  return (document) => {
    for (const pre of document.querySelectorAll("pre")) {
      if (!pre.parentNode) continue;

      const run = collectPreRun(pre);
      if (run.length < 2 || !run.every(isOneLinerPre)) continue;

      const lines = run.map((element) => trimmedPreMarkup(element).replace(trailingBrRegex, ""));
      pre.innerHTML = lines.join("\n");

      const [, ...absorbed] = run;
      for (const element of absorbed) element.remove();
    }
  };
};
|
|
32
|
+
//#endregion
|
|
33
|
+
export { mergeConsecutiveOneLinerPres };
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
//#region src/transforms/dom/removeTrackingPixels.ts
|
|
2
|
+
// Inline-style patterns used to spot tracking pixels. Each one anchors the
// property name to the start of the style or to a preceding ';' so that,
// for example, `max-width` is not mistaken for `width`.
//
// The numeric patterns accept an optional `!important` before the end of
// the declaration, so `width:1px !important` is read like `width:1px`.

// Captures a unitless or px width, e.g. "width: 1px" -> "1".
const styleWidthRegex = /(?:^|;)\s*width\s*:\s*([0-9]*\.?[0-9]+)\s*(?:px)?\s*(?:!\s*important\s*)?(?:;|$)/i;

// Captures a unitless or px height, e.g. "height: 0" -> "0".
const styleHeightRegex = /(?:^|;)\s*height\s*:\s*([0-9]*\.?[0-9]+)\s*(?:px)?\s*(?:!\s*important\s*)?(?:;|$)/i;

// Matches a `display` declaration whose value starts with "none".
const styleDisplayNoneRegex = /(?:^|;)\s*display\s*:\s*none/i;

// Matches a `visibility` declaration whose value starts with "hidden".
const styleVisibilityHiddenRegex = /(?:^|;)\s*visibility\s*:\s*hidden/i;

// Matches an opacity of exactly zero ("0", "0.0", ...), not "0.5".
const styleOpacityZeroRegex = /(?:^|;)\s*opacity\s*:\s*0(?:\.0+)?\s*(?:!\s*important\s*)?(?:;|$)/i;

// Largest width/height at which an image is still treated as a pixel.
const pixelDimensionLimit = 2;
|
|
8
|
+
/**
 * Builds a case-insensitive regex that matches a URL path containing one of
 * the given segments, preceded by "/" and followed by "." or "/".
 *
 * Segments are matched literally. `RegExp.escape` is used when the runtime
 * provides it; otherwise an equivalent local escaper is used, so the
 * function also works on runtimes that predate that API.
 *
 * @param {string[]} segments - Path segments that identify tracking resources.
 * @returns {RegExp | null} The regex, or null when `segments` is empty.
 */
const buildPathRegex = (segments) => {
  if (segments.length === 0) return null;

  const escapeSegment =
    typeof RegExp.escape === "function"
      ? (segment) => RegExp.escape(segment)
      : (segment) => segment.replace(/[\\^$.*+?()[\]{}|\/-]/g, "\\$&");

  const alternation = segments.map(escapeSegment).join("|");
  return new RegExp(`/(?:${alternation})[./]`, "i");
};
|
|
13
|
+
/**
 * Tells whether an image source points at a known tracking host or path.
 *
 * The source is parsed relative to a placeholder origin so relative URLs
 * can still be checked by path. A host matches when the URL's hostname
 * equals it or is a subdomain of it. Any error raised while checking
 * results in `false`.
 *
 * @param {string} src - Value of the image `src` attribute.
 * @param {Iterable<string>} hosts - Tracking host names.
 * @param {RegExp | null} pathRegex - Path pattern, or null to skip the path check.
 * @returns {boolean} True when the URL is considered a tracker.
 */
const isTrackingUrl = (src, hosts, pathRegex) => {
  try {
    const { hostname, pathname } = new URL(src, "http://placeholder/");
    const matchesHost = (host) => hostname === host || hostname.endsWith(`.${host}`);

    if ([...hosts].some(matchesHost)) return true;
    return pathRegex == null ? false : pathRegex.test(pathname);
  } catch {
    return false;
  }
};
|
|
22
|
+
/**
 * Reads an image's width or height in pixels.
 *
 * The HTML attribute wins when it holds a finite number. An empty or
 * whitespace-only attribute is treated as absent: `Number("")` is 0, which
 * would otherwise make `<img width="">` look like a zero-width image.
 * When the attribute is unusable, the inline style is consulted instead.
 *
 * @param {Element} image - The <img> element.
 * @param {"width" | "height"} prop - Which dimension to read.
 * @returns {number | undefined} The dimension, or undefined when neither
 *   the attribute nor the inline style provides one.
 */
const getDimension = (image, prop) => {
  const attribute = image.getAttribute(prop);
  if (typeof attribute === "string" && attribute.trim() !== "") {
    const value = Number(attribute);
    if (Number.isFinite(value)) return value;
  }

  const style = image.getAttribute("style");
  if (style) {
    const regex = prop === "width" ? styleWidthRegex : styleHeightRegex;
    const match = style.match(regex);
    if (match) return Number(match[1]);
  }
  return undefined;
};
|
|
35
|
+
/**
 * Tells whether an image is hidden from view.
 *
 * An image counts as hidden when it carries the `hidden` attribute or when
 * its inline style matches the display-none, visibility-hidden or
 * zero-opacity pattern.
 *
 * @param {Element} image - The <img> element.
 * @returns {boolean} True when the image is hidden.
 */
const isHiddenImage = (image) => {
  if (image.hasAttribute("hidden")) return true;

  const inlineStyle = image.getAttribute("style");
  if (!inlineStyle) return false;

  const hidingPatterns = [styleDisplayNoneRegex, styleVisibilityHiddenRegex, styleOpacityZeroRegex];
  return hidingPatterns.some((pattern) => pattern.test(inlineStyle));
};
|
|
41
|
+
/**
 * Tells whether an image is small enough to be a tracking pixel.
 *
 * It is enough for one known dimension (width or height) to be at or below
 * `pixelDimensionLimit`; unknown dimensions never qualify.
 *
 * @param {Element} image - The <img> element.
 * @returns {boolean} True when either dimension is within the limit.
 */
const isPixelSized = (image) => {
  const withinLimit = (size) => size !== undefined && size <= pixelDimensionLimit;
  const dimensions = ["width", "height"].map((prop) => getDimension(image, prop));
  return dimensions.some(withinLimit);
};
|
|
46
|
+
/**
 * DOM transform factory that removes tracking-pixel images.
 *
 * The host set and path regex are built once from the context. An <img> is
 * removed when it is pixel-sized, hidden, or its `src` points at a tracking
 * host or path.
 *
 * @param {object} context - Needs `trackingHosts` and `trackingPathSegments`.
 * @returns {(document: Document) => void} The transform to run on a document.
 */
const removeTrackingPixels = (context) => {
  const hosts = new Set(context.trackingHosts);
  const pathRegex = buildPathRegex(context.trackingPathSegments);

  const isTracker = (image) => {
    const src = image.getAttribute("src");
    // Images without a src can still be dropped for their size or visibility.
    const pointsAtTracker = src ? isTrackingUrl(src, hosts, pathRegex) : false;
    return isPixelSized(image) || isHiddenImage(image) || pointsAtTracker;
  };

  return (document) => {
    for (const image of document.querySelectorAll("img")) {
      if (!isTracker(image)) continue;
      image.remove();
    }
  };
};
|
|
58
|
+
//#endregion
|
|
59
|
+
export { removeTrackingPixels };
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { createEmbedPlaceholder } from "../../common.js";
|
|
2
|
+
import { coerceNumber } from "../../utils.js";
|
|
3
|
+
//#region src/transforms/dom/replaceEmbedsWithPlaceholders.ts
|
|
4
|
+
/**
 * Creates a DOM transform that swaps embeds for placeholder nodes built by
 * `createEmbedPlaceholder`.
 *
 * Elements matched by each entry of `context.embedResolvers` are handled first;
 * every remaining `iframe[src]` is then replaced as a generic "iframe" embed.
 * An element is left untouched when `context.resolveUrlFn` returns a falsy
 * value for its source (or for the resolver-provided `url`, when present).
 *
 * @param {object} context - Transform context providing `embedResolvers`,
 *   `resolveUrlFn` and `baseUrl`.
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const replaceEmbedsWithPlaceholders = (context) => {
  return (document) => {
    const isResolvable = (url) => Boolean(context.resolveUrlFn(url, context.baseUrl));

    for (const resolver of context.embedResolvers) {
      for (const element of document.querySelectorAll(resolver.selector)) {
        const metadata = resolver.extract(element);
        if (!metadata) {
          continue;
        }
        if (!isResolvable(metadata.src)) {
          continue;
        }
        if (metadata.url && !isResolvable(metadata.url)) {
          continue;
        }
        // Explicit element attributes win over the resolver-provided size.
        const width = coerceNumber(element.getAttribute("width")) ?? metadata.width;
        const height = coerceNumber(element.getAttribute("height")) ?? metadata.height;
        const kind = metadata.type ?? "iframe";
        const options = { ...metadata, width, height };
        element.replaceWith(createEmbedPlaceholder(document, metadata.src, kind, options));
      }
    }

    for (const iframe of document.querySelectorAll("iframe[src]")) {
      const src = iframe.getAttribute("src") ?? "";
      if (!isResolvable(src)) {
        continue;
      }
      const width = coerceNumber(iframe.getAttribute("width"));
      const height = coerceNumber(iframe.getAttribute("height"));
      const placeholder = createEmbedPlaceholder(document, src, "iframe", { width, height });
      iframe.replaceWith(placeholder);
    }
  };
};
|
|
35
|
+
//#endregion
|
|
36
|
+
export { replaceEmbedsWithPlaceholders };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
//#region src/transforms/dom/replacePreLineBreaks.ts
|
|
2
|
+
// Matches any serialized <br> tag: bare (<br>), self-closing (<br/>, <br />),
// or carrying attributes (<br class="x">). Quoted attribute values are consumed
// as a unit so a `>` inside them does not end the match early, and tag names
// that merely start with "br" (<break>, <br-x>) are not matched.
const brTagRegex = /<br(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?\/?>/gi;
/**
 * Creates a DOM transform that turns `<br>` tags inside `<pre>` blocks into
 * newline characters.
 *
 * For each `<pre>`, the first nested `<code>` element is rewritten when one
 * exists; otherwise the `<pre>` itself is rewritten. The rewrite happens on the
 * serialized `innerHTML`, which is then assigned back.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const replacePreLineBreaks = () => {
  return (document) => {
    const pres = document.querySelectorAll("pre");
    for (const pre of pres) {
      const target = pre.querySelector("code") ?? pre;
      target.innerHTML = target.innerHTML.replace(brTagRegex, "\n");
    }
  };
};
|
|
12
|
+
//#endregion
|
|
13
|
+
export { replacePreLineBreaks };
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { resolveUrl } from "feedcanon";
|
|
2
|
+
import { parseSrcset, stringifySrcset } from "srcset";
|
|
3
|
+
//#region src/transforms/dom/resolveRelativeUrls.ts
|
|
4
|
+
/**
 * Creates a DOM transform that rewrites URL-bearing attributes through
 * `resolveUrl(value, baseUrl)`.
 *
 * Covered attributes: `href` on anchors, `src` on any element, `poster` on
 * videos, and every candidate of `srcset` on `<img>` / `<source>`. An attribute
 * is only overwritten when resolution yields a truthy value; srcset candidates
 * keep their original URL when resolution yields null/undefined. The transform
 * does nothing when `baseUrl` is falsy.
 *
 * @param {object} context - Transform context.
 * @param {string} [context.baseUrl] - Base URL to resolve against.
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const resolveRelativeUrls = ({ baseUrl }) => {
  return (document) => {
    if (!baseUrl) {
      return;
    }

    // Resolves one attribute on every element matched by the selector.
    const rewriteAttribute = (selector, attributeName) => {
      for (const node of document.querySelectorAll(selector)) {
        const raw = node.getAttribute(attributeName);
        if (!raw) {
          continue;
        }
        const absolute = resolveUrl(raw, baseUrl);
        if (absolute) {
          node.setAttribute(attributeName, absolute);
        }
      }
    };

    rewriteAttribute("a[href]", "href");
    rewriteAttribute("[src]", "src");
    rewriteAttribute("video[poster]", "poster");

    for (const node of document.querySelectorAll("img, source")) {
      const srcset = node.getAttribute("srcset") ?? node.getAttribute("srcSet");
      if (!srcset) {
        continue;
      }
      const candidates = parseSrcset(srcset).map((candidate) => {
        return { ...candidate, url: resolveUrl(candidate.url, baseUrl) ?? candidate.url };
      });
      // Drop the camel-cased variant so only the lowercase attribute remains.
      node.removeAttribute("srcSet");
      node.setAttribute("srcset", stringifySrcset(candidates));
    }
  };
};
|
|
41
|
+
//#endregion
|
|
42
|
+
export { resolveRelativeUrls };
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { Node, unwrapOuterTag } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/simplifyFigures.ts
|
|
3
|
+
// Matches a string that is, in its entirety, one <p>, <div> or <span> element.
// Group 1 is the tag name (the closing tag must repeat it), group 2 the
// optional attribute text, group 3 the inner content. Passed to unwrapOuterTag
// by simplifyFigures.
const figureWrapperRegex = /^<(p|div|span)(\s[^>]*)?>[\s\n]*([\s\S]*)[\s\n]*<\/\1>$/i;
|
|
4
|
+
// Detects the opening of an <img>, <picture>, <video> or <audio> tag.
const mediaContentRegex = /<(img|picture|video|audio)[\s>]/i;
/**
 * Reports whether an HTML fragment consists solely of media markup.
 *
 * The fragment qualifies when nothing but whitespace is left after removing
 * all img/picture/video/audio/source tags, and it contains at least one
 * img/picture/video/audio opening tag.
 *
 * @param {string} html - Serialized HTML fragment.
 * @returns {boolean} `true` when the fragment holds media and nothing else.
 */
const isMediaOnly = (html) => {
  const withoutMediaTags = html.replace(/<\/?(img|picture|video|audio|source)(\s[^>]*)?>/gi, "");
  if (withoutMediaTags.trim() !== "") {
    return false;
  }
  return mediaContentRegex.test(html);
};
|
|
8
|
+
/**
 * Creates a DOM transform that flattens redundant wrappers inside `<figure>`.
 *
 * - A non-figcaption child is replaced by the result of `unwrapOuterTag` when
 *   that result differs from the child's markup and is media-only.
 * - A `<figcaption>` whose only element child is a `<div>`, and which has no
 *   non-blank text node of its own, takes over that div's inner HTML.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const simplifyFigures = () => {
  return (document) => {
    for (const figure of document.querySelectorAll("figure")) {
      // Iterate over a snapshot: assigning outerHTML replaces nodes in the live list.
      for (const child of Array.from(figure.children)) {
        if (child.tagName.toLowerCase() === "figcaption") {
          continue;
        }
        const markup = child.outerHTML;
        const unwrapped = unwrapOuterTag(markup, figureWrapperRegex);
        const wasUnwrapped = unwrapped !== markup;
        if (wasUnwrapped && isMediaOnly(unwrapped)) {
          child.outerHTML = unwrapped;
        }
      }

      for (const caption of figure.querySelectorAll("figcaption")) {
        const elements = Array.from(caption.children);
        if (elements.length !== 1) {
          continue;
        }
        const [wrapper] = elements;
        if (wrapper.tagName.toLowerCase() !== "div") {
          continue;
        }
        const hasOwnText = Array.from(caption.childNodes).some((node) => {
          return node.nodeType === Node.TEXT_NODE && (node.textContent ?? "").trim() !== "";
        });
        if (!hasOwnText) {
          caption.innerHTML = wrapper.innerHTML;
        }
      }
    }
  };
};
|
|
26
|
+
//#endregion
|
|
27
|
+
export { simplifyFigures };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { Node } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/stripComments.ts
|
|
3
|
+
// Lowercase tag names that start a code block; stripComments keeps comments
// found anywhere beneath these elements.
const codeBlockTags = new Set(["pre", "code"]);
|
|
4
|
+
/**
 * Creates a DOM transform that removes comment nodes from `document.body`,
 * except comments located inside an element listed in `codeBlockTags`.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const stripComments = () => {
  return (document) => {
    const walk = (parent, preserveComments) => {
      // Snapshot the children so removals do not disturb the iteration.
      for (const current of Array.from(parent.childNodes)) {
        switch (current.nodeType) {
          case Node.COMMENT_NODE:
            if (!preserveComments) {
              current.remove();
            }
            break;
          case Node.ELEMENT_NODE:
            // Once inside a code block, every deeper level stays preserved.
            walk(current, preserveComments || codeBlockTags.has(current.tagName.toLowerCase()));
            break;
          default:
            break;
        }
      }
    };
    walk(document.body, false);
  };
};
|
|
22
|
+
//#endregion
|
|
23
|
+
export { stripComments };
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { isBlockElement, isSkippable } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/stripInterBlockBreaks.ts
|
|
3
|
+
/**
 * Creates a DOM transform that removes `<br>` elements sitting between block
 * boundaries.
 *
 * For each `<br>`, the nearest sibling on either side that `isSkippable` does
 * not skip is located. The `<br>` is removed when each side is either missing
 * or satisfies `isBlockElement`.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const stripInterBlockBreaks = () => {
  return (document) => {
    // Walks siblings in one direction until a non-skippable node (or none) is found.
    const nearestMeaningful = (start, step) => {
      let node = start;
      while (node && isSkippable(node)) {
        node = step(node);
      }
      return node;
    };

    for (const br of document.querySelectorAll("br")) {
      const before = nearestMeaningful(br.previousSibling, (node) => node.previousSibling);
      const after = nearestMeaningful(br.nextSibling, (node) => node.nextSibling);
      const boundedBefore = !before || isBlockElement(before);
      const boundedAfter = !after || isBlockElement(after);
      if (boundedBefore && boundedAfter) {
        br.remove();
      }
    }
  };
};
|
|
17
|
+
//#endregion
|
|
18
|
+
export { stripInterBlockBreaks };
|