feedsweep 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -42,6 +42,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
42
42
  | `stripBoundaryBreaks` | Remove `<br>` tags adjacent to block-element boundaries (paragraphs, headings, divs, list items, blockquotes, …) |
43
43
  | `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
44
44
  | `demoteHeadings` | Shift every heading down by one level (`<h1>`→`<h2>`, …, `<h5>`→`<h6>`) when the body contains an `<h1>`, so it sits below the reader's own page title |
45
+ | `unwrapHeadingBold` | Unwrap `<b>`/`<strong>` that wraps the entire content of a heading (redundant — headings are already bold) |
45
46
  | `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
46
47
  | `stripDeadAnchors` | Unwrap `<a>` with empty, `#`, or `javascript:` href |
47
48
  | `stripInertElements` | Remove platform chrome and dead placeholders — subscribe widgets, share buttons, related-posts widgets, ad slots (AdSense / AdThrive), author bio blocks, email preheaders, Substack image controls, and Drupal `<drupal-render-placeholder>` tags. Pass `inertSelectors` to extend or replace |
@@ -57,6 +58,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
57
58
  | `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
58
59
  | `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
59
60
  | `unwrapDoublyNestedLists` | Unwrap `<ul>`/`<ol>` that wrap a single `<li>` containing a same-type list |
61
+ | `wrapTablesForScroll` | Wrap each `<table>` that is not nested inside another table in a `<div data-table>` wrapper, which the consumer can style as a horizontal-scroll container |
60
62
  | `mergeFragmentedLists` | Merge consecutive sibling `<ul>` / `<ol>` lists with matching attributes |
61
63
  | `paragraphizePlainText` | Wrap plain text in `<p>` tags |
62
64
  | `stripOversizedBase64Sources` | Drop base64 `src`/`srcset`/`poster` payloads larger than 50 KB before parsing |
package/dist/defaults.js CHANGED
@@ -28,8 +28,10 @@ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
28
28
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
29
29
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
30
30
  import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
31
+ import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
31
32
  import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
32
33
  import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
34
+ import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
33
35
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
34
36
  import { stripControlChars } from "./transforms/string/stripControlChars.js";
35
37
  import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
@@ -60,6 +62,7 @@ const defaultDomTransforms = [
60
62
  unwrapDoublyNestedLists,
61
63
  stripDuplicateTitleHeading,
62
64
  demoteHeadings,
65
+ unwrapHeadingBold,
63
66
  fixLazyImages,
64
67
  stripInertElements,
65
68
  resolveRelativeUrls,
@@ -83,7 +86,8 @@ const defaultDomTransforms = [
83
86
  injectEnclosures,
84
87
  proxyAssetUrls,
85
88
  unwrapWrappers,
86
- stripEmptyTags
89
+ stripEmptyTags,
90
+ wrapTablesForScroll
87
91
  ];
88
92
  const defaultEmbedResolvers = [youtubeEmbedResolver];
89
93
  const defaultBookmarkResolvers = [ghostBookmarkResolver, substackBookmarkResolver];
package/dist/index.d.ts CHANGED
@@ -32,8 +32,10 @@ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
32
32
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
33
33
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
34
34
  import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
35
+ import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
35
36
  import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
36
37
  import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
38
+ import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
37
39
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
38
40
  import { stripControlChars } from "./transforms/string/stripControlChars.js";
39
41
  import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
@@ -117,4 +119,4 @@ import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor
117
119
  //#region src/index.d.ts
118
120
  declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
119
121
  //#endregion
120
- export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, 
unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
122
+ export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, 
unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
package/dist/index.js CHANGED
@@ -30,8 +30,10 @@ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
30
30
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
31
31
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
32
32
  import { unwrapEmojiImages } from "./transforms/dom/unwrapEmojiImages.js";
33
+ import { unwrapHeadingBold } from "./transforms/dom/unwrapHeadingBold.js";
33
34
  import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
34
35
  import { unwrapWrappers } from "./transforms/dom/unwrapWrappers.js";
36
+ import { wrapTablesForScroll } from "./transforms/dom/wrapTablesForScroll.js";
35
37
  import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
36
38
  import { stripControlChars } from "./transforms/string/stripControlChars.js";
37
39
  import { stripOversizedBase64Sources } from "./transforms/string/stripOversizedBase64Sources.js";
@@ -138,4 +140,4 @@ const transformContent = async (html, options) => {
138
140
  return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
139
141
  };
140
142
  //#endregion
141
- export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, 
unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
143
+ export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapHeadingBold, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, 
unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, wrapTablesForScroll, youtubeEmbedResolver, youtubeResolveEmbed };
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/unwrapHeadingBold.d.ts
4
+ declare const unwrapHeadingBold: DomTransform; // DOM transform: unwraps <b>/<strong> that is the sole content of a heading (h1–h6)
5
+ //#endregion
6
+ export { unwrapHeadingBold };
@@ -0,0 +1,28 @@
1
+ import { isComment, isElement, isWhitespaceText } from "../../common.js";
2
+ //#region src/transforms/dom/unwrapHeadingBold.ts
3
+ const headingSelector = "h1, h2, h3, h4, h5, h6";
4
+ const boldTags = new Set(["b", "strong"]);
5
+ const soleContentElement = (heading) => { // Returns the single significant child element of `heading`, or null if there is none or more than one significant node
6
+ let found = null;
7
+ for (const child of heading.childNodes) {
8
+ if (isWhitespaceText(child) || isComment(child)) continue; // nodes accepted by isWhitespaceText / isComment do not count as content
9
+ if (found || !isElement(child)) return null; // a second significant node, or a significant node rejected by isElement (presumably non-whitespace text), disqualifies the heading
10
+ found = child;
11
+ }
12
+ return found; // still null when the heading had no significant child at all
13
+ };
14
+ const unwrapHeadingBold = () => { // Transform factory: the returned function removes <b>/<strong> wrappers that are a heading's sole content, mutating the document in place
15
+ return (document) => {
16
+ const headings = document.querySelectorAll(headingSelector); // every h1–h6 in the document
17
+ for (const heading of headings) {
18
+ let bold = soleContentElement(heading);
19
+ while (bold && boldTags.has(bold.localName)) { // loop so stacked wrappers such as <b><strong>…</strong></b> are peeled one layer per pass
20
+ while (bold.firstChild) heading.insertBefore(bold.firstChild, bold); // hoist the wrapper's children in front of it, keeping their order
21
+ bold.remove(); // the wrapper is now empty and can be dropped
22
+ bold = soleContentElement(heading); // re-check: the hoisted content may itself be a single bold element
23
+ }
24
+ }
25
+ };
26
+ };
27
+ //#endregion
28
+ export { unwrapHeadingBold };
@@ -7,7 +7,11 @@ const wrapperTags = new Set([
7
7
  "header",
8
8
  "footer"
9
9
  ]);
10
- const preservedPrefixes = ["data-embed", "data-bookmark"];
10
+ const preservedPrefixes = [
11
+ "data-embed",
12
+ "data-bookmark",
13
+ "data-table"
14
+ ];
11
15
  const hasPreservedAttribute = (element) => {
12
16
  const attributes = element.attributes;
13
17
  for (let i = 0, n = attributes.length; i < n; i++) {
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/wrapTablesForScroll.d.ts
4
+ declare const wrapTablesForScroll: DomTransform; // DOM transform: wraps tables in a <div data-table> element
5
+ //#endregion
6
+ export { wrapTablesForScroll };
@@ -0,0 +1,20 @@
1
+ import { hasAncestorWithTagName } from "../../common.js";
2
+ //#region src/transforms/dom/wrapTablesForScroll.ts
3
+ const tableTags = new Set(["table"]);
4
+ const wrapTablesForScroll = () => { // Transform factory: the returned function wraps each eligible <table> in a <div data-table>, mutating the document in place
5
+ return (document) => {
6
+ const tables = document.querySelectorAll("table");
7
+ for (const table of tables) {
8
+ const parent = table.parentNode;
9
+ if (!parent) continue; // no parent node, so there is nowhere to insert a wrapper
10
+ if (hasAncestorWithTagName(table, tableTags)) continue; // presumably skips tables nested inside another table — confirm against hasAncestorWithTagName in common.js
11
+ if (table.parentElement?.hasAttribute("data-table")) continue; // parent already carries data-table, so do not wrap a second time
12
+ const wrapper = document.createElement("div");
13
+ wrapper.setAttribute("data-table", ""); // marker attribute only; no inline style is set here
14
+ parent.insertBefore(wrapper, table); // the wrapper takes the table's position among its siblings...
15
+ wrapper.appendChild(table); // ...and the table is then moved inside it
16
+ }
17
+ };
18
+ };
19
+ //#endregion
20
+ export { wrapTablesForScroll };
package/package.json CHANGED
@@ -69,5 +69,5 @@
69
69
  "linkedom": "^0.18.12",
70
70
  "tsdown": "^0.22.1"
71
71
  },
72
- "version": "2.1.0"
72
+ "version": "2.2.0"
73
73
  }