feedsweep 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -39,7 +39,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
39
39
  | `mergeConsecutiveOneLinerPres` | Merge consecutive single-line `<pre>` tags |
40
40
  | `replacePreLineBreaks` | Replace `<br>` with `\n` inside `<pre>` |
41
41
  | `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
42
- | `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
42
+ | `stripBoundaryBreaks` | Remove `<br>` tags adjacent to block-element boundaries (paragraphs, headings, divs, list items, blockquotes, …) |
43
43
  | `stripDuplicateTitleHeading` | Remove first `<h1>`–`<h6>` matching article title |
44
44
  | `demoteHeadings` | Shift every heading down by one level (`<h1>`→`<h2>`, …, `<h5>`→`<h6>`) when the body contains an `<h1>`, so it sits below the reader's own page title |
45
45
  | `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
@@ -61,6 +61,7 @@ Inventory of every transform exported from the package. Most are enabled by defa
61
61
  | `paragraphizePlainText` | Wrap plain text in `<p>` tags |
62
62
  | `stripOversizedBase64Sources` | Drop base64 `src`/`srcset`/`poster` payloads larger than 50 KB before parsing |
63
63
  | `linkifyUrls` | Wrap bare URLs in `<a>` tags |
64
+ | `markTimestamps` | Wrap line-leading timestamps (`MM:SS` / `HH:MM:SS`) in `<span data-timestamp="seconds">` so a player can be seeked to that point |
64
65
  | `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
65
66
  | `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
66
67
  | `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
package/dist/common.js CHANGED
@@ -58,25 +58,31 @@ const blockElements = new Set([
58
58
  "table",
59
59
  "ul"
60
60
  ]);
61
- const isWhitespaceText = (node) => {
62
- return node.nodeType === Node.TEXT_NODE && !node.textContent?.trim();
61
+ const isElement = (node) => {
62
+ return node?.nodeType === Node.ELEMENT_NODE;
63
63
  };
64
- const isBr = (node) => {
65
- return node.nodeType === Node.ELEMENT_NODE && node.localName === "br";
64
+ const isText = (node) => {
65
+ return node?.nodeType === Node.TEXT_NODE;
66
66
  };
67
67
  const isComment = (node) => {
68
- return node.nodeType === Node.COMMENT_NODE;
68
+ return node?.nodeType === Node.COMMENT_NODE;
69
+ };
70
+ const isWhitespaceText = (node) => {
71
+ return isText(node) && !node.textContent?.trim();
72
+ };
73
+ const isBr = (node) => {
74
+ return isElement(node) && node.localName === "br";
69
75
  };
70
76
  const isSkippable = (node) => {
71
77
  return isWhitespaceText(node) || isBr(node) || isComment(node);
72
78
  };
73
79
  const isBlockElement = (node) => {
74
- return node.nodeType === Node.ELEMENT_NODE && blockElements.has(node.localName);
80
+ return isElement(node) && blockElements.has(node.localName);
75
81
  };
76
82
  const hasAncestorWithTagName = (node, tagSet, stopAt) => {
77
83
  let ancestor = node.parentNode;
78
84
  while (ancestor !== null && ancestor !== stopAt) {
79
- if (ancestor.nodeType === Node.ELEMENT_NODE && tagSet.has(ancestor.localName)) return true;
85
+ if (isElement(ancestor) && tagSet.has(ancestor.localName)) return true;
80
86
  ancestor = ancestor.parentNode;
81
87
  }
82
88
  return false;
@@ -161,4 +167,4 @@ const createBookmarkPlaceholder = (document, result) => {
161
167
  return element;
162
168
  };
163
169
  //#endregion
164
- export { Node, NodeFilter, applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isSafeThumbnailUrl, isSkippable, isWhitespaceText, normalizeEmbedFields, updateEmbedPlaceholder };
170
+ export { NodeFilter, applyDomTransforms, applyStringTransforms, createBookmarkPlaceholder, createEmbedPlaceholder, createPlaceholder, getDimensions, hasAncestorWithTagName, isBlockElement, isBr, isComment, isElement, isSafeThumbnailUrl, isSkippable, isText, isWhitespaceText, normalizeEmbedFields, updateEmbedPlaceholder };
package/dist/defaults.js CHANGED
@@ -9,6 +9,7 @@ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
9
9
  import { highlightCode } from "./transforms/dom/highlightCode.js";
10
10
  import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
11
11
  import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
12
+ import { markTimestamps } from "./transforms/dom/markTimestamps.js";
12
13
  import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
13
14
  import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
14
15
  import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
@@ -16,13 +17,13 @@ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
16
17
  import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
17
18
  import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
18
19
  import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
20
+ import { stripBoundaryBreaks } from "./transforms/dom/stripBoundaryBreaks.js";
19
21
  import { stripComments } from "./transforms/dom/stripComments.js";
20
22
  import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
21
23
  import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
22
24
  import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
23
25
  import { stripInertElements } from "./transforms/dom/stripInertElements.js";
24
26
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
25
- import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
26
27
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
27
28
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
28
29
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
@@ -70,13 +71,14 @@ const defaultDomTransforms = [
70
71
  unwrapEmojiImages,
71
72
  convertBreaksToParagraphs,
72
73
  stripInterBlockBreaks,
73
- stripParagraphBoundaryBreaks,
74
+ stripBoundaryBreaks,
74
75
  mergeFragmentedLists,
75
76
  highlightCode,
76
77
  mergeConsecutiveOneLinerPres,
77
78
  replacePreLineBreaks,
78
79
  trimPreWhitespace,
79
80
  linkifyUrls,
81
+ markTimestamps,
80
82
  replaceEmbedsWithPlaceholders,
81
83
  injectEnclosures,
82
84
  proxyAssetUrls,
package/dist/index.d.ts CHANGED
@@ -13,6 +13,7 @@ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
13
13
  import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
14
14
  import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
15
15
  import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
16
+ import { markTimestamps, parseTimestampSeconds } from "./transforms/dom/markTimestamps.js";
16
17
  import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
17
18
  import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
18
19
  import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
@@ -20,13 +21,13 @@ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
20
21
  import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
21
22
  import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
22
23
  import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
24
+ import { stripBoundaryBreaks } from "./transforms/dom/stripBoundaryBreaks.js";
23
25
  import { stripComments } from "./transforms/dom/stripComments.js";
24
26
  import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
25
27
  import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
26
28
  import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
27
29
  import { stripInertElements } from "./transforms/dom/stripInertElements.js";
28
30
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
29
- import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
30
31
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
31
32
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
32
33
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
@@ -116,4 +117,4 @@ import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor
116
117
  //#region src/index.d.ts
117
118
  declare const transformContent: (html: string, options: TransformContentOptions) => Promise<string>;
118
119
  //#endregion
119
- export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
120
+ export { type AssetProxyFn, type AssetType, type BookmarkResolver, type BookmarkResolverResult, type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type EnrichEmbedFn, type MaybePromise, type ParamExtractorConfig, type ParseHtmlFn, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
package/dist/index.js CHANGED
@@ -11,6 +11,7 @@ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
11
11
  import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
12
12
  import { injectEnclosures } from "./transforms/dom/injectEnclosures.js";
13
13
  import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
14
+ import { markTimestamps, parseTimestampSeconds } from "./transforms/dom/markTimestamps.js";
14
15
  import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
15
16
  import { mergeFragmentedLists } from "./transforms/dom/mergeFragmentedLists.js";
16
17
  import { proxyAssetUrls } from "./transforms/dom/proxyAssetUrls.js";
@@ -18,13 +19,13 @@ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
18
19
  import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
19
20
  import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
20
21
  import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
22
+ import { stripBoundaryBreaks } from "./transforms/dom/stripBoundaryBreaks.js";
21
23
  import { stripComments } from "./transforms/dom/stripComments.js";
22
24
  import { stripDeadAnchors } from "./transforms/dom/stripDeadAnchors.js";
23
25
  import { stripDuplicateTitleHeading } from "./transforms/dom/stripDuplicateTitleHeading.js";
24
26
  import { stripEmptyTags } from "./transforms/dom/stripEmptyTags.js";
25
27
  import { stripInertElements } from "./transforms/dom/stripInertElements.js";
26
28
  import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
27
- import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
28
29
  import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
29
30
  import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
30
31
  import { unwrapDoublyNestedLists } from "./transforms/dom/unwrapDoublyNestedLists.js";
@@ -137,4 +138,4 @@ const transformContent = async (html, options) => {
137
138
  return await applyDomTransforms(await options.parseHtmlFn(afterString), domFns.map((transform) => transform(context)));
138
139
  };
139
140
  //#endregion
140
- export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
141
+ export { applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, convertBookmarkCards, convertBreaksToParagraphs, createBookmarkPlaceholder, createEmbedPlaceholder, createParamExtractor, createPlaceholder, decodeDoubleEncodedTags, defaultResolveUrlFn, demoteHeadings, detectLanguage, enrichEmbedPlaceholders, extractRedirectTarget, extractVideoId, fixLazyImages, ghostBookmarkResolver, highlightCode, injectEnclosures, isSafeThumbnailUrl, linkifyUrls, markTimestamps, mergeConsecutiveOneLinerPres, mergeFragmentedLists, normalizeEmbedFields, paragraphizePlainText, parseTimestampSeconds, proxyAssetUrls, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, stripBoundaryBreaks, stripComments, stripControlChars, stripDeadAnchors, stripDuplicateTitleHeading, stripEmptyTags, stripInertElements, stripInterBlockBreaks, stripOversizedBase64Sources, stripTrackingParams, substackBookmarkResolver, transformContent, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCdataComments, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDoublyNestedLists, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapEmojiImages, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, updateEmbedPlaceholder, youtubeEmbedResolver, youtubeResolveEmbed };
@@ -1,4 +1,4 @@
1
- import { Node, hasAncestorWithTagName, isBlockElement, isBr, isWhitespaceText } from "../../common.js";
1
+ import { hasAncestorWithTagName, isBlockElement, isBr, isElement, isText, isWhitespaceText } from "../../common.js";
2
2
  //#region src/transforms/dom/convertBreaksToParagraphs.ts
3
3
  const processContainersSelector = "body, div, blockquote, td, li, article, section, main, header, footer, aside";
4
4
  const preOrCodeTags = new Set(["pre", "code"]);
@@ -49,11 +49,10 @@ const convertBreaksToParagraphs = () => {
49
49
  i++;
50
50
  }
51
51
  } else {
52
- const nodeType = child.nodeType;
53
- if (nodeType === Node.ELEMENT_NODE) {
52
+ if (isElement(child)) {
54
53
  current.hasContent = true;
55
54
  if (isBlockElement(child)) current.hasBlock = true;
56
- } else if (nodeType === Node.TEXT_NODE) {
55
+ } else if (isText(child)) {
57
56
  if (!current.hasContent && child.textContent?.trim()) current.hasContent = true;
58
57
  }
59
58
  i++;
@@ -1,4 +1,4 @@
1
- import { NodeFilter, hasAncestorWithTagName } from "../../common.js";
1
+ import { NodeFilter, hasAncestorWithTagName, isText } from "../../common.js";
2
2
  //#region src/transforms/dom/decodeDoubleEncodedTags.ts
3
3
  const opaqueTags = new Set([
4
4
  "code",
@@ -16,13 +16,13 @@ const decodeDoubleEncodedTags = () => {
16
16
  const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
17
17
  let tempDiv = null;
18
18
  for (let node = walker.nextNode(); node !== null; node = walker.nextNode()) {
19
- const text = node;
20
- const data = text.data;
19
+ if (!isText(node)) continue;
20
+ const data = node.data;
21
21
  if (!data.includes("<") || !tagInTextRegex.test(data)) continue;
22
- if (hasAncestorWithTagName(text, opaqueTags)) continue;
22
+ if (hasAncestorWithTagName(node, opaqueTags)) continue;
23
23
  if (tempDiv === null) tempDiv = document.createElement("div");
24
24
  tempDiv.innerHTML = data;
25
- text.replaceWith(...tempDiv.childNodes);
25
+ node.replaceWith(...tempDiv.childNodes);
26
26
  }
27
27
  };
28
28
  };
@@ -1,4 +1,4 @@
1
- import { Node } from "../../common.js";
1
+ import { isElement, isText } from "../../common.js";
2
2
  import { find } from "linkifyjs";
3
3
  //#region src/transforms/dom/linkifyUrls.ts
4
4
  const urlProtocolRegex = /^https?:\/\//i;
@@ -13,9 +13,9 @@ const linkifyIgnoreTags = new Set([
13
13
  "style"
14
14
  ]);
15
15
  const collectTextNodes = (node, result = []) => {
16
- if (node.nodeType === Node.ELEMENT_NODE && linkifyIgnoreTags.has(node.tagName.toLowerCase())) return result;
17
- for (const child of node.childNodes) if (child.nodeType === Node.TEXT_NODE) result.push(child);
18
- else if (child.nodeType === Node.ELEMENT_NODE && !linkifyIgnoreTags.has(child.tagName.toLowerCase())) collectTextNodes(child, result);
16
+ if (isElement(node) && linkifyIgnoreTags.has(node.tagName.toLowerCase())) return result;
17
+ for (const child of node.childNodes) if (isText(child)) result.push(child);
18
+ else if (isElement(child) && !linkifyIgnoreTags.has(child.tagName.toLowerCase())) collectTextNodes(child, result);
19
19
  return result;
20
20
  };
21
21
  const linkifyUrls = () => {
@@ -0,0 +1,7 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/markTimestamps.d.ts
4
+ declare const parseTimestampSeconds: (timestamp: string) => number | undefined;
5
+ declare const markTimestamps: DomTransform;
6
+ //#endregion
7
+ export { markTimestamps, parseTimestampSeconds };
@@ -0,0 +1,64 @@
1
+ import { isElement, isText } from "../../common.js";
2
+ //#region src/transforms/dom/markTimestamps.ts
3
+ const timestampIgnoreTags = new Set([
4
+ "a",
5
+ "pre",
6
+ "code",
7
+ "kbd",
8
+ "samp",
9
+ "var",
10
+ "script",
11
+ "style"
12
+ ]);
13
+ const lineLeadingTimestampRegex = /(^|\n)([ \t]*)((?:\d{1,2}:)?\d{1,2}:\d{2})/gm;
14
+ const numericPartRegex = /^\d+$/;
15
+ const parseTimestampSeconds = (timestamp) => {
16
+ const parts = timestamp.split(":");
17
+ if (!parts.every((part) => numericPartRegex.test(part))) return;
18
+ if (parts.length === 2) {
19
+ const [minutes, seconds] = parts.map(Number);
20
+ if (seconds > 59) return;
21
+ return minutes * 60 + seconds;
22
+ }
23
+ if (parts.length === 3) {
24
+ const [hours, minutes, seconds] = parts.map(Number);
25
+ if (minutes > 59 || seconds > 59) return;
26
+ return hours * 3600 + minutes * 60 + seconds;
27
+ }
28
+ };
29
+ const shouldSkipElement = (element) => {
30
+ return timestampIgnoreTags.has(element.tagName.toLowerCase()) || element.hasAttribute("data-timestamp");
31
+ };
32
+ const collectTextNodes = (node, result = []) => {
33
+ for (const child of node.childNodes) if (isText(child)) result.push(child);
34
+ else if (isElement(child) && !shouldSkipElement(child)) collectTextNodes(child, result);
35
+ return result;
36
+ };
37
+ const markTimestamps = () => {
38
+ return (document) => {
39
+ const textNodes = collectTextNodes(document);
40
+ for (const node of textNodes) {
41
+ const text = node.textContent;
42
+ if (!text?.includes(":")) continue;
43
+ const parts = [];
44
+ let lastIndex = 0;
45
+ for (const match of text.matchAll(lineLeadingTimestampRegex)) {
46
+ const [, lineStart, leading, token] = match;
47
+ const seconds = parseTimestampSeconds(token);
48
+ if (seconds === void 0) continue;
49
+ const tokenStart = (match.index ?? 0) + lineStart.length + leading.length;
50
+ if (tokenStart > lastIndex) parts.push(document.createTextNode(text.slice(lastIndex, tokenStart)));
51
+ const span = document.createElement("span");
52
+ span.setAttribute("data-timestamp", String(seconds));
53
+ span.textContent = token;
54
+ parts.push(span);
55
+ lastIndex = tokenStart + token.length;
56
+ }
57
+ if (parts.length === 0) continue;
58
+ if (lastIndex < text.length) parts.push(document.createTextNode(text.slice(lastIndex)));
59
+ node.replaceWith(...parts);
60
+ }
61
+ };
62
+ };
63
+ //#endregion
64
+ export { markTimestamps, parseTimestampSeconds };
@@ -1,3 +1,4 @@
1
+ import { isElement, isText } from "../../common.js";
1
2
  //#region src/transforms/dom/mergeConsecutiveOneLinerPres.ts
2
3
  const trailingBrRegex = /<br\s*\/?>\s*$/i;
3
4
  const surroundingNewlinesRegex = /^\n+|\n+$/g;
@@ -17,8 +18,8 @@ const mergeConsecutiveOneLinerPres = ({ preservedPreClasses }) => {
17
18
  const run = [pre];
18
19
  let sibling = pre.nextSibling;
19
20
  while (sibling) {
20
- if (sibling.nodeType !== 1 && sibling.nodeType !== 3) break;
21
- if (sibling.nodeType === 3) {
21
+ if (!isElement(sibling) && !isText(sibling)) break;
22
+ if (isText(sibling)) {
22
23
  if (sibling.textContent?.trim() !== "") break;
23
24
  sibling = sibling.nextSibling;
24
25
  continue;
@@ -1,4 +1,4 @@
1
- import { Node, isWhitespaceText } from "../../common.js";
1
+ import { isComment, isElement, isText, isWhitespaceText } from "../../common.js";
2
2
  //#region src/transforms/dom/mergeFragmentedLists.ts
3
3
  const mergeFragmentedLists = () => {
4
4
  return (document) => {
@@ -24,9 +24,8 @@ const mergeFragmentedLists = () => {
24
24
  let between = target.nextSibling;
25
25
  while (between && between !== extra) {
26
26
  const next = between.nextSibling;
27
- const type = between.nodeType;
28
- if (type === Node.COMMENT_NODE) between.parentNode?.removeChild(between);
29
- else if (type === Node.TEXT_NODE) target.appendChild(between);
27
+ if (isComment(between)) between.parentNode?.removeChild(between);
28
+ else if (isText(between)) target.appendChild(between);
30
29
  between = next;
31
30
  }
32
31
  while (extra.firstChild) target.appendChild(extra.firstChild);
@@ -38,14 +37,13 @@ const mergeFragmentedLists = () => {
38
37
  const nextMergeableSibling = (from, localName) => {
39
38
  let sibling = from.nextSibling;
40
39
  while (sibling) {
41
- const type = sibling.nodeType;
42
- if (type === Node.ELEMENT_NODE) return sibling.localName === localName ? sibling : void 0;
43
- if (type === Node.TEXT_NODE) {
40
+ if (isElement(sibling)) return sibling.localName === localName ? sibling : void 0;
41
+ if (isText(sibling)) {
44
42
  if (!isWhitespaceText(sibling)) return;
45
43
  sibling = sibling.nextSibling;
46
44
  continue;
47
45
  }
48
- if (type === Node.COMMENT_NODE) {
46
+ if (isComment(sibling)) {
49
47
  sibling = sibling.nextSibling;
50
48
  continue;
51
49
  }
@@ -54,16 +52,15 @@ const nextMergeableSibling = (from, localName) => {
54
52
  };
55
53
  const hasOnlyListItemChildren = (list) => {
56
54
  for (let child = list.firstChild; child; child = child.nextSibling) {
57
- const type = child.nodeType;
58
- if (type === Node.ELEMENT_NODE) {
55
+ if (isElement(child)) {
59
56
  if (child.localName !== "li") return false;
60
57
  continue;
61
58
  }
62
- if (type === Node.TEXT_NODE) {
59
+ if (isText(child)) {
63
60
  if (!isWhitespaceText(child)) return false;
64
61
  continue;
65
62
  }
66
- if (type !== Node.COMMENT_NODE) return false;
63
+ if (!isComment(child)) return false;
67
64
  }
68
65
  return true;
69
66
  };
@@ -0,0 +1,6 @@
1
+ import { DomTransform } from "../../types.js";
2
+
3
+ //#region src/transforms/dom/stripBoundaryBreaks.d.ts
4
+ declare const stripBoundaryBreaks: DomTransform;
5
+ //#endregion
6
+ export { stripBoundaryBreaks };
@@ -1,10 +1,26 @@
1
1
  import { isBr, isSkippable } from "../../common.js";
2
- //#region src/transforms/dom/stripParagraphBoundaryBreaks.ts
3
- const stripParagraphBoundaryBreaks = () => {
2
+ //#region src/transforms/dom/stripBoundaryBreaks.ts
3
+ const boundaryBreakSelectors = [
4
+ "p",
5
+ "h1",
6
+ "h2",
7
+ "h3",
8
+ "h4",
9
+ "h5",
10
+ "h6",
11
+ "div",
12
+ "blockquote",
13
+ "li",
14
+ "ul",
15
+ "ol",
16
+ "figcaption",
17
+ "section"
18
+ ];
19
+ const stripBoundaryBreaks = () => {
4
20
  return (document) => {
5
- const paragraphs = document.querySelectorAll("p");
6
- for (const paragraph of paragraphs) {
7
- let cursor = paragraph.firstChild;
21
+ const elements = document.querySelectorAll(boundaryBreakSelectors.join(", "));
22
+ for (const element of elements) {
23
+ let cursor = element.firstChild;
8
24
  let leadingHasBr = false;
9
25
  let leadingEnd = null;
10
26
  while (cursor && isSkippable(cursor)) {
@@ -13,7 +29,7 @@ const stripParagraphBoundaryBreaks = () => {
13
29
  cursor = cursor.nextSibling;
14
30
  }
15
31
  if (leadingHasBr) {
16
- let node = paragraph.firstChild;
32
+ let node = element.firstChild;
17
33
  while (node) {
18
34
  const next = node.nextSibling;
19
35
  node.remove();
@@ -21,7 +37,7 @@ const stripParagraphBoundaryBreaks = () => {
21
37
  node = next;
22
38
  }
23
39
  }
24
- cursor = paragraph.lastChild;
40
+ cursor = element.lastChild;
25
41
  let trailingHasBr = false;
26
42
  let trailingEnd = null;
27
43
  while (cursor && isSkippable(cursor)) {
@@ -30,7 +46,7 @@ const stripParagraphBoundaryBreaks = () => {
30
46
  cursor = cursor.previousSibling;
31
47
  }
32
48
  if (trailingHasBr) {
33
- let node = paragraph.lastChild;
49
+ let node = element.lastChild;
34
50
  while (node) {
35
51
  const prev = node.previousSibling;
36
52
  node.remove();
@@ -42,4 +58,4 @@ const stripParagraphBoundaryBreaks = () => {
42
58
  };
43
59
  };
44
60
  //#endregion
45
- export { stripParagraphBoundaryBreaks };
61
+ export { stripBoundaryBreaks };
@@ -1,4 +1,4 @@
1
- import { Node } from "../../common.js";
1
+ import { isElement, isText } from "../../common.js";
2
2
  //#region src/transforms/dom/stripEmptyTags.ts
3
3
  const preserveWhenEmpty = new Set([
4
4
  "iframe",
@@ -33,12 +33,11 @@ const stripEmptyTags = () => {
33
33
  let hasContent = false;
34
34
  for (let j = 0; j < childCount; j++) {
35
35
  const child = childNodes[j];
36
- const nodeType = child.nodeType;
37
- if (nodeType === Node.ELEMENT_NODE) {
36
+ if (isElement(child)) {
38
37
  hasContent = true;
39
38
  break;
40
39
  }
41
- if (nodeType === Node.TEXT_NODE && child.data.trim().length > 0) {
40
+ if (isText(child) && child.data.trim().length > 0) {
42
41
  hasContent = true;
43
42
  break;
44
43
  }
@@ -1,4 +1,4 @@
1
- import { Node } from "../../common.js";
1
+ import { isText } from "../../common.js";
2
2
  //#region src/transforms/dom/unwrapDoublyNestedLists.ts
3
3
  const unwrapDoublyNestedLists = () => {
4
4
  return (document) => {
@@ -21,7 +21,7 @@ const unwrapDoublyNestedLists = () => {
21
21
  }
22
22
  if (elementDisqualified || inner === null) continue;
23
23
  let textDisqualified = false;
24
- for (let node = wrapper.firstChild; node !== null; node = node.nextSibling) if (node.nodeType === Node.TEXT_NODE && node.textContent?.trim()) {
24
+ for (let node = wrapper.firstChild; node !== null; node = node.nextSibling) if (isText(node) && node.textContent?.trim()) {
25
25
  textDisqualified = true;
26
26
  break;
27
27
  }
@@ -30,7 +30,7 @@ const unwrapDoublyNestedLists = () => {
30
30
  if (parent === null) continue;
31
31
  for (let node = wrapper.firstChild; node !== null;) {
32
32
  const next = node.nextSibling;
33
- if (node.nodeType === Node.TEXT_NODE || node === inner) parent.insertBefore(node, outer);
33
+ if (isText(node) || node === inner) parent.insertBefore(node, outer);
34
34
  node = next;
35
35
  }
36
36
  outer.remove();
package/package.json CHANGED
@@ -69,5 +69,5 @@
69
69
  "linkedom": "^0.18.12",
70
70
  "tsdown": "^0.22.1"
71
71
  },
72
- "version": "2.0.1"
72
+ "version": "2.1.0"
73
73
  }
@@ -1,6 +0,0 @@
1
- import { DomTransform } from "../../types.js";
2
-
3
- //#region src/transforms/dom/stripParagraphBoundaryBreaks.d.ts
4
- declare const stripParagraphBoundaryBreaks: DomTransform;
5
- //#endregion
6
- export { stripParagraphBoundaryBreaks };