feedsweep 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +70 -0
  3. package/dist/common.d.ts +11 -0
  4. package/dist/common.js +116 -0
  5. package/dist/defaults.d.ts +14 -0
  6. package/dist/defaults.js +127 -0
  7. package/dist/embeds/youtube.d.ts +9 -0
  8. package/dist/embeds/youtube.js +50 -0
  9. package/dist/index.d.ts +105 -0
  10. package/dist/index.js +118 -0
  11. package/dist/transforms/dom/fixLazyImages.d.ts +6 -0
  12. package/dist/transforms/dom/fixLazyImages.js +38 -0
  13. package/dist/transforms/dom/highlightCode.d.ts +7 -0
  14. package/dist/transforms/dom/highlightCode.js +30 -0
  15. package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.d.ts +6 -0
  16. package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.js +33 -0
  17. package/dist/transforms/dom/linkifyUrls.d.ts +6 -0
  18. package/dist/transforms/dom/linkifyUrls.js +45 -0
  19. package/dist/transforms/dom/mergeConsecutiveOneLinerPres.d.ts +6 -0
  20. package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +33 -0
  21. package/dist/transforms/dom/removeTrackingPixels.d.ts +6 -0
  22. package/dist/transforms/dom/removeTrackingPixels.js +59 -0
  23. package/dist/transforms/dom/replaceEmbedsWithPlaceholders.d.ts +6 -0
  24. package/dist/transforms/dom/replaceEmbedsWithPlaceholders.js +36 -0
  25. package/dist/transforms/dom/replacePreLineBreaks.d.ts +6 -0
  26. package/dist/transforms/dom/replacePreLineBreaks.js +13 -0
  27. package/dist/transforms/dom/resolveRelativeUrls.d.ts +6 -0
  28. package/dist/transforms/dom/resolveRelativeUrls.js +42 -0
  29. package/dist/transforms/dom/simplifyFigures.d.ts +6 -0
  30. package/dist/transforms/dom/simplifyFigures.js +27 -0
  31. package/dist/transforms/dom/stripComments.d.ts +6 -0
  32. package/dist/transforms/dom/stripComments.js +23 -0
  33. package/dist/transforms/dom/stripInterBlockBreaks.d.ts +6 -0
  34. package/dist/transforms/dom/stripInterBlockBreaks.js +18 -0
  35. package/dist/transforms/dom/stripParagraphBoundaryBreaks.d.ts +6 -0
  36. package/dist/transforms/dom/stripParagraphBoundaryBreaks.js +25 -0
  37. package/dist/transforms/dom/stripTrackingParams.d.ts +6 -0
  38. package/dist/transforms/dom/stripTrackingParams.js +22 -0
  39. package/dist/transforms/dom/trimPreWhitespace.d.ts +6 -0
  40. package/dist/transforms/dom/trimPreWhitespace.js +20 -0
  41. package/dist/transforms/dom/unwrapRedirectUrls.d.ts +7 -0
  42. package/dist/transforms/dom/unwrapRedirectUrls.js +28 -0
  43. package/dist/transforms/string/decodeDoubleEncodedTags.d.ts +6 -0
  44. package/dist/transforms/string/decodeDoubleEncodedTags.js +23 -0
  45. package/dist/transforms/string/paragraphizePlainText.d.ts +6 -0
  46. package/dist/transforms/string/paragraphizePlainText.js +10 -0
  47. package/dist/transforms/string/stripEmptyTags.d.ts +6 -0
  48. package/dist/transforms/string/stripEmptyTags.js +25 -0
  49. package/dist/transforms/string/stripOrphanedClosingTags.d.ts +6 -0
  50. package/dist/transforms/string/stripOrphanedClosingTags.js +28 -0
  51. package/dist/transforms/string/unwrapWrappers.d.ts +6 -0
  52. package/dist/transforms/string/unwrapWrappers.js +10 -0
  53. package/dist/types.d.ts +52 -0
  54. package/dist/unwraps/aceml.d.ts +6 -0
  55. package/dist/unwraps/aceml.js +17 -0
  56. package/dist/unwraps/adjust.d.ts +6 -0
  57. package/dist/unwraps/adjust.js +9 -0
  58. package/dist/unwraps/amazonAffiliate.d.ts +6 -0
  59. package/dist/unwraps/amazonAffiliate.js +9 -0
  60. package/dist/unwraps/ampCache.d.ts +6 -0
  61. package/dist/unwraps/ampCache.js +13 -0
  62. package/dist/unwraps/awin.d.ts +6 -0
  63. package/dist/unwraps/awin.js +9 -0
  64. package/dist/unwraps/bing.d.ts +6 -0
  65. package/dist/unwraps/bing.js +15 -0
  66. package/dist/unwraps/cjNetwork.d.ts +6 -0
  67. package/dist/unwraps/cjNetwork.js +17 -0
  68. package/dist/unwraps/digidip.d.ts +6 -0
  69. package/dist/unwraps/digidip.js +8 -0
  70. package/dist/unwraps/disqus.d.ts +6 -0
  71. package/dist/unwraps/disqus.js +8 -0
  72. package/dist/unwraps/douban.d.ts +6 -0
  73. package/dist/unwraps/douban.js +9 -0
  74. package/dist/unwraps/duckduckgo.d.ts +6 -0
  75. package/dist/unwraps/duckduckgo.js +9 -0
  76. package/dist/unwraps/ebayRover.d.ts +6 -0
  77. package/dist/unwraps/ebayRover.js +8 -0
  78. package/dist/unwraps/effiliation.d.ts +6 -0
  79. package/dist/unwraps/effiliation.js +8 -0
  80. package/dist/unwraps/embedly.d.ts +6 -0
  81. package/dist/unwraps/embedly.js +8 -0
  82. package/dist/unwraps/facebook.d.ts +6 -0
  83. package/dist/unwraps/facebook.js +9 -0
  84. package/dist/unwraps/feedsportal.d.ts +6 -0
  85. package/dist/unwraps/feedsportal.js +44 -0
  86. package/dist/unwraps/firebaseDynamicLinks.d.ts +6 -0
  87. package/dist/unwraps/firebaseDynamicLinks.js +8 -0
  88. package/dist/unwraps/flipboard.d.ts +6 -0
  89. package/dist/unwraps/flipboard.js +9 -0
  90. package/dist/unwraps/gateSc.d.ts +6 -0
  91. package/dist/unwraps/gateSc.js +8 -0
  92. package/dist/unwraps/georiot.d.ts +6 -0
  93. package/dist/unwraps/georiot.js +8 -0
  94. package/dist/unwraps/gitee.d.ts +6 -0
  95. package/dist/unwraps/gitee.js +9 -0
  96. package/dist/unwraps/google.d.ts +6 -0
  97. package/dist/unwraps/google.js +8 -0
  98. package/dist/unwraps/googleAmpViewer.d.ts +6 -0
  99. package/dist/unwraps/googleAmpViewer.js +13 -0
  100. package/dist/unwraps/googleNews.d.ts +6 -0
  101. package/dist/unwraps/googleNews.js +8 -0
  102. package/dist/unwraps/googleNewsModern.d.ts +6 -0
  103. package/dist/unwraps/googleNewsModern.js +11 -0
  104. package/dist/unwraps/googleScholar.d.ts +6 -0
  105. package/dist/unwraps/googleScholar.js +8 -0
  106. package/dist/unwraps/googleTranslate.d.ts +6 -0
  107. package/dist/unwraps/googleTranslate.js +8 -0
  108. package/dist/unwraps/hashnode.d.ts +6 -0
  109. package/dist/unwraps/hashnode.js +9 -0
  110. package/dist/unwraps/icptrack.d.ts +6 -0
  111. package/dist/unwraps/icptrack.js +9 -0
  112. package/dist/unwraps/idealoPartner.d.ts +6 -0
  113. package/dist/unwraps/idealoPartner.js +8 -0
  114. package/dist/unwraps/instagram.d.ts +6 -0
  115. package/dist/unwraps/instagram.js +8 -0
  116. package/dist/unwraps/jianshuGo.d.ts +6 -0
  117. package/dist/unwraps/jianshuGo.js +9 -0
  118. package/dist/unwraps/juejin.d.ts +6 -0
  119. package/dist/unwraps/juejin.js +8 -0
  120. package/dist/unwraps/leverAnalytics.d.ts +6 -0
  121. package/dist/unwraps/leverAnalytics.js +8 -0
  122. package/dist/unwraps/linksynergy.d.ts +6 -0
  123. package/dist/unwraps/linksynergy.js +9 -0
  124. package/dist/unwraps/mailchimp.d.ts +6 -0
  125. package/dist/unwraps/mailchimp.js +9 -0
  126. package/dist/unwraps/mailpanion.d.ts +6 -0
  127. package/dist/unwraps/mailpanion.js +8 -0
  128. package/dist/unwraps/mailpgn.d.ts +6 -0
  129. package/dist/unwraps/mailpgn.js +8 -0
  130. package/dist/unwraps/mailtrack.d.ts +6 -0
  131. package/dist/unwraps/mailtrack.js +8 -0
  132. package/dist/unwraps/medium.d.ts +6 -0
  133. package/dist/unwraps/medium.js +9 -0
  134. package/dist/unwraps/mimecast.d.ts +6 -0
  135. package/dist/unwraps/mimecast.js +11 -0
  136. package/dist/unwraps/mozillaOutgoing.d.ts +6 -0
  137. package/dist/unwraps/mozillaOutgoing.js +13 -0
  138. package/dist/unwraps/narrativ.d.ts +6 -0
  139. package/dist/unwraps/narrativ.js +8 -0
  140. package/dist/unwraps/nicoMs.d.ts +6 -0
  141. package/dist/unwraps/nicoMs.js +12 -0
  142. package/dist/unwraps/outlookSafelinks.d.ts +6 -0
  143. package/dist/unwraps/outlookSafelinks.js +8 -0
  144. package/dist/unwraps/partnerAds.d.ts +6 -0
  145. package/dist/unwraps/partnerAds.js +8 -0
  146. package/dist/unwraps/pocket.d.ts +6 -0
  147. package/dist/unwraps/pocket.js +9 -0
  148. package/dist/unwraps/postmark.d.ts +6 -0
  149. package/dist/unwraps/postmark.js +12 -0
  150. package/dist/unwraps/proofpointV1.d.ts +6 -0
  151. package/dist/unwraps/proofpointV1.js +16 -0
  152. package/dist/unwraps/proofpointV2.d.ts +6 -0
  153. package/dist/unwraps/proofpointV2.js +16 -0
  154. package/dist/unwraps/proofpointV3.d.ts +6 -0
  155. package/dist/unwraps/proofpointV3.js +78 -0
  156. package/dist/unwraps/pxf.d.ts +6 -0
  157. package/dist/unwraps/pxf.js +8 -0
  158. package/dist/unwraps/recruitics.d.ts +6 -0
  159. package/dist/unwraps/recruitics.js +8 -0
  160. package/dist/unwraps/redditOut.d.ts +6 -0
  161. package/dist/unwraps/redditOut.js +8 -0
  162. package/dist/unwraps/redirectingat.d.ts +6 -0
  163. package/dist/unwraps/redirectingat.js +8 -0
  164. package/dist/unwraps/segmentfault.d.ts +6 -0
  165. package/dist/unwraps/segmentfault.js +16 -0
  166. package/dist/unwraps/shareasale.d.ts +6 -0
  167. package/dist/unwraps/shareasale.js +9 -0
  168. package/dist/unwraps/sjv.d.ts +6 -0
  169. package/dist/unwraps/sjv.js +8 -0
  170. package/dist/unwraps/skimlinks.d.ts +6 -0
  171. package/dist/unwraps/skimlinks.js +8 -0
  172. package/dist/unwraps/slack.d.ts +6 -0
  173. package/dist/unwraps/slack.js +9 -0
  174. package/dist/unwraps/smartredirect.d.ts +6 -0
  175. package/dist/unwraps/smartredirect.js +8 -0
  176. package/dist/unwraps/sspai.d.ts +6 -0
  177. package/dist/unwraps/sspai.js +9 -0
  178. package/dist/unwraps/steamLinkfilter.d.ts +6 -0
  179. package/dist/unwraps/steamLinkfilter.js +9 -0
  180. package/dist/unwraps/telegramIv.d.ts +6 -0
  181. package/dist/unwraps/telegramIv.js +9 -0
  182. package/dist/unwraps/tradedoubler.d.ts +6 -0
  183. package/dist/unwraps/tradedoubler.js +9 -0
  184. package/dist/unwraps/tumblr.d.ts +6 -0
  185. package/dist/unwraps/tumblr.js +9 -0
  186. package/dist/unwraps/valuecommerce.d.ts +6 -0
  187. package/dist/unwraps/valuecommerce.js +9 -0
  188. package/dist/unwraps/viglink.d.ts +6 -0
  189. package/dist/unwraps/viglink.js +8 -0
  190. package/dist/unwraps/vkAway.d.ts +6 -0
  191. package/dist/unwraps/vkAway.js +9 -0
  192. package/dist/unwraps/webArchive.d.ts +6 -0
  193. package/dist/unwraps/webArchive.js +12 -0
  194. package/dist/unwraps/yahooSearch.d.ts +6 -0
  195. package/dist/unwraps/yahooSearch.js +12 -0
  196. package/dist/unwraps/yandexTurbo.d.ts +6 -0
  197. package/dist/unwraps/yandexTurbo.js +12 -0
  198. package/dist/unwraps/youtube.d.ts +6 -0
  199. package/dist/unwraps/youtube.js +9 -0
  200. package/dist/unwraps/zhihu.d.ts +6 -0
  201. package/dist/unwraps/zhihu.js +8 -0
  202. package/dist/utils.d.ts +13 -0
  203. package/dist/utils.js +31 -0
  204. package/package.json +58 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Maciej Lamberski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # Feedsweep
2
+
3
+ [![codecov](https://codecov.io/gh/macieklamberski/feedsweep/branch/main/graph/badge.svg)](https://codecov.io/gh/macieklamberski/feedsweep)
4
+ [![npm version](https://img.shields.io/npm/v/feedsweep.svg)](https://www.npmjs.com/package/feedsweep)
5
+ [![license](https://img.shields.io/npm/l/feedsweep.svg)](https://github.com/macieklamberski/feedsweep/blob/main/LICENSE)
6
+
7
+ Tidy up the HTML content in web feeds. Fix feed-specific quirks so content displays in its best possible form.
8
+
9
+ Feedsweep takes raw feed item HTML and runs it through a pipeline that genuinely improves the output: fixing lazy-loaded images so they actually render, resolving relative URLs to absolute, stripping tracking parameters and pixels for privacy, highlighting code blocks, normalizing broken markup from common feed quirks, auto-linking bare URLs, and converting embeds into framework-agnostic placeholders. It ships with sensible defaults and built-in support for YouTube and other popular platforms.
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ npm install feedsweep
15
+ ```
16
+
17
+ ## Quick Start
18
+
19
+ ```typescript
20
+ import { transformContent } from 'feedsweep'
21
+
22
+ const result = transformContent('<p>Check <img data-src="photo.jpg"> and visit /about</p>', {
23
+ baseUrl: 'https://example.com/post/1',
24
+ })
25
+ ```
26
+
27
+ ## Transforms
28
+
29
+ Inventory of every transform exported from the package. Most are enabled by default; pass a custom `stringTransforms` / `domTransforms` array via `transformContent` options to override.
30
+
31
+ | Transform | Description |
32
+ | --- | --- |
33
+ | `stripOrphanedClosingTags` | Remove unmatched `</p>` / `</div>` close tags |
34
+ | `decodeDoubleEncodedTags` | Decode `&lt;tag&gt;` back to `<tag>` in mixed content |
35
+ | `unwrapWrappers` | Remove outer `<div>`, `<article>`, `<section>` wrappers |
36
+ | `paragraphizePlainText` | Wrap plain text in `<p>` tags |
37
+ | `stripEmptyTags` | Remove empty `<p>`, `<div>`, `<span>` and other tags |
38
+ | `stripComments` | Remove HTML `<!-- comments -->` |
39
+ | `fixLazyImages` | Move `data-src` / `data-original` to real `src` |
40
+ | `resolveRelativeUrls` | Convert relative URLs to absolute using base URL |
41
+ | `unwrapRedirectUrls` | Remove Google/Bing/Facebook/etc. redirect wrappers |
42
+ | `stripTrackingParams` | Remove UTM and other tracking parameters |
43
+ | `removeTrackingPixels` | Strip 1×1 tracking pixel images |
44
+ | `stripInterBlockBreaks` | Remove `<br>` tags between block elements |
45
+ | `stripParagraphBoundaryBreaks` | Remove `<br>` tags adjacent to paragraph boundaries |
46
+ | `highlightCode` | Syntax-highlight `<code>` blocks with highlight.js |
47
+ | `mergeConsecutiveOneLinerPres` | Merge consecutive single-line `<pre>` tags |
48
+ | `replacePreLineBreaks` | Replace `<br>` with `\n` inside `<pre>` |
49
+ | `trimPreWhitespace` | Remove common leading indentation from `<pre>` |
50
+ | `linkifyUrls` | Wrap bare URLs in `<a>` tags |
51
+ | `replaceEmbedsWithPlaceholders` | Convert `<iframe>` to embed placeholders |
52
+ | `injectEnclosureEmbedPlaceholders` | Add audio/video enclosures to content |
53
+ | `simplifyFigures` | Unwrap `<figure>` when the figcaption is empty or redundant |
54
+
55
+ ## Options
56
+
57
+ ```typescript
58
+ import { fixLazyImages, resolveRelativeUrls, transformContent } from 'feedsweep'
59
+
60
+ const result = transformContent(html, {
61
+ // Base URL for resolving relative URLs.
62
+ baseUrl: 'https://example.com/post/1',
63
+ // Feed item enclosures (audio/video).
64
+ enclosures: [{ url: 'https://example.com/audio.mp3', type: 'audio/mpeg' }],
65
+ // Run a custom DOM transform pipeline (omit to use defaults).
66
+ domTransforms: [fixLazyImages, resolveRelativeUrls],
67
+ })
68
+ ```
69
+
70
+ The `stringTransforms`, `domTransforms`, and `finalStringTransforms` options each fully replace the corresponding default phase when provided. Every transform is also exported individually from `feedsweep`, so you can compose any pipeline — list them explicitly to build from scratch, or spread `defaultDomTransforms` (etc.) from `feedsweep/defaults` to extend or filter the defaults.
@@ -0,0 +1,11 @@
1
+ import { EmbedResolverResult } from "./types.js";
2
+
3
+ //#region src/common.d.ts
4
+ declare const stripOversizedBase64Sources: (html: string, maxSize: number) => string;
5
+ declare const parseFragment: (html: string) => Document;
6
+ declare const transformHtml: (html: string, transform: (document: Document) => void) => string;
7
+ declare const applyDomTransforms: (html: string, transforms: Array<(document: Document) => void>) => string;
8
+ declare const applyStringTransforms: (html: string, transforms: Array<(html: string) => string>) => string;
9
+ declare const createEmbedPlaceholder: (document: Document, src: string, type: "video" | "audio" | "iframe", metadata?: Partial<EmbedResolverResult>) => HTMLElement;
10
+ //#endregion
11
+ export { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml };
package/dist/common.js ADDED
@@ -0,0 +1,116 @@
1
+ import { resolveUrl } from "feedcanon";
2
+ import { parseHTML } from "linkedom";
3
+ //#region src/common.ts
4
+ const Node = {
5
+ ELEMENT_NODE: 1,
6
+ TEXT_NODE: 3,
7
+ COMMENT_NODE: 8
8
+ };
9
+ const base64SrcRegex = /((?:src|srcset|poster)=["'])data:[^"']*;base64,[^"']*(["'])/g;
10
+ const safeThumbnailDataUrlRegex = /^data:image\/(png|jpe?g|gif|webp|avif);/i;
11
/**
 * Decide whether a thumbnail URL may be copied onto an embed placeholder.
 *
 * A URL is accepted when `resolveUrl` yields a value for it, or when it is a
 * `data:` URL whose media type matches `safeThumbnailDataUrlRegex`.
 *
 * @param {string} url - Candidate thumbnail URL.
 * @returns {boolean} `true` when the URL passes either check.
 */
const isSafeThumbnailUrl = (url) => {
  if (resolveUrl(url) !== undefined) {
    return true;
  }
  return safeThumbnailDataUrlRegex.test(url);
};
14
/**
 * Blank out inline base64 `src` / `srcset` / `poster` values that are too large.
 *
 * Each attribute matched by `base64SrcRegex` is kept verbatim while the whole
 * matched text (attribute name, quotes and value) is shorter than `maxSize`;
 * otherwise only the opening `name="` part and the closing quote are kept,
 * leaving an empty attribute value.
 *
 * @param {string} html - Markup to scan.
 * @param {number} maxSize - Length limit, compared against the matched attribute text.
 * @returns {string} Markup with oversized base64 sources emptied.
 */
const stripOversizedBase64Sources = (html, maxSize) => {
  const keepOrBlank = (attribute, opening, closing) => {
    return attribute.length < maxSize ? attribute : `${opening}${closing}`;
  };
  return html.replace(base64SrcRegex, keepOrBlank);
};
20
/**
 * Parse an HTML fragment into a document.
 *
 * The fragment is placed inside the `<body>` of a minimal HTML skeleton and
 * handed to `parseHTML`; the resulting `document` is returned.
 *
 * @param {string} html - Fragment markup.
 * @returns {Document} Document whose body holds the parsed fragment.
 */
const parseFragment = (html) => {
  const page = `<!doctype html><html><head></head><body>${html}</body></html>`;
  return parseHTML(page).document;
};
24
/**
 * Run a single DOM transform over an HTML fragment.
 *
 * @param {string} html - Fragment markup, parsed with `parseFragment`.
 * @param {(document: Document) => void} transform - Callback that mutates the parsed document.
 * @returns {string} The body's `innerHTML` after the transform has run.
 */
const transformHtml = (html, transform) => {
  const parsed = parseFragment(html);
  transform(parsed);
  const { body } = parsed;
  return body.innerHTML;
};
29
/**
 * Run a sequence of DOM transforms over one parsed copy of the markup.
 *
 * Before parsing, the markup goes through `stripOversizedBase64Sources` with a
 * limit of 50 * 1024. Every transform then receives the same document, in
 * array order.
 *
 * @param {string} html - Fragment markup.
 * @param {Array<(document: Document) => void>} transforms - Mutating callbacks.
 * @returns {string} The body's `innerHTML` once all transforms have run.
 */
const applyDomTransforms = (html, transforms) => {
  const base64Limit = 50 * 1024;
  const trimmedHtml = stripOversizedBase64Sources(html, base64Limit);
  const parsed = parseFragment(trimmedHtml);
  for (const step of transforms) {
    step(parsed);
  }
  return parsed.body.innerHTML;
};
34
/**
 * Pipe markup through a list of string transforms.
 *
 * Each transform receives the previous transform's return value; the first one
 * receives `html`. With no transforms, `html` is returned unchanged.
 *
 * @param {string} html - Starting markup.
 * @param {Array<(html: string) => string>} transforms - Functions applied in order.
 * @returns {string} Result of the last transform.
 */
const applyStringTransforms = (html, transforms) => {
  return [...transforms].reduce((current, step) => step(current), html);
};
39
+ const blockElements = new Set([
40
+ "address",
41
+ "article",
42
+ "aside",
43
+ "blockquote",
44
+ "center",
45
+ "dd",
46
+ "details",
47
+ "div",
48
+ "dl",
49
+ "dt",
50
+ "fieldset",
51
+ "figcaption",
52
+ "figure",
53
+ "footer",
54
+ "h1",
55
+ "h2",
56
+ "h3",
57
+ "h4",
58
+ "h5",
59
+ "h6",
60
+ "header",
61
+ "hr",
62
+ "li",
63
+ "main",
64
+ "nav",
65
+ "ol",
66
+ "p",
67
+ "pre",
68
+ "section",
69
+ "summary",
70
+ "table",
71
+ "ul"
72
+ ]);
73
/**
 * Check for a text node that contains nothing but whitespace.
 *
 * @param {{ nodeType: number, textContent?: string | null }} node - DOM node to inspect.
 * @returns {boolean} `true` for a text node whose trimmed content is empty
 *   (a missing `textContent` counts as empty).
 */
const isWhitespaceText = (node) => {
  if (node.nodeType !== Node.TEXT_NODE) {
    return false;
  }
  const text = node.textContent ?? "";
  return text.trim() === "";
};
76
/**
 * Check for a `<br>` element (tag name compared case-insensitively).
 *
 * @param {{ nodeType: number, tagName?: string }} node - DOM node to inspect.
 * @returns {boolean} `true` only for element nodes whose tag is `br`.
 */
const isBr = (node) => {
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return false;
  }
  return node.tagName.toLowerCase() === "br";
};
79
/**
 * Check for a comment node.
 *
 * @param {{ nodeType: number }} node - DOM node to inspect.
 * @returns {boolean} `true` when the node's type is `Node.COMMENT_NODE`.
 */
const isComment = ({ nodeType }) => nodeType === Node.COMMENT_NODE;
82
/**
 * Check for a node that carries no content of its own: whitespace-only text,
 * a `<br>` element, or a comment.
 *
 * @param {object} node - DOM node to inspect.
 * @returns {boolean} `true` when any of the three checks matches.
 */
const isSkippable = (node) => {
  // Checks run in this order and stop at the first one that matches.
  const checks = [isWhitespaceText, isBr, isComment];
  return checks.some((check) => check(node));
};
85
/**
 * Check for an element whose lower-cased tag name is listed in `blockElements`.
 *
 * @param {{ nodeType: number, tagName?: string }} node - DOM node to inspect.
 * @returns {boolean} `true` only for element nodes with a listed tag.
 */
const isBlockElement = (node) => {
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return false;
  }
  const tag = node.tagName.toLowerCase();
  return blockElements.has(tag);
};
88
/**
 * Repeatedly strip an outer wrapper from markup.
 *
 * The markup is trimmed, then matched against `pattern`; while it matches, the
 * markup is replaced by the trimmed content of capture group 3 and matched
 * again, so nested wrappers are peeled one layer per iteration.
 *
 * @param {string} html - Markup to unwrap.
 * @param {RegExp} pattern - Wrapper pattern; capture group 3 must hold the inner content.
 * @returns {string} The innermost content, or the trimmed input when nothing matches.
 */
const unwrapOuterTag = (html, pattern) => {
  let result = html.trim();
  for (;;) {
    // `exec` resumes from `lastIndex` on global/sticky patterns. Each pass
    // matches a new, shorter string, so always start from the beginning.
    pattern.lastIndex = 0;
    const match = pattern.exec(result);
    if (!match) break;
    const inner = match[3];
    // The pattern matched without exposing inner content: nothing to unwrap.
    if (typeof inner !== "string") break;
    const unwrapped = inner.trim();
    // A match that removes nothing would repeat forever; stop instead.
    if (unwrapped === result) break;
    result = unwrapped;
  }
  // Do not leak scan state to the caller's (possibly shared) pattern.
  pattern.lastIndex = 0;
  return result;
};
97
/**
 * Report whether a URL starts with a scheme that must not become a clickable
 * link (`javascript:`, `vbscript:`, `data:`).
 *
 * Spaces and control characters are removed before the check so that inputs
 * such as " java\tscript:" are still recognized.
 */
const hasUnsafeLinkScheme = (url) => {
  const compact = String(url).replace(/[\u0000-\u0020]+/g, "");
  return /^(?:javascript|vbscript|data):/i.test(compact);
};

/**
 * Build the `<div data-embed=…>` placeholder that stands in for an embed.
 *
 * `metadata` values take precedence over the `type` and `src` arguments.
 * Optional metadata fields become `data-embed-*` attributes only when they are
 * truthy; the thumbnail additionally has to pass `isSafeThumbnailUrl`. The
 * placeholder contains one fallback `<a>` whose text is
 * `metadata.url ?? metadata.src ?? src`.
 *
 * @param {Document} document - Document used to create the elements.
 * @param {string} src - Embed source URL.
 * @param {"video" | "audio" | "iframe"} type - Embed kind.
 * @param {object} [metadata] - Optional resolver result (`type`, `src`, `provider`,
 *   `url`, `thumbnail`, `width`, `height`, `author`, `text`).
 * @returns {HTMLElement} The placeholder element (not attached to the document).
 */
const createEmbedPlaceholder = (document, src, type, metadata) => {
  const element = document.createElement("div");
  element.setAttribute("data-embed", metadata?.type ?? type);
  element.setAttribute("data-embed-src", metadata?.src ?? src);
  if (metadata?.provider) element.setAttribute("data-embed-provider", metadata.provider);
  if (metadata?.url) element.setAttribute("data-embed-url", metadata.url);
  if (metadata?.thumbnail && isSafeThumbnailUrl(metadata.thumbnail)) element.setAttribute("data-embed-thumbnail", metadata.thumbnail);
  if (metadata?.width) element.setAttribute("data-embed-width", String(metadata.width));
  if (metadata?.height) element.setAttribute("data-embed-height", String(metadata.height));
  if (metadata?.author) element.setAttribute("data-embed-author", metadata.author);
  if (metadata?.text) element.setAttribute("data-embed-text", metadata.text);
  const fallbackUrl = metadata?.url ?? metadata?.src ?? src;
  const link = document.createElement("a");
  // The URL originates from feed markup. Never emit a clickable script/data
  // link; the visible text is still kept so the reader sees what was there.
  if (!hasUnsafeLinkScheme(fallbackUrl)) link.setAttribute("href", fallbackUrl);
  link.textContent = fallbackUrl;
  element.appendChild(link);
  return element;
};
115
+ //#endregion
116
+ export { Node, applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, isBlockElement, isBr, isSkippable, parseFragment, stripOversizedBase64Sources, transformHtml, unwrapOuterTag };
@@ -0,0 +1,14 @@
1
+ import { DomTransform, EmbedResolver, ResolveUrlFn, StringTransform, UrlUnwrapper } from "./types.js";
2
+
3
+ //#region src/defaults.d.ts
4
+ declare const defaultStringTransforms: Array<StringTransform>;
5
+ declare const defaultDomTransforms: Array<DomTransform>;
6
+ declare const defaultFinalStringTransforms: Array<StringTransform>;
7
+ declare const defaultEmbedResolvers: Array<EmbedResolver>;
8
+ declare const defaultResolveUrlFn: ResolveUrlFn;
9
+ declare const defaultLazySrcAttributes: string[];
10
+ declare const defaultTrackingHosts: string[];
11
+ declare const defaultTrackingPathSegments: string[];
12
+ declare const defaultUrlUnwrappers: Array<UrlUnwrapper>;
13
+ //#endregion
14
+ export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
@@ -0,0 +1,127 @@
1
+ import { youtubeEmbedResolver } from "./embeds/youtube.js";
2
+ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
3
+ import { highlightCode } from "./transforms/dom/highlightCode.js";
4
+ import { injectEnclosureEmbedPlaceholders } from "./transforms/dom/injectEnclosureEmbedPlaceholders.js";
5
+ import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
6
+ import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
7
+ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
8
+ import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
9
+ import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
10
+ import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
11
+ import { stripComments } from "./transforms/dom/stripComments.js";
12
+ import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
13
+ import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
14
+ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
15
+ import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
16
+ import { unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
17
+ import { decodeDoubleEncodedTags } from "./transforms/string/decodeDoubleEncodedTags.js";
18
+ import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
19
+ import { stripEmptyTags } from "./transforms/string/stripEmptyTags.js";
20
+ import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
21
+ import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
22
+ import { unwrapBing } from "./unwraps/bing.js";
23
+ import { unwrapFacebookShim } from "./unwraps/facebook.js";
24
+ import { unwrapGoogle } from "./unwraps/google.js";
25
+ import { unwrapGoogleAmpViewer } from "./unwraps/googleAmpViewer.js";
26
+ import { unwrapGoogleNews } from "./unwraps/googleNews.js";
27
+ import { unwrapGoogleNewsModern } from "./unwraps/googleNewsModern.js";
28
+ import { unwrapGoogleScholar } from "./unwraps/googleScholar.js";
29
+ import { unwrapInstagramShim } from "./unwraps/instagram.js";
30
+ import { unwrapRedditOut } from "./unwraps/redditOut.js";
31
+ import { unwrapVkAway } from "./unwraps/vkAway.js";
32
+ import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
33
+ import { unwrapYouTube } from "./unwraps/youtube.js";
34
+ import { resolveUrl } from "feedcanon";
35
+ //#region src/defaults.ts
36
+ const defaultStringTransforms = [
37
+ stripOrphanedClosingTags,
38
+ decodeDoubleEncodedTags,
39
+ unwrapWrappers,
40
+ paragraphizePlainText,
41
+ stripEmptyTags
42
+ ];
43
+ const defaultDomTransforms = [
44
+ stripComments,
45
+ fixLazyImages,
46
+ resolveRelativeUrls,
47
+ unwrapRedirectUrls,
48
+ stripTrackingParams,
49
+ removeTrackingPixels,
50
+ stripInterBlockBreaks,
51
+ stripParagraphBoundaryBreaks,
52
+ highlightCode,
53
+ mergeConsecutiveOneLinerPres,
54
+ replacePreLineBreaks,
55
+ trimPreWhitespace,
56
+ linkifyUrls,
57
+ replaceEmbedsWithPlaceholders,
58
+ injectEnclosureEmbedPlaceholders
59
+ ];
60
+ const defaultFinalStringTransforms = [stripEmptyTags];
61
+ const defaultEmbedResolvers = [youtubeEmbedResolver];
62
+ const defaultResolveUrlFn = (url, baseUrl) => resolveUrl(url, baseUrl);
63
+ const defaultLazySrcAttributes = [
64
+ "data-src",
65
+ "data-original",
66
+ "data-lazy-src",
67
+ "data-url",
68
+ "data-image",
69
+ "data-orig-file",
70
+ "data-large-file",
71
+ "data-medium-file",
72
+ "data-thumb",
73
+ "data-thumb-src",
74
+ "data-original-src",
75
+ "data-image-src",
76
+ "data-canonical-src",
77
+ "data-img-url",
78
+ "data-orig",
79
+ "data-runner-src"
80
+ ];
81
+ const defaultTrackingHosts = [
82
+ "feedsportal.com",
83
+ "stats.wordpress.com",
84
+ "pixel.wp.com",
85
+ "doubleclick.net",
86
+ "google-analytics.com",
87
+ "list-manage.com",
88
+ "feedburner.com",
89
+ "feedproxy.google.com",
90
+ "feedblitz.com",
91
+ "mailerlite.com",
92
+ "convertkit-mail.com",
93
+ "beehiiv.com",
94
+ "email.medium.com",
95
+ "stat-c.medium.com",
96
+ "googlesyndication.com",
97
+ "googletagmanager.com",
98
+ "amazon-adsystem.com",
99
+ "taboola.com",
100
+ "outbrain.com",
101
+ "scorecardresearch.com",
102
+ "quantserve.com",
103
+ "chartbeat.com",
104
+ "moatads.com",
105
+ "sentry.io"
106
+ ];
107
+ const defaultTrackingPathSegments = [
108
+ "pixel",
109
+ "beacon",
110
+ "count"
111
+ ];
112
+ const defaultUrlUnwrappers = [
113
+ unwrapBing,
114
+ unwrapGoogle,
115
+ unwrapGoogleNews,
116
+ unwrapGoogleNewsModern,
117
+ unwrapGoogleScholar,
118
+ unwrapGoogleAmpViewer,
119
+ unwrapYahooSearch,
120
+ unwrapYouTube,
121
+ unwrapFacebookShim,
122
+ unwrapInstagramShim,
123
+ unwrapVkAway,
124
+ unwrapRedditOut
125
+ ];
126
+ //#endregion
127
+ export { defaultDomTransforms, defaultEmbedResolvers, defaultFinalStringTransforms, defaultLazySrcAttributes, defaultResolveUrlFn, defaultStringTransforms, defaultTrackingHosts, defaultTrackingPathSegments, defaultUrlUnwrappers };
@@ -0,0 +1,9 @@
1
+ import { EmbedResolver, EmbedResolverResult } from "../types.js";
2
+
3
+ //#region src/embeds/youtube.d.ts
4
+ declare const composeThumbnailUrl: (videoId: string) => string;
5
+ declare const extractVideoId: (link: string) => string | undefined;
6
+ declare const youtubeResolveEmbed: (url: string) => EmbedResolverResult | undefined;
7
+ declare const youtubeEmbedResolver: EmbedResolver;
8
+ //#endregion
9
+ export { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed };
@@ -0,0 +1,50 @@
1
+ import { isHostOf, isSubdomainOf } from "feedscout/utils";
2
+ //#region src/embeds/youtube.ts
3
+ const safeVideoIdRegex = /^[a-zA-Z0-9_-]{11}$/;
4
+ const pathIdSegments = [
5
+ "shorts",
6
+ "embed",
7
+ "live",
8
+ "v"
9
+ ];
10
+ const youtubeHosts = [
11
+ "youtube.com",
12
+ "youtube-nocookie.com",
13
+ "youtu.be"
14
+ ];
15
/**
 * Build the `hqdefault.jpg` thumbnail URL on `i.ytimg.com` for a video id.
 *
 * @param {string} videoId - Video identifier, inserted as-is.
 * @returns {string} Thumbnail URL.
 */
const composeThumbnailUrl = (videoId) => "https://i.ytimg.com/vi/".concat(videoId, "/hqdefault.jpg");
18
/**
 * Pull a video id out of a link.
 *
 * Lookup rules, first match wins:
 * - host `youtu.be` (or a subdomain of it): the first path segment;
 * - first path segment `watch`: the `v` query parameter, else `vi`;
 * - first path segment listed in `pathIdSegments`: the second path segment.
 *
 * The candidate is returned only when it satisfies `safeVideoIdRegex`.
 *
 * @param {string} link - URL to inspect.
 * @returns {string | undefined} The id, or `undefined` when the link cannot be
 *   parsed as a URL or yields no valid id.
 */
const extractVideoId = (link) => {
  let parsed;
  try {
    parsed = new URL(link);
  } catch {
    // Unparseable links simply have no id.
    return undefined;
  }
  const { hostname, pathname, searchParams } = parsed;
  const [first, second] = pathname.split("/").filter(Boolean);
  let candidate;
  if (hostname === "youtu.be" || hostname.endsWith(".youtu.be")) {
    candidate = first;
  } else if (first === "watch") {
    candidate = searchParams.get("v") ?? searchParams.get("vi");
  } else if (second !== undefined && pathIdSegments.includes(first)) {
    candidate = second;
  }
  return candidate && safeVideoIdRegex.test(candidate) ? candidate : undefined;
};
30
/**
 * Turn a link into embed metadata for the video it points at.
 *
 * @param {string} url - Link passed to `extractVideoId`.
 * @returns {object | undefined} `{ provider, src, url, thumbnail, type }` built
 *   from the extracted id (`src` on `www.youtube-nocookie.com`, `url` on
 *   `www.youtube.com`), or `undefined` when no id is found.
 */
const youtubeResolveEmbed = (url) => {
  const id = extractVideoId(url);
  return id
    ? {
        provider: "youtube",
        src: `https://www.youtube-nocookie.com/embed/${id}`,
        url: `https://www.youtube.com/watch?v=${id}`,
        thumbnail: composeThumbnailUrl(id),
        type: "iframe",
      }
    : undefined;
};
41
/**
 * Embed resolver for `<iframe src>` elements.
 *
 * `extract` reads the element's `src` (empty string when absent) and resolves
 * it with `youtubeResolveEmbed` only when the URL's host is one of
 * `youtubeHosts` or a subdomain of one; otherwise it returns `undefined`.
 */
const youtubeEmbedResolver = {
  selector: "iframe[src]",
  extract(element) {
    const src = element.getAttribute("src") ?? "";
    const isYoutube = isHostOf(src, youtubeHosts) || isSubdomainOf(src, youtubeHosts);
    return isYoutube ? youtubeResolveEmbed(src) : undefined;
  },
};
49
+ //#endregion
50
+ export { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed };
@@ -0,0 +1,105 @@
1
+ import { DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext } from "./types.js";
2
+ import { defaultResolveUrlFn } from "./defaults.js";
3
+ import { applyDomTransforms, applyStringTransforms, createEmbedPlaceholder, parseFragment, stripOversizedBase64Sources, transformHtml } from "./common.js";
4
+ import { composeThumbnailUrl, extractVideoId, youtubeEmbedResolver, youtubeResolveEmbed } from "./embeds/youtube.js";
5
+ import { fixLazyImages } from "./transforms/dom/fixLazyImages.js";
6
+ import { detectLanguage, highlightCode } from "./transforms/dom/highlightCode.js";
7
+ import { injectEnclosureEmbedPlaceholders } from "./transforms/dom/injectEnclosureEmbedPlaceholders.js";
8
+ import { linkifyUrls } from "./transforms/dom/linkifyUrls.js";
9
+ import { mergeConsecutiveOneLinerPres } from "./transforms/dom/mergeConsecutiveOneLinerPres.js";
10
+ import { removeTrackingPixels } from "./transforms/dom/removeTrackingPixels.js";
11
+ import { replaceEmbedsWithPlaceholders } from "./transforms/dom/replaceEmbedsWithPlaceholders.js";
12
+ import { replacePreLineBreaks } from "./transforms/dom/replacePreLineBreaks.js";
13
+ import { resolveRelativeUrls } from "./transforms/dom/resolveRelativeUrls.js";
14
+ import { simplifyFigures } from "./transforms/dom/simplifyFigures.js";
15
+ import { stripComments } from "./transforms/dom/stripComments.js";
16
+ import { stripInterBlockBreaks } from "./transforms/dom/stripInterBlockBreaks.js";
17
+ import { stripParagraphBoundaryBreaks } from "./transforms/dom/stripParagraphBoundaryBreaks.js";
18
+ import { stripTrackingParams } from "./transforms/dom/stripTrackingParams.js";
19
+ import { trimPreWhitespace } from "./transforms/dom/trimPreWhitespace.js";
20
+ import { extractRedirectTarget, unwrapRedirectUrls } from "./transforms/dom/unwrapRedirectUrls.js";
21
+ import { decodeDoubleEncodedTags } from "./transforms/string/decodeDoubleEncodedTags.js";
22
+ import { paragraphizePlainText } from "./transforms/string/paragraphizePlainText.js";
23
+ import { stripEmptyTags } from "./transforms/string/stripEmptyTags.js";
24
+ import { stripOrphanedClosingTags } from "./transforms/string/stripOrphanedClosingTags.js";
25
+ import { unwrapWrappers } from "./transforms/string/unwrapWrappers.js";
26
+ import { unwrapAceml } from "./unwraps/aceml.js";
27
+ import { unwrapAdjust } from "./unwraps/adjust.js";
28
+ import { unwrapAmazonAffiliate } from "./unwraps/amazonAffiliate.js";
29
+ import { unwrapAmpCache } from "./unwraps/ampCache.js";
30
+ import { unwrapAwin } from "./unwraps/awin.js";
31
+ import { unwrapBing } from "./unwraps/bing.js";
32
+ import { unwrapCjNetwork } from "./unwraps/cjNetwork.js";
33
+ import { unwrapDigidip } from "./unwraps/digidip.js";
34
+ import { unwrapDisqus } from "./unwraps/disqus.js";
35
+ import { unwrapDouban } from "./unwraps/douban.js";
36
+ import { unwrapDuckduckgo } from "./unwraps/duckduckgo.js";
37
+ import { unwrapEbayRover } from "./unwraps/ebayRover.js";
38
+ import { unwrapEffiliation } from "./unwraps/effiliation.js";
39
+ import { unwrapEmbedly } from "./unwraps/embedly.js";
40
+ import { unwrapFacebookShim } from "./unwraps/facebook.js";
41
+ import { unwrapFeedsportal } from "./unwraps/feedsportal.js";
42
+ import { unwrapFirebaseDynamicLinks } from "./unwraps/firebaseDynamicLinks.js";
43
+ import { unwrapFlipboard } from "./unwraps/flipboard.js";
44
+ import { unwrapGateSc } from "./unwraps/gateSc.js";
45
+ import { unwrapGeoriot } from "./unwraps/georiot.js";
46
+ import { unwrapGitee } from "./unwraps/gitee.js";
47
+ import { unwrapGoogle } from "./unwraps/google.js";
48
+ import { unwrapGoogleAmpViewer } from "./unwraps/googleAmpViewer.js";
49
+ import { unwrapGoogleNews } from "./unwraps/googleNews.js";
50
+ import { unwrapGoogleNewsModern } from "./unwraps/googleNewsModern.js";
51
+ import { unwrapGoogleScholar } from "./unwraps/googleScholar.js";
52
+ import { unwrapGoogleTranslate } from "./unwraps/googleTranslate.js";
53
+ import { unwrapHashnode } from "./unwraps/hashnode.js";
54
+ import { unwrapIcptrack } from "./unwraps/icptrack.js";
55
+ import { unwrapIdealoPartner } from "./unwraps/idealoPartner.js";
56
+ import { unwrapInstagramShim } from "./unwraps/instagram.js";
57
+ import { unwrapJianshuGo } from "./unwraps/jianshuGo.js";
58
+ import { unwrapJuejin } from "./unwraps/juejin.js";
59
+ import { unwrapLeverAnalytics } from "./unwraps/leverAnalytics.js";
60
+ import { unwrapLinksynergy } from "./unwraps/linksynergy.js";
61
+ import { unwrapMailchimp } from "./unwraps/mailchimp.js";
62
+ import { unwrapMailpanion } from "./unwraps/mailpanion.js";
63
+ import { unwrapMailpgn } from "./unwraps/mailpgn.js";
64
+ import { unwrapMailtrack } from "./unwraps/mailtrack.js";
65
+ import { unwrapMedium } from "./unwraps/medium.js";
66
+ import { unwrapMimecast } from "./unwraps/mimecast.js";
67
+ import { unwrapMozillaOutgoing } from "./unwraps/mozillaOutgoing.js";
68
+ import { unwrapNarrativ } from "./unwraps/narrativ.js";
69
+ import { unwrapNicoMs } from "./unwraps/nicoMs.js";
70
+ import { unwrapOutlookSafelinks } from "./unwraps/outlookSafelinks.js";
71
+ import { unwrapPartnerAds } from "./unwraps/partnerAds.js";
72
+ import { unwrapPocket } from "./unwraps/pocket.js";
73
+ import { unwrapPostmark } from "./unwraps/postmark.js";
74
+ import { unwrapProofpointV1 } from "./unwraps/proofpointV1.js";
75
+ import { unwrapProofpointV2 } from "./unwraps/proofpointV2.js";
76
+ import { unwrapProofpointV3 } from "./unwraps/proofpointV3.js";
77
+ import { unwrapPxf } from "./unwraps/pxf.js";
78
+ import { unwrapRecruitics } from "./unwraps/recruitics.js";
79
+ import { unwrapRedditOut } from "./unwraps/redditOut.js";
80
+ import { unwrapRedirectingat } from "./unwraps/redirectingat.js";
81
+ import { unwrapSegmentfault } from "./unwraps/segmentfault.js";
82
+ import { unwrapShareasale } from "./unwraps/shareasale.js";
83
+ import { unwrapSjv } from "./unwraps/sjv.js";
84
+ import { unwrapSkimlinks } from "./unwraps/skimlinks.js";
85
+ import { unwrapSlack } from "./unwraps/slack.js";
86
+ import { unwrapSmartredirect } from "./unwraps/smartredirect.js";
87
+ import { unwrapSspai } from "./unwraps/sspai.js";
88
+ import { unwrapSteamLinkfilter } from "./unwraps/steamLinkfilter.js";
89
+ import { unwrapTelegramIv } from "./unwraps/telegramIv.js";
90
+ import { unwrapTradedoubler } from "./unwraps/tradedoubler.js";
91
+ import { unwrapTumblr } from "./unwraps/tumblr.js";
92
+ import { unwrapValuecommerce } from "./unwraps/valuecommerce.js";
93
+ import { unwrapViglink } from "./unwraps/viglink.js";
94
+ import { unwrapVkAway } from "./unwraps/vkAway.js";
95
+ import { unwrapWebArchive } from "./unwraps/webArchive.js";
96
+ import { unwrapYahooSearch } from "./unwraps/yahooSearch.js";
97
+ import { unwrapYandexTurbo } from "./unwraps/yandexTurbo.js";
98
+ import { unwrapYouTube } from "./unwraps/youtube.js";
99
+ import { unwrapZhihu } from "./unwraps/zhihu.js";
100
+ import { ParamExtractorConfig, chooseBaseUrl, coerceNumber, createParamExtractor } from "./utils.js";
101
+
102
+ //#region src/index.d.ts
103
/**
 * Accepts an HTML string plus optional `TransformContentOptions` and returns a string.
 * NOTE(review): presumably the returned string is the transformed HTML; the
 * implementation is not visible in this declaration file, so confirm.
 */
+ declare const transformContent: (html: string, options?: TransformContentOptions) => string;
104
+ //#endregion
105
+ export { type DomTransform, type EmbedResolver, type EmbedResolverResult, type Enclosure, type ParamExtractorConfig, type ResolveUrlFn, type StringTransform, type TransformContentOptions, type TransformContext, applyDomTransforms, applyStringTransforms, chooseBaseUrl, coerceNumber, composeThumbnailUrl, createEmbedPlaceholder, createParamExtractor, decodeDoubleEncodedTags, defaultResolveUrlFn, detectLanguage, extractRedirectTarget, extractVideoId, fixLazyImages, highlightCode, injectEnclosureEmbedPlaceholders, linkifyUrls, mergeConsecutiveOneLinerPres, paragraphizePlainText, parseFragment, removeTrackingPixels, replaceEmbedsWithPlaceholders, replacePreLineBreaks, resolveRelativeUrls, simplifyFigures, stripComments, stripEmptyTags, stripInterBlockBreaks, stripOrphanedClosingTags, stripOversizedBase64Sources, stripParagraphBoundaryBreaks, stripTrackingParams, transformContent, transformHtml, trimPreWhitespace, unwrapAceml, unwrapAdjust, unwrapAmazonAffiliate, unwrapAmpCache, unwrapAwin, unwrapBing, unwrapCjNetwork, unwrapDigidip, unwrapDisqus, unwrapDouban, unwrapDuckduckgo, unwrapEbayRover, unwrapEffiliation, unwrapEmbedly, unwrapFacebookShim, unwrapFeedsportal, unwrapFirebaseDynamicLinks, unwrapFlipboard, unwrapGateSc, unwrapGeoriot, unwrapGitee, unwrapGoogle, unwrapGoogleAmpViewer, unwrapGoogleNews, unwrapGoogleNewsModern, unwrapGoogleScholar, unwrapGoogleTranslate, unwrapHashnode, unwrapIcptrack, unwrapIdealoPartner, unwrapInstagramShim, unwrapJianshuGo, unwrapJuejin, unwrapLeverAnalytics, unwrapLinksynergy, unwrapMailchimp, unwrapMailpanion, unwrapMailpgn, unwrapMailtrack, unwrapMedium, unwrapMimecast, unwrapMozillaOutgoing, unwrapNarrativ, unwrapNicoMs, unwrapOutlookSafelinks, unwrapPartnerAds, unwrapPocket, unwrapPostmark, unwrapProofpointV1, unwrapProofpointV2, unwrapProofpointV3, unwrapPxf, unwrapRecruitics, unwrapRedditOut, unwrapRedirectUrls, unwrapRedirectingat, unwrapSegmentfault, unwrapShareasale, unwrapSjv, unwrapSkimlinks, unwrapSlack, 
unwrapSmartredirect, unwrapSspai, unwrapSteamLinkfilter, unwrapTelegramIv, unwrapTradedoubler, unwrapTumblr, unwrapValuecommerce, unwrapViglink, unwrapVkAway, unwrapWebArchive, unwrapWrappers, unwrapYahooSearch, unwrapYandexTurbo, unwrapYouTube, unwrapZhihu, youtubeEmbedResolver, youtubeResolveEmbed };