feedsweep 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +70 -0
- package/dist/common.d.ts +11 -0
- package/dist/common.js +116 -0
- package/dist/defaults.d.ts +14 -0
- package/dist/defaults.js +127 -0
- package/dist/embeds/youtube.d.ts +9 -0
- package/dist/embeds/youtube.js +50 -0
- package/dist/index.d.ts +105 -0
- package/dist/index.js +118 -0
- package/dist/transforms/dom/fixLazyImages.d.ts +6 -0
- package/dist/transforms/dom/fixLazyImages.js +38 -0
- package/dist/transforms/dom/highlightCode.d.ts +7 -0
- package/dist/transforms/dom/highlightCode.js +30 -0
- package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.d.ts +6 -0
- package/dist/transforms/dom/injectEnclosureEmbedPlaceholders.js +33 -0
- package/dist/transforms/dom/linkifyUrls.d.ts +6 -0
- package/dist/transforms/dom/linkifyUrls.js +45 -0
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.d.ts +6 -0
- package/dist/transforms/dom/mergeConsecutiveOneLinerPres.js +33 -0
- package/dist/transforms/dom/removeTrackingPixels.d.ts +6 -0
- package/dist/transforms/dom/removeTrackingPixels.js +59 -0
- package/dist/transforms/dom/replaceEmbedsWithPlaceholders.d.ts +6 -0
- package/dist/transforms/dom/replaceEmbedsWithPlaceholders.js +36 -0
- package/dist/transforms/dom/replacePreLineBreaks.d.ts +6 -0
- package/dist/transforms/dom/replacePreLineBreaks.js +13 -0
- package/dist/transforms/dom/resolveRelativeUrls.d.ts +6 -0
- package/dist/transforms/dom/resolveRelativeUrls.js +42 -0
- package/dist/transforms/dom/simplifyFigures.d.ts +6 -0
- package/dist/transforms/dom/simplifyFigures.js +27 -0
- package/dist/transforms/dom/stripComments.d.ts +6 -0
- package/dist/transforms/dom/stripComments.js +23 -0
- package/dist/transforms/dom/stripInterBlockBreaks.d.ts +6 -0
- package/dist/transforms/dom/stripInterBlockBreaks.js +18 -0
- package/dist/transforms/dom/stripParagraphBoundaryBreaks.d.ts +6 -0
- package/dist/transforms/dom/stripParagraphBoundaryBreaks.js +25 -0
- package/dist/transforms/dom/stripTrackingParams.d.ts +6 -0
- package/dist/transforms/dom/stripTrackingParams.js +22 -0
- package/dist/transforms/dom/trimPreWhitespace.d.ts +6 -0
- package/dist/transforms/dom/trimPreWhitespace.js +20 -0
- package/dist/transforms/dom/unwrapRedirectUrls.d.ts +7 -0
- package/dist/transforms/dom/unwrapRedirectUrls.js +28 -0
- package/dist/transforms/string/decodeDoubleEncodedTags.d.ts +6 -0
- package/dist/transforms/string/decodeDoubleEncodedTags.js +23 -0
- package/dist/transforms/string/paragraphizePlainText.d.ts +6 -0
- package/dist/transforms/string/paragraphizePlainText.js +10 -0
- package/dist/transforms/string/stripEmptyTags.d.ts +6 -0
- package/dist/transforms/string/stripEmptyTags.js +25 -0
- package/dist/transforms/string/stripOrphanedClosingTags.d.ts +6 -0
- package/dist/transforms/string/stripOrphanedClosingTags.js +28 -0
- package/dist/transforms/string/unwrapWrappers.d.ts +6 -0
- package/dist/transforms/string/unwrapWrappers.js +10 -0
- package/dist/types.d.ts +52 -0
- package/dist/unwraps/aceml.d.ts +6 -0
- package/dist/unwraps/aceml.js +17 -0
- package/dist/unwraps/adjust.d.ts +6 -0
- package/dist/unwraps/adjust.js +9 -0
- package/dist/unwraps/amazonAffiliate.d.ts +6 -0
- package/dist/unwraps/amazonAffiliate.js +9 -0
- package/dist/unwraps/ampCache.d.ts +6 -0
- package/dist/unwraps/ampCache.js +13 -0
- package/dist/unwraps/awin.d.ts +6 -0
- package/dist/unwraps/awin.js +9 -0
- package/dist/unwraps/bing.d.ts +6 -0
- package/dist/unwraps/bing.js +15 -0
- package/dist/unwraps/cjNetwork.d.ts +6 -0
- package/dist/unwraps/cjNetwork.js +17 -0
- package/dist/unwraps/digidip.d.ts +6 -0
- package/dist/unwraps/digidip.js +8 -0
- package/dist/unwraps/disqus.d.ts +6 -0
- package/dist/unwraps/disqus.js +8 -0
- package/dist/unwraps/douban.d.ts +6 -0
- package/dist/unwraps/douban.js +9 -0
- package/dist/unwraps/duckduckgo.d.ts +6 -0
- package/dist/unwraps/duckduckgo.js +9 -0
- package/dist/unwraps/ebayRover.d.ts +6 -0
- package/dist/unwraps/ebayRover.js +8 -0
- package/dist/unwraps/effiliation.d.ts +6 -0
- package/dist/unwraps/effiliation.js +8 -0
- package/dist/unwraps/embedly.d.ts +6 -0
- package/dist/unwraps/embedly.js +8 -0
- package/dist/unwraps/facebook.d.ts +6 -0
- package/dist/unwraps/facebook.js +9 -0
- package/dist/unwraps/feedsportal.d.ts +6 -0
- package/dist/unwraps/feedsportal.js +44 -0
- package/dist/unwraps/firebaseDynamicLinks.d.ts +6 -0
- package/dist/unwraps/firebaseDynamicLinks.js +8 -0
- package/dist/unwraps/flipboard.d.ts +6 -0
- package/dist/unwraps/flipboard.js +9 -0
- package/dist/unwraps/gateSc.d.ts +6 -0
- package/dist/unwraps/gateSc.js +8 -0
- package/dist/unwraps/georiot.d.ts +6 -0
- package/dist/unwraps/georiot.js +8 -0
- package/dist/unwraps/gitee.d.ts +6 -0
- package/dist/unwraps/gitee.js +9 -0
- package/dist/unwraps/google.d.ts +6 -0
- package/dist/unwraps/google.js +8 -0
- package/dist/unwraps/googleAmpViewer.d.ts +6 -0
- package/dist/unwraps/googleAmpViewer.js +13 -0
- package/dist/unwraps/googleNews.d.ts +6 -0
- package/dist/unwraps/googleNews.js +8 -0
- package/dist/unwraps/googleNewsModern.d.ts +6 -0
- package/dist/unwraps/googleNewsModern.js +11 -0
- package/dist/unwraps/googleScholar.d.ts +6 -0
- package/dist/unwraps/googleScholar.js +8 -0
- package/dist/unwraps/googleTranslate.d.ts +6 -0
- package/dist/unwraps/googleTranslate.js +8 -0
- package/dist/unwraps/hashnode.d.ts +6 -0
- package/dist/unwraps/hashnode.js +9 -0
- package/dist/unwraps/icptrack.d.ts +6 -0
- package/dist/unwraps/icptrack.js +9 -0
- package/dist/unwraps/idealoPartner.d.ts +6 -0
- package/dist/unwraps/idealoPartner.js +8 -0
- package/dist/unwraps/instagram.d.ts +6 -0
- package/dist/unwraps/instagram.js +8 -0
- package/dist/unwraps/jianshuGo.d.ts +6 -0
- package/dist/unwraps/jianshuGo.js +9 -0
- package/dist/unwraps/juejin.d.ts +6 -0
- package/dist/unwraps/juejin.js +8 -0
- package/dist/unwraps/leverAnalytics.d.ts +6 -0
- package/dist/unwraps/leverAnalytics.js +8 -0
- package/dist/unwraps/linksynergy.d.ts +6 -0
- package/dist/unwraps/linksynergy.js +9 -0
- package/dist/unwraps/mailchimp.d.ts +6 -0
- package/dist/unwraps/mailchimp.js +9 -0
- package/dist/unwraps/mailpanion.d.ts +6 -0
- package/dist/unwraps/mailpanion.js +8 -0
- package/dist/unwraps/mailpgn.d.ts +6 -0
- package/dist/unwraps/mailpgn.js +8 -0
- package/dist/unwraps/mailtrack.d.ts +6 -0
- package/dist/unwraps/mailtrack.js +8 -0
- package/dist/unwraps/medium.d.ts +6 -0
- package/dist/unwraps/medium.js +9 -0
- package/dist/unwraps/mimecast.d.ts +6 -0
- package/dist/unwraps/mimecast.js +11 -0
- package/dist/unwraps/mozillaOutgoing.d.ts +6 -0
- package/dist/unwraps/mozillaOutgoing.js +13 -0
- package/dist/unwraps/narrativ.d.ts +6 -0
- package/dist/unwraps/narrativ.js +8 -0
- package/dist/unwraps/nicoMs.d.ts +6 -0
- package/dist/unwraps/nicoMs.js +12 -0
- package/dist/unwraps/outlookSafelinks.d.ts +6 -0
- package/dist/unwraps/outlookSafelinks.js +8 -0
- package/dist/unwraps/partnerAds.d.ts +6 -0
- package/dist/unwraps/partnerAds.js +8 -0
- package/dist/unwraps/pocket.d.ts +6 -0
- package/dist/unwraps/pocket.js +9 -0
- package/dist/unwraps/postmark.d.ts +6 -0
- package/dist/unwraps/postmark.js +12 -0
- package/dist/unwraps/proofpointV1.d.ts +6 -0
- package/dist/unwraps/proofpointV1.js +16 -0
- package/dist/unwraps/proofpointV2.d.ts +6 -0
- package/dist/unwraps/proofpointV2.js +16 -0
- package/dist/unwraps/proofpointV3.d.ts +6 -0
- package/dist/unwraps/proofpointV3.js +78 -0
- package/dist/unwraps/pxf.d.ts +6 -0
- package/dist/unwraps/pxf.js +8 -0
- package/dist/unwraps/recruitics.d.ts +6 -0
- package/dist/unwraps/recruitics.js +8 -0
- package/dist/unwraps/redditOut.d.ts +6 -0
- package/dist/unwraps/redditOut.js +8 -0
- package/dist/unwraps/redirectingat.d.ts +6 -0
- package/dist/unwraps/redirectingat.js +8 -0
- package/dist/unwraps/segmentfault.d.ts +6 -0
- package/dist/unwraps/segmentfault.js +16 -0
- package/dist/unwraps/shareasale.d.ts +6 -0
- package/dist/unwraps/shareasale.js +9 -0
- package/dist/unwraps/sjv.d.ts +6 -0
- package/dist/unwraps/sjv.js +8 -0
- package/dist/unwraps/skimlinks.d.ts +6 -0
- package/dist/unwraps/skimlinks.js +8 -0
- package/dist/unwraps/slack.d.ts +6 -0
- package/dist/unwraps/slack.js +9 -0
- package/dist/unwraps/smartredirect.d.ts +6 -0
- package/dist/unwraps/smartredirect.js +8 -0
- package/dist/unwraps/sspai.d.ts +6 -0
- package/dist/unwraps/sspai.js +9 -0
- package/dist/unwraps/steamLinkfilter.d.ts +6 -0
- package/dist/unwraps/steamLinkfilter.js +9 -0
- package/dist/unwraps/telegramIv.d.ts +6 -0
- package/dist/unwraps/telegramIv.js +9 -0
- package/dist/unwraps/tradedoubler.d.ts +6 -0
- package/dist/unwraps/tradedoubler.js +9 -0
- package/dist/unwraps/tumblr.d.ts +6 -0
- package/dist/unwraps/tumblr.js +9 -0
- package/dist/unwraps/valuecommerce.d.ts +6 -0
- package/dist/unwraps/valuecommerce.js +9 -0
- package/dist/unwraps/viglink.d.ts +6 -0
- package/dist/unwraps/viglink.js +8 -0
- package/dist/unwraps/vkAway.d.ts +6 -0
- package/dist/unwraps/vkAway.js +9 -0
- package/dist/unwraps/webArchive.d.ts +6 -0
- package/dist/unwraps/webArchive.js +12 -0
- package/dist/unwraps/yahooSearch.d.ts +6 -0
- package/dist/unwraps/yahooSearch.js +12 -0
- package/dist/unwraps/yandexTurbo.d.ts +6 -0
- package/dist/unwraps/yandexTurbo.js +12 -0
- package/dist/unwraps/youtube.d.ts +6 -0
- package/dist/unwraps/youtube.js +9 -0
- package/dist/unwraps/zhihu.d.ts +6 -0
- package/dist/unwraps/zhihu.js +8 -0
- package/dist/utils.d.ts +13 -0
- package/dist/utils.js +31 -0
- package/package.json +58 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { isBr, isSkippable } from "../../common.js";
|
|
2
|
+
//#region src/transforms/dom/stripParagraphBoundaryBreaks.ts
|
|
3
|
+
/**
 * Builds a DOM transform that clears break-bearing runs at paragraph edges.
 *
 * For every `<p>` in the document, the run of nodes accepted by `isSkippable`
 * at the start of the paragraph is collected; when at least one node of that
 * run satisfies `isBr`, the whole run is removed. The same is then repeated
 * for the run at the end of the paragraph.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const stripParagraphBoundaryBreaks = () => {
  // Collects consecutive skippable siblings starting at `start`, moving via `direction`.
  const collectRun = (start, direction) => {
    const run = [];
    for (let node = start; node && isSkippable(node); node = node[direction]) {
      run.push(node);
    }
    return run;
  };

  // A run is only dropped when it actually contains a line break.
  const dropRunIfItHasBreak = (run) => {
    if (!run.some(isBr)) return;
    for (const node of run) node.remove();
  };

  return (document) => {
    for (const paragraph of document.querySelectorAll("p")) {
      dropRunIfItHasBreak(collectRun(paragraph.firstChild, "nextSibling"));
      // `lastChild` is read only after the leading run has been handled, so a
      // paragraph emptied above yields an empty trailing run.
      dropRunIfItHasBreak(collectRun(paragraph.lastChild, "previousSibling"));
    }
  };
};
|
|
24
|
+
//#endregion
|
|
25
|
+
export { stripParagraphBoundaryBreaks };
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { defaultStrippedParams } from "feedcanon";
|
|
2
|
+
//#region src/transforms/dom/stripTrackingParams.ts
|
|
3
|
+
/**
 * Builds a DOM transform that deletes every query parameter named in
 * `defaultStrippedParams` from the `href` of each `a[href]` element.
 *
 * The attribute is rewritten only when at least one parameter was removed.
 * Hrefs that `new URL()` cannot parse on their own (no base URL is supplied)
 * are left untouched.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const stripTrackingParams = () => (document) => {
  for (const link of document.querySelectorAll("a[href]")) {
    const rawHref = link.getAttribute("href");
    if (!rawHref) continue;
    try {
      const parsed = new URL(rawHref);
      let removedAny = false;
      for (const name of defaultStrippedParams) {
        if (!parsed.searchParams.has(name)) continue;
        parsed.searchParams.delete(name);
        removedAny = true;
      }
      if (removedAny) link.setAttribute("href", parsed.toString());
    } catch {
      // Best effort: an unparsable href is skipped on purpose.
    }
  }
};
|
|
21
|
+
//#endregion
|
|
22
|
+
export { stripTrackingParams };
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
//#region src/transforms/dom/trimPreWhitespace.ts
|
|
2
|
+
// Whitespace run at the very end of the content.
const trailingWhitespaceRegex = /\s+$/;
// One or more whitespace-only lines at the very start of the content.
const leadingBlankLinesRegex = /^(\s*\n)+/;
// Non-newline whitespace at the start of a single line.
const leadingIndentRegex = /^([^\S\n]+)/;

/**
 * Builds a DOM transform that tidies the whitespace of every `<pre>` block.
 *
 * For each `<pre>` the target is its first `<code>` descendant when present,
 * otherwise the `<pre>` itself. The target's `innerHTML` loses trailing
 * whitespace and leading blank lines, and the indentation shared by all
 * non-blank lines is removed from every line.
 *
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const trimPreWhitespace = () => {
  // Number of leading non-newline whitespace characters on a line.
  const indentWidth = (line) => {
    const found = leadingIndentRegex.exec(line);
    return found === null ? 0 : found[1].length;
  };

  return (document) => {
    for (const pre of document.querySelectorAll("pre")) {
      const target = pre.querySelector("code") ?? pre;
      const withoutTail = target.innerHTML.replace(trailingWhitespaceRegex, "");
      const trimmed = withoutTail.replace(leadingBlankLinesRegex, "");
      const lines = trimmed.split("\n");

      // Stays at Infinity when there is no non-blank line at all.
      let common = Infinity;
      for (const line of lines) {
        if (line.trim().length === 0) continue;
        common = Math.min(common, indentWidth(line));
      }

      target.innerHTML = common > 0
        ? lines.map((line) => line.slice(common)).join("\n")
        : trimmed;
    }
  };
};
|
|
19
|
+
//#endregion
|
|
20
|
+
export { trimPreWhitespace };
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { DomTransform, UrlUnwrapper } from "../../types.js";
|
|
2
|
+
|
|
3
|
+
//#region src/transforms/dom/unwrapRedirectUrls.d.ts
|
|
4
|
+
/**
 * Runs `url` through `extractors` and yields a string target, or `undefined`
 * when none is produced.
 */
declare const extractRedirectTarget: (url: URL, extractors: readonly UrlUnwrapper[]) => string | undefined;
/** DOM transform exported by this module for unwrapping redirect URLs. */
declare const unwrapRedirectUrls: DomTransform;
|
|
6
|
+
//#endregion
|
|
7
|
+
export { extractRedirectTarget, unwrapRedirectUrls };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
//#region src/transforms/dom/unwrapRedirectUrls.ts
|
|
2
|
+
/**
 * Returns the first truthy result produced by calling each extractor with `url`.
 *
 * @param {URL} url - URL handed to every extractor, in order.
 * @param {Iterable<(url: URL) => (string | undefined)>} extractors - Candidate unwrappers.
 * @returns {string | undefined} The first truthy target, or `undefined` when none is found.
 */
const extractRedirectTarget = (url, extractors) => {
  for (const unwrap of extractors) {
    const destination = unwrap(url);
    if (!destination) continue;
    return destination;
  }
  return undefined;
};
|
|
8
|
+
/**
 * Builds a DOM transform that replaces redirect-style `href`s with their targets.
 *
 * Each `a[href]` value is parsed with `new URL()` and offered to
 * `context.urlUnwrappers` in order; the first truthy result becomes the new
 * `href` and the remaining unwrappers are not consulted for that anchor.
 *
 * @param {{ urlUnwrappers: Iterable<(url: URL) => (string | undefined)> }} context - Transform context.
 * @returns {(document: Document) => void} Transform that mutates the document in place.
 */
const unwrapRedirectUrls = (context) => (document) => {
  for (const link of document.querySelectorAll("a[href]")) {
    const rawHref = link.getAttribute("href");
    if (!rawHref) continue;
    try {
      const parsed = new URL(rawHref);
      for (const unwrap of context.urlUnwrappers) {
        const destination = unwrap(parsed);
        if (!destination) continue;
        link.setAttribute("href", destination);
        break;
      }
    } catch {
      // Best effort: an unparsable href or a throwing unwrapper leaves this
      // anchor as it is and processing moves on to the next one.
    }
  }
};
|
|
27
|
+
//#endregion
|
|
28
|
+
export { extractRedirectTarget, unwrapRedirectUrls };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
//#region src/transforms/string/decodeDoubleEncodedTags.ts
|
|
2
|
+
// At least one real (unescaped) HTML tag is present.
const hasHtmlRegex = /<[a-z][a-z0-9]*[\s>]/i;
// An entity-escaped tag: "&lt;", optional "/", tag name, then anything up to
// the next "&gt;". The delimiters must be the entities themselves: with
// literal "<" / ">" the pattern matches real tags and the replacement below
// is an identity, so nothing would ever be decoded.
const encodedTagRegex = /&lt;(\/?)([a-zA-Z][\w-]*)((?:[^&]|&(?!gt;))*)&gt;/g;
// Cheap pre-check for the start of an entity-escaped tag.
const hasEncodedTagRegex = /&lt;[a-zA-Z/]/;
// Real <code>/<pre> blocks, whose contents must stay escaped.
const codeBlockRegex = /<(code|pre)(\s[^>]*)?>[\s\S]*?<\/\1>/gi;

/**
 * Builds a string transform that turns entity-escaped tags (`&lt;b&gt;`) back
 * into real tags (`<b>`) in content that already contains real HTML.
 *
 * Input without any real tag, or without any escaped tag, is returned
 * unchanged. Text inside real `<code>`/`<pre>` blocks is copied verbatim so
 * escaped markup shown as code stays escaped.
 *
 * @returns {(html: string) => string} The transform.
 */
const decodeDoubleEncodedTags = () => {
  // Decodes escaped tags in a fragment known to lie outside code blocks.
  const decodeTags = (fragment) => fragment.replace(encodedTagRegex, "<$1$2$3>");

  return (html) => {
    if (!hasHtmlRegex.test(html) || !hasEncodedTagRegex.test(html)) return html;
    let result = "";
    let lastIndex = 0;
    for (const match of html.matchAll(codeBlockRegex)) {
      const matchStart = match.index;
      result += decodeTags(html.slice(lastIndex, matchStart));
      result += match[0];
      lastIndex = matchStart + match[0].length;
    }
    return result + decodeTags(html.slice(lastIndex));
  };
};
|
|
22
|
+
//#endregion
|
|
23
|
+
export { decodeDoubleEncodedTags };
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { autop } from "@wordpress/autop";
|
|
2
|
+
//#region src/transforms/string/paragraphizePlainText.ts
|
|
3
|
+
// At least one HTML tag is present.
const hasHtmlRegex = /<[a-z][a-z0-9]*[\s>]/i;

/**
 * Builds a string transform that passes content through `autop` only when it
 * contains no HTML tag; content that already has a tag is returned unchanged.
 *
 * @returns {(html: string) => string} The transform.
 */
const paragraphizePlainText = () => (html) => {
  if (hasHtmlRegex.test(html)) return html;
  return autop(html);
};
|
|
9
|
+
//#endregion
|
|
10
|
+
export { paragraphizePlainText };
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
//#region src/transforms/string/stripEmptyTags.ts
|
|
2
|
+
// An opening tag, optional whitespace, then the matching closing tag.
const emptyTagRegex = /<([a-z][a-z0-9]*)(\s[^>]*)?>(\s*)<\/\1>/gi;
// Elements that are kept even when they have no content.
const preserveWhenEmpty = new Set(["iframe", "video", "audio", "img", "source"]);

/**
 * Builds a string transform that removes elements with no content.
 *
 * Replacement is repeated until the string stops changing, so wrappers that
 * become empty once their empty children are gone are removed as well. An
 * element that contained only whitespace is replaced by a single space; a
 * truly empty one by nothing. Tags listed in `preserveWhenEmpty` are kept.
 *
 * @returns {(html: string) => string} The transform.
 */
const stripEmptyTags = () => {
  const replaceEmpty = (element, tagName, _attributes, whitespace) => {
    if (preserveWhenEmpty.has(tagName.toLowerCase())) return element;
    return whitespace.length > 0 ? " " : "";
  };

  return (html) => {
    let current = html;
    // Iterate to a fixed point.
    for (;;) {
      const next = current.replace(emptyTagRegex, replaceEmpty);
      if (next === current) return next;
      current = next;
    }
  };
};
|
|
24
|
+
//#endregion
|
|
25
|
+
export { stripEmptyTags };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
//#region src/transforms/string/stripOrphanedClosingTags.ts
|
|
2
|
+
// Tag names whose unmatched closing tags are removed.
const orphanTags = new Set(["p", "h1", "h2", "h3", "h4", "h5", "h6"]);
// Any opening or closing tag; group 2 captures the bare tag name.
const orphanTagRegex = /<(\/?([a-z][a-z0-9]*))(\s[^>]*)?\/?>/gi;

/**
 * Builds a string transform that drops closing tags with no earlier opener.
 *
 * Only tags named in `orphanTags` are considered. Scanning left to right, a
 * running count of currently open tags is kept per name; a closing tag seen
 * while its count is zero is removed, every other tag is kept as is.
 *
 * @returns {(html: string) => string} The transform.
 */
const stripOrphanedClosingTags = () => (html) => {
  const openCounts = new Map();
  return html.replace(orphanTagRegex, (tag, _slashAndName, rawName) => {
    const name = rawName.toLowerCase();
    if (!orphanTags.has(name)) return tag;

    const open = openCounts.get(name) ?? 0;
    const isClosing = tag.charAt(1) === "/";
    if (!isClosing) {
      openCounts.set(name, open + 1);
      return tag;
    }
    // A closer without a pending opener is an orphan.
    if (open <= 0) return "";
    openCounts.set(name, open - 1);
    return tag;
  });
};
|
|
27
|
+
//#endregion
|
|
28
|
+
export { stripOrphanedClosingTags };
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { unwrapOuterTag } from "../../common.js";
|
|
2
|
+
//#region src/transforms/string/unwrapWrappers.ts
|
|
3
|
+
// Whole-string match for a single outer div/article/section/main/header/footer
// element; group 3 captures what lies between the opening and closing tag.
const wrapperRegex = /^<(div|article|section|main|header|footer)(\s[^>]*)?>[\s\n]*([\s\S]*)[\s\n]*<\/\1>$/i;

/**
 * Builds a string transform that hands the content and `wrapperRegex` to
 * `unwrapOuterTag` and returns its result.
 * NOTE(review): the exact unwrapping rules live in `unwrapOuterTag` (common.js).
 *
 * @returns {(html: string) => string} The transform.
 */
const unwrapWrappers = () => (html) => unwrapOuterTag(html, wrapperRegex);
|
|
9
|
+
//#endregion
|
|
10
|
+
export { unwrapWrappers };
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { DiscoverResolveUrlFn } from "feedscout";
|
|
2
|
+
|
|
3
|
+
//#region src/types.d.ts
|
|
4
|
+
/** An enclosure entry: a required `url` with optional `type` and `medium` strings. */
type Enclosure = {
  url: string;
  type?: string;
  medium?: string;
};

/** Local alias for feedscout's `DiscoverResolveUrlFn`. */
type ResolveUrlFn = DiscoverResolveUrlFn;

/** Value produced by an embed resolver's `extract` callback. */
type EmbedResolverResult = {
  provider: string;
  src: string;
  url?: string;
  thumbnail?: string;
  type?: 'video' | 'audio' | 'iframe';
  width?: number;
  height?: number;
  author?: string;
  text?: string;
};

/** Pairs a `selector` string with an `extract` callback that receives an element. */
type EmbedResolver = {
  selector: string;
  extract: (element: Element) => EmbedResolverResult | undefined;
};

/** Maps a parsed URL to a target string, or `undefined` when it does not apply. */
type UrlUnwrapper = (url: URL) => string | undefined;

/**
 * Context handed to every transform factory. Compared with
 * `TransformContentOptions`, only `baseUrl` and `enclosures` remain optional.
 */
type TransformContext = {
  baseUrl?: string;
  enclosures?: Enclosure[];
  embedResolvers: EmbedResolver[];
  lazySrcAttributes: string[];
  trackingHosts: string[];
  trackingPathSegments: string[];
  urlUnwrappers: UrlUnwrapper[];
  resolveUrlFn: ResolveUrlFn;
};

/** Factory that receives the context and returns a function mutating a `Document`. */
type DomTransform = (context: TransformContext) => (document: Document) => void;

/** Factory that receives the context and returns a string-to-string function. */
type StringTransform = (context: TransformContext) => (html: string) => string;

/**
 * Caller-facing options; every field is optional. The first eight fields
 * mirror `TransformContext`; the last three carry lists of transforms.
 */
type TransformContentOptions = {
  baseUrl?: string;
  enclosures?: Enclosure[];
  embedResolvers?: EmbedResolver[];
  lazySrcAttributes?: string[];
  trackingHosts?: string[];
  trackingPathSegments?: string[];
  urlUnwrappers?: UrlUnwrapper[];
  resolveUrlFn?: ResolveUrlFn;
  stringTransforms?: StringTransform[];
  domTransforms?: DomTransform[];
  finalStringTransforms?: StringTransform[];
};
|
|
51
|
+
//#endregion
|
|
52
|
+
export { DomTransform, EmbedResolver, EmbedResolverResult, Enclosure, ResolveUrlFn, StringTransform, TransformContentOptions, TransformContext, UrlUnwrapper };
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { createParamExtractor } from "../utils.js";
|
|
2
|
+
//#region src/unwraps/aceml.ts
|
|
3
|
+
// Extractor configured for hosts ending in ".acemln[a-d].com", the
// "/Prod/link-tracker" path and the "redirectUrl" parameter.
// NOTE(review): matching semantics are defined by createParamExtractor (utils.js).
const baseExtractor = createParamExtractor({
  hosts: /\.acemln[a-d]\.com$/,
  path: "/Prod/link-tracker",
  params: ["redirectUrl"],
});

/**
 * Unwraps a link whose value from `baseExtractor` is a base64-encoded URL.
 *
 * @param {URL} url - Candidate link.
 * @returns {string | undefined} The decoded target when it starts with
 *   `http://` or `https://`; otherwise `undefined`.
 */
const unwrapAceml = (url) => {
  const encoded = baseExtractor(url);
  if (!encoded) return undefined;
  try {
    const decoded = Buffer.from(encoded, "base64").toString("utf-8");
    const isHttp = ["http://", "https://"].some((scheme) => decoded.startsWith(scheme));
    return isHttp ? decoded : undefined;
  } catch {
    // Any failure while decoding means there is nothing to unwrap.
    return undefined;
  }
};
|
|
16
|
+
//#endregion
|
|
17
|
+
export { unwrapAceml };
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
//#region src/unwraps/adjust.ts
|
|
2
|
+
/**
 * Unwraps `app.adjust.com` links that carry their target in the `redirect`
 * query parameter.
 *
 * @param {URL} url - Candidate link.
 * @returns {string | undefined} The `redirect` value when the host matches and
 *   the value starts with `http://` or `https://`; otherwise `undefined`.
 */
const unwrapAdjust = (url) => {
  if (url.hostname === "app.adjust.com") {
    const destination = url.searchParams.get("redirect");
    const isHttp = destination !== null
      && (destination.startsWith("https://") || destination.startsWith("http://"));
    return isHttp ? destination : undefined;
  }
  return undefined;
};
|
|
8
|
+
//#endregion
|
|
9
|
+
export { unwrapAdjust };
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
//#region src/unwraps/amazonAffiliate.ts
|
|
2
|
+
// Any subdomain of amazon-adsystem.com.
const amazonHostRegex = /\.amazon-adsystem\.com$/;
// "/x/c/<token>/<absolute http(s) URL>"; group 1 captures the embedded URL.
const amazonPathRegex = /^\/x\/c\/[^/]+\/(https?:\/\/.+)$/;

/**
 * Unwraps amazon-adsystem.com links that embed the target URL in their path.
 *
 * @param {URL} url - Candidate link.
 * @returns {string | undefined} The URL embedded in the pathname, or
 *   `undefined` when the host or path shape does not match.
 */
const unwrapAmazonAffiliate = (url) => {
  if (!amazonHostRegex.test(url.hostname)) return undefined;
  const embedded = amazonPathRegex.exec(url.pathname);
  return embedded === null ? undefined : embedded[1];
};
|
|
8
|
+
//#endregion
|
|
9
|
+
export { unwrapAmazonAffiliate };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { isHostOf, isSubdomainOf } from "feedscout/utils";
|
|
2
|
+
//#region src/unwraps/ampCache.ts
|
|
3
|
+
// "/c/s/<rest>": group 1 is emitted behind an "https://" prefix.
const httpsPathRegex = /^\/c\/s\/(.+)$/;
// "/c/<rest>" where <rest> does not begin with "s/": emitted behind "http://".
const httpPathRegex = /^\/c\/(?!s\/)(.+)$/;

/**
 * Unwraps links served from `cdn.ampproject.org` or one of its subdomains,
 * as decided by feedscout's `isHostOf` / `isSubdomainOf`.
 *
 * @param {URL} url - Candidate link.
 * @returns {string | undefined} The rebuilt `https://…` or `http://…` URL, or
 *   `undefined` when the host or path shape does not match.
 */
const unwrapAmpCache = (url) => {
  const onAmpCache = isHostOf(url.href, "cdn.ampproject.org")
    || isSubdomainOf(url.href, "cdn.ampproject.org");
  if (!onAmpCache) return undefined;

  const secure = httpsPathRegex.exec(url.pathname);
  if (secure !== null) return `https://${secure[1]}`;

  const plain = httpPathRegex.exec(url.pathname);
  return plain === null ? undefined : `http://${plain[1]}`;
};
|
|
12
|
+
//#endregion
|
|
13
|
+
export { unwrapAmpCache };
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
//#region src/unwraps/bing.ts
|
|
2
|
+
// bing.com, optionally prefixed with "www." or "cn.".
const bingHostRegex = /^(?:www\.|cn\.)?bing\.com$/;
// The "u" value must start with "a" followed by one digit.
const bingPrefixRegex = /^a\d/;

/**
 * Unwraps Bing `/ck/a` links, whose `u` parameter is a two-character prefix
 * followed by the base64url-encoded target.
 *
 * @param {URL} url - Candidate link.
 * @returns {string | undefined} The decoded target when it starts with
 *   `http://` or `https://`; otherwise `undefined`.
 */
const unwrapBing = (url) => {
  const isClickLink = bingHostRegex.test(url.hostname) && url.pathname === "/ck/a";
  if (!isClickLink) return undefined;

  const encoded = url.searchParams.get("u");
  if (!encoded || !bingPrefixRegex.test(encoded)) return undefined;

  try {
    // Skip the two-character prefix before decoding.
    const decoded = Buffer.from(encoded.slice(2), "base64url").toString("utf-8");
    const isHttp = decoded.startsWith("https://") || decoded.startsWith("http://");
    return isHttp ? decoded : undefined;
  } catch {
    // Any failure while decoding means there is nothing to unwrap.
    return undefined;
  }
};
|
|
14
|
+
//#endregion
|
|
15
|
+
export { unwrapBing };
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { createParamExtractor } from "../utils.js";
|
|
2
|
+
//#region src/unwraps/cjNetwork.ts
|
|
3
|
+
// Hostnames passed to createParamExtractor for this unwrapper.
const cjNetworkHosts = [
  "www.dpbolvw.net",
  "www.tkqlhce.com",
  "www.anrdoezrs.net",
  "www.jdoqocy.com",
  "www.kqzyfj.com",
  "www.pntrac.com",
  "www.pjtra.com",
  "www.pntrs.com",
];

/**
 * URL unwrapper built by `createParamExtractor` from the host list above and
 * the `url` parameter name.
 * NOTE(review): presumably returns the `url` query value for links on these
 * hosts — confirm against createParamExtractor (utils.js).
 */
const unwrapCjNetwork = createParamExtractor({
  hosts: cjNetworkHosts,
  params: ["url"],
});
|
|
16
|
+
//#endregion
|
|
17
|
+
export { unwrapCjNetwork };
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { createParamExtractor } from "../utils.js";
|
|
2
|
+
//#region src/unwraps/facebook.ts
|
|
3
|
+
// Settings passed to createParamExtractor: two hostnames, the "/l.php" path
// and the "u" parameter name.
const facebookShimConfig = {
  hosts: ["l.facebook.com", "lm.facebook.com"],
  path: "/l.php",
  params: ["u"],
};

/**
 * URL unwrapper built by `createParamExtractor` from `facebookShimConfig`.
 * NOTE(review): presumably returns the `u` query value of matching links —
 * confirm against createParamExtractor (utils.js).
 */
const unwrapFacebookShim = createParamExtractor(facebookShimConfig);
|
|
8
|
+
//#endregion
|
|
9
|
+
export { unwrapFacebookShim };
|