feedcanon 1.4.2 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/dist/probes/wordpress.cjs +8 -4
- package/dist/probes/wordpress.js +8 -4
- package/dist/rewrites/blogger.cjs +6 -6
- package/dist/rewrites/blogger.js +6 -6
- package/dist/types.d.cts +6 -5
- package/dist/types.d.ts +6 -5
- package/dist/utils.cjs +22 -18
- package/dist/utils.js +22 -18
- package/package.json +7 -6
package/dist/index.cjs
CHANGED
|
@@ -91,7 +91,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
91
91
|
}
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
|
-
if (probes?.length) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
94
|
+
if (probes && probes?.length > 0) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
95
95
|
const response = await fetchAndCompare(candidateUrl);
|
|
96
96
|
if (response) {
|
|
97
97
|
onMatch?.({
|
package/dist/index.js
CHANGED
|
@@ -91,7 +91,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
91
91
|
}
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
|
-
if (probes?.length) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
94
|
+
if (probes && probes?.length > 0) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
95
95
|
const response = await fetchAndCompare(candidateUrl);
|
|
96
96
|
if (response) {
|
|
97
97
|
onMatch?.({
|
package/dist/probes/wordpress.cjs
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
//#region src/probes/wordpress.ts
|
|
2
|
+
const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
|
|
3
|
+
const feedPathRegex = /\/feed(\/|$)/;
|
|
4
|
+
const trailingSlashRegex = /\/$/;
|
|
5
|
+
const optionalTrailingSlashRegex = /\/?$/;
|
|
2
6
|
const feedTypes = [
|
|
3
7
|
"atom",
|
|
4
8
|
"rss2",
|
|
@@ -18,18 +22,18 @@ const wordpressProbe = {
|
|
|
18
22
|
const candidates = [];
|
|
19
23
|
const isComment = feed.startsWith("comments-");
|
|
20
24
|
const type = isComment ? feed.slice(9) : feed;
|
|
21
|
-
if ((isComment ?
|
|
25
|
+
if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
|
|
22
26
|
const withoutSlash = new URL(url);
|
|
23
|
-
withoutSlash.pathname = url.pathname.replace(
|
|
27
|
+
withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
|
|
24
28
|
withoutSlash.searchParams.delete("feed");
|
|
25
29
|
candidates.push(withoutSlash.href);
|
|
26
30
|
const withSlash = new URL(url);
|
|
27
|
-
withSlash.pathname = url.pathname.replace(
|
|
31
|
+
withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
|
|
28
32
|
withSlash.searchParams.delete("feed");
|
|
29
33
|
candidates.push(withSlash.href);
|
|
30
34
|
return candidates;
|
|
31
35
|
}
|
|
32
|
-
const basePath = url.pathname.replace(
|
|
36
|
+
const basePath = url.pathname.replace(trailingSlashRegex, "");
|
|
33
37
|
const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
|
|
34
38
|
const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
|
|
35
39
|
const primary = new URL(url);
|
package/dist/probes/wordpress.js
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
//#region src/probes/wordpress.ts
|
|
2
|
+
const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
|
|
3
|
+
const feedPathRegex = /\/feed(\/|$)/;
|
|
4
|
+
const trailingSlashRegex = /\/$/;
|
|
5
|
+
const optionalTrailingSlashRegex = /\/?$/;
|
|
2
6
|
const feedTypes = [
|
|
3
7
|
"atom",
|
|
4
8
|
"rss2",
|
|
@@ -18,18 +22,18 @@ const wordpressProbe = {
|
|
|
18
22
|
const candidates = [];
|
|
19
23
|
const isComment = feed.startsWith("comments-");
|
|
20
24
|
const type = isComment ? feed.slice(9) : feed;
|
|
21
|
-
if ((isComment ?
|
|
25
|
+
if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
|
|
22
26
|
const withoutSlash = new URL(url);
|
|
23
|
-
withoutSlash.pathname = url.pathname.replace(
|
|
27
|
+
withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
|
|
24
28
|
withoutSlash.searchParams.delete("feed");
|
|
25
29
|
candidates.push(withoutSlash.href);
|
|
26
30
|
const withSlash = new URL(url);
|
|
27
|
-
withSlash.pathname = url.pathname.replace(
|
|
31
|
+
withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
|
|
28
32
|
withSlash.searchParams.delete("feed");
|
|
29
33
|
candidates.push(withSlash.href);
|
|
30
34
|
return candidates;
|
|
31
35
|
}
|
|
32
|
-
const basePath = url.pathname.replace(
|
|
36
|
+
const basePath = url.pathname.replace(trailingSlashRegex, "");
|
|
33
37
|
const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
|
|
34
38
|
const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
|
|
35
39
|
const primary = new URL(url);
|
package/dist/rewrites/blogger.cjs
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
const require_utils = require("../utils.cjs");
|
|
2
2
|
//#region src/rewrites/blogger.ts
|
|
3
|
-
const
|
|
4
|
-
const
|
|
3
|
+
const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
|
|
4
|
+
const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
|
|
5
5
|
const bloggerRewrite = {
|
|
6
6
|
match: (url) => {
|
|
7
|
-
return
|
|
7
|
+
return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
|
|
8
8
|
},
|
|
9
9
|
rewrite: (url) => {
|
|
10
10
|
const rewritten = new URL(url);
|
|
11
|
-
const isBlogger =
|
|
12
|
-
const isBlogspot =
|
|
11
|
+
const isBlogger = bloggerRegex.test(rewritten.hostname);
|
|
12
|
+
const isBlogspot = blogspotRegex.test(rewritten.hostname);
|
|
13
13
|
rewritten.protocol = "https:";
|
|
14
14
|
if (isBlogger) rewritten.hostname = "www.blogger.com";
|
|
15
15
|
if (isBlogspot) {
|
|
16
|
-
rewritten.hostname = rewritten.hostname.replace(
|
|
16
|
+
rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
|
|
17
17
|
if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
|
|
18
18
|
else if (rewritten.pathname === "/rss.xml") {
|
|
19
19
|
rewritten.pathname = "/feeds/posts/default";
|
package/dist/rewrites/blogger.js
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
import { normalizeUrl } from "../utils.js";
|
|
2
2
|
//#region src/rewrites/blogger.ts
|
|
3
|
-
const
|
|
4
|
-
const
|
|
3
|
+
const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
|
|
4
|
+
const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
|
|
5
5
|
const bloggerRewrite = {
|
|
6
6
|
match: (url) => {
|
|
7
|
-
return
|
|
7
|
+
return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
|
|
8
8
|
},
|
|
9
9
|
rewrite: (url) => {
|
|
10
10
|
const rewritten = new URL(url);
|
|
11
|
-
const isBlogger =
|
|
12
|
-
const isBlogspot =
|
|
11
|
+
const isBlogger = bloggerRegex.test(rewritten.hostname);
|
|
12
|
+
const isBlogspot = blogspotRegex.test(rewritten.hostname);
|
|
13
13
|
rewritten.protocol = "https:";
|
|
14
14
|
if (isBlogger) rewritten.hostname = "www.blogger.com";
|
|
15
15
|
if (isBlogspot) {
|
|
16
|
-
rewritten.hostname = rewritten.hostname.replace(
|
|
16
|
+
rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
|
|
17
17
|
if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
|
|
18
18
|
else if (rewritten.pathname === "/rss.xml") {
|
|
19
19
|
rewritten.pathname = "/feeds/posts/default";
|
package/dist/types.d.cts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import * as feedsmith from "feedsmith";
|
|
1
|
+
import * as _$feedsmith from "feedsmith";
|
|
2
2
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
|
-
type
|
|
4
|
+
type MaybePromise<T> = T | Promise<T>;
|
|
5
|
+
type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
|
|
5
6
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) =>
|
|
7
|
+
parse: (body: string) => MaybePromise<T | undefined>;
|
|
7
8
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
9
|
getSignature: (parsed: T, url: string) => string;
|
|
9
10
|
};
|
|
@@ -62,13 +63,13 @@ type FetchFnOptions = {
|
|
|
62
63
|
method?: 'GET' | 'HEAD';
|
|
63
64
|
headers?: Record<string, string>;
|
|
64
65
|
};
|
|
65
|
-
type ExistsFn<T = unknown> = (url: string) =>
|
|
66
|
+
type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
|
|
66
67
|
type FetchFnResponse = {
|
|
67
68
|
headers: Headers;
|
|
68
69
|
body: string;
|
|
69
70
|
url: string;
|
|
70
71
|
status: number;
|
|
71
72
|
};
|
|
72
|
-
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) =>
|
|
73
|
+
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
|
|
73
74
|
//#endregion
|
|
74
75
|
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/types.d.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import * as feedsmith from "feedsmith";
|
|
1
|
+
import * as _$feedsmith from "feedsmith";
|
|
2
2
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
|
-
type
|
|
4
|
+
type MaybePromise<T> = T | Promise<T>;
|
|
5
|
+
type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
|
|
5
6
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) =>
|
|
7
|
+
parse: (body: string) => MaybePromise<T | undefined>;
|
|
7
8
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
9
|
getSignature: (parsed: T, url: string) => string;
|
|
9
10
|
};
|
|
@@ -62,13 +63,13 @@ type FetchFnOptions = {
|
|
|
62
63
|
method?: 'GET' | 'HEAD';
|
|
63
64
|
headers?: Record<string, string>;
|
|
64
65
|
};
|
|
65
|
-
type ExistsFn<T = unknown> = (url: string) =>
|
|
66
|
+
type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
|
|
66
67
|
type FetchFnResponse = {
|
|
67
68
|
headers: Headers;
|
|
68
69
|
body: string;
|
|
69
70
|
url: string;
|
|
70
71
|
status: number;
|
|
71
72
|
};
|
|
72
|
-
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) =>
|
|
73
|
+
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
|
|
73
74
|
//#endregion
|
|
74
75
|
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/utils.cjs
CHANGED
|
@@ -11,27 +11,31 @@ const getStrippedParamsSet = (params) => {
|
|
|
11
11
|
}
|
|
12
12
|
return cached;
|
|
13
13
|
};
|
|
14
|
-
const
|
|
15
|
-
const
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
14
|
+
const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
15
|
+
const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
16
|
+
const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
17
|
+
const httpsLetterRegex = /s/i;
|
|
18
|
+
const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
|
|
19
|
+
const protocolPrefixRegex = /^https?:\/\//;
|
|
20
|
+
const wwwPrefixRegex = /^www\./;
|
|
21
|
+
const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
|
|
22
|
+
const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
23
|
+
const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
20
24
|
const fixMalformedProtocol = (url) => {
|
|
21
|
-
if (
|
|
22
|
-
const doubledMatch =
|
|
25
|
+
if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
|
|
26
|
+
const doubledMatch = doubledProtocolRegex.exec(url);
|
|
23
27
|
if (doubledMatch) {
|
|
24
28
|
const inner = doubledMatch[1];
|
|
25
29
|
const www = doubledMatch[2];
|
|
26
30
|
const rest = url.slice(doubledMatch[0].length);
|
|
27
|
-
return (
|
|
31
|
+
return (httpsLetterRegex.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
28
32
|
}
|
|
29
|
-
const singleMatch =
|
|
33
|
+
const singleMatch = singleMalformedRegex.exec(url);
|
|
30
34
|
if (singleMatch) {
|
|
31
35
|
const fullMatch = singleMatch[0];
|
|
32
36
|
const www = singleMatch[1];
|
|
33
37
|
const rest = url.slice(fullMatch.length);
|
|
34
|
-
return (
|
|
38
|
+
return (httpsLetterRegex.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
35
39
|
}
|
|
36
40
|
return url;
|
|
37
41
|
};
|
|
@@ -60,7 +64,7 @@ const addMissingProtocol = (url, protocol = "https") => {
|
|
|
60
64
|
if (url.startsWith("//") && !url.startsWith("///")) try {
|
|
61
65
|
const parsed = new URL(`${protocol}:${url}`);
|
|
62
66
|
const hostname = parsed.hostname;
|
|
63
|
-
if (hostname.includes(".") || hostname === "localhost" ||
|
|
67
|
+
if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
|
|
64
68
|
return url;
|
|
65
69
|
} catch {
|
|
66
70
|
return url;
|
|
@@ -100,7 +104,7 @@ const decodeAndNormalizeEncoding = (value) => {
|
|
|
100
104
|
return value.replace(/%([0-9A-Fa-f]{2})/g, (_match, hex) => {
|
|
101
105
|
const charCode = Number.parseInt(hex, 16);
|
|
102
106
|
const char = String.fromCharCode(charCode);
|
|
103
|
-
if (
|
|
107
|
+
if (safePathCharsRegex.test(char)) return char;
|
|
104
108
|
return `%${hex.toUpperCase()}`;
|
|
105
109
|
});
|
|
106
110
|
};
|
|
@@ -111,7 +115,7 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
111
115
|
parsed.hostname = parsed.hostname.normalize("NFC");
|
|
112
116
|
parsed.pathname = parsed.pathname.normalize("NFC");
|
|
113
117
|
}
|
|
114
|
-
if (options.convertToPunycode &&
|
|
118
|
+
if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
|
|
115
119
|
const ascii = (0, node_url.domainToASCII)(parsed.hostname);
|
|
116
120
|
if (ascii) parsed.hostname = ascii;
|
|
117
121
|
}
|
|
@@ -143,7 +147,7 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
143
147
|
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
144
148
|
let result = parsed.href;
|
|
145
149
|
if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
|
|
146
|
-
if (options.stripProtocol) result = result.replace(
|
|
150
|
+
if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
|
|
147
151
|
return result;
|
|
148
152
|
} catch {
|
|
149
153
|
return url;
|
|
@@ -184,11 +188,11 @@ const createSignature = (object, fields) => {
|
|
|
184
188
|
for (const [key, val] of saved) object[key] = val;
|
|
185
189
|
return signature;
|
|
186
190
|
};
|
|
187
|
-
const
|
|
191
|
+
const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
188
192
|
const neutralizeUrls = (text, urls) => {
|
|
189
193
|
const escapeHost = (url) => {
|
|
190
194
|
try {
|
|
191
|
-
return new URL("/", url).host.replace(
|
|
195
|
+
return new URL("/", url).host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
|
|
192
196
|
} catch {
|
|
193
197
|
return;
|
|
194
198
|
}
|
|
@@ -196,7 +200,7 @@ const neutralizeUrls = (text, urls) => {
|
|
|
196
200
|
const hosts = urls.map(escapeHost).filter(Boolean);
|
|
197
201
|
if (hosts.length === 0) return text;
|
|
198
202
|
const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
|
|
199
|
-
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(
|
|
203
|
+
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashRegex, "$1$2");
|
|
200
204
|
};
|
|
201
205
|
//#endregion
|
|
202
206
|
exports.addMissingProtocol = addMissingProtocol;
|
package/dist/utils.js
CHANGED
|
@@ -11,27 +11,31 @@ const getStrippedParamsSet = (params) => {
|
|
|
11
11
|
}
|
|
12
12
|
return cached;
|
|
13
13
|
};
|
|
14
|
-
const
|
|
15
|
-
const
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
14
|
+
const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
15
|
+
const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
16
|
+
const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
17
|
+
const httpsLetterRegex = /s/i;
|
|
18
|
+
const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
|
|
19
|
+
const protocolPrefixRegex = /^https?:\/\//;
|
|
20
|
+
const wwwPrefixRegex = /^www\./;
|
|
21
|
+
const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
|
|
22
|
+
const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
23
|
+
const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
20
24
|
const fixMalformedProtocol = (url) => {
|
|
21
|
-
if (
|
|
22
|
-
const doubledMatch =
|
|
25
|
+
if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
|
|
26
|
+
const doubledMatch = doubledProtocolRegex.exec(url);
|
|
23
27
|
if (doubledMatch) {
|
|
24
28
|
const inner = doubledMatch[1];
|
|
25
29
|
const www = doubledMatch[2];
|
|
26
30
|
const rest = url.slice(doubledMatch[0].length);
|
|
27
|
-
return (
|
|
31
|
+
return (httpsLetterRegex.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
28
32
|
}
|
|
29
|
-
const singleMatch =
|
|
33
|
+
const singleMatch = singleMalformedRegex.exec(url);
|
|
30
34
|
if (singleMatch) {
|
|
31
35
|
const fullMatch = singleMatch[0];
|
|
32
36
|
const www = singleMatch[1];
|
|
33
37
|
const rest = url.slice(fullMatch.length);
|
|
34
|
-
return (
|
|
38
|
+
return (httpsLetterRegex.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
35
39
|
}
|
|
36
40
|
return url;
|
|
37
41
|
};
|
|
@@ -60,7 +64,7 @@ const addMissingProtocol = (url, protocol = "https") => {
|
|
|
60
64
|
if (url.startsWith("//") && !url.startsWith("///")) try {
|
|
61
65
|
const parsed = new URL(`${protocol}:${url}`);
|
|
62
66
|
const hostname = parsed.hostname;
|
|
63
|
-
if (hostname.includes(".") || hostname === "localhost" ||
|
|
67
|
+
if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
|
|
64
68
|
return url;
|
|
65
69
|
} catch {
|
|
66
70
|
return url;
|
|
@@ -100,7 +104,7 @@ const decodeAndNormalizeEncoding = (value) => {
|
|
|
100
104
|
return value.replace(/%([0-9A-Fa-f]{2})/g, (_match, hex) => {
|
|
101
105
|
const charCode = Number.parseInt(hex, 16);
|
|
102
106
|
const char = String.fromCharCode(charCode);
|
|
103
|
-
if (
|
|
107
|
+
if (safePathCharsRegex.test(char)) return char;
|
|
104
108
|
return `%${hex.toUpperCase()}`;
|
|
105
109
|
});
|
|
106
110
|
};
|
|
@@ -111,7 +115,7 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
111
115
|
parsed.hostname = parsed.hostname.normalize("NFC");
|
|
112
116
|
parsed.pathname = parsed.pathname.normalize("NFC");
|
|
113
117
|
}
|
|
114
|
-
if (options.convertToPunycode &&
|
|
118
|
+
if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
|
|
115
119
|
const ascii = domainToASCII(parsed.hostname);
|
|
116
120
|
if (ascii) parsed.hostname = ascii;
|
|
117
121
|
}
|
|
@@ -143,7 +147,7 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
143
147
|
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
144
148
|
let result = parsed.href;
|
|
145
149
|
if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
|
|
146
|
-
if (options.stripProtocol) result = result.replace(
|
|
150
|
+
if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
|
|
147
151
|
return result;
|
|
148
152
|
} catch {
|
|
149
153
|
return url;
|
|
@@ -184,11 +188,11 @@ const createSignature = (object, fields) => {
|
|
|
184
188
|
for (const [key, val] of saved) object[key] = val;
|
|
185
189
|
return signature;
|
|
186
190
|
};
|
|
187
|
-
const
|
|
191
|
+
const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
188
192
|
const neutralizeUrls = (text, urls) => {
|
|
189
193
|
const escapeHost = (url) => {
|
|
190
194
|
try {
|
|
191
|
-
return new URL("/", url).host.replace(
|
|
195
|
+
return new URL("/", url).host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
|
|
192
196
|
} catch {
|
|
193
197
|
return;
|
|
194
198
|
}
|
|
@@ -196,7 +200,7 @@ const neutralizeUrls = (text, urls) => {
|
|
|
196
200
|
const hosts = urls.map(escapeHost).filter(Boolean);
|
|
197
201
|
if (hosts.length === 0) return text;
|
|
198
202
|
const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
|
|
199
|
-
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(
|
|
203
|
+
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashRegex, "$1$2");
|
|
200
204
|
};
|
|
201
205
|
//#endregion
|
|
202
206
|
export { addMissingProtocol, applyProbes, applyRewrites, createSignature, fixMalformedProtocol, neutralizeUrls, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/package.json
CHANGED
|
@@ -55,13 +55,14 @@
|
|
|
55
55
|
},
|
|
56
56
|
"dependencies": {
|
|
57
57
|
"entities": "^7.0.1",
|
|
58
|
-
"feedsmith": "^2.9.1"
|
|
58
|
+
"feedsmith": "^2.9.1",
|
|
59
|
+
"typescript": "^6.0.2"
|
|
59
60
|
},
|
|
60
61
|
"devDependencies": {
|
|
61
|
-
"@types/bun": "^1.3.
|
|
62
|
-
"kvalita": "1.
|
|
63
|
-
"tsdown": "^0.21.
|
|
64
|
-
"vitepress": "^
|
|
62
|
+
"@types/bun": "^1.3.11",
|
|
63
|
+
"kvalita": "1.12.4",
|
|
64
|
+
"tsdown": "^0.21.7",
|
|
65
|
+
"vitepress": "^2.0.0-alpha.17"
|
|
65
66
|
},
|
|
66
|
-
"version": "1.
|
|
67
|
+
"version": "1.5.0"
|
|
67
68
|
}
|