feedcanon 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/dist/probes/wordpress.cjs +8 -4
- package/dist/probes/wordpress.js +8 -4
- package/dist/rewrites/blogger.cjs +6 -6
- package/dist/rewrites/blogger.js +6 -6
- package/dist/types.d.cts +6 -5
- package/dist/types.d.ts +6 -5
- package/dist/utils.cjs +28 -23
- package/dist/utils.js +28 -23
- package/package.json +7 -6
package/dist/index.cjs
CHANGED
|
@@ -91,7 +91,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
91
91
|
}
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
|
-
if (probes?.length) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
94
|
+
if (probes && probes?.length > 0) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
95
95
|
const response = await fetchAndCompare(candidateUrl);
|
|
96
96
|
if (response) {
|
|
97
97
|
onMatch?.({
|
package/dist/index.js
CHANGED
|
@@ -91,7 +91,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
91
91
|
}
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
|
-
if (probes?.length) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
94
|
+
if (probes && probes?.length > 0) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
95
95
|
const response = await fetchAndCompare(candidateUrl);
|
|
96
96
|
if (response) {
|
|
97
97
|
onMatch?.({
|
package/dist/probes/wordpress.cjs
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
//#region src/probes/wordpress.ts
|
|
2
|
+
const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
|
|
3
|
+
const feedPathRegex = /\/feed(\/|$)/;
|
|
4
|
+
const trailingSlashRegex = /\/$/;
|
|
5
|
+
const optionalTrailingSlashRegex = /\/?$/;
|
|
2
6
|
const feedTypes = [
|
|
3
7
|
"atom",
|
|
4
8
|
"rss2",
|
|
@@ -18,18 +22,18 @@ const wordpressProbe = {
|
|
|
18
22
|
const candidates = [];
|
|
19
23
|
const isComment = feed.startsWith("comments-");
|
|
20
24
|
const type = isComment ? feed.slice(9) : feed;
|
|
21
|
-
if ((isComment ?
|
|
25
|
+
if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
|
|
22
26
|
const withoutSlash = new URL(url);
|
|
23
|
-
withoutSlash.pathname = url.pathname.replace(
|
|
27
|
+
withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
|
|
24
28
|
withoutSlash.searchParams.delete("feed");
|
|
25
29
|
candidates.push(withoutSlash.href);
|
|
26
30
|
const withSlash = new URL(url);
|
|
27
|
-
withSlash.pathname = url.pathname.replace(
|
|
31
|
+
withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
|
|
28
32
|
withSlash.searchParams.delete("feed");
|
|
29
33
|
candidates.push(withSlash.href);
|
|
30
34
|
return candidates;
|
|
31
35
|
}
|
|
32
|
-
const basePath = url.pathname.replace(
|
|
36
|
+
const basePath = url.pathname.replace(trailingSlashRegex, "");
|
|
33
37
|
const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
|
|
34
38
|
const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
|
|
35
39
|
const primary = new URL(url);
|
package/dist/probes/wordpress.js
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
//#region src/probes/wordpress.ts
|
|
2
|
+
const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
|
|
3
|
+
const feedPathRegex = /\/feed(\/|$)/;
|
|
4
|
+
const trailingSlashRegex = /\/$/;
|
|
5
|
+
const optionalTrailingSlashRegex = /\/?$/;
|
|
2
6
|
const feedTypes = [
|
|
3
7
|
"atom",
|
|
4
8
|
"rss2",
|
|
@@ -18,18 +22,18 @@ const wordpressProbe = {
|
|
|
18
22
|
const candidates = [];
|
|
19
23
|
const isComment = feed.startsWith("comments-");
|
|
20
24
|
const type = isComment ? feed.slice(9) : feed;
|
|
21
|
-
if ((isComment ?
|
|
25
|
+
if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
|
|
22
26
|
const withoutSlash = new URL(url);
|
|
23
|
-
withoutSlash.pathname = url.pathname.replace(
|
|
27
|
+
withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
|
|
24
28
|
withoutSlash.searchParams.delete("feed");
|
|
25
29
|
candidates.push(withoutSlash.href);
|
|
26
30
|
const withSlash = new URL(url);
|
|
27
|
-
withSlash.pathname = url.pathname.replace(
|
|
31
|
+
withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
|
|
28
32
|
withSlash.searchParams.delete("feed");
|
|
29
33
|
candidates.push(withSlash.href);
|
|
30
34
|
return candidates;
|
|
31
35
|
}
|
|
32
|
-
const basePath = url.pathname.replace(
|
|
36
|
+
const basePath = url.pathname.replace(trailingSlashRegex, "");
|
|
33
37
|
const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
|
|
34
38
|
const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
|
|
35
39
|
const primary = new URL(url);
|
package/dist/rewrites/blogger.cjs
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
const require_utils = require("../utils.cjs");
|
|
2
2
|
//#region src/rewrites/blogger.ts
|
|
3
|
-
const
|
|
4
|
-
const
|
|
3
|
+
const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
|
|
4
|
+
const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
|
|
5
5
|
const bloggerRewrite = {
|
|
6
6
|
match: (url) => {
|
|
7
|
-
return
|
|
7
|
+
return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
|
|
8
8
|
},
|
|
9
9
|
rewrite: (url) => {
|
|
10
10
|
const rewritten = new URL(url);
|
|
11
|
-
const isBlogger =
|
|
12
|
-
const isBlogspot =
|
|
11
|
+
const isBlogger = bloggerRegex.test(rewritten.hostname);
|
|
12
|
+
const isBlogspot = blogspotRegex.test(rewritten.hostname);
|
|
13
13
|
rewritten.protocol = "https:";
|
|
14
14
|
if (isBlogger) rewritten.hostname = "www.blogger.com";
|
|
15
15
|
if (isBlogspot) {
|
|
16
|
-
rewritten.hostname = rewritten.hostname.replace(
|
|
16
|
+
rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
|
|
17
17
|
if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
|
|
18
18
|
else if (rewritten.pathname === "/rss.xml") {
|
|
19
19
|
rewritten.pathname = "/feeds/posts/default";
|
package/dist/rewrites/blogger.js
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
import { normalizeUrl } from "../utils.js";
|
|
2
2
|
//#region src/rewrites/blogger.ts
|
|
3
|
-
const
|
|
4
|
-
const
|
|
3
|
+
const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
|
|
4
|
+
const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
|
|
5
5
|
const bloggerRewrite = {
|
|
6
6
|
match: (url) => {
|
|
7
|
-
return
|
|
7
|
+
return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
|
|
8
8
|
},
|
|
9
9
|
rewrite: (url) => {
|
|
10
10
|
const rewritten = new URL(url);
|
|
11
|
-
const isBlogger =
|
|
12
|
-
const isBlogspot =
|
|
11
|
+
const isBlogger = bloggerRegex.test(rewritten.hostname);
|
|
12
|
+
const isBlogspot = blogspotRegex.test(rewritten.hostname);
|
|
13
13
|
rewritten.protocol = "https:";
|
|
14
14
|
if (isBlogger) rewritten.hostname = "www.blogger.com";
|
|
15
15
|
if (isBlogspot) {
|
|
16
|
-
rewritten.hostname = rewritten.hostname.replace(
|
|
16
|
+
rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
|
|
17
17
|
if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
|
|
18
18
|
else if (rewritten.pathname === "/rss.xml") {
|
|
19
19
|
rewritten.pathname = "/feeds/posts/default";
|
package/dist/types.d.cts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import * as feedsmith from "feedsmith";
|
|
1
|
+
import * as _$feedsmith from "feedsmith";
|
|
2
2
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
|
-
type
|
|
4
|
+
type MaybePromise<T> = T | Promise<T>;
|
|
5
|
+
type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
|
|
5
6
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) =>
|
|
7
|
+
parse: (body: string) => MaybePromise<T | undefined>;
|
|
7
8
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
9
|
getSignature: (parsed: T, url: string) => string;
|
|
9
10
|
};
|
|
@@ -62,13 +63,13 @@ type FetchFnOptions = {
|
|
|
62
63
|
method?: 'GET' | 'HEAD';
|
|
63
64
|
headers?: Record<string, string>;
|
|
64
65
|
};
|
|
65
|
-
type ExistsFn<T = unknown> = (url: string) =>
|
|
66
|
+
type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
|
|
66
67
|
type FetchFnResponse = {
|
|
67
68
|
headers: Headers;
|
|
68
69
|
body: string;
|
|
69
70
|
url: string;
|
|
70
71
|
status: number;
|
|
71
72
|
};
|
|
72
|
-
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) =>
|
|
73
|
+
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
|
|
73
74
|
//#endregion
|
|
74
75
|
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/types.d.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import * as feedsmith from "feedsmith";
|
|
1
|
+
import * as _$feedsmith from "feedsmith";
|
|
2
2
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
|
-
type
|
|
4
|
+
type MaybePromise<T> = T | Promise<T>;
|
|
5
|
+
type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
|
|
5
6
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) =>
|
|
7
|
+
parse: (body: string) => MaybePromise<T | undefined>;
|
|
7
8
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
9
|
getSignature: (parsed: T, url: string) => string;
|
|
9
10
|
};
|
|
@@ -62,13 +63,13 @@ type FetchFnOptions = {
|
|
|
62
63
|
method?: 'GET' | 'HEAD';
|
|
63
64
|
headers?: Record<string, string>;
|
|
64
65
|
};
|
|
65
|
-
type ExistsFn<T = unknown> = (url: string) =>
|
|
66
|
+
type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
|
|
66
67
|
type FetchFnResponse = {
|
|
67
68
|
headers: Headers;
|
|
68
69
|
body: string;
|
|
69
70
|
url: string;
|
|
70
71
|
status: number;
|
|
71
72
|
};
|
|
72
|
-
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) =>
|
|
73
|
+
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
|
|
73
74
|
//#endregion
|
|
74
75
|
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/utils.cjs
CHANGED
|
@@ -11,27 +11,31 @@ const getStrippedParamsSet = (params) => {
|
|
|
11
11
|
}
|
|
12
12
|
return cached;
|
|
13
13
|
};
|
|
14
|
-
const
|
|
15
|
-
const
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
14
|
+
const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
15
|
+
const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
16
|
+
const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
17
|
+
const httpsLetterRegex = /s/i;
|
|
18
|
+
const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
|
|
19
|
+
const protocolPrefixRegex = /^https?:\/\//;
|
|
20
|
+
const wwwPrefixRegex = /^www\./;
|
|
21
|
+
const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
|
|
22
|
+
const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
23
|
+
const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
20
24
|
const fixMalformedProtocol = (url) => {
|
|
21
|
-
if (
|
|
22
|
-
const doubledMatch =
|
|
25
|
+
if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
|
|
26
|
+
const doubledMatch = doubledProtocolRegex.exec(url);
|
|
23
27
|
if (doubledMatch) {
|
|
24
28
|
const inner = doubledMatch[1];
|
|
25
29
|
const www = doubledMatch[2];
|
|
26
30
|
const rest = url.slice(doubledMatch[0].length);
|
|
27
|
-
return (
|
|
31
|
+
return (httpsLetterRegex.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
28
32
|
}
|
|
29
|
-
const singleMatch =
|
|
33
|
+
const singleMatch = singleMalformedRegex.exec(url);
|
|
30
34
|
if (singleMatch) {
|
|
31
35
|
const fullMatch = singleMatch[0];
|
|
32
36
|
const www = singleMatch[1];
|
|
33
37
|
const rest = url.slice(fullMatch.length);
|
|
34
|
-
return (
|
|
38
|
+
return (httpsLetterRegex.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
35
39
|
}
|
|
36
40
|
return url;
|
|
37
41
|
};
|
|
@@ -52,14 +56,15 @@ const resolveFeedProtocol = (url, protocol = "https") => {
|
|
|
52
56
|
return url;
|
|
53
57
|
};
|
|
54
58
|
const addMissingProtocol = (url, protocol = "https") => {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
+
const colonIndex = url.indexOf(":");
|
|
60
|
+
if (colonIndex > 0) {
|
|
61
|
+
const beforeColon = url.slice(0, colonIndex);
|
|
62
|
+
if (!beforeColon.includes(".") && !beforeColon.includes("/") && beforeColon !== "localhost") return url;
|
|
63
|
+
}
|
|
59
64
|
if (url.startsWith("//") && !url.startsWith("///")) try {
|
|
60
65
|
const parsed = new URL(`${protocol}:${url}`);
|
|
61
66
|
const hostname = parsed.hostname;
|
|
62
|
-
if (hostname.includes(".") || hostname === "localhost" ||
|
|
67
|
+
if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
|
|
63
68
|
return url;
|
|
64
69
|
} catch {
|
|
65
70
|
return url;
|
|
@@ -99,7 +104,7 @@ const decodeAndNormalizeEncoding = (value) => {
|
|
|
99
104
|
return value.replace(/%([0-9A-Fa-f]{2})/g, (_match, hex) => {
|
|
100
105
|
const charCode = Number.parseInt(hex, 16);
|
|
101
106
|
const char = String.fromCharCode(charCode);
|
|
102
|
-
if (
|
|
107
|
+
if (safePathCharsRegex.test(char)) return char;
|
|
103
108
|
return `%${hex.toUpperCase()}`;
|
|
104
109
|
});
|
|
105
110
|
};
|
|
@@ -110,7 +115,7 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
110
115
|
parsed.hostname = parsed.hostname.normalize("NFC");
|
|
111
116
|
parsed.pathname = parsed.pathname.normalize("NFC");
|
|
112
117
|
}
|
|
113
|
-
if (options.convertToPunycode) {
|
|
118
|
+
if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
|
|
114
119
|
const ascii = (0, node_url.domainToASCII)(parsed.hostname);
|
|
115
120
|
if (ascii) parsed.hostname = ascii;
|
|
116
121
|
}
|
|
@@ -138,11 +143,11 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
138
143
|
parsed.search = "";
|
|
139
144
|
for (const [key, value] of entries) parsed.searchParams.append(key.toLowerCase(), value.toLowerCase());
|
|
140
145
|
}
|
|
141
|
-
if (options.sortQueryParams) parsed.searchParams.sort();
|
|
146
|
+
if (options.sortQueryParams && parsed.search) parsed.searchParams.sort();
|
|
142
147
|
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
143
148
|
let result = parsed.href;
|
|
144
149
|
if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
|
|
145
|
-
if (options.stripProtocol) result = result.replace(
|
|
150
|
+
if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
|
|
146
151
|
return result;
|
|
147
152
|
} catch {
|
|
148
153
|
return url;
|
|
@@ -183,11 +188,11 @@ const createSignature = (object, fields) => {
|
|
|
183
188
|
for (const [key, val] of saved) object[key] = val;
|
|
184
189
|
return signature;
|
|
185
190
|
};
|
|
186
|
-
const
|
|
191
|
+
const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
187
192
|
const neutralizeUrls = (text, urls) => {
|
|
188
193
|
const escapeHost = (url) => {
|
|
189
194
|
try {
|
|
190
|
-
return new URL("/", url).host.replace(
|
|
195
|
+
return new URL("/", url).host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
|
|
191
196
|
} catch {
|
|
192
197
|
return;
|
|
193
198
|
}
|
|
@@ -195,7 +200,7 @@ const neutralizeUrls = (text, urls) => {
|
|
|
195
200
|
const hosts = urls.map(escapeHost).filter(Boolean);
|
|
196
201
|
if (hosts.length === 0) return text;
|
|
197
202
|
const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
|
|
198
|
-
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(
|
|
203
|
+
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashRegex, "$1$2");
|
|
199
204
|
};
|
|
200
205
|
//#endregion
|
|
201
206
|
exports.addMissingProtocol = addMissingProtocol;
|
package/dist/utils.js
CHANGED
|
@@ -11,27 +11,31 @@ const getStrippedParamsSet = (params) => {
|
|
|
11
11
|
}
|
|
12
12
|
return cached;
|
|
13
13
|
};
|
|
14
|
-
const
|
|
15
|
-
const
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
14
|
+
const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
15
|
+
const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
16
|
+
const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
17
|
+
const httpsLetterRegex = /s/i;
|
|
18
|
+
const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
|
|
19
|
+
const protocolPrefixRegex = /^https?:\/\//;
|
|
20
|
+
const wwwPrefixRegex = /^www\./;
|
|
21
|
+
const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
|
|
22
|
+
const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
23
|
+
const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
20
24
|
const fixMalformedProtocol = (url) => {
|
|
21
|
-
if (
|
|
22
|
-
const doubledMatch =
|
|
25
|
+
if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
|
|
26
|
+
const doubledMatch = doubledProtocolRegex.exec(url);
|
|
23
27
|
if (doubledMatch) {
|
|
24
28
|
const inner = doubledMatch[1];
|
|
25
29
|
const www = doubledMatch[2];
|
|
26
30
|
const rest = url.slice(doubledMatch[0].length);
|
|
27
|
-
return (
|
|
31
|
+
return (httpsLetterRegex.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
28
32
|
}
|
|
29
|
-
const singleMatch =
|
|
33
|
+
const singleMatch = singleMalformedRegex.exec(url);
|
|
30
34
|
if (singleMatch) {
|
|
31
35
|
const fullMatch = singleMatch[0];
|
|
32
36
|
const www = singleMatch[1];
|
|
33
37
|
const rest = url.slice(fullMatch.length);
|
|
34
|
-
return (
|
|
38
|
+
return (httpsLetterRegex.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
|
|
35
39
|
}
|
|
36
40
|
return url;
|
|
37
41
|
};
|
|
@@ -52,14 +56,15 @@ const resolveFeedProtocol = (url, protocol = "https") => {
|
|
|
52
56
|
return url;
|
|
53
57
|
};
|
|
54
58
|
const addMissingProtocol = (url, protocol = "https") => {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
+
const colonIndex = url.indexOf(":");
|
|
60
|
+
if (colonIndex > 0) {
|
|
61
|
+
const beforeColon = url.slice(0, colonIndex);
|
|
62
|
+
if (!beforeColon.includes(".") && !beforeColon.includes("/") && beforeColon !== "localhost") return url;
|
|
63
|
+
}
|
|
59
64
|
if (url.startsWith("//") && !url.startsWith("///")) try {
|
|
60
65
|
const parsed = new URL(`${protocol}:${url}`);
|
|
61
66
|
const hostname = parsed.hostname;
|
|
62
|
-
if (hostname.includes(".") || hostname === "localhost" ||
|
|
67
|
+
if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
|
|
63
68
|
return url;
|
|
64
69
|
} catch {
|
|
65
70
|
return url;
|
|
@@ -99,7 +104,7 @@ const decodeAndNormalizeEncoding = (value) => {
|
|
|
99
104
|
return value.replace(/%([0-9A-Fa-f]{2})/g, (_match, hex) => {
|
|
100
105
|
const charCode = Number.parseInt(hex, 16);
|
|
101
106
|
const char = String.fromCharCode(charCode);
|
|
102
|
-
if (
|
|
107
|
+
if (safePathCharsRegex.test(char)) return char;
|
|
103
108
|
return `%${hex.toUpperCase()}`;
|
|
104
109
|
});
|
|
105
110
|
};
|
|
@@ -110,7 +115,7 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
110
115
|
parsed.hostname = parsed.hostname.normalize("NFC");
|
|
111
116
|
parsed.pathname = parsed.pathname.normalize("NFC");
|
|
112
117
|
}
|
|
113
|
-
if (options.convertToPunycode) {
|
|
118
|
+
if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
|
|
114
119
|
const ascii = domainToASCII(parsed.hostname);
|
|
115
120
|
if (ascii) parsed.hostname = ascii;
|
|
116
121
|
}
|
|
@@ -138,11 +143,11 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
138
143
|
parsed.search = "";
|
|
139
144
|
for (const [key, value] of entries) parsed.searchParams.append(key.toLowerCase(), value.toLowerCase());
|
|
140
145
|
}
|
|
141
|
-
if (options.sortQueryParams) parsed.searchParams.sort();
|
|
146
|
+
if (options.sortQueryParams && parsed.search) parsed.searchParams.sort();
|
|
142
147
|
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
143
148
|
let result = parsed.href;
|
|
144
149
|
if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
|
|
145
|
-
if (options.stripProtocol) result = result.replace(
|
|
150
|
+
if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
|
|
146
151
|
return result;
|
|
147
152
|
} catch {
|
|
148
153
|
return url;
|
|
@@ -183,11 +188,11 @@ const createSignature = (object, fields) => {
|
|
|
183
188
|
for (const [key, val] of saved) object[key] = val;
|
|
184
189
|
return signature;
|
|
185
190
|
};
|
|
186
|
-
const
|
|
191
|
+
const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
187
192
|
const neutralizeUrls = (text, urls) => {
|
|
188
193
|
const escapeHost = (url) => {
|
|
189
194
|
try {
|
|
190
|
-
return new URL("/", url).host.replace(
|
|
195
|
+
return new URL("/", url).host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
|
|
191
196
|
} catch {
|
|
192
197
|
return;
|
|
193
198
|
}
|
|
@@ -195,7 +200,7 @@ const neutralizeUrls = (text, urls) => {
|
|
|
195
200
|
const hosts = urls.map(escapeHost).filter(Boolean);
|
|
196
201
|
if (hosts.length === 0) return text;
|
|
197
202
|
const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
|
|
198
|
-
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(
|
|
203
|
+
return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashRegex, "$1$2");
|
|
199
204
|
};
|
|
200
205
|
//#endregion
|
|
201
206
|
export { addMissingProtocol, applyProbes, applyRewrites, createSignature, fixMalformedProtocol, neutralizeUrls, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/package.json
CHANGED
|
@@ -55,13 +55,14 @@
|
|
|
55
55
|
},
|
|
56
56
|
"dependencies": {
|
|
57
57
|
"entities": "^7.0.1",
|
|
58
|
-
"feedsmith": "^2.9.
|
|
58
|
+
"feedsmith": "^2.9.1",
|
|
59
|
+
"typescript": "^6.0.2"
|
|
59
60
|
},
|
|
60
61
|
"devDependencies": {
|
|
61
|
-
"@types/bun": "^1.3.
|
|
62
|
-
"kvalita": "1.
|
|
63
|
-
"tsdown": "^0.21.
|
|
64
|
-
"vitepress": "^
|
|
62
|
+
"@types/bun": "^1.3.11",
|
|
63
|
+
"kvalita": "1.12.4",
|
|
64
|
+
"tsdown": "^0.21.7",
|
|
65
|
+
"vitepress": "^2.0.0-alpha.17"
|
|
65
66
|
},
|
|
66
|
-
"version": "1.
|
|
67
|
+
"version": "1.5.0"
|
|
67
68
|
}
|