feedcanon 1.4.1 → 1.5.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -91,7 +91,7 @@ async function findCanonical(inputUrl, options) {
91
91
  }
92
92
  }
93
93
  }
94
- if (probes?.length) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
94
+ if (probes && probes?.length > 0) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
95
95
  const response = await fetchAndCompare(candidateUrl);
96
96
  if (response) {
97
97
  onMatch?.({
package/dist/index.js CHANGED
@@ -91,7 +91,7 @@ async function findCanonical(inputUrl, options) {
91
91
  }
92
92
  }
93
93
  }
94
- if (probes?.length) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
94
+ if (probes && probes?.length > 0) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
95
95
  const response = await fetchAndCompare(candidateUrl);
96
96
  if (response) {
97
97
  onMatch?.({
@@ -1,4 +1,8 @@
1
1
  //#region src/probes/wordpress.ts
2
+ const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
3
+ const feedPathRegex = /\/feed(\/|$)/;
4
+ const trailingSlashRegex = /\/$/;
5
+ const optionalTrailingSlashRegex = /\/?$/;
2
6
  const feedTypes = [
3
7
  "atom",
4
8
  "rss2",
@@ -18,18 +22,18 @@ const wordpressProbe = {
18
22
  const candidates = [];
19
23
  const isComment = feed.startsWith("comments-");
20
24
  const type = isComment ? feed.slice(9) : feed;
21
- if ((isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/).test(url.pathname)) {
25
+ if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
22
26
  const withoutSlash = new URL(url);
23
- withoutSlash.pathname = url.pathname.replace(/\/$/, "");
27
+ withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
24
28
  withoutSlash.searchParams.delete("feed");
25
29
  candidates.push(withoutSlash.href);
26
30
  const withSlash = new URL(url);
27
- withSlash.pathname = url.pathname.replace(/\/?$/, "/");
31
+ withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
28
32
  withSlash.searchParams.delete("feed");
29
33
  candidates.push(withSlash.href);
30
34
  return candidates;
31
35
  }
32
- const basePath = url.pathname.replace(/\/$/, "");
36
+ const basePath = url.pathname.replace(trailingSlashRegex, "");
33
37
  const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
34
38
  const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
35
39
  const primary = new URL(url);
@@ -1,4 +1,8 @@
1
1
  //#region src/probes/wordpress.ts
2
+ const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
3
+ const feedPathRegex = /\/feed(\/|$)/;
4
+ const trailingSlashRegex = /\/$/;
5
+ const optionalTrailingSlashRegex = /\/?$/;
2
6
  const feedTypes = [
3
7
  "atom",
4
8
  "rss2",
@@ -18,18 +22,18 @@ const wordpressProbe = {
18
22
  const candidates = [];
19
23
  const isComment = feed.startsWith("comments-");
20
24
  const type = isComment ? feed.slice(9) : feed;
21
- if ((isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/).test(url.pathname)) {
25
+ if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
22
26
  const withoutSlash = new URL(url);
23
- withoutSlash.pathname = url.pathname.replace(/\/$/, "");
27
+ withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
24
28
  withoutSlash.searchParams.delete("feed");
25
29
  candidates.push(withoutSlash.href);
26
30
  const withSlash = new URL(url);
27
- withSlash.pathname = url.pathname.replace(/\/?$/, "/");
31
+ withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
28
32
  withSlash.searchParams.delete("feed");
29
33
  candidates.push(withSlash.href);
30
34
  return candidates;
31
35
  }
32
- const basePath = url.pathname.replace(/\/$/, "");
36
+ const basePath = url.pathname.replace(trailingSlashRegex, "");
33
37
  const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
34
38
  const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
35
39
  const primary = new URL(url);
@@ -1,19 +1,19 @@
1
1
  const require_utils = require("../utils.cjs");
2
2
  //#region src/rewrites/blogger.ts
3
- const bloggerPattern = /^(www\.|beta\.)?blogger\.com$/;
4
- const blogspotPattern = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
3
+ const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
4
+ const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
5
5
  const bloggerRewrite = {
6
6
  match: (url) => {
7
- return bloggerPattern.test(url.hostname) || blogspotPattern.test(url.hostname);
7
+ return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
8
8
  },
9
9
  rewrite: (url) => {
10
10
  const rewritten = new URL(url);
11
- const isBlogger = bloggerPattern.test(rewritten.hostname);
12
- const isBlogspot = blogspotPattern.test(rewritten.hostname);
11
+ const isBlogger = bloggerRegex.test(rewritten.hostname);
12
+ const isBlogspot = blogspotRegex.test(rewritten.hostname);
13
13
  rewritten.protocol = "https:";
14
14
  if (isBlogger) rewritten.hostname = "www.blogger.com";
15
15
  if (isBlogspot) {
16
- rewritten.hostname = rewritten.hostname.replace(blogspotPattern, ".blogspot.com");
16
+ rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
17
17
  if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
18
18
  else if (rewritten.pathname === "/rss.xml") {
19
19
  rewritten.pathname = "/feeds/posts/default";
@@ -1,19 +1,19 @@
1
1
  import { normalizeUrl } from "../utils.js";
2
2
  //#region src/rewrites/blogger.ts
3
- const bloggerPattern = /^(www\.|beta\.)?blogger\.com$/;
4
- const blogspotPattern = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
3
+ const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
4
+ const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
5
5
  const bloggerRewrite = {
6
6
  match: (url) => {
7
- return bloggerPattern.test(url.hostname) || blogspotPattern.test(url.hostname);
7
+ return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
8
8
  },
9
9
  rewrite: (url) => {
10
10
  const rewritten = new URL(url);
11
- const isBlogger = bloggerPattern.test(rewritten.hostname);
12
- const isBlogspot = blogspotPattern.test(rewritten.hostname);
11
+ const isBlogger = bloggerRegex.test(rewritten.hostname);
12
+ const isBlogspot = blogspotRegex.test(rewritten.hostname);
13
13
  rewritten.protocol = "https:";
14
14
  if (isBlogger) rewritten.hostname = "www.blogger.com";
15
15
  if (isBlogspot) {
16
- rewritten.hostname = rewritten.hostname.replace(blogspotPattern, ".blogspot.com");
16
+ rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
17
17
  if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
18
18
  else if (rewritten.pathname === "/rss.xml") {
19
19
  rewritten.pathname = "/feeds/posts/default";
package/dist/types.d.cts CHANGED
@@ -1,9 +1,10 @@
1
- import * as feedsmith from "feedsmith";
1
+ import * as _$feedsmith from "feedsmith";
2
2
 
3
3
  //#region src/types.d.ts
4
- type DefaultParserResult = ReturnType<typeof feedsmith.parseFeed>;
4
+ type MaybePromise<T> = T | Promise<T>;
5
+ type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
5
6
  type ParserAdapter<T> = {
6
- parse: (body: string) => Promise<T | undefined> | T | undefined;
7
+ parse: (body: string) => MaybePromise<T | undefined>;
7
8
  getSelfUrl: (parsed: T) => string | undefined;
8
9
  getSignature: (parsed: T, url: string) => string;
9
10
  };
@@ -62,13 +63,13 @@ type FetchFnOptions = {
62
63
  method?: 'GET' | 'HEAD';
63
64
  headers?: Record<string, string>;
64
65
  };
65
- type ExistsFn<T = unknown> = (url: string) => Promise<T | undefined>;
66
+ type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
66
67
  type FetchFnResponse = {
67
68
  headers: Headers;
68
69
  body: string;
69
70
  url: string;
70
71
  status: number;
71
72
  };
72
- type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
73
+ type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
73
74
  //#endregion
74
75
  export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
package/dist/types.d.ts CHANGED
@@ -1,9 +1,10 @@
1
- import * as feedsmith from "feedsmith";
1
+ import * as _$feedsmith from "feedsmith";
2
2
 
3
3
  //#region src/types.d.ts
4
- type DefaultParserResult = ReturnType<typeof feedsmith.parseFeed>;
4
+ type MaybePromise<T> = T | Promise<T>;
5
+ type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
5
6
  type ParserAdapter<T> = {
6
- parse: (body: string) => Promise<T | undefined> | T | undefined;
7
+ parse: (body: string) => MaybePromise<T | undefined>;
7
8
  getSelfUrl: (parsed: T) => string | undefined;
8
9
  getSignature: (parsed: T, url: string) => string;
9
10
  };
@@ -62,13 +63,13 @@ type FetchFnOptions = {
62
63
  method?: 'GET' | 'HEAD';
63
64
  headers?: Record<string, string>;
64
65
  };
65
- type ExistsFn<T = unknown> = (url: string) => Promise<T | undefined>;
66
+ type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
66
67
  type FetchFnResponse = {
67
68
  headers: Headers;
68
69
  body: string;
69
70
  url: string;
70
71
  status: number;
71
72
  };
72
- type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
73
+ type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
73
74
  //#endregion
74
75
  export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
package/dist/utils.cjs CHANGED
@@ -11,27 +11,31 @@ const getStrippedParamsSet = (params) => {
11
11
  }
12
12
  return cached;
13
13
  };
14
- const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
15
- const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
16
- const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
17
- const validUrlPattern = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
18
- const doubledProtocolPattern = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
19
- const singleMalformedPattern = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
14
+ const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
15
+ const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
16
+ const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
17
+ const httpsLetterRegex = /s/i;
18
+ const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
19
+ const protocolPrefixRegex = /^https?:\/\//;
20
+ const wwwPrefixRegex = /^www\./;
21
+ const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
22
+ const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
23
+ const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
20
24
  const fixMalformedProtocol = (url) => {
21
- if (validUrlPattern.test(url) && !doubledProtocolPattern.test(url)) return url;
22
- const doubledMatch = doubledProtocolPattern.exec(url);
25
+ if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
26
+ const doubledMatch = doubledProtocolRegex.exec(url);
23
27
  if (doubledMatch) {
24
28
  const inner = doubledMatch[1];
25
29
  const www = doubledMatch[2];
26
30
  const rest = url.slice(doubledMatch[0].length);
27
- return (/s/i.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
31
+ return (httpsLetterRegex.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
28
32
  }
29
- const singleMatch = singleMalformedPattern.exec(url);
33
+ const singleMatch = singleMalformedRegex.exec(url);
30
34
  if (singleMatch) {
31
35
  const fullMatch = singleMatch[0];
32
36
  const www = singleMatch[1];
33
37
  const rest = url.slice(fullMatch.length);
34
- return (/s/i.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
38
+ return (httpsLetterRegex.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
35
39
  }
36
40
  return url;
37
41
  };
@@ -52,14 +56,15 @@ const resolveFeedProtocol = (url, protocol = "https") => {
52
56
  return url;
53
57
  };
54
58
  const addMissingProtocol = (url, protocol = "https") => {
55
- try {
56
- const parsed = new URL(url);
57
- if (!parsed.protocol.includes(".") && parsed.protocol !== "localhost:") return url;
58
- } catch {}
59
+ const colonIndex = url.indexOf(":");
60
+ if (colonIndex > 0) {
61
+ const beforeColon = url.slice(0, colonIndex);
62
+ if (!beforeColon.includes(".") && !beforeColon.includes("/") && beforeColon !== "localhost") return url;
63
+ }
59
64
  if (url.startsWith("//") && !url.startsWith("///")) try {
60
65
  const parsed = new URL(`${protocol}:${url}`);
61
66
  const hostname = parsed.hostname;
62
- if (hostname.includes(".") || hostname === "localhost" || ipv4Pattern.test(hostname) || ipv6Pattern.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
67
+ if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
63
68
  return url;
64
69
  } catch {
65
70
  return url;
@@ -99,7 +104,7 @@ const decodeAndNormalizeEncoding = (value) => {
99
104
  return value.replace(/%([0-9A-Fa-f]{2})/g, (_match, hex) => {
100
105
  const charCode = Number.parseInt(hex, 16);
101
106
  const char = String.fromCharCode(charCode);
102
- if (safePathChars.test(char)) return char;
107
+ if (safePathCharsRegex.test(char)) return char;
103
108
  return `%${hex.toUpperCase()}`;
104
109
  });
105
110
  };
@@ -110,7 +115,7 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
110
115
  parsed.hostname = parsed.hostname.normalize("NFC");
111
116
  parsed.pathname = parsed.pathname.normalize("NFC");
112
117
  }
113
- if (options.convertToPunycode) {
118
+ if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
114
119
  const ascii = (0, node_url.domainToASCII)(parsed.hostname);
115
120
  if (ascii) parsed.hostname = ascii;
116
121
  }
@@ -138,11 +143,11 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
138
143
  parsed.search = "";
139
144
  for (const [key, value] of entries) parsed.searchParams.append(key.toLowerCase(), value.toLowerCase());
140
145
  }
141
- if (options.sortQueryParams) parsed.searchParams.sort();
146
+ if (options.sortQueryParams && parsed.search) parsed.searchParams.sort();
142
147
  if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
143
148
  let result = parsed.href;
144
149
  if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
145
- if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
150
+ if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
146
151
  return result;
147
152
  } catch {
148
153
  return url;
@@ -183,11 +188,11 @@ const createSignature = (object, fields) => {
183
188
  for (const [key, val] of saved) object[key] = val;
184
189
  return signature;
185
190
  };
186
- const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
191
+ const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
187
192
  const neutralizeUrls = (text, urls) => {
188
193
  const escapeHost = (url) => {
189
194
  try {
190
- return new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
195
+ return new URL("/", url).host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
191
196
  } catch {
192
197
  return;
193
198
  }
@@ -195,7 +200,7 @@ const neutralizeUrls = (text, urls) => {
195
200
  const hosts = urls.map(escapeHost).filter(Boolean);
196
201
  if (hosts.length === 0) return text;
197
202
  const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
198
- return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
203
+ return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashRegex, "$1$2");
199
204
  };
200
205
  //#endregion
201
206
  exports.addMissingProtocol = addMissingProtocol;
package/dist/utils.js CHANGED
@@ -11,27 +11,31 @@ const getStrippedParamsSet = (params) => {
11
11
  }
12
12
  return cached;
13
13
  };
14
- const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
15
- const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
16
- const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
17
- const validUrlPattern = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
18
- const doubledProtocolPattern = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
19
- const singleMalformedPattern = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
14
+ const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
15
+ const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
16
+ const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
17
+ const httpsLetterRegex = /s/i;
18
+ const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
19
+ const protocolPrefixRegex = /^https?:\/\//;
20
+ const wwwPrefixRegex = /^www\./;
21
+ const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
22
+ const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
23
+ const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
20
24
  const fixMalformedProtocol = (url) => {
21
- if (validUrlPattern.test(url) && !doubledProtocolPattern.test(url)) return url;
22
- const doubledMatch = doubledProtocolPattern.exec(url);
25
+ if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
26
+ const doubledMatch = doubledProtocolRegex.exec(url);
23
27
  if (doubledMatch) {
24
28
  const inner = doubledMatch[1];
25
29
  const www = doubledMatch[2];
26
30
  const rest = url.slice(doubledMatch[0].length);
27
- return (/s/i.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
31
+ return (httpsLetterRegex.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
28
32
  }
29
- const singleMatch = singleMalformedPattern.exec(url);
33
+ const singleMatch = singleMalformedRegex.exec(url);
30
34
  if (singleMatch) {
31
35
  const fullMatch = singleMatch[0];
32
36
  const www = singleMatch[1];
33
37
  const rest = url.slice(fullMatch.length);
34
- return (/s/i.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
38
+ return (httpsLetterRegex.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
35
39
  }
36
40
  return url;
37
41
  };
@@ -52,14 +56,15 @@ const resolveFeedProtocol = (url, protocol = "https") => {
52
56
  return url;
53
57
  };
54
58
  const addMissingProtocol = (url, protocol = "https") => {
55
- try {
56
- const parsed = new URL(url);
57
- if (!parsed.protocol.includes(".") && parsed.protocol !== "localhost:") return url;
58
- } catch {}
59
+ const colonIndex = url.indexOf(":");
60
+ if (colonIndex > 0) {
61
+ const beforeColon = url.slice(0, colonIndex);
62
+ if (!beforeColon.includes(".") && !beforeColon.includes("/") && beforeColon !== "localhost") return url;
63
+ }
59
64
  if (url.startsWith("//") && !url.startsWith("///")) try {
60
65
  const parsed = new URL(`${protocol}:${url}`);
61
66
  const hostname = parsed.hostname;
62
- if (hostname.includes(".") || hostname === "localhost" || ipv4Pattern.test(hostname) || ipv6Pattern.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
67
+ if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
63
68
  return url;
64
69
  } catch {
65
70
  return url;
@@ -99,7 +104,7 @@ const decodeAndNormalizeEncoding = (value) => {
99
104
  return value.replace(/%([0-9A-Fa-f]{2})/g, (_match, hex) => {
100
105
  const charCode = Number.parseInt(hex, 16);
101
106
  const char = String.fromCharCode(charCode);
102
- if (safePathChars.test(char)) return char;
107
+ if (safePathCharsRegex.test(char)) return char;
103
108
  return `%${hex.toUpperCase()}`;
104
109
  });
105
110
  };
@@ -110,7 +115,7 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
110
115
  parsed.hostname = parsed.hostname.normalize("NFC");
111
116
  parsed.pathname = parsed.pathname.normalize("NFC");
112
117
  }
113
- if (options.convertToPunycode) {
118
+ if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
114
119
  const ascii = domainToASCII(parsed.hostname);
115
120
  if (ascii) parsed.hostname = ascii;
116
121
  }
@@ -138,11 +143,11 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
138
143
  parsed.search = "";
139
144
  for (const [key, value] of entries) parsed.searchParams.append(key.toLowerCase(), value.toLowerCase());
140
145
  }
141
- if (options.sortQueryParams) parsed.searchParams.sort();
146
+ if (options.sortQueryParams && parsed.search) parsed.searchParams.sort();
142
147
  if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
143
148
  let result = parsed.href;
144
149
  if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
145
- if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
150
+ if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
146
151
  return result;
147
152
  } catch {
148
153
  return url;
@@ -183,11 +188,11 @@ const createSignature = (object, fields) => {
183
188
  for (const [key, val] of saved) object[key] = val;
184
189
  return signature;
185
190
  };
186
- const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
191
+ const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
187
192
  const neutralizeUrls = (text, urls) => {
188
193
  const escapeHost = (url) => {
189
194
  try {
190
- return new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
195
+ return new URL("/", url).host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
191
196
  } catch {
192
197
  return;
193
198
  }
@@ -195,7 +200,7 @@ const neutralizeUrls = (text, urls) => {
195
200
  const hosts = urls.map(escapeHost).filter(Boolean);
196
201
  if (hosts.length === 0) return text;
197
202
  const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
198
- return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
203
+ return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashRegex, "$1$2");
199
204
  };
200
205
  //#endregion
201
206
  export { addMissingProtocol, applyProbes, applyRewrites, createSignature, fixMalformedProtocol, neutralizeUrls, normalizeUrl, resolveFeedProtocol, resolveUrl };
package/package.json CHANGED
@@ -55,13 +55,14 @@
55
55
  },
56
56
  "dependencies": {
57
57
  "entities": "^7.0.1",
58
- "feedsmith": "^2.9.0"
58
+ "feedsmith": "^2.9.1",
59
+ "typescript": "^6.0.2"
59
60
  },
60
61
  "devDependencies": {
61
- "@types/bun": "^1.3.10",
62
- "kvalita": "1.10.0",
63
- "tsdown": "^0.21.0",
64
- "vitepress": "^1.6.4"
62
+ "@types/bun": "^1.3.11",
63
+ "kvalita": "1.12.4",
64
+ "tsdown": "^0.21.7",
65
+ "vitepress": "^2.0.0-alpha.17"
65
66
  },
66
- "version": "1.4.1"
67
+ "version": "1.5.0"
67
68
  }