feedcanon 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -41,8 +41,8 @@ This is a simplified flow. For complete details, see [How It Works](https://feed
41
41
  1. Fetch the input URL and parse the feed to establish reference content.
42
42
  2. Extract the feed's declared self URL (if present).
43
43
  3. Validate the self URL by fetching and comparing content.
44
- 4. Generate URL variants ordered from cleanest to least clean.
45
- 5. Test variants in order—the first one serving identical content wins.
44
+ 4. Generate URL candidates ordered from cleanest to least clean.
45
+ 5. Test candidates in order—the first one serving identical content wins.
46
46
  6. Upgrade HTTP to HTTPS if both serve identical content.
47
47
 
48
48
  ### Customization
@@ -53,7 +53,7 @@ Feedcanon is designed to be flexible. Every major component can be replaced or e
53
53
  - **Database lookup** — use `existsFn` to check if a URL already exists in your database.
54
54
  - **Custom fetch** — use your own HTTP client (Axios, Got, Ky, etc.)
55
55
  - **Custom parser** — bring your own parser (Feedsmith by default).
56
- - **Custom tiers** — define your own URL normalization variants.
56
+ - **Custom tiers** — define your own URL normalization tiers.
57
57
  - **Custom platforms** — add handlers to normalize domain aliases (like FeedBurner).
58
58
 
59
59
  ## Quick Start
package/dist/defaults.cjs CHANGED
@@ -1,8 +1,7 @@
1
- const require_feedburner = require('./rewrites/feedburner.cjs');
1
+ const require_utils = require('./utils.cjs');
2
2
  let feedsmith = require("feedsmith");
3
3
 
4
4
  //#region src/defaults.ts
5
- const defaultRewrites = [require_feedburner.feedburnerRewrite];
6
5
  const defaultStrippedParams = [
7
6
  "utm_source",
8
7
  "utm_medium",
@@ -185,15 +184,6 @@ const defaultFetch = async (url, options) => {
185
184
  status: response.status
186
185
  };
187
186
  };
188
- const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
189
- const neutralizeFeedUrls = (signature, url) => {
190
- try {
191
- const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
192
- return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
193
- } catch {
194
- return signature;
195
- }
196
- };
197
187
  const retrieveSelfLink = (parsed) => {
198
188
  switch (parsed.format) {
199
189
  case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
@@ -211,49 +201,48 @@ const defaultParser = {
211
201
  return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
212
202
  },
213
203
  getSignature: (parsed, url) => {
214
- if (parsed.format === "json") {
215
- const originalSelfUrl = parsed.feed.feed_url;
216
- parsed.feed.feed_url = void 0;
217
- const signature$1 = JSON.stringify(parsed.feed);
218
- parsed.feed.feed_url = originalSelfUrl;
219
- return neutralizeFeedUrls(signature$1, url);
220
- }
221
204
  let signature;
222
- let originalBuildDate;
223
- let originalPubDate;
224
- let originalLink;
225
- if (parsed.format === "rss") {
226
- originalBuildDate = parsed.feed.lastBuildDate;
227
- originalPubDate = parsed.feed.pubDate;
228
- originalLink = parsed.feed.link;
229
- parsed.feed.lastBuildDate = void 0;
230
- parsed.feed.pubDate = void 0;
231
- parsed.feed.link = void 0;
232
- } else if (parsed.format === "rdf") {
233
- originalLink = parsed.feed.link;
234
- parsed.feed.link = void 0;
235
- } else if (parsed.format === "atom") {
236
- originalBuildDate = parsed.feed.updated;
237
- parsed.feed.updated = void 0;
238
- }
239
- const link = retrieveSelfLink(parsed);
240
- if (!link) signature = JSON.stringify(parsed.feed);
241
- else {
242
- const originalSelfUrl = link.href;
243
- link.href = void 0;
244
- signature = JSON.stringify(parsed.feed);
245
- link.href = originalSelfUrl;
205
+ let contentUrl;
206
+ if (parsed.format === "json") {
207
+ contentUrl = parsed.feed.home_page_url;
208
+ signature = require_utils.createSignature(parsed.feed, ["feed_url"]);
209
+ } else {
210
+ const selfLink = retrieveSelfLink(parsed);
211
+ const savedSelfHref = selfLink?.href;
212
+ if (selfLink) selfLink.href = void 0;
213
+ if (parsed.format === "rss") {
214
+ contentUrl = parsed.feed.link;
215
+ signature = require_utils.createSignature(parsed.feed, [
216
+ "lastBuildDate",
217
+ "pubDate",
218
+ "link",
219
+ "generator"
220
+ ]);
221
+ } else if (parsed.format === "rdf") {
222
+ contentUrl = parsed.feed.link;
223
+ signature = require_utils.createSignature(parsed.feed, ["link"]);
224
+ } else signature = require_utils.createSignature(parsed.feed, ["updated", "generator"]);
225
+ if (selfLink) selfLink.href = savedSelfHref;
246
226
  }
247
- if (parsed.format === "rss") {
248
- parsed.feed.lastBuildDate = originalBuildDate;
249
- parsed.feed.pubDate = originalPubDate;
250
- parsed.feed.link = originalLink;
251
- } else if (parsed.format === "rdf") parsed.feed.link = originalLink;
252
- else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
253
- return neutralizeFeedUrls(signature, url);
227
+ return require_utils.neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
254
228
  }
255
229
  };
256
230
  const defaultTiers = [
231
+ {
232
+ stripProtocol: false,
233
+ stripAuthentication: false,
234
+ stripWww: true,
235
+ stripTrailingSlash: true,
236
+ stripRootSlash: true,
237
+ collapseSlashes: true,
238
+ stripHash: true,
239
+ sortQueryParams: false,
240
+ stripQuery: true,
241
+ stripEmptyQuery: true,
242
+ normalizeEncoding: true,
243
+ normalizeUnicode: true,
244
+ convertToPunycode: true
245
+ },
257
246
  {
258
247
  stripProtocol: false,
259
248
  stripAuthentication: false,
@@ -305,7 +294,5 @@ const defaultTiers = [
305
294
  exports.defaultFetch = defaultFetch;
306
295
  exports.defaultNormalizeOptions = defaultNormalizeOptions;
307
296
  exports.defaultParser = defaultParser;
308
- exports.defaultRewrites = defaultRewrites;
309
297
  exports.defaultStrippedParams = defaultStrippedParams;
310
- exports.defaultTiers = defaultTiers;
311
- exports.neutralizeFeedUrls = neutralizeFeedUrls;
298
+ exports.defaultTiers = defaultTiers;
@@ -1,12 +1,10 @@
1
- import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Rewrite, Tier } from "./types.cjs";
1
+ import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } from "./types.cjs";
2
2
 
3
3
  //#region src/defaults.d.ts
4
- declare const defaultRewrites: Array<Rewrite>;
5
4
  declare const defaultStrippedParams: string[];
6
5
  declare const defaultNormalizeOptions: NormalizeOptions;
7
6
  declare const defaultFetch: FetchFn;
8
- declare const neutralizeFeedUrls: (signature: string, url: string) => string;
9
7
  declare const defaultParser: ParserAdapter<DefaultParserResult>;
10
8
  declare const defaultTiers: Array<Tier>;
11
9
  //#endregion
12
- export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, neutralizeFeedUrls };
10
+ export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
@@ -1,12 +1,10 @@
1
- import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Rewrite, Tier } from "./types.js";
1
+ import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } from "./types.js";
2
2
 
3
3
  //#region src/defaults.d.ts
4
- declare const defaultRewrites: Array<Rewrite>;
5
4
  declare const defaultStrippedParams: string[];
6
5
  declare const defaultNormalizeOptions: NormalizeOptions;
7
6
  declare const defaultFetch: FetchFn;
8
- declare const neutralizeFeedUrls: (signature: string, url: string) => string;
9
7
  declare const defaultParser: ParserAdapter<DefaultParserResult>;
10
8
  declare const defaultTiers: Array<Tier>;
11
9
  //#endregion
12
- export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, neutralizeFeedUrls };
10
+ export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
package/dist/defaults.js CHANGED
@@ -1,8 +1,7 @@
1
- import { feedburnerRewrite } from "./rewrites/feedburner.js";
1
+ import { createSignature, neutralizeUrls } from "./utils.js";
2
2
  import { parseFeed } from "feedsmith";
3
3
 
4
4
  //#region src/defaults.ts
5
- const defaultRewrites = [feedburnerRewrite];
6
5
  const defaultStrippedParams = [
7
6
  "utm_source",
8
7
  "utm_medium",
@@ -185,15 +184,6 @@ const defaultFetch = async (url, options) => {
185
184
  status: response.status
186
185
  };
187
186
  };
188
- const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
189
- const neutralizeFeedUrls = (signature, url) => {
190
- try {
191
- const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
192
- return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
193
- } catch {
194
- return signature;
195
- }
196
- };
197
187
  const retrieveSelfLink = (parsed) => {
198
188
  switch (parsed.format) {
199
189
  case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
@@ -211,49 +201,48 @@ const defaultParser = {
211
201
  return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
212
202
  },
213
203
  getSignature: (parsed, url) => {
214
- if (parsed.format === "json") {
215
- const originalSelfUrl = parsed.feed.feed_url;
216
- parsed.feed.feed_url = void 0;
217
- const signature$1 = JSON.stringify(parsed.feed);
218
- parsed.feed.feed_url = originalSelfUrl;
219
- return neutralizeFeedUrls(signature$1, url);
220
- }
221
204
  let signature;
222
- let originalBuildDate;
223
- let originalPubDate;
224
- let originalLink;
225
- if (parsed.format === "rss") {
226
- originalBuildDate = parsed.feed.lastBuildDate;
227
- originalPubDate = parsed.feed.pubDate;
228
- originalLink = parsed.feed.link;
229
- parsed.feed.lastBuildDate = void 0;
230
- parsed.feed.pubDate = void 0;
231
- parsed.feed.link = void 0;
232
- } else if (parsed.format === "rdf") {
233
- originalLink = parsed.feed.link;
234
- parsed.feed.link = void 0;
235
- } else if (parsed.format === "atom") {
236
- originalBuildDate = parsed.feed.updated;
237
- parsed.feed.updated = void 0;
238
- }
239
- const link = retrieveSelfLink(parsed);
240
- if (!link) signature = JSON.stringify(parsed.feed);
241
- else {
242
- const originalSelfUrl = link.href;
243
- link.href = void 0;
244
- signature = JSON.stringify(parsed.feed);
245
- link.href = originalSelfUrl;
205
+ let contentUrl;
206
+ if (parsed.format === "json") {
207
+ contentUrl = parsed.feed.home_page_url;
208
+ signature = createSignature(parsed.feed, ["feed_url"]);
209
+ } else {
210
+ const selfLink = retrieveSelfLink(parsed);
211
+ const savedSelfHref = selfLink?.href;
212
+ if (selfLink) selfLink.href = void 0;
213
+ if (parsed.format === "rss") {
214
+ contentUrl = parsed.feed.link;
215
+ signature = createSignature(parsed.feed, [
216
+ "lastBuildDate",
217
+ "pubDate",
218
+ "link",
219
+ "generator"
220
+ ]);
221
+ } else if (parsed.format === "rdf") {
222
+ contentUrl = parsed.feed.link;
223
+ signature = createSignature(parsed.feed, ["link"]);
224
+ } else signature = createSignature(parsed.feed, ["updated", "generator"]);
225
+ if (selfLink) selfLink.href = savedSelfHref;
246
226
  }
247
- if (parsed.format === "rss") {
248
- parsed.feed.lastBuildDate = originalBuildDate;
249
- parsed.feed.pubDate = originalPubDate;
250
- parsed.feed.link = originalLink;
251
- } else if (parsed.format === "rdf") parsed.feed.link = originalLink;
252
- else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
253
- return neutralizeFeedUrls(signature, url);
227
+ return neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
254
228
  }
255
229
  };
256
230
  const defaultTiers = [
231
+ {
232
+ stripProtocol: false,
233
+ stripAuthentication: false,
234
+ stripWww: true,
235
+ stripTrailingSlash: true,
236
+ stripRootSlash: true,
237
+ collapseSlashes: true,
238
+ stripHash: true,
239
+ sortQueryParams: false,
240
+ stripQuery: true,
241
+ stripEmptyQuery: true,
242
+ normalizeEncoding: true,
243
+ normalizeUnicode: true,
244
+ convertToPunycode: true
245
+ },
257
246
  {
258
247
  stripProtocol: false,
259
248
  stripAuthentication: false,
@@ -302,4 +291,4 @@ const defaultTiers = [
302
291
  ];
303
292
 
304
293
  //#endregion
305
- export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, neutralizeFeedUrls };
294
+ export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
package/dist/exports.cjs CHANGED
@@ -1,14 +1,14 @@
1
- const require_feedburner = require('./rewrites/feedburner.cjs');
2
- const require_defaults = require('./defaults.cjs');
3
1
  const require_utils = require('./utils.cjs');
2
+ const require_defaults = require('./defaults.cjs');
4
3
  const require_index = require('./index.cjs');
4
+ const require_wordpress = require('./probes/wordpress.cjs');
5
5
  const require_blogger = require('./rewrites/blogger.cjs');
6
+ const require_feedburner = require('./rewrites/feedburner.cjs');
6
7
 
7
8
  exports.addMissingProtocol = require_utils.addMissingProtocol;
8
9
  exports.bloggerRewrite = require_blogger.bloggerRewrite;
9
10
  exports.defaultFetch = require_defaults.defaultFetch;
10
11
  exports.defaultParser = require_defaults.defaultParser;
11
- exports.defaultRewrites = require_defaults.defaultRewrites;
12
12
  exports.defaultStrippedParams = require_defaults.defaultStrippedParams;
13
13
  exports.defaultTiers = require_defaults.defaultTiers;
14
14
  exports.feedburnerRewrite = require_feedburner.feedburnerRewrite;
@@ -16,4 +16,5 @@ exports.findCanonical = require_index.findCanonical;
16
16
  exports.fixMalformedProtocol = require_utils.fixMalformedProtocol;
17
17
  exports.normalizeUrl = require_utils.normalizeUrl;
18
18
  exports.resolveFeedProtocol = require_utils.resolveFeedProtocol;
19
- exports.resolveUrl = require_utils.resolveUrl;
19
+ exports.resolveUrl = require_utils.resolveUrl;
20
+ exports.wordpressProbe = require_wordpress.wordpressProbe;
@@ -1,7 +1,8 @@
1
- import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.cjs";
2
- import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
1
+ import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.cjs";
2
+ import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
3
3
  import { findCanonical } from "./index.cjs";
4
+ import { wordpressProbe } from "./probes/wordpress.cjs";
4
5
  import { bloggerRewrite } from "./rewrites/blogger.cjs";
5
6
  import { feedburnerRewrite } from "./rewrites/feedburner.cjs";
6
7
  import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
7
- export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
8
+ export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
package/dist/exports.d.ts CHANGED
@@ -1,7 +1,8 @@
1
- import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.js";
2
- import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.js";
1
+ import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.js";
2
+ import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
3
3
  import { findCanonical } from "./index.js";
4
+ import { wordpressProbe } from "./probes/wordpress.js";
4
5
  import { bloggerRewrite } from "./rewrites/blogger.js";
5
6
  import { feedburnerRewrite } from "./rewrites/feedburner.js";
6
7
  import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
7
- export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
8
+ export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
package/dist/exports.js CHANGED
@@ -1,7 +1,8 @@
1
- import { feedburnerRewrite } from "./rewrites/feedburner.js";
2
- import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.js";
3
1
  import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
2
+ import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
4
3
  import { findCanonical } from "./index.js";
4
+ import { wordpressProbe } from "./probes/wordpress.js";
5
5
  import { bloggerRewrite } from "./rewrites/blogger.js";
6
+ import { feedburnerRewrite } from "./rewrites/feedburner.js";
6
7
 
7
- export { addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
8
+ export { addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
package/dist/index.cjs CHANGED
@@ -1,19 +1,19 @@
1
- const require_defaults = require('./defaults.cjs');
2
1
  const require_utils = require('./utils.cjs');
2
+ const require_defaults = require('./defaults.cjs');
3
3
 
4
4
  //#region src/index.ts
5
5
  async function findCanonical(inputUrl, options) {
6
- const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites = require_defaults.defaultRewrites, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
6
+ const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites, probes, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
7
7
  const stripParams = (url) => {
8
- return stripQueryParams?.length ? require_utils.normalizeUrl(url, {
8
+ return require_utils.normalizeUrl(url, {
9
9
  stripQueryParams,
10
10
  sortQueryParams: true,
11
11
  stripEmptyQuery: true
12
- }) : url;
12
+ });
13
13
  };
14
14
  const resolveAndApplyRewrites = (url, baseUrl) => {
15
15
  const resolved = require_utils.resolveUrl(url, baseUrl);
16
- return resolved ? require_utils.applyRewrites(resolved, rewrites) : void 0;
16
+ return resolved && rewrites ? require_utils.applyRewrites(resolved, rewrites) : resolved;
17
17
  };
18
18
  const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
19
19
  if (!initialRequestUrl) return;
@@ -73,7 +73,7 @@ async function findCanonical(inputUrl, options) {
73
73
  if (!await compareWithInitialResponse(response.body, response.url)) return;
74
74
  return response;
75
75
  };
76
- let variantSourceUrl = initialResponseUrl;
76
+ let candidateSourceUrl = initialResponseUrl;
77
77
  if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
78
78
  const urlsToTry = [selfRequestUrl];
79
79
  if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
@@ -86,42 +86,53 @@ async function findCanonical(inputUrl, options) {
86
86
  response,
87
87
  feed: initialResponseFeed
88
88
  });
89
- variantSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
90
- variantSourceUrl = stripParams(variantSourceUrl);
89
+ candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
90
+ candidateSourceUrl = stripParams(candidateSourceUrl);
91
91
  break;
92
92
  }
93
93
  }
94
94
  }
95
- const variantUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(require_utils.normalizeUrl(variantSourceUrl, tier))).filter((variantUrl) => !!variantUrl));
96
- variantUrls.add(variantSourceUrl);
97
- let winningUrl = variantSourceUrl;
98
- for (const variantUrl of variantUrls) {
95
+ if (probes?.length) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
96
+ const response = await fetchAndCompare(candidateUrl);
97
+ if (response) {
98
+ onMatch?.({
99
+ url: candidateUrl,
100
+ response,
101
+ feed: initialResponseFeed
102
+ });
103
+ return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
104
+ }
105
+ });
106
+ const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(require_utils.normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
107
+ candidateUrls.add(candidateSourceUrl);
108
+ let winningUrl = candidateSourceUrl;
109
+ for (const candidateUrl of candidateUrls) {
99
110
  if (existsFn) {
100
- const data = await existsFn(variantUrl);
111
+ const data = await existsFn(candidateUrl);
101
112
  if (data !== void 0) {
102
113
  onExists?.({
103
- url: variantUrl,
114
+ url: candidateUrl,
104
115
  data
105
116
  });
106
- return variantUrl;
117
+ return candidateUrl;
107
118
  }
108
119
  }
109
- if (variantUrl === variantSourceUrl) continue;
110
- if (variantUrl === initialResponseUrl) {
120
+ if (candidateUrl === candidateSourceUrl) continue;
121
+ if (candidateUrl === initialResponseUrl) {
111
122
  winningUrl = initialResponseUrl;
112
123
  break;
113
124
  }
114
- const variantResponse = await fetchAndCompare(variantUrl);
115
- if (variantResponse) {
116
- let variantResponseUrl = resolveAndApplyRewrites(variantResponse.url);
117
- if (variantResponseUrl) variantResponseUrl = stripParams(variantResponseUrl);
118
- if (variantResponseUrl === variantSourceUrl || variantResponseUrl === initialResponseUrl) continue;
125
+ const candidateResponse = await fetchAndCompare(candidateUrl);
126
+ if (candidateResponse) {
127
+ let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
128
+ if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
129
+ if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
119
130
  onMatch?.({
120
- url: variantUrl,
121
- response: variantResponse,
131
+ url: candidateUrl,
132
+ response: candidateResponse,
122
133
  feed: initialResponseFeed
123
134
  });
124
- winningUrl = variantUrl;
135
+ winningUrl = candidateUrl;
125
136
  break;
126
137
  }
127
138
  }
package/dist/index.js CHANGED
@@ -1,19 +1,19 @@
1
- import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.js";
2
- import { applyRewrites, normalizeUrl, resolveUrl } from "./utils.js";
1
+ import { applyProbes, applyRewrites, normalizeUrl, resolveUrl } from "./utils.js";
2
+ import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
3
3
 
4
4
  //#region src/index.ts
5
5
  async function findCanonical(inputUrl, options) {
6
- const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, rewrites = defaultRewrites, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
6
+ const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, rewrites, probes, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
7
7
  const stripParams = (url) => {
8
- return stripQueryParams?.length ? normalizeUrl(url, {
8
+ return normalizeUrl(url, {
9
9
  stripQueryParams,
10
10
  sortQueryParams: true,
11
11
  stripEmptyQuery: true
12
- }) : url;
12
+ });
13
13
  };
14
14
  const resolveAndApplyRewrites = (url, baseUrl) => {
15
15
  const resolved = resolveUrl(url, baseUrl);
16
- return resolved ? applyRewrites(resolved, rewrites) : void 0;
16
+ return resolved && rewrites ? applyRewrites(resolved, rewrites) : resolved;
17
17
  };
18
18
  const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
19
19
  if (!initialRequestUrl) return;
@@ -73,7 +73,7 @@ async function findCanonical(inputUrl, options) {
73
73
  if (!await compareWithInitialResponse(response.body, response.url)) return;
74
74
  return response;
75
75
  };
76
- let variantSourceUrl = initialResponseUrl;
76
+ let candidateSourceUrl = initialResponseUrl;
77
77
  if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
78
78
  const urlsToTry = [selfRequestUrl];
79
79
  if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
@@ -86,42 +86,53 @@ async function findCanonical(inputUrl, options) {
86
86
  response,
87
87
  feed: initialResponseFeed
88
88
  });
89
- variantSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
90
- variantSourceUrl = stripParams(variantSourceUrl);
89
+ candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
90
+ candidateSourceUrl = stripParams(candidateSourceUrl);
91
91
  break;
92
92
  }
93
93
  }
94
94
  }
95
- const variantUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(normalizeUrl(variantSourceUrl, tier))).filter((variantUrl) => !!variantUrl));
96
- variantUrls.add(variantSourceUrl);
97
- let winningUrl = variantSourceUrl;
98
- for (const variantUrl of variantUrls) {
95
+ if (probes?.length) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
96
+ const response = await fetchAndCompare(candidateUrl);
97
+ if (response) {
98
+ onMatch?.({
99
+ url: candidateUrl,
100
+ response,
101
+ feed: initialResponseFeed
102
+ });
103
+ return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
104
+ }
105
+ });
106
+ const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
107
+ candidateUrls.add(candidateSourceUrl);
108
+ let winningUrl = candidateSourceUrl;
109
+ for (const candidateUrl of candidateUrls) {
99
110
  if (existsFn) {
100
- const data = await existsFn(variantUrl);
111
+ const data = await existsFn(candidateUrl);
101
112
  if (data !== void 0) {
102
113
  onExists?.({
103
- url: variantUrl,
114
+ url: candidateUrl,
104
115
  data
105
116
  });
106
- return variantUrl;
117
+ return candidateUrl;
107
118
  }
108
119
  }
109
- if (variantUrl === variantSourceUrl) continue;
110
- if (variantUrl === initialResponseUrl) {
120
+ if (candidateUrl === candidateSourceUrl) continue;
121
+ if (candidateUrl === initialResponseUrl) {
111
122
  winningUrl = initialResponseUrl;
112
123
  break;
113
124
  }
114
- const variantResponse = await fetchAndCompare(variantUrl);
115
- if (variantResponse) {
116
- let variantResponseUrl = resolveAndApplyRewrites(variantResponse.url);
117
- if (variantResponseUrl) variantResponseUrl = stripParams(variantResponseUrl);
118
- if (variantResponseUrl === variantSourceUrl || variantResponseUrl === initialResponseUrl) continue;
125
+ const candidateResponse = await fetchAndCompare(candidateUrl);
126
+ if (candidateResponse) {
127
+ let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
128
+ if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
129
+ if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
119
130
  onMatch?.({
120
- url: variantUrl,
121
- response: variantResponse,
131
+ url: candidateUrl,
132
+ response: candidateResponse,
122
133
  feed: initialResponseFeed
123
134
  });
124
- winningUrl = variantUrl;
135
+ winningUrl = candidateUrl;
125
136
  break;
126
137
  }
127
138
  }
@@ -0,0 +1,49 @@
1
+
2
+ //#region src/probes/wordpress.ts
3
+ const feedTypes = [
4
+ "atom",
5
+ "rss2",
6
+ "rss",
7
+ "rdf"
8
+ ];
9
+ const wordpressProbe = {
10
+ match: (url) => {
11
+ const feed = url.searchParams.get("feed")?.toLowerCase();
12
+ if (!feed) return false;
13
+ const type = feed.startsWith("comments-") ? feed.slice(9) : feed;
14
+ return feedTypes.includes(type);
15
+ },
16
+ getCandidates: (url) => {
17
+ const feed = url.searchParams.get("feed")?.toLowerCase();
18
+ if (!feed) return [];
19
+ const candidates = [];
20
+ const isComment = feed.startsWith("comments-");
21
+ const type = isComment ? feed.slice(9) : feed;
22
+ if ((isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/).test(url.pathname)) {
23
+ const withoutSlash = new URL(url);
24
+ withoutSlash.pathname = url.pathname.replace(/\/$/, "");
25
+ withoutSlash.searchParams.delete("feed");
26
+ candidates.push(withoutSlash.href);
27
+ const withSlash$1 = new URL(url);
28
+ withSlash$1.pathname = url.pathname.replace(/\/?$/, "/");
29
+ withSlash$1.searchParams.delete("feed");
30
+ candidates.push(withSlash$1.href);
31
+ return candidates;
32
+ }
33
+ const basePath = url.pathname.replace(/\/$/, "");
34
+ const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
35
+ const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
36
+ const primary = new URL(url);
37
+ primary.pathname = basePath + feedPath;
38
+ primary.searchParams.delete("feed");
39
+ candidates.push(primary.href);
40
+ const withSlash = new URL(url);
41
+ withSlash.pathname = `${basePath}${feedPath}/`;
42
+ withSlash.searchParams.delete("feed");
43
+ candidates.push(withSlash.href);
44
+ return candidates;
45
+ }
46
+ };
47
+
48
+ //#endregion
49
+ exports.wordpressProbe = wordpressProbe;
@@ -0,0 +1,6 @@
1
+ import { Probe } from "../types.cjs";
2
+
3
+ //#region src/probes/wordpress.d.ts
4
+ declare const wordpressProbe: Probe;
5
+ //#endregion
6
+ export { wordpressProbe };
@@ -0,0 +1,6 @@
1
+ import { Probe } from "../types.js";
2
+
3
+ //#region src/probes/wordpress.d.ts
4
+ declare const wordpressProbe: Probe;
5
+ //#endregion
6
+ export { wordpressProbe };
@@ -0,0 +1,48 @@
1
+ //#region src/probes/wordpress.ts
2
/** Values of the WordPress `?feed=` query parameter that this probe recognizes. */
const feedTypes = [
  "atom",
  "rss2",
  "rss",
  "rdf"
];

/** Prefix that marks a `?feed=` value as a comments feed (e.g. `comments-rss2`). */
const commentsFeedPrefix = "comments-";

/**
 * Parse the `feed` query parameter of a URL.
 *
 * The value is compared case-insensitively. Shared by `match` and
 * `getCandidates` so both always agree on what counts as a supported feed.
 *
 * @param {URL} url - URL to inspect.
 * @returns {{ isComment: boolean, type: string } | undefined} Parsed value, or
 *   `undefined` when the parameter is missing, empty, or not a supported type.
 */
const parseWordpressFeedParam = (url) => {
  const feed = url.searchParams.get("feed")?.toLowerCase();
  if (!feed) return;
  const isComment = feed.startsWith(commentsFeedPrefix);
  const type = isComment ? feed.slice(commentsFeedPrefix.length) : feed;
  if (!feedTypes.includes(type)) return;
  return { isComment, type };
};

/**
 * Copy `url`, replace its pathname and drop the `feed` query parameter.
 *
 * @param {URL} url - Source URL (not mutated).
 * @param {string} pathname - Pathname for the candidate.
 * @returns {string} The candidate's `href`.
 */
const buildWordpressCandidate = (url, pathname) => {
  const candidate = new URL(url);
  candidate.pathname = pathname;
  candidate.searchParams.delete("feed");
  return candidate.href;
};

const wordpressProbe = {
  /**
   * @param {URL} url
   * @returns {boolean} `true` when `url` carries a supported `?feed=` value.
   */
  match: (url) => parseWordpressFeedParam(url) !== undefined,
  /**
   * Build path-based candidate URLs for a query-based feed URL.
   *
   * Each candidate has the `feed` parameter removed; other query parameters
   * are kept. Candidates come in pairs: without and with a trailing slash.
   *
   * @param {URL} url
   * @returns {Array<string>} Candidate hrefs, or `[]` when `url` has no
   *   supported `?feed=` value.
   */
  getCandidates: (url) => {
    const parsed = parseWordpressFeedParam(url);
    if (!parsed) return [];
    const { isComment, type } = parsed;
    const feedPathPattern = isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/;
    // The path already points at a feed: keep it and only drop the parameter.
    if (feedPathPattern.test(url.pathname)) {
      return [
        buildWordpressCandidate(url, url.pathname.replace(/\/$/, "")),
        buildWordpressCandidate(url, url.pathname.replace(/\/?$/, "/"))
      ];
    }
    const basePath = url.pathname.replace(/\/$/, "");
    // Only "atom" gets its own path segment; every other type maps to "/feed".
    const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
    const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
    return [
      buildWordpressCandidate(url, basePath + feedPath),
      buildWordpressCandidate(url, `${basePath}${feedPath}/`)
    ];
  }
};
46
+
47
+ //#endregion
48
+ export { wordpressProbe };
@@ -1,3 +1,4 @@
1
+ const require_utils = require('../utils.cjs');
1
2
 
2
3
  //#region src/rewrites/blogger.ts
3
4
  const bloggerPattern = /^(www\.|beta\.)?blogger\.com$/;
@@ -6,32 +7,41 @@ const bloggerRewrite = {
6
7
  match: (url) => {
7
8
  return bloggerPattern.test(url.hostname) || blogspotPattern.test(url.hostname);
8
9
  },
9
- normalize: (url) => {
10
- const normalized = new URL(url);
11
- const isBlogger = bloggerPattern.test(normalized.hostname);
12
- const isBlogspot = blogspotPattern.test(normalized.hostname);
13
- normalized.protocol = "https:";
14
- if (isBlogger) normalized.hostname = "www.blogger.com";
10
+ rewrite: (url) => {
11
+ const rewritten = new URL(url);
12
+ const isBlogger = bloggerPattern.test(rewritten.hostname);
13
+ const isBlogspot = blogspotPattern.test(rewritten.hostname);
14
+ rewritten.protocol = "https:";
15
+ if (isBlogger) rewritten.hostname = "www.blogger.com";
15
16
  if (isBlogspot) {
16
- normalized.hostname = normalized.hostname.replace(blogspotPattern, ".blogspot.com");
17
- if (normalized.pathname === "/atom.xml") normalized.pathname = "/feeds/posts/default";
18
- else if (normalized.pathname === "/rss.xml") {
19
- normalized.pathname = "/feeds/posts/default";
20
- normalized.searchParams.set("alt", "rss");
17
+ rewritten.hostname = rewritten.hostname.replace(blogspotPattern, ".blogspot.com");
18
+ if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
19
+ else if (rewritten.pathname === "/rss.xml") {
20
+ rewritten.pathname = "/feeds/posts/default";
21
+ rewritten.searchParams.set("alt", "rss");
21
22
  }
22
23
  }
23
- normalized.searchParams.delete("redirect");
24
- const alt = normalized.searchParams.get("alt");
25
- if (alt === "atom" || alt === "json" || alt === "") normalized.searchParams.delete("alt");
26
- normalized.searchParams.delete("v");
27
- normalized.searchParams.delete("max-results");
28
- normalized.searchParams.delete("start-index");
29
- normalized.searchParams.delete("published-min");
30
- normalized.searchParams.delete("published-max");
31
- normalized.searchParams.delete("updated-min");
32
- normalized.searchParams.delete("updated-max");
33
- normalized.searchParams.delete("orderby");
34
- return normalized;
24
+ rewritten.searchParams.delete("redirect");
25
+ const alt = rewritten.searchParams.get("alt");
26
+ if (alt === "atom" || alt === "json" || alt === "") rewritten.searchParams.delete("alt");
27
+ rewritten.searchParams.delete("v");
28
+ rewritten.searchParams.delete("max-results");
29
+ rewritten.searchParams.delete("start-index");
30
+ rewritten.searchParams.delete("published-min");
31
+ rewritten.searchParams.delete("published-max");
32
+ rewritten.searchParams.delete("updated-min");
33
+ rewritten.searchParams.delete("updated-max");
34
+ rewritten.searchParams.delete("orderby");
35
+ const normalized = require_utils.normalizeUrl(rewritten.href, {
36
+ stripTrailingSlash: true,
37
+ collapseSlashes: true,
38
+ stripHash: true,
39
+ normalizeEncoding: true,
40
+ normalizeUnicode: true,
41
+ stripEmptyQuery: true,
42
+ sortQueryParams: true
43
+ });
44
+ return new URL(normalized);
35
45
  }
36
46
  };
37
47
 
@@ -1,3 +1,5 @@
1
+ import { normalizeUrl } from "../utils.js";
2
+
1
3
  //#region src/rewrites/blogger.ts
2
4
  const bloggerPattern = /^(www\.|beta\.)?blogger\.com$/;
3
5
  const blogspotPattern = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
@@ -5,32 +7,41 @@ const bloggerRewrite = {
5
7
  match: (url) => {
6
8
  return bloggerPattern.test(url.hostname) || blogspotPattern.test(url.hostname);
7
9
  },
8
- normalize: (url) => {
9
- const normalized = new URL(url);
10
- const isBlogger = bloggerPattern.test(normalized.hostname);
11
- const isBlogspot = blogspotPattern.test(normalized.hostname);
12
- normalized.protocol = "https:";
13
- if (isBlogger) normalized.hostname = "www.blogger.com";
10
+ rewrite: (url) => {
11
+ const rewritten = new URL(url);
12
+ const isBlogger = bloggerPattern.test(rewritten.hostname);
13
+ const isBlogspot = blogspotPattern.test(rewritten.hostname);
14
+ rewritten.protocol = "https:";
15
+ if (isBlogger) rewritten.hostname = "www.blogger.com";
14
16
  if (isBlogspot) {
15
- normalized.hostname = normalized.hostname.replace(blogspotPattern, ".blogspot.com");
16
- if (normalized.pathname === "/atom.xml") normalized.pathname = "/feeds/posts/default";
17
- else if (normalized.pathname === "/rss.xml") {
18
- normalized.pathname = "/feeds/posts/default";
19
- normalized.searchParams.set("alt", "rss");
17
+ rewritten.hostname = rewritten.hostname.replace(blogspotPattern, ".blogspot.com");
18
+ if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
19
+ else if (rewritten.pathname === "/rss.xml") {
20
+ rewritten.pathname = "/feeds/posts/default";
21
+ rewritten.searchParams.set("alt", "rss");
20
22
  }
21
23
  }
22
- normalized.searchParams.delete("redirect");
23
- const alt = normalized.searchParams.get("alt");
24
- if (alt === "atom" || alt === "json" || alt === "") normalized.searchParams.delete("alt");
25
- normalized.searchParams.delete("v");
26
- normalized.searchParams.delete("max-results");
27
- normalized.searchParams.delete("start-index");
28
- normalized.searchParams.delete("published-min");
29
- normalized.searchParams.delete("published-max");
30
- normalized.searchParams.delete("updated-min");
31
- normalized.searchParams.delete("updated-max");
32
- normalized.searchParams.delete("orderby");
33
- return normalized;
24
+ rewritten.searchParams.delete("redirect");
25
+ const alt = rewritten.searchParams.get("alt");
26
+ if (alt === "atom" || alt === "json" || alt === "") rewritten.searchParams.delete("alt");
27
+ rewritten.searchParams.delete("v");
28
+ rewritten.searchParams.delete("max-results");
29
+ rewritten.searchParams.delete("start-index");
30
+ rewritten.searchParams.delete("published-min");
31
+ rewritten.searchParams.delete("published-max");
32
+ rewritten.searchParams.delete("updated-min");
33
+ rewritten.searchParams.delete("updated-max");
34
+ rewritten.searchParams.delete("orderby");
35
+ const normalized = normalizeUrl(rewritten.href, {
36
+ stripTrailingSlash: true,
37
+ collapseSlashes: true,
38
+ stripHash: true,
39
+ normalizeEncoding: true,
40
+ normalizeUnicode: true,
41
+ stripEmptyQuery: true,
42
+ sortQueryParams: true
43
+ });
44
+ return new URL(normalized);
34
45
  }
35
46
  };
36
47
 
@@ -1,3 +1,4 @@
1
+ const require_utils = require('../utils.cjs');
1
2
 
2
3
  //#region src/rewrites/feedburner.ts
3
4
  const hosts = [
@@ -9,11 +10,18 @@ const feedburnerRewrite = {
9
10
  match: (url) => {
10
11
  return hosts.includes(url.hostname);
11
12
  },
12
- normalize: (url) => {
13
- const normalized = new URL(url);
14
- normalized.hostname = "feeds.feedburner.com";
15
- normalized.search = "";
16
- return normalized;
13
+ rewrite: (url) => {
14
+ const rewritten = new URL(url);
15
+ rewritten.hostname = "feeds.feedburner.com";
16
+ rewritten.search = "";
17
+ const normalized = require_utils.normalizeUrl(rewritten.href, {
18
+ stripTrailingSlash: true,
19
+ collapseSlashes: true,
20
+ stripHash: true,
21
+ normalizeEncoding: true,
22
+ normalizeUnicode: true
23
+ });
24
+ return new URL(normalized);
17
25
  }
18
26
  };
19
27
 
@@ -1,3 +1,5 @@
1
+ import { normalizeUrl } from "../utils.js";
2
+
1
3
  //#region src/rewrites/feedburner.ts
2
4
  const hosts = [
3
5
  "feeds.feedburner.com",
@@ -8,11 +10,18 @@ const feedburnerRewrite = {
8
10
  match: (url) => {
9
11
  return hosts.includes(url.hostname);
10
12
  },
11
- normalize: (url) => {
12
- const normalized = new URL(url);
13
- normalized.hostname = "feeds.feedburner.com";
14
- normalized.search = "";
15
- return normalized;
13
+ rewrite: (url) => {
14
+ const rewritten = new URL(url);
15
+ rewritten.hostname = "feeds.feedburner.com";
16
+ rewritten.search = "";
17
+ const normalized = normalizeUrl(rewritten.href, {
18
+ stripTrailingSlash: true,
19
+ collapseSlashes: true,
20
+ stripHash: true,
21
+ normalizeEncoding: true,
22
+ normalizeUnicode: true
23
+ });
24
+ return new URL(normalized);
16
25
  }
17
26
  };
18
27
 
package/dist/types.d.cts CHANGED
@@ -9,7 +9,11 @@ type ParserAdapter<T> = {
9
9
  };
10
10
  type Rewrite = {
11
11
  match: (url: URL) => boolean;
12
- normalize: (url: URL) => URL;
12
+ rewrite: (url: URL) => URL;
13
+ };
14
+ type Probe = {
15
+ match: (url: URL) => boolean;
16
+ getCandidates: (url: URL) => Array<string>;
13
17
  };
14
18
  type NormalizeOptions = {
15
19
  stripProtocol?: boolean;
@@ -45,8 +49,9 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
45
49
  parser?: ParserAdapter<TFeed>;
46
50
  fetchFn?: FetchFn<TResponse>;
47
51
  existsFn?: ExistsFn<TExisting>;
48
- tiers?: Array<Tier>;
49
52
  rewrites?: Array<Rewrite>;
53
+ probes?: Array<Probe>;
54
+ tiers?: Array<Tier>;
50
55
  stripQueryParams?: Array<string>;
51
56
  onFetch?: OnFetchFn<TResponse>;
52
57
  onMatch?: OnMatchFn<TFeed, TResponse>;
@@ -65,4 +70,4 @@ type FetchFnResponse = {
65
70
  };
66
71
  type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
67
72
  //#endregion
68
- export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
73
+ export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
package/dist/types.d.ts CHANGED
@@ -9,7 +9,11 @@ type ParserAdapter<T> = {
9
9
  };
10
10
  type Rewrite = {
11
11
  match: (url: URL) => boolean;
12
- normalize: (url: URL) => URL;
12
+ rewrite: (url: URL) => URL;
13
+ };
14
+ type Probe = {
15
+ match: (url: URL) => boolean;
16
+ getCandidates: (url: URL) => Array<string>;
13
17
  };
14
18
  type NormalizeOptions = {
15
19
  stripProtocol?: boolean;
@@ -45,8 +49,9 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
45
49
  parser?: ParserAdapter<TFeed>;
46
50
  fetchFn?: FetchFn<TResponse>;
47
51
  existsFn?: ExistsFn<TExisting>;
48
- tiers?: Array<Tier>;
49
52
  rewrites?: Array<Rewrite>;
53
+ probes?: Array<Probe>;
54
+ tiers?: Array<Tier>;
50
55
  stripQueryParams?: Array<string>;
51
56
  onFetch?: OnFetchFn<TResponse>;
52
57
  onMatch?: OnMatchFn<TFeed, TResponse>;
@@ -65,4 +70,4 @@ type FetchFnResponse = {
65
70
  };
66
71
  type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
67
72
  //#endregion
68
- export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
73
+ export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
package/dist/utils.cjs CHANGED
@@ -147,7 +147,7 @@ const applyRewrites = (url, rewrites) => {
147
147
  try {
148
148
  let parsed = new URL(url);
149
149
  for (const rewrite of rewrites) if (rewrite.match(parsed)) {
150
- parsed = rewrite.normalize(parsed);
150
+ parsed = rewrite.rewrite(parsed);
151
151
  break;
152
152
  }
153
153
  return parsed.href;
@@ -155,11 +155,51 @@ const applyRewrites = (url, rewrites) => {
155
155
  return url;
156
156
  }
157
157
  };
158
/**
 * Run the first probe that matches `url` and return the first candidate
 * accepted by `testCandidate`.
 *
 * Only the first matching probe is consulted; its candidates are tested
 * sequentially, in order. Any error (unparsable URL, a throwing probe, or a
 * throwing/rejecting `testCandidate`) results in `url` being returned as is.
 *
 * @param {string} url - URL to probe.
 * @param {Array<{match: Function, getCandidates: Function}>} probes - Probes to try, in order.
 * @param {(candidate: string) => Promise<any>} testCandidate - Resolves to a
 *   truthy value to accept a candidate; that value becomes the result.
 * @returns {Promise<any>} The accepted result, or `url` when nothing was accepted.
 */
const applyProbes = async (url, probes, testCandidate) => {
  try {
    const target = new URL(url);
    const matchingProbe = probes.find((probe) => probe.match(target));
    if (!matchingProbe) return url;
    for (const candidateUrl of matchingProbe.getCandidates(target)) {
      const accepted = await testCandidate(candidateUrl);
      if (accepted) return accepted;
    }
  } catch {
    // Best effort: fall through and keep the input URL.
  }
  return url;
};
174
/**
 * Serialize `object` to JSON while ignoring the given top-level `fields`.
 *
 * The fields are temporarily set to `undefined` on `object` itself (no copy is
 * made), which makes `JSON.stringify` omit them. The object is always put back
 * into its previous state, even when serialization throws (e.g. on a circular
 * structure), and fields that were not own properties are not left behind.
 *
 * @param {object} object - Object to serialize; restored before returning.
 * @param {Array<string>} fields - Top-level keys to leave out of the signature.
 * @returns {string} JSON text of `object` without `fields`.
 * @throws {TypeError} Whatever `JSON.stringify` throws for `object`.
 */
const createSignature = (object, fields) => {
  const saved = fields.map((key) => ({
    key,
    wasOwn: Object.prototype.hasOwnProperty.call(object, key),
    value: object[key]
  }));
  try {
    for (const key of fields) object[key] = void 0;
    return JSON.stringify(object);
  } finally {
    for (const { key, wasOwn, value } of saved) {
      if (wasOwn) object[key] = value;
      else delete object[key];
    }
  }
};
181
/**
 * Matches a double-quoted string that starts with `http://`, `https://` or `/`
 * and has a `/` directly before a `?` or the closing quote, so that slash can
 * be dropped.
 */
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;

/**
 * Make URLs inside `text` independent of the given hosts and of trailing slashes.
 *
 * 1. Every `http(s)://[www.]<host>` prefix, for the host of each entry in
 *    `urls`, is replaced by `/`, turning absolute URLs into root-relative ones.
 * 2. A trailing `/` before `?` or the closing quote of a quoted URL/path is removed.
 *
 * NOTE(review): presumably used to compare serialized feeds fetched from
 * different host/slash variants — confirm against callers.
 *
 * @param {string} text - Text to rewrite (quoted URLs, e.g. JSON).
 * @param {Array<string>} urls - URLs whose hosts should be neutralized;
 *   entries that cannot be parsed are ignored.
 * @returns {string} The rewritten text; `text` unchanged when no host is usable.
 */
const neutralizeUrls = (text, urls) => {
  const escapeHost = (url) => {
    try {
      // Escape every regex metacharacter, not only ".": an IPv6 host such as
      // "[::1]" would otherwise be interpreted as a character class.
      return new URL("/", url).host.replace(/^www\./, "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    } catch {
      return;
    }
  };
  const hosts = [...new Set(urls.map(escapeHost).filter(Boolean))];
  if (hosts.length === 0) return text;
  const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
  return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
};
158
195
 
159
196
  //#endregion
160
197
  exports.addMissingProtocol = addMissingProtocol;
198
+ exports.applyProbes = applyProbes;
161
199
  exports.applyRewrites = applyRewrites;
200
+ exports.createSignature = createSignature;
162
201
  exports.fixMalformedProtocol = fixMalformedProtocol;
202
+ exports.neutralizeUrls = neutralizeUrls;
163
203
  exports.normalizeUrl = normalizeUrl;
164
204
  exports.resolveFeedProtocol = resolveFeedProtocol;
165
205
  exports.resolveUrl = resolveUrl;
package/dist/utils.js CHANGED
@@ -147,7 +147,7 @@ const applyRewrites = (url, rewrites) => {
147
147
  try {
148
148
  let parsed = new URL(url);
149
149
  for (const rewrite of rewrites) if (rewrite.match(parsed)) {
150
- parsed = rewrite.normalize(parsed);
150
+ parsed = rewrite.rewrite(parsed);
151
151
  break;
152
152
  }
153
153
  return parsed.href;
@@ -155,6 +155,43 @@ const applyRewrites = (url, rewrites) => {
155
155
  return url;
156
156
  }
157
157
  };
158
/**
 * Run the first probe that matches `url` and return the first candidate
 * accepted by `testCandidate`.
 *
 * Only the first matching probe is consulted; its candidates are tested
 * sequentially, in order. Any error (unparsable URL, a throwing probe, or a
 * throwing/rejecting `testCandidate`) results in `url` being returned as is.
 *
 * @param {string} url - URL to probe.
 * @param {Array<{match: Function, getCandidates: Function}>} probes - Probes to try, in order.
 * @param {(candidate: string) => Promise<any>} testCandidate - Resolves to a
 *   truthy value to accept a candidate; that value becomes the result.
 * @returns {Promise<any>} The accepted result, or `url` when nothing was accepted.
 */
const applyProbes = async (url, probes, testCandidate) => {
  try {
    const target = new URL(url);
    const matchingProbe = probes.find((probe) => probe.match(target));
    if (!matchingProbe) return url;
    for (const candidateUrl of matchingProbe.getCandidates(target)) {
      const accepted = await testCandidate(candidateUrl);
      if (accepted) return accepted;
    }
  } catch {
    // Best effort: fall through and keep the input URL.
  }
  return url;
};
174
/**
 * Serialize `object` to JSON while ignoring the given top-level `fields`.
 *
 * The fields are temporarily set to `undefined` on `object` itself (no copy is
 * made), which makes `JSON.stringify` omit them. The object is always put back
 * into its previous state, even when serialization throws (e.g. on a circular
 * structure), and fields that were not own properties are not left behind.
 *
 * @param {object} object - Object to serialize; restored before returning.
 * @param {Array<string>} fields - Top-level keys to leave out of the signature.
 * @returns {string} JSON text of `object` without `fields`.
 * @throws {TypeError} Whatever `JSON.stringify` throws for `object`.
 */
const createSignature = (object, fields) => {
  const saved = fields.map((key) => ({
    key,
    wasOwn: Object.prototype.hasOwnProperty.call(object, key),
    value: object[key]
  }));
  try {
    for (const key of fields) object[key] = void 0;
    return JSON.stringify(object);
  } finally {
    for (const { key, wasOwn, value } of saved) {
      if (wasOwn) object[key] = value;
      else delete object[key];
    }
  }
};
181
/**
 * Matches a double-quoted string that starts with `http://`, `https://` or `/`
 * and has a `/` directly before a `?` or the closing quote, so that slash can
 * be dropped.
 */
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;

/**
 * Make URLs inside `text` independent of the given hosts and of trailing slashes.
 *
 * 1. Every `http(s)://[www.]<host>` prefix, for the host of each entry in
 *    `urls`, is replaced by `/`, turning absolute URLs into root-relative ones.
 * 2. A trailing `/` before `?` or the closing quote of a quoted URL/path is removed.
 *
 * NOTE(review): presumably used to compare serialized feeds fetched from
 * different host/slash variants — confirm against callers.
 *
 * @param {string} text - Text to rewrite (quoted URLs, e.g. JSON).
 * @param {Array<string>} urls - URLs whose hosts should be neutralized;
 *   entries that cannot be parsed are ignored.
 * @returns {string} The rewritten text; `text` unchanged when no host is usable.
 */
const neutralizeUrls = (text, urls) => {
  const escapeHost = (url) => {
    try {
      // Escape every regex metacharacter, not only ".": an IPv6 host such as
      // "[::1]" would otherwise be interpreted as a character class.
      return new URL("/", url).host.replace(/^www\./, "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    } catch {
      return;
    }
  };
  const hosts = [...new Set(urls.map(escapeHost).filter(Boolean))];
  if (hosts.length === 0) return text;
  const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
  return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
};
158
195
 
159
196
  //#endregion
160
- export { addMissingProtocol, applyRewrites, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
197
+ export { addMissingProtocol, applyProbes, applyRewrites, createSignature, fixMalformedProtocol, neutralizeUrls, normalizeUrl, resolveFeedProtocol, resolveUrl };
package/package.json CHANGED
@@ -63,5 +63,5 @@
63
63
  "tsdown": "^0.18.4",
64
64
  "vitepress": "^1.6.4"
65
65
  },
66
- "version": "1.2.1"
66
+ "version": "1.3.0"
67
67
  }