feedcanon 1.5.1 → 2.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/defaults.js CHANGED
@@ -169,8 +169,7 @@ const defaultNormalizeOptions = {
169
169
  stripEmptyQuery: true,
170
170
  lowercaseQuery: false,
171
171
  normalizeEncoding: true,
172
- normalizeUnicode: true,
173
- convertToPunycode: true
172
+ normalizeUnicode: true
174
173
  };
175
174
  const defaultFetch = async (url, options) => {
176
175
  const response = await fetch(url, {
@@ -240,8 +239,7 @@ const defaultTiers = [
240
239
  stripQuery: true,
241
240
  stripEmptyQuery: true,
242
241
  normalizeEncoding: true,
243
- normalizeUnicode: true,
244
- convertToPunycode: true
242
+ normalizeUnicode: true
245
243
  },
246
244
  {
247
245
  stripProtocol: false,
@@ -255,8 +253,7 @@ const defaultTiers = [
255
253
  stripQuery: false,
256
254
  stripEmptyQuery: true,
257
255
  normalizeEncoding: true,
258
- normalizeUnicode: true,
259
- convertToPunycode: true
256
+ normalizeUnicode: true
260
257
  },
261
258
  {
262
259
  stripProtocol: false,
@@ -270,8 +267,7 @@ const defaultTiers = [
270
267
  stripQuery: false,
271
268
  stripEmptyQuery: true,
272
269
  normalizeEncoding: true,
273
- normalizeUnicode: true,
274
- convertToPunycode: true
270
+ normalizeUnicode: true
275
271
  },
276
272
  {
277
273
  stripProtocol: false,
@@ -285,8 +281,7 @@ const defaultTiers = [
285
281
  stripQuery: false,
286
282
  stripEmptyQuery: true,
287
283
  normalizeEncoding: true,
288
- normalizeUnicode: true,
289
- convertToPunycode: true
284
+ normalizeUnicode: true
290
285
  }
291
286
  ];
292
287
  //#endregion
package/dist/types.d.ts CHANGED
@@ -31,7 +31,6 @@ type NormalizeOptions = {
31
31
  lowercaseQuery?: boolean;
32
32
  normalizeEncoding?: boolean;
33
33
  normalizeUnicode?: boolean;
34
- convertToPunycode?: boolean;
35
34
  };
36
35
  type Tier = Omit<NormalizeOptions, 'stripQueryParams'>;
37
36
  type OnFetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
package/dist/utils.js CHANGED
@@ -1,5 +1,4 @@
1
1
  import { defaultNormalizeOptions } from "./defaults.js";
2
- import { domainToASCII } from "node:url";
3
2
  import { decodeHTML } from "entities";
4
3
  //#region src/utils.ts
5
4
  const strippedParamsCache = /* @__PURE__ */ new WeakMap();
@@ -15,7 +14,6 @@ const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
15
14
  const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
16
15
  const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
17
16
  const httpsLetterRegex = /s/i;
18
- const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
19
17
  const protocolPrefixRegex = /^https?:\/\//;
20
18
  const wwwPrefixRegex = /^www\./;
21
19
  const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
@@ -115,10 +113,6 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
115
113
  parsed.hostname = parsed.hostname.normalize("NFC");
116
114
  parsed.pathname = parsed.pathname.normalize("NFC");
117
115
  }
118
- if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
119
- const ascii = domainToASCII(parsed.hostname);
120
- if (ascii) parsed.hostname = ascii;
121
- }
122
116
  if (options.stripAuthentication) {
123
117
  parsed.username = "";
124
118
  parsed.password = "";
package/package.json CHANGED
@@ -24,24 +24,12 @@
24
24
  "type": "module",
25
25
  "exports": {
26
26
  ".": {
27
- "import": {
28
- "types": "./dist/exports.d.ts",
29
- "default": "./dist/exports.js"
30
- },
31
- "require": {
32
- "types": "./dist/exports.d.cts",
33
- "default": "./dist/exports.cjs"
34
- }
27
+ "types": "./dist/exports.d.ts",
28
+ "default": "./dist/exports.js"
35
29
  },
36
30
  "./defaults": {
37
- "import": {
38
- "types": "./dist/defaults.d.ts",
39
- "default": "./dist/defaults.js"
40
- },
41
- "require": {
42
- "types": "./dist/defaults.d.cts",
43
- "default": "./dist/defaults.cjs"
44
- }
31
+ "types": "./dist/defaults.d.ts",
32
+ "default": "./dist/defaults.js"
45
33
  }
46
34
  },
47
35
  "files": [
@@ -49,14 +37,13 @@
49
37
  ],
50
38
  "scripts": {
51
39
  "prepare": "lefthook install",
52
- "build": "tsdown src/exports.ts src/defaults.ts --format cjs,esm --dts --clean --unbundle --no-fixed-extension",
40
+ "build": "tsdown src/exports.ts src/defaults.ts --format esm --dts --clean --unbundle --no-fixed-extension",
53
41
  "docs:dev": "vitepress dev docs",
54
42
  "docs:build": "vitepress build docs"
55
43
  },
56
44
  "dependencies": {
57
- "entities": "^7.0.1",
58
- "feedsmith": "^2.9.1",
59
- "typescript": "^6.0.2"
45
+ "entities": "^8.0.0",
46
+ "feedsmith": "^3.0.0-beta.1"
60
47
  },
61
48
  "devDependencies": {
62
49
  "@types/bun": "^1.3.11",
@@ -64,5 +51,5 @@
64
51
  "tsdown": "^0.21.7",
65
52
  "vitepress": "^2.0.0-alpha.17"
66
53
  },
67
- "version": "1.5.1"
54
+ "version": "2.0.0-next.1"
68
55
  }
package/dist/defaults.cjs DELETED
@@ -1,298 +0,0 @@
1
- Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
- const require_utils = require("./utils.cjs");
3
- let feedsmith = require("feedsmith");
4
- //#region src/defaults.ts
5
- const defaultStrippedParams = [
6
- "utm_source",
7
- "utm_medium",
8
- "utm_campaign",
9
- "utm_term",
10
- "utm_content",
11
- "utm_reader",
12
- "utm_name",
13
- "utm_cid",
14
- "utm_viz_id",
15
- "gclid",
16
- "dclid",
17
- "gbraid",
18
- "wbraid",
19
- "gclsrc",
20
- "gad_source",
21
- "gad_campaignid",
22
- "srsltid",
23
- "fbclid",
24
- "fb_action_ids",
25
- "fb_action_types",
26
- "fb_source",
27
- "fb_ref",
28
- "_ga",
29
- "_gl",
30
- "_bk",
31
- "_ke",
32
- "mc_cid",
33
- "mc_eid",
34
- "mc_tc",
35
- "mkt_tok",
36
- "msclkid",
37
- "twclid",
38
- "ttclid",
39
- "igshid",
40
- "mtm_campaign",
41
- "mtm_cid",
42
- "mtm_content",
43
- "mtm_group",
44
- "mtm_keyword",
45
- "mtm_medium",
46
- "mtm_placement",
47
- "mtm_source",
48
- "pk_campaign",
49
- "pk_cid",
50
- "pk_content",
51
- "pk_keyword",
52
- "pk_medium",
53
- "pk_source",
54
- "ncid",
55
- "sr_share",
56
- "hsa_acc",
57
- "hsa_ad",
58
- "hsa_cam",
59
- "hsa_grp",
60
- "hsa_kw",
61
- "hsa_mt",
62
- "hsa_net",
63
- "hsa_src",
64
- "hsa_tgt",
65
- "hsa_ver",
66
- "hsCtaTracking",
67
- "_hsenc",
68
- "_hsmi",
69
- "__hstc",
70
- "__hsfp",
71
- "__hssc",
72
- "cid",
73
- "s_kwcid",
74
- "sc_cid",
75
- "ef_id",
76
- "obOrigUrl",
77
- "dicbo",
78
- "yclid",
79
- "ysclid",
80
- "_openstat",
81
- "awinaffid",
82
- "awinmid",
83
- "clickref",
84
- "afftrack",
85
- "itm_source",
86
- "itm_medium",
87
- "itm_campaign",
88
- "itm_content",
89
- "itm_channel",
90
- "itm_audience",
91
- "int_source",
92
- "int_medium",
93
- "int_campaign",
94
- "int_content",
95
- "int_placement",
96
- "int_campaign_type",
97
- "int_keycode",
98
- "g2i_source",
99
- "g2i_medium",
100
- "g2i_campaign",
101
- "g2i_or_o",
102
- "g2i_or_p",
103
- "doing_wp_cron",
104
- "preview",
105
- "preview_id",
106
- "preview_nonce",
107
- "replytocom",
108
- "_",
109
- "timestamp",
110
- "ts",
111
- "cb",
112
- "cachebuster",
113
- "nocache",
114
- "rand",
115
- "random",
116
- "sbdcrw",
117
- "forceByPassCache",
118
- "sucurianticache",
119
- "cleancache",
120
- "rebuildcache",
121
- "kontrol_health_check_timestamp",
122
- "action_object_map",
123
- "action_ref_map",
124
- "action_type_map",
125
- "algo_expid",
126
- "algo_pvid",
127
- "at_campaign",
128
- "at_custom1",
129
- "at_custom2",
130
- "at_custom3",
131
- "at_custom4",
132
- "at_medium",
133
- "at_preview_index",
134
- "_bhlid",
135
- "_branch_match_id",
136
- "_branch_referrer",
137
- "__readwiseLocation",
138
- "campaign_id",
139
- "click_sum",
140
- "fref",
141
- "gs_l",
142
- "hmb_campaign",
143
- "hmb_medium",
144
- "hmb_source",
145
- "ml_subscriber",
146
- "ml_subscriber_hash",
147
- "oly_anon_id",
148
- "oly_enc_id",
149
- "rb_clickid",
150
- "referer",
151
- "referrer",
152
- "spm",
153
- "trk",
154
- "vero_conv",
155
- "vero_id",
156
- "wickedid",
157
- "xtor"
158
- ];
159
- const defaultNormalizeOptions = {
160
- stripProtocol: true,
161
- stripAuthentication: false,
162
- stripWww: true,
163
- stripTrailingSlash: true,
164
- stripRootSlash: true,
165
- collapseSlashes: true,
166
- stripHash: true,
167
- sortQueryParams: true,
168
- stripQueryParams: defaultStrippedParams,
169
- stripQuery: false,
170
- stripEmptyQuery: true,
171
- lowercaseQuery: false,
172
- normalizeEncoding: true,
173
- normalizeUnicode: true,
174
- convertToPunycode: true
175
- };
176
- const defaultFetch = async (url, options) => {
177
- const response = await fetch(url, {
178
- method: options?.method ?? "GET",
179
- headers: options?.headers
180
- });
181
- return {
182
- headers: response.headers,
183
- body: await response.text(),
184
- url: response.url,
185
- status: response.status
186
- };
187
- };
188
- const retrieveSelfLink = (parsed) => {
189
- switch (parsed.format) {
190
- case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
191
- case "rss":
192
- case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self");
193
- }
194
- };
195
- const defaultParser = {
196
- parse: (body) => {
197
- try {
198
- return (0, feedsmith.parseFeed)(body);
199
- } catch {}
200
- },
201
- getSelfUrl: (parsed) => {
202
- return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
203
- },
204
- getSignature: (parsed, url) => {
205
- let signature;
206
- let contentUrl;
207
- if (parsed.format === "json") {
208
- contentUrl = parsed.feed.home_page_url;
209
- signature = require_utils.createSignature(parsed.feed, ["feed_url"]);
210
- } else {
211
- const selfLink = retrieveSelfLink(parsed);
212
- const savedSelfHref = selfLink?.href;
213
- if (selfLink) selfLink.href = void 0;
214
- if (parsed.format === "rss") {
215
- contentUrl = parsed.feed.link;
216
- signature = require_utils.createSignature(parsed.feed, [
217
- "lastBuildDate",
218
- "pubDate",
219
- "link",
220
- "generator"
221
- ]);
222
- } else if (parsed.format === "rdf") {
223
- contentUrl = parsed.feed.link;
224
- signature = require_utils.createSignature(parsed.feed, ["link"]);
225
- } else signature = require_utils.createSignature(parsed.feed, ["updated", "generator"]);
226
- if (selfLink) selfLink.href = savedSelfHref;
227
- }
228
- return require_utils.neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
229
- }
230
- };
231
- const defaultTiers = [
232
- {
233
- stripProtocol: false,
234
- stripAuthentication: false,
235
- stripWww: true,
236
- stripTrailingSlash: true,
237
- stripRootSlash: true,
238
- collapseSlashes: true,
239
- stripHash: true,
240
- sortQueryParams: false,
241
- stripQuery: true,
242
- stripEmptyQuery: true,
243
- normalizeEncoding: true,
244
- normalizeUnicode: true,
245
- convertToPunycode: true
246
- },
247
- {
248
- stripProtocol: false,
249
- stripAuthentication: false,
250
- stripWww: true,
251
- stripTrailingSlash: true,
252
- stripRootSlash: true,
253
- collapseSlashes: true,
254
- stripHash: true,
255
- sortQueryParams: true,
256
- stripQuery: false,
257
- stripEmptyQuery: true,
258
- normalizeEncoding: true,
259
- normalizeUnicode: true,
260
- convertToPunycode: true
261
- },
262
- {
263
- stripProtocol: false,
264
- stripAuthentication: false,
265
- stripWww: false,
266
- stripTrailingSlash: true,
267
- stripRootSlash: true,
268
- collapseSlashes: true,
269
- stripHash: true,
270
- sortQueryParams: true,
271
- stripQuery: false,
272
- stripEmptyQuery: true,
273
- normalizeEncoding: true,
274
- normalizeUnicode: true,
275
- convertToPunycode: true
276
- },
277
- {
278
- stripProtocol: false,
279
- stripAuthentication: false,
280
- stripWww: false,
281
- stripTrailingSlash: false,
282
- stripRootSlash: true,
283
- collapseSlashes: true,
284
- stripHash: true,
285
- sortQueryParams: true,
286
- stripQuery: false,
287
- stripEmptyQuery: true,
288
- normalizeEncoding: true,
289
- normalizeUnicode: true,
290
- convertToPunycode: true
291
- }
292
- ];
293
- //#endregion
294
- exports.defaultFetch = defaultFetch;
295
- exports.defaultNormalizeOptions = defaultNormalizeOptions;
296
- exports.defaultParser = defaultParser;
297
- exports.defaultStrippedParams = defaultStrippedParams;
298
- exports.defaultTiers = defaultTiers;
@@ -1,10 +0,0 @@
1
- import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } from "./types.cjs";
2
-
3
- //#region src/defaults.d.ts
4
- declare const defaultStrippedParams: string[];
5
- declare const defaultNormalizeOptions: NormalizeOptions;
6
- declare const defaultFetch: FetchFn;
7
- declare const defaultParser: ParserAdapter<DefaultParserResult>;
8
- declare const defaultTiers: Array<Tier>;
9
- //#endregion
10
- export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
package/dist/exports.cjs DELETED
@@ -1,20 +0,0 @@
1
- Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
- const require_utils = require("./utils.cjs");
3
- const require_defaults = require("./defaults.cjs");
4
- const require_index = require("./index.cjs");
5
- const require_wordpress = require("./probes/wordpress.cjs");
6
- const require_blogger = require("./rewrites/blogger.cjs");
7
- const require_feedburner = require("./rewrites/feedburner.cjs");
8
- exports.addMissingProtocol = require_utils.addMissingProtocol;
9
- exports.bloggerRewrite = require_blogger.bloggerRewrite;
10
- exports.defaultFetch = require_defaults.defaultFetch;
11
- exports.defaultParser = require_defaults.defaultParser;
12
- exports.defaultStrippedParams = require_defaults.defaultStrippedParams;
13
- exports.defaultTiers = require_defaults.defaultTiers;
14
- exports.feedburnerRewrite = require_feedburner.feedburnerRewrite;
15
- exports.findCanonical = require_index.findCanonical;
16
- exports.fixMalformedProtocol = require_utils.fixMalformedProtocol;
17
- exports.normalizeUrl = require_utils.normalizeUrl;
18
- exports.resolveFeedProtocol = require_utils.resolveFeedProtocol;
19
- exports.resolveUrl = require_utils.resolveUrl;
20
- exports.wordpressProbe = require_wordpress.wordpressProbe;
@@ -1,8 +0,0 @@
1
- import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.cjs";
2
- import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
3
- import { findCanonical } from "./index.cjs";
4
- import { wordpressProbe } from "./probes/wordpress.cjs";
5
- import { bloggerRewrite } from "./rewrites/blogger.cjs";
6
- import { feedburnerRewrite } from "./rewrites/feedburner.cjs";
7
- import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
8
- export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
package/dist/index.cjs DELETED
@@ -1,153 +0,0 @@
1
- const require_utils = require("./utils.cjs");
2
- const require_defaults = require("./defaults.cjs");
3
- //#region src/index.ts
4
- async function findCanonical(inputUrl, options) {
5
- const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites, probes, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
6
- const stripParams = (url) => {
7
- return require_utils.normalizeUrl(url, {
8
- stripQueryParams,
9
- sortQueryParams: true,
10
- stripEmptyQuery: true
11
- });
12
- };
13
- const resolveAndApplyRewrites = (url, baseUrl) => {
14
- const resolved = require_utils.resolveUrl(url, baseUrl);
15
- return resolved && rewrites ? require_utils.applyRewrites(resolved, rewrites) : resolved;
16
- };
17
- const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
18
- if (!initialRequestUrl) return;
19
- let initialResponse;
20
- try {
21
- initialResponse = await fetchFn(initialRequestUrl);
22
- } catch {
23
- return;
24
- }
25
- onFetch?.({
26
- url: initialRequestUrl,
27
- response: initialResponse
28
- });
29
- if (initialResponse.status < 200 || initialResponse.status >= 300) return;
30
- const initialResponseUrlRaw = resolveAndApplyRewrites(initialResponse.url);
31
- if (!initialResponseUrlRaw) return;
32
- const initialResponseUrl = stripParams(initialResponseUrlRaw);
33
- const initialResponseBody = initialResponse.body;
34
- if (!initialResponseBody) return;
35
- let initialResponseSignature;
36
- let selfRequestUrl;
37
- const initialResponseFeed = await parser.parse(initialResponseBody);
38
- if (!initialResponseFeed) return;
39
- onMatch?.({
40
- url: initialRequestUrl,
41
- response: initialResponse,
42
- feed: initialResponseFeed
43
- });
44
- const selfRequestUrlRaw = parser.getSelfUrl(initialResponseFeed);
45
- if (selfRequestUrlRaw) {
46
- selfRequestUrl = resolveAndApplyRewrites(selfRequestUrlRaw, initialResponseUrl);
47
- selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
48
- }
49
- const compareWithInitialResponse = async (comparedResponseBody, comparedResponseUrl) => {
50
- if (!comparedResponseBody) return false;
51
- if (initialResponseBody === comparedResponseBody) return true;
52
- const comparedResponseFeed = await parser.parse(comparedResponseBody);
53
- if (comparedResponseFeed) {
54
- initialResponseSignature ||= parser.getSignature(initialResponseFeed, initialResponseUrl);
55
- const comparedResponseSignature = parser.getSignature(comparedResponseFeed, comparedResponseUrl);
56
- return initialResponseSignature === comparedResponseSignature;
57
- }
58
- return false;
59
- };
60
- const fetchAndCompare = async (url) => {
61
- let response;
62
- try {
63
- response = await fetchFn(url);
64
- } catch {
65
- return;
66
- }
67
- onFetch?.({
68
- url,
69
- response
70
- });
71
- if (response.status < 200 || response.status >= 300) return;
72
- if (!await compareWithInitialResponse(response.body, response.url)) return;
73
- return response;
74
- };
75
- let candidateSourceUrl = initialResponseUrl;
76
- if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
77
- const urlsToTry = [selfRequestUrl];
78
- if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
79
- else if (selfRequestUrl.startsWith("http://")) urlsToTry.push(selfRequestUrl.replace("http://", "https://"));
80
- for (const urlToTry of urlsToTry) {
81
- const response = await fetchAndCompare(urlToTry);
82
- if (response) {
83
- onMatch?.({
84
- url: urlToTry,
85
- response,
86
- feed: initialResponseFeed
87
- });
88
- candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
89
- candidateSourceUrl = stripParams(candidateSourceUrl);
90
- break;
91
- }
92
- }
93
- }
94
- if (probes && probes?.length > 0) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
95
- const response = await fetchAndCompare(candidateUrl);
96
- if (response) {
97
- onMatch?.({
98
- url: candidateUrl,
99
- response,
100
- feed: initialResponseFeed
101
- });
102
- return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
103
- }
104
- });
105
- const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(require_utils.normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
106
- candidateUrls.add(candidateSourceUrl);
107
- let winningUrl = candidateSourceUrl;
108
- for (const candidateUrl of candidateUrls) {
109
- if (existsFn) {
110
- const data = await existsFn(candidateUrl);
111
- if (data !== void 0) {
112
- onExists?.({
113
- url: candidateUrl,
114
- data
115
- });
116
- return candidateUrl;
117
- }
118
- }
119
- if (candidateUrl === candidateSourceUrl) continue;
120
- if (candidateUrl === initialResponseUrl) {
121
- winningUrl = initialResponseUrl;
122
- break;
123
- }
124
- const candidateResponse = await fetchAndCompare(candidateUrl);
125
- if (candidateResponse) {
126
- let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
127
- if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
128
- if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
129
- onMatch?.({
130
- url: candidateUrl,
131
- response: candidateResponse,
132
- feed: initialResponseFeed
133
- });
134
- winningUrl = candidateUrl;
135
- break;
136
- }
137
- }
138
- if (winningUrl.startsWith("http://")) {
139
- const httpsUrl = winningUrl.replace("http://", "https://");
140
- const response = await fetchAndCompare(httpsUrl);
141
- if (response) {
142
- onMatch?.({
143
- url: httpsUrl,
144
- response,
145
- feed: initialResponseFeed
146
- });
147
- return httpsUrl;
148
- }
149
- }
150
- return winningUrl;
151
- }
152
- //#endregion
153
- exports.findCanonical = findCanonical;
package/dist/index.d.cts DELETED
@@ -1,9 +0,0 @@
1
- import { DefaultParserResult, FetchFnResponse, FindCanonicalOptions, ParserAdapter } from "./types.cjs";
2
-
3
- //#region src/index.d.ts
4
- declare function findCanonical<TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options?: Omit<FindCanonicalOptions<DefaultParserResult, TResponse, TExisting>, 'parser'>): Promise<string | undefined>;
5
- declare function findCanonical<TFeed, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options: FindCanonicalOptions<TFeed, TResponse, TExisting> & {
6
- parser: ParserAdapter<TFeed>;
7
- }): Promise<string | undefined>;
8
- //#endregion
9
- export { findCanonical };
@@ -1,51 +0,0 @@
1
- //#region src/probes/wordpress.ts
2
- const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
3
- const feedPathRegex = /\/feed(\/|$)/;
4
- const trailingSlashRegex = /\/$/;
5
- const optionalTrailingSlashRegex = /\/?$/;
6
- const feedTypes = [
7
- "atom",
8
- "rss2",
9
- "rss",
10
- "rdf"
11
- ];
12
- const wordpressProbe = {
13
- match: (url) => {
14
- const feed = url.searchParams.get("feed")?.toLowerCase();
15
- if (!feed) return false;
16
- const type = feed.startsWith("comments-") ? feed.slice(9) : feed;
17
- return feedTypes.includes(type);
18
- },
19
- getCandidates: (url) => {
20
- const feed = url.searchParams.get("feed")?.toLowerCase();
21
- if (!feed) return [];
22
- const candidates = [];
23
- const isComment = feed.startsWith("comments-");
24
- const type = isComment ? feed.slice(9) : feed;
25
- if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
26
- const withoutSlash = new URL(url);
27
- withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
28
- withoutSlash.searchParams.delete("feed");
29
- candidates.push(withoutSlash.href);
30
- const withSlash = new URL(url);
31
- withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
32
- withSlash.searchParams.delete("feed");
33
- candidates.push(withSlash.href);
34
- return candidates;
35
- }
36
- const basePath = url.pathname.replace(trailingSlashRegex, "");
37
- const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
38
- const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
39
- const primary = new URL(url);
40
- primary.pathname = basePath + feedPath;
41
- primary.searchParams.delete("feed");
42
- candidates.push(primary.href);
43
- const withSlash = new URL(url);
44
- withSlash.pathname = `${basePath}${feedPath}/`;
45
- withSlash.searchParams.delete("feed");
46
- candidates.push(withSlash.href);
47
- return candidates;
48
- }
49
- };
50
- //#endregion
51
- exports.wordpressProbe = wordpressProbe;
@@ -1,6 +0,0 @@
1
- import { Probe } from "../types.cjs";
2
-
3
- //#region src/probes/wordpress.d.ts
4
- declare const wordpressProbe: Probe;
5
- //#endregion
6
- export { wordpressProbe };
@@ -1,47 +0,0 @@
1
- const require_utils = require("../utils.cjs");
2
- //#region src/rewrites/blogger.ts
3
- const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
4
- const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
5
- const bloggerRewrite = {
6
- match: (url) => {
7
- return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
8
- },
9
- rewrite: (url) => {
10
- const rewritten = new URL(url);
11
- const isBlogger = bloggerRegex.test(rewritten.hostname);
12
- const isBlogspot = blogspotRegex.test(rewritten.hostname);
13
- rewritten.protocol = "https:";
14
- if (isBlogger) rewritten.hostname = "www.blogger.com";
15
- if (isBlogspot) {
16
- rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
17
- if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
18
- else if (rewritten.pathname === "/rss.xml") {
19
- rewritten.pathname = "/feeds/posts/default";
20
- rewritten.searchParams.set("alt", "rss");
21
- }
22
- }
23
- rewritten.searchParams.delete("redirect");
24
- const alt = rewritten.searchParams.get("alt");
25
- if (alt === "atom" || alt === "json" || alt === "") rewritten.searchParams.delete("alt");
26
- rewritten.searchParams.delete("v");
27
- rewritten.searchParams.delete("max-results");
28
- rewritten.searchParams.delete("start-index");
29
- rewritten.searchParams.delete("published-min");
30
- rewritten.searchParams.delete("published-max");
31
- rewritten.searchParams.delete("updated-min");
32
- rewritten.searchParams.delete("updated-max");
33
- rewritten.searchParams.delete("orderby");
34
- const normalized = require_utils.normalizeUrl(rewritten.href, {
35
- stripTrailingSlash: true,
36
- collapseSlashes: true,
37
- stripHash: true,
38
- normalizeEncoding: true,
39
- normalizeUnicode: true,
40
- stripEmptyQuery: true,
41
- sortQueryParams: true
42
- });
43
- return new URL(normalized);
44
- }
45
- };
46
- //#endregion
47
- exports.bloggerRewrite = bloggerRewrite;
@@ -1,6 +0,0 @@
1
- import { Rewrite } from "../types.cjs";
2
-
3
- //#region src/rewrites/blogger.d.ts
4
- declare const bloggerRewrite: Rewrite;
5
- //#endregion
6
- export { bloggerRewrite };
@@ -1,27 +0,0 @@
1
- const require_utils = require("../utils.cjs");
2
- //#region src/rewrites/feedburner.ts
3
- const hosts = [
4
- "feeds.feedburner.com",
5
- "feeds2.feedburner.com",
6
- "feedproxy.google.com"
7
- ];
8
- const feedburnerRewrite = {
9
- match: (url) => {
10
- return hosts.includes(url.hostname);
11
- },
12
- rewrite: (url) => {
13
- const rewritten = new URL(url);
14
- rewritten.hostname = "feeds.feedburner.com";
15
- rewritten.search = "";
16
- const normalized = require_utils.normalizeUrl(rewritten.href, {
17
- stripTrailingSlash: true,
18
- collapseSlashes: true,
19
- stripHash: true,
20
- normalizeEncoding: true,
21
- normalizeUnicode: true
22
- });
23
- return new URL(normalized);
24
- }
25
- };
26
- //#endregion
27
- exports.feedburnerRewrite = feedburnerRewrite;
@@ -1,6 +0,0 @@
1
- import { Rewrite } from "../types.cjs";
2
-
3
- //#region src/rewrites/feedburner.d.ts
4
- declare const feedburnerRewrite: Rewrite;
5
- //#endregion
6
- export { feedburnerRewrite };
package/dist/types.d.cts DELETED
@@ -1,75 +0,0 @@
1
- import * as _$feedsmith from "feedsmith";
2
-
3
- //#region src/types.d.ts
4
- type MaybePromise<T> = T | Promise<T>;
5
- type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
6
- type ParserAdapter<T> = {
7
- parse: (body: string) => MaybePromise<T | undefined>;
8
- getSelfUrl: (parsed: T) => string | undefined;
9
- getSignature: (parsed: T, url: string) => string;
10
- };
11
- type Rewrite = {
12
- match: (url: URL) => boolean;
13
- rewrite: (url: URL) => URL;
14
- };
15
- type Probe = {
16
- match: (url: URL) => boolean;
17
- getCandidates: (url: URL) => Array<string>;
18
- };
19
- type NormalizeOptions = {
20
- stripProtocol?: boolean;
21
- stripAuthentication?: boolean;
22
- stripWww?: boolean;
23
- stripTrailingSlash?: boolean;
24
- stripRootSlash?: boolean;
25
- collapseSlashes?: boolean;
26
- stripHash?: boolean;
27
- sortQueryParams?: boolean;
28
- stripQueryParams?: Array<string>;
29
- stripQuery?: boolean;
30
- stripEmptyQuery?: boolean;
31
- lowercaseQuery?: boolean;
32
- normalizeEncoding?: boolean;
33
- normalizeUnicode?: boolean;
34
- convertToPunycode?: boolean;
35
- };
36
- type Tier = Omit<NormalizeOptions, 'stripQueryParams'>;
37
- type OnFetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
38
- url: string;
39
- response: TResponse;
40
- }) => void;
41
- type OnMatchFn<TFeed = unknown, TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
42
- url: string;
43
- response: TResponse;
44
- feed: TFeed;
45
- }) => void;
46
- type OnExistsFn<T> = (data: {
47
- url: string;
48
- data: T;
49
- }) => void;
50
- type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown> = {
51
- parser?: ParserAdapter<TFeed>;
52
- fetchFn?: FetchFn<TResponse>;
53
- existsFn?: ExistsFn<TExisting>;
54
- rewrites?: Array<Rewrite>;
55
- probes?: Array<Probe>;
56
- tiers?: Array<Tier>;
57
- stripQueryParams?: Array<string>;
58
- onFetch?: OnFetchFn<TResponse>;
59
- onMatch?: OnMatchFn<TFeed, TResponse>;
60
- onExists?: OnExistsFn<TExisting>;
61
- };
62
- type FetchFnOptions = {
63
- method?: 'GET' | 'HEAD';
64
- headers?: Record<string, string>;
65
- };
66
- type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
67
- type FetchFnResponse = {
68
- headers: Headers;
69
- body: string;
70
- url: string;
71
- status: number;
72
- };
73
- type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
74
- //#endregion
75
- export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
package/dist/utils.cjs DELETED
@@ -1,214 +0,0 @@
1
- const require_defaults = require("./defaults.cjs");
2
- let node_url = require("node:url");
3
- let entities = require("entities");
4
- //#region src/utils.ts
5
- const strippedParamsCache = /* @__PURE__ */ new WeakMap();
6
- const getStrippedParamsSet = (params) => {
7
- let cached = strippedParamsCache.get(params);
8
- if (!cached) {
9
- cached = new Set(params.map((param) => param.toLowerCase()));
10
- strippedParamsCache.set(params, cached);
11
- }
12
- return cached;
13
- };
14
- const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
15
- const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
16
- const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
17
- const httpsLetterRegex = /s/i;
18
- const nonAsciiHostnameRegex = /[^a-z0-9.:-]/;
19
- const protocolPrefixRegex = /^https?:\/\//;
20
- const wwwPrefixRegex = /^www\./;
21
- const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
22
- const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
23
- const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
24
- const fixMalformedProtocol = (url) => {
25
- if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
26
- const doubledMatch = doubledProtocolRegex.exec(url);
27
- if (doubledMatch) {
28
- const inner = doubledMatch[1];
29
- const www = doubledMatch[2];
30
- const rest = url.slice(doubledMatch[0].length);
31
- return (httpsLetterRegex.test(inner) ? "https://" : "http://") + (www ? "www." : "") + rest;
32
- }
33
- const singleMatch = singleMalformedRegex.exec(url);
34
- if (singleMatch) {
35
- const fullMatch = singleMatch[0];
36
- const www = singleMatch[1];
37
- const rest = url.slice(fullMatch.length);
38
- return (httpsLetterRegex.test(fullMatch) ? "https://" : "http://") + (www ? "www." : "") + rest;
39
- }
40
- return url;
41
- };
42
- const feedProtocols = [
43
- "feed:",
44
- "rss:",
45
- "podcast:",
46
- "pcast:",
47
- "itpc:"
48
- ];
49
- const resolveFeedProtocol = (url, protocol = "https") => {
50
- const urlLower = url.toLowerCase();
51
- for (const scheme of feedProtocols) {
52
- if (!urlLower.startsWith(scheme)) continue;
53
- if (urlLower.startsWith(`${scheme}http://`) || urlLower.startsWith(`${scheme}https://`)) return url.slice(scheme.length);
54
- if (urlLower.startsWith(`${scheme}//`)) return `${protocol}:${url.slice(scheme.length)}`;
55
- }
56
- return url;
57
- };
58
- const addMissingProtocol = (url, protocol = "https") => {
59
- const colonIndex = url.indexOf(":");
60
- if (colonIndex > 0) {
61
- const beforeColon = url.slice(0, colonIndex);
62
- if (!beforeColon.includes(".") && !beforeColon.includes("/") && beforeColon !== "localhost") return url;
63
- }
64
- if (url.startsWith("//") && !url.startsWith("///")) try {
65
- const parsed = new URL(`${protocol}:${url}`);
66
- const hostname = parsed.hostname;
67
- if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
68
- return url;
69
- } catch {
70
- return url;
71
- }
72
- if (url.startsWith("/") || url.startsWith(".")) return url;
73
- const slashIndex = url.indexOf("/");
74
- const dotIndex = url.indexOf(".");
75
- if (dotIndex === -1 || slashIndex !== -1 && dotIndex > slashIndex) {
76
- if (!url.startsWith("localhost")) return url;
77
- }
78
- const firstChar = url.charAt(0);
79
- if (firstChar === " " || firstChar === " " || firstChar === "\n") return url;
80
- return `${protocol}://${url}`;
81
- };
82
- const resolveUrl = (url, base) => {
83
- if (url.startsWith("#") && !base) return;
84
- let resolvedUrl;
85
- resolvedUrl = url.includes("&") ? (0, entities.decodeHTML)(url) : url;
86
- resolvedUrl = resolveFeedProtocol(resolvedUrl);
87
- resolvedUrl = fixMalformedProtocol(resolvedUrl);
88
- if (base) try {
89
- resolvedUrl = new URL(resolvedUrl, base).href;
90
- } catch {
91
- return;
92
- }
93
- resolvedUrl = addMissingProtocol(resolvedUrl);
94
- try {
95
- const parsed = new URL(resolvedUrl);
96
- if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return;
97
- return parsed.href;
98
- } catch {
99
- return;
100
- }
101
- };
102
- const decodeAndNormalizeEncoding = (value) => {
103
- if (!value.includes("%")) return value;
104
- return value.replace(/%([0-9A-Fa-f]{2})/g, (_match, hex) => {
105
- const charCode = Number.parseInt(hex, 16);
106
- const char = String.fromCharCode(charCode);
107
- if (safePathCharsRegex.test(char)) return char;
108
- return `%${hex.toUpperCase()}`;
109
- });
110
- };
111
- const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) => {
112
- try {
113
- const parsed = new URL(url);
114
- if (options.normalizeUnicode) {
115
- parsed.hostname = parsed.hostname.normalize("NFC");
116
- parsed.pathname = parsed.pathname.normalize("NFC");
117
- }
118
- if (options.convertToPunycode && nonAsciiHostnameRegex.test(parsed.hostname)) {
119
- const ascii = (0, node_url.domainToASCII)(parsed.hostname);
120
- if (ascii) parsed.hostname = ascii;
121
- }
122
- if (options.stripAuthentication) {
123
- parsed.username = "";
124
- parsed.password = "";
125
- }
126
- if (options.stripWww && parsed.hostname.startsWith("www.")) parsed.hostname = parsed.hostname.slice(4);
127
- if (options.stripHash) parsed.hash = "";
128
- let pathname = parsed.pathname;
129
- if (options.normalizeEncoding) pathname = decodeAndNormalizeEncoding(pathname);
130
- if (options.collapseSlashes) pathname = pathname.replace(/\/+/g, "/");
131
- if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
132
- if (options.stripRootSlash && pathname === "/") pathname = "";
133
- parsed.pathname = pathname;
134
- if (options.stripQuery) parsed.search = "";
135
- if (options.stripQueryParams && parsed.search) {
136
- const strippedSet = getStrippedParamsSet(options.stripQueryParams);
137
- const paramsToDelete = [];
138
- for (const [key] of parsed.searchParams) if (strippedSet.has(key.toLowerCase())) paramsToDelete.push(key);
139
- for (const param of paramsToDelete) parsed.searchParams.delete(param);
140
- }
141
- if (options.lowercaseQuery && parsed.search) {
142
- const entries = [...parsed.searchParams.entries()];
143
- parsed.search = "";
144
- for (const [key, value] of entries) parsed.searchParams.append(key.toLowerCase(), value.toLowerCase());
145
- }
146
- if (options.sortQueryParams && parsed.search) parsed.searchParams.sort();
147
- if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
148
- let result = parsed.href;
149
- if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
150
- if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
151
- return result;
152
- } catch {
153
- return url;
154
- }
155
- };
156
- const applyRewrites = (url, rewrites) => {
157
- try {
158
- let parsed = new URL(url);
159
- for (const rewrite of rewrites) if (rewrite.match(parsed)) {
160
- parsed = rewrite.rewrite(parsed);
161
- break;
162
- }
163
- return parsed.href;
164
- } catch {
165
- return url;
166
- }
167
- };
168
- const applyProbes = async (url, probes, testCandidate) => {
169
- try {
170
- const parsed = new URL(url);
171
- for (const probe of probes) {
172
- if (!probe.match(parsed)) continue;
173
- for (const candidate of probe.getCandidates(parsed)) {
174
- const result = await testCandidate(candidate);
175
- if (result) return result;
176
- }
177
- break;
178
- }
179
- return url;
180
- } catch {
181
- return url;
182
- }
183
- };
184
- const createSignature = (object, fields) => {
185
- const saved = fields.map((key) => [key, object[key]]);
186
- for (const key of fields) object[key] = void 0;
187
- const signature = JSON.stringify(object);
188
- for (const [key, val] of saved) object[key] = val;
189
- return signature;
190
- };
191
- const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
192
- const neutralizeUrls = (text, urls) => {
193
- const escapeHost = (url) => {
194
- try {
195
- return new URL("/", url).host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
196
- } catch {
197
- return;
198
- }
199
- };
200
- const hosts = urls.map(escapeHost).filter(Boolean);
201
- if (hosts.length === 0) return text;
202
- const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
203
- return text.replace(new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g"), "/").replace(trailingSlashRegex, "$1$2");
204
- };
205
- //#endregion
206
- exports.addMissingProtocol = addMissingProtocol;
207
- exports.applyProbes = applyProbes;
208
- exports.applyRewrites = applyRewrites;
209
- exports.createSignature = createSignature;
210
- exports.fixMalformedProtocol = fixMalformedProtocol;
211
- exports.neutralizeUrls = neutralizeUrls;
212
- exports.normalizeUrl = normalizeUrl;
213
- exports.resolveFeedProtocol = resolveFeedProtocol;
214
- exports.resolveUrl = resolveUrl;
package/dist/utils.d.cts DELETED
@@ -1,10 +0,0 @@
1
- import { NormalizeOptions } from "./types.cjs";
2
-
3
- //#region src/utils.d.ts
4
- declare const fixMalformedProtocol: (url: string) => string;
5
- declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
6
- declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
7
- declare const resolveUrl: (url: string, base?: string) => string | undefined;
8
- declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
9
- //#endregion
10
- export { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };