feedcanon 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/defaults.cjs +39 -52
- package/dist/defaults.d.cts +2 -4
- package/dist/defaults.d.ts +2 -4
- package/dist/defaults.js +39 -50
- package/dist/exports.cjs +5 -4
- package/dist/exports.d.cts +4 -3
- package/dist/exports.d.ts +4 -3
- package/dist/exports.js +4 -3
- package/dist/index.cjs +36 -25
- package/dist/index.js +37 -26
- package/dist/probes/wordpress.cjs +49 -0
- package/dist/probes/wordpress.d.cts +6 -0
- package/dist/probes/wordpress.d.ts +6 -0
- package/dist/probes/wordpress.js +48 -0
- package/dist/rewrites/blogger.cjs +33 -23
- package/dist/rewrites/blogger.js +34 -23
- package/dist/rewrites/feedburner.cjs +13 -5
- package/dist/rewrites/feedburner.js +14 -5
- package/dist/types.d.cts +8 -3
- package/dist/types.d.ts +8 -3
- package/dist/utils.cjs +41 -1
- package/dist/utils.js +39 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -41,8 +41,8 @@ This is a simplified flow. For complete details, see [How It Works](https://feed
|
|
|
41
41
|
1. Fetch the input URL and parse the feed to establish reference content.
|
|
42
42
|
2. Extract the feed's declared self URL (if present).
|
|
43
43
|
3. Validate the self URL by fetching and comparing content.
|
|
44
|
-
4. Generate URL
|
|
45
|
-
5. Test
|
|
44
|
+
4. Generate URL candidates ordered from cleanest to least clean.
|
|
45
|
+
5. Test candidates in order—the first one serving identical content wins.
|
|
46
46
|
6. Upgrade HTTP to HTTPS if both serve identical content.
|
|
47
47
|
|
|
48
48
|
### Customization
|
|
@@ -53,7 +53,7 @@ Feedcanon is designed to be flexible. Every major component can be replaced or e
|
|
|
53
53
|
- **Database lookup** — use `existsFn` to check if a URL already exists in your database.
|
|
54
54
|
- **Custom fetch** — use your own HTTP client (Axios, Got, Ky, etc.)
|
|
55
55
|
- **Custom parser** — bring your own parser (Feedsmith by default).
|
|
56
|
-
- **Custom tiers** — define your own URL normalization
|
|
56
|
+
- **Custom tiers** — define your own URL normalization tiers.
|
|
57
57
|
- **Custom platforms** — add handlers to normalize domain aliases (like FeedBurner).
|
|
58
58
|
|
|
59
59
|
## Quick Start
|
package/dist/defaults.cjs
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
const
|
|
1
|
+
const require_utils = require('./utils.cjs');
|
|
2
2
|
let feedsmith = require("feedsmith");
|
|
3
3
|
|
|
4
4
|
//#region src/defaults.ts
|
|
5
|
-
const defaultRewrites = [require_feedburner.feedburnerRewrite];
|
|
6
5
|
const defaultStrippedParams = [
|
|
7
6
|
"utm_source",
|
|
8
7
|
"utm_medium",
|
|
@@ -185,15 +184,6 @@ const defaultFetch = async (url, options) => {
|
|
|
185
184
|
status: response.status
|
|
186
185
|
};
|
|
187
186
|
};
|
|
188
|
-
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
189
|
-
const neutralizeFeedUrls = (signature, url) => {
|
|
190
|
-
try {
|
|
191
|
-
const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
|
|
192
|
-
return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
|
|
193
|
-
} catch {
|
|
194
|
-
return signature;
|
|
195
|
-
}
|
|
196
|
-
};
|
|
197
187
|
const retrieveSelfLink = (parsed) => {
|
|
198
188
|
switch (parsed.format) {
|
|
199
189
|
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
@@ -211,49 +201,48 @@ const defaultParser = {
|
|
|
211
201
|
return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
|
|
212
202
|
},
|
|
213
203
|
getSignature: (parsed, url) => {
|
|
214
|
-
if (parsed.format === "json") {
|
|
215
|
-
const originalSelfUrl = parsed.feed.feed_url;
|
|
216
|
-
parsed.feed.feed_url = void 0;
|
|
217
|
-
const signature$1 = JSON.stringify(parsed.feed);
|
|
218
|
-
parsed.feed.feed_url = originalSelfUrl;
|
|
219
|
-
return neutralizeFeedUrls(signature$1, url);
|
|
220
|
-
}
|
|
221
204
|
let signature;
|
|
222
|
-
let
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
parsed.
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
link.href = void 0;
|
|
244
|
-
signature = JSON.stringify(parsed.feed);
|
|
245
|
-
link.href = originalSelfUrl;
|
|
205
|
+
let contentUrl;
|
|
206
|
+
if (parsed.format === "json") {
|
|
207
|
+
contentUrl = parsed.feed.home_page_url;
|
|
208
|
+
signature = require_utils.createSignature(parsed.feed, ["feed_url"]);
|
|
209
|
+
} else {
|
|
210
|
+
const selfLink = retrieveSelfLink(parsed);
|
|
211
|
+
const savedSelfHref = selfLink?.href;
|
|
212
|
+
if (selfLink) selfLink.href = void 0;
|
|
213
|
+
if (parsed.format === "rss") {
|
|
214
|
+
contentUrl = parsed.feed.link;
|
|
215
|
+
signature = require_utils.createSignature(parsed.feed, [
|
|
216
|
+
"lastBuildDate",
|
|
217
|
+
"pubDate",
|
|
218
|
+
"link",
|
|
219
|
+
"generator"
|
|
220
|
+
]);
|
|
221
|
+
} else if (parsed.format === "rdf") {
|
|
222
|
+
contentUrl = parsed.feed.link;
|
|
223
|
+
signature = require_utils.createSignature(parsed.feed, ["link"]);
|
|
224
|
+
} else signature = require_utils.createSignature(parsed.feed, ["updated", "generator"]);
|
|
225
|
+
if (selfLink) selfLink.href = savedSelfHref;
|
|
246
226
|
}
|
|
247
|
-
|
|
248
|
-
parsed.feed.lastBuildDate = originalBuildDate;
|
|
249
|
-
parsed.feed.pubDate = originalPubDate;
|
|
250
|
-
parsed.feed.link = originalLink;
|
|
251
|
-
} else if (parsed.format === "rdf") parsed.feed.link = originalLink;
|
|
252
|
-
else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
|
|
253
|
-
return neutralizeFeedUrls(signature, url);
|
|
227
|
+
return require_utils.neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
|
|
254
228
|
}
|
|
255
229
|
};
|
|
256
230
|
const defaultTiers = [
|
|
231
|
+
{
|
|
232
|
+
stripProtocol: false,
|
|
233
|
+
stripAuthentication: false,
|
|
234
|
+
stripWww: true,
|
|
235
|
+
stripTrailingSlash: true,
|
|
236
|
+
stripRootSlash: true,
|
|
237
|
+
collapseSlashes: true,
|
|
238
|
+
stripHash: true,
|
|
239
|
+
sortQueryParams: false,
|
|
240
|
+
stripQuery: true,
|
|
241
|
+
stripEmptyQuery: true,
|
|
242
|
+
normalizeEncoding: true,
|
|
243
|
+
normalizeUnicode: true,
|
|
244
|
+
convertToPunycode: true
|
|
245
|
+
},
|
|
257
246
|
{
|
|
258
247
|
stripProtocol: false,
|
|
259
248
|
stripAuthentication: false,
|
|
@@ -305,7 +294,5 @@ const defaultTiers = [
|
|
|
305
294
|
exports.defaultFetch = defaultFetch;
|
|
306
295
|
exports.defaultNormalizeOptions = defaultNormalizeOptions;
|
|
307
296
|
exports.defaultParser = defaultParser;
|
|
308
|
-
exports.defaultRewrites = defaultRewrites;
|
|
309
297
|
exports.defaultStrippedParams = defaultStrippedParams;
|
|
310
|
-
exports.defaultTiers = defaultTiers;
|
|
311
|
-
exports.neutralizeFeedUrls = neutralizeFeedUrls;
|
|
298
|
+
exports.defaultTiers = defaultTiers;
|
package/dist/defaults.d.cts
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter,
|
|
1
|
+
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
|
-
declare const defaultRewrites: Array<Rewrite>;
|
|
5
4
|
declare const defaultStrippedParams: string[];
|
|
6
5
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
6
|
declare const defaultFetch: FetchFn;
|
|
8
|
-
declare const neutralizeFeedUrls: (signature: string, url: string) => string;
|
|
9
7
|
declare const defaultParser: ParserAdapter<DefaultParserResult>;
|
|
10
8
|
declare const defaultTiers: Array<Tier>;
|
|
11
9
|
//#endregion
|
|
12
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser,
|
|
10
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter,
|
|
1
|
+
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
|
-
declare const defaultRewrites: Array<Rewrite>;
|
|
5
4
|
declare const defaultStrippedParams: string[];
|
|
6
5
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
6
|
declare const defaultFetch: FetchFn;
|
|
8
|
-
declare const neutralizeFeedUrls: (signature: string, url: string) => string;
|
|
9
7
|
declare const defaultParser: ParserAdapter<DefaultParserResult>;
|
|
10
8
|
declare const defaultTiers: Array<Tier>;
|
|
11
9
|
//#endregion
|
|
12
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser,
|
|
10
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.js
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { createSignature, neutralizeUrls } from "./utils.js";
|
|
2
2
|
import { parseFeed } from "feedsmith";
|
|
3
3
|
|
|
4
4
|
//#region src/defaults.ts
|
|
5
|
-
const defaultRewrites = [feedburnerRewrite];
|
|
6
5
|
const defaultStrippedParams = [
|
|
7
6
|
"utm_source",
|
|
8
7
|
"utm_medium",
|
|
@@ -185,15 +184,6 @@ const defaultFetch = async (url, options) => {
|
|
|
185
184
|
status: response.status
|
|
186
185
|
};
|
|
187
186
|
};
|
|
188
|
-
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
189
|
-
const neutralizeFeedUrls = (signature, url) => {
|
|
190
|
-
try {
|
|
191
|
-
const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
|
|
192
|
-
return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
|
|
193
|
-
} catch {
|
|
194
|
-
return signature;
|
|
195
|
-
}
|
|
196
|
-
};
|
|
197
187
|
const retrieveSelfLink = (parsed) => {
|
|
198
188
|
switch (parsed.format) {
|
|
199
189
|
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
@@ -211,49 +201,48 @@ const defaultParser = {
|
|
|
211
201
|
return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
|
|
212
202
|
},
|
|
213
203
|
getSignature: (parsed, url) => {
|
|
214
|
-
if (parsed.format === "json") {
|
|
215
|
-
const originalSelfUrl = parsed.feed.feed_url;
|
|
216
|
-
parsed.feed.feed_url = void 0;
|
|
217
|
-
const signature$1 = JSON.stringify(parsed.feed);
|
|
218
|
-
parsed.feed.feed_url = originalSelfUrl;
|
|
219
|
-
return neutralizeFeedUrls(signature$1, url);
|
|
220
|
-
}
|
|
221
204
|
let signature;
|
|
222
|
-
let
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
parsed.
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
link.href = void 0;
|
|
244
|
-
signature = JSON.stringify(parsed.feed);
|
|
245
|
-
link.href = originalSelfUrl;
|
|
205
|
+
let contentUrl;
|
|
206
|
+
if (parsed.format === "json") {
|
|
207
|
+
contentUrl = parsed.feed.home_page_url;
|
|
208
|
+
signature = createSignature(parsed.feed, ["feed_url"]);
|
|
209
|
+
} else {
|
|
210
|
+
const selfLink = retrieveSelfLink(parsed);
|
|
211
|
+
const savedSelfHref = selfLink?.href;
|
|
212
|
+
if (selfLink) selfLink.href = void 0;
|
|
213
|
+
if (parsed.format === "rss") {
|
|
214
|
+
contentUrl = parsed.feed.link;
|
|
215
|
+
signature = createSignature(parsed.feed, [
|
|
216
|
+
"lastBuildDate",
|
|
217
|
+
"pubDate",
|
|
218
|
+
"link",
|
|
219
|
+
"generator"
|
|
220
|
+
]);
|
|
221
|
+
} else if (parsed.format === "rdf") {
|
|
222
|
+
contentUrl = parsed.feed.link;
|
|
223
|
+
signature = createSignature(parsed.feed, ["link"]);
|
|
224
|
+
} else signature = createSignature(parsed.feed, ["updated", "generator"]);
|
|
225
|
+
if (selfLink) selfLink.href = savedSelfHref;
|
|
246
226
|
}
|
|
247
|
-
|
|
248
|
-
parsed.feed.lastBuildDate = originalBuildDate;
|
|
249
|
-
parsed.feed.pubDate = originalPubDate;
|
|
250
|
-
parsed.feed.link = originalLink;
|
|
251
|
-
} else if (parsed.format === "rdf") parsed.feed.link = originalLink;
|
|
252
|
-
else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
|
|
253
|
-
return neutralizeFeedUrls(signature, url);
|
|
227
|
+
return neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
|
|
254
228
|
}
|
|
255
229
|
};
|
|
256
230
|
const defaultTiers = [
|
|
231
|
+
{
|
|
232
|
+
stripProtocol: false,
|
|
233
|
+
stripAuthentication: false,
|
|
234
|
+
stripWww: true,
|
|
235
|
+
stripTrailingSlash: true,
|
|
236
|
+
stripRootSlash: true,
|
|
237
|
+
collapseSlashes: true,
|
|
238
|
+
stripHash: true,
|
|
239
|
+
sortQueryParams: false,
|
|
240
|
+
stripQuery: true,
|
|
241
|
+
stripEmptyQuery: true,
|
|
242
|
+
normalizeEncoding: true,
|
|
243
|
+
normalizeUnicode: true,
|
|
244
|
+
convertToPunycode: true
|
|
245
|
+
},
|
|
257
246
|
{
|
|
258
247
|
stripProtocol: false,
|
|
259
248
|
stripAuthentication: false,
|
|
@@ -302,4 +291,4 @@ const defaultTiers = [
|
|
|
302
291
|
];
|
|
303
292
|
|
|
304
293
|
//#endregion
|
|
305
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser,
|
|
294
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
|
package/dist/exports.cjs
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
const require_feedburner = require('./rewrites/feedburner.cjs');
|
|
2
|
-
const require_defaults = require('./defaults.cjs');
|
|
3
1
|
const require_utils = require('./utils.cjs');
|
|
2
|
+
const require_defaults = require('./defaults.cjs');
|
|
4
3
|
const require_index = require('./index.cjs');
|
|
4
|
+
const require_wordpress = require('./probes/wordpress.cjs');
|
|
5
5
|
const require_blogger = require('./rewrites/blogger.cjs');
|
|
6
|
+
const require_feedburner = require('./rewrites/feedburner.cjs');
|
|
6
7
|
|
|
7
8
|
exports.addMissingProtocol = require_utils.addMissingProtocol;
|
|
8
9
|
exports.bloggerRewrite = require_blogger.bloggerRewrite;
|
|
9
10
|
exports.defaultFetch = require_defaults.defaultFetch;
|
|
10
11
|
exports.defaultParser = require_defaults.defaultParser;
|
|
11
|
-
exports.defaultRewrites = require_defaults.defaultRewrites;
|
|
12
12
|
exports.defaultStrippedParams = require_defaults.defaultStrippedParams;
|
|
13
13
|
exports.defaultTiers = require_defaults.defaultTiers;
|
|
14
14
|
exports.feedburnerRewrite = require_feedburner.feedburnerRewrite;
|
|
@@ -16,4 +16,5 @@ exports.findCanonical = require_index.findCanonical;
|
|
|
16
16
|
exports.fixMalformedProtocol = require_utils.fixMalformedProtocol;
|
|
17
17
|
exports.normalizeUrl = require_utils.normalizeUrl;
|
|
18
18
|
exports.resolveFeedProtocol = require_utils.resolveFeedProtocol;
|
|
19
|
-
exports.resolveUrl = require_utils.resolveUrl;
|
|
19
|
+
exports.resolveUrl = require_utils.resolveUrl;
|
|
20
|
+
exports.wordpressProbe = require_wordpress.wordpressProbe;
|
package/dist/exports.d.cts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.cjs";
|
|
2
|
-
import { defaultFetch, defaultParser,
|
|
1
|
+
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.cjs";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
3
3
|
import { findCanonical } from "./index.cjs";
|
|
4
|
+
import { wordpressProbe } from "./probes/wordpress.cjs";
|
|
4
5
|
import { bloggerRewrite } from "./rewrites/blogger.cjs";
|
|
5
6
|
import { feedburnerRewrite } from "./rewrites/feedburner.cjs";
|
|
6
7
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
|
|
7
|
-
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser,
|
|
8
|
+
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
|
package/dist/exports.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.js";
|
|
2
|
-
import { defaultFetch, defaultParser,
|
|
1
|
+
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
import { findCanonical } from "./index.js";
|
|
4
|
+
import { wordpressProbe } from "./probes/wordpress.js";
|
|
4
5
|
import { bloggerRewrite } from "./rewrites/blogger.js";
|
|
5
6
|
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
6
7
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
7
|
-
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser,
|
|
8
|
+
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
|
package/dist/exports.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
2
|
-
import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
1
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
4
3
|
import { findCanonical } from "./index.js";
|
|
4
|
+
import { wordpressProbe } from "./probes/wordpress.js";
|
|
5
5
|
import { bloggerRewrite } from "./rewrites/blogger.js";
|
|
6
|
+
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
6
7
|
|
|
7
|
-
export { addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser,
|
|
8
|
+
export { addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
|
package/dist/index.cjs
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
const require_defaults = require('./defaults.cjs');
|
|
2
1
|
const require_utils = require('./utils.cjs');
|
|
2
|
+
const require_defaults = require('./defaults.cjs');
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites
|
|
6
|
+
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites, probes, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
|
-
return
|
|
8
|
+
return require_utils.normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
10
10
|
sortQueryParams: true,
|
|
11
11
|
stripEmptyQuery: true
|
|
12
|
-
})
|
|
12
|
+
});
|
|
13
13
|
};
|
|
14
14
|
const resolveAndApplyRewrites = (url, baseUrl) => {
|
|
15
15
|
const resolved = require_utils.resolveUrl(url, baseUrl);
|
|
16
|
-
return resolved ? require_utils.applyRewrites(resolved, rewrites) :
|
|
16
|
+
return resolved && rewrites ? require_utils.applyRewrites(resolved, rewrites) : resolved;
|
|
17
17
|
};
|
|
18
18
|
const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
|
|
19
19
|
if (!initialRequestUrl) return;
|
|
@@ -73,7 +73,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
73
73
|
if (!await compareWithInitialResponse(response.body, response.url)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
|
-
let
|
|
76
|
+
let candidateSourceUrl = initialResponseUrl;
|
|
77
77
|
if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
|
|
78
78
|
const urlsToTry = [selfRequestUrl];
|
|
79
79
|
if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
|
|
@@ -86,42 +86,53 @@ async function findCanonical(inputUrl, options) {
|
|
|
86
86
|
response,
|
|
87
87
|
feed: initialResponseFeed
|
|
88
88
|
});
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
|
|
90
|
+
candidateSourceUrl = stripParams(candidateSourceUrl);
|
|
91
91
|
break;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
if (probes?.length) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
96
|
+
const response = await fetchAndCompare(candidateUrl);
|
|
97
|
+
if (response) {
|
|
98
|
+
onMatch?.({
|
|
99
|
+
url: candidateUrl,
|
|
100
|
+
response,
|
|
101
|
+
feed: initialResponseFeed
|
|
102
|
+
});
|
|
103
|
+
return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
|
|
104
|
+
}
|
|
105
|
+
});
|
|
106
|
+
const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(require_utils.normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
|
|
107
|
+
candidateUrls.add(candidateSourceUrl);
|
|
108
|
+
let winningUrl = candidateSourceUrl;
|
|
109
|
+
for (const candidateUrl of candidateUrls) {
|
|
99
110
|
if (existsFn) {
|
|
100
|
-
const data = await existsFn(
|
|
111
|
+
const data = await existsFn(candidateUrl);
|
|
101
112
|
if (data !== void 0) {
|
|
102
113
|
onExists?.({
|
|
103
|
-
url:
|
|
114
|
+
url: candidateUrl,
|
|
104
115
|
data
|
|
105
116
|
});
|
|
106
|
-
return
|
|
117
|
+
return candidateUrl;
|
|
107
118
|
}
|
|
108
119
|
}
|
|
109
|
-
if (
|
|
110
|
-
if (
|
|
120
|
+
if (candidateUrl === candidateSourceUrl) continue;
|
|
121
|
+
if (candidateUrl === initialResponseUrl) {
|
|
111
122
|
winningUrl = initialResponseUrl;
|
|
112
123
|
break;
|
|
113
124
|
}
|
|
114
|
-
const
|
|
115
|
-
if (
|
|
116
|
-
let
|
|
117
|
-
if (
|
|
118
|
-
if (
|
|
125
|
+
const candidateResponse = await fetchAndCompare(candidateUrl);
|
|
126
|
+
if (candidateResponse) {
|
|
127
|
+
let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
|
|
128
|
+
if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
|
|
129
|
+
if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
|
|
119
130
|
onMatch?.({
|
|
120
|
-
url:
|
|
121
|
-
response:
|
|
131
|
+
url: candidateUrl,
|
|
132
|
+
response: candidateResponse,
|
|
122
133
|
feed: initialResponseFeed
|
|
123
134
|
});
|
|
124
|
-
winningUrl =
|
|
135
|
+
winningUrl = candidateUrl;
|
|
125
136
|
break;
|
|
126
137
|
}
|
|
127
138
|
}
|
package/dist/index.js
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { applyProbes, applyRewrites, normalizeUrl, resolveUrl } from "./utils.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, rewrites
|
|
6
|
+
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, rewrites, probes, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
|
-
return
|
|
8
|
+
return normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
10
10
|
sortQueryParams: true,
|
|
11
11
|
stripEmptyQuery: true
|
|
12
|
-
})
|
|
12
|
+
});
|
|
13
13
|
};
|
|
14
14
|
const resolveAndApplyRewrites = (url, baseUrl) => {
|
|
15
15
|
const resolved = resolveUrl(url, baseUrl);
|
|
16
|
-
return resolved ? applyRewrites(resolved, rewrites) :
|
|
16
|
+
return resolved && rewrites ? applyRewrites(resolved, rewrites) : resolved;
|
|
17
17
|
};
|
|
18
18
|
const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
|
|
19
19
|
if (!initialRequestUrl) return;
|
|
@@ -73,7 +73,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
73
73
|
if (!await compareWithInitialResponse(response.body, response.url)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
|
-
let
|
|
76
|
+
let candidateSourceUrl = initialResponseUrl;
|
|
77
77
|
if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
|
|
78
78
|
const urlsToTry = [selfRequestUrl];
|
|
79
79
|
if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
|
|
@@ -86,42 +86,53 @@ async function findCanonical(inputUrl, options) {
|
|
|
86
86
|
response,
|
|
87
87
|
feed: initialResponseFeed
|
|
88
88
|
});
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
|
|
90
|
+
candidateSourceUrl = stripParams(candidateSourceUrl);
|
|
91
91
|
break;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
if (probes?.length) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
96
|
+
const response = await fetchAndCompare(candidateUrl);
|
|
97
|
+
if (response) {
|
|
98
|
+
onMatch?.({
|
|
99
|
+
url: candidateUrl,
|
|
100
|
+
response,
|
|
101
|
+
feed: initialResponseFeed
|
|
102
|
+
});
|
|
103
|
+
return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
|
|
104
|
+
}
|
|
105
|
+
});
|
|
106
|
+
const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
|
|
107
|
+
candidateUrls.add(candidateSourceUrl);
|
|
108
|
+
let winningUrl = candidateSourceUrl;
|
|
109
|
+
for (const candidateUrl of candidateUrls) {
|
|
99
110
|
if (existsFn) {
|
|
100
|
-
const data = await existsFn(
|
|
111
|
+
const data = await existsFn(candidateUrl);
|
|
101
112
|
if (data !== void 0) {
|
|
102
113
|
onExists?.({
|
|
103
|
-
url:
|
|
114
|
+
url: candidateUrl,
|
|
104
115
|
data
|
|
105
116
|
});
|
|
106
|
-
return
|
|
117
|
+
return candidateUrl;
|
|
107
118
|
}
|
|
108
119
|
}
|
|
109
|
-
if (
|
|
110
|
-
if (
|
|
120
|
+
if (candidateUrl === candidateSourceUrl) continue;
|
|
121
|
+
if (candidateUrl === initialResponseUrl) {
|
|
111
122
|
winningUrl = initialResponseUrl;
|
|
112
123
|
break;
|
|
113
124
|
}
|
|
114
|
-
const
|
|
115
|
-
if (
|
|
116
|
-
let
|
|
117
|
-
if (
|
|
118
|
-
if (
|
|
125
|
+
const candidateResponse = await fetchAndCompare(candidateUrl);
|
|
126
|
+
if (candidateResponse) {
|
|
127
|
+
let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
|
|
128
|
+
if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
|
|
129
|
+
if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
|
|
119
130
|
onMatch?.({
|
|
120
|
-
url:
|
|
121
|
-
response:
|
|
131
|
+
url: candidateUrl,
|
|
132
|
+
response: candidateResponse,
|
|
122
133
|
feed: initialResponseFeed
|
|
123
134
|
});
|
|
124
|
-
winningUrl =
|
|
135
|
+
winningUrl = candidateUrl;
|
|
125
136
|
break;
|
|
126
137
|
}
|
|
127
138
|
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
|
|
2
|
+
//#region src/probes/wordpress.ts
|
|
3
|
+
/** Values of the `feed` query parameter (after an optional `comments-` prefix) that this probe handles. */
const feedTypes = [
	"atom",
	"rss2",
	"rss",
	"rdf"
];
/** Prefix on the `feed` value that selects the comments variant of a feed. */
const commentsPrefix = "comments-";
/**
 * Parses the `feed` query parameter of a URL.
 *
 * @param {URL} url - URL to inspect; it is not modified.
 * @returns {{ type: string, isComment: boolean } | undefined} The lower-cased feed
 *   type and whether it carried the `comments-` prefix, or `undefined` when the
 *   parameter is missing, empty, or not one of `feedTypes`.
 */
const parseFeedParam = (url) => {
	const feed = url.searchParams.get("feed")?.toLowerCase();
	if (!feed) return;
	const isComment = feed.startsWith(commentsPrefix);
	const type = isComment ? feed.slice(commentsPrefix.length) : feed;
	return feedTypes.includes(type) ? {
		type,
		isComment
	} : void 0;
};
/**
 * Builds a candidate href: a copy of `url` with the given pathname and with the
 * `feed` query parameter removed. Other query parameters are kept.
 *
 * @param {URL} url - Source URL; it is copied, never mutated.
 * @param {string} pathname - Pathname to set on the copy.
 * @returns {string} The candidate's `href`.
 */
const buildCandidate = (url, pathname) => {
	const candidate = new URL(url);
	candidate.pathname = pathname;
	candidate.searchParams.delete("feed");
	return candidate.href;
};
/**
 * Probe for URLs that select a feed through a `?feed=<type>` query parameter,
 * proposing path-based equivalents (`/feed`, `/feed/atom`, `/comments/feed`, ...).
 */
const wordpressProbe = {
	/**
	 * @param {URL} url
	 * @returns {boolean} `true` when `feed` is present and is a supported type.
	 */
	match: (url) => parseFeedParam(url) !== void 0,
	/**
	 * @param {URL} url
	 * @returns {string[]} Two candidate hrefs (without and with a trailing slash),
	 *   or an empty array when the URL does not carry a supported `feed` value.
	 */
	getCandidates: (url) => {
		const parsed = parseFeedParam(url);
		// Same validation as `match`, so an unsupported value such as
		// `?feed=podcast` no longer yields fabricated `/feed` candidates.
		if (!parsed) return [];
		const { type, isComment } = parsed;
		const pathWithoutSlash = url.pathname.replace(/\/$/, "");
		// The path already contains a feed segment: only drop the query parameter
		// and offer both trailing-slash variants of the existing path.
		if ((isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/).test(url.pathname)) return [buildCandidate(url, pathWithoutSlash), buildCandidate(url, url.pathname.replace(/\/?$/, "/"))];
		// Otherwise append a feed segment. Only "atom" gets its own sub-path; every
		// other supported type maps to the plain `/feed` segment.
		const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
		const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
		return [buildCandidate(url, pathWithoutSlash + feedPath), buildCandidate(url, `${pathWithoutSlash}${feedPath}/`)];
	}
};
|
|
47
|
+
|
|
48
|
+
//#endregion
|
|
49
|
+
exports.wordpressProbe = wordpressProbe;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
//#region src/probes/wordpress.ts
|
|
2
|
+
/** Values of the `feed` query parameter (after an optional `comments-` prefix) that this probe handles. */
const feedTypes = [
	"atom",
	"rss2",
	"rss",
	"rdf"
];
/** Prefix on the `feed` value that selects the comments variant of a feed. */
const commentsPrefix = "comments-";
/**
 * Parses the `feed` query parameter of a URL.
 *
 * @param {URL} url - URL to inspect; it is not modified.
 * @returns {{ type: string, isComment: boolean } | undefined} The lower-cased feed
 *   type and whether it carried the `comments-` prefix, or `undefined` when the
 *   parameter is missing, empty, or not one of `feedTypes`.
 */
const parseFeedParam = (url) => {
	const feed = url.searchParams.get("feed")?.toLowerCase();
	if (!feed) return;
	const isComment = feed.startsWith(commentsPrefix);
	const type = isComment ? feed.slice(commentsPrefix.length) : feed;
	return feedTypes.includes(type) ? {
		type,
		isComment
	} : void 0;
};
/**
 * Builds a candidate href: a copy of `url` with the given pathname and with the
 * `feed` query parameter removed. Other query parameters are kept.
 *
 * @param {URL} url - Source URL; it is copied, never mutated.
 * @param {string} pathname - Pathname to set on the copy.
 * @returns {string} The candidate's `href`.
 */
const buildCandidate = (url, pathname) => {
	const candidate = new URL(url);
	candidate.pathname = pathname;
	candidate.searchParams.delete("feed");
	return candidate.href;
};
/**
 * Probe for URLs that select a feed through a `?feed=<type>` query parameter,
 * proposing path-based equivalents (`/feed`, `/feed/atom`, `/comments/feed`, ...).
 */
const wordpressProbe = {
	/**
	 * @param {URL} url
	 * @returns {boolean} `true` when `feed` is present and is a supported type.
	 */
	match: (url) => parseFeedParam(url) !== void 0,
	/**
	 * @param {URL} url
	 * @returns {string[]} Two candidate hrefs (without and with a trailing slash),
	 *   or an empty array when the URL does not carry a supported `feed` value.
	 */
	getCandidates: (url) => {
		const parsed = parseFeedParam(url);
		// Same validation as `match`, so an unsupported value such as
		// `?feed=podcast` no longer yields fabricated `/feed` candidates.
		if (!parsed) return [];
		const { type, isComment } = parsed;
		const pathWithoutSlash = url.pathname.replace(/\/$/, "");
		// The path already contains a feed segment: only drop the query parameter
		// and offer both trailing-slash variants of the existing path.
		if ((isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/).test(url.pathname)) return [buildCandidate(url, pathWithoutSlash), buildCandidate(url, url.pathname.replace(/\/?$/, "/"))];
		// Otherwise append a feed segment. Only "atom" gets its own sub-path; every
		// other supported type maps to the plain `/feed` segment.
		const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
		const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
		return [buildCandidate(url, pathWithoutSlash + feedPath), buildCandidate(url, `${pathWithoutSlash}${feedPath}/`)];
	}
};
|
|
46
|
+
|
|
47
|
+
//#endregion
|
|
48
|
+
export { wordpressProbe };
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
const require_utils = require('../utils.cjs');
|
|
1
2
|
|
|
2
3
|
//#region src/rewrites/blogger.ts
|
|
3
4
|
const bloggerPattern = /^(www\.|beta\.)?blogger\.com$/;
|
|
@@ -6,32 +7,41 @@ const bloggerRewrite = {
|
|
|
6
7
|
match: (url) => {
|
|
7
8
|
return bloggerPattern.test(url.hostname) || blogspotPattern.test(url.hostname);
|
|
8
9
|
},
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
const isBlogger = bloggerPattern.test(
|
|
12
|
-
const isBlogspot = blogspotPattern.test(
|
|
13
|
-
|
|
14
|
-
if (isBlogger)
|
|
10
|
+
rewrite: (url) => {
|
|
11
|
+
const rewritten = new URL(url);
|
|
12
|
+
const isBlogger = bloggerPattern.test(rewritten.hostname);
|
|
13
|
+
const isBlogspot = blogspotPattern.test(rewritten.hostname);
|
|
14
|
+
rewritten.protocol = "https:";
|
|
15
|
+
if (isBlogger) rewritten.hostname = "www.blogger.com";
|
|
15
16
|
if (isBlogspot) {
|
|
16
|
-
|
|
17
|
-
if (
|
|
18
|
-
else if (
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
rewritten.hostname = rewritten.hostname.replace(blogspotPattern, ".blogspot.com");
|
|
18
|
+
if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
|
|
19
|
+
else if (rewritten.pathname === "/rss.xml") {
|
|
20
|
+
rewritten.pathname = "/feeds/posts/default";
|
|
21
|
+
rewritten.searchParams.set("alt", "rss");
|
|
21
22
|
}
|
|
22
23
|
}
|
|
23
|
-
|
|
24
|
-
const alt =
|
|
25
|
-
if (alt === "atom" || alt === "json" || alt === "")
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
24
|
+
rewritten.searchParams.delete("redirect");
|
|
25
|
+
const alt = rewritten.searchParams.get("alt");
|
|
26
|
+
if (alt === "atom" || alt === "json" || alt === "") rewritten.searchParams.delete("alt");
|
|
27
|
+
rewritten.searchParams.delete("v");
|
|
28
|
+
rewritten.searchParams.delete("max-results");
|
|
29
|
+
rewritten.searchParams.delete("start-index");
|
|
30
|
+
rewritten.searchParams.delete("published-min");
|
|
31
|
+
rewritten.searchParams.delete("published-max");
|
|
32
|
+
rewritten.searchParams.delete("updated-min");
|
|
33
|
+
rewritten.searchParams.delete("updated-max");
|
|
34
|
+
rewritten.searchParams.delete("orderby");
|
|
35
|
+
const normalized = require_utils.normalizeUrl(rewritten.href, {
|
|
36
|
+
stripTrailingSlash: true,
|
|
37
|
+
collapseSlashes: true,
|
|
38
|
+
stripHash: true,
|
|
39
|
+
normalizeEncoding: true,
|
|
40
|
+
normalizeUnicode: true,
|
|
41
|
+
stripEmptyQuery: true,
|
|
42
|
+
sortQueryParams: true
|
|
43
|
+
});
|
|
44
|
+
return new URL(normalized);
|
|
35
45
|
}
|
|
36
46
|
};
|
|
37
47
|
|
package/dist/rewrites/blogger.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { normalizeUrl } from "../utils.js";
|
|
2
|
+
|
|
1
3
|
//#region src/rewrites/blogger.ts
|
|
2
4
|
const bloggerPattern = /^(www\.|beta\.)?blogger\.com$/;
|
|
3
5
|
const blogspotPattern = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
|
|
@@ -5,32 +7,41 @@ const bloggerRewrite = {
|
|
|
5
7
|
match: (url) => {
|
|
6
8
|
return bloggerPattern.test(url.hostname) || blogspotPattern.test(url.hostname);
|
|
7
9
|
},
|
|
8
|
-
|
|
9
|
-
const
|
|
10
|
-
const isBlogger = bloggerPattern.test(
|
|
11
|
-
const isBlogspot = blogspotPattern.test(
|
|
12
|
-
|
|
13
|
-
if (isBlogger)
|
|
10
|
+
rewrite: (url) => {
|
|
11
|
+
const rewritten = new URL(url);
|
|
12
|
+
const isBlogger = bloggerPattern.test(rewritten.hostname);
|
|
13
|
+
const isBlogspot = blogspotPattern.test(rewritten.hostname);
|
|
14
|
+
rewritten.protocol = "https:";
|
|
15
|
+
if (isBlogger) rewritten.hostname = "www.blogger.com";
|
|
14
16
|
if (isBlogspot) {
|
|
15
|
-
|
|
16
|
-
if (
|
|
17
|
-
else if (
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
rewritten.hostname = rewritten.hostname.replace(blogspotPattern, ".blogspot.com");
|
|
18
|
+
if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
|
|
19
|
+
else if (rewritten.pathname === "/rss.xml") {
|
|
20
|
+
rewritten.pathname = "/feeds/posts/default";
|
|
21
|
+
rewritten.searchParams.set("alt", "rss");
|
|
20
22
|
}
|
|
21
23
|
}
|
|
22
|
-
|
|
23
|
-
const alt =
|
|
24
|
-
if (alt === "atom" || alt === "json" || alt === "")
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
24
|
+
rewritten.searchParams.delete("redirect");
|
|
25
|
+
const alt = rewritten.searchParams.get("alt");
|
|
26
|
+
if (alt === "atom" || alt === "json" || alt === "") rewritten.searchParams.delete("alt");
|
|
27
|
+
rewritten.searchParams.delete("v");
|
|
28
|
+
rewritten.searchParams.delete("max-results");
|
|
29
|
+
rewritten.searchParams.delete("start-index");
|
|
30
|
+
rewritten.searchParams.delete("published-min");
|
|
31
|
+
rewritten.searchParams.delete("published-max");
|
|
32
|
+
rewritten.searchParams.delete("updated-min");
|
|
33
|
+
rewritten.searchParams.delete("updated-max");
|
|
34
|
+
rewritten.searchParams.delete("orderby");
|
|
35
|
+
const normalized = normalizeUrl(rewritten.href, {
|
|
36
|
+
stripTrailingSlash: true,
|
|
37
|
+
collapseSlashes: true,
|
|
38
|
+
stripHash: true,
|
|
39
|
+
normalizeEncoding: true,
|
|
40
|
+
normalizeUnicode: true,
|
|
41
|
+
stripEmptyQuery: true,
|
|
42
|
+
sortQueryParams: true
|
|
43
|
+
});
|
|
44
|
+
return new URL(normalized);
|
|
34
45
|
}
|
|
35
46
|
};
|
|
36
47
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
const require_utils = require('../utils.cjs');
|
|
1
2
|
|
|
2
3
|
//#region src/rewrites/feedburner.ts
|
|
3
4
|
const hosts = [
|
|
@@ -9,11 +10,18 @@ const feedburnerRewrite = {
|
|
|
9
10
|
match: (url) => {
|
|
10
11
|
return hosts.includes(url.hostname);
|
|
11
12
|
},
|
|
12
|
-
|
|
13
|
-
const
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
rewrite: (url) => {
|
|
14
|
+
const rewritten = new URL(url);
|
|
15
|
+
rewritten.hostname = "feeds.feedburner.com";
|
|
16
|
+
rewritten.search = "";
|
|
17
|
+
const normalized = require_utils.normalizeUrl(rewritten.href, {
|
|
18
|
+
stripTrailingSlash: true,
|
|
19
|
+
collapseSlashes: true,
|
|
20
|
+
stripHash: true,
|
|
21
|
+
normalizeEncoding: true,
|
|
22
|
+
normalizeUnicode: true
|
|
23
|
+
});
|
|
24
|
+
return new URL(normalized);
|
|
17
25
|
}
|
|
18
26
|
};
|
|
19
27
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { normalizeUrl } from "../utils.js";
|
|
2
|
+
|
|
1
3
|
//#region src/rewrites/feedburner.ts
|
|
2
4
|
const hosts = [
|
|
3
5
|
"feeds.feedburner.com",
|
|
@@ -8,11 +10,18 @@ const feedburnerRewrite = {
|
|
|
8
10
|
match: (url) => {
|
|
9
11
|
return hosts.includes(url.hostname);
|
|
10
12
|
},
|
|
11
|
-
|
|
12
|
-
const
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
rewrite: (url) => {
|
|
14
|
+
const rewritten = new URL(url);
|
|
15
|
+
rewritten.hostname = "feeds.feedburner.com";
|
|
16
|
+
rewritten.search = "";
|
|
17
|
+
const normalized = normalizeUrl(rewritten.href, {
|
|
18
|
+
stripTrailingSlash: true,
|
|
19
|
+
collapseSlashes: true,
|
|
20
|
+
stripHash: true,
|
|
21
|
+
normalizeEncoding: true,
|
|
22
|
+
normalizeUnicode: true
|
|
23
|
+
});
|
|
24
|
+
return new URL(normalized);
|
|
16
25
|
}
|
|
17
26
|
};
|
|
18
27
|
|
package/dist/types.d.cts
CHANGED
|
@@ -9,7 +9,11 @@ type ParserAdapter<T> = {
|
|
|
9
9
|
};
|
|
10
10
|
type Rewrite = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
12
|
-
|
|
12
|
+
rewrite: (url: URL) => URL;
|
|
13
|
+
};
|
|
14
|
+
type Probe = {
|
|
15
|
+
match: (url: URL) => boolean;
|
|
16
|
+
getCandidates: (url: URL) => Array<string>;
|
|
13
17
|
};
|
|
14
18
|
type NormalizeOptions = {
|
|
15
19
|
stripProtocol?: boolean;
|
|
@@ -45,8 +49,9 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
|
|
|
45
49
|
parser?: ParserAdapter<TFeed>;
|
|
46
50
|
fetchFn?: FetchFn<TResponse>;
|
|
47
51
|
existsFn?: ExistsFn<TExisting>;
|
|
48
|
-
tiers?: Array<Tier>;
|
|
49
52
|
rewrites?: Array<Rewrite>;
|
|
53
|
+
probes?: Array<Probe>;
|
|
54
|
+
tiers?: Array<Tier>;
|
|
50
55
|
stripQueryParams?: Array<string>;
|
|
51
56
|
onFetch?: OnFetchFn<TResponse>;
|
|
52
57
|
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
@@ -65,4 +70,4 @@ type FetchFnResponse = {
|
|
|
65
70
|
};
|
|
66
71
|
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
67
72
|
//#endregion
|
|
68
|
-
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
|
|
73
|
+
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/types.d.ts
CHANGED
|
@@ -9,7 +9,11 @@ type ParserAdapter<T> = {
|
|
|
9
9
|
};
|
|
10
10
|
type Rewrite = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
12
|
-
|
|
12
|
+
rewrite: (url: URL) => URL;
|
|
13
|
+
};
|
|
14
|
+
type Probe = {
|
|
15
|
+
match: (url: URL) => boolean;
|
|
16
|
+
getCandidates: (url: URL) => Array<string>;
|
|
13
17
|
};
|
|
14
18
|
type NormalizeOptions = {
|
|
15
19
|
stripProtocol?: boolean;
|
|
@@ -45,8 +49,9 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
|
|
|
45
49
|
parser?: ParserAdapter<TFeed>;
|
|
46
50
|
fetchFn?: FetchFn<TResponse>;
|
|
47
51
|
existsFn?: ExistsFn<TExisting>;
|
|
48
|
-
tiers?: Array<Tier>;
|
|
49
52
|
rewrites?: Array<Rewrite>;
|
|
53
|
+
probes?: Array<Probe>;
|
|
54
|
+
tiers?: Array<Tier>;
|
|
50
55
|
stripQueryParams?: Array<string>;
|
|
51
56
|
onFetch?: OnFetchFn<TResponse>;
|
|
52
57
|
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
@@ -65,4 +70,4 @@ type FetchFnResponse = {
|
|
|
65
70
|
};
|
|
66
71
|
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
67
72
|
//#endregion
|
|
68
|
-
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
|
|
73
|
+
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/utils.cjs
CHANGED
|
@@ -147,7 +147,7 @@ const applyRewrites = (url, rewrites) => {
|
|
|
147
147
|
try {
|
|
148
148
|
let parsed = new URL(url);
|
|
149
149
|
for (const rewrite of rewrites) if (rewrite.match(parsed)) {
|
|
150
|
-
parsed = rewrite.
|
|
150
|
+
parsed = rewrite.rewrite(parsed);
|
|
151
151
|
break;
|
|
152
152
|
}
|
|
153
153
|
return parsed.href;
|
|
@@ -155,11 +155,51 @@ const applyRewrites = (url, rewrites) => {
|
|
|
155
155
|
return url;
|
|
156
156
|
}
|
|
157
157
|
};
|
|
158
|
+
/**
 * Runs the first probe that matches `url` and returns the first candidate
 * accepted by `testCandidate`.
 *
 * Only the first matching probe is consulted. Its candidates are tested one
 * at a time, in order, and the first truthy result of `testCandidate` is
 * returned as-is.
 *
 * @param {string} url - URL to probe.
 * @param {Array} probes - Probes exposing `match(url)` and `getCandidates(url)`.
 * @param {Function} testCandidate - Async check called with each candidate;
 *   a truthy resolved value ends the search.
 * @returns {Promise<*>} The first truthy `testCandidate` result, otherwise the
 *   original `url`.
 */
const applyProbes = async (url, probes, testCandidate) => {
  try {
    const target = new URL(url);
    const activeProbe = probes.find((probe) => probe.match(target));
    const candidates = activeProbe ? activeProbe.getCandidates(target) : [];
    for (const candidate of candidates) {
      const outcome = await testCandidate(candidate);
      if (outcome) return outcome;
    }
  } catch {
    // Best effort: an unparsable URL, a throwing probe or a throwing
    // testCandidate all fall through to the unchanged input below.
  }
  return url;
};
|
|
174
|
+
/**
 * Serializes `object` to JSON with the given top-level fields left out.
 *
 * The fields are blanked on the object itself for the duration of the
 * JSON.stringify call (no copy is made) and put back afterwards, so the
 * caller's object is unchanged once this returns or throws.
 *
 * @param {object} object - Value to serialize; temporarily mutated.
 * @param {Array<string>} fields - Top-level keys to exclude from the output.
 * @returns {string} JSON text of `object` without the excluded fields.
 * @throws Anything JSON.stringify throws (e.g. on circular structures); the
 *   excluded fields are restored before the error propagates.
 */
const createSignature = (object, fields) => {
  // Remember both the value and whether the key was an own property, so keys
  // that were absent are removed again instead of lingering as `undefined`.
  const saved = fields.map((key) => [key, object[key], Object.prototype.hasOwnProperty.call(object, key)]);
  try {
    // JSON.stringify omits properties whose value is undefined.
    for (const [key] of saved) object[key] = void 0;
    return JSON.stringify(object);
  } finally {
    for (const [key, value, wasOwn] of saved) {
      if (wasOwn) object[key] = value;
      else delete object[key];
    }
  }
};
|
|
181
|
+
// Matches a double-quoted absolute (http/https) or root-relative URL whose
// path ends in "/" directly before a "?" or the closing quote. Group 1 is
// everything up to that slash, group 2 the character following it.
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;

/**
 * Makes text independent of which of the given hosts (and which scheme /
 * "www." variant) its URLs use.
 *
 * Every `http(s)://[www.]<host>` prefix belonging to one of `urls` is
 * collapsed to "/", then the trailing slash of any double-quoted absolute or
 * root-relative URL in the text is removed.
 *
 * @param {string} text - Text to rewrite.
 * @param {Array<string>} urls - URLs whose hosts should be neutralized;
 *   entries that cannot be parsed are ignored.
 * @returns {string} The rewritten text, or `text` unchanged when no usable
 *   host could be derived from `urls`.
 */
const neutralizeUrls = (text, urls) => {
  const escapeHost = (url) => {
    try {
      // Escape every regex metacharacter, not only ".": an IPv6 literal host
      // such as "[::1]" would otherwise be read as a character class.
      return new URL("/", url).host.replace(/^www\./, "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    } catch {
      return;
    }
  };
  // filter(Boolean) drops unparsable URLs and URLs without a host.
  const hosts = urls.map(escapeHost).filter(Boolean);
  if (hosts.length === 0) return text;
  const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
  // The lookahead requires the host to end at "/" or a quote, so a host that
  // is merely a prefix of a longer one is not rewritten.
  const originPattern = new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"])(/)?`, "g");
  return text.replace(originPattern, "/").replace(trailingSlashPattern, "$1$2");
};
|
|
158
195
|
|
|
159
196
|
//#endregion
|
|
160
197
|
exports.addMissingProtocol = addMissingProtocol;
|
|
198
|
+
exports.applyProbes = applyProbes;
|
|
161
199
|
exports.applyRewrites = applyRewrites;
|
|
200
|
+
exports.createSignature = createSignature;
|
|
162
201
|
exports.fixMalformedProtocol = fixMalformedProtocol;
|
|
202
|
+
exports.neutralizeUrls = neutralizeUrls;
|
|
163
203
|
exports.normalizeUrl = normalizeUrl;
|
|
164
204
|
exports.resolveFeedProtocol = resolveFeedProtocol;
|
|
165
205
|
exports.resolveUrl = resolveUrl;
|
package/dist/utils.js
CHANGED
|
@@ -147,7 +147,7 @@ const applyRewrites = (url, rewrites) => {
|
|
|
147
147
|
try {
|
|
148
148
|
let parsed = new URL(url);
|
|
149
149
|
for (const rewrite of rewrites) if (rewrite.match(parsed)) {
|
|
150
|
-
parsed = rewrite.
|
|
150
|
+
parsed = rewrite.rewrite(parsed);
|
|
151
151
|
break;
|
|
152
152
|
}
|
|
153
153
|
return parsed.href;
|
|
@@ -155,6 +155,43 @@ const applyRewrites = (url, rewrites) => {
|
|
|
155
155
|
return url;
|
|
156
156
|
}
|
|
157
157
|
};
|
|
158
|
+
/**
 * Runs the first probe that matches `url` and returns the first candidate
 * accepted by `testCandidate`.
 *
 * Only the first matching probe is consulted. Its candidates are tested one
 * at a time, in order, and the first truthy result of `testCandidate` is
 * returned as-is.
 *
 * @param {string} url - URL to probe.
 * @param {Array} probes - Probes exposing `match(url)` and `getCandidates(url)`.
 * @param {Function} testCandidate - Async check called with each candidate;
 *   a truthy resolved value ends the search.
 * @returns {Promise<*>} The first truthy `testCandidate` result, otherwise the
 *   original `url`.
 */
const applyProbes = async (url, probes, testCandidate) => {
  try {
    const target = new URL(url);
    const activeProbe = probes.find((probe) => probe.match(target));
    const candidates = activeProbe ? activeProbe.getCandidates(target) : [];
    for (const candidate of candidates) {
      const outcome = await testCandidate(candidate);
      if (outcome) return outcome;
    }
  } catch {
    // Best effort: an unparsable URL, a throwing probe or a throwing
    // testCandidate all fall through to the unchanged input below.
  }
  return url;
};
|
|
174
|
+
/**
 * Serializes `object` to JSON with the given top-level fields left out.
 *
 * The fields are blanked on the object itself for the duration of the
 * JSON.stringify call (no copy is made) and put back afterwards, so the
 * caller's object is unchanged once this returns or throws.
 *
 * @param {object} object - Value to serialize; temporarily mutated.
 * @param {Array<string>} fields - Top-level keys to exclude from the output.
 * @returns {string} JSON text of `object` without the excluded fields.
 * @throws Anything JSON.stringify throws (e.g. on circular structures); the
 *   excluded fields are restored before the error propagates.
 */
const createSignature = (object, fields) => {
  // Remember both the value and whether the key was an own property, so keys
  // that were absent are removed again instead of lingering as `undefined`.
  const saved = fields.map((key) => [key, object[key], Object.prototype.hasOwnProperty.call(object, key)]);
  try {
    // JSON.stringify omits properties whose value is undefined.
    for (const [key] of saved) object[key] = void 0;
    return JSON.stringify(object);
  } finally {
    for (const [key, value, wasOwn] of saved) {
      if (wasOwn) object[key] = value;
      else delete object[key];
    }
  }
};
|
|
181
|
+
// Matches a double-quoted absolute (http/https) or root-relative URL whose
// path ends in "/" directly before a "?" or the closing quote. Group 1 is
// everything up to that slash, group 2 the character following it.
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;

/**
 * Makes text independent of which of the given hosts (and which scheme /
 * "www." variant) its URLs use.
 *
 * Every `http(s)://[www.]<host>` prefix belonging to one of `urls` is
 * collapsed to "/", then the trailing slash of any double-quoted absolute or
 * root-relative URL in the text is removed.
 *
 * @param {string} text - Text to rewrite.
 * @param {Array<string>} urls - URLs whose hosts should be neutralized;
 *   entries that cannot be parsed are ignored.
 * @returns {string} The rewritten text, or `text` unchanged when no usable
 *   host could be derived from `urls`.
 */
const neutralizeUrls = (text, urls) => {
  const escapeHost = (url) => {
    try {
      // Escape every regex metacharacter, not only ".": an IPv6 literal host
      // such as "[::1]" would otherwise be read as a character class.
      return new URL("/", url).host.replace(/^www\./, "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    } catch {
      return;
    }
  };
  // filter(Boolean) drops unparsable URLs and URLs without a host.
  const hosts = urls.map(escapeHost).filter(Boolean);
  if (hosts.length === 0) return text;
  const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
  // The lookahead requires the host to end at "/" or a quote, so a host that
  // is merely a prefix of a longer one is not rewritten.
  const originPattern = new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"])(/)?`, "g");
  return text.replace(originPattern, "/").replace(trailingSlashPattern, "$1$2");
};
|
|
158
195
|
|
|
159
196
|
//#endregion
|
|
160
|
-
export { addMissingProtocol, applyRewrites, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
197
|
+
export { addMissingProtocol, applyProbes, applyRewrites, createSignature, fixMalformedProtocol, neutralizeUrls, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/package.json
CHANGED