feedcanon 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/defaults.cjs +34 -11
- package/dist/defaults.d.cts +4 -3
- package/dist/defaults.d.ts +4 -3
- package/dist/defaults.js +32 -10
- package/dist/exports.cjs +5 -3
- package/dist/exports.d.cts +5 -4
- package/dist/exports.d.ts +5 -4
- package/dist/exports.js +4 -3
- package/dist/index.cjs +13 -13
- package/dist/index.js +15 -15
- package/dist/rewrites/blogger.cjs +24 -0
- package/dist/rewrites/blogger.d.cts +6 -0
- package/dist/rewrites/blogger.d.ts +6 -0
- package/dist/rewrites/blogger.js +23 -0
- package/dist/{platforms → rewrites}/feedburner.cjs +3 -3
- package/dist/rewrites/feedburner.d.cts +6 -0
- package/dist/rewrites/feedburner.d.ts +6 -0
- package/dist/{platforms → rewrites}/feedburner.js +3 -3
- package/dist/types.d.cts +4 -4
- package/dist/types.d.ts +4 -4
- package/dist/utils.cjs +4 -4
- package/dist/utils.js +4 -4
- package/package.json +1 -1
- package/dist/platforms/feedburner.d.cts +0 -6
- package/dist/platforms/feedburner.d.ts +0 -6
package/dist/defaults.cjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
const require_feedburner = require('./
|
|
1
|
+
const require_feedburner = require('./rewrites/feedburner.cjs');
|
|
2
2
|
let feedsmith = require("feedsmith");
|
|
3
3
|
|
|
4
4
|
//#region src/defaults.ts
|
|
5
|
-
const
|
|
5
|
+
const defaultRewrites = [require_feedburner.feedburnerRewrite];
|
|
6
6
|
const defaultStrippedParams = [
|
|
7
7
|
"utm_source",
|
|
8
8
|
"utm_medium",
|
|
@@ -185,7 +185,16 @@ const defaultFetch = async (url, options) => {
|
|
|
185
185
|
status: response.status
|
|
186
186
|
};
|
|
187
187
|
};
|
|
188
|
-
const
|
|
188
|
+
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
189
|
+
const neutralizeFeedUrls = (signature, url) => {
|
|
190
|
+
try {
|
|
191
|
+
const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
|
|
192
|
+
return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
|
|
193
|
+
} catch {
|
|
194
|
+
return signature;
|
|
195
|
+
}
|
|
196
|
+
};
|
|
197
|
+
const retrieveSelfLink = (parsed) => {
|
|
189
198
|
switch (parsed.format) {
|
|
190
199
|
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
191
200
|
case "rss":
|
|
@@ -199,26 +208,35 @@ const defaultParser = {
|
|
|
199
208
|
} catch {}
|
|
200
209
|
},
|
|
201
210
|
getSelfUrl: (parsed) => {
|
|
202
|
-
return parsed.format === "json" ? parsed.feed.feed_url :
|
|
211
|
+
return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
|
|
203
212
|
},
|
|
204
|
-
getSignature: (parsed) => {
|
|
213
|
+
getSignature: (parsed, url) => {
|
|
205
214
|
if (parsed.format === "json") {
|
|
206
215
|
const originalSelfUrl = parsed.feed.feed_url;
|
|
207
216
|
parsed.feed.feed_url = void 0;
|
|
208
217
|
const signature$1 = JSON.stringify(parsed.feed);
|
|
209
218
|
parsed.feed.feed_url = originalSelfUrl;
|
|
210
|
-
return signature$1;
|
|
219
|
+
return neutralizeFeedUrls(signature$1, url);
|
|
211
220
|
}
|
|
212
221
|
let signature;
|
|
213
222
|
let originalBuildDate;
|
|
223
|
+
let originalPubDate;
|
|
224
|
+
let originalLink;
|
|
214
225
|
if (parsed.format === "rss") {
|
|
215
226
|
originalBuildDate = parsed.feed.lastBuildDate;
|
|
227
|
+
originalPubDate = parsed.feed.pubDate;
|
|
228
|
+
originalLink = parsed.feed.link;
|
|
216
229
|
parsed.feed.lastBuildDate = void 0;
|
|
230
|
+
parsed.feed.pubDate = void 0;
|
|
231
|
+
parsed.feed.link = void 0;
|
|
232
|
+
} else if (parsed.format === "rdf") {
|
|
233
|
+
originalLink = parsed.feed.link;
|
|
234
|
+
parsed.feed.link = void 0;
|
|
217
235
|
} else if (parsed.format === "atom") {
|
|
218
236
|
originalBuildDate = parsed.feed.updated;
|
|
219
237
|
parsed.feed.updated = void 0;
|
|
220
238
|
}
|
|
221
|
-
const link =
|
|
239
|
+
const link = retrieveSelfLink(parsed);
|
|
222
240
|
if (!link) signature = JSON.stringify(parsed.feed);
|
|
223
241
|
else {
|
|
224
242
|
const originalSelfUrl = link.href;
|
|
@@ -226,9 +244,13 @@ const defaultParser = {
|
|
|
226
244
|
signature = JSON.stringify(parsed.feed);
|
|
227
245
|
link.href = originalSelfUrl;
|
|
228
246
|
}
|
|
229
|
-
if (parsed.format === "rss")
|
|
247
|
+
if (parsed.format === "rss") {
|
|
248
|
+
parsed.feed.lastBuildDate = originalBuildDate;
|
|
249
|
+
parsed.feed.pubDate = originalPubDate;
|
|
250
|
+
parsed.feed.link = originalLink;
|
|
251
|
+
} else if (parsed.format === "rdf") parsed.feed.link = originalLink;
|
|
230
252
|
else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
|
|
231
|
-
return signature;
|
|
253
|
+
return neutralizeFeedUrls(signature, url);
|
|
232
254
|
}
|
|
233
255
|
};
|
|
234
256
|
const defaultTiers = [
|
|
@@ -283,6 +305,7 @@ const defaultTiers = [
|
|
|
283
305
|
exports.defaultFetch = defaultFetch;
|
|
284
306
|
exports.defaultNormalizeOptions = defaultNormalizeOptions;
|
|
285
307
|
exports.defaultParser = defaultParser;
|
|
286
|
-
exports.
|
|
308
|
+
exports.defaultRewrites = defaultRewrites;
|
|
287
309
|
exports.defaultStrippedParams = defaultStrippedParams;
|
|
288
|
-
exports.defaultTiers = defaultTiers;
|
|
310
|
+
exports.defaultTiers = defaultTiers;
|
|
311
|
+
exports.neutralizeFeedUrls = neutralizeFeedUrls;
|
package/dist/defaults.d.cts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter,
|
|
1
|
+
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Rewrite, Tier } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
|
-
declare const
|
|
4
|
+
declare const defaultRewrites: Array<Rewrite>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
7
|
declare const defaultFetch: FetchFn;
|
|
8
|
+
declare const neutralizeFeedUrls: (signature: string, url: string) => string;
|
|
8
9
|
declare const defaultParser: ParserAdapter<DefaultParserResult>;
|
|
9
10
|
declare const defaultTiers: Array<Tier>;
|
|
10
11
|
//#endregion
|
|
11
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser,
|
|
12
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, neutralizeFeedUrls };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter,
|
|
1
|
+
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Rewrite, Tier } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
|
-
declare const
|
|
4
|
+
declare const defaultRewrites: Array<Rewrite>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
7
|
declare const defaultFetch: FetchFn;
|
|
8
|
+
declare const neutralizeFeedUrls: (signature: string, url: string) => string;
|
|
8
9
|
declare const defaultParser: ParserAdapter<DefaultParserResult>;
|
|
9
10
|
declare const defaultTiers: Array<Tier>;
|
|
10
11
|
//#endregion
|
|
11
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser,
|
|
12
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, neutralizeFeedUrls };
|
package/dist/defaults.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
2
2
|
import { parseFeed } from "feedsmith";
|
|
3
3
|
|
|
4
4
|
//#region src/defaults.ts
|
|
5
|
-
const
|
|
5
|
+
const defaultRewrites = [feedburnerRewrite];
|
|
6
6
|
const defaultStrippedParams = [
|
|
7
7
|
"utm_source",
|
|
8
8
|
"utm_medium",
|
|
@@ -185,7 +185,16 @@ const defaultFetch = async (url, options) => {
|
|
|
185
185
|
status: response.status
|
|
186
186
|
};
|
|
187
187
|
};
|
|
188
|
-
const
|
|
188
|
+
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
189
|
+
const neutralizeFeedUrls = (signature, url) => {
|
|
190
|
+
try {
|
|
191
|
+
const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
|
|
192
|
+
return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
|
|
193
|
+
} catch {
|
|
194
|
+
return signature;
|
|
195
|
+
}
|
|
196
|
+
};
|
|
197
|
+
const retrieveSelfLink = (parsed) => {
|
|
189
198
|
switch (parsed.format) {
|
|
190
199
|
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
191
200
|
case "rss":
|
|
@@ -199,26 +208,35 @@ const defaultParser = {
|
|
|
199
208
|
} catch {}
|
|
200
209
|
},
|
|
201
210
|
getSelfUrl: (parsed) => {
|
|
202
|
-
return parsed.format === "json" ? parsed.feed.feed_url :
|
|
211
|
+
return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
|
|
203
212
|
},
|
|
204
|
-
getSignature: (parsed) => {
|
|
213
|
+
getSignature: (parsed, url) => {
|
|
205
214
|
if (parsed.format === "json") {
|
|
206
215
|
const originalSelfUrl = parsed.feed.feed_url;
|
|
207
216
|
parsed.feed.feed_url = void 0;
|
|
208
217
|
const signature$1 = JSON.stringify(parsed.feed);
|
|
209
218
|
parsed.feed.feed_url = originalSelfUrl;
|
|
210
|
-
return signature$1;
|
|
219
|
+
return neutralizeFeedUrls(signature$1, url);
|
|
211
220
|
}
|
|
212
221
|
let signature;
|
|
213
222
|
let originalBuildDate;
|
|
223
|
+
let originalPubDate;
|
|
224
|
+
let originalLink;
|
|
214
225
|
if (parsed.format === "rss") {
|
|
215
226
|
originalBuildDate = parsed.feed.lastBuildDate;
|
|
227
|
+
originalPubDate = parsed.feed.pubDate;
|
|
228
|
+
originalLink = parsed.feed.link;
|
|
216
229
|
parsed.feed.lastBuildDate = void 0;
|
|
230
|
+
parsed.feed.pubDate = void 0;
|
|
231
|
+
parsed.feed.link = void 0;
|
|
232
|
+
} else if (parsed.format === "rdf") {
|
|
233
|
+
originalLink = parsed.feed.link;
|
|
234
|
+
parsed.feed.link = void 0;
|
|
217
235
|
} else if (parsed.format === "atom") {
|
|
218
236
|
originalBuildDate = parsed.feed.updated;
|
|
219
237
|
parsed.feed.updated = void 0;
|
|
220
238
|
}
|
|
221
|
-
const link =
|
|
239
|
+
const link = retrieveSelfLink(parsed);
|
|
222
240
|
if (!link) signature = JSON.stringify(parsed.feed);
|
|
223
241
|
else {
|
|
224
242
|
const originalSelfUrl = link.href;
|
|
@@ -226,9 +244,13 @@ const defaultParser = {
|
|
|
226
244
|
signature = JSON.stringify(parsed.feed);
|
|
227
245
|
link.href = originalSelfUrl;
|
|
228
246
|
}
|
|
229
|
-
if (parsed.format === "rss")
|
|
247
|
+
if (parsed.format === "rss") {
|
|
248
|
+
parsed.feed.lastBuildDate = originalBuildDate;
|
|
249
|
+
parsed.feed.pubDate = originalPubDate;
|
|
250
|
+
parsed.feed.link = originalLink;
|
|
251
|
+
} else if (parsed.format === "rdf") parsed.feed.link = originalLink;
|
|
230
252
|
else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
|
|
231
|
-
return signature;
|
|
253
|
+
return neutralizeFeedUrls(signature, url);
|
|
232
254
|
}
|
|
233
255
|
};
|
|
234
256
|
const defaultTiers = [
|
|
@@ -280,4 +302,4 @@ const defaultTiers = [
|
|
|
280
302
|
];
|
|
281
303
|
|
|
282
304
|
//#endregion
|
|
283
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser,
|
|
305
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, neutralizeFeedUrls };
|
package/dist/exports.cjs
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
|
-
const require_feedburner = require('./
|
|
1
|
+
const require_feedburner = require('./rewrites/feedburner.cjs');
|
|
2
2
|
const require_defaults = require('./defaults.cjs');
|
|
3
3
|
const require_utils = require('./utils.cjs');
|
|
4
4
|
const require_index = require('./index.cjs');
|
|
5
|
+
const require_blogger = require('./rewrites/blogger.cjs');
|
|
5
6
|
|
|
6
7
|
exports.addMissingProtocol = require_utils.addMissingProtocol;
|
|
8
|
+
exports.bloggerRewrite = require_blogger.bloggerRewrite;
|
|
7
9
|
exports.defaultFetch = require_defaults.defaultFetch;
|
|
8
10
|
exports.defaultParser = require_defaults.defaultParser;
|
|
9
|
-
exports.
|
|
11
|
+
exports.defaultRewrites = require_defaults.defaultRewrites;
|
|
10
12
|
exports.defaultStrippedParams = require_defaults.defaultStrippedParams;
|
|
11
13
|
exports.defaultTiers = require_defaults.defaultTiers;
|
|
12
|
-
exports.
|
|
14
|
+
exports.feedburnerRewrite = require_feedburner.feedburnerRewrite;
|
|
13
15
|
exports.findCanonical = require_index.findCanonical;
|
|
14
16
|
exports.fixMalformedProtocol = require_utils.fixMalformedProtocol;
|
|
15
17
|
exports.normalizeUrl = require_utils.normalizeUrl;
|
package/dist/exports.d.cts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter,
|
|
2
|
-
import { defaultFetch, defaultParser,
|
|
1
|
+
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.cjs";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
3
3
|
import { findCanonical } from "./index.cjs";
|
|
4
|
-
import {
|
|
4
|
+
import { bloggerRewrite } from "./rewrites/blogger.cjs";
|
|
5
|
+
import { feedburnerRewrite } from "./rewrites/feedburner.cjs";
|
|
5
6
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
|
|
6
|
-
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type
|
|
7
|
+
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/exports.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter,
|
|
2
|
-
import { defaultFetch, defaultParser,
|
|
1
|
+
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
import { findCanonical } from "./index.js";
|
|
4
|
-
import {
|
|
4
|
+
import { bloggerRewrite } from "./rewrites/blogger.js";
|
|
5
|
+
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
5
6
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
6
|
-
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type
|
|
7
|
+
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/exports.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { defaultFetch, defaultParser,
|
|
1
|
+
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
4
4
|
import { findCanonical } from "./index.js";
|
|
5
|
+
import { bloggerRewrite } from "./rewrites/blogger.js";
|
|
5
6
|
|
|
6
|
-
export { addMissingProtocol, defaultFetch, defaultParser,
|
|
7
|
+
export { addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/index.cjs
CHANGED
|
@@ -3,7 +3,7 @@ const require_utils = require('./utils.cjs');
|
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers,
|
|
6
|
+
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites = require_defaults.defaultRewrites, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
8
|
return stripQueryParams?.length ? require_utils.normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
@@ -11,11 +11,11 @@ async function findCanonical(inputUrl, options) {
|
|
|
11
11
|
stripEmptyQuery: true
|
|
12
12
|
}) : url;
|
|
13
13
|
};
|
|
14
|
-
const
|
|
14
|
+
const resolveAndApplyRewrites = (url, baseUrl) => {
|
|
15
15
|
const resolved = require_utils.resolveUrl(url, baseUrl);
|
|
16
|
-
return resolved ? require_utils.
|
|
16
|
+
return resolved ? require_utils.applyRewrites(resolved, rewrites) : void 0;
|
|
17
17
|
};
|
|
18
|
-
const initialRequestUrl =
|
|
18
|
+
const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
|
|
19
19
|
if (!initialRequestUrl) return;
|
|
20
20
|
let initialResponse;
|
|
21
21
|
try {
|
|
@@ -28,7 +28,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
28
28
|
response: initialResponse
|
|
29
29
|
});
|
|
30
30
|
if (initialResponse.status < 200 || initialResponse.status >= 300) return;
|
|
31
|
-
const initialResponseUrlRaw =
|
|
31
|
+
const initialResponseUrlRaw = resolveAndApplyRewrites(initialResponse.url);
|
|
32
32
|
if (!initialResponseUrlRaw) return;
|
|
33
33
|
const initialResponseUrl = stripParams(initialResponseUrlRaw);
|
|
34
34
|
const initialResponseBody = initialResponse.body;
|
|
@@ -44,16 +44,16 @@ async function findCanonical(inputUrl, options) {
|
|
|
44
44
|
});
|
|
45
45
|
const selfRequestUrlRaw = parser.getSelfUrl(initialResponseFeed);
|
|
46
46
|
if (selfRequestUrlRaw) {
|
|
47
|
-
selfRequestUrl =
|
|
47
|
+
selfRequestUrl = resolveAndApplyRewrites(selfRequestUrlRaw, initialResponseUrl);
|
|
48
48
|
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
49
|
}
|
|
50
|
-
const compareWithInitialResponse = async (comparedResponseBody) => {
|
|
50
|
+
const compareWithInitialResponse = async (comparedResponseBody, comparedResponseUrl) => {
|
|
51
51
|
if (!comparedResponseBody) return false;
|
|
52
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
53
53
|
const comparedResponseFeed = await parser.parse(comparedResponseBody);
|
|
54
54
|
if (comparedResponseFeed) {
|
|
55
|
-
initialResponseSignature ||= parser.getSignature(initialResponseFeed);
|
|
56
|
-
const comparedResponseSignature = parser.getSignature(comparedResponseFeed);
|
|
55
|
+
initialResponseSignature ||= parser.getSignature(initialResponseFeed, initialResponseUrl);
|
|
56
|
+
const comparedResponseSignature = parser.getSignature(comparedResponseFeed, comparedResponseUrl);
|
|
57
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
58
58
|
}
|
|
59
59
|
return false;
|
|
@@ -70,7 +70,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
70
70
|
response
|
|
71
71
|
});
|
|
72
72
|
if (response.status < 200 || response.status >= 300) return;
|
|
73
|
-
if (!await compareWithInitialResponse(response.body)) return;
|
|
73
|
+
if (!await compareWithInitialResponse(response.body, response.url)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
76
|
let variantSourceUrl = initialResponseUrl;
|
|
@@ -86,13 +86,13 @@ async function findCanonical(inputUrl, options) {
|
|
|
86
86
|
response,
|
|
87
87
|
feed: initialResponseFeed
|
|
88
88
|
});
|
|
89
|
-
variantSourceUrl =
|
|
89
|
+
variantSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
|
|
90
90
|
variantSourceUrl = stripParams(variantSourceUrl);
|
|
91
91
|
break;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
-
const variantUrls = new Set(tiers.map((tier) =>
|
|
95
|
+
const variantUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(require_utils.normalizeUrl(variantSourceUrl, tier))).filter((variantUrl) => !!variantUrl));
|
|
96
96
|
variantUrls.add(variantSourceUrl);
|
|
97
97
|
let winningUrl = variantSourceUrl;
|
|
98
98
|
for (const variantUrl of variantUrls) {
|
|
@@ -113,7 +113,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
113
113
|
}
|
|
114
114
|
const variantResponse = await fetchAndCompare(variantUrl);
|
|
115
115
|
if (variantResponse) {
|
|
116
|
-
let variantResponseUrl =
|
|
116
|
+
let variantResponseUrl = resolveAndApplyRewrites(variantResponse.url);
|
|
117
117
|
if (variantResponseUrl) variantResponseUrl = stripParams(variantResponseUrl);
|
|
118
118
|
if (variantResponseUrl === variantSourceUrl || variantResponseUrl === initialResponseUrl) continue;
|
|
119
119
|
onMatch?.({
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { defaultFetch, defaultParser,
|
|
2
|
-
import {
|
|
1
|
+
import { defaultFetch, defaultParser, defaultRewrites, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
+
import { applyRewrites, normalizeUrl, resolveUrl } from "./utils.js";
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers,
|
|
6
|
+
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, rewrites = defaultRewrites, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
8
|
return stripQueryParams?.length ? normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
@@ -11,11 +11,11 @@ async function findCanonical(inputUrl, options) {
|
|
|
11
11
|
stripEmptyQuery: true
|
|
12
12
|
}) : url;
|
|
13
13
|
};
|
|
14
|
-
const
|
|
14
|
+
const resolveAndApplyRewrites = (url, baseUrl) => {
|
|
15
15
|
const resolved = resolveUrl(url, baseUrl);
|
|
16
|
-
return resolved ?
|
|
16
|
+
return resolved ? applyRewrites(resolved, rewrites) : void 0;
|
|
17
17
|
};
|
|
18
|
-
const initialRequestUrl =
|
|
18
|
+
const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
|
|
19
19
|
if (!initialRequestUrl) return;
|
|
20
20
|
let initialResponse;
|
|
21
21
|
try {
|
|
@@ -28,7 +28,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
28
28
|
response: initialResponse
|
|
29
29
|
});
|
|
30
30
|
if (initialResponse.status < 200 || initialResponse.status >= 300) return;
|
|
31
|
-
const initialResponseUrlRaw =
|
|
31
|
+
const initialResponseUrlRaw = resolveAndApplyRewrites(initialResponse.url);
|
|
32
32
|
if (!initialResponseUrlRaw) return;
|
|
33
33
|
const initialResponseUrl = stripParams(initialResponseUrlRaw);
|
|
34
34
|
const initialResponseBody = initialResponse.body;
|
|
@@ -44,16 +44,16 @@ async function findCanonical(inputUrl, options) {
|
|
|
44
44
|
});
|
|
45
45
|
const selfRequestUrlRaw = parser.getSelfUrl(initialResponseFeed);
|
|
46
46
|
if (selfRequestUrlRaw) {
|
|
47
|
-
selfRequestUrl =
|
|
47
|
+
selfRequestUrl = resolveAndApplyRewrites(selfRequestUrlRaw, initialResponseUrl);
|
|
48
48
|
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
49
|
}
|
|
50
|
-
const compareWithInitialResponse = async (comparedResponseBody) => {
|
|
50
|
+
const compareWithInitialResponse = async (comparedResponseBody, comparedResponseUrl) => {
|
|
51
51
|
if (!comparedResponseBody) return false;
|
|
52
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
53
53
|
const comparedResponseFeed = await parser.parse(comparedResponseBody);
|
|
54
54
|
if (comparedResponseFeed) {
|
|
55
|
-
initialResponseSignature ||= parser.getSignature(initialResponseFeed);
|
|
56
|
-
const comparedResponseSignature = parser.getSignature(comparedResponseFeed);
|
|
55
|
+
initialResponseSignature ||= parser.getSignature(initialResponseFeed, initialResponseUrl);
|
|
56
|
+
const comparedResponseSignature = parser.getSignature(comparedResponseFeed, comparedResponseUrl);
|
|
57
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
58
58
|
}
|
|
59
59
|
return false;
|
|
@@ -70,7 +70,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
70
70
|
response
|
|
71
71
|
});
|
|
72
72
|
if (response.status < 200 || response.status >= 300) return;
|
|
73
|
-
if (!await compareWithInitialResponse(response.body)) return;
|
|
73
|
+
if (!await compareWithInitialResponse(response.body, response.url)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
76
|
let variantSourceUrl = initialResponseUrl;
|
|
@@ -86,13 +86,13 @@ async function findCanonical(inputUrl, options) {
|
|
|
86
86
|
response,
|
|
87
87
|
feed: initialResponseFeed
|
|
88
88
|
});
|
|
89
|
-
variantSourceUrl =
|
|
89
|
+
variantSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
|
|
90
90
|
variantSourceUrl = stripParams(variantSourceUrl);
|
|
91
91
|
break;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
-
const variantUrls = new Set(tiers.map((tier) =>
|
|
95
|
+
const variantUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(normalizeUrl(variantSourceUrl, tier))).filter((variantUrl) => !!variantUrl));
|
|
96
96
|
variantUrls.add(variantSourceUrl);
|
|
97
97
|
let winningUrl = variantSourceUrl;
|
|
98
98
|
for (const variantUrl of variantUrls) {
|
|
@@ -113,7 +113,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
113
113
|
}
|
|
114
114
|
const variantResponse = await fetchAndCompare(variantUrl);
|
|
115
115
|
if (variantResponse) {
|
|
116
|
-
let variantResponseUrl =
|
|
116
|
+
let variantResponseUrl = resolveAndApplyRewrites(variantResponse.url);
|
|
117
117
|
if (variantResponseUrl) variantResponseUrl = stripParams(variantResponseUrl);
|
|
118
118
|
if (variantResponseUrl === variantSourceUrl || variantResponseUrl === initialResponseUrl) continue;
|
|
119
119
|
onMatch?.({
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
|
|
2
|
+
//#region src/rewrites/blogger.ts
|
|
3
|
+
const hosts = ["blogger.com", "www.blogger.com"];
|
|
4
|
+
const bloggerRewrite = {
|
|
5
|
+
match: (url) => {
|
|
6
|
+
return hosts.includes(url.hostname);
|
|
7
|
+
},
|
|
8
|
+
normalize: (url) => {
|
|
9
|
+
const normalized = new URL(url);
|
|
10
|
+
normalized.protocol = "https:";
|
|
11
|
+
normalized.hostname = "www.blogger.com";
|
|
12
|
+
normalized.searchParams.delete("redirect");
|
|
13
|
+
normalized.searchParams.delete("max-results");
|
|
14
|
+
normalized.searchParams.delete("start-index");
|
|
15
|
+
normalized.searchParams.delete("published-min");
|
|
16
|
+
normalized.searchParams.delete("published-max");
|
|
17
|
+
normalized.searchParams.delete("updated-min");
|
|
18
|
+
normalized.searchParams.delete("updated-max");
|
|
19
|
+
return normalized;
|
|
20
|
+
}
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
//#endregion
|
|
24
|
+
exports.bloggerRewrite = bloggerRewrite;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
//#region src/rewrites/blogger.ts
|
|
2
|
+
const hosts = ["blogger.com", "www.blogger.com"];
|
|
3
|
+
const bloggerRewrite = {
|
|
4
|
+
match: (url) => {
|
|
5
|
+
return hosts.includes(url.hostname);
|
|
6
|
+
},
|
|
7
|
+
normalize: (url) => {
|
|
8
|
+
const normalized = new URL(url);
|
|
9
|
+
normalized.protocol = "https:";
|
|
10
|
+
normalized.hostname = "www.blogger.com";
|
|
11
|
+
normalized.searchParams.delete("redirect");
|
|
12
|
+
normalized.searchParams.delete("max-results");
|
|
13
|
+
normalized.searchParams.delete("start-index");
|
|
14
|
+
normalized.searchParams.delete("published-min");
|
|
15
|
+
normalized.searchParams.delete("published-max");
|
|
16
|
+
normalized.searchParams.delete("updated-min");
|
|
17
|
+
normalized.searchParams.delete("updated-max");
|
|
18
|
+
return normalized;
|
|
19
|
+
}
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
//#endregion
|
|
23
|
+
export { bloggerRewrite };
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
|
|
2
|
-
//#region src/
|
|
2
|
+
//#region src/rewrites/feedburner.ts
|
|
3
3
|
const hosts = [
|
|
4
4
|
"feeds.feedburner.com",
|
|
5
5
|
"feeds2.feedburner.com",
|
|
6
6
|
"feedproxy.google.com"
|
|
7
7
|
];
|
|
8
|
-
const
|
|
8
|
+
const feedburnerRewrite = {
|
|
9
9
|
match: (url) => {
|
|
10
10
|
return hosts.includes(url.hostname);
|
|
11
11
|
},
|
|
@@ -18,4 +18,4 @@ const feedburnerHandler = {
|
|
|
18
18
|
};
|
|
19
19
|
|
|
20
20
|
//#endregion
|
|
21
|
-
exports.
|
|
21
|
+
exports.feedburnerRewrite = feedburnerRewrite;
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
//#region src/
|
|
1
|
+
//#region src/rewrites/feedburner.ts
|
|
2
2
|
const hosts = [
|
|
3
3
|
"feeds.feedburner.com",
|
|
4
4
|
"feeds2.feedburner.com",
|
|
5
5
|
"feedproxy.google.com"
|
|
6
6
|
];
|
|
7
|
-
const
|
|
7
|
+
const feedburnerRewrite = {
|
|
8
8
|
match: (url) => {
|
|
9
9
|
return hosts.includes(url.hostname);
|
|
10
10
|
},
|
|
@@ -17,4 +17,4 @@ const feedburnerHandler = {
|
|
|
17
17
|
};
|
|
18
18
|
|
|
19
19
|
//#endregion
|
|
20
|
-
export {
|
|
20
|
+
export { feedburnerRewrite };
|
package/dist/types.d.cts
CHANGED
|
@@ -5,9 +5,9 @@ type DefaultParserResult = ReturnType<typeof feedsmith0.parseFeed>;
|
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
6
|
parse: (body: string) => Promise<T | undefined> | T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T) => string;
|
|
8
|
+
getSignature: (parsed: T, url: string) => string;
|
|
9
9
|
};
|
|
10
|
-
type
|
|
10
|
+
type Rewrite = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
12
12
|
normalize: (url: URL) => URL;
|
|
13
13
|
};
|
|
@@ -46,7 +46,7 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
|
|
|
46
46
|
fetchFn?: FetchFn<TResponse>;
|
|
47
47
|
existsFn?: ExistsFn<TExisting>;
|
|
48
48
|
tiers?: Array<Tier>;
|
|
49
|
-
|
|
49
|
+
rewrites?: Array<Rewrite>;
|
|
50
50
|
stripQueryParams?: Array<string>;
|
|
51
51
|
onFetch?: OnFetchFn<TResponse>;
|
|
52
52
|
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
@@ -65,4 +65,4 @@ type FetchFnResponse = {
|
|
|
65
65
|
};
|
|
66
66
|
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
67
67
|
//#endregion
|
|
68
|
-
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter,
|
|
68
|
+
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
|
package/dist/types.d.ts
CHANGED
|
@@ -5,9 +5,9 @@ type DefaultParserResult = ReturnType<typeof feedsmith0.parseFeed>;
|
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
6
|
parse: (body: string) => Promise<T | undefined> | T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T) => string;
|
|
8
|
+
getSignature: (parsed: T, url: string) => string;
|
|
9
9
|
};
|
|
10
|
-
type
|
|
10
|
+
type Rewrite = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
12
12
|
normalize: (url: URL) => URL;
|
|
13
13
|
};
|
|
@@ -46,7 +46,7 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
|
|
|
46
46
|
fetchFn?: FetchFn<TResponse>;
|
|
47
47
|
existsFn?: ExistsFn<TExisting>;
|
|
48
48
|
tiers?: Array<Tier>;
|
|
49
|
-
|
|
49
|
+
rewrites?: Array<Rewrite>;
|
|
50
50
|
stripQueryParams?: Array<string>;
|
|
51
51
|
onFetch?: OnFetchFn<TResponse>;
|
|
52
52
|
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
@@ -65,4 +65,4 @@ type FetchFnResponse = {
|
|
|
65
65
|
};
|
|
66
66
|
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
67
67
|
//#endregion
|
|
68
|
-
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter,
|
|
68
|
+
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
|
package/dist/utils.cjs
CHANGED
|
@@ -143,11 +143,11 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
143
143
|
return url;
|
|
144
144
|
}
|
|
145
145
|
};
|
|
146
|
-
const
|
|
146
|
+
const applyRewrites = (url, rewrites) => {
|
|
147
147
|
try {
|
|
148
148
|
let parsed = new URL(url);
|
|
149
|
-
for (const
|
|
150
|
-
parsed =
|
|
149
|
+
for (const rewrite of rewrites) if (rewrite.match(parsed)) {
|
|
150
|
+
parsed = rewrite.normalize(parsed);
|
|
151
151
|
break;
|
|
152
152
|
}
|
|
153
153
|
return parsed.href;
|
|
@@ -158,7 +158,7 @@ const applyPlatformHandlers = (url, platforms) => {
|
|
|
158
158
|
|
|
159
159
|
//#endregion
|
|
160
160
|
exports.addMissingProtocol = addMissingProtocol;
|
|
161
|
-
exports.
|
|
161
|
+
exports.applyRewrites = applyRewrites;
|
|
162
162
|
exports.fixMalformedProtocol = fixMalformedProtocol;
|
|
163
163
|
exports.normalizeUrl = normalizeUrl;
|
|
164
164
|
exports.resolveFeedProtocol = resolveFeedProtocol;
|
package/dist/utils.js
CHANGED
|
@@ -143,11 +143,11 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
143
143
|
return url;
|
|
144
144
|
}
|
|
145
145
|
};
|
|
146
|
-
const
|
|
146
|
+
const applyRewrites = (url, rewrites) => {
|
|
147
147
|
try {
|
|
148
148
|
let parsed = new URL(url);
|
|
149
|
-
for (const
|
|
150
|
-
parsed =
|
|
149
|
+
for (const rewrite of rewrites) if (rewrite.match(parsed)) {
|
|
150
|
+
parsed = rewrite.normalize(parsed);
|
|
151
151
|
break;
|
|
152
152
|
}
|
|
153
153
|
return parsed.href;
|
|
@@ -157,4 +157,4 @@ const applyPlatformHandlers = (url, platforms) => {
|
|
|
157
157
|
};
|
|
158
158
|
|
|
159
159
|
//#endregion
|
|
160
|
-
export { addMissingProtocol,
|
|
160
|
+
export { addMissingProtocol, applyRewrites, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/package.json
CHANGED