feedcanon 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/defaults.cjs +39 -49
- package/dist/defaults.d.cts +1 -2
- package/dist/defaults.d.ts +1 -2
- package/dist/defaults.js +39 -48
- package/dist/exports.cjs +4 -2
- package/dist/exports.d.cts +3 -2
- package/dist/exports.d.ts +3 -2
- package/dist/exports.js +3 -2
- package/dist/index.cjs +35 -24
- package/dist/index.js +35 -24
- package/dist/probes/wordpress.cjs +49 -0
- package/dist/probes/wordpress.d.cts +6 -0
- package/dist/probes/wordpress.d.ts +6 -0
- package/dist/probes/wordpress.js +48 -0
- package/dist/types.d.cts +7 -2
- package/dist/types.d.ts +7 -2
- package/dist/utils.cjs +40 -0
- package/dist/utils.js +38 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -41,8 +41,8 @@ This is a simplified flow. For complete details, see [How It Works](https://feed
|
|
|
41
41
|
1. Fetch the input URL and parse the feed to establish reference content.
|
|
42
42
|
2. Extract the feed's declared self URL (if present).
|
|
43
43
|
3. Validate the self URL by fetching and comparing content.
|
|
44
|
-
4. Generate URL
|
|
45
|
-
5. Test
|
|
44
|
+
4. Generate URL candidates ordered from cleanest to least clean.
|
|
45
|
+
5. Test candidates in order—the first one serving identical content wins.
|
|
46
46
|
6. Upgrade HTTP to HTTPS if both serve identical content.
|
|
47
47
|
|
|
48
48
|
### Customization
|
|
@@ -53,7 +53,7 @@ Feedcanon is designed to be flexible. Every major component can be replaced or e
|
|
|
53
53
|
- **Database lookup** — use `existsFn` to check if a URL already exists in your database.
|
|
54
54
|
- **Custom fetch** — use your own HTTP client (Axios, Got, Ky, etc.)
|
|
55
55
|
- **Custom parser** — bring your own parser (Feedsmith by default).
|
|
56
|
-
- **Custom tiers** — define your own URL normalization
|
|
56
|
+
- **Custom tiers** — define your own URL normalization tiers.
|
|
57
57
|
- **Custom platforms** — add handlers to normalize domain aliases (like FeedBurner).
|
|
58
58
|
|
|
59
59
|
## Quick Start
|
package/dist/defaults.cjs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
const require_utils = require('./utils.cjs');
|
|
1
2
|
let feedsmith = require("feedsmith");
|
|
2
3
|
|
|
3
4
|
//#region src/defaults.ts
|
|
@@ -183,15 +184,6 @@ const defaultFetch = async (url, options) => {
|
|
|
183
184
|
status: response.status
|
|
184
185
|
};
|
|
185
186
|
};
|
|
186
|
-
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
187
|
-
const neutralizeFeedUrls = (signature, url) => {
|
|
188
|
-
try {
|
|
189
|
-
const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
|
|
190
|
-
return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
|
|
191
|
-
} catch {
|
|
192
|
-
return signature;
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
187
|
const retrieveSelfLink = (parsed) => {
|
|
196
188
|
switch (parsed.format) {
|
|
197
189
|
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
@@ -209,49 +201,48 @@ const defaultParser = {
|
|
|
209
201
|
return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
|
|
210
202
|
},
|
|
211
203
|
getSignature: (parsed, url) => {
|
|
212
|
-
if (parsed.format === "json") {
|
|
213
|
-
const originalSelfUrl = parsed.feed.feed_url;
|
|
214
|
-
parsed.feed.feed_url = void 0;
|
|
215
|
-
const signature$1 = JSON.stringify(parsed.feed);
|
|
216
|
-
parsed.feed.feed_url = originalSelfUrl;
|
|
217
|
-
return neutralizeFeedUrls(signature$1, url);
|
|
218
|
-
}
|
|
219
204
|
let signature;
|
|
220
|
-
let
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
parsed.
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
link.href = void 0;
|
|
242
|
-
signature = JSON.stringify(parsed.feed);
|
|
243
|
-
link.href = originalSelfUrl;
|
|
205
|
+
let contentUrl;
|
|
206
|
+
if (parsed.format === "json") {
|
|
207
|
+
contentUrl = parsed.feed.home_page_url;
|
|
208
|
+
signature = require_utils.createSignature(parsed.feed, ["feed_url"]);
|
|
209
|
+
} else {
|
|
210
|
+
const selfLink = retrieveSelfLink(parsed);
|
|
211
|
+
const savedSelfHref = selfLink?.href;
|
|
212
|
+
if (selfLink) selfLink.href = void 0;
|
|
213
|
+
if (parsed.format === "rss") {
|
|
214
|
+
contentUrl = parsed.feed.link;
|
|
215
|
+
signature = require_utils.createSignature(parsed.feed, [
|
|
216
|
+
"lastBuildDate",
|
|
217
|
+
"pubDate",
|
|
218
|
+
"link",
|
|
219
|
+
"generator"
|
|
220
|
+
]);
|
|
221
|
+
} else if (parsed.format === "rdf") {
|
|
222
|
+
contentUrl = parsed.feed.link;
|
|
223
|
+
signature = require_utils.createSignature(parsed.feed, ["link"]);
|
|
224
|
+
} else signature = require_utils.createSignature(parsed.feed, ["updated", "generator"]);
|
|
225
|
+
if (selfLink) selfLink.href = savedSelfHref;
|
|
244
226
|
}
|
|
245
|
-
|
|
246
|
-
parsed.feed.lastBuildDate = originalBuildDate;
|
|
247
|
-
parsed.feed.pubDate = originalPubDate;
|
|
248
|
-
parsed.feed.link = originalLink;
|
|
249
|
-
} else if (parsed.format === "rdf") parsed.feed.link = originalLink;
|
|
250
|
-
else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
|
|
251
|
-
return neutralizeFeedUrls(signature, url);
|
|
227
|
+
return require_utils.neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
|
|
252
228
|
}
|
|
253
229
|
};
|
|
254
230
|
const defaultTiers = [
|
|
231
|
+
{
|
|
232
|
+
stripProtocol: false,
|
|
233
|
+
stripAuthentication: false,
|
|
234
|
+
stripWww: true,
|
|
235
|
+
stripTrailingSlash: true,
|
|
236
|
+
stripRootSlash: true,
|
|
237
|
+
collapseSlashes: true,
|
|
238
|
+
stripHash: true,
|
|
239
|
+
sortQueryParams: false,
|
|
240
|
+
stripQuery: true,
|
|
241
|
+
stripEmptyQuery: true,
|
|
242
|
+
normalizeEncoding: true,
|
|
243
|
+
normalizeUnicode: true,
|
|
244
|
+
convertToPunycode: true
|
|
245
|
+
},
|
|
255
246
|
{
|
|
256
247
|
stripProtocol: false,
|
|
257
248
|
stripAuthentication: false,
|
|
@@ -304,5 +295,4 @@ exports.defaultFetch = defaultFetch;
|
|
|
304
295
|
exports.defaultNormalizeOptions = defaultNormalizeOptions;
|
|
305
296
|
exports.defaultParser = defaultParser;
|
|
306
297
|
exports.defaultStrippedParams = defaultStrippedParams;
|
|
307
|
-
exports.defaultTiers = defaultTiers;
|
|
308
|
-
exports.neutralizeFeedUrls = neutralizeFeedUrls;
|
|
298
|
+
exports.defaultTiers = defaultTiers;
|
package/dist/defaults.d.cts
CHANGED
|
@@ -4,8 +4,7 @@ import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } f
|
|
|
4
4
|
declare const defaultStrippedParams: string[];
|
|
5
5
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
6
6
|
declare const defaultFetch: FetchFn;
|
|
7
|
-
declare const neutralizeFeedUrls: (signature: string, url: string) => string;
|
|
8
7
|
declare const defaultParser: ParserAdapter<DefaultParserResult>;
|
|
9
8
|
declare const defaultTiers: Array<Tier>;
|
|
10
9
|
//#endregion
|
|
11
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers
|
|
10
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -4,8 +4,7 @@ import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } f
|
|
|
4
4
|
declare const defaultStrippedParams: string[];
|
|
5
5
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
6
6
|
declare const defaultFetch: FetchFn;
|
|
7
|
-
declare const neutralizeFeedUrls: (signature: string, url: string) => string;
|
|
8
7
|
declare const defaultParser: ParserAdapter<DefaultParserResult>;
|
|
9
8
|
declare const defaultTiers: Array<Tier>;
|
|
10
9
|
//#endregion
|
|
11
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers
|
|
10
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { createSignature, neutralizeUrls } from "./utils.js";
|
|
1
2
|
import { parseFeed } from "feedsmith";
|
|
2
3
|
|
|
3
4
|
//#region src/defaults.ts
|
|
@@ -183,15 +184,6 @@ const defaultFetch = async (url, options) => {
|
|
|
183
184
|
status: response.status
|
|
184
185
|
};
|
|
185
186
|
};
|
|
186
|
-
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
|
|
187
|
-
const neutralizeFeedUrls = (signature, url) => {
|
|
188
|
-
try {
|
|
189
|
-
const escapedHost = new URL("/", url).host.replace(/^www\./, "").replaceAll(".", "\\.");
|
|
190
|
-
return signature.replace(new RegExp(`https?://(?:www\\.)?${escapedHost}(?=[/"])(/)?`, "g"), "/").replace(trailingSlashPattern, "$1$2");
|
|
191
|
-
} catch {
|
|
192
|
-
return signature;
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
187
|
const retrieveSelfLink = (parsed) => {
|
|
196
188
|
switch (parsed.format) {
|
|
197
189
|
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
@@ -209,49 +201,48 @@ const defaultParser = {
|
|
|
209
201
|
return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
|
|
210
202
|
},
|
|
211
203
|
getSignature: (parsed, url) => {
|
|
212
|
-
if (parsed.format === "json") {
|
|
213
|
-
const originalSelfUrl = parsed.feed.feed_url;
|
|
214
|
-
parsed.feed.feed_url = void 0;
|
|
215
|
-
const signature$1 = JSON.stringify(parsed.feed);
|
|
216
|
-
parsed.feed.feed_url = originalSelfUrl;
|
|
217
|
-
return neutralizeFeedUrls(signature$1, url);
|
|
218
|
-
}
|
|
219
204
|
let signature;
|
|
220
|
-
let
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
parsed.
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
link.href = void 0;
|
|
242
|
-
signature = JSON.stringify(parsed.feed);
|
|
243
|
-
link.href = originalSelfUrl;
|
|
205
|
+
let contentUrl;
|
|
206
|
+
if (parsed.format === "json") {
|
|
207
|
+
contentUrl = parsed.feed.home_page_url;
|
|
208
|
+
signature = createSignature(parsed.feed, ["feed_url"]);
|
|
209
|
+
} else {
|
|
210
|
+
const selfLink = retrieveSelfLink(parsed);
|
|
211
|
+
const savedSelfHref = selfLink?.href;
|
|
212
|
+
if (selfLink) selfLink.href = void 0;
|
|
213
|
+
if (parsed.format === "rss") {
|
|
214
|
+
contentUrl = parsed.feed.link;
|
|
215
|
+
signature = createSignature(parsed.feed, [
|
|
216
|
+
"lastBuildDate",
|
|
217
|
+
"pubDate",
|
|
218
|
+
"link",
|
|
219
|
+
"generator"
|
|
220
|
+
]);
|
|
221
|
+
} else if (parsed.format === "rdf") {
|
|
222
|
+
contentUrl = parsed.feed.link;
|
|
223
|
+
signature = createSignature(parsed.feed, ["link"]);
|
|
224
|
+
} else signature = createSignature(parsed.feed, ["updated", "generator"]);
|
|
225
|
+
if (selfLink) selfLink.href = savedSelfHref;
|
|
244
226
|
}
|
|
245
|
-
|
|
246
|
-
parsed.feed.lastBuildDate = originalBuildDate;
|
|
247
|
-
parsed.feed.pubDate = originalPubDate;
|
|
248
|
-
parsed.feed.link = originalLink;
|
|
249
|
-
} else if (parsed.format === "rdf") parsed.feed.link = originalLink;
|
|
250
|
-
else if (parsed.format === "atom") parsed.feed.updated = originalBuildDate;
|
|
251
|
-
return neutralizeFeedUrls(signature, url);
|
|
227
|
+
return neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
|
|
252
228
|
}
|
|
253
229
|
};
|
|
254
230
|
const defaultTiers = [
|
|
231
|
+
{
|
|
232
|
+
stripProtocol: false,
|
|
233
|
+
stripAuthentication: false,
|
|
234
|
+
stripWww: true,
|
|
235
|
+
stripTrailingSlash: true,
|
|
236
|
+
stripRootSlash: true,
|
|
237
|
+
collapseSlashes: true,
|
|
238
|
+
stripHash: true,
|
|
239
|
+
sortQueryParams: false,
|
|
240
|
+
stripQuery: true,
|
|
241
|
+
stripEmptyQuery: true,
|
|
242
|
+
normalizeEncoding: true,
|
|
243
|
+
normalizeUnicode: true,
|
|
244
|
+
convertToPunycode: true
|
|
245
|
+
},
|
|
255
246
|
{
|
|
256
247
|
stripProtocol: false,
|
|
257
248
|
stripAuthentication: false,
|
|
@@ -300,4 +291,4 @@ const defaultTiers = [
|
|
|
300
291
|
];
|
|
301
292
|
|
|
302
293
|
//#endregion
|
|
303
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers
|
|
294
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
|
package/dist/exports.cjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
const require_defaults = require('./defaults.cjs');
|
|
2
1
|
const require_utils = require('./utils.cjs');
|
|
2
|
+
const require_defaults = require('./defaults.cjs');
|
|
3
3
|
const require_index = require('./index.cjs');
|
|
4
|
+
const require_wordpress = require('./probes/wordpress.cjs');
|
|
4
5
|
const require_blogger = require('./rewrites/blogger.cjs');
|
|
5
6
|
const require_feedburner = require('./rewrites/feedburner.cjs');
|
|
6
7
|
|
|
@@ -15,4 +16,5 @@ exports.findCanonical = require_index.findCanonical;
|
|
|
15
16
|
exports.fixMalformedProtocol = require_utils.fixMalformedProtocol;
|
|
16
17
|
exports.normalizeUrl = require_utils.normalizeUrl;
|
|
17
18
|
exports.resolveFeedProtocol = require_utils.resolveFeedProtocol;
|
|
18
|
-
exports.resolveUrl = require_utils.resolveUrl;
|
|
19
|
+
exports.resolveUrl = require_utils.resolveUrl;
|
|
20
|
+
exports.wordpressProbe = require_wordpress.wordpressProbe;
|
package/dist/exports.d.cts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.cjs";
|
|
1
|
+
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.cjs";
|
|
2
2
|
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
3
3
|
import { findCanonical } from "./index.cjs";
|
|
4
|
+
import { wordpressProbe } from "./probes/wordpress.cjs";
|
|
4
5
|
import { bloggerRewrite } from "./rewrites/blogger.cjs";
|
|
5
6
|
import { feedburnerRewrite } from "./rewrites/feedburner.cjs";
|
|
6
7
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
|
|
7
|
-
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
8
|
+
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
|
package/dist/exports.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite } from "./types.js";
|
|
1
|
+
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.js";
|
|
2
2
|
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
import { findCanonical } from "./index.js";
|
|
4
|
+
import { wordpressProbe } from "./probes/wordpress.js";
|
|
4
5
|
import { bloggerRewrite } from "./rewrites/blogger.js";
|
|
5
6
|
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
6
7
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
7
|
-
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Rewrite, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
8
|
+
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
|
package/dist/exports.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
1
|
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
import { findCanonical } from "./index.js";
|
|
4
|
+
import { wordpressProbe } from "./probes/wordpress.js";
|
|
4
5
|
import { bloggerRewrite } from "./rewrites/blogger.js";
|
|
5
6
|
import { feedburnerRewrite } from "./rewrites/feedburner.js";
|
|
6
7
|
|
|
7
|
-
export { addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
8
|
+
export { addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
|
package/dist/index.cjs
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
const require_defaults = require('./defaults.cjs');
|
|
2
1
|
const require_utils = require('./utils.cjs');
|
|
2
|
+
const require_defaults = require('./defaults.cjs');
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
6
|
+
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites, probes, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
|
-
return
|
|
8
|
+
return require_utils.normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
10
10
|
sortQueryParams: true,
|
|
11
11
|
stripEmptyQuery: true
|
|
12
|
-
})
|
|
12
|
+
});
|
|
13
13
|
};
|
|
14
14
|
const resolveAndApplyRewrites = (url, baseUrl) => {
|
|
15
15
|
const resolved = require_utils.resolveUrl(url, baseUrl);
|
|
@@ -73,7 +73,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
73
73
|
if (!await compareWithInitialResponse(response.body, response.url)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
|
-
let
|
|
76
|
+
let candidateSourceUrl = initialResponseUrl;
|
|
77
77
|
if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
|
|
78
78
|
const urlsToTry = [selfRequestUrl];
|
|
79
79
|
if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
|
|
@@ -86,42 +86,53 @@ async function findCanonical(inputUrl, options) {
|
|
|
86
86
|
response,
|
|
87
87
|
feed: initialResponseFeed
|
|
88
88
|
});
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
|
|
90
|
+
candidateSourceUrl = stripParams(candidateSourceUrl);
|
|
91
91
|
break;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
if (probes?.length) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
96
|
+
const response = await fetchAndCompare(candidateUrl);
|
|
97
|
+
if (response) {
|
|
98
|
+
onMatch?.({
|
|
99
|
+
url: candidateUrl,
|
|
100
|
+
response,
|
|
101
|
+
feed: initialResponseFeed
|
|
102
|
+
});
|
|
103
|
+
return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
|
|
104
|
+
}
|
|
105
|
+
});
|
|
106
|
+
const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(require_utils.normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
|
|
107
|
+
candidateUrls.add(candidateSourceUrl);
|
|
108
|
+
let winningUrl = candidateSourceUrl;
|
|
109
|
+
for (const candidateUrl of candidateUrls) {
|
|
99
110
|
if (existsFn) {
|
|
100
|
-
const data = await existsFn(
|
|
111
|
+
const data = await existsFn(candidateUrl);
|
|
101
112
|
if (data !== void 0) {
|
|
102
113
|
onExists?.({
|
|
103
|
-
url:
|
|
114
|
+
url: candidateUrl,
|
|
104
115
|
data
|
|
105
116
|
});
|
|
106
|
-
return
|
|
117
|
+
return candidateUrl;
|
|
107
118
|
}
|
|
108
119
|
}
|
|
109
|
-
if (
|
|
110
|
-
if (
|
|
120
|
+
if (candidateUrl === candidateSourceUrl) continue;
|
|
121
|
+
if (candidateUrl === initialResponseUrl) {
|
|
111
122
|
winningUrl = initialResponseUrl;
|
|
112
123
|
break;
|
|
113
124
|
}
|
|
114
|
-
const
|
|
115
|
-
if (
|
|
116
|
-
let
|
|
117
|
-
if (
|
|
118
|
-
if (
|
|
125
|
+
const candidateResponse = await fetchAndCompare(candidateUrl);
|
|
126
|
+
if (candidateResponse) {
|
|
127
|
+
let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
|
|
128
|
+
if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
|
|
129
|
+
if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
|
|
119
130
|
onMatch?.({
|
|
120
|
-
url:
|
|
121
|
-
response:
|
|
131
|
+
url: candidateUrl,
|
|
132
|
+
response: candidateResponse,
|
|
122
133
|
feed: initialResponseFeed
|
|
123
134
|
});
|
|
124
|
-
winningUrl =
|
|
135
|
+
winningUrl = candidateUrl;
|
|
125
136
|
break;
|
|
126
137
|
}
|
|
127
138
|
}
|
package/dist/index.js
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
+
import { applyProbes, applyRewrites, normalizeUrl, resolveUrl } from "./utils.js";
|
|
1
2
|
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
-
import { applyRewrites, normalizeUrl, resolveUrl } from "./utils.js";
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, rewrites, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
6
|
+
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, rewrites, probes, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
|
-
return
|
|
8
|
+
return normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
10
10
|
sortQueryParams: true,
|
|
11
11
|
stripEmptyQuery: true
|
|
12
|
-
})
|
|
12
|
+
});
|
|
13
13
|
};
|
|
14
14
|
const resolveAndApplyRewrites = (url, baseUrl) => {
|
|
15
15
|
const resolved = resolveUrl(url, baseUrl);
|
|
@@ -73,7 +73,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
73
73
|
if (!await compareWithInitialResponse(response.body, response.url)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
|
-
let
|
|
76
|
+
let candidateSourceUrl = initialResponseUrl;
|
|
77
77
|
if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
|
|
78
78
|
const urlsToTry = [selfRequestUrl];
|
|
79
79
|
if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
|
|
@@ -86,42 +86,53 @@ async function findCanonical(inputUrl, options) {
|
|
|
86
86
|
response,
|
|
87
87
|
feed: initialResponseFeed
|
|
88
88
|
});
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
|
|
90
|
+
candidateSourceUrl = stripParams(candidateSourceUrl);
|
|
91
91
|
break;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
if (probes?.length) candidateSourceUrl = await applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
96
|
+
const response = await fetchAndCompare(candidateUrl);
|
|
97
|
+
if (response) {
|
|
98
|
+
onMatch?.({
|
|
99
|
+
url: candidateUrl,
|
|
100
|
+
response,
|
|
101
|
+
feed: initialResponseFeed
|
|
102
|
+
});
|
|
103
|
+
return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
|
|
104
|
+
}
|
|
105
|
+
});
|
|
106
|
+
const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
|
|
107
|
+
candidateUrls.add(candidateSourceUrl);
|
|
108
|
+
let winningUrl = candidateSourceUrl;
|
|
109
|
+
for (const candidateUrl of candidateUrls) {
|
|
99
110
|
if (existsFn) {
|
|
100
|
-
const data = await existsFn(
|
|
111
|
+
const data = await existsFn(candidateUrl);
|
|
101
112
|
if (data !== void 0) {
|
|
102
113
|
onExists?.({
|
|
103
|
-
url:
|
|
114
|
+
url: candidateUrl,
|
|
104
115
|
data
|
|
105
116
|
});
|
|
106
|
-
return
|
|
117
|
+
return candidateUrl;
|
|
107
118
|
}
|
|
108
119
|
}
|
|
109
|
-
if (
|
|
110
|
-
if (
|
|
120
|
+
if (candidateUrl === candidateSourceUrl) continue;
|
|
121
|
+
if (candidateUrl === initialResponseUrl) {
|
|
111
122
|
winningUrl = initialResponseUrl;
|
|
112
123
|
break;
|
|
113
124
|
}
|
|
114
|
-
const
|
|
115
|
-
if (
|
|
116
|
-
let
|
|
117
|
-
if (
|
|
118
|
-
if (
|
|
125
|
+
const candidateResponse = await fetchAndCompare(candidateUrl);
|
|
126
|
+
if (candidateResponse) {
|
|
127
|
+
let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
|
|
128
|
+
if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
|
|
129
|
+
if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
|
|
119
130
|
onMatch?.({
|
|
120
|
-
url:
|
|
121
|
-
response:
|
|
131
|
+
url: candidateUrl,
|
|
132
|
+
response: candidateResponse,
|
|
122
133
|
feed: initialResponseFeed
|
|
123
134
|
});
|
|
124
|
-
winningUrl =
|
|
135
|
+
winningUrl = candidateUrl;
|
|
125
136
|
break;
|
|
126
137
|
}
|
|
127
138
|
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
|
|
2
|
+
//#region src/probes/wordpress.ts
|
|
3
|
+
const feedTypes = [
|
|
4
|
+
"atom",
|
|
5
|
+
"rss2",
|
|
6
|
+
"rss",
|
|
7
|
+
"rdf"
|
|
8
|
+
];
|
|
9
|
+
const wordpressProbe = {
|
|
10
|
+
match: (url) => {
|
|
11
|
+
const feed = url.searchParams.get("feed")?.toLowerCase();
|
|
12
|
+
if (!feed) return false;
|
|
13
|
+
const type = feed.startsWith("comments-") ? feed.slice(9) : feed;
|
|
14
|
+
return feedTypes.includes(type);
|
|
15
|
+
},
|
|
16
|
+
getCandidates: (url) => {
|
|
17
|
+
const feed = url.searchParams.get("feed")?.toLowerCase();
|
|
18
|
+
if (!feed) return [];
|
|
19
|
+
const candidates = [];
|
|
20
|
+
const isComment = feed.startsWith("comments-");
|
|
21
|
+
const type = isComment ? feed.slice(9) : feed;
|
|
22
|
+
if ((isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/).test(url.pathname)) {
|
|
23
|
+
const withoutSlash = new URL(url);
|
|
24
|
+
withoutSlash.pathname = url.pathname.replace(/\/$/, "");
|
|
25
|
+
withoutSlash.searchParams.delete("feed");
|
|
26
|
+
candidates.push(withoutSlash.href);
|
|
27
|
+
const withSlash$1 = new URL(url);
|
|
28
|
+
withSlash$1.pathname = url.pathname.replace(/\/?$/, "/");
|
|
29
|
+
withSlash$1.searchParams.delete("feed");
|
|
30
|
+
candidates.push(withSlash$1.href);
|
|
31
|
+
return candidates;
|
|
32
|
+
}
|
|
33
|
+
const basePath = url.pathname.replace(/\/$/, "");
|
|
34
|
+
const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
|
|
35
|
+
const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
|
|
36
|
+
const primary = new URL(url);
|
|
37
|
+
primary.pathname = basePath + feedPath;
|
|
38
|
+
primary.searchParams.delete("feed");
|
|
39
|
+
candidates.push(primary.href);
|
|
40
|
+
const withSlash = new URL(url);
|
|
41
|
+
withSlash.pathname = `${basePath}${feedPath}/`;
|
|
42
|
+
withSlash.searchParams.delete("feed");
|
|
43
|
+
candidates.push(withSlash.href);
|
|
44
|
+
return candidates;
|
|
45
|
+
}
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
//#endregion
|
|
49
|
+
exports.wordpressProbe = wordpressProbe;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
//#region src/probes/wordpress.ts
|
|
2
|
+
const feedTypes = [
|
|
3
|
+
"atom",
|
|
4
|
+
"rss2",
|
|
5
|
+
"rss",
|
|
6
|
+
"rdf"
|
|
7
|
+
];
|
|
8
|
+
const wordpressProbe = {
|
|
9
|
+
match: (url) => {
|
|
10
|
+
const feed = url.searchParams.get("feed")?.toLowerCase();
|
|
11
|
+
if (!feed) return false;
|
|
12
|
+
const type = feed.startsWith("comments-") ? feed.slice(9) : feed;
|
|
13
|
+
return feedTypes.includes(type);
|
|
14
|
+
},
|
|
15
|
+
getCandidates: (url) => {
|
|
16
|
+
const feed = url.searchParams.get("feed")?.toLowerCase();
|
|
17
|
+
if (!feed) return [];
|
|
18
|
+
const candidates = [];
|
|
19
|
+
const isComment = feed.startsWith("comments-");
|
|
20
|
+
const type = isComment ? feed.slice(9) : feed;
|
|
21
|
+
if ((isComment ? /\/comments\/feed(\/|$)/ : /\/feed(\/|$)/).test(url.pathname)) {
|
|
22
|
+
const withoutSlash = new URL(url);
|
|
23
|
+
withoutSlash.pathname = url.pathname.replace(/\/$/, "");
|
|
24
|
+
withoutSlash.searchParams.delete("feed");
|
|
25
|
+
candidates.push(withoutSlash.href);
|
|
26
|
+
const withSlash$1 = new URL(url);
|
|
27
|
+
withSlash$1.pathname = url.pathname.replace(/\/?$/, "/");
|
|
28
|
+
withSlash$1.searchParams.delete("feed");
|
|
29
|
+
candidates.push(withSlash$1.href);
|
|
30
|
+
return candidates;
|
|
31
|
+
}
|
|
32
|
+
const basePath = url.pathname.replace(/\/$/, "");
|
|
33
|
+
const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
|
|
34
|
+
const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
|
|
35
|
+
const primary = new URL(url);
|
|
36
|
+
primary.pathname = basePath + feedPath;
|
|
37
|
+
primary.searchParams.delete("feed");
|
|
38
|
+
candidates.push(primary.href);
|
|
39
|
+
const withSlash = new URL(url);
|
|
40
|
+
withSlash.pathname = `${basePath}${feedPath}/`;
|
|
41
|
+
withSlash.searchParams.delete("feed");
|
|
42
|
+
candidates.push(withSlash.href);
|
|
43
|
+
return candidates;
|
|
44
|
+
}
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
//#endregion
|
|
48
|
+
export { wordpressProbe };
|
package/dist/types.d.cts
CHANGED
|
@@ -11,6 +11,10 @@ type Rewrite = {
|
|
|
11
11
|
match: (url: URL) => boolean;
|
|
12
12
|
rewrite: (url: URL) => URL;
|
|
13
13
|
};
|
|
14
|
+
type Probe = {
|
|
15
|
+
match: (url: URL) => boolean;
|
|
16
|
+
getCandidates: (url: URL) => Array<string>;
|
|
17
|
+
};
|
|
14
18
|
type NormalizeOptions = {
|
|
15
19
|
stripProtocol?: boolean;
|
|
16
20
|
stripAuthentication?: boolean;
|
|
@@ -45,8 +49,9 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
|
|
|
45
49
|
parser?: ParserAdapter<TFeed>;
|
|
46
50
|
fetchFn?: FetchFn<TResponse>;
|
|
47
51
|
existsFn?: ExistsFn<TExisting>;
|
|
48
|
-
tiers?: Array<Tier>;
|
|
49
52
|
rewrites?: Array<Rewrite>;
|
|
53
|
+
probes?: Array<Probe>;
|
|
54
|
+
tiers?: Array<Tier>;
|
|
50
55
|
stripQueryParams?: Array<string>;
|
|
51
56
|
onFetch?: OnFetchFn<TResponse>;
|
|
52
57
|
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
@@ -65,4 +70,4 @@ type FetchFnResponse = {
|
|
|
65
70
|
};
|
|
66
71
|
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
67
72
|
//#endregion
|
|
68
|
-
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
|
|
73
|
+
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/types.d.ts
CHANGED
|
@@ -11,6 +11,10 @@ type Rewrite = {
|
|
|
11
11
|
match: (url: URL) => boolean;
|
|
12
12
|
rewrite: (url: URL) => URL;
|
|
13
13
|
};
|
|
14
|
+
/** A site-specific probe that proposes cleaner candidate URLs for a feed. */
type Probe = {
  /** Whether this probe applies to the given feed URL. */
  match: (url: URL) => boolean;
  /** Candidate URLs to test in order; the first one serving identical content wins. */
  getCandidates: (url: URL) => Array<string>;
};
|
|
14
18
|
type NormalizeOptions = {
|
|
15
19
|
stripProtocol?: boolean;
|
|
16
20
|
stripAuthentication?: boolean;
|
|
@@ -45,8 +49,9 @@ type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFn
|
|
|
45
49
|
parser?: ParserAdapter<TFeed>;
|
|
46
50
|
fetchFn?: FetchFn<TResponse>;
|
|
47
51
|
existsFn?: ExistsFn<TExisting>;
|
|
48
|
-
tiers?: Array<Tier>;
|
|
49
52
|
rewrites?: Array<Rewrite>;
|
|
53
|
+
probes?: Array<Probe>;
|
|
54
|
+
tiers?: Array<Tier>;
|
|
50
55
|
stripQueryParams?: Array<string>;
|
|
51
56
|
onFetch?: OnFetchFn<TResponse>;
|
|
52
57
|
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
@@ -65,4 +70,4 @@ type FetchFnResponse = {
|
|
|
65
70
|
};
|
|
66
71
|
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
67
72
|
//#endregion
|
|
68
|
-
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Rewrite, Tier };
|
|
73
|
+
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/utils.cjs
CHANGED
|
@@ -155,11 +155,51 @@ const applyRewrites = (url, rewrites) => {
|
|
|
155
155
|
return url;
|
|
156
156
|
}
|
|
157
157
|
};
|
|
158
|
+
/**
 * Run registered probes against `url` and return the first candidate that
 * `testCandidate` accepts; otherwise return the input URL unchanged.
 *
 * Only the first probe whose `match` succeeds is consulted. Any error —
 * including an unparseable `url` or a throwing probe — falls back to
 * returning `url` as-is (best-effort; probing must not break the pipeline).
 *
 * @param {string} url - Absolute URL to probe.
 * @param {Array<{match: Function, getCandidates: Function}>} probes - Probes tried in order.
 * @param {(candidate: string) => Promise<unknown>} testCandidate - Resolves
 *   truthy (the accepted value) when a candidate passes verification.
 * @returns {Promise<unknown>} The accepted result, or the original `url`.
 */
const applyProbes = async (url, probes, testCandidate) => {
	try {
		const target = new URL(url);
		const probe = probes.find((candidate) => candidate.match(target));
		if (probe) {
			for (const candidateUrl of probe.getCandidates(target)) {
				const accepted = await testCandidate(candidateUrl);
				if (accepted) return accepted;
			}
		}
		return url;
	} catch {
		return url;
	}
};
|
|
174
|
+
/**
 * Serialize `object` to JSON while omitting the given top-level `fields`.
 *
 * Works on a shallow copy, so — unlike a blank-and-restore approach — it
 * never mutates the input, cannot materialize own properties for fields
 * that were absent, and leaves the input intact even if JSON.stringify
 * throws (e.g. on circular references).
 *
 * NOTE(review): a custom `toJSON` on the input's prototype is not carried
 * over to the shallow copy — presumed irrelevant for plain parsed-feed
 * objects; confirm if callers ever pass class instances.
 *
 * @param {object} object - Value to serialize (treated as read-only).
 * @param {Array<string>} fields - Top-level property names to exclude.
 * @returns {string} JSON signature with `fields` omitted.
 */
const createSignature = (object, fields) => {
	const copy = { ...object };
	for (const key of fields) delete copy[key];
	return JSON.stringify(copy);
};
|
|
181
|
+
// Matches a trailing slash immediately before a query string or closing quote
// inside a quoted URL (absolute or root-relative), so it can be stripped.
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
/**
 * Rewrite absolute links to the given hosts as root-relative paths and strip
 * trailing slashes in quoted URLs, so equivalent feed bodies compare equal.
 *
 * @param {string} text - Raw feed text to normalize.
 * @param {Array<string>} urls - URLs whose hosts should be neutralized;
 *   unparseable entries are skipped. A leading "www." is treated as
 *   equivalent to the bare host.
 * @returns {string} Normalized text (unchanged when no usable host remains).
 */
const neutralizeUrls = (text, urls) => {
	const hosts = [];
	for (const candidate of urls) {
		try {
			// Escape dots so the host can be embedded in a RegExp.
			const host = new URL("/", candidate).host.replace(/^www\./, "").replaceAll(".", "\\.");
			if (host) hosts.push(host);
		} catch {
			// Not a valid URL — ignore this entry.
		}
	}
	if (hosts.length === 0) return text;
	const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
	const absoluteUrlPattern = new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"])(/)?`, "g");
	const relativized = text.replace(absoluteUrlPattern, "/");
	return relativized.replace(trailingSlashPattern, "$1$2");
};
|
|
158
195
|
|
|
159
196
|
//#endregion
|
|
160
197
|
exports.addMissingProtocol = addMissingProtocol;
|
|
198
|
+
exports.applyProbes = applyProbes;
|
|
161
199
|
exports.applyRewrites = applyRewrites;
|
|
200
|
+
exports.createSignature = createSignature;
|
|
162
201
|
exports.fixMalformedProtocol = fixMalformedProtocol;
|
|
202
|
+
exports.neutralizeUrls = neutralizeUrls;
|
|
163
203
|
exports.normalizeUrl = normalizeUrl;
|
|
164
204
|
exports.resolveFeedProtocol = resolveFeedProtocol;
|
|
165
205
|
exports.resolveUrl = resolveUrl;
|
package/dist/utils.js
CHANGED
|
@@ -155,6 +155,43 @@ const applyRewrites = (url, rewrites) => {
|
|
|
155
155
|
return url;
|
|
156
156
|
}
|
|
157
157
|
};
|
|
158
|
+
/**
 * Run registered probes against `url` and return the first candidate that
 * `testCandidate` accepts; otherwise return the input URL unchanged.
 *
 * Only the first probe whose `match` succeeds is consulted. Any error —
 * including an unparseable `url` or a throwing probe — falls back to
 * returning `url` as-is (best-effort; probing must not break the pipeline).
 *
 * @param {string} url - Absolute URL to probe.
 * @param {Array<{match: Function, getCandidates: Function}>} probes - Probes tried in order.
 * @param {(candidate: string) => Promise<unknown>} testCandidate - Resolves
 *   truthy (the accepted value) when a candidate passes verification.
 * @returns {Promise<unknown>} The accepted result, or the original `url`.
 */
const applyProbes = async (url, probes, testCandidate) => {
	try {
		const target = new URL(url);
		const probe = probes.find((candidate) => candidate.match(target));
		if (probe) {
			for (const candidateUrl of probe.getCandidates(target)) {
				const accepted = await testCandidate(candidateUrl);
				if (accepted) return accepted;
			}
		}
		return url;
	} catch {
		return url;
	}
};
|
|
174
|
+
/**
 * Serialize `object` to JSON while omitting the given top-level `fields`.
 *
 * Works on a shallow copy, so — unlike a blank-and-restore approach — it
 * never mutates the input, cannot materialize own properties for fields
 * that were absent, and leaves the input intact even if JSON.stringify
 * throws (e.g. on circular references).
 *
 * NOTE(review): a custom `toJSON` on the input's prototype is not carried
 * over to the shallow copy — presumed irrelevant for plain parsed-feed
 * objects; confirm if callers ever pass class instances.
 *
 * @param {object} object - Value to serialize (treated as read-only).
 * @param {Array<string>} fields - Top-level property names to exclude.
 * @returns {string} JSON signature with `fields` omitted.
 */
const createSignature = (object, fields) => {
	const copy = { ...object };
	for (const key of fields) delete copy[key];
	return JSON.stringify(copy);
};
|
|
181
|
+
// Matches a trailing slash immediately before a query string or closing quote
// inside a quoted URL (absolute or root-relative), so it can be stripped.
const trailingSlashPattern = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;
/**
 * Rewrite absolute links to the given hosts as root-relative paths and strip
 * trailing slashes in quoted URLs, so equivalent feed bodies compare equal.
 *
 * @param {string} text - Raw feed text to normalize.
 * @param {Array<string>} urls - URLs whose hosts should be neutralized;
 *   unparseable entries are skipped. A leading "www." is treated as
 *   equivalent to the bare host.
 * @returns {string} Normalized text (unchanged when no usable host remains).
 */
const neutralizeUrls = (text, urls) => {
	const hosts = [];
	for (const candidate of urls) {
		try {
			// Escape dots so the host can be embedded in a RegExp.
			const host = new URL("/", candidate).host.replace(/^www\./, "").replaceAll(".", "\\.");
			if (host) hosts.push(host);
		} catch {
			// Not a valid URL — ignore this entry.
		}
	}
	if (hosts.length === 0) return text;
	const hostPattern = hosts.length === 1 ? hosts[0] : `(?:${hosts.join("|")})`;
	const absoluteUrlPattern = new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"])(/)?`, "g");
	const relativized = text.replace(absoluteUrlPattern, "/");
	return relativized.replace(trailingSlashPattern, "$1$2");
};
|
|
158
195
|
|
|
159
196
|
//#endregion
|
|
160
|
-
export { addMissingProtocol, applyRewrites, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
197
|
+
export { addMissingProtocol, applyProbes, applyRewrites, createSignature, fixMalformedProtocol, neutralizeUrls, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/package.json
CHANGED