feedcanon 1.5.2 → 2.0.0-next.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types.d.ts +1 -1
- package/package.json +8 -21
- package/dist/defaults.cjs +0 -293
- package/dist/defaults.d.cts +0 -10
- package/dist/exports.cjs +0 -20
- package/dist/exports.d.cts +0 -8
- package/dist/index.cjs +0 -153
- package/dist/index.d.cts +0 -9
- package/dist/probes/wordpress.cjs +0 -51
- package/dist/probes/wordpress.d.cts +0 -6
- package/dist/rewrites/blogger.cjs +0 -47
- package/dist/rewrites/blogger.d.cts +0 -6
- package/dist/rewrites/feedburner.cjs +0 -27
- package/dist/rewrites/feedburner.d.cts +0 -6
- package/dist/types.d.cts +0 -74
- package/dist/utils.cjs +0 -208
- package/dist/utils.d.cts +0 -10
package/dist/types.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as _$feedsmith from "feedsmith";
|
|
|
2
2
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
4
|
type MaybePromise<T> = T | Promise<T>;
|
|
5
|
-
type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed
|
|
5
|
+
type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed<string>>;
|
|
6
6
|
type ParserAdapter<T> = {
|
|
7
7
|
parse: (body: string) => MaybePromise<T | undefined>;
|
|
8
8
|
getSelfUrl: (parsed: T) => string | undefined;
|
package/package.json
CHANGED
|
@@ -24,24 +24,12 @@
|
|
|
24
24
|
"type": "module",
|
|
25
25
|
"exports": {
|
|
26
26
|
".": {
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
"default": "./dist/exports.js"
|
|
30
|
-
},
|
|
31
|
-
"require": {
|
|
32
|
-
"types": "./dist/exports.d.cts",
|
|
33
|
-
"default": "./dist/exports.cjs"
|
|
34
|
-
}
|
|
27
|
+
"types": "./dist/exports.d.ts",
|
|
28
|
+
"default": "./dist/exports.js"
|
|
35
29
|
},
|
|
36
30
|
"./defaults": {
|
|
37
|
-
"
|
|
38
|
-
|
|
39
|
-
"default": "./dist/defaults.js"
|
|
40
|
-
},
|
|
41
|
-
"require": {
|
|
42
|
-
"types": "./dist/defaults.d.cts",
|
|
43
|
-
"default": "./dist/defaults.cjs"
|
|
44
|
-
}
|
|
31
|
+
"types": "./dist/defaults.d.ts",
|
|
32
|
+
"default": "./dist/defaults.js"
|
|
45
33
|
}
|
|
46
34
|
},
|
|
47
35
|
"files": [
|
|
@@ -49,14 +37,13 @@
|
|
|
49
37
|
],
|
|
50
38
|
"scripts": {
|
|
51
39
|
"prepare": "lefthook install",
|
|
52
|
-
"build": "tsdown src/exports.ts src/defaults.ts --format
|
|
40
|
+
"build": "tsdown src/exports.ts src/defaults.ts --format esm --dts --clean --unbundle --no-fixed-extension",
|
|
53
41
|
"docs:dev": "vitepress dev docs",
|
|
54
42
|
"docs:build": "vitepress build docs"
|
|
55
43
|
},
|
|
56
44
|
"dependencies": {
|
|
57
|
-
"entities": "^
|
|
58
|
-
"feedsmith": "^
|
|
59
|
-
"typescript": "^6.0.2"
|
|
45
|
+
"entities": "^8.0.0",
|
|
46
|
+
"feedsmith": "^3.0.0-beta.1"
|
|
60
47
|
},
|
|
61
48
|
"devDependencies": {
|
|
62
49
|
"@types/bun": "^1.3.11",
|
|
@@ -64,5 +51,5 @@
|
|
|
64
51
|
"tsdown": "^0.21.7",
|
|
65
52
|
"vitepress": "^2.0.0-alpha.17"
|
|
66
53
|
},
|
|
67
|
-
"version": "
|
|
54
|
+
"version": "2.0.0-next.2"
|
|
68
55
|
}
|
package/dist/defaults.cjs
DELETED
|
@@ -1,293 +0,0 @@
|
|
|
1
|
-
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
2
|
-
const require_utils = require("./utils.cjs");
|
|
3
|
-
let feedsmith = require("feedsmith");
|
|
4
|
-
//#region src/defaults.ts
|
|
5
|
-
const defaultStrippedParams = [
|
|
6
|
-
"utm_source",
|
|
7
|
-
"utm_medium",
|
|
8
|
-
"utm_campaign",
|
|
9
|
-
"utm_term",
|
|
10
|
-
"utm_content",
|
|
11
|
-
"utm_reader",
|
|
12
|
-
"utm_name",
|
|
13
|
-
"utm_cid",
|
|
14
|
-
"utm_viz_id",
|
|
15
|
-
"gclid",
|
|
16
|
-
"dclid",
|
|
17
|
-
"gbraid",
|
|
18
|
-
"wbraid",
|
|
19
|
-
"gclsrc",
|
|
20
|
-
"gad_source",
|
|
21
|
-
"gad_campaignid",
|
|
22
|
-
"srsltid",
|
|
23
|
-
"fbclid",
|
|
24
|
-
"fb_action_ids",
|
|
25
|
-
"fb_action_types",
|
|
26
|
-
"fb_source",
|
|
27
|
-
"fb_ref",
|
|
28
|
-
"_ga",
|
|
29
|
-
"_gl",
|
|
30
|
-
"_bk",
|
|
31
|
-
"_ke",
|
|
32
|
-
"mc_cid",
|
|
33
|
-
"mc_eid",
|
|
34
|
-
"mc_tc",
|
|
35
|
-
"mkt_tok",
|
|
36
|
-
"msclkid",
|
|
37
|
-
"twclid",
|
|
38
|
-
"ttclid",
|
|
39
|
-
"igshid",
|
|
40
|
-
"mtm_campaign",
|
|
41
|
-
"mtm_cid",
|
|
42
|
-
"mtm_content",
|
|
43
|
-
"mtm_group",
|
|
44
|
-
"mtm_keyword",
|
|
45
|
-
"mtm_medium",
|
|
46
|
-
"mtm_placement",
|
|
47
|
-
"mtm_source",
|
|
48
|
-
"pk_campaign",
|
|
49
|
-
"pk_cid",
|
|
50
|
-
"pk_content",
|
|
51
|
-
"pk_keyword",
|
|
52
|
-
"pk_medium",
|
|
53
|
-
"pk_source",
|
|
54
|
-
"ncid",
|
|
55
|
-
"sr_share",
|
|
56
|
-
"hsa_acc",
|
|
57
|
-
"hsa_ad",
|
|
58
|
-
"hsa_cam",
|
|
59
|
-
"hsa_grp",
|
|
60
|
-
"hsa_kw",
|
|
61
|
-
"hsa_mt",
|
|
62
|
-
"hsa_net",
|
|
63
|
-
"hsa_src",
|
|
64
|
-
"hsa_tgt",
|
|
65
|
-
"hsa_ver",
|
|
66
|
-
"hsCtaTracking",
|
|
67
|
-
"_hsenc",
|
|
68
|
-
"_hsmi",
|
|
69
|
-
"__hstc",
|
|
70
|
-
"__hsfp",
|
|
71
|
-
"__hssc",
|
|
72
|
-
"cid",
|
|
73
|
-
"s_kwcid",
|
|
74
|
-
"sc_cid",
|
|
75
|
-
"ef_id",
|
|
76
|
-
"obOrigUrl",
|
|
77
|
-
"dicbo",
|
|
78
|
-
"yclid",
|
|
79
|
-
"ysclid",
|
|
80
|
-
"_openstat",
|
|
81
|
-
"awinaffid",
|
|
82
|
-
"awinmid",
|
|
83
|
-
"clickref",
|
|
84
|
-
"afftrack",
|
|
85
|
-
"itm_source",
|
|
86
|
-
"itm_medium",
|
|
87
|
-
"itm_campaign",
|
|
88
|
-
"itm_content",
|
|
89
|
-
"itm_channel",
|
|
90
|
-
"itm_audience",
|
|
91
|
-
"int_source",
|
|
92
|
-
"int_medium",
|
|
93
|
-
"int_campaign",
|
|
94
|
-
"int_content",
|
|
95
|
-
"int_placement",
|
|
96
|
-
"int_campaign_type",
|
|
97
|
-
"int_keycode",
|
|
98
|
-
"g2i_source",
|
|
99
|
-
"g2i_medium",
|
|
100
|
-
"g2i_campaign",
|
|
101
|
-
"g2i_or_o",
|
|
102
|
-
"g2i_or_p",
|
|
103
|
-
"doing_wp_cron",
|
|
104
|
-
"preview",
|
|
105
|
-
"preview_id",
|
|
106
|
-
"preview_nonce",
|
|
107
|
-
"replytocom",
|
|
108
|
-
"_",
|
|
109
|
-
"timestamp",
|
|
110
|
-
"ts",
|
|
111
|
-
"cb",
|
|
112
|
-
"cachebuster",
|
|
113
|
-
"nocache",
|
|
114
|
-
"rand",
|
|
115
|
-
"random",
|
|
116
|
-
"sbdcrw",
|
|
117
|
-
"forceByPassCache",
|
|
118
|
-
"sucurianticache",
|
|
119
|
-
"cleancache",
|
|
120
|
-
"rebuildcache",
|
|
121
|
-
"kontrol_health_check_timestamp",
|
|
122
|
-
"action_object_map",
|
|
123
|
-
"action_ref_map",
|
|
124
|
-
"action_type_map",
|
|
125
|
-
"algo_expid",
|
|
126
|
-
"algo_pvid",
|
|
127
|
-
"at_campaign",
|
|
128
|
-
"at_custom1",
|
|
129
|
-
"at_custom2",
|
|
130
|
-
"at_custom3",
|
|
131
|
-
"at_custom4",
|
|
132
|
-
"at_medium",
|
|
133
|
-
"at_preview_index",
|
|
134
|
-
"_bhlid",
|
|
135
|
-
"_branch_match_id",
|
|
136
|
-
"_branch_referrer",
|
|
137
|
-
"__readwiseLocation",
|
|
138
|
-
"campaign_id",
|
|
139
|
-
"click_sum",
|
|
140
|
-
"fref",
|
|
141
|
-
"gs_l",
|
|
142
|
-
"hmb_campaign",
|
|
143
|
-
"hmb_medium",
|
|
144
|
-
"hmb_source",
|
|
145
|
-
"ml_subscriber",
|
|
146
|
-
"ml_subscriber_hash",
|
|
147
|
-
"oly_anon_id",
|
|
148
|
-
"oly_enc_id",
|
|
149
|
-
"rb_clickid",
|
|
150
|
-
"referer",
|
|
151
|
-
"referrer",
|
|
152
|
-
"spm",
|
|
153
|
-
"trk",
|
|
154
|
-
"vero_conv",
|
|
155
|
-
"vero_id",
|
|
156
|
-
"wickedid",
|
|
157
|
-
"xtor"
|
|
158
|
-
];
|
|
159
|
-
const defaultNormalizeOptions = {
|
|
160
|
-
stripProtocol: true,
|
|
161
|
-
stripAuthentication: false,
|
|
162
|
-
stripWww: true,
|
|
163
|
-
stripTrailingSlash: true,
|
|
164
|
-
stripRootSlash: true,
|
|
165
|
-
collapseSlashes: true,
|
|
166
|
-
stripHash: true,
|
|
167
|
-
sortQueryParams: true,
|
|
168
|
-
stripQueryParams: defaultStrippedParams,
|
|
169
|
-
stripQuery: false,
|
|
170
|
-
stripEmptyQuery: true,
|
|
171
|
-
lowercaseQuery: false,
|
|
172
|
-
normalizeEncoding: true,
|
|
173
|
-
normalizeUnicode: true
|
|
174
|
-
};
|
|
175
|
-
const defaultFetch = async (url, options) => {
|
|
176
|
-
const response = await fetch(url, {
|
|
177
|
-
method: options?.method ?? "GET",
|
|
178
|
-
headers: options?.headers
|
|
179
|
-
});
|
|
180
|
-
return {
|
|
181
|
-
headers: response.headers,
|
|
182
|
-
body: await response.text(),
|
|
183
|
-
url: response.url,
|
|
184
|
-
status: response.status
|
|
185
|
-
};
|
|
186
|
-
};
|
|
187
|
-
const retrieveSelfLink = (parsed) => {
|
|
188
|
-
switch (parsed.format) {
|
|
189
|
-
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
190
|
-
case "rss":
|
|
191
|
-
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self");
|
|
192
|
-
}
|
|
193
|
-
};
|
|
194
|
-
const defaultParser = {
|
|
195
|
-
parse: (body) => {
|
|
196
|
-
try {
|
|
197
|
-
return (0, feedsmith.parseFeed)(body);
|
|
198
|
-
} catch {}
|
|
199
|
-
},
|
|
200
|
-
getSelfUrl: (parsed) => {
|
|
201
|
-
return parsed.format === "json" ? parsed.feed.feed_url : retrieveSelfLink(parsed)?.href;
|
|
202
|
-
},
|
|
203
|
-
getSignature: (parsed, url) => {
|
|
204
|
-
let signature;
|
|
205
|
-
let contentUrl;
|
|
206
|
-
if (parsed.format === "json") {
|
|
207
|
-
contentUrl = parsed.feed.home_page_url;
|
|
208
|
-
signature = require_utils.createSignature(parsed.feed, ["feed_url"]);
|
|
209
|
-
} else {
|
|
210
|
-
const selfLink = retrieveSelfLink(parsed);
|
|
211
|
-
const savedSelfHref = selfLink?.href;
|
|
212
|
-
if (selfLink) selfLink.href = void 0;
|
|
213
|
-
if (parsed.format === "rss") {
|
|
214
|
-
contentUrl = parsed.feed.link;
|
|
215
|
-
signature = require_utils.createSignature(parsed.feed, [
|
|
216
|
-
"lastBuildDate",
|
|
217
|
-
"pubDate",
|
|
218
|
-
"link",
|
|
219
|
-
"generator"
|
|
220
|
-
]);
|
|
221
|
-
} else if (parsed.format === "rdf") {
|
|
222
|
-
contentUrl = parsed.feed.link;
|
|
223
|
-
signature = require_utils.createSignature(parsed.feed, ["link"]);
|
|
224
|
-
} else signature = require_utils.createSignature(parsed.feed, ["updated", "generator"]);
|
|
225
|
-
if (selfLink) selfLink.href = savedSelfHref;
|
|
226
|
-
}
|
|
227
|
-
return require_utils.neutralizeUrls(signature, contentUrl ? [url, contentUrl] : [url]);
|
|
228
|
-
}
|
|
229
|
-
};
|
|
230
|
-
const defaultTiers = [
|
|
231
|
-
{
|
|
232
|
-
stripProtocol: false,
|
|
233
|
-
stripAuthentication: false,
|
|
234
|
-
stripWww: true,
|
|
235
|
-
stripTrailingSlash: true,
|
|
236
|
-
stripRootSlash: true,
|
|
237
|
-
collapseSlashes: true,
|
|
238
|
-
stripHash: true,
|
|
239
|
-
sortQueryParams: false,
|
|
240
|
-
stripQuery: true,
|
|
241
|
-
stripEmptyQuery: true,
|
|
242
|
-
normalizeEncoding: true,
|
|
243
|
-
normalizeUnicode: true
|
|
244
|
-
},
|
|
245
|
-
{
|
|
246
|
-
stripProtocol: false,
|
|
247
|
-
stripAuthentication: false,
|
|
248
|
-
stripWww: true,
|
|
249
|
-
stripTrailingSlash: true,
|
|
250
|
-
stripRootSlash: true,
|
|
251
|
-
collapseSlashes: true,
|
|
252
|
-
stripHash: true,
|
|
253
|
-
sortQueryParams: true,
|
|
254
|
-
stripQuery: false,
|
|
255
|
-
stripEmptyQuery: true,
|
|
256
|
-
normalizeEncoding: true,
|
|
257
|
-
normalizeUnicode: true
|
|
258
|
-
},
|
|
259
|
-
{
|
|
260
|
-
stripProtocol: false,
|
|
261
|
-
stripAuthentication: false,
|
|
262
|
-
stripWww: false,
|
|
263
|
-
stripTrailingSlash: true,
|
|
264
|
-
stripRootSlash: true,
|
|
265
|
-
collapseSlashes: true,
|
|
266
|
-
stripHash: true,
|
|
267
|
-
sortQueryParams: true,
|
|
268
|
-
stripQuery: false,
|
|
269
|
-
stripEmptyQuery: true,
|
|
270
|
-
normalizeEncoding: true,
|
|
271
|
-
normalizeUnicode: true
|
|
272
|
-
},
|
|
273
|
-
{
|
|
274
|
-
stripProtocol: false,
|
|
275
|
-
stripAuthentication: false,
|
|
276
|
-
stripWww: false,
|
|
277
|
-
stripTrailingSlash: false,
|
|
278
|
-
stripRootSlash: true,
|
|
279
|
-
collapseSlashes: true,
|
|
280
|
-
stripHash: true,
|
|
281
|
-
sortQueryParams: true,
|
|
282
|
-
stripQuery: false,
|
|
283
|
-
stripEmptyQuery: true,
|
|
284
|
-
normalizeEncoding: true,
|
|
285
|
-
normalizeUnicode: true
|
|
286
|
-
}
|
|
287
|
-
];
|
|
288
|
-
//#endregion
|
|
289
|
-
exports.defaultFetch = defaultFetch;
|
|
290
|
-
exports.defaultNormalizeOptions = defaultNormalizeOptions;
|
|
291
|
-
exports.defaultParser = defaultParser;
|
|
292
|
-
exports.defaultStrippedParams = defaultStrippedParams;
|
|
293
|
-
exports.defaultTiers = defaultTiers;
|
package/dist/defaults.d.cts
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import { DefaultParserResult, FetchFn, NormalizeOptions, ParserAdapter, Tier } from "./types.cjs";
|
|
2
|
-
|
|
3
|
-
//#region src/defaults.d.ts
|
|
4
|
-
declare const defaultStrippedParams: string[];
|
|
5
|
-
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
6
|
-
declare const defaultFetch: FetchFn;
|
|
7
|
-
declare const defaultParser: ParserAdapter<DefaultParserResult>;
|
|
8
|
-
declare const defaultTiers: Array<Tier>;
|
|
9
|
-
//#endregion
|
|
10
|
-
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultStrippedParams, defaultTiers };
|
package/dist/exports.cjs
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
2
|
-
const require_utils = require("./utils.cjs");
|
|
3
|
-
const require_defaults = require("./defaults.cjs");
|
|
4
|
-
const require_index = require("./index.cjs");
|
|
5
|
-
const require_wordpress = require("./probes/wordpress.cjs");
|
|
6
|
-
const require_blogger = require("./rewrites/blogger.cjs");
|
|
7
|
-
const require_feedburner = require("./rewrites/feedburner.cjs");
|
|
8
|
-
exports.addMissingProtocol = require_utils.addMissingProtocol;
|
|
9
|
-
exports.bloggerRewrite = require_blogger.bloggerRewrite;
|
|
10
|
-
exports.defaultFetch = require_defaults.defaultFetch;
|
|
11
|
-
exports.defaultParser = require_defaults.defaultParser;
|
|
12
|
-
exports.defaultStrippedParams = require_defaults.defaultStrippedParams;
|
|
13
|
-
exports.defaultTiers = require_defaults.defaultTiers;
|
|
14
|
-
exports.feedburnerRewrite = require_feedburner.feedburnerRewrite;
|
|
15
|
-
exports.findCanonical = require_index.findCanonical;
|
|
16
|
-
exports.fixMalformedProtocol = require_utils.fixMalformedProtocol;
|
|
17
|
-
exports.normalizeUrl = require_utils.normalizeUrl;
|
|
18
|
-
exports.resolveFeedProtocol = require_utils.resolveFeedProtocol;
|
|
19
|
-
exports.resolveUrl = require_utils.resolveUrl;
|
|
20
|
-
exports.wordpressProbe = require_wordpress.wordpressProbe;
|
package/dist/exports.d.cts
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier } from "./types.cjs";
|
|
2
|
-
import { defaultFetch, defaultParser, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
3
|
-
import { findCanonical } from "./index.cjs";
|
|
4
|
-
import { wordpressProbe } from "./probes/wordpress.cjs";
|
|
5
|
-
import { bloggerRewrite } from "./rewrites/blogger.cjs";
|
|
6
|
-
import { feedburnerRewrite } from "./rewrites/feedburner.cjs";
|
|
7
|
-
import { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
|
|
8
|
-
export { type DefaultParserResult, type ExistsFn, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type Probe, type Rewrite, type Tier, addMissingProtocol, bloggerRewrite, defaultFetch, defaultParser, defaultStrippedParams, defaultTiers, feedburnerRewrite, findCanonical, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl, wordpressProbe };
|
package/dist/index.cjs
DELETED
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
const require_utils = require("./utils.cjs");
|
|
2
|
-
const require_defaults = require("./defaults.cjs");
|
|
3
|
-
//#region src/index.ts
|
|
4
|
-
async function findCanonical(inputUrl, options) {
|
|
5
|
-
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, rewrites, probes, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
6
|
-
const stripParams = (url) => {
|
|
7
|
-
return require_utils.normalizeUrl(url, {
|
|
8
|
-
stripQueryParams,
|
|
9
|
-
sortQueryParams: true,
|
|
10
|
-
stripEmptyQuery: true
|
|
11
|
-
});
|
|
12
|
-
};
|
|
13
|
-
const resolveAndApplyRewrites = (url, baseUrl) => {
|
|
14
|
-
const resolved = require_utils.resolveUrl(url, baseUrl);
|
|
15
|
-
return resolved && rewrites ? require_utils.applyRewrites(resolved, rewrites) : resolved;
|
|
16
|
-
};
|
|
17
|
-
const initialRequestUrl = resolveAndApplyRewrites(inputUrl);
|
|
18
|
-
if (!initialRequestUrl) return;
|
|
19
|
-
let initialResponse;
|
|
20
|
-
try {
|
|
21
|
-
initialResponse = await fetchFn(initialRequestUrl);
|
|
22
|
-
} catch {
|
|
23
|
-
return;
|
|
24
|
-
}
|
|
25
|
-
onFetch?.({
|
|
26
|
-
url: initialRequestUrl,
|
|
27
|
-
response: initialResponse
|
|
28
|
-
});
|
|
29
|
-
if (initialResponse.status < 200 || initialResponse.status >= 300) return;
|
|
30
|
-
const initialResponseUrlRaw = resolveAndApplyRewrites(initialResponse.url);
|
|
31
|
-
if (!initialResponseUrlRaw) return;
|
|
32
|
-
const initialResponseUrl = stripParams(initialResponseUrlRaw);
|
|
33
|
-
const initialResponseBody = initialResponse.body;
|
|
34
|
-
if (!initialResponseBody) return;
|
|
35
|
-
let initialResponseSignature;
|
|
36
|
-
let selfRequestUrl;
|
|
37
|
-
const initialResponseFeed = await parser.parse(initialResponseBody);
|
|
38
|
-
if (!initialResponseFeed) return;
|
|
39
|
-
onMatch?.({
|
|
40
|
-
url: initialRequestUrl,
|
|
41
|
-
response: initialResponse,
|
|
42
|
-
feed: initialResponseFeed
|
|
43
|
-
});
|
|
44
|
-
const selfRequestUrlRaw = parser.getSelfUrl(initialResponseFeed);
|
|
45
|
-
if (selfRequestUrlRaw) {
|
|
46
|
-
selfRequestUrl = resolveAndApplyRewrites(selfRequestUrlRaw, initialResponseUrl);
|
|
47
|
-
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
48
|
-
}
|
|
49
|
-
const compareWithInitialResponse = async (comparedResponseBody, comparedResponseUrl) => {
|
|
50
|
-
if (!comparedResponseBody) return false;
|
|
51
|
-
if (initialResponseBody === comparedResponseBody) return true;
|
|
52
|
-
const comparedResponseFeed = await parser.parse(comparedResponseBody);
|
|
53
|
-
if (comparedResponseFeed) {
|
|
54
|
-
initialResponseSignature ||= parser.getSignature(initialResponseFeed, initialResponseUrl);
|
|
55
|
-
const comparedResponseSignature = parser.getSignature(comparedResponseFeed, comparedResponseUrl);
|
|
56
|
-
return initialResponseSignature === comparedResponseSignature;
|
|
57
|
-
}
|
|
58
|
-
return false;
|
|
59
|
-
};
|
|
60
|
-
const fetchAndCompare = async (url) => {
|
|
61
|
-
let response;
|
|
62
|
-
try {
|
|
63
|
-
response = await fetchFn(url);
|
|
64
|
-
} catch {
|
|
65
|
-
return;
|
|
66
|
-
}
|
|
67
|
-
onFetch?.({
|
|
68
|
-
url,
|
|
69
|
-
response
|
|
70
|
-
});
|
|
71
|
-
if (response.status < 200 || response.status >= 300) return;
|
|
72
|
-
if (!await compareWithInitialResponse(response.body, response.url)) return;
|
|
73
|
-
return response;
|
|
74
|
-
};
|
|
75
|
-
let candidateSourceUrl = initialResponseUrl;
|
|
76
|
-
if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
|
|
77
|
-
const urlsToTry = [selfRequestUrl];
|
|
78
|
-
if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
|
|
79
|
-
else if (selfRequestUrl.startsWith("http://")) urlsToTry.push(selfRequestUrl.replace("http://", "https://"));
|
|
80
|
-
for (const urlToTry of urlsToTry) {
|
|
81
|
-
const response = await fetchAndCompare(urlToTry);
|
|
82
|
-
if (response) {
|
|
83
|
-
onMatch?.({
|
|
84
|
-
url: urlToTry,
|
|
85
|
-
response,
|
|
86
|
-
feed: initialResponseFeed
|
|
87
|
-
});
|
|
88
|
-
candidateSourceUrl = resolveAndApplyRewrites(response.url) ?? initialResponseUrl;
|
|
89
|
-
candidateSourceUrl = stripParams(candidateSourceUrl);
|
|
90
|
-
break;
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
if (probes && probes?.length > 0) candidateSourceUrl = await require_utils.applyProbes(candidateSourceUrl, probes, async (candidateUrl) => {
|
|
95
|
-
const response = await fetchAndCompare(candidateUrl);
|
|
96
|
-
if (response) {
|
|
97
|
-
onMatch?.({
|
|
98
|
-
url: candidateUrl,
|
|
99
|
-
response,
|
|
100
|
-
feed: initialResponseFeed
|
|
101
|
-
});
|
|
102
|
-
return stripParams(resolveAndApplyRewrites(response.url) ?? candidateUrl);
|
|
103
|
-
}
|
|
104
|
-
});
|
|
105
|
-
const candidateUrls = new Set(tiers.map((tier) => resolveAndApplyRewrites(require_utils.normalizeUrl(candidateSourceUrl, tier))).filter((candidateUrl) => !!candidateUrl));
|
|
106
|
-
candidateUrls.add(candidateSourceUrl);
|
|
107
|
-
let winningUrl = candidateSourceUrl;
|
|
108
|
-
for (const candidateUrl of candidateUrls) {
|
|
109
|
-
if (existsFn) {
|
|
110
|
-
const data = await existsFn(candidateUrl);
|
|
111
|
-
if (data !== void 0) {
|
|
112
|
-
onExists?.({
|
|
113
|
-
url: candidateUrl,
|
|
114
|
-
data
|
|
115
|
-
});
|
|
116
|
-
return candidateUrl;
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
if (candidateUrl === candidateSourceUrl) continue;
|
|
120
|
-
if (candidateUrl === initialResponseUrl) {
|
|
121
|
-
winningUrl = initialResponseUrl;
|
|
122
|
-
break;
|
|
123
|
-
}
|
|
124
|
-
const candidateResponse = await fetchAndCompare(candidateUrl);
|
|
125
|
-
if (candidateResponse) {
|
|
126
|
-
let candidateResponseUrl = resolveAndApplyRewrites(candidateResponse.url);
|
|
127
|
-
if (candidateResponseUrl) candidateResponseUrl = stripParams(candidateResponseUrl);
|
|
128
|
-
if (candidateResponseUrl === candidateSourceUrl || candidateResponseUrl === initialResponseUrl) continue;
|
|
129
|
-
onMatch?.({
|
|
130
|
-
url: candidateUrl,
|
|
131
|
-
response: candidateResponse,
|
|
132
|
-
feed: initialResponseFeed
|
|
133
|
-
});
|
|
134
|
-
winningUrl = candidateUrl;
|
|
135
|
-
break;
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
if (winningUrl.startsWith("http://")) {
|
|
139
|
-
const httpsUrl = winningUrl.replace("http://", "https://");
|
|
140
|
-
const response = await fetchAndCompare(httpsUrl);
|
|
141
|
-
if (response) {
|
|
142
|
-
onMatch?.({
|
|
143
|
-
url: httpsUrl,
|
|
144
|
-
response,
|
|
145
|
-
feed: initialResponseFeed
|
|
146
|
-
});
|
|
147
|
-
return httpsUrl;
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
return winningUrl;
|
|
151
|
-
}
|
|
152
|
-
//#endregion
|
|
153
|
-
exports.findCanonical = findCanonical;
|
package/dist/index.d.cts
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { DefaultParserResult, FetchFnResponse, FindCanonicalOptions, ParserAdapter } from "./types.cjs";
|
|
2
|
-
|
|
3
|
-
//#region src/index.d.ts
|
|
4
|
-
declare function findCanonical<TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options?: Omit<FindCanonicalOptions<DefaultParserResult, TResponse, TExisting>, 'parser'>): Promise<string | undefined>;
|
|
5
|
-
declare function findCanonical<TFeed, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options: FindCanonicalOptions<TFeed, TResponse, TExisting> & {
|
|
6
|
-
parser: ParserAdapter<TFeed>;
|
|
7
|
-
}): Promise<string | undefined>;
|
|
8
|
-
//#endregion
|
|
9
|
-
export { findCanonical };
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
//#region src/probes/wordpress.ts
|
|
2
|
-
const commentsFeedPathRegex = /\/comments\/feed(\/|$)/;
|
|
3
|
-
const feedPathRegex = /\/feed(\/|$)/;
|
|
4
|
-
const trailingSlashRegex = /\/$/;
|
|
5
|
-
const optionalTrailingSlashRegex = /\/?$/;
|
|
6
|
-
const feedTypes = [
|
|
7
|
-
"atom",
|
|
8
|
-
"rss2",
|
|
9
|
-
"rss",
|
|
10
|
-
"rdf"
|
|
11
|
-
];
|
|
12
|
-
const wordpressProbe = {
|
|
13
|
-
match: (url) => {
|
|
14
|
-
const feed = url.searchParams.get("feed")?.toLowerCase();
|
|
15
|
-
if (!feed) return false;
|
|
16
|
-
const type = feed.startsWith("comments-") ? feed.slice(9) : feed;
|
|
17
|
-
return feedTypes.includes(type);
|
|
18
|
-
},
|
|
19
|
-
getCandidates: (url) => {
|
|
20
|
-
const feed = url.searchParams.get("feed")?.toLowerCase();
|
|
21
|
-
if (!feed) return [];
|
|
22
|
-
const candidates = [];
|
|
23
|
-
const isComment = feed.startsWith("comments-");
|
|
24
|
-
const type = isComment ? feed.slice(9) : feed;
|
|
25
|
-
if ((isComment ? commentsFeedPathRegex : feedPathRegex).test(url.pathname)) {
|
|
26
|
-
const withoutSlash = new URL(url);
|
|
27
|
-
withoutSlash.pathname = url.pathname.replace(trailingSlashRegex, "");
|
|
28
|
-
withoutSlash.searchParams.delete("feed");
|
|
29
|
-
candidates.push(withoutSlash.href);
|
|
30
|
-
const withSlash = new URL(url);
|
|
31
|
-
withSlash.pathname = url.pathname.replace(optionalTrailingSlashRegex, "/");
|
|
32
|
-
withSlash.searchParams.delete("feed");
|
|
33
|
-
candidates.push(withSlash.href);
|
|
34
|
-
return candidates;
|
|
35
|
-
}
|
|
36
|
-
const basePath = url.pathname.replace(trailingSlashRegex, "");
|
|
37
|
-
const feedSegment = type === "atom" ? "/feed/atom" : "/feed";
|
|
38
|
-
const feedPath = isComment ? `/comments${feedSegment}` : feedSegment;
|
|
39
|
-
const primary = new URL(url);
|
|
40
|
-
primary.pathname = basePath + feedPath;
|
|
41
|
-
primary.searchParams.delete("feed");
|
|
42
|
-
candidates.push(primary.href);
|
|
43
|
-
const withSlash = new URL(url);
|
|
44
|
-
withSlash.pathname = `${basePath}${feedPath}/`;
|
|
45
|
-
withSlash.searchParams.delete("feed");
|
|
46
|
-
candidates.push(withSlash.href);
|
|
47
|
-
return candidates;
|
|
48
|
-
}
|
|
49
|
-
};
|
|
50
|
-
//#endregion
|
|
51
|
-
exports.wordpressProbe = wordpressProbe;
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
const require_utils = require("../utils.cjs");
|
|
2
|
-
//#region src/rewrites/blogger.ts
|
|
3
|
-
const bloggerRegex = /^(www\.|beta\.)?blogger\.com$/;
|
|
4
|
-
const blogspotRegex = /\.blogspot\.[a-z]{2,3}(\.[a-z]{2})?$/i;
|
|
5
|
-
const bloggerRewrite = {
|
|
6
|
-
match: (url) => {
|
|
7
|
-
return bloggerRegex.test(url.hostname) || blogspotRegex.test(url.hostname);
|
|
8
|
-
},
|
|
9
|
-
rewrite: (url) => {
|
|
10
|
-
const rewritten = new URL(url);
|
|
11
|
-
const isBlogger = bloggerRegex.test(rewritten.hostname);
|
|
12
|
-
const isBlogspot = blogspotRegex.test(rewritten.hostname);
|
|
13
|
-
rewritten.protocol = "https:";
|
|
14
|
-
if (isBlogger) rewritten.hostname = "www.blogger.com";
|
|
15
|
-
if (isBlogspot) {
|
|
16
|
-
rewritten.hostname = rewritten.hostname.replace(blogspotRegex, ".blogspot.com");
|
|
17
|
-
if (rewritten.pathname === "/atom.xml") rewritten.pathname = "/feeds/posts/default";
|
|
18
|
-
else if (rewritten.pathname === "/rss.xml") {
|
|
19
|
-
rewritten.pathname = "/feeds/posts/default";
|
|
20
|
-
rewritten.searchParams.set("alt", "rss");
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
rewritten.searchParams.delete("redirect");
|
|
24
|
-
const alt = rewritten.searchParams.get("alt");
|
|
25
|
-
if (alt === "atom" || alt === "json" || alt === "") rewritten.searchParams.delete("alt");
|
|
26
|
-
rewritten.searchParams.delete("v");
|
|
27
|
-
rewritten.searchParams.delete("max-results");
|
|
28
|
-
rewritten.searchParams.delete("start-index");
|
|
29
|
-
rewritten.searchParams.delete("published-min");
|
|
30
|
-
rewritten.searchParams.delete("published-max");
|
|
31
|
-
rewritten.searchParams.delete("updated-min");
|
|
32
|
-
rewritten.searchParams.delete("updated-max");
|
|
33
|
-
rewritten.searchParams.delete("orderby");
|
|
34
|
-
const normalized = require_utils.normalizeUrl(rewritten.href, {
|
|
35
|
-
stripTrailingSlash: true,
|
|
36
|
-
collapseSlashes: true,
|
|
37
|
-
stripHash: true,
|
|
38
|
-
normalizeEncoding: true,
|
|
39
|
-
normalizeUnicode: true,
|
|
40
|
-
stripEmptyQuery: true,
|
|
41
|
-
sortQueryParams: true
|
|
42
|
-
});
|
|
43
|
-
return new URL(normalized);
|
|
44
|
-
}
|
|
45
|
-
};
|
|
46
|
-
//#endregion
|
|
47
|
-
exports.bloggerRewrite = bloggerRewrite;
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
const require_utils = require("../utils.cjs");
|
|
2
|
-
//#region src/rewrites/feedburner.ts
|
|
3
|
-
const hosts = [
|
|
4
|
-
"feeds.feedburner.com",
|
|
5
|
-
"feeds2.feedburner.com",
|
|
6
|
-
"feedproxy.google.com"
|
|
7
|
-
];
|
|
8
|
-
const feedburnerRewrite = {
|
|
9
|
-
match: (url) => {
|
|
10
|
-
return hosts.includes(url.hostname);
|
|
11
|
-
},
|
|
12
|
-
rewrite: (url) => {
|
|
13
|
-
const rewritten = new URL(url);
|
|
14
|
-
rewritten.hostname = "feeds.feedburner.com";
|
|
15
|
-
rewritten.search = "";
|
|
16
|
-
const normalized = require_utils.normalizeUrl(rewritten.href, {
|
|
17
|
-
stripTrailingSlash: true,
|
|
18
|
-
collapseSlashes: true,
|
|
19
|
-
stripHash: true,
|
|
20
|
-
normalizeEncoding: true,
|
|
21
|
-
normalizeUnicode: true
|
|
22
|
-
});
|
|
23
|
-
return new URL(normalized);
|
|
24
|
-
}
|
|
25
|
-
};
|
|
26
|
-
//#endregion
|
|
27
|
-
exports.feedburnerRewrite = feedburnerRewrite;
|
package/dist/types.d.cts
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import * as _$feedsmith from "feedsmith";
|
|
2
|
-
|
|
3
|
-
//#region src/types.d.ts
|
|
4
|
-
type MaybePromise<T> = T | Promise<T>;
|
|
5
|
-
type DefaultParserResult = ReturnType<typeof _$feedsmith.parseFeed>;
|
|
6
|
-
type ParserAdapter<T> = {
|
|
7
|
-
parse: (body: string) => MaybePromise<T | undefined>;
|
|
8
|
-
getSelfUrl: (parsed: T) => string | undefined;
|
|
9
|
-
getSignature: (parsed: T, url: string) => string;
|
|
10
|
-
};
|
|
11
|
-
type Rewrite = {
|
|
12
|
-
match: (url: URL) => boolean;
|
|
13
|
-
rewrite: (url: URL) => URL;
|
|
14
|
-
};
|
|
15
|
-
type Probe = {
|
|
16
|
-
match: (url: URL) => boolean;
|
|
17
|
-
getCandidates: (url: URL) => Array<string>;
|
|
18
|
-
};
|
|
19
|
-
type NormalizeOptions = {
|
|
20
|
-
stripProtocol?: boolean;
|
|
21
|
-
stripAuthentication?: boolean;
|
|
22
|
-
stripWww?: boolean;
|
|
23
|
-
stripTrailingSlash?: boolean;
|
|
24
|
-
stripRootSlash?: boolean;
|
|
25
|
-
collapseSlashes?: boolean;
|
|
26
|
-
stripHash?: boolean;
|
|
27
|
-
sortQueryParams?: boolean;
|
|
28
|
-
stripQueryParams?: Array<string>;
|
|
29
|
-
stripQuery?: boolean;
|
|
30
|
-
stripEmptyQuery?: boolean;
|
|
31
|
-
lowercaseQuery?: boolean;
|
|
32
|
-
normalizeEncoding?: boolean;
|
|
33
|
-
normalizeUnicode?: boolean;
|
|
34
|
-
};
|
|
35
|
-
type Tier = Omit<NormalizeOptions, 'stripQueryParams'>;
|
|
36
|
-
type OnFetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
|
|
37
|
-
url: string;
|
|
38
|
-
response: TResponse;
|
|
39
|
-
}) => void;
|
|
40
|
-
type OnMatchFn<TFeed = unknown, TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
|
|
41
|
-
url: string;
|
|
42
|
-
response: TResponse;
|
|
43
|
-
feed: TFeed;
|
|
44
|
-
}) => void;
|
|
45
|
-
type OnExistsFn<T> = (data: {
|
|
46
|
-
url: string;
|
|
47
|
-
data: T;
|
|
48
|
-
}) => void;
|
|
49
|
-
type FindCanonicalOptions<TFeed = DefaultParserResult, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown> = {
|
|
50
|
-
parser?: ParserAdapter<TFeed>;
|
|
51
|
-
fetchFn?: FetchFn<TResponse>;
|
|
52
|
-
existsFn?: ExistsFn<TExisting>;
|
|
53
|
-
rewrites?: Array<Rewrite>;
|
|
54
|
-
probes?: Array<Probe>;
|
|
55
|
-
tiers?: Array<Tier>;
|
|
56
|
-
stripQueryParams?: Array<string>;
|
|
57
|
-
onFetch?: OnFetchFn<TResponse>;
|
|
58
|
-
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
59
|
-
onExists?: OnExistsFn<TExisting>;
|
|
60
|
-
};
|
|
61
|
-
/** Options forwarded to a FetchFn. */
type FetchFnOptions = {
  method?: 'GET' | 'HEAD';
  headers?: Record<string, string>;
};
/** Looks up a URL in caller-side storage; resolves to stored data, or undefined when absent. */
type ExistsFn<T = unknown> = (url: string) => MaybePromise<T | undefined>;
/** Minimal response shape a FetchFn must produce. */
type FetchFnResponse = {
  headers: Headers;
  body: string;
  /** Response URL — presumably the final URL after redirects; confirm against the default fetch implementation. */
  url: string;
  status: number;
};
/** Transport function: fetches a URL and resolves to a FetchFnResponse (or a subtype). */
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => MaybePromise<TResponse>;
//#endregion
export { DefaultParserResult, ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, Probe, Rewrite, Tier };
|
package/dist/utils.cjs
DELETED
|
@@ -1,208 +0,0 @@
|
|
|
1
|
-
const require_defaults = require("./defaults.cjs");
|
|
2
|
-
let entities = require("entities");
|
|
3
|
-
//#region src/utils.ts
|
|
4
|
-
// Memo of lowercased parameter-name Sets, keyed by the caller's array
// instance (WeakMap: entries vanish when the array is garbage-collected).
const strippedParamsCache = /* @__PURE__ */ new WeakMap();

/**
 * Returns a Set containing the given parameter names lowercased.
 * Memoized per array instance, so repeated calls with the same array
 * reuse one Set.
 */
const getStrippedParamsSet = (params) => {
	const existing = strippedParamsCache.get(params);
	if (existing) return existing;
	const lowered = new Set();
	for (const param of params) lowered.add(param.toLowerCase());
	strippedParamsCache.set(params, lowered);
	return lowered;
};
|
|
13
|
-
// Dotted-quad IPv4 literal (octet value ranges are not validated).
const ipv4Regex = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
// Loose IPv6 literal: 2-7 colon-separated hex groups, no surrounding brackets.
const ipv6Regex = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
// A single character that may remain percent-decoded in a URL path
// (unreserved and sub-delimiter characters plus ':' and '@').
const safePathCharsRegex = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
// Detects an 's' anywhere — distinguishes https from http in a scheme fragment.
const httpsLetterRegex = /s/i;
// Leading "http://" or "https://" prefix.
const protocolPrefixRegex = /^https?:\/\//;
// Leading "www." prefix.
const wwwPrefixRegex = /^www\./;
// URL that already starts with a sane scheme and host ("www." or another character).
const validUrlRegex = /^https?:\/\/(?:www\.|[a-vx-z0-9])/i;
// Doubled/mangled scheme such as "http://http://…", capturing the inner
// scheme fragment and an optional "www." part.
const doubledProtocolRegex = /^\/?[htps]{2,7}[:\s=.\\/]+([htps]{2,7})[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
// Single misspelled scheme such as "htp://" or "ttps:/", capturing an
// optional "www." part.
const singleMalformedRegex = /^\/?(?:h[htps():]{1,10}|t{1,2}ps?)[:\s=.\\/]+[.,:/]*(www[./]+)?/i;
|
|
22
|
-
/**
 * Repairs a mangled scheme prefix at the start of a URL: doubled schemes
 * ("http://http://example.com") and misspelled ones ("htp://", "ttps:/"),
 * preserving an optional "www." part. The repaired scheme is "https://"
 * when the matched fragment contains an 's', otherwise "http://".
 * URLs that already start with a well-formed http(s) scheme — and inputs
 * matching neither pattern — are returned unchanged.
 */
const fixMalformedProtocol = (url) => {
	if (validUrlRegex.test(url) && !doubledProtocolRegex.test(url)) return url;
	// Shared reassembly: pick the scheme from the fragment, re-add "www.".
	const rebuild = (fragment, wwwPart, rest) =>
		(httpsLetterRegex.test(fragment) ? "https://" : "http://") + (wwwPart ? "www." : "") + rest;
	const doubledMatch = doubledProtocolRegex.exec(url);
	if (doubledMatch) {
		const remainder = url.slice(doubledMatch[0].length);
		return rebuild(doubledMatch[1], doubledMatch[2], remainder);
	}
	const singleMatch = singleMalformedRegex.exec(url);
	if (singleMatch) {
		const remainder = url.slice(singleMatch[0].length);
		return rebuild(singleMatch[0], singleMatch[1], remainder);
	}
	return url;
};
|
|
40
|
-
// Schemes feed readers use to tag a feed URL.
const feedProtocols = [
	"feed:",
	"rss:",
	"podcast:",
	"pcast:",
	"itpc:"
];

/**
 * Removes a feed-reader scheme prefix ("feed:", "rss:", …), case-insensitively.
 * "feed:https://…" unwraps to the embedded absolute URL; the protocol-relative
 * form "feed://…" is re-rooted onto `protocol` (default "https").
 * URLs without a recognized prefix are returned unchanged.
 */
const resolveFeedProtocol = (url, protocol = "https") => {
	const lowered = url.toLowerCase();
	for (const scheme of feedProtocols) {
		if (!lowered.startsWith(scheme)) continue;
		const remainder = url.slice(scheme.length);
		const remainderLower = lowered.slice(scheme.length);
		if (remainderLower.startsWith("http://") || remainderLower.startsWith("https://")) return remainder;
		if (remainderLower.startsWith("//")) return `${protocol}:${remainder}`;
	}
	return url;
};
|
|
56
|
-
/**
 * Prepends `protocol` (default "https") to inputs that look like a bare host
 * ("example.com/feed", "localhost:3000", "//example.com/x") while leaving
 * everything else — other schemes, rooted/relative paths, non-host strings —
 * unchanged.
 */
const addMissingProtocol = (url, protocol = "https") => {
	// A pre-colon token with no dot or slash (and not "localhost") already
	// looks like a scheme (mailto:, javascript:, …) — leave the URL alone.
	const colonIndex = url.indexOf(":");
	if (colonIndex > 0) {
		const beforeColon = url.slice(0, colonIndex);
		if (!beforeColon.includes(".") && !beforeColon.includes("/") && beforeColon !== "localhost") return url;
	}
	// Protocol-relative "//host/…" (but not "///…"): adopt `protocol` only
	// when the host looks real (dotted name, localhost, IPv4, or IPv6 with
	// the surrounding brackets stripped for the test).
	if (url.startsWith("//") && !url.startsWith("///")) try {
		const parsed = new URL(`${protocol}:${url}`);
		const hostname = parsed.hostname;
		if (hostname.includes(".") || hostname === "localhost" || ipv4Regex.test(hostname) || ipv6Regex.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
		return url;
	} catch {
		return url;
	}
	// Rooted or relative paths are not hosts.
	if (url.startsWith("/") || url.startsWith(".")) return url;
	// Require a dot before any slash (i.e. a dot in the host part); only
	// "localhost…" is exempt from that requirement.
	const slashIndex = url.indexOf("/");
	const dotIndex = url.indexOf(".");
	if (dotIndex === -1 || slashIndex !== -1 && dotIndex > slashIndex) {
		if (!url.startsWith("localhost")) return url;
	}
	const firstChar = url.charAt(0);
	// Reject leading whitespace.
	// NOTE(review): the first two comparisons render identically here; one of
	// them may originally have been a different whitespace character (e.g. a
	// tab) lost in this rendering — confirm against the source repository.
	if (firstChar === " " || firstChar === " " || firstChar === "\n") return url;
	return `${protocol}://${url}`;
};
|
|
80
|
-
/**
 * Resolves a raw URL — possibly HTML-entity-encoded, feed-scheme-prefixed,
 * protocol-mangled, scheme-less, or relative to `base` — into an absolute
 * http(s) URL. Returns undefined when the input cannot be turned into a
 * valid http(s) URL (bare fragments without a base, unparseable values,
 * non-http(s) schemes).
 */
const resolveUrl = (url, base) => {
	// A bare fragment is meaningless without a base to resolve against.
	if (!base && url.startsWith("#")) return;
	// Decode HTML entities only when an ampersand hints they are present.
	const decoded = url.includes("&") ? (0, entities.decodeHTML)(url) : url;
	let candidate = fixMalformedProtocol(resolveFeedProtocol(decoded));
	if (base) {
		try {
			candidate = new URL(candidate, base).href;
		} catch {
			return;
		}
	}
	candidate = addMissingProtocol(candidate);
	try {
		const parsed = new URL(candidate);
		const isHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
		return isHttp ? parsed.href : void 0;
	} catch {
		return;
	}
};
|
|
100
|
-
/**
 * Normalizes percent-escapes in a path component: escapes whose decoded
 * character is a safe path character are decoded in place; every other
 * escape is kept but with its hex digits uppercased. Strings without a
 * '%' are returned untouched.
 */
const decodeAndNormalizeEncoding = (value) => {
	if (!value.includes("%")) return value;
	const normalizeEscape = (_whole, hex) => {
		const decoded = String.fromCharCode(Number.parseInt(hex, 16));
		return safePathCharsRegex.test(decoded) ? decoded : `%${hex.toUpperCase()}`;
	};
	return value.replace(/%([0-9A-Fa-f]{2})/g, normalizeEscape);
};
|
|
109
|
-
/**
 * Canonicalizes a URL according to `options` (defaulting to the module's
 * defaultNormalizeOptions). The option checks run in a fixed order —
 * unicode/auth/www/hash, then path, then query, then final string fixes —
 * and that order is significant (e.g. the query is lowercased before it is
 * sorted). Returns the input unchanged when it cannot be parsed as a URL.
 */
const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) => {
	try {
		const parsed = new URL(url);
		if (options.normalizeUnicode) {
			// NFC-normalize host and path so visually identical URLs compare equal.
			parsed.hostname = parsed.hostname.normalize("NFC");
			parsed.pathname = parsed.pathname.normalize("NFC");
		}
		if (options.stripAuthentication) {
			parsed.username = "";
			parsed.password = "";
		}
		if (options.stripWww && parsed.hostname.startsWith("www.")) parsed.hostname = parsed.hostname.slice(4);
		if (options.stripHash) parsed.hash = "";
		// Path transforms are applied to a local copy, then written back once.
		let pathname = parsed.pathname;
		if (options.normalizeEncoding) pathname = decodeAndNormalizeEncoding(pathname);
		if (options.collapseSlashes) pathname = pathname.replace(/\/+/g, "/");
		if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
		if (options.stripRootSlash && pathname === "/") pathname = "";
		parsed.pathname = pathname;
		if (options.stripQuery) parsed.search = "";
		if (options.stripQueryParams && parsed.search) {
			const strippedSet = getStrippedParamsSet(options.stripQueryParams);
			// Collect first, delete after — deleting while iterating
			// searchParams would skip entries.
			const paramsToDelete = [];
			for (const [key] of parsed.searchParams) if (strippedSet.has(key.toLowerCase())) paramsToDelete.push(key);
			for (const param of paramsToDelete) parsed.searchParams.delete(param);
		}
		if (options.lowercaseQuery && parsed.search) {
			// Rebuild the query from scratch with lowercased keys and values.
			const entries = [...parsed.searchParams.entries()];
			parsed.search = "";
			for (const [key, value] of entries) parsed.searchParams.append(key.toLowerCase(), value.toLowerCase());
		}
		if (options.sortQueryParams && parsed.search) parsed.searchParams.sort();
		// A query that became empty leaves a bare "?" in the serialized href.
		if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
		let result = parsed.href;
		// The URL serializer re-adds "/" for a root path, so stripRootSlash
		// needs this second, string-level pass.
		if (options.stripRootSlash && result === `${parsed.origin}/`) result = parsed.origin;
		if (options.stripProtocol) result = result.replace(protocolPrefixRegex, "");
		return result;
	} catch {
		return url;
	}
};
|
|
150
|
-
/**
 * Applies the first rewrite whose match() accepts the parsed URL and returns
 * the resulting href; at most one rewrite runs. Non-matching and
 * unparseable URLs are returned unchanged (the latter via the catch).
 */
const applyRewrites = (url, rewrites) => {
	try {
		let current = new URL(url);
		for (const rewrite of rewrites) {
			if (!rewrite.match(current)) continue;
			current = rewrite.rewrite(current);
			break;
		}
		return current.href;
	} catch {
		return url;
	}
};
|
|
162
|
-
/**
 * Finds the first probe whose match() accepts the parsed URL, then feeds that
 * probe's candidate URLs through testCandidate in order, returning the first
 * truthy result. Falls back to the original url when no probe matches, no
 * candidate passes, or anything throws (including an unparseable url).
 */
const applyProbes = async (url, probes, testCandidate) => {
	try {
		const parsed = new URL(url);
		const matching = probes.find((probe) => probe.match(parsed));
		if (matching) {
			for (const candidate of matching.getCandidates(parsed)) {
				const outcome = await testCandidate(candidate);
				if (outcome) return outcome;
			}
		}
		return url;
	} catch {
		return url;
	}
};
|
|
178
|
-
/**
 * Serializes `object` to JSON with the given top-level `fields` omitted,
 * producing a stable signature for comparing objects while ignoring those
 * fields.
 *
 * Unlike the previous clear-and-restore implementation, this never mutates
 * `object`: a JSON.stringify replacer drops the listed keys only when they
 * sit directly on `object` (nested keys with the same name are kept, which
 * matches the original behavior). That also fixes two defects of the old
 * version — it no longer leaves behind explicit `undefined` properties for
 * fields that were absent, and a throw during serialization (e.g. a
 * circular reference) can no longer leave the object with its fields
 * cleared.
 *
 * @param {object} object - Object to fingerprint (must be JSON-serializable).
 * @param {string[]} fields - Top-level property names to exclude.
 * @returns {string} JSON signature of `object` without `fields`.
 */
const createSignature = (object, fields) => {
	const excluded = new Set(fields);
	return JSON.stringify(object, function (key, value) {
		// Inside a replacer, `this` is the object holding `key`; skipping only
		// when the holder is `object` restricts the exclusion to the top level.
		return this === object && excluded.has(key) ? void 0 : value;
	});
};
|
|
185
|
-
// A quoted absolute or root-relative URL whose path ends in "/" right before
// a closing quote or a "?": the slash is dropped by the final replace below.
const trailingSlashRegex = /("(?:https?:\/\/|\/)[^"]+)\/([?"])/g;

/**
 * Rewrites absolute links pointing at any of the given site `urls` into
 * root-relative links inside `text`, then strips the trailing slash left
 * before a closing quote or "?". Hosts match with or without a "www."
 * prefix; entries of `urls` that cannot be parsed are ignored, and when
 * none parse the text is returned untouched.
 */
const neutralizeUrls = (text, urls) => {
	// Build escaped host patterns ("www." removed, dots escaped).
	const hostPatterns = [];
	for (const candidate of urls) {
		try {
			const host = new URL("/", candidate).host;
			const pattern = host.replace(wwwPrefixRegex, "").replaceAll(".", "\\.");
			if (pattern) hostPatterns.push(pattern);
		} catch {
			// skip entries that are not valid URLs
		}
	}
	if (hostPatterns.length === 0) return text;
	const hostPattern = hostPatterns.length === 1 ? hostPatterns[0] : `(?:${hostPatterns.join("|")})`;
	const absoluteLink = new RegExp(`https?://(?:www\\.)?${hostPattern}(?=[/"]|\\\\")(/)?`, "g");
	return text.replace(absoluteLink, "/").replace(trailingSlashRegex, "$1$2");
};
|
|
199
|
-
//#endregion
|
|
200
|
-
// CommonJS exports: the public utils surface. The internal helpers
// getStrippedParamsSet and decodeAndNormalizeEncoding are deliberately
// not exported.
exports.addMissingProtocol = addMissingProtocol;
exports.applyProbes = applyProbes;
exports.applyRewrites = applyRewrites;
exports.createSignature = createSignature;
exports.fixMalformedProtocol = fixMalformedProtocol;
exports.neutralizeUrls = neutralizeUrls;
exports.normalizeUrl = normalizeUrl;
exports.resolveFeedProtocol = resolveFeedProtocol;
exports.resolveUrl = resolveUrl;
|
package/dist/utils.d.cts
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import { NormalizeOptions } from "./types.cjs";

//#region src/utils.d.ts
/** Repairs a mangled scheme prefix (e.g. "http://http://…", "htp://"). */
declare const fixMalformedProtocol: (url: string) => string;
/** Strips feed-reader schemes ("feed:", "rss:", …); protocol-relative forms are re-rooted onto `protocol`. */
declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
/** Prepends `protocol` to bare host strings; other inputs are returned unchanged. */
declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
/** Resolves a raw or relative URL to an absolute http(s) URL, or undefined when it cannot be made valid. */
declare const resolveUrl: (url: string, base?: string) => string | undefined;
/** Canonicalizes a URL per `options`; returns the input unchanged when unparseable. */
declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
//#endregion
export { addMissingProtocol, fixMalformedProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|