feedcanon 0.9.2 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/defaults.cjs +41 -10
- package/dist/defaults.d.cts +2 -2
- package/dist/defaults.d.ts +2 -2
- package/dist/defaults.js +41 -10
- package/dist/index.cjs +39 -26
- package/dist/index.d.cts +5 -2
- package/dist/index.d.ts +5 -2
- package/dist/index.js +40 -27
- package/dist/types.d.cts +15 -14
- package/dist/types.d.ts +15 -14
- package/dist/utils.cjs +3 -3
- package/dist/utils.js +3 -3
- package/package.json +1 -1
package/dist/defaults.cjs
CHANGED
|
@@ -18,6 +18,8 @@ const defaultStrippedParams = [
|
|
|
18
18
|
"wbraid",
|
|
19
19
|
"gclsrc",
|
|
20
20
|
"gad_source",
|
|
21
|
+
"gad_campaignid",
|
|
22
|
+
"srsltid",
|
|
21
23
|
"fbclid",
|
|
22
24
|
"fb_action_ids",
|
|
23
25
|
"fb_action_types",
|
|
@@ -60,12 +62,43 @@ const defaultStrippedParams = [
|
|
|
60
62
|
"hsa_src",
|
|
61
63
|
"hsa_tgt",
|
|
62
64
|
"hsa_ver",
|
|
65
|
+
"hsCtaTracking",
|
|
66
|
+
"__hstc",
|
|
67
|
+
"__hsfp",
|
|
68
|
+
"__hssc",
|
|
63
69
|
"cid",
|
|
64
70
|
"s_kwcid",
|
|
65
71
|
"ef_id",
|
|
66
72
|
"obOrigUrl",
|
|
67
73
|
"dicbo",
|
|
68
74
|
"yclid",
|
|
75
|
+
"awinaffid",
|
|
76
|
+
"awinmid",
|
|
77
|
+
"clickref",
|
|
78
|
+
"afftrack",
|
|
79
|
+
"itm_source",
|
|
80
|
+
"itm_medium",
|
|
81
|
+
"itm_campaign",
|
|
82
|
+
"itm_content",
|
|
83
|
+
"itm_channel",
|
|
84
|
+
"itm_audience",
|
|
85
|
+
"int_source",
|
|
86
|
+
"int_medium",
|
|
87
|
+
"int_campaign",
|
|
88
|
+
"int_content",
|
|
89
|
+
"int_placement",
|
|
90
|
+
"int_campaign_type",
|
|
91
|
+
"int_keycode",
|
|
92
|
+
"g2i_source",
|
|
93
|
+
"g2i_medium",
|
|
94
|
+
"g2i_campaign",
|
|
95
|
+
"g2i_or_o",
|
|
96
|
+
"g2i_or_p",
|
|
97
|
+
"doing_wp_cron",
|
|
98
|
+
"preview",
|
|
99
|
+
"preview_id",
|
|
100
|
+
"preview_nonce",
|
|
101
|
+
"replytocom",
|
|
69
102
|
"_",
|
|
70
103
|
"timestamp",
|
|
71
104
|
"ts",
|
|
@@ -74,6 +107,14 @@ const defaultStrippedParams = [
|
|
|
74
107
|
"nocache",
|
|
75
108
|
"rand",
|
|
76
109
|
"random",
|
|
110
|
+
"forceByPassCache",
|
|
111
|
+
"sucurianticache",
|
|
112
|
+
"cleancache",
|
|
113
|
+
"rebuildcache",
|
|
114
|
+
"kontrol_health_check_timestamp",
|
|
115
|
+
"_x_tr_sl",
|
|
116
|
+
"_x_tr_tl",
|
|
117
|
+
"_x_tr_hl",
|
|
77
118
|
"action_object_map",
|
|
78
119
|
"action_ref_map",
|
|
79
120
|
"action_type_map",
|
|
@@ -93,9 +134,6 @@ const defaultStrippedParams = [
|
|
|
93
134
|
"hmb_campaign",
|
|
94
135
|
"hmb_medium",
|
|
95
136
|
"hmb_source",
|
|
96
|
-
"itm_campaign",
|
|
97
|
-
"itm_medium",
|
|
98
|
-
"itm_source",
|
|
99
137
|
"ml_subscriber",
|
|
100
138
|
"ml_subscriber_hash",
|
|
101
139
|
"oly_anon_id",
|
|
@@ -118,7 +156,6 @@ const defaultNormalizeOptions = {
|
|
|
118
156
|
stripRootSlash: true,
|
|
119
157
|
collapseSlashes: true,
|
|
120
158
|
stripHash: true,
|
|
121
|
-
stripTextFragment: true,
|
|
122
159
|
sortQueryParams: true,
|
|
123
160
|
stripQueryParams: defaultStrippedParams,
|
|
124
161
|
stripEmptyQuery: true,
|
|
@@ -136,9 +173,7 @@ const defaultTiers = [
|
|
|
136
173
|
stripRootSlash: true,
|
|
137
174
|
collapseSlashes: true,
|
|
138
175
|
stripHash: true,
|
|
139
|
-
stripTextFragment: true,
|
|
140
176
|
sortQueryParams: true,
|
|
141
|
-
stripQueryParams: defaultStrippedParams,
|
|
142
177
|
stripEmptyQuery: true,
|
|
143
178
|
normalizeEncoding: true,
|
|
144
179
|
lowercaseHostname: true,
|
|
@@ -153,9 +188,7 @@ const defaultTiers = [
|
|
|
153
188
|
stripRootSlash: true,
|
|
154
189
|
collapseSlashes: true,
|
|
155
190
|
stripHash: true,
|
|
156
|
-
stripTextFragment: true,
|
|
157
191
|
sortQueryParams: true,
|
|
158
|
-
stripQueryParams: defaultStrippedParams,
|
|
159
192
|
stripEmptyQuery: true,
|
|
160
193
|
normalizeEncoding: true,
|
|
161
194
|
lowercaseHostname: true,
|
|
@@ -170,9 +203,7 @@ const defaultTiers = [
|
|
|
170
203
|
stripRootSlash: true,
|
|
171
204
|
collapseSlashes: true,
|
|
172
205
|
stripHash: true,
|
|
173
|
-
stripTextFragment: true,
|
|
174
206
|
sortQueryParams: true,
|
|
175
|
-
stripQueryParams: defaultStrippedParams,
|
|
176
207
|
stripEmptyQuery: true,
|
|
177
208
|
normalizeEncoding: true,
|
|
178
209
|
lowercaseHostname: true,
|
package/dist/defaults.d.cts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { NormalizeOptions, PlatformHandler } from "./types.cjs";
|
|
1
|
+
import { NormalizeOptions, PlatformHandler, Tier } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultPlatforms: Array<PlatformHandler>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
|
-
declare const defaultTiers: Array<
|
|
7
|
+
declare const defaultTiers: Array<Tier>;
|
|
8
8
|
//#endregion
|
|
9
9
|
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { NormalizeOptions, PlatformHandler } from "./types.js";
|
|
1
|
+
import { NormalizeOptions, PlatformHandler, Tier } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultPlatforms: Array<PlatformHandler>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
|
-
declare const defaultTiers: Array<
|
|
7
|
+
declare const defaultTiers: Array<Tier>;
|
|
8
8
|
//#endregion
|
|
9
9
|
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.js
CHANGED
|
@@ -18,6 +18,8 @@ const defaultStrippedParams = [
|
|
|
18
18
|
"wbraid",
|
|
19
19
|
"gclsrc",
|
|
20
20
|
"gad_source",
|
|
21
|
+
"gad_campaignid",
|
|
22
|
+
"srsltid",
|
|
21
23
|
"fbclid",
|
|
22
24
|
"fb_action_ids",
|
|
23
25
|
"fb_action_types",
|
|
@@ -60,12 +62,43 @@ const defaultStrippedParams = [
|
|
|
60
62
|
"hsa_src",
|
|
61
63
|
"hsa_tgt",
|
|
62
64
|
"hsa_ver",
|
|
65
|
+
"hsCtaTracking",
|
|
66
|
+
"__hstc",
|
|
67
|
+
"__hsfp",
|
|
68
|
+
"__hssc",
|
|
63
69
|
"cid",
|
|
64
70
|
"s_kwcid",
|
|
65
71
|
"ef_id",
|
|
66
72
|
"obOrigUrl",
|
|
67
73
|
"dicbo",
|
|
68
74
|
"yclid",
|
|
75
|
+
"awinaffid",
|
|
76
|
+
"awinmid",
|
|
77
|
+
"clickref",
|
|
78
|
+
"afftrack",
|
|
79
|
+
"itm_source",
|
|
80
|
+
"itm_medium",
|
|
81
|
+
"itm_campaign",
|
|
82
|
+
"itm_content",
|
|
83
|
+
"itm_channel",
|
|
84
|
+
"itm_audience",
|
|
85
|
+
"int_source",
|
|
86
|
+
"int_medium",
|
|
87
|
+
"int_campaign",
|
|
88
|
+
"int_content",
|
|
89
|
+
"int_placement",
|
|
90
|
+
"int_campaign_type",
|
|
91
|
+
"int_keycode",
|
|
92
|
+
"g2i_source",
|
|
93
|
+
"g2i_medium",
|
|
94
|
+
"g2i_campaign",
|
|
95
|
+
"g2i_or_o",
|
|
96
|
+
"g2i_or_p",
|
|
97
|
+
"doing_wp_cron",
|
|
98
|
+
"preview",
|
|
99
|
+
"preview_id",
|
|
100
|
+
"preview_nonce",
|
|
101
|
+
"replytocom",
|
|
69
102
|
"_",
|
|
70
103
|
"timestamp",
|
|
71
104
|
"ts",
|
|
@@ -74,6 +107,14 @@ const defaultStrippedParams = [
|
|
|
74
107
|
"nocache",
|
|
75
108
|
"rand",
|
|
76
109
|
"random",
|
|
110
|
+
"forceByPassCache",
|
|
111
|
+
"sucurianticache",
|
|
112
|
+
"cleancache",
|
|
113
|
+
"rebuildcache",
|
|
114
|
+
"kontrol_health_check_timestamp",
|
|
115
|
+
"_x_tr_sl",
|
|
116
|
+
"_x_tr_tl",
|
|
117
|
+
"_x_tr_hl",
|
|
77
118
|
"action_object_map",
|
|
78
119
|
"action_ref_map",
|
|
79
120
|
"action_type_map",
|
|
@@ -93,9 +134,6 @@ const defaultStrippedParams = [
|
|
|
93
134
|
"hmb_campaign",
|
|
94
135
|
"hmb_medium",
|
|
95
136
|
"hmb_source",
|
|
96
|
-
"itm_campaign",
|
|
97
|
-
"itm_medium",
|
|
98
|
-
"itm_source",
|
|
99
137
|
"ml_subscriber",
|
|
100
138
|
"ml_subscriber_hash",
|
|
101
139
|
"oly_anon_id",
|
|
@@ -118,7 +156,6 @@ const defaultNormalizeOptions = {
|
|
|
118
156
|
stripRootSlash: true,
|
|
119
157
|
collapseSlashes: true,
|
|
120
158
|
stripHash: true,
|
|
121
|
-
stripTextFragment: true,
|
|
122
159
|
sortQueryParams: true,
|
|
123
160
|
stripQueryParams: defaultStrippedParams,
|
|
124
161
|
stripEmptyQuery: true,
|
|
@@ -136,9 +173,7 @@ const defaultTiers = [
|
|
|
136
173
|
stripRootSlash: true,
|
|
137
174
|
collapseSlashes: true,
|
|
138
175
|
stripHash: true,
|
|
139
|
-
stripTextFragment: true,
|
|
140
176
|
sortQueryParams: true,
|
|
141
|
-
stripQueryParams: defaultStrippedParams,
|
|
142
177
|
stripEmptyQuery: true,
|
|
143
178
|
normalizeEncoding: true,
|
|
144
179
|
lowercaseHostname: true,
|
|
@@ -153,9 +188,7 @@ const defaultTiers = [
|
|
|
153
188
|
stripRootSlash: true,
|
|
154
189
|
collapseSlashes: true,
|
|
155
190
|
stripHash: true,
|
|
156
|
-
stripTextFragment: true,
|
|
157
191
|
sortQueryParams: true,
|
|
158
|
-
stripQueryParams: defaultStrippedParams,
|
|
159
192
|
stripEmptyQuery: true,
|
|
160
193
|
normalizeEncoding: true,
|
|
161
194
|
lowercaseHostname: true,
|
|
@@ -170,9 +203,7 @@ const defaultTiers = [
|
|
|
170
203
|
stripRootSlash: true,
|
|
171
204
|
collapseSlashes: true,
|
|
172
205
|
stripHash: true,
|
|
173
|
-
stripTextFragment: true,
|
|
174
206
|
sortQueryParams: true,
|
|
175
|
-
stripQueryParams: defaultStrippedParams,
|
|
176
207
|
stripEmptyQuery: true,
|
|
177
208
|
normalizeEncoding: true,
|
|
178
209
|
lowercaseHostname: true,
|
package/dist/index.cjs
CHANGED
|
@@ -2,8 +2,15 @@ const require_defaults = require('./defaults.cjs');
|
|
|
2
2
|
const require_utils = require('./utils.cjs');
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
|
-
|
|
6
|
-
const {
|
|
5
|
+
async function findCanonical(inputUrl, options) {
|
|
6
|
+
const { parser = require_utils.feedsmithParser, fetchFn = require_utils.nativeFetch, existsFn, tiers = require_defaults.defaultTiers, platforms = require_defaults.defaultPlatforms, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
|
+
const stripParams = (url) => {
|
|
8
|
+
return stripQueryParams?.length ? require_utils.normalizeUrl(url, {
|
|
9
|
+
stripQueryParams,
|
|
10
|
+
sortQueryParams: true,
|
|
11
|
+
stripEmptyQuery: true
|
|
12
|
+
}) : url;
|
|
13
|
+
};
|
|
7
14
|
const resolveAndApplyPlatformHandlers = (url, baseUrl) => {
|
|
8
15
|
const resolved = require_utils.resolveUrl(url, baseUrl);
|
|
9
16
|
return resolved ? require_utils.applyPlatformHandlers(resolved, platforms) : void 0;
|
|
@@ -21,8 +28,9 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
21
28
|
response: initialResponse
|
|
22
29
|
});
|
|
23
30
|
if (initialResponse.status < 200 || initialResponse.status >= 300) return;
|
|
24
|
-
const
|
|
25
|
-
if (!
|
|
31
|
+
const initialResponseUrlRaw = resolveAndApplyPlatformHandlers(initialResponse.url);
|
|
32
|
+
if (!initialResponseUrlRaw) return;
|
|
33
|
+
const initialResponseUrl = stripParams(initialResponseUrlRaw);
|
|
26
34
|
const initialResponseBody = initialResponse.body;
|
|
27
35
|
if (!initialResponseBody) return;
|
|
28
36
|
let initialResponseSignature;
|
|
@@ -35,14 +43,17 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
35
43
|
feed: initialResponseFeed
|
|
36
44
|
});
|
|
37
45
|
const selfRequestUrlRaw = parser.getSelfUrl(initialResponseFeed);
|
|
38
|
-
if (selfRequestUrlRaw)
|
|
46
|
+
if (selfRequestUrlRaw) {
|
|
47
|
+
selfRequestUrl = resolveAndApplyPlatformHandlers(selfRequestUrlRaw, initialResponseUrl);
|
|
48
|
+
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
|
+
}
|
|
39
50
|
const compareWithInitialResponse = (comparedResponseBody) => {
|
|
40
51
|
if (!comparedResponseBody) return false;
|
|
41
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
42
53
|
const comparedResponseFeed = parser.parse(comparedResponseBody);
|
|
43
54
|
if (comparedResponseFeed) {
|
|
44
|
-
initialResponseSignature ||=
|
|
45
|
-
const comparedResponseSignature =
|
|
55
|
+
initialResponseSignature ||= parser.getSignature(initialResponseFeed, parser.getSelfUrl(initialResponseFeed));
|
|
56
|
+
const comparedResponseSignature = parser.getSignature(comparedResponseFeed, parser.getSelfUrl(comparedResponseFeed));
|
|
46
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
47
58
|
}
|
|
48
59
|
return false;
|
|
@@ -62,7 +73,7 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
62
73
|
if (!compareWithInitialResponse(response.body)) return;
|
|
63
74
|
return response;
|
|
64
75
|
};
|
|
65
|
-
let
|
|
76
|
+
let variantSourceUrl = initialResponseUrl;
|
|
66
77
|
if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
|
|
67
78
|
const urlsToTry = [selfRequestUrl];
|
|
68
79
|
if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
|
|
@@ -75,40 +86,42 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
75
86
|
response,
|
|
76
87
|
feed: initialResponseFeed
|
|
77
88
|
});
|
|
78
|
-
|
|
89
|
+
variantSourceUrl = resolveAndApplyPlatformHandlers(response.url) ?? initialResponseUrl;
|
|
90
|
+
variantSourceUrl = stripParams(variantSourceUrl);
|
|
79
91
|
break;
|
|
80
92
|
}
|
|
81
93
|
}
|
|
82
94
|
}
|
|
83
|
-
const
|
|
84
|
-
|
|
85
|
-
let winningUrl =
|
|
86
|
-
for (const
|
|
95
|
+
const variantUrls = new Set(tiers.map((tier) => resolveAndApplyPlatformHandlers(require_utils.normalizeUrl(variantSourceUrl, tier))).filter((variantUrl) => !!variantUrl));
|
|
96
|
+
variantUrls.add(variantSourceUrl);
|
|
97
|
+
let winningUrl = variantSourceUrl;
|
|
98
|
+
for (const variantUrl of variantUrls) {
|
|
87
99
|
if (existsFn) {
|
|
88
|
-
const data = await existsFn(
|
|
100
|
+
const data = await existsFn(variantUrl);
|
|
89
101
|
if (data !== void 0) {
|
|
90
102
|
onExists?.({
|
|
91
|
-
url:
|
|
103
|
+
url: variantUrl,
|
|
92
104
|
data
|
|
93
105
|
});
|
|
94
|
-
return
|
|
106
|
+
return variantUrl;
|
|
95
107
|
}
|
|
96
108
|
}
|
|
97
|
-
if (
|
|
98
|
-
if (
|
|
109
|
+
if (variantUrl === variantSourceUrl) continue;
|
|
110
|
+
if (variantUrl === initialResponseUrl) {
|
|
99
111
|
winningUrl = initialResponseUrl;
|
|
100
112
|
break;
|
|
101
113
|
}
|
|
102
|
-
const
|
|
103
|
-
if (
|
|
104
|
-
|
|
105
|
-
if (
|
|
114
|
+
const variantResponse = await fetchAndCompare(variantUrl);
|
|
115
|
+
if (variantResponse) {
|
|
116
|
+
let variantResponseUrl = resolveAndApplyPlatformHandlers(variantResponse.url);
|
|
117
|
+
if (variantResponseUrl) variantResponseUrl = stripParams(variantResponseUrl);
|
|
118
|
+
if (variantResponseUrl === variantSourceUrl || variantResponseUrl === initialResponseUrl) continue;
|
|
106
119
|
onMatch?.({
|
|
107
|
-
url:
|
|
108
|
-
response,
|
|
120
|
+
url: variantUrl,
|
|
121
|
+
response: variantResponse,
|
|
109
122
|
feed: initialResponseFeed
|
|
110
123
|
});
|
|
111
|
-
winningUrl =
|
|
124
|
+
winningUrl = variantUrl;
|
|
112
125
|
break;
|
|
113
126
|
}
|
|
114
127
|
}
|
|
@@ -125,7 +138,7 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
125
138
|
}
|
|
126
139
|
}
|
|
127
140
|
return winningUrl;
|
|
128
|
-
}
|
|
141
|
+
}
|
|
129
142
|
|
|
130
143
|
//#endregion
|
|
131
144
|
exports.findCanonical = findCanonical;
|
package/dist/index.d.cts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
import { FeedsmithFeed, FindCanonicalOptions } from "./types.cjs";
|
|
1
|
+
import { FeedsmithFeed, FetchFnResponse, FindCanonicalOptions, ParserAdapter } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/index.d.ts
|
|
4
|
-
declare
|
|
4
|
+
declare function findCanonical<TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options?: Omit<FindCanonicalOptions<FeedsmithFeed, TResponse, TExisting>, 'parser'>): Promise<string | undefined>;
|
|
5
|
+
declare function findCanonical<TFeed, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options: FindCanonicalOptions<TFeed, TResponse, TExisting> & {
|
|
6
|
+
parser: ParserAdapter<TFeed>;
|
|
7
|
+
}): Promise<string | undefined>;
|
|
5
8
|
//#endregion
|
|
6
9
|
export { findCanonical };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
import { FeedsmithFeed, FindCanonicalOptions } from "./types.js";
|
|
1
|
+
import { FeedsmithFeed, FetchFnResponse, FindCanonicalOptions, ParserAdapter } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/index.d.ts
|
|
4
|
-
declare
|
|
4
|
+
declare function findCanonical<TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options?: Omit<FindCanonicalOptions<FeedsmithFeed, TResponse, TExisting>, 'parser'>): Promise<string | undefined>;
|
|
5
|
+
declare function findCanonical<TFeed, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown>(inputUrl: string, options: FindCanonicalOptions<TFeed, TResponse, TExisting> & {
|
|
6
|
+
parser: ParserAdapter<TFeed>;
|
|
7
|
+
}): Promise<string | undefined>;
|
|
5
8
|
//#endregion
|
|
6
9
|
export { findCanonical };
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
1
|
-
import { defaultPlatforms, defaultTiers } from "./defaults.js";
|
|
1
|
+
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
2
|
import { applyPlatformHandlers, feedsmithParser, nativeFetch, normalizeUrl, resolveUrl } from "./utils.js";
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
|
-
|
|
6
|
-
const { fetchFn = nativeFetch, existsFn,
|
|
5
|
+
async function findCanonical(inputUrl, options) {
|
|
6
|
+
const { parser = feedsmithParser, fetchFn = nativeFetch, existsFn, tiers = defaultTiers, platforms = defaultPlatforms, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
|
+
const stripParams = (url) => {
|
|
8
|
+
return stripQueryParams?.length ? normalizeUrl(url, {
|
|
9
|
+
stripQueryParams,
|
|
10
|
+
sortQueryParams: true,
|
|
11
|
+
stripEmptyQuery: true
|
|
12
|
+
}) : url;
|
|
13
|
+
};
|
|
7
14
|
const resolveAndApplyPlatformHandlers = (url, baseUrl) => {
|
|
8
15
|
const resolved = resolveUrl(url, baseUrl);
|
|
9
16
|
return resolved ? applyPlatformHandlers(resolved, platforms) : void 0;
|
|
@@ -21,8 +28,9 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
21
28
|
response: initialResponse
|
|
22
29
|
});
|
|
23
30
|
if (initialResponse.status < 200 || initialResponse.status >= 300) return;
|
|
24
|
-
const
|
|
25
|
-
if (!
|
|
31
|
+
const initialResponseUrlRaw = resolveAndApplyPlatformHandlers(initialResponse.url);
|
|
32
|
+
if (!initialResponseUrlRaw) return;
|
|
33
|
+
const initialResponseUrl = stripParams(initialResponseUrlRaw);
|
|
26
34
|
const initialResponseBody = initialResponse.body;
|
|
27
35
|
if (!initialResponseBody) return;
|
|
28
36
|
let initialResponseSignature;
|
|
@@ -35,14 +43,17 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
35
43
|
feed: initialResponseFeed
|
|
36
44
|
});
|
|
37
45
|
const selfRequestUrlRaw = parser.getSelfUrl(initialResponseFeed);
|
|
38
|
-
if (selfRequestUrlRaw)
|
|
46
|
+
if (selfRequestUrlRaw) {
|
|
47
|
+
selfRequestUrl = resolveAndApplyPlatformHandlers(selfRequestUrlRaw, initialResponseUrl);
|
|
48
|
+
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
|
+
}
|
|
39
50
|
const compareWithInitialResponse = (comparedResponseBody) => {
|
|
40
51
|
if (!comparedResponseBody) return false;
|
|
41
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
42
53
|
const comparedResponseFeed = parser.parse(comparedResponseBody);
|
|
43
54
|
if (comparedResponseFeed) {
|
|
44
|
-
initialResponseSignature ||=
|
|
45
|
-
const comparedResponseSignature =
|
|
55
|
+
initialResponseSignature ||= parser.getSignature(initialResponseFeed, parser.getSelfUrl(initialResponseFeed));
|
|
56
|
+
const comparedResponseSignature = parser.getSignature(comparedResponseFeed, parser.getSelfUrl(comparedResponseFeed));
|
|
46
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
47
58
|
}
|
|
48
59
|
return false;
|
|
@@ -62,7 +73,7 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
62
73
|
if (!compareWithInitialResponse(response.body)) return;
|
|
63
74
|
return response;
|
|
64
75
|
};
|
|
65
|
-
let
|
|
76
|
+
let variantSourceUrl = initialResponseUrl;
|
|
66
77
|
if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
|
|
67
78
|
const urlsToTry = [selfRequestUrl];
|
|
68
79
|
if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
|
|
@@ -75,40 +86,42 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
75
86
|
response,
|
|
76
87
|
feed: initialResponseFeed
|
|
77
88
|
});
|
|
78
|
-
|
|
89
|
+
variantSourceUrl = resolveAndApplyPlatformHandlers(response.url) ?? initialResponseUrl;
|
|
90
|
+
variantSourceUrl = stripParams(variantSourceUrl);
|
|
79
91
|
break;
|
|
80
92
|
}
|
|
81
93
|
}
|
|
82
94
|
}
|
|
83
|
-
const
|
|
84
|
-
|
|
85
|
-
let winningUrl =
|
|
86
|
-
for (const
|
|
95
|
+
const variantUrls = new Set(tiers.map((tier) => resolveAndApplyPlatformHandlers(normalizeUrl(variantSourceUrl, tier))).filter((variantUrl) => !!variantUrl));
|
|
96
|
+
variantUrls.add(variantSourceUrl);
|
|
97
|
+
let winningUrl = variantSourceUrl;
|
|
98
|
+
for (const variantUrl of variantUrls) {
|
|
87
99
|
if (existsFn) {
|
|
88
|
-
const data = await existsFn(
|
|
100
|
+
const data = await existsFn(variantUrl);
|
|
89
101
|
if (data !== void 0) {
|
|
90
102
|
onExists?.({
|
|
91
|
-
url:
|
|
103
|
+
url: variantUrl,
|
|
92
104
|
data
|
|
93
105
|
});
|
|
94
|
-
return
|
|
106
|
+
return variantUrl;
|
|
95
107
|
}
|
|
96
108
|
}
|
|
97
|
-
if (
|
|
98
|
-
if (
|
|
109
|
+
if (variantUrl === variantSourceUrl) continue;
|
|
110
|
+
if (variantUrl === initialResponseUrl) {
|
|
99
111
|
winningUrl = initialResponseUrl;
|
|
100
112
|
break;
|
|
101
113
|
}
|
|
102
|
-
const
|
|
103
|
-
if (
|
|
104
|
-
|
|
105
|
-
if (
|
|
114
|
+
const variantResponse = await fetchAndCompare(variantUrl);
|
|
115
|
+
if (variantResponse) {
|
|
116
|
+
let variantResponseUrl = resolveAndApplyPlatformHandlers(variantResponse.url);
|
|
117
|
+
if (variantResponseUrl) variantResponseUrl = stripParams(variantResponseUrl);
|
|
118
|
+
if (variantResponseUrl === variantSourceUrl || variantResponseUrl === initialResponseUrl) continue;
|
|
106
119
|
onMatch?.({
|
|
107
|
-
url:
|
|
108
|
-
response,
|
|
120
|
+
url: variantUrl,
|
|
121
|
+
response: variantResponse,
|
|
109
122
|
feed: initialResponseFeed
|
|
110
123
|
});
|
|
111
|
-
winningUrl =
|
|
124
|
+
winningUrl = variantUrl;
|
|
112
125
|
break;
|
|
113
126
|
}
|
|
114
127
|
}
|
|
@@ -125,7 +138,7 @@ const findCanonical = async (inputUrl, options) => {
|
|
|
125
138
|
}
|
|
126
139
|
}
|
|
127
140
|
return winningUrl;
|
|
128
|
-
}
|
|
141
|
+
}
|
|
129
142
|
|
|
130
143
|
//#endregion
|
|
131
144
|
export { findCanonical };
|
package/dist/types.d.cts
CHANGED
|
@@ -5,7 +5,7 @@ type FeedsmithFeed = ReturnType<typeof feedsmith0.parseFeed>;
|
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
6
|
parse: (body: string) => T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T) =>
|
|
8
|
+
getSignature: (parsed: T, selfUrl?: string) => string;
|
|
9
9
|
};
|
|
10
10
|
type PlatformHandler = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
@@ -19,7 +19,6 @@ type NormalizeOptions = {
|
|
|
19
19
|
stripRootSlash?: boolean;
|
|
20
20
|
collapseSlashes?: boolean;
|
|
21
21
|
stripHash?: boolean;
|
|
22
|
-
stripTextFragment?: boolean;
|
|
23
22
|
sortQueryParams?: boolean;
|
|
24
23
|
stripQueryParams?: Array<string>;
|
|
25
24
|
stripEmptyQuery?: boolean;
|
|
@@ -28,40 +27,42 @@ type NormalizeOptions = {
|
|
|
28
27
|
normalizeUnicode?: boolean;
|
|
29
28
|
convertToPunycode?: boolean;
|
|
30
29
|
};
|
|
31
|
-
type
|
|
30
|
+
type Tier = Omit<NormalizeOptions, 'stripQueryParams'>;
|
|
31
|
+
type OnFetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
|
|
32
32
|
url: string;
|
|
33
|
-
response:
|
|
33
|
+
response: TResponse;
|
|
34
34
|
}) => void;
|
|
35
|
-
type OnMatchFn<TFeed = unknown> = (data: {
|
|
35
|
+
type OnMatchFn<TFeed = unknown, TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
|
|
36
36
|
url: string;
|
|
37
|
-
response:
|
|
37
|
+
response: TResponse;
|
|
38
38
|
feed: TFeed;
|
|
39
39
|
}) => void;
|
|
40
40
|
type OnExistsFn<T> = (data: {
|
|
41
41
|
url: string;
|
|
42
42
|
data: T;
|
|
43
43
|
}) => void;
|
|
44
|
-
type
|
|
45
|
-
type FindCanonicalOptions<TFeed = FeedsmithFeed, TExisting = unknown> = {
|
|
44
|
+
type FindCanonicalOptions<TFeed = FeedsmithFeed, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown> = {
|
|
46
45
|
parser?: ParserAdapter<TFeed>;
|
|
47
|
-
fetchFn?: FetchFn
|
|
46
|
+
fetchFn?: FetchFn<TResponse>;
|
|
48
47
|
existsFn?: ExistsFn<TExisting>;
|
|
49
|
-
tiers?: Array<
|
|
48
|
+
tiers?: Array<Tier>;
|
|
50
49
|
platforms?: Array<PlatformHandler>;
|
|
51
|
-
|
|
52
|
-
|
|
50
|
+
stripQueryParams?: Array<string>;
|
|
51
|
+
onFetch?: OnFetchFn<TResponse>;
|
|
52
|
+
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
53
53
|
onExists?: OnExistsFn<TExisting>;
|
|
54
54
|
};
|
|
55
55
|
type FetchFnOptions = {
|
|
56
56
|
method?: 'GET' | 'HEAD';
|
|
57
57
|
headers?: Record<string, string>;
|
|
58
58
|
};
|
|
59
|
+
type ExistsFn<T = unknown> = (url: string) => Promise<T | undefined>;
|
|
59
60
|
type FetchFnResponse = {
|
|
60
61
|
headers: Headers;
|
|
61
62
|
body: string;
|
|
62
63
|
url: string;
|
|
63
64
|
status: number;
|
|
64
65
|
};
|
|
65
|
-
type FetchFn = (url: string, options?: FetchFnOptions) => Promise<
|
|
66
|
+
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
66
67
|
//#endregion
|
|
67
|
-
export { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler };
|
|
68
|
+
export { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler, Tier };
|
package/dist/types.d.ts
CHANGED
|
@@ -5,7 +5,7 @@ type FeedsmithFeed = ReturnType<typeof feedsmith0.parseFeed>;
|
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
6
|
parse: (body: string) => T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T) =>
|
|
8
|
+
getSignature: (parsed: T, selfUrl?: string) => string;
|
|
9
9
|
};
|
|
10
10
|
type PlatformHandler = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
@@ -19,7 +19,6 @@ type NormalizeOptions = {
|
|
|
19
19
|
stripRootSlash?: boolean;
|
|
20
20
|
collapseSlashes?: boolean;
|
|
21
21
|
stripHash?: boolean;
|
|
22
|
-
stripTextFragment?: boolean;
|
|
23
22
|
sortQueryParams?: boolean;
|
|
24
23
|
stripQueryParams?: Array<string>;
|
|
25
24
|
stripEmptyQuery?: boolean;
|
|
@@ -28,40 +27,42 @@ type NormalizeOptions = {
|
|
|
28
27
|
normalizeUnicode?: boolean;
|
|
29
28
|
convertToPunycode?: boolean;
|
|
30
29
|
};
|
|
31
|
-
type
|
|
30
|
+
type Tier = Omit<NormalizeOptions, 'stripQueryParams'>;
|
|
31
|
+
type OnFetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
|
|
32
32
|
url: string;
|
|
33
|
-
response:
|
|
33
|
+
response: TResponse;
|
|
34
34
|
}) => void;
|
|
35
|
-
type OnMatchFn<TFeed = unknown> = (data: {
|
|
35
|
+
type OnMatchFn<TFeed = unknown, TResponse extends FetchFnResponse = FetchFnResponse> = (data: {
|
|
36
36
|
url: string;
|
|
37
|
-
response:
|
|
37
|
+
response: TResponse;
|
|
38
38
|
feed: TFeed;
|
|
39
39
|
}) => void;
|
|
40
40
|
type OnExistsFn<T> = (data: {
|
|
41
41
|
url: string;
|
|
42
42
|
data: T;
|
|
43
43
|
}) => void;
|
|
44
|
-
type
|
|
45
|
-
type FindCanonicalOptions<TFeed = FeedsmithFeed, TExisting = unknown> = {
|
|
44
|
+
type FindCanonicalOptions<TFeed = FeedsmithFeed, TResponse extends FetchFnResponse = FetchFnResponse, TExisting = unknown> = {
|
|
46
45
|
parser?: ParserAdapter<TFeed>;
|
|
47
|
-
fetchFn?: FetchFn
|
|
46
|
+
fetchFn?: FetchFn<TResponse>;
|
|
48
47
|
existsFn?: ExistsFn<TExisting>;
|
|
49
|
-
tiers?: Array<
|
|
48
|
+
tiers?: Array<Tier>;
|
|
50
49
|
platforms?: Array<PlatformHandler>;
|
|
51
|
-
|
|
52
|
-
|
|
50
|
+
stripQueryParams?: Array<string>;
|
|
51
|
+
onFetch?: OnFetchFn<TResponse>;
|
|
52
|
+
onMatch?: OnMatchFn<TFeed, TResponse>;
|
|
53
53
|
onExists?: OnExistsFn<TExisting>;
|
|
54
54
|
};
|
|
55
55
|
type FetchFnOptions = {
|
|
56
56
|
method?: 'GET' | 'HEAD';
|
|
57
57
|
headers?: Record<string, string>;
|
|
58
58
|
};
|
|
59
|
+
type ExistsFn<T = unknown> = (url: string) => Promise<T | undefined>;
|
|
59
60
|
type FetchFnResponse = {
|
|
60
61
|
headers: Headers;
|
|
61
62
|
body: string;
|
|
62
63
|
url: string;
|
|
63
64
|
status: number;
|
|
64
65
|
};
|
|
65
|
-
type FetchFn = (url: string, options?: FetchFnOptions) => Promise<
|
|
66
|
+
type FetchFn<TResponse extends FetchFnResponse = FetchFnResponse> = (url: string, options?: FetchFnOptions) => Promise<TResponse>;
|
|
66
67
|
//#endregion
|
|
67
|
-
export { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler };
|
|
68
|
+
export { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler, Tier };
|
package/dist/utils.cjs
CHANGED
|
@@ -91,7 +91,6 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
91
91
|
}
|
|
92
92
|
if (options.stripWww && parsed.hostname.startsWith("www.")) parsed.hostname = parsed.hostname.slice(4);
|
|
93
93
|
if (options.stripHash) parsed.hash = "";
|
|
94
|
-
if (options.stripTextFragment && parsed.hash.startsWith("#:~:")) parsed.hash = "";
|
|
95
94
|
let pathname = parsed.pathname;
|
|
96
95
|
if (options.normalizeEncoding) pathname = decodeAndNormalizeEncoding(pathname);
|
|
97
96
|
if (options.collapseSlashes) pathname = pathname.replace(/\/+/g, "/");
|
|
@@ -146,8 +145,9 @@ const feedsmithParser = {
|
|
|
146
145
|
case "json": return parsed.feed.feed_url;
|
|
147
146
|
}
|
|
148
147
|
},
|
|
149
|
-
getSignature: (parsed) => {
|
|
150
|
-
|
|
148
|
+
getSignature: (parsed, selfUrl) => {
|
|
149
|
+
const signature = JSON.stringify(parsed.feed);
|
|
150
|
+
return selfUrl ? signature.replaceAll(`"${selfUrl}"`, "\"__SELF_URL__\"") : signature;
|
|
151
151
|
}
|
|
152
152
|
};
|
|
153
153
|
|
package/dist/utils.js
CHANGED
|
@@ -91,7 +91,6 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
91
91
|
}
|
|
92
92
|
if (options.stripWww && parsed.hostname.startsWith("www.")) parsed.hostname = parsed.hostname.slice(4);
|
|
93
93
|
if (options.stripHash) parsed.hash = "";
|
|
94
|
-
if (options.stripTextFragment && parsed.hash.startsWith("#:~:")) parsed.hash = "";
|
|
95
94
|
let pathname = parsed.pathname;
|
|
96
95
|
if (options.normalizeEncoding) pathname = decodeAndNormalizeEncoding(pathname);
|
|
97
96
|
if (options.collapseSlashes) pathname = pathname.replace(/\/+/g, "/");
|
|
@@ -146,8 +145,9 @@ const feedsmithParser = {
|
|
|
146
145
|
case "json": return parsed.feed.feed_url;
|
|
147
146
|
}
|
|
148
147
|
},
|
|
149
|
-
getSignature: (parsed) => {
|
|
150
|
-
|
|
148
|
+
getSignature: (parsed, selfUrl) => {
|
|
149
|
+
const signature = JSON.stringify(parsed.feed);
|
|
150
|
+
return selfUrl ? signature.replaceAll(`"${selfUrl}"`, "\"__SELF_URL__\"") : signature;
|
|
151
151
|
}
|
|
152
152
|
};
|
|
153
153
|
|
package/package.json
CHANGED