feedcanon 1.0.0-beta.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -49
- package/dist/defaults.cjs +47 -9
- package/dist/defaults.d.cts +4 -2
- package/dist/defaults.d.ts +4 -2
- package/dist/defaults.js +46 -10
- package/dist/exports.cjs +2 -1
- package/dist/exports.d.cts +3 -3
- package/dist/exports.d.ts +3 -3
- package/dist/exports.js +3 -3
- package/dist/index.cjs +7 -7
- package/dist/index.js +9 -9
- package/dist/platforms/feedburner.cjs +3 -3
- package/dist/platforms/feedburner.js +3 -3
- package/dist/types.d.cts +3 -3
- package/dist/types.d.ts +3 -3
- package/dist/utils.cjs +17 -52
- package/dist/utils.d.cts +2 -3
- package/dist/utils.d.ts +2 -3
- package/dist/utils.js +18 -51
- package/package.json +7 -4
package/README.md
CHANGED
|
@@ -4,22 +4,59 @@
|
|
|
4
4
|
[](https://www.npmjs.com/package/feedcanon)
|
|
5
5
|
[](https://github.com/macieklamberski/feedcanon/blob/main/LICENSE)
|
|
6
6
|
|
|
7
|
-
Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest
|
|
7
|
+
Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest form.
|
|
8
8
|
|
|
9
|
-
Many URLs can point to the same feed
|
|
9
|
+
Many URLs can point to the same feed, varying by protocol, www prefixes, trailing slashes, order of params, or domain aliases. Feedcanon compares actual feed content, respects the feed's declared self URL, and tests simpler URL alternatives to find the cleanest working one.
|
|
10
|
+
|
|
11
|
+
Perfect for feed readers to deduplicate subscriptions when users add the same feed via different URLs.
|
|
12
|
+
|
|
13
|
+
**[Read full docs ↗](https://feedcanon.dev)**
|
|
14
|
+
·
|
|
15
|
+
[Quick Start](#quick-start)
|
|
10
16
|
|
|
11
17
|
---
|
|
12
18
|
|
|
13
|
-
##
|
|
19
|
+
## Example
|
|
20
|
+
|
|
21
|
+
The 9 URLs below all work and return identical content. None redirect to each other, normally making each appear unique. Feedcanon compares content, normalizes URLs and resolves them to a single URL.
|
|
22
|
+
|
|
23
|
+
```dockerfile
|
|
24
|
+
'http://feeds.kottke.org/main' ──────────┐
|
|
25
|
+
'http://feeds.kottke.org/main/' ─────────┤
|
|
26
|
+
'https://feeds.kottke.org/main' ─────────┤
|
|
27
|
+
'https://feeds.kottke.org/main/' ────────┤
|
|
28
|
+
'https://feeds.kottke.org///main/' ──────┼──→ 'https://feeds.kottke.org/main'
|
|
29
|
+
'http://feeds.feedburner.com/kottke' ────┤
|
|
30
|
+
'http://feeds.feedburner.com/kottke/' ───┤
|
|
31
|
+
'https://feeds.feedburner.com/kottke' ───┤
|
|
32
|
+
'https://feeds.feedburner.com/kottke/' ──┘
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Overview
|
|
36
|
+
|
|
37
|
+
### How It Works
|
|
14
38
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
39
|
+
1. Fetch the input URL and parse the feed to establish reference content.
|
|
40
|
+
2. Extract the feed's declared self URL and validate it serves identical content.
|
|
41
|
+
3. Generate URL variants ordered from cleanest to least clean.
|
|
42
|
+
4. Test variants in order—the first one serving identical content wins.
|
|
43
|
+
5. Upgrade HTTP to HTTPS if both serve identical content.
|
|
44
|
+
|
|
45
|
+
### Customization
|
|
46
|
+
|
|
47
|
+
Feedcanon is designed to be flexible. Every major component can be replaced or extended.
|
|
48
|
+
|
|
49
|
+
- **Progress callbacks** — monitor the process with `onFetch`, `onMatch`, and `onExists` callbacks.
|
|
50
|
+
- **Database lookup** — use `existsFn` to check if a URL already exists in your database.
|
|
51
|
+
- **Custom fetch** — use your own HTTP client (Axios, Got, Ky, etc.)
|
|
52
|
+
- **Custom parser** — bring your own parser (Feedsmith by default).
|
|
53
|
+
- **Custom tiers** — define your own URL normalization variants.
|
|
54
|
+
- **Custom platforms** — add handlers to normalize domain aliases (like FeedBurner).
|
|
20
55
|
|
|
21
56
|
## Quick Start
|
|
22
57
|
|
|
58
|
+
Basic installation and common usage patterns. For a full overview, visit the [documentation website](https://feedcanon.dev).
|
|
59
|
+
|
|
23
60
|
### Installation
|
|
24
61
|
|
|
25
62
|
```bash
|
|
@@ -28,61 +65,37 @@ npm install feedcanon
|
|
|
28
65
|
|
|
29
66
|
### Basic Usage
|
|
30
67
|
|
|
68
|
+
When you just need to clean up a feed URL and get its canonical form.
|
|
69
|
+
|
|
31
70
|
```typescript
|
|
32
71
|
import { findCanonical } from 'feedcanon'
|
|
33
72
|
|
|
34
|
-
const url = await findCanonical('
|
|
73
|
+
const url = await findCanonical('http://www.example.com/feed/?utm_source=twitter')
|
|
35
74
|
|
|
36
75
|
// 'https://example.com/feed'
|
|
37
76
|
```
|
|
38
77
|
|
|
39
|
-
|
|
78
|
+
Returns `undefined` if the feed is invalid or unreachable.
|
|
40
79
|
|
|
41
|
-
|
|
42
|
-
import { findCanonical } from 'feedcanon'
|
|
80
|
+
### Using Callbacks
|
|
43
81
|
|
|
44
|
-
|
|
45
|
-
onFetch: ({ url, response }) => {
|
|
46
|
-
console.log('Fetched:', url, response.status)
|
|
47
|
-
},
|
|
48
|
-
onMatch: ({ url, feed }) => {
|
|
49
|
-
console.log('Found matching URL:', url)
|
|
50
|
-
},
|
|
51
|
-
})
|
|
52
|
-
```
|
|
53
|
-
|
|
54
|
-
### Custom Fetch
|
|
82
|
+
When you want to log the canonicalization process for debugging. Or store all URL aliases that resolve to the same feed.
|
|
55
83
|
|
|
56
84
|
```typescript
|
|
57
85
|
import { findCanonical } from 'feedcanon'
|
|
58
|
-
import axios from 'axios'
|
|
59
|
-
|
|
60
|
-
const url = await findCanonical('https://example.com/feed', {
|
|
61
|
-
fetchFn: async (url) => {
|
|
62
|
-
const response = await axios.get(url)
|
|
63
|
-
|
|
64
|
-
return {
|
|
65
|
-
status: response.status,
|
|
66
|
-
url: response.request.res.responseUrl,
|
|
67
|
-
body: response.data,
|
|
68
|
-
headers: new Headers(response.headers),
|
|
69
|
-
}
|
|
70
|
-
},
|
|
71
|
-
})
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
### Database Integration
|
|
75
86
|
|
|
76
|
-
|
|
77
|
-
import { findCanonical } from 'feedcanon'
|
|
87
|
+
const aliases = []
|
|
78
88
|
|
|
79
|
-
const url = await findCanonical('
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
return await db.feeds.findByUrl(url)
|
|
83
|
-
},
|
|
84
|
-
onExists: ({ url, data }) => {
|
|
85
|
-
console.log('URL already exists:', url)
|
|
89
|
+
const url = await findCanonical('http://www.example.com/feed/', {
|
|
90
|
+
onMatch: ({ url }) => {
|
|
91
|
+
aliases.push(url)
|
|
86
92
|
},
|
|
87
93
|
})
|
|
94
|
+
|
|
95
|
+
// url: 'https://example.com/feed'
|
|
96
|
+
// aliases: [
|
|
97
|
+
// 'http://www.example.com/feed/',
|
|
98
|
+
// 'https://www.example.com/feed/',
|
|
99
|
+
// 'https://example.com/feed',
|
|
100
|
+
// ]
|
|
88
101
|
```
|
package/dist/defaults.cjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
const require_feedburner = require('./platforms/feedburner.cjs');
|
|
2
|
+
let feedsmith = require("feedsmith");
|
|
2
3
|
|
|
3
4
|
//#region src/defaults.ts
|
|
4
5
|
const defaultPlatforms = [require_feedburner.feedburnerHandler];
|
|
@@ -31,6 +32,7 @@ const defaultStrippedParams = [
|
|
|
31
32
|
"_ke",
|
|
32
33
|
"mc_cid",
|
|
33
34
|
"mc_eid",
|
|
35
|
+
"mc_tc",
|
|
34
36
|
"mkt_tok",
|
|
35
37
|
"msclkid",
|
|
36
38
|
"twclid",
|
|
@@ -63,15 +65,20 @@ const defaultStrippedParams = [
|
|
|
63
65
|
"hsa_tgt",
|
|
64
66
|
"hsa_ver",
|
|
65
67
|
"hsCtaTracking",
|
|
68
|
+
"_hsenc",
|
|
69
|
+
"_hsmi",
|
|
66
70
|
"__hstc",
|
|
67
71
|
"__hsfp",
|
|
68
72
|
"__hssc",
|
|
69
73
|
"cid",
|
|
70
74
|
"s_kwcid",
|
|
75
|
+
"sc_cid",
|
|
71
76
|
"ef_id",
|
|
72
77
|
"obOrigUrl",
|
|
73
78
|
"dicbo",
|
|
74
79
|
"yclid",
|
|
80
|
+
"ysclid",
|
|
81
|
+
"_openstat",
|
|
75
82
|
"awinaffid",
|
|
76
83
|
"awinmid",
|
|
77
84
|
"clickref",
|
|
@@ -104,7 +111,6 @@ const defaultStrippedParams = [
|
|
|
104
111
|
"ts",
|
|
105
112
|
"cb",
|
|
106
113
|
"cachebuster",
|
|
107
|
-
"cHash",
|
|
108
114
|
"nocache",
|
|
109
115
|
"rand",
|
|
110
116
|
"random",
|
|
@@ -114,10 +120,6 @@ const defaultStrippedParams = [
|
|
|
114
120
|
"cleancache",
|
|
115
121
|
"rebuildcache",
|
|
116
122
|
"kontrol_health_check_timestamp",
|
|
117
|
-
"userab",
|
|
118
|
-
"_x_tr_sl",
|
|
119
|
-
"_x_tr_tl",
|
|
120
|
-
"_x_tr_hl",
|
|
121
123
|
"action_object_map",
|
|
122
124
|
"action_ref_map",
|
|
123
125
|
"action_type_map",
|
|
@@ -130,6 +132,10 @@ const defaultStrippedParams = [
|
|
|
130
132
|
"at_custom4",
|
|
131
133
|
"at_medium",
|
|
132
134
|
"at_preview_index",
|
|
135
|
+
"_bhlid",
|
|
136
|
+
"_branch_match_id",
|
|
137
|
+
"_branch_referrer",
|
|
138
|
+
"__readwiseLocation",
|
|
133
139
|
"campaign_id",
|
|
134
140
|
"click_sum",
|
|
135
141
|
"fref",
|
|
@@ -161,12 +167,42 @@ const defaultNormalizeOptions = {
|
|
|
161
167
|
stripHash: true,
|
|
162
168
|
sortQueryParams: true,
|
|
163
169
|
stripQueryParams: defaultStrippedParams,
|
|
170
|
+
stripQuery: false,
|
|
164
171
|
stripEmptyQuery: true,
|
|
165
172
|
normalizeEncoding: true,
|
|
166
|
-
lowercaseHostname: true,
|
|
167
173
|
normalizeUnicode: true,
|
|
168
174
|
convertToPunycode: true
|
|
169
175
|
};
|
|
176
|
+
const defaultFetch = async (url, options) => {
|
|
177
|
+
const response = await fetch(url, {
|
|
178
|
+
method: options?.method ?? "GET",
|
|
179
|
+
headers: options?.headers
|
|
180
|
+
});
|
|
181
|
+
return {
|
|
182
|
+
headers: response.headers,
|
|
183
|
+
body: await response.text(),
|
|
184
|
+
url: response.url,
|
|
185
|
+
status: response.status
|
|
186
|
+
};
|
|
187
|
+
};
|
|
188
|
+
const defaultParser = {
|
|
189
|
+
parse: (body) => {
|
|
190
|
+
try {
|
|
191
|
+
return (0, feedsmith.parseFeed)(body);
|
|
192
|
+
} catch {}
|
|
193
|
+
},
|
|
194
|
+
getSelfUrl: (parsed) => {
|
|
195
|
+
switch (parsed.format) {
|
|
196
|
+
case "atom": return parsed.feed.links?.find((link) => link.rel === "self")?.href;
|
|
197
|
+
case "rss":
|
|
198
|
+
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self")?.href;
|
|
199
|
+
case "json": return parsed.feed.feed_url;
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
getSignature: (parsed) => {
|
|
203
|
+
return parsed.feed;
|
|
204
|
+
}
|
|
205
|
+
};
|
|
170
206
|
const defaultTiers = [
|
|
171
207
|
{
|
|
172
208
|
stripProtocol: false,
|
|
@@ -177,9 +213,9 @@ const defaultTiers = [
|
|
|
177
213
|
collapseSlashes: true,
|
|
178
214
|
stripHash: true,
|
|
179
215
|
sortQueryParams: true,
|
|
216
|
+
stripQuery: false,
|
|
180
217
|
stripEmptyQuery: true,
|
|
181
218
|
normalizeEncoding: true,
|
|
182
|
-
lowercaseHostname: true,
|
|
183
219
|
normalizeUnicode: true,
|
|
184
220
|
convertToPunycode: true
|
|
185
221
|
},
|
|
@@ -192,9 +228,9 @@ const defaultTiers = [
|
|
|
192
228
|
collapseSlashes: true,
|
|
193
229
|
stripHash: true,
|
|
194
230
|
sortQueryParams: true,
|
|
231
|
+
stripQuery: false,
|
|
195
232
|
stripEmptyQuery: true,
|
|
196
233
|
normalizeEncoding: true,
|
|
197
|
-
lowercaseHostname: true,
|
|
198
234
|
normalizeUnicode: true,
|
|
199
235
|
convertToPunycode: true
|
|
200
236
|
},
|
|
@@ -207,16 +243,18 @@ const defaultTiers = [
|
|
|
207
243
|
collapseSlashes: true,
|
|
208
244
|
stripHash: true,
|
|
209
245
|
sortQueryParams: true,
|
|
246
|
+
stripQuery: false,
|
|
210
247
|
stripEmptyQuery: true,
|
|
211
248
|
normalizeEncoding: true,
|
|
212
|
-
lowercaseHostname: true,
|
|
213
249
|
normalizeUnicode: true,
|
|
214
250
|
convertToPunycode: true
|
|
215
251
|
}
|
|
216
252
|
];
|
|
217
253
|
|
|
218
254
|
//#endregion
|
|
255
|
+
exports.defaultFetch = defaultFetch;
|
|
219
256
|
exports.defaultNormalizeOptions = defaultNormalizeOptions;
|
|
257
|
+
exports.defaultParser = defaultParser;
|
|
220
258
|
exports.defaultPlatforms = defaultPlatforms;
|
|
221
259
|
exports.defaultStrippedParams = defaultStrippedParams;
|
|
222
260
|
exports.defaultTiers = defaultTiers;
|
package/dist/defaults.d.cts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import { NormalizeOptions, PlatformHandler, Tier } from "./types.cjs";
|
|
1
|
+
import { FeedsmithFeed, FetchFn, NormalizeOptions, ParserAdapter, PlatformHandler, Tier } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultPlatforms: Array<PlatformHandler>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
|
+
declare const defaultFetch: FetchFn;
|
|
8
|
+
declare const defaultParser: ParserAdapter<FeedsmithFeed>;
|
|
7
9
|
declare const defaultTiers: Array<Tier>;
|
|
8
10
|
//#endregion
|
|
9
|
-
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
|
11
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import { NormalizeOptions, PlatformHandler, Tier } from "./types.js";
|
|
1
|
+
import { FeedsmithFeed, FetchFn, NormalizeOptions, ParserAdapter, PlatformHandler, Tier } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultPlatforms: Array<PlatformHandler>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
|
+
declare const defaultFetch: FetchFn;
|
|
8
|
+
declare const defaultParser: ParserAdapter<FeedsmithFeed>;
|
|
7
9
|
declare const defaultTiers: Array<Tier>;
|
|
8
10
|
//#endregion
|
|
9
|
-
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
|
11
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { feedburnerHandler } from "./platforms/feedburner.js";
|
|
2
|
+
import { parseFeed } from "feedsmith";
|
|
2
3
|
|
|
3
4
|
//#region src/defaults.ts
|
|
4
5
|
const defaultPlatforms = [feedburnerHandler];
|
|
@@ -31,6 +32,7 @@ const defaultStrippedParams = [
|
|
|
31
32
|
"_ke",
|
|
32
33
|
"mc_cid",
|
|
33
34
|
"mc_eid",
|
|
35
|
+
"mc_tc",
|
|
34
36
|
"mkt_tok",
|
|
35
37
|
"msclkid",
|
|
36
38
|
"twclid",
|
|
@@ -63,15 +65,20 @@ const defaultStrippedParams = [
|
|
|
63
65
|
"hsa_tgt",
|
|
64
66
|
"hsa_ver",
|
|
65
67
|
"hsCtaTracking",
|
|
68
|
+
"_hsenc",
|
|
69
|
+
"_hsmi",
|
|
66
70
|
"__hstc",
|
|
67
71
|
"__hsfp",
|
|
68
72
|
"__hssc",
|
|
69
73
|
"cid",
|
|
70
74
|
"s_kwcid",
|
|
75
|
+
"sc_cid",
|
|
71
76
|
"ef_id",
|
|
72
77
|
"obOrigUrl",
|
|
73
78
|
"dicbo",
|
|
74
79
|
"yclid",
|
|
80
|
+
"ysclid",
|
|
81
|
+
"_openstat",
|
|
75
82
|
"awinaffid",
|
|
76
83
|
"awinmid",
|
|
77
84
|
"clickref",
|
|
@@ -104,7 +111,6 @@ const defaultStrippedParams = [
|
|
|
104
111
|
"ts",
|
|
105
112
|
"cb",
|
|
106
113
|
"cachebuster",
|
|
107
|
-
"cHash",
|
|
108
114
|
"nocache",
|
|
109
115
|
"rand",
|
|
110
116
|
"random",
|
|
@@ -114,10 +120,6 @@ const defaultStrippedParams = [
|
|
|
114
120
|
"cleancache",
|
|
115
121
|
"rebuildcache",
|
|
116
122
|
"kontrol_health_check_timestamp",
|
|
117
|
-
"userab",
|
|
118
|
-
"_x_tr_sl",
|
|
119
|
-
"_x_tr_tl",
|
|
120
|
-
"_x_tr_hl",
|
|
121
123
|
"action_object_map",
|
|
122
124
|
"action_ref_map",
|
|
123
125
|
"action_type_map",
|
|
@@ -130,6 +132,10 @@ const defaultStrippedParams = [
|
|
|
130
132
|
"at_custom4",
|
|
131
133
|
"at_medium",
|
|
132
134
|
"at_preview_index",
|
|
135
|
+
"_bhlid",
|
|
136
|
+
"_branch_match_id",
|
|
137
|
+
"_branch_referrer",
|
|
138
|
+
"__readwiseLocation",
|
|
133
139
|
"campaign_id",
|
|
134
140
|
"click_sum",
|
|
135
141
|
"fref",
|
|
@@ -161,12 +167,42 @@ const defaultNormalizeOptions = {
|
|
|
161
167
|
stripHash: true,
|
|
162
168
|
sortQueryParams: true,
|
|
163
169
|
stripQueryParams: defaultStrippedParams,
|
|
170
|
+
stripQuery: false,
|
|
164
171
|
stripEmptyQuery: true,
|
|
165
172
|
normalizeEncoding: true,
|
|
166
|
-
lowercaseHostname: true,
|
|
167
173
|
normalizeUnicode: true,
|
|
168
174
|
convertToPunycode: true
|
|
169
175
|
};
|
|
176
|
+
const defaultFetch = async (url, options) => {
|
|
177
|
+
const response = await fetch(url, {
|
|
178
|
+
method: options?.method ?? "GET",
|
|
179
|
+
headers: options?.headers
|
|
180
|
+
});
|
|
181
|
+
return {
|
|
182
|
+
headers: response.headers,
|
|
183
|
+
body: await response.text(),
|
|
184
|
+
url: response.url,
|
|
185
|
+
status: response.status
|
|
186
|
+
};
|
|
187
|
+
};
|
|
188
|
+
const defaultParser = {
|
|
189
|
+
parse: (body) => {
|
|
190
|
+
try {
|
|
191
|
+
return parseFeed(body);
|
|
192
|
+
} catch {}
|
|
193
|
+
},
|
|
194
|
+
getSelfUrl: (parsed) => {
|
|
195
|
+
switch (parsed.format) {
|
|
196
|
+
case "atom": return parsed.feed.links?.find((link) => link.rel === "self")?.href;
|
|
197
|
+
case "rss":
|
|
198
|
+
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self")?.href;
|
|
199
|
+
case "json": return parsed.feed.feed_url;
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
getSignature: (parsed) => {
|
|
203
|
+
return parsed.feed;
|
|
204
|
+
}
|
|
205
|
+
};
|
|
170
206
|
const defaultTiers = [
|
|
171
207
|
{
|
|
172
208
|
stripProtocol: false,
|
|
@@ -177,9 +213,9 @@ const defaultTiers = [
|
|
|
177
213
|
collapseSlashes: true,
|
|
178
214
|
stripHash: true,
|
|
179
215
|
sortQueryParams: true,
|
|
216
|
+
stripQuery: false,
|
|
180
217
|
stripEmptyQuery: true,
|
|
181
218
|
normalizeEncoding: true,
|
|
182
|
-
lowercaseHostname: true,
|
|
183
219
|
normalizeUnicode: true,
|
|
184
220
|
convertToPunycode: true
|
|
185
221
|
},
|
|
@@ -192,9 +228,9 @@ const defaultTiers = [
|
|
|
192
228
|
collapseSlashes: true,
|
|
193
229
|
stripHash: true,
|
|
194
230
|
sortQueryParams: true,
|
|
231
|
+
stripQuery: false,
|
|
195
232
|
stripEmptyQuery: true,
|
|
196
233
|
normalizeEncoding: true,
|
|
197
|
-
lowercaseHostname: true,
|
|
198
234
|
normalizeUnicode: true,
|
|
199
235
|
convertToPunycode: true
|
|
200
236
|
},
|
|
@@ -207,13 +243,13 @@ const defaultTiers = [
|
|
|
207
243
|
collapseSlashes: true,
|
|
208
244
|
stripHash: true,
|
|
209
245
|
sortQueryParams: true,
|
|
246
|
+
stripQuery: false,
|
|
210
247
|
stripEmptyQuery: true,
|
|
211
248
|
normalizeEncoding: true,
|
|
212
|
-
lowercaseHostname: true,
|
|
213
249
|
normalizeUnicode: true,
|
|
214
250
|
convertToPunycode: true
|
|
215
251
|
}
|
|
216
252
|
];
|
|
217
253
|
|
|
218
254
|
//#endregion
|
|
219
|
-
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
|
255
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/exports.cjs
CHANGED
|
@@ -4,11 +4,12 @@ const require_utils = require('./utils.cjs');
|
|
|
4
4
|
const require_index = require('./index.cjs');
|
|
5
5
|
|
|
6
6
|
exports.addMissingProtocol = require_utils.addMissingProtocol;
|
|
7
|
+
exports.defaultFetch = require_defaults.defaultFetch;
|
|
8
|
+
exports.defaultParser = require_defaults.defaultParser;
|
|
7
9
|
exports.defaultPlatforms = require_defaults.defaultPlatforms;
|
|
8
10
|
exports.defaultStrippedParams = require_defaults.defaultStrippedParams;
|
|
9
11
|
exports.defaultTiers = require_defaults.defaultTiers;
|
|
10
12
|
exports.feedburnerHandler = require_feedburner.feedburnerHandler;
|
|
11
|
-
exports.feedsmithParser = require_utils.feedsmithParser;
|
|
12
13
|
exports.findCanonical = require_index.findCanonical;
|
|
13
14
|
exports.normalizeUrl = require_utils.normalizeUrl;
|
|
14
15
|
exports.resolveFeedProtocol = require_utils.resolveFeedProtocol;
|
package/dist/exports.d.cts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler } from "./types.cjs";
|
|
2
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
3
3
|
import { findCanonical } from "./index.cjs";
|
|
4
4
|
import { feedburnerHandler } from "./platforms/feedburner.cjs";
|
|
5
|
-
import { addMissingProtocol,
|
|
6
|
-
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler,
|
|
5
|
+
import { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
|
|
6
|
+
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler, findCanonical, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/exports.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler } from "./types.js";
|
|
2
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
import { findCanonical } from "./index.js";
|
|
4
4
|
import { feedburnerHandler } from "./platforms/feedburner.js";
|
|
5
|
-
import { addMissingProtocol,
|
|
6
|
-
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler,
|
|
5
|
+
import { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
6
|
+
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler, findCanonical, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/exports.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { feedburnerHandler } from "./platforms/feedburner.js";
|
|
2
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
|
-
import { addMissingProtocol,
|
|
2
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
|
+
import { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
4
4
|
import { findCanonical } from "./index.js";
|
|
5
5
|
|
|
6
|
-
export { addMissingProtocol, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler,
|
|
6
|
+
export { addMissingProtocol, defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler, findCanonical, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/index.cjs
CHANGED
|
@@ -3,7 +3,7 @@ const require_utils = require('./utils.cjs');
|
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser =
|
|
6
|
+
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, platforms = require_defaults.defaultPlatforms, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
8
|
return stripQueryParams?.length ? require_utils.normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
@@ -35,7 +35,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
35
35
|
if (!initialResponseBody) return;
|
|
36
36
|
let initialResponseSignature;
|
|
37
37
|
let selfRequestUrl;
|
|
38
|
-
const initialResponseFeed = parser.parse(initialResponseBody);
|
|
38
|
+
const initialResponseFeed = await parser.parse(initialResponseBody);
|
|
39
39
|
if (!initialResponseFeed) return;
|
|
40
40
|
onMatch?.({
|
|
41
41
|
url: initialRequestUrl,
|
|
@@ -47,13 +47,13 @@ async function findCanonical(inputUrl, options) {
|
|
|
47
47
|
selfRequestUrl = resolveAndApplyPlatformHandlers(selfRequestUrlRaw, initialResponseUrl);
|
|
48
48
|
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
49
|
}
|
|
50
|
-
const compareWithInitialResponse = (comparedResponseBody) => {
|
|
50
|
+
const compareWithInitialResponse = async (comparedResponseBody) => {
|
|
51
51
|
if (!comparedResponseBody) return false;
|
|
52
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
53
|
-
const comparedResponseFeed = parser.parse(comparedResponseBody);
|
|
53
|
+
const comparedResponseFeed = await parser.parse(comparedResponseBody);
|
|
54
54
|
if (comparedResponseFeed) {
|
|
55
|
-
initialResponseSignature ||=
|
|
56
|
-
const comparedResponseSignature =
|
|
55
|
+
initialResponseSignature ||= JSON.stringify(parser.getSignature(initialResponseFeed));
|
|
56
|
+
const comparedResponseSignature = JSON.stringify(parser.getSignature(comparedResponseFeed));
|
|
57
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
58
58
|
}
|
|
59
59
|
return false;
|
|
@@ -70,7 +70,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
70
70
|
response
|
|
71
71
|
});
|
|
72
72
|
if (response.status < 200 || response.status >= 300) return;
|
|
73
|
-
if (!compareWithInitialResponse(response.body)) return;
|
|
73
|
+
if (!await compareWithInitialResponse(response.body)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
76
|
let variantSourceUrl = initialResponseUrl;
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
-
import { applyPlatformHandlers,
|
|
1
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
+
import { applyPlatformHandlers, normalizeUrl, resolveUrl } from "./utils.js";
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser =
|
|
6
|
+
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, platforms = defaultPlatforms, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
8
|
return stripQueryParams?.length ? normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
@@ -35,7 +35,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
35
35
|
if (!initialResponseBody) return;
|
|
36
36
|
let initialResponseSignature;
|
|
37
37
|
let selfRequestUrl;
|
|
38
|
-
const initialResponseFeed = parser.parse(initialResponseBody);
|
|
38
|
+
const initialResponseFeed = await parser.parse(initialResponseBody);
|
|
39
39
|
if (!initialResponseFeed) return;
|
|
40
40
|
onMatch?.({
|
|
41
41
|
url: initialRequestUrl,
|
|
@@ -47,13 +47,13 @@ async function findCanonical(inputUrl, options) {
|
|
|
47
47
|
selfRequestUrl = resolveAndApplyPlatformHandlers(selfRequestUrlRaw, initialResponseUrl);
|
|
48
48
|
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
49
|
}
|
|
50
|
-
const compareWithInitialResponse = (comparedResponseBody) => {
|
|
50
|
+
const compareWithInitialResponse = async (comparedResponseBody) => {
|
|
51
51
|
if (!comparedResponseBody) return false;
|
|
52
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
53
|
-
const comparedResponseFeed = parser.parse(comparedResponseBody);
|
|
53
|
+
const comparedResponseFeed = await parser.parse(comparedResponseBody);
|
|
54
54
|
if (comparedResponseFeed) {
|
|
55
|
-
initialResponseSignature ||=
|
|
56
|
-
const comparedResponseSignature =
|
|
55
|
+
initialResponseSignature ||= JSON.stringify(parser.getSignature(initialResponseFeed));
|
|
56
|
+
const comparedResponseSignature = JSON.stringify(parser.getSignature(comparedResponseFeed));
|
|
57
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
58
58
|
}
|
|
59
59
|
return false;
|
|
@@ -70,7 +70,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
70
70
|
response
|
|
71
71
|
});
|
|
72
72
|
if (response.status < 200 || response.status >= 300) return;
|
|
73
|
-
if (!compareWithInitialResponse(response.body)) return;
|
|
73
|
+
if (!await compareWithInitialResponse(response.body)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
76
|
let variantSourceUrl = initialResponseUrl;
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
|
|
2
2
|
//#region src/platforms/feedburner.ts
|
|
3
|
-
const hosts =
|
|
3
|
+
const hosts = [
|
|
4
4
|
"feeds.feedburner.com",
|
|
5
5
|
"feeds2.feedburner.com",
|
|
6
6
|
"feedproxy.google.com"
|
|
7
|
-
]
|
|
7
|
+
];
|
|
8
8
|
const feedburnerHandler = {
|
|
9
9
|
match: (url) => {
|
|
10
|
-
return hosts.
|
|
10
|
+
return hosts.includes(url.hostname);
|
|
11
11
|
},
|
|
12
12
|
normalize: (url) => {
|
|
13
13
|
const normalized = new URL(url);
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
//#region src/platforms/feedburner.ts
|
|
2
|
-
const hosts =
|
|
2
|
+
const hosts = [
|
|
3
3
|
"feeds.feedburner.com",
|
|
4
4
|
"feeds2.feedburner.com",
|
|
5
5
|
"feedproxy.google.com"
|
|
6
|
-
]
|
|
6
|
+
];
|
|
7
7
|
const feedburnerHandler = {
|
|
8
8
|
match: (url) => {
|
|
9
|
-
return hosts.
|
|
9
|
+
return hosts.includes(url.hostname);
|
|
10
10
|
},
|
|
11
11
|
normalize: (url) => {
|
|
12
12
|
const normalized = new URL(url);
|
package/dist/types.d.cts
CHANGED
|
@@ -3,9 +3,9 @@ import * as feedsmith0 from "feedsmith";
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
4
|
type FeedsmithFeed = ReturnType<typeof feedsmith0.parseFeed>;
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) => T | undefined;
|
|
6
|
+
parse: (body: string) => Promise<T | undefined> | T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T
|
|
8
|
+
getSignature: (parsed: T) => object;
|
|
9
9
|
};
|
|
10
10
|
type PlatformHandler = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
@@ -21,9 +21,9 @@ type NormalizeOptions = {
|
|
|
21
21
|
stripHash?: boolean;
|
|
22
22
|
sortQueryParams?: boolean;
|
|
23
23
|
stripQueryParams?: Array<string>;
|
|
24
|
+
stripQuery?: boolean;
|
|
24
25
|
stripEmptyQuery?: boolean;
|
|
25
26
|
normalizeEncoding?: boolean;
|
|
26
|
-
lowercaseHostname?: boolean;
|
|
27
27
|
normalizeUnicode?: boolean;
|
|
28
28
|
convertToPunycode?: boolean;
|
|
29
29
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -3,9 +3,9 @@ import * as feedsmith0 from "feedsmith";
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
4
|
type FeedsmithFeed = ReturnType<typeof feedsmith0.parseFeed>;
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) => T | undefined;
|
|
6
|
+
parse: (body: string) => Promise<T | undefined> | T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T
|
|
8
|
+
getSignature: (parsed: T) => object;
|
|
9
9
|
};
|
|
10
10
|
type PlatformHandler = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
@@ -21,9 +21,9 @@ type NormalizeOptions = {
|
|
|
21
21
|
stripHash?: boolean;
|
|
22
22
|
sortQueryParams?: boolean;
|
|
23
23
|
stripQueryParams?: Array<string>;
|
|
24
|
+
stripQuery?: boolean;
|
|
24
25
|
stripEmptyQuery?: boolean;
|
|
25
26
|
normalizeEncoding?: boolean;
|
|
26
|
-
lowercaseHostname?: boolean;
|
|
27
27
|
normalizeUnicode?: boolean;
|
|
28
28
|
convertToPunycode?: boolean;
|
|
29
29
|
};
|
package/dist/utils.cjs
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
const require_defaults = require('./defaults.cjs');
|
|
2
2
|
let node_url = require("node:url");
|
|
3
3
|
let entities = require("entities");
|
|
4
|
-
let feedsmith = require("feedsmith");
|
|
5
4
|
|
|
6
5
|
//#region src/utils.ts
|
|
6
|
+
const strippedParamsCache = /* @__PURE__ */ new WeakMap();
|
|
7
|
+
const getStrippedParamsSet = (params) => {
|
|
8
|
+
let cached = strippedParamsCache.get(params);
|
|
9
|
+
if (!cached) {
|
|
10
|
+
cached = new Set(params.map((param) => param.toLowerCase()));
|
|
11
|
+
strippedParamsCache.set(params, cached);
|
|
12
|
+
}
|
|
13
|
+
return cached;
|
|
14
|
+
};
|
|
7
15
|
const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
8
16
|
const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
9
17
|
const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
@@ -84,7 +92,6 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
84
92
|
const ascii = (0, node_url.domainToASCII)(parsed.hostname);
|
|
85
93
|
if (ascii) parsed.hostname = ascii;
|
|
86
94
|
}
|
|
87
|
-
if (options.lowercaseHostname) parsed.hostname = parsed.hostname.toLowerCase();
|
|
88
95
|
if (options.stripAuthentication) {
|
|
89
96
|
parsed.username = "";
|
|
90
97
|
parsed.password = "";
|
|
@@ -97,9 +104,15 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
97
104
|
if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
|
|
98
105
|
if (options.stripRootSlash && pathname === "/") pathname = "";
|
|
99
106
|
parsed.pathname = pathname;
|
|
100
|
-
if (options.
|
|
107
|
+
if (options.stripQuery) parsed.search = "";
|
|
108
|
+
if (options.stripQueryParams && parsed.search) {
|
|
109
|
+
const strippedSet = getStrippedParamsSet(options.stripQueryParams);
|
|
110
|
+
const paramsToDelete = [];
|
|
111
|
+
for (const [key] of parsed.searchParams) if (strippedSet.has(key.toLowerCase())) paramsToDelete.push(key);
|
|
112
|
+
for (const param of paramsToDelete) parsed.searchParams.delete(param);
|
|
113
|
+
}
|
|
101
114
|
if (options.sortQueryParams) parsed.searchParams.sort();
|
|
102
|
-
if (options.stripEmptyQuery && parsed.
|
|
115
|
+
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
103
116
|
let result = parsed.href;
|
|
104
117
|
if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
|
|
105
118
|
return result;
|
|
@@ -107,18 +120,6 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
107
120
|
return url;
|
|
108
121
|
}
|
|
109
122
|
};
|
|
110
|
-
const nativeFetch = async (url, options) => {
|
|
111
|
-
const response = await fetch(url, {
|
|
112
|
-
method: options?.method ?? "GET",
|
|
113
|
-
headers: options?.headers
|
|
114
|
-
});
|
|
115
|
-
return {
|
|
116
|
-
headers: response.headers,
|
|
117
|
-
body: await response.text(),
|
|
118
|
-
url: response.url,
|
|
119
|
-
status: response.status
|
|
120
|
-
};
|
|
121
|
-
};
|
|
122
123
|
const applyPlatformHandlers = (url, platforms) => {
|
|
123
124
|
try {
|
|
124
125
|
let parsed = new URL(url);
|
|
@@ -131,46 +132,10 @@ const applyPlatformHandlers = (url, platforms) => {
|
|
|
131
132
|
return url;
|
|
132
133
|
}
|
|
133
134
|
};
|
|
134
|
-
const findSelfLink = (parsed) => {
|
|
135
|
-
switch (parsed.format) {
|
|
136
|
-
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
137
|
-
case "rss":
|
|
138
|
-
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self");
|
|
139
|
-
}
|
|
140
|
-
};
|
|
141
|
-
const feedsmithParser = {
|
|
142
|
-
parse: (body) => {
|
|
143
|
-
try {
|
|
144
|
-
return (0, feedsmith.parseFeed)(body);
|
|
145
|
-
} catch {}
|
|
146
|
-
},
|
|
147
|
-
getSelfUrl: (parsed) => {
|
|
148
|
-
return parsed.format === "json" ? parsed.feed.feed_url : findSelfLink(parsed)?.href;
|
|
149
|
-
},
|
|
150
|
-
getSignature: (parsed, selfUrl) => {
|
|
151
|
-
if (!selfUrl) return JSON.stringify(parsed.feed);
|
|
152
|
-
if (parsed.format === "json") {
|
|
153
|
-
const original$1 = parsed.feed.feed_url;
|
|
154
|
-
parsed.feed.feed_url = void 0;
|
|
155
|
-
const signature$1 = JSON.stringify(parsed.feed);
|
|
156
|
-
parsed.feed.feed_url = original$1;
|
|
157
|
-
return signature$1;
|
|
158
|
-
}
|
|
159
|
-
const link = findSelfLink(parsed);
|
|
160
|
-
if (!link) return JSON.stringify(parsed.feed);
|
|
161
|
-
const original = link.href;
|
|
162
|
-
link.href = void 0;
|
|
163
|
-
const signature = JSON.stringify(parsed.feed);
|
|
164
|
-
link.href = original;
|
|
165
|
-
return signature;
|
|
166
|
-
}
|
|
167
|
-
};
|
|
168
135
|
|
|
169
136
|
//#endregion
|
|
170
137
|
exports.addMissingProtocol = addMissingProtocol;
|
|
171
138
|
exports.applyPlatformHandlers = applyPlatformHandlers;
|
|
172
|
-
exports.feedsmithParser = feedsmithParser;
|
|
173
|
-
exports.nativeFetch = nativeFetch;
|
|
174
139
|
exports.normalizeUrl = normalizeUrl;
|
|
175
140
|
exports.resolveFeedProtocol = resolveFeedProtocol;
|
|
176
141
|
exports.resolveUrl = resolveUrl;
|
package/dist/utils.d.cts
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import { FeedsmithFeed, NormalizeOptions, ParserAdapter } from "./types.cjs";
|
|
1
|
+
import { NormalizeOptions } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/utils.d.ts
|
|
4
4
|
declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
5
5
|
declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
6
6
|
declare const resolveUrl: (url: string, base?: string) => string | undefined;
|
|
7
7
|
declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
|
|
8
|
-
declare const feedsmithParser: ParserAdapter<FeedsmithFeed>;
|
|
9
8
|
//#endregion
|
|
10
|
-
export { addMissingProtocol, feedsmithParser, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
9
|
+
export { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import { FeedsmithFeed, NormalizeOptions, ParserAdapter } from "./types.js";
|
|
1
|
+
import { NormalizeOptions } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/utils.d.ts
|
|
4
4
|
declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
5
5
|
declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
6
6
|
declare const resolveUrl: (url: string, base?: string) => string | undefined;
|
|
7
7
|
declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
|
|
8
|
-
declare const feedsmithParser: ParserAdapter<FeedsmithFeed>;
|
|
9
8
|
//#endregion
|
|
10
|
-
export { addMissingProtocol, feedsmithParser, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
9
|
+
export { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/utils.js
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
import { defaultNormalizeOptions } from "./defaults.js";
|
|
2
2
|
import { domainToASCII } from "node:url";
|
|
3
3
|
import { decodeHTML } from "entities";
|
|
4
|
-
import { parseFeed } from "feedsmith";
|
|
5
4
|
|
|
6
5
|
//#region src/utils.ts
|
|
6
|
+
const strippedParamsCache = /* @__PURE__ */ new WeakMap();
|
|
7
|
+
const getStrippedParamsSet = (params) => {
|
|
8
|
+
let cached = strippedParamsCache.get(params);
|
|
9
|
+
if (!cached) {
|
|
10
|
+
cached = new Set(params.map((param) => param.toLowerCase()));
|
|
11
|
+
strippedParamsCache.set(params, cached);
|
|
12
|
+
}
|
|
13
|
+
return cached;
|
|
14
|
+
};
|
|
7
15
|
const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
8
16
|
const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
9
17
|
const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
@@ -84,7 +92,6 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
84
92
|
const ascii = domainToASCII(parsed.hostname);
|
|
85
93
|
if (ascii) parsed.hostname = ascii;
|
|
86
94
|
}
|
|
87
|
-
if (options.lowercaseHostname) parsed.hostname = parsed.hostname.toLowerCase();
|
|
88
95
|
if (options.stripAuthentication) {
|
|
89
96
|
parsed.username = "";
|
|
90
97
|
parsed.password = "";
|
|
@@ -97,9 +104,15 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
97
104
|
if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
|
|
98
105
|
if (options.stripRootSlash && pathname === "/") pathname = "";
|
|
99
106
|
parsed.pathname = pathname;
|
|
100
|
-
if (options.
|
|
107
|
+
if (options.stripQuery) parsed.search = "";
|
|
108
|
+
if (options.stripQueryParams && parsed.search) {
|
|
109
|
+
const strippedSet = getStrippedParamsSet(options.stripQueryParams);
|
|
110
|
+
const paramsToDelete = [];
|
|
111
|
+
for (const [key] of parsed.searchParams) if (strippedSet.has(key.toLowerCase())) paramsToDelete.push(key);
|
|
112
|
+
for (const param of paramsToDelete) parsed.searchParams.delete(param);
|
|
113
|
+
}
|
|
101
114
|
if (options.sortQueryParams) parsed.searchParams.sort();
|
|
102
|
-
if (options.stripEmptyQuery && parsed.
|
|
115
|
+
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
103
116
|
let result = parsed.href;
|
|
104
117
|
if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
|
|
105
118
|
return result;
|
|
@@ -107,18 +120,6 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
107
120
|
return url;
|
|
108
121
|
}
|
|
109
122
|
};
|
|
110
|
-
const nativeFetch = async (url, options) => {
|
|
111
|
-
const response = await fetch(url, {
|
|
112
|
-
method: options?.method ?? "GET",
|
|
113
|
-
headers: options?.headers
|
|
114
|
-
});
|
|
115
|
-
return {
|
|
116
|
-
headers: response.headers,
|
|
117
|
-
body: await response.text(),
|
|
118
|
-
url: response.url,
|
|
119
|
-
status: response.status
|
|
120
|
-
};
|
|
121
|
-
};
|
|
122
123
|
const applyPlatformHandlers = (url, platforms) => {
|
|
123
124
|
try {
|
|
124
125
|
let parsed = new URL(url);
|
|
@@ -131,40 +132,6 @@ const applyPlatformHandlers = (url, platforms) => {
|
|
|
131
132
|
return url;
|
|
132
133
|
}
|
|
133
134
|
};
|
|
134
|
-
const findSelfLink = (parsed) => {
|
|
135
|
-
switch (parsed.format) {
|
|
136
|
-
case "atom": return parsed.feed.links?.find((link) => link.rel === "self");
|
|
137
|
-
case "rss":
|
|
138
|
-
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self");
|
|
139
|
-
}
|
|
140
|
-
};
|
|
141
|
-
const feedsmithParser = {
|
|
142
|
-
parse: (body) => {
|
|
143
|
-
try {
|
|
144
|
-
return parseFeed(body);
|
|
145
|
-
} catch {}
|
|
146
|
-
},
|
|
147
|
-
getSelfUrl: (parsed) => {
|
|
148
|
-
return parsed.format === "json" ? parsed.feed.feed_url : findSelfLink(parsed)?.href;
|
|
149
|
-
},
|
|
150
|
-
getSignature: (parsed, selfUrl) => {
|
|
151
|
-
if (!selfUrl) return JSON.stringify(parsed.feed);
|
|
152
|
-
if (parsed.format === "json") {
|
|
153
|
-
const original$1 = parsed.feed.feed_url;
|
|
154
|
-
parsed.feed.feed_url = void 0;
|
|
155
|
-
const signature$1 = JSON.stringify(parsed.feed);
|
|
156
|
-
parsed.feed.feed_url = original$1;
|
|
157
|
-
return signature$1;
|
|
158
|
-
}
|
|
159
|
-
const link = findSelfLink(parsed);
|
|
160
|
-
if (!link) return JSON.stringify(parsed.feed);
|
|
161
|
-
const original = link.href;
|
|
162
|
-
link.href = void 0;
|
|
163
|
-
const signature = JSON.stringify(parsed.feed);
|
|
164
|
-
link.href = original;
|
|
165
|
-
return signature;
|
|
166
|
-
}
|
|
167
|
-
};
|
|
168
135
|
|
|
169
136
|
//#endregion
|
|
170
|
-
export { addMissingProtocol, applyPlatformHandlers, feedsmithParser, nativeFetch, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
137
|
+
export { addMissingProtocol, applyPlatformHandlers, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "feedcanon",
|
|
3
|
-
"description": "Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest
|
|
3
|
+
"description": "Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest form.",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "https://github.com/macieklamberski/feedcanon.git"
|
|
@@ -49,7 +49,9 @@
|
|
|
49
49
|
],
|
|
50
50
|
"scripts": {
|
|
51
51
|
"prepare": "lefthook install",
|
|
52
|
-
"build": "tsdown src/exports.ts src/defaults.ts --format cjs,esm --dts --clean --unbundle --no-fixed-extension"
|
|
52
|
+
"build": "tsdown src/exports.ts src/defaults.ts --format cjs,esm --dts --clean --unbundle --no-fixed-extension",
|
|
53
|
+
"docs:dev": "vitepress dev docs",
|
|
54
|
+
"docs:build": "vitepress build docs"
|
|
53
55
|
},
|
|
54
56
|
"dependencies": {
|
|
55
57
|
"entities": "^7.0.0",
|
|
@@ -58,7 +60,8 @@
|
|
|
58
60
|
"devDependencies": {
|
|
59
61
|
"@types/bun": "^1.3.5",
|
|
60
62
|
"kvalita": "1.9.0",
|
|
61
|
-
"tsdown": "^0.18.
|
|
63
|
+
"tsdown": "^0.18.3",
|
|
64
|
+
"vitepress": "^1.6.4"
|
|
62
65
|
},
|
|
63
|
-
"version": "1.0.0-beta.3"
|
|
66
|
+
"version": "1.0.0"
|
|
64
67
|
}
|