feedcanon 1.0.0-beta.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -49
- package/dist/defaults.cjs +48 -7
- package/dist/defaults.d.cts +4 -2
- package/dist/defaults.d.ts +4 -2
- package/dist/defaults.js +47 -8
- package/dist/exports.cjs +2 -1
- package/dist/exports.d.cts +3 -3
- package/dist/exports.d.ts +3 -3
- package/dist/exports.js +3 -3
- package/dist/index.cjs +7 -7
- package/dist/index.js +9 -9
- package/dist/platforms/feedburner.cjs +3 -3
- package/dist/platforms/feedburner.js +3 -3
- package/dist/types.d.cts +3 -3
- package/dist/types.d.ts +3 -3
- package/dist/utils.cjs +17 -37
- package/dist/utils.d.cts +2 -3
- package/dist/utils.d.ts +2 -3
- package/dist/utils.js +18 -36
- package/package.json +7 -4
package/README.md
CHANGED
|
@@ -4,22 +4,59 @@
|
|
|
4
4
|
[](https://www.npmjs.com/package/feedcanon)
|
|
5
5
|
[](https://github.com/macieklamberski/feedcanon/blob/main/LICENSE)
|
|
6
6
|
|
|
7
|
-
Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest
|
|
7
|
+
Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest form.
|
|
8
8
|
|
|
9
|
-
Many URLs can point to the same feed
|
|
9
|
+
Many URLs can point to the same feed, varying by protocol, www prefixes, trailing slashes, order of params, or domain aliases. Feedcanon compares actual feed content, respects the feed's declared self URL, and tests simpler URL alternatives to find the cleanest working one.
|
|
10
|
+
|
|
11
|
+
Perfect for feed readers to deduplicate subscriptions when users add the same feed via different URLs.
|
|
12
|
+
|
|
13
|
+
**[Read full docs ↗](https://feedcanon.dev)**
|
|
14
|
+
·
|
|
15
|
+
[Quick Start](#quick-start)
|
|
10
16
|
|
|
11
17
|
---
|
|
12
18
|
|
|
13
|
-
##
|
|
19
|
+
## Example
|
|
20
|
+
|
|
21
|
+
The 9 URLs below all work and return identical content. None of them redirect to each other, so each would normally appear to be a unique feed. Feedcanon compares content, normalizes URLs and resolves them to a single URL.
|
|
22
|
+
|
|
23
|
+
```dockerfile
|
|
24
|
+
'http://feeds.kottke.org/main' ──────────┐
|
|
25
|
+
'http://feeds.kottke.org/main/' ─────────┤
|
|
26
|
+
'https://feeds.kottke.org/main' ─────────┤
|
|
27
|
+
'https://feeds.kottke.org/main/' ────────┤
|
|
28
|
+
'https://feeds.kottke.org///main/' ──────┼──→ 'https://feeds.kottke.org/main'
|
|
29
|
+
'http://feeds.feedburner.com/kottke' ────┤
|
|
30
|
+
'http://feeds.feedburner.com/kottke/' ───┤
|
|
31
|
+
'https://feeds.feedburner.com/kottke' ───┤
|
|
32
|
+
'https://feeds.feedburner.com/kottke/' ──┘
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Overview
|
|
36
|
+
|
|
37
|
+
### How It Works
|
|
14
38
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
39
|
+
1. Fetch the input URL and parse the feed to establish reference content.
|
|
40
|
+
2. Extract the feed's declared self URL and validate it serves identical content.
|
|
41
|
+
3. Generate URL variants ordered from cleanest to least clean.
|
|
42
|
+
4. Test variants in order—the first one serving identical content wins.
|
|
43
|
+
5. Upgrade HTTP to HTTPS if both serve identical content.
|
|
44
|
+
|
|
45
|
+
### Customization
|
|
46
|
+
|
|
47
|
+
Feedcanon is designed to be flexible. Every major component can be replaced or extended.
|
|
48
|
+
|
|
49
|
+
- **Progress callbacks** — monitor the process with `onFetch`, `onMatch`, and `onExists` callbacks.
|
|
50
|
+
- **Database lookup** — use `existsFn` to check if a URL already exists in your database.
|
|
51
|
+
- **Custom fetch** — use your own HTTP client (Axios, Got, Ky, etc.).
|
|
52
|
+
- **Custom parser** — bring your own parser (Feedsmith by default).
|
|
53
|
+
- **Custom tiers** — define your own URL normalization variants.
|
|
54
|
+
- **Custom platforms** — add handlers to normalize domain aliases (like FeedBurner).
|
|
20
55
|
|
|
21
56
|
## Quick Start
|
|
22
57
|
|
|
58
|
+
Basic installation and common usage patterns. For a full overview, visit the [documentation website](https://feedcanon.dev).
|
|
59
|
+
|
|
23
60
|
### Installation
|
|
24
61
|
|
|
25
62
|
```bash
|
|
@@ -28,61 +65,37 @@ npm install feedcanon
|
|
|
28
65
|
|
|
29
66
|
### Basic Usage
|
|
30
67
|
|
|
68
|
+
When you just need to clean up a feed URL and get its canonical form.
|
|
69
|
+
|
|
31
70
|
```typescript
|
|
32
71
|
import { findCanonical } from 'feedcanon'
|
|
33
72
|
|
|
34
|
-
const url = await findCanonical('
|
|
73
|
+
const url = await findCanonical('http://www.example.com/feed/?utm_source=twitter')
|
|
35
74
|
|
|
36
75
|
// 'https://example.com/feed'
|
|
37
76
|
```
|
|
38
77
|
|
|
39
|
-
|
|
78
|
+
Returns `undefined` if the feed is invalid or unreachable.
|
|
40
79
|
|
|
41
|
-
|
|
42
|
-
import { findCanonical } from 'feedcanon'
|
|
80
|
+
### Using Callbacks
|
|
43
81
|
|
|
44
|
-
|
|
45
|
-
onFetch: ({ url, response }) => {
|
|
46
|
-
console.log('Fetched:', url, response.status)
|
|
47
|
-
},
|
|
48
|
-
onMatch: ({ url, feed }) => {
|
|
49
|
-
console.log('Found matching URL:', url)
|
|
50
|
-
},
|
|
51
|
-
})
|
|
52
|
-
```
|
|
53
|
-
|
|
54
|
-
### Custom Fetch
|
|
82
|
+
When you want to log the canonicalization process for debugging, or store all URL aliases that resolve to the same feed.
|
|
55
83
|
|
|
56
84
|
```typescript
|
|
57
85
|
import { findCanonical } from 'feedcanon'
|
|
58
|
-
import axios from 'axios'
|
|
59
|
-
|
|
60
|
-
const url = await findCanonical('https://example.com/feed', {
|
|
61
|
-
fetchFn: async (url) => {
|
|
62
|
-
const response = await axios.get(url)
|
|
63
|
-
|
|
64
|
-
return {
|
|
65
|
-
status: response.status,
|
|
66
|
-
url: response.request.res.responseUrl,
|
|
67
|
-
body: response.data,
|
|
68
|
-
headers: new Headers(response.headers),
|
|
69
|
-
}
|
|
70
|
-
},
|
|
71
|
-
})
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
### Database Integration
|
|
75
86
|
|
|
76
|
-
|
|
77
|
-
import { findCanonical } from 'feedcanon'
|
|
87
|
+
const aliases = []
|
|
78
88
|
|
|
79
|
-
const url = await findCanonical('
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
return await db.feeds.findByUrl(url)
|
|
83
|
-
},
|
|
84
|
-
onExists: ({ url, data }) => {
|
|
85
|
-
console.log('URL already exists:', url)
|
|
89
|
+
const url = await findCanonical('http://www.example.com/feed/', {
|
|
90
|
+
onMatch: ({ url }) => {
|
|
91
|
+
aliases.push(url)
|
|
86
92
|
},
|
|
87
93
|
})
|
|
94
|
+
|
|
95
|
+
// url: 'https://example.com/feed'
|
|
96
|
+
// aliases: [
|
|
97
|
+
// 'http://www.example.com/feed/',
|
|
98
|
+
// 'https://www.example.com/feed/',
|
|
99
|
+
// 'https://example.com/feed',
|
|
100
|
+
// ]
|
|
88
101
|
```
|
package/dist/defaults.cjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
const require_feedburner = require('./platforms/feedburner.cjs');
|
|
2
|
+
let feedsmith = require("feedsmith");
|
|
2
3
|
|
|
3
4
|
//#region src/defaults.ts
|
|
4
5
|
const defaultPlatforms = [require_feedburner.feedburnerHandler];
|
|
@@ -31,6 +32,7 @@ const defaultStrippedParams = [
|
|
|
31
32
|
"_ke",
|
|
32
33
|
"mc_cid",
|
|
33
34
|
"mc_eid",
|
|
35
|
+
"mc_tc",
|
|
34
36
|
"mkt_tok",
|
|
35
37
|
"msclkid",
|
|
36
38
|
"twclid",
|
|
@@ -63,15 +65,20 @@ const defaultStrippedParams = [
|
|
|
63
65
|
"hsa_tgt",
|
|
64
66
|
"hsa_ver",
|
|
65
67
|
"hsCtaTracking",
|
|
68
|
+
"_hsenc",
|
|
69
|
+
"_hsmi",
|
|
66
70
|
"__hstc",
|
|
67
71
|
"__hsfp",
|
|
68
72
|
"__hssc",
|
|
69
73
|
"cid",
|
|
70
74
|
"s_kwcid",
|
|
75
|
+
"sc_cid",
|
|
71
76
|
"ef_id",
|
|
72
77
|
"obOrigUrl",
|
|
73
78
|
"dicbo",
|
|
74
79
|
"yclid",
|
|
80
|
+
"ysclid",
|
|
81
|
+
"_openstat",
|
|
75
82
|
"awinaffid",
|
|
76
83
|
"awinmid",
|
|
77
84
|
"clickref",
|
|
@@ -107,14 +114,12 @@ const defaultStrippedParams = [
|
|
|
107
114
|
"nocache",
|
|
108
115
|
"rand",
|
|
109
116
|
"random",
|
|
117
|
+
"sbdcrw",
|
|
110
118
|
"forceByPassCache",
|
|
111
119
|
"sucurianticache",
|
|
112
120
|
"cleancache",
|
|
113
121
|
"rebuildcache",
|
|
114
122
|
"kontrol_health_check_timestamp",
|
|
115
|
-
"_x_tr_sl",
|
|
116
|
-
"_x_tr_tl",
|
|
117
|
-
"_x_tr_hl",
|
|
118
123
|
"action_object_map",
|
|
119
124
|
"action_ref_map",
|
|
120
125
|
"action_type_map",
|
|
@@ -127,6 +132,10 @@ const defaultStrippedParams = [
|
|
|
127
132
|
"at_custom4",
|
|
128
133
|
"at_medium",
|
|
129
134
|
"at_preview_index",
|
|
135
|
+
"_bhlid",
|
|
136
|
+
"_branch_match_id",
|
|
137
|
+
"_branch_referrer",
|
|
138
|
+
"__readwiseLocation",
|
|
130
139
|
"campaign_id",
|
|
131
140
|
"click_sum",
|
|
132
141
|
"fref",
|
|
@@ -158,12 +167,42 @@ const defaultNormalizeOptions = {
|
|
|
158
167
|
stripHash: true,
|
|
159
168
|
sortQueryParams: true,
|
|
160
169
|
stripQueryParams: defaultStrippedParams,
|
|
170
|
+
stripQuery: false,
|
|
161
171
|
stripEmptyQuery: true,
|
|
162
172
|
normalizeEncoding: true,
|
|
163
|
-
lowercaseHostname: true,
|
|
164
173
|
normalizeUnicode: true,
|
|
165
174
|
convertToPunycode: true
|
|
166
175
|
};
|
|
176
|
+
const defaultFetch = async (url, options) => {
|
|
177
|
+
const response = await fetch(url, {
|
|
178
|
+
method: options?.method ?? "GET",
|
|
179
|
+
headers: options?.headers
|
|
180
|
+
});
|
|
181
|
+
return {
|
|
182
|
+
headers: response.headers,
|
|
183
|
+
body: await response.text(),
|
|
184
|
+
url: response.url,
|
|
185
|
+
status: response.status
|
|
186
|
+
};
|
|
187
|
+
};
|
|
188
|
+
const defaultParser = {
|
|
189
|
+
parse: (body) => {
|
|
190
|
+
try {
|
|
191
|
+
return (0, feedsmith.parseFeed)(body);
|
|
192
|
+
} catch {}
|
|
193
|
+
},
|
|
194
|
+
getSelfUrl: (parsed) => {
|
|
195
|
+
switch (parsed.format) {
|
|
196
|
+
case "atom": return parsed.feed.links?.find((link) => link.rel === "self")?.href;
|
|
197
|
+
case "rss":
|
|
198
|
+
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self")?.href;
|
|
199
|
+
case "json": return parsed.feed.feed_url;
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
getSignature: (parsed) => {
|
|
203
|
+
return parsed.feed;
|
|
204
|
+
}
|
|
205
|
+
};
|
|
167
206
|
const defaultTiers = [
|
|
168
207
|
{
|
|
169
208
|
stripProtocol: false,
|
|
@@ -174,9 +213,9 @@ const defaultTiers = [
|
|
|
174
213
|
collapseSlashes: true,
|
|
175
214
|
stripHash: true,
|
|
176
215
|
sortQueryParams: true,
|
|
216
|
+
stripQuery: false,
|
|
177
217
|
stripEmptyQuery: true,
|
|
178
218
|
normalizeEncoding: true,
|
|
179
|
-
lowercaseHostname: true,
|
|
180
219
|
normalizeUnicode: true,
|
|
181
220
|
convertToPunycode: true
|
|
182
221
|
},
|
|
@@ -189,9 +228,9 @@ const defaultTiers = [
|
|
|
189
228
|
collapseSlashes: true,
|
|
190
229
|
stripHash: true,
|
|
191
230
|
sortQueryParams: true,
|
|
231
|
+
stripQuery: false,
|
|
192
232
|
stripEmptyQuery: true,
|
|
193
233
|
normalizeEncoding: true,
|
|
194
|
-
lowercaseHostname: true,
|
|
195
234
|
normalizeUnicode: true,
|
|
196
235
|
convertToPunycode: true
|
|
197
236
|
},
|
|
@@ -204,16 +243,18 @@ const defaultTiers = [
|
|
|
204
243
|
collapseSlashes: true,
|
|
205
244
|
stripHash: true,
|
|
206
245
|
sortQueryParams: true,
|
|
246
|
+
stripQuery: false,
|
|
207
247
|
stripEmptyQuery: true,
|
|
208
248
|
normalizeEncoding: true,
|
|
209
|
-
lowercaseHostname: true,
|
|
210
249
|
normalizeUnicode: true,
|
|
211
250
|
convertToPunycode: true
|
|
212
251
|
}
|
|
213
252
|
];
|
|
214
253
|
|
|
215
254
|
//#endregion
|
|
255
|
+
exports.defaultFetch = defaultFetch;
|
|
216
256
|
exports.defaultNormalizeOptions = defaultNormalizeOptions;
|
|
257
|
+
exports.defaultParser = defaultParser;
|
|
217
258
|
exports.defaultPlatforms = defaultPlatforms;
|
|
218
259
|
exports.defaultStrippedParams = defaultStrippedParams;
|
|
219
260
|
exports.defaultTiers = defaultTiers;
|
package/dist/defaults.d.cts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import { NormalizeOptions, PlatformHandler, Tier } from "./types.cjs";
|
|
1
|
+
import { FeedsmithFeed, FetchFn, NormalizeOptions, ParserAdapter, PlatformHandler, Tier } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultPlatforms: Array<PlatformHandler>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
|
+
declare const defaultFetch: FetchFn;
|
|
8
|
+
declare const defaultParser: ParserAdapter<FeedsmithFeed>;
|
|
7
9
|
declare const defaultTiers: Array<Tier>;
|
|
8
10
|
//#endregion
|
|
9
|
-
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
|
11
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.d.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import { NormalizeOptions, PlatformHandler, Tier } from "./types.js";
|
|
1
|
+
import { FeedsmithFeed, FetchFn, NormalizeOptions, ParserAdapter, PlatformHandler, Tier } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/defaults.d.ts
|
|
4
4
|
declare const defaultPlatforms: Array<PlatformHandler>;
|
|
5
5
|
declare const defaultStrippedParams: string[];
|
|
6
6
|
declare const defaultNormalizeOptions: NormalizeOptions;
|
|
7
|
+
declare const defaultFetch: FetchFn;
|
|
8
|
+
declare const defaultParser: ParserAdapter<FeedsmithFeed>;
|
|
7
9
|
declare const defaultTiers: Array<Tier>;
|
|
8
10
|
//#endregion
|
|
9
|
-
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
|
11
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/defaults.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { feedburnerHandler } from "./platforms/feedburner.js";
|
|
2
|
+
import { parseFeed } from "feedsmith";
|
|
2
3
|
|
|
3
4
|
//#region src/defaults.ts
|
|
4
5
|
const defaultPlatforms = [feedburnerHandler];
|
|
@@ -31,6 +32,7 @@ const defaultStrippedParams = [
|
|
|
31
32
|
"_ke",
|
|
32
33
|
"mc_cid",
|
|
33
34
|
"mc_eid",
|
|
35
|
+
"mc_tc",
|
|
34
36
|
"mkt_tok",
|
|
35
37
|
"msclkid",
|
|
36
38
|
"twclid",
|
|
@@ -63,15 +65,20 @@ const defaultStrippedParams = [
|
|
|
63
65
|
"hsa_tgt",
|
|
64
66
|
"hsa_ver",
|
|
65
67
|
"hsCtaTracking",
|
|
68
|
+
"_hsenc",
|
|
69
|
+
"_hsmi",
|
|
66
70
|
"__hstc",
|
|
67
71
|
"__hsfp",
|
|
68
72
|
"__hssc",
|
|
69
73
|
"cid",
|
|
70
74
|
"s_kwcid",
|
|
75
|
+
"sc_cid",
|
|
71
76
|
"ef_id",
|
|
72
77
|
"obOrigUrl",
|
|
73
78
|
"dicbo",
|
|
74
79
|
"yclid",
|
|
80
|
+
"ysclid",
|
|
81
|
+
"_openstat",
|
|
75
82
|
"awinaffid",
|
|
76
83
|
"awinmid",
|
|
77
84
|
"clickref",
|
|
@@ -107,14 +114,12 @@ const defaultStrippedParams = [
|
|
|
107
114
|
"nocache",
|
|
108
115
|
"rand",
|
|
109
116
|
"random",
|
|
117
|
+
"sbdcrw",
|
|
110
118
|
"forceByPassCache",
|
|
111
119
|
"sucurianticache",
|
|
112
120
|
"cleancache",
|
|
113
121
|
"rebuildcache",
|
|
114
122
|
"kontrol_health_check_timestamp",
|
|
115
|
-
"_x_tr_sl",
|
|
116
|
-
"_x_tr_tl",
|
|
117
|
-
"_x_tr_hl",
|
|
118
123
|
"action_object_map",
|
|
119
124
|
"action_ref_map",
|
|
120
125
|
"action_type_map",
|
|
@@ -127,6 +132,10 @@ const defaultStrippedParams = [
|
|
|
127
132
|
"at_custom4",
|
|
128
133
|
"at_medium",
|
|
129
134
|
"at_preview_index",
|
|
135
|
+
"_bhlid",
|
|
136
|
+
"_branch_match_id",
|
|
137
|
+
"_branch_referrer",
|
|
138
|
+
"__readwiseLocation",
|
|
130
139
|
"campaign_id",
|
|
131
140
|
"click_sum",
|
|
132
141
|
"fref",
|
|
@@ -158,12 +167,42 @@ const defaultNormalizeOptions = {
|
|
|
158
167
|
stripHash: true,
|
|
159
168
|
sortQueryParams: true,
|
|
160
169
|
stripQueryParams: defaultStrippedParams,
|
|
170
|
+
stripQuery: false,
|
|
161
171
|
stripEmptyQuery: true,
|
|
162
172
|
normalizeEncoding: true,
|
|
163
|
-
lowercaseHostname: true,
|
|
164
173
|
normalizeUnicode: true,
|
|
165
174
|
convertToPunycode: true
|
|
166
175
|
};
|
|
176
|
+
const defaultFetch = async (url, options) => {
|
|
177
|
+
const response = await fetch(url, {
|
|
178
|
+
method: options?.method ?? "GET",
|
|
179
|
+
headers: options?.headers
|
|
180
|
+
});
|
|
181
|
+
return {
|
|
182
|
+
headers: response.headers,
|
|
183
|
+
body: await response.text(),
|
|
184
|
+
url: response.url,
|
|
185
|
+
status: response.status
|
|
186
|
+
};
|
|
187
|
+
};
|
|
188
|
+
const defaultParser = {
|
|
189
|
+
parse: (body) => {
|
|
190
|
+
try {
|
|
191
|
+
return parseFeed(body);
|
|
192
|
+
} catch {}
|
|
193
|
+
},
|
|
194
|
+
getSelfUrl: (parsed) => {
|
|
195
|
+
switch (parsed.format) {
|
|
196
|
+
case "atom": return parsed.feed.links?.find((link) => link.rel === "self")?.href;
|
|
197
|
+
case "rss":
|
|
198
|
+
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self")?.href;
|
|
199
|
+
case "json": return parsed.feed.feed_url;
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
getSignature: (parsed) => {
|
|
203
|
+
return parsed.feed;
|
|
204
|
+
}
|
|
205
|
+
};
|
|
167
206
|
const defaultTiers = [
|
|
168
207
|
{
|
|
169
208
|
stripProtocol: false,
|
|
@@ -174,9 +213,9 @@ const defaultTiers = [
|
|
|
174
213
|
collapseSlashes: true,
|
|
175
214
|
stripHash: true,
|
|
176
215
|
sortQueryParams: true,
|
|
216
|
+
stripQuery: false,
|
|
177
217
|
stripEmptyQuery: true,
|
|
178
218
|
normalizeEncoding: true,
|
|
179
|
-
lowercaseHostname: true,
|
|
180
219
|
normalizeUnicode: true,
|
|
181
220
|
convertToPunycode: true
|
|
182
221
|
},
|
|
@@ -189,9 +228,9 @@ const defaultTiers = [
|
|
|
189
228
|
collapseSlashes: true,
|
|
190
229
|
stripHash: true,
|
|
191
230
|
sortQueryParams: true,
|
|
231
|
+
stripQuery: false,
|
|
192
232
|
stripEmptyQuery: true,
|
|
193
233
|
normalizeEncoding: true,
|
|
194
|
-
lowercaseHostname: true,
|
|
195
234
|
normalizeUnicode: true,
|
|
196
235
|
convertToPunycode: true
|
|
197
236
|
},
|
|
@@ -204,13 +243,13 @@ const defaultTiers = [
|
|
|
204
243
|
collapseSlashes: true,
|
|
205
244
|
stripHash: true,
|
|
206
245
|
sortQueryParams: true,
|
|
246
|
+
stripQuery: false,
|
|
207
247
|
stripEmptyQuery: true,
|
|
208
248
|
normalizeEncoding: true,
|
|
209
|
-
lowercaseHostname: true,
|
|
210
249
|
normalizeUnicode: true,
|
|
211
250
|
convertToPunycode: true
|
|
212
251
|
}
|
|
213
252
|
];
|
|
214
253
|
|
|
215
254
|
//#endregion
|
|
216
|
-
export { defaultNormalizeOptions, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
|
255
|
+
export { defaultFetch, defaultNormalizeOptions, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers };
|
package/dist/exports.cjs
CHANGED
|
@@ -4,11 +4,12 @@ const require_utils = require('./utils.cjs');
|
|
|
4
4
|
const require_index = require('./index.cjs');
|
|
5
5
|
|
|
6
6
|
exports.addMissingProtocol = require_utils.addMissingProtocol;
|
|
7
|
+
exports.defaultFetch = require_defaults.defaultFetch;
|
|
8
|
+
exports.defaultParser = require_defaults.defaultParser;
|
|
7
9
|
exports.defaultPlatforms = require_defaults.defaultPlatforms;
|
|
8
10
|
exports.defaultStrippedParams = require_defaults.defaultStrippedParams;
|
|
9
11
|
exports.defaultTiers = require_defaults.defaultTiers;
|
|
10
12
|
exports.feedburnerHandler = require_feedburner.feedburnerHandler;
|
|
11
|
-
exports.feedsmithParser = require_utils.feedsmithParser;
|
|
12
13
|
exports.findCanonical = require_index.findCanonical;
|
|
13
14
|
exports.normalizeUrl = require_utils.normalizeUrl;
|
|
14
15
|
exports.resolveFeedProtocol = require_utils.resolveFeedProtocol;
|
package/dist/exports.d.cts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler } from "./types.cjs";
|
|
2
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.cjs";
|
|
3
3
|
import { findCanonical } from "./index.cjs";
|
|
4
4
|
import { feedburnerHandler } from "./platforms/feedburner.cjs";
|
|
5
|
-
import { addMissingProtocol,
|
|
6
|
-
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler,
|
|
5
|
+
import { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.cjs";
|
|
6
|
+
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler, findCanonical, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/exports.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ExistsFn, FeedsmithFeed, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler } from "./types.js";
|
|
2
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
3
|
import { findCanonical } from "./index.js";
|
|
4
4
|
import { feedburnerHandler } from "./platforms/feedburner.js";
|
|
5
|
-
import { addMissingProtocol,
|
|
6
|
-
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler,
|
|
5
|
+
import { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
6
|
+
export { type ExistsFn, type FeedsmithFeed, type FetchFn, type FetchFnOptions, type FetchFnResponse, type FindCanonicalOptions, type NormalizeOptions, type OnExistsFn, type OnFetchFn, type OnMatchFn, type ParserAdapter, type PlatformHandler, addMissingProtocol, defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler, findCanonical, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/exports.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { feedburnerHandler } from "./platforms/feedburner.js";
|
|
2
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
|
-
import { addMissingProtocol,
|
|
2
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
3
|
+
import { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl } from "./utils.js";
|
|
4
4
|
import { findCanonical } from "./index.js";
|
|
5
5
|
|
|
6
|
-
export { addMissingProtocol, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler,
|
|
6
|
+
export { addMissingProtocol, defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers, feedburnerHandler, findCanonical, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/index.cjs
CHANGED
|
@@ -3,7 +3,7 @@ const require_utils = require('./utils.cjs');
|
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser =
|
|
6
|
+
const { parser = require_defaults.defaultParser, fetchFn = require_defaults.defaultFetch, existsFn, tiers = require_defaults.defaultTiers, platforms = require_defaults.defaultPlatforms, stripQueryParams = require_defaults.defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
8
|
return stripQueryParams?.length ? require_utils.normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
@@ -35,7 +35,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
35
35
|
if (!initialResponseBody) return;
|
|
36
36
|
let initialResponseSignature;
|
|
37
37
|
let selfRequestUrl;
|
|
38
|
-
const initialResponseFeed = parser.parse(initialResponseBody);
|
|
38
|
+
const initialResponseFeed = await parser.parse(initialResponseBody);
|
|
39
39
|
if (!initialResponseFeed) return;
|
|
40
40
|
onMatch?.({
|
|
41
41
|
url: initialRequestUrl,
|
|
@@ -47,13 +47,13 @@ async function findCanonical(inputUrl, options) {
|
|
|
47
47
|
selfRequestUrl = resolveAndApplyPlatformHandlers(selfRequestUrlRaw, initialResponseUrl);
|
|
48
48
|
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
49
|
}
|
|
50
|
-
const compareWithInitialResponse = (comparedResponseBody) => {
|
|
50
|
+
const compareWithInitialResponse = async (comparedResponseBody) => {
|
|
51
51
|
if (!comparedResponseBody) return false;
|
|
52
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
53
|
-
const comparedResponseFeed = parser.parse(comparedResponseBody);
|
|
53
|
+
const comparedResponseFeed = await parser.parse(comparedResponseBody);
|
|
54
54
|
if (comparedResponseFeed) {
|
|
55
|
-
initialResponseSignature ||=
|
|
56
|
-
const comparedResponseSignature =
|
|
55
|
+
initialResponseSignature ||= JSON.stringify(parser.getSignature(initialResponseFeed));
|
|
56
|
+
const comparedResponseSignature = JSON.stringify(parser.getSignature(comparedResponseFeed));
|
|
57
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
58
58
|
}
|
|
59
59
|
return false;
|
|
@@ -70,7 +70,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
70
70
|
response
|
|
71
71
|
});
|
|
72
72
|
if (response.status < 200 || response.status >= 300) return;
|
|
73
|
-
if (!compareWithInitialResponse(response.body)) return;
|
|
73
|
+
if (!await compareWithInitialResponse(response.body)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
76
|
let variantSourceUrl = initialResponseUrl;
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
-
import { applyPlatformHandlers,
|
|
1
|
+
import { defaultFetch, defaultParser, defaultPlatforms, defaultStrippedParams, defaultTiers } from "./defaults.js";
|
|
2
|
+
import { applyPlatformHandlers, normalizeUrl, resolveUrl } from "./utils.js";
|
|
3
3
|
|
|
4
4
|
//#region src/index.ts
|
|
5
5
|
async function findCanonical(inputUrl, options) {
|
|
6
|
-
const { parser =
|
|
6
|
+
const { parser = defaultParser, fetchFn = defaultFetch, existsFn, tiers = defaultTiers, platforms = defaultPlatforms, stripQueryParams = defaultStrippedParams, onFetch, onMatch, onExists } = options ?? {};
|
|
7
7
|
const stripParams = (url) => {
|
|
8
8
|
return stripQueryParams?.length ? normalizeUrl(url, {
|
|
9
9
|
stripQueryParams,
|
|
@@ -35,7 +35,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
35
35
|
if (!initialResponseBody) return;
|
|
36
36
|
let initialResponseSignature;
|
|
37
37
|
let selfRequestUrl;
|
|
38
|
-
const initialResponseFeed = parser.parse(initialResponseBody);
|
|
38
|
+
const initialResponseFeed = await parser.parse(initialResponseBody);
|
|
39
39
|
if (!initialResponseFeed) return;
|
|
40
40
|
onMatch?.({
|
|
41
41
|
url: initialRequestUrl,
|
|
@@ -47,13 +47,13 @@ async function findCanonical(inputUrl, options) {
|
|
|
47
47
|
selfRequestUrl = resolveAndApplyPlatformHandlers(selfRequestUrlRaw, initialResponseUrl);
|
|
48
48
|
selfRequestUrl = selfRequestUrl ? stripParams(selfRequestUrl) : void 0;
|
|
49
49
|
}
|
|
50
|
-
const compareWithInitialResponse = (comparedResponseBody) => {
|
|
50
|
+
const compareWithInitialResponse = async (comparedResponseBody) => {
|
|
51
51
|
if (!comparedResponseBody) return false;
|
|
52
52
|
if (initialResponseBody === comparedResponseBody) return true;
|
|
53
|
-
const comparedResponseFeed = parser.parse(comparedResponseBody);
|
|
53
|
+
const comparedResponseFeed = await parser.parse(comparedResponseBody);
|
|
54
54
|
if (comparedResponseFeed) {
|
|
55
|
-
initialResponseSignature ||=
|
|
56
|
-
const comparedResponseSignature =
|
|
55
|
+
initialResponseSignature ||= JSON.stringify(parser.getSignature(initialResponseFeed));
|
|
56
|
+
const comparedResponseSignature = JSON.stringify(parser.getSignature(comparedResponseFeed));
|
|
57
57
|
return initialResponseSignature === comparedResponseSignature;
|
|
58
58
|
}
|
|
59
59
|
return false;
|
|
@@ -70,7 +70,7 @@ async function findCanonical(inputUrl, options) {
|
|
|
70
70
|
response
|
|
71
71
|
});
|
|
72
72
|
if (response.status < 200 || response.status >= 300) return;
|
|
73
|
-
if (!compareWithInitialResponse(response.body)) return;
|
|
73
|
+
if (!await compareWithInitialResponse(response.body)) return;
|
|
74
74
|
return response;
|
|
75
75
|
};
|
|
76
76
|
let variantSourceUrl = initialResponseUrl;
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
|
|
2
2
|
//#region src/platforms/feedburner.ts
|
|
3
|
-
const hosts =
|
|
3
|
+
const hosts = [
|
|
4
4
|
"feeds.feedburner.com",
|
|
5
5
|
"feeds2.feedburner.com",
|
|
6
6
|
"feedproxy.google.com"
|
|
7
|
-
]
|
|
7
|
+
];
|
|
8
8
|
const feedburnerHandler = {
|
|
9
9
|
match: (url) => {
|
|
10
|
-
return hosts.
|
|
10
|
+
return hosts.includes(url.hostname);
|
|
11
11
|
},
|
|
12
12
|
normalize: (url) => {
|
|
13
13
|
const normalized = new URL(url);
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
//#region src/platforms/feedburner.ts
|
|
2
|
-
const hosts =
|
|
2
|
+
const hosts = [
|
|
3
3
|
"feeds.feedburner.com",
|
|
4
4
|
"feeds2.feedburner.com",
|
|
5
5
|
"feedproxy.google.com"
|
|
6
|
-
]
|
|
6
|
+
];
|
|
7
7
|
const feedburnerHandler = {
|
|
8
8
|
match: (url) => {
|
|
9
|
-
return hosts.
|
|
9
|
+
return hosts.includes(url.hostname);
|
|
10
10
|
},
|
|
11
11
|
normalize: (url) => {
|
|
12
12
|
const normalized = new URL(url);
|
package/dist/types.d.cts
CHANGED
|
@@ -3,9 +3,9 @@ import * as feedsmith0 from "feedsmith";
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
4
|
type FeedsmithFeed = ReturnType<typeof feedsmith0.parseFeed>;
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) => T | undefined;
|
|
6
|
+
parse: (body: string) => Promise<T | undefined> | T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T
|
|
8
|
+
getSignature: (parsed: T) => object;
|
|
9
9
|
};
|
|
10
10
|
type PlatformHandler = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
@@ -21,9 +21,9 @@ type NormalizeOptions = {
|
|
|
21
21
|
stripHash?: boolean;
|
|
22
22
|
sortQueryParams?: boolean;
|
|
23
23
|
stripQueryParams?: Array<string>;
|
|
24
|
+
stripQuery?: boolean;
|
|
24
25
|
stripEmptyQuery?: boolean;
|
|
25
26
|
normalizeEncoding?: boolean;
|
|
26
|
-
lowercaseHostname?: boolean;
|
|
27
27
|
normalizeUnicode?: boolean;
|
|
28
28
|
convertToPunycode?: boolean;
|
|
29
29
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -3,9 +3,9 @@ import * as feedsmith0 from "feedsmith";
|
|
|
3
3
|
//#region src/types.d.ts
|
|
4
4
|
type FeedsmithFeed = ReturnType<typeof feedsmith0.parseFeed>;
|
|
5
5
|
type ParserAdapter<T> = {
|
|
6
|
-
parse: (body: string) => T | undefined;
|
|
6
|
+
parse: (body: string) => Promise<T | undefined> | T | undefined;
|
|
7
7
|
getSelfUrl: (parsed: T) => string | undefined;
|
|
8
|
-
getSignature: (parsed: T
|
|
8
|
+
getSignature: (parsed: T) => object;
|
|
9
9
|
};
|
|
10
10
|
type PlatformHandler = {
|
|
11
11
|
match: (url: URL) => boolean;
|
|
@@ -21,9 +21,9 @@ type NormalizeOptions = {
|
|
|
21
21
|
stripHash?: boolean;
|
|
22
22
|
sortQueryParams?: boolean;
|
|
23
23
|
stripQueryParams?: Array<string>;
|
|
24
|
+
stripQuery?: boolean;
|
|
24
25
|
stripEmptyQuery?: boolean;
|
|
25
26
|
normalizeEncoding?: boolean;
|
|
26
|
-
lowercaseHostname?: boolean;
|
|
27
27
|
normalizeUnicode?: boolean;
|
|
28
28
|
convertToPunycode?: boolean;
|
|
29
29
|
};
|
package/dist/utils.cjs
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
const require_defaults = require('./defaults.cjs');
|
|
2
2
|
let node_url = require("node:url");
|
|
3
3
|
let entities = require("entities");
|
|
4
|
-
let feedsmith = require("feedsmith");
|
|
5
4
|
|
|
6
5
|
//#region src/utils.ts
|
|
6
|
+
const strippedParamsCache = /* @__PURE__ */ new WeakMap();
|
|
7
|
+
const getStrippedParamsSet = (params) => {
|
|
8
|
+
let cached = strippedParamsCache.get(params);
|
|
9
|
+
if (!cached) {
|
|
10
|
+
cached = new Set(params.map((param) => param.toLowerCase()));
|
|
11
|
+
strippedParamsCache.set(params, cached);
|
|
12
|
+
}
|
|
13
|
+
return cached;
|
|
14
|
+
};
|
|
7
15
|
const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
8
16
|
const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
9
17
|
const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
@@ -84,7 +92,6 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
84
92
|
const ascii = (0, node_url.domainToASCII)(parsed.hostname);
|
|
85
93
|
if (ascii) parsed.hostname = ascii;
|
|
86
94
|
}
|
|
87
|
-
if (options.lowercaseHostname) parsed.hostname = parsed.hostname.toLowerCase();
|
|
88
95
|
if (options.stripAuthentication) {
|
|
89
96
|
parsed.username = "";
|
|
90
97
|
parsed.password = "";
|
|
@@ -97,9 +104,15 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
97
104
|
if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
|
|
98
105
|
if (options.stripRootSlash && pathname === "/") pathname = "";
|
|
99
106
|
parsed.pathname = pathname;
|
|
100
|
-
if (options.
|
|
107
|
+
if (options.stripQuery) parsed.search = "";
|
|
108
|
+
if (options.stripQueryParams && parsed.search) {
|
|
109
|
+
const strippedSet = getStrippedParamsSet(options.stripQueryParams);
|
|
110
|
+
const paramsToDelete = [];
|
|
111
|
+
for (const [key] of parsed.searchParams) if (strippedSet.has(key.toLowerCase())) paramsToDelete.push(key);
|
|
112
|
+
for (const param of paramsToDelete) parsed.searchParams.delete(param);
|
|
113
|
+
}
|
|
101
114
|
if (options.sortQueryParams) parsed.searchParams.sort();
|
|
102
|
-
if (options.stripEmptyQuery && parsed.
|
|
115
|
+
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
103
116
|
let result = parsed.href;
|
|
104
117
|
if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
|
|
105
118
|
return result;
|
|
@@ -107,18 +120,6 @@ const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) =
|
|
|
107
120
|
return url;
|
|
108
121
|
}
|
|
109
122
|
};
|
|
110
|
-
const nativeFetch = async (url, options) => {
|
|
111
|
-
const response = await fetch(url, {
|
|
112
|
-
method: options?.method ?? "GET",
|
|
113
|
-
headers: options?.headers
|
|
114
|
-
});
|
|
115
|
-
return {
|
|
116
|
-
headers: response.headers,
|
|
117
|
-
body: await response.text(),
|
|
118
|
-
url: response.url,
|
|
119
|
-
status: response.status
|
|
120
|
-
};
|
|
121
|
-
};
|
|
122
123
|
const applyPlatformHandlers = (url, platforms) => {
|
|
123
124
|
try {
|
|
124
125
|
let parsed = new URL(url);
|
|
@@ -131,31 +132,10 @@ const applyPlatformHandlers = (url, platforms) => {
|
|
|
131
132
|
return url;
|
|
132
133
|
}
|
|
133
134
|
};
|
|
134
|
-
const feedsmithParser = {
|
|
135
|
-
parse: (body) => {
|
|
136
|
-
try {
|
|
137
|
-
return (0, feedsmith.parseFeed)(body);
|
|
138
|
-
} catch {}
|
|
139
|
-
},
|
|
140
|
-
getSelfUrl: (parsed) => {
|
|
141
|
-
switch (parsed.format) {
|
|
142
|
-
case "atom": return parsed.feed.links?.find((link) => link.rel === "self")?.href;
|
|
143
|
-
case "rss":
|
|
144
|
-
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self")?.href;
|
|
145
|
-
case "json": return parsed.feed.feed_url;
|
|
146
|
-
}
|
|
147
|
-
},
|
|
148
|
-
getSignature: (parsed, selfUrl) => {
|
|
149
|
-
const signature = JSON.stringify(parsed.feed);
|
|
150
|
-
return selfUrl ? signature.replaceAll(`"${selfUrl}"`, "\"__SELF_URL__\"") : signature;
|
|
151
|
-
}
|
|
152
|
-
};
|
|
153
135
|
|
|
154
136
|
//#endregion
|
|
155
137
|
exports.addMissingProtocol = addMissingProtocol;
|
|
156
138
|
exports.applyPlatformHandlers = applyPlatformHandlers;
|
|
157
|
-
exports.feedsmithParser = feedsmithParser;
|
|
158
|
-
exports.nativeFetch = nativeFetch;
|
|
159
139
|
exports.normalizeUrl = normalizeUrl;
|
|
160
140
|
exports.resolveFeedProtocol = resolveFeedProtocol;
|
|
161
141
|
exports.resolveUrl = resolveUrl;
|
package/dist/utils.d.cts
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import { FeedsmithFeed, NormalizeOptions, ParserAdapter } from "./types.cjs";
|
|
1
|
+
import { NormalizeOptions } from "./types.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/utils.d.ts
|
|
4
4
|
declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
5
5
|
declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
6
6
|
declare const resolveUrl: (url: string, base?: string) => string | undefined;
|
|
7
7
|
declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
|
|
8
|
-
declare const feedsmithParser: ParserAdapter<FeedsmithFeed>;
|
|
9
8
|
//#endregion
|
|
10
|
-
export { addMissingProtocol, feedsmithParser, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
9
|
+
export { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import { FeedsmithFeed, NormalizeOptions, ParserAdapter } from "./types.js";
|
|
1
|
+
import { NormalizeOptions } from "./types.js";
|
|
2
2
|
|
|
3
3
|
//#region src/utils.d.ts
|
|
4
4
|
declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
5
5
|
declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
|
|
6
6
|
declare const resolveUrl: (url: string, base?: string) => string | undefined;
|
|
7
7
|
declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
|
|
8
|
-
declare const feedsmithParser: ParserAdapter<FeedsmithFeed>;
|
|
9
8
|
//#endregion
|
|
10
|
-
export { addMissingProtocol, feedsmithParser, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
9
|
+
export { addMissingProtocol, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/utils.js
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
import { defaultNormalizeOptions } from "./defaults.js";
|
|
2
2
|
import { domainToASCII } from "node:url";
|
|
3
3
|
import { decodeHTML } from "entities";
|
|
4
|
-
import { parseFeed } from "feedsmith";
|
|
5
4
|
|
|
6
5
|
//#region src/utils.ts
|
|
6
|
+
const strippedParamsCache = /* @__PURE__ */ new WeakMap();
|
|
7
|
+
const getStrippedParamsSet = (params) => {
|
|
8
|
+
let cached = strippedParamsCache.get(params);
|
|
9
|
+
if (!cached) {
|
|
10
|
+
cached = new Set(params.map((param) => param.toLowerCase()));
|
|
11
|
+
strippedParamsCache.set(params, cached);
|
|
12
|
+
}
|
|
13
|
+
return cached;
|
|
14
|
+
};
|
|
7
15
|
const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
|
|
8
16
|
const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
|
|
9
17
|
const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
|
|
@@ -84,7 +92,6 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
84
92
|
const ascii = domainToASCII(parsed.hostname);
|
|
85
93
|
if (ascii) parsed.hostname = ascii;
|
|
86
94
|
}
|
|
87
|
-
if (options.lowercaseHostname) parsed.hostname = parsed.hostname.toLowerCase();
|
|
88
95
|
if (options.stripAuthentication) {
|
|
89
96
|
parsed.username = "";
|
|
90
97
|
parsed.password = "";
|
|
@@ -97,9 +104,15 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
97
104
|
if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
|
|
98
105
|
if (options.stripRootSlash && pathname === "/") pathname = "";
|
|
99
106
|
parsed.pathname = pathname;
|
|
100
|
-
if (options.
|
|
107
|
+
if (options.stripQuery) parsed.search = "";
|
|
108
|
+
if (options.stripQueryParams && parsed.search) {
|
|
109
|
+
const strippedSet = getStrippedParamsSet(options.stripQueryParams);
|
|
110
|
+
const paramsToDelete = [];
|
|
111
|
+
for (const [key] of parsed.searchParams) if (strippedSet.has(key.toLowerCase())) paramsToDelete.push(key);
|
|
112
|
+
for (const param of paramsToDelete) parsed.searchParams.delete(param);
|
|
113
|
+
}
|
|
101
114
|
if (options.sortQueryParams) parsed.searchParams.sort();
|
|
102
|
-
if (options.stripEmptyQuery && parsed.
|
|
115
|
+
if (options.stripEmptyQuery && parsed.href.endsWith("?")) parsed.search = "";
|
|
103
116
|
let result = parsed.href;
|
|
104
117
|
if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
|
|
105
118
|
return result;
|
|
@@ -107,18 +120,6 @@ const normalizeUrl = (url, options = defaultNormalizeOptions) => {
|
|
|
107
120
|
return url;
|
|
108
121
|
}
|
|
109
122
|
};
|
|
110
|
-
const nativeFetch = async (url, options) => {
|
|
111
|
-
const response = await fetch(url, {
|
|
112
|
-
method: options?.method ?? "GET",
|
|
113
|
-
headers: options?.headers
|
|
114
|
-
});
|
|
115
|
-
return {
|
|
116
|
-
headers: response.headers,
|
|
117
|
-
body: await response.text(),
|
|
118
|
-
url: response.url,
|
|
119
|
-
status: response.status
|
|
120
|
-
};
|
|
121
|
-
};
|
|
122
123
|
const applyPlatformHandlers = (url, platforms) => {
|
|
123
124
|
try {
|
|
124
125
|
let parsed = new URL(url);
|
|
@@ -131,25 +132,6 @@ const applyPlatformHandlers = (url, platforms) => {
|
|
|
131
132
|
return url;
|
|
132
133
|
}
|
|
133
134
|
};
|
|
134
|
-
const feedsmithParser = {
|
|
135
|
-
parse: (body) => {
|
|
136
|
-
try {
|
|
137
|
-
return parseFeed(body);
|
|
138
|
-
} catch {}
|
|
139
|
-
},
|
|
140
|
-
getSelfUrl: (parsed) => {
|
|
141
|
-
switch (parsed.format) {
|
|
142
|
-
case "atom": return parsed.feed.links?.find((link) => link.rel === "self")?.href;
|
|
143
|
-
case "rss":
|
|
144
|
-
case "rdf": return parsed.feed.atom?.links?.find((link) => link.rel === "self")?.href;
|
|
145
|
-
case "json": return parsed.feed.feed_url;
|
|
146
|
-
}
|
|
147
|
-
},
|
|
148
|
-
getSignature: (parsed, selfUrl) => {
|
|
149
|
-
const signature = JSON.stringify(parsed.feed);
|
|
150
|
-
return selfUrl ? signature.replaceAll(`"${selfUrl}"`, "\"__SELF_URL__\"") : signature;
|
|
151
|
-
}
|
|
152
|
-
};
|
|
153
135
|
|
|
154
136
|
//#endregion
|
|
155
|
-
export { addMissingProtocol, applyPlatformHandlers, feedsmithParser, nativeFetch, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
|
137
|
+
export { addMissingProtocol, applyPlatformHandlers, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "feedcanon",
|
|
3
|
-
"description": "Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest
|
|
3
|
+
"description": "Find the canonical URL for any web feed by comparing actual content. Turn messy feed URLs into their cleanest form.",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "https://github.com/macieklamberski/feedcanon.git"
|
|
@@ -49,7 +49,9 @@
|
|
|
49
49
|
],
|
|
50
50
|
"scripts": {
|
|
51
51
|
"prepare": "lefthook install",
|
|
52
|
-
"build": "tsdown src/exports.ts src/defaults.ts --format cjs,esm --dts --clean --unbundle --no-fixed-extension"
|
|
52
|
+
"build": "tsdown src/exports.ts src/defaults.ts --format cjs,esm --dts --clean --unbundle --no-fixed-extension",
|
|
53
|
+
"docs:dev": "vitepress dev docs",
|
|
54
|
+
"docs:build": "vitepress build docs"
|
|
53
55
|
},
|
|
54
56
|
"dependencies": {
|
|
55
57
|
"entities": "^7.0.0",
|
|
@@ -58,7 +60,8 @@
|
|
|
58
60
|
"devDependencies": {
|
|
59
61
|
"@types/bun": "^1.3.5",
|
|
60
62
|
"kvalita": "1.9.0",
|
|
61
|
-
"tsdown": "^0.18.
|
|
63
|
+
"tsdown": "^0.18.3",
|
|
64
|
+
"vitepress": "^1.6.4"
|
|
62
65
|
},
|
|
63
|
-
"version": "1.0.0-beta.2"
|
|
66
|
+
"version": "1.0.0"
|
|
64
67
|
}
|