feedcanon 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +88 -0
- package/dist/defaults.cjs +188 -0
- package/dist/defaults.d.cts +9 -0
- package/dist/defaults.d.ts +9 -0
- package/dist/defaults.js +185 -0
- package/dist/exports.cjs +15 -0
- package/dist/exports.d.cts +6 -0
- package/dist/exports.d.ts +6 -0
- package/dist/exports.js +6 -0
- package/dist/index.cjs +131 -0
- package/dist/index.d.cts +6 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +131 -0
- package/dist/platforms/feedburner.cjs +21 -0
- package/dist/platforms/feedburner.d.cts +6 -0
- package/dist/platforms/feedburner.d.ts +6 -0
- package/dist/platforms/feedburner.js +20 -0
- package/dist/types.d.cts +64 -0
- package/dist/types.d.ts +64 -0
- package/dist/utils.cjs +163 -0
- package/dist/utils.d.cts +11 -0
- package/dist/utils.d.ts +11 -0
- package/dist/utils.js +157 -0
- package/package.json +64 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { defaultPlatforms, defaultTiers } from "./defaults.js";
|
|
2
|
+
import { applyPlatformHandlers, feedsmithParser, nativeFetch, normalizeUrl, resolveUrl } from "./utils.js";
|
|
3
|
+
|
|
4
|
+
//#region src/index.ts
|
|
5
|
+
/**
 * Finds the canonical URL for a feed.
 *
 * Fetches `inputUrl`, parses the body as a feed, optionally follows the feed's
 * self-declared URL, then probes normalized variants of the resulting URL and
 * picks the first one that is already known (`existsFn`) or serves an
 * equivalent feed. A final attempt upgrades an `http://` winner to `https://`.
 *
 * @param inputUrl URL to start from; it is resolved and passed through the platform handlers first.
 * @param options Optional overrides: `fetchFn`, `existsFn`, `parser`, `tiers`, `platforms`
 *   and the `onFetch` / `onMatch` / `onExists` callbacks.
 * @returns Promise of the selected URL, or `undefined` when the input cannot be resolved,
 *   the initial fetch throws or is not 2xx, the body is empty, or the body does not parse.
 */
const findCanonical = async (inputUrl, options) => {
  const { fetchFn = nativeFetch, existsFn, parser = feedsmithParser, tiers = defaultTiers, platforms = defaultPlatforms, onFetch, onMatch, onExists } = options ?? {};
  // Resolve a URL (optionally against a base) and run the result through the platform handlers.
  const resolveAndApplyPlatformHandlers = (url, baseUrl) => {
    const resolved = resolveUrl(url, baseUrl);
    return resolved ? applyPlatformHandlers(resolved, platforms) : void 0;
  };
  const initialRequestUrl = resolveAndApplyPlatformHandlers(inputUrl);
  if (!initialRequestUrl) return;
  let initialResponse;
  try {
    initialResponse = await fetchFn(initialRequestUrl);
  } catch {
    // A failing initial fetch means there is nothing to canonicalize.
    return;
  }
  onFetch?.({
    url: initialRequestUrl,
    response: initialResponse
  });
  if (initialResponse.status < 200 || initialResponse.status >= 300) return;
  // URL reported by the response, prepared the same way as the request URL.
  const initialResponseUrl = resolveAndApplyPlatformHandlers(initialResponse.url);
  if (!initialResponseUrl) return;
  const initialResponseBody = initialResponse.body;
  if (!initialResponseBody) return;
  // Computed lazily by compareWithInitialResponse, at most once.
  let initialResponseSignature;
  let selfRequestUrl;
  const initialResponseFeed = parser.parse(initialResponseBody);
  if (!initialResponseFeed) return;
  onMatch?.({
    url: initialRequestUrl,
    response: initialResponse,
    feed: initialResponseFeed
  });
  // The self URL may be relative, so it is resolved against the response URL.
  const selfRequestUrlRaw = parser.getSelfUrl(initialResponseFeed);
  if (selfRequestUrlRaw) selfRequestUrl = resolveAndApplyPlatformHandlers(selfRequestUrlRaw, initialResponseUrl);
  // True when a body is identical to the initial body, or parses to a feed
  // whose JSON-serialized signature equals the initial feed's signature.
  const compareWithInitialResponse = (comparedResponseBody) => {
    if (!comparedResponseBody) return false;
    if (initialResponseBody === comparedResponseBody) return true;
    const comparedResponseFeed = parser.parse(comparedResponseBody);
    if (comparedResponseFeed) {
      initialResponseSignature ||= JSON.stringify(parser.getSignature(initialResponseFeed));
      const comparedResponseSignature = JSON.stringify(parser.getSignature(comparedResponseFeed));
      return initialResponseSignature === comparedResponseSignature;
    }
    return false;
  };
  // Fetch a candidate URL; resolves to the response only when it is 2xx and
  // matches the initial feed, otherwise to undefined. Fetch errors are swallowed.
  const fetchAndCompare = async (url) => {
    let response;
    try {
      response = await fetchFn(url);
    } catch {
      return;
    }
    onFetch?.({
      url,
      response
    });
    if (response.status < 200 || response.status >= 300) return;
    if (!compareWithInitialResponse(response.body)) return;
    return response;
  };
  // Base URL from which the normalized variants are derived.
  let variantSource = initialResponseUrl;
  if (selfRequestUrl && selfRequestUrl !== initialResponseUrl) {
    // Try the self URL as declared, then with the opposite http/https scheme.
    const urlsToTry = [selfRequestUrl];
    if (selfRequestUrl.startsWith("https://")) urlsToTry.push(selfRequestUrl.replace("https://", "http://"));
    else if (selfRequestUrl.startsWith("http://")) urlsToTry.push(selfRequestUrl.replace("http://", "https://"));
    for (const urlToTry of urlsToTry) {
      const response = await fetchAndCompare(urlToTry);
      if (response) {
        onMatch?.({
          url: urlToTry,
          response,
          feed: initialResponseFeed
        });
        variantSource = resolveAndApplyPlatformHandlers(response.url) ?? initialResponseUrl;
        break;
      }
    }
  }
  // One candidate per tier, de-duplicated; the unmodified source is appended last.
  const variants = new Set(tiers.map((tier) => resolveAndApplyPlatformHandlers(normalizeUrl(variantSource, tier))).filter((url) => url !== void 0));
  variants.add(variantSource);
  let winningUrl = variantSource;
  for (const variant of variants) {
    if (existsFn) {
      // Any value other than undefined counts as "already known" and ends the search.
      const data = await existsFn(variant);
      if (data !== void 0) {
        onExists?.({
          url: variant,
          data
        });
        return variant;
      }
    }
    // The source itself is the fallback winner; no need to fetch it again.
    if (variant === variantSource) continue;
    // The initial response URL is already known to serve the feed.
    if (variant === initialResponseUrl) {
      winningUrl = initialResponseUrl;
      break;
    }
    const response = await fetchAndCompare(variant);
    if (response) {
      // Skip variants whose response URL is one of the URLs we already have.
      const preparedResponseUrl = resolveAndApplyPlatformHandlers(response.url);
      if (preparedResponseUrl === variantSource || preparedResponseUrl === initialResponseUrl) continue;
      onMatch?.({
        url: variant,
        response,
        feed: initialResponseFeed
      });
      winningUrl = variant;
      break;
    }
  }
  // Prefer https when the same feed is served there.
  if (winningUrl.startsWith("http://")) {
    const httpsUrl = winningUrl.replace("http://", "https://");
    const response = await fetchAndCompare(httpsUrl);
    if (response) {
      onMatch?.({
        url: httpsUrl,
        response,
        feed: initialResponseFeed
      });
      return httpsUrl;
    }
  }
  return winningUrl;
};
|
|
129
|
+
|
|
130
|
+
//#endregion
|
|
131
|
+
export { findCanonical };
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
|
|
2
|
+
//#region src/platforms/feedburner.ts
|
|
3
|
+
// Hostnames that serve FeedBurner feeds.
const hosts = new Set(["feeds.feedburner.com", "feeds2.feedburner.com", "feedproxy.google.com"]);

/**
 * Platform handler for FeedBurner.
 *
 * `match` reports whether a URL's hostname is one of the known FeedBurner
 * hosts; `normalize` returns a copy of the URL pointed at
 * `feeds.feedburner.com` with the query string removed.
 */
const feedburnerHandler = {
  match: ({ hostname }) => hosts.has(hostname),
  normalize: (url) => {
    // Work on a copy so the caller's URL object stays untouched.
    const rewritten = new URL(url);
    rewritten.hostname = "feeds.feedburner.com";
    rewritten.search = "";
    return rewritten;
  },
};
|
|
19
|
+
|
|
20
|
+
//#endregion
|
|
21
|
+
exports.feedburnerHandler = feedburnerHandler;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
//#region src/platforms/feedburner.ts
|
|
2
|
+
// Hostnames that serve FeedBurner feeds.
const hosts = new Set(["feeds.feedburner.com", "feeds2.feedburner.com", "feedproxy.google.com"]);

/**
 * Platform handler for FeedBurner.
 *
 * `match` reports whether a URL's hostname is one of the known FeedBurner
 * hosts; `normalize` returns a copy of the URL pointed at
 * `feeds.feedburner.com` with the query string removed.
 */
const feedburnerHandler = {
  match: ({ hostname }) => hosts.has(hostname),
  normalize: (url) => {
    // Work on a copy so the caller's URL object stays untouched.
    const rewritten = new URL(url);
    rewritten.hostname = "feeds.feedburner.com";
    rewritten.search = "";
    return rewritten;
  },
};
|
|
18
|
+
|
|
19
|
+
//#endregion
|
|
20
|
+
export { feedburnerHandler };
|
package/dist/types.d.cts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
//#region src/types.d.ts
|
|
2
|
+
/** Adapter between the canonicalizer and a feed parser producing values of type `T`. */
type ParserAdapter<T> = {
  /** Parses a response body; `undefined` means the body was not recognised as a feed. */
  parse: (body: string) => T | undefined;
  /** Returns the URL the feed declares for itself, if any. */
  getSelfUrl: (parsed: T) => string | undefined;
  /** Returns an object describing the feed; two feeds are compared via the JSON form of this value. */
  getSignature: (parsed: T) => object;
};
/** Platform-specific URL rewrite; the first handler whose `match` accepts a URL is applied. */
type PlatformHandler = {
  /** Whether this handler applies to the URL. */
  match: (url: URL) => boolean;
  /** Returns the rewritten URL. */
  normalize: (url: URL) => URL;
};
/** Flags selecting which normalization steps `normalizeUrl` performs. */
type NormalizeOptions = {
  /** Remove a leading `http://` or `https://` from the result. */
  stripProtocol?: boolean;
  /** Clear username and password. */
  stripAuthentication?: boolean;
  /** Remove a leading `www.` from the hostname. */
  stripWww?: boolean;
  /** Remove a trailing `/` from paths longer than one character. */
  stripTrailingSlash?: boolean;
  /** Request removal of a path that consists only of `/`. */
  stripRootSlash?: boolean;
  /** Collapse runs of `/` in the path into a single `/`. */
  collapseSlashes?: boolean;
  /** Remove the fragment. */
  stripHash?: boolean;
  /** Remove the fragment when it starts with `#:~:`. */
  stripTextFragment?: boolean;
  /** Sort query parameters. */
  sortQueryParams?: boolean;
  /** Names of query parameters to delete. */
  stripQueryParams?: Array<string>;
  /** Request removal of an empty query string. */
  stripEmptyQuery?: boolean;
  /** Decode percent-escapes of safe path characters and uppercase the remaining escapes. */
  normalizeEncoding?: boolean;
  /** Lowercase the hostname. */
  lowercaseHostname?: boolean;
  /** Apply Unicode NFC normalization to hostname and path. */
  normalizeUnicode?: boolean;
  /** Convert the hostname to its ASCII (Punycode) form. */
  convertToPunycode?: boolean;
};
/** Callback invoked after a fetch has produced a response. */
type OnFetchFn = (data: {
  url: string;
  response: FetchFnResponse;
}) => void;
/** Callback invoked when a fetched URL is accepted as serving the feed. */
type OnMatchFn<TFeed = unknown> = (data: {
  url: string;
  response: FetchFnResponse;
  feed: TFeed;
}) => void;
/** Callback invoked when `existsFn` returned data for a URL. */
type OnExistsFn<T> = (data: {
  url: string;
  data: T;
}) => void;
/** Lookup for already-known URLs; resolve to `undefined` when the URL is unknown. */
type ExistsFn<T = unknown> = (url: string) => Promise<T | undefined>;
/** Options accepted by `findCanonical`; every field is optional. */
type FindCanonicalOptions<TFeed = unknown, TExisting = unknown> = {
  parser?: ParserAdapter<TFeed>;
  fetchFn?: FetchFn;
  existsFn?: ExistsFn<TExisting>;
  /** Normalization option sets, each producing one candidate URL. */
  tiers?: Array<NormalizeOptions>;
  platforms?: Array<PlatformHandler>;
  onFetch?: OnFetchFn;
  onMatch?: OnMatchFn<TFeed>;
  onExists?: OnExistsFn<TExisting>;
};
/** Request options passed to a `FetchFn`. */
type FetchFnOptions = {
  method?: 'GET' | 'HEAD';
  headers?: Record<string, string>;
};
/** Response shape a `FetchFn` must produce. */
type FetchFnResponse = {
  headers: Headers;
  /** Response body as text. */
  body: string;
  /** URL reported for the response. */
  url: string;
  /** HTTP status code. */
  status: number;
};
/** Function used to perform HTTP requests. */
type FetchFn = (url: string, options?: FetchFnOptions) => Promise<FetchFnResponse>;
|
|
63
|
+
//#endregion
|
|
64
|
+
export { ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler };
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
//#region src/types.d.ts
|
|
2
|
+
/** Adapter between the canonicalizer and a feed parser producing values of type `T`. */
type ParserAdapter<T> = {
  /** Parses a response body; `undefined` means the body was not recognised as a feed. */
  parse: (body: string) => T | undefined;
  /** Returns the URL the feed declares for itself, if any. */
  getSelfUrl: (parsed: T) => string | undefined;
  /** Returns an object describing the feed; two feeds are compared via the JSON form of this value. */
  getSignature: (parsed: T) => object;
};
/** Platform-specific URL rewrite; the first handler whose `match` accepts a URL is applied. */
type PlatformHandler = {
  /** Whether this handler applies to the URL. */
  match: (url: URL) => boolean;
  /** Returns the rewritten URL. */
  normalize: (url: URL) => URL;
};
/** Flags selecting which normalization steps `normalizeUrl` performs. */
type NormalizeOptions = {
  /** Remove a leading `http://` or `https://` from the result. */
  stripProtocol?: boolean;
  /** Clear username and password. */
  stripAuthentication?: boolean;
  /** Remove a leading `www.` from the hostname. */
  stripWww?: boolean;
  /** Remove a trailing `/` from paths longer than one character. */
  stripTrailingSlash?: boolean;
  /** Request removal of a path that consists only of `/`. */
  stripRootSlash?: boolean;
  /** Collapse runs of `/` in the path into a single `/`. */
  collapseSlashes?: boolean;
  /** Remove the fragment. */
  stripHash?: boolean;
  /** Remove the fragment when it starts with `#:~:`. */
  stripTextFragment?: boolean;
  /** Sort query parameters. */
  sortQueryParams?: boolean;
  /** Names of query parameters to delete. */
  stripQueryParams?: Array<string>;
  /** Request removal of an empty query string. */
  stripEmptyQuery?: boolean;
  /** Decode percent-escapes of safe path characters and uppercase the remaining escapes. */
  normalizeEncoding?: boolean;
  /** Lowercase the hostname. */
  lowercaseHostname?: boolean;
  /** Apply Unicode NFC normalization to hostname and path. */
  normalizeUnicode?: boolean;
  /** Convert the hostname to its ASCII (Punycode) form. */
  convertToPunycode?: boolean;
};
/** Callback invoked after a fetch has produced a response. */
type OnFetchFn = (data: {
  url: string;
  response: FetchFnResponse;
}) => void;
/** Callback invoked when a fetched URL is accepted as serving the feed. */
type OnMatchFn<TFeed = unknown> = (data: {
  url: string;
  response: FetchFnResponse;
  feed: TFeed;
}) => void;
/** Callback invoked when `existsFn` returned data for a URL. */
type OnExistsFn<T> = (data: {
  url: string;
  data: T;
}) => void;
/** Lookup for already-known URLs; resolve to `undefined` when the URL is unknown. */
type ExistsFn<T = unknown> = (url: string) => Promise<T | undefined>;
/** Options accepted by `findCanonical`; every field is optional. */
type FindCanonicalOptions<TFeed = unknown, TExisting = unknown> = {
  parser?: ParserAdapter<TFeed>;
  fetchFn?: FetchFn;
  existsFn?: ExistsFn<TExisting>;
  /** Normalization option sets, each producing one candidate URL. */
  tiers?: Array<NormalizeOptions>;
  platforms?: Array<PlatformHandler>;
  onFetch?: OnFetchFn;
  onMatch?: OnMatchFn<TFeed>;
  onExists?: OnExistsFn<TExisting>;
};
/** Request options passed to a `FetchFn`. */
type FetchFnOptions = {
  method?: 'GET' | 'HEAD';
  headers?: Record<string, string>;
};
/** Response shape a `FetchFn` must produce. */
type FetchFnResponse = {
  headers: Headers;
  /** Response body as text. */
  body: string;
  /** URL reported for the response. */
  url: string;
  /** HTTP status code. */
  status: number;
};
/** Function used to perform HTTP requests. */
type FetchFn = (url: string, options?: FetchFnOptions) => Promise<FetchFnResponse>;
|
|
63
|
+
//#endregion
|
|
64
|
+
export { ExistsFn, FetchFn, FetchFnOptions, FetchFnResponse, FindCanonicalOptions, NormalizeOptions, OnExistsFn, OnFetchFn, OnMatchFn, ParserAdapter, PlatformHandler };
|
package/dist/utils.cjs
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
const require_defaults = require('./defaults.cjs');
|
|
2
|
+
let node_url = require("node:url");
|
|
3
|
+
let entities = require("entities");
|
|
4
|
+
let feedsmith = require("feedsmith");
|
|
5
|
+
|
|
6
|
+
//#region src/utils.ts
|
|
7
|
+
// Four dot-separated groups of 1-3 digits; octet ranges are not checked.
const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
// Loose IPv6 shape: 2-7 colon-terminated groups of up to four hex digits, then a final group.
const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
// Characters that decodeAndNormalizeEncoding leaves unescaped in a path.
const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
// Feed-specific URL schemes recognised by resolveFeedProtocol.
const feedProtocols = [
  "feed:",
  "rss:",
  "podcast:",
  "pcast:",
  "itpc:"
];
|
|
17
|
+
/**
 * Rewrites a feed-scheme URL (`feed:`, `rss:`, ...) into a regular URL.
 *
 * `feed:https://host/x` becomes `https://host/x`, and `feed://host/x`
 * becomes `<protocol>://host/x`. Anything else is returned unchanged.
 *
 * @param url URL to inspect; the scheme is matched case-insensitively.
 * @param protocol Scheme used when the wrapped URL has none of its own.
 * @returns The rewritten URL, or `url` itself.
 */
const resolveFeedProtocol = (url, protocol = "https") => {
  const lowered = url.toLowerCase();
  const candidates = feedProtocols.filter((prefix) => lowered.startsWith(prefix));
  for (const prefix of candidates) {
    const remainder = url.slice(prefix.length);
    const loweredRemainder = lowered.slice(prefix.length);
    // The feed scheme wraps a complete http(s) URL: just unwrap it.
    if (loweredRemainder.startsWith("http://") || loweredRemainder.startsWith("https://")) {
      return remainder;
    }
    // The feed scheme replaces the real scheme: substitute the requested one.
    if (loweredRemainder.startsWith("//")) {
      return `${protocol}:${remainder}`;
    }
  }
  return url;
};
|
|
26
|
+
/**
 * Prepends a scheme to URLs that look like a bare host (`example.com/feed`)
 * or are protocol-relative (`//example.com/feed`).
 *
 * Input that already has a scheme, is a relative path, or does not look like
 * a host is returned unchanged.
 *
 * @param url URL-like string to inspect.
 * @param protocol Scheme to prepend, without the trailing colon.
 * @returns The URL with a scheme added, or `url` itself.
 */
const addMissingProtocol = (url, protocol = "https") => {
  try {
    const parsed = new URL(url);
    // "example.com:8080/x" and "localhost:3000" parse with the host as the
    // "scheme"; only a scheme without a dot (and not "localhost") is real.
    if (!parsed.protocol.includes(".") && parsed.protocol !== "localhost:") return url;
  } catch {}
  if (url.startsWith("//") && !url.startsWith("///")) {
    // Protocol-relative: accept only when what follows looks like a host.
    try {
      const parsed = new URL(`${protocol}:${url}`);
      const hostname = parsed.hostname;
      if (hostname.includes(".") || hostname === "localhost" || ipv4Pattern.test(hostname) || ipv6Pattern.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
      return url;
    } catch {
      return url;
    }
  }
  // Relative paths are left for the caller to resolve against a base.
  if (url.startsWith("/") || url.startsWith(".")) return url;
  const slashIndex = url.indexOf("/");
  const dotIndex = url.indexOf(".");
  // A host needs a dot before the first slash; "localhost" is the exception.
  if (dotIndex === -1 || slashIndex !== -1 && dotIndex > slashIndex) {
    if (!url.startsWith("localhost")) return url;
  }
  // Leading whitespace (space, tab or newline) means this is not a bare host.
  const firstChar = url.charAt(0);
  if (firstChar === " " || firstChar === "\t" || firstChar === "\n") return url;
  return `${protocol}://${url}`;
};
|
|
49
|
+
/**
 * Turns a raw URL string into an absolute http(s) URL.
 *
 * Steps: decode HTML entities (only when an `&` is present), unwrap feed
 * schemes, resolve against `base` when given, add a missing scheme, then
 * validate the result.
 *
 * @param url Raw URL string.
 * @param base Optional base URL for relative input.
 * @returns The absolute URL, or `undefined` when it cannot be parsed or is not http(s).
 */
const resolveUrl = (url, base) => {
  const decoded = url.includes("&") ? (0, entities.decodeHTML)(url) : url;
  let candidate = resolveFeedProtocol(decoded);
  if (base) {
    try {
      candidate = new URL(candidate, base).href;
    } catch {
      return undefined;
    }
  }
  candidate = addMissingProtocol(candidate);
  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return undefined;
  }
  const isHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
  return isHttp ? parsed.href : undefined;
};
|
|
67
|
+
/**
 * Normalizes percent-escapes in `value`.
 *
 * An escape whose character matches `safePathChars` is replaced by that
 * character; every other escape is kept with its hex digits uppercased.
 *
 * @param value String that may contain `%XX` escapes.
 * @returns The normalized string (the input itself when it has no `%`).
 */
const decodeAndNormalizeEncoding = (value) => {
  if (value.indexOf("%") === -1) return value;
  const rewriteEscape = (_escape, hexDigits) => {
    const decoded = String.fromCharCode(Number.parseInt(hexDigits, 16));
    return safePathChars.test(decoded) ? decoded : `%${hexDigits.toUpperCase()}`;
  };
  return value.replace(/%([0-9A-Fa-f]{2})/g, rewriteEscape);
};
|
|
76
|
+
/**
 * Applies the normalization steps enabled in `options` to `url`.
 *
 * Steps run in a fixed order: Unicode/Punycode/lowercase on the hostname,
 * credentials, `www.`, fragment, path rewriting, query parameters, and
 * finally root-slash and scheme removal on the serialized string.
 *
 * @param url Absolute URL string.
 * @param options Flags selecting the steps; defaults to the package defaults.
 * @returns The normalized URL string, or `url` unchanged when it cannot be parsed.
 */
const normalizeUrl = (url, options = require_defaults.defaultNormalizeOptions) => {
  try {
    const parsed = new URL(url);
    if (options.normalizeUnicode) {
      parsed.hostname = parsed.hostname.normalize("NFC");
      parsed.pathname = parsed.pathname.normalize("NFC");
    }
    if (options.convertToPunycode) {
      const ascii = (0, node_url.domainToASCII)(parsed.hostname);
      if (ascii) parsed.hostname = ascii;
    }
    if (options.lowercaseHostname) parsed.hostname = parsed.hostname.toLowerCase();
    if (options.stripAuthentication) {
      parsed.username = "";
      parsed.password = "";
    }
    if (options.stripWww && parsed.hostname.startsWith("www.")) parsed.hostname = parsed.hostname.slice(4);
    if (options.stripHash) parsed.hash = "";
    if (options.stripTextFragment && parsed.hash.startsWith("#:~:")) parsed.hash = "";
    let pathname = parsed.pathname;
    if (options.normalizeEncoding) pathname = decodeAndNormalizeEncoding(pathname);
    if (options.collapseSlashes) pathname = pathname.replace(/\/+/g, "/");
    if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
    if (options.stripRootSlash && pathname === "/") pathname = "";
    parsed.pathname = pathname;
    if (options.stripQueryParams && parsed.search) for (const param of options.stripQueryParams) parsed.searchParams.delete(param);
    if (options.sortQueryParams) parsed.searchParams.sort();
    // The `search` getter returns "" (never "?") for a bare trailing "?",
    // while `href` still contains it. Assigning "" clears the query entirely.
    if (options.stripEmptyQuery && parsed.search === "") parsed.search = "";
    let result = parsed.href;
    // http(s) URLs always serialize an empty path as "/", so the root slash
    // has to be removed from the serialized string rather than via `pathname`.
    if (options.stripRootSlash && parsed.pathname === "/" && parsed.host !== "" && result.startsWith(`${parsed.protocol}//`)) {
      const pathStart = result.indexOf("/", parsed.protocol.length + 2);
      if (pathStart !== -1) result = result.slice(0, pathStart) + result.slice(pathStart + 1);
    }
    if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
    return result;
  } catch {
    return url;
  }
};
|
|
111
|
+
/**
 * Default fetch implementation built on the global `fetch`.
 *
 * @param url URL to request.
 * @param options Optional `method` (defaults to "GET") and `headers`.
 * @returns Promise of `{ headers, body, url, status }`, with the body read as text.
 */
const nativeFetch = async (url, options) => {
  const requestInit = {
    method: options?.method ?? "GET",
    headers: options?.headers,
  };
  const response = await fetch(url, requestInit);
  const { headers } = response;
  const body = await response.text();
  return { headers, body, url: response.url, status: response.status };
};
|
|
123
|
+
/**
 * Runs `url` through the first platform handler that matches it.
 *
 * @param url Absolute URL string.
 * @param platforms Array of handlers, checked in order; only the first match is applied.
 * @returns The serialized URL after the handler ran (or after plain parsing when none
 *   matched); `url` unchanged when parsing or a handler throws.
 */
const applyPlatformHandlers = (url, platforms) => {
  try {
    const parsed = new URL(url);
    const handler = platforms.find((candidate) => candidate.match(parsed));
    const rewritten = handler ? handler.normalize(parsed) : parsed;
    return rewritten.href;
  } catch {
    return url;
  }
};
|
|
135
|
+
/**
 * Default parser adapter backed by feedsmith's `parseFeed`.
 *
 * - `parse` returns the parse result, or `undefined` when parsing throws.
 * - `getSelfUrl` returns the `rel="self"` link for Atom, RSS and RDF feeds
 *   (RSS/RDF read it from the feed's `atom` links) and `feed_url` for JSON feeds.
 * - `getSignature` returns the parsed `feed` object.
 */
const feedsmithParser = {
  parse(body) {
    try {
      return (0, feedsmith.parseFeed)(body);
    } catch {
      return undefined;
    }
  },
  getSelfUrl(parsed) {
    const selfHref = (links) => links?.find((link) => link.rel === "self")?.href;
    const { format } = parsed;
    if (format === "atom") return selfHref(parsed.feed.links);
    if (format === "rss" || format === "rdf") return selfHref(parsed.feed.atom?.links);
    if (format === "json") return parsed.feed.feed_url;
    return undefined;
  },
  getSignature(parsed) {
    return parsed.feed;
  },
};
|
|
155
|
+
|
|
156
|
+
//#endregion
|
|
157
|
+
exports.addMissingProtocol = addMissingProtocol;
|
|
158
|
+
exports.applyPlatformHandlers = applyPlatformHandlers;
|
|
159
|
+
exports.feedsmithParser = feedsmithParser;
|
|
160
|
+
exports.nativeFetch = nativeFetch;
|
|
161
|
+
exports.normalizeUrl = normalizeUrl;
|
|
162
|
+
exports.resolveFeedProtocol = resolveFeedProtocol;
|
|
163
|
+
exports.resolveUrl = resolveUrl;
|
package/dist/utils.d.cts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { NormalizeOptions, ParserAdapter } from "./types.cjs";
|
|
2
|
+
import { parseFeed } from "feedsmith";
|
|
3
|
+
|
|
4
|
+
//#region src/utils.d.ts
|
|
5
|
+
/** Rewrites feed-scheme URLs (e.g. `feed://`) into regular URLs; `protocol` is used when the wrapped URL has no http(s) scheme. */
declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
/** Prepends `protocol` to URLs that look like a bare or protocol-relative host; other input is returned unchanged. */
declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
/** Resolves `url` (optionally against `base`) to an absolute http(s) URL, or `undefined` when that is not possible. */
declare const resolveUrl: (url: string, base?: string) => string | undefined;
/** Applies the normalization steps enabled in `options` and returns the resulting URL string. */
declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
/** Default `ParserAdapter` backed by feedsmith's `parseFeed`. */
declare const feedsmithParser: ParserAdapter<ReturnType<typeof parseFeed>>;
|
|
10
|
+
//#endregion
|
|
11
|
+
export { addMissingProtocol, feedsmithParser, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { NormalizeOptions, ParserAdapter } from "./types.js";
|
|
2
|
+
import { parseFeed } from "feedsmith";
|
|
3
|
+
|
|
4
|
+
//#region src/utils.d.ts
|
|
5
|
+
/** Rewrites feed-scheme URLs (e.g. `feed://`) into regular URLs; `protocol` is used when the wrapped URL has no http(s) scheme. */
declare const resolveFeedProtocol: (url: string, protocol?: "http" | "https") => string;
/** Prepends `protocol` to URLs that look like a bare or protocol-relative host; other input is returned unchanged. */
declare const addMissingProtocol: (url: string, protocol?: "http" | "https") => string;
/** Resolves `url` (optionally against `base`) to an absolute http(s) URL, or `undefined` when that is not possible. */
declare const resolveUrl: (url: string, base?: string) => string | undefined;
/** Applies the normalization steps enabled in `options` and returns the resulting URL string. */
declare const normalizeUrl: (url: string, options?: NormalizeOptions) => string;
/** Default `ParserAdapter` backed by feedsmith's `parseFeed`. */
declare const feedsmithParser: ParserAdapter<ReturnType<typeof parseFeed>>;
|
|
10
|
+
//#endregion
|
|
11
|
+
export { addMissingProtocol, feedsmithParser, normalizeUrl, resolveFeedProtocol, resolveUrl };
|
package/dist/utils.js
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import { defaultNormalizeOptions } from "./defaults.js";
|
|
2
|
+
import { domainToASCII } from "node:url";
|
|
3
|
+
import { decodeHTML } from "entities";
|
|
4
|
+
import { parseFeed } from "feedsmith";
|
|
5
|
+
|
|
6
|
+
//#region src/utils.ts
|
|
7
|
+
// Four dot-separated groups of 1-3 digits; octet ranges are not checked.
const ipv4Pattern = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
// Loose IPv6 shape: 2-7 colon-terminated groups of up to four hex digits, then a final group.
const ipv6Pattern = /^([0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}$/i;
// Characters that decodeAndNormalizeEncoding leaves unescaped in a path.
const safePathChars = /[a-zA-Z0-9._~!$&'()*+,;=:@-]/;
// Feed-specific URL schemes recognised by resolveFeedProtocol.
const feedProtocols = [
  "feed:",
  "rss:",
  "podcast:",
  "pcast:",
  "itpc:"
];
|
|
17
|
+
/**
 * Rewrites a feed-scheme URL (`feed:`, `rss:`, ...) into a regular URL.
 *
 * `feed:https://host/x` becomes `https://host/x`, and `feed://host/x`
 * becomes `<protocol>://host/x`. Anything else is returned unchanged.
 *
 * @param url URL to inspect; the scheme is matched case-insensitively.
 * @param protocol Scheme used when the wrapped URL has none of its own.
 * @returns The rewritten URL, or `url` itself.
 */
const resolveFeedProtocol = (url, protocol = "https") => {
  const lowered = url.toLowerCase();
  const candidates = feedProtocols.filter((prefix) => lowered.startsWith(prefix));
  for (const prefix of candidates) {
    const remainder = url.slice(prefix.length);
    const loweredRemainder = lowered.slice(prefix.length);
    // The feed scheme wraps a complete http(s) URL: just unwrap it.
    if (loweredRemainder.startsWith("http://") || loweredRemainder.startsWith("https://")) {
      return remainder;
    }
    // The feed scheme replaces the real scheme: substitute the requested one.
    if (loweredRemainder.startsWith("//")) {
      return `${protocol}:${remainder}`;
    }
  }
  return url;
};
|
|
26
|
+
/**
 * Prepends a scheme to URLs that look like a bare host (`example.com/feed`)
 * or are protocol-relative (`//example.com/feed`).
 *
 * Input that already has a scheme, is a relative path, or does not look like
 * a host is returned unchanged.
 *
 * @param url URL-like string to inspect.
 * @param protocol Scheme to prepend, without the trailing colon.
 * @returns The URL with a scheme added, or `url` itself.
 */
const addMissingProtocol = (url, protocol = "https") => {
  try {
    const parsed = new URL(url);
    // "example.com:8080/x" and "localhost:3000" parse with the host as the
    // "scheme"; only a scheme without a dot (and not "localhost") is real.
    if (!parsed.protocol.includes(".") && parsed.protocol !== "localhost:") return url;
  } catch {}
  if (url.startsWith("//") && !url.startsWith("///")) {
    // Protocol-relative: accept only when what follows looks like a host.
    try {
      const parsed = new URL(`${protocol}:${url}`);
      const hostname = parsed.hostname;
      if (hostname.includes(".") || hostname === "localhost" || ipv4Pattern.test(hostname) || ipv6Pattern.test(hostname.replace(/^\[|\]$/g, ""))) return parsed.href;
      return url;
    } catch {
      return url;
    }
  }
  // Relative paths are left for the caller to resolve against a base.
  if (url.startsWith("/") || url.startsWith(".")) return url;
  const slashIndex = url.indexOf("/");
  const dotIndex = url.indexOf(".");
  // A host needs a dot before the first slash; "localhost" is the exception.
  if (dotIndex === -1 || slashIndex !== -1 && dotIndex > slashIndex) {
    if (!url.startsWith("localhost")) return url;
  }
  // Leading whitespace (space, tab or newline) means this is not a bare host.
  const firstChar = url.charAt(0);
  if (firstChar === " " || firstChar === "\t" || firstChar === "\n") return url;
  return `${protocol}://${url}`;
};
|
|
49
|
+
/**
 * Turns a raw URL string into an absolute http(s) URL.
 *
 * Steps: decode HTML entities (only when an `&` is present), unwrap feed
 * schemes, resolve against `base` when given, add a missing scheme, then
 * validate the result.
 *
 * @param url Raw URL string.
 * @param base Optional base URL for relative input.
 * @returns The absolute URL, or `undefined` when it cannot be parsed or is not http(s).
 */
const resolveUrl = (url, base) => {
  const decoded = url.includes("&") ? decodeHTML(url) : url;
  let candidate = resolveFeedProtocol(decoded);
  if (base) {
    try {
      candidate = new URL(candidate, base).href;
    } catch {
      return undefined;
    }
  }
  candidate = addMissingProtocol(candidate);
  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return undefined;
  }
  const isHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
  return isHttp ? parsed.href : undefined;
};
|
|
67
|
+
/**
 * Normalizes percent-escapes in `value`.
 *
 * An escape whose character matches `safePathChars` is replaced by that
 * character; every other escape is kept with its hex digits uppercased.
 *
 * @param value String that may contain `%XX` escapes.
 * @returns The normalized string (the input itself when it has no `%`).
 */
const decodeAndNormalizeEncoding = (value) => {
  if (value.indexOf("%") === -1) return value;
  const rewriteEscape = (_escape, hexDigits) => {
    const decoded = String.fromCharCode(Number.parseInt(hexDigits, 16));
    return safePathChars.test(decoded) ? decoded : `%${hexDigits.toUpperCase()}`;
  };
  return value.replace(/%([0-9A-Fa-f]{2})/g, rewriteEscape);
};
|
|
76
|
+
/**
 * Applies the normalization steps enabled in `options` to `url`.
 *
 * Steps run in a fixed order: Unicode/Punycode/lowercase on the hostname,
 * credentials, `www.`, fragment, path rewriting, query parameters, and
 * finally root-slash and scheme removal on the serialized string.
 *
 * @param url Absolute URL string.
 * @param options Flags selecting the steps; defaults to the package defaults.
 * @returns The normalized URL string, or `url` unchanged when it cannot be parsed.
 */
const normalizeUrl = (url, options = defaultNormalizeOptions) => {
  try {
    const parsed = new URL(url);
    if (options.normalizeUnicode) {
      parsed.hostname = parsed.hostname.normalize("NFC");
      parsed.pathname = parsed.pathname.normalize("NFC");
    }
    if (options.convertToPunycode) {
      const ascii = domainToASCII(parsed.hostname);
      if (ascii) parsed.hostname = ascii;
    }
    if (options.lowercaseHostname) parsed.hostname = parsed.hostname.toLowerCase();
    if (options.stripAuthentication) {
      parsed.username = "";
      parsed.password = "";
    }
    if (options.stripWww && parsed.hostname.startsWith("www.")) parsed.hostname = parsed.hostname.slice(4);
    if (options.stripHash) parsed.hash = "";
    if (options.stripTextFragment && parsed.hash.startsWith("#:~:")) parsed.hash = "";
    let pathname = parsed.pathname;
    if (options.normalizeEncoding) pathname = decodeAndNormalizeEncoding(pathname);
    if (options.collapseSlashes) pathname = pathname.replace(/\/+/g, "/");
    if (options.stripTrailingSlash && pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
    if (options.stripRootSlash && pathname === "/") pathname = "";
    parsed.pathname = pathname;
    if (options.stripQueryParams && parsed.search) for (const param of options.stripQueryParams) parsed.searchParams.delete(param);
    if (options.sortQueryParams) parsed.searchParams.sort();
    // The `search` getter returns "" (never "?") for a bare trailing "?",
    // while `href` still contains it. Assigning "" clears the query entirely.
    if (options.stripEmptyQuery && parsed.search === "") parsed.search = "";
    let result = parsed.href;
    // http(s) URLs always serialize an empty path as "/", so the root slash
    // has to be removed from the serialized string rather than via `pathname`.
    if (options.stripRootSlash && parsed.pathname === "/" && parsed.host !== "" && result.startsWith(`${parsed.protocol}//`)) {
      const pathStart = result.indexOf("/", parsed.protocol.length + 2);
      if (pathStart !== -1) result = result.slice(0, pathStart) + result.slice(pathStart + 1);
    }
    if (options.stripProtocol) result = result.replace(/^https?:\/\//, "");
    return result;
  } catch {
    return url;
  }
};
|
|
111
|
+
/**
 * Default fetch implementation built on the global `fetch`.
 *
 * @param url URL to request.
 * @param options Optional `method` (defaults to "GET") and `headers`.
 * @returns Promise of `{ headers, body, url, status }`, with the body read as text.
 */
const nativeFetch = async (url, options) => {
  const requestInit = {
    method: options?.method ?? "GET",
    headers: options?.headers,
  };
  const response = await fetch(url, requestInit);
  const { headers } = response;
  const body = await response.text();
  return { headers, body, url: response.url, status: response.status };
};
|
|
123
|
+
/**
 * Runs `url` through the first platform handler that matches it.
 *
 * @param url Absolute URL string.
 * @param platforms Array of handlers, checked in order; only the first match is applied.
 * @returns The serialized URL after the handler ran (or after plain parsing when none
 *   matched); `url` unchanged when parsing or a handler throws.
 */
const applyPlatformHandlers = (url, platforms) => {
  try {
    const parsed = new URL(url);
    const handler = platforms.find((candidate) => candidate.match(parsed));
    const rewritten = handler ? handler.normalize(parsed) : parsed;
    return rewritten.href;
  } catch {
    return url;
  }
};
|
|
135
|
+
/**
 * Default parser adapter backed by feedsmith's `parseFeed`.
 *
 * - `parse` returns the parse result, or `undefined` when parsing throws.
 * - `getSelfUrl` returns the `rel="self"` link for Atom, RSS and RDF feeds
 *   (RSS/RDF read it from the feed's `atom` links) and `feed_url` for JSON feeds.
 * - `getSignature` returns the parsed `feed` object.
 */
const feedsmithParser = {
  parse(body) {
    try {
      return parseFeed(body);
    } catch {
      return undefined;
    }
  },
  getSelfUrl(parsed) {
    const selfHref = (links) => links?.find((link) => link.rel === "self")?.href;
    const { format } = parsed;
    if (format === "atom") return selfHref(parsed.feed.links);
    if (format === "rss" || format === "rdf") return selfHref(parsed.feed.atom?.links);
    if (format === "json") return parsed.feed.feed_url;
    return undefined;
  },
  getSignature(parsed) {
    return parsed.feed;
  },
};
|
|
155
|
+
|
|
156
|
+
//#endregion
|
|
157
|
+
export { addMissingProtocol, applyPlatformHandlers, feedsmithParser, nativeFetch, normalizeUrl, resolveFeedProtocol, resolveUrl };
|