feedscout 2.0.0-beta.1 → 2.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,6 @@ import { HtmlMethodOptions } from "../common/uris/html/types.js";
4
4
  import { LinkSelector } from "../common/types.js";
5
5
 
6
6
  //#region src/blogrolls/defaults.d.ts
7
- declare const mimeTypes: string[];
8
7
  declare const urisMinimal: string[];
9
8
  declare const urisBalanced: string[];
10
9
  declare const urisComprehensive: string[];
@@ -14,4 +13,4 @@ declare const defaultHtmlOptions: Omit<HtmlMethodOptions, 'baseUrl'>;
14
13
  declare const defaultHeadersOptions: Omit<HeadersMethodOptions, 'baseUrl'>;
15
14
  declare const defaultGuessOptions: Omit<GuessMethodOptions, 'baseUrl'>;
16
15
  //#endregion
17
- export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
16
+ export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
@@ -1,9 +1,4 @@
1
1
  //#region src/blogrolls/defaults.ts
2
- const mimeTypes = [
3
- "text/x-opml",
4
- "application/xml",
5
- "text/xml"
6
- ];
7
2
  const urisMinimal = [
8
3
  "/.well-known/recommendations.opml",
9
4
  "/blogroll.opml",
@@ -27,10 +22,22 @@ const anchorLabels = [
27
22
  "subscriptions",
28
23
  "reading list"
29
24
  ];
30
- const linkSelectors = [{ rel: "blogroll" }, {
31
- rel: "outline",
32
- types: mimeTypes
33
- }];
25
+ const linkSelectors = [
26
+ { rel: "blogroll" },
27
+ {
28
+ rel: "outline",
29
+ types: [
30
+ "text/x-opml",
31
+ "application/opml+xml",
32
+ "application/xml",
33
+ "text/xml"
34
+ ]
35
+ },
36
+ {
37
+ rel: "alternate",
38
+ types: ["text/x-opml", "application/opml+xml"]
39
+ }
40
+ ];
34
41
  const defaultHtmlOptions = {
35
42
  linkSelectors,
36
43
  anchorUris: urisComprehensive,
@@ -40,4 +47,4 @@ const defaultHtmlOptions = {
40
47
  const defaultHeadersOptions = { linkSelectors };
41
48
  const defaultGuessOptions = { uris: urisBalanced };
42
49
  //#endregion
43
- export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
50
+ export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
@@ -1,4 +1,4 @@
1
- import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
1
+ import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
2
2
  import { BlogrollResult } from "./blogrolls/types.js";
3
3
  import { defaultExtractFn } from "./blogrolls/extractors.js";
4
- export { BlogrollResult, anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
4
+ export { BlogrollResult, anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
package/dist/blogrolls.js CHANGED
@@ -1,3 +1,3 @@
1
- import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
1
+ import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
2
2
  import { defaultExtractFn } from "./blogrolls/extractors.js";
3
- export { anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
3
+ export { anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
@@ -17,9 +17,7 @@ const defaultFetchFn = async (url, options) => {
17
17
  const defaultResolveUrlFn = (url, baseUrl) => {
18
18
  try {
19
19
  return new URL(url, baseUrl).href;
20
- } catch {
21
- return;
22
- }
20
+ } catch {}
23
21
  };
24
22
  const defaultResolveSiteUrlFn = (input, resolveUrlFn) => {
25
23
  if (!input.content) return;
@@ -1,6 +1,10 @@
1
1
  import { endsWithAnyOf, includesAnyOf, matchesAnyOfLinkSelectors } from "../../utils.js";
2
2
  //#region src/common/uris/html/handlers.ts
3
3
  const handleOpenTag = (context, name, attribs, _isImplied) => {
4
+ if (name === "base" && context.baseHref === void 0) {
5
+ const href = attribs.href?.trim();
6
+ if (href) context.baseHref = href;
7
+ }
4
8
  if (name === "link" && attribs.href) {
5
9
  const rel = attribs.rel?.toLowerCase();
6
10
  if (!rel) return;
@@ -16,6 +20,16 @@ const handleOpenTag = (context, name, attribs, _isImplied) => {
16
20
  context.currentAnchor.href = attribs.href;
17
21
  context.currentAnchor.text = "";
18
22
  if (endsWithAnyOf(lowerHref, context.options.anchorUris)) context.discoveredUris.add(attribs.href);
23
+ if (context.options.anchorPathSegments?.length) {
24
+ let pathname;
25
+ try {
26
+ pathname = new URL(attribs.href, "https://feedscout.invalid").pathname;
27
+ } catch {}
28
+ if (pathname && includesAnyOf(pathname, context.options.anchorPathSegments)) context.discoveredUris.add(attribs.href);
29
+ }
30
+ const ariaLabel = attribs["aria-label"];
31
+ const title = attribs.title;
32
+ if (ariaLabel && includesAnyOf(ariaLabel, context.options.anchorLabels) || title && includesAnyOf(title, context.options.anchorLabels)) context.discoveredUris.add(attribs.href);
19
33
  }
20
34
  };
21
35
  const handleText = (context, text) => {
@@ -13,7 +13,23 @@ const discoverUrisFromHtml = (html, options) => {
13
13
  const parser = new Parser(createHtmlUrisHandlers(context), { decodeEntities: true });
14
14
  parser.write(html);
15
15
  parser.end();
16
- return [...context.discoveredUris];
16
+ const uris = [...context.discoveredUris];
17
+ if (context.baseHref) {
18
+ let base;
19
+ try {
20
+ base = options.baseUrl ? new URL(context.baseHref, options.baseUrl).href : context.baseHref;
21
+ } catch {
22
+ base = options.baseUrl;
23
+ }
24
+ return uris.map((uri) => {
25
+ try {
26
+ return new URL(uri, base).href;
27
+ } catch {
28
+ return uri;
29
+ }
30
+ });
31
+ }
32
+ return uris;
17
33
  };
18
34
  //#endregion
19
35
  export { discoverUrisFromHtml };
@@ -5,6 +5,7 @@ type HtmlMethodOptions = {
5
5
  baseUrl?: string;
6
6
  linkSelectors: Array<LinkSelector>;
7
7
  anchorUris: Array<Pattern>;
8
+ anchorPathSegments?: Array<Pattern>;
8
9
  anchorIgnoredUris: Array<Pattern>;
9
10
  anchorLabels: Array<Pattern>;
10
11
  };
@@ -2,14 +2,14 @@ import { GuessMethodOptions } from "../common/uris/guess/types.js";
2
2
  import { HeadersMethodOptions } from "../common/uris/headers/types.js";
3
3
  import { HtmlMethodOptions } from "../common/uris/html/types.js";
4
4
  import { PlatformMethodOptions } from "../common/uris/platform/types.js";
5
- import { LinkSelector, UriEntry } from "../common/types.js";
5
+ import { LinkSelector, Pattern, UriEntry } from "../common/types.js";
6
6
 
7
7
  //#region src/feeds/defaults.d.ts
8
8
  declare const mimeTypes: string[];
9
9
  declare const urisMinimal: string[];
10
10
  declare const urisBalanced: string[];
11
11
  declare const urisComprehensive: Array<UriEntry>;
12
- declare const ignoredUris: string[];
12
+ declare const ignoredUris: Array<Pattern>;
13
13
  declare const anchorLabels: string[];
14
14
  declare const linkSelectors: Array<LinkSelector>;
15
15
  declare const anchorPathSegments: RegExp[];
@@ -152,12 +152,15 @@ const urisComprehensive = [
152
152
  "/feeds/posts/default?alt=rss",
153
153
  "/feeds/comments/default"
154
154
  ];
155
- const ignoredUris = ["wp-json/oembed/", "wp-json/wp/"];
155
+ const ignoredUris = [
156
+ "wp-json/oembed/",
157
+ "wp-json/wp/",
158
+ /[?&][^=&]*=(https?:|https?%3a|aHR0c)/i
159
+ ];
156
160
  const anchorLabels = [
157
161
  "rss",
158
162
  "feed",
159
163
  "atom",
160
- "subscribe",
161
164
  "syndicate",
162
165
  "syndication",
163
166
  "json feed"
@@ -173,7 +176,8 @@ const anchorPathSegments = [
173
176
  ];
174
177
  const defaultHtmlOptions = {
175
178
  linkSelectors,
176
- anchorUris: [...urisComprehensive.flat(), ...anchorPathSegments],
179
+ anchorUris: urisComprehensive.flat(),
180
+ anchorPathSegments,
177
181
  anchorIgnoredUris: ignoredUris,
178
182
  anchorLabels
179
183
  };
@@ -22,12 +22,8 @@ const isDiscourseHtml = (content) => {
22
22
  };
23
23
  const discourseHandler = {
24
24
  match: (url, content) => {
25
- try {
26
- if (!content || !isDiscourseHtml(content)) return false;
27
- new URL(url);
28
- return true;
29
- } catch {}
30
- return false;
25
+ if (!content || !isDiscourseHtml(content)) return false;
26
+ return URL.canParse(url);
31
27
  },
32
28
  resolve: (url) => {
33
29
  try {
@@ -8,7 +8,7 @@ const libsynHandler = {
8
8
  resolve: (url) => {
9
9
  const { origin, pathname } = new URL(url);
10
10
  if (isHostOf(url, "feeds.libsyn.com")) {
11
- const showId = pathname.split("/").filter(Boolean)[0];
11
+ const showId = pathname.split("/").find(Boolean);
12
12
  if (showId && numericRegex.test(showId)) return [{
13
13
  uri: `https://feeds.libsyn.com/${showId}/rss`,
14
14
  hint: composeHint("libsyn:podcast")
package/package.json CHANGED
@@ -68,14 +68,14 @@
68
68
  "htmlparser2": "^12.0.0"
69
69
  },
70
70
  "peerDependencies": {
71
- "feedsmith": "^3.0.0-beta.4"
71
+ "feedsmith": "3.0.0-beta.5"
72
72
  },
73
73
  "devDependencies": {
74
74
  "@types/bun": "^1.3.13",
75
- "feedsmith": "^3.0.0-beta.4",
76
- "kvalita": "^1.13.0",
75
+ "feedsmith": "3.0.0-beta.5",
76
+ "kvalita": "^1.15.1",
77
77
  "tsdown": "^0.22.2",
78
78
  "vitepress": "^2.0.0-alpha.17"
79
79
  },
80
- "version": "2.0.0-beta.1"
80
+ "version": "2.0.0-beta.2"
81
81
  }