feedscout 2.0.0-beta.1 → 2.0.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,6 @@ import { HtmlMethodOptions } from "../common/uris/html/types.js";
4
4
  import { LinkSelector } from "../common/types.js";
5
5
 
6
6
  //#region src/blogrolls/defaults.d.ts
7
- declare const mimeTypes: string[];
8
7
  declare const urisMinimal: string[];
9
8
  declare const urisBalanced: string[];
10
9
  declare const urisComprehensive: string[];
@@ -14,4 +13,4 @@ declare const defaultHtmlOptions: Omit<HtmlMethodOptions, 'baseUrl'>;
14
13
  declare const defaultHeadersOptions: Omit<HeadersMethodOptions, 'baseUrl'>;
15
14
  declare const defaultGuessOptions: Omit<GuessMethodOptions, 'baseUrl'>;
16
15
  //#endregion
17
- export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
16
+ export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
@@ -1,9 +1,4 @@
1
1
  //#region src/blogrolls/defaults.ts
2
- const mimeTypes = [
3
- "text/x-opml",
4
- "application/xml",
5
- "text/xml"
6
- ];
7
2
  const urisMinimal = [
8
3
  "/.well-known/recommendations.opml",
9
4
  "/blogroll.opml",
@@ -27,17 +22,30 @@ const anchorLabels = [
27
22
  "subscriptions",
28
23
  "reading list"
29
24
  ];
30
- const linkSelectors = [{ rel: "blogroll" }, {
31
- rel: "outline",
32
- types: mimeTypes
33
- }];
25
+ const linkSelectors = [
26
+ { rel: "blogroll" },
27
+ {
28
+ rel: "outline",
29
+ types: [
30
+ "text/x-opml",
31
+ "application/opml+xml",
32
+ "application/xml",
33
+ "text/xml"
34
+ ]
35
+ },
36
+ {
37
+ rel: "alternate",
38
+ types: ["text/x-opml", "application/opml+xml"]
39
+ }
40
+ ];
34
41
  const defaultHtmlOptions = {
35
42
  linkSelectors,
36
43
  anchorUris: urisComprehensive,
37
44
  anchorIgnoredUris: [],
38
- anchorLabels
45
+ anchorLabels,
46
+ anchorAttributes: ["aria-label", "title"]
39
47
  };
40
48
  const defaultHeadersOptions = { linkSelectors };
41
49
  const defaultGuessOptions = { uris: urisBalanced };
42
50
  //#endregion
43
- export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
51
+ export { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
@@ -1,4 +1,4 @@
1
- import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
1
+ import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
2
2
  import { BlogrollResult } from "./blogrolls/types.js";
3
3
  import { defaultExtractFn } from "./blogrolls/extractors.js";
4
- export { BlogrollResult, anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
4
+ export { BlogrollResult, anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
package/dist/blogrolls.js CHANGED
@@ -1,3 +1,3 @@
1
- import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
1
+ import { anchorLabels, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal } from "./blogrolls/defaults.js";
2
2
  import { defaultExtractFn } from "./blogrolls/extractors.js";
3
- export { anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
3
+ export { anchorLabels, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, linkSelectors, urisBalanced, urisComprehensive, urisMinimal };
@@ -17,9 +17,7 @@ const defaultFetchFn = async (url, options) => {
17
17
  const defaultResolveUrlFn = (url, baseUrl) => {
18
18
  try {
19
19
  return new URL(url, baseUrl).href;
20
- } catch {
21
- return;
22
- }
20
+ } catch {}
23
21
  };
24
22
  const defaultResolveSiteUrlFn = (input, resolveUrlFn) => {
25
23
  if (!input.content) return;
@@ -1,6 +1,10 @@
1
1
  import { endsWithAnyOf, includesAnyOf, matchesAnyOfLinkSelectors } from "../../utils.js";
2
2
  //#region src/common/uris/html/handlers.ts
3
3
  const handleOpenTag = (context, name, attribs, _isImplied) => {
4
+ if (name === "base" && context.baseHref === void 0) {
5
+ const href = attribs.href?.trim();
6
+ if (href) context.baseHref = href;
7
+ }
4
8
  if (name === "link" && attribs.href) {
5
9
  const rel = attribs.rel?.toLowerCase();
6
10
  if (!rel) return;
@@ -16,6 +20,20 @@ const handleOpenTag = (context, name, attribs, _isImplied) => {
16
20
  context.currentAnchor.href = attribs.href;
17
21
  context.currentAnchor.text = "";
18
22
  if (endsWithAnyOf(lowerHref, context.options.anchorUris)) context.discoveredUris.add(attribs.href);
23
+ if (context.options.anchorPathSegments?.length) {
24
+ let pathname;
25
+ try {
26
+ pathname = new URL(attribs.href, "https://feedscout.invalid").pathname;
27
+ } catch {}
28
+ if (pathname && includesAnyOf(pathname, context.options.anchorPathSegments)) context.discoveredUris.add(attribs.href);
29
+ }
30
+ }
31
+ if (context.currentAnchor.href && context.options.anchorAttributes?.length) for (const attribute of context.options.anchorAttributes) {
32
+ const value = attribs[attribute];
33
+ if (value && includesAnyOf(value, context.options.anchorLabels)) {
34
+ context.discoveredUris.add(context.currentAnchor.href);
35
+ break;
36
+ }
19
37
  }
20
38
  };
21
39
  const handleText = (context, text) => {
@@ -13,7 +13,23 @@ const discoverUrisFromHtml = (html, options) => {
13
13
  const parser = new Parser(createHtmlUrisHandlers(context), { decodeEntities: true });
14
14
  parser.write(html);
15
15
  parser.end();
16
- return [...context.discoveredUris];
16
+ const uris = [...context.discoveredUris];
17
+ if (context.baseHref) {
18
+ let base;
19
+ try {
20
+ base = options.baseUrl ? new URL(context.baseHref, options.baseUrl).href : context.baseHref;
21
+ } catch {
22
+ base = options.baseUrl;
23
+ }
24
+ return uris.map((uri) => {
25
+ try {
26
+ return new URL(uri, base).href;
27
+ } catch {
28
+ return uri;
29
+ }
30
+ });
31
+ }
32
+ return uris;
17
33
  };
18
34
  //#endregion
19
35
  export { discoverUrisFromHtml };
@@ -5,8 +5,10 @@ type HtmlMethodOptions = {
5
5
  baseUrl?: string;
6
6
  linkSelectors: Array<LinkSelector>;
7
7
  anchorUris: Array<Pattern>;
8
+ anchorPathSegments?: Array<Pattern>;
8
9
  anchorIgnoredUris: Array<Pattern>;
9
10
  anchorLabels: Array<Pattern>;
11
+ anchorAttributes?: Array<string>;
10
12
  };
11
13
  //#endregion
12
14
  export { HtmlMethodOptions };
@@ -2,20 +2,21 @@ import { GuessMethodOptions } from "../common/uris/guess/types.js";
2
2
  import { HeadersMethodOptions } from "../common/uris/headers/types.js";
3
3
  import { HtmlMethodOptions } from "../common/uris/html/types.js";
4
4
  import { PlatformMethodOptions } from "../common/uris/platform/types.js";
5
- import { LinkSelector, UriEntry } from "../common/types.js";
5
+ import { LinkSelector, Pattern, UriEntry } from "../common/types.js";
6
6
 
7
7
  //#region src/feeds/defaults.d.ts
8
8
  declare const mimeTypes: string[];
9
9
  declare const urisMinimal: string[];
10
10
  declare const urisBalanced: string[];
11
11
  declare const urisComprehensive: Array<UriEntry>;
12
- declare const ignoredUris: string[];
12
+ declare const ignoredUris: Array<Pattern>;
13
13
  declare const anchorLabels: string[];
14
14
  declare const linkSelectors: Array<LinkSelector>;
15
15
  declare const anchorPathSegments: RegExp[];
16
+ declare const anchorAttributes: string[];
16
17
  declare const defaultHtmlOptions: Omit<HtmlMethodOptions, 'baseUrl'>;
17
18
  declare const defaultHeadersOptions: Omit<HeadersMethodOptions, 'baseUrl'>;
18
19
  declare const defaultGuessOptions: Omit<GuessMethodOptions, 'baseUrl'>;
19
20
  declare const defaultPlatformOptions: Omit<PlatformMethodOptions, 'baseUrl'>;
20
21
  //#endregion
21
- export { anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
22
+ export { anchorAttributes, anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
@@ -152,12 +152,15 @@ const urisComprehensive = [
152
152
  "/feeds/posts/default?alt=rss",
153
153
  "/feeds/comments/default"
154
154
  ];
155
- const ignoredUris = ["wp-json/oembed/", "wp-json/wp/"];
155
+ const ignoredUris = [
156
+ "wp-json/oembed/",
157
+ "wp-json/wp/",
158
+ /[?&][^=&]*=(https?:|https?%3a|aHR0c)/i
159
+ ];
156
160
  const anchorLabels = [
157
161
  "rss",
158
162
  "feed",
159
163
  "atom",
160
- "subscribe",
161
164
  "syndicate",
162
165
  "syndication",
163
166
  "json feed"
@@ -171,11 +174,18 @@ const anchorPathSegments = [
171
174
  /\/atom\//,
172
175
  /\/feed\//
173
176
  ];
177
+ const anchorAttributes = [
178
+ "aria-label",
179
+ "title",
180
+ "data-framer-name"
181
+ ];
174
182
  const defaultHtmlOptions = {
175
183
  linkSelectors,
176
- anchorUris: [...urisComprehensive.flat(), ...anchorPathSegments],
184
+ anchorUris: urisComprehensive.flat(),
185
+ anchorPathSegments,
177
186
  anchorIgnoredUris: ignoredUris,
178
- anchorLabels
187
+ anchorLabels,
188
+ anchorAttributes
179
189
  };
180
190
  const defaultHeadersOptions = { linkSelectors };
181
191
  const defaultGuessOptions = { uris: urisBalanced };
@@ -272,4 +282,4 @@ const defaultPlatformOptions = { handlers: [
272
282
  zennHandler
273
283
  ] };
274
284
  //#endregion
275
- export { anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
285
+ export { anchorAttributes, anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
@@ -22,12 +22,8 @@ const isDiscourseHtml = (content) => {
22
22
  };
23
23
  const discourseHandler = {
24
24
  match: (url, content) => {
25
- try {
26
- if (!content || !isDiscourseHtml(content)) return false;
27
- new URL(url);
28
- return true;
29
- } catch {}
30
- return false;
25
+ if (!content || !isDiscourseHtml(content)) return false;
26
+ return URL.canParse(url);
31
27
  },
32
28
  resolve: (url) => {
33
29
  try {
@@ -8,7 +8,7 @@ const libsynHandler = {
8
8
  resolve: (url) => {
9
9
  const { origin, pathname } = new URL(url);
10
10
  if (isHostOf(url, "feeds.libsyn.com")) {
11
- const showId = pathname.split("/").filter(Boolean)[0];
11
+ const showId = pathname.split("/").find(Boolean);
12
12
  if (showId && numericRegex.test(showId)) return [{
13
13
  uri: `https://feeds.libsyn.com/${showId}/rss`,
14
14
  hint: composeHint("libsyn:podcast")
package/dist/feeds.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./feeds/defaults.js";
1
+ import { anchorAttributes, anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./feeds/defaults.js";
2
2
  import { FeedResult } from "./feeds/types.js";
3
3
  import { defaultExtractFn } from "./feeds/extractors.js";
4
- export { FeedResult, anchorLabels, anchorPathSegments, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
4
+ export { FeedResult, anchorAttributes, anchorLabels, anchorPathSegments, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
package/dist/feeds.js CHANGED
@@ -1,3 +1,3 @@
1
- import { anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./feeds/defaults.js";
1
+ import { anchorAttributes, anchorLabels, anchorPathSegments, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal } from "./feeds/defaults.js";
2
2
  import { defaultExtractFn } from "./feeds/extractors.js";
3
- export { anchorLabels, anchorPathSegments, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
3
+ export { anchorAttributes, anchorLabels, anchorPathSegments, defaultExtractFn, defaultGuessOptions, defaultHeadersOptions, defaultHtmlOptions, defaultPlatformOptions, ignoredUris, linkSelectors, mimeTypes, urisBalanced, urisComprehensive, urisMinimal };
package/package.json CHANGED
@@ -68,14 +68,14 @@
68
68
  "htmlparser2": "^12.0.0"
69
69
  },
70
70
  "peerDependencies": {
71
- "feedsmith": "^3.0.0-beta.4"
71
+ "feedsmith": "3.0.0-beta.5"
72
72
  },
73
73
  "devDependencies": {
74
74
  "@types/bun": "^1.3.13",
75
- "feedsmith": "^3.0.0-beta.4",
76
- "kvalita": "^1.13.0",
75
+ "feedsmith": "3.0.0-beta.5",
76
+ "kvalita": "^1.15.1",
77
77
  "tsdown": "^0.22.2",
78
78
  "vitepress": "^2.0.0-alpha.17"
79
79
  },
80
- "version": "2.0.0-beta.1"
80
+ "version": "2.0.0-beta.3"
81
81
  }