@rmdes/indiekit-endpoint-microsub 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -0
- package/index.js +140 -0
- package/lib/cache/redis.js +133 -0
- package/lib/controllers/block.js +85 -0
- package/lib/controllers/channels.js +135 -0
- package/lib/controllers/events.js +56 -0
- package/lib/controllers/follow.js +108 -0
- package/lib/controllers/microsub.js +138 -0
- package/lib/controllers/mute.js +124 -0
- package/lib/controllers/preview.js +67 -0
- package/lib/controllers/reader.js +218 -0
- package/lib/controllers/search.js +142 -0
- package/lib/controllers/timeline.js +117 -0
- package/lib/feeds/atom.js +61 -0
- package/lib/feeds/fetcher.js +205 -0
- package/lib/feeds/hfeed.js +177 -0
- package/lib/feeds/jsonfeed.js +43 -0
- package/lib/feeds/normalizer.js +586 -0
- package/lib/feeds/parser.js +124 -0
- package/lib/feeds/rss.js +61 -0
- package/lib/polling/processor.js +201 -0
- package/lib/polling/scheduler.js +128 -0
- package/lib/polling/tier.js +139 -0
- package/lib/realtime/broker.js +241 -0
- package/lib/search/indexer.js +90 -0
- package/lib/search/query.js +197 -0
- package/lib/storage/channels.js +281 -0
- package/lib/storage/feeds.js +286 -0
- package/lib/storage/filters.js +265 -0
- package/lib/storage/items.js +419 -0
- package/lib/storage/read-state.js +109 -0
- package/lib/utils/jf2.js +170 -0
- package/lib/utils/pagination.js +157 -0
- package/lib/utils/validation.js +217 -0
- package/lib/webmention/processor.js +214 -0
- package/lib/webmention/receiver.js +54 -0
- package/lib/webmention/verifier.js +308 -0
- package/lib/websub/discovery.js +129 -0
- package/lib/websub/handler.js +163 -0
- package/lib/websub/subscriber.js +181 -0
- package/locales/en.json +80 -0
- package/package.json +54 -0
- package/views/channel-new.njk +33 -0
- package/views/channel.njk +41 -0
- package/views/compose.njk +61 -0
- package/views/item.njk +85 -0
- package/views/partials/actions.njk +15 -0
- package/views/partials/author.njk +17 -0
- package/views/partials/item-card.njk +65 -0
- package/views/partials/timeline.njk +10 -0
- package/views/reader.njk +37 -0
- package/views/settings.njk +81 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search controller
|
|
3
|
+
* @module controllers/search
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { IndiekitError } from "@indiekit/error";
|
|
7
|
+
|
|
8
|
+
import { discoverFeeds } from "../feeds/hfeed.js";
|
|
9
|
+
import { searchWithFallback } from "../search/query.js";
|
|
10
|
+
import { getChannel } from "../storage/channels.js";
|
|
11
|
+
import { validateChannel, validateUrl } from "../utils/validation.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Discover feeds from a URL
 * GET ?action=search&query=<url>
 * @param {object} request - Express request
 * @param {object} response - Express response
 */
export async function discover(request, response) {
  const { query } = request.query;

  if (!query) {
    throw new IndiekitError("Missing required parameter: query", {
      status: 400,
    });
  }

  // Only URL-shaped queries can be discovered; anything else yields no results
  let url;
  try {
    url = new URL(query);
  } catch {
    return response.json({ results: [] });
  }

  try {
    // Fetch the page the user pointed us at
    const fetchResponse = await fetch(url.href, {
      headers: {
        Accept: "text/html, application/xhtml+xml, */*",
        "User-Agent": "Indiekit Microsub/1.0 (+https://getindiekit.com)",
      },
    });

    if (!fetchResponse.ok) {
      throw new IndiekitError(`Failed to fetch URL: ${fetchResponse.status}`, {
        status: 502,
      });
    }

    const html = await fetchResponse.text();
    const discovered = await discoverFeeds(html, url.href);

    // Shape each discovered feed as a Microsub search result
    const results = [];
    for (const feed of discovered) {
      results.push({ type: "feed", url: feed.url });
    }

    response.json({ results });
  } catch (error) {
    if (error instanceof IndiekitError) {
      throw error;
    }
    throw new IndiekitError(`Feed discovery failed: ${error.message}`, {
      status: 502,
    });
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Search feeds or items
 * POST ?action=search
 * @param {object} request - Express request
 * @param {object} response - Express response
 */
export async function search(request, response) {
  const { application } = request.app.locals;
  const userId = request.session?.userId;
  const { query, channel } = request.body;

  if (!query) {
    throw new IndiekitError("Missing required parameter: query", {
      status: 400,
    });
  }

  // Channel-scoped search: look through the channel's stored items
  if (channel) {
    validateChannel(channel);

    const channelDocument = await getChannel(application, channel, userId);
    if (!channelDocument) {
      throw new IndiekitError("Channel not found", { status: 404 });
    }

    const items = await searchWithFallback(
      application,
      channelDocument._id,
      query,
    );
    return response.json({ items });
  }

  // Global search: a URL-shaped query is treated as feed discovery
  try {
    validateUrl(query, "query");

    const fetchResponse = await fetch(query, {
      headers: {
        Accept: "text/html, application/xhtml+xml, */*",
        "User-Agent": "Indiekit Microsub/1.0 (+https://getindiekit.com)",
      },
    });

    if (!fetchResponse.ok) {
      throw new IndiekitError(`Failed to fetch URL: ${fetchResponse.status}`, {
        status: 502,
      });
    }

    const html = await fetchResponse.text();
    const discovered = await discoverFeeds(html, query);

    const results = discovered.map((feed) => ({
      type: "feed",
      url: feed.url,
    }));

    return response.json({ results });
  } catch (error) {
    // Known errors propagate; anything else means "no results"
    if (error instanceof IndiekitError) {
      throw error;
    }
    return response.json({ results: [] });
  }
}
|
|
141
|
+
|
|
142
|
+
// Grouped export of this controller's handlers for the Microsub action router.
export const searchController = { discover, search };
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Timeline controller
|
|
3
|
+
* @module controllers/timeline
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { IndiekitError } from "@indiekit/error";
|
|
7
|
+
|
|
8
|
+
import { getChannel } from "../storage/channels.js";
|
|
9
|
+
import {
|
|
10
|
+
getTimelineItems,
|
|
11
|
+
markItemsRead,
|
|
12
|
+
markItemsUnread,
|
|
13
|
+
removeItems,
|
|
14
|
+
} from "../storage/items.js";
|
|
15
|
+
import {
|
|
16
|
+
validateChannel,
|
|
17
|
+
validateEntries,
|
|
18
|
+
parseArrayParameter as parseArrayParametereter,
|
|
19
|
+
} from "../utils/validation.js";
|
|
20
|
+
|
|
21
|
+
/**
 * Get timeline items for a channel
 * GET ?action=timeline&channel=<uid>
 * @param {object} request - Express request
 * @param {object} response - Express response
 */
export async function get(request, response) {
  const { application } = request.app.locals;
  const userId = request.session?.userId;
  const { channel, before, after, limit } = request.query;

  validateChannel(channel);

  // The channel must exist before we can read its timeline
  const channelDoc = await getChannel(application, channel, userId);
  if (!channelDoc) {
    throw new IndiekitError("Channel not found", { status: 404 });
  }

  const pageOptions = { before, after, limit, userId };
  const timeline = await getTimelineItems(
    application,
    channelDoc._id,
    pageOptions,
  );

  response.json(timeline);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Handle timeline actions (mark_read, mark_unread, remove)
 * POST ?action=timeline
 * @param {object} request - Express request
 * @param {object} response - Express response
 */
export async function action(request, response) {
  const { application } = request.app.locals;
  const userId = request.session?.userId;
  const { method, channel } = request.body;

  validateChannel(channel);

  // The channel must exist before we mutate its items
  const channelDoc = await getChannel(application, channel, userId);
  if (!channelDoc) {
    throw new IndiekitError("Channel not found", { status: 404 });
  }

  // Entry IDs may arrive as `entry` or `entry[]`; normalize to an array
  const entries = parseArrayParametereter(request.body, "entry");

  if (method === "mark_read") {
    validateEntries(entries);
    const updated = await markItemsRead(
      application,
      channelDoc._id,
      entries,
      userId,
    );
    return response.json({ result: "ok", updated });
  }

  if (method === "mark_unread") {
    validateEntries(entries);
    const updated = await markItemsUnread(
      application,
      channelDoc._id,
      entries,
      userId,
    );
    return response.json({ result: "ok", updated });
  }

  if (method === "remove") {
    validateEntries(entries);
    const removed = await removeItems(application, channelDoc._id, entries);
    return response.json({ result: "ok", removed });
  }

  throw new IndiekitError(`Invalid timeline method: ${method}`, {
    status: 400,
  });
}
|
|
116
|
+
|
|
117
|
+
// Grouped export of this controller's handlers for the Microsub action router.
export const timelineController = { get, action };
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Atom feed parser
|
|
3
|
+
* @module feeds/atom
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Readable } from "node:stream";
|
|
7
|
+
|
|
8
|
+
import FeedParser from "feedparser";
|
|
9
|
+
|
|
10
|
+
import { normalizeItem, normalizeFeedMeta } from "./normalizer.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Parse Atom feed content
 * @param {string} content - Atom XML content
 * @param {string} feedUrl - URL of the feed
 * @returns {Promise<object>} Parsed feed with metadata and items
 */
export async function parseAtom(content, feedUrl) {
  // FeedParser is a callback/stream API, so adapt it to a Promise here
  return new Promise((resolve, reject) => {
    const parser = new FeedParser({ feedurl: feedUrl });
    const collected = [];
    let feedMeta;

    parser.on("error", (error) => {
      reject(new Error(`Atom parse error: ${error.message}`));
    });

    parser.on("meta", (meta) => {
      feedMeta = meta;
    });

    // Drain every entry the parser makes available
    parser.on("readable", function () {
      for (let entry = this.read(); entry; entry = this.read()) {
        collected.push(entry);
      }
    });

    parser.on("end", () => {
      try {
        resolve({
          type: "feed",
          url: feedUrl,
          ...normalizeFeedMeta(feedMeta, feedUrl),
          items: collected.map((entry) => normalizeItem(entry, feedUrl, "atom")),
        });
      } catch (error) {
        reject(error);
      }
    });

    // Feed the raw XML string through a stream into the parser
    Readable.from([content]).pipe(parser);
  });
}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feed fetcher with HTTP caching
|
|
3
|
+
* @module feeds/fetcher
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { getCache, setCache } from "../cache/redis.js";
|
|
7
|
+
|
|
8
|
+
// Abort feed fetches that take longer than this many milliseconds.
const DEFAULT_TIMEOUT = 30_000; // 30 seconds
// User-Agent sent with every outgoing feed request.
const DEFAULT_USER_AGENT = "Indiekit Microsub/1.0 (+https://getindiekit.com)";
|
|
10
|
+
|
|
11
|
+
/**
 * Fetch feed content with caching
 * @param {string} url - Feed URL
 * @param {object} options - Fetch options
 * @param {string} [options.etag] - Previous ETag for conditional request
 * @param {string} [options.lastModified] - Previous Last-Modified for conditional request
 * @param {number} [options.timeout] - Request timeout in ms
 * @param {object} [options.redis] - Redis client for caching
 * @returns {Promise<object>} Fetch result with content and headers
 * @throws {Error} On timeout (original AbortError attached as `cause`) or non-OK HTTP status
 */
export async function fetchFeed(url, options = {}) {
  const { etag, lastModified, timeout = DEFAULT_TIMEOUT, redis } = options;

  // Serve a recent copy from cache, avoiding a network round trip
  if (redis) {
    const cached = await getCache(redis, `feed:${url}`);
    if (cached) {
      return {
        content: cached.content,
        contentType: cached.contentType,
        etag: cached.etag,
        lastModified: cached.lastModified,
        // hub/self are stored alongside the content; older cache entries
        // written before this field was added simply yield undefined
        hub: cached.hub,
        self: cached.self,
        fromCache: true,
        status: 200,
      };
    }
  }

  const headers = {
    Accept:
      "application/atom+xml, application/rss+xml, application/json, application/feed+json, text/xml, text/html;q=0.9, */*;q=0.8",
    "User-Agent": DEFAULT_USER_AGENT,
  };

  // Conditional request headers let the origin answer 304 Not Modified
  if (etag) {
    headers["If-None-Match"] = etag;
  }
  if (lastModified) {
    headers["If-Modified-Since"] = lastModified;
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(url, {
      headers,
      signal: controller.signal,
      redirect: "follow",
    });

    clearTimeout(timeoutId);

    // Not modified - caller should keep using its cached version
    if (response.status === 304) {
      return {
        content: undefined,
        contentType: undefined,
        etag,
        lastModified,
        notModified: true,
        status: 304,
      };
    }

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const content = await response.text();
    const responseEtag = response.headers.get("ETag");
    const responseLastModified = response.headers.get("Last-Modified");
    const contentType = response.headers.get("Content-Type") || "";

    const result = {
      content,
      contentType,
      etag: responseEtag,
      lastModified: responseLastModified,
      fromCache: false,
      status: response.status,
    };

    // Extract hub/self URLs from the Link header for WebSub
    const linkHeader = response.headers.get("Link");
    if (linkHeader) {
      result.hub = extractHubFromLinkHeader(linkHeader);
      result.self = extractSelfFromLinkHeader(linkHeader);
    }

    // Cache the result, including WebSub metadata so cache hits keep it
    if (redis) {
      const cacheData = {
        content,
        contentType,
        etag: responseEtag,
        lastModified: responseLastModified,
        hub: result.hub,
        self: result.self,
      };
      // Cache for 5 minutes by default
      await setCache(redis, `feed:${url}`, cacheData, 300);
    }

    return result;
  } catch (error) {
    clearTimeout(timeoutId);

    if (error.name === "AbortError") {
      // Wrap but keep the original abort error for diagnostics
      throw new Error(`Request timeout after ${timeout}ms`, { cause: error });
    }

    throw error;
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Find the target URL of the first link-value carrying a given relation type
 * in an HTTP Link header. Handles comma-separated link-values, parameters
 * appearing before `rel`, and space-separated rel lists (RFC 8288).
 * @param {string} linkHeader - Link header value
 * @param {string} rel - Relation type to look for (lowercase)
 * @returns {string|undefined} Target URL, or undefined if not present
 */
function extractRelFromLinkHeader(linkHeader, rel) {
  // Link-values are comma-separated; unencoded commas in URLs are invalid,
  // so a naive split is safe for well-formed headers
  for (const part of linkHeader.split(",")) {
    const urlMatch = part.match(/<([^>]+)>/);
    if (!urlMatch) continue;

    const relMatch = part.match(/rel=["']?([^"';]+)["']?/i);
    if (!relMatch) continue;

    // rel may be a space-separated list of relation types
    const rels = relMatch[1].toLowerCase().split(/\s+/);
    if (rels.includes(rel)) {
      return urlMatch[1];
    }
  }
  return undefined;
}

/**
 * Extract hub URL from Link header
 * @param {string} linkHeader - Link header value
 * @returns {string|undefined} Hub URL
 */
function extractHubFromLinkHeader(linkHeader) {
  return extractRelFromLinkHeader(linkHeader, "hub");
}

/**
 * Extract self URL from Link header
 * @param {string} linkHeader - Link header value
 * @returns {string|undefined} Self URL
 */
function extractSelfFromLinkHeader(linkHeader) {
  return extractRelFromLinkHeader(linkHeader, "self");
}
|
|
145
|
+
|
|
146
|
+
/**
 * Fetch feed and parse it
 * @param {string} url - Feed URL
 * @param {object} options - Options
 * @returns {Promise<object>} Parsed feed
 */
export async function fetchAndParseFeed(url, options = {}) {
  // Imported lazily to avoid a circular dependency with the parser module
  const { parseFeed } = await import("./parser.js");

  const fetched = await fetchFeed(url, options);

  // 304 Not Modified: there is nothing new to parse
  if (fetched.notModified) {
    return { ...fetched, items: [] };
  }

  const parsed = await parseFeed(fetched.content, url, {
    contentType: fetched.contentType,
  });

  // Prefer the hub advertised in HTTP headers over one embedded in the feed
  return {
    ...fetched,
    ...parsed,
    hub: fetched.hub || parsed._hub,
  };
}
|
|
174
|
+
|
|
175
|
+
/**
 * Discover feeds from a URL
 * @param {string} url - Page URL
 * @param {object} options - Options
 * @returns {Promise<Array>} Array of discovered feeds
 */
export async function discoverFeedsFromUrl(url, options = {}) {
  const fetched = await fetchFeed(url, options);
  const { discoverFeeds } = await import("./hfeed.js");

  // If the URL already serves a feed content type, report it directly
  const contentType = fetched.contentType?.toLowerCase() || "";
  const feedMarkers = ["xml", "rss", "atom", "json"];
  const isFeed = feedMarkers.some((marker) => contentType.includes(marker));

  if (isFeed) {
    const type = contentType.includes("json") ? "jsonfeed" : "xml";
    return [{ url, type, rel: "self" }];
  }

  // Otherwise treat the response as HTML and discover feeds within it
  return discoverFeeds(fetched.content, url);
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* h-feed (Microformats2) parser
|
|
3
|
+
* @module feeds/hfeed
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { mf2 } from "microformats-parser";
|
|
7
|
+
|
|
8
|
+
import { normalizeHfeedItem, normalizeHfeedMeta } from "./normalizer.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Parse h-feed content from HTML
 * @param {string} content - HTML content with h-feed
 * @param {string} feedUrl - URL of the page
 * @returns {Promise<object>} Parsed feed with metadata and items
 */
export async function parseHfeed(content, feedUrl) {
  let parsed;

  try {
    parsed = mf2(content, { baseUrl: feedUrl });
  } catch (error) {
    throw new Error(`h-feed parse error: ${error.message}`);
  }

  // Look for an explicit h-feed container in the parsed microformats
  const hfeed = findHfeed(parsed);

  if (!hfeed) {
    // No h-feed container: fall back to root-level h-entry items
    const rootEntries = parsed.items.filter(
      (item) => item.type && item.type.includes("h-entry"),
    );

    if (rootEntries.length === 0) {
      throw new Error("No h-feed or h-entry found on page");
    }

    // Build a synthetic feed around the loose entries
    return {
      type: "feed",
      url: feedUrl,
      name: parsed.rels?.canonical?.[0] || feedUrl,
      items: rootEntries.map((entry) => normalizeHfeedItem(entry, feedUrl)),
    };
  }

  const meta = normalizeHfeedMeta(hfeed, feedUrl);

  // Only h-entry children of the h-feed become timeline items
  const items = [];
  for (const child of hfeed.children || []) {
    if (child.type && child.type.includes("h-entry")) {
      items.push(normalizeHfeedItem(child, feedUrl));
    }
  }

  return {
    type: "feed",
    url: feedUrl,
    ...meta,
    items,
  };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Find h-feed in parsed microformats
 * @param {object} parsed - Parsed microformats object
 * @returns {object|undefined} h-feed object or undefined
 */
function findHfeed(parsed) {
  for (const item of parsed.items) {
    // h-feed at the top level wins
    if (item.type && item.type.includes("h-feed")) {
      return item;
    }

    // Otherwise check one level of nested children
    for (const child of item.children || []) {
      if (child.type && child.type.includes("h-feed")) {
        return child;
      }
    }
  }

  return undefined;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Discover feeds from HTML page
 * @param {string} content - HTML content
 * @param {string} pageUrl - URL of the page
 * @returns {Promise<Array>} Array of discovered feed URLs with types
 */
export async function discoverFeeds(content, pageUrl) {
  const parsed = mf2(content, { baseUrl: pageUrl });
  const feeds = [];

  // rel="alternate" links whose URL looks feed-like
  for (const url of parsed.rels?.alternate || []) {
    const looksLikeFeed =
      url.includes("feed") || url.endsWith(".xml") || url.endsWith(".json");
    if (looksLikeFeed) {
      feeds.push({ url, type: "unknown", rel: "alternate" });
    }
  }

  // rel="feed" links (Microsub discovery)
  for (const url of parsed.rels?.feed || []) {
    feeds.push({ url, type: "hfeed", rel: "feed" });
  }

  // The page itself may contain an h-feed
  if (findHfeed(parsed)) {
    feeds.push({ url: pageUrl, type: "hfeed", rel: "self" });
  }

  // <link> elements in the raw HTML advertising feeds
  feeds.push(...extractLinkFeeds(content, pageUrl));

  return feeds;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Extract feed links from HTML <link> elements
 * @param {string} content - HTML content
 * @param {string} baseUrl - Base URL for resolving relative URLs
 * @returns {Array} Array of discovered feeds
 */
function extractLinkFeeds(content, baseUrl) {
  const feeds = [];
  // NOTE: regex-based scan; only matches <link> tags where rel appears after
  // at least one other character and uses quoted values
  const linkRegex = /<link[^>]+rel=["'](?:alternate|feed)["'][^>]*>/gi;
  const matches = content.match(linkRegex) || [];

  for (const link of matches) {
    const hrefMatch = link.match(/href=["']([^"']+)["']/i);
    const typeMatch = link.match(/type=["']([^"']+)["']/i);

    if (!hrefMatch) continue;

    const href = hrefMatch[1];
    const type = typeMatch ? typeMatch[1] : "unknown";

    // A malformed href must not abort discovery for the whole page;
    // skip the bad entry and keep the rest
    let url;
    try {
      url = new URL(href, baseUrl).href;
    } catch {
      continue;
    }

    let feedType = "unknown";
    if (type.includes("rss")) {
      feedType = "rss";
    } else if (type.includes("atom")) {
      feedType = "atom";
    } else if (type.includes("json")) {
      feedType = "jsonfeed";
    }

    feeds.push({
      url,
      type: feedType,
      contentType: type,
      rel: "link",
    });
  }

  return feeds;
}
|