@rmdes/indiekit-endpoint-microsub 1.0.0-beta.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -150,7 +150,7 @@ function extractSelfFromLinkHeader(linkHeader) {
150
150
  * @returns {Promise<object>} Parsed feed
151
151
  */
152
152
  export async function fetchAndParseFeed(url, options = {}) {
153
- const { parseFeed } = await import("./parser.js");
153
+ const { parseFeed, detectFeedType } = await import("./parser.js");
154
154
 
155
155
  const result = await fetchFeed(url, options);
156
156
 
@@ -161,6 +161,32 @@ export async function fetchAndParseFeed(url, options = {}) {
161
161
  };
162
162
  }
163
163
 
164
+ // Check if we got a parseable feed
165
+ const feedType = detectFeedType(result.content, result.contentType);
166
+
167
+ // If we got ActivityPub or unknown, try common feed paths
168
+ if (feedType === "activitypub" || feedType === "unknown") {
169
+ const fallbackFeed = await tryCommonFeedPaths(url, options);
170
+ if (fallbackFeed) {
171
+ // Fetch and parse the discovered feed
172
+ const feedResult = await fetchFeed(fallbackFeed.url, options);
173
+ if (!feedResult.notModified) {
174
+ const parsed = await parseFeed(feedResult.content, fallbackFeed.url, {
175
+ contentType: feedResult.contentType,
176
+ });
177
+ return {
178
+ ...feedResult,
179
+ ...parsed,
180
+ hub: feedResult.hub || parsed._hub,
181
+ discoveredFrom: url,
182
+ };
183
+ }
184
+ }
185
+ throw new Error(
186
+ `Unable to find a feed at ${url}. Try the direct feed URL.`,
187
+ );
188
+ }
189
+
164
190
  const parsed = await parseFeed(result.content, url, {
165
191
  contentType: result.contentType,
166
192
  });
@@ -172,6 +198,48 @@ export async function fetchAndParseFeed(url, options = {}) {
172
198
  };
173
199
  }
174
200
 
201
/**
 * Well-known feed locations probed when regular discovery comes up empty
 */
const COMMON_FEED_PATHS = ["/feed/", "/feed", "/rss", "/rss.xml", "/atom.xml"];

/**
 * Probe the well-known feed paths of a site, in order, and report the first
 * one whose response carries a feed-like content type
 * @param {string} baseUrl - URL of the site; every path is resolved against it
 * @param {object} options - Fetch options (timeout is overridden to 10 seconds)
 * @returns {Promise<object|undefined>} Descriptor with `url`, `type` and `rel`, or undefined
 */
async function tryCommonFeedPaths(baseUrl, options = {}) {
  const siteUrl = new URL(baseUrl);

  for (const candidatePath of COMMON_FEED_PATHS) {
    const candidateUrl = new URL(candidatePath, siteUrl).href;

    try {
      const response = await fetchFeed(candidateUrl, {
        ...options,
        timeout: 10_000,
      });
      const mime = response.contentType?.toLowerCase() || "";
      const isJson = mime.includes("json");
      const isXmlLike = ["xml", "rss", "atom"].some((marker) =>
        mime.includes(marker),
      );

      // JSON only counts as a feed when the body mentions the JSON Feed spec
      if (isXmlLike || (isJson && response.content?.includes("jsonfeed.org"))) {
        return {
          url: candidateUrl,
          type: isJson ? "jsonfeed" : "xml",
          rel: "alternate",
        };
      }
    } catch {
      // Best effort: a failing candidate just means we move on to the next
    }
  }

  return undefined;
}
242
+
175
243
  /**
176
244
  * Discover feeds from a URL
177
245
  * @param {string} url - Page URL
@@ -187,19 +255,62 @@ export async function discoverFeedsFromUrl(url, options = {}) {
187
255
  if (
188
256
  contentType.includes("xml") ||
189
257
  contentType.includes("rss") ||
190
- contentType.includes("atom") ||
191
- contentType.includes("json")
258
+ contentType.includes("atom")
192
259
  ) {
193
260
  return [
194
261
  {
195
262
  url,
196
- type: contentType.includes("json") ? "jsonfeed" : "xml",
263
+ type: "xml",
197
264
  rel: "self",
198
265
  },
199
266
  ];
200
267
  }
201
268
 
202
- // Otherwise, discover feeds from HTML
203
- const feeds = await discoverFeeds(result.content, url);
204
- return feeds;
269
+ // Check for JSON Feed specifically
270
+ if (
271
+ contentType.includes("json") &&
272
+ result.content?.includes("jsonfeed.org")
273
+ ) {
274
+ return [
275
+ {
276
+ url,
277
+ type: "jsonfeed",
278
+ rel: "self",
279
+ },
280
+ ];
281
+ }
282
+
283
+ // Check if we got ActivityPub JSON or other non-feed JSON
284
+ // This happens with WordPress sites using ActivityPub plugin
285
+ if (
286
+ contentType.includes("json") ||
287
+ (result.content?.trim().startsWith("{") &&
288
+ result.content?.includes("@context"))
289
+ ) {
290
+ // Try common feed paths as fallback
291
+ const fallbackFeed = await tryCommonFeedPaths(url, options);
292
+ if (fallbackFeed) {
293
+ return [fallbackFeed];
294
+ }
295
+ }
296
+
297
+ // If content looks like HTML, discover feeds from it
298
+ if (
299
+ contentType.includes("html") ||
300
+ result.content?.includes("<!DOCTYPE html") ||
301
+ result.content?.includes("<html")
302
+ ) {
303
+ const feeds = await discoverFeeds(result.content, url);
304
+ if (feeds.length > 0) {
305
+ return feeds;
306
+ }
307
+ }
308
+
309
+ // Last resort: try common feed paths
310
+ const fallbackFeed = await tryCommonFeedPaths(url, options);
311
+ if (fallbackFeed) {
312
+ return [fallbackFeed];
313
+ }
314
+
315
+ return [];
205
316
  }
@@ -7,6 +7,66 @@ import crypto from "node:crypto";
7
7
 
8
8
  import sanitizeHtml from "sanitize-html";
9
9
 
10
/**
 * Parse a date value, tolerating the non-standard formats some feeds emit
 *
 * Timezone-less "YYYY-MM-DD HH:MM" and "YYYY-MM-DD HH:MM:SS" strings are
 * always interpreted as UTC.
 * @param {string|Date} dateInput - Date string or Date object
 * @returns {Date|undefined} Parsed Date or undefined if invalid
 */
function parseDate(dateInput) {
  if (!dateInput) {
    return undefined;
  }

  // A Date is either usable as-is or invalid; re-parsing it cannot help
  if (dateInput instanceof Date) {
    return Number.isNaN(dateInput.getTime()) ? undefined : dateInput;
  }

  const dateString = String(dateInput).trim();

  // Must run BEFORE the generic parser: this format is not part of the
  // ECMAScript date-time string format, so engines are free to accept it
  // (V8 does, reading it as local time). Handling it here keeps the result
  // identical on every engine and in every server timezone.
  const naive = dateString.match(
    /^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})(:\d{2})?$/,
  );
  if (naive) {
    const [, day, hoursMinutes, seconds = ":00"] = naive;
    const utcDate = new Date(`${day}T${hoursMinutes}${seconds}Z`);
    if (!Number.isNaN(utcDate.getTime())) {
      return utcDate;
    }
  }

  // Everything else (ISO 8601, RFC 822, ...) goes through the built-in parser
  const date = new Date(dateString);
  return Number.isNaN(date.getTime()) ? undefined : date;
}
59
+
60
/**
 * Convert a date value to an ISO 8601 string, yielding undefined instead of
 * a string when the value cannot be parsed
 * @param {string|Date} dateInput - Date string or Date object
 * @returns {string|undefined} ISO string or undefined
 */
function toISOStringSafe(dateInput) {
  const parsed = parseDate(dateInput);
  if (!parsed) {
    return undefined;
  }

  return parsed.toISOString();
}
69
+
10
70
  /**
11
71
  * Sanitize HTML options
12
72
  */
@@ -91,8 +151,8 @@ export function normalizeItem(item, feedUrl, feedType) {
91
151
  uid,
92
152
  url,
93
153
  name: item.title || undefined,
94
- published: item.pubdate ? new Date(item.pubdate).toISOString() : undefined,
95
- updated: item.date ? new Date(item.date).toISOString() : undefined,
154
+ published: toISOStringSafe(item.pubdate),
155
+ updated: toISOStringSafe(item.date),
96
156
  _source: {
97
157
  feedUrl,
98
158
  feedType,
@@ -51,13 +51,18 @@ export function detectFeedType(content, contentType = "") {
51
51
  // Fall back to content inspection
52
52
  const trimmed = content.trim();
53
53
 
54
- // JSON Feed
54
+ // JSON content
55
55
  if (trimmed.startsWith("{")) {
56
56
  try {
57
57
  const json = JSON.parse(trimmed);
58
+ // JSON Feed
58
59
  if (json.version && json.version.includes("jsonfeed.org")) {
59
60
  return "jsonfeed";
60
61
  }
62
+ // ActivityPub - return special type to indicate we need feed discovery
63
+ if (json["@context"] || json.type === "Group" || json.inbox) {
64
+ return "activitypub";
65
+ }
61
66
  } catch {
62
67
  // Not JSON
63
68
  }
@@ -112,6 +117,12 @@ export async function parseFeed(content, feedUrl, options = {}) {
112
117
  return parseHfeed(content, feedUrl);
113
118
  }
114
119
 
120
+ case "activitypub": {
121
+ throw new Error(
122
+ `URL returns ActivityPub JSON instead of a feed. Try the direct feed URL (e.g., ${feedUrl}feed/)`,
123
+ );
124
+ }
125
+
115
126
  default: {
116
127
  throw new Error(`Unable to detect feed type for ${feedUrl}`);
117
128
  }
@@ -0,0 +1,219 @@
1
+ /**
2
+ * Media proxy with caching
3
+ * @module media/proxy
4
+ */
5
+
6
+ import crypto from "node:crypto";
7
+
8
+ import { getCache, setCache } from "../cache/redis.js";
9
+
10
// Largest image the proxy accepts, in bytes (2MB)
const MAX_SIZE = 2 * 1024 * 1024;

// How long a cached image is kept: 4 hours, expressed in seconds
const CACHE_TTL = 4 * 60 * 60;

// Content types the proxy is willing to store and serve
const ALLOWED_TYPES = new Set(
  ["jpeg", "png", "gif", "webp", "svg+xml"].map(
    (subtype) => `image/${subtype}`,
  ),
);
19
+
20
/**
 * Derive a short, deterministic identifier for a URL
 * @param {string} url - Original image URL
 * @returns {string} First 16 hex characters of the URL's SHA-256 digest
 */
export function hashUrl(url) {
  const hasher = crypto.createHash("sha256");
  hasher.update(url);

  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 16);
}
28
+
29
/**
 * Get the proxied URL for an image
 *
 * Values that cannot be proxied are returned untouched: empty or non-string
 * values, already-proxied URLs, and anything that is not an absolute
 * http(s) URL (data:, blob:, relative or protocol-relative URLs).
 * @param {string} baseUrl - Base URL of the Microsub endpoint
 * @param {string} originalUrl - Original image URL
 * @returns {string} Proxied URL, or `originalUrl` unchanged when it is not proxied
 */
export function getProxiedUrl(baseUrl, originalUrl) {
  if (!originalUrl || !baseUrl || typeof originalUrl !== "string") {
    return originalUrl;
  }

  // Skip already-proxied URLs
  if (originalUrl.includes("/microsub/media/")) {
    return originalUrl;
  }

  // The proxy route only accepts absolute http(s) URLs, so rewriting anything
  // else would turn a working reference into a 400 response
  if (!/^\s*https?:\/\//i.test(originalUrl)) {
    return originalUrl;
  }

  // Avoid "//microsub" when the base URL is configured with a trailing slash
  const base = String(baseUrl);
  const endpoint = base.endsWith("/") ? base.slice(0, -1) : base;

  const hash = hashUrl(originalUrl);
  return `${endpoint}/microsub/media/${hash}?url=${encodeURIComponent(originalUrl)}`;
}
53
+
54
/**
 * Rewrite image URLs in an item to use the proxy
 *
 * Handles `photo` and `author.photo`, each of which may be a URL string, an
 * object carrying the URL in `value`, or an array of either. Entries of any
 * other shape are passed through unchanged. The input item is not mutated.
 * @param {object} item - JF2 item
 * @param {string} baseUrl - Base URL for proxy
 * @returns {object} Shallow copy of the item with proxied URLs (the item itself when there is nothing to do)
 */
export function proxyItemImages(item, baseUrl) {
  if (!baseUrl || !item) {
    return item;
  }

  // Rewrite a single photo entry; only strings are ever handed to the proxy
  const proxyPhoto = (photo) => {
    if (typeof photo === "string") {
      return getProxiedUrl(baseUrl, photo);
    }
    if (typeof photo?.value === "string") {
      return { ...photo, value: getProxiedUrl(baseUrl, photo.value) };
    }
    return photo;
  };

  // Apply the rewrite to one entry or to every entry of a list
  const proxyPhotos = (photos) =>
    Array.isArray(photos)
      ? photos.map((photo) => proxyPhoto(photo))
      : proxyPhoto(photos);

  const proxied = { ...item };

  if (proxied.photo) {
    proxied.photo = proxyPhotos(proxied.photo);
  }

  if (proxied.author?.photo) {
    proxied.author = {
      ...proxied.author,
      photo: proxyPhotos(proxied.author.photo),
    };
  }

  return proxied;
}
94
+
95
/**
 * Fetch an image and cache it
 *
 * A cached entry is returned when one exists. Otherwise the image is
 * downloaded, checked against the allowed content types and the size limit,
 * and stored in the cache. Cache failures are logged and never prevent the
 * image from being fetched or returned.
 * @param {object} redis - Redis client (falsy to bypass the cache)
 * @param {string} url - Image URL to fetch
 * @returns {Promise<object|undefined>} `{ contentType, data, size }` with `data`
 *   base64-encoded (or whatever the cache holds for this URL), or undefined
 *   when the image is unavailable or rejected
 */
export async function fetchImage(redis, url) {
  const cacheKey = `media:${hashUrl(url)}`;

  // Try cache first; an unreachable cache degrades to a plain fetch
  if (redis) {
    try {
      const cached = await getCache(redis, cacheKey);
      if (cached) {
        return cached;
      }
    } catch (error) {
      console.error(
        `[Microsub] Media proxy cache read failed: ${error.message} for ${url}`,
      );
    }
  }

  try {
    // Fetch the image
    const response = await fetch(url, {
      headers: {
        "User-Agent": "Indiekit Microsub/1.0 (+https://getindiekit.com)",
        Accept: "image/*",
      },
      signal: AbortSignal.timeout(10_000), // 10 second timeout
    });

    if (!response.ok) {
      console.error(
        `[Microsub] Media proxy fetch failed: ${response.status} for ${url}`,
      );
      await response.body?.cancel();
      return;
    }

    // Check content type (media types are case-insensitive)
    const contentType = response.headers
      .get("content-type")
      ?.split(";")[0]
      .trim()
      .toLowerCase();
    if (!ALLOWED_TYPES.has(contentType)) {
      console.error(
        `[Microsub] Media proxy rejected type: ${contentType} for ${url}`,
      );
      await response.body?.cancel();
      return;
    }

    // Check the declared content length, if any
    const contentLength = Number.parseInt(
      response.headers.get("content-length") || "0",
      10,
    );
    if (contentLength > MAX_SIZE) {
      console.error(
        `[Microsub] Media proxy rejected size: ${contentLength} for ${url}`,
      );
      await response.body?.cancel();
      return;
    }

    // Read the body incrementally: Content-Length may be missing or wrong, so
    // the limit is enforced on the bytes actually received. Leaving the loop
    // early cancels the stream.
    const chunks = [];
    let size = 0;
    for await (const chunk of response.body ?? []) {
      size += chunk.byteLength;
      if (size > MAX_SIZE) {
        console.error(
          `[Microsub] Media proxy rejected size: over ${MAX_SIZE} for ${url}`,
        );
        return;
      }
      chunks.push(chunk);
    }

    const imageData = {
      contentType,
      data: Buffer.concat(chunks, size).toString("base64"),
      size,
    };

    // Cache in Redis; a failed write must not discard the fetched image
    if (redis) {
      try {
        await setCache(redis, cacheKey, imageData, CACHE_TTL);
      } catch (error) {
        console.error(
          `[Microsub] Media proxy cache write failed: ${error.message} for ${url}`,
        );
      }
    }

    return imageData;
  } catch (error) {
    console.error(`[Microsub] Media proxy error: ${error.message} for ${url}`);
    return;
  }
}
173
+
174
/**
 * Check whether a URL hostname literally names a loopback, private or
 * link-local host
 *
 * This only inspects the hostname text. It does not resolve DNS or follow
 * redirects, so it narrows rather than eliminates server-side request forgery.
 * @param {string} hostname - `hostname` of a parsed URL
 * @returns {boolean} True when the host must not be fetched by the proxy
 */
function isInternalHost(hostname) {
  const host = hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "") // IPv6 literals are bracketed in URLs
    .replace(/\.$/, ""); // fully-qualified form, e.g. "localhost."

  if (host === "localhost" || host.endsWith(".localhost")) {
    return true;
  }

  // IPv6 literal
  if (host.includes(":")) {
    return (
      host === "::" ||
      host === "::1" ||
      host.startsWith("::ffff:") || // IPv4-mapped addresses
      /^f[cd]/.test(host) || // fc00::/7 unique local
      /^fe[89ab]/.test(host) // fe80::/10 link-local
    );
  }

  // IPv4 literal (the URL parser has already normalised numeric forms)
  const ipv4 = host.match(/^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/);
  if (!ipv4) {
    return false;
  }
  const first = Number(ipv4[1]);
  const second = Number(ipv4[2]);
  return (
    first === 0 ||
    first === 10 ||
    first === 127 ||
    (first === 100 && second >= 64 && second <= 127) ||
    (first === 169 && second === 254) ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168)
  );
}

/**
 * Express route handler for media proxy
 *
 * Responds with 400 when the `url` query parameter is missing, is not a
 * single string, is not a valid http(s) URL, or points at a loopback/private
 * host. Otherwise serves the image, or redirects to the original URL when it
 * cannot be fetched.
 * @param {object} request - Express request
 * @param {object} response - Express response
 * @returns {Promise<void>}
 */
export async function handleMediaProxy(request, response) {
  const { url } = request.query;

  if (!url) {
    return response.status(400).send("Missing url parameter");
  }

  // Repeated or bracketed query parameters arrive as arrays/objects
  if (typeof url !== "string") {
    return response.status(400).send("Invalid URL");
  }

  // Validate URL
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return response.status(400).send("Invalid URL");
  }
  if (!["http:", "https:"].includes(parsed.protocol)) {
    return response.status(400).send("Invalid URL protocol");
  }
  if (isInternalHost(parsed.hostname)) {
    return response.status(400).send("Invalid URL host");
  }

  // Get Redis client from application
  const redis = request.app.locals.application?.redis;

  // Fetch or get from cache
  const imageData = await fetchImage(redis, url);

  if (!imageData) {
    // Redirect to original URL as fallback
    // NOTE(review): this redirects to a caller-supplied (http/https) URL;
    // consider restricting it if open redirects are a concern.
    return response.redirect(url);
  }

  response.set({
    "Content-Type": imageData.contentType,
    "Content-Length": imageData.size,
    "Cache-Control": "public, max-age=14400", // 4 hours
    // The serialised URL is ASCII-only; the raw query value may contain
    // characters that are not valid in a header
    "X-Proxied-From": parsed.href,
    // Third-party content (including SVG) is served from this origin: stop
    // browsers sniffing it and running scripts if it is opened directly
    "X-Content-Type-Options": "nosniff",
    "Content-Security-Policy":
      "default-src 'none'; style-src 'unsafe-inline'; sandbox",
  });

  // Send the image
  response.send(Buffer.from(imageData.data, "base64"));
}
@@ -9,6 +9,10 @@ import { getChannel } from "../storage/channels.js";
9
9
  import { updateFeedAfterFetch, updateFeedWebsub } from "../storage/feeds.js";
10
10
  import { passesRegexFilter, passesTypeFilter } from "../storage/filters.js";
11
11
  import { addItem } from "../storage/items.js";
12
+ import {
13
+ subscribe as websubSubscribe,
14
+ getCallbackUrl,
15
+ } from "../websub/subscriber.js";
12
16
 
13
17
  import { calculateNewTier } from "./tier.js";
14
18
 
@@ -123,13 +127,37 @@ export async function processFeed(application, feed) {
123
127
  updateData,
124
128
  );
125
129
 
126
- // Handle WebSub hub discovery
130
+ // Handle WebSub hub discovery and auto-subscription
127
131
  if (parsed.hub && (!feed.websub || feed.websub.hub !== parsed.hub)) {
128
132
  await updateFeedWebsub(application, feed._id, {
129
133
  hub: parsed.hub,
130
134
  topic: parsed.self || feed.url,
131
135
  });
132
- // TODO: Subscribe to hub
136
+
137
+ // Auto-subscribe to WebSub hub if we have a callback URL
138
+ const baseUrl = application.url;
139
+ if (baseUrl) {
140
+ const callbackUrl = getCallbackUrl(baseUrl, feed._id.toString());
141
+ const updatedFeed = {
142
+ ...feed,
143
+ websub: { hub: parsed.hub, topic: parsed.self || feed.url },
144
+ };
145
+
146
+ websubSubscribe(application, updatedFeed, callbackUrl)
147
+ .then((subscribed) => {
148
+ if (subscribed) {
149
+ console.info(
150
+ `[Microsub] WebSub subscription initiated for ${feed.url}`,
151
+ );
152
+ }
153
+ })
154
+ .catch((error) => {
155
+ console.error(
156
+ `[Microsub] WebSub subscription error for ${feed.url}:`,
157
+ error.message,
158
+ );
159
+ });
160
+ }
133
161
  }
134
162
 
135
163
  result.success = true;
@@ -7,6 +7,9 @@ import { ObjectId } from "mongodb";
7
7
 
8
8
  import { generateChannelUid } from "../utils/jf2.js";
9
9
 
10
+ import { deleteFeedsForChannel } from "./feeds.js";
11
+ import { deleteItemsForChannel } from "./items.js";
12
+
10
13
  /**
11
14
  * Get channels collection from application
12
15
  * @param {object} application - Indiekit application
@@ -184,7 +187,7 @@ export async function updateChannel(application, uid, updates, userId) {
184
187
  }
185
188
 
186
189
  /**
187
- * Delete a channel
190
+ * Delete a channel and all its feeds and items
188
191
  * @param {object} application - Indiekit application
189
192
  * @param {string} uid - Channel UID
190
193
  * @param {string} [userId] - User ID
@@ -200,7 +203,20 @@ export async function deleteChannel(application, uid, userId) {
200
203
  return false;
201
204
  }
202
205
 
203
- const result = await collection.deleteOne(query);
206
+ // Find the channel first to get its ObjectId
207
+ const channel = await collection.findOne(query);
208
+ if (!channel) {
209
+ return false;
210
+ }
211
+
212
+ // Cascade delete: items first, then feeds, then channel
213
+ const itemsDeleted = await deleteItemsForChannel(application, channel._id);
214
+ const feedsDeleted = await deleteFeedsForChannel(application, channel._id);
215
+ console.info(
216
+ `[Microsub] Deleted channel ${uid}: ${feedsDeleted} feeds, ${itemsDeleted} items`,
217
+ );
218
+
219
+ const result = await collection.deleteOne({ _id: channel._id });
204
220
  return result.deletedCount > 0;
205
221
  }
206
222
 
@@ -5,6 +5,8 @@
5
5
 
6
6
  import { ObjectId } from "mongodb";
7
7
 
8
+ import { deleteItemsForFeed } from "./items.js";
9
+
8
10
  /**
9
11
  * Get feeds collection from application
10
12
  * @param {object} application - Indiekit application
@@ -122,7 +124,7 @@ export async function updateFeed(application, id, updates) {
122
124
  }
123
125
 
124
126
  /**
125
- * Delete a feed subscription
127
+ * Delete a feed subscription and all its items
126
128
  * @param {object} application - Indiekit application
127
129
  * @param {ObjectId|string} channelId - Channel ObjectId
128
130
  * @param {string} url - Feed URL
@@ -133,7 +135,18 @@ export async function deleteFeed(application, channelId, url) {
133
135
  const objectId =
134
136
  typeof channelId === "string" ? new ObjectId(channelId) : channelId;
135
137
 
136
- const result = await collection.deleteOne({ channelId: objectId, url });
138
+ // Find the feed first to get its ID for cascade delete
139
+ const feed = await collection.findOne({ channelId: objectId, url });
140
+ if (!feed) {
141
+ return false;
142
+ }
143
+
144
+ // Delete all items from this feed
145
+ const itemsDeleted = await deleteItemsForFeed(application, feed._id);
146
+ console.info(`[Microsub] Deleted ${itemsDeleted} items from feed ${url}`);
147
+
148
+ // Delete the feed itself
149
+ const result = await collection.deleteOne({ _id: feed._id });
137
150
  return result.deletedCount > 0;
138
151
  }
139
152