@rmdes/indiekit-endpoint-microsub 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,60 @@ import crypto from "node:crypto";
7
7
 
8
8
  import { getCache, setCache } from "../cache/redis.js";
9
9
 
10
/**
 * SSRF protection: hostnames and address ranges that must never be fetched
 * by the media proxy or feed fetcher.
 */
const BLOCKED_HOSTNAMES = new Set(["localhost", "0.0.0.0"]);
const BLOCKED_IP_PREFIXES = [
  "127.", // Loopback
  "10.", // Private Class A
  "192.168.", // Private Class C
  "169.254.", // Link-local
  "0.", // Current network
];

/**
 * Check whether a dotted-quad IPv4 hostname falls in a private/internal range.
 * @param {string} hostname - IPv4 address in dotted-quad form
 * @returns {boolean} True if the address is private/internal
 */
function isPrivateIpv4(hostname) {
  for (const prefix of BLOCKED_IP_PREFIXES) {
    if (hostname.startsWith(prefix)) {
      return true;
    }
  }

  // 172.16.0.0/12 (172.16.x.x - 172.31.x.x)
  const match172 = hostname.match(/^172\.(\d+)\./);
  if (match172) {
    const second = Number.parseInt(match172[1], 10);
    if (second >= 16 && second <= 31) {
      return true;
    }
  }

  return false;
}

/**
 * Convert the tail of an IPv4-mapped IPv6 literal to dotted-quad form.
 * WHATWG URL serializes `[::ffff:127.0.0.1]` as `[::ffff:7f00:1]`, so the
 * tail may be either dotted notation or two hex groups.
 * @param {string} tail - Text after the "::ffff:" prefix
 * @returns {string} Dotted-quad IPv4 string ("" when unparseable)
 */
function mappedIpv4(tail) {
  if (tail.includes(".")) {
    return tail;
  }
  const groups = tail.split(":");
  if (groups.length !== 2) {
    return "";
  }
  const hi = Number.parseInt(groups[0] || "0", 16);
  const lo = Number.parseInt(groups[1] || "0", 16);
  if (Number.isNaN(hi) || Number.isNaN(lo)) {
    return "";
  }
  return `${hi >> 8}.${hi & 255}.${lo >> 8}.${lo & 255}`;
}

/**
 * Check if a hostname targets a private/internal address (SSRF protection).
 * Covers loopback, RFC 1918 IPv4 ranges, link-local, IPv6 loopback/unspecified,
 * IPv6 link-local (fe80::/10), IPv6 unique-local (fc00::/7), and IPv4-mapped
 * IPv6 literals. NOTE(review): this inspects the URL literal only — it does
 * not resolve DNS, so a public hostname that resolves to a private IP is not
 * caught here; confirm upstream resolution is also guarded.
 * @param {string} urlString - URL to check
 * @returns {boolean} True if the URL targets a private/internal address
 */
export function isPrivateUrl(urlString) {
  try {
    const parsed = new URL(urlString);
    let hostname = parsed.hostname.toLowerCase();

    // Block known private hostnames
    if (BLOCKED_HOSTNAMES.has(hostname)) {
      return true;
    }

    // WHATWG URL keeps IPv6 literals bracketed in `hostname`
    if (hostname.startsWith("[") && hostname.endsWith("]")) {
      hostname = hostname.slice(1, -1);
    }

    if (hostname.includes(":")) {
      // IPv6 literal (serialized lowercase, compressed, no leading zeros)
      if (hostname === "::1" || hostname === "::") {
        return true; // Loopback / unspecified
      }
      if (/^fe[89ab][0-9a-f]:/.test(hostname)) {
        return true; // Link-local fe80::/10
      }
      if (/^f[cd][0-9a-f]{2}:/.test(hostname)) {
        return true; // Unique-local fc00::/7
      }
      if (hostname.startsWith("::ffff:")) {
        // IPv4-mapped IPv6 — check the embedded IPv4 address
        return isPrivateIpv4(mappedIpv4(hostname.slice(7)));
      }
      return false;
    }

    return isPrivateIpv4(hostname);
  } catch {
    return true; // Invalid URLs are blocked
  }
}
63
+
10
64
  const MAX_SIZE = 2 * 1024 * 1024; // 2MB max image size
11
65
  const CACHE_TTL = 4 * 60 * 60; // 4 hours
12
66
  const ALLOWED_TYPES = new Set([
@@ -99,6 +153,12 @@ export function proxyItemImages(item, baseUrl) {
99
153
  * @returns {Promise<object|null>} Cached image data or null
100
154
  */
101
155
  export async function fetchImage(redis, url) {
156
+ // Block private/internal URLs (defense-in-depth)
157
+ if (isPrivateUrl(url)) {
158
+ console.error(`[Microsub] Media proxy blocked private URL: ${url}`);
159
+ return;
160
+ }
161
+
102
162
  const cacheKey = `media:${hashUrl(url)}`;
103
163
 
104
164
  // Try cache first
@@ -194,6 +254,11 @@ export async function handleMediaProxy(request, response) {
194
254
  return response.status(400).send("Invalid URL");
195
255
  }
196
256
 
257
+ // Block requests to private/internal networks (SSRF protection)
258
+ if (isPrivateUrl(url)) {
259
+ return response.status(403).send("URL not allowed");
260
+ }
261
+
197
262
  // Get Redis client from application
198
263
  const { application } = request.app.locals;
199
264
  const redis = application.redis;
@@ -202,8 +267,7 @@ export async function handleMediaProxy(request, response) {
202
267
  const imageData = await fetchImage(redis, url);
203
268
 
204
269
  if (!imageData) {
205
- // Redirect to original URL as fallback
206
- return response.redirect(url);
270
+ return response.status(404).send("Image not available");
207
271
  }
208
272
 
209
273
  // Set cache headers
@@ -115,6 +115,7 @@ export async function getChannels(application, userId) {
115
115
  channelId: channel._id,
116
116
  readBy: { $ne: userId },
117
117
  published: { $gte: cutoffDate },
118
+ _stripped: { $ne: true },
118
119
  });
119
120
 
120
121
  return {
@@ -87,8 +87,9 @@ export async function getTimelineItems(application, channelId, options = {}) {
87
87
  typeof channelId === "string" ? new ObjectId(channelId) : channelId;
88
88
  const limit = parseLimit(options.limit);
89
89
 
90
- // Base query - filter out read items unless showRead is true
91
- const baseQuery = { channelId: objectId };
90
+ // Base query - filter out read items unless showRead is true,
91
+ // and always exclude stripped dedup skeletons (no content to display)
92
+ const baseQuery = { channelId: objectId, _stripped: { $ne: true } };
92
93
  if (options.userId && !options.showRead) {
93
94
  baseQuery.readBy = { $ne: options.userId };
94
95
  }
@@ -288,61 +289,83 @@ export async function countReadItems(application, channelId, userId) {
288
289
  * @param {string} userId - User ID
289
290
  * @returns {Promise<number>} Number of items updated
290
291
  */
291
// Maximum number of full read items to keep per channel before stripping content.
// Items beyond this limit are converted to lightweight dedup skeletons (channelId,
// uid, readBy) so the poller doesn't re-ingest them as new unread entries.
const MAX_FULL_READ_ITEMS = 200;

/**
 * Cleanup old read items by stripping content but preserving dedup skeletons.
 * This prevents the vicious cycle where deleted read items get re-ingested as
 * unread by the poller because the dedup record (channelId + uid) was destroyed.
 * @param {object} collection - MongoDB collection
 * @param {ObjectId} channelObjectId - Channel ObjectId
 * @param {string} userId - User ID
 */
async function cleanupOldReadItems(collection, channelObjectId, userId) {
  // Count only items that still carry full content. Already-stripped skeletons
  // must not inflate the count — otherwise the trigger condition disagrees with
  // the find() below (which excludes them) and with the identical check in
  // cleanupAllReadItems, causing spurious or skipped cleanup passes.
  const readCount = await collection.countDocuments({
    channelId: channelObjectId,
    readBy: userId,
    _stripped: { $ne: true },
  });

  if (readCount > MAX_FULL_READ_ITEMS) {
    // Find old full read items beyond the retention limit (newest first, so
    // skip() keeps the most recent MAX_FULL_READ_ITEMS intact)
    const itemsToStrip = await collection
      .find({
        channelId: channelObjectId,
        readBy: userId,
        _stripped: { $ne: true }, // Don't re-strip already-stripped items
      })
      .sort({ published: -1, _id: -1 })
      .skip(MAX_FULL_READ_ITEMS)
      .project({ _id: 1 })
      .toArray();

    if (itemsToStrip.length > 0) {
      const idsToStrip = itemsToStrip.map((item) => item._id);
      // Strip content but keep dedup skeleton (channelId, uid, feedId, readBy)
      const result = await collection.updateMany(
        { _id: { $in: idsToStrip } },
        {
          $set: { _stripped: true },
          $unset: {
            name: "",
            content: "",
            summary: "",
            author: "",
            category: "",
            photo: "",
            video: "",
            audio: "",
            likeOf: "",
            repostOf: "",
            bookmarkOf: "",
            inReplyTo: "",
            source: "",
          },
        },
      );
      console.info(
        `[Microsub] Stripped content from ${result.modifiedCount} old read items (keeping ${MAX_FULL_READ_ITEMS} full)`,
      );
    }
  }
}
330
354
 
331
355
  /**
332
- * Cleanup all read items across all channels (startup cleanup)
356
+ * Cleanup all read items across all channels (startup cleanup).
357
+ * Strips content from old read items but preserves dedup skeletons.
333
358
  * @param {object} application - Indiekit application
334
- * @returns {Promise<number>} Total number of items deleted
359
+ * @returns {Promise<number>} Total number of items stripped
335
360
  */
336
361
  export async function cleanupAllReadItems(application) {
337
362
  const collection = getCollection(application);
338
363
  const channelsCollection = application.collections.get("microsub_channels");
339
364
 
340
- // Get all channels
341
365
  const channels = await channelsCollection.find({}).toArray();
342
- let totalDeleted = 0;
366
+ let totalStripped = 0;
343
367
 
344
368
  for (const channel of channels) {
345
- // Get unique userIds who have read items in this channel
346
369
  const readByUsers = await collection.distinct("readBy", {
347
370
  channelId: channel._id,
348
371
  readBy: { $exists: true, $ne: [] },
@@ -354,40 +377,60 @@ export async function cleanupAllReadItems(application) {
354
377
  const readCount = await collection.countDocuments({
355
378
  channelId: channel._id,
356
379
  readBy: userId,
380
+ _stripped: { $ne: true },
357
381
  });
358
382
 
359
- if (readCount > MAX_READ_ITEMS) {
360
- const itemsToDelete = await collection
383
+ if (readCount > MAX_FULL_READ_ITEMS) {
384
+ const itemsToStrip = await collection
361
385
  .find({
362
386
  channelId: channel._id,
363
387
  readBy: userId,
388
+ _stripped: { $ne: true },
364
389
  })
365
390
  .sort({ published: -1, _id: -1 })
366
- .skip(MAX_READ_ITEMS)
391
+ .skip(MAX_FULL_READ_ITEMS)
367
392
  .project({ _id: 1 })
368
393
  .toArray();
369
394
 
370
- if (itemsToDelete.length > 0) {
371
- const idsToDelete = itemsToDelete.map((item) => item._id);
372
- const deleteResult = await collection.deleteMany({
373
- _id: { $in: idsToDelete },
374
- });
375
- totalDeleted += deleteResult.deletedCount;
395
+ if (itemsToStrip.length > 0) {
396
+ const idsToStrip = itemsToStrip.map((item) => item._id);
397
+ const result = await collection.updateMany(
398
+ { _id: { $in: idsToStrip } },
399
+ {
400
+ $set: { _stripped: true },
401
+ $unset: {
402
+ name: "",
403
+ content: "",
404
+ summary: "",
405
+ author: "",
406
+ category: "",
407
+ photo: "",
408
+ video: "",
409
+ audio: "",
410
+ likeOf: "",
411
+ repostOf: "",
412
+ bookmarkOf: "",
413
+ inReplyTo: "",
414
+ source: "",
415
+ },
416
+ },
417
+ );
418
+ totalStripped += result.modifiedCount;
376
419
  console.info(
377
- `[Microsub] Startup cleanup: deleted ${deleteResult.deletedCount} old items from channel "${channel.name}"`,
420
+ `[Microsub] Startup cleanup: stripped ${result.modifiedCount} old items from channel "${channel.name}"`,
378
421
  );
379
422
  }
380
423
  }
381
424
  }
382
425
  }
383
426
 
384
- if (totalDeleted > 0) {
427
+ if (totalStripped > 0) {
385
428
  console.info(
386
- `[Microsub] Startup cleanup complete: ${totalDeleted} total items deleted`,
429
+ `[Microsub] Startup cleanup complete: ${totalStripped} total items stripped`,
387
430
  );
388
431
  }
389
432
 
390
- return totalDeleted;
433
+ return totalStripped;
391
434
  }
392
435
 
393
436
  export async function markItemsRead(application, channelId, entryIds, userId) {
@@ -446,9 +489,6 @@ export async function markItemsRead(application, channelId, entryIds, userId) {
446
489
  `[Microsub] markItemsRead result: ${result.modifiedCount} items updated`,
447
490
  );
448
491
 
449
- // Cleanup old read items, keeping only the most recent
450
- await cleanupOldReadItems(collection, channelObjectId, userId);
451
-
452
492
  return result.modifiedCount;
453
493
  }
454
494
 
@@ -577,7 +617,7 @@ export async function getUnreadCount(application, channelId, userId) {
577
617
  const objectId =
578
618
  typeof channelId === "string" ? new ObjectId(channelId) : channelId;
579
619
 
580
- // Only count items from the last UNREAD_RETENTION_DAYS
620
+ // Only count items from the last UNREAD_RETENTION_DAYS, exclude stripped skeletons
581
621
  const cutoffDate = new Date();
582
622
  cutoffDate.setDate(cutoffDate.getDate() - UNREAD_RETENTION_DAYS);
583
623
 
@@ -585,6 +625,7 @@ export async function getUnreadCount(application, channelId, userId) {
585
625
  channelId: objectId,
586
626
  readBy: { $ne: userId },
587
627
  published: { $gte: cutoffDate },
628
+ _stripped: { $ne: true },
588
629
  });
589
630
  }
590
631
 
@@ -602,7 +643,11 @@ export async function searchItems(application, channelId, query, limit = 20) {
602
643
  typeof channelId === "string" ? new ObjectId(channelId) : channelId;
603
644
 
604
645
  // Use regex search (consider adding text index for better performance)
605
- const regex = new RegExp(query, "i");
646
+ const escapedQuery = query.replaceAll(
647
+ /[$()*+.?[\\\]^{|}]/g,
648
+ String.raw`\$&`,
649
+ );
650
+ const regex = new RegExp(escapedQuery, "i");
606
651
  const items = await collection
607
652
  .find({
608
653
  channelId: objectId,
@@ -4,6 +4,29 @@
4
4
  */
5
5
 
6
6
  import { mf2 } from "microformats-parser";
7
+ import sanitizeHtml from "sanitize-html";
8
+
9
/**
 * Sanitize HTML options (matches normalizer.js)
 */
// Tag allowlist, grouped by role for readability; flattened below.
const INLINE_TAGS = [
  "a", "abbr", "b", "br", "code", "em", "hr", "i", "s",
  "span", "strike", "strong", "sub", "sup", "u",
];
const BLOCK_LEVEL_TAGS = [
  "blockquote", "figcaption", "figure", "h1", "h2", "h3",
  "h4", "h5", "h6", "li", "ol", "p", "pre", "ul",
];
const TABLE_TAGS = ["table", "tbody", "td", "th", "thead", "tr"];
const MEDIA_TAGS = ["audio", "img", "source", "video"];

const SANITIZE_OPTIONS = {
  allowedTags: [
    ...INLINE_TAGS,
    ...BLOCK_LEVEL_TAGS,
    ...TABLE_TAGS,
    ...MEDIA_TAGS,
  ],
  allowedAttributes: {
    a: ["href", "title", "rel"],
    img: ["src", "alt", "title", "width", "height"],
    video: ["src", "poster", "controls", "width", "height"],
    audio: ["src", "controls"],
    source: ["src", "type"],
    "*": ["class"],
  },
  allowedSchemes: ["http", "https", "mailto"],
};
7
30
 
8
31
  /**
9
32
  * Verify a webmention
@@ -276,7 +299,7 @@ function extractContent(entry) {
276
299
 
277
300
  return {
278
301
  text: content.value,
279
- html: content.html,
302
+ html: content.html ? sanitizeHtml(content.html, SANITIZE_OPTIONS) : undefined,
280
303
  };
281
304
  }
282
305
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rmdes/indiekit-endpoint-microsub",
3
- "version": "1.0.29",
3
+ "version": "1.0.31",
4
4
  "description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.",
5
5
  "keywords": [
6
6
  "indiekit",