@rmdes/indiekit-endpoint-microsub 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,60 @@ import crypto from "node:crypto";
7
7
 
8
8
  import { getCache, setCache } from "../cache/redis.js";
9
9
 
10
/**
 * SSRF protection: hostnames and address ranges that must never be fetched
 * by the media proxy or feed fetcher.
 */
const BLOCKED_HOSTNAMES = new Set(["localhost", "0.0.0.0"]);
const BLOCKED_IP_PREFIXES = [
  "127.", // Loopback
  "10.", // Private Class A
  "192.168.", // Private Class C
  "169.254.", // Link-local
  "0.", // Current network
];

/**
 * Check whether a dotted-quad IPv4 hostname falls in a private/internal range.
 * @param {string} hostname - IPv4 address in dotted-quad form
 * @returns {boolean} True if the address is private/internal
 */
function isPrivateIpv4(hostname) {
  for (const prefix of BLOCKED_IP_PREFIXES) {
    if (hostname.startsWith(prefix)) {
      return true;
    }
  }

  // 172.16.0.0/12 (172.16.x.x - 172.31.x.x)
  const match172 = hostname.match(/^172\.(\d+)\./);
  if (match172) {
    const second = Number.parseInt(match172[1], 10);
    if (second >= 16 && second <= 31) {
      return true;
    }
  }

  return false;
}

/**
 * Convert the tail of an IPv4-mapped IPv6 literal to dotted-quad form.
 * WHATWG URL serializes `[::ffff:127.0.0.1]` as `[::ffff:7f00:1]`, so the
 * tail may be either dotted notation or two hex groups.
 * @param {string} tail - Text after the "::ffff:" prefix
 * @returns {string} Dotted-quad IPv4 string ("" when unparseable)
 */
function mappedIpv4(tail) {
  if (tail.includes(".")) {
    return tail;
  }
  const groups = tail.split(":");
  if (groups.length !== 2) {
    return "";
  }
  const hi = Number.parseInt(groups[0] || "0", 16);
  const lo = Number.parseInt(groups[1] || "0", 16);
  if (Number.isNaN(hi) || Number.isNaN(lo)) {
    return "";
  }
  return `${hi >> 8}.${hi & 255}.${lo >> 8}.${lo & 255}`;
}

/**
 * Check if a hostname targets a private/internal address (SSRF protection).
 * Covers loopback, RFC 1918 IPv4 ranges, link-local, IPv6 loopback/unspecified,
 * IPv6 link-local (fe80::/10), IPv6 unique-local (fc00::/7), and IPv4-mapped
 * IPv6 literals. NOTE(review): this inspects the URL literal only — it does
 * not resolve DNS, so a public hostname that resolves to a private IP is not
 * caught here; confirm upstream resolution is also guarded.
 * @param {string} urlString - URL to check
 * @returns {boolean} True if the URL targets a private/internal address
 */
export function isPrivateUrl(urlString) {
  try {
    const parsed = new URL(urlString);
    let hostname = parsed.hostname.toLowerCase();

    // Block known private hostnames
    if (BLOCKED_HOSTNAMES.has(hostname)) {
      return true;
    }

    // WHATWG URL keeps IPv6 literals bracketed in `hostname`
    if (hostname.startsWith("[") && hostname.endsWith("]")) {
      hostname = hostname.slice(1, -1);
    }

    if (hostname.includes(":")) {
      // IPv6 literal (serialized lowercase, compressed, no leading zeros)
      if (hostname === "::1" || hostname === "::") {
        return true; // Loopback / unspecified
      }
      if (/^fe[89ab][0-9a-f]:/.test(hostname)) {
        return true; // Link-local fe80::/10
      }
      if (/^f[cd][0-9a-f]{2}:/.test(hostname)) {
        return true; // Unique-local fc00::/7
      }
      if (hostname.startsWith("::ffff:")) {
        // IPv4-mapped IPv6 — check the embedded IPv4 address
        return isPrivateIpv4(mappedIpv4(hostname.slice(7)));
      }
      return false;
    }

    return isPrivateIpv4(hostname);
  } catch {
    return true; // Invalid URLs are blocked
  }
}
63
+
10
64
  const MAX_SIZE = 2 * 1024 * 1024; // 2MB max image size
11
65
  const CACHE_TTL = 4 * 60 * 60; // 4 hours
12
66
  const ALLOWED_TYPES = new Set([
@@ -99,6 +153,12 @@ export function proxyItemImages(item, baseUrl) {
99
153
  * @returns {Promise<object|null>} Cached image data or null
100
154
  */
101
155
  export async function fetchImage(redis, url) {
156
+ // Block private/internal URLs (defense-in-depth)
157
+ if (isPrivateUrl(url)) {
158
+ console.error(`[Microsub] Media proxy blocked private URL: ${url}`);
159
+ return;
160
+ }
161
+
102
162
  const cacheKey = `media:${hashUrl(url)}`;
103
163
 
104
164
  // Try cache first
@@ -194,6 +254,11 @@ export async function handleMediaProxy(request, response) {
194
254
  return response.status(400).send("Invalid URL");
195
255
  }
196
256
 
257
+ // Block requests to private/internal networks (SSRF protection)
258
+ if (isPrivateUrl(url)) {
259
+ return response.status(403).send("URL not allowed");
260
+ }
261
+
197
262
  // Get Redis client from application
198
263
  const { application } = request.app.locals;
199
264
  const redis = application.redis;
@@ -202,8 +267,7 @@ export async function handleMediaProxy(request, response) {
202
267
  const imageData = await fetchImage(redis, url);
203
268
 
204
269
  if (!imageData) {
205
- // Redirect to original URL as fallback
206
- return response.redirect(url);
270
+ return response.status(404).send("Image not available");
207
271
  }
208
272
 
209
273
  // Set cache headers
@@ -115,6 +115,7 @@ export async function getChannels(application, userId) {
115
115
  channelId: channel._id,
116
116
  readBy: { $ne: userId },
117
117
  published: { $gte: cutoffDate },
118
+ _stripped: { $ne: true },
118
119
  });
119
120
 
120
121
  return {
@@ -87,8 +87,9 @@ export async function getTimelineItems(application, channelId, options = {}) {
87
87
  typeof channelId === "string" ? new ObjectId(channelId) : channelId;
88
88
  const limit = parseLimit(options.limit);
89
89
 
90
- // Base query - filter out read items unless showRead is true
91
- const baseQuery = { channelId: objectId };
90
+ // Base query - filter out read items unless showRead is true,
91
+ // and always exclude stripped dedup skeletons (no content to display)
92
+ const baseQuery = { channelId: objectId, _stripped: { $ne: true } };
92
93
  if (options.userId && !options.showRead) {
93
94
  baseQuery.readBy = { $ne: options.userId };
94
95
  }
@@ -288,61 +289,83 @@ export async function countReadItems(application, channelId, userId) {
288
289
  * @param {string} userId - User ID
289
290
  * @returns {Promise<number>} Number of items updated
290
291
  */
291
// Maximum number of full read items to keep per channel before stripping content.
// Items beyond this limit are converted to lightweight dedup skeletons (channelId,
// uid, readBy) so the poller doesn't re-ingest them as new unread entries.
const MAX_FULL_READ_ITEMS = 200;

/**
 * Cleanup old read items by stripping content but preserving dedup skeletons.
 * This prevents the vicious cycle where deleted read items get re-ingested as
 * unread by the poller because the dedup record (channelId + uid) was destroyed.
 * @param {object} collection - MongoDB collection
 * @param {ObjectId} channelObjectId - Channel ObjectId
 * @param {string} userId - User ID
 */
async function cleanupOldReadItems(collection, channelObjectId, userId) {
  // Count only items that still carry full content. Already-stripped skeletons
  // must not inflate the count — otherwise the trigger condition disagrees with
  // the find() below (which excludes them) and with the identical check in
  // cleanupAllReadItems, causing spurious or skipped cleanup passes.
  const readCount = await collection.countDocuments({
    channelId: channelObjectId,
    readBy: userId,
    _stripped: { $ne: true },
  });

  if (readCount > MAX_FULL_READ_ITEMS) {
    // Find old full read items beyond the retention limit (newest first, so
    // skip() keeps the most recent MAX_FULL_READ_ITEMS intact)
    const itemsToStrip = await collection
      .find({
        channelId: channelObjectId,
        readBy: userId,
        _stripped: { $ne: true }, // Don't re-strip already-stripped items
      })
      .sort({ published: -1, _id: -1 })
      .skip(MAX_FULL_READ_ITEMS)
      .project({ _id: 1 })
      .toArray();

    if (itemsToStrip.length > 0) {
      const idsToStrip = itemsToStrip.map((item) => item._id);
      // Strip content but keep dedup skeleton (channelId, uid, feedId, readBy)
      const result = await collection.updateMany(
        { _id: { $in: idsToStrip } },
        {
          $set: { _stripped: true },
          $unset: {
            name: "",
            content: "",
            summary: "",
            author: "",
            category: "",
            photo: "",
            video: "",
            audio: "",
            likeOf: "",
            repostOf: "",
            bookmarkOf: "",
            inReplyTo: "",
            source: "",
          },
        },
      );
      console.info(
        `[Microsub] Stripped content from ${result.modifiedCount} old read items (keeping ${MAX_FULL_READ_ITEMS} full)`,
      );
    }
  }
}
330
354
 
331
355
  /**
332
- * Cleanup all read items across all channels (startup cleanup)
356
+ * Cleanup all read items across all channels (startup cleanup).
357
+ * Strips content from old read items but preserves dedup skeletons.
333
358
  * @param {object} application - Indiekit application
334
- * @returns {Promise<number>} Total number of items deleted
359
+ * @returns {Promise<number>} Total number of items stripped
335
360
  */
336
361
  export async function cleanupAllReadItems(application) {
337
362
  const collection = getCollection(application);
338
363
  const channelsCollection = application.collections.get("microsub_channels");
339
364
 
340
- // Get all channels
341
365
  const channels = await channelsCollection.find({}).toArray();
342
- let totalDeleted = 0;
366
+ let totalStripped = 0;
343
367
 
344
368
  for (const channel of channels) {
345
- // Get unique userIds who have read items in this channel
346
369
  const readByUsers = await collection.distinct("readBy", {
347
370
  channelId: channel._id,
348
371
  readBy: { $exists: true, $ne: [] },
@@ -354,40 +377,60 @@ export async function cleanupAllReadItems(application) {
354
377
  const readCount = await collection.countDocuments({
355
378
  channelId: channel._id,
356
379
  readBy: userId,
380
+ _stripped: { $ne: true },
357
381
  });
358
382
 
359
- if (readCount > MAX_READ_ITEMS) {
360
- const itemsToDelete = await collection
383
+ if (readCount > MAX_FULL_READ_ITEMS) {
384
+ const itemsToStrip = await collection
361
385
  .find({
362
386
  channelId: channel._id,
363
387
  readBy: userId,
388
+ _stripped: { $ne: true },
364
389
  })
365
390
  .sort({ published: -1, _id: -1 })
366
- .skip(MAX_READ_ITEMS)
391
+ .skip(MAX_FULL_READ_ITEMS)
367
392
  .project({ _id: 1 })
368
393
  .toArray();
369
394
 
370
- if (itemsToDelete.length > 0) {
371
- const idsToDelete = itemsToDelete.map((item) => item._id);
372
- const deleteResult = await collection.deleteMany({
373
- _id: { $in: idsToDelete },
374
- });
375
- totalDeleted += deleteResult.deletedCount;
395
+ if (itemsToStrip.length > 0) {
396
+ const idsToStrip = itemsToStrip.map((item) => item._id);
397
+ const result = await collection.updateMany(
398
+ { _id: { $in: idsToStrip } },
399
+ {
400
+ $set: { _stripped: true },
401
+ $unset: {
402
+ name: "",
403
+ content: "",
404
+ summary: "",
405
+ author: "",
406
+ category: "",
407
+ photo: "",
408
+ video: "",
409
+ audio: "",
410
+ likeOf: "",
411
+ repostOf: "",
412
+ bookmarkOf: "",
413
+ inReplyTo: "",
414
+ source: "",
415
+ },
416
+ },
417
+ );
418
+ totalStripped += result.modifiedCount;
376
419
  console.info(
377
- `[Microsub] Startup cleanup: deleted ${deleteResult.deletedCount} old items from channel "${channel.name}"`,
420
+ `[Microsub] Startup cleanup: stripped ${result.modifiedCount} old items from channel "${channel.name}"`,
378
421
  );
379
422
  }
380
423
  }
381
424
  }
382
425
  }
383
426
 
384
- if (totalDeleted > 0) {
427
+ if (totalStripped > 0) {
385
428
  console.info(
386
- `[Microsub] Startup cleanup complete: ${totalDeleted} total items deleted`,
429
+ `[Microsub] Startup cleanup complete: ${totalStripped} total items stripped`,
387
430
  );
388
431
  }
389
432
 
390
- return totalDeleted;
433
+ return totalStripped;
391
434
  }
392
435
 
393
436
  export async function markItemsRead(application, channelId, entryIds, userId) {
@@ -446,9 +489,6 @@ export async function markItemsRead(application, channelId, entryIds, userId) {
446
489
  `[Microsub] markItemsRead result: ${result.modifiedCount} items updated`,
447
490
  );
448
491
 
449
- // Cleanup old read items, keeping only the most recent
450
- await cleanupOldReadItems(collection, channelObjectId, userId);
451
-
452
492
  return result.modifiedCount;
453
493
  }
454
494
 
@@ -577,7 +617,7 @@ export async function getUnreadCount(application, channelId, userId) {
577
617
  const objectId =
578
618
  typeof channelId === "string" ? new ObjectId(channelId) : channelId;
579
619
 
580
- // Only count items from the last UNREAD_RETENTION_DAYS
620
+ // Only count items from the last UNREAD_RETENTION_DAYS, exclude stripped skeletons
581
621
  const cutoffDate = new Date();
582
622
  cutoffDate.setDate(cutoffDate.getDate() - UNREAD_RETENTION_DAYS);
583
623
 
@@ -585,6 +625,7 @@ export async function getUnreadCount(application, channelId, userId) {
585
625
  channelId: objectId,
586
626
  readBy: { $ne: userId },
587
627
  published: { $gte: cutoffDate },
628
+ _stripped: { $ne: true },
588
629
  });
589
630
  }
590
631
 
@@ -602,7 +643,11 @@ export async function searchItems(application, channelId, query, limit = 20) {
602
643
  typeof channelId === "string" ? new ObjectId(channelId) : channelId;
603
644
 
604
645
  // Use regex search (consider adding text index for better performance)
605
- const regex = new RegExp(query, "i");
646
+ const escapedQuery = query.replaceAll(
647
+ /[$()*+.?[\\\]^{|}]/g,
648
+ String.raw`\$&`,
649
+ );
650
+ const regex = new RegExp(escapedQuery, "i");
606
651
  const items = await collection
607
652
  .find({
608
653
  channelId: objectId,
@@ -4,6 +4,29 @@
4
4
  */
5
5
 
6
6
  import { mf2 } from "microformats-parser";
7
+ import sanitizeHtml from "sanitize-html";
8
+
9
/**
 * Sanitize HTML options (matches normalizer.js)
 */
// Tag allowlist, grouped by role for readability; flattened below.
const INLINE_TAGS = [
  "a", "abbr", "b", "br", "code", "em", "hr", "i", "s",
  "span", "strike", "strong", "sub", "sup", "u",
];
const BLOCK_LEVEL_TAGS = [
  "blockquote", "figcaption", "figure", "h1", "h2", "h3",
  "h4", "h5", "h6", "li", "ol", "p", "pre", "ul",
];
const TABLE_TAGS = ["table", "tbody", "td", "th", "thead", "tr"];
const MEDIA_TAGS = ["audio", "img", "source", "video"];

const SANITIZE_OPTIONS = {
  allowedTags: [
    ...INLINE_TAGS,
    ...BLOCK_LEVEL_TAGS,
    ...TABLE_TAGS,
    ...MEDIA_TAGS,
  ],
  allowedAttributes: {
    a: ["href", "title", "rel"],
    img: ["src", "alt", "title", "width", "height"],
    video: ["src", "poster", "controls", "width", "height"],
    audio: ["src", "controls"],
    source: ["src", "type"],
    "*": ["class"],
  },
  allowedSchemes: ["http", "https", "mailto"],
};
7
30
 
8
31
  /**
9
32
  * Verify a webmention
@@ -276,7 +299,7 @@ function extractContent(entry) {
276
299
 
277
300
  return {
278
301
  text: content.value,
279
- html: content.html,
302
+ html: content.html ? sanitizeHtml(content.html, SANITIZE_OPTIONS) : undefined,
280
303
  };
281
304
  }
282
305
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rmdes/indiekit-endpoint-microsub",
3
- "version": "1.0.29",
3
+ "version": "1.0.31",
4
4
  "description": "Microsub endpoint for Indiekit. Enables subscribing to feeds and reading content using the Microsub protocol.",
5
5
  "keywords": [
6
6
  "indiekit",