@rmdes/indiekit-endpoint-microsub 1.0.29 → 1.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/media/proxy.js +66 -2
- package/lib/storage/channels.js +1 -0
- package/lib/storage/items.js +85 -40
- package/lib/webmention/verifier.js +24 -1
- package/package.json +1 -1
package/lib/media/proxy.js
CHANGED
|
@@ -7,6 +7,60 @@ import crypto from "node:crypto";
|
|
|
7
7
|
|
|
8
8
|
import { getCache, setCache } from "../cache/redis.js";
|
|
9
9
|
|
|
10
|
+
/**
 * Hostnames that always point at the local machine (SSRF protection).
 * Compared against the lowercased hostname with one trailing dot removed.
 */
const BLOCKED_HOSTNAMES = new Set(["localhost", "0.0.0.0"]);

/**
 * Dotted-decimal prefixes of IPv4 ranges that should never be fetched.
 * Only applied to numeric IPv4 literals, never to domain names, so that
 * hosts such as "10.example.com" are not rejected by accident.
 */
const BLOCKED_IP_PREFIXES = [
  "127.", // Loopback
  "10.", // Private Class A
  "192.168.", // Private Class C
  "169.254.", // Link-local
  "0.", // Current network
];

/**
 * Check whether a dotted-decimal IPv4 literal is in a blocked range
 * @param {string} address - IPv4 literal, e.g. "192.168.1.10"
 * @returns {boolean} True if the address is private/internal
 */
function isPrivateIpv4(address) {
  if (BLOCKED_IP_PREFIXES.some((prefix) => address.startsWith(prefix))) {
    return true;
  }

  const [first, second] = address
    .split(".")
    .map((octet) => Number.parseInt(octet, 10));

  // 172.16.0.0/12 (172.16.x.x - 172.31.x.x)
  if (first === 172 && second >= 16 && second <= 31) {
    return true;
  }

  // 100.64.0.0/10 (carrier-grade NAT / shared address space)
  return first === 100 && second >= 64 && second <= 127;
}

/**
 * Check whether an IPv6 literal (without brackets) is in a blocked range
 * @param {string} address - Lowercase IPv6 literal, e.g. "fe80::1"
 * @returns {boolean} True if the address is private/internal
 */
function isPrivateIpv6(address) {
  // Loopback and unspecified address
  if (address === "::1" || address === "::") {
    return true;
  }

  // IPv4-mapped addresses. The URL parser serialises "::ffff:127.0.0.1" as
  // "::ffff:7f00:1", so accept both the dotted and the hexadecimal form.
  const mapped = address.match(
    /^::ffff:(?:(\d+\.\d+\.\d+\.\d+)|([\da-f]{1,4}):([\da-f]{1,4}))$/,
  );
  if (mapped) {
    if (mapped[1]) {
      return isPrivateIpv4(mapped[1]);
    }
    const high = Number.parseInt(mapped[2], 16);
    const low = Number.parseInt(mapped[3], 16);
    return isPrivateIpv4(
      `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`,
    );
  }

  const firstGroup = Number.parseInt(address.split(":")[0], 16);
  if (Number.isNaN(firstGroup)) {
    return false; // Address starts with "::" and is not one of the cases above
  }

  // fc00::/7 (unique local) and fe80::/10 (link-local)
  return (
    (firstGroup & 0xfe00) === 0xfc00 || (firstGroup & 0xffc0) === 0xfe80
  );
}

/**
 * Check if a URL's hostname is a literal private/internal address.
 *
 * This is a purely lexical check on the parsed hostname: no DNS lookup is
 * performed, so a public domain name that resolves to an internal address
 * is NOT detected here.
 * @param {string} urlString - URL to check
 * @returns {boolean} True if the URL targets a private/internal address
 *   (unparseable URLs are also reported as true)
 */
export function isPrivateUrl(urlString) {
  let hostname;
  try {
    hostname = new URL(urlString).hostname.toLowerCase();
  } catch {
    return true; // Invalid URLs are blocked
  }

  // "localhost." is the same host as "localhost"
  if (hostname.endsWith(".")) {
    hostname = hostname.slice(0, -1);
  }

  // Block known private hostnames, including *.localhost
  if (BLOCKED_HOSTNAMES.has(hostname) || hostname.endsWith(".localhost")) {
    return true;
  }

  // IPv6 literals are returned in brackets by URL.hostname
  if (hostname.startsWith("[") && hostname.endsWith("]")) {
    return isPrivateIpv6(hostname.slice(1, -1));
  }

  // Numeric hosts only: http(s) URLs are normalised to dotted-decimal by the
  // URL parser, so decimal/hex/octal spellings of an address land here too
  if (/^[\d.]+$/.test(hostname)) {
    return isPrivateIpv4(hostname);
  }

  return false;
}
|
|
63
|
+
|
|
10
64
|
const MAX_SIZE = 2 * 1024 * 1024; // 2MB max image size
|
|
11
65
|
const CACHE_TTL = 4 * 60 * 60; // 4 hours
|
|
12
66
|
const ALLOWED_TYPES = new Set([
|
|
@@ -99,6 +153,12 @@ export function proxyItemImages(item, baseUrl) {
|
|
|
99
153
|
* @returns {Promise<object|null>} Cached image data or null
|
|
100
154
|
*/
|
|
101
155
|
export async function fetchImage(redis, url) {
|
|
156
|
+
// Block private/internal URLs (defense-in-depth)
|
|
157
|
+
if (isPrivateUrl(url)) {
|
|
158
|
+
console.error(`[Microsub] Media proxy blocked private URL: ${url}`);
|
|
159
|
+
return;
|
|
160
|
+
}
|
|
161
|
+
|
|
102
162
|
const cacheKey = `media:${hashUrl(url)}`;
|
|
103
163
|
|
|
104
164
|
// Try cache first
|
|
@@ -194,6 +254,11 @@ export async function handleMediaProxy(request, response) {
|
|
|
194
254
|
return response.status(400).send("Invalid URL");
|
|
195
255
|
}
|
|
196
256
|
|
|
257
|
+
// Block requests to private/internal networks (SSRF protection)
|
|
258
|
+
if (isPrivateUrl(url)) {
|
|
259
|
+
return response.status(403).send("URL not allowed");
|
|
260
|
+
}
|
|
261
|
+
|
|
197
262
|
// Get Redis client from application
|
|
198
263
|
const { application } = request.app.locals;
|
|
199
264
|
const redis = application.redis;
|
|
@@ -202,8 +267,7 @@ export async function handleMediaProxy(request, response) {
|
|
|
202
267
|
const imageData = await fetchImage(redis, url);
|
|
203
268
|
|
|
204
269
|
if (!imageData) {
|
|
205
|
-
|
|
206
|
-
return response.redirect(url);
|
|
270
|
+
return response.status(404).send("Image not available");
|
|
207
271
|
}
|
|
208
272
|
|
|
209
273
|
// Set cache headers
|
package/lib/storage/channels.js
CHANGED
package/lib/storage/items.js
CHANGED
|
@@ -87,8 +87,9 @@ export async function getTimelineItems(application, channelId, options = {}) {
|
|
|
87
87
|
typeof channelId === "string" ? new ObjectId(channelId) : channelId;
|
|
88
88
|
const limit = parseLimit(options.limit);
|
|
89
89
|
|
|
90
|
-
// Base query - filter out read items unless showRead is true
|
|
91
|
-
|
|
90
|
+
// Base query - filter out read items unless showRead is true,
|
|
91
|
+
// and always exclude stripped dedup skeletons (no content to display)
|
|
92
|
+
const baseQuery = { channelId: objectId, _stripped: { $ne: true } };
|
|
92
93
|
if (options.userId && !options.showRead) {
|
|
93
94
|
baseQuery.readBy = { $ne: options.userId };
|
|
94
95
|
}
|
|
@@ -288,61 +289,83 @@ export async function countReadItems(application, channelId, userId) {
|
|
|
288
289
|
* @param {string} userId - User ID
|
|
289
290
|
* @returns {Promise<number>} Number of items updated
|
|
290
291
|
*/
|
|
291
|
-
// Maximum number of read items to keep per channel
|
|
292
|
-
|
|
292
|
+
// Maximum number of full read items to keep per channel before stripping content.
// Items beyond this limit are converted to lightweight dedup skeletons (channelId,
// uid, readBy) so the poller doesn't re-ingest them as new unread entries.
const MAX_FULL_READ_ITEMS = 200;

/**
 * Cleanup old read items by stripping content but preserving dedup skeletons.
 * This prevents the vicious cycle where deleted read items get re-ingested as
 * unread by the poller because the dedup record (channelId + uid) was destroyed.
 *
 * Only items that still carry content are counted against the limit, so
 * skeletons stripped on earlier runs do not trigger another scan.
 * @param {object} collection - MongoDB collection
 * @param {ObjectId} channelObjectId - Channel ObjectId
 * @param {string} userId - User ID
 * @returns {Promise<void>} Resolves once any stripping has completed
 */
async function cleanupOldReadItems(collection, channelObjectId, userId) {
  // Read items in this channel that have not been stripped yet
  const fullReadItems = {
    channelId: channelObjectId,
    readBy: userId,
    _stripped: { $ne: true }, // Don't re-count or re-strip skeletons
  };

  const readCount = await collection.countDocuments(fullReadItems);
  if (readCount <= MAX_FULL_READ_ITEMS) {
    return;
  }

  // Newest items first; everything past the retention limit gets stripped
  const itemsToStrip = await collection
    .find(fullReadItems)
    .sort({ published: -1, _id: -1 })
    .skip(MAX_FULL_READ_ITEMS)
    .project({ _id: 1 })
    .toArray();

  if (itemsToStrip.length === 0) {
    return;
  }

  const idsToStrip = itemsToStrip.map((item) => item._id);

  // Strip content but keep dedup skeleton (channelId, uid, feedId, readBy)
  const result = await collection.updateMany(
    { _id: { $in: idsToStrip } },
    {
      $set: { _stripped: true },
      $unset: {
        name: "",
        content: "",
        summary: "",
        author: "",
        category: "",
        photo: "",
        video: "",
        audio: "",
        likeOf: "",
        repostOf: "",
        bookmarkOf: "",
        inReplyTo: "",
        source: "",
      },
    },
  );

  console.info(
    `[Microsub] Stripped content from ${result.modifiedCount} old read items (keeping ${MAX_FULL_READ_ITEMS} full)`,
  );
}
|
|
330
354
|
|
|
331
355
|
/**
|
|
332
|
-
* Cleanup all read items across all channels (startup cleanup)
|
|
356
|
+
* Cleanup all read items across all channels (startup cleanup).
|
|
357
|
+
* Strips content from old read items but preserves dedup skeletons.
|
|
333
358
|
* @param {object} application - Indiekit application
|
|
334
|
-
* @returns {Promise<number>} Total number of items
|
|
359
|
+
* @returns {Promise<number>} Total number of items stripped
|
|
335
360
|
*/
|
|
336
361
|
export async function cleanupAllReadItems(application) {
|
|
337
362
|
const collection = getCollection(application);
|
|
338
363
|
const channelsCollection = application.collections.get("microsub_channels");
|
|
339
364
|
|
|
340
|
-
// Get all channels
|
|
341
365
|
const channels = await channelsCollection.find({}).toArray();
|
|
342
|
-
let
|
|
366
|
+
let totalStripped = 0;
|
|
343
367
|
|
|
344
368
|
for (const channel of channels) {
|
|
345
|
-
// Get unique userIds who have read items in this channel
|
|
346
369
|
const readByUsers = await collection.distinct("readBy", {
|
|
347
370
|
channelId: channel._id,
|
|
348
371
|
readBy: { $exists: true, $ne: [] },
|
|
@@ -354,40 +377,60 @@ export async function cleanupAllReadItems(application) {
|
|
|
354
377
|
const readCount = await collection.countDocuments({
|
|
355
378
|
channelId: channel._id,
|
|
356
379
|
readBy: userId,
|
|
380
|
+
_stripped: { $ne: true },
|
|
357
381
|
});
|
|
358
382
|
|
|
359
|
-
if (readCount >
|
|
360
|
-
const
|
|
383
|
+
if (readCount > MAX_FULL_READ_ITEMS) {
|
|
384
|
+
const itemsToStrip = await collection
|
|
361
385
|
.find({
|
|
362
386
|
channelId: channel._id,
|
|
363
387
|
readBy: userId,
|
|
388
|
+
_stripped: { $ne: true },
|
|
364
389
|
})
|
|
365
390
|
.sort({ published: -1, _id: -1 })
|
|
366
|
-
.skip(
|
|
391
|
+
.skip(MAX_FULL_READ_ITEMS)
|
|
367
392
|
.project({ _id: 1 })
|
|
368
393
|
.toArray();
|
|
369
394
|
|
|
370
|
-
if (
|
|
371
|
-
const
|
|
372
|
-
const
|
|
373
|
-
_id: { $in:
|
|
374
|
-
|
|
375
|
-
|
|
395
|
+
if (itemsToStrip.length > 0) {
|
|
396
|
+
const idsToStrip = itemsToStrip.map((item) => item._id);
|
|
397
|
+
const result = await collection.updateMany(
|
|
398
|
+
{ _id: { $in: idsToStrip } },
|
|
399
|
+
{
|
|
400
|
+
$set: { _stripped: true },
|
|
401
|
+
$unset: {
|
|
402
|
+
name: "",
|
|
403
|
+
content: "",
|
|
404
|
+
summary: "",
|
|
405
|
+
author: "",
|
|
406
|
+
category: "",
|
|
407
|
+
photo: "",
|
|
408
|
+
video: "",
|
|
409
|
+
audio: "",
|
|
410
|
+
likeOf: "",
|
|
411
|
+
repostOf: "",
|
|
412
|
+
bookmarkOf: "",
|
|
413
|
+
inReplyTo: "",
|
|
414
|
+
source: "",
|
|
415
|
+
},
|
|
416
|
+
},
|
|
417
|
+
);
|
|
418
|
+
totalStripped += result.modifiedCount;
|
|
376
419
|
console.info(
|
|
377
|
-
`[Microsub] Startup cleanup:
|
|
420
|
+
`[Microsub] Startup cleanup: stripped ${result.modifiedCount} old items from channel "${channel.name}"`,
|
|
378
421
|
);
|
|
379
422
|
}
|
|
380
423
|
}
|
|
381
424
|
}
|
|
382
425
|
}
|
|
383
426
|
|
|
384
|
-
if (
|
|
427
|
+
if (totalStripped > 0) {
|
|
385
428
|
console.info(
|
|
386
|
-
`[Microsub] Startup cleanup complete: ${
|
|
429
|
+
`[Microsub] Startup cleanup complete: ${totalStripped} total items stripped`,
|
|
387
430
|
);
|
|
388
431
|
}
|
|
389
432
|
|
|
390
|
-
return
|
|
433
|
+
return totalStripped;
|
|
391
434
|
}
|
|
392
435
|
|
|
393
436
|
export async function markItemsRead(application, channelId, entryIds, userId) {
|
|
@@ -446,9 +489,6 @@ export async function markItemsRead(application, channelId, entryIds, userId) {
|
|
|
446
489
|
`[Microsub] markItemsRead result: ${result.modifiedCount} items updated`,
|
|
447
490
|
);
|
|
448
491
|
|
|
449
|
-
// Cleanup old read items, keeping only the most recent
|
|
450
|
-
await cleanupOldReadItems(collection, channelObjectId, userId);
|
|
451
|
-
|
|
452
492
|
return result.modifiedCount;
|
|
453
493
|
}
|
|
454
494
|
|
|
@@ -577,7 +617,7 @@ export async function getUnreadCount(application, channelId, userId) {
|
|
|
577
617
|
const objectId =
|
|
578
618
|
typeof channelId === "string" ? new ObjectId(channelId) : channelId;
|
|
579
619
|
|
|
580
|
-
// Only count items from the last UNREAD_RETENTION_DAYS
|
|
620
|
+
// Only count items from the last UNREAD_RETENTION_DAYS, exclude stripped skeletons
|
|
581
621
|
const cutoffDate = new Date();
|
|
582
622
|
cutoffDate.setDate(cutoffDate.getDate() - UNREAD_RETENTION_DAYS);
|
|
583
623
|
|
|
@@ -585,6 +625,7 @@ export async function getUnreadCount(application, channelId, userId) {
|
|
|
585
625
|
channelId: objectId,
|
|
586
626
|
readBy: { $ne: userId },
|
|
587
627
|
published: { $gte: cutoffDate },
|
|
628
|
+
_stripped: { $ne: true },
|
|
588
629
|
});
|
|
589
630
|
}
|
|
590
631
|
|
|
@@ -602,7 +643,11 @@ export async function searchItems(application, channelId, query, limit = 20) {
|
|
|
602
643
|
typeof channelId === "string" ? new ObjectId(channelId) : channelId;
|
|
603
644
|
|
|
604
645
|
// Use regex search (consider adding text index for better performance)
|
|
605
|
-
const
|
|
646
|
+
const escapedQuery = query.replaceAll(
|
|
647
|
+
/[$()*+.?[\\\]^{|}]/g,
|
|
648
|
+
String.raw`\$&`,
|
|
649
|
+
);
|
|
650
|
+
const regex = new RegExp(escapedQuery, "i");
|
|
606
651
|
const items = await collection
|
|
607
652
|
.find({
|
|
608
653
|
channelId: objectId,
|
|
@@ -4,6 +4,29 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import { mf2 } from "microformats-parser";
|
|
7
|
+
import sanitizeHtml from "sanitize-html";
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Sanitize HTML options (matches normalizer.js)
|
|
11
|
+
*/
|
|
12
|
+
const SANITIZE_OPTIONS = {
|
|
13
|
+
allowedTags: [
|
|
14
|
+
"a", "abbr", "b", "blockquote", "br", "code", "em", "figcaption",
|
|
15
|
+
"figure", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "img",
|
|
16
|
+
"li", "ol", "p", "pre", "s", "span", "strike", "strong", "sub",
|
|
17
|
+
"sup", "table", "tbody", "td", "th", "thead", "tr", "u", "ul",
|
|
18
|
+
"video", "audio", "source",
|
|
19
|
+
],
|
|
20
|
+
allowedAttributes: {
|
|
21
|
+
a: ["href", "title", "rel"],
|
|
22
|
+
img: ["src", "alt", "title", "width", "height"],
|
|
23
|
+
video: ["src", "poster", "controls", "width", "height"],
|
|
24
|
+
audio: ["src", "controls"],
|
|
25
|
+
source: ["src", "type"],
|
|
26
|
+
"*": ["class"],
|
|
27
|
+
},
|
|
28
|
+
allowedSchemes: ["http", "https", "mailto"],
|
|
29
|
+
};
|
|
7
30
|
|
|
8
31
|
/**
|
|
9
32
|
* Verify a webmention
|
|
@@ -276,7 +299,7 @@ function extractContent(entry) {
|
|
|
276
299
|
|
|
277
300
|
return {
|
|
278
301
|
text: content.value,
|
|
279
|
-
html: content.html,
|
|
302
|
+
html: content.html ? sanitizeHtml(content.html, SANITIZE_OPTIONS) : undefined,
|
|
280
303
|
};
|
|
281
304
|
}
|
|
282
305
|
|
package/package.json
CHANGED