@rmdes/indiekit-endpoint-microsub 1.0.56 → 1.0.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/reader.js +408 -0
- package/index.js +37 -36
- package/lib/cache/redis.js +12 -3
- package/lib/controllers/reader/actor.js +142 -0
- package/lib/controllers/reader/channel.js +301 -0
- package/lib/controllers/reader/compose.js +242 -0
- package/lib/controllers/reader/deck.js +129 -0
- package/lib/controllers/reader/feed-repair.js +117 -0
- package/lib/controllers/reader/feed.js +246 -0
- package/lib/controllers/reader/index.js +126 -0
- package/lib/controllers/reader/search.js +157 -0
- package/lib/controllers/reader/timeline.js +251 -0
- package/lib/controllers/timeline.js +4 -2
- package/lib/feeds/atom.js +1 -1
- package/lib/feeds/fetcher.js +1 -30
- package/lib/feeds/hfeed.js +1 -1
- package/lib/feeds/jsonfeed.js +1 -1
- package/lib/feeds/normalizer-hfeed.js +209 -0
- package/lib/feeds/normalizer-jsonfeed.js +171 -0
- package/lib/feeds/normalizer-rss.js +178 -0
- package/lib/feeds/normalizer.js +20 -560
- package/lib/feeds/rss.js +1 -1
- package/lib/polling/processor.js +3 -17
- package/lib/storage/items-read-state.js +287 -0
- package/lib/storage/items-retention.js +174 -0
- package/lib/storage/items-search.js +34 -0
- package/lib/storage/items.js +99 -590
- package/lib/storage/read-state.js +1 -1
- package/lib/utils/async-handler.js +7 -0
- package/lib/utils/html.js +25 -0
- package/lib/utils/source-type.js +28 -0
- package/lib/webmention/processor.js +1 -1
- package/locales/de.json +3 -0
- package/locales/en.json +2 -0
- package/locales/es-419.json +3 -0
- package/locales/es.json +3 -0
- package/locales/fr.json +3 -0
- package/locales/hi.json +3 -0
- package/locales/id.json +3 -0
- package/locales/it.json +3 -0
- package/locales/nl.json +3 -0
- package/locales/pl.json +3 -0
- package/locales/pt-BR.json +3 -0
- package/locales/pt.json +3 -0
- package/locales/sr.json +3 -0
- package/locales/sv.json +3 -0
- package/locales/zh-Hans-CN.json +3 -0
- package/package.json +1 -1
- package/views/channel.njk +1 -348
- package/views/timeline.njk +3 -274
- package/lib/controllers/reader.js +0 -1562
package/lib/feeds/normalizer.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Feed normalizer
|
|
2
|
+
* Feed normalizer — shared helpers
|
|
3
3
|
* @module feeds/normalizer
|
|
4
4
|
*/
|
|
5
5
|
|
|
@@ -8,27 +8,21 @@ import crypto from "node:crypto";
|
|
|
8
8
|
import sanitizeHtml from "sanitize-html";
|
|
9
9
|
|
|
10
10
|
import { SANITIZE_OPTIONS } from "../utils/sanitize.js";
|
|
11
|
+
import { extractImagesFromHtml } from "../utils/html.js";
|
|
12
|
+
|
|
13
|
+
// Re-export for use by format-specific normalizers
|
|
14
|
+
export { SANITIZE_OPTIONS, sanitizeHtml, extractImagesFromHtml };
|
|
11
15
|
|
|
12
16
|
/**
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
* @param {string} html - HTML content
|
|
16
|
-
* @returns {string[]} Array of image URLs
|
|
17
|
+
* Generate unique ID for an item
|
|
18
|
+
* @param {string} feedUrl - Feed URL
|
|
19
|
+
* @param {string} itemId - Item identifier (URL or ID)
|
|
20
|
+
* @returns {string} Unique ID hash
|
|
17
21
|
*/
|
|
18
|
-
function extractImagesFromHtml(html) {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
const urls = [];
|
|
23
|
-
const imgRegex = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi;
|
|
24
|
-
let match;
|
|
25
|
-
while ((match = imgRegex.exec(html)) !== null) {
|
|
26
|
-
const src = match[1];
|
|
27
|
-
if (src && !urls.includes(src)) {
|
|
28
|
-
urls.push(src);
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
return urls;
|
|
22
|
+
export function generateItemUid(feedUrl, itemId) {
|
|
23
|
+
const hash = crypto.createHash("sha256");
|
|
24
|
+
hash.update(`${feedUrl}::${itemId}`);
|
|
25
|
+
return hash.digest("hex").slice(0, 24);
|
|
32
26
|
}
|
|
33
27
|
|
|
34
28
|
/**
|
|
@@ -36,7 +30,7 @@ function extractImagesFromHtml(html) {
|
|
|
36
30
|
* @param {string|Date} dateInput - Date string or Date object
|
|
37
31
|
* @returns {Date|undefined} Parsed Date or undefined if invalid
|
|
38
32
|
*/
|
|
39
|
-
function parseDate(dateInput) {
|
|
33
|
+
export function parseDate(dateInput) {
|
|
40
34
|
if (!dateInput) {
|
|
41
35
|
return;
|
|
42
36
|
}
|
|
@@ -86,527 +80,17 @@ function parseDate(dateInput) {
|
|
|
86
80
|
* @param {string|Date} dateInput - Date input
|
|
87
81
|
* @returns {string|undefined} ISO string or undefined
|
|
88
82
|
*/
|
|
89
|
-
function toISOStringSafe(dateInput) {
|
|
83
|
+
export function toISOStringSafe(dateInput) {
|
|
90
84
|
const date = parseDate(dateInput);
|
|
91
85
|
return date ? date.toISOString() : undefined;
|
|
92
86
|
}
|
|
93
87
|
|
|
94
|
-
// SANITIZE_OPTIONS imported from ../utils/sanitize.js (shared with AP outbox fetcher)
|
|
95
|
-
|
|
96
|
-
/**
|
|
97
|
-
* Generate unique ID for an item
|
|
98
|
-
* @param {string} feedUrl - Feed URL
|
|
99
|
-
* @param {string} itemId - Item identifier (URL or ID)
|
|
100
|
-
* @returns {string} Unique ID hash
|
|
101
|
-
*/
|
|
102
|
-
export function generateItemUid(feedUrl, itemId) {
|
|
103
|
-
const hash = crypto.createHash("sha256");
|
|
104
|
-
hash.update(`${feedUrl}::${itemId}`);
|
|
105
|
-
return hash.digest("hex").slice(0, 24);
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
/**
|
|
109
|
-
* Normalize RSS/Atom item from feedparser
|
|
110
|
-
* @param {object} item - Feedparser item
|
|
111
|
-
* @param {string} feedUrl - Feed URL
|
|
112
|
-
* @param {string} feedType - 'rss' or 'atom'
|
|
113
|
-
* @returns {object} Normalized jf2 item
|
|
114
|
-
*/
|
|
115
|
-
export function normalizeItem(item, feedUrl, feedType) {
|
|
116
|
-
const url = item.link || item.origlink || item.guid;
|
|
117
|
-
const uid = generateItemUid(feedUrl, item.guid || url || item.title);
|
|
118
|
-
|
|
119
|
-
const normalized = {
|
|
120
|
-
type: "entry",
|
|
121
|
-
uid,
|
|
122
|
-
url,
|
|
123
|
-
name: item.title
|
|
124
|
-
? sanitizeHtml(item.title, { allowedTags: [] }).trim()
|
|
125
|
-
: undefined,
|
|
126
|
-
published: toISOStringSafe(item.pubdate),
|
|
127
|
-
updated: toISOStringSafe(item.date),
|
|
128
|
-
_source: {
|
|
129
|
-
url: feedUrl,
|
|
130
|
-
feedUrl,
|
|
131
|
-
feedType,
|
|
132
|
-
originalId: item.guid,
|
|
133
|
-
},
|
|
134
|
-
};
|
|
135
|
-
|
|
136
|
-
// Content
|
|
137
|
-
if (item.description || item.summary) {
|
|
138
|
-
const html = item.description || item.summary;
|
|
139
|
-
normalized.content = {
|
|
140
|
-
html: sanitizeHtml(html, SANITIZE_OPTIONS),
|
|
141
|
-
text: sanitizeHtml(html, { allowedTags: [] }).trim(),
|
|
142
|
-
};
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
// Summary (prefer explicit summary over truncated content)
|
|
146
|
-
if (item.summary && item.description && item.summary !== item.description) {
|
|
147
|
-
normalized.summary = sanitizeHtml(item.summary, { allowedTags: [] }).trim();
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
// Author
|
|
151
|
-
if (item.author || item["dc:creator"]) {
|
|
152
|
-
const authorName = item.author || item["dc:creator"];
|
|
153
|
-
normalized.author = {
|
|
154
|
-
type: "card",
|
|
155
|
-
name: authorName,
|
|
156
|
-
};
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
// Categories/tags
|
|
160
|
-
if (item.categories && item.categories.length > 0) {
|
|
161
|
-
normalized.category = item.categories;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
// Enclosures (media)
|
|
165
|
-
if (item.enclosures && item.enclosures.length > 0) {
|
|
166
|
-
for (const enclosure of item.enclosures) {
|
|
167
|
-
const mediaUrl = enclosure.url;
|
|
168
|
-
const mediaType = enclosure.type || "";
|
|
169
|
-
|
|
170
|
-
if (mediaType.startsWith("image/")) {
|
|
171
|
-
normalized.photo = normalized.photo || [];
|
|
172
|
-
normalized.photo.push(mediaUrl);
|
|
173
|
-
} else if (mediaType.startsWith("video/")) {
|
|
174
|
-
normalized.video = normalized.video || [];
|
|
175
|
-
normalized.video.push(mediaUrl);
|
|
176
|
-
} else if (mediaType.startsWith("audio/")) {
|
|
177
|
-
normalized.audio = normalized.audio || [];
|
|
178
|
-
normalized.audio.push(mediaUrl);
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
// Featured image from media content
|
|
184
|
-
if (item["media:content"] && item["media:content"].url) {
|
|
185
|
-
const mediaType = item["media:content"].type || "";
|
|
186
|
-
if (
|
|
187
|
-
mediaType.startsWith("image/") ||
|
|
188
|
-
item["media:content"].medium === "image"
|
|
189
|
-
) {
|
|
190
|
-
normalized.photo = normalized.photo || [];
|
|
191
|
-
if (!normalized.photo.includes(item["media:content"].url)) {
|
|
192
|
-
normalized.photo.push(item["media:content"].url);
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
// Image from item.image
|
|
198
|
-
if (item.image && item.image.url) {
|
|
199
|
-
normalized.photo = normalized.photo || [];
|
|
200
|
-
if (!normalized.photo.includes(item.image.url)) {
|
|
201
|
-
normalized.photo.push(item.image.url);
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// Extract images from HTML content as fallback
|
|
206
|
-
if (!normalized.photo && normalized.content?.html) {
|
|
207
|
-
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
208
|
-
if (extracted.length > 0) {
|
|
209
|
-
normalized.photo = extracted;
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
return normalized;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
/**
|
|
217
|
-
* Normalize feed metadata from feedparser
|
|
218
|
-
* @param {object} meta - Feedparser meta object
|
|
219
|
-
* @param {string} feedUrl - Feed URL
|
|
220
|
-
* @returns {object} Normalized feed metadata
|
|
221
|
-
*/
|
|
222
|
-
export function normalizeFeedMeta(meta, feedUrl) {
|
|
223
|
-
const normalized = {
|
|
224
|
-
name: meta.title
|
|
225
|
-
? sanitizeHtml(meta.title, { allowedTags: [] }).trim()
|
|
226
|
-
: feedUrl,
|
|
227
|
-
};
|
|
228
|
-
|
|
229
|
-
if (meta.description) {
|
|
230
|
-
normalized.summary = meta.description;
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
if (meta.link) {
|
|
234
|
-
normalized.url = meta.link;
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
if (meta.image && meta.image.url) {
|
|
238
|
-
normalized.photo = meta.image.url;
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
if (meta.favicon) {
|
|
242
|
-
normalized.photo = normalized.photo || meta.favicon;
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
// Author/publisher
|
|
246
|
-
if (meta.author) {
|
|
247
|
-
normalized.author = {
|
|
248
|
-
type: "card",
|
|
249
|
-
name: meta.author,
|
|
250
|
-
};
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
// Hub for WebSub
|
|
254
|
-
if (meta.cloud && meta.cloud.href) {
|
|
255
|
-
normalized._hub = meta.cloud.href;
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
// Look for hub in links
|
|
259
|
-
if (meta.link && meta["atom:link"]) {
|
|
260
|
-
const links = Array.isArray(meta["atom:link"])
|
|
261
|
-
? meta["atom:link"]
|
|
262
|
-
: [meta["atom:link"]];
|
|
263
|
-
for (const link of links) {
|
|
264
|
-
if (link["@"] && link["@"].rel === "hub") {
|
|
265
|
-
normalized._hub = link["@"].href;
|
|
266
|
-
break;
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
return normalized;
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
/**
|
|
275
|
-
* Normalize JSON Feed item
|
|
276
|
-
* @param {object} item - JSON Feed item
|
|
277
|
-
* @param {string} feedUrl - Feed URL
|
|
278
|
-
* @returns {object} Normalized jf2 item
|
|
279
|
-
*/
|
|
280
|
-
export function normalizeJsonFeedItem(item, feedUrl) {
|
|
281
|
-
const url = item.url || item.external_url;
|
|
282
|
-
const uid = generateItemUid(feedUrl, item.id || url);
|
|
283
|
-
|
|
284
|
-
const normalized = {
|
|
285
|
-
type: "entry",
|
|
286
|
-
uid,
|
|
287
|
-
url,
|
|
288
|
-
name: item.title
|
|
289
|
-
? sanitizeHtml(item.title, { allowedTags: [] }).trim()
|
|
290
|
-
: undefined,
|
|
291
|
-
published: item.date_published
|
|
292
|
-
? new Date(item.date_published).toISOString()
|
|
293
|
-
: undefined,
|
|
294
|
-
updated: item.date_modified
|
|
295
|
-
? new Date(item.date_modified).toISOString()
|
|
296
|
-
: undefined,
|
|
297
|
-
_source: {
|
|
298
|
-
url: feedUrl,
|
|
299
|
-
feedUrl,
|
|
300
|
-
feedType: "jsonfeed",
|
|
301
|
-
originalId: item.id,
|
|
302
|
-
},
|
|
303
|
-
};
|
|
304
|
-
|
|
305
|
-
// Content
|
|
306
|
-
if (item.content_html || item.content_text) {
|
|
307
|
-
normalized.content = {};
|
|
308
|
-
if (item.content_html) {
|
|
309
|
-
normalized.content.html = sanitizeHtml(
|
|
310
|
-
item.content_html,
|
|
311
|
-
SANITIZE_OPTIONS,
|
|
312
|
-
);
|
|
313
|
-
normalized.content.text = sanitizeHtml(item.content_html, {
|
|
314
|
-
allowedTags: [],
|
|
315
|
-
}).trim();
|
|
316
|
-
} else if (item.content_text) {
|
|
317
|
-
normalized.content.text = item.content_text;
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
// Summary
|
|
322
|
-
if (item.summary) {
|
|
323
|
-
normalized.summary = item.summary;
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
// Author
|
|
327
|
-
if (item.author || item.authors) {
|
|
328
|
-
const author = item.author || (item.authors && item.authors[0]);
|
|
329
|
-
if (author) {
|
|
330
|
-
normalized.author = {
|
|
331
|
-
type: "card",
|
|
332
|
-
name: author.name,
|
|
333
|
-
url: author.url,
|
|
334
|
-
photo: author.avatar,
|
|
335
|
-
};
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
|
|
339
|
-
// Tags
|
|
340
|
-
if (item.tags && item.tags.length > 0) {
|
|
341
|
-
normalized.category = item.tags;
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
// Featured image
|
|
345
|
-
if (item.image) {
|
|
346
|
-
normalized.photo = [item.image];
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
if (item.banner_image && !normalized.photo) {
|
|
350
|
-
normalized.photo = [item.banner_image];
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
// Attachments
|
|
354
|
-
if (item.attachments && item.attachments.length > 0) {
|
|
355
|
-
for (const attachment of item.attachments) {
|
|
356
|
-
const mediaType = attachment.mime_type || "";
|
|
357
|
-
|
|
358
|
-
if (mediaType.startsWith("image/")) {
|
|
359
|
-
normalized.photo = normalized.photo || [];
|
|
360
|
-
normalized.photo.push(attachment.url);
|
|
361
|
-
} else if (mediaType.startsWith("video/")) {
|
|
362
|
-
normalized.video = normalized.video || [];
|
|
363
|
-
normalized.video.push(attachment.url);
|
|
364
|
-
} else if (mediaType.startsWith("audio/")) {
|
|
365
|
-
normalized.audio = normalized.audio || [];
|
|
366
|
-
normalized.audio.push(attachment.url);
|
|
367
|
-
}
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
// External URL
|
|
372
|
-
if (item.external_url && item.url !== item.external_url) {
|
|
373
|
-
normalized["bookmark-of"] = [item.external_url];
|
|
374
|
-
}
|
|
375
|
-
|
|
376
|
-
// Extract images from HTML content as fallback
|
|
377
|
-
if (!normalized.photo && normalized.content?.html) {
|
|
378
|
-
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
379
|
-
if (extracted.length > 0) {
|
|
380
|
-
normalized.photo = extracted;
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
return normalized;
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
/**
|
|
388
|
-
* Normalize JSON Feed metadata
|
|
389
|
-
* @param {object} feed - JSON Feed object
|
|
390
|
-
* @param {string} feedUrl - Feed URL
|
|
391
|
-
* @returns {object} Normalized feed metadata
|
|
392
|
-
*/
|
|
393
|
-
export function normalizeJsonFeedMeta(feed, feedUrl) {
|
|
394
|
-
const normalized = {
|
|
395
|
-
name: feed.title
|
|
396
|
-
? sanitizeHtml(feed.title, { allowedTags: [] }).trim()
|
|
397
|
-
: feedUrl,
|
|
398
|
-
};
|
|
399
|
-
|
|
400
|
-
if (feed.description) {
|
|
401
|
-
normalized.summary = feed.description;
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
if (feed.home_page_url) {
|
|
405
|
-
normalized.url = feed.home_page_url;
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
if (feed.icon) {
|
|
409
|
-
normalized.photo = feed.icon;
|
|
410
|
-
} else if (feed.favicon) {
|
|
411
|
-
normalized.photo = feed.favicon;
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
if (feed.author || feed.authors) {
|
|
415
|
-
const author = feed.author || (feed.authors && feed.authors[0]);
|
|
416
|
-
if (author) {
|
|
417
|
-
normalized.author = {
|
|
418
|
-
type: "card",
|
|
419
|
-
name: author.name,
|
|
420
|
-
url: author.url,
|
|
421
|
-
photo: author.avatar,
|
|
422
|
-
};
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
// Hub for WebSub
|
|
427
|
-
if (feed.hubs && feed.hubs.length > 0) {
|
|
428
|
-
normalized._hub = feed.hubs[0].url;
|
|
429
|
-
}
|
|
430
|
-
|
|
431
|
-
return normalized;
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
/**
|
|
435
|
-
* Normalize h-feed entry
|
|
436
|
-
* @param {object} entry - Microformats h-entry
|
|
437
|
-
* @param {string} feedUrl - Feed URL
|
|
438
|
-
* @returns {object} Normalized jf2 item
|
|
439
|
-
*/
|
|
440
|
-
export function normalizeHfeedItem(entry, feedUrl) {
|
|
441
|
-
const properties = entry.properties || {};
|
|
442
|
-
const url = getFirst(properties.url) || getFirst(properties.uid);
|
|
443
|
-
const uid = generateItemUid(feedUrl, getFirst(properties.uid) || url);
|
|
444
|
-
|
|
445
|
-
const normalized = {
|
|
446
|
-
type: "entry",
|
|
447
|
-
uid,
|
|
448
|
-
url,
|
|
449
|
-
_source: {
|
|
450
|
-
url: feedUrl,
|
|
451
|
-
feedUrl,
|
|
452
|
-
feedType: "hfeed",
|
|
453
|
-
originalId: getFirst(properties.uid),
|
|
454
|
-
},
|
|
455
|
-
};
|
|
456
|
-
|
|
457
|
-
// Name/title
|
|
458
|
-
if (properties.name) {
|
|
459
|
-
const name = getFirst(properties.name);
|
|
460
|
-
// Only include name if it's not just the content
|
|
461
|
-
if (
|
|
462
|
-
name &&
|
|
463
|
-
(!properties.content || name !== getContentText(properties.content))
|
|
464
|
-
) {
|
|
465
|
-
normalized.name = name;
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
// Published
|
|
470
|
-
if (properties.published) {
|
|
471
|
-
const published = getFirst(properties.published);
|
|
472
|
-
normalized.published = new Date(published).toISOString();
|
|
473
|
-
}
|
|
474
|
-
|
|
475
|
-
// Updated
|
|
476
|
-
if (properties.updated) {
|
|
477
|
-
const updated = getFirst(properties.updated);
|
|
478
|
-
normalized.updated = new Date(updated).toISOString();
|
|
479
|
-
}
|
|
480
|
-
|
|
481
|
-
// Content
|
|
482
|
-
if (properties.content) {
|
|
483
|
-
const content = getFirst(properties.content);
|
|
484
|
-
if (typeof content === "object") {
|
|
485
|
-
normalized.content = {
|
|
486
|
-
html: content.html
|
|
487
|
-
? sanitizeHtml(content.html, SANITIZE_OPTIONS)
|
|
488
|
-
: undefined,
|
|
489
|
-
text: content.value || undefined,
|
|
490
|
-
};
|
|
491
|
-
} else if (typeof content === "string") {
|
|
492
|
-
normalized.content = { text: content };
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
// Summary
|
|
497
|
-
if (properties.summary) {
|
|
498
|
-
normalized.summary = getFirst(properties.summary);
|
|
499
|
-
}
|
|
500
|
-
|
|
501
|
-
// Author
|
|
502
|
-
if (properties.author) {
|
|
503
|
-
const author = getFirst(properties.author);
|
|
504
|
-
normalized.author = normalizeHcard(author);
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
// Categories
|
|
508
|
-
if (properties.category) {
|
|
509
|
-
normalized.category = properties.category;
|
|
510
|
-
}
|
|
511
|
-
|
|
512
|
-
// Photos
|
|
513
|
-
if (properties.photo) {
|
|
514
|
-
normalized.photo = properties.photo.map((p) =>
|
|
515
|
-
typeof p === "object" ? p.value || p.url : p,
|
|
516
|
-
);
|
|
517
|
-
}
|
|
518
|
-
|
|
519
|
-
// Videos
|
|
520
|
-
if (properties.video) {
|
|
521
|
-
normalized.video = properties.video.map((v) =>
|
|
522
|
-
typeof v === "object" ? v.value || v.url : v,
|
|
523
|
-
);
|
|
524
|
-
}
|
|
525
|
-
|
|
526
|
-
// Audio
|
|
527
|
-
if (properties.audio) {
|
|
528
|
-
normalized.audio = properties.audio.map((a) =>
|
|
529
|
-
typeof a === "object" ? a.value || a.url : a,
|
|
530
|
-
);
|
|
531
|
-
}
|
|
532
|
-
|
|
533
|
-
// Interaction types - normalize to string URLs
|
|
534
|
-
if (properties["like-of"]) {
|
|
535
|
-
normalized["like-of"] = normalizeUrlArray(properties["like-of"]);
|
|
536
|
-
}
|
|
537
|
-
if (properties["repost-of"]) {
|
|
538
|
-
normalized["repost-of"] = normalizeUrlArray(properties["repost-of"]);
|
|
539
|
-
}
|
|
540
|
-
if (properties["bookmark-of"]) {
|
|
541
|
-
normalized["bookmark-of"] = normalizeUrlArray(properties["bookmark-of"]);
|
|
542
|
-
}
|
|
543
|
-
if (properties["in-reply-to"]) {
|
|
544
|
-
normalized["in-reply-to"] = normalizeUrlArray(properties["in-reply-to"]);
|
|
545
|
-
}
|
|
546
|
-
|
|
547
|
-
// RSVP
|
|
548
|
-
if (properties.rsvp) {
|
|
549
|
-
normalized.rsvp = getFirst(properties.rsvp);
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
// Syndication
|
|
553
|
-
if (properties.syndication) {
|
|
554
|
-
normalized.syndication = properties.syndication;
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
// Extract images from HTML content as fallback
|
|
558
|
-
if (!normalized.photo && normalized.content?.html) {
|
|
559
|
-
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
560
|
-
if (extracted.length > 0) {
|
|
561
|
-
normalized.photo = extracted;
|
|
562
|
-
}
|
|
563
|
-
}
|
|
564
|
-
|
|
565
|
-
return normalized;
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
/**
|
|
569
|
-
* Normalize h-feed metadata
|
|
570
|
-
* @param {object} hfeed - h-feed microformat object
|
|
571
|
-
* @param {string} feedUrl - Feed URL
|
|
572
|
-
* @returns {object} Normalized feed metadata
|
|
573
|
-
*/
|
|
574
|
-
export function normalizeHfeedMeta(hfeed, feedUrl) {
|
|
575
|
-
const properties = hfeed.properties || {};
|
|
576
|
-
|
|
577
|
-
const normalized = {
|
|
578
|
-
name: getFirst(properties.name) || feedUrl,
|
|
579
|
-
};
|
|
580
|
-
|
|
581
|
-
if (properties.summary) {
|
|
582
|
-
normalized.summary = getFirst(properties.summary);
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
if (properties.url) {
|
|
586
|
-
normalized.url = getFirst(properties.url);
|
|
587
|
-
}
|
|
588
|
-
|
|
589
|
-
if (properties.photo) {
|
|
590
|
-
normalized.photo = getFirst(properties.photo);
|
|
591
|
-
if (typeof normalized.photo === "object") {
|
|
592
|
-
normalized.photo = normalized.photo.value || normalized.photo.url;
|
|
593
|
-
}
|
|
594
|
-
}
|
|
595
|
-
|
|
596
|
-
if (properties.author) {
|
|
597
|
-
const author = getFirst(properties.author);
|
|
598
|
-
normalized.author = normalizeHcard(author);
|
|
599
|
-
}
|
|
600
|
-
|
|
601
|
-
return normalized;
|
|
602
|
-
}
|
|
603
|
-
|
|
604
88
|
/**
|
|
605
89
|
* Extract URL string from a photo value
|
|
606
90
|
* @param {object|string} photo - Photo value (can be string URL or object with value/url)
|
|
607
91
|
* @returns {string|undefined} Photo URL string
|
|
608
92
|
*/
|
|
609
|
-
function extractPhotoUrl(photo) {
|
|
93
|
+
export function extractPhotoUrl(photo) {
|
|
610
94
|
if (!photo) {
|
|
611
95
|
return;
|
|
612
96
|
}
|
|
@@ -624,7 +108,7 @@ function extractPhotoUrl(photo) {
|
|
|
624
108
|
* @param {object|string} value - URL string or object with url/value property
|
|
625
109
|
* @returns {string|undefined} URL string
|
|
626
110
|
*/
|
|
627
|
-
function extractUrl(value) {
|
|
111
|
+
export function extractUrl(value) {
|
|
628
112
|
if (!value) {
|
|
629
113
|
return;
|
|
630
114
|
}
|
|
@@ -642,43 +126,19 @@ function extractUrl(value) {
|
|
|
642
126
|
* @param {Array} urls - Array of URL strings or objects
|
|
643
127
|
* @returns {Array<string>} Array of URL strings
|
|
644
128
|
*/
|
|
645
|
-
function normalizeUrlArray(urls) {
|
|
129
|
+
export function normalizeUrlArray(urls) {
|
|
646
130
|
if (!urls || !Array.isArray(urls)) {
|
|
647
131
|
return [];
|
|
648
132
|
}
|
|
649
133
|
return urls.map((u) => extractUrl(u)).filter(Boolean);
|
|
650
134
|
}
|
|
651
135
|
|
|
652
|
-
/**
|
|
653
|
-
* Normalize h-card author
|
|
654
|
-
* @param {object|string} hcard - h-card or author name string
|
|
655
|
-
* @returns {object} Normalized author object
|
|
656
|
-
*/
|
|
657
|
-
function normalizeHcard(hcard) {
|
|
658
|
-
if (typeof hcard === "string") {
|
|
659
|
-
return { type: "card", name: hcard };
|
|
660
|
-
}
|
|
661
|
-
|
|
662
|
-
if (!hcard || !hcard.properties) {
|
|
663
|
-
return;
|
|
664
|
-
}
|
|
665
|
-
|
|
666
|
-
const properties = hcard.properties;
|
|
667
|
-
|
|
668
|
-
return {
|
|
669
|
-
type: "card",
|
|
670
|
-
name: getFirst(properties.name),
|
|
671
|
-
url: getFirst(properties.url),
|
|
672
|
-
photo: extractPhotoUrl(getFirst(properties.photo)),
|
|
673
|
-
};
|
|
674
|
-
}
|
|
675
|
-
|
|
676
136
|
/**
|
|
677
137
|
* Get first item from array or return the value itself
|
|
678
138
|
* @param {Array|*} value - Value or array of values
|
|
679
139
|
* @returns {*} First value or the value itself
|
|
680
140
|
*/
|
|
681
|
-
function getFirst(value) {
|
|
141
|
+
export function getFirst(value) {
|
|
682
142
|
if (Array.isArray(value)) {
|
|
683
143
|
return value[0];
|
|
684
144
|
}
|
|
@@ -690,7 +150,7 @@ function getFirst(value) {
|
|
|
690
150
|
* @param {Array} content - Content property array
|
|
691
151
|
* @returns {string} Text content
|
|
692
152
|
*/
|
|
693
|
-
function getContentText(content) {
|
|
153
|
+
export function getContentText(content) {
|
|
694
154
|
const first = getFirst(content);
|
|
695
155
|
if (typeof first === "object") {
|
|
696
156
|
return first.value || first.text || "";
|
package/lib/feeds/rss.js
CHANGED
|
@@ -7,7 +7,7 @@ import { Readable } from "node:stream";
|
|
|
7
7
|
|
|
8
8
|
import FeedParser from "feedparser";
|
|
9
9
|
|
|
10
|
-
import { normalizeItem, normalizeFeedMeta } from "./normalizer.js";
|
|
10
|
+
import { normalizeItem, normalizeFeedMeta } from "./normalizer-rss.js";
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
13
|
* Parse RSS feed content
|
package/lib/polling/processor.js
CHANGED
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
} from "../storage/feeds.js";
|
|
19
19
|
import { passesRegexFilter, passesTypeFilter } from "../storage/filters.js";
|
|
20
20
|
import { addItem } from "../storage/items.js";
|
|
21
|
+
import { classifyUrl } from "../utils/source-type.js";
|
|
21
22
|
import {
|
|
22
23
|
subscribe as websubSubscribe,
|
|
23
24
|
getCallbackUrl,
|
|
@@ -43,7 +44,7 @@ export async function processFeed(application, feed) {
|
|
|
43
44
|
|
|
44
45
|
try {
|
|
45
46
|
// Get Redis client for caching
|
|
46
|
-
const redis = getRedisClient(application);
|
|
47
|
+
const redis = await getRedisClient(application);
|
|
47
48
|
|
|
48
49
|
// Fetch and parse the feed
|
|
49
50
|
const parsed = await fetchAndParseFeed(feed.url, {
|
|
@@ -96,7 +97,7 @@ export async function processFeed(application, feed) {
|
|
|
96
97
|
if (feed.capabilities?.source_type) {
|
|
97
98
|
item._source.source_type = feed.capabilities.source_type;
|
|
98
99
|
} else {
|
|
99
|
-
item._source.source_type = inferSourceType(feed.url);
|
|
100
|
+
item._source.source_type = classifyUrl(feed.url).type;
|
|
100
101
|
}
|
|
101
102
|
|
|
102
103
|
// Store the item
|
|
@@ -242,21 +243,6 @@ export async function processFeed(application, feed) {
|
|
|
242
243
|
return result;
|
|
243
244
|
}
|
|
244
245
|
|
|
245
|
-
/**
|
|
246
|
-
* Infer source type from feed URL when capabilities haven't been detected yet
|
|
247
|
-
* @param {string} url - Feed URL
|
|
248
|
-
* @returns {string} Source type
|
|
249
|
-
*/
|
|
250
|
-
function inferSourceType(url) {
|
|
251
|
-
if (!url) return "web";
|
|
252
|
-
const lower = url.toLowerCase();
|
|
253
|
-
if (lower.includes("bsky.app") || lower.includes("bluesky")) return "bluesky";
|
|
254
|
-
if (lower.includes("mastodon.") || lower.includes("mstdn.") ||
|
|
255
|
-
lower.includes("fosstodon.") || lower.includes("pleroma.") ||
|
|
256
|
-
lower.includes("misskey.") || lower.includes("pixelfed.")) return "mastodon";
|
|
257
|
-
return "web";
|
|
258
|
-
}
|
|
259
|
-
|
|
260
246
|
/**
|
|
261
247
|
* Check if an item passes channel filters
|
|
262
248
|
* @param {object} item - Feed item
|