@rmdes/indiekit-endpoint-microsub 1.0.55 → 1.0.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/reader.js +408 -0
- package/index.js +61 -49
- package/lib/activitypub/outbox-fetcher.js +14 -2
- package/lib/cache/redis.js +26 -7
- package/lib/controllers/channels.js +2 -2
- package/lib/controllers/reader/actor.js +142 -0
- package/lib/controllers/reader/channel.js +301 -0
- package/lib/controllers/reader/compose.js +242 -0
- package/lib/controllers/reader/deck.js +129 -0
- package/lib/controllers/reader/feed-repair.js +117 -0
- package/lib/controllers/reader/feed.js +246 -0
- package/lib/controllers/reader/index.js +126 -0
- package/lib/controllers/reader/search.js +157 -0
- package/lib/controllers/reader/timeline.js +250 -0
- package/lib/controllers/search.js +6 -0
- package/lib/controllers/timeline.js +6 -4
- package/lib/feeds/atom.js +1 -1
- package/lib/feeds/capabilities.js +5 -0
- package/lib/feeds/fetcher.js +5 -28
- package/lib/feeds/hfeed.js +1 -1
- package/lib/feeds/jsonfeed.js +1 -1
- package/lib/feeds/normalizer-hfeed.js +209 -0
- package/lib/feeds/normalizer-jsonfeed.js +171 -0
- package/lib/feeds/normalizer-rss.js +178 -0
- package/lib/feeds/normalizer.js +22 -614
- package/lib/feeds/rss.js +1 -1
- package/lib/media/proxy.js +82 -27
- package/lib/polling/processor.js +30 -21
- package/lib/polling/scheduler.js +2 -0
- package/lib/realtime/broker.js +6 -1
- package/lib/storage/channels.js +53 -42
- package/lib/storage/feeds.js +3 -1
- package/lib/storage/items-read-state.js +287 -0
- package/lib/storage/items-retention.js +174 -0
- package/lib/storage/items-search.js +34 -0
- package/lib/storage/items.js +113 -610
- package/lib/storage/read-state.js +1 -1
- package/lib/utils/async-handler.js +7 -0
- package/lib/utils/constants.js +7 -0
- package/lib/utils/csrf.js +51 -0
- package/lib/utils/html.js +25 -0
- package/lib/utils/sanitize.js +61 -0
- package/lib/utils/source-type.js +28 -0
- package/lib/utils/validation.js +8 -2
- package/lib/webmention/processor.js +1 -1
- package/lib/webmention/verifier.js +10 -21
- package/lib/websub/subscriber.js +12 -0
- package/locales/de.json +3 -0
- package/locales/en.json +2 -0
- package/locales/es-419.json +3 -0
- package/locales/es.json +3 -0
- package/locales/fr.json +3 -0
- package/locales/hi.json +3 -0
- package/locales/id.json +3 -0
- package/locales/it.json +3 -0
- package/locales/nl.json +3 -0
- package/locales/pl.json +3 -0
- package/locales/pt-BR.json +3 -0
- package/locales/pt.json +3 -0
- package/locales/sr.json +3 -0
- package/locales/sv.json +3 -0
- package/locales/zh-Hans-CN.json +3 -0
- package/package.json +3 -1
- package/views/actor.njk +2 -0
- package/views/channel-new.njk +1 -0
- package/views/channel.njk +3 -344
- package/views/compose.njk +1 -0
- package/views/deck-settings.njk +1 -0
- package/views/feed-edit.njk +3 -0
- package/views/feeds.njk +4 -0
- package/views/layouts/reader.njk +1 -0
- package/views/search.njk +2 -0
- package/views/settings.njk +2 -0
- package/views/timeline.njk +3 -271
- package/lib/controllers/reader.js +0 -1580
package/lib/feeds/normalizer.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Feed normalizer
|
|
2
|
+
* Feed normalizer — shared helpers
|
|
3
3
|
* @module feeds/normalizer
|
|
4
4
|
*/
|
|
5
5
|
|
|
@@ -7,26 +7,22 @@ import crypto from "node:crypto";
|
|
|
7
7
|
|
|
8
8
|
import sanitizeHtml from "sanitize-html";
|
|
9
9
|
|
|
10
|
+
import { SANITIZE_OPTIONS } from "../utils/sanitize.js";
|
|
11
|
+
import { extractImagesFromHtml } from "../utils/html.js";
|
|
12
|
+
|
|
13
|
+
// Re-export for use by format-specific normalizers
|
|
14
|
+
export { SANITIZE_OPTIONS, sanitizeHtml, extractImagesFromHtml };
|
|
15
|
+
|
|
10
16
|
/**
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* @param {string}
|
|
14
|
-
* @returns {string
|
|
17
|
+
* Generate unique ID for an item
|
|
18
|
+
* @param {string} feedUrl - Feed URL
|
|
19
|
+
* @param {string} itemId - Item identifier (URL or ID)
|
|
20
|
+
* @returns {string} Unique ID hash
|
|
15
21
|
*/
|
|
16
|
-
function
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
const urls = [];
|
|
21
|
-
const imgRegex = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi;
|
|
22
|
-
let match;
|
|
23
|
-
while ((match = imgRegex.exec(html)) !== null) {
|
|
24
|
-
const src = match[1];
|
|
25
|
-
if (src && !urls.includes(src)) {
|
|
26
|
-
urls.push(src);
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
return urls;
|
|
22
|
+
export function generateItemUid(feedUrl, itemId) {
|
|
23
|
+
const hash = crypto.createHash("sha256");
|
|
24
|
+
hash.update(`${feedUrl}::${itemId}`);
|
|
25
|
+
return hash.digest("hex").slice(0, 24);
|
|
30
26
|
}
|
|
31
27
|
|
|
32
28
|
/**
|
|
@@ -34,7 +30,7 @@ function extractImagesFromHtml(html) {
|
|
|
34
30
|
* @param {string|Date} dateInput - Date string or Date object
|
|
35
31
|
* @returns {Date|undefined} Parsed Date or undefined if invalid
|
|
36
32
|
*/
|
|
37
|
-
function parseDate(dateInput) {
|
|
33
|
+
export function parseDate(dateInput) {
|
|
38
34
|
if (!dateInput) {
|
|
39
35
|
return;
|
|
40
36
|
}
|
|
@@ -84,581 +80,17 @@ function parseDate(dateInput) {
|
|
|
84
80
|
* @param {string|Date} dateInput - Date input
|
|
85
81
|
* @returns {string|undefined} ISO string or undefined
|
|
86
82
|
*/
|
|
87
|
-
function toISOStringSafe(dateInput) {
|
|
83
|
+
export function toISOStringSafe(dateInput) {
|
|
88
84
|
const date = parseDate(dateInput);
|
|
89
85
|
return date ? date.toISOString() : undefined;
|
|
90
86
|
}
|
|
91
87
|
|
|
92
|
-
/**
|
|
93
|
-
* Sanitize HTML options
|
|
94
|
-
*/
|
|
95
|
-
const SANITIZE_OPTIONS = {
|
|
96
|
-
allowedTags: [
|
|
97
|
-
"a",
|
|
98
|
-
"abbr",
|
|
99
|
-
"b",
|
|
100
|
-
"blockquote",
|
|
101
|
-
"br",
|
|
102
|
-
"code",
|
|
103
|
-
"em",
|
|
104
|
-
"figcaption",
|
|
105
|
-
"figure",
|
|
106
|
-
"h1",
|
|
107
|
-
"h2",
|
|
108
|
-
"h3",
|
|
109
|
-
"h4",
|
|
110
|
-
"h5",
|
|
111
|
-
"h6",
|
|
112
|
-
"hr",
|
|
113
|
-
"i",
|
|
114
|
-
"img",
|
|
115
|
-
"li",
|
|
116
|
-
"ol",
|
|
117
|
-
"p",
|
|
118
|
-
"pre",
|
|
119
|
-
"s",
|
|
120
|
-
"span",
|
|
121
|
-
"strike",
|
|
122
|
-
"strong",
|
|
123
|
-
"sub",
|
|
124
|
-
"sup",
|
|
125
|
-
"table",
|
|
126
|
-
"tbody",
|
|
127
|
-
"td",
|
|
128
|
-
"th",
|
|
129
|
-
"thead",
|
|
130
|
-
"tr",
|
|
131
|
-
"u",
|
|
132
|
-
"ul",
|
|
133
|
-
"video",
|
|
134
|
-
"audio",
|
|
135
|
-
"source",
|
|
136
|
-
],
|
|
137
|
-
allowedAttributes: {
|
|
138
|
-
a: ["href", "title", "rel"],
|
|
139
|
-
img: ["src", "alt", "title", "width", "height"],
|
|
140
|
-
video: ["src", "poster", "controls", "width", "height"],
|
|
141
|
-
audio: ["src", "controls"],
|
|
142
|
-
source: ["src", "type"],
|
|
143
|
-
"*": ["class"],
|
|
144
|
-
},
|
|
145
|
-
allowedSchemes: ["http", "https", "mailto"],
|
|
146
|
-
};
|
|
147
|
-
|
|
148
|
-
/**
|
|
149
|
-
* Generate unique ID for an item
|
|
150
|
-
* @param {string} feedUrl - Feed URL
|
|
151
|
-
* @param {string} itemId - Item identifier (URL or ID)
|
|
152
|
-
* @returns {string} Unique ID hash
|
|
153
|
-
*/
|
|
154
|
-
export function generateItemUid(feedUrl, itemId) {
|
|
155
|
-
const hash = crypto.createHash("sha256");
|
|
156
|
-
hash.update(`${feedUrl}::${itemId}`);
|
|
157
|
-
return hash.digest("hex").slice(0, 24);
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
/**
|
|
161
|
-
* Normalize RSS/Atom item from feedparser
|
|
162
|
-
* @param {object} item - Feedparser item
|
|
163
|
-
* @param {string} feedUrl - Feed URL
|
|
164
|
-
* @param {string} feedType - 'rss' or 'atom'
|
|
165
|
-
* @returns {object} Normalized jf2 item
|
|
166
|
-
*/
|
|
167
|
-
export function normalizeItem(item, feedUrl, feedType) {
|
|
168
|
-
const url = item.link || item.origlink || item.guid;
|
|
169
|
-
const uid = generateItemUid(feedUrl, item.guid || url || item.title);
|
|
170
|
-
|
|
171
|
-
const normalized = {
|
|
172
|
-
type: "entry",
|
|
173
|
-
uid,
|
|
174
|
-
url,
|
|
175
|
-
name: item.title
|
|
176
|
-
? sanitizeHtml(item.title, { allowedTags: [] }).trim()
|
|
177
|
-
: undefined,
|
|
178
|
-
published: toISOStringSafe(item.pubdate),
|
|
179
|
-
updated: toISOStringSafe(item.date),
|
|
180
|
-
_source: {
|
|
181
|
-
url: feedUrl,
|
|
182
|
-
feedUrl,
|
|
183
|
-
feedType,
|
|
184
|
-
originalId: item.guid,
|
|
185
|
-
},
|
|
186
|
-
};
|
|
187
|
-
|
|
188
|
-
// Content
|
|
189
|
-
if (item.description || item.summary) {
|
|
190
|
-
const html = item.description || item.summary;
|
|
191
|
-
normalized.content = {
|
|
192
|
-
html: sanitizeHtml(html, SANITIZE_OPTIONS),
|
|
193
|
-
text: sanitizeHtml(html, { allowedTags: [] }).trim(),
|
|
194
|
-
};
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
// Summary (prefer explicit summary over truncated content)
|
|
198
|
-
if (item.summary && item.description && item.summary !== item.description) {
|
|
199
|
-
normalized.summary = sanitizeHtml(item.summary, { allowedTags: [] }).trim();
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
// Author
|
|
203
|
-
if (item.author || item["dc:creator"]) {
|
|
204
|
-
const authorName = item.author || item["dc:creator"];
|
|
205
|
-
normalized.author = {
|
|
206
|
-
type: "card",
|
|
207
|
-
name: authorName,
|
|
208
|
-
};
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
// Categories/tags
|
|
212
|
-
if (item.categories && item.categories.length > 0) {
|
|
213
|
-
normalized.category = item.categories;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
// Enclosures (media)
|
|
217
|
-
if (item.enclosures && item.enclosures.length > 0) {
|
|
218
|
-
for (const enclosure of item.enclosures) {
|
|
219
|
-
const mediaUrl = enclosure.url;
|
|
220
|
-
const mediaType = enclosure.type || "";
|
|
221
|
-
|
|
222
|
-
if (mediaType.startsWith("image/")) {
|
|
223
|
-
normalized.photo = normalized.photo || [];
|
|
224
|
-
normalized.photo.push(mediaUrl);
|
|
225
|
-
} else if (mediaType.startsWith("video/")) {
|
|
226
|
-
normalized.video = normalized.video || [];
|
|
227
|
-
normalized.video.push(mediaUrl);
|
|
228
|
-
} else if (mediaType.startsWith("audio/")) {
|
|
229
|
-
normalized.audio = normalized.audio || [];
|
|
230
|
-
normalized.audio.push(mediaUrl);
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
// Featured image from media content
|
|
236
|
-
if (item["media:content"] && item["media:content"].url) {
|
|
237
|
-
const mediaType = item["media:content"].type || "";
|
|
238
|
-
if (
|
|
239
|
-
mediaType.startsWith("image/") ||
|
|
240
|
-
item["media:content"].medium === "image"
|
|
241
|
-
) {
|
|
242
|
-
normalized.photo = normalized.photo || [];
|
|
243
|
-
if (!normalized.photo.includes(item["media:content"].url)) {
|
|
244
|
-
normalized.photo.push(item["media:content"].url);
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
// Image from item.image
|
|
250
|
-
if (item.image && item.image.url) {
|
|
251
|
-
normalized.photo = normalized.photo || [];
|
|
252
|
-
if (!normalized.photo.includes(item.image.url)) {
|
|
253
|
-
normalized.photo.push(item.image.url);
|
|
254
|
-
}
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
// Extract images from HTML content as fallback
|
|
258
|
-
if (!normalized.photo && normalized.content?.html) {
|
|
259
|
-
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
260
|
-
if (extracted.length > 0) {
|
|
261
|
-
normalized.photo = extracted;
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
return normalized;
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
/**
|
|
269
|
-
* Normalize feed metadata from feedparser
|
|
270
|
-
* @param {object} meta - Feedparser meta object
|
|
271
|
-
* @param {string} feedUrl - Feed URL
|
|
272
|
-
* @returns {object} Normalized feed metadata
|
|
273
|
-
*/
|
|
274
|
-
export function normalizeFeedMeta(meta, feedUrl) {
|
|
275
|
-
const normalized = {
|
|
276
|
-
name: meta.title
|
|
277
|
-
? sanitizeHtml(meta.title, { allowedTags: [] }).trim()
|
|
278
|
-
: feedUrl,
|
|
279
|
-
};
|
|
280
|
-
|
|
281
|
-
if (meta.description) {
|
|
282
|
-
normalized.summary = meta.description;
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
if (meta.link) {
|
|
286
|
-
normalized.url = meta.link;
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
if (meta.image && meta.image.url) {
|
|
290
|
-
normalized.photo = meta.image.url;
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
if (meta.favicon) {
|
|
294
|
-
normalized.photo = normalized.photo || meta.favicon;
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
// Author/publisher
|
|
298
|
-
if (meta.author) {
|
|
299
|
-
normalized.author = {
|
|
300
|
-
type: "card",
|
|
301
|
-
name: meta.author,
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
// Hub for WebSub
|
|
306
|
-
if (meta.cloud && meta.cloud.href) {
|
|
307
|
-
normalized._hub = meta.cloud.href;
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
// Look for hub in links
|
|
311
|
-
if (meta.link && meta["atom:link"]) {
|
|
312
|
-
const links = Array.isArray(meta["atom:link"])
|
|
313
|
-
? meta["atom:link"]
|
|
314
|
-
: [meta["atom:link"]];
|
|
315
|
-
for (const link of links) {
|
|
316
|
-
if (link["@"] && link["@"].rel === "hub") {
|
|
317
|
-
normalized._hub = link["@"].href;
|
|
318
|
-
break;
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
return normalized;
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
/**
|
|
327
|
-
* Normalize JSON Feed item
|
|
328
|
-
* @param {object} item - JSON Feed item
|
|
329
|
-
* @param {string} feedUrl - Feed URL
|
|
330
|
-
* @returns {object} Normalized jf2 item
|
|
331
|
-
*/
|
|
332
|
-
export function normalizeJsonFeedItem(item, feedUrl) {
|
|
333
|
-
const url = item.url || item.external_url;
|
|
334
|
-
const uid = generateItemUid(feedUrl, item.id || url);
|
|
335
|
-
|
|
336
|
-
const normalized = {
|
|
337
|
-
type: "entry",
|
|
338
|
-
uid,
|
|
339
|
-
url,
|
|
340
|
-
name: item.title
|
|
341
|
-
? sanitizeHtml(item.title, { allowedTags: [] }).trim()
|
|
342
|
-
: undefined,
|
|
343
|
-
published: item.date_published
|
|
344
|
-
? new Date(item.date_published).toISOString()
|
|
345
|
-
: undefined,
|
|
346
|
-
updated: item.date_modified
|
|
347
|
-
? new Date(item.date_modified).toISOString()
|
|
348
|
-
: undefined,
|
|
349
|
-
_source: {
|
|
350
|
-
url: feedUrl,
|
|
351
|
-
feedUrl,
|
|
352
|
-
feedType: "jsonfeed",
|
|
353
|
-
originalId: item.id,
|
|
354
|
-
},
|
|
355
|
-
};
|
|
356
|
-
|
|
357
|
-
// Content
|
|
358
|
-
if (item.content_html || item.content_text) {
|
|
359
|
-
normalized.content = {};
|
|
360
|
-
if (item.content_html) {
|
|
361
|
-
normalized.content.html = sanitizeHtml(
|
|
362
|
-
item.content_html,
|
|
363
|
-
SANITIZE_OPTIONS,
|
|
364
|
-
);
|
|
365
|
-
normalized.content.text = sanitizeHtml(item.content_html, {
|
|
366
|
-
allowedTags: [],
|
|
367
|
-
}).trim();
|
|
368
|
-
} else if (item.content_text) {
|
|
369
|
-
normalized.content.text = item.content_text;
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
// Summary
|
|
374
|
-
if (item.summary) {
|
|
375
|
-
normalized.summary = item.summary;
|
|
376
|
-
}
|
|
377
|
-
|
|
378
|
-
// Author
|
|
379
|
-
if (item.author || item.authors) {
|
|
380
|
-
const author = item.author || (item.authors && item.authors[0]);
|
|
381
|
-
if (author) {
|
|
382
|
-
normalized.author = {
|
|
383
|
-
type: "card",
|
|
384
|
-
name: author.name,
|
|
385
|
-
url: author.url,
|
|
386
|
-
photo: author.avatar,
|
|
387
|
-
};
|
|
388
|
-
}
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
// Tags
|
|
392
|
-
if (item.tags && item.tags.length > 0) {
|
|
393
|
-
normalized.category = item.tags;
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
// Featured image
|
|
397
|
-
if (item.image) {
|
|
398
|
-
normalized.photo = [item.image];
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
if (item.banner_image && !normalized.photo) {
|
|
402
|
-
normalized.photo = [item.banner_image];
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
// Attachments
|
|
406
|
-
if (item.attachments && item.attachments.length > 0) {
|
|
407
|
-
for (const attachment of item.attachments) {
|
|
408
|
-
const mediaType = attachment.mime_type || "";
|
|
409
|
-
|
|
410
|
-
if (mediaType.startsWith("image/")) {
|
|
411
|
-
normalized.photo = normalized.photo || [];
|
|
412
|
-
normalized.photo.push(attachment.url);
|
|
413
|
-
} else if (mediaType.startsWith("video/")) {
|
|
414
|
-
normalized.video = normalized.video || [];
|
|
415
|
-
normalized.video.push(attachment.url);
|
|
416
|
-
} else if (mediaType.startsWith("audio/")) {
|
|
417
|
-
normalized.audio = normalized.audio || [];
|
|
418
|
-
normalized.audio.push(attachment.url);
|
|
419
|
-
}
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
// External URL
|
|
424
|
-
if (item.external_url && item.url !== item.external_url) {
|
|
425
|
-
normalized["bookmark-of"] = [item.external_url];
|
|
426
|
-
}
|
|
427
|
-
|
|
428
|
-
// Extract images from HTML content as fallback
|
|
429
|
-
if (!normalized.photo && normalized.content?.html) {
|
|
430
|
-
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
431
|
-
if (extracted.length > 0) {
|
|
432
|
-
normalized.photo = extracted;
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
|
-
|
|
436
|
-
return normalized;
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
/**
|
|
440
|
-
* Normalize JSON Feed metadata
|
|
441
|
-
* @param {object} feed - JSON Feed object
|
|
442
|
-
* @param {string} feedUrl - Feed URL
|
|
443
|
-
* @returns {object} Normalized feed metadata
|
|
444
|
-
*/
|
|
445
|
-
export function normalizeJsonFeedMeta(feed, feedUrl) {
|
|
446
|
-
const normalized = {
|
|
447
|
-
name: feed.title
|
|
448
|
-
? sanitizeHtml(feed.title, { allowedTags: [] }).trim()
|
|
449
|
-
: feedUrl,
|
|
450
|
-
};
|
|
451
|
-
|
|
452
|
-
if (feed.description) {
|
|
453
|
-
normalized.summary = feed.description;
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
if (feed.home_page_url) {
|
|
457
|
-
normalized.url = feed.home_page_url;
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
if (feed.icon) {
|
|
461
|
-
normalized.photo = feed.icon;
|
|
462
|
-
} else if (feed.favicon) {
|
|
463
|
-
normalized.photo = feed.favicon;
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
if (feed.author || feed.authors) {
|
|
467
|
-
const author = feed.author || (feed.authors && feed.authors[0]);
|
|
468
|
-
if (author) {
|
|
469
|
-
normalized.author = {
|
|
470
|
-
type: "card",
|
|
471
|
-
name: author.name,
|
|
472
|
-
url: author.url,
|
|
473
|
-
photo: author.avatar,
|
|
474
|
-
};
|
|
475
|
-
}
|
|
476
|
-
}
|
|
477
|
-
|
|
478
|
-
// Hub for WebSub
|
|
479
|
-
if (feed.hubs && feed.hubs.length > 0) {
|
|
480
|
-
normalized._hub = feed.hubs[0].url;
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
return normalized;
|
|
484
|
-
}
|
|
485
|
-
|
|
486
|
-
/**
|
|
487
|
-
* Normalize h-feed entry
|
|
488
|
-
* @param {object} entry - Microformats h-entry
|
|
489
|
-
* @param {string} feedUrl - Feed URL
|
|
490
|
-
* @returns {object} Normalized jf2 item
|
|
491
|
-
*/
|
|
492
|
-
export function normalizeHfeedItem(entry, feedUrl) {
|
|
493
|
-
const properties = entry.properties || {};
|
|
494
|
-
const url = getFirst(properties.url) || getFirst(properties.uid);
|
|
495
|
-
const uid = generateItemUid(feedUrl, getFirst(properties.uid) || url);
|
|
496
|
-
|
|
497
|
-
const normalized = {
|
|
498
|
-
type: "entry",
|
|
499
|
-
uid,
|
|
500
|
-
url,
|
|
501
|
-
_source: {
|
|
502
|
-
url: feedUrl,
|
|
503
|
-
feedUrl,
|
|
504
|
-
feedType: "hfeed",
|
|
505
|
-
originalId: getFirst(properties.uid),
|
|
506
|
-
},
|
|
507
|
-
};
|
|
508
|
-
|
|
509
|
-
// Name/title
|
|
510
|
-
if (properties.name) {
|
|
511
|
-
const name = getFirst(properties.name);
|
|
512
|
-
// Only include name if it's not just the content
|
|
513
|
-
if (
|
|
514
|
-
name &&
|
|
515
|
-
(!properties.content || name !== getContentText(properties.content))
|
|
516
|
-
) {
|
|
517
|
-
normalized.name = name;
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
|
|
521
|
-
// Published
|
|
522
|
-
if (properties.published) {
|
|
523
|
-
const published = getFirst(properties.published);
|
|
524
|
-
normalized.published = new Date(published).toISOString();
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
// Updated
|
|
528
|
-
if (properties.updated) {
|
|
529
|
-
const updated = getFirst(properties.updated);
|
|
530
|
-
normalized.updated = new Date(updated).toISOString();
|
|
531
|
-
}
|
|
532
|
-
|
|
533
|
-
// Content
|
|
534
|
-
if (properties.content) {
|
|
535
|
-
const content = getFirst(properties.content);
|
|
536
|
-
if (typeof content === "object") {
|
|
537
|
-
normalized.content = {
|
|
538
|
-
html: content.html
|
|
539
|
-
? sanitizeHtml(content.html, SANITIZE_OPTIONS)
|
|
540
|
-
: undefined,
|
|
541
|
-
text: content.value || undefined,
|
|
542
|
-
};
|
|
543
|
-
} else if (typeof content === "string") {
|
|
544
|
-
normalized.content = { text: content };
|
|
545
|
-
}
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
// Summary
|
|
549
|
-
if (properties.summary) {
|
|
550
|
-
normalized.summary = getFirst(properties.summary);
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
// Author
|
|
554
|
-
if (properties.author) {
|
|
555
|
-
const author = getFirst(properties.author);
|
|
556
|
-
normalized.author = normalizeHcard(author);
|
|
557
|
-
}
|
|
558
|
-
|
|
559
|
-
// Categories
|
|
560
|
-
if (properties.category) {
|
|
561
|
-
normalized.category = properties.category;
|
|
562
|
-
}
|
|
563
|
-
|
|
564
|
-
// Photos
|
|
565
|
-
if (properties.photo) {
|
|
566
|
-
normalized.photo = properties.photo.map((p) =>
|
|
567
|
-
typeof p === "object" ? p.value || p.url : p,
|
|
568
|
-
);
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
// Videos
|
|
572
|
-
if (properties.video) {
|
|
573
|
-
normalized.video = properties.video.map((v) =>
|
|
574
|
-
typeof v === "object" ? v.value || v.url : v,
|
|
575
|
-
);
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
// Audio
|
|
579
|
-
if (properties.audio) {
|
|
580
|
-
normalized.audio = properties.audio.map((a) =>
|
|
581
|
-
typeof a === "object" ? a.value || a.url : a,
|
|
582
|
-
);
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
// Interaction types - normalize to string URLs
|
|
586
|
-
if (properties["like-of"]) {
|
|
587
|
-
normalized["like-of"] = normalizeUrlArray(properties["like-of"]);
|
|
588
|
-
}
|
|
589
|
-
if (properties["repost-of"]) {
|
|
590
|
-
normalized["repost-of"] = normalizeUrlArray(properties["repost-of"]);
|
|
591
|
-
}
|
|
592
|
-
if (properties["bookmark-of"]) {
|
|
593
|
-
normalized["bookmark-of"] = normalizeUrlArray(properties["bookmark-of"]);
|
|
594
|
-
}
|
|
595
|
-
if (properties["in-reply-to"]) {
|
|
596
|
-
normalized["in-reply-to"] = normalizeUrlArray(properties["in-reply-to"]);
|
|
597
|
-
}
|
|
598
|
-
|
|
599
|
-
// RSVP
|
|
600
|
-
if (properties.rsvp) {
|
|
601
|
-
normalized.rsvp = getFirst(properties.rsvp);
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
// Syndication
|
|
605
|
-
if (properties.syndication) {
|
|
606
|
-
normalized.syndication = properties.syndication;
|
|
607
|
-
}
|
|
608
|
-
|
|
609
|
-
// Extract images from HTML content as fallback
|
|
610
|
-
if (!normalized.photo && normalized.content?.html) {
|
|
611
|
-
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
612
|
-
if (extracted.length > 0) {
|
|
613
|
-
normalized.photo = extracted;
|
|
614
|
-
}
|
|
615
|
-
}
|
|
616
|
-
|
|
617
|
-
return normalized;
|
|
618
|
-
}
|
|
619
|
-
|
|
620
|
-
/**
|
|
621
|
-
* Normalize h-feed metadata
|
|
622
|
-
* @param {object} hfeed - h-feed microformat object
|
|
623
|
-
* @param {string} feedUrl - Feed URL
|
|
624
|
-
* @returns {object} Normalized feed metadata
|
|
625
|
-
*/
|
|
626
|
-
export function normalizeHfeedMeta(hfeed, feedUrl) {
|
|
627
|
-
const properties = hfeed.properties || {};
|
|
628
|
-
|
|
629
|
-
const normalized = {
|
|
630
|
-
name: getFirst(properties.name) || feedUrl,
|
|
631
|
-
};
|
|
632
|
-
|
|
633
|
-
if (properties.summary) {
|
|
634
|
-
normalized.summary = getFirst(properties.summary);
|
|
635
|
-
}
|
|
636
|
-
|
|
637
|
-
if (properties.url) {
|
|
638
|
-
normalized.url = getFirst(properties.url);
|
|
639
|
-
}
|
|
640
|
-
|
|
641
|
-
if (properties.photo) {
|
|
642
|
-
normalized.photo = getFirst(properties.photo);
|
|
643
|
-
if (typeof normalized.photo === "object") {
|
|
644
|
-
normalized.photo = normalized.photo.value || normalized.photo.url;
|
|
645
|
-
}
|
|
646
|
-
}
|
|
647
|
-
|
|
648
|
-
if (properties.author) {
|
|
649
|
-
const author = getFirst(properties.author);
|
|
650
|
-
normalized.author = normalizeHcard(author);
|
|
651
|
-
}
|
|
652
|
-
|
|
653
|
-
return normalized;
|
|
654
|
-
}
|
|
655
|
-
|
|
656
88
|
/**
|
|
657
89
|
* Extract URL string from a photo value
|
|
658
90
|
* @param {object|string} photo - Photo value (can be string URL or object with value/url)
|
|
659
91
|
* @returns {string|undefined} Photo URL string
|
|
660
92
|
*/
|
|
661
|
-
function extractPhotoUrl(photo) {
|
|
93
|
+
export function extractPhotoUrl(photo) {
|
|
662
94
|
if (!photo) {
|
|
663
95
|
return;
|
|
664
96
|
}
|
|
@@ -676,7 +108,7 @@ function extractPhotoUrl(photo) {
|
|
|
676
108
|
* @param {object|string} value - URL string or object with url/value property
|
|
677
109
|
* @returns {string|undefined} URL string
|
|
678
110
|
*/
|
|
679
|
-
function extractUrl(value) {
|
|
111
|
+
export function extractUrl(value) {
|
|
680
112
|
if (!value) {
|
|
681
113
|
return;
|
|
682
114
|
}
|
|
@@ -694,43 +126,19 @@ function extractUrl(value) {
|
|
|
694
126
|
* @param {Array} urls - Array of URL strings or objects
|
|
695
127
|
* @returns {Array<string>} Array of URL strings
|
|
696
128
|
*/
|
|
697
|
-
function normalizeUrlArray(urls) {
|
|
129
|
+
export function normalizeUrlArray(urls) {
|
|
698
130
|
if (!urls || !Array.isArray(urls)) {
|
|
699
131
|
return [];
|
|
700
132
|
}
|
|
701
133
|
return urls.map((u) => extractUrl(u)).filter(Boolean);
|
|
702
134
|
}
|
|
703
135
|
|
|
704
|
-
/**
|
|
705
|
-
* Normalize h-card author
|
|
706
|
-
* @param {object|string} hcard - h-card or author name string
|
|
707
|
-
* @returns {object} Normalized author object
|
|
708
|
-
*/
|
|
709
|
-
function normalizeHcard(hcard) {
|
|
710
|
-
if (typeof hcard === "string") {
|
|
711
|
-
return { type: "card", name: hcard };
|
|
712
|
-
}
|
|
713
|
-
|
|
714
|
-
if (!hcard || !hcard.properties) {
|
|
715
|
-
return;
|
|
716
|
-
}
|
|
717
|
-
|
|
718
|
-
const properties = hcard.properties;
|
|
719
|
-
|
|
720
|
-
return {
|
|
721
|
-
type: "card",
|
|
722
|
-
name: getFirst(properties.name),
|
|
723
|
-
url: getFirst(properties.url),
|
|
724
|
-
photo: extractPhotoUrl(getFirst(properties.photo)),
|
|
725
|
-
};
|
|
726
|
-
}
|
|
727
|
-
|
|
728
136
|
/**
|
|
729
137
|
* Get first item from array or return the value itself
|
|
730
138
|
* @param {Array|*} value - Value or array of values
|
|
731
139
|
* @returns {*} First value or the value itself
|
|
732
140
|
*/
|
|
733
|
-
function getFirst(value) {
|
|
141
|
+
export function getFirst(value) {
|
|
734
142
|
if (Array.isArray(value)) {
|
|
735
143
|
return value[0];
|
|
736
144
|
}
|
|
@@ -742,7 +150,7 @@ function getFirst(value) {
|
|
|
742
150
|
* @param {Array} content - Content property array
|
|
743
151
|
* @returns {string} Text content
|
|
744
152
|
*/
|
|
745
|
-
function getContentText(content) {
|
|
153
|
+
export function getContentText(content) {
|
|
746
154
|
const first = getFirst(content);
|
|
747
155
|
if (typeof first === "object") {
|
|
748
156
|
return first.value || first.text || "";
|
package/lib/feeds/rss.js
CHANGED
|
@@ -7,7 +7,7 @@ import { Readable } from "node:stream";
|
|
|
7
7
|
|
|
8
8
|
import FeedParser from "feedparser";
|
|
9
9
|
|
|
10
|
-
import { normalizeItem, normalizeFeedMeta } from "./normalizer.js";
|
|
10
|
+
import { normalizeItem, normalizeFeedMeta } from "./normalizer-rss.js";
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
13
|
* Parse RSS feed content
|