@rmdes/indiekit-endpoint-microsub 1.0.33 → 1.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/feeds/normalizer.js +58 -4
- package/package.json +1 -1
package/lib/feeds/normalizer.js
CHANGED
|
@@ -7,6 +7,28 @@ import crypto from "node:crypto";
|
|
|
7
7
|
|
|
8
8
|
import sanitizeHtml from "sanitize-html";
|
|
9
9
|
|
|
10
|
+
/**
 * Extract image URLs from HTML content.
 * Used as a fallback when no explicit photo/enclosure is provided.
 *
 * Only quoted `src` attributes on `<img>` tags are read. The attribute name
 * must be preceded by whitespace so that look-alikes such as `data-src` are
 * not mistaken for the real source, and the value is delimited by the same
 * quote character that opened it. Common HTML entities in the value
 * (e.g. `&amp;` in query strings) are decoded so the result is a usable URL.
 *
 * @param {string} html - HTML content (already sanitized)
 * @returns {string[]} Array of unique image URLs, in document order
 */
function extractImagesFromHtml(html) {
  if (typeof html !== "string" || html === "") {
    return [];
  }

  // Entities that an HTML serializer emits inside attribute values.
  const entities = {
    amp: "&",
    quot: '"',
    "#39": "'",
    "#x27": "'",
    lt: "<",
    gt: ">",
  };
  const entityRegex = /&(amp|quot|#39|#x27|lt|gt);/gi;

  // Group 1 holds a double-quoted value, group 2 a single-quoted one.
  // The lazy `[^>]*?` makes the first `src` attribute of each tag win.
  const imgSrcRegex =
    /<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)')[^>]*>/gi;

  // A Set keeps insertion order while de-duplicating in O(1) per URL.
  const urls = new Set();
  for (const match of html.matchAll(imgSrcRegex)) {
    const raw = match[1] ?? match[2] ?? "";
    // Single-pass replacement so "&amp;lt;" becomes "&lt;", not "<".
    const src = raw
      .replace(entityRegex, (_, name) => entities[name.toLowerCase()])
      .trim();
    if (src) {
      urls.add(src);
    }
  }
  return [...urls];
}
|
|
31
|
+
|
|
10
32
|
/**
|
|
11
33
|
* Parse a date string with fallback for non-standard formats
|
|
12
34
|
* @param {string|Date} dateInput - Date string or Date object
|
|
@@ -150,7 +172,9 @@ export function normalizeItem(item, feedUrl, feedType) {
|
|
|
150
172
|
type: "entry",
|
|
151
173
|
uid,
|
|
152
174
|
url,
|
|
153
|
-
name: item.title
|
|
175
|
+
name: item.title
|
|
176
|
+
? sanitizeHtml(item.title, { allowedTags: [] }).trim()
|
|
177
|
+
: undefined,
|
|
154
178
|
published: toISOStringSafe(item.pubdate),
|
|
155
179
|
updated: toISOStringSafe(item.date),
|
|
156
180
|
_source: {
|
|
@@ -230,6 +254,14 @@ export function normalizeItem(item, feedUrl, feedType) {
|
|
|
230
254
|
}
|
|
231
255
|
}
|
|
232
256
|
|
|
257
|
+
// Extract images from HTML content as fallback
|
|
258
|
+
if (!normalized.photo && normalized.content?.html) {
|
|
259
|
+
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
260
|
+
if (extracted.length > 0) {
|
|
261
|
+
normalized.photo = extracted;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
233
265
|
return normalized;
|
|
234
266
|
}
|
|
235
267
|
|
|
@@ -241,7 +273,9 @@ export function normalizeItem(item, feedUrl, feedType) {
|
|
|
241
273
|
*/
|
|
242
274
|
export function normalizeFeedMeta(meta, feedUrl) {
|
|
243
275
|
const normalized = {
|
|
244
|
-
name: meta.title
|
|
276
|
+
name: meta.title
|
|
277
|
+
? sanitizeHtml(meta.title, { allowedTags: [] }).trim()
|
|
278
|
+
: feedUrl,
|
|
245
279
|
};
|
|
246
280
|
|
|
247
281
|
if (meta.description) {
|
|
@@ -303,7 +337,9 @@ export function normalizeJsonFeedItem(item, feedUrl) {
|
|
|
303
337
|
type: "entry",
|
|
304
338
|
uid,
|
|
305
339
|
url,
|
|
306
|
-
name: item.title
|
|
340
|
+
name: item.title
|
|
341
|
+
? sanitizeHtml(item.title, { allowedTags: [] }).trim()
|
|
342
|
+
: undefined,
|
|
307
343
|
published: item.date_published
|
|
308
344
|
? new Date(item.date_published).toISOString()
|
|
309
345
|
: undefined,
|
|
@@ -389,6 +425,14 @@ export function normalizeJsonFeedItem(item, feedUrl) {
|
|
|
389
425
|
normalized["bookmark-of"] = [item.external_url];
|
|
390
426
|
}
|
|
391
427
|
|
|
428
|
+
// Extract images from HTML content as fallback
|
|
429
|
+
if (!normalized.photo && normalized.content?.html) {
|
|
430
|
+
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
431
|
+
if (extracted.length > 0) {
|
|
432
|
+
normalized.photo = extracted;
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
|
|
392
436
|
return normalized;
|
|
393
437
|
}
|
|
394
438
|
|
|
@@ -400,7 +444,9 @@ export function normalizeJsonFeedItem(item, feedUrl) {
|
|
|
400
444
|
*/
|
|
401
445
|
export function normalizeJsonFeedMeta(feed, feedUrl) {
|
|
402
446
|
const normalized = {
|
|
403
|
-
name: feed.title
|
|
447
|
+
name: feed.title
|
|
448
|
+
? sanitizeHtml(feed.title, { allowedTags: [] }).trim()
|
|
449
|
+
: feedUrl,
|
|
404
450
|
};
|
|
405
451
|
|
|
406
452
|
if (feed.description) {
|
|
@@ -560,6 +606,14 @@ export function normalizeHfeedItem(entry, feedUrl) {
|
|
|
560
606
|
normalized.syndication = properties.syndication;
|
|
561
607
|
}
|
|
562
608
|
|
|
609
|
+
// Extract images from HTML content as fallback
|
|
610
|
+
if (!normalized.photo && normalized.content?.html) {
|
|
611
|
+
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
612
|
+
if (extracted.length > 0) {
|
|
613
|
+
normalized.photo = extracted;
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
|
|
563
617
|
return normalized;
|
|
564
618
|
}
|
|
565
619
|
|
package/package.json
CHANGED