@rmdes/indiekit-endpoint-microsub 1.0.34 → 1.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/feeds/normalizer.js +46 -0
- package/package.json +1 -1
package/lib/feeds/normalizer.js
CHANGED
|
@@ -7,6 +7,28 @@ import crypto from "node:crypto";
|
|
|
7
7
|
|
|
8
8
|
import sanitizeHtml from "sanitize-html";
|
|
9
9
|
|
|
10
|
+
/**
 * Extract image URLs from HTML content.
 * Used as a fallback when no explicit photo/enclosure is provided.
 *
 * Only quoted `src` attributes of `<img>` tags are considered. Look-alike
 * attributes such as `data-src` are ignored, basic HTML entities in the
 * attribute value (e.g. `&amp;`) are decoded, and duplicates are removed
 * while preserving first-seen order.
 *
 * @param {string} html - HTML content (already sanitized)
 * @returns {string[]} Array of unique image URLs in document order
 */
function extractImagesFromHtml(html) {
  if (!html) {
    return [];
  }

  // `\s` before `src` keeps attributes like `data-src` from matching, and the
  // lazy `[^>]*?` picks the first real `src` in the tag. Each quote style gets
  // its own capture group so the other quote character may appear in the URL.
  const imgRegex = /<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)')[^>]*>/gi;

  // Serialized HTML escapes `&`, quotes and angle brackets inside attribute
  // values; decode them in a single pass so `&amp;lt;` is not double-decoded.
  const entityRegex = /&(amp|lt|gt|quot|apos|#39|#x27);/gi;
  const entities = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    "#39": "'",
    "#x27": "'",
  };

  // A Set de-duplicates in O(1) per URL and iterates in insertion order.
  const urls = new Set();
  for (const match of String(html).matchAll(imgRegex)) {
    const raw = match[1] ?? match[2];
    const src = raw.replace(
      entityRegex,
      (_entity, name) => entities[name.toLowerCase()],
    );
    if (src) {
      urls.add(src);
    }
  }
  return [...urls];
}
|
|
31
|
+
|
|
10
32
|
/**
|
|
11
33
|
* Parse a date string with fallback for non-standard formats
|
|
12
34
|
* @param {string|Date} dateInput - Date string or Date object
|
|
@@ -232,6 +254,14 @@ export function normalizeItem(item, feedUrl, feedType) {
|
|
|
232
254
|
}
|
|
233
255
|
}
|
|
234
256
|
|
|
257
|
+
// Extract images from HTML content as fallback
|
|
258
|
+
if (!normalized.photo && normalized.content?.html) {
|
|
259
|
+
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
260
|
+
if (extracted.length > 0) {
|
|
261
|
+
normalized.photo = extracted;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
235
265
|
return normalized;
|
|
236
266
|
}
|
|
237
267
|
|
|
@@ -395,6 +425,14 @@ export function normalizeJsonFeedItem(item, feedUrl) {
|
|
|
395
425
|
normalized["bookmark-of"] = [item.external_url];
|
|
396
426
|
}
|
|
397
427
|
|
|
428
|
+
// Extract images from HTML content as fallback
|
|
429
|
+
if (!normalized.photo && normalized.content?.html) {
|
|
430
|
+
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
431
|
+
if (extracted.length > 0) {
|
|
432
|
+
normalized.photo = extracted;
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
|
|
398
436
|
return normalized;
|
|
399
437
|
}
|
|
400
438
|
|
|
@@ -568,6 +606,14 @@ export function normalizeHfeedItem(entry, feedUrl) {
|
|
|
568
606
|
normalized.syndication = properties.syndication;
|
|
569
607
|
}
|
|
570
608
|
|
|
609
|
+
// Extract images from HTML content as fallback
|
|
610
|
+
if (!normalized.photo && normalized.content?.html) {
|
|
611
|
+
const extracted = extractImagesFromHtml(normalized.content.html);
|
|
612
|
+
if (extracted.length > 0) {
|
|
613
|
+
normalized.photo = extracted;
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
|
|
571
617
|
return normalized;
|
|
572
618
|
}
|
|
573
619
|
|
package/package.json
CHANGED