@neurowire/ingest 0.3.0 → 0.4.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.js +15 -8
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -125,7 +125,10 @@ function attr(node, name) {
|
|
|
125
125
|
}
|
|
126
126
|
|
|
127
127
|
// src/util.ts
|
|
128
|
-
import { GENERATOR } from "@neurowire/core";
|
|
128
|
+
import {
|
|
129
|
+
GENERATOR,
|
|
130
|
+
stableId
|
|
131
|
+
} from "@neurowire/core";
|
|
129
132
|
function resolveUrl(href, base) {
|
|
130
133
|
try {
|
|
131
134
|
return new URL(href, base).toString();
|
|
@@ -153,13 +156,17 @@ function newestEntryDate(entries) {
|
|
|
153
156
|
}
|
|
154
157
|
return max === Number.NEGATIVE_INFINITY ? void 0 : new Date(max).toISOString();
|
|
155
158
|
}
|
|
159
|
+
function withStableId(entry) {
|
|
160
|
+
if (entry.id.trim()) return entry;
|
|
161
|
+
return { ...entry, id: stableId(entry.link, entry.title) };
|
|
162
|
+
}
|
|
156
163
|
function finalizeFeed(draft, ctx) {
|
|
157
164
|
const updated = normDate(draft.updated) ?? newestEntryDate(draft.entries) ?? (/* @__PURE__ */ new Date()).toISOString();
|
|
158
165
|
const feed = {
|
|
159
166
|
id: draft.id?.trim() || ctx.sourceUrl,
|
|
160
167
|
title: draft.title?.trim() || "Untitled",
|
|
161
168
|
updated,
|
|
162
|
-
entries: draft.entries,
|
|
169
|
+
entries: draft.entries.map(withStableId),
|
|
163
170
|
generator: { name: GENERATOR.name, version: GENERATOR.version }
|
|
164
171
|
};
|
|
165
172
|
if (draft.home) feed.home = resolveUrl(draft.home, ctx.sourceUrl);
|
|
@@ -238,7 +245,7 @@ function jsonLdEntry(node, ctx) {
|
|
|
238
245
|
const title = str(node.headline) ?? str(node.name);
|
|
239
246
|
if (!url || !title) return null;
|
|
240
247
|
const link = resolveUrl(url, ctx.sourceUrl);
|
|
241
|
-
const entry = { id:
|
|
248
|
+
const entry = { id: "", title, link };
|
|
242
249
|
const published = normDate(str(node.datePublished));
|
|
243
250
|
const updated = normDate(str(node.dateModified));
|
|
244
251
|
if (published) entry.published = published;
|
|
@@ -287,7 +294,7 @@ function fromSemantic($, ctx) {
|
|
|
287
294
|
const href = $a.attr("href") ?? $el.find("a[href]").first().attr("href");
|
|
288
295
|
if (!title || !href) return;
|
|
289
296
|
const link = resolveUrl(href, ctx.sourceUrl);
|
|
290
|
-
const entry = { id:
|
|
297
|
+
const entry = { id: "", title, link };
|
|
291
298
|
const $time = $el.find("time[datetime]").first();
|
|
292
299
|
const date = normDate($time.attr("datetime") ?? $el.find("time").first().text().trim());
|
|
293
300
|
if (date) entry.published = date;
|
|
@@ -351,7 +358,7 @@ function applyTemplate($, template, ctx) {
|
|
|
351
358
|
const href = $link.attr("href") ?? $link.find("a").first().attr("href");
|
|
352
359
|
if (!title2 || !href) return;
|
|
353
360
|
const link = resolveUrl(href, ctx.sourceUrl);
|
|
354
|
-
const entry = { id:
|
|
361
|
+
const entry = { id: "", title: title2, link };
|
|
355
362
|
if (template.date) {
|
|
356
363
|
const $date = $el.find(template.date).first();
|
|
357
364
|
const date = normDate($date.attr("datetime") ?? $date.text().trim());
|
|
@@ -432,7 +439,7 @@ function atomEntry(node, ctx) {
|
|
|
432
439
|
const href = pickLink(links, "alternate") ?? attr(links[0], "href") ?? text(get(node, "id")) ?? "";
|
|
433
440
|
const link = resolveUrl(href, ctx.sourceUrl);
|
|
434
441
|
const entry = {
|
|
435
|
-
id: text(get(node, "id")) ??
|
|
442
|
+
id: text(get(node, "id")) ?? "",
|
|
436
443
|
title: text(get(node, "title")) ?? "Untitled",
|
|
437
444
|
link
|
|
438
445
|
};
|
|
@@ -468,7 +475,7 @@ function rssItem(node, ctx) {
|
|
|
468
475
|
const guid = text(get(node, "guid"));
|
|
469
476
|
const link = resolveUrl(text(get(node, "link")) ?? guid ?? "", ctx.sourceUrl);
|
|
470
477
|
const entry = {
|
|
471
|
-
id: guid ??
|
|
478
|
+
id: guid ?? "",
|
|
472
479
|
title: text(get(node, "title")) ?? "Untitled",
|
|
473
480
|
link
|
|
474
481
|
};
|
|
@@ -526,7 +533,7 @@ function parseJsonFeed(raw, ctx) {
|
|
|
526
533
|
const entries = toArray(data.items).map((item) => {
|
|
527
534
|
const link = resolveUrl(item.url ?? item.external_url ?? "", ctx.sourceUrl);
|
|
528
535
|
const entry = {
|
|
529
|
-
id: item.id !== void 0 ? String(item.id) :
|
|
536
|
+
id: item.id !== void 0 ? String(item.id) : "",
|
|
530
537
|
title: item.title ?? "Untitled",
|
|
531
538
|
link
|
|
532
539
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@neurowire/ingest",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Fetch, detect, and parse RSS/Atom/RDF/JSON feeds and HTML pages into the Neurowire model.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
"cheerio": "^1.0.0",
|
|
49
49
|
"fast-xml-parser": "^4.5.1",
|
|
50
50
|
"zod": "^3.24.1",
|
|
51
|
-
"@neurowire/core": "0.
|
|
51
|
+
"@neurowire/core": "0.5.0"
|
|
52
52
|
},
|
|
53
53
|
"devDependencies": {
|
|
54
54
|
"@types/node": "^22.10.5",
|