@nexpress/wp-import 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +34 -0
- package/dist/index.d.ts +1155 -0
- package/dist/index.js +2448 -0
- package/package.json +58 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,2448 @@
|
|
|
1
|
+
// src/parse/wxr.ts
|
|
2
|
+
import { XMLParser } from "fast-xml-parser";
|
|
3
|
+
/**
 * Parse a complete WXR (WordPress eXtended RSS) document held in memory.
 *
 * Attributes are kept (prefixed with `@_`), tag and attribute values stay
 * strings, CDATA is surfaced under `__cdata`, and every tag listed in
 * MULTI_VALUE_TAGS is always delivered as an array.
 *
 * @param {string} xml - The WXR document text.
 * @returns {{site: object, authors: object[], terms: object[], records: object[]}}
 * @throws {Error} When the document has no `<rss><channel>` root.
 */
function parseWxr(xml) {
  const parserOptions = {
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    parseAttributeValue: false,
    parseTagValue: false,
    trimValues: true,
    cdataPropName: "__cdata",
    isArray: (name) => MULTI_VALUE_TAGS.has(name)
  };
  const document = new XMLParser(parserOptions).parse(xml);
  const channel = document.rss?.channel;
  if (!channel) {
    throw new Error("Invalid WXR: missing <rss><channel> root");
  }
  return {
    site: parseSite(channel),
    authors: parseAuthors(channel),
    terms: parseChannelTerms(channel),
    records: parseRecords(channel)
  };
}
|
|
24
|
+
// Tag names the XML parser must always deliver as arrays, even when only a
// single element is present, so the readers below can iterate them directly.
var MULTI_VALUE_TAGS = /* @__PURE__ */ new Set([
  // channel level
  "item",
  "wp:author",
  "wp:category",
  "wp:tag",
  "wp:term",
  // item level
  "wp:postmeta",
  "wp:comment",
  "category"
]);
|
|
34
|
+
/**
 * Reduce a parsed XML node to its text.
 *
 * A node may be a plain string, or an object carrying its text under
 * `__cdata` or `#text` (`__cdata` wins when both are strings).
 *
 * @param {unknown} value - Parsed node value.
 * @returns {string} The node text, or "" when none can be read.
 */
function asText(value) {
  if (!value) return "";
  if (typeof value === "string") return value;
  for (const key of ["__cdata", "#text"]) {
    const candidate = value[key];
    if (typeof candidate === "string") return candidate;
  }
  return "";
}
|
|
41
|
+
/**
 * Like asText, but reports missing or empty text as `null`.
 *
 * @param {unknown} value - Parsed node value.
 * @returns {string|null} The node text, or null when it is empty.
 */
function asOptionalText(value) {
  // asText always returns a string, so only "" is falsy here.
  return asText(value) || null;
}
|
|
45
|
+
/**
 * Read the site-level fields from the `<channel>` element.
 *
 * @param {object} channel - Parsed `<channel>` node.
 * @returns {object} Site descriptor; `language` is null when absent, every
 *   other field defaults to "".
 */
function parseSite(channel) {
  const read = (tag) => asText(channel[tag]);
  return {
    title: read("title"),
    link: read("link"),
    description: read("description"),
    baseSiteUrl: read("wp:base_site_url"),
    baseBlogUrl: read("wp:base_blog_url"),
    language: asOptionalText(channel.language)
  };
}
|
|
55
|
+
/**
 * Convert every channel-level `<wp:author>` element into an author record.
 *
 * @param {object} channel - Parsed `<channel>` node.
 * @returns {object[]} One record per author; `wpId` is 0 when the id is not
 *   numeric and `description` is null when empty.
 */
function parseAuthors(channel) {
  const toAuthor = (row) => {
    const field = (tag) => asText(row[tag]);
    return {
      wpId: parseIntOrZero(field("wp:author_id")),
      login: field("wp:author_login"),
      email: field("wp:author_email"),
      displayName: field("wp:author_display_name"),
      description: asOptionalText(row["wp:author_description"])
    };
  };
  return (channel["wp:author"] ?? []).map((row) => toAuthor(row));
}
|
|
65
|
+
/**
 * Gather the channel-level term declarations.
 *
 * Output order is: all `<wp:category>` rows, then all `<wp:tag>` rows, then
 * all `<wp:term>` rows. A `<wp:term>` without a taxonomy is dropped.
 *
 * @param {object} channel - Parsed `<channel>` node.
 * @returns {{taxonomy: string, slug: string, name: string}[]}
 */
function parseChannelTerms(channel) {
  const categories = (channel["wp:category"] ?? []).map((row) => ({
    taxonomy: "category",
    slug: asText(row["wp:category_nicename"]),
    name: asText(row["wp:cat_name"])
  }));
  const tags = (channel["wp:tag"] ?? []).map((row) => ({
    taxonomy: "post_tag",
    slug: asText(row["wp:tag_slug"]),
    name: asText(row["wp:tag_name"])
  }));
  const customTerms = (channel["wp:term"] ?? []).flatMap((row) => {
    // Two tag spellings are accepted for the taxonomy name.
    const taxonomy = asText(row["wp:term_taxonomy"]) || asText(row["wp:taxonomy"]);
    if (!taxonomy) return [];
    return [
      {
        taxonomy,
        slug: asText(row["wp:term_slug"]),
        name: asText(row["wp:term_name"])
      }
    ];
  });
  return categories.concat(tags, customTerms);
}
|
|
92
|
+
/**
 * Parse every `<item>` of the channel into a record.
 *
 * @param {object} channel - Parsed `<channel>` node.
 * @returns {object[]} One record per item, in document order.
 */
function parseRecords(channel) {
  const records = [];
  for (const item of channel.item ?? []) {
    records.push(parseRecord(item));
  }
  return records;
}
|
|
96
|
+
/**
 * Convert one parsed `<item>` into an import record.
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {object} Record with ids, type, status, text fields, GMT dates
 *   (kept as the raw strings from the export), terms, postmeta, media
 *   references and comments.
 */
function parseRecord(item) {
  const text = (tag) => asText(item[tag]);
  const meta = parseMeta(item);
  const wpType = text("wp:post_type");
  const rawContent = text("content:encoded");
  const mediaRefs = parseMediaRefs({
    rawContent,
    attachmentUrl: text("wp:attachment_url"),
    wpType,
    meta
  });
  return {
    wpId: parseIntOrZero(text("wp:post_id")),
    wpType,
    status: coerceStatus(text("wp:status")),
    slug: text("wp:post_name"),
    title: asText(item.title),
    excerpt: asOptionalText(item["excerpt:encoded"]),
    rawContent,
    wpAuthorLogin: text("dc:creator"),
    publishedAt: text("wp:post_date_gmt"),
    updatedAt: text("wp:post_modified_gmt"),
    terms: parseItemCategories(item),
    meta,
    mediaRefs,
    comments: parseComments(item)
  };
}
|
|
119
|
+
/**
 * Collect the `<category>` assignments attached to one item.
 *
 * Each row normally carries `domain` / `nicename` attributes plus its name
 * as CDATA or plain text. A `<category>` written without any attributes may
 * arrive as a bare string rather than an object (the same shape asText
 * accounts for); previously such a row lost its name and came out as "".
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {{taxonomy: string, slug: string, name: string}[]} One entry per
 *   row; taxonomy defaults to "category" and slug to "".
 */
function parseItemCategories(item) {
  return (item.category ?? []).map((row) => {
    if (typeof row === "string") {
      // Attribute-less element: the string itself is the term name.
      return { taxonomy: "category", slug: "", name: row };
    }
    return {
      taxonomy: row["@_domain"] ?? "category",
      slug: row["@_nicename"] ?? "",
      // Real WXR exports wrap the term name in CDATA, but tests and
      // hand-written XML may use plain text. Read both.
      name: row.__cdata ?? row["#text"] ?? ""
    };
  });
}
|
|
133
|
+
/**
 * Flatten an item's `<wp:postmeta>` rows into a key → value object.
 *
 * Rows with an empty key are ignored; when a key repeats, the last value
 * wins.
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {Object<string, string>}
 */
function parseMeta(item) {
  const meta = {};
  (item["wp:postmeta"] ?? []).forEach((row) => {
    const key = asText(row["wp:meta_key"]);
    if (key) {
      meta[key] = asText(row["wp:meta_value"]);
    }
  });
  return meta;
}
|
|
142
|
+
/**
 * Convert the `<wp:comment>` rows of one item into comment records.
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {object[]} One record per comment. `parentWpId` is null unless
 *   the parent id is a positive integer; `approved` is true only when the
 *   export value is exactly "1".
 */
function parseComments(item) {
  const toComment = (row) => {
    const field = (tag) => asText(row[tag]);
    return {
      wpId: parseIntOrZero(field("wp:comment_id")),
      parentWpId: parseOptionalInt(field("wp:comment_parent")),
      authorName: field("wp:comment_author"),
      authorEmail: asOptionalText(row["wp:comment_author_email"]),
      authorUrl: asOptionalText(row["wp:comment_author_url"]),
      date: field("wp:comment_date_gmt"),
      content: field("wp:comment_content"),
      approved: field("wp:comment_approved") === "1"
    };
  };
  return (item["wp:comment"] ?? []).map((row) => toComment(row));
}
|
|
155
|
+
// Matches a whole `<img …>` tag.
var IMG_TAG_RE = /<img\b[^>]*>/gi;
// Captures the quoted value of the real `src` attribute in group 1.
//  - The leading lookbehind rejects look-alikes such as `data-src=` or
//    `lazy-src=`, which the old `\bsrc` pattern matched first.
//  - The value runs to the matching quote of the same kind, so a URL that
//    contains the other quote character is no longer truncated.
var SRC_ATTR_RE = /(?<![\w:.-])src\s*=\s*["']((?<=")[^"]+|(?<=')[^']+)["']/i;
// WordPress marks inline images with a `wp-image-<attachment id>` class.
var WP_ATTACHMENT_ID_RE = /wp-image-(\d+)/i;

/**
 * Collect the media references of one record.
 *
 * Order of the result:
 *   1. a "featured" ref (empty URL, id from the `_thumbnail_id` postmeta),
 *   2. one "inline" ref per distinct `<img src>` URL found in the content,
 *   3. for attachment records, an "inline" ref for the attachment URL.
 *
 * @param {{rawContent: string, attachmentUrl: string, wpType: string, meta: Object<string, string>}} args
 * @returns {{sourceUrl: string, wpAttachmentId: number|null, kind: string}[]}
 */
function parseMediaRefs(args) {
  const refs = [];
  const thumbId = args.meta._thumbnail_id;
  if (thumbId) {
    refs.push({
      sourceUrl: "",
      wpAttachmentId: parseIntOrZero(thumbId),
      kind: "featured"
    });
  }
  // Inline images are de-duplicated by URL within a single record.
  const seen = /* @__PURE__ */ new Set();
  for (const tagMatch of args.rawContent.matchAll(IMG_TAG_RE)) {
    const tag = tagMatch[0];
    const url = SRC_ATTR_RE.exec(tag)?.[1];
    if (!url || seen.has(url)) continue;
    seen.add(url);
    const idMatch = WP_ATTACHMENT_ID_RE.exec(tag);
    refs.push({
      sourceUrl: url,
      wpAttachmentId: idMatch ? parseIntOrZero(idMatch[1] ?? "") : null,
      kind: "inline"
    });
  }
  if (args.wpType === "attachment" && args.attachmentUrl) {
    refs.push({
      sourceUrl: args.attachmentUrl,
      wpAttachmentId: null,
      kind: "inline"
    });
  }
  return refs;
}
|
|
191
|
+
/**
 * Normalise a WordPress post status.
 *
 * @param {string} raw - Status text from the export.
 * @returns {string} `raw` when it is one of the recognised statuses,
 *   otherwise "draft".
 */
function coerceStatus(raw) {
  const recognised = ["publish", "draft", "private", "pending", "trash", "auto-draft"];
  return recognised.includes(raw) ? raw : "draft";
}
|
|
204
|
+
/**
 * Parse a base-10 integer, substituting 0 when the text does not yield a
 * finite number.
 *
 * @param {string} value - Text to parse.
 * @returns {number}
 */
function parseIntOrZero(value) {
  const parsed = Number.parseInt(value, 10);
  if (!Number.isFinite(parsed)) {
    return 0;
  }
  return parsed;
}
|
|
208
|
+
/**
 * Parse a base-10 integer that is only meaningful when strictly positive.
 *
 * @param {string} value - Text to parse.
 * @returns {number|null} The integer, or null for empty, non-numeric,
 *   zero or negative input.
 */
function parseOptionalInt(value) {
  if (!value) return null;
  const parsed = Number.parseInt(value, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return null;
  }
  return parsed;
}
|
|
213
|
+
|
|
214
|
+
// src/parse/wxr-stream.ts
|
|
215
|
+
import { createReadStream } from "fs";
|
|
216
|
+
// Default chunk size handed to createReadStream as `highWaterMark` (64 KiB).
var DEFAULT_HIGH_WATER_MARK = 1024 * 64;
// Default ceiling on buffered text before the stream parser gives up
// (64 * 1024 * 1024). It is compared against string length.
var DEFAULT_MAX_ITEM_BYTES = 1024 * 1024 * 64;
// Literal markers used to cut individual items out of the raw text.
var ITEM_OPEN = "<item>";
var ITEM_CLOSE = "</item>";
|
|
220
|
+
/**
 * Raised by the streaming parser when the header or a single item grows
 * past the configured size limit.
 */
var WpImportStreamError = class extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    // Make the error identifiable by name in logs and checks.
    this.name = "WpImportStreamError";
  }
};
|
|
226
|
+
/**
 * Stream-parse a WXR file without holding the whole document in memory.
 *
 * The text before the first `<item>` is parsed eagerly as the header (site,
 * authors, channel terms). Items are then cut out of the stream one at a
 * time, wrapped in a minimal channel and parsed on demand.
 *
 * The underlying read stream is destroyed when header scanning or parsing
 * fails, and when item iteration finishes, throws, or is abandoned early
 * (e.g. `break` inside `for await`). Previously the file handle leaked in
 * all of those cases. The stream stays open while the caller has not yet
 * started iterating `items`.
 *
 * `items` shares one underlying reader, so it is meant to be iterated once.
 *
 * @param {string} path - Path of the WXR file.
 * @param {{highWaterMark?: number, maxItemBytes?: number}} [options]
 *   `highWaterMark` is the read chunk size; `maxItemBytes` caps the buffered
 *   text (measured as string length) for the header and for any one item.
 * @returns {Promise<{header: {site: object, authors: object[], terms: object[]}, items: AsyncIterable<object>}>}
 * @throws {WpImportStreamError} When the header exceeds `maxItemBytes`
 *   without any `<item>` being found (thrown while iterating when an item
 *   exceeds the limit).
 * @throws {Error} When the header is not a valid WXR channel.
 */
async function parseWxrStream(path, options = {}) {
  const highWaterMark = options.highWaterMark ?? DEFAULT_HIGH_WATER_MARK;
  const maxItemBytes = options.maxItemBytes ?? DEFAULT_MAX_ITEM_BYTES;
  const stream = createReadStream(path, { encoding: "utf8", highWaterMark });
  const reader = stream[Symbol.asyncIterator]();
  let headerBundle;
  let tail;
  try {
    let buffer = "";
    let firstItemAt = -1;
    // Accumulate chunks until the first <item> shows up (or the file ends).
    while (firstItemAt < 0) {
      const next = await reader.next();
      if (next.done) break;
      buffer += next.value;
      firstItemAt = buffer.indexOf(ITEM_OPEN);
      if (buffer.length > maxItemBytes && firstItemAt < 0) {
        throw new WpImportStreamError(
          `header exceeded ${maxItemBytes} bytes without finding any <item> \u2014 is this a WXR file?`
        );
      }
    }
    // Close the truncated header so it parses as a document of its own.
    const headerXml = firstItemAt >= 0 ? buffer.slice(0, firstItemAt) + "</channel></rss>" : buffer + "</channel></rss>";
    headerBundle = parseWxr(headerXml);
    tail = firstItemAt >= 0 ? buffer.slice(firstItemAt) : "";
  } catch (error) {
    // Nothing is handed back to the caller on failure, so release the file.
    stream.destroy();
    throw error;
  }
  async function* iterate() {
    let local = tail;
    let exhausted = false;
    try {
      while (true) {
        const open = local.indexOf(ITEM_OPEN);
        if (open < 0) {
          // No item start buffered: pull more text, or stop at end of file.
          if (exhausted) return;
          const next = await reader.next();
          if (next.done) {
            exhausted = true;
            continue;
          }
          local += next.value;
          if (local.length > maxItemBytes) {
            throw new WpImportStreamError(
              `WXR item exceeded ${maxItemBytes} bytes \u2014 abort. Likely an embedded base64 payload that won't fit; raise --max-item-bytes if you really need it.`
            );
          }
          continue;
        }
        const close = local.indexOf(ITEM_CLOSE, open);
        if (close < 0) {
          // Item started but not finished: pull more text. An unterminated
          // trailing item is dropped once the stream is exhausted.
          const next = await reader.next();
          if (next.done) {
            if (exhausted) return;
            exhausted = true;
            continue;
          }
          local += next.value;
          if (local.length > maxItemBytes) {
            throw new WpImportStreamError(
              `WXR item exceeded ${maxItemBytes} bytes \u2014 abort.`
            );
          }
          continue;
        }
        const itemEnd = close + ITEM_CLOSE.length;
        const itemSlice = local.slice(open, itemEnd);
        // Drop the consumed item so the buffer only holds unparsed text.
        local = local.slice(itemEnd);
        const wrapped = wrapInChannel(itemSlice);
        const single = parseWxr(wrapped);
        const record = single.records[0];
        if (record) yield record;
      }
    } finally {
      // Runs on normal completion, on a thrown error, and when the consumer
      // stops early; destroying an already-finished stream is harmless.
      stream.destroy();
    }
  }
  return {
    header: {
      site: headerBundle.site,
      authors: headerBundle.authors,
      terms: headerBundle.terms
    },
    items: { [Symbol.asyncIterator]: iterate }
  };
}
|
|
301
|
+
/**
 * Wrap a single `<item>…</item>` fragment in a minimal RSS/WXR document
 * (namespace declarations plus empty channel fields) so it can be parsed
 * on its own.
 *
 * @param {string} itemXml - The raw item fragment.
 * @returns {string} A complete XML document containing just that item.
 */
function wrapInChannel(itemXml) {
  const prolog = `<?xml version="1.0" encoding="UTF-8"?>`;
  const rssOpen = [
    `<rss version="2.0"`,
    `xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"`,
    `xmlns:content="http://purl.org/rss/1.0/modules/content/"`,
    `xmlns:dc="http://purl.org/dc/elements/1.1/"`,
    `xmlns:wp="http://wordpress.org/export/1.2/">`
  ].join(" ");
  const emptyChannelHead = `<channel><title></title><link></link><description></description><wp:base_site_url></wp:base_site_url><wp:base_blog_url></wp:base_blog_url>`;
  return `${prolog}${rssOpen}${emptyChannelHead}${itemXml}</channel></rss>`;
}
|
|
304
|
+
|
|
305
|
+
// src/cli/index.ts
|
|
306
|
+
import { readFileSync as readFileSync2 } from "fs";
|
|
307
|
+
import { parseArgs } from "util";
|
|
308
|
+
|
|
309
|
+
// src/cli/config.ts
|
|
310
|
+
import { readFileSync } from "fs";
|
|
311
|
+
/**
 * Raised when the import config file cannot be read, is not valid JSON, or
 * does not have the expected shape.
 */
var WpImportConfigError = class extends Error {
  /** @param {string} message - Human-readable description of the problem. */
  constructor(message) {
    super(message);
    // Make the error identifiable by name in logs and checks.
    this.name = "WpImportConfigError";
  }
};
|
|
317
|
+
/**
 * Read a JSON config file from disk and validate it.
 *
 * @param {string} path - Location of the config file.
 * @returns {{collectionMappings: object}} The parsed configuration.
 * @throws {WpImportConfigError} When the file cannot be read, or when its
 *   content is rejected by parseConfig.
 */
function loadConfigFromPath(path) {
  const readSource = () => {
    try {
      return readFileSync(path, "utf8");
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      throw new WpImportConfigError(`cannot read config ${path}: ${detail}`);
    }
  };
  return parseConfig(readSource(), path);
}
|
|
328
|
+
/**
 * Parse and validate the JSON text of an import config.
 *
 * The top-level value must be an object. Its optional `mappings` array is
 * turned into a lookup keyed by `wpType`; each entry is validated by
 * parseMapping, and a `wpType` may appear only once.
 *
 * Keys are tracked as own properties only. The previous truthiness lookup
 * also saw members inherited from Object.prototype, so a wpType such as
 * "constructor" or "toString" was wrongly reported as a duplicate, and
 * "__proto__" replaced the lookup's prototype instead of storing a mapping.
 *
 * @param {string} source - JSON text.
 * @param {string} [displayPath="<inline>"] - Label used in error messages.
 * @returns {{collectionMappings: Object<string, {collection: string, fieldOverrides: (Object<string, string>|undefined)}>}}
 * @throws {WpImportConfigError} On invalid JSON or an invalid shape.
 */
function parseConfig(source, displayPath = "<inline>") {
  let parsed;
  try {
    parsed = JSON.parse(source);
  } catch (err) {
    throw new WpImportConfigError(
      `${displayPath}: invalid JSON \u2014 ${err instanceof Error ? err.message : String(err)}`
    );
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new WpImportConfigError(`${displayPath}: top-level value must be a JSON object`);
  }
  const mappings = parsed.mappings;
  const collectionMappings = {};
  if (mappings === void 0) {
    // `mappings` is optional; an absent key means "no custom mappings".
    return { collectionMappings };
  }
  if (!Array.isArray(mappings)) {
    throw new WpImportConfigError(`${displayPath}: "mappings" must be an array`);
  }
  for (const [i, entry] of mappings.entries()) {
    const mapping = parseMapping(entry, `${displayPath}#mappings[${i}]`);
    if (Object.hasOwn(collectionMappings, mapping.wpType)) {
      throw new WpImportConfigError(
        `${displayPath}: duplicate mapping for wpType "${mapping.wpType}"`
      );
    }
    // defineProperty (not assignment) so that "__proto__" becomes an
    // ordinary own key; the attributes match those of a normal assignment.
    Object.defineProperty(collectionMappings, mapping.wpType, {
      value: {
        collection: mapping.collection,
        fieldOverrides: mapping.fieldOverrides
      },
      enumerable: true,
      writable: true,
      configurable: true
    });
  }
  return { collectionMappings };
}
|
|
362
|
+
/**
 * Validate one entry of the config's `mappings` array.
 *
 * Both camelCase and snake_case spellings are accepted for `wpType` /
 * `wp_type` and `fieldOverrides` / `field_overrides`.
 *
 * @param {unknown} value - The raw mapping entry.
 * @param {string} displayPath - Location label used in error messages.
 * @returns {{wpType: string, collection: string, fieldOverrides: (Object<string, string>|undefined)}}
 * @throws {WpImportConfigError} When the entry or any of its fields has the
 *   wrong shape.
 */
function parseMapping(value, displayPath) {
  const isPlainObject = (candidate) => Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
  const reject = (detail) => {
    throw new WpImportConfigError(`${displayPath}: ${detail}`);
  };
  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;

  if (!isPlainObject(value)) {
    reject("each mapping must be an object");
  }
  const wpType = value.wpType ?? value.wp_type;
  const { collection } = value;
  if (!isNonEmptyString(wpType)) {
    reject(`"wpType" is required and must be a non-empty string`);
  }
  if (!isNonEmptyString(collection)) {
    reject(`"collection" is required and must be a non-empty string`);
  }

  const rawOverrides = value.fieldOverrides ?? value.field_overrides;
  let fieldOverrides;
  if (rawOverrides !== void 0) {
    if (!isPlainObject(rawOverrides)) {
      reject(`"fieldOverrides" must be an object`);
    }
    fieldOverrides = {};
    Object.entries(rawOverrides).forEach(([metaKey, fieldName]) => {
      if (!isNonEmptyString(fieldName)) {
        reject(`fieldOverrides["${metaKey}"] must be a non-empty string`);
      }
      fieldOverrides[metaKey] = fieldName;
    });
  }
  return { wpType, collection, fieldOverrides };
}
|
|
395
|
+
|
|
396
|
+
// src/cli/format.ts
|
|
397
|
+
/**
 * Render the human-readable summary of a parsed WXR bundle.
 *
 * Sections, in order: heading, source and site details, authors, channel
 * taxonomies, records by type, media counts, an optional comment count,
 * and a closing hint about `--apply`.
 *
 * @param {{bundle: object, sourcePath: string, dryRun: boolean}} args
 * @returns {string} The summary, lines joined with "\n".
 */
function formatSummary(args) {
  const { bundle, sourcePath, dryRun } = args;
  const report = [];
  const add = (line) => {
    report.push(line);
  };
  const gap = () => add("");

  // Heading and site details.
  add(`WordPress import${dryRun ? " \u2014 dry run" : ""}`);
  gap();
  add(`Source: ${sourcePath}`);
  add(`Site: ${bundle.site.title || "(untitled)"}`);
  if (bundle.site.link) add(` ${bundle.site.link}`);
  if (bundle.site.language) add(`Lang: ${bundle.site.language}`);
  gap();

  // Authors.
  add(`Authors (${bundle.authors.length})`);
  if (bundle.authors.length === 0) {
    add(" (none)");
  } else {
    for (const author of bundle.authors) {
      const emailPart = author.email ? ` <${author.email}>` : "";
      const namePart = author.displayName ? ` ${author.displayName}` : "";
      add(` ${author.login}${emailPart}${namePart}`);
    }
  }
  gap();

  // Channel-level taxonomies, counted per taxonomy name.
  const termsPerTaxonomy = countByKey(bundle.terms, (term) => term.taxonomy);
  add(`Channel taxonomies (${termsPerTaxonomy.size})`);
  if (termsPerTaxonomy.size === 0) {
    add(" (none)");
  } else {
    for (const [taxonomy, count] of sortedEntries(termsPerTaxonomy)) {
      add(` ${taxonomy.padEnd(12)} ${count}`);
    }
  }
  gap();

  // Records, counted per WordPress post type.
  const recordsPerType = countByKey(bundle.records, (record) => record.wpType);
  add(`Records (${bundle.records.length})`);
  if (recordsPerType.size === 0) {
    add(" (no items)");
  } else {
    for (const [wpType, count] of sortedEntries(recordsPerType)) {
      const note = typeAnnotation(wpType, bundle.records);
      add(` ${wpType.padEnd(12)} ${count}${note ? ` ${note}` : ""}`);
    }
  }

  // Media counts.
  const inlineUrls = collectInlineMediaUrls(bundle.records);
  let featuredTotal = 0;
  for (const record of bundle.records) {
    for (const ref of record.mediaRefs) {
      if (ref.kind === "featured") featuredTotal += 1;
    }
  }
  gap();
  add(`Inline media refs (${inlineUrls.size} unique URL${inlineUrls.size === 1 ? "" : "s"})`);
  add(`Featured images (${featuredTotal})`);

  // Comments are only mentioned when there are any.
  let commentTotal = 0;
  let commentedRecords = 0;
  for (const record of bundle.records) {
    commentTotal += record.comments.length;
    if (record.comments.length > 0) commentedRecords += 1;
  }
  if (commentTotal > 0) {
    gap();
    add(
      `Comments: ${commentTotal} across ${commentedRecords} record${commentedRecords === 1 ? "" : "s"}`
    );
  }

  gap();
  add(
    dryRun
      ? "This was a dry run. Pass --apply to write to the database."
      : "Pass --apply to write to the database, or omit it to keep this summary view."
  );
  return report.join("\n");
}
|
|
459
|
+
/**
 * Optional remark shown next to a record type in the summary.
 *
 * @param {string} wpType - WordPress post type.
 * @param {object[]} _records - Unused; kept for the call signature.
 * @returns {string} A note for attachments, "" for every other type.
 */
function typeAnnotation(wpType, _records) {
  return wpType === "attachment" ? "(downloaded + uploaded by the media pipeline)" : "";
}
|
|
465
|
+
/**
 * Tally rows by a derived key.
 *
 * @param {Iterable<any>} rows - Items to count.
 * @param {(row: any) => any} keyOf - Extracts the grouping key of a row.
 * @returns {Map<any, number>} Count per key, in first-seen key order.
 */
function countByKey(rows, keyOf) {
  const tally = /* @__PURE__ */ new Map();
  for (const row of rows) {
    const key = keyOf(row);
    const previous = tally.get(key) ?? 0;
    tally.set(key, previous + 1);
  }
  return tally;
}
|
|
473
|
+
/**
 * List a map's entries ordered by key, using locale-aware string comparison.
 *
 * @param {Map<string, any>} map - Map with string keys.
 * @returns {[string, any][]} A new array of `[key, value]` pairs.
 */
function sortedEntries(map) {
  const entries = [...map.entries()];
  entries.sort((left, right) => left[0].localeCompare(right[0]));
  return entries;
}
|
|
476
|
+
/**
 * Gather the distinct source URLs of all inline media references.
 *
 * @param {Iterable<{mediaRefs: Iterable<{kind: string, sourceUrl: string}>}>} records
 * @returns {Set<string>} Unique non-empty URLs of refs whose kind is "inline".
 */
function collectInlineMediaUrls(records) {
  const urls = /* @__PURE__ */ new Set();
  for (const { mediaRefs } of records) {
    for (const { kind, sourceUrl } of mediaRefs) {
      if (kind !== "inline" || !sourceUrl) continue;
      urls.add(sourceUrl);
    }
  }
  return urls;
}
|
|
487
|
+
/**
 * Render the outcome of an apply pass as plain text.
 *
 * Sections, in order: heading, written (or would-be written) documents,
 * skipped documents tallied by reason, document errors (only if any), then
 * media, taxonomies, comments and authors (each with a fallback line when
 * that sub-report is absent), and finally free-form notes (only if any).
 *
 * @param {object} report - Apply report; reads `applied`, `skipped`,
 *   `errors`, `notes` and the optional `media`, `taxonomies`, `comments`
 *   and `authors` sub-reports.
 * @param {{dryRun: boolean}} args - Whether the pass was a dry run.
 * @returns {string} The report, lines joined with "\n".
 */
function formatApplyReport(report, args) {
  const out = [];
  const add = (line) => {
    out.push(line);
  };
  const gap = () => add("");

  // Heading and written documents.
  add(args.dryRun ? "Apply \u2014 dry run" : "Apply");
  gap();
  add(`${args.dryRun ? "Would write" : "Written"}: ${report.applied.length}`);
  if (report.applied.length === 0) {
    add(" (none)");
  } else {
    report.applied.forEach((row) => {
      add(` ${row.collection.padEnd(8)} ${row.slug} "${row.title}"`);
    });
  }
  gap();

  // Skipped documents, tallied per reason.
  add(`Skipped: ${report.skipped.length}`);
  if (report.skipped.length === 0) {
    add(" (none)");
  } else {
    const perReason = /* @__PURE__ */ new Map();
    report.skipped.forEach(({ reason }) => {
      perReason.set(reason, (perReason.get(reason) ?? 0) + 1);
    });
    // Default sort: entries are ordered by their string form.
    Array.from(perReason.entries())
      .sort()
      .forEach(([reason, count]) => {
        add(` ${count.toString().padStart(3)} ${reason}`);
      });
  }

  // Document-level errors.
  if (report.errors.length > 0) {
    gap();
    add(`Errors: ${report.errors.length}`);
    report.errors.forEach((err) => add(` ${err.slug}: ${err.message}`));
  }
  gap();

  // Media pipeline.
  const { media } = report;
  if (!media) {
    add("Media: pipeline not run (no upload hook supplied)");
  } else {
    add(
      `Media: ${media.uploaded} uploaded, ${media.reused} reused (hash match), ${media.skipped} skipped (dry run), ${media.errors.length} errors`
    );
    for (const err of media.errors) add(` ${err.url}: ${err.reason}`);
  }
  gap();

  // Taxonomy resolution.
  const { taxonomies } = report;
  if (!taxonomies) {
    add("Taxonomies: resolver not supplied \u2014 terms dropped");
  } else {
    add(
      `Taxonomies: ${taxonomies.termIds.size} resolved, ${taxonomies.skipped.length} skipped, ${taxonomies.errors.length} errors`
    );
    for (const err of taxonomies.errors) add(` ${err.key.taxonomy}/${err.key.slug}: ${err.reason}`);
    for (const skip of taxonomies.skipped) add(` skipped ${skip.taxonomy}/${skip.slug}`);
  }
  gap();

  // Comment import.
  const { comments } = report;
  if (!comments) {
    add("Comments: deps not supplied \u2014 comments dropped");
  } else {
    add(
      `Comments: ${comments.applied} imported, ${comments.skippedUnapproved} unapproved, ${comments.skippedByResume} resume-skipped, ${comments.errors.length} errors`
    );
    for (const err of comments.errors) add(` #${err.wpCommentId}: ${err.reason}`);
  }
  gap();

  // Author resolution.
  const { authors } = report;
  if (!authors) {
    add("Authors: resolver not supplied \u2014 posts attributed to the import actor");
  } else {
    add(
      `Authors: ${authors.authorIds.size} resolved, ${authors.skipped.length} skipped, ${authors.errors.length} errors`
    );
    for (const err of authors.errors) add(` ${err.login}: ${err.reason}`);
    for (const skip of authors.skipped) add(` skipped ${skip}`);
  }

  // Free-form notes.
  if (report.notes.length > 0) {
    gap();
    add("Notes");
    for (const note of report.notes) add(` - ${note}`);
  }
  return out.join("\n");
}
|
|
580
|
+
|
|
581
|
+
// src/cli/index.ts
|
|
582
|
+
// Default output sinks for the CLI: one line at a time to the console.
// Callers can pass their own `io` object to runCli to capture output.
var DEFAULT_IO = {
  stdout: (line) => {
    console.log(line);
  },
  stderr: (line) => {
    console.error(line);
  }
};
|
|
586
|
+
// Option table handed to `parseArgs` by runCli.
var CLI_OPTIONS = {
  /** Walk the records without writing. Defaults to true. */
  "dry-run": { type: "boolean", default: true },
  /** Run the applier instead of only printing the parsed summary. */
  "apply": { type: "boolean", default: false },
  /**
   * Phase 21.8 — do not create staff users for WordPress authors. The
   * shim then uses a resolver that returns null for every author, so
   * imported posts carry no `author` and are attributed to the import
   * operator through `createdBy` / `updatedBy`.
   */
  "no-create-authors": { type: "boolean", default: false },
  /**
   * Phase 21.9 — path to a JSON config file declaring custom-post-type →
   * collection mappings, with optional postmeta-key → field-name
   * overrides. Records whose `wpType` is neither in the config nor one of
   * post / page / attachment are skipped with a warning.
   */
  "config": { type: "string" },
  /**
   * Phase 21.12 — turn sub-pipeline warnings (media 4xx, MIME reject,
   * taxonomy/author resolver failures) into errors so the CLI exits
   * non-zero. Meant for "clean import or fail" scripts.
   */
  "strict": { type: "boolean", default: false },
  /**
   * Phase 21.12 — on a slug collision, rewrite the existing document
   * instead of skipping it. Comments are NOT re-imported on an update
   * pass; that depends on the per-comment idempotency keys from 21.14.
   */
  "update": { type: "boolean", default: false },
  /**
   * Phase 21.12 — write a side-by-side HTML/Lexical diff for every
   * imported record so the operator can spot-check the conversion. The
   * default location is `<wxr>.report.html`, next to the source.
   */
  "report-html": { type: "boolean", default: false },
  /**
   * Phase 21.12 — replace the default `<wxr>.report.html` path. Giving a
   * path implies `--report-html`, so the flag itself may be omitted.
   */
  "report-html-path": { type: "string" },
  /**
   * Phase 21.14 — load and persist a sidecar resume marker so re-runs
   * skip work that already landed and de-duplicate comments by
   * `wpCommentId`. The marker defaults to `<wxr>.import-state.json`;
   * override it with `--resume-state`.
   */
  "resume": { type: "boolean", default: false },
  /** Phase 21.14 — replace the default resume-marker path. */
  "resume-state": { type: "string" },
  /** Print the usage text and exit. */
  "help": { type: "boolean", short: "h" }
};
|
|
640
|
+
/**
 * Entry point of the `wp-import` command line.
 *
 * Without `--apply` the WXR file is parsed and a summary is printed. With
 * `--apply` the parsed bundle is handed to `hooks.applyBundle` and the
 * resulting report is printed.
 *
 * Two defects are fixed here:
 *  - `--dry-run` defaults to true in CLI_OPTIONS and cannot be negated, so
 *    `--apply` used to pass `dryRun: true` always and never wrote anything.
 *    In apply mode the run is now a dry run only when `--dry-run` was
 *    actually given on the command line, as the usage text describes.
 *  - A rejection from `hooks.applyBundle` used to escape as an unhandled
 *    rejection; like every other step it is now reported on stderr and
 *    mapped to exit code 1.
 *
 * @param {string[]} argv - Command-line arguments (without node/script).
 * @param {{stdout: (line: string) => void, stderr: (line: string) => void}} [io]
 *   Output sinks; defaults to the console.
 * @param {{resolveActor: () => Promise<any>, applyBundle: (bundle: object, options: object) => Promise<object>}} [hooks]
 *   Required for `--apply`.
 * @returns {Promise<number>} Exit code: 0 on success (help, summary, or an
 *   apply report without errors), 2 for usage errors, 1 for anything else.
 */
async function runCli(argv, io = DEFAULT_IO, hooks) {
  const describe = (error) => (error instanceof Error ? error.message : String(error));

  let parsed;
  try {
    parsed = parseArgs({
      args: argv,
      options: CLI_OPTIONS,
      allowPositionals: true,
      // Tokens reveal which flags were typed, as opposed to defaulted.
      tokens: true
    });
  } catch (error) {
    io.stderr(error instanceof Error ? `wp-import: ${error.message}` : "wp-import: bad arguments");
    io.stderr("");
    io.stderr(USAGE);
    return 2;
  }
  if (parsed.values.help) {
    io.stdout(USAGE);
    return 0;
  }
  const sourcePath = parsed.positionals[0];
  if (!sourcePath) {
    io.stderr("wp-import: missing path to a WXR file");
    io.stderr("");
    io.stderr(USAGE);
    return 2;
  }

  let xml;
  try {
    xml = readFileSync2(sourcePath, "utf8");
  } catch (error) {
    io.stderr(`wp-import: cannot read ${sourcePath}: ${describe(error)}`);
    return 1;
  }
  let bundle;
  try {
    bundle = parseWxr(xml);
  } catch (error) {
    io.stderr(`wp-import: parse failed: ${describe(error)}`);
    return 1;
  }

  // The config is validated even in summary mode, so mistakes surface early.
  let collectionMappings = {};
  if (parsed.values.config) {
    try {
      collectionMappings = loadConfigFromPath(parsed.values.config).collectionMappings;
    } catch (error) {
      io.stderr(`wp-import: ${describe(error)}`);
      return 1;
    }
  }

  if (!parsed.values.apply) {
    io.stdout(formatSummary({ bundle, sourcePath, dryRun: parsed.values["dry-run"] }));
    return 0;
  }
  if (!hooks) {
    io.stderr(
      "wp-import: --apply requires the shim that bootstraps core services. Run via `pnpm wp-import` from apps/web, not directly."
    );
    return 1;
  }

  let actor;
  try {
    actor = await hooks.resolveActor();
  } catch (error) {
    io.stderr(`wp-import: cannot resolve admin actor: ${describe(error)}`);
    return 1;
  }

  // In apply mode only an explicitly typed --dry-run suppresses writes. If
  // the runtime does not report tokens, keep the old (always dry) value
  // rather than risk an unintended write.
  const dryRun = Array.isArray(parsed.tokens)
    ? parsed.tokens.some((token) => token.kind === "option" && token.name === "dry-run")
    : parsed.values["dry-run"];

  // An explicit path wins; otherwise the boolean flag selects the default
  // location next to the source file.
  const reportHtmlPathOverride = parsed.values["report-html-path"];
  const reportHtmlPath = reportHtmlPathOverride && reportHtmlPathOverride.length > 0 ? reportHtmlPathOverride : parsed.values["report-html"] ? `${sourcePath}.report.html` : null;
  const resumeStatePathOverride = parsed.values["resume-state"];
  const resumeStatePath = resumeStatePathOverride && resumeStatePathOverride.length > 0 ? resumeStatePathOverride : parsed.values.resume ? `${sourcePath}.import-state.json` : null;

  let report;
  try {
    report = await hooks.applyBundle(bundle, {
      actor,
      dryRun,
      log: (line) => io.stdout(line),
      createAuthors: !parsed.values["no-create-authors"],
      collectionMappings,
      strict: parsed.values.strict,
      update: parsed.values.update,
      reportHtmlPath,
      resumeStatePath
    });
  } catch (error) {
    io.stderr(`wp-import: apply failed: ${describe(error)}`);
    return 1;
  }
  io.stdout(formatApplyReport(report, { dryRun }));
  return report.errors.length > 0 ? 1 : 0;
}
|
|
729
|
+
// Help text printed by the CLI. The synopsis line must list every flag that
// the Options section documents (including --config).
var USAGE = `Usage: wp-import <wxr-file> [--apply] [--dry-run] [--strict] [--update] [--no-create-authors] [--config <path>] [--report-html] [--report-html-path <path>] [--resume] [--resume-state <path>]

Reads a WordPress eXtended RSS export and either prints a summary
of what would be imported (default) or applies it to the database
(--apply). With --apply you can still pass --dry-run to walk the
records and surface skip / collision decisions without writing.

Options:
  --apply                 Run the applier (writes via @nexpress/core).
                          Without this flag only the parsed summary
                          is printed.
  --dry-run               When combined with --apply, walk records
                          but skip the actual writes. Useful for
                          previewing what the import will do against
                          a real DB.
  --no-create-authors     Skip creating staff users for WP authors.
                          Imported posts come in without an author
                          wired and the import operator takes credit
                          via createdBy / updatedBy (Phase 21.8).
  --config <path>         Path to a JSON config file declaring
                          custom-post-type mappings. Each mapping
                          routes a wpType into a NexPress collection
                          and optionally maps WP postmeta keys to
                          collection field names (Phase 21.9).
  --strict                Escalate sub-pipeline warnings (media 4xx,
                          MIME reject, taxonomy / author resolver
                          failures) into errors so the CLI exits
                          non-zero (Phase 21.12).
  --update                Rewrite the existing document instead of
                          skipping when a slug collides. Comments
                          are NOT re-imported on an update pass
                          (Phase 21.12).
  --report-html           Write a side-by-side HTML/Lexical diff of
                          every imported record so the operator can
                          spot-check the conversion. Defaults to
                          <wxr>.report.html (Phase 21.12).
  --report-html-path <path>
                          Override the default report path. Implies
                          --report-html.
  --resume                Read + persist a sidecar resume marker so
                          re-runs skip work that already landed and
                          dedupe comments by wpCommentId. Defaults
                          to <wxr>.import-state.json (Phase 21.14).
  --resume-state <path>
                          Override the default resume-marker path.
                          Implies --resume.
  -h, --help              Show this help message.`;
|
|
776
|
+
|
|
777
|
+
// src/apply/index.ts
|
|
778
|
+
import { findDocuments, saveDocument } from "@nexpress/core";
|
|
779
|
+
|
|
780
|
+
// src/convert/html-to-lexical.ts
|
|
781
|
+
import { NodeType, parse } from "node-html-parser";
|
|
782
|
+
|
|
783
|
+
// src/convert/gutenberg.ts
|
|
784
|
+
// Matches a single Gutenberg fence comment. Capture groups:
//   1: "/" when the fence is a closer (<!-- /wp:name -->)
//   2: block name after the "wp:" prefix
//   4: raw JSON attributes, if present
//   5: "/" when the fence is self-closing (<!-- wp:name /-->)
// The regex is global and therefore stateful; callers reset lastIndex.
var FENCE_RE = /<!--\s*(\/?)wp:([\w/-]+)(\s+(\{[\s\S]*?\}))?\s*(\/)?\s*-->/g;

/**
 * Reports whether the markup contains at least one opening Gutenberg fence.
 * @param {string} html
 * @returns {boolean}
 */
function isGutenbergSource(html) {
  return /<!--\s*wp:[\w/-]+/i.test(html);
}

/**
 * Splits Gutenberg-serialised markup into its top-level blocks.
 *
 * Nested blocks are not emitted separately; they stay inside the enclosing
 * block's `innerHtml`. Markup sitting between top-level blocks is emitted as a
 * synthetic "gutenberg-loose" block. A root block that is never closed is
 * emitted with everything up to the end of the source as its inner HTML.
 *
 * @param {string} source
 * @returns {Array<{name: string, attrs: object, rawAttrs: string, innerHtml: string, selfClosing: boolean}>}
 */
function parseGutenbergBlocks(source) {
  const blocks = [];
  const stack = [];
  let cursor = 0;

  // Emits source[cursor, end) as a loose block when we are at top level and
  // the span holds more than whitespace.
  const flushLoose = (end) => {
    if (stack.length !== 0 || end <= cursor) return;
    const looseHtml = source.slice(cursor, end);
    if (looseHtml.trim().length === 0) return;
    blocks.push({
      name: "gutenberg-loose",
      attrs: {},
      rawAttrs: "",
      innerHtml: looseHtml,
      selfClosing: false
    });
  };

  FENCE_RE.lastIndex = 0;
  let match;
  while ((match = FENCE_RE.exec(source)) !== null) {
    const [full, slash, rawName, , attrsJson, selfSlash] = match;
    const isCloser = slash === "/";
    const isSelfClosing = !isCloser && selfSlash === "/";
    const name = (rawName ?? "").trim();
    const matchStart = match.index;
    const matchEnd = matchStart + full.length;

    if (isCloser) {
      if (stack.length === 0) {
        // Stray closer at top level: keep the markup that preceded it instead
        // of silently dropping it when the cursor moves past the fence.
        flushLoose(matchStart);
        cursor = matchEnd;
        continue;
      }
      const top = stack[stack.length - 1];
      if (top.name !== name) {
        // Mismatched closer inside an open block: ignore the fence itself.
        cursor = matchEnd;
        continue;
      }
      stack.pop();
      if (stack.length === 0) {
        blocks.push({
          name: top.name,
          attrs: top.attrs,
          rawAttrs: top.rawAttrs,
          innerHtml: source.slice(top.innerStart, matchStart),
          selfClosing: false
        });
      }
      cursor = matchEnd;
      continue;
    }

    const attrsRaw = (attrsJson ?? "").trim();
    const attrs = parseAttrsJson(attrsRaw);
    flushLoose(matchStart);

    if (isSelfClosing) {
      // Self-closing fences nested in an open block remain part of its innerHtml.
      if (stack.length === 0) {
        blocks.push({ name, attrs, rawAttrs: attrsRaw, innerHtml: "", selfClosing: true });
      }
    } else {
      stack.push({ name, attrs, rawAttrs: attrsRaw, innerStart: matchEnd });
    }
    cursor = matchEnd;
  }

  // Trailing markup after the last fence (only when nothing is left open).
  flushLoose(source.length);

  const root = stack[0];
  if (root) {
    blocks.push({
      name: root.name,
      attrs: root.attrs,
      rawAttrs: root.rawAttrs,
      innerHtml: source.slice(root.innerStart),
      selfClosing: false
    });
  }
  return blocks;
}

/**
 * Parses a fence's JSON attribute payload.
 * @param {string} raw
 * @returns {object} the parsed plain object, or {} when the payload is empty,
 *   malformed, or not a plain object.
 */
function parseAttrsJson(raw) {
  if (!raw) return {};
  try {
    const parsed = JSON.parse(raw);
    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
      return parsed;
    }
  } catch {
    // Malformed attributes are deliberately treated as "no attributes".
  }
  return {};
}
|
|
896
|
+
|
|
897
|
+
// src/convert/html-to-lexical.ts
|
|
898
|
+
// Bit flags for inline text formatting. walkInline ORs them together into the
// `format` value of each emitted text node.
var FORMAT_BOLD = 1 << 0;
var FORMAT_ITALIC = 1 << 1;
var FORMAT_STRIKETHROUGH = 1 << 2;
var FORMAT_UNDERLINE = 1 << 3;
var FORMAT_CODE = 1 << 4;
|
|
903
|
+
/**
 * Converts an HTML string into a Lexical document object.
 *
 * Gutenberg-fenced markup is split into blocks and converted block by block;
 * anything else is parsed as plain HTML and converted node by node. Blank
 * input, or input that yields no blocks, produces the empty document.
 *
 * @param {string} html
 * @returns {{root: object}}
 */
function htmlToLexical(html) {
  const source = html.trim();
  if (source.length === 0) {
    return emptyDocument();
  }

  const children = [];
  if (!isGutenbergSource(source)) {
    const { childNodes } = parse(source, { lowerCaseTagName: true });
    childNodes.forEach((node) => convertTopLevel(node, children));
  } else {
    parseGutenbergBlocks(source).forEach((block) => convertGutenbergBlock(block, children));
  }

  if (children.length === 0) {
    return emptyDocument();
  }
  const root = {
    type: "root",
    direction: null,
    format: "",
    indent: 0,
    version: 1,
    children
  };
  return { root };
}
|
|
933
|
+
/**
 * Converts one parsed Gutenberg block into Lexical nodes appended to `out`.
 *
 * - Self-closing blocks: only "separator" produces output (a horizontal rule).
 * - "gutenberg-loose" and unrecognised block names: inner HTML goes through
 *   the classic HTML path unchanged.
 * - "heading": the fence's `level` attribute, when it is an integer in 1..6,
 *   overrides the tag of the first heading found in the inner HTML.
 * - "list": the fence's `ordered` attribute overrides the list type of the
 *   first list found in the inner HTML.
 *
 * @param {{name: string, attrs: object, innerHtml: string, selfClosing: boolean}} block
 * @param {object[]} out - receives the converted nodes (mutated).
 * @returns {void}
 */
function convertGutenbergBlock(block, out) {
  if (block.selfClosing) {
    if (block.name === "separator") {
      out.push({
        type: "horizontalrule",
        version: 1,
        format: "",
        indent: 0,
        direction: null
      });
    }
    return;
  }
  if (block.name === "gutenberg-loose") {
    runClassicPath(block.innerHtml, out);
    return;
  }
  switch (block.name) {
    case "heading": {
      const innerBlocks = [];
      runClassicPath(block.innerHtml, innerBlocks);
      const heading = innerBlocks.find((b) => b.type === "heading");
      const lvl = block.attrs.level;
      if (heading) {
        // Only whole levels map to a real tag; 2.5 must not become "h2.5".
        if (Number.isInteger(lvl) && lvl >= 1 && lvl <= 6) {
          heading.tag = `h${lvl}`;
        }
        out.push(heading);
        for (const b of innerBlocks) if (b !== heading) out.push(b);
      } else {
        // No heading element in the markup: synthesise one from the text.
        // Non-finite levels (missing, NaN, non-number) fall back to h2;
        // finite ones are truncated and clamped into 1..6.
        const fallbackLevel = Number.isFinite(lvl) ? Math.min(6, Math.max(1, Math.trunc(lvl))) : 2;
        out.push({
          type: "heading",
          version: 1,
          format: "",
          indent: 0,
          direction: null,
          tag: `h${fallbackLevel}`,
          children: [textNode(stripTags(block.innerHtml), 0)]
        });
      }
      return;
    }
    case "list": {
      const innerBlocks = [];
      runClassicPath(block.innerHtml, innerBlocks);
      const list = innerBlocks.find((b) => b.type === "list");
      if (list) {
        if (block.attrs.ordered === true) list.listType = "number";
        else if (block.attrs.ordered === false) list.listType = "bullet";
      }
      // Emit every converted node in document order so that markup next to
      // the list is not lost. With no list present this equals the classic path.
      for (const b of innerBlocks) out.push(b);
      return;
    }
    default:
      runClassicPath(block.innerHtml, out);
  }
}
|
|
993
|
+
/**
 * Parses plain HTML and converts each top-level node into `out`.
 * @param {string} html
 * @param {object[]} out - receives the converted nodes (mutated).
 * @returns {void}
 */
function runClassicPath(html, out) {
  const { childNodes } = parse(html, { lowerCaseTagName: true });
  childNodes.forEach((node) => convertTopLevel(node, out));
}
|
|
999
|
+
/**
 * Reduces an HTML fragment to trimmed plain text.
 *
 * Tags are removed first. Non-breaking spaces, whether written as an entity
 * (`&nbsp;`, `&#160;`, `&#xa0;`) or as the raw U+00A0 character, become regular
 * spaces, and the basic named entities are decoded so the result can be used
 * directly as text-node content. `&amp;` is decoded last so that `&amp;lt;`
 * yields the literal text `&lt;` rather than `<`.
 *
 * @param {string} html
 * @returns {string}
 */
function stripTags(html) {
  return html
    .replace(/<[^>]+>/g, "")
    .replace(/&nbsp;|&#160;|&#xa0;|\u00a0/gi, " ")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;|&apos;/g, "'")
    .replace(/&amp;/g, "&")
    .trim();
}
|
|
1002
|
+
/**
 * Builds the Lexical document used for blank input: a root node holding a
 * single paragraph with no children.
 * @returns {{root: object}}
 */
function emptyDocument() {
  const root = {
    type: "root",
    direction: null,
    format: "",
    indent: 0,
    version: 1,
    children: [paragraph([])]
  };
  return { root };
}
|
|
1014
|
+
/**
 * Converts one parsed HTML node into block-level Lexical nodes.
 *
 * Non-blank text becomes a paragraph; known block elements map to their
 * Lexical counterpart; div/section/article are transparent containers. Any
 * other element is unwrapped when it has a block-level child and otherwise
 * becomes a paragraph of its inline content (skipped when that is empty).
 *
 * @param {object} node - node produced by node-html-parser.
 * @param {object[]} out - receives the converted nodes (mutated).
 * @returns {void}
 */
function convertTopLevel(node, out) {
  const { nodeType } = node;
  if (nodeType === NodeType.TEXT_NODE) {
    const raw = node.text;
    if (raw.trim().length !== 0) {
      out.push(paragraph([textNode(raw, 0)]));
    }
    return;
  }
  if (nodeType !== NodeType.ELEMENT_NODE) return;

  const tag = node.tagName?.toLowerCase();
  // Fields shared by every block node built here, followed by the extras.
  const blockNode = (type, extra = {}) => ({
    type,
    version: 1,
    format: "",
    indent: 0,
    direction: null,
    ...extra
  });
  const descend = () => {
    for (const child of node.childNodes) convertTopLevel(child, out);
  };

  if (tag === "p") {
    out.push(paragraph(convertInline(node)));
    return;
  }
  if (["h1", "h2", "h3", "h4", "h5", "h6"].includes(tag)) {
    out.push(blockNode("heading", { tag, children: convertInline(node) }));
    return;
  }
  if (tag === "blockquote") {
    out.push(blockNode("quote", { children: convertInline(node) }));
    return;
  }
  if (tag === "ul" || tag === "ol") {
    out.push(convertList(node, tag === "ol" ? "number" : "bullet"));
    return;
  }
  if (tag === "pre") {
    out.push(blockNode("code", { children: [textNode(node.text, 0)] }));
    return;
  }
  if (tag === "hr") {
    out.push(blockNode("horizontalrule"));
    return;
  }
  if (tag === "img") {
    out.push(imageBlock(node));
    return;
  }
  if (tag === "br") {
    out.push(paragraph([]));
    return;
  }
  if (tag === "div" || tag === "section" || tag === "article") {
    descend();
    return;
  }

  // Unknown element: unwrap it if it contains block content, else treat as inline.
  const wrapsBlocks = node.childNodes.some(
    (child) => child.nodeType === NodeType.ELEMENT_NODE && isBlockTag(child)
  );
  if (wrapsBlocks) {
    descend();
    return;
  }
  const inline = convertInline(node);
  if (inline.length > 0) out.push(paragraph(inline));
}
|
|
1101
|
+
// Lower-case tag names that convertTopLevel treats as block-level content.
var BLOCK_TAGS = /* @__PURE__ */ new Set(
  "p h1 h2 h3 h4 h5 h6 blockquote ul ol li pre hr div section article table tr td th thead tbody".split(" ")
);

/**
 * Reports whether an element's tag is block-level (case-insensitive).
 * @param {{tagName?: string | null}} el
 * @returns {boolean}
 */
function isBlockTag(el) {
  const tagName = el.tagName ?? "";
  return BLOCK_TAGS.has(tagName.toLowerCase());
}
|
|
1128
|
+
/**
 * Builds a Lexical paragraph node around the given children.
 * @param {object[]} children
 * @returns {object}
 */
function paragraph(children) {
  const node = {
    type: "paragraph",
    version: 1,
    format: "",
    indent: 0,
    direction: null
  };
  node.children = children;
  return node;
}
|
|
1138
|
+
/**
 * Builds a Lexical text node.
 * @param {string} text
 * @param {number} format - bitmask of FORMAT_* flags.
 * @returns {object}
 */
function textNode(text, format) {
  const node = { type: "text", version: 1, format };
  node.indent = 0;
  node.direction = null;
  node.text = text;
  return node;
}
|
|
1148
|
+
/**
 * Builds a Lexical image node from an <img> element. Missing `src` / `alt`
 * attributes become empty strings.
 * @param {{getAttribute: (name: string) => (string | null | undefined)}} el
 * @returns {object}
 */
function imageBlock(el) {
  const readAttr = (name) => el.getAttribute(name) ?? "";
  const src = readAttr("src");
  const altText = readAttr("alt");
  return {
    type: "image",
    version: 1,
    format: "",
    indent: 0,
    direction: null,
    src,
    altText
  };
}
|
|
1159
|
+
/**
 * Converts a <ul>/<ol> element into a Lexical list node. Only direct <li>
 * element children become list items; everything else is ignored.
 * @param {object} el - list element from node-html-parser.
 * @param {"number" | "bullet"} listType
 * @returns {object}
 */
function convertList(el, listType) {
  const isListItem = (child) =>
    child.nodeType === NodeType.ELEMENT_NODE && (child.tagName ?? "").toLowerCase() === "li";

  const children = el.childNodes.filter(isListItem).map((li) => ({
    type: "listitem",
    version: 1,
    format: "",
    indent: 0,
    direction: null,
    children: convertInline(li)
  }));

  return {
    type: "list",
    version: 1,
    format: "",
    indent: 0,
    direction: null,
    listType,
    children
  };
}
|
|
1184
|
+
/**
 * Collects the inline Lexical nodes for an element's content, starting with
 * no formatting applied.
 * @param {object} el
 * @returns {object[]}
 */
function convertInline(el) {
  const collected = [];
  walkInline(el, 0, collected);
  return collected;
}
|
|
1189
|
+
/**
 * Recursively converts a node into inline Lexical nodes.
 *
 * Formatting tags add their flag to `format` for everything beneath them.
 * <a> becomes a link node wrapping its converted children, <br> a linebreak
 * and <img> an image node. Any other element (including <span>) is
 * transparent: its children are walked with the current format.
 *
 * @param {object} node
 * @param {number} format - FORMAT_* bitmask inherited from ancestors.
 * @param {object[]} out - receives the converted nodes (mutated).
 * @returns {void}
 */
function walkInline(node, format, out) {
  const { nodeType } = node;
  if (nodeType === NodeType.TEXT_NODE) {
    const { text } = node;
    if (text) out.push(textNode(text, format));
    return;
  }
  if (nodeType !== NodeType.ELEMENT_NODE) return;

  const tag = (node.tagName ?? "").toLowerCase();
  let childFormat = format;
  switch (tag) {
    case "a": {
      const url = node.getAttribute("href") ?? "";
      const linkChildren = [];
      for (const child of node.childNodes) walkInline(child, format, linkChildren);
      out.push({
        type: "link",
        version: 1,
        format: "",
        indent: 0,
        direction: null,
        url,
        children: linkChildren
      });
      return;
    }
    case "br":
      out.push({
        type: "linebreak",
        version: 1,
        format: "",
        indent: 0,
        direction: null
      });
      return;
    case "img":
      out.push(imageBlock(node));
      return;
    case "strong":
    case "b":
      childFormat |= FORMAT_BOLD;
      break;
    case "em":
    case "i":
      childFormat |= FORMAT_ITALIC;
      break;
    case "u":
      childFormat |= FORMAT_UNDERLINE;
      break;
    case "s":
    case "del":
    case "strike":
      childFormat |= FORMAT_STRIKETHROUGH;
      break;
    case "code":
      childFormat |= FORMAT_CODE;
      break;
    default:
      // span and unknown tags add no formatting of their own.
      break;
  }
  for (const child of node.childNodes) walkInline(child, childFormat, out);
}
|
|
1254
|
+
|
|
1255
|
+
// src/media/pipeline.ts
|
|
1256
|
+
import { createHash } from "crypto";
|
|
1257
|
+
|
|
1258
|
+
// src/media/download.ts
|
|
1259
|
+
import { Buffer } from "buffer";
|
|
1260
|
+
import { promises as dnsPromises } from "dns";
|
|
1261
|
+
import { Agent } from "undici";
|
|
1262
|
+
// Fallbacks downloadMedia uses when the matching option is not supplied.
var DEFAULT_TIMEOUT_MS = 30_000; // milliseconds before an attempt is aborted
var DEFAULT_RETRIES = 1; // extra attempts after the first one
var DEFAULT_MAX_REDIRECTS = 3;
var DEFAULT_MAX_BYTES = 100 * 1024 ** 2; // 100 MiB
|
|
1266
|
+
/**
 * Raised when a media download fails. `url` is the URL being fetched and
 * `status` the HTTP status code, or null when the failure had none.
 */
var WpMediaDownloadError = class extends Error {
  url;
  status;
  name = "WpMediaDownloadError";
  constructor(url, message, status = null) {
    super(message);
    this.url = url;
    this.status = status;
  }
};

/**
 * Download failure caused by the URL / host safety checks. downloadMedia
 * rethrows these immediately instead of retrying.
 */
var WpMediaSsrfError = class extends WpMediaDownloadError {
  name = "WpMediaSsrfError";
  constructor(url, message) {
    super(url, message);
  }
};
|
|
1282
|
+
/**
 * Downloads a media file with a per-attempt timeout and a bounded number of
 * retries.
 *
 * SSRF rejections and HTTP 4xx responses are rethrown immediately; any other
 * failure is retried until the retry budget is used up, at which point it is
 * thrown as a WpMediaDownloadError.
 *
 * @param {string} url
 * @param {object} [opts] - fetchImpl, dnsLookupImpl, timeoutMs, retries,
 *   maxRedirects, maxBytes, allowPrivateHosts.
 * @returns {Promise<{buffer: Buffer, mimeType: string, filename: string}>}
 * @throws {WpMediaDownloadError}
 */
async function downloadMedia(url, opts = {}) {
  const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
  if (!fetchImpl) {
    throw new WpMediaDownloadError(url, "no fetch implementation available");
  }
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const maxRetries = opts.retries ?? DEFAULT_RETRIES;
  const request = {
    fetchImpl,
    dnsLookupImpl: opts.dnsLookupImpl,
    maxRedirects: opts.maxRedirects ?? DEFAULT_MAX_REDIRECTS,
    maxBytes: opts.maxBytes ?? DEFAULT_MAX_BYTES,
    allowPrivateHosts: opts.allowPrivateHosts ?? false
  };
  assertHttpScheme(url);

  const isClientError = (err) =>
    err instanceof WpMediaDownloadError && err.status !== null && err.status >= 400 && err.status < 500;

  let lastError = null;
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
    // Fresh controller and timer per attempt so a timeout only aborts that attempt.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      return await fetchWithRedirects(url, { ...request, signal: controller.signal });
    } catch (err) {
      lastError = err;
      // Retrying cannot change the outcome of these.
      if (err instanceof WpMediaSsrfError || isClientError(err)) throw err;
      if (attempt < maxRetries) continue;
      if (err instanceof WpMediaDownloadError) throw err;
      throw new WpMediaDownloadError(url, err instanceof Error ? err.message : String(err));
    } finally {
      clearTimeout(timer);
    }
  }
  // Reached only when the loop body never ran (e.g. a negative retry count).
  throw lastError instanceof Error ? lastError : new WpMediaDownloadError(url, "download failed");
}
|
|
1322
|
+
/**
 * Fetches `originalUrl`, following redirects manually so that every hop is
 * re-validated (scheme and, unless `allowPrivateHosts` is set, resolved host).
 *
 * Compared with a plain `fetch` + `arrayBuffer()`:
 * - the body is read incrementally and abandoned as soon as it exceeds
 *   `maxBytes`, instead of being buffered in full before the size check;
 * - the per-hop pinned dispatcher is destroyed once the hop is finished so its
 *   sockets do not outlive the request;
 * - bodies of redirect and error responses are discarded so the connection
 *   can be released.
 *
 * @param {string} originalUrl
 * @param {{fetchImpl: Function, dnsLookupImpl?: Function, signal: AbortSignal,
 *   maxRedirects: number, maxBytes: number, allowPrivateHosts: boolean}} opts
 * @returns {Promise<{buffer: Buffer, mimeType: string, filename: string}>}
 *   `filename` is derived from the original URL, not the final redirect target.
 * @throws {WpMediaDownloadError} on non-2xx responses, oversized bodies, a
 *   redirect without Location, or too many redirects.
 */
async function fetchWithRedirects(originalUrl, opts) {
  let currentUrl = originalUrl;
  for (let hop = 0; hop <= opts.maxRedirects; hop++) {
    const pinned = opts.allowPrivateHosts ? null : await assertHostAllowed(currentUrl, opts.dnsLookupImpl);
    const dispatcher = pinned ? createPinnedAgent(pinned) : void 0;
    try {
      const init = {
        signal: opts.signal,
        redirect: "manual"
      };
      if (dispatcher) init.dispatcher = dispatcher;
      const res = await opts.fetchImpl(currentUrl, init);

      if (isRedirectStatus(res.status)) {
        const next = res.headers.get("location");
        await discardResponseBody(res);
        if (!next) {
          throw new WpMediaDownloadError(
            currentUrl,
            `redirect ${res.status} without Location header`,
            res.status
          );
        }
        currentUrl = new URL(next, currentUrl).toString();
        assertHttpScheme(currentUrl);
        continue;
      }

      if (!res.ok) {
        await discardResponseBody(res);
        throw new WpMediaDownloadError(
          currentUrl,
          `source responded ${res.status} ${res.statusText || ""}`.trim(),
          res.status
        );
      }

      // Cheap early rejection when the server announces an oversized body.
      const declaredLength = res.headers.get("content-length");
      if (declaredLength !== null) {
        const n = Number(declaredLength);
        if (Number.isFinite(n) && n > opts.maxBytes) {
          await discardResponseBody(res);
          throw new WpMediaDownloadError(
            currentUrl,
            `content-length ${n} exceeds maxBytes ${opts.maxBytes}`
          );
        }
      }

      const buffer = await readBodyWithLimit(res, opts.maxBytes, currentUrl);
      const mimeType = parseMime(res.headers.get("content-type"));
      const filename = inferFilename(originalUrl);
      return { buffer, mimeType, filename };
    } finally {
      // The pinned agent is only valid for this hop; release its sockets.
      if (dispatcher) dispatcher.destroy().catch(() => {});
    }
  }
  throw new WpMediaDownloadError(currentUrl, `too many redirects (max ${opts.maxRedirects})`);
}

// Best-effort: cancel an unread response body so the connection is released.
async function discardResponseBody(res) {
  try {
    await res.body?.cancel?.();
  } catch {
    // The body may already be consumed or locked; nothing more to do.
  }
}

// Reads a response body into a Buffer, aborting once it grows past maxBytes.
// Falls back to arrayBuffer() for fetch implementations without a readable stream.
async function readBodyWithLimit(res, maxBytes, url) {
  const reader = res.body?.getReader?.();
  if (!reader) {
    const arrayBuffer = await res.arrayBuffer();
    if (arrayBuffer.byteLength > maxBytes) {
      throw new WpMediaDownloadError(
        url,
        `body ${arrayBuffer.byteLength} bytes exceeds maxBytes ${maxBytes}`
      );
    }
    return Buffer.from(arrayBuffer);
  }
  const chunks = [];
  let total = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    total += value.byteLength;
    if (total > maxBytes) {
      await reader.cancel().catch(() => {});
      throw new WpMediaDownloadError(url, `body ${total} bytes exceeds maxBytes ${maxBytes}`);
    }
    chunks.push(value);
  }
  return Buffer.concat(chunks, total);
}
|
|
1380
|
+
/**
 * Reports whether an HTTP status is one of the redirects that
 * fetchWithRedirects follows (301, 302, 303, 307, 308).
 * @param {number} status
 * @returns {boolean}
 */
function isRedirectStatus(status) {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
|
|
1383
|
+
/**
 * Ensures `url` parses and uses http: or https:.
 * @param {string} url
 * @returns {void}
 * @throws {WpMediaSsrfError} when the URL is invalid or uses another scheme.
 */
function assertHttpScheme(url) {
  let protocol;
  try {
    ({ protocol } = new URL(url));
  } catch {
    throw new WpMediaSsrfError(url, `invalid URL "${url}"`);
  }
  const isHttp = protocol === "http:" || protocol === "https:";
  if (!isHttp) {
    throw new WpMediaSsrfError(url, `unsupported scheme "${protocol}" \u2014 only http(s) is allowed`);
  }
}
|
|
1394
|
+
/**
 * Validates that the URL's host is not private and returns the address the
 * connection should be pinned to.
 *
 * `localhost` names and private IP literals are rejected outright. A public IP
 * literal is returned as-is. Any other hostname is resolved, and the lookup is
 * rejected if ANY returned address is private; otherwise the first address is
 * returned.
 *
 * @param {string} url
 * @param {(hostname: string) => Promise<Array<{address: string, family: number}>>} [dnsLookupImpl]
 *   resolver override; defaults to defaultDnsLookup.
 * @returns {Promise<{address: string, family: number}>}
 * @throws {WpMediaSsrfError}
 */
async function assertHostAllowed(url, dnsLookupImpl) {
  const { hostname: rawHostname } = new URL(url);
  // IPv6 literals arrive wrapped in brackets; strip them before classifying.
  const isBracketed = rawHostname.startsWith("[") && rawHostname.endsWith("]");
  const hostname = isBracketed ? rawHostname.slice(1, -1) : rawHostname;
  const privateHostError = () =>
    new WpMediaSsrfError(url, `hostname "${hostname}" resolves to a private address`);

  const lowered = hostname.toLowerCase();
  if (lowered === "localhost" || lowered.endsWith(".localhost")) {
    throw privateHostError();
  }

  switch (classifyIpLiteral(hostname)) {
    case "private":
      throw privateHostError();
    case "public":
      return { address: hostname, family: hostname.includes(":") ? 6 : 4 };
    default:
      break;
  }

  const resolve = dnsLookupImpl ?? defaultDnsLookup;
  let addrs;
  try {
    addrs = await resolve(hostname);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    throw new WpMediaSsrfError(url, `DNS lookup failed for "${hostname}": ${msg}`);
  }
  if (addrs.length === 0) {
    throw new WpMediaSsrfError(url, `DNS returned no addresses for "${hostname}"`);
  }

  let pinned = null;
  for (const { address, family } of addrs) {
    if (classifyIpAddress(address, family) === "private") {
      throw new WpMediaSsrfError(
        url,
        `hostname "${hostname}" resolves to private address ${address}`
      );
    }
    const isUsableFamily = family === 4 || family === 6;
    if (!pinned && isUsableFamily) {
      pinned = { address, family };
    }
  }
  if (!pinned) {
    throw new WpMediaSsrfError(url, `DNS returned no usable addresses for "${hostname}"`);
  }
  return pinned;
}
|
|
1438
|
+
/**
 * Creates an undici Agent whose connections always go to the pre-validated
 * address, regardless of what the hostname resolves to at connect time.
 *
 * The lookup override follows the `dns.lookup` callback contract: when the
 * caller requests `all: true` (Node does this when address-family
 * auto-selection is active) the callback must receive an array of
 * `{ address, family }`; otherwise it receives `(address, family)`.
 *
 * @param {{address: string, family: number}} pinned
 * @returns {Agent}
 */
function createPinnedAgent(pinned) {
  return new Agent({
    connect: {
      lookup: (_hostname, options, callback) => {
        // dns.lookup also allows (hostname, callback); tolerate that form too.
        const done = typeof options === "function" ? options : callback;
        const wantsAll = typeof options === "object" && options !== null && options.all === true;
        if (wantsAll) {
          done(null, [{ address: pinned.address, family: pinned.family }]);
          return;
        }
        done(null, pinned.address, pinned.family);
      }
    }
  });
}
|
|
1447
|
+
/**
 * Default resolver: returns every address the system resolver reports for
 * `hostname`.
 * @param {string} hostname
 * @returns {Promise<Array<{address: string, family: number}>>}
 */
async function defaultDnsLookup(hostname) {
  const addresses = await dnsPromises.lookup(hostname, { all: true });
  return addresses;
}
|
|
1450
|
+
/**
 * Classifies a hostname that may be an IP literal.
 * @param {string} input
 * @returns {"private" | "public" | "not-an-ip"} "not-an-ip" when the input is
 *   neither dotted-quad shaped nor contains a colon.
 */
function classifyIpLiteral(input) {
  const family = looksLikeIpv4(input) ? 4 : input.includes(":") ? 6 : null;
  return family === null ? "not-an-ip" : classifyIpAddress(input, family);
}
|
|
1455
|
+
/**
 * Classifies an address of a known family. An unrecognised family is treated
 * as private so that callers reject it.
 * @param {string} address
 * @param {number} family - 4 or 6.
 * @returns {"private" | "public"}
 */
function classifyIpAddress(address, family) {
  let isPrivate = true;
  if (family === 4) {
    isPrivate = isPrivateIpv4(address);
  } else if (family === 6) {
    isPrivate = isPrivateIpv6(address);
  }
  return isPrivate ? "private" : "public";
}
|
|
1464
|
+
/**
 * Reports whether a string has dotted-quad shape: four groups of one to three
 * decimal digits. Octet range is not checked here.
 * @param {string} s
 * @returns {boolean}
 */
function looksLikeIpv4(s) {
  const octets = s.split(".");
  return octets.length === 4 && octets.every((octet) => /^\d{1,3}$/.test(octet));
}
|
|
1467
|
+
/**
 * Converts a dotted-quad IPv4 string to an unsigned 32-bit integer.
 * Each octet must be 1-3 decimal digits in 0..255; empty, hex, signed or
 * exponent forms (which `Number()` would accept) are rejected.
 * @param {string} ip
 * @returns {number | null} null when the string is not a valid dotted quad.
 */
function ipv4ToInt(ip) {
  const parts = ip.split(".");
  if (parts.length !== 4) return null;
  let n = 0;
  for (const part of parts) {
    if (!/^\d{1,3}$/.test(part)) return null;
    const v = Number(part);
    if (v > 255) return null;
    n = (n * 256 + v) >>> 0;
  }
  return n;
}

// IPv4 CIDR blocks that must never be fetched, as [base address, prefix bits].
var PRIVATE_IPV4_RANGES = [
  ["0.0.0.0", 8],
  // "this network"
  ["10.0.0.0", 8],
  // RFC 1918
  ["100.64.0.0", 10],
  // CGNAT
  ["127.0.0.0", 8],
  // loopback
  ["169.254.0.0", 16],
  // link-local (incl. cloud metadata 169.254.169.254)
  ["172.16.0.0", 12],
  // RFC 1918
  ["192.0.0.0", 24],
  // protocol assignments
  ["192.0.2.0", 24],
  // TEST-NET-1
  ["192.168.0.0", 16],
  // RFC 1918
  ["198.18.0.0", 15],
  // benchmarking
  ["198.51.100.0", 24],
  // TEST-NET-2
  ["203.0.113.0", 24],
  // TEST-NET-3
  ["224.0.0.0", 4],
  // multicast
  ["240.0.0.0", 4],
  // reserved
  ["255.255.255.255", 32]
  // broadcast
];

/**
 * Reports whether an IPv4 address falls in one of PRIVATE_IPV4_RANGES.
 * Unparseable input is treated as private (fail closed).
 * @param {string} ip
 * @returns {boolean}
 */
function isPrivateIpv4(ip) {
  const n = ipv4ToInt(ip);
  if (n === null) return true;
  for (const [base, bits] of PRIVATE_IPV4_RANGES) {
    const baseN = ipv4ToInt(base);
    if (baseN === null) continue;
    const mask = bits === 0 ? 0 : (4294967295 << (32 - bits)) >>> 0;
    if ((n & mask) === (baseN & mask)) return true;
  }
  return false;
}

// Expands an IPv6 textual address into its eight 16-bit groups. Accepts "::"
// compression, a trailing dotted-quad, and a "%zone" suffix. Returns null when
// the text is not a well-formed address.
function parseIpv6Groups(ip) {
  let text = ip.toLowerCase();
  const zoneAt = text.indexOf("%");
  if (zoneAt >= 0) text = text.slice(0, zoneAt);

  if (text.includes(".")) {
    // Rewrite a dotted-quad tail (e.g. ::ffff:127.0.0.1) as two hex groups.
    const lastColon = text.lastIndexOf(":");
    if (lastColon < 0) return null;
    const v4 = ipv4ToInt(text.slice(lastColon + 1));
    if (v4 === null) return null;
    text = `${text.slice(0, lastColon + 1)}${(v4 >>> 16).toString(16)}:${(v4 & 65535).toString(16)}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;
  const toGroups = (s) => (s === "" ? [] : s.split(":"));
  const head = toGroups(halves[0]);
  const tail = halves.length === 2 ? toGroups(halves[1]) : [];
  const missing = 8 - head.length - tail.length;
  // "::" must stand for at least one group; without it all 8 must be present.
  if (halves.length === 2 ? missing < 1 : missing !== 0) return null;

  const groups = [...head, ...new Array(missing).fill("0"), ...tail];
  if (!groups.every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;
  return groups.map((g) => Number.parseInt(g, 16));
}

/**
 * Reports whether an IPv6 address must not be fetched: unspecified, loopback,
 * unique-local (fc00::/7), link-local (fe80::/10), multicast (ff00::/8), or an
 * IPv4-mapped / IPv4-compatible address whose embedded IPv4 address is private.
 *
 * The address is parsed into numeric groups first, so every textual spelling
 * is covered. In particular the URL parser serialises IPv4-mapped literals in
 * hex (`::ffff:7f00:1` for 127.0.0.1), which a dotted-only pattern misses.
 * Unparseable input is treated as private (fail closed), like isPrivateIpv4.
 *
 * @param {string} ip - address without surrounding brackets.
 * @returns {boolean}
 */
function isPrivateIpv6(ip) {
  const groups = parseIpv6Groups(ip);
  if (groups === null) return true;

  const isZero = (g) => g === 0;
  const first = groups[0];
  const embeddedIpv4 = `${groups[6] >> 8}.${groups[6] & 255}.${groups[7] >> 8}.${groups[7] & 255}`;

  if (groups.every(isZero)) return true; // ::
  if (groups.slice(0, 7).every(isZero) && groups[7] === 1) return true; // ::1
  if ((first & 0xfe00) === 0xfc00) return true; // unique-local
  if ((first & 0xffc0) === 0xfe80) return true; // link-local
  if ((first & 0xff00) === 0xff00) return true; // multicast
  if (groups.slice(0, 5).every(isZero) && groups[5] === 0xffff) {
    return isPrivateIpv4(embeddedIpv4); // IPv4-mapped ::ffff:a.b.c.d
  }
  if (groups.slice(0, 6).every(isZero)) {
    return isPrivateIpv4(embeddedIpv4); // IPv4-compatible ::a.b.c.d
  }
  return false;
}
|
|
1533
|
+
/**
 * Extracts the bare media type from a Content-Type header: parameters after
 * the first ";" are dropped and the result is trimmed and lower-cased.
 * @param {string | null | undefined} header
 * @returns {string} "application/octet-stream" when the header is missing or empty.
 */
function parseMime(header) {
  if (!header) return "application/octet-stream";
  const [mediaType] = header.split(";");
  return mediaType.trim().toLowerCase();
}
|
|
1538
|
+
/**
 * Derives a filename from the last non-empty path segment of a URL,
 * percent-decoded.
 * @param {string} url
 * @returns {string} "download" when the URL cannot be parsed, has no path
 *   segment, or the segment cannot be decoded.
 */
function inferFilename(url) {
  try {
    const { pathname } = new URL(url);
    const lastSegment = pathname.split("/").filter(Boolean).at(-1);
    if (lastSegment) return decodeURIComponent(lastSegment);
  } catch {
    // Invalid URL or malformed percent-escape: fall through to the default name.
  }
  return "download";
}
|
|
1548
|
+
/**
 * Reads download options from environment variables.
 *
 * - NP_WP_IMPORT_ALLOW_PRIVATE_HOSTS: "1" or "true" sets `allowPrivateHosts`.
 * - NP_WP_IMPORT_MAX_BYTES: a positive base-10 integer sets `maxBytes`.
 * Anything else leaves the corresponding option unset.
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 * @returns {{allowPrivateHosts?: true, maxBytes?: number}}
 */
function resolveEnvDownloadOptions(env = process.env) {
  const resolved = {};
  if (["1", "true"].includes(env.NP_WP_IMPORT_ALLOW_PRIVATE_HOSTS)) {
    resolved.allowPrivateHosts = true;
  }
  const rawMaxBytes = env.NP_WP_IMPORT_MAX_BYTES;
  const parsedMaxBytes = rawMaxBytes ? Number.parseInt(rawMaxBytes, 10) : Number.NaN;
  if (Number.isFinite(parsedMaxBytes) && parsedMaxBytes > 0) {
    resolved.maxBytes = parsedMaxBytes;
  }
  return resolved;
}
|
|
1563
|
+
/**
 * Reports whether a downloaded file's MIME type may be imported: any image/*
 * or video/* type, or exactly application/pdf.
 * @param {string} mimeType
 * @returns {boolean}
 */
function isAllowedMimeType(mimeType) {
  const allowedPrefixes = ["image/", "video/"];
  if (allowedPrefixes.some((prefix) => mimeType.startsWith(prefix))) {
    return true;
  }
  return mimeType === "application/pdf";
}
|
|
1566
|
+
|
|
1567
|
+
// src/media/pipeline.ts
|
|
1568
|
+
// Fallback used by runMediaPipeline when options.perHostConcurrency is not set.
var DEFAULT_PER_HOST_CONCURRENCY = 4;
|
|
1569
|
+
/**
 * Download every media target collected from the bundle and upload it (or
 * reuse an already-stored asset with the same SHA-256), building lookup maps
 * from source URL and from WordPress attachment id to the resulting media id.
 *
 * Targets are grouped by URL host; each host gets its own pool of workers so
 * one host is never hit by more than `perHostConcurrency` requests at once.
 * Per-target failures are collected in `errors` rather than thrown.
 *
 * @param {object} bundle - Parsed WXR bundle (its `records` are scanned for media).
 * @param {{ byId: Map<number, object> }} attachments - Attachment index.
 * @param {object} deps - `upload` (required), optional `download` and
 *   `findExistingByHash`.
 * @param {object} [options] - `log`, `dryRun`, `perHostConcurrency`.
 * @returns {Promise<{resolution: {byUrl: Map, byAttachmentId: Map}, uploaded: number, skipped: number, reused: number, errors: Array<{url: string, reason: string}>}>}
 */
async function runMediaPipeline(bundle, attachments, deps, options = {}) {
  const log = options.log ?? noop;
  const dryRun = options.dryRun ?? false;
  const envDefaults = resolveEnvDownloadOptions();
  const download = deps.download ?? ((url) => downloadMedia(url, envDefaults));
  // A NaN worker count would propagate through Math.max/Math.min and make
  // Array.from({ length: NaN }) start zero workers, silently skipping every
  // download. Fall back to the default in that case.
  const requestedConcurrency = Number(options.perHostConcurrency ?? DEFAULT_PER_HOST_CONCURRENCY);
  const concurrency = Number.isNaN(requestedConcurrency) ? DEFAULT_PER_HOST_CONCURRENCY : Math.max(1, requestedConcurrency);
  const byUrl = new Map();
  const byAttachmentId = new Map();
  const errors = [];
  let uploaded = 0;
  let skipped = 0;
  let reused = 0;
  const targets = collectTargets(bundle, attachments);
  const fetchable = [];
  for (const target of targets) {
    if (!target.url) {
      // Featured-image reference whose attachment record is not in the export.
      errors.push({
        url: `(wp-attachment-id ${target.wpAttachmentId})`,
        reason: "attachment record missing from WXR \u2014 cannot resolve URL"
      });
      continue;
    }
    fetchable.push(target);
  }
  const byHost = new Map();
  for (const target of fetchable) {
    const host = parseHost(target.url);
    const list = byHost.get(host);
    if (list) list.push(target);
    else byHost.set(host, [target]);
  }
  const processOne = async (target) => {
    if (byUrl.has(target.url)) {
      // Already resolved for another target: only record the id alias.
      if (target.wpAttachmentId !== null) {
        byAttachmentId.set(target.wpAttachmentId, byUrl.get(target.url));
      }
      return;
    }
    if (dryRun) {
      log(`media plan ${target.url}`);
      skipped++;
      return;
    }
    try {
      const result = await download(target.url);
      if (!isAllowedMimeType(result.mimeType)) {
        errors.push({
          url: target.url,
          reason: `disallowed MIME type "${result.mimeType}"`
        });
        return;
      }
      let mediaId = null;
      if (deps.findExistingByHash) {
        const sha256 = createHash("sha256").update(result.buffer).digest("hex");
        const existing = await deps.findExistingByHash(sha256);
        if (existing) {
          mediaId = existing.id;
          reused++;
          log(`media reuse ${target.url} \u2192 ${existing.id}`);
        }
      }
      if (!mediaId) {
        const upload = await deps.upload({
          buffer: result.buffer,
          originalFilename: result.filename,
          mimeType: result.mimeType
        });
        mediaId = upload.id;
        uploaded++;
        log(`media write ${target.url} \u2192 ${upload.id}`);
      }
      byUrl.set(target.url, mediaId);
      if (target.wpAttachmentId !== null) {
        byAttachmentId.set(target.wpAttachmentId, mediaId);
      }
    } catch (err) {
      const reason = err instanceof WpMediaDownloadError ? err.status !== null ? `HTTP ${err.status}: ${err.message}` : err.message : err instanceof Error ? err.message : String(err);
      errors.push({ url: target.url, reason });
      log(`media error ${target.url}: ${reason}`);
    }
  };
  await Promise.all(
    Array.from(byHost.entries()).map(async ([_host, queue]) => {
      // Workers of one host share `cursor`; each pulls the next index until
      // the queue is drained.
      let cursor = 0;
      const next = async () => {
        while (cursor < queue.length) {
          const i = cursor++;
          const target = queue[i];
          if (target) await processOne(target);
        }
      };
      const workers = Array.from({ length: Math.min(concurrency, queue.length) }, () => next());
      await Promise.all(workers);
    })
  );
  // Targets are de-duplicated by URL before they reach this function, so when
  // several attachment records share one source URL only the first id was
  // mapped above. Alias the remaining ids to the same media id so that
  // lookups by attachment id (e.g. featured images) still resolve.
  for (const [wpAttachmentId, entry] of attachments.byId) {
    if (!entry.sourceUrl || byAttachmentId.has(wpAttachmentId)) continue;
    const mediaId = byUrl.get(entry.sourceUrl);
    if (mediaId) byAttachmentId.set(wpAttachmentId, mediaId);
  }
  return {
    resolution: { byUrl, byAttachmentId },
    uploaded,
    skipped,
    reused,
    errors
  };
}
|
|
1673
|
+
/**
 * Build the list of media targets to fetch: every attachment record with a
 * source URL, followed by media referenced from non-attachment records.
 *
 * Targets are de-duplicated by URL. A featured-image reference whose
 * attachment is unknown (no URL available) is emitted with `url: ""` so the
 * caller can report it; such placeholders are de-duplicated by attachment id
 * so one missing attachment referenced by many records is reported once.
 *
 * @param {{ records: Array<object> }} bundle
 * @param {{ byId: Map<number, {sourceUrl: string, wpAttachmentId: number}> }} attachments
 * @returns {Array<{ url: string, wpAttachmentId: number | null }>}
 */
function collectTargets(bundle, attachments) {
  const seen = new Set();
  const missingIds = new Set();
  const targets = [];
  for (const entry of attachments.byId.values()) {
    if (!entry.sourceUrl) continue;
    if (seen.has(entry.sourceUrl)) continue;
    seen.add(entry.sourceUrl);
    targets.push({ url: entry.sourceUrl, wpAttachmentId: entry.wpAttachmentId });
  }
  for (const record of bundle.records) {
    if (record.wpType === "attachment") continue;
    for (const ref of record.mediaRefs) {
      if (ref.kind === "featured" && ref.wpAttachmentId !== null) {
        // Featured images are resolved through the attachment index, not
        // through the reference's own URL.
        const url = attachments.byId.get(ref.wpAttachmentId)?.sourceUrl ?? "";
        if (url) {
          if (seen.has(url)) continue;
          seen.add(url);
        } else {
          if (missingIds.has(ref.wpAttachmentId)) continue;
          missingIds.add(ref.wpAttachmentId);
        }
        targets.push({ url, wpAttachmentId: ref.wpAttachmentId });
        continue;
      }
      if (ref.sourceUrl && !seen.has(ref.sourceUrl)) {
        seen.add(ref.sourceUrl);
        targets.push({ url: ref.sourceUrl, wpAttachmentId: ref.wpAttachmentId });
      }
    }
  }
  return targets;
}
|
|
1701
|
+
/**
 * Extract the `host` (hostname plus optional port) of a URL.
 *
 * @param {string} url
 * @returns {string} The host, or the placeholder "(invalid)" when the URL
 *   cannot be parsed.
 */
function parseHost(url) {
  let host = "(invalid)";
  try {
    ({ host } = new URL(url));
  } catch {
    // Unparseable URL: keep the placeholder.
  }
  return host;
}
|
|
1708
|
+
/** Do-nothing function used as the default `log` callback. */
function noop() {
  return undefined;
}
|
|
1710
|
+
|
|
1711
|
+
// src/media/rewrite.ts
|
|
1712
|
+
/**
 * Return a copy of a Lexical document in which image nodes whose `src` was
 * resolved by the media pipeline carry the matching `mediaId`.
 *
 * The input is never mutated: when there is nothing to rewrite the same
 * object is returned, otherwise a JSON deep copy is annotated and returned.
 *
 * @param {{ root: object }} root - Serialized Lexical document.
 * @param {{ byUrl: Map<string, string> }} resolution - URL to media-id map.
 * @returns {{ root: object }}
 */
function rewriteLexicalMedia(root, resolution) {
  const hasMappings = resolution.byUrl.size !== 0;
  if (hasMappings) {
    const copy = JSON.parse(JSON.stringify(root));
    walk(copy.root, resolution);
    return copy;
  }
  return root;
}
|
|
1720
|
+
/**
 * Depth-first traversal that annotates image nodes in place.
 *
 * For every object node with `type === "image"` and a non-empty string `src`
 * found in `resolution.byUrl`, sets `node.mediaId`. Recurses into `children`
 * when it is an array; non-object values are ignored.
 *
 * @param {unknown} node - Node to visit (mutated).
 * @param {{ byUrl: Map<string, string> }} resolution
 * @returns {void}
 */
function walk(node, resolution) {
  if (node === null || typeof node !== "object") return;
  const { type, src, children } = node;
  if (type === "image" && typeof src === "string" && src !== "") {
    const replacement = resolution.byUrl.get(src);
    if (replacement) {
      node.mediaId = replacement;
    }
  }
  if (Array.isArray(children)) {
    children.forEach((child) => walk(child, resolution));
  }
}
|
|
1734
|
+
|
|
1735
|
+
// src/apply/attachment-index.ts
|
|
1736
|
+
/**
 * Index the bundle's attachment records by WordPress id and by source URL.
 *
 * Records with a non-positive `wpId` are left out of `byId`; records without
 * a resolvable URL are left out of `byUrl`. Both maps share the same entry
 * objects.
 *
 * @param {{ records: Array<object> }} bundle
 * @returns {{ byId: Map<number, object>, byUrl: Map<string, object> }}
 */
function buildAttachmentIndex(bundle) {
  const byId = new Map();
  const byUrl = new Map();
  bundle.records
    .filter((candidate) => candidate.wpType === "attachment")
    .forEach((attachment) => {
      const sourceUrl = pickAttachmentUrl(attachment);
      const entry = {
        wpAttachmentId: attachment.wpId,
        sourceUrl,
        meta: attachment.meta,
        title: attachment.title
      };
      if (attachment.wpId > 0) {
        byId.set(attachment.wpId, entry);
      }
      if (sourceUrl) {
        byUrl.set(sourceUrl, entry);
      }
    });
  return { byId, byUrl };
}
|
|
1753
|
+
/**
 * Return the first non-empty `sourceUrl` among a record's media references.
 *
 * @param {{ mediaRefs: Array<{ sourceUrl?: string | null }> }} record
 * @returns {string} The URL, or "" when no reference carries one.
 */
function pickAttachmentUrl(record) {
  for (const ref of record.mediaRefs) {
    if (ref.sourceUrl) return ref.sourceUrl;
  }
  return "";
}
|
|
1757
|
+
|
|
1758
|
+
// src/apply/authors.ts
|
|
1759
|
+
/**
 * Resolve every distinct author login used by non-attachment records through
 * the caller-supplied resolver, one login at a time.
 *
 * @param {{ records: Array<object>, authors: Array<{ login: string }> }} bundle
 * @param {{ resolveAuthor(input: { wpAuthorLogin: string, wpAuthor: object | undefined }): Promise<{ id: string } | null | undefined> }} resolver
 * @returns {Promise<{ authorIds: Map<string, string>, skipped: string[], errors: Array<{ login: string, reason: string }> }>}
 *   `authorIds` holds resolved logins, `skipped` the logins for which the
 *   resolver returned a falsy value, `errors` the logins for which it threw.
 */
async function resolveAuthors(bundle, resolver) {
  const logins = new Set(
    bundle.records
      .filter((record) => record.wpType !== "attachment" && record.wpAuthorLogin)
      .map((record) => record.wpAuthorLogin)
  );
  const profiles = new Map();
  for (const author of bundle.authors) {
    profiles.set(author.login, author);
  }
  const outcome = { authorIds: new Map(), skipped: [], errors: [] };
  for (const login of logins) {
    let resolved;
    try {
      resolved = await resolver.resolveAuthor({
        wpAuthorLogin: login,
        wpAuthor: profiles.get(login)
      });
    } catch (err) {
      outcome.errors.push({ login, reason: err instanceof Error ? err.message : String(err) });
      continue;
    }
    if (resolved) {
      outcome.authorIds.set(login, resolved.id);
    } else {
      outcome.skipped.push(login);
    }
  }
  return outcome;
}
|
|
1786
|
+
|
|
1787
|
+
// src/apply/comments.ts
|
|
1788
|
+
/**
 * Import the comments of one record as comments on an already-saved document.
 *
 * Comments are processed in ascending WordPress id order so that a parent's
 * new id is normally known before its replies are inserted. Counters and
 * errors are accumulated on `args.plan`; nothing is thrown for a single
 * failing comment.
 *
 * @param {object} args
 * @param {object} args.record - Source record (`slug`, `comments`).
 * @param {string} args.postId - Id of the saved target document.
 * @param {string} args.collection - Target collection name.
 * @param {object} args.deps - `insertComment`, `renderBody`, plus whatever
 *   the member lookup needs.
 * @param {object} args.plan - Mutable tally (`applied`, `skipped*`, `errors`).
 * @param {(line: string) => void} [args.log]
 * @param {{ state: { comments: Record<number, string> }, persist(): void }} [args.resume]
 *   When given, comments already listed in the state are skipped and every
 *   newly inserted comment is recorded and persisted.
 * @returns {Promise<void>}
 */
async function importPostComments(args) {
  const { record, postId, collection, deps, plan, resume } = args;
  const log = args.log ?? noop2;
  if (record.comments.length === 0) return;
  const byAscendingId = record.comments.slice().sort((left, right) => left.wpId - right.wpId);
  // WordPress comment id -> id of the inserted comment, seeded from a prior run.
  const insertedIds = new Map();
  if (resume) {
    byAscendingId.forEach(({ wpId }) => {
      const earlier = resume.state.comments[wpId];
      if (earlier) insertedIds.set(wpId, earlier);
    });
  }
  for (const wpComment of byAscendingId) {
    if (resume?.state.comments[wpComment.wpId]) {
      plan.skippedByResume++;
      continue;
    }
    if (!wpComment.approved) {
      plan.skippedUnapproved++;
      continue;
    }
    try {
      const member = await ensureMemberFor(wpComment, deps);
      if (!member) {
        plan.skippedNoMember++;
        continue;
      }
      // A reply whose parent was not imported becomes a top-level comment.
      let parentId = null;
      if (wpComment.parentWpId !== null) {
        parentId = insertedIds.get(wpComment.parentWpId) ?? null;
      }
      const createdAt = parseWpDate(wpComment.date);
      const inserted = await deps.insertComment({
        targetType: collection,
        targetId: postId,
        parentId,
        memberId: member.id,
        bodyMd: wpComment.content,
        bodyHtml: deps.renderBody(wpComment.content),
        createdAt
      });
      insertedIds.set(wpComment.wpId, inserted.id);
      plan.applied++;
      log(`comment write ${collection}/${record.slug} #${wpComment.wpId}`);
      if (resume) {
        resume.state.comments[wpComment.wpId] = inserted.id;
        resume.persist();
      }
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      plan.errors.push({ wpCommentId: wpComment.wpId, reason });
      log(`comment error ${collection}/${record.slug} #${wpComment.wpId}: ${reason}`);
    }
  }
}
|
|
1840
|
+
/**
 * Find or create the member that an imported comment is attributed to.
 *
 * The member handle is `<slug>-wpimp`, where the slug comes from the
 * commenter's email when that yields a non-empty slug, and otherwise from
 * the display name (which defaults to "guest").
 *
 * @param {{ authorName?: string | null, authorEmail?: string | null }} comment
 * @param {{ ensureImportedMember(input: { handle: string, email: unknown, displayName: string }): Promise<object | null> }} deps
 * @returns {Promise<object | null>} Whatever `deps.ensureImportedMember`
 *   resolves to, or `null` when no usable handle can be derived.
 */
async function ensureMemberFor(comment, deps) {
  const fallbackName = comment.authorName?.trim() || "guest";
  // `??` would let an empty-string email win over the name, producing an
  // empty slug and dropping the comment even though a name is available.
  // Fall back whenever the email does not produce a usable slug.
  const emailSlug = typeof comment.authorEmail === "string" ? slugify(comment.authorEmail) : "";
  const slug = emailSlug || slugify(fallbackName);
  if (!slug) return null;
  return deps.ensureImportedMember({
    handle: `${slug}-wpimp`,
    email: comment.authorEmail,
    displayName: fallbackName
  });
}
|
|
1851
|
+
/**
 * Turn arbitrary text into a lowercase ASCII slug of at most 40 characters.
 *
 * Runs of characters outside `a-z0-9` become a single hyphen, a hyphen at the
 * very start or end is removed, and the result is then cut to 40 characters.
 *
 * @param {string} input
 * @returns {string}
 */
function slugify(input) {
  const lowered = input.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
  const trimmed = hyphenated.replace(/(^-|-$)/g, "");
  return trimmed.slice(0, 40);
}
|
|
1854
|
+
/**
 * Parse a WordPress date string into a `Date`.
 *
 * Strings containing "T" are handed to `Date` unchanged; other strings have
 * their first space replaced by "T" and a "Z" appended, so they are read as
 * UTC. Empty or unparseable input yields the current time.
 *
 * @param {string | null | undefined} raw
 * @returns {Date}
 */
function parseWpDate(raw) {
  if (raw) {
    const candidate = new Date(raw.includes("T") ? raw : `${raw.replace(" ", "T")}Z`);
    if (!Number.isNaN(candidate.getTime())) {
      return candidate;
    }
  }
  return new Date();
}
|
|
1860
|
+
/**
 * Create a fresh comment-import tally with every counter at zero and an
 * empty error list.
 *
 * @returns {{ applied: number, skippedUnapproved: number, skippedNoMember: number, skippedByResume: number, errors: Array<object> }}
 */
function emptyCommentPlan() {
  const plan = {
    applied: 0,
    skippedUnapproved: 0,
    skippedNoMember: 0,
    skippedByResume: 0
  };
  plan.errors = [];
  return plan;
}
|
|
1863
|
+
/** Do-nothing function used as the default `log` callback for comment import. */
function noop2() {
  return undefined;
}
|
|
1865
|
+
|
|
1866
|
+
// src/apply/resume.ts
|
|
1867
|
+
import { readFileSync as readFileSync3, renameSync as renameResumeFile, writeFileSync } from "fs";
|
|
1868
|
+
/**
 * Create a blank version-1 resume state for a given source.
 *
 * `startedAt` and `updatedAt` are both set to the current time; every
 * per-kind marker map starts empty.
 *
 * @param {string} source - Identifier of the import source stored in the state.
 * @returns {object} The new state object.
 */
function emptyResumeState(source) {
  const timestamp = new Date().toISOString();
  const state = {
    version: 1,
    source,
    startedAt: timestamp,
    updatedAt: timestamp
  };
  for (const section of ["documents", "comments", "authors", "media", "taxonomies"]) {
    state[section] = {};
  }
  return state;
}
|
|
1882
|
+
/**
 * Error raised when a resume-state file cannot be read or is not a valid
 * version-1 state.
 *
 * Accepts the standard `Error` options bag as an optional second argument so
 * callers can attach the underlying failure as `cause`; omitting it behaves
 * exactly as before.
 */
var ResumeStateError = class extends Error {
  /**
   * @param {string} message
   * @param {{ cause?: unknown }} [options]
   */
  constructor(message, options) {
    super(message, options);
    this.name = "ResumeStateError";
  }
};
|
|
1888
|
+
/**
 * Load a resume state from disk.
 *
 * A missing file is not an error: a fresh empty state for `source` is
 * returned. Unknown or mistyped fields in the file are replaced by defaults;
 * marker maps keep only string values.
 *
 * @param {string} path - Location of the JSON state file.
 * @param {string} source - Source identifier used when the file is missing
 *   or does not carry a string `source`.
 * @returns {object} A version-1 resume state.
 * @throws {ResumeStateError} When the file cannot be read (other than not
 *   existing), is not valid JSON, is not a JSON object, or has a `version`
 *   other than 1.
 */
function loadResumeState(path, source) {
  const describe = (err) => (err instanceof Error ? err.message : String(err));
  let text;
  try {
    text = readFileSync3(path, "utf8");
  } catch (err) {
    if (err.code === "ENOENT") {
      return emptyResumeState(source);
    }
    throw new ResumeStateError(`cannot read resume state ${path}: ${describe(err)}`);
  }
  let root;
  try {
    root = JSON.parse(text);
  } catch (err) {
    throw new ResumeStateError(`${path}: invalid JSON \u2014 ${describe(err)}`);
  }
  const isJsonObject = Boolean(root) && typeof root === "object" && !Array.isArray(root);
  if (!isJsonObject) {
    throw new ResumeStateError(`${path}: top-level value must be a JSON object`);
  }
  if (root.version !== 1) {
    throw new ResumeStateError(`${path}: unsupported version ${String(root.version)}`);
  }
  // Timestamps missing from the file default to "now".
  const timestampOrNow = (value) => (typeof value === "string" ? value : new Date().toISOString());
  return {
    version: 1,
    source: typeof root.source === "string" ? root.source : source,
    startedAt: timestampOrNow(root.startedAt),
    updatedAt: timestampOrNow(root.updatedAt),
    documents: asStringMap(root.documents),
    comments: asNumberKeyMap(root.comments),
    authors: asStringMap(root.authors),
    media: asStringMap(root.media),
    taxonomies: asStringMap(root.taxonomies)
  };
}
|
|
1927
|
+
/**
 * Write the resume state to disk as pretty-printed JSON, stamping
 * `state.updatedAt` with the current time first.
 *
 * The JSON is written to a sibling `<path>.tmp` file and then renamed over
 * `path`, so an interruption during the write cannot leave a truncated state
 * file behind (which would fail to load as invalid JSON on the next run).
 *
 * @param {string} path - Destination file.
 * @param {object} state - Resume state; its `updatedAt` field is mutated.
 * @returns {void}
 */
function persistResumeState(path, state) {
  state.updatedAt = new Date().toISOString();
  const tmpPath = `${path}.tmp`;
  writeFileSync(tmpPath, JSON.stringify(state, null, 2), "utf8");
  renameResumeFile(tmpPath, path);
}
|
|
1931
|
+
/**
 * Build the key under which a document is tracked in the resume state.
 *
 * @param {string} collection
 * @param {string} slug
 * @returns {string} `<collection>/<slug>`
 */
function documentKey(collection, slug) {
  return "".concat(collection, "/", slug);
}
|
|
1934
|
+
/**
 * Coerce an untrusted value into a plain object of string values.
 *
 * @param {unknown} value
 * @returns {Record<string, string>} The own enumerable entries of `value`
 *   whose values are strings; `{}` when `value` is not a non-array object.
 */
function asStringMap(value) {
  const out = {};
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return out;
  }
  Object.keys(value).forEach((key) => {
    const candidate = value[key];
    if (typeof candidate === "string") {
      out[key] = candidate;
    }
  });
  return out;
}
|
|
1942
|
+
/**
 * Coerce an untrusted value into an object keyed by integers with string
 * values.
 *
 * Each key is parsed with `Number.parseInt(key, 10)`; entries whose key does
 * not parse to a finite number or whose value is not a string are dropped.
 *
 * @param {unknown} value
 * @returns {Record<number, string>} `{}` when `value` is not a non-array object.
 */
function asNumberKeyMap(value) {
  const out = {};
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return out;
  }
  for (const key of Object.keys(value)) {
    const id = Number.parseInt(key, 10);
    const target = value[key];
    if (typeof target === "string" && Number.isFinite(id)) {
      out[id] = target;
    }
  }
  return out;
}
|
|
1951
|
+
|
|
1952
|
+
// src/apply/taxonomies.ts
|
|
1953
|
+
/**
 * Build the lookup key for a taxonomy term.
 *
 * @param {string} taxonomy
 * @param {string} slug
 * @returns {string} `<taxonomy>:<slug>`
 */
function termCacheKey(taxonomy, slug) {
  return "".concat(taxonomy, ":", slug);
}
|
|
1956
|
+
/**
 * Resolve every distinct taxonomy term of the export through the
 * caller-supplied resolver, one term at a time.
 *
 * Terms are gathered from the channel first and then from each
 * non-attachment record; terms without a slug are ignored and the first
 * occurrence of a (taxonomy, slug) pair wins. A term with an empty name uses
 * its slug as the name.
 *
 * @param {Array<{ wpType: string, terms: Array<object> }>} records
 * @param {Array<{ taxonomy: string, slug: string, name: string }>} channelTerms
 * @param {{ findOrCreate(term: { taxonomy: string, slug: string, name: string }): Promise<{ id: string } | null | undefined> }} resolver
 * @returns {Promise<{ termIds: Map<string, string>, errors: Array<{ key: object, reason: string }>, skipped: Array<object> }>}
 */
async function resolveTaxonomies(records, channelTerms, resolver) {
  const candidates = [...channelTerms];
  for (const record of records) {
    if (record.wpType === "attachment") continue;
    for (const term of record.terms) candidates.push(term);
  }
  const unique = new Map();
  for (const { taxonomy, slug, name } of candidates) {
    if (!slug) continue;
    const cacheKey = termCacheKey(taxonomy, slug);
    if (!unique.has(cacheKey)) {
      unique.set(cacheKey, { taxonomy, slug, name: name || slug });
    }
  }
  const outcome = { termIds: new Map(), errors: [], skipped: [] };
  for (const [cacheKey, term] of unique) {
    let found;
    try {
      found = await resolver.findOrCreate(term);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      outcome.errors.push({ key: term, reason });
      continue;
    }
    if (found) {
      outcome.termIds.set(cacheKey, found.id);
    } else {
      outcome.skipped.push(term);
    }
  }
  return outcome;
}
|
|
1987
|
+
/**
 * Collect the resolved category and tag ids for one record.
 *
 * Only terms of taxonomy "category" and "post_tag" are considered; terms
 * without a slug or without a resolved id are ignored, and each id appears
 * at most once per list, in first-seen order.
 *
 * @param {{ terms: Array<{ taxonomy: string, slug: string }> }} record
 * @param {{ termIds: Map<string, string> }} resolution
 * @returns {{ categoryIds: string[], tagIds: string[] }}
 */
function pickPostTermIds(record, resolution) {
  // Sets keep insertion order, giving de-duplication and ordering in one step.
  const categories = new Set();
  const tags = new Set();
  for (const { taxonomy, slug } of record.terms) {
    const id = slug ? resolution.termIds.get(termCacheKey(taxonomy, slug)) : void 0;
    if (!id) continue;
    if (taxonomy === "category") {
      categories.add(id);
    } else if (taxonomy === "post_tag") {
      tags.add(id);
    }
  }
  return { categoryIds: [...categories], tagIds: [...tags] };
}
|
|
2006
|
+
|
|
2007
|
+
// src/apply/index.ts
|
|
2008
|
+
// Built-in mapping from WordPress post type to target collection.
// The table has a null prototype because it is indexed with post-type strings
// taken from the imported file: with a plain object literal, a type named
// e.g. "constructor" or "__proto__" would resolve to an Object.prototype
// member instead of `undefined` and be treated as a mapped type.
var TYPE_TO_COLLECTION = Object.assign(/* @__PURE__ */ Object.create(null), {
  post: "posts",
  page: "pages"
});
|
|
2012
|
+
/**
 * Apply a parsed WXR bundle: run the optional media, taxonomy, author and
 * comment steps, then create (or, with `options.update`, update) one document
 * per importable record.
 *
 * Records are skipped when their type has no target collection, their status
 * is "trash" or "auto-draft", they have no slug, or a document with the same
 * slug already exists and update mode is off. A failure on one record is
 * recorded in `errors` and does not stop the run.
 *
 * With `options.dryRun` nothing is saved: taxonomy, author and comment
 * resolvers are not invoked and each importable record is reported in
 * `applied` as a plan entry. Because those resolvers are not executed in a
 * dry run, the summary notes make no claim about dropped authors, terms or
 * comments when the corresponding resolver WAS supplied.
 *
 * With `options.strict`, media/taxonomy/author/comment errors are also
 * appended to the returned `errors` list.
 *
 * @param {object} bundle - Parsed WXR bundle (`records`, `terms`, `authors`).
 * @param {object} options - `actor`, and optionally `log`, `dryRun`, `update`,
 *   `strict`, `media`, `taxonomies`, `authors`, `comments`, `resume`, `audit`,
 *   `reportHtml`, `collectionMappings`, `preserveOriginalAuthor`.
 * @returns {Promise<{applied: object[], skipped: object[], errors: object[], attachments: object, media: object|null, taxonomies: object|null, comments: object|null, authors: object|null, notes: string[]}>}
 */
async function applyBundle(bundle, options) {
  const log = options.log ?? noop3;
  const dryRun = options.dryRun ?? false;
  const attachments = buildAttachmentIndex(bundle);
  let media = null;
  let resolution = { byUrl: /* @__PURE__ */ new Map(), byAttachmentId: /* @__PURE__ */ new Map() };
  if (options.media) {
    media = await runMediaPipeline(bundle, attachments, options.media, { dryRun, log });
    resolution = media.resolution;
  }
  let taxonomies = null;
  if (options.taxonomies && !dryRun) {
    taxonomies = await resolveTaxonomies(bundle.records, bundle.terms, options.taxonomies);
  }
  let authors = null;
  if (options.authors && !dryRun) {
    authors = await resolveAuthors(bundle, options.authors);
  }
  const commentsPlan = options.comments && !dryRun ? emptyCommentPlan() : null;
  // In a dry run a supplied author resolver is intentionally not executed, so
  // whether an author would be dropped is unknown and must not be counted.
  const authorsDeferred = dryRun && Boolean(options.authors);
  const applied = [];
  const skipped = [];
  const errors = [];
  let privateCount = 0;
  let pendingCount = 0;
  let droppedAuthorCount = 0;
  let coverWiredCount = 0;
  let coverMissingCount = 0;
  for (const record of bundle.records) {
    const builtin = TYPE_TO_COLLECTION[record.wpType];
    const customMapping = !builtin && options.collectionMappings ? options.collectionMappings[record.wpType] : void 0;
    const collection = builtin ?? customMapping?.collection;
    if (!collection) {
      skipped.push({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: record.slug,
        reason: record.wpType === "attachment" ? "attachment \u2014 handled by media pipeline" : `unmapped wpType "${record.wpType}" \u2014 add an entry to wp-import config to route it`
      });
      continue;
    }
    if (record.status === "trash" || record.status === "auto-draft") {
      skipped.push({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: record.slug,
        reason: `status="${record.status}"`
      });
      continue;
    }
    if (!record.slug) {
      skipped.push({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: "",
        reason: "missing slug"
      });
      continue;
    }
    try {
      // A resume marker short-circuits the existence lookup.
      const markerId = options.resume?.state.documents[documentKey(collection, record.slug)];
      const exists = markerId ? { docs: [{ id: markerId }] } : await findDocuments(
        collection,
        { where: { slug: record.slug }, limit: 1 },
        options.actor
      );
      const existingId = exists.docs.length > 0 && typeof exists.docs[0]?.id === "string" ? exists.docs[0]?.id : void 0;
      const updateMode = options.update === true && existingId !== void 0;
      if (exists.docs.length > 0 && !updateMode) {
        skipped.push({
          wpId: record.wpId,
          wpType: record.wpType,
          slug: record.slug,
          reason: markerId ? "resume marker \u2014 already imported" : "slug already exists"
        });
        log(`skip ${collection}/${record.slug} (${markerId ? "resume marker" : "already exists"})`);
        await emitAudit(options.audit, {
          action: "import.wp.skipped",
          targetType: collection,
          targetId: existingId,
          payload: {
            wpId: record.wpId,
            wpType: record.wpType,
            slug: record.slug,
            reason: markerId ? "resume marker" : "slug already exists"
          }
        });
        continue;
      }
      if (record.status === "private") privateCount++;
      else if (record.status === "pending") pendingCount++;
      if (!authorsDeferred && record.wpAuthorLogin && !authors?.authorIds.has(record.wpAuthorLogin)) {
        droppedAuthorCount++;
      }
      const coverImageId = resolveCoverImageId(record, resolution);
      if (collection === "posts") {
        if (coverImageId) {
          coverWiredCount++;
        } else if (recordHasFeaturedImage(record)) {
          coverMissingCount++;
        }
      }
      const termIds = collection === "posts" && taxonomies ? pickPostTermIds(record, taxonomies) : { categoryIds: [], tagIds: [] };
      const authorId = collection === "posts" && authors && record.wpAuthorLogin ? authors.authorIds.get(record.wpAuthorLogin) ?? void 0 : void 0;
      if (dryRun) {
        applied.push({
          wpId: record.wpId,
          wpType: record.wpType,
          collection,
          slug: record.slug,
          title: record.title,
          coverImageId,
          categoryIds: termIds.categoryIds,
          tagIds: termIds.tagIds,
          authorId
        });
        log(`plan ${collection}/${record.slug}`);
        continue;
      }
      const originalAuthorField = options.preserveOriginalAuthor?.[collection];
      const originalAuthorName = originalAuthorField ? resolveOriginalAuthorName(record, bundle) : void 0;
      const data = buildDocData(
        record,
        resolution,
        collection,
        coverImageId,
        termIds,
        authorId,
        customMapping?.fieldOverrides,
        originalAuthorField && originalAuthorName ? { field: originalAuthorField, value: originalAuthorName } : void 0
      );
      const mappedStatus = mapStatusToFramework(record.status);
      const saved = await saveDocument(
        collection,
        updateMode && existingId ? existingId : null,
        // Phase 21.17 — visibility rides the data payload (it's a
        // collection column, validated by the Zod schema), while
        // status stays as the saveDocument opts override. Both
        // are derived from the WP record's `<wp:status>` here.
        { ...data, visibility: mappedStatus.visibility },
        options.actor,
        {
          status: mappedStatus.status
        }
      );
      const savedId = typeof saved.doc.id === "string" ? saved.doc.id : void 0;
      applied.push({
        wpId: record.wpId,
        wpType: record.wpType,
        collection,
        slug: record.slug,
        title: record.title,
        coverImageId,
        categoryIds: termIds.categoryIds,
        tagIds: termIds.tagIds,
        authorId
      });
      log(updateMode ? `update ${collection}/${record.slug}` : `write ${collection}/${record.slug}`);
      options.reportHtml?.emit({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: record.slug,
        title: record.title,
        rawContent: record.rawContent,
        lexical: data.content
      });
      await emitAudit(options.audit, {
        action: updateMode ? "import.wp.updated" : "import.wp.applied",
        targetType: collection,
        targetId: savedId,
        payload: {
          wpId: record.wpId,
          wpType: record.wpType,
          slug: record.slug,
          title: record.title,
          coverImageId,
          categoryIds: termIds.categoryIds,
          tagIds: termIds.tagIds,
          authorId
        }
      });
      if (options.resume && savedId) {
        options.resume.state.documents[documentKey(collection, record.slug)] = savedId;
        options.resume.persist();
      }
      if (commentsPlan && options.comments && collection === "posts") {
        const postId = typeof saved.doc.id === "string" ? saved.doc.id : null;
        if (postId) {
          await importPostComments({
            record,
            postId,
            collection,
            deps: options.comments,
            plan: commentsPlan,
            log,
            resume: options.resume
          });
        }
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      errors.push({ wpId: record.wpId, slug: record.slug, message });
      log(`error ${collection}/${record.slug}: ${message}`);
      await emitAudit(options.audit, {
        action: "import.wp.error",
        targetType: collection,
        payload: {
          wpId: record.wpId,
          wpType: record.wpType,
          slug: record.slug,
          message
        }
      });
    }
  }
  const notes = [];
  if (privateCount > 0) {
    notes.push(
      `${privateCount} record${privateCount === 1 ? "" : "s"} with WP status "private" imported as published with visibility=private (Phase 21.17).`
    );
  }
  if (pendingCount > 0) {
    notes.push(
      `${pendingCount} record${pendingCount === 1 ? "" : "s"} with WP status "pending" imported as draft.`
    );
  }
  if (droppedAuthorCount > 0) {
    notes.push(
      authors ? `${droppedAuthorCount} record${droppedAuthorCount === 1 ? "" : "s"} dropped their original WP author (resolver returned null for the matching login).` : `${droppedAuthorCount} record${droppedAuthorCount === 1 ? "" : "s"} dropped their original WP author \u2014 opt in by passing \`authors\` to \`applyBundle\` (Phase 21.8).`
    );
  }
  if (coverWiredCount > 0) {
    notes.push(
      `${coverWiredCount} post${coverWiredCount === 1 ? "" : "s"} wired a featured image to coverImage from the WP _thumbnail_id reference.`
    );
  }
  if (coverMissingCount > 0) {
    notes.push(
      `${coverMissingCount} post${coverMissingCount === 1 ? "" : "s"} declared a WP featured image but the source asset was not resolvable (download failed, MIME rejected, or attachment record missing).`
    );
  }
  if (taxonomies) {
    if (taxonomies.errors.length > 0) {
      notes.push(
        `${taxonomies.errors.length} taxonomy term${taxonomies.errors.length === 1 ? "" : "s"} failed to resolve \u2014 see Taxonomies section.`
      );
    }
    if (taxonomies.skipped.length > 0) {
      notes.push(
        `${taxonomies.skipped.length} taxonomy term${taxonomies.skipped.length === 1 ? "" : "s"} skipped by the resolver (likely a custom taxonomy the project doesn't track).`
      );
    }
  } else if (!options.taxonomies && hasAnyTerm(bundle)) {
    // Only claim "no resolver supplied" when that is actually the case; in a
    // dry run `taxonomies` is null even though a resolver was passed.
    notes.push(
      "Categories/tags found in the WXR but no taxonomy resolver was supplied \u2014 terms were dropped (Phase 21.6 \u2014 opt in by passing `taxonomies` to `applyBundle`)."
    );
  }
  if (commentsPlan) {
    if (commentsPlan.skippedUnapproved > 0) {
      notes.push(
        `${commentsPlan.skippedUnapproved} comment${commentsPlan.skippedUnapproved === 1 ? "" : "s"} dropped because <wp:comment_approved> was not "1".`
      );
    }
    if (commentsPlan.errors.length > 0) {
      notes.push(
        `${commentsPlan.errors.length} comment${commentsPlan.errors.length === 1 ? "" : "s"} failed to insert \u2014 see Comments section.`
      );
    }
  } else if (!options.comments && hasAnyComment(bundle)) {
    // Same reasoning as for taxonomies: `commentsPlan` is null in a dry run
    // even when comment deps were passed.
    notes.push(
      "Comments found in the WXR but no comments deps were supplied \u2014 comments were dropped (Phase 21.7 \u2014 opt in by passing `comments` to `applyBundle`)."
    );
  }
  if (authors) {
    if (authors.errors.length > 0) {
      notes.push(
        `${authors.errors.length} author${authors.errors.length === 1 ? "" : "s"} failed to resolve \u2014 see Authors section.`
      );
    }
  }
  if (options.strict) {
    // Strict mode: secondary failures count as top-level errors.
    if (media) {
      for (const e of media.errors) {
        errors.push({ wpId: 0, slug: e.url, message: `media: ${e.reason}` });
      }
    }
    if (taxonomies) {
      for (const e of taxonomies.errors) {
        errors.push({
          wpId: 0,
          slug: `${e.key.taxonomy}/${e.key.slug}`,
          message: `taxonomy: ${e.reason}`
        });
      }
    }
    if (authors) {
      for (const e of authors.errors) {
        errors.push({ wpId: 0, slug: e.login, message: `author: ${e.reason}` });
      }
    }
    if (commentsPlan) {
      for (const e of commentsPlan.errors) {
        errors.push({
          wpId: 0,
          slug: `comment#${e.wpCommentId}`,
          message: `comment: ${e.reason}`
        });
      }
    }
  }
  return {
    applied,
    skipped,
    errors,
    attachments,
    media,
    taxonomies,
    comments: commentsPlan,
    authors,
    notes
  };
}
|
|
2333
|
+
/**
 * Build the data payload for one document from a WordPress record.
 *
 * Always sets `title` (falling back to "(untitled)"), `slug` and `content`
 * (the record's HTML converted to Lexical with media references rewritten).
 * Optional fields are only added when a value is available. For the "posts"
 * collection, cover image, categories, tags and author are added as well.
 *
 * @param {object} record - Source record.
 * @param {{ byUrl: Map, byAttachmentId: Map }} resolution - Media lookups.
 * @param {string} collection - Target collection name.
 * @param {string | undefined} coverImageId
 * @param {{ categoryIds: string[], tagIds: string[] }} termIds
 * @param {string | undefined} authorId
 * @param {Record<string, string>} [fieldOverrides] - Map of record meta key
 *   to target field name; targets that collide with a core field are ignored.
 * @param {{ field: string, value: string }} [originalAuthor] - Extra field
 *   carrying the original author's name.
 * @returns {object} The document data.
 */
function buildDocData(record, resolution, collection, coverImageId, termIds, authorId, fieldOverrides, originalAuthor) {
  const lexical = htmlToLexical(record.rawContent);
  const rewritten = rewriteLexicalMedia(lexical, resolution);
  const data = {
    title: record.title || "(untitled)",
    slug: record.slug,
    content: rewritten
  };
  if (record.excerpt) {
    data.excerpt = record.excerpt;
  }
  if (collection === "posts" && coverImageId) {
    data.coverImage = coverImageId;
  }
  if (collection === "posts") {
    if (termIds.categoryIds.length > 0) data.categories = termIds.categoryIds;
    if (termIds.tagIds.length > 0) data.tags = termIds.tagIds;
    if (authorId) data.author = authorId;
  }
  if (fieldOverrides) {
    // Core fields may never be overwritten from post meta.
    const protectedFields = /* @__PURE__ */ new Set(["title", "slug", "content", "excerpt", "publishedAt", "coverImage", "categories", "tags", "author"]);
    for (const [metaKey, fieldName] of Object.entries(fieldOverrides)) {
      if (protectedFields.has(fieldName)) continue;
      const value = record.meta[metaKey];
      if (typeof value === "string" && value.length > 0) {
        data[fieldName] = value;
      }
    }
  }
  if (originalAuthor) {
    data[originalAuthor.field] = originalAuthor.value;
  }
  if (record.publishedAt) {
    // WordPress "YYYY-MM-DD HH:MM:SS" values carry no zone and are read as
    // UTC by appending "Z". A value that already ends in a zone designator
    // must be left alone: appending a second "Z" would make it unparseable
    // and the publish date would be silently dropped.
    const normalized = record.publishedAt.replace(" ", "T");
    const hasZone = /T.*(?:z|[+-]\d{2}:?\d{2})$/i.test(normalized);
    const date = new Date(hasZone ? normalized : `${normalized}Z`);
    if (!Number.isNaN(date.getTime())) {
      data.publishedAt = date.toISOString();
    }
  }
  return data;
}
|
|
2374
|
+
/**
 * Report whether a record carries at least one media reference whose
 * `kind` is `"featured"`.
 *
 * @param record  Object exposing a `mediaRefs` array.
 * @returns {boolean}
 */
function recordHasFeaturedImage(record) {
  const isFeatured = ({ kind }) => kind === "featured";
  return record.mediaRefs.some(isFeatured);
}
|
|
2377
|
+
/**
 * Report whether the bundle holds any term: either in its top-level
 * `terms` list, or attached to a record whose `wpType` is not
 * `"attachment"`.
 *
 * @param bundle  Object exposing `terms` and `records` arrays.
 * @returns {boolean}
 */
function hasAnyTerm(bundle) {
  const carriesTerms = (entry) => entry.terms.length > 0 && entry.wpType !== "attachment";
  return bundle.terms.length > 0 || bundle.records.some(carriesTerms);
}
|
|
2381
|
+
/**
 * Report whether any record other than an `"attachment"` carries at
 * least one comment.
 *
 * @param bundle  Object exposing a `records` array.
 * @returns {boolean}
 */
function hasAnyComment(bundle) {
  const carriesComments = (entry) => entry.comments.length > 0 && entry.wpType !== "attachment";
  return bundle.records.some(carriesComments);
}
|
|
2384
|
+
/**
 * Pick a human-readable name for the record's original author.
 *
 * Looks up the record's `wpAuthorLogin` among `bundle.authors` and returns
 * the trimmed `displayName` of the match; falls back to the login itself
 * when there is no match or the display name is missing/blank.
 *
 * @param record  Object exposing `wpAuthorLogin`.
 * @param bundle  Object exposing an `authors` array of `{ login, displayName }`.
 * @returns The resolved name, or `undefined` when the record has no login.
 */
function resolveOriginalAuthorName(record, bundle) {
  const { wpAuthorLogin } = record;
  if (!wpAuthorLogin) {
    return undefined;
  }
  const author = bundle.authors.find((candidate) => candidate.login === wpAuthorLogin);
  const displayName = author?.displayName?.trim();
  return displayName ? displayName : wpAuthorLogin;
}
|
|
2390
|
+
/**
 * Resolve the id to use as cover image for a record.
 *
 * Takes the first media reference of kind `"featured"` and looks it up
 * first by attachment id (`resolution.byAttachmentId`), then by source URL
 * (`resolution.byUrl`).
 *
 * @param record      Object exposing a `mediaRefs` array.
 * @param resolution  Object exposing `byAttachmentId` and `byUrl` lookups
 *                    with a `get(key)` method.
 * @returns The resolved id, or `undefined` when there is no featured
 *          reference or neither lookup yields a truthy id.
 */
function resolveCoverImageId(record, resolution) {
  const featured = record.mediaRefs.find(({ kind }) => kind === "featured");
  if (!featured) {
    return undefined;
  }

  const fromAttachment = featured.wpAttachmentId !== null
    ? resolution.byAttachmentId.get(featured.wpAttachmentId)
    : undefined;
  if (fromAttachment) {
    return fromAttachment;
  }

  const fromUrl = featured.sourceUrl
    ? resolution.byUrl.get(featured.sourceUrl)
    : undefined;
  // Any falsy lookup result collapses to `undefined`.
  return fromUrl || undefined;
}
|
|
2403
|
+
/**
 * Translate a source post status into a `{ status, visibility }` pair.
 *
 * `"publish"` -> published/public, `"private"` -> published/private,
 * anything else -> draft/public. A new object is returned on every call.
 *
 * @param status  Source status string.
 * @returns {{ status: string, visibility: string }}
 */
function mapStatusToFramework(status) {
  switch (status) {
    case "publish":
      return { status: "published", visibility: "public" };
    case "private":
      return { status: "published", visibility: "private" };
    default:
      return { status: "draft", visibility: "public" };
  }
}
|
|
2408
|
+
/**
 * Intentionally does nothing and yields `undefined`.
 * NOTE(review): presumably used as a default callback — confirm against callers.
 */
function noop3() {
  return undefined;
}
|
|
2410
|
+
/**
 * Forward an audit event to `deps.record`, if a sink was supplied.
 *
 * Best-effort by design: any error thrown or rejected by the sink is
 * swallowed, so the returned promise resolves regardless.
 *
 * @param deps   Optional object exposing `record(event)`; falsy disables auditing.
 * @param event  Event payload passed through unchanged.
 * @returns {Promise<void>}
 */
async function emitAudit(deps, event) {
  if (deps) {
    try {
      await deps.record(event);
    } catch {
      // Deliberately ignored: a failing audit sink is not reported to the caller.
    }
  }
}
|
|
2417
|
+
export {
|
|
2418
|
+
ResumeStateError,
|
|
2419
|
+
WpImportConfigError,
|
|
2420
|
+
WpImportStreamError,
|
|
2421
|
+
WpMediaDownloadError,
|
|
2422
|
+
WpMediaSsrfError,
|
|
2423
|
+
applyBundle,
|
|
2424
|
+
buildAttachmentIndex,
|
|
2425
|
+
documentKey,
|
|
2426
|
+
downloadMedia,
|
|
2427
|
+
emptyCommentPlan,
|
|
2428
|
+
emptyResumeState,
|
|
2429
|
+
formatApplyReport,
|
|
2430
|
+
formatSummary,
|
|
2431
|
+
htmlToLexical,
|
|
2432
|
+
importPostComments,
|
|
2433
|
+
isAllowedMimeType,
|
|
2434
|
+
loadConfigFromPath,
|
|
2435
|
+
loadResumeState,
|
|
2436
|
+
parseConfig,
|
|
2437
|
+
parseWxr,
|
|
2438
|
+
parseWxrStream,
|
|
2439
|
+
persistResumeState,
|
|
2440
|
+
pickPostTermIds,
|
|
2441
|
+
resolveAuthors,
|
|
2442
|
+
resolveEnvDownloadOptions,
|
|
2443
|
+
resolveTaxonomies,
|
|
2444
|
+
rewriteLexicalMedia,
|
|
2445
|
+
runCli,
|
|
2446
|
+
runMediaPipeline,
|
|
2447
|
+
termCacheKey
|
|
2448
|
+
};
|