@nexpress/wp-import 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2448 @@
1
+ // src/parse/wxr.ts
2
+ import { XMLParser } from "fast-xml-parser";
3
/**
 * Parse a complete WXR (WordPress eXtended RSS) document held in memory.
 *
 * @param {string} xml - The XML text handed to the parser.
 * @returns {{site: object, authors: object[], terms: object[], records: object[]}}
 *   The channel broken down into site info, authors, channel-level terms and records.
 * @throws {Error} When the parsed tree has no `rss.channel` node.
 */
function parseWxr(xml) {
  const parserOptions = {
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    parseAttributeValue: false,
    parseTagValue: false,
    trimValues: true,
    cdataPropName: "__cdata",
    // Tags listed in MULTI_VALUE_TAGS are always surfaced as arrays.
    isArray: (tagName) => MULTI_VALUE_TAGS.has(tagName)
  };
  const document = new XMLParser(parserOptions).parse(xml);
  const channel = document.rss?.channel;
  if (!channel) {
    throw new Error("Invalid WXR: missing <rss><channel> root");
  }
  return {
    site: parseSite(channel),
    authors: parseAuthors(channel),
    terms: parseChannelTerms(channel),
    records: parseRecords(channel)
  };
}
24
// Tag names the XML parser must always report as arrays, even when a
// document contains only a single occurrence of them.
var MULTI_VALUE_TAGS = /* @__PURE__ */ new Set([
  "item",
  "wp:author",
  "wp:category", "wp:tag", "wp:term",
  "wp:postmeta",
  "wp:comment",
  "category"
]);
34
/**
 * Normalise a parsed XML value to a plain string.
 *
 * Accepted shapes: a bare string, an object carrying `__cdata`, or an
 * object carrying `#text`. Anything else (including falsy input) yields "".
 *
 * `__cdata` may also be an array of strings: an element whose body is split
 * over several CDATA sections (WordPress does this to escape a literal
 * "]]>") produces one entry per section. Those segments are concatenated
 * instead of being discarded.
 *
 * @param {unknown} value - Parsed tag value.
 * @returns {string} The text content, or "" when none can be read.
 */
function asText(value) {
  if (!value) return "";
  if (typeof value === "string") return value;
  const cdata = value.__cdata;
  if (typeof cdata === "string") return cdata;
  if (Array.isArray(cdata)) {
    const joined = cdata.filter((segment) => typeof segment === "string").join("");
    // Fall through to "#text" only when the CDATA segments carried nothing.
    if (joined.length > 0) return joined;
  }
  const text = value["#text"];
  if (typeof text === "string") return text;
  return "";
}
41
/**
 * Like `asText`, but reports missing or empty text as `null`.
 *
 * @param {unknown} value - Parsed tag value.
 * @returns {string|null} The non-empty text, or null.
 */
function asOptionalText(value) {
  const text = asText(value);
  if (text.length === 0) {
    return null;
  }
  return text;
}
45
/**
 * Read the site-level fields from a parsed `<channel>` node.
 *
 * @param {object} channel - Parsed channel node.
 * @returns {object} title, link, description, baseSiteUrl, baseBlogUrl
 *   (strings, "" when absent) and language (string or null).
 */
function parseSite(channel) {
  const textOf = (tag) => asText(channel[tag]);
  return {
    title: textOf("title"),
    link: textOf("link"),
    description: textOf("description"),
    baseSiteUrl: textOf("wp:base_site_url"),
    baseBlogUrl: textOf("wp:base_blog_url"),
    language: asOptionalText(channel.language)
  };
}
55
/**
 * Collect the `<wp:author>` rows of a channel.
 *
 * @param {object} channel - Parsed channel node.
 * @returns {object[]} One entry per author row; empty when there are none.
 */
function parseAuthors(channel) {
  const authors = [];
  for (const entry of channel["wp:author"] ?? []) {
    authors.push({
      wpId: parseIntOrZero(asText(entry["wp:author_id"])),
      login: asText(entry["wp:author_login"]),
      email: asText(entry["wp:author_email"]),
      displayName: asText(entry["wp:author_display_name"]),
      description: asOptionalText(entry["wp:author_description"])
    });
  }
  return authors;
}
65
/**
 * Gather the channel-level term declarations: categories first, then tags,
 * then generic `<wp:term>` rows.
 *
 * @param {object} channel - Parsed channel node.
 * @returns {{taxonomy: string, slug: string, name: string}[]} Terms in document order per group.
 */
function parseChannelTerms(channel) {
  const terms = [];
  const rowsOf = (tag) => channel[tag] ?? [];

  for (const category of rowsOf("wp:category")) {
    terms.push({
      taxonomy: "category",
      slug: asText(category["wp:category_nicename"]),
      name: asText(category["wp:cat_name"])
    });
  }

  for (const tag of rowsOf("wp:tag")) {
    terms.push({
      taxonomy: "post_tag",
      slug: asText(tag["wp:tag_slug"]),
      name: asText(tag["wp:tag_name"])
    });
  }

  for (const term of rowsOf("wp:term")) {
    // Prefer <wp:term_taxonomy>; fall back to <wp:taxonomy>.
    const taxonomy = asText(term["wp:term_taxonomy"]) || asText(term["wp:taxonomy"]);
    if (taxonomy) {
      terms.push({
        taxonomy,
        slug: asText(term["wp:term_slug"]),
        name: asText(term["wp:term_name"])
      });
    }
  }

  return terms;
}
92
/**
 * Convert every `<item>` of a channel into a record.
 *
 * @param {object} channel - Parsed channel node.
 * @returns {object[]} One record per item; empty when the channel has none.
 */
function parseRecords(channel) {
  const records = [];
  (channel.item ?? []).forEach((entry) => {
    records.push(parseRecord(entry));
  });
  return records;
}
96
/**
 * Build one record from a parsed `<item>` node.
 *
 * @param {object} item - Parsed item node.
 * @returns {object} The record: ids, type, status, slug, title, excerpt,
 *   raw content, author login, GMT dates, terms, postmeta, media
 *   references and comments.
 */
function parseRecord(item) {
  const textOf = (tag) => asText(item[tag]);
  const meta = parseMeta(item);
  const wpType = textOf("wp:post_type");
  const rawContent = textOf("content:encoded");
  const attachmentUrl = textOf("wp:attachment_url");
  return {
    wpId: parseIntOrZero(textOf("wp:post_id")),
    wpType,
    status: coerceStatus(textOf("wp:status")),
    slug: textOf("wp:post_name"),
    title: asText(item.title),
    excerpt: asOptionalText(item["excerpt:encoded"]),
    rawContent,
    wpAuthorLogin: textOf("dc:creator"),
    publishedAt: textOf("wp:post_date_gmt"),
    updatedAt: textOf("wp:post_modified_gmt"),
    terms: parseItemCategories(item),
    meta,
    mediaRefs: parseMediaRefs({ rawContent, attachmentUrl, wpType, meta }),
    comments: parseComments(item)
  };
}
119
/**
 * Read the `<category>` rows attached to an item.
 *
 * Each row becomes `{ taxonomy, slug, name }`. The taxonomy comes from the
 * `domain` attribute (default "category"), the slug from `nicename`
 * (default ""), and the name from the CDATA body or, failing that, the
 * plain text body.
 *
 * A `<category>` written without any attributes and with plain text is
 * delivered as a bare string rather than an object; that string is used as
 * the name so hand-written XML does not lose its term names.
 *
 * @param {object} item - Parsed item node.
 * @returns {{taxonomy: string, slug: string, name: string}[]} Terms in document order.
 */
function parseItemCategories(item) {
  const terms = [];
  for (const row of item.category ?? []) {
    if (typeof row === "string") {
      // No attributes and no CDATA: the whole row is the term name.
      terms.push({ taxonomy: "category", slug: "", name: row });
      continue;
    }
    terms.push({
      taxonomy: row["@_domain"] ?? "category",
      slug: row["@_nicename"] ?? "",
      // Real WXR exports wrap the term name in CDATA, but tests
      // and hand-written XML may use plain text. Read both.
      name: row.__cdata ?? row["#text"] ?? ""
    });
  }
  return terms;
}
133
/**
 * Flatten an item's `<wp:postmeta>` rows into a key → value object.
 * Rows with an empty key are ignored; a repeated key keeps its last value.
 *
 * @param {object} item - Parsed item node.
 * @returns {Object<string, string>} Postmeta values by key.
 */
function parseMeta(item) {
  const meta = {};
  const rows = item["wp:postmeta"] ?? [];
  for (const entry of rows) {
    const metaKey = asText(entry["wp:meta_key"]);
    if (metaKey) {
      meta[metaKey] = asText(entry["wp:meta_value"]);
    }
  }
  return meta;
}
142
/**
 * Read the `<wp:comment>` rows attached to an item.
 *
 * @param {object} item - Parsed item node.
 * @returns {object[]} One entry per comment; `approved` is true only when
 *   `<wp:comment_approved>` is exactly "1".
 */
function parseComments(item) {
  const comments = [];
  for (const entry of item["wp:comment"] ?? []) {
    comments.push({
      wpId: parseIntOrZero(asText(entry["wp:comment_id"])),
      parentWpId: parseOptionalInt(asText(entry["wp:comment_parent"])),
      authorName: asText(entry["wp:comment_author"]),
      authorEmail: asOptionalText(entry["wp:comment_author_email"]),
      authorUrl: asOptionalText(entry["wp:comment_author_url"]),
      date: asText(entry["wp:comment_date_gmt"]),
      content: asText(entry["wp:comment_content"]),
      approved: asText(entry["wp:comment_approved"]) === "1"
    });
  }
  return comments;
}
155
// Matches every <img ...> tag (case-insensitive).
var IMG_TAG_RE = /<img\b[^>]*>/gi;
// Captures the quoted value of the `src` attribute in group 1. The
// lookbehind rejects attribute names that merely END in "src" after a
// hyphen or word character (e.g. `data-src`, `data-lazy-src`), which a
// plain `\bsrc` would match because "-" is a word boundary.
var SRC_ATTR_RE = /(?<![\w-])src\s*=\s*["']([^"']+)["']/i;
// Captures the numeric id from a `wp-image-<id>` token.
var WP_ATTACHMENT_ID_RE = /wp-image-(\d+)/i;
158
/**
 * Derive the media references of one record.
 *
 * Order of the result: the featured image (when `_thumbnail_id` postmeta is
 * set), then each distinct `<img src>` URL found in the content, then the
 * attachment's own URL when the record is an attachment.
 *
 * @param {{rawContent: string, attachmentUrl: string, wpType: string, meta: object}} args
 * @returns {{sourceUrl: string, wpAttachmentId: number|null, kind: string}[]}
 */
function parseMediaRefs(args) {
  const { meta, rawContent, wpType, attachmentUrl } = args;
  const refs = [];

  // The featured image is known only by attachment id, so it has no URL here.
  if (meta._thumbnail_id) {
    refs.push({
      sourceUrl: "",
      wpAttachmentId: parseIntOrZero(meta._thumbnail_id),
      kind: "featured"
    });
  }

  const seenUrls = /* @__PURE__ */ new Set();
  for (const [imgTag] of rawContent.matchAll(IMG_TAG_RE)) {
    const url = SRC_ATTR_RE.exec(imgTag)?.[1];
    if (!url || seenUrls.has(url)) continue;
    seenUrls.add(url);
    const attachmentId = WP_ATTACHMENT_ID_RE.exec(imgTag);
    refs.push({
      sourceUrl: url,
      wpAttachmentId: attachmentId ? parseIntOrZero(attachmentId[1] ?? "") : null,
      kind: "inline"
    });
  }

  const isAttachmentWithUrl = wpType === "attachment" && attachmentUrl;
  if (isAttachmentWithUrl) {
    refs.push({
      sourceUrl: attachmentUrl,
      wpAttachmentId: null,
      kind: "inline"
    });
  }
  return refs;
}
191
/**
 * Pass a recognised post status through unchanged; map anything else to "draft".
 *
 * @param {string} raw - Status text read from the export.
 * @returns {string} One of the recognised statuses.
 */
function coerceStatus(raw) {
  const recognised = ["publish", "draft", "private", "pending", "trash", "auto-draft"];
  return recognised.includes(raw) ? raw : "draft";
}
204
/**
 * Parse a base-10 integer, yielding 0 when the text is not a finite number.
 *
 * @param {string} value - Text to parse.
 * @returns {number} The parsed integer, or 0.
 */
function parseIntOrZero(value) {
  const parsed = Number.parseInt(value, 10);
  if (!Number.isFinite(parsed)) {
    return 0;
  }
  return parsed;
}
208
/**
 * Parse a base-10 integer that must be strictly positive.
 *
 * @param {string} value - Text to parse.
 * @returns {number|null} The parsed integer, or null for empty input,
 *   non-numeric input, zero, or negative numbers.
 */
function parseOptionalInt(value) {
  if (!value) return null;
  const parsed = Number.parseInt(value, 10);
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? parsed : null;
}
213
+
214
+ // src/parse/wxr-stream.ts
215
+ import { createReadStream } from "fs";
216
// Read-chunk size used when the caller does not pass `highWaterMark` (64 KiB).
var DEFAULT_HIGH_WATER_MARK = 2 ** 16;
// Buffer-size ceiling used when the caller does not pass `maxItemBytes` (64 MiB).
var DEFAULT_MAX_ITEM_BYTES = 2 ** 26;
// Literal markers used to cut individual items out of the stream.
var ITEM_OPEN = "<item>";
var ITEM_CLOSE = "</item>";
220
/** Error raised by the streaming WXR reader when a size limit is exceeded. */
var WpImportStreamError = class WpImportStreamError extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    Object.assign(this, { name: "WpImportStreamError" });
  }
};
226
/**
 * Stream a WXR file from disk: parse the channel header eagerly and expose
 * the items as a lazy async iterable.
 *
 * The file is read in chunks until the first `<item>`; everything before it
 * is parsed as the header. Each subsequent `<item>…</item>` slice is wrapped
 * in a minimal channel and parsed on its own. An unterminated trailing item
 * is dropped.
 *
 * Resource handling: the underlying read stream is destroyed when header
 * parsing fails, when a size limit is exceeded, and when item iteration
 * finishes or is abandoned early (e.g. `break` inside `for await`). If the
 * caller never starts iterating `items`, the stream stays open.
 *
 * @param {string} path - Path of the WXR file.
 * @param {{highWaterMark?: number, maxItemBytes?: number}} [options]
 *   Chunk size for reads and the ceiling on buffered text (compared against
 *   string length) before the read is aborted.
 * @returns {Promise<{header: {site: object, authors: object[], terms: object[]}, items: AsyncIterable<object>}>}
 * @throws {WpImportStreamError} When the header or a single item outgrows `maxItemBytes`.
 * @throws {Error} Whatever `parseWxr` or the file read raises.
 */
async function parseWxrStream(path, options = {}) {
  const highWaterMark = options.highWaterMark ?? DEFAULT_HIGH_WATER_MARK;
  const maxItemBytes = options.maxItemBytes ?? DEFAULT_MAX_ITEM_BYTES;
  const stream = createReadStream(path, { encoding: "utf8", highWaterMark });
  const reader = stream[Symbol.asyncIterator]();

  let headerBundle;
  let tail = "";
  try {
    let buffer = "";
    let firstItemAt = -1;
    while (firstItemAt < 0) {
      const next = await reader.next();
      if (next.done) break;
      buffer += next.value;
      firstItemAt = buffer.indexOf(ITEM_OPEN);
      if (buffer.length > maxItemBytes && firstItemAt < 0) {
        throw new WpImportStreamError(
          `header exceeded ${maxItemBytes} bytes without finding any <item> \u2014 is this a WXR file?`
        );
      }
    }
    let headerXml;
    if (firstItemAt >= 0) {
      // The header was cut mid-channel, so close the open elements by hand.
      headerXml = buffer.slice(0, firstItemAt) + "</channel></rss>";
      tail = buffer.slice(firstItemAt);
    } else if (buffer.includes("</channel>")) {
      // Whole file read without any item and it already closes its channel:
      // appending the closing tags again would make the XML malformed.
      headerXml = buffer;
    } else {
      headerXml = buffer + "</channel></rss>";
    }
    headerBundle = parseWxr(headerXml);
  } catch (error) {
    // Nobody will consume the rest of the file; release the descriptor.
    stream.destroy();
    throw error;
  }

  async function* iterate() {
    let local = tail;
    let exhausted = false;
    try {
      while (true) {
        const open = local.indexOf(ITEM_OPEN);
        if (open < 0) {
          if (exhausted) return;
          const next = await reader.next();
          if (next.done) {
            exhausted = true;
            continue;
          }
          local += next.value;
          if (local.length > maxItemBytes) {
            throw new WpImportStreamError(
              `WXR item exceeded ${maxItemBytes} bytes \u2014 abort. Likely an embedded base64 payload that won't fit; raise --max-item-bytes if you really need it.`
            );
          }
          continue;
        }
        const close = local.indexOf(ITEM_CLOSE, open);
        if (close < 0) {
          // Item started but not finished yet: pull more input.
          const next = await reader.next();
          if (next.done) {
            if (exhausted) return;
            exhausted = true;
            continue;
          }
          local += next.value;
          if (local.length > maxItemBytes) {
            throw new WpImportStreamError(
              `WXR item exceeded ${maxItemBytes} bytes \u2014 abort.`
            );
          }
          continue;
        }
        const itemEnd = close + ITEM_CLOSE.length;
        const itemSlice = local.slice(open, itemEnd);
        local = local.slice(itemEnd);
        const single = parseWxr(wrapInChannel(itemSlice));
        const record = single.records[0];
        if (record) yield record;
      }
    } finally {
      // Runs on normal completion, on a thrown error, and when the consumer
      // stops early; destroy() is safe to call on an already-closed stream.
      stream.destroy();
    }
  }

  return {
    header: {
      site: headerBundle.site,
      authors: headerBundle.authors,
      terms: headerBundle.terms
    },
    items: { [Symbol.asyncIterator]: iterate }
  };
}
301
/**
 * Wrap a single `<item>` slice in a minimal RSS document (namespace
 * declarations plus empty site fields) so it can be parsed on its own.
 *
 * @param {string} itemXml - The raw `<item>…</item>` text.
 * @returns {string} A standalone XML document containing that item.
 */
function wrapInChannel(itemXml) {
  const rssAttributes = [
    'version="2.0"',
    'xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"',
    'xmlns:content="http://purl.org/rss/1.0/modules/content/"',
    'xmlns:dc="http://purl.org/dc/elements/1.1/"',
    'xmlns:wp="http://wordpress.org/export/1.2/"'
  ].join(" ");
  const emptySiteFields =
    "<title></title><link></link><description></description>" +
    "<wp:base_site_url></wp:base_site_url><wp:base_blog_url></wp:base_blog_url>";
  const opening = `<?xml version="1.0" encoding="UTF-8"?><rss ${rssAttributes}><channel>${emptySiteFields}`;
  return opening + itemXml + "</channel></rss>";
}
304
+
305
+ // src/cli/index.ts
306
+ import { readFileSync as readFileSync2 } from "fs";
307
+ import { parseArgs } from "util";
308
+
309
+ // src/cli/config.ts
310
+ import { readFileSync } from "fs";
311
/** Error raised when the mapping config cannot be read or is invalid. */
var WpImportConfigError = class WpImportConfigError extends Error {
  /** @param {string} message - Human-readable description of the problem. */
  constructor(message) {
    super(message);
    Object.assign(this, { name: "WpImportConfigError" });
  }
};
317
/**
 * Read a JSON config file from disk and validate it.
 *
 * @param {string} path - Location of the config file.
 * @returns {{collectionMappings: object}} The validated config.
 * @throws {WpImportConfigError} When the file cannot be read, or when
 *   `parseConfig` rejects its content.
 */
function loadConfigFromPath(path) {
  const readSource = () => {
    try {
      return readFileSync(path, "utf8");
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new WpImportConfigError(`cannot read config ${path}: ${reason}`);
    }
  };
  return parseConfig(readSource(), path);
}
328
/**
 * Validate config JSON text and index its mappings by WordPress post type.
 *
 * @param {string} source - JSON text.
 * @param {string} [displayPath="<inline>"] - Label used as the prefix of error messages.
 * @returns {{collectionMappings: Object<string, {collection: string, fieldOverrides: (object|undefined)}>}}
 * @throws {WpImportConfigError} On invalid JSON, a non-object top level, a
 *   non-array `mappings`, an invalid mapping entry, or two mappings for the
 *   same `wpType`.
 */
function parseConfig(source, displayPath = "<inline>") {
  let parsed;
  try {
    parsed = JSON.parse(source);
  } catch (err) {
    throw new WpImportConfigError(
      `${displayPath}: invalid JSON \u2014 ${err instanceof Error ? err.message : String(err)}`
    );
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new WpImportConfigError(`${displayPath}: top-level value must be a JSON object`);
  }
  const mappings = parsed.mappings;
  const collectionMappings = {};
  if (mappings === undefined) {
    return { collectionMappings };
  }
  if (!Array.isArray(mappings)) {
    throw new WpImportConfigError(`${displayPath}: "mappings" must be an array`);
  }
  for (const [i, entry] of mappings.entries()) {
    const mapping = parseMapping(entry, `${displayPath}#mappings[${i}]`);
    // Look at own properties only: a plain truthiness test would also see
    // inherited members, so wpType "constructor" or "toString" would be
    // reported as a duplicate on first use.
    if (Object.hasOwn(collectionMappings, mapping.wpType)) {
      throw new WpImportConfigError(
        `${displayPath}: duplicate mapping for wpType "${mapping.wpType}"`
      );
    }
    // defineProperty instead of assignment so that a wpType of "__proto__"
    // becomes an ordinary entry rather than replacing the object's prototype.
    Object.defineProperty(collectionMappings, mapping.wpType, {
      value: {
        collection: mapping.collection,
        fieldOverrides: mapping.fieldOverrides
      },
      enumerable: true,
      writable: true,
      configurable: true
    });
  }
  return { collectionMappings };
}
362
/**
 * Validate one entry of the config's `mappings` array.
 *
 * Both camelCase (`wpType`, `fieldOverrides`) and snake_case (`wp_type`,
 * `field_overrides`) spellings are accepted; camelCase wins when both exist.
 *
 * @param {unknown} value - The raw entry.
 * @param {string} displayPath - Label used as the prefix of error messages.
 * @returns {{wpType: string, collection: string, fieldOverrides: (Object<string, string>|undefined)}}
 * @throws {WpImportConfigError} When the entry or any of its fields is malformed.
 */
function parseMapping(value, displayPath) {
  const isPlainObject = (candidate) =>
    Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;
  const reject = (detail) => {
    throw new WpImportConfigError(`${displayPath}: ${detail}`);
  };

  if (!isPlainObject(value)) {
    reject("each mapping must be an object");
  }
  const wpType = value.wpType ?? value.wp_type;
  const { collection } = value;
  if (!isNonEmptyString(wpType)) {
    reject('"wpType" is required and must be a non-empty string');
  }
  if (!isNonEmptyString(collection)) {
    reject('"collection" is required and must be a non-empty string');
  }

  const rawOverrides = value.fieldOverrides ?? value.field_overrides;
  if (rawOverrides === undefined) {
    return { wpType, collection, fieldOverrides: undefined };
  }
  if (!isPlainObject(rawOverrides)) {
    reject('"fieldOverrides" must be an object');
  }
  const fieldOverrides = {};
  for (const [metaKey, fieldName] of Object.entries(rawOverrides)) {
    if (!isNonEmptyString(fieldName)) {
      reject(`fieldOverrides["${metaKey}"] must be a non-empty string`);
    }
    fieldOverrides[metaKey] = fieldName;
  }
  return { wpType, collection, fieldOverrides };
}
395
+
396
+ // src/cli/format.ts
397
/**
 * Render the human-readable summary of a parsed bundle.
 *
 * Sections, in order: title, source/site, authors, channel taxonomies,
 * records by type, media counts, comments (only when there are any), and a
 * closing hint about `--apply`.
 *
 * @param {{bundle: object, sourcePath: string, dryRun: boolean}} args
 * @returns {string} The summary, lines joined with "\n".
 */
function formatSummary(args) {
  const { bundle, sourcePath, dryRun } = args;
  const { site, authors, terms, records } = bundle;
  const out = [];
  const emit = (...rows) => {
    out.push(...rows);
  };

  emit(`WordPress import${dryRun ? " \u2014 dry run" : ""}`, "");
  emit(`Source: ${sourcePath}`, `Site: ${site.title || "(untitled)"}`);
  if (site.link) emit(` ${site.link}`);
  if (site.language) emit(`Lang: ${site.language}`);

  emit("", `Authors (${authors.length})`);
  for (const { login, email, displayName } of authors) {
    const emailPart = email ? ` <${email}>` : "";
    const namePart = displayName ? ` ${displayName}` : "";
    emit(` ${login}${emailPart}${namePart}`);
  }
  if (authors.length === 0) emit(" (none)");

  emit("");
  const taxonomyTally = countByKey(terms, (term) => term.taxonomy);
  emit(`Channel taxonomies (${taxonomyTally.size})`);
  for (const [taxonomy, total] of sortedEntries(taxonomyTally)) {
    emit(` ${taxonomy.padEnd(12)} ${total}`);
  }
  if (taxonomyTally.size === 0) emit(" (none)");

  emit("");
  const typeTally = countByKey(records, (record) => record.wpType);
  emit(`Records (${records.length})`);
  for (const [wpType, total] of sortedEntries(typeTally)) {
    const note = typeAnnotation(wpType, records);
    const suffix = note ? ` ${note}` : "";
    emit(` ${wpType.padEnd(12)} ${total}${suffix}`);
  }
  if (typeTally.size === 0) emit(" (no items)");

  const inlineUrls = collectInlineMediaUrls(records);
  let featuredTotal = 0;
  for (const record of records) {
    for (const ref of record.mediaRefs) {
      if (ref.kind === "featured") featuredTotal += 1;
    }
  }
  emit("");
  emit(`Inline media refs (${inlineUrls.size} unique URL${inlineUrls.size === 1 ? "" : "s"})`);
  emit(`Featured images (${featuredTotal})`);

  let commentTotal = 0;
  let commentedRecords = 0;
  for (const record of records) {
    commentTotal += record.comments.length;
    if (record.comments.length > 0) commentedRecords += 1;
  }
  if (commentTotal > 0) {
    emit("");
    emit(
      `Comments: ${commentTotal} across ${commentedRecords} record${commentedRecords === 1 ? "" : "s"}`
    );
  }

  emit("");
  emit(
    dryRun
      ? "This was a dry run. Pass --apply to write to the database."
      : "Pass --apply to write to the database, or omit it to keep this summary view."
  );
  return out.join("\n");
}
459
/**
 * Optional note shown next to a record type in the summary.
 *
 * @param {string} wpType - WordPress post type.
 * @param {object[]} _records - Unused.
 * @returns {string} A note for attachments, "" for every other type.
 */
function typeAnnotation(wpType, _records) {
  return wpType === "attachment" ? "(downloaded + uploaded by the media pipeline)" : "";
}
465
/**
 * Tally rows by a derived key.
 *
 * @param {Iterable<any>} rows - Rows to count.
 * @param {(row: any) => any} keyOf - Extracts the grouping key from a row.
 * @returns {Map<any, number>} Occurrences per key, in first-seen order.
 */
function countByKey(rows, keyOf) {
  const tally = /* @__PURE__ */ new Map();
  for (const row of rows) {
    const groupKey = keyOf(row);
    const previous = tally.get(groupKey);
    tally.set(groupKey, previous === undefined ? 1 : previous + 1);
  }
  return tally;
}
473
/**
 * List a map's entries ordered by key, compared with `localeCompare`.
 *
 * @param {Map<string, any>} map - Map with string keys.
 * @returns {[string, any][]} A new, sorted array of entries.
 */
function sortedEntries(map) {
  const pairs = [...map.entries()];
  pairs.sort((left, right) => left[0].localeCompare(right[0]));
  return pairs;
}
476
/**
 * Collect the distinct, non-empty source URLs of all inline media references.
 *
 * @param {Iterable<{mediaRefs: Iterable<{kind: string, sourceUrl: string}>}>} records
 * @returns {Set<string>} Unique inline URLs in first-seen order.
 */
function collectInlineMediaUrls(records) {
  const urls = /* @__PURE__ */ new Set();
  for (const { mediaRefs } of records) {
    for (const { kind, sourceUrl } of mediaRefs) {
      if (kind !== "inline" || !sourceUrl) continue;
      urls.add(sourceUrl);
    }
  }
  return urls;
}
487
/**
 * Render the report produced by an apply pass.
 *
 * Sections, in order: written (or would-write) documents, skip reasons with
 * counts, errors (only when present), media, taxonomies, comments, authors,
 * and notes (only when present). The media / taxonomies / comments / authors
 * sections print a fallback line when the matching sub-report is absent.
 *
 * Skip reasons are listed in ascending order of the reason text. The
 * comparison is made on the reason alone; sorting the `[reason, count]`
 * pairs with the default comparator would compare their stringified form
 * ("reason,count") and misplace a reason that is a prefix of another.
 *
 * @param {object} report - Apply report (`applied`, `skipped`, `errors`,
 *   `notes`, plus optional `media`, `taxonomies`, `comments`, `authors`).
 * @param {{dryRun: boolean}} args - Whether the pass was a dry run.
 * @returns {string} The report, lines joined with "\n".
 */
function formatApplyReport(report, args) {
  const lines = [];
  lines.push(args.dryRun ? "Apply \u2014 dry run" : "Apply");
  lines.push("");
  lines.push(`${args.dryRun ? "Would write" : "Written"}: ${report.applied.length}`);
  for (const row of report.applied) {
    lines.push(` ${row.collection.padEnd(8)} ${row.slug} "${row.title}"`);
  }
  if (report.applied.length === 0) {
    lines.push(" (none)");
  }
  lines.push("");
  lines.push(`Skipped: ${report.skipped.length}`);
  const reasonCounts = /* @__PURE__ */ new Map();
  for (const row of report.skipped) {
    reasonCounts.set(row.reason, (reasonCounts.get(row.reason) ?? 0) + 1);
  }
  const byReason = ([left], [right]) => (left < right ? -1 : left > right ? 1 : 0);
  for (const [reason, count] of [...reasonCounts.entries()].sort(byReason)) {
    lines.push(` ${count.toString().padStart(3)} ${reason}`);
  }
  if (report.skipped.length === 0) {
    lines.push(" (none)");
  }
  if (report.errors.length > 0) {
    lines.push("");
    lines.push(`Errors: ${report.errors.length}`);
    for (const err of report.errors) {
      lines.push(` ${err.slug}: ${err.message}`);
    }
  }
  lines.push("");
  if (report.media) {
    const m = report.media;
    lines.push(
      `Media: ${m.uploaded} uploaded, ${m.reused} reused (hash match), ${m.skipped} skipped (dry run), ${m.errors.length} errors`
    );
    for (const err of m.errors) {
      lines.push(` ${err.url}: ${err.reason}`);
    }
  } else {
    lines.push("Media: pipeline not run (no upload hook supplied)");
  }
  lines.push("");
  if (report.taxonomies) {
    const t = report.taxonomies;
    lines.push(
      `Taxonomies: ${t.termIds.size} resolved, ${t.skipped.length} skipped, ${t.errors.length} errors`
    );
    for (const err of t.errors) {
      lines.push(` ${err.key.taxonomy}/${err.key.slug}: ${err.reason}`);
    }
    for (const skip of t.skipped) {
      lines.push(` skipped ${skip.taxonomy}/${skip.slug}`);
    }
  } else {
    lines.push("Taxonomies: resolver not supplied \u2014 terms dropped");
  }
  lines.push("");
  if (report.comments) {
    const c = report.comments;
    lines.push(
      `Comments: ${c.applied} imported, ${c.skippedUnapproved} unapproved, ${c.skippedByResume} resume-skipped, ${c.errors.length} errors`
    );
    for (const err of c.errors) {
      lines.push(` #${err.wpCommentId}: ${err.reason}`);
    }
  } else {
    lines.push("Comments: deps not supplied \u2014 comments dropped");
  }
  lines.push("");
  if (report.authors) {
    const a = report.authors;
    lines.push(
      `Authors: ${a.authorIds.size} resolved, ${a.skipped.length} skipped, ${a.errors.length} errors`
    );
    for (const err of a.errors) {
      lines.push(` ${err.login}: ${err.reason}`);
    }
    for (const skip of a.skipped) {
      lines.push(` skipped ${skip}`);
    }
  } else {
    lines.push("Authors: resolver not supplied \u2014 posts attributed to the import actor");
  }
  if (report.notes.length > 0) {
    lines.push("");
    lines.push("Notes");
    for (const note of report.notes) {
      lines.push(` - ${note}`);
    }
  }
  return lines.join("\n");
}
580
+
581
+ // src/cli/index.ts
582
// Default output sinks for the CLI: one line at a time to the console.
var DEFAULT_IO = {
  stdout(line) {
    return console.log(line);
  },
  stderr(line) {
    return console.error(line);
  }
};
586
// Option table handed to `parseArgs`.
var CLI_OPTIONS = {
  "dry-run": { type: "boolean", default: true },
  apply: { type: "boolean", default: false },

  // Phase 21.8: opt out of staff-user creation. With this flag the shim
  // swaps in a resolver that returns null for every author, so imported
  // posts have no `author` set and are attributed to the import operator
  // via `createdBy` / `updatedBy`.
  "no-create-authors": { type: "boolean", default: false },

  // Phase 21.9: path to a JSON config file that declares custom-post-type
  // to collection mappings and optional postmeta-key to field-name
  // overrides. Records whose `wpType` isn't in the config (and isn't
  // post / page / attachment) are skipped with a warning.
  config: { type: "string" },

  // Phase 21.12: escalate sub-pipeline warnings (media 4xx, MIME reject,
  // taxonomy/author resolver failures) into errors so the CLI exits
  // non-zero. Useful for "clean import or fail" scripts.
  strict: { type: "boolean", default: false },

  // Phase 21.12: rewrite the existing document instead of skipping when a
  // slug collides. Comments are NOT re-imported on an update pass; that
  // needs the per-comment idempotency keys landing in 21.14.
  update: { type: "boolean", default: false },

  // Phase 21.12: write a side-by-side HTML/Lexical diff for every imported
  // record so the operator can spot-check the conversion. Defaults to
  // writing `<wxr>.report.html` next to the source.
  "report-html": { type: "boolean", default: false },

  // Phase 21.12: override the default `<wxr>.report.html` path. Implies
  // `--report-html`; passing a path without the flag is fine too.
  "report-html-path": { type: "string" },

  // Phase 21.14: load + persist a sidecar resume marker so re-runs skip
  // work that already landed and dedupe comments by `wpCommentId`.
  // Defaults the marker path to `<wxr>.import-state.json`; override with
  // `--resume-state`.
  resume: { type: "boolean", default: false },

  // Phase 21.14: override the default resume-marker path.
  "resume-state": { type: "string" },

  help: { type: "boolean", short: "h" }
};
640
/**
 * Entry point of the `wp-import` command line.
 *
 * Without `--apply` the WXR file is parsed and a summary is printed. With
 * `--apply` the bundle is handed to `hooks.applyBundle`.
 *
 * Dry-run handling: the `dry-run` option defaults to `true` and a boolean
 * `parseArgs` option cannot be switched off from the command line, so its
 * parsed value says nothing about what the operator asked for. In apply
 * mode the dry-run flag is therefore taken from whether `--dry-run` was
 * actually present among the arguments; plain `--apply` writes, as the
 * usage text describes. The summary view still reports itself as a dry run.
 *
 * @param {string[]} argv - Command-line arguments (without node/script).
 * @param {{stdout: (line: string) => void, stderr: (line: string) => void}} [io]
 *   Output sinks; defaults to the console.
 * @param {{resolveActor: () => Promise<any>, applyBundle: (bundle: object, options: object) => Promise<object>}} [hooks]
 *   Required for `--apply`.
 * @returns {Promise<number>} Exit code: 0 on success, 1 on a runtime failure
 *   or when the apply report contains errors, 2 on bad usage.
 */
async function runCli(argv, io = DEFAULT_IO, hooks) {
  let parsed;
  try {
    parsed = parseArgs({
      args: argv,
      options: CLI_OPTIONS,
      allowPositionals: true,
      // Tokens reveal which options were explicitly passed.
      tokens: true
    });
  } catch (error) {
    io.stderr(error instanceof Error ? `wp-import: ${error.message}` : "wp-import: bad arguments");
    io.stderr("");
    io.stderr(USAGE);
    return 2;
  }
  if (parsed.values.help) {
    io.stdout(USAGE);
    return 0;
  }
  const sourcePath = parsed.positionals[0];
  if (!sourcePath) {
    io.stderr("wp-import: missing path to a WXR file");
    io.stderr("");
    io.stderr(USAGE);
    return 2;
  }
  let xml;
  try {
    xml = readFileSync2(sourcePath, "utf8");
  } catch (error) {
    io.stderr(
      `wp-import: cannot read ${sourcePath}: ${error instanceof Error ? error.message : String(error)}`
    );
    return 1;
  }
  let bundle;
  try {
    bundle = parseWxr(xml);
  } catch (error) {
    io.stderr(`wp-import: parse failed: ${error instanceof Error ? error.message : String(error)}`);
    return 1;
  }
  let collectionMappings = {};
  if (parsed.values.config) {
    try {
      collectionMappings = loadConfigFromPath(parsed.values.config).collectionMappings;
    } catch (error) {
      io.stderr(
        error instanceof WpImportConfigError ? `wp-import: ${error.message}` : `wp-import: ${error instanceof Error ? error.message : String(error)}`
      );
      return 1;
    }
  }
  if (!parsed.values.apply) {
    io.stdout(formatSummary({ bundle, sourcePath, dryRun: parsed.values["dry-run"] }));
    return 0;
  }
  if (!hooks) {
    io.stderr(
      "wp-import: --apply requires the shim that bootstraps core services. Run via `pnpm wp-import` from apps/web, not directly."
    );
    return 1;
  }
  let actor;
  try {
    actor = await hooks.resolveActor();
  } catch (error) {
    io.stderr(
      `wp-import: cannot resolve admin actor: ${error instanceof Error ? error.message : String(error)}`
    );
    return 1;
  }
  // Only an explicit --dry-run keeps an apply pass from writing.
  const dryRun = parsed.tokens.some(
    (token) => token.kind === "option" && token.name === "dry-run"
  );
  const reportHtmlPathOverride = parsed.values["report-html-path"];
  const reportHtmlPath = reportHtmlPathOverride && reportHtmlPathOverride.length > 0 ? reportHtmlPathOverride : parsed.values["report-html"] ? `${sourcePath}.report.html` : null;
  const resumeStatePathOverride = parsed.values["resume-state"];
  const resumeStatePath = resumeStatePathOverride && resumeStatePathOverride.length > 0 ? resumeStatePathOverride : parsed.values.resume ? `${sourcePath}.import-state.json` : null;
  const report = await hooks.applyBundle(bundle, {
    actor,
    dryRun,
    log: (line) => io.stdout(line),
    createAuthors: !parsed.values["no-create-authors"],
    collectionMappings,
    strict: parsed.values.strict,
    update: parsed.values.update,
    reportHtmlPath,
    resumeStatePath
  });
  io.stdout(formatApplyReport(report, { dryRun }));
  return report.errors.length > 0 ? 1 : 0;
}
729
// Help text printed for --help and after usage errors. The synopsis lists
// every option described below, including --config.
var USAGE = `Usage: wp-import <wxr-file> [--apply] [--dry-run] [--config <path>] [--strict] [--update] [--no-create-authors] [--report-html] [--report-html-path <path>] [--resume] [--resume-state <path>]

Reads a WordPress eXtended RSS export and either prints a summary
of what would be imported (default) or applies it to the database
(--apply). With --apply you can still pass --dry-run to walk the
records and surface skip / collision decisions without writing.

Options:
--apply Run the applier (writes via @nexpress/core).
Without this flag only the parsed summary
is printed.
--dry-run When combined with --apply, walk records
but skip the actual writes. Useful for
previewing what the import will do against
a real DB.
--no-create-authors Skip creating staff users for WP authors.
Imported posts come in without an author
wired and the import operator takes credit
via createdBy / updatedBy (Phase 21.8).
--config <path> Path to a JSON config file declaring
custom-post-type mappings. Each mapping
routes a wpType into a NexPress collection
and optionally maps WP postmeta keys to
collection field names (Phase 21.9).
--strict Escalate sub-pipeline warnings (media 4xx,
MIME reject, taxonomy / author resolver
failures) into errors so the CLI exits
non-zero (Phase 21.12).
--update Rewrite the existing document instead of
skipping when a slug collides. Comments
are NOT re-imported on an update pass
(Phase 21.12).
--report-html Write a side-by-side HTML/Lexical diff of
every imported record so the operator can
spot-check the conversion. Defaults to
<wxr>.report.html (Phase 21.12).
--report-html-path <path>
Override the default report path. Implies
--report-html.
--resume Read + persist a sidecar resume marker so
re-runs skip work that already landed and
dedupe comments by wpCommentId. Defaults
to <wxr>.import-state.json (Phase 21.14).
--resume-state <path>
Override the default resume-marker path.
Implies --resume.
-h, --help Show this help message.`;
776
+
777
+ // src/apply/index.ts
778
+ import { findDocuments, saveDocument } from "@nexpress/core";
779
+
780
+ // src/convert/html-to-lexical.ts
781
+ import { NodeType, parse } from "node-html-parser";
782
+
783
+ // src/convert/gutenberg.ts
784
// Matches one Gutenberg fence comment, e.g. `<!-- wp:heading {"level":3} -->`,
// `<!-- /wp:heading -->` or `<!-- wp:separator /-->`.
// Groups: 1 = "/" on a closing fence, 2 = block name, 4 = attribute JSON,
// 5 = "/" on a self-closing fence.
var FENCE_RE = /<!--\s*(\/?)wp:([\w/-]+)(\s+(\{[\s\S]*?\}))?\s*(\/)?\s*-->/g;
785
/**
 * Tell whether markup contains at least one opening Gutenberg fence
 * (`<!-- wp:name`).
 *
 * @param {string} html - Markup to inspect.
 * @returns {boolean} True when an opening fence is present.
 */
function isGutenbergSource(html) {
  const openingFence = /<!--\s*wp:[\w/-]+/i;
  return openingFence.test(html);
}
788
/**
 * Split Gutenberg-serialised markup into its top-level blocks.
 *
 * Only depth-0 blocks are emitted; nested fences stay inside the parent's
 * `innerHtml`. Markup that sits between top-level blocks is emitted as a
 * `"gutenberg-loose"` block (whitespace-only runs are skipped). A closing
 * fence that does not match the innermost open block is ignored. A block
 * that is never closed is emitted with everything up to the end of input.
 *
 * Markup that precedes a stray closing fence at depth 0 is kept as a loose
 * block rather than being dropped along with the fence.
 *
 * @param {string} source - Serialised post content.
 * @returns {{name: string, attrs: object, rawAttrs: string, innerHtml: string, selfClosing: boolean}[]}
 *   Blocks in document order.
 */
function parseGutenbergBlocks(source) {
  const blocks = [];
  const stack = [];
  let cursor = 0;

  // Emit the un-fenced markup between `cursor` and `end` (depth 0 only).
  const flushLoose = (end) => {
    if (stack.length !== 0 || end <= cursor) return;
    const looseHtml = source.slice(cursor, end);
    if (looseHtml.trim().length === 0) return;
    blocks.push({
      name: "gutenberg-loose",
      attrs: {},
      rawAttrs: "",
      innerHtml: looseHtml,
      selfClosing: false
    });
  };

  // matchAll iterates over a private copy of the regex that starts at the
  // shared regex's lastIndex, so make sure that is 0 first.
  FENCE_RE.lastIndex = 0;
  for (const match of source.matchAll(FENCE_RE)) {
    const [full, slash, rawName, , attrsJson, selfSlash] = match;
    const isCloser = slash === "/";
    const isSelfClosing = !isCloser && selfSlash === "/";
    const name = (rawName ?? "").trim();
    const matchStart = match.index;
    const matchEnd = matchStart + full.length;

    if (isCloser) {
      const top = stack[stack.length - 1];
      if (!top) {
        // Stray closer at depth 0: skip the fence, keep the markup before it.
        flushLoose(matchStart);
      } else if (top.name === name) {
        stack.pop();
        if (stack.length === 0) {
          blocks.push({
            name: top.name,
            attrs: top.attrs,
            rawAttrs: top.rawAttrs,
            innerHtml: source.slice(top.innerStart, matchStart),
            selfClosing: false
          });
        }
      }
      cursor = matchEnd;
      continue;
    }

    const rawAttrs = (attrsJson ?? "").trim();
    const attrs = parseAttrsJson(rawAttrs);
    flushLoose(matchStart);
    if (isSelfClosing) {
      if (stack.length === 0) {
        blocks.push({ name, attrs, rawAttrs, innerHtml: "", selfClosing: true });
      }
    } else {
      stack.push({ name, attrs, rawAttrs, innerStart: matchEnd });
    }
    cursor = matchEnd;
  }

  // Trailing markup after the last fence.
  flushLoose(source.length);

  // Unterminated outermost block: take everything up to the end of input.
  const root = stack[0];
  if (root) {
    blocks.push({
      name: root.name,
      attrs: root.attrs,
      rawAttrs: root.rawAttrs,
      innerHtml: source.slice(root.innerStart),
      selfClosing: false
    });
  }
  return blocks;
}
885
/**
 * Decode the JSON attribute payload of a block comment.
 *
 * @param {string} raw - JSON text; may be empty.
 * @returns {object} The parsed value when it is a non-array object, otherwise `{}`.
 */
function parseAttrsJson(raw) {
  if (!raw) {
    return {};
  }
  let decoded;
  try {
    decoded = JSON.parse(raw);
  } catch {
    // Malformed JSON is tolerated: the caller simply gets no attributes.
    return {};
  }
  const isPlainObject = decoded !== null && typeof decoded === "object" && !Array.isArray(decoded);
  return isPlainObject ? decoded : {};
}
896
+
897
+ // src/convert/html-to-lexical.ts
898
// Text-format bit flags. walkInline ORs these together to build a text node's `format`.
var FORMAT_BOLD = 1 << 0;
var FORMAT_ITALIC = 1 << 1;
var FORMAT_STRIKETHROUGH = 1 << 2;
var FORMAT_UNDERLINE = 1 << 3;
var FORMAT_CODE = 1 << 4;
903
/**
 * Convert an HTML string into a document object with a single `root` node.
 *
 * Sources that `isGutenbergSource` recognises are split into blocks with
 * `parseGutenbergBlocks`; everything else is parsed as plain HTML and each
 * top-level node is converted with `convertTopLevel`.
 *
 * @param {string} html - Source markup.
 * @returns {{root: object}} The converted document; `emptyDocument()` when the
 *   input is blank or produces no blocks.
 */
function htmlToLexical(html) {
  const source = html.trim();
  if (source.length === 0) {
    return emptyDocument();
  }

  const children = [];
  if (!isGutenbergSource(source)) {
    const dom = parse(source, { lowerCaseTagName: true });
    for (const node of dom.childNodes) {
      convertTopLevel(node, children);
    }
  } else {
    for (const gutenbergBlock of parseGutenbergBlocks(source)) {
      convertGutenbergBlock(gutenbergBlock, children);
    }
  }

  if (children.length === 0) {
    return emptyDocument();
  }
  const root = {
    type: "root",
    direction: null,
    format: "",
    indent: 0,
    version: 1,
    children
  };
  return { root };
}
933
/**
 * Convert one parsed Gutenberg block and append the resulting nodes to `out`.
 *
 * - Self-closing blocks: only "separator" produces output (a horizontal rule).
 * - "heading": the inner HTML is converted; the first heading node found gets
 *   its tag overridden by `attrs.level` (when 1-6) and is pushed first, followed
 *   by the remaining converted nodes. When no heading node is found, a heading
 *   is synthesised from the tag-stripped inner HTML.
 * - "list": the first list node found gets its `listType` from `attrs.ordered`
 *   and is pushed; when none is found the inner HTML is converted as-is.
 * - Anything else (including "gutenberg-loose") is converted as plain HTML.
 *
 * @param {object} block - Block with `name`, `attrs`, `innerHtml`, `selfClosing`.
 * @param {object[]} out - Output array, mutated in place.
 */
function convertGutenbergBlock(block, out) {
  const { name, innerHtml } = block;

  if (block.selfClosing) {
    if (name === "separator") {
      out.push({
        type: "horizontalrule",
        version: 1,
        format: "",
        indent: 0,
        direction: null
      });
    }
    return;
  }

  if (name === "heading") {
    const converted = [];
    runClassicPath(innerHtml, converted);
    const headingNode = converted.find((node) => node.type === "heading");
    const level = block.attrs.level;

    if (!headingNode) {
      // No <hN> in the markup: build a heading from the plain text instead.
      const fallbackLevel = typeof level === "number" ? level : 2;
      out.push({
        type: "heading",
        version: 1,
        format: "",
        indent: 0,
        direction: null,
        tag: `h${Math.min(6, Math.max(1, fallbackLevel))}`,
        children: [textNode(stripTags(innerHtml), 0)]
      });
      return;
    }

    if (typeof level === "number" && level >= 1 && level <= 6) {
      headingNode.tag = `h${level}`;
    }
    out.push(headingNode);
    for (const node of converted) {
      if (node !== headingNode) out.push(node);
    }
    return;
  }

  if (name === "list") {
    const converted = [];
    runClassicPath(innerHtml, converted);
    const listNode = converted.find((node) => node.type === "list");
    if (!listNode) {
      runClassicPath(innerHtml, out);
      return;
    }
    const { ordered } = block.attrs;
    if (ordered === true) {
      listNode.listType = "number";
    } else if (ordered === false) {
      listNode.listType = "bullet";
    }
    out.push(listNode);
    return;
  }

  // "gutenberg-loose" and every other block name: convert the inner HTML directly.
  runClassicPath(innerHtml, out);
}
993
/**
 * Parse `html` and convert each of its top-level nodes into `out`.
 *
 * @param {string} html - Markup to parse.
 * @param {object[]} out - Output array, mutated in place by `convertTopLevel`.
 */
function runClassicPath(html, out) {
  const { childNodes } = parse(html, { lowerCaseTagName: true });
  for (const node of childNodes) {
    convertTopLevel(node, out);
  }
}
999
/**
 * Reduce an HTML fragment to trimmed plain text.
 *
 * Tags are removed first, then character references are decoded in a single
 * pass (so `&amp;lt;` becomes the literal text `&lt;`, not `<`). Besides
 * `&nbsp;` (mapped to a regular space, as before) the basic named entities and
 * numeric references are decoded; unknown references are left untouched.
 *
 * @param {string} html - Markup to flatten.
 * @returns {string} Plain text with surrounding whitespace trimmed.
 */
function stripTags(html) {
  const namedEntities = new Map([
    ["nbsp", " "],
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"]
  ]);
  const decodeReference = (match, body) => {
    if (body[0] !== "#") {
      return namedEntities.get(body) ?? match;
    }
    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    // Leave NUL, surrogates and out-of-range values encoded rather than emit broken text.
    if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) {
      return match;
    }
    return String.fromCodePoint(codePoint);
  };
  return html
    .replace(/<[^>]+>/g, "")
    .replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-z]+);/g, decodeReference)
    .trim();
}
1002
/**
 * Build a document whose root holds exactly one empty paragraph.
 *
 * @returns {{root: object}} A fresh document object on every call.
 */
function emptyDocument() {
  const root = {
    type: "root",
    direction: null,
    format: "",
    indent: 0,
    version: 1,
    children: [paragraph([])]
  };
  return { root };
}
1014
/**
 * Convert one parsed HTML node into block-level nodes appended to `out`.
 *
 * Non-blank text nodes become paragraphs; nodes that are neither text nor
 * element are ignored. Elements are mapped by tag name; `div`/`section`/
 * `article` (and unknown elements that contain a block-level child) are
 * flattened by converting their children in place.
 *
 * @param {object} node - Parsed HTML node.
 * @param {object[]} out - Output array, mutated in place.
 */
function convertTopLevel(node, out) {
  const kind = node.nodeType;
  if (kind === NodeType.TEXT_NODE) {
    const raw = node.text;
    if (raw.trim().length > 0) {
      out.push(paragraph([textNode(raw, 0)]));
    }
    return;
  }
  if (kind !== NodeType.ELEMENT_NODE) return;

  const tag = node.tagName?.toLowerCase();
  // Fields shared by every block node built below; spread after `type` to keep key order.
  const base = { version: 1, format: "", indent: 0, direction: null };
  const convertChildrenInPlace = () => {
    for (const child of node.childNodes) convertTopLevel(child, out);
  };

  if (tag === "p") {
    out.push(paragraph(convertInline(node)));
    return;
  }
  if (["h1", "h2", "h3", "h4", "h5", "h6"].includes(tag)) {
    out.push({ type: "heading", ...base, tag, children: convertInline(node) });
    return;
  }
  if (tag === "blockquote") {
    out.push({ type: "quote", ...base, children: convertInline(node) });
    return;
  }
  if (tag === "ul" || tag === "ol") {
    out.push(convertList(node, tag === "ol" ? "number" : "bullet"));
    return;
  }
  if (tag === "pre") {
    out.push({ type: "code", ...base, children: [textNode(node.text, 0)] });
    return;
  }
  if (tag === "hr") {
    out.push({ type: "horizontalrule", ...base });
    return;
  }
  if (tag === "img") {
    out.push(imageBlock(node));
    return;
  }
  if (tag === "br") {
    // A top-level line break is represented as an empty paragraph.
    out.push(paragraph([]));
    return;
  }
  if (tag === "div" || tag === "section" || tag === "article") {
    convertChildrenInPlace();
    return;
  }

  const hasBlockChild = node.childNodes.some(
    (child) => child.nodeType === NodeType.ELEMENT_NODE && isBlockTag(child)
  );
  if (hasBlockChild) {
    convertChildrenInPlace();
    return;
  }
  const inline = convertInline(node);
  if (inline.length > 0) {
    out.push(paragraph(inline));
  }
}
1101
// Lower-case tag names that isBlockTag treats as block-level elements.
var BLOCK_TAGS = /* @__PURE__ */ new Set([
  // text blocks
  "p", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
  // lists
  "ul", "ol", "li",
  // preformatted text and rules
  "pre", "hr",
  // generic containers
  "div", "section", "article",
  // tables
  "table", "tr", "td", "th", "thead", "tbody"
]);
1125
/**
 * Report whether an element's tag name (case-insensitive) is in BLOCK_TAGS.
 *
 * @param {object} el - Element with an optional `tagName`.
 * @returns {boolean} False when `tagName` is missing.
 */
function isBlockTag(el) {
  const tagName = el.tagName ?? "";
  return BLOCK_TAGS.has(tagName.toLowerCase());
}
1128
/**
 * Build a paragraph node wrapping the given children.
 *
 * @param {object[]} children - Inline nodes; stored by reference, not copied.
 * @returns {object} Paragraph node.
 */
function paragraph(children) {
  const node = { type: "paragraph", version: 1, format: "", indent: 0, direction: null };
  node.children = children;
  return node;
}
1138
/**
 * Build a text node.
 *
 * @param {string} text - Text content.
 * @param {number} format - Bitwise combination of the FORMAT_* flags.
 * @returns {object} Text node.
 */
function textNode(text, format) {
  const node = { type: "text", version: 1 };
  node.format = format;
  node.indent = 0;
  node.direction = null;
  node.text = text;
  return node;
}
1148
/**
 * Build an image node from an element's `src` and `alt` attributes.
 *
 * @param {object} el - Element exposing `getAttribute(name)`.
 * @returns {object} Image node; a missing attribute becomes an empty string.
 */
function imageBlock(el) {
  const attributeOrEmpty = (attrName) => el.getAttribute(attrName) ?? "";
  return {
    type: "image",
    version: 1,
    format: "",
    indent: 0,
    direction: null,
    src: attributeOrEmpty("src"),
    altText: attributeOrEmpty("alt")
  };
}
1159
/**
 * Convert a list element into a list node.
 *
 * Only direct `<li>` element children become items; each item's content is
 * produced by `convertInline`. Other children are ignored.
 *
 * @param {object} el - The list element.
 * @param {string} listType - Stored on the node as `listType`.
 * @returns {object} List node.
 */
function convertList(el, listType) {
  const isListItem = (child) =>
    child.nodeType === NodeType.ELEMENT_NODE && (child.tagName ?? "").toLowerCase() === "li";
  const children = el.childNodes.filter(isListItem).map((li) => ({
    type: "listitem",
    version: 1,
    format: "",
    indent: 0,
    direction: null,
    children: convertInline(li)
  }));
  return {
    type: "list",
    version: 1,
    format: "",
    indent: 0,
    direction: null,
    listType,
    children
  };
}
1184
/**
 * Collect the inline nodes produced by walking `el` with no formatting applied.
 *
 * @param {object} el - Element to walk.
 * @returns {object[]} Inline nodes in document order.
 */
function convertInline(el) {
  const collected = [];
  walkInline(el, 0, collected);
  return collected;
}
1189
/**
 * Map an inline formatting tag to its FORMAT_* flag.
 *
 * @param {string} tag - Lower-case tag name.
 * @returns {number} The flag, or 0 when the tag adds no formatting.
 */
function inlineFormatFlag(tag) {
  switch (tag) {
    case "strong":
    case "b":
      return FORMAT_BOLD;
    case "em":
    case "i":
      return FORMAT_ITALIC;
    case "u":
      return FORMAT_UNDERLINE;
    case "s":
    case "del":
    case "strike":
      return FORMAT_STRIKETHROUGH;
    case "code":
      return FORMAT_CODE;
    default:
      return 0;
  }
}

/**
 * Recursively convert a node into inline nodes appended to `out`.
 *
 * Non-empty text nodes become text nodes carrying the accumulated `format`.
 * Formatting tags add their flag for their descendants. `<a>` becomes a link
 * node wrapping its converted children, `<br>` a linebreak node and `<img>` an
 * image node. Any other element is transparent: its children are walked with
 * the format unchanged.
 *
 * @param {object} node - Parsed HTML node.
 * @param {number} format - Format flags inherited from ancestors.
 * @param {object[]} out - Output array, mutated in place.
 */
function walkInline(node, format, out) {
  if (node.nodeType === NodeType.TEXT_NODE) {
    const content = node.text;
    if (content) {
      out.push(textNode(content, format));
    }
    return;
  }
  if (node.nodeType !== NodeType.ELEMENT_NODE) return;

  const tag = (node.tagName ?? "").toLowerCase();

  if (tag === "a") {
    const url = node.getAttribute("href") ?? "";
    const linkChildren = [];
    for (const child of node.childNodes) walkInline(child, format, linkChildren);
    out.push({
      type: "link",
      version: 1,
      format: "",
      indent: 0,
      direction: null,
      url,
      children: linkChildren
    });
    return;
  }
  if (tag === "br") {
    out.push({
      type: "linebreak",
      version: 1,
      format: "",
      indent: 0,
      direction: null
    });
    return;
  }
  if (tag === "img") {
    out.push(imageBlock(node));
    return;
  }

  const flag = inlineFormatFlag(tag);
  const childFormat = flag === 0 ? format : format | flag;
  for (const child of node.childNodes) {
    walkInline(child, childFormat, out);
  }
}
1254
+
1255
+ // src/media/pipeline.ts
1256
+ import { createHash } from "crypto";
1257
+
1258
+ // src/media/download.ts
1259
+ import { Buffer } from "buffer";
1260
+ import { promises as dnsPromises } from "dns";
1261
+ import { Agent } from "undici";
1262
// Fallbacks used by downloadMedia when the matching option is not supplied.
var DEFAULT_TIMEOUT_MS = 30 * 1000;
var DEFAULT_RETRIES = 1;
var DEFAULT_MAX_REDIRECTS = 3;
var DEFAULT_MAX_BYTES = 100 * 2 ** 20;
1266
/**
 * Error describing a failed media download.
 *
 * `url` is the URL the failure relates to; `status` is the HTTP status when
 * one was supplied, otherwise `null`.
 */
var WpMediaDownloadError = class extends Error {
  url;
  status;
  constructor(url, message, status = null) {
    super(message);
    this.url = url;
    this.status = status;
    this.name = "WpMediaDownloadError";
  }
};
1276
/**
 * Download error raised when a URL or host is rejected by the scheme/host
 * checks. It never carries an HTTP status (`status` stays `null`).
 */
var WpMediaSsrfError = class extends WpMediaDownloadError {
  constructor(url, message) {
    super(url, message, null);
    this.name = "WpMediaSsrfError";
  }
};
1282
/**
 * Download a media file, retrying transient failures.
 *
 * Each attempt gets its own AbortController that aborts after `timeoutMs`.
 * SSRF rejections and HTTP 4xx failures are rethrown immediately; other
 * failures are retried until `retries` extra attempts are used up, at which
 * point the last error is thrown (wrapped in WpMediaDownloadError when it is
 * not one already).
 *
 * @param {string} url - http(s) URL to fetch.
 * @param {object} [opts] - Optional `fetchImpl`, `dnsLookupImpl`, `timeoutMs`,
 *   `retries`, `maxRedirects`, `maxBytes`, `allowPrivateHosts`.
 * @returns {Promise<object>} Whatever `fetchWithRedirects` resolves to.
 * @throws {WpMediaDownloadError} On any failure, including a missing fetch.
 */
async function downloadMedia(url, opts = {}) {
  const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
  if (!fetchImpl) {
    throw new WpMediaDownloadError(url, "no fetch implementation available");
  }
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const maxRetries = opts.retries ?? DEFAULT_RETRIES;
  const requestOptions = {
    fetchImpl,
    dnsLookupImpl: opts.dnsLookupImpl,
    maxRedirects: opts.maxRedirects ?? DEFAULT_MAX_REDIRECTS,
    maxBytes: opts.maxBytes ?? DEFAULT_MAX_BYTES,
    allowPrivateHosts: opts.allowPrivateHosts ?? false
  };
  assertHttpScheme(url);

  let lastError = null;
  let attempt = 0;
  while (attempt <= maxRetries) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      return await fetchWithRedirects(url, { ...requestOptions, signal: controller.signal });
    } catch (err) {
      lastError = err;
      const isDownloadError = err instanceof WpMediaDownloadError;
      // Policy rejections will not change on retry.
      if (err instanceof WpMediaSsrfError) throw err;
      const isClientError =
        isDownloadError && err.status !== null && err.status >= 400 && err.status < 500;
      if (isClientError) throw err;
      if (attempt >= maxRetries) {
        if (isDownloadError) throw err;
        throw new WpMediaDownloadError(url, err instanceof Error ? err.message : String(err));
      }
    } finally {
      clearTimeout(timer);
    }
    attempt += 1;
  }
  // Only reachable when the loop never ran (negative retry count).
  throw lastError instanceof Error ? lastError : new WpMediaDownloadError(url, "download failed");
}
1322
/**
 * Read a response body into a Buffer, aborting as soon as it exceeds `maxBytes`.
 *
 * The body is consumed chunk by chunk so an oversized (or endless) response
 * without a Content-Length header is never fully buffered. Responses that do
 * not expose a readable stream fall back to `arrayBuffer()` with a size check.
 *
 * @param {object} res - Fetch response.
 * @param {number} maxBytes - Maximum accepted body size in bytes.
 * @param {string} url - URL reported in the error.
 * @returns {Promise<Buffer>} The complete body.
 * @throws {WpMediaDownloadError} When the body is larger than `maxBytes`.
 */
async function readBodyWithLimit(res, maxBytes, url) {
  const stream = res.body;
  if (!stream || typeof stream.getReader !== "function") {
    const arrayBuffer = await res.arrayBuffer();
    if (arrayBuffer.byteLength > maxBytes) {
      throw new WpMediaDownloadError(
        url,
        `body ${arrayBuffer.byteLength} bytes exceeds maxBytes ${maxBytes}`
      );
    }
    return Buffer.from(arrayBuffer);
  }

  const reader = stream.getReader();
  const chunks = [];
  let received = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    received += value.byteLength;
    if (received > maxBytes) {
      // Stop the transfer; a failure to cancel must not hide the size error.
      await reader.cancel().catch(() => {});
      throw new WpMediaDownloadError(
        url,
        `body exceeds maxBytes ${maxBytes} (aborted after ${received} bytes)`
      );
    }
    chunks.push(value);
  }
  return Buffer.concat(chunks, received);
}

/**
 * Fetch `originalUrl`, following redirects manually so every hop is validated.
 *
 * Unless `opts.allowPrivateHosts` is set, each hop's host is checked with
 * `assertHostAllowed` and the connection is pinned to the vetted address via a
 * dedicated dispatcher, which is destroyed once the hop is finished. Redirect
 * targets must use http(s). The body size is capped by `opts.maxBytes`, both
 * via the declared Content-Length and while streaming the body.
 *
 * @param {string} originalUrl - Starting URL; also used to infer the filename.
 * @param {object} opts - `fetchImpl`, `dnsLookupImpl`, `signal`, `maxRedirects`,
 *   `maxBytes`, `allowPrivateHosts`.
 * @returns {Promise<{buffer: Buffer, mimeType: string, filename: string}>}
 * @throws {WpMediaDownloadError} On non-OK responses, bad redirects, oversized
 *   bodies or too many redirects.
 * @throws {WpMediaSsrfError} When a hop's URL or host is rejected.
 */
async function fetchWithRedirects(originalUrl, opts) {
  let currentUrl = originalUrl;
  for (let hop = 0; hop <= opts.maxRedirects; hop++) {
    let pinned = null;
    if (!opts.allowPrivateHosts) {
      pinned = await assertHostAllowed(currentUrl, opts.dnsLookupImpl);
    }
    const dispatcher = pinned ? createPinnedAgent(pinned) : void 0;
    try {
      const init = {
        signal: opts.signal,
        redirect: "manual"
      };
      if (dispatcher) init.dispatcher = dispatcher;
      const res = await opts.fetchImpl(currentUrl, init);
      if (isRedirectStatus(res.status)) {
        const next = res.headers.get("location");
        if (!next) {
          throw new WpMediaDownloadError(
            currentUrl,
            `redirect ${res.status} without Location header`,
            res.status
          );
        }
        currentUrl = new URL(next, currentUrl).toString();
        assertHttpScheme(currentUrl);
        continue;
      }
      if (!res.ok) {
        throw new WpMediaDownloadError(
          currentUrl,
          `source responded ${res.status} ${res.statusText || ""}`.trim(),
          res.status
        );
      }
      const declaredLength = res.headers.get("content-length");
      if (declaredLength !== null) {
        const n = Number(declaredLength);
        if (Number.isFinite(n) && n > opts.maxBytes) {
          throw new WpMediaDownloadError(
            currentUrl,
            `content-length ${n} exceeds maxBytes ${opts.maxBytes}`
          );
        }
      }
      const buffer = await readBodyWithLimit(res, opts.maxBytes, currentUrl);
      const mimeType = parseMime(res.headers.get("content-type"));
      const filename = inferFilename(originalUrl);
      return { buffer, mimeType, filename };
    } finally {
      // The pinned dispatcher is single-use; tear it down so its sockets do not
      // linger. Teardown is best-effort and must not mask the hop's outcome.
      if (dispatcher && typeof dispatcher.destroy === "function") {
        await dispatcher.destroy().catch(() => {});
      }
    }
  }
  throw new WpMediaDownloadError(currentUrl, `too many redirects (max ${opts.maxRedirects})`);
}
1380
/**
 * Report whether an HTTP status is one of the redirect codes that get followed.
 *
 * @param {number} status - HTTP status code.
 * @returns {boolean} True for 301, 302, 303, 307 and 308.
 */
function isRedirectStatus(status) {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
1383
/**
 * Ensure `url` parses and uses the http or https scheme.
 *
 * @param {string} url - URL to validate.
 * @throws {WpMediaSsrfError} When the URL is invalid or uses another scheme.
 */
function assertHttpScheme(url) {
  let protocol;
  try {
    ({ protocol } = new URL(url));
  } catch {
    throw new WpMediaSsrfError(url, `invalid URL "${url}"`);
  }
  const isHttp = protocol === "http:" || protocol === "https:";
  if (!isHttp) {
    throw new WpMediaSsrfError(url, `unsupported scheme "${protocol}" \u2014 only http(s) is allowed`);
  }
}
1394
/**
 * Verify that a URL's host is not a private address and pick the address to pin.
 *
 * `localhost` names and private IP literals are rejected outright; a public IP
 * literal is returned as-is. Any other hostname is resolved and rejected when
 * any returned address is classified private; otherwise the first IPv4/IPv6
 * address is returned.
 *
 * @param {string} url - URL whose host is checked.
 * @param {Function} [dnsLookupImpl] - Resolver override; defaults to `defaultDnsLookup`.
 * @returns {Promise<{address: string, family: number}>} Address to connect to.
 * @throws {WpMediaSsrfError} When the host is private, unresolvable or unusable.
 */
async function assertHostAllowed(url, dnsLookupImpl) {
  const { hostname: rawHostname } = new URL(url);
  // IPv6 literals come back bracketed from URL; strip the brackets.
  const isBracketed = rawHostname.startsWith("[") && rawHostname.endsWith("]");
  const hostname = isBracketed ? rawHostname.slice(1, -1) : rawHostname;
  const rejectAsPrivate = () =>
    new WpMediaSsrfError(url, `hostname "${hostname}" resolves to a private address`);

  const lowered = hostname.toLowerCase();
  if (lowered === "localhost" || lowered.endsWith(".localhost")) {
    throw rejectAsPrivate();
  }

  switch (classifyIpLiteral(hostname)) {
    case "private":
      throw rejectAsPrivate();
    case "public":
      return { address: hostname, family: hostname.includes(":") ? 6 : 4 };
    default:
      break;
  }

  const lookup = dnsLookupImpl ?? defaultDnsLookup;
  let addrs;
  try {
    addrs = await lookup(hostname);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    throw new WpMediaSsrfError(url, `DNS lookup failed for "${hostname}": ${msg}`);
  }
  if (addrs.length === 0) {
    throw new WpMediaSsrfError(url, `DNS returned no addresses for "${hostname}"`);
  }

  let pinned = null;
  for (const { address, family } of addrs) {
    // One private answer is enough to reject the whole host.
    if (classifyIpAddress(address, family) === "private") {
      throw new WpMediaSsrfError(
        url,
        `hostname "${hostname}" resolves to private address ${address}`
      );
    }
    const isUsableFamily = family === 4 || family === 6;
    if (pinned === null && isUsableFamily) {
      pinned = { address, family };
    }
  }
  if (pinned === null) {
    throw new WpMediaSsrfError(url, `DNS returned no usable addresses for "${hostname}"`);
  }
  return pinned;
}
1438
/**
 * Create a dispatcher whose connections always go to the pre-vetted address,
 * regardless of what the hostname would resolve to at connect time.
 *
 * The custom `lookup` honours the `all` option: when the caller asks for all
 * addresses (Node does this when address-family auto-selection is enabled) the
 * callback must receive an array of `{ address, family }` objects; otherwise
 * it receives the address and family as separate arguments.
 *
 * @param {{address: string, family: number}} pinned - Address to connect to.
 * @returns {Agent} Dispatcher to pass to fetch via `init.dispatcher`.
 */
function createPinnedAgent(pinned) {
  return new Agent({
    connect: {
      lookup: (_hostname, options, callback) => {
        if (options && options.all) {
          callback(null, [{ address: pinned.address, family: pinned.family }]);
          return;
        }
        callback(null, pinned.address, pinned.family);
      }
    }
  });
}
1447
/**
 * Resolve a hostname to every address `dns.lookup` reports for it.
 *
 * @param {string} hostname - Name to resolve.
 * @returns {Promise<Array<{address: string, family: number}>>}
 */
async function defaultDnsLookup(hostname) {
  const lookupOptions = { all: true };
  return dnsPromises.lookup(hostname, lookupOptions);
}
1450
/**
 * Classify a hostname that may be an IP literal.
 *
 * @param {string} input - Hostname without IPv6 brackets.
 * @returns {string} "private" or "public" for IP literals, "not-an-ip" otherwise.
 */
function classifyIpLiteral(input) {
  if (looksLikeIpv4(input)) {
    return classifyIpAddress(input, 4);
  }
  // Anything containing a colon is treated as an IPv6 literal.
  return input.includes(":") ? classifyIpAddress(input, 6) : "not-an-ip";
}
1455
/**
 * Classify an address of the given family as "private" or "public".
 *
 * @param {string} address - IP address text.
 * @param {number} family - 4 or 6; any other value is classified "private".
 * @returns {string} "private" or "public".
 */
function classifyIpAddress(address, family) {
  let isPrivate;
  switch (family) {
    case 4:
      isPrivate = isPrivateIpv4(address);
      break;
    case 6:
      isPrivate = isPrivateIpv6(address);
      break;
    default:
      // Unknown families are rejected rather than trusted.
      return "private";
  }
  return isPrivate ? "private" : "public";
}
1464
/**
 * Report whether `s` has the shape of a dotted-quad IPv4 address
 * (four groups of one to three digits). Octet ranges are not checked here.
 *
 * @param {string} s - Candidate text.
 * @returns {boolean}
 */
function looksLikeIpv4(s) {
  const dottedQuad = /^\d{1,3}(\.\d{1,3}){3}$/;
  return dottedQuad.test(s);
}
1467
/**
 * Parse a dotted-quad IPv4 address into an unsigned 32-bit integer.
 *
 * @param {string} ip - Address text.
 * @returns {number|null} The integer value, or null unless the input is
 *   exactly four decimal octets in the range 0-255.
 */
function ipv4ToInt(ip) {
  const parts = ip.split(".");
  if (parts.length !== 4) return null;
  let n = 0;
  for (const part of parts) {
    // Digits only: Number() alone would also accept "", " 1", "0x10" or "1e2".
    if (!/^\d{1,3}$/.test(part)) return null;
    const v = Number(part);
    if (v > 255) return null;
    n = (n * 256 + v) >>> 0;
  }
  return n >>> 0;
}
// CIDR ranges ([base address, prefix length]) that must never be fetched.
var PRIVATE_IPV4_RANGES = [
  ["0.0.0.0", 8],
  // "this network"
  ["10.0.0.0", 8],
  // RFC 1918
  ["100.64.0.0", 10],
  // CGNAT
  ["127.0.0.0", 8],
  // loopback
  ["169.254.0.0", 16],
  // link-local (incl. cloud metadata 169.254.169.254)
  ["172.16.0.0", 12],
  // RFC 1918
  ["192.0.0.0", 24],
  // protocol assignments
  ["192.0.2.0", 24],
  // TEST-NET-1
  ["192.168.0.0", 16],
  // RFC 1918
  ["198.18.0.0", 15],
  // benchmarking
  ["198.51.100.0", 24],
  // TEST-NET-2
  ["203.0.113.0", 24],
  // TEST-NET-3
  ["224.0.0.0", 4],
  // multicast
  ["240.0.0.0", 4],
  // reserved
  ["255.255.255.255", 32]
  // broadcast
];
/**
 * Report whether an IPv4 address falls in any PRIVATE_IPV4_RANGES entry.
 *
 * @param {string} ip - Dotted-quad address.
 * @returns {boolean} True for listed ranges and for unparseable input (fail closed).
 */
function isPrivateIpv4(ip) {
  const n = ipv4ToInt(ip);
  if (n === null) return true;
  for (const [base, bits] of PRIVATE_IPV4_RANGES) {
    const baseN = ipv4ToInt(base);
    if (baseN === null) continue;
    const mask = bits === 0 ? 0 : (4294967295 << (32 - bits)) >>> 0;
    if ((n & mask) === (baseN & mask)) return true;
  }
  return false;
}
/**
 * Expand an IPv6 address into its eight 16-bit groups.
 *
 * Accepts `::` compression, a trailing dotted-quad IPv4 part and a `%zone`
 * suffix (which is ignored).
 *
 * @param {string} ip - IPv6 address text without brackets.
 * @returns {number[]|null} Eight integers, or null when the text is not valid.
 */
function ipv6ToGroups(ip) {
  let text = ip.toLowerCase();
  const zoneStart = text.indexOf("%");
  if (zoneStart >= 0) text = text.slice(0, zoneStart);

  // Rewrite a dotted-quad tail (e.g. ::ffff:127.0.0.1) as two hex groups.
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const v4 = ipv4ToInt(tail);
    if (v4 === null || lastColon < 0) return null;
    const high = (v4 >>> 16).toString(16);
    const low = (v4 & 0xffff).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;
  const splitGroups = (part) => (part === "" ? [] : part.split(":"));
  const head = splitGroups(halves[0]);
  const rest = halves.length === 2 ? splitGroups(halves[1]) : [];
  const missing = 8 - head.length - rest.length;
  if (halves.length === 2) {
    // "::" must stand for at least one zero group.
    if (missing < 1) return null;
  } else if (missing !== 0) {
    return null;
  }

  const fill = halves.length === 2 ? new Array(missing).fill("0") : [];
  const groups = [];
  for (const group of [...head, ...fill, ...rest]) {
    if (!/^[0-9a-f]{1,4}$/.test(group)) return null;
    groups.push(Number.parseInt(group, 16));
  }
  return groups;
}
/**
 * Report whether an IPv6 address must be treated as private.
 *
 * The address is fully parsed, so every textual form of the same address is
 * classified identically — in particular IPv4-mapped addresses written in hex
 * (`::ffff:7f00:1`, which is how URL serialises `[::ffff:127.0.0.1]`) and
 * uncompressed forms such as `0:0:0:0:0:0:0:1`.
 *
 * Private means: unique-local (fc00::/7), link-local (fe80::/10), multicast
 * (ff00::/8), or an address embedding a private IPv4 address (IPv4-compatible
 * `::/96` — which covers `::` and `::1` — IPv4-mapped `::ffff:0:0/96`, and
 * NAT64 `64:ff9b::/96`).
 *
 * @param {string} ip - IPv6 address text without brackets.
 * @returns {boolean} True for private addresses and for unparseable input
 *   (fail closed, matching isPrivateIpv4).
 */
function isPrivateIpv6(ip) {
  const groups = ipv6ToGroups(ip);
  if (groups === null) return true;
  const [g0, g1, g2, g3, g4, g5, g6, g7] = groups;
  const embeddedV4 = `${g6 >>> 8}.${g6 & 255}.${g7 >>> 8}.${g7 & 255}`;

  const firstFiveZero = g0 === 0 && g1 === 0 && g2 === 0 && g3 === 0 && g4 === 0;
  if (firstFiveZero && (g5 === 0 || g5 === 0xffff)) {
    return isPrivateIpv4(embeddedV4);
  }
  const isNat64 = g0 === 0x64 && g1 === 0xff9b && g2 === 0 && g3 === 0 && g4 === 0 && g5 === 0;
  if (isNat64) {
    return isPrivateIpv4(embeddedV4);
  }
  if ((g0 & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((g0 & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((g0 & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  return false;
}
1533
/**
 * Extract the bare MIME type from a Content-Type header value.
 *
 * @param {string|null} header - Header value, possibly with parameters.
 * @returns {string} Lower-cased, trimmed type before the first `;`, or
 *   "application/octet-stream" when the header is missing or empty.
 */
function parseMime(header) {
  if (!header) {
    return "application/octet-stream";
  }
  const [essence] = header.split(";");
  return essence.trim().toLowerCase();
}
1538
/**
 * Derive a filename from the last path segment of a URL.
 *
 * The segment is percent-decoded and then reduced to its base name, because
 * decoding can introduce path separators (e.g. `..%2F..%2Fetc%2Fpasswd`) that
 * must not reach code which joins the filename onto a directory.
 *
 * @param {string} url - Source URL.
 * @returns {string} The base name, or "download" when the URL is invalid, has
 *   no usable last segment, or contains a malformed percent-escape.
 */
function inferFilename(url) {
  try {
    const { pathname } = new URL(url);
    const segments = pathname.split("/").filter(Boolean);
    const last = segments[segments.length - 1];
    if (last) {
      const decoded = decodeURIComponent(last);
      // Keep only what follows the final "/" or "\" introduced by decoding.
      const baseName = decoded.split(/[\\/]/).pop();
      if (baseName && baseName !== "." && baseName !== "..") {
        return baseName;
      }
    }
  } catch {
    // Invalid URL or malformed escape: fall through to the generic name.
  }
  return "download";
}
1548
/**
 * Read download options from environment variables.
 *
 * - `NP_WP_IMPORT_ALLOW_PRIVATE_HOSTS` set to "1" or "true" sets `allowPrivateHosts`.
 * - `NP_WP_IMPORT_MAX_BYTES` sets `maxBytes` when it parses (base 10) to a
 *   positive integer.
 *
 * @param {object} [env] - Environment map; defaults to `process.env`.
 * @returns {object} Only the options that were set.
 */
function resolveEnvDownloadOptions(env = process.env) {
  const resolved = {};

  const allowFlag = env.NP_WP_IMPORT_ALLOW_PRIVATE_HOSTS;
  if (["1", "true"].includes(allowFlag)) {
    resolved.allowPrivateHosts = true;
  }

  const maxBytesText = env.NP_WP_IMPORT_MAX_BYTES;
  if (!maxBytesText) {
    return resolved;
  }
  const parsedMaxBytes = Number.parseInt(maxBytesText, 10);
  if (Number.isFinite(parsedMaxBytes) && parsedMaxBytes > 0) {
    resolved.maxBytes = parsedMaxBytes;
  }
  return resolved;
}
1563
/**
 * Report whether a MIME type may be imported: any `image/*` or `video/*`
 * type, or exactly `application/pdf`.
 *
 * @param {string} mimeType - Bare MIME type.
 * @returns {boolean}
 */
function isAllowedMimeType(mimeType) {
  const allowedPrefixes = ["image/", "video/"];
  if (allowedPrefixes.some((prefix) => mimeType.startsWith(prefix))) {
    return true;
  }
  return mimeType === "application/pdf";
}
1566
+
1567
+ // src/media/pipeline.ts
1568
// Number of parallel downloads per host when options.perHostConcurrency is not given.
var DEFAULT_PER_HOST_CONCURRENCY = 4;
/**
 * Download and upload every media target referenced by the bundle.
 *
 * Targets come from `collectTargets`; those without a URL are reported as
 * errors. The rest are grouped by host and each host's queue is drained by up
 * to `perHostConcurrency` workers, with all hosts processed in parallel. In a
 * dry run targets are only logged and counted as skipped. A downloaded file
 * with a disallowed MIME type is reported as an error. When
 * `deps.findExistingByHash` is provided, files are matched by SHA-256 and an
 * existing media id is reused instead of uploading again. Failures for a
 * single target are collected and never abort the run.
 *
 * @param {object} bundle - Parsed export with `records`.
 * @param {object} attachments - Attachment index with a `byId` map.
 * @param {object} deps - `upload` (required), optional `download` and `findExistingByHash`.
 * @param {object} [options] - Optional `log`, `dryRun`, `perHostConcurrency`.
 * @returns {Promise<object>} `{ resolution: { byUrl, byAttachmentId }, uploaded, skipped, reused, errors }`.
 */
async function runMediaPipeline(bundle, attachments, deps, options = {}) {
  const log = options.log ?? noop;
  const dryRun = options.dryRun ?? false;
  const envDefaults = resolveEnvDownloadOptions();
  const download = deps.download ?? ((url) => downloadMedia(url, envDefaults));
  const concurrency = Math.max(1, options.perHostConcurrency ?? DEFAULT_PER_HOST_CONCURRENCY);

  const byUrl = new Map();
  const byAttachmentId = new Map();
  const errors = [];
  let uploaded = 0;
  let skipped = 0;
  let reused = 0;

  // Split targets into per-host queues; targets without a URL cannot be fetched.
  const queuesByHost = new Map();
  for (const target of collectTargets(bundle, attachments)) {
    if (!target.url) {
      errors.push({
        url: `(wp-attachment-id ${target.wpAttachmentId})`,
        reason: "attachment record missing from WXR \u2014 cannot resolve URL"
      });
      continue;
    }
    const host = parseHost(target.url);
    const hostQueue = queuesByHost.get(host);
    if (hostQueue) {
      hostQueue.push(target);
    } else {
      queuesByHost.set(host, [target]);
    }
  }

  const describeFailure = (err) => {
    if (err instanceof WpMediaDownloadError) {
      return err.status !== null ? `HTTP ${err.status}: ${err.message}` : err.message;
    }
    return err instanceof Error ? err.message : String(err);
  };

  const recordResolution = (target, mediaId) => {
    if (target.wpAttachmentId !== null) {
      byAttachmentId.set(target.wpAttachmentId, mediaId);
    }
  };

  const processOne = async (target) => {
    if (byUrl.has(target.url)) {
      recordResolution(target, byUrl.get(target.url));
      return;
    }
    if (dryRun) {
      log(`media plan ${target.url}`);
      skipped++;
      return;
    }
    try {
      const fetched = await download(target.url);
      if (!isAllowedMimeType(fetched.mimeType)) {
        errors.push({
          url: target.url,
          reason: `disallowed MIME type "${fetched.mimeType}"`
        });
        return;
      }

      let mediaId = null;
      if (deps.findExistingByHash) {
        const sha256 = createHash("sha256").update(fetched.buffer).digest("hex");
        const existing = await deps.findExistingByHash(sha256);
        if (existing) {
          mediaId = existing.id;
          reused++;
          log(`media reuse ${target.url} \u2192 ${existing.id}`);
        }
      }
      if (!mediaId) {
        const upload = await deps.upload({
          buffer: fetched.buffer,
          originalFilename: fetched.filename,
          mimeType: fetched.mimeType
        });
        mediaId = upload.id;
        uploaded++;
        log(`media write ${target.url} \u2192 ${upload.id}`);
      }

      byUrl.set(target.url, mediaId);
      recordResolution(target, mediaId);
    } catch (err) {
      const reason = describeFailure(err);
      errors.push({ url: target.url, reason });
      log(`media error ${target.url}: ${reason}`);
    }
  };

  // One worker pool per host; each worker pulls the next index until the queue is empty.
  const drainHostQueue = async (queue) => {
    let cursor = 0;
    const worker = async () => {
      while (cursor < queue.length) {
        const target = queue[cursor++];
        if (target) await processOne(target);
      }
    };
    const workerCount = Math.min(concurrency, queue.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
  };
  await Promise.all([...queuesByHost.values()].map((queue) => drainHostQueue(queue)));

  return {
    resolution: { byUrl, byAttachmentId },
    uploaded,
    skipped,
    reused,
    errors
  };
}
1673
/**
 * Build the de-duplicated list of media targets to fetch.
 *
 * First every indexed attachment with a source URL is added. Then the media
 * references of all non-attachment records are scanned: a "featured" reference
 * with an attachment id is resolved through the index (and added with an empty
 * URL when the attachment is unknown or has no URL); any other reference is
 * added by its own source URL. A URL is never added twice.
 *
 * @param {object} bundle - Parsed export with `records`.
 * @param {object} attachments - Attachment index with a `byId` map.
 * @returns {Array<{url: string, wpAttachmentId: (number|null)}>}
 */
function collectTargets(bundle, attachments) {
  const seenUrls = new Set();
  const collected = [];

  for (const { sourceUrl, wpAttachmentId } of attachments.byId.values()) {
    if (!sourceUrl || seenUrls.has(sourceUrl)) continue;
    seenUrls.add(sourceUrl);
    collected.push({ url: sourceUrl, wpAttachmentId });
  }

  for (const record of bundle.records) {
    if (record.wpType === "attachment") continue;
    for (const ref of record.mediaRefs) {
      const isFeaturedById = ref.kind === "featured" && ref.wpAttachmentId !== null;
      if (isFeaturedById) {
        const url = attachments.byId.get(ref.wpAttachmentId)?.sourceUrl ?? "";
        if (url) {
          if (seenUrls.has(url)) continue;
          seenUrls.add(url);
        }
        // An empty URL is kept so the caller can report the missing attachment.
        collected.push({ url, wpAttachmentId: ref.wpAttachmentId });
        continue;
      }
      if (!ref.sourceUrl || seenUrls.has(ref.sourceUrl)) continue;
      seenUrls.add(ref.sourceUrl);
      collected.push({ url: ref.sourceUrl, wpAttachmentId: ref.wpAttachmentId });
    }
  }
  return collected;
}
1701
/**
 * Return the `host` (hostname plus optional port) of a URL.
 *
 * @param {string} url - URL text.
 * @returns {string} The host, or "(invalid)" when the URL cannot be parsed.
 */
function parseHost(url) {
  let host;
  try {
    host = new URL(url).host;
  } catch {
    host = "(invalid)";
  }
  return host;
}
1708
/** Do-nothing function, used by runMediaPipeline when no `log` option is given. */
function noop() {
  return undefined;
}
1710
+
1711
+ // src/media/rewrite.ts
1712
/**
 * Return a copy of the document with `mediaId` set on image nodes whose `src`
 * has an entry in `resolution.byUrl`.
 *
 * @param {object} root - Document object with a `root` node.
 * @param {object} resolution - Object with a `byUrl` map of URL to media id.
 * @returns {object} The input itself (not a copy) when `byUrl` is empty;
 *   otherwise a JSON round-tripped clone with the ids applied.
 */
function rewriteLexicalMedia(root, resolution) {
  const hasMappings = resolution.byUrl.size !== 0;
  if (!hasMappings) {
    return root;
  }
  const copy = JSON.parse(JSON.stringify(root));
  walk(copy.root, resolution);
  return copy;
}
1720
/**
 * Recursively visit `node` and its `children`, setting `mediaId` on every
 * image node whose string `src` maps to a truthy id in `resolution.byUrl`.
 * Non-object values are ignored. Nodes are mutated in place.
 *
 * @param {*} node - Node to visit.
 * @param {object} resolution - Object with a `byUrl` map of URL to media id.
 */
function walk(node, resolution) {
  const isObject = Boolean(node) && typeof node === "object";
  if (!isObject) return;

  if (node.type === "image" && typeof node.src === "string" && node.src !== "") {
    const mediaId = resolution.byUrl.get(node.src);
    if (mediaId) {
      node.mediaId = mediaId;
    }
  }

  const { children } = node;
  if (!Array.isArray(children)) return;
  for (const child of children) {
    walk(child, resolution);
  }
}
1734
+
1735
+ // src/apply/attachment-index.ts
1736
/**
 * Index the bundle's attachment records by id and by source URL.
 *
 * Records are indexed by id only when `wpId` is positive, and by URL only
 * when `pickAttachmentUrl` finds one. Later records overwrite earlier ones
 * that share a key.
 *
 * @param {object} bundle - Parsed export with `records`.
 * @returns {{byId: Map, byUrl: Map}} Maps to `{ wpAttachmentId, sourceUrl, meta, title }` entries.
 */
function buildAttachmentIndex(bundle) {
  const byId = new Map();
  const byUrl = new Map();
  const attachmentRecords = [];
  for (const record of bundle.records) {
    if (record.wpType === "attachment") attachmentRecords.push(record);
  }
  for (const record of attachmentRecords) {
    const sourceUrl = pickAttachmentUrl(record);
    const entry = {
      wpAttachmentId: record.wpId,
      sourceUrl,
      meta: record.meta,
      title: record.title
    };
    if (record.wpId > 0) {
      byId.set(record.wpId, entry);
    }
    if (sourceUrl) {
      byUrl.set(sourceUrl, entry);
    }
  }
  return { byId, byUrl };
}
1753
/**
 * Return the source URL of the record's first media reference that has one.
 *
 * @param {object} record - Record with a `mediaRefs` array.
 * @returns {string} The URL, or "" when no reference carries a source URL.
 */
function pickAttachmentUrl(record) {
  const firstWithUrl = record.mediaRefs.find((ref) => ref.sourceUrl);
  if (firstWithUrl === undefined) {
    return "";
  }
  return firstWithUrl.sourceUrl ?? "";
}
1757
+
1758
+ // src/apply/authors.ts
1759
/**
 * Resolve every author login used by non-attachment records.
 *
 * Logins are resolved one at a time, in first-seen order, by calling
 * `resolver.resolveAuthor({ wpAuthorLogin, wpAuthor })`, where `wpAuthor` is
 * the matching entry of `bundle.authors` (or undefined). A falsy result marks
 * the login as skipped; a thrown error is recorded and does not stop the loop.
 *
 * @param {object} bundle - Parsed export with `records` and `authors`.
 * @param {object} resolver - Object exposing `resolveAuthor`.
 * @returns {Promise<{authorIds: Map, skipped: string[], errors: object[]}>}
 */
async function resolveAuthors(bundle, resolver) {
  const logins = new Set();
  for (const { wpType, wpAuthorLogin } of bundle.records) {
    if (wpType !== "attachment" && wpAuthorLogin) {
      logins.add(wpAuthorLogin);
    }
  }

  const authorsByLogin = new Map();
  for (const author of bundle.authors) {
    authorsByLogin.set(author.login, author);
  }

  const authorIds = new Map();
  const skipped = [];
  const errors = [];
  for (const login of logins) {
    let resolved;
    try {
      resolved = await resolver.resolveAuthor({
        wpAuthorLogin: login,
        wpAuthor: authorsByLogin.get(login)
      });
    } catch (err) {
      errors.push({ login, reason: err instanceof Error ? err.message : String(err) });
      continue;
    }
    if (resolved) {
      authorIds.set(login, resolved.id);
    } else {
      skipped.push(login);
    }
  }
  return { authorIds, skipped, errors };
}
1786
+
1787
+ // src/apply/comments.ts
1788
/**
 * Import the comments of one record, oldest `wpId` first.
 *
 * Comments already present in the resume state are counted as skipped, as are
 * unapproved comments and comments for which no member could be ensured. Each
 * imported comment is inserted through `deps.insertComment`; its parent is
 * linked when the parent's new id is known (from this run or the resume
 * state), otherwise it becomes a top-level comment. After every insert the
 * resume state is updated and persisted. Per-comment failures are recorded in
 * `plan.errors` and do not stop the import.
 *
 * @param {object} args - `record`, `postId`, `collection`, `deps`, `plan`,
 *   optional `resume` and `log`.
 * @returns {Promise<void>}
 */
async function importPostComments(args) {
  const { record, postId, collection, deps, plan, resume } = args;
  const log = args.log ?? noop2;
  if (record.comments.length === 0) return;

  // Ascending wpId order so a parent is processed before its replies.
  const ordered = record.comments.slice().sort((left, right) => left.wpId - right.wpId);

  const wpToNexpressId = new Map();
  if (resume) {
    for (const { wpId } of ordered) {
      const priorId = resume.state.comments[wpId];
      if (priorId) wpToNexpressId.set(wpId, priorId);
    }
  }

  for (const wpComment of ordered) {
    const { wpId } = wpComment;
    if (resume?.state.comments[wpId]) {
      plan.skippedByResume++;
      continue;
    }
    if (!wpComment.approved) {
      plan.skippedUnapproved++;
      continue;
    }
    try {
      const member = await ensureMemberFor(wpComment, deps);
      if (!member) {
        plan.skippedNoMember++;
        continue;
      }

      let parentId = null;
      if (wpComment.parentWpId !== null) {
        parentId = wpToNexpressId.get(wpComment.parentWpId) ?? null;
      }
      const createdAt = parseWpDate(wpComment.date);
      const inserted = await deps.insertComment({
        targetType: collection,
        targetId: postId,
        parentId,
        memberId: member.id,
        bodyMd: wpComment.content,
        bodyHtml: deps.renderBody(wpComment.content),
        createdAt
      });

      wpToNexpressId.set(wpId, inserted.id);
      plan.applied++;
      log(`comment write ${collection}/${record.slug} #${wpId}`);
      if (resume) {
        resume.state.comments[wpId] = inserted.id;
        resume.persist();
      }
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      plan.errors.push({ wpCommentId: wpId, reason });
      log(`comment error ${collection}/${record.slug} #${wpId}: ${reason}`);
    }
  }
}
1840
/**
 * Ensure an imported member exists for a comment's author.
 *
 * The handle is the slug of the author's email (or, when the email is null or
 * undefined, of the display name) followed by "-wpimp". The display name is
 * the trimmed author name, or "guest" when that is empty.
 *
 * @param {object} comment - Comment with `authorName` and `authorEmail`.
 * @param {object} deps - Object exposing `ensureImportedMember`.
 * @returns {Promise<object|null>} The member, or null when the slug is empty.
 */
async function ensureMemberFor(comment, deps) {
  const displayName = comment.authorName?.trim() || "guest";
  const slug = slugify(comment.authorEmail ?? displayName);
  const handle = `${slug}-wpimp`;
  // A bare suffix means nothing usable was left after slugifying.
  if (handle === "-wpimp") {
    return null;
  }
  return deps.ensureImportedMember({
    handle,
    email: comment.authorEmail,
    displayName
  });
}
1851
/**
 * Turn text into a lower-case, hyphen-separated slug of at most 40 characters.
 *
 * Runs of characters other than a-z and 0-9 collapse into a single hyphen and
 * leading/trailing hyphens are removed. The trailing hyphen is stripped again
 * after truncation, because cutting at 40 characters can end on a separator.
 *
 * @param {string} input - Text to slugify.
 * @returns {string} The slug; may be empty.
 */
function slugify(input) {
  return input
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/(^-|-$)/g, "")
    .slice(0, 40)
    .replace(/-$/, "");
}
1854
/**
 * Parse a date string into a Date.
 *
 * A value without a "T" has its first space replaced by "T" and a "Z"
 * appended before parsing, i.e. it is read as UTC. Values containing "T" are
 * parsed as given.
 *
 * @param {string} raw - Date text; may be empty.
 * @returns {Date} The parsed date, or the current time when `raw` is empty or
 *   cannot be parsed.
 */
function parseWpDate(raw) {
  if (!raw) {
    return new Date();
  }
  let iso = raw;
  if (!raw.includes("T")) {
    iso = `${raw.replace(" ", "T")}Z`;
  }
  const parsed = new Date(iso);
  if (Number.isNaN(parsed.getTime())) {
    return new Date();
  }
  return parsed;
}
1860
/**
 * Create a fresh comment-import tally with all counters at zero.
 *
 * @returns {object} `{ applied, skippedUnapproved, skippedNoMember, skippedByResume, errors }`.
 */
function emptyCommentPlan() {
  const plan = {
    applied: 0,
    skippedUnapproved: 0,
    skippedNoMember: 0,
    skippedByResume: 0
  };
  plan.errors = [];
  return plan;
}
1863
/** Do-nothing function, used by importPostComments when no `log` is supplied. */
function noop2() {
  return undefined;
}
1865
+
1866
+ // src/apply/resume.ts
1867
+ import { readFileSync as readFileSync3, writeFileSync } from "fs";
1868
/**
 * Create a blank version-1 resume state for the given source.
 *
 * @param {string} source - Stored as the state's `source`.
 * @returns {object} State whose `startedAt` and `updatedAt` are the same
 *   current ISO timestamp and whose lookup tables are all empty.
 */
function emptyResumeState(source) {
  const timestamp = new Date().toISOString();
  const emptyTables = {
    documents: {},
    comments: {},
    authors: {},
    media: {},
    taxonomies: {}
  };
  return {
    version: 1,
    source,
    startedAt: timestamp,
    updatedAt: timestamp,
    ...emptyTables
  };
}
1882
/** Error thrown by loadResumeState when a resume-state file cannot be read or is not valid. */
var ResumeStateError = class extends Error {
  constructor(message) {
    super(message);
    const errorName = "ResumeStateError";
    this.name = errorName;
  }
};
1888
/**
 * Load a resume state from disk.
 *
 * A missing file yields a fresh empty state for `source`. The file must hold
 * a JSON object with `version` 1. Fields of the wrong type are replaced:
 * `source` falls back to the argument, the timestamps to the current time,
 * and the lookup tables keep only their well-typed entries.
 *
 * @param {string} path - File to read.
 * @param {string} source - Source name used when the file does not provide one.
 * @returns {object} The normalised resume state.
 * @throws {ResumeStateError} When the file cannot be read (other than not
 *   existing), is not valid JSON, is not an object, or has another version.
 */
function loadResumeState(path, source) {
  const describe = (err) => (err instanceof Error ? err.message : String(err));

  let text;
  try {
    text = readFileSync3(path, "utf8");
  } catch (err) {
    if (err.code === "ENOENT") {
      return emptyResumeState(source);
    }
    throw new ResumeStateError(`cannot read resume state ${path}: ${describe(err)}`);
  }

  let root;
  try {
    root = JSON.parse(text);
  } catch (err) {
    throw new ResumeStateError(`${path}: invalid JSON \u2014 ${describe(err)}`);
  }
  const isJsonObject = Boolean(root) && typeof root === "object" && !Array.isArray(root);
  if (!isJsonObject) {
    throw new ResumeStateError(`${path}: top-level value must be a JSON object`);
  }
  if (root.version !== 1) {
    throw new ResumeStateError(`${path}: unsupported version ${String(root.version)}`);
  }

  const isoOrNow = (value) => (typeof value === "string" ? value : new Date().toISOString());
  return {
    version: 1,
    source: typeof root.source === "string" ? root.source : source,
    startedAt: isoOrNow(root.startedAt),
    updatedAt: isoOrNow(root.updatedAt),
    documents: asStringMap(root.documents),
    comments: asNumberKeyMap(root.comments),
    authors: asStringMap(root.authors),
    media: asStringMap(root.media),
    taxonomies: asStringMap(root.taxonomies)
  };
}
1927
/**
 * Stamp `state.updatedAt` with the current time and write the state to
 * `path` as two-space-indented JSON (UTF-8).
 *
 * Mutates the passed-in `state` object.
 */
function persistResumeState(path, state) {
  const stamp = new Date().toISOString();
  state.updatedAt = stamp;
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(path, serialized, "utf8");
}
1931
/** Build the resume-marker key for a document: `<collection>/<slug>`. */
function documentKey(collection, slug) {
  return "".concat(collection, "/", slug);
}
1934
/**
 * Coerce an untrusted value into a map of string values.
 *
 * Anything that is not a non-array object yields `{}`; entries whose value
 * is not a string are dropped.
 */
function asStringMap(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) {
    return {};
  }
  const result = {};
  Object.entries(value).forEach(([key, entry]) => {
    if (typeof entry === "string") {
      result[key] = entry;
    }
  });
  return result;
}
1942
/**
 * Coerce an untrusted value into a map keyed by integer id with string
 * values.
 *
 * Only keys that are the canonical decimal form of a safe integer are kept.
 * Loosely numeric keys such as "12abc", "1.5", "0x10" or "007" are rejected
 * rather than truncated, so a malformed key can never alias or overwrite a
 * real id. Entries whose value is not a string are dropped.
 *
 * @param value Candidate map, typically straight from `JSON.parse`.
 * @returns A new object; `{}` when `value` is not a non-array object.
 */
function asNumberKeyMap(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) return {};
  const out = {};
  for (const [k, v] of Object.entries(value)) {
    if (typeof v !== "string") continue;
    const n = Number(k);
    // The round-trip check rejects "", " 5", "5.0", "1e3", "0x10", "12abc", etc.
    if (!Number.isSafeInteger(n) || String(n) !== k) continue;
    out[n] = v;
  }
  return out;
}
1951
+
1952
+ // src/apply/taxonomies.ts
1953
/** Build the lookup key for a taxonomy term: `<taxonomy>:<slug>`. */
function termCacheKey(taxonomy, slug) {
  return "".concat(taxonomy, ":", slug);
}
1956
/**
 * Resolve every distinct taxonomy term in the bundle through `resolver`.
 *
 * Terms are de-duplicated by `termCacheKey(taxonomy, slug)`. Channel-level
 * terms are registered first, so their name wins over a same-key term on a
 * record. Terms on attachment records and terms without a slug are ignored.
 * The resolver is awaited one term at a time, in registration order.
 *
 * @param records      Parsed records; reads `wpType` and `terms`.
 * @param channelTerms Channel-level terms.
 * @param resolver     Object exposing `findOrCreate({ taxonomy, slug, name })`.
 * @returns `{ termIds, errors, skipped }`: a Map of cache key to resolved id,
 *          the terms whose resolver call threw (with the reason), and the
 *          terms for which the resolver returned a falsy value.
 */
async function resolveTaxonomies(records, channelTerms, resolver) {
  const unique = new Map();
  const collect = (terms) => {
    for (const term of terms) {
      if (!term.slug) continue;
      const cacheKey = termCacheKey(term.taxonomy, term.slug);
      if (!unique.has(cacheKey)) {
        unique.set(cacheKey, { taxonomy: term.taxonomy, slug: term.slug, name: term.name || term.slug });
      }
    }
  };
  collect(channelTerms);
  for (const record of records) {
    if (record.wpType !== "attachment") {
      collect(record.terms);
    }
  }

  const termIds = new Map();
  const errors = [];
  const skipped = [];
  for (const [cacheKey, term] of unique) {
    let found;
    try {
      found = await resolver.findOrCreate(term);
    } catch (err) {
      errors.push({ key: term, reason: err instanceof Error ? err.message : String(err) });
      continue;
    }
    if (found) {
      termIds.set(cacheKey, found.id);
    } else {
      skipped.push(term);
    }
  }
  return { termIds, errors, skipped };
}
1987
/**
 * Collect the resolved category and tag ids for one record.
 *
 * Only `category` and `post_tag` terms are considered. Terms without a slug
 * or without a resolved id are skipped, and each id appears at most once per
 * list, in first-seen order.
 *
 * @param record     Parsed record; reads `terms`.
 * @param resolution Taxonomy resolution; reads the `termIds` Map.
 * @returns `{ categoryIds, tagIds }`.
 */
function pickPostTermIds(record, resolution) {
  const categoryIds = [];
  const tagIds = [];
  const categoriesSeen = new Set();
  const tagsSeen = new Set();
  const addOnce = (ids, seen, id) => {
    if (seen.has(id)) return;
    ids.push(id);
    seen.add(id);
  };
  for (const term of record.terms) {
    if (!term.slug) continue;
    const id = resolution.termIds.get(termCacheKey(term.taxonomy, term.slug));
    if (!id) continue;
    switch (term.taxonomy) {
      case "category":
        addOnce(categoryIds, categoriesSeen, id);
        break;
      case "post_tag":
        addOnce(tagIds, tagsSeen, id);
        break;
      default:
        break;
    }
  }
  return { categoryIds, tagIds };
}
2006
+
2007
+ // src/apply/index.ts
2008
// Built-in routing from WP post types to collection names.
// The table has a null prototype because it is indexed with `wpType` values
// taken from the import file: on a plain object a type named "constructor"
// would resolve to an inherited Object.prototype member instead of undefined.
var TYPE_TO_COLLECTION = /* @__PURE__ */ Object.assign(/* @__PURE__ */ Object.create(null), {
  post: "posts",
  page: "pages"
});
2012
/**
 * Apply a parsed WXR bundle: route each record to a collection, skip what
 * should not be imported, and create (or, with `options.update`, update)
 * the remaining documents.
 *
 * Order of work:
 *  1. Build the attachment index and, when `options.media` is given, run the
 *     media pipeline.
 *  2. Resolve taxonomies and authors when the matching option is given.
 *     These steps, and comment import, are skipped entirely in dry-run.
 *  3. Walk the records one by one; a failure on one record is captured in
 *     `errors` and does not stop the loop.
 *  4. Assemble human-readable `notes`.
 *  5. With `options.strict`, fold media / taxonomy / author / comment errors
 *     into the top-level `errors` list.
 *
 * In dry-run, a resolver that was supplied but deliberately not run is not
 * reported as missing: the "no resolver was supplied" notes and the
 * dropped-author counter apply only when the option is genuinely absent.
 *
 * @param bundle  Parsed bundle; reads `records` and `terms` here and is
 *                passed on to the helper functions.
 * @param options Reads `log`, `dryRun`, `media`, `taxonomies`, `authors`,
 *                `comments`, `collectionMappings`, `resume`, `update`,
 *                `actor`, `preserveOriginalAuthor`, `reportHtml`, `audit`
 *                and `strict`.
 * @returns `{ applied, skipped, errors, attachments, media, taxonomies,
 *          comments, authors, notes }`. `media`, `taxonomies`, `comments`
 *          and `authors` are `null` when their step did not run.
 */
async function applyBundle(bundle, options) {
  const log = options.log ?? noop3;
  const dryRun = options.dryRun ?? false;
  const attachments = buildAttachmentIndex(bundle);
  let media = null;
  let resolution = { byUrl: new Map(), byAttachmentId: new Map() };
  if (options.media) {
    media = await runMediaPipeline(bundle, attachments, options.media, { dryRun, log });
    resolution = media.resolution;
  }
  let taxonomies = null;
  if (options.taxonomies && !dryRun) {
    taxonomies = await resolveTaxonomies(bundle.records, bundle.terms, options.taxonomies);
  }
  let authors = null;
  if (options.authors && !dryRun) {
    authors = await resolveAuthors(bundle, options.authors);
  }
  const commentsPlan = options.comments && !dryRun ? emptyCommentPlan() : null;
  // "Deferred" = the caller supplied the resolver but dry-run kept it from
  // running, so its absence from the results must not be reported as a miss.
  const taxonomiesDeferred = dryRun && Boolean(options.taxonomies);
  const authorsDeferred = dryRun && Boolean(options.authors);
  const commentsDeferred = dryRun && Boolean(options.comments);
  const applied = [];
  const skipped = [];
  const errors = [];
  let privateCount = 0;
  let pendingCount = 0;
  let droppedAuthorCount = 0;
  let coverWiredCount = 0;
  let coverMissingCount = 0;
  for (const record of bundle.records) {
    // Built-in routing wins; custom mappings are consulted only for other types.
    const builtin = TYPE_TO_COLLECTION[record.wpType];
    const customMapping = !builtin && options.collectionMappings ? options.collectionMappings[record.wpType] : void 0;
    const collection = builtin ?? customMapping?.collection;
    if (!collection) {
      skipped.push({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: record.slug,
        reason: record.wpType === "attachment" ? "attachment \u2014 handled by media pipeline" : `unmapped wpType "${record.wpType}" \u2014 add an entry to wp-import config to route it`
      });
      continue;
    }
    if (record.status === "trash" || record.status === "auto-draft") {
      skipped.push({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: record.slug,
        reason: `status="${record.status}"`
      });
      continue;
    }
    if (!record.slug) {
      skipped.push({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: "",
        reason: "missing slug"
      });
      continue;
    }
    try {
      // A resume marker stands in for the lookup: the document is treated as
      // existing without querying the collection.
      const markerId = options.resume?.state.documents[documentKey(collection, record.slug)];
      const exists = markerId ? { docs: [{ id: markerId }] } : await findDocuments(
        collection,
        { where: { slug: record.slug }, limit: 1 },
        options.actor
      );
      const existingId = exists.docs.length > 0 && typeof exists.docs[0]?.id === "string" ? exists.docs[0]?.id : void 0;
      // Updating requires both the opt-in flag and a string id to target.
      const updateMode = options.update === true && existingId !== void 0;
      if (exists.docs.length > 0 && !updateMode) {
        skipped.push({
          wpId: record.wpId,
          wpType: record.wpType,
          slug: record.slug,
          reason: markerId ? "resume marker \u2014 already imported" : "slug already exists"
        });
        log(`skip ${collection}/${record.slug} (${markerId ? "resume marker" : "already exists"})`);
        await emitAudit(options.audit, {
          action: "import.wp.skipped",
          targetType: collection,
          targetId: existingId,
          payload: {
            wpId: record.wpId,
            wpType: record.wpType,
            slug: record.slug,
            reason: markerId ? "resume marker" : "slug already exists"
          }
        });
        continue;
      }
      if (record.status === "private") privateCount++;
      else if (record.status === "pending") pendingCount++;
      if (record.wpAuthorLogin && !authorsDeferred && !authors?.authorIds.has(record.wpAuthorLogin)) {
        droppedAuthorCount++;
      }
      const coverImageId = resolveCoverImageId(record, resolution);
      if (collection === "posts") {
        if (coverImageId) {
          coverWiredCount++;
        } else if (recordHasFeaturedImage(record)) {
          coverMissingCount++;
        }
      }
      const termIds = collection === "posts" && taxonomies ? pickPostTermIds(record, taxonomies) : { categoryIds: [], tagIds: [] };
      const authorId = collection === "posts" && authors && record.wpAuthorLogin ? authors.authorIds.get(record.wpAuthorLogin) ?? void 0 : void 0;
      if (dryRun) {
        // Record the plan only; nothing below this point runs in dry-run.
        applied.push({
          wpId: record.wpId,
          wpType: record.wpType,
          collection,
          slug: record.slug,
          title: record.title,
          coverImageId,
          categoryIds: termIds.categoryIds,
          tagIds: termIds.tagIds,
          authorId
        });
        log(`plan ${collection}/${record.slug}`);
        continue;
      }
      const originalAuthorField = options.preserveOriginalAuthor?.[collection];
      const originalAuthorName = originalAuthorField ? resolveOriginalAuthorName(record, bundle) : void 0;
      const data = buildDocData(
        record,
        resolution,
        collection,
        coverImageId,
        termIds,
        authorId,
        customMapping?.fieldOverrides,
        originalAuthorField && originalAuthorName ? { field: originalAuthorField, value: originalAuthorName } : void 0
      );
      const mappedStatus = mapStatusToFramework(record.status);
      const saved = await saveDocument(
        collection,
        updateMode && existingId ? existingId : null,
        // Phase 21.17 — visibility rides the data payload (it's a
        // collection column, validated by the Zod schema), while
        // status stays as the saveDocument opts override. Both
        // are derived from the WP record's `<wp:status>` here.
        { ...data, visibility: mappedStatus.visibility },
        options.actor,
        {
          status: mappedStatus.status
        }
      );
      const savedId = typeof saved.doc.id === "string" ? saved.doc.id : void 0;
      applied.push({
        wpId: record.wpId,
        wpType: record.wpType,
        collection,
        slug: record.slug,
        title: record.title,
        coverImageId,
        categoryIds: termIds.categoryIds,
        tagIds: termIds.tagIds,
        authorId
      });
      log(updateMode ? `update ${collection}/${record.slug}` : `write ${collection}/${record.slug}`);
      options.reportHtml?.emit({
        wpId: record.wpId,
        wpType: record.wpType,
        slug: record.slug,
        title: record.title,
        rawContent: record.rawContent,
        lexical: data.content
      });
      await emitAudit(options.audit, {
        action: updateMode ? "import.wp.updated" : "import.wp.applied",
        targetType: collection,
        targetId: savedId,
        payload: {
          wpId: record.wpId,
          wpType: record.wpType,
          slug: record.slug,
          title: record.title,
          coverImageId,
          categoryIds: termIds.categoryIds,
          tagIds: termIds.tagIds,
          authorId
        }
      });
      // Persist the marker right after the save so a later run can skip this document.
      if (options.resume && savedId) {
        options.resume.state.documents[documentKey(collection, record.slug)] = savedId;
        options.resume.persist();
      }
      if (commentsPlan && options.comments && collection === "posts") {
        const postId = typeof saved.doc.id === "string" ? saved.doc.id : null;
        if (postId) {
          await importPostComments({
            record,
            postId,
            collection,
            deps: options.comments,
            plan: commentsPlan,
            log,
            resume: options.resume
          });
        }
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      errors.push({ wpId: record.wpId, slug: record.slug, message });
      log(`error ${collection}/${record.slug}: ${message}`);
      await emitAudit(options.audit, {
        action: "import.wp.error",
        targetType: collection,
        payload: {
          wpId: record.wpId,
          wpType: record.wpType,
          slug: record.slug,
          message
        }
      });
    }
  }
  const notes = [];
  if (privateCount > 0) {
    notes.push(
      `${privateCount} record${privateCount === 1 ? "" : "s"} with WP status "private" imported as published with visibility=private (Phase 21.17).`
    );
  }
  if (pendingCount > 0) {
    notes.push(
      `${pendingCount} record${pendingCount === 1 ? "" : "s"} with WP status "pending" imported as draft.`
    );
  }
  if (droppedAuthorCount > 0) {
    notes.push(
      authors ? `${droppedAuthorCount} record${droppedAuthorCount === 1 ? "" : "s"} dropped their original WP author (resolver returned null for the matching login).` : `${droppedAuthorCount} record${droppedAuthorCount === 1 ? "" : "s"} dropped their original WP author \u2014 opt in by passing \`authors\` to \`applyBundle\` (Phase 21.8).`
    );
  }
  if (coverWiredCount > 0) {
    notes.push(
      `${coverWiredCount} post${coverWiredCount === 1 ? "" : "s"} wired a featured image to coverImage from the WP _thumbnail_id reference.`
    );
  }
  if (coverMissingCount > 0) {
    notes.push(
      `${coverMissingCount} post${coverMissingCount === 1 ? "" : "s"} declared a WP featured image but the source asset was not resolvable (download failed, MIME rejected, or attachment record missing).`
    );
  }
  if (taxonomies) {
    if (taxonomies.errors.length > 0) {
      notes.push(
        `${taxonomies.errors.length} taxonomy term${taxonomies.errors.length === 1 ? "" : "s"} failed to resolve \u2014 see Taxonomies section.`
      );
    }
    if (taxonomies.skipped.length > 0) {
      notes.push(
        `${taxonomies.skipped.length} taxonomy term${taxonomies.skipped.length === 1 ? "" : "s"} skipped by the resolver (likely a custom taxonomy the project doesn't track).`
      );
    }
  } else if (!taxonomiesDeferred && hasAnyTerm(bundle)) {
    notes.push(
      "Categories/tags found in the WXR but no taxonomy resolver was supplied \u2014 terms were dropped (Phase 21.6 \u2014 opt in by passing `taxonomies` to `applyBundle`)."
    );
  }
  if (commentsPlan) {
    if (commentsPlan.skippedUnapproved > 0) {
      notes.push(
        `${commentsPlan.skippedUnapproved} comment${commentsPlan.skippedUnapproved === 1 ? "" : "s"} dropped because <wp:comment_approved> was not "1".`
      );
    }
    if (commentsPlan.errors.length > 0) {
      notes.push(
        `${commentsPlan.errors.length} comment${commentsPlan.errors.length === 1 ? "" : "s"} failed to insert \u2014 see Comments section.`
      );
    }
  } else if (!commentsDeferred && hasAnyComment(bundle)) {
    notes.push(
      "Comments found in the WXR but no comments deps were supplied \u2014 comments were dropped (Phase 21.7 \u2014 opt in by passing `comments` to `applyBundle`)."
    );
  }
  if (authors) {
    if (authors.errors.length > 0) {
      notes.push(
        `${authors.errors.length} author${authors.errors.length === 1 ? "" : "s"} failed to resolve \u2014 see Authors section.`
      );
    }
  }
  if (options.strict) {
    // Strict mode surfaces sub-pipeline failures as top-level errors (wpId 0).
    if (media) {
      for (const e of media.errors) {
        errors.push({ wpId: 0, slug: e.url, message: `media: ${e.reason}` });
      }
    }
    if (taxonomies) {
      for (const e of taxonomies.errors) {
        errors.push({
          wpId: 0,
          slug: `${e.key.taxonomy}/${e.key.slug}`,
          message: `taxonomy: ${e.reason}`
        });
      }
    }
    if (authors) {
      for (const e of authors.errors) {
        errors.push({ wpId: 0, slug: e.login, message: `author: ${e.reason}` });
      }
    }
    if (commentsPlan) {
      for (const e of commentsPlan.errors) {
        errors.push({
          wpId: 0,
          slug: `comment#${e.wpCommentId}`,
          message: `comment: ${e.reason}`
        });
      }
    }
  }
  return {
    applied,
    skipped,
    errors,
    attachments,
    media,
    taxonomies,
    comments: commentsPlan,
    authors,
    notes
  };
}
2333
/**
 * Build the data payload for one imported document.
 *
 * @param record         Parsed record; reads `rawContent`, `title`, `slug`,
 *                       `excerpt`, `meta` and `publishedAt`.
 * @param resolution     Media resolution passed to `rewriteLexicalMedia`.
 * @param collection     Target collection; cover image, categories, tags and
 *                       author are written only for "posts".
 * @param coverImageId   Resolved cover image id, if any.
 * @param termIds        `{ categoryIds, tagIds }` for the record.
 * @param authorId       Resolved author id, if any.
 * @param fieldOverrides Optional map of WP meta key to target field name.
 *                       Core fields cannot be targeted, and only non-empty
 *                       string meta values are copied.
 * @param originalAuthor Optional `{ field, value }` written verbatim.
 * @returns The payload. `publishedAt` is set as an ISO string only when the
 *          record's date parses.
 */
function buildDocData(record, resolution, collection, coverImageId, termIds, authorId, fieldOverrides, originalAuthor) {
  const lexical = htmlToLexical(record.rawContent);
  const rewritten = rewriteLexicalMedia(lexical, resolution);
  const data = {
    title: record.title || "(untitled)",
    slug: record.slug,
    content: rewritten
  };
  if (record.excerpt) {
    data.excerpt = record.excerpt;
  }
  if (collection === "posts" && coverImageId) {
    data.coverImage = coverImageId;
  }
  if (collection === "posts") {
    if (termIds.categoryIds.length > 0) data.categories = termIds.categoryIds;
    if (termIds.tagIds.length > 0) data.tags = termIds.tagIds;
    if (authorId) data.author = authorId;
  }
  if (fieldOverrides) {
    const protectedFields = new Set(["title", "slug", "content", "excerpt", "publishedAt", "coverImage", "categories", "tags", "author"]);
    for (const [metaKey, fieldName] of Object.entries(fieldOverrides)) {
      if (protectedFields.has(fieldName)) continue;
      const value = record.meta[metaKey];
      if (typeof value === "string" && value.length > 0) {
        data[fieldName] = value;
      }
    }
  }
  if (originalAuthor) {
    data[originalAuthor.field] = originalAuthor.value;
  }
  if (record.publishedAt) {
    const normalized = record.publishedAt.trim().replace(" ", "T");
    // Zone-less dates ("YYYY-MM-DD HH:MM:SS") are read as UTC. A value that
    // already carries "Z" or a numeric offset must not get a second
    // designator, which would make it unparsable and drop the date.
    const hasZone = /T.*(?:Z|[+-]\d{2}:?\d{2})$/i.test(normalized);
    const date = new Date(hasZone ? normalized : `${normalized}Z`);
    if (!Number.isNaN(date.getTime())) {
      data.publishedAt = date.toISOString();
    }
  }
  return data;
}
2374
/** True when any of the record's media references has kind "featured". */
function recordHasFeaturedImage(record) {
  for (const ref of record.mediaRefs) {
    if (ref.kind === "featured") {
      return true;
    }
  }
  return false;
}
2377
/**
 * True when the bundle carries any taxonomy term: either a channel-level
 * term or a term on a non-attachment record.
 */
function hasAnyTerm(bundle) {
  if (bundle.terms.length > 0) {
    return true;
  }
  for (const record of bundle.records) {
    if (record.terms.length > 0 && record.wpType !== "attachment") {
      return true;
    }
  }
  return false;
}
2381
/** True when at least one non-attachment record carries a comment. */
function hasAnyComment(bundle) {
  for (const record of bundle.records) {
    if (record.comments.length > 0 && record.wpType !== "attachment") {
      return true;
    }
  }
  return false;
}
2384
/**
 * Look up the display name of a record's WP author.
 *
 * @returns The trimmed display name of the bundle author whose login matches
 *          `record.wpAuthorLogin`; the login itself when there is no match or
 *          the name is blank; `undefined` when the record has no login.
 */
function resolveOriginalAuthorName(record, bundle) {
  const { wpAuthorLogin: login } = record;
  if (!login) {
    return undefined;
  }
  const author = bundle.authors.find((candidate) => candidate.login === login);
  const displayName = author?.displayName?.trim();
  return displayName ? displayName : login;
}
2390
/**
 * Resolve the media id for a record's featured image.
 *
 * The first media reference of kind "featured" is used. Its attachment id
 * is tried first (when not null), then its source URL.
 *
 * @returns The resolved id, or `undefined` when there is no featured
 *          reference or neither lookup yields a truthy id.
 */
function resolveCoverImageId(record, resolution) {
  const featured = record.mediaRefs.find((candidate) => candidate.kind === "featured");
  if (!featured) {
    return undefined;
  }
  const byAttachment = featured.wpAttachmentId !== null
    ? resolution.byAttachmentId.get(featured.wpAttachmentId)
    : undefined;
  if (byAttachment) {
    return byAttachment;
  }
  const byUrl = featured.sourceUrl ? resolution.byUrl.get(featured.sourceUrl) : undefined;
  return byUrl || undefined;
}
2403
/**
 * Translate a WP post status into the framework's status/visibility pair.
 *
 * "publish" maps to published/public, "private" to published/private, and
 * every other value to draft/public.
 */
function mapStatusToFramework(status) {
  switch (status) {
    case "publish":
      return { status: "published", visibility: "public" };
    case "private":
      return { status: "published", visibility: "private" };
    default:
      return { status: "draft", visibility: "public" };
  }
}
2408
/** Do-nothing function used as the default `log` sink; returns `undefined`. */
function noop3() {
  return undefined;
}
2410
/**
 * Forward an audit event to `deps.record`, if audit deps were supplied.
 *
 * Any failure thrown or rejected by `deps.record` is swallowed, so this
 * function never rejects because of the audit sink.
 */
async function emitAudit(deps, event) {
  if (deps) {
    try {
      await deps.record(event);
    } catch {
      // Intentionally ignored: failures from the audit sink are not propagated.
    }
  }
}
2417
+ export {
2418
+ ResumeStateError,
2419
+ WpImportConfigError,
2420
+ WpImportStreamError,
2421
+ WpMediaDownloadError,
2422
+ WpMediaSsrfError,
2423
+ applyBundle,
2424
+ buildAttachmentIndex,
2425
+ documentKey,
2426
+ downloadMedia,
2427
+ emptyCommentPlan,
2428
+ emptyResumeState,
2429
+ formatApplyReport,
2430
+ formatSummary,
2431
+ htmlToLexical,
2432
+ importPostComments,
2433
+ isAllowedMimeType,
2434
+ loadConfigFromPath,
2435
+ loadResumeState,
2436
+ parseConfig,
2437
+ parseWxr,
2438
+ parseWxrStream,
2439
+ persistResumeState,
2440
+ pickPostTermIds,
2441
+ resolveAuthors,
2442
+ resolveEnvDownloadOptions,
2443
+ resolveTaxonomies,
2444
+ rewriteLexicalMedia,
2445
+ runCli,
2446
+ runMediaPipeline,
2447
+ termCacheKey
2448
+ };