@artinstack/migrator 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,138 @@ import {
7
7
  validateSquarespaceExportFile
8
8
  } from "./chunk-HH7666MQ.js";
9
9
  import {
10
- discoverContentAssetUrls
10
+ discoverContentAssetUrls,
11
+ normalizeAssetUrl
11
12
  } from "./chunk-2PNSVE5Y.js";
12
13
 
14
+ // src/lib/origin-url-rewrite.ts
15
/**
 * Applies every rewrite rule in `config.rules`, in order, to `text`.
 *
 * String rules replace every literal occurrence of `rule.match`; an empty
 * string rule is ignored. Any other `rule.match` is handed to
 * `String.prototype.replace`, so a RegExp without the `g` flag rewrites only
 * its first match.
 *
 * @param {string} text - Text to rewrite; falsy input is returned unchanged.
 * @param {{ rules: Array<{ match: string | RegExp, replace: string }> }} config
 * @returns {string} The rewritten text.
 */
function rewriteOriginUrlsInText(text, config) {
  const nothingToDo = !text || config.rules.length === 0;
  if (nothingToDo) {
    return text;
  }
  return config.rules.reduce((rewritten, { match, replace }) => {
    if (typeof match !== "string") {
      return rewritten.replace(match, replace);
    }
    // split/join keeps the replacement literal (no `$&`-style expansion).
    return match ? rewritten.split(match).join(replace) : rewritten;
  }, text);
}
28
/**
 * Builds a rewrite config with a single literal rule that maps
 * `<gatewayBase>/wp-content/` to `<publicOrigin>/wp-content/`.
 *
 * All trailing slashes are removed from both arguments before the
 * `/wp-content/` suffix is appended, so `https://gw/` and `https://gw//`
 * produce the same rule.
 *
 * @param {string} gatewayBase - Base URL that currently prefixes wp-content links.
 * @param {string} publicOrigin - Base URL that should prefix them instead.
 * @returns {{ rules: Array<{ match: string, replace: string }> }}
 */
function createWpContentGatewayRewrite(gatewayBase, publicOrigin) {
  const stripTrailingSlashes = (origin) => origin.replace(/\/+$/, "");
  return {
    rules: [
      {
        match: `${stripTrailingSlashes(gatewayBase)}/wp-content/`,
        replace: `${stripTrailingSlashes(publicOrigin)}/wp-content/`
      }
    ]
  };
}
40
+
13
41
  // src/parsers/wordpress/parse-wxr.ts
14
42
  import { readFile } from "fs/promises";
15
43
  import { basename } from "path";
16
44
  import { XMLParser } from "fast-xml-parser";
45
+
46
+ // src/parsers/wordpress/builders/registry.ts
47
/**
 * Default list of page-builder descriptors used by `flattenWordPressBuilders`.
 *
 * Each entry carries:
 * - `id`: name reported in `detectedThemes`.
 * - `detect`: regex tested against the content to decide whether the entry applies.
 * - `contentRules`: shortcodes that are converted to an HTML tag, with the
 *   attribute names to look up for the URL (in priority order).
 * - `scaffoldingPrefix`: shortcodes starting with this prefix are removed.
 */
var WORDPRESS_BUILDER_REGISTRY = [
  {
    id: "tatsu",
    detect: /\[(?:\/)?tatsu_/i,
    contentRules: [
      {
        shortcodePrefix: "tatsu_image",
        urlParams: ["image", "url", "src"],
        tag: "img"
      },
      {
        shortcodePrefix: "tatsu_video",
        urlParams: ["video", "src", "url"],
        tag: "video"
      }
    ],
    scaffoldingPrefix: "tatsu_"
  },
  {
    id: "divi",
    detect: /\[(?:\/)?et_pb_/i,
    contentRules: [
      {
        shortcodePrefix: "et_pb_image",
        urlParams: ["src", "url"],
        tag: "img"
      }
    ],
    scaffoldingPrefix: "et_pb_"
  },
  {
    id: "elementor",
    // NOTE(review): `detect` accepts both `elementor-` and `elementor_`, but
    // `scaffoldingPrefix` only covers `elementor_` - confirm this is intended.
    detect: /\[(?:\/)?elementor[-_]/i,
    contentRules: [
      {
        shortcodePrefix: "elementor-widget",
        urlParams: ["url", "src", "image"],
        tag: "img"
      }
    ],
    scaffoldingPrefix: "elementor_"
  }
];
72
+
73
+ // src/parsers/wordpress/builders/flatten.ts
74
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source as a literal.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (special) => `\\${special}`);
}
77
/**
 * Looks up a quoted attribute value inside a shortcode's parameter string.
 *
 * `names` is tried in order; the first name that is present with a non-blank
 * value wins and its trimmed value is returned. Matching is case-insensitive.
 *
 * The value must be closed by the same quote character that opened it, so a
 * double-quoted value may contain an apostrophe (and vice versa). The name
 * must not be the tail of a longer word or hyphenated attribute, so `src`
 * does not match inside `data-src`.
 *
 * @param {string} params - Raw text between the shortcode name and `]`.
 * @param {string[]} names - Attribute names to try, in priority order.
 * @returns {string | undefined} The trimmed value, or `undefined` when none match.
 */
function extractShortcodeParam(params, names) {
  for (const name of names) {
    const pattern = new RegExp(
      // group 1: opening quote, group 2: value up to the matching closing quote
      `(?<![\\w-])${escapeRegExp(name)}\\s*=\\s*(["'])([\\s\\S]*?)\\1`,
      "i"
    );
    const value = params.match(pattern)?.[2]?.trim();
    if (value) return value;
  }
  return void 0;
}
85
/**
 * Renders a minimal HTML element pointing at `url`.
 *
 * The URL is passed through `normalizeAssetUrl` (the raw `url` is used when
 * that returns null/undefined), then `&`, `"` and `<` are entity-escaped
 * before it is placed in the `src` attribute.
 *
 * @param {string} tag - One of "img", "video" or "iframe".
 * @param {string} url - Asset URL for the `src` attribute.
 * @returns {string | undefined} The markup, or `undefined` for any other tag.
 */
function emitHtmlTag(tag, url) {
  const source = normalizeAssetUrl(url) ?? url;
  const attr = source
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;");
  if (tag === "img") {
    return `<img src="${attr}" alt="" />`;
  }
  if (tag === "video") {
    return `<video src="${attr}" controls></video>`;
  }
  if (tag === "iframe") {
    return `<iframe src="${attr}" loading="lazy"></iframe>`;
  }
  return void 0;
}
97
/**
 * Replaces shortcodes whose name starts with `rule.shortcodePrefix` by the
 * HTML tag named in `rule.tag`, using the first URL attribute found among
 * `rule.urlParams`.
 *
 * A closing shortcode that directly follows the opener (optionally separated
 * by whitespace) is consumed together with it. A shortcode is left untouched
 * when no URL attribute is found, or when `emitHtmlTag` produces nothing for
 * `rule.tag`; without that second guard `String.prototype.replace` would
 * write the literal text "undefined" into the output.
 *
 * @param {string} content - Markup to scan.
 * @param {{ shortcodePrefix: string, urlParams: string[], tag: string }} rule
 * @returns {string} The markup with matching shortcodes converted.
 */
function convertContentBlocker(content, rule) {
  const prefix = escapeRegExp(rule.shortcodePrefix);
  const pattern = new RegExp(
    `\\[${prefix}([^\\]]*)\\]\\s*(?:\\[\\/${prefix}[^\\]]*\\])?`,
    "gi"
  );
  return content.replace(pattern, (block, params) => {
    const url = extractShortcodeParam(params, rule.urlParams);
    if (!url) return block;
    return emitHtmlTag(rule.tag, url) ?? block;
  });
}
109
/**
 * Removes every opening and closing shortcode whose name starts with
 * `prefix` (case-insensitive), leaving the text between them in place.
 *
 * Openers are removed first and closers second, as two separate passes.
 *
 * @param {string} content - Markup to clean.
 * @param {string} prefix - Literal shortcode-name prefix, e.g. "tatsu_".
 * @returns {string} The markup without the matching shortcodes.
 */
function stripScaffolding(content, prefix) {
  const safePrefix = escapeRegExp(prefix);
  // Shared tail: rest of the shortcode name, its attributes, and the closing bracket.
  const tagTail = `${safePrefix}[a-z0-9_-]*[^\\]]*\\]`;
  const openingTags = new RegExp(`\\[${tagTail}`, "gi");
  const closingTags = new RegExp(`\\[\\/${tagTail}`, "gi");
  const withoutOpeners = content.replace(openingTags, "");
  return withoutOpeners.replace(closingTags, "");
}
115
/**
 * Returns the registry entries whose `detect` regex matches `content`.
 *
 * `RegExp.prototype.test` is stateful for regexes carrying the `g` or `y`
 * flag: it resumes from `lastIndex`. `lastIndex` is therefore reset around
 * each test so a caller-supplied registry gives the same answer on every call.
 *
 * @param {string} content - Markup to inspect.
 * @param {Array<{ detect: RegExp }>} registry - Builder descriptors to test.
 * @returns {Array<{ detect: RegExp }>} The matching entries, in registry order.
 */
function detectThemes(content, registry) {
  return registry.filter(({ detect }) => {
    detect.lastIndex = 0;
    const matched = detect.test(content);
    detect.lastIndex = 0;
    return matched;
  });
}
118
/**
 * Converts page-builder shortcodes in `content` to plain HTML.
 *
 * For every registry entry detected in the content, its content rules are
 * applied in order and then its scaffolding shortcodes are stripped. Finally,
 * runs of three or more newlines collapse to two and the result is trimmed.
 * Blank content, or content in which no entry is detected, is returned
 * unchanged with an empty `detectedThemes` list.
 *
 * @param {string} content - Raw post/page markup.
 * @param {{ registry?: Array<object> }} [options] - `registry` overrides the
 *   default `WORDPRESS_BUILDER_REGISTRY`.
 * @returns {{ html: string, detectedThemes: string[] }}
 */
function flattenWordPressBuilders(content, options = {}) {
  const untouched = () => ({ html: content, detectedThemes: [] });
  if (!content.trim()) {
    return untouched();
  }
  const matchedBuilders = detectThemes(
    content,
    options.registry ?? WORDPRESS_BUILDER_REGISTRY
  );
  if (matchedBuilders.length === 0) {
    return untouched();
  }
  const flattened = matchedBuilders.reduce((markup, builder) => {
    let converted = markup;
    for (const rule of builder.contentRules) {
      converted = convertContentBlocker(converted, rule);
    }
    return stripScaffolding(converted, builder.scaffoldingPrefix);
  }, content);
  return {
    html: flattened.replace(/\n{3,}/g, "\n\n").trim(),
    detectedThemes: matchedBuilders.map(({ id }) => id)
  };
}
140
+
141
+ // src/parsers/wordpress/parse-wxr.ts
17
142
  var PLATFORM = "wordpress";
18
143
  function asArray(value) {
19
144
  if (value === void 0) return [];
@@ -80,13 +205,16 @@ function parseItems(xml) {
80
205
  const doc = parser.parse(xml);
81
206
  return asArray(doc.rss?.channel?.item);
82
207
  }
83
- function buildAttachmentIndex(items) {
208
+ function buildAttachmentIndex(items, originUrlRewrite) {
84
209
  const index = /* @__PURE__ */ new Map();
85
210
  for (const item of items) {
86
211
  if (textValue(item.post_type) !== "attachment") continue;
87
212
  const id = textValue(item.post_id);
88
- const url = textValue(item.attachment_url) || textValue(item.link);
213
+ let url = textValue(item.attachment_url) || textValue(item.link);
89
214
  if (!id || !url) continue;
215
+ if (originUrlRewrite) {
216
+ url = rewriteOriginUrlsInText(url, originUrlRewrite);
217
+ }
90
218
  const filename = basename(new URL(url, "http://local.invalid").pathname) || `attachment-${id}`;
91
219
  index.set(id, {
92
220
  sourceUrl: url,
@@ -171,10 +299,27 @@ function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt) {
171
299
  }
172
300
  return assets;
173
301
  }
302
/**
 * Prepares an item's raw HTML before assets are collected from it.
 *
 * Origin URLs are rewritten first when `options.originUrlRewrite` is set;
 * builder shortcodes are then flattened unless `options.flattenBuilders` is
 * exactly `false`.
 *
 * @param {string} rawHtml - Content as read from the export.
 * @param {{ originUrlRewrite?: object, flattenBuilders?: boolean }} options
 * @returns {string} The processed HTML.
 */
function preprocessContent(rawHtml, options) {
  const { originUrlRewrite, flattenBuilders } = options;
  const rewritten = originUrlRewrite
    ? rewriteOriginUrlsInText(rawHtml, originUrlRewrite)
    : rawHtml;
  if (flattenBuilders === false) {
    return rewritten;
  }
  return flattenWordPressBuilders(rewritten).html;
}
312
/**
 * Picks the source id of a post's featured asset.
 *
 * The thumbnail id is used when it is set and present in `attachmentIndex`.
 * Otherwise the first URL reported by `discoverContentAssetUrls` for the
 * content is used, prefixed with `url:`.
 *
 * @param {string | undefined} thumbnailId - Thumbnail id taken from post meta.
 * @param {{ has(id: string): boolean }} attachmentIndex - Known attachments by id.
 * @param {string} contentHtml - Processed post content.
 * @returns {string | undefined} The source id, or `undefined` when neither applies.
 */
function resolveFeaturedAssetSourceId(thumbnailId, attachmentIndex, contentHtml) {
  const hasIndexedThumbnail = Boolean(thumbnailId) && attachmentIndex.has(thumbnailId);
  if (hasIndexedThumbnail) {
    return thumbnailId;
  }
  const inlineUrls = discoverContentAssetUrls(contentHtml);
  const firstInline = inlineUrls[0];
  if (firstInline) {
    return `url:${firstInline}`;
  }
  return void 0;
}
174
319
  async function* enumerateWxrEntities(options) {
175
320
  const xml = await readFile(options.filePath, "utf8");
176
321
  const items = parseItems(xml);
177
- const attachmentIndex = buildAttachmentIndex(items);
322
+ const attachmentIndex = buildAttachmentIndex(items, options.originUrlRewrite);
178
323
  const { categories, tags } = collectTaxonomies(items);
179
324
  const seenAssetUrls = /* @__PURE__ */ new Set();
180
325
  const emittedAttachmentIds = /* @__PURE__ */ new Set();
@@ -203,9 +348,9 @@ async function* enumerateWxrEntities(options) {
203
348
  const id = textValue(item.post_id);
204
349
  const link = textValue(item.link);
205
350
  const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
206
- const rawHtml = getContentEncoded(item);
351
+ const contentHtml = preprocessContent(getContentEncoded(item), options);
207
352
  for (const asset of collectInlineAssets(
208
- rawHtml,
353
+ contentHtml,
209
354
  attachmentIndex,
210
355
  seenAssetUrls,
211
356
  options.exportedAt
@@ -223,10 +368,11 @@ async function* enumerateWxrEntities(options) {
223
368
  }
224
369
  if (postType === "post") {
225
370
  const thumbnailId = getPostMeta(item, "_thumbnail_id");
226
- let featuredAssetSourceId;
227
- if (thumbnailId && attachmentIndex.has(thumbnailId)) {
228
- featuredAssetSourceId = thumbnailId;
229
- }
371
+ const featuredAssetSourceId = resolveFeaturedAssetSourceId(
372
+ thumbnailId,
373
+ attachmentIndex,
374
+ contentHtml
375
+ );
230
376
  const post = {
231
377
  type: "post",
232
378
  source: sourceMeta(id, link, options.exportedAt),
@@ -234,7 +380,7 @@ async function* enumerateWxrEntities(options) {
234
380
  title: textValue(item.title) || slug,
235
381
  slug,
236
382
  excerpt: getExcerpt(item) || void 0,
237
- contentHtml: rawHtml,
383
+ contentHtml,
238
384
  publishedAt: textValue(item.post_date) || void 0,
239
385
  status: mapPublishStatus(textValue(item.status)),
240
386
  categorySlugs: categorySlugs.length ? categorySlugs : void 0,
@@ -251,7 +397,7 @@ async function* enumerateWxrEntities(options) {
251
397
  sourceId: id,
252
398
  title: textValue(item.title) || slug,
253
399
  slug,
254
- contentHtml: rawHtml,
400
+ contentHtml,
255
401
  isHomePage: isHomePage || void 0,
256
402
  status: mapPublishStatus(textValue(item.status))
257
403
  };
@@ -291,18 +437,25 @@ async function validateWxrFile(filePath) {
291
437
  }
292
438
 
293
439
  // src/parsers/wordpress/index.ts
294
- function resolvePath(input) {
295
- if (typeof input === "string") return input;
440
+ function resolveWxrOptions(input) {
441
+ if (typeof input === "string") {
442
+ return { filePath: input };
443
+ }
296
444
  if (input && typeof input === "object" && "path" in input) {
297
- return String(input.path);
445
+ const obj = input;
446
+ return {
447
+ filePath: String(obj.path),
448
+ originUrlRewrite: obj.originUrlRewrite,
449
+ flattenBuilders: obj.flattenBuilders
450
+ };
298
451
  }
299
- throw new Error("WordPress adapter requires input path (string or { path })");
452
+ throw new Error("WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders? })");
300
453
  }
301
454
  var wordpressAdapter = {
302
455
  platform: "wordpress",
303
456
  async validateInput(input) {
304
- const path = resolvePath(input);
305
- const result = await validateWxrFile(path);
457
+ const { filePath } = resolveWxrOptions(input);
458
+ const result = await validateWxrFile(filePath);
306
459
  return {
307
460
  ok: result.ok,
308
461
  issues: result.issues,
@@ -310,8 +463,7 @@ var wordpressAdapter = {
310
463
  };
311
464
  },
312
465
  enumerateEntities(ctx) {
313
- const path = resolvePath(ctx.input);
314
- return enumerateWxrEntities({ filePath: path });
466
+ return enumerateWxrEntities(resolveWxrOptions(ctx.input));
315
467
  }
316
468
  };
317
469
 
@@ -2272,6 +2424,8 @@ function getAdapter(platform) {
2272
2424
  }
2273
2425
 
2274
2426
  export {
2427
+ rewriteOriginUrlsInText,
2428
+ createWpContentGatewayRewrite,
2275
2429
  wordpressAdapter,
2276
2430
  SMUGMUG_API_BASE,
2277
2431
  SMUGMUG_OAUTH_ENDPOINTS,
@@ -2287,4 +2441,4 @@ export {
2287
2441
  wixAdapter,
2288
2442
  getAdapter
2289
2443
  };
2290
- //# sourceMappingURL=chunk-VXEHAQKK.js.map
2444
+ //# sourceMappingURL=chunk-QEXTXHFG.js.map