@artinstack/migrator 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-VXEHAQKK.js → chunk-QEXTXHFG.js} +175 -21
- package/dist/chunk-QEXTXHFG.js.map +1 -0
- package/dist/cli/index.js +1 -1
- package/dist/index.d.ts +14 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-VXEHAQKK.js.map +0 -1
|
@@ -7,13 +7,138 @@ import {
|
|
|
7
7
|
validateSquarespaceExportFile
|
|
8
8
|
} from "./chunk-HH7666MQ.js";
|
|
9
9
|
import {
|
|
10
|
-
discoverContentAssetUrls
|
|
10
|
+
discoverContentAssetUrls,
|
|
11
|
+
normalizeAssetUrl
|
|
11
12
|
} from "./chunk-2PNSVE5Y.js";
|
|
12
13
|
|
|
14
|
+
// src/lib/origin-url-rewrite.ts
|
|
15
|
+
/**
 * Applies every rule in `config.rules`, in order, to `text`.
 *
 * - String matches are literal: every occurrence is replaced and the
 *   replacement is inserted verbatim (no `$`-pattern expansion).
 * - RegExp matches go through `String.prototype.replace`, so `$1`-style
 *   references in `rule.replace` are expanded.
 *
 * @param {string} text - Text to rewrite (HTML content or a bare URL).
 * @param {{ rules: Array<{ match: string | RegExp, replace: string }> }} config
 * @returns {string} The rewritten text; `text` is returned untouched when it
 *   is empty or when there are no rules.
 */
function rewriteOriginUrlsInText(text, config) {
  if (!text || config.rules.length === 0) return text;
  let result = text;
  for (const { match, replace } of config.rules) {
    if (typeof match === "string") {
      // An empty needle would split between every character, so skip it.
      if (match) result = result.split(match).join(replace);
      continue;
    }
    // Without the `g` flag only the first hit would be rewritten, leaving
    // later URLs pointing at the old origin while string rules replace all.
    // A copy is used so the caller's RegExp object is never modified.
    const pattern = match.global ? match : new RegExp(match.source, `${match.flags}g`);
    result = result.replace(pattern, replace);
  }
  return result;
}
|
|
28
|
+
/**
 * Builds a rewrite config with a single literal rule that maps
 * `<gatewayBase>/wp-content/` to `<publicOrigin>/wp-content/`.
 *
 * @param {string} gatewayBase - Origin (optionally with a path) the export points at.
 * @param {string} publicOrigin - Origin that should appear in the rewritten URLs.
 * @returns {{ rules: Array<{ match: string, replace: string }> }}
 */
function createWpContentGatewayRewrite(gatewayBase, publicOrigin) {
  // Strip every trailing slash, not just one: "https://gw//" would otherwise
  // produce the needle "https://gw//wp-content/", which never matches.
  const stripTrailingSlashes = (origin) => origin.replace(/\/+$/, "");
  return {
    rules: [
      {
        match: `${stripTrailingSlashes(gatewayBase)}/wp-content/`,
        replace: `${stripTrailingSlashes(publicOrigin)}/wp-content/`
      }
    ]
  };
}
|
|
40
|
+
|
|
13
41
|
// src/parsers/wordpress/parse-wxr.ts
|
|
14
42
|
import { readFile } from "fs/promises";
|
|
15
43
|
import { basename } from "path";
|
|
16
44
|
import { XMLParser } from "fast-xml-parser";
|
|
45
|
+
|
|
46
|
+
// src/parsers/wordpress/builders/registry.ts
|
|
47
|
+
/**
 * Default page-builder definitions used by `flattenWordPressBuilders`.
 *
 * Each entry has:
 * - `id`: name reported in `detectedThemes`.
 * - `detect`: RegExp tested against the content by `detectThemes`.
 * - `contentRules`: shortcodes that `convertContentBlocker` turns into an HTML
 *   tag. `urlParams` are tried in the listed order and `tag` is passed to
 *   `emitHtmlTag`.
 * - `scaffoldingPrefix`: after the content rules have run, `stripScaffolding`
 *   removes every remaining opening/closing shortcode starting with this prefix.
 */
var WORDPRESS_BUILDER_REGISTRY = [
  {
    id: "tatsu",
    detect: /\[(?:\/)?tatsu_/i,
    contentRules: [
      { shortcodePrefix: "tatsu_image", urlParams: ["image", "url", "src"], tag: "img" },
      { shortcodePrefix: "tatsu_video", urlParams: ["video", "src", "url"], tag: "video" }
    ],
    scaffoldingPrefix: "tatsu_"
  },
  {
    id: "divi",
    detect: /\[(?:\/)?et_pb_/i,
    contentRules: [{ shortcodePrefix: "et_pb_image", urlParams: ["src", "url"], tag: "img" }],
    scaffoldingPrefix: "et_pb_"
  },
  {
    id: "elementor",
    detect: /\[(?:\/)?elementor[-_]/i,
    contentRules: [
      { shortcodePrefix: "elementor-widget", urlParams: ["url", "src", "image"], tag: "img" }
    ],
    // Must cover both separators accepted by `detect`. The former value
    // "elementor_" never matched the hyphenated shortcodes this entry's own
    // content rule targets, so they were left behind in the flattened HTML.
    scaffoldingPrefix: "elementor"
  }
];
|
|
72
|
+
|
|
73
|
+
// src/parsers/wordpress/builders/flatten.ts
|
|
74
|
+
/**
 * Backslash-escapes every RegExp metacharacter in `value` so the result can
 * be embedded in a `new RegExp(...)` source and match `value` literally.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (ch) => `\\${ch}`);
}
|
|
77
|
+
/**
 * Returns the trimmed value of the first attribute in `names` that is present
 * in `params` with a non-blank quoted value.
 *
 * `names` is a priority list: the first name that yields a value wins, even
 * when a later name appears earlier in `params`. Names match case-insensitively.
 *
 * @param {string} params - Raw attribute text of a shortcode.
 * @param {string[]} names - Attribute names to try, in priority order.
 * @returns {string | undefined} The value, or `undefined` when none is found.
 */
function extractShortcodeParam(params, names) {
  for (const name of names) {
    // The closing quote must be the same character as the opening one (\1).
    // Otherwise a value such as "https://x/o'brien.jpg" is cut at the apostrophe.
    const pattern = new RegExp(
      `\\b${escapeRegExp(name)}\\s*=\\s*(["'])((?:(?!\\1)[\\s\\S])+)\\1`,
      "i"
    );
    const value = params.match(pattern)?.[2]?.trim();
    if (value) return value;
  }
  return void 0;
}
|
|
85
|
+
/**
 * Renders the HTML element for `tag` with `url` as its `src` attribute.
 *
 * The URL is first passed through `normalizeAssetUrl`; when that returns
 * `null`/`undefined` the raw `url` is used instead.
 *
 * @param {string} tag - One of "img", "video" or "iframe".
 * @param {string} url - Asset URL taken from a shortcode attribute.
 * @returns {string | undefined} The markup, or `undefined` for any other tag.
 */
function emitHtmlTag(tag, url) {
  const normalized = normalizeAssetUrl(url) ?? url;
  // Escape for a double-quoted attribute value. "&" goes first so the
  // entities produced by the later replacements are not escaped again.
  const escaped = normalized.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;");
  switch (tag) {
    case "img":
      return `<img src="${escaped}" alt="" />`;
    case "video":
      return `<video src="${escaped}" controls></video>`;
    case "iframe":
      return `<iframe src="${escaped}" loading="lazy"></iframe>`;
    default:
      // Unsupported tag: nothing to emit.
      return void 0;
  }
}
|
|
97
|
+
/**
 * Replaces each `[<shortcodePrefix>…]` shortcode in `content` with the HTML
 * tag described by `rule`. A closing `[/<shortcodePrefix>…]` that follows the
 * opener (with only whitespace in between) is consumed as well.
 *
 * A shortcode is left untouched when none of `rule.urlParams` yields a URL or
 * when `rule.tag` is not a tag `emitHtmlTag` can render.
 *
 * @param {string} content - Post/page content.
 * @param {{ shortcodePrefix: string, urlParams: string[], tag: string }} rule
 * @returns {string} Content with the matching shortcodes converted.
 */
function convertContentBlocker(content, rule) {
  const prefix = escapeRegExp(rule.shortcodePrefix);
  const pattern = new RegExp(
    `\\[${prefix}([^\\]]*)\\]\\s*(?:\\[\\/${prefix}[^\\]]*\\])?`,
    "gi"
  );
  return content.replace(pattern, (block, params) => {
    const url = extractShortcodeParam(params, rule.urlParams);
    if (!url) return block;
    // A replacer returning `undefined` makes String#replace write the literal
    // text "undefined", so fall back to the original shortcode instead.
    return emitHtmlTag(rule.tag, url) ?? block;
  });
}
|
|
109
|
+
/**
 * Removes every opening (`[prefix…]`) and closing (`[/prefix…]`) shortcode
 * whose name starts with `prefix`, case-insensitively. Text between the
 * shortcodes is kept.
 *
 * @param {string} content - Content to clean.
 * @param {string} prefix - Literal shortcode-name prefix, e.g. "tatsu_".
 * @returns {string} Content without the matching shortcodes.
 */
function stripScaffolding(content, prefix) {
  const escaped = escapeRegExp(prefix);
  // `[^\]]*` already covers the rest of the name and the attributes. A
  // separate `[a-z0-9_-]*` in front of it matches exactly the same strings but
  // backtracks quadratically on an unterminated "[prefix…" run.
  const opener = new RegExp(`\\[${escaped}[^\\]]*\\]`, "gi");
  const closer = new RegExp(`\\[\\/${escaped}[^\\]]*\\]`, "gi");
  return content.replace(opener, "").replace(closer, "");
}
|
|
115
|
+
/**
 * Returns the registry entries whose `detect` pattern matches `content`,
 * in registry order.
 *
 * @param {string} content - Content to inspect.
 * @param {Array<{ detect: RegExp }>} registry - Builder definitions.
 * @returns {Array<{ detect: RegExp }>} The matching entries (same object references).
 */
function detectThemes(content, registry) {
  return registry.filter(({ detect }) => {
    // `test` resumes from `lastIndex` on global/sticky patterns. Reset it so a
    // caller-supplied /g pattern gives the same answer on every call.
    detect.lastIndex = 0;
    return detect.test(content);
  });
}
|
|
118
|
+
/**
 * Flattens page-builder shortcodes in `content` into plain HTML.
 *
 * For every builder detected in the original content (in registry order) its
 * content rules are applied first, then its remaining scaffolding shortcodes
 * are stripped. Finally, runs of three or more newlines collapse to two and
 * the result is trimmed.
 *
 * @param {string} content - Raw post/page content.
 * @param {{ registry?: Array<object> }} [options] - `registry` overrides
 *   `WORDPRESS_BUILDER_REGISTRY`.
 * @returns {{ html: string, detectedThemes: string[] }} Blank content, or
 *   content without any detected builder, comes back unchanged with an empty
 *   `detectedThemes`.
 */
function flattenWordPressBuilders(content, options = {}) {
  const passthrough = () => ({ html: content, detectedThemes: [] });
  if (!content.trim()) return passthrough();

  const matchedThemes = detectThemes(content, options.registry ?? WORDPRESS_BUILDER_REGISTRY);
  if (matchedThemes.length === 0) return passthrough();

  const flattened = matchedThemes.reduce((markup, theme) => {
    const converted = theme.contentRules.reduce(
      (current, rule) => convertContentBlocker(current, rule),
      markup
    );
    return stripScaffolding(converted, theme.scaffoldingPrefix);
  }, content);

  const html = flattened.replace(/\n{3,}/g, "\n\n").trim();
  const detectedThemes = matchedThemes.map(({ id }) => id);
  return { html, detectedThemes };
}
|
|
140
|
+
|
|
141
|
+
// src/parsers/wordpress/parse-wxr.ts
|
|
17
142
|
var PLATFORM = "wordpress";
|
|
18
143
|
function asArray(value) {
|
|
19
144
|
if (value === void 0) return [];
|
|
@@ -80,13 +205,16 @@ function parseItems(xml) {
|
|
|
80
205
|
const doc = parser.parse(xml);
|
|
81
206
|
return asArray(doc.rss?.channel?.item);
|
|
82
207
|
}
|
|
83
|
-
function buildAttachmentIndex(items) {
|
|
208
|
+
function buildAttachmentIndex(items, originUrlRewrite) {
|
|
84
209
|
const index = /* @__PURE__ */ new Map();
|
|
85
210
|
for (const item of items) {
|
|
86
211
|
if (textValue(item.post_type) !== "attachment") continue;
|
|
87
212
|
const id = textValue(item.post_id);
|
|
88
|
-
|
|
213
|
+
let url = textValue(item.attachment_url) || textValue(item.link);
|
|
89
214
|
if (!id || !url) continue;
|
|
215
|
+
if (originUrlRewrite) {
|
|
216
|
+
url = rewriteOriginUrlsInText(url, originUrlRewrite);
|
|
217
|
+
}
|
|
90
218
|
const filename = basename(new URL(url, "http://local.invalid").pathname) || `attachment-${id}`;
|
|
91
219
|
index.set(id, {
|
|
92
220
|
sourceUrl: url,
|
|
@@ -171,10 +299,27 @@ function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt) {
|
|
|
171
299
|
}
|
|
172
300
|
return assets;
|
|
173
301
|
}
|
|
302
|
+
/**
 * Prepares raw item content for asset discovery and output.
 *
 * Origin URL rewriting runs first (when `options.originUrlRewrite` is set),
 * then builder flattening, which is on unless `options.flattenBuilders` is
 * exactly `false`.
 *
 * @param {string} rawHtml - Content as read from the export.
 * @param {{ originUrlRewrite?: object, flattenBuilders?: boolean }} options
 * @returns {string} The processed HTML.
 */
function preprocessContent(rawHtml, options) {
  const { originUrlRewrite, flattenBuilders } = options;
  const rewritten = originUrlRewrite
    ? rewriteOriginUrlsInText(rawHtml, originUrlRewrite)
    : rawHtml;
  if (flattenBuilders === false) return rewritten;
  return flattenWordPressBuilders(rewritten).html;
}
|
|
312
|
+
/**
 * Picks the source id of a post's featured asset.
 *
 * @param {string | undefined} thumbnailId - Value of the post's thumbnail meta.
 * @param {Map<string, object>} attachmentIndex - Attachments keyed by post id.
 * @param {string} contentHtml - Processed post content.
 * @returns {string | undefined} `thumbnailId` when it is present in the index;
 *   otherwise `url:<first URL found by discoverContentAssetUrls>`, or
 *   `undefined` when the content yields no URL.
 */
function resolveFeaturedAssetSourceId(thumbnailId, attachmentIndex, contentHtml) {
  const isIndexedThumbnail = Boolean(thumbnailId) && attachmentIndex.has(thumbnailId);
  if (isIndexedThumbnail) return thumbnailId;

  const inlineUrls = discoverContentAssetUrls(contentHtml);
  const firstInline = inlineUrls[0];
  if (!firstInline) return void 0;
  return `url:${firstInline}`;
}
|
|
174
319
|
async function* enumerateWxrEntities(options) {
|
|
175
320
|
const xml = await readFile(options.filePath, "utf8");
|
|
176
321
|
const items = parseItems(xml);
|
|
177
|
-
const attachmentIndex = buildAttachmentIndex(items);
|
|
322
|
+
const attachmentIndex = buildAttachmentIndex(items, options.originUrlRewrite);
|
|
178
323
|
const { categories, tags } = collectTaxonomies(items);
|
|
179
324
|
const seenAssetUrls = /* @__PURE__ */ new Set();
|
|
180
325
|
const emittedAttachmentIds = /* @__PURE__ */ new Set();
|
|
@@ -203,9 +348,9 @@ async function* enumerateWxrEntities(options) {
|
|
|
203
348
|
const id = textValue(item.post_id);
|
|
204
349
|
const link = textValue(item.link);
|
|
205
350
|
const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
|
|
206
|
-
const
|
|
351
|
+
const contentHtml = preprocessContent(getContentEncoded(item), options);
|
|
207
352
|
for (const asset of collectInlineAssets(
|
|
208
|
-
|
|
353
|
+
contentHtml,
|
|
209
354
|
attachmentIndex,
|
|
210
355
|
seenAssetUrls,
|
|
211
356
|
options.exportedAt
|
|
@@ -223,10 +368,11 @@ async function* enumerateWxrEntities(options) {
|
|
|
223
368
|
}
|
|
224
369
|
if (postType === "post") {
|
|
225
370
|
const thumbnailId = getPostMeta(item, "_thumbnail_id");
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
371
|
+
const featuredAssetSourceId = resolveFeaturedAssetSourceId(
|
|
372
|
+
thumbnailId,
|
|
373
|
+
attachmentIndex,
|
|
374
|
+
contentHtml
|
|
375
|
+
);
|
|
230
376
|
const post = {
|
|
231
377
|
type: "post",
|
|
232
378
|
source: sourceMeta(id, link, options.exportedAt),
|
|
@@ -234,7 +380,7 @@ async function* enumerateWxrEntities(options) {
|
|
|
234
380
|
title: textValue(item.title) || slug,
|
|
235
381
|
slug,
|
|
236
382
|
excerpt: getExcerpt(item) || void 0,
|
|
237
|
-
contentHtml
|
|
383
|
+
contentHtml,
|
|
238
384
|
publishedAt: textValue(item.post_date) || void 0,
|
|
239
385
|
status: mapPublishStatus(textValue(item.status)),
|
|
240
386
|
categorySlugs: categorySlugs.length ? categorySlugs : void 0,
|
|
@@ -251,7 +397,7 @@ async function* enumerateWxrEntities(options) {
|
|
|
251
397
|
sourceId: id,
|
|
252
398
|
title: textValue(item.title) || slug,
|
|
253
399
|
slug,
|
|
254
|
-
contentHtml
|
|
400
|
+
contentHtml,
|
|
255
401
|
isHomePage: isHomePage || void 0,
|
|
256
402
|
status: mapPublishStatus(textValue(item.status))
|
|
257
403
|
};
|
|
@@ -291,18 +437,25 @@ async function validateWxrFile(filePath) {
|
|
|
291
437
|
}
|
|
292
438
|
|
|
293
439
|
// src/parsers/wordpress/index.ts
|
|
294
|
-
function
|
|
295
|
-
if (typeof input === "string")
|
|
440
|
+
/**
 * Normalizes the adapter input into the options object passed to
 * `enumerateWxrEntities` and `validateWxrFile`.
 *
 * - A string is taken as the export file path.
 * - An object with a `path` key yields `filePath` plus the optional
 *   `originUrlRewrite` and `flattenBuilders` settings, copied as-is.
 * - Anything else throws an `Error`.
 *
 * @param {string | { path: unknown, originUrlRewrite?: object, flattenBuilders?: boolean }} input
 * @returns {{ filePath: string, originUrlRewrite?: object, flattenBuilders?: boolean }}
 * @throws {Error} When `input` is neither a string nor an object with a `path` key.
 */
function resolveWxrOptions(input) {
|
|
441
|
+
if (typeof input === "string") {
|
|
442
|
+
return { filePath: input };
|
|
443
|
+
}
|
|
296
444
|
if (input && typeof input === "object" && "path" in input) {
|
|
297
|
-
|
|
445
|
+
const obj = input;
|
|
446
|
+
return {
|
|
447
|
+
// NOTE(review): `"path" in input` only checks that the key exists, so
// `{ path: undefined }` becomes the file path "undefined" — confirm whether
// this should be rejected instead.
filePath: String(obj.path),
|
|
448
|
+
originUrlRewrite: obj.originUrlRewrite,
|
|
449
|
+
flattenBuilders: obj.flattenBuilders
|
|
450
|
+
};
|
|
298
451
|
}
|
|
299
|
-
throw new Error("WordPress adapter requires input path (string or { path })");
|
|
452
|
+
throw new Error("WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders? })");
|
|
300
453
|
}
|
|
301
454
|
var wordpressAdapter = {
|
|
302
455
|
platform: "wordpress",
|
|
303
456
|
async validateInput(input) {
|
|
304
|
-
const
|
|
305
|
-
const result = await validateWxrFile(
|
|
457
|
+
const { filePath } = resolveWxrOptions(input);
|
|
458
|
+
const result = await validateWxrFile(filePath);
|
|
306
459
|
return {
|
|
307
460
|
ok: result.ok,
|
|
308
461
|
issues: result.issues,
|
|
@@ -310,8 +463,7 @@ var wordpressAdapter = {
|
|
|
310
463
|
};
|
|
311
464
|
},
|
|
312
465
|
enumerateEntities(ctx) {
|
|
313
|
-
|
|
314
|
-
return enumerateWxrEntities({ filePath: path });
|
|
466
|
+
return enumerateWxrEntities(resolveWxrOptions(ctx.input));
|
|
315
467
|
}
|
|
316
468
|
};
|
|
317
469
|
|
|
@@ -2272,6 +2424,8 @@ function getAdapter(platform) {
|
|
|
2272
2424
|
}
|
|
2273
2425
|
|
|
2274
2426
|
export {
|
|
2427
|
+
rewriteOriginUrlsInText,
|
|
2428
|
+
createWpContentGatewayRewrite,
|
|
2275
2429
|
wordpressAdapter,
|
|
2276
2430
|
SMUGMUG_API_BASE,
|
|
2277
2431
|
SMUGMUG_OAUTH_ENDPOINTS,
|
|
@@ -2287,4 +2441,4 @@ export {
|
|
|
2287
2441
|
wixAdapter,
|
|
2288
2442
|
getAdapter
|
|
2289
2443
|
};
|
|
2290
|
-
//# sourceMappingURL=chunk-VXEHAQKK.js.map
|
|
2444
|
+
//# sourceMappingURL=chunk-QEXTXHFG.js.map
|