@artinstack/migrator 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/{chunk-QEXTXHFG.js → chunk-Q6M5JEL3.js} +133 -41
- package/dist/chunk-Q6M5JEL3.js.map +1 -0
- package/dist/{chunk-HH7666MQ.js → chunk-XKWWXKP3.js} +124 -7
- package/dist/chunk-XKWWXKP3.js.map +1 -0
- package/dist/cli/index.js +41 -8
- package/dist/cli/index.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/sinks/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-HH7666MQ.js.map +0 -1
- package/dist/chunk-QEXTXHFG.js.map +0 -1
package/README.md
CHANGED
|
@@ -11,6 +11,7 @@ See [docs/architecture.md](./docs/architecture.md) for the high-level blueprint:
|
|
|
11
11
|
```
|
|
12
12
|
src/
|
|
13
13
|
parsers/ WordPress, SmugMug, Squarespace, Wix → normalizer DTOs
|
|
14
|
+
wordpress/ WXR parse, builder flattening (theme registry)
|
|
14
15
|
normalizer/ Canonical DTOs + portable idempotency types
|
|
15
16
|
sinks/ filesystem export, MigrationSink interface
|
|
16
17
|
cli/ artinstack-migrate
|
|
@@ -56,6 +57,8 @@ artinstack-migrate validate <platform> <export-file>
|
|
|
56
57
|
| `--dry-run` | Parse and analyze only; no export files |
|
|
57
58
|
| `--report <dir>` | With `--dry-run`, write `conflicts.json` and `migration-report.json` |
|
|
58
59
|
| `--offline` | Skip network HEAD requests for asset size estimates |
|
|
60
|
+
| `--rewrite-gateway <url>` | WordPress: legacy API-gateway base (use with `--rewrite-public`) |
|
|
61
|
+
| `--rewrite-public <url>` | WordPress: public origin for `/wp-content/` asset paths |
|
|
59
62
|
| `--sink filesystem` | Run through `MigrationSink` before writing (requires `--out`) |
|
|
60
63
|
| `--urls <file>` | Wix only: URL list or `sitemap.xml` for static page snapshots |
|
|
61
64
|
|
|
@@ -68,6 +71,12 @@ artinstack-migrate wordpress export.xml --out ./output
|
|
|
68
71
|
# Preview conflicts without writing content
|
|
69
72
|
artinstack-migrate wordpress export.xml --dry-run --report ./preview/
|
|
70
73
|
|
|
74
|
+
# WordPress: rewrite legacy gateway URLs before dry-run / export (e.g. API Gateway → public CDN)
|
|
75
|
+
artinstack-migrate wordpress export.xml \
|
|
76
|
+
--rewrite-gateway "https://gateway.example/prod" \
|
|
77
|
+
--rewrite-public "https://www.example.com" \
|
|
78
|
+
--dry-run --report ./preview/
|
|
79
|
+
|
|
71
80
|
# Validate export structure (JSON result on stdout, exit 0/1)
|
|
72
81
|
artinstack-migrate validate wordpress export.xml
|
|
73
82
|
|
|
@@ -119,8 +128,10 @@ pnpm dev # watch build
|
|
|
119
128
|
| Piece | `@artinstack/migrator` | Host application |
|
|
120
129
|
|-------|------------------------|------------------|
|
|
121
130
|
| Parsers + normalizer DTOs | Yes | No |
|
|
131
|
+
| WordPress builder flattening + origin URL rewrite (pre-DTO) | Yes | Optional (pass the same config on adapter input) |
|
|
122
132
|
| CLI + filesystem JSON export | Yes | No |
|
|
123
133
|
| `MigrationSink` interface | Yes | Implementation |
|
|
134
|
+
| Dynamic shortcodes (`[portfolio]`, `[recent_posts]`), forms, sanitize | No | Yes |
|
|
124
135
|
| Jobs, worker, credentials, UI | No | Yes |
|
|
125
136
|
|
|
126
137
|
## License
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
SquarespaceCollectionClient,
|
|
3
|
+
WORDPRESS_BUILDER_REGISTRY,
|
|
3
4
|
enumerateSquarespaceEntities,
|
|
4
5
|
linkToPath,
|
|
5
6
|
sanitizeSlug,
|
|
6
7
|
summarizeSquarespaceExport,
|
|
7
8
|
validateSquarespaceExportFile
|
|
8
|
-
} from "./chunk-
|
|
9
|
+
} from "./chunk-XKWWXKP3.js";
|
|
9
10
|
import {
|
|
10
11
|
discoverContentAssetUrls,
|
|
11
12
|
normalizeAssetUrl
|
|
@@ -43,45 +44,52 @@ import { readFile } from "fs/promises";
|
|
|
43
44
|
import { basename } from "path";
|
|
44
45
|
import { XMLParser } from "fast-xml-parser";
|
|
45
46
|
|
|
46
|
-
// src/parsers/wordpress/builders/registry.ts
|
|
47
|
-
var WORDPRESS_BUILDER_REGISTRY = [
|
|
48
|
-
{
|
|
49
|
-
id: "tatsu",
|
|
50
|
-
detect: /\[(?:\/)?tatsu_/i,
|
|
51
|
-
contentRules: [
|
|
52
|
-
{ shortcodePrefix: "tatsu_image", urlParams: ["image", "url", "src"], tag: "img" },
|
|
53
|
-
{ shortcodePrefix: "tatsu_video", urlParams: ["video", "src", "url"], tag: "video" }
|
|
54
|
-
],
|
|
55
|
-
scaffoldingPrefix: "tatsu_"
|
|
56
|
-
},
|
|
57
|
-
{
|
|
58
|
-
id: "divi",
|
|
59
|
-
detect: /\[(?:\/)?et_pb_/i,
|
|
60
|
-
contentRules: [{ shortcodePrefix: "et_pb_image", urlParams: ["src", "url"], tag: "img" }],
|
|
61
|
-
scaffoldingPrefix: "et_pb_"
|
|
62
|
-
},
|
|
63
|
-
{
|
|
64
|
-
id: "elementor",
|
|
65
|
-
detect: /\[(?:\/)?elementor[-_]/i,
|
|
66
|
-
contentRules: [
|
|
67
|
-
{ shortcodePrefix: "elementor-widget", urlParams: ["url", "src", "image"], tag: "img" }
|
|
68
|
-
],
|
|
69
|
-
scaffoldingPrefix: "elementor_"
|
|
70
|
-
}
|
|
71
|
-
];
|
|
72
|
-
|
|
73
47
|
// src/parsers/wordpress/builders/flatten.ts
|
|
74
48
|
/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result can be interpolated into a `RegExp` source and match literally.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character, now preceded by a backslash.
  const escaped = value.replace(metacharacters, "\\$&");
  return escaped;
}
|
|
51
|
+
/**
 * Read the quoted value assigned to `name` inside a shortcode parameter string.
 *
 * The name is matched case-insensitively at a word boundary, followed by `=`
 * with optional surrounding whitespace. The value must be wrapped in single or
 * double quotes; a backslash inside the value makes the next character literal.
 * An unterminated quote consumes the rest of the string.
 *
 * @param {string} params - Raw attribute text of a shortcode.
 * @param {string} name - Attribute name to look up.
 * @returns {string | undefined} The trimmed value, or `undefined` when the
 *   attribute is absent, unquoted, or empty after trimming.
 */
function extractQuotedParam(params, name) {
  const assignment = new RegExp(`\\b${escapeRegExp(name)}\\s*=\\s*`, "i").exec(params);
  if (assignment === null) return void 0;

  let cursor = assignment.index + assignment[0].length;
  while (cursor < params.length && /\s/.test(params[cursor])) cursor += 1;

  const delimiter = params[cursor];
  if (delimiter !== '"' && delimiter !== "'") return void 0;

  const collected = [];
  for (cursor += 1; cursor < params.length; cursor += 1) {
    const current = params[cursor];
    if (current === "\\" && cursor + 1 < params.length) {
      // Keep the escaped character verbatim and step over it.
      collected.push(params[cursor + 1]);
      cursor += 1;
      continue;
    }
    if (current === delimiter) break;
    collected.push(current);
  }

  const result = collected.join("").trim();
  return result || void 0;
}
|
|
77
75
|
/**
 * Return the first non-empty quoted value found among several candidate
 * attribute names, trying them in the order given.
 *
 * @param {string} params - Raw attribute text of a shortcode.
 * @param {Iterable<string>} names - Attribute names in priority order.
 * @returns {string | undefined} The first value found, or `undefined`.
 */
function extractShortcodeParam(params, names) {
  for (const candidate of names) {
    const found = extractQuotedParam(params, candidate);
    if (!found) continue;
    return found;
  }
  return undefined;
}
|
|
82
|
+
/**
 * Escape text for inclusion in HTML element content or a double-quoted
 * attribute value.
 *
 * `&` is replaced first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtmlText(text) {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
|
85
|
+
/**
 * Turn plain text into HTML paragraphs wrapped in `tag`.
 *
 * Runs of two or more newlines separate paragraphs; each paragraph is trimmed,
 * empty ones are dropped, the text is passed through `escapeHtmlText`, and the
 * remaining single newlines become `<br />`.
 *
 * @param {string} text - Plain text to convert.
 * @param {string} tag - Element name used to wrap every paragraph.
 * @returns {string} Newline-joined elements, or `""` when nothing remains.
 */
function textToHtml(text, tag) {
  const rendered = [];
  for (const chunk of text.split(/\n{2,}/)) {
    const paragraph = chunk.trim();
    if (!paragraph) continue;
    const inner = escapeHtmlText(paragraph).replace(/\n/g, "<br />");
    rendered.push(`<${tag}>${inner}</${tag}>`);
  }
  return rendered.join("\n");
}
|
|
85
93
|
function emitHtmlTag(tag, url) {
|
|
86
94
|
const normalized = normalizeAssetUrl(url) ?? url;
|
|
87
95
|
const escaped = normalized.replace(/&/g, "&").replace(/"/g, """).replace(/</g, "<");
|
|
@@ -94,10 +102,10 @@ function emitHtmlTag(tag, url) {
|
|
|
94
102
|
return `<iframe src="${escaped}" loading="lazy"></iframe>`;
|
|
95
103
|
}
|
|
96
104
|
}
|
|
97
|
-
function
|
|
105
|
+
function convertUrlRule(content, rule) {
|
|
98
106
|
const prefix = escapeRegExp(rule.shortcodePrefix);
|
|
99
107
|
const pattern = new RegExp(
|
|
100
|
-
`\\[${prefix}([^\\]]*)\\]\\s*(?:\\[\\/${prefix}[^\\]]*\\])?`,
|
|
108
|
+
`\\[${prefix}\\b([^\\]]*)\\]\\s*(?:\\[\\/${prefix}\\b[^\\]]*\\])?`,
|
|
101
109
|
"gi"
|
|
102
110
|
);
|
|
103
111
|
return content.replace(pattern, (block, params) => {
|
|
@@ -106,12 +114,63 @@ function convertContentBlocker(content, rule) {
|
|
|
106
114
|
return emitHtmlTag(rule.tag, url);
|
|
107
115
|
});
|
|
108
116
|
}
|
|
109
|
-
function
|
|
117
|
+
/**
 * Replace every shortcode named `rule.shortcodePrefix` (with its immediately
 * following closing tag, if present) by HTML built from its text attributes.
 *
 * For each entry of `rule.fields`, the quoted attribute `field.param` is read
 * and rendered with `textToHtml` using `field.tag`. A shortcode that yields
 * no HTML at all is left in the content unchanged.
 *
 * @param {string} content - Content containing builder shortcodes.
 * @param {{ shortcodePrefix: string, fields: Iterable<{ param: string, tag: string }> }} rule
 * @returns {string} Content with matching shortcodes converted.
 */
function convertTextRule(content, rule) {
  const tagName = escapeRegExp(rule.shortcodePrefix);
  const shortcode = new RegExp(
    `\\[${tagName}\\b([^\\]]*)\\]\\s*(?:\\[\\/${tagName}\\b[^\\]]*\\])?`,
    "gis"
  );
  return content.replace(shortcode, (original, attributes) => {
    const rendered = [];
    for (const field of rule.fields) {
      const value = extractQuotedParam(attributes, field.param);
      if (value) {
        const fragment = textToHtml(value, field.tag);
        if (fragment) rendered.push(fragment);
      }
    }
    if (rendered.length === 0) return original;
    return rendered.join("\n");
  });
}
|
|
134
|
+
/**
 * Unwrap paired shortcodes named `rule.shortcodePrefix`, keeping their inner
 * content.
 *
 * When `rule.urlParams` is non-empty and one of those attributes holds a URL,
 * an `img` tag produced by `emitHtmlTag` is placed before the trimmed inner
 * content. Empty pieces are dropped and the rest joined with a newline.
 * Only an opener followed by a matching closer is rewritten.
 *
 * @param {string} content - Content containing builder shortcodes.
 * @param {{ shortcodePrefix: string, urlParams?: string[] }} rule
 * @returns {string} Content with matching wrappers removed.
 */
function convertWrapperRule(content, rule) {
  const tagName = escapeRegExp(rule.shortcodePrefix);
  const wrapper = new RegExp(
    `\\[${tagName}\\b([^\\]]*)\\]([\\s\\S]*?)\\[\\/${tagName}\\b[^\\]]*\\]`,
    "gi"
  );
  return content.replace(wrapper, (_match, attributes, body) => {
    const imageUrl = rule.urlParams?.length
      ? extractShortcodeParam(attributes, rule.urlParams)
      : void 0;
    const imageHtml = imageUrl ? emitHtmlTag("img", imageUrl) : "";
    const bodyHtml = body.trim();
    return [imageHtml, bodyHtml].filter(Boolean).join("\n");
  });
}
|
|
150
|
+
function convertPlaceholderRule(content, rule) {
|
|
151
|
+
const prefix = escapeRegExp(rule.shortcodePrefix);
|
|
152
|
+
const pattern = new RegExp(
|
|
153
|
+
`\\[${prefix}\\b([^\\]]*)\\]\\s*(?:\\[\\/${prefix}\\b[^\\]]*\\])?`,
|
|
154
|
+
"gi"
|
|
155
|
+
);
|
|
156
|
+
return content.replace(pattern, rule.html);
|
|
157
|
+
}
|
|
158
|
+
/**
 * Delete every opening and closing shortcode tag whose name begins with
 * `prefix` (case-insensitive). Text between the tags is kept.
 *
 * @param {string} content - Content containing builder shortcodes.
 * @param {string} prefix - Shortcode name prefix, e.g. `"tatsu_"`.
 * @returns {string} Content without the matching tags.
 */
function stripScaffoldingPrefix(content, prefix) {
  const name = escapeRegExp(prefix);
  const withoutOpeners = content.replace(
    new RegExp(`\\[${name}[a-z0-9_-]*[^\\]]*\\]`, "gi"),
    ""
  );
  return withoutOpeners.replace(
    new RegExp(`\\[\\/${name}[a-z0-9_-]*[^\\]]*\\]`, "gi"),
    ""
  );
}
|
|
164
|
+
/**
 * Delete opening and closing shortcode tags whose name is exactly one of
 * `tokens` (case-insensitive, ending at a word boundary). Text between the
 * tags is kept.
 *
 * @param {string} content - Content containing legacy shortcodes.
 * @param {Iterable<string>} tokens - Exact shortcode names to strip.
 * @returns {string} Content without the matching tags.
 */
function stripLegacyTokens(content, tokens) {
  return [...tokens].reduce((text, token) => {
    const name = escapeRegExp(token);
    const openTag = new RegExp(`\\[${name}\\b[^\\]]*\\]`, "gi");
    const closeTag = new RegExp(`\\[\\/${name}\\b[^\\]]*\\]`, "gi");
    return text.replace(openTag, "").replace(closeTag, "");
  }, content);
}
|
|
115
174
|
/**
 * Select the registry entries whose `detect` pattern matches `content`.
 *
 * @param {string} content - Content to inspect.
 * @param {Array<{ detect: RegExp }>} registry - Candidate theme definitions.
 * @returns {Array<{ detect: RegExp }>} Matching entries, in registry order.
 */
function detectThemes(content, registry) {
  const detected = [];
  registry.forEach((theme) => {
    if (theme.detect.test(content)) detected.push(theme);
  });
  return detected;
}
|
|
@@ -126,10 +185,24 @@ function flattenWordPressBuilders(content, options = {}) {
|
|
|
126
185
|
}
|
|
127
186
|
let html = content;
|
|
128
187
|
for (const theme of themes) {
|
|
129
|
-
for (const rule of theme.
|
|
130
|
-
html =
|
|
188
|
+
for (const rule of theme.wrapperRules ?? []) {
|
|
189
|
+
html = convertWrapperRule(html, rule);
|
|
190
|
+
}
|
|
191
|
+
for (const rule of theme.textRules ?? []) {
|
|
192
|
+
html = convertTextRule(html, rule);
|
|
193
|
+
}
|
|
194
|
+
for (const rule of theme.urlRules ?? []) {
|
|
195
|
+
html = convertUrlRule(html, rule);
|
|
196
|
+
}
|
|
197
|
+
for (const rule of theme.placeholderRules ?? []) {
|
|
198
|
+
html = convertPlaceholderRule(html, rule);
|
|
199
|
+
}
|
|
200
|
+
for (const prefix of theme.scaffoldingPrefixes ?? []) {
|
|
201
|
+
html = stripScaffoldingPrefix(html, prefix);
|
|
202
|
+
}
|
|
203
|
+
if (theme.legacyScaffoldingTokens?.length) {
|
|
204
|
+
html = stripLegacyTokens(html, theme.legacyScaffoldingTokens);
|
|
131
205
|
}
|
|
132
|
-
html = stripScaffolding(html, theme.scaffoldingPrefix);
|
|
133
206
|
}
|
|
134
207
|
html = html.replace(/\n{3,}/g, "\n\n").trim();
|
|
135
208
|
return {
|
|
@@ -140,6 +213,14 @@ function flattenWordPressBuilders(content, options = {}) {
|
|
|
140
213
|
|
|
141
214
|
// src/parsers/wordpress/parse-wxr.ts
|
|
142
215
|
var PLATFORM = "wordpress";
|
|
216
|
+
// Page slugs that are always treated as WooCommerce stub pages.
var WOOCOMMERCE_STUB_PAGE_SLUGS = /* @__PURE__ */ new Set(["cart", "checkout", "my-account"]);
// Matches content that consists of exactly one WooCommerce page shortcode.
var WOOCOMMERCE_STUB_SHORTCODE = /^\[woocommerce_(?:cart|checkout|my_account)\]\s*$/i;
/**
 * Decide whether a page is a WooCommerce stub page.
 *
 * A page qualifies when its slug is one of the known stub slugs, whatever its
 * content, or when its trimmed content is a single WooCommerce cart, checkout
 * or my-account shortcode.
 *
 * @param {string} slug - Page slug.
 * @param {string} contentHtml - Page content; only read when the slug does not match.
 * @returns {boolean} `true` for a stub page.
 */
function isWooCommerceStubPage(slug, contentHtml) {
  if (WOOCOMMERCE_STUB_PAGE_SLUGS.has(slug)) return true;
  const body = contentHtml.trim();
  return body !== "" && WOOCOMMERCE_STUB_SHORTCODE.test(body);
}
|
|
143
224
|
function asArray(value) {
|
|
144
225
|
if (value === void 0) return [];
|
|
145
226
|
return Array.isArray(value) ? value : [value];
|
|
@@ -316,6 +397,11 @@ function resolveFeaturedAssetSourceId(thumbnailId, attachmentIndex, contentHtml)
|
|
|
316
397
|
const firstInline = discoverContentAssetUrls(contentHtml)[0];
|
|
317
398
|
return firstInline ? `url:${firstInline}` : void 0;
|
|
318
399
|
}
|
|
400
|
+
/**
 * Apply the origin URL rewrite to a single URL when a rewrite config is given.
 *
 * @param {string | undefined} url - URL to rewrite; a falsy value yields `undefined`.
 * @param {object | undefined} config - Rewrite configuration passed on to
 *   `rewriteOriginUrlsInText`; when falsy the URL is returned unchanged.
 * @returns {string | undefined} The rewritten or original URL.
 */
function maybeRewriteUrl(url, config) {
  if (!url) return void 0;
  return config ? rewriteOriginUrlsInText(url, config) : url;
}
|
|
319
405
|
async function* enumerateWxrEntities(options) {
|
|
320
406
|
const xml = await readFile(options.filePath, "utf8");
|
|
321
407
|
const items = parseItems(xml);
|
|
@@ -346,9 +432,12 @@ async function* enumerateWxrEntities(options) {
|
|
|
346
432
|
const postType = textValue(item.post_type);
|
|
347
433
|
if (postType !== "post" && postType !== "page") continue;
|
|
348
434
|
const id = textValue(item.post_id);
|
|
349
|
-
const link = textValue(item.link);
|
|
435
|
+
const link = maybeRewriteUrl(textValue(item.link), options.originUrlRewrite);
|
|
350
436
|
const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
|
|
351
437
|
const contentHtml = preprocessContent(getContentEncoded(item), options);
|
|
438
|
+
if (postType === "page" && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(slug, contentHtml)) {
|
|
439
|
+
continue;
|
|
440
|
+
}
|
|
352
441
|
for (const asset of collectInlineAssets(
|
|
353
442
|
contentHtml,
|
|
354
443
|
attachmentIndex,
|
|
@@ -446,10 +535,13 @@ function resolveWxrOptions(input) {
|
|
|
446
535
|
return {
|
|
447
536
|
filePath: String(obj.path),
|
|
448
537
|
originUrlRewrite: obj.originUrlRewrite,
|
|
449
|
-
flattenBuilders: obj.flattenBuilders
|
|
538
|
+
flattenBuilders: obj.flattenBuilders,
|
|
539
|
+
skipWooCommerceStubPages: obj.skipWooCommerceStubPages
|
|
450
540
|
};
|
|
451
541
|
}
|
|
452
|
-
throw new Error(
|
|
542
|
+
throw new Error(
|
|
543
|
+
"WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders?, skipWooCommerceStubPages? })"
|
|
544
|
+
);
|
|
453
545
|
}
|
|
454
546
|
var wordpressAdapter = {
|
|
455
547
|
platform: "wordpress",
|
|
@@ -2441,4 +2533,4 @@ export {
|
|
|
2441
2533
|
wixAdapter,
|
|
2442
2534
|
getAdapter
|
|
2443
2535
|
};
|
|
2444
|
-
//# sourceMappingURL=chunk-QEXTXHFG.js.map
|
|
2536
|
+
//# sourceMappingURL=chunk-Q6M5JEL3.js.map
|