@artinstack/migrator 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,6 +11,7 @@ See [docs/architecture.md](./docs/architecture.md) for the high-level blueprint:
11
11
  ```
12
12
  src/
13
13
  parsers/ WordPress, SmugMug, Squarespace, Wix → normalizer DTOs
14
+ wordpress/ WXR parse, builder flattening (theme registry)
14
15
  normalizer/ Canonical DTOs + portable idempotency types
15
16
  sinks/ filesystem export, MigrationSink interface
16
17
  cli/ artinstack-migrate
@@ -56,6 +57,8 @@ artinstack-migrate validate <platform> <export-file>
56
57
  | `--dry-run` | Parse and analyze only; no export files |
57
58
  | `--report <dir>` | With `--dry-run`, write `conflicts.json` and `migration-report.json` |
58
59
  | `--offline` | Skip network HEAD requests for asset size estimates |
60
+ | `--rewrite-gateway <url>` | WordPress: legacy API-gateway base (use with `--rewrite-public`) |
61
+ | `--rewrite-public <url>` | WordPress: public origin for `/wp-content/` asset paths |
59
62
  | `--sink filesystem` | Run through `MigrationSink` before writing (requires `--out`) |
60
63
  | `--urls <file>` | Wix only: URL list or `sitemap.xml` for static page snapshots |
61
64
 
@@ -68,6 +71,12 @@ artinstack-migrate wordpress export.xml --out ./output
68
71
  # Preview conflicts without writing content
69
72
  artinstack-migrate wordpress export.xml --dry-run --report ./preview/
70
73
 
74
+ # WordPress: rewrite legacy gateway URLs before dry-run / export (e.g. API Gateway → public CDN)
75
+ artinstack-migrate wordpress export.xml \
76
+ --rewrite-gateway "https://gateway.example/prod" \
77
+ --rewrite-public "https://www.example.com" \
78
+ --dry-run --report ./preview/
79
+
71
80
  # Validate export structure (JSON result on stdout, exit 0/1)
72
81
  artinstack-migrate validate wordpress export.xml
73
82
 
@@ -119,8 +128,10 @@ pnpm dev # watch build
119
128
  | Piece | `@artinstack/migrator` | Host application |
120
129
  |-------|------------------------|------------------|
121
130
  | Parsers + normalizer DTOs | Yes | No |
131
+ | WordPress builder flattening + origin URL rewrite (pre-DTO) | Yes | Optional: pass the same config on adapter input |
122
132
  | CLI + filesystem JSON export | Yes | No |
123
133
  | `MigrationSink` interface | Yes | Implementation |
134
+ | Dynamic shortcodes (`[portfolio]`, `[recent_posts]`), forms, sanitization | No | Yes |
124
135
  | Jobs, worker, credentials, UI | No | Yes |
125
136
 
126
137
  ## License
@@ -1,11 +1,12 @@
1
1
  import {
2
2
  SquarespaceCollectionClient,
3
+ WORDPRESS_BUILDER_REGISTRY,
3
4
  enumerateSquarespaceEntities,
4
5
  linkToPath,
5
6
  sanitizeSlug,
6
7
  summarizeSquarespaceExport,
7
8
  validateSquarespaceExportFile
8
- } from "./chunk-HH7666MQ.js";
9
+ } from "./chunk-XKWWXKP3.js";
9
10
  import {
10
11
  discoverContentAssetUrls,
11
12
  normalizeAssetUrl
@@ -43,45 +44,52 @@ import { readFile } from "fs/promises";
43
44
  import { basename } from "path";
44
45
  import { XMLParser } from "fast-xml-parser";
45
46
 
46
- // src/parsers/wordpress/builders/registry.ts
47
- var WORDPRESS_BUILDER_REGISTRY = [
48
- {
49
- id: "tatsu",
50
- detect: /\[(?:\/)?tatsu_/i,
51
- contentRules: [
52
- { shortcodePrefix: "tatsu_image", urlParams: ["image", "url", "src"], tag: "img" },
53
- { shortcodePrefix: "tatsu_video", urlParams: ["video", "src", "url"], tag: "video" }
54
- ],
55
- scaffoldingPrefix: "tatsu_"
56
- },
57
- {
58
- id: "divi",
59
- detect: /\[(?:\/)?et_pb_/i,
60
- contentRules: [{ shortcodePrefix: "et_pb_image", urlParams: ["src", "url"], tag: "img" }],
61
- scaffoldingPrefix: "et_pb_"
62
- },
63
- {
64
- id: "elementor",
65
- detect: /\[(?:\/)?elementor[-_]/i,
66
- contentRules: [
67
- { shortcodePrefix: "elementor-widget", urlParams: ["url", "src", "image"], tag: "img" }
68
- ],
69
- scaffoldingPrefix: "elementor_"
70
- }
71
- ];
72
-
73
47
  // src/parsers/wordpress/builders/flatten.ts
74
48
  function escapeRegExp(value) {
75
49
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
76
50
  }
51
/**
 * Read the quoted value of one shortcode attribute from a raw attribute string.
 *
 * The attribute name is matched case-insensitively. The value must be wrapped
 * in single or double quotes; inside it, a backslash makes the next character
 * literal (so `\"` does not end the value). If the closing quote is missing,
 * everything up to the end of `params` is taken as the value.
 *
 * @param {string} params - Raw attribute text of a shortcode (e.g. ` src="a.jpg" alt='x'`).
 * @param {string} name - Attribute name to look up.
 * @returns {string | undefined} The trimmed value, or `undefined` when the
 *   attribute is absent, not quoted, or empty after trimming.
 */
function extractQuotedParam(params, name) {
  // The lookbehind rejects matches inside longer attribute names such as
  // `data-src` or `img_src` when looking for `src`. Requiring the opening quote
  // inside the pattern lets the search skip `name = something` occurrences that
  // are not quoted assignments instead of giving up on the first one.
  const pattern = new RegExp(`(?<![\\w-])${escapeRegExp(name)}\\s*=\\s*(["'])`, "i");
  const match = pattern.exec(params);
  if (!match) return void 0;

  const quote = match[1];
  let index = match.index + match[0].length;
  let value = "";
  while (index < params.length) {
    const char = params[index];
    if (char === "\\" && index + 1 < params.length) {
      // Escaped character: keep it verbatim and skip past the pair.
      value += params[index + 1];
      index += 2;
      continue;
    }
    if (char === quote) break;
    value += char;
    index += 1;
  }

  const trimmed = value.trim();
  return trimmed || void 0;
}
77
75
  function extractShortcodeParam(params, names) {
78
76
  for (const name of names) {
79
- const pattern = new RegExp(`\\b${escapeRegExp(name)}\\s*=\\s*["']([^"']+)["']`, "i");
80
- const match = params.match(pattern);
81
- if (match?.[1]?.trim()) return match[1].trim();
77
+ const value = extractQuotedParam(params, name);
78
+ if (value) return value;
82
79
  }
83
80
  return void 0;
84
81
  }
82
/**
 * Escape the characters `&`, `<`, `>` and `"` in `text` as HTML entities.
 *
 * `&` is handled first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} text - Plain text to embed in HTML.
 * @returns {string} The entity-escaped text.
 */
function escapeHtmlText(text) {
  const substitutions = [
    [/&/g, "&amp;"],
    [/</g, "&lt;"],
    [/>/g, "&gt;"],
    [/"/g, "&quot;"]
  ];
  let escaped = text;
  for (const [pattern, entity] of substitutions) {
    escaped = escaped.replace(pattern, entity);
  }
  return escaped;
}
85
/**
 * Convert plain text into HTML: one `<tag>` element per paragraph.
 *
 * Paragraphs are separated by one or more blank lines. Inside a paragraph each
 * remaining line break becomes `<br />`, and the text itself is passed through
 * `escapeHtmlText`. Windows (`\r\n`) and old-Mac (`\r`) line endings are
 * normalized to `\n` first, so they split paragraphs the same way and no stray
 * carriage returns end up in the output.
 *
 * @param {string} text - Plain text, possibly multi-line.
 * @param {string} tag - Element name used to wrap each paragraph (e.g. `"p"`).
 * @returns {string} Paragraph elements joined by `\n`, or `""` when `text`
 *   contains nothing but whitespace.
 */
function textToHtml(text, tag) {
  const normalized = text.replace(/\r\n?/g, "\n");
  const paragraphs = normalized
    .split(/\n{2,}/)
    .map((part) => part.trim())
    .filter(Boolean);
  if (paragraphs.length === 0) return "";
  return paragraphs
    .map((paragraph) => {
      const inner = escapeHtmlText(paragraph).replace(/\n/g, "<br />");
      return `<${tag}>${inner}</${tag}>`;
    })
    .join("\n");
}
85
93
  function emitHtmlTag(tag, url) {
86
94
  const normalized = normalizeAssetUrl(url) ?? url;
87
95
  const escaped = normalized.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;");
@@ -94,10 +102,10 @@ function emitHtmlTag(tag, url) {
94
102
  return `<iframe src="${escaped}" loading="lazy"></iframe>`;
95
103
  }
96
104
  }
97
- function convertContentBlocker(content, rule) {
105
+ function convertUrlRule(content, rule) {
98
106
  const prefix = escapeRegExp(rule.shortcodePrefix);
99
107
  const pattern = new RegExp(
100
- `\\[${prefix}([^\\]]*)\\]\\s*(?:\\[\\/${prefix}[^\\]]*\\])?`,
108
+ `\\[${prefix}\\b([^\\]]*)\\]\\s*(?:\\[\\/${prefix}\\b[^\\]]*\\])?`,
101
109
  "gi"
102
110
  );
103
111
  return content.replace(pattern, (block, params) => {
@@ -106,12 +114,63 @@ function convertContentBlocker(content, rule) {
106
114
  return emitHtmlTag(rule.tag, url);
107
115
  });
108
116
  }
109
- function stripScaffolding(content, prefix) {
117
/**
 * Replace text-bearing shortcodes with HTML built from their attributes.
 *
 * Every `[<prefix> ...]` opener (plus an immediately following
 * `[/<prefix>]` closer, if present) is matched. For each entry in
 * `rule.fields`, the attribute named `field.param` is read with
 * `extractQuotedParam` and rendered with `textToHtml` using `field.tag`.
 * When none of the fields yields HTML, the matched shortcode is left as is.
 *
 * @param {string} content - Post content containing shortcodes.
 * @param {{ shortcodePrefix: string, fields: Iterable<{ param: string, tag: string }> }} rule
 *   - Shortcode name and the attribute-to-tag mapping to apply.
 * @returns {string} `content` with matching shortcodes converted.
 */
function convertTextRule(content, rule) {
  const name = escapeRegExp(rule.shortcodePrefix);
  const shortcode = new RegExp(
    `\\[${name}\\b([^\\]]*)\\]\\s*(?:\\[\\/${name}\\b[^\\]]*\\])?`,
    "gis"
  );

  const renderFields = (original, attributes) => {
    const fragments = [...rule.fields]
      .map((field) => {
        const text = extractQuotedParam(attributes, field.param);
        return text ? textToHtml(text, field.tag) : "";
      })
      .filter(Boolean);
    if (fragments.length === 0) {
      return original;
    }
    return fragments.join("\n");
  };

  return content.replace(shortcode, renderFields);
}
134
/**
 * Unwrap paired wrapper shortcodes, keeping their inner content.
 *
 * Each `[<prefix> ...]inner[/<prefix>]` pair (shortest match) is replaced by
 * the trimmed inner content. When `rule.urlParams` is non-empty and one of
 * those attributes is found on the opener via `extractShortcodeParam`, an
 * `img` tag produced by `emitHtmlTag` is placed on the line before the inner
 * content. Empty pieces are dropped.
 *
 * @param {string} content - Post content containing shortcodes.
 * @param {{ shortcodePrefix: string, urlParams?: string[] }} rule - Wrapper
 *   shortcode name and optional attribute names that may carry an image URL.
 * @returns {string} `content` with matching wrappers unwrapped.
 */
function convertWrapperRule(content, rule) {
  const name = escapeRegExp(rule.shortcodePrefix);
  const wrapper = new RegExp(
    `\\[${name}\\b([^\\]]*)\\]([\\s\\S]*?)\\[\\/${name}\\b[^\\]]*\\]`,
    "gi"
  );

  const unwrap = (_match, attributes, inner) => {
    let image;
    if (rule.urlParams?.length) {
      const url = extractShortcodeParam(attributes, rule.urlParams);
      image = url ? emitHtmlTag("img", url) : void 0;
    }
    return [image, inner.trim()].filter(Boolean).join("\n");
  };

  return content.replace(wrapper, unwrap);
}
150
/**
 * Replace every occurrence of a shortcode with a fixed HTML placeholder.
 *
 * Matches `[<prefix> ...]` openers, together with an immediately following
 * `[/<prefix>]` closer when present, and substitutes `rule.html` for each.
 *
 * @param {string} content - Post content containing shortcodes.
 * @param {{ shortcodePrefix: string, html: string }} rule - Shortcode name and
 *   the literal HTML to emit in its place.
 * @returns {string} `content` with matching shortcodes replaced by `rule.html`.
 */
function convertPlaceholderRule(content, rule) {
  const prefix = escapeRegExp(rule.shortcodePrefix);
  const pattern = new RegExp(
    `\\[${prefix}\\b[^\\]]*\\]\\s*(?:\\[\\/${prefix}\\b[^\\]]*\\])?`,
    "gi"
  );
  // A replacer function is used on purpose: when given a replacement *string*,
  // String.prototype.replace interprets `$&`, `$1`, `$$`, etc., which would
  // corrupt placeholder HTML containing a dollar sign (prices, templates).
  return content.replace(pattern, () => rule.html);
}
158
/**
 * Remove every shortcode tag whose name starts with `prefix`.
 *
 * Opening tags (`[<prefix>name ...]`) are removed first, then closing tags
 * (`[/<prefix>name]`). Only the tags are deleted; content between them stays.
 *
 * @param {string} content - Post content containing shortcodes.
 * @param {string} prefix - Literal shortcode-name prefix (e.g. `"tatsu_"`).
 * @returns {string} `content` without the matching tags.
 */
function stripScaffoldingPrefix(content, prefix) {
  const name = escapeRegExp(prefix);
  let stripped = content;
  // First pass: openers (no slash); second pass: closers (leading slash).
  for (const slash of ["", "\\/"]) {
    const tag = new RegExp(`\\[${slash}${name}[a-z0-9_-]*[^\\]]*\\]`, "gi");
    stripped = stripped.replace(tag, "");
  }
  return stripped;
}
164
/**
 * Remove the opening and closing tags of each exactly-named shortcode.
 *
 * For every token, tags of the form `[token ...]` and `[/token ...]` are
 * deleted case-insensitively; the `\b` after the token keeps longer names
 * that merely start with it (e.g. `token` + a letter) untouched. Tokens are
 * processed in order, each on the result of the previous one.
 *
 * @param {string} content - Post content containing shortcodes.
 * @param {Iterable<string>} tokens - Literal shortcode names to strip.
 * @returns {string} `content` without the matching tags.
 */
function stripLegacyTokens(content, tokens) {
  return [...tokens].reduce((remaining, token) => {
    const name = escapeRegExp(token);
    const openTag = new RegExp(`\\[${name}\\b[^\\]]*\\]`, "gi");
    const closeTag = new RegExp(`\\[\\/${name}\\b[^\\]]*\\]`, "gi");
    return remaining.replace(openTag, "").replace(closeTag, "");
  }, content);
}
115
174
  function detectThemes(content, registry) {
116
175
  return registry.filter((theme) => theme.detect.test(content));
117
176
  }
@@ -126,10 +185,24 @@ function flattenWordPressBuilders(content, options = {}) {
126
185
  }
127
186
  let html = content;
128
187
  for (const theme of themes) {
129
- for (const rule of theme.contentRules) {
130
- html = convertContentBlocker(html, rule);
188
+ for (const rule of theme.wrapperRules ?? []) {
189
+ html = convertWrapperRule(html, rule);
190
+ }
191
+ for (const rule of theme.textRules ?? []) {
192
+ html = convertTextRule(html, rule);
193
+ }
194
+ for (const rule of theme.urlRules ?? []) {
195
+ html = convertUrlRule(html, rule);
196
+ }
197
+ for (const rule of theme.placeholderRules ?? []) {
198
+ html = convertPlaceholderRule(html, rule);
199
+ }
200
+ for (const prefix of theme.scaffoldingPrefixes ?? []) {
201
+ html = stripScaffoldingPrefix(html, prefix);
202
+ }
203
+ if (theme.legacyScaffoldingTokens?.length) {
204
+ html = stripLegacyTokens(html, theme.legacyScaffoldingTokens);
131
205
  }
132
- html = stripScaffolding(html, theme.scaffoldingPrefix);
133
206
  }
134
207
  html = html.replace(/\n{3,}/g, "\n\n").trim();
135
208
  return {
@@ -140,6 +213,14 @@ function flattenWordPressBuilders(content, options = {}) {
140
213
 
141
214
  // src/parsers/wordpress/parse-wxr.ts
142
215
  var PLATFORM = "wordpress";
216
// Page slugs that are treated as WooCommerce stub pages regardless of content.
var WOOCOMMERCE_STUB_PAGE_SLUGS = /* @__PURE__ */ new Set(["cart", "checkout", "my-account"]);
// Content consisting solely of one WooCommerce cart/checkout/account shortcode.
var WOOCOMMERCE_STUB_SHORTCODE = /^\[woocommerce_(?:cart|checkout|my_account)\]\s*$/i;

/**
 * Decide whether a page is a WooCommerce stub (cart, checkout or account page).
 *
 * A page qualifies when its slug is one of `WOOCOMMERCE_STUB_PAGE_SLUGS`, or
 * when its trimmed content is exactly one of the WooCommerce shortcodes
 * matched by `WOOCOMMERCE_STUB_SHORTCODE`.
 *
 * NOTE(review): a slug match alone is sufficient, so a page named `cart`
 * with unrelated content is also reported as a stub — confirm this is intended.
 *
 * @param {string} slug - Sanitized page slug.
 * @param {string} contentHtml - Page content; only read when the slug does not match.
 * @returns {boolean} `true` when the page should be treated as a stub.
 */
function isWooCommerceStubPage(slug, contentHtml) {
  if (WOOCOMMERCE_STUB_PAGE_SLUGS.has(slug)) {
    return true;
  }
  const body = contentHtml.trim();
  return body.length > 0 && WOOCOMMERCE_STUB_SHORTCODE.test(body);
}
143
224
  function asArray(value) {
144
225
  if (value === void 0) return [];
145
226
  return Array.isArray(value) ? value : [value];
@@ -316,6 +397,11 @@ function resolveFeaturedAssetSourceId(thumbnailId, attachmentIndex, contentHtml)
316
397
  const firstInline = discoverContentAssetUrls(contentHtml)[0];
317
398
  return firstInline ? `url:${firstInline}` : void 0;
318
399
  }
400
/**
 * Apply the origin-URL rewrite to a single URL when a rewrite config is given.
 *
 * @param {string | undefined} url - URL to rewrite; any falsy value yields `undefined`.
 * @param {object | undefined} config - Rewrite configuration handed to
 *   `rewriteOriginUrlsInText` (defined outside this block); when falsy the URL
 *   is returned unchanged.
 * @returns {string | undefined} The rewritten URL, the original URL, or `undefined`.
 */
function maybeRewriteUrl(url, config) {
  if (!url) {
    return void 0;
  }
  return config ? rewriteOriginUrlsInText(url, config) : url;
}
319
405
  async function* enumerateWxrEntities(options) {
320
406
  const xml = await readFile(options.filePath, "utf8");
321
407
  const items = parseItems(xml);
@@ -346,9 +432,12 @@ async function* enumerateWxrEntities(options) {
346
432
  const postType = textValue(item.post_type);
347
433
  if (postType !== "post" && postType !== "page") continue;
348
434
  const id = textValue(item.post_id);
349
- const link = textValue(item.link);
435
+ const link = maybeRewriteUrl(textValue(item.link), options.originUrlRewrite);
350
436
  const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
351
437
  const contentHtml = preprocessContent(getContentEncoded(item), options);
438
+ if (postType === "page" && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(slug, contentHtml)) {
439
+ continue;
440
+ }
352
441
  for (const asset of collectInlineAssets(
353
442
  contentHtml,
354
443
  attachmentIndex,
@@ -446,10 +535,13 @@ function resolveWxrOptions(input) {
446
535
  return {
447
536
  filePath: String(obj.path),
448
537
  originUrlRewrite: obj.originUrlRewrite,
449
- flattenBuilders: obj.flattenBuilders
538
+ flattenBuilders: obj.flattenBuilders,
539
+ skipWooCommerceStubPages: obj.skipWooCommerceStubPages
450
540
  };
451
541
  }
452
- throw new Error("WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders? })");
542
+ throw new Error(
543
+ "WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders?, skipWooCommerceStubPages? })"
544
+ );
453
545
  }
454
546
  var wordpressAdapter = {
455
547
  platform: "wordpress",
@@ -2441,4 +2533,4 @@ export {
2441
2533
  wixAdapter,
2442
2534
  getAdapter
2443
2535
  };
2444
- //# sourceMappingURL=chunk-QEXTXHFG.js.map
2536
+ //# sourceMappingURL=chunk-Q6M5JEL3.js.map