@artinstack/migrator 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,44 +1,28 @@
1
1
  import {
2
2
  SquarespaceCollectionClient,
3
3
  WORDPRESS_BUILDER_REGISTRY,
4
+ WORDPRESS_WIDGET_REGISTRY,
5
+ WP_WIDGET_PLACEHOLDER,
4
6
  enumerateSquarespaceEntities,
5
- linkToPath,
6
- sanitizeSlug,
7
7
  summarizeSquarespaceExport,
8
8
  validateSquarespaceExportFile
9
- } from "./chunk-IPYHS5R2.js";
9
+ } from "./chunk-Z3L6N63Y.js";
10
+ import {
11
+ stampMigrationMediaRefs
12
+ } from "./chunk-KYNKJ4XV.js";
13
+ import {
14
+ linkToPath,
15
+ sanitizeSlug
16
+ } from "./chunk-XRCF73DA.js";
10
17
  import {
18
+ buildContentMediaUrlIndex,
19
+ canonicalizeInlineAssetUrl,
11
20
  discoverContentAssetUrls,
12
21
  normalizeAssetUrl,
13
- resolveFeaturedContentAssetUrl
14
- } from "./chunk-XYP3VYDH.js";
15
-
16
- // src/lib/origin-url-rewrite.ts
17
- function rewriteOriginUrlsInText(text, config) {
18
- if (!text || config.rules.length === 0) return text;
19
- let result = text;
20
- for (const rule of config.rules) {
21
- if (typeof rule.match === "string") {
22
- if (!rule.match) continue;
23
- result = result.split(rule.match).join(rule.replace);
24
- continue;
25
- }
26
- result = result.replace(rule.match, rule.replace);
27
- }
28
- return result;
29
- }
30
- function createWpContentGatewayRewrite(gatewayBase, publicOrigin) {
31
- const normalizedGateway = gatewayBase.replace(/\/$/, "");
32
- const normalizedPublic = publicOrigin.replace(/\/$/, "");
33
- return {
34
- rules: [
35
- {
36
- match: `${normalizedGateway}/wp-content/`,
37
- replace: `${normalizedPublic}/wp-content/`
38
- }
39
- ]
40
- };
41
- }
22
+ parseMigrationMediaRef,
23
+ resolveFeaturedContentAssetUrl,
24
+ rewriteOriginUrlsInText
25
+ } from "./chunk-WHGUE5FC.js";
42
26
 
43
27
  // src/parsers/wordpress/parse-wxr.ts
44
28
  import { readFile } from "fs/promises";
@@ -312,16 +296,195 @@ function stripLegacyTokens(content, tokens) {
312
296
  function detectThemes(content, registry) {
313
297
  return registry.filter((theme) => theme.detect.test(content));
314
298
  }
299
/**
 * Reads a shortcode attribute that may be written with or without quotes.
 *
 * `extractQuotedParam` is consulted first and any truthy result is returned
 * unchanged. Otherwise the parameter string is scanned case-insensitively for
 * `name=value`, where the value runs up to the next whitespace, quote or `]`.
 *
 * @param {string} params - Attribute portion of a shortcode.
 * @param {string} name - Attribute name to look up.
 * @returns {string|undefined} The attribute value, or undefined when absent or empty.
 */
function extractBareOrQuotedParam(params, name) {
  const quotedValue = extractQuotedParam(params, name);
  if (quotedValue) {
    return quotedValue;
  }
  const barePattern = new RegExp(`\\b${escapeRegExp(name)}\\s*=\\s*([^\\s"'\\]]+)`, "i");
  const bareValue = barePattern.exec(params)?.[1]?.trim();
  return bareValue || void 0;
}
306
/**
 * Builds the placeholder element that stands in for a WordPress widget.
 *
 * The element always carries `data-wp-widget`; every entry of `attrs` with a
 * truthy value is appended as an extra attribute (falsy values are omitted).
 * All values pass through `escapeLayoutAttr`, and the element body is
 * `WP_WIDGET_PLACEHOLDER`.
 *
 * @param {string} widget - Widget kind written to `data-wp-widget`.
 * @param {Object<string, string|undefined>} attrs - Additional attributes.
 * @param {string} [tag="div"] - Element name to emit.
 * @returns {string} The stub element markup.
 */
function emitWidgetStub(widget, attrs, tag = "div") {
  const widgetAttr = `data-wp-widget="${escapeLayoutAttr(widget)}"`;
  const extraAttrs = Object.entries(attrs)
    .filter(([, value]) => value)
    .map(([key, value]) => `${key}="${escapeLayoutAttr(value)}"`);
  const attrText = [widgetAttr, ...extraAttrs].join(" ");
  return `<${tag} ${attrText}>${WP_WIDGET_PLACEHOLDER}</${tag}>`;
}
313
/**
 * Converts a YouTube-style start offset into whole seconds.
 *
 * Accepts plain seconds ("90"), a trailing unit ("90s") and the compound
 * h/m/s form used by share links ("1m30s", "1h2m3s").
 *
 * @param {string|null|undefined} raw - Value of a `t` or `start` query parameter.
 * @returns {number|undefined} Offset in seconds, or undefined when missing or unparseable.
 */
function parseVideoStartSeconds(raw) {
  if (!raw) return undefined;
  const match = /^(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s?)?$/i.exec(raw.trim());
  // Every group is optional, so an empty overall match means nothing was parsed.
  if (!match || match[0] === "") return undefined;
  const [, hours = "0", minutes = "0", seconds = "0"] = match;
  return Number(hours) * 3600 + Number(minutes) * 60 + Number(seconds);
}

/**
 * Normalizes a YouTube or Vimeo link into its privacy-friendly embed URL.
 *
 * Recognized inputs: `youtu.be/<id>`, `youtube.com/embed/<id>`,
 * `youtube.com/watch?v=<id>` (also on `youtube-nocookie.com`, with optional
 * `www.` / `m.` host prefixes), `vimeo.com/.../<numeric id>` and
 * `player.vimeo.com/video/<numeric id>`. Protocol-relative URLs (`//host/...`)
 * are treated as https.
 *
 * Start offsets from `t` / `start` are converted to seconds so that share-link
 * values such as `1m30s` yield a valid `?start=90`; values that cannot be
 * parsed are dropped instead of being copied into the embed URL.
 *
 * @param {string} raw - URL taken from a shortcode attribute or body.
 * @returns {{provider: string, embedUrl: string}|undefined} The embed
 *   descriptor, or undefined for empty input, `data:` URLs, unparseable URLs
 *   and unsupported hosts.
 */
function normalizeVideoEmbedUrl(raw) {
  const trimmed = raw.trim();
  if (!trimmed || trimmed.startsWith("data:")) return undefined;

  let url;
  try {
    url = new URL(trimmed.startsWith("//") ? `https:${trimmed}` : trimmed);
  } catch {
    // Not a parseable absolute URL; the caller decides on a fallback.
    return undefined;
  }

  const host = url.hostname.replace(/^www\./, "").replace(/^m\./, "");
  const youtubeEmbed = (idSegment, startRaw) => {
    const start = parseVideoStartSeconds(startRaw);
    const suffix = start === undefined ? "" : `?start=${start}`;
    return {
      provider: "youtube",
      embedUrl: `https://www.youtube-nocookie.com/embed/${idSegment}${suffix}`
    };
  };
  const vimeoEmbed = (id) => ({
    provider: "vimeo",
    embedUrl: `https://player.vimeo.com/video/${id}`
  });

  if (host === "youtu.be") {
    const id = url.pathname.split("/").filter(Boolean)[0];
    if (id) {
      // Short links carry the offset in `t`, same as watch URLs.
      return youtubeEmbed(id, url.searchParams.get("t") ?? url.searchParams.get("start"));
    }
  }

  if (host === "youtube.com" || host === "youtube-nocookie.com") {
    const embedMatch = url.pathname.match(/\/embed\/([^/?#]+)/);
    if (embedMatch?.[1]) {
      return youtubeEmbed(embedMatch[1], url.searchParams.get("start"));
    }
    const videoId = url.searchParams.get("v");
    if (videoId) {
      // `v` arrives percent-decoded, so re-encode it before placing it in a path.
      return youtubeEmbed(
        encodeURIComponent(videoId),
        url.searchParams.get("t") ?? url.searchParams.get("start")
      );
    }
  }

  if (host === "vimeo.com") {
    const id = url.pathname.split("/").filter(Boolean).at(-1);
    if (id && /^\d+$/.test(id)) {
      return vimeoEmbed(id);
    }
  }

  if (host === "player.vimeo.com") {
    const match = url.pathname.match(/\/video\/(\d+)/);
    if (match?.[1]) {
      return vimeoEmbed(match[1]);
    }
  }

  return undefined;
}
364
/**
 * Turns one video shortcode into markup.
 *
 * The video URL is taken from the first available of the `url`, `src`,
 * `video`, `link`, `youtube_url` or `vimeo_url` attributes, falling back to an
 * http(s) URL at the start of the shortcode body. Outcomes:
 *   - no URL: an "external" video stub without an embed URL;
 *   - a URL `normalizeVideoEmbedUrl` recognizes: a stub carrying the provider
 *     and the normalized embed URL;
 *   - a direct `.mp4` / `.webm` / `.ogg` link: whatever `emitHtmlTag("video", url)` returns;
 *   - anything else: an "external" stub carrying the original URL.
 *
 * @param {string} params - Attribute portion of the shortcode.
 * @param {string} inner - Text between the opening and closing tags ("" when self-closing).
 * @returns {string} Replacement markup for the shortcode.
 */
function emitVideoWidgetFromParams(params, inner) {
  const attrUrl = extractShortcodeParam(params, ["url", "src", "video", "link", "youtube_url", "vimeo_url"]);
  const url = attrUrl ?? inner.trim().match(/^https?:\/\/\S+/)?.[0];
  if (!url) {
    return emitWidgetStub("video", { "data-video-provider": "external" });
  }

  const embed = normalizeVideoEmbedUrl(url);
  if (embed) {
    const { provider, embedUrl } = embed;
    return emitWidgetStub("video", {
      "data-video-provider": provider,
      "data-embed-url": embedUrl
    });
  }

  const isDirectMediaFile = /\.(mp4|webm|ogg)(\?|#|$)/i.test(url);
  return isDirectMediaFile
    ? emitHtmlTag("video", url)
    : emitWidgetStub("video", {
        "data-video-provider": "external",
        "data-embed-url": url
      });
}
384
/**
 * Replaces map shortcodes with `map` widget stubs.
 *
 * For every prefix in `widgetRegistry.mapShortcodePrefixes`, each
 * `[prefix ...]` (plus an optional `[/prefix]` closer) is replaced by a stub
 * that carries either:
 *   - `data-embed-url`, when an `embed_url` / `url` / `src` / `map_url`
 *     attribute contains "google.com/maps"; or
 *   - `data-wp-map-query`, from the `address` or `q` attribute.
 *
 * The address is used whenever no usable Google Maps URL is available, so a
 * shortcode with a non-Google URL and an address still keeps its location.
 * Whitespace after the opening tag is consumed only when a closing tag
 * follows, so a self-closing shortcode keeps its spacing from the next text.
 *
 * @param {string} content - Content that may contain map shortcodes.
 * @param {{mapShortcodePrefixes: Iterable<string>}} widgetRegistry - Widget configuration.
 * @returns {string} Content with map shortcodes replaced.
 */
function flattenMapShortcodes(content, widgetRegistry) {
  let html = content;
  for (const prefix of widgetRegistry.mapShortcodePrefixes) {
    const tag = escapeRegExp(prefix);
    const pattern = new RegExp(
      `\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\b[^\\]]*\\])?`,
      "gi"
    );
    html = html.replace(pattern, (_, params) => {
      const embedUrl = extractShortcodeParam(params, ["embed_url", "url", "src", "map_url"]);
      const query = extractBareOrQuotedParam(params, "address") ?? extractBareOrQuotedParam(params, "q");
      const mapsEmbedUrl = embedUrl?.includes("google.com/maps") ? embedUrl : undefined;
      const attrs = {};
      if (mapsEmbedUrl) {
        attrs["data-embed-url"] = mapsEmbedUrl;
      } else if (query) {
        // Fall back to the address when the URL is missing or not a Google Maps link.
        attrs["data-wp-map-query"] = query;
      }
      return emitWidgetStub("map", attrs);
    });
  }
  return html;
}
402
/**
 * Replaces contact-form shortcodes with `contact-form` widget stubs.
 *
 * Each rule in `widgetRegistry.contactFormRules` names a shortcode `tag`, the
 * attribute holding the form id (`idParam`) and a `source` label. Every
 * `[tag ...]` (plus an optional `[/tag]` closer) becomes a `<section>` stub
 * with `data-wp-form-source` and, when an id is present, `data-wp-form-id`.
 *
 * Whitespace after the opening tag is consumed only when a closing tag
 * follows, so a self-closing shortcode keeps the blank lines that separate it
 * from the next paragraph.
 *
 * @param {string} content - Content that may contain contact-form shortcodes.
 * @param {{contactFormRules: Iterable<{tag: string, idParam: string, source: string}>}} widgetRegistry
 *   Widget configuration.
 * @returns {string} Content with contact-form shortcodes replaced.
 */
function flattenContactFormShortcodes(content, widgetRegistry) {
  let html = content;
  for (const rule of widgetRegistry.contactFormRules) {
    const tag = escapeRegExp(rule.tag);
    const pattern = new RegExp(
      `\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\b[^\\]]*\\])?`,
      "gi"
    );
    html = html.replace(pattern, (_, params) => {
      const id = extractBareOrQuotedParam(params, rule.idParam);
      return emitWidgetStub(
        "contact-form",
        {
          "data-wp-form-source": rule.source,
          ...(id ? { "data-wp-form-id": id } : {})
        },
        "section"
      );
    });
  }
  return html;
}
423
/**
 * Replaces gallery shortcodes (tag name from `widgetRegistry.galleryShortcode`).
 *
 * When the `ids` attribute lists at least one purely numeric id, the shortcode
 * becomes a `<figure data-wp-inline-gallery>` containing one
 * `<img data-wp-attachment-id="...">` per id; non-numeric entries are ignored.
 * Otherwise it becomes a `portfolio` widget stub flagged with
 * `data-wp-gallery-dynamic="1"`, plus `data-wp-portfolio-category` when a
 * `category` (or, failing that, `type`) attribute is present.
 *
 * @param {string} content - Content that may contain gallery shortcodes.
 * @param {{galleryShortcode: string}} widgetRegistry - Widget configuration.
 * @returns {string} Content with gallery shortcodes replaced.
 */
function flattenGalleryShortcodes(content, widgetRegistry) {
  const tag = escapeRegExp(widgetRegistry.galleryShortcode);
  const shortcodePattern = new RegExp(`\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\])?`, "gi");

  const renderGallery = (_match, params) => {
    const rawIds = extractBareOrQuotedParam(params, "ids");
    const attachmentIds = rawIds
      ? rawIds
          .split(",")
          .map((part) => part.trim())
          .filter((part) => /^\d+$/.test(part))
      : [];

    if (attachmentIds.length > 0) {
      let images = "";
      for (const id of attachmentIds) {
        images += `<img data-wp-attachment-id="${escapeLayoutAttr(id)}" alt="" />`;
      }
      return `<figure data-wp-inline-gallery>${images}</figure>`;
    }

    const category = extractBareOrQuotedParam(params, "category") ?? extractBareOrQuotedParam(params, "type");
    const attrs = { "data-wp-gallery-dynamic": "1" };
    if (category) {
      attrs["data-wp-portfolio-category"] = category;
    }
    return emitWidgetStub("portfolio", attrs);
  };

  return content.replace(shortcodePattern, renderGallery);
}
440
/**
 * Replaces portfolio shortcodes (tag name from `widgetRegistry.portfolioShortcode`)
 * with `portfolio` widget stubs.
 *
 * The stub carries `data-wp-portfolio-category` and `data-wp-portfolio-slug`
 * when the shortcode has `category` and `slug` attributes respectively.
 *
 * @param {string} content - Content that may contain portfolio shortcodes.
 * @param {{portfolioShortcode: string}} widgetRegistry - Widget configuration.
 * @returns {string} Content with portfolio shortcodes replaced.
 */
function flattenPortfolioShortcodes(content, widgetRegistry) {
  const tag = escapeRegExp(widgetRegistry.portfolioShortcode);
  const shortcodePattern = new RegExp(`\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\])?`, "gi");

  const renderPortfolio = (_match, params) => {
    const category = extractBareOrQuotedParam(params, "category");
    const slug = extractBareOrQuotedParam(params, "slug");
    const attrs = {};
    if (category) {
      attrs["data-wp-portfolio-category"] = category;
    }
    if (slug) {
      attrs["data-wp-portfolio-slug"] = slug;
    }
    return emitWidgetStub("portfolio", attrs);
  };

  return content.replace(shortcodePattern, renderPortfolio);
}
452
/**
 * Replaces video shortcodes for every prefix in
 * `widgetRegistry.videoShortcodePrefixes`.
 *
 * Two passes run per prefix, in this order:
 *   1. paired `[prefix ...]body[/prefix]` shortcodes, whose body is handed to
 *      `emitVideoWidgetFromParams` as a possible URL source;
 *   2. any remaining lone `[prefix ...]` tags, handled with an empty body.
 *
 * @param {string} content - Content that may contain video shortcodes.
 * @param {{videoShortcodePrefixes: Iterable<string>}} widgetRegistry - Widget configuration.
 * @returns {string} Content with video shortcodes replaced.
 */
function flattenVideoShortcodes(content, widgetRegistry) {
  let result = content;
  for (const prefix of widgetRegistry.videoShortcodePrefixes) {
    const pairedPattern = new RegExp(
      `\\[${escapeRegExp(prefix)}\\b([^\\]]*)\\]([\\s\\S]*?)\\[\\/${escapeRegExp(prefix)}\\b[^\\]]*\\]`,
      "gi"
    );
    const withoutPaired = result.replace(pairedPattern, (_match, params, body) => {
      return emitVideoWidgetFromParams(params, body);
    });

    const lonePattern = new RegExp(`\\[${escapeRegExp(prefix)}\\b([^\\]]*)\\]`, "gi");
    result = withoutPaired.replace(lonePattern, (_match, params) => {
      return emitVideoWidgetFromParams(params, "");
    });
  }
  return result;
}
471
/**
 * Runs every widget shortcode pass over the content in a fixed order:
 * gallery, portfolio, map, contact form, then video.
 *
 * @param {string} content - Raw WordPress content.
 * @param {object} [widgetRegistry=WORDPRESS_WIDGET_REGISTRY] - Widget configuration
 *   passed unchanged to each pass.
 * @returns {string} Content with the supported widget shortcodes replaced.
 */
function flattenWordPressWidgets(content, widgetRegistry = WORDPRESS_WIDGET_REGISTRY) {
  const passes = [
    flattenGalleryShortcodes,
    flattenPortfolioShortcodes,
    flattenMapShortcodes,
    flattenContactFormShortcodes,
    flattenVideoShortcodes
  ];
  return passes.reduce((html, runPass) => runPass(html, widgetRegistry), content);
}
315
480
  function flattenWordPressBuilders(content, options = {}) {
316
481
  if (!content.trim()) {
317
482
  return { html: content, detectedThemes: [] };
318
483
  }
319
484
  const registry = options.registry ?? WORDPRESS_BUILDER_REGISTRY;
320
485
  const themes = detectThemes(content, registry);
321
- if (themes.length === 0) {
322
- return { html: content, detectedThemes: [] };
323
- }
324
- let html = content;
486
+ const widgetRegistry = options.widgetRegistry ?? WORDPRESS_WIDGET_REGISTRY;
487
+ let html = flattenWordPressWidgets(content, widgetRegistry);
325
488
  for (const theme of themes) {
326
489
  for (const rule of theme.wrapperRules ?? []) {
327
490
  html = convertWrapperRule(html, rule);
@@ -435,11 +598,11 @@ function buildAttachmentIndex(items, originUrlRewrite) {
435
598
  for (const item of items) {
436
599
  if (textValue(item.post_type) !== "attachment") continue;
437
600
  const id = textValue(item.post_id);
438
- let url = textValue(item.attachment_url) || textValue(item.link);
439
- if (!id || !url) continue;
440
- if (originUrlRewrite) {
441
- url = rewriteOriginUrlsInText(url, originUrlRewrite);
442
- }
601
+ const rawUrl = textValue(item.attachment_url) || textValue(item.link);
602
+ if (!id || !rawUrl) continue;
603
+ const canonical = canonicalizeInlineAssetUrl(rawUrl, originUrlRewrite);
604
+ if (!canonical) continue;
605
+ const url = canonical.canonicalUrl;
443
606
  const filename = basename(new URL(url, "http://local.invalid").pathname) || `attachment-${id}`;
444
607
  index.set(id, {
445
608
  sourceUrl: url,
@@ -498,22 +661,24 @@ function collectTaxonomies(items) {
498
661
  }
499
662
  return { categories, tags };
500
663
  }
501
- function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt) {
664
+ function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt, originUrlRewrite) {
502
665
  const assets = [];
503
- for (const src of discoverContentAssetUrls(html)) {
504
- if (seenUrls.has(src)) continue;
505
- seenUrls.add(src);
666
+ for (const discovered of discoverContentAssetUrls(html)) {
667
+ const canonical = canonicalizeInlineAssetUrl(discovered, originUrlRewrite);
668
+ if (!canonical) continue;
669
+ if (seenUrls.has(canonical.canonicalUrl)) continue;
670
+ seenUrls.add(canonical.canonicalUrl);
506
671
  let filename;
507
672
  try {
508
- filename = basename(new URL(src, "http://local.invalid").pathname) || "inline-asset";
673
+ filename = basename(new URL(canonical.canonicalUrl, "http://local.invalid").pathname) || "inline-asset";
509
674
  } catch {
510
675
  filename = "inline-asset";
511
676
  }
512
677
  assets.push({
513
678
  type: "asset",
514
- source: sourceMeta(`url:${src}`, src, exportedAt),
515
- sourceId: `url:${src}`,
516
- sourceUrl: src,
679
+ source: sourceMeta(canonical.sourceId, canonical.canonicalUrl, exportedAt),
680
+ sourceId: canonical.sourceId,
681
+ sourceUrl: canonical.canonicalUrl,
517
682
  filename,
518
683
  mimeType: guessMime(filename)
519
684
  });
@@ -534,12 +699,15 @@ function preprocessContent(rawHtml, options) {
534
699
  }
535
700
  return html;
536
701
  }
537
- function resolveFeaturedAssetSourceId(thumbnailId, attachmentIndex, contentHtml) {
702
+ function resolveFeaturedAssetSourceId(thumbnailId, attachmentIndex, contentHtml, originUrlRewrite) {
538
703
  if (thumbnailId && attachmentIndex.has(thumbnailId)) {
539
704
  return thumbnailId;
540
705
  }
541
706
  const featuredUrl = resolveFeaturedContentAssetUrl(contentHtml);
542
- return featuredUrl ? `url:${featuredUrl}` : void 0;
707
+ if (!featuredUrl) return void 0;
708
+ const fromRef = parseMigrationMediaRef(featuredUrl);
709
+ if (fromRef) return fromRef;
710
+ return canonicalizeInlineAssetUrl(featuredUrl, originUrlRewrite)?.sourceId;
543
711
  }
544
712
  function maybeRewriteUrl(url, config) {
545
713
  if (!url) return void 0;
@@ -578,18 +746,39 @@ async function* enumerateWxrEntities(options) {
578
746
  const id = textValue(item.post_id);
579
747
  const link = maybeRewriteUrl(textValue(item.link), options.originUrlRewrite);
580
748
  const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
581
- const contentHtml = preprocessContent(getContentEncoded(item), options);
749
+ let contentHtml = preprocessContent(getContentEncoded(item), options);
582
750
  if (postType === "page" && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(slug, contentHtml)) {
583
751
  continue;
584
752
  }
585
- for (const asset of collectInlineAssets(
753
+ const inlineAssets = collectInlineAssets(
586
754
  contentHtml,
587
755
  attachmentIndex,
588
756
  seenAssetUrls,
589
- options.exportedAt
590
- )) {
757
+ options.exportedAt,
758
+ options.originUrlRewrite
759
+ );
760
+ for (const asset of inlineAssets) {
591
761
  yield asset;
592
762
  }
763
+ if (options.stampMigrationMediaRefs !== false) {
764
+ const urlIndex = buildContentMediaUrlIndex(
765
+ [
766
+ ...[...attachmentIndex.entries()].map(([sourceId, entry]) => ({
767
+ sourceId,
768
+ sourceUrl: entry.sourceUrl
769
+ })),
770
+ ...inlineAssets.map((asset) => ({
771
+ sourceId: asset.sourceId,
772
+ sourceUrl: asset.sourceUrl
773
+ }))
774
+ ],
775
+ options.originUrlRewrite
776
+ );
777
+ contentHtml = stampMigrationMediaRefs(contentHtml, {
778
+ urlToSourceId: urlIndex,
779
+ originUrlRewrite: options.originUrlRewrite
780
+ }).html;
781
+ }
593
782
  const categorySlugs = [];
594
783
  const tagSlugs = [];
595
784
  for (const cat of asArray(item.category)) {
@@ -604,7 +793,8 @@ async function* enumerateWxrEntities(options) {
604
793
  const featuredAssetSourceId = resolveFeaturedAssetSourceId(
605
794
  thumbnailId,
606
795
  attachmentIndex,
607
- contentHtml
796
+ contentHtml,
797
+ options.originUrlRewrite
608
798
  );
609
799
  const post = {
610
800
  type: "post",
@@ -2660,8 +2850,6 @@ function getAdapter(platform) {
2660
2850
  }
2661
2851
 
2662
2852
  export {
2663
- rewriteOriginUrlsInText,
2664
- createWpContentGatewayRewrite,
2665
2853
  wordpressAdapter,
2666
2854
  SMUGMUG_API_BASE,
2667
2855
  SMUGMUG_OAUTH_ENDPOINTS,
@@ -2677,4 +2865,4 @@ export {
2677
2865
  wixAdapter,
2678
2866
  getAdapter
2679
2867
  };
2680
- //# sourceMappingURL=chunk-XQVKA54A.js.map
2868
+ //# sourceMappingURL=chunk-CB5KRANW.js.map