@artinstack/migrator 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/dist/{bundle-uAAHehbv.d.ts → bundle-Do-9ikQv.d.ts} +1 -1
  2. package/dist/{chunk-Z3L6N63Y.js → chunk-3A2PA4P3.js} +80 -9
  3. package/dist/chunk-3A2PA4P3.js.map +1 -0
  4. package/dist/{chunk-KYNKJ4XV.js → chunk-BONZ3U3I.js} +2 -2
  5. package/dist/{chunk-HI7JHWZU.js → chunk-LC7CGWDN.js} +1 -1
  6. package/dist/chunk-LC7CGWDN.js.map +1 -0
  7. package/dist/{chunk-WHGUE5FC.js → chunk-S4GMDRGX.js} +39 -4
  8. package/dist/chunk-S4GMDRGX.js.map +1 -0
  9. package/dist/{chunk-ALLFBWBO.js → chunk-S4SUJT2D.js} +2 -2
  10. package/dist/{chunk-CB5KRANW.js → chunk-YLVPZ4M3.js} +155 -30
  11. package/dist/chunk-YLVPZ4M3.js.map +1 -0
  12. package/dist/cli/index.js +12 -7
  13. package/dist/cli/index.js.map +1 -1
  14. package/dist/index.d.ts +6 -6
  15. package/dist/index.js +12 -6
  16. package/dist/lib/index.d.ts +1 -1
  17. package/dist/lib/index.js +3 -1
  18. package/dist/{media-urls-w46-CWUp.d.ts → media-urls-u49RCyPn.d.ts} +15 -1
  19. package/dist/normalizer/index.d.ts +4 -4
  20. package/dist/normalizer/index.js +1 -1
  21. package/dist/{rewrite-inline-images-DyxKUNs3.d.ts → rewrite-inline-images-BsgSquzV.d.ts} +1 -1
  22. package/dist/sinks/index.d.ts +31 -6
  23. package/dist/sinks/index.js +8 -4
  24. package/dist/transformers/index.d.ts +3 -3
  25. package/dist/transformers/index.js +3 -3
  26. package/dist/{types-DWOP8Dcy.d.ts → types-TCHy3Oko.d.ts} +17 -1
  27. package/package.json +1 -1
  28. package/dist/chunk-CB5KRANW.js.map +0 -1
  29. package/dist/chunk-HI7JHWZU.js.map +0 -1
  30. package/dist/chunk-WHGUE5FC.js.map +0 -1
  31. package/dist/chunk-Z3L6N63Y.js.map +0 -1
  32. /package/dist/{chunk-KYNKJ4XV.js.map → chunk-BONZ3U3I.js.map} +0 -0
  33. /package/dist/{chunk-ALLFBWBO.js.map → chunk-S4SUJT2D.js.map} +0 -0
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/lib/media-urls.ts"],"sourcesContent":["import * as cheerio from \"cheerio\";\n\n// --- Origin URL rewrite (gateway → public origin before parse/discovery) ---\n\nexport interface OriginUrlRewriteRule {\n /** Literal substring or regex matched against the full text block. */\n match: string | RegExp;\n replace: string;\n}\n\nexport interface OriginUrlRewriteConfig {\n rules: OriginUrlRewriteRule[];\n}\n\n/** Swap legacy gateway/staging host fragments before parse, fetch, or asset discovery. */\nexport function rewriteOriginUrlsInText(text: string, config: OriginUrlRewriteConfig): string {\n if (!text || config.rules.length === 0) return text;\n\n let result = text;\n for (const rule of config.rules) {\n if (typeof rule.match === \"string\") {\n if (!rule.match) continue;\n result = result.split(rule.match).join(rule.replace);\n continue;\n }\n result = result.replace(rule.match, rule.replace);\n }\n return result;\n}\n\n/** Build a rule that rewrites API-gateway `/prod/wp-content/` paths to a public origin. */\nexport function createWpContentGatewayRewrite(gatewayBase: string, publicOrigin: string): OriginUrlRewriteConfig {\n const normalizedGateway = gatewayBase.replace(/\\/$/, \"\");\n const normalizedPublic = publicOrigin.replace(/\\/$/, \"\");\n return {\n rules: [\n {\n match: `${normalizedGateway}/wp-content/`,\n replace: `${normalizedPublic}/wp-content/`,\n },\n ],\n };\n}\n\n// --- Content asset URL discovery & normalization ---\n\nconst IMAGE_EXTENSIONS = \"jpe?g|png|gif|webp|avif|svg\";\n/** Image file extension in a path or URL (allows trailing `?query` / `#hash`). */\nconst IMAGE_EXTENSION_PATTERN = new RegExp(String.raw`\\.(?:${IMAGE_EXTENSIONS})\\b`, \"i\");\n\n/** Captured value must contain an image extension — skips `url=\"…/about\"`, `<iframe src=\"…youtube…\">`, etc. */\nconst QUOTED_IMAGE_PATH = String.raw`[^\"']+\\.(?:${IMAGE_EXTENSIONS})(?:\\?[^\"'#]*)?(?:#.*)?`;\n\nconst SHORTCODE_IMAGE_PARAM_PATTERN = new RegExp(\n String.raw`\\b(?:image|bg_image|background_image|url)\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\n/** Bare `src=\"…jpg\"` outside `<img>` (shortcode fragments); `<img src>` handled by cheerio. */\nconst BARE_SRC_PARAM_PATTERN = new RegExp(\n String.raw`\\bsrc\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\nconst DATA_BG_IMAGE_PATTERN = /\\bdata-bg-image\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\n/** Inline CSS `background` / `background-image: url(…)` (quoted or bare). */\nconst BACKGROUND_IMAGE_URL_PATTERN =\n /background(?:-image)?\\s*:[^;]*?url\\s*\\(\\s*(['\"]?)([^'\")]+)\\1\\s*\\)/gi;\n\nconst HERO_URL_PARAM_PATTERN = new RegExp(\n String.raw`\\b(?:bg_image|background_image)\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\nconst INLINE_IMAGE_PARAM_PATTERN = new RegExp(\n String.raw`\\bimage\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\nconst IMG_TAG_SRC_PATTERN = /<img\\b[^>]*\\bsrc\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\nconst DATA_WP_ATTACHMENT_ID_PATTERN = /\\bdata-wp-attachment-id\\s*=\\s*[\"'](\\d+)[\"']/gi;\n\n/** Builder / core gallery shortcodes with explicit `ids=` lists (pre- or post-flatten). */\nconst SHORTCODE_GALLERY_IDS_PATTERN =\n /\\[(?:gallery|oshine_gallery|vc_gallery|nggallery)\\b[^\\]]*\\bids\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\nexport interface ContentAssetDiscovery {\n /** Network-resolvable image paths (`<img>`, backgrounds, shortcode `image=` attrs, …). */\n urls: string[];\n /**\n * WordPress attachment post ids referenced in content without an inline URL in this\n * file context (`data-wp-attachment-id`, `[gallery ids=…]`, `[oshine_gallery ids=…]`, …).\n */\n unresolvedAttachmentIds: string[];\n}\n\ninterface FeaturedAssetCandidate {\n url: string;\n index: number;\n tier: 0 | 1;\n}\n\nfunction ingestLikelyImageUrl(urls: Set<string>, raw: string | undefined): void {\n const normalized = normalizeAssetUrl(raw ?? \"\");\n if (normalized && isLikelyImageUrl(normalized)) {\n urls.add(normalized);\n }\n}\n\nfunction extractImgTagSrcs(content: string): string[] {\n if (!content.trim()) return [];\n const $ = cheerio.load(content, { xml: false });\n const srcs: string[] = [];\n $(\"img[src]\").each((_, el) => {\n const src = $(el).attr(\"src\")?.trim();\n if (src) srcs.push(src);\n });\n return srcs;\n}\n\nfunction hasImageExtension(value: string): boolean {\n const withoutHash = value.split(\"#\", 1)[0] ?? value;\n const withoutQuery = withoutHash.split(\"?\", 1)[0] ?? withoutHash;\n return IMAGE_EXTENSION_PATTERN.test(withoutQuery);\n}\n\nfunction extractDataBgImageUrls(content: string): string[] {\n const urls: string[] = [];\n for (const match of content.matchAll(DATA_BG_IMAGE_PATTERN)) {\n const raw = match[1]?.trim();\n if (raw) urls.push(raw);\n }\n return urls;\n}\n\nfunction extractCssBackgroundImageUrls(content: string): string[] {\n const urls: string[] = [];\n for (const match of content.matchAll(BACKGROUND_IMAGE_URL_PATTERN)) {\n const raw = match[2]?.trim();\n if (raw) urls.push(raw);\n }\n return urls;\n}\n\n/** All `<img src>` values (including those not ingested as vault assets). */\nexport function discoverRawImgSrcs(content: string): string[] {\n return extractImgTagSrcs(content).filter((src) => !src.startsWith(\"data:\"));\n}\n\n/** Normalize protocol-relative and trim; skip data URIs. */\nexport function normalizeAssetUrl(raw: string): string | undefined {\n const trimmed = raw.trim();\n if (!trimmed || trimmed.startsWith(\"data:\")) return undefined;\n if (trimmed.startsWith(\"//\")) return `https:${trimmed}`;\n return trimmed;\n}\n\n/** Heuristic: URL likely points at a raster/vector image asset, not a page link. */\nexport function isLikelyImageUrl(url: string): boolean {\n if (!url || url.startsWith(\"data:\")) return false;\n\n if (url.startsWith(\"/\")) {\n return hasImageExtension(url);\n }\n\n if (!/^https?:\\/\\//i.test(url)) return false;\n\n try {\n const { pathname } = new URL(url);\n if (hasImageExtension(pathname)) return true;\n } catch {\n // fall through — malformed absolute URL\n }\n\n return hasImageExtension(url);\n}\n\nfunction pushFeaturedCandidate(\n candidates: FeaturedAssetCandidate[],\n raw: string | undefined,\n index: number,\n tier: 0 | 1,\n): void {\n const normalized = normalizeAssetUrl(raw ?? \"\");\n if (!normalized || !isLikelyImageUrl(normalized)) return;\n candidates.push({ url: normalized, index, tier });\n}\n\nfunction collectFeaturedAssetCandidates(content: string): FeaturedAssetCandidate[] {\n const candidates: FeaturedAssetCandidate[] = [];\n\n for (const match of content.matchAll(DATA_BG_IMAGE_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 0);\n }\n for (const match of content.matchAll(BACKGROUND_IMAGE_URL_PATTERN)) {\n pushFeaturedCandidate(candidates, match[2], match.index ?? 0, 0);\n }\n for (const match of content.matchAll(HERO_URL_PARAM_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 0);\n }\n for (const match of content.matchAll(IMG_TAG_SRC_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 1);\n }\n for (const match of content.matchAll(INLINE_IMAGE_PARAM_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 1);\n }\n\n return candidates;\n}\n\n/**\n * Ordered featured-image candidates when `_thumbnail_id` is missing — heroes\n * (`data-bg-image`, CSS backgrounds, `bg_image=`) before inline assets; within\n * each tier, first in document order wins. Filename tokens (`_w`, `_2048`, …)\n * are not interpreted as quality signals.\n */\nexport function discoverFeaturedAssetCandidateUrls(content: string): string[] {\n if (!content.trim()) return [];\n\n const ranked = [...collectFeaturedAssetCandidates(content)].sort((left, right) => {\n if (left.tier !== right.tier) return left.tier - right.tier;\n return left.index - right.index;\n });\n\n const urls: string[] = [];\n const seen = new Set<string>();\n for (const candidate of ranked) {\n if (seen.has(candidate.url)) continue;\n seen.add(candidate.url);\n urls.push(candidate.url);\n }\n return urls;\n}\n\n/** Best featured-image URL from post/page HTML when attachment id is unavailable. */\nexport function resolveFeaturedContentAssetUrl(content: string): string | undefined {\n return discoverFeaturedAssetCandidateUrls(content)[0];\n}\n\nfunction parseAttachmentIdList(raw: string | undefined): string[] {\n if (!raw?.trim()) return [];\n return raw\n .split(\",\")\n .map((part) => part.trim())\n .filter((part) => /^\\d+$/.test(part));\n}\n\nfunction extractAttachmentIdsFromContent(content: string): string[] {\n const ids = new Set<string>();\n\n for (const match of content.matchAll(DATA_WP_ATTACHMENT_ID_PATTERN)) {\n const id = match[1]?.trim();\n if (id) ids.add(id);\n }\n\n for (const match of content.matchAll(SHORTCODE_GALLERY_IDS_PATTERN)) {\n for (const id of parseAttachmentIdList(match[1])) {\n ids.add(id);\n }\n }\n\n return [...ids];\n}\n\n/**\n * Generic content-discovery pass: collect resolvable image URLs and attachment ids\n * that still need an index / REST / crawl resolution step.\n */\nexport function discoverContentAssets(content: string): ContentAssetDiscovery {\n if (!content.trim()) {\n return { urls: [], unresolvedAttachmentIds: [] };\n }\n\n const urls = new Set<string>();\n\n for (const raw of extractImgTagSrcs(content)) {\n if (isMigrationMediaRef(raw)) {\n const sourceId = parseMigrationMediaRef(raw);\n if (sourceId?.startsWith(\"url:\")) {\n ingestLikelyImageUrl(urls, sourceId.slice(\"url:\".length));\n }\n continue;\n }\n ingestLikelyImageUrl(urls, raw);\n }\n\n for (const match of content.matchAll(SHORTCODE_IMAGE_PARAM_PATTERN)) {\n ingestLikelyImageUrl(urls, match[1]);\n }\n\n for (const match of content.matchAll(BARE_SRC_PARAM_PATTERN)) {\n ingestLikelyImageUrl(urls, match[1]);\n }\n\n for (const raw of extractDataBgImageUrls(content)) {\n ingestLikelyImageUrl(urls, raw);\n }\n\n for (const raw of extractCssBackgroundImageUrls(content)) {\n ingestLikelyImageUrl(urls, raw);\n }\n\n return {\n urls: [...urls],\n unresolvedAttachmentIds: extractAttachmentIdsFromContent(content),\n };\n}\n\n/**\n * Generic content-discovery pass: collect image URLs from HTML `<img>` tags,\n * section hero markers (`data-bg-image`), inline CSS backgrounds, and common\n * shortcode/builder attributes (`src=`, `image=`, `bg_image=`, …) without\n * parsing builder-specific structure (Tatsu, Elementor, etc.).\n */\nexport function discoverContentAssetUrls(content: string): string[] {\n return discoverContentAssets(content).urls;\n}\n\n/** @deprecated Use discoverContentAssetUrls — kept for call-site clarity during transition. */\nexport function extractInlineImageSrcs(content: string): string[] {\n return discoverContentAssetUrls(content);\n}\n\n// --- Migration media refs (`artinstack-migration://asset/{sourceId}`) ---\n\n/** Pseudo-URL scheme for portable migration asset pointers (not WordPress shortcodes). */\nexport const MIGRATION_MEDIA_REF_SCHEME = \"artinstack-migration://asset/\";\n\n/** Build `artinstack-migration://asset/{sourceId}` (percent-encodes the normalizer source id). */\nexport function formatMigrationMediaRef(sourceAssetId: string): string {\n return `${MIGRATION_MEDIA_REF_SCHEME}${encodeURIComponent(sourceAssetId)}`;\n}\n\nexport function isMigrationMediaRef(value: string): boolean {\n return value.trim().startsWith(MIGRATION_MEDIA_REF_SCHEME);\n}\n\n/** Parse a migration media ref back to the normalizer `sourceId`, or `undefined` if not a ref. */\nexport function parseMigrationMediaRef(value: string): string | undefined {\n const trimmed = value.trim();\n if (!trimmed.startsWith(MIGRATION_MEDIA_REF_SCHEME)) return undefined;\n const encoded = trimmed.slice(MIGRATION_MEDIA_REF_SCHEME.length);\n if (!encoded) return undefined;\n try {\n return decodeURIComponent(encoded);\n } catch {\n return undefined;\n }\n}\n\n/** Default `replaceWith` for `rewriteInlineImages` / `stampMigrationMediaRefs` (OSS-14). */\nexport function createMigrationMediaRefReplaceWith(): (\n ref: { sourceAssetId?: string },\n) => string {\n return (ref) => {\n if (!ref.sourceAssetId) return \"\";\n return formatMigrationMediaRef(ref.sourceAssetId);\n };\n}\n\n// --- Canonical inline keys & lookup index (OSS-15) ---\n\nexport interface CanonicalInlineAssetUrl {\n /** Canonical absolute URL stored on `NormalizedAsset.sourceUrl`. */\n canonicalUrl: string;\n /** Normalizer id: `url:{canonicalUrl}`. */\n sourceId: string;\n}\n\n/**\n * OSS-15: one canonical key for inline `url:` assets — apply origin rewrite then\n * `normalizeAssetUrl` so discovery, refs, and vault entities share the same id.\n */\nexport function canonicalizeInlineAssetUrl(\n raw: string,\n originUrlRewrite?: OriginUrlRewriteConfig,\n): CanonicalInlineAssetUrl | undefined {\n let value = raw.trim();\n if (!value || value.startsWith(\"data:\")) return undefined;\n\n if (originUrlRewrite) {\n value = rewriteOriginUrlsInText(value, originUrlRewrite);\n }\n\n const canonicalUrl = normalizeAssetUrl(value);\n if (!canonicalUrl) return undefined;\n\n return {\n canonicalUrl,\n sourceId: `url:${canonicalUrl}`,\n };\n}\n\nfunction urlPathname(url: string): string | undefined {\n try {\n return new URL(url, \"http://migration.local\").pathname;\n } catch {\n return undefined;\n }\n}\n\n/**\n * Map normalized upload URLs (and pathnames) → normalizer `sourceId`.\n * Attachment ids are WXR `post_id` strings; inline discoveries use `url:{src}`.\n */\nexport function buildMigrationMediaUrlIndex(\n entries: Iterable<{ sourceUrl: string; sourceId: string }>,\n): Map<string, string> {\n const index = new Map<string, string>();\n\n for (const entry of entries) {\n index.set(entry.sourceUrl, entry.sourceId);\n const normalized = normalizeAssetUrl(entry.sourceUrl);\n if (normalized) index.set(normalized, entry.sourceId);\n const pathname = urlPathname(entry.sourceUrl);\n if (pathname) index.set(pathname, entry.sourceId);\n }\n\n return index;\n}\n\nexport function resolveMigrationMediaSourceId(\n src: string,\n urlIndex: Map<string, string>,\n originUrlRewrite?: OriginUrlRewriteConfig,\n): string | undefined {\n const canonical = canonicalizeInlineAssetUrl(src, originUrlRewrite);\n const normalized = canonical?.canonicalUrl ?? normalizeAssetUrl(src);\n if (!normalized) return undefined;\n\n return (\n urlIndex.get(normalized) ??\n urlIndex.get(src) ??\n (urlPathname(normalized) ? urlIndex.get(urlPathname(normalized)!) : undefined)\n );\n}\n\n/** Merge attachment + inline asset rows into one stamp/lookup index (OSS-15). */\nexport function buildContentMediaUrlIndex(\n entries: Iterable<{ sourceUrl: string; sourceId: string }>,\n originUrlRewrite?: OriginUrlRewriteConfig,\n): Map<string, string> {\n const canonicalEntries: { sourceUrl: string; sourceId: string }[] = [];\n for (const entry of entries) {\n const canonical = canonicalizeInlineAssetUrl(entry.sourceUrl, originUrlRewrite);\n canonicalEntries.push({\n sourceUrl: canonical?.canonicalUrl ?? entry.sourceUrl,\n sourceId: entry.sourceId,\n });\n }\n return buildMigrationMediaUrlIndex(canonicalEntries);\n}\n"],"mappings":";AAAA,YAAY,aAAa;AAelB,SAAS,wBAAwB,MAAc,QAAwC;AAC5F,MAAI,CAAC,QAAQ,OAAO,MAAM,WAAW,EAAG,QAAO;AAE/C,MAAI,SAAS;AACb,aAAW,QAAQ,OAAO,OAAO;AAC/B,QAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAI,CAAC,KAAK,MAAO;AACjB,eAAS,OAAO,MAAM,KAAK,KAAK,EAAE,KAAK,KAAK,OAAO;AACnD;AAAA,IACF;AACA,aAAS,OAAO,QAAQ,KAAK,OAAO,KAAK,OAAO;AAAA,EAClD;AACA,SAAO;AACT;AAGO,SAAS,8BAA8B,aAAqB,cAA8C;AAC/G,QAAM,oBAAoB,YAAY,QAAQ,OAAO,EAAE;AACvD,QAAM,mBAAmB,aAAa,QAAQ,OAAO,EAAE;AACvD,SAAO;AAAA,IACL,OAAO;AAAA,MACL;AAAA,QACE,OAAO,GAAG,iBAAiB;AAAA,QAC3B,SAAS,GAAG,gBAAgB;AAAA,MAC9B;AAAA,IACF;AAAA,EACF;AACF;AAIA,IAAM,mBAAmB;AAEzB,IAAM,0BAA0B,IAAI,OAAO,OAAO,WAAW,gBAAgB,OAAO,GAAG;AAGvF,IAAM,oBAAoB,OAAO,iBAAiB,gBAAgB;AAElE,IAAM,gCAAgC,IAAI;AAAA,EACxC,OAAO,2DAA2D,iBAAiB;AAAA,EACnF;AACF;AAGA,IAAM,yBAAyB,IAAI;AAAA,EACjC,OAAO,uBAAuB,iBAAiB;AAAA,EAC/C;AACF;AAEA,IAAM,wBAAwB;AAG9B,IAAM,+BACJ;AAEF,IAAM,yBAAyB,IAAI;AAAA,EACjC,OAAO,iDAAiD,iBAAiB;AAAA,EACzE;AACF;AAEA,IAAM,6BAA6B,IAAI;AAAA,EACrC,OAAO,yBAAyB,iBAAiB;AAAA,EACjD;AACF;AAEA,IAAM,sBAAsB;AAE5B,IAAM,gCAAgC;AAGtC,IAAM,gCACJ;AAkBF,SAAS,qBAAqB,MAAmB,KAA+B;AAC9E,QAAM,aAAa,kBAAkB,OAAO,EAAE;AAC9C,MAAI,cAAc,iBAAiB,UAAU,GAAG;AAC9C,SAAK,IAAI,UAAU;AAAA,EACrB;AACF;AAEA,SAAS,kBAAkB,SAA2B;AACpD,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAC7B,QAAM,IAAY,aAAK,SAAS,EAAE,KAAK,MAAM,CAAC;AAC9C,QAAM,OAAiB,CAAC;AACxB,IAAE,UAAU,EAAE,KAAK,CAAC,GAAG,OAAO;AAC5B,UAAM,MAAM,EAAE,EAAE,EAAE,KAAK,KAAK,GAAG,KAAK;AACpC,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB,CAAC;AACD,SAAO;AACT;AAEA,SAAS,kBAAkB,OAAwB;AACjD,QAAM,cAAc,MAAM,MAAM,KAAK,CAAC,EAAE,CAAC,KAAK;AAC9C,QAAM,eAAe,YAAY,MAAM,KAAK,CAAC,EAAE,CAAC,KAAK;AACrD,SAAO,wBAAwB,KAAK,YAAY;AAClD;AAEA,SAAS,uBAAuB,SAA2B;AACzD,QAAM,OAAiB,CAAC;AACxB,aAAW,SAAS,QAAQ,SAAS,qBAAqB,GAAG;AAC3D,UAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB;AACA,SAAO;AACT;AAEA,SAAS,8BAA8B,SAA2B;AAChE,QAAM,OAAiB,CAAC;AACxB,aAAW,SAAS,QAAQ,SAAS,4BAA4B,GAAG;AAClE,UAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB;AACA,SAAO;AACT;AAGO,SAAS,mBAAmB,SAA2B;AAC5D,SAAO,kBAAkB,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,WAAW,OAAO,CAAC;AAC5E;AAGO,SAAS,kBAAkB,KAAiC;AACjE,QAAM,UAAU,IAAI,KAAK;AACzB,MAAI,CAAC,WAAW,QAAQ,WAAW,OAAO,EAAG,QAAO;AACpD,MAAI,QAAQ,WAAW,IAAI,EAAG,QAAO,SAAS,OAAO;AACrD,SAAO;AACT;AAGO,SAAS,iBAAiB,KAAsB;AACrD,MAAI,CAAC,OAAO,IAAI,WAAW,OAAO,EAAG,QAAO;AAE5C,MAAI,IAAI,WAAW,GAAG,GAAG;AACvB,WAAO,kBAAkB,GAAG;AAAA,EAC9B;AAEA,MAAI,CAAC,gBAAgB,KAAK,GAAG,EAAG,QAAO;AAEvC,MAAI;AACF,UAAM,EAAE,SAAS,IAAI,IAAI,IAAI,GAAG;AAChC,QAAI,kBAAkB,QAAQ,EAAG,QAAO;AAAA,EAC1C,QAAQ;AAAA,EAER;AAEA,SAAO,kBAAkB,GAAG;AAC9B;AAEA,SAAS,sBACP,YACA,KACA,OACA,MACM;AACN,QAAM,aAAa,kBAAkB,OAAO,EAAE;AAC9C,MAAI,CAAC,cAAc,CAAC,iBAAiB,UAAU,EAAG;AAClD,aAAW,KAAK,EAAE,KAAK,YAAY,OAAO,KAAK,CAAC;AAClD;AAEA,SAAS,+BAA+B,SAA2C;AACjF,QAAM,aAAuC,CAAC;AAE9C,aAAW,SAAS,QAAQ,SAAS,qBAAqB,GAAG;AAC3D,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,4BAA4B,GAAG;AAClE,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,sBAAsB,GAAG;AAC5D,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,mBAAmB,GAAG;AACzD,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,0BAA0B,GAAG;AAChE,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AAEA,SAAO;AACT;AAQO,SAAS,mCAAmC,SAA2B;AAC5E,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAE7B,QAAM,SAAS,CAAC,GAAG,+BAA+B,OAAO,CAAC,EAAE,KAAK,CAAC,MAAM,UAAU;AAChF,QAAI,KAAK,SAAS,MAAM,KAAM,QAAO,KAAK,OAAO,MAAM;AACvD,WAAO,KAAK,QAAQ,MAAM;AAAA,EAC5B,CAAC;AAED,QAAM,OAAiB,CAAC;AACxB,QAAM,OAAO,oBAAI,IAAY;AAC7B,aAAW,aAAa,QAAQ;AAC9B,QAAI,KAAK,IAAI,UAAU,GAAG,EAAG;AAC7B,SAAK,IAAI,UAAU,GAAG;AACtB,SAAK,KAAK,UAAU,GAAG;AAAA,EACzB;AACA,SAAO;AACT;AAGO,SAAS,+BAA+B,SAAqC;AAClF,SAAO,mCAAmC,OAAO,EAAE,CAAC;AACtD;AAEA,SAAS,sBAAsB,KAAmC;AAChE,MAAI,CAAC,KAAK,KAAK,EAAG,QAAO,CAAC;AAC1B,SAAO,IACJ,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,CAAC,SAAS,QAAQ,KAAK,IAAI,CAAC;AACxC;AAEA,SAAS,gCAAgC,SAA2B;AAClE,QAAM,MAAM,oBAAI,IAAY;AAE5B,aAAW,SAAS,QAAQ,SAAS,6BAA6B,GAAG;AACnE,UAAM,KAAK,MAAM,CAAC,GAAG,KAAK;AAC1B,QAAI,GAAI,KAAI,IAAI,EAAE;AAAA,EACpB;AAEA,aAAW,SAAS,QAAQ,SAAS,6BAA6B,GAAG;AACnE,eAAW,MAAM,sBAAsB,MAAM,CAAC,CAAC,GAAG;AAChD,UAAI,IAAI,EAAE;AAAA,IACZ;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,GAAG;AAChB;AAMO,SAAS,sBAAsB,SAAwC;AAC5E,MAAI,CAAC,QAAQ,KAAK,GAAG;AACnB,WAAO,EAAE,MAAM,CAAC,GAAG,yBAAyB,CAAC,EAAE;AAAA,EACjD;AAEA,QAAM,OAAO,oBAAI,IAAY;AAE7B,aAAW,OAAO,kBAAkB,OAAO,GAAG;AAC5C,QAAI,oBAAoB,GAAG,GAAG;AAC5B,YAAM,WAAW,uBAAuB,GAAG;AAC3C,UAAI,UAAU,WAAW,MAAM,GAAG;AAChC,6BAAqB,MAAM,SAAS,MAAM,OAAO,MAAM,CAAC;AAAA,MAC1D;AACA;AAAA,IACF;AACA,yBAAqB,MAAM,GAAG;AAAA,EAChC;AAEA,aAAW,SAAS,QAAQ,SAAS,6BAA6B,GAAG;AACnE,yBAAqB,MAAM,MAAM,CAAC,CAAC;AAAA,EACrC;AAEA,aAAW,SAAS,QAAQ,SAAS,sBAAsB,GAAG;AAC5D,yBAAqB,MAAM,MAAM,CAAC,CAAC;AAAA,EACrC;AAEA,aAAW,OAAO,uBAAuB,OAAO,GAAG;AACjD,yBAAqB,MAAM,GAAG;AAAA,EAChC;AAEA,aAAW,OAAO,8BAA8B,OAAO,GAAG;AACxD,yBAAqB,MAAM,GAAG;AAAA,EAChC;AAEA,SAAO;AAAA,IACL,MAAM,CAAC,GAAG,IAAI;AAAA,IACd,yBAAyB,gCAAgC,OAAO;AAAA,EAClE;AACF;AAQO,SAAS,yBAAyB,SAA2B;AAClE,SAAO,sBAAsB,OAAO,EAAE;AACxC;AAGO,SAAS,uBAAuB,SAA2B;AAChE,SAAO,yBAAyB,OAAO;AACzC;AAKO,IAAM,6BAA6B;AAGnC,SAAS,wBAAwB,eAA+B;AACrE,SAAO,GAAG,0BAA0B,GAAG,mBAAmB,aAAa,CAAC;AAC1E;AAEO,SAAS,oBAAoB,OAAwB;AAC1D,SAAO,MAAM,KAAK,EAAE,WAAW,0BAA0B;AAC3D;AAGO,SAAS,uBAAuB,OAAmC;AACxE,QAAM,UAAU,MAAM,KAAK;AAC3B,MAAI,CAAC,QAAQ,WAAW,0BAA0B,EAAG,QAAO;AAC5D,QAAM,UAAU,QAAQ,MAAM,2BAA2B,MAAM;AAC/D,MAAI,CAAC,QAAS,QAAO;AACrB,MAAI;AACF,WAAO,mBAAmB,OAAO;AAAA,EACnC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,qCAEJ;AACV,SAAO,CAAC,QAAQ;AACd,QAAI,CAAC,IAAI,cAAe,QAAO;AAC/B,WAAO,wBAAwB,IAAI,aAAa;AAAA,EAClD;AACF;AAeO,SAAS,2BACd,KACA,kBACqC;AACrC,MAAI,QAAQ,IAAI,KAAK;AACrB,MAAI,CAAC,SAAS,MAAM,WAAW,OAAO,EAAG,QAAO;AAEhD,MAAI,kBAAkB;AACpB,YAAQ,wBAAwB,OAAO,gBAAgB;AAAA,EACzD;AAEA,QAAM,eAAe,kBAAkB,KAAK;AAC5C,MAAI,CAAC,aAAc,QAAO;AAE1B,SAAO;AAAA,IACL;AAAA,IACA,UAAU,OAAO,YAAY;AAAA,EAC/B;AACF;AAEA,SAAS,YAAY,KAAiC;AACpD,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,wBAAwB,EAAE;AAAA,EAChD,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAMO,SAAS,4BACd,SACqB;AACrB,QAAM,QAAQ,oBAAI,IAAoB;AAEtC,aAAW,SAAS,SAAS;AAC3B,UAAM,IAAI,MAAM,WAAW,MAAM,QAAQ;AACzC,UAAM,aAAa,kBAAkB,MAAM,SAAS;AACpD,QAAI,WAAY,OAAM,IAAI,YAAY,MAAM,QAAQ;AACpD,UAAM,WAAW,YAAY,MAAM,SAAS;AAC5C,QAAI,SAAU,OAAM,IAAI,UAAU,MAAM,QAAQ;AAAA,EAClD;AAEA,SAAO;AACT;AAEO,SAAS,8BACd,KACA,UACA,kBACoB;AACpB,QAAM,YAAY,2BAA2B,KAAK,gBAAgB;AAClE,QAAM,aAAa,WAAW,gBAAgB,kBAAkB,GAAG;AACnE,MAAI,CAAC,WAAY,QAAO;AAExB,SACE,SAAS,IAAI,UAAU,KACvB,SAAS,IAAI,GAAG,MACf,YAAY,UAAU,IAAI,SAAS,IAAI,YAAY,UAAU,CAAE,IAAI;AAExE;AAGO,SAAS,0BACd,SACA,kBACqB;AACrB,QAAM,mBAA8D,CAAC;AACrE,aAAW,SAAS,SAAS;AAC3B,UAAM,YAAY,2BAA2B,MAAM,WAAW,gBAAgB;AAC9E,qBAAiB,KAAK;AAAA,MACpB,WAAW,WAAW,gBAAgB,MAAM;AAAA,MAC5C,UAAU,MAAM;AAAA,IAClB,CAAC;AAAA,EACH;AACA,SAAO,4BAA4B,gBAAgB;AACrD;","names":[]}
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  isMigrationMediaRef,
3
3
  parseMigrationMediaRef
4
- } from "./chunk-WHGUE5FC.js";
4
+ } from "./chunk-S4GMDRGX.js";
5
5
 
6
6
  // src/transformers/html-to-grapes/index.ts
7
7
  import * as cheerio from "cheerio";
@@ -961,4 +961,4 @@ export {
961
961
  validateTiptapDoc,
962
962
  expandMigrationMediaRefs
963
963
  };
964
- //# sourceMappingURL=chunk-ALLFBWBO.js.map
964
+ //# sourceMappingURL=chunk-S4SUJT2D.js.map
@@ -6,10 +6,10 @@ import {
6
6
  enumerateSquarespaceEntities,
7
7
  summarizeSquarespaceExport,
8
8
  validateSquarespaceExportFile
9
- } from "./chunk-Z3L6N63Y.js";
9
+ } from "./chunk-3A2PA4P3.js";
10
10
  import {
11
11
  stampMigrationMediaRefs
12
- } from "./chunk-KYNKJ4XV.js";
12
+ } from "./chunk-BONZ3U3I.js";
13
13
  import {
14
14
  linkToPath,
15
15
  sanitizeSlug
@@ -18,11 +18,12 @@ import {
18
18
  buildContentMediaUrlIndex,
19
19
  canonicalizeInlineAssetUrl,
20
20
  discoverContentAssetUrls,
21
+ discoverContentAssets,
21
22
  normalizeAssetUrl,
22
23
  parseMigrationMediaRef,
23
24
  resolveFeaturedContentAssetUrl,
24
25
  rewriteOriginUrlsInText
25
- } from "./chunk-WHGUE5FC.js";
26
+ } from "./chunk-S4GMDRGX.js";
26
27
 
27
28
  // src/parsers/wordpress/parse-wxr.ts
28
29
  import { readFile } from "fs/promises";
@@ -420,15 +421,33 @@ function flattenContactFormShortcodes(content, widgetRegistry) {
420
421
  }
421
422
  return html;
422
423
  }
424
+ function emitInlineGalleryFromIds(idList) {
425
+ const images = idList.map((id) => `<img data-wp-attachment-id="${escapeLayoutAttr(id)}" alt="" />`).join("");
426
+ return `<figure data-wp-inline-gallery>${images}</figure>`;
427
+ }
428
+ function parseGalleryAttachmentIds(params) {
429
+ const ids = extractBareOrQuotedParam(params, "ids");
430
+ const idList = ids?.split(",").map((part) => part.trim()).filter((part) => /^\d+$/.test(part));
431
+ return idList?.length ? idList : void 0;
432
+ }
433
+ function flattenIdGalleryShortcode(content, tag) {
434
+ const escaped = escapeRegExp(tag);
435
+ const pattern = new RegExp(`\\[${escaped}\\b([^\\]]*)\\](?:\\s*\\[\\/${escaped}\\])?`, "gi");
436
+ return content.replace(pattern, (fullMatch, params) => {
437
+ const idList = parseGalleryAttachmentIds(params);
438
+ if (idList?.length) {
439
+ return emitInlineGalleryFromIds(idList);
440
+ }
441
+ return fullMatch;
442
+ });
443
+ }
423
444
  function flattenGalleryShortcodes(content, widgetRegistry) {
424
445
  const tag = escapeRegExp(widgetRegistry.galleryShortcode);
425
446
  const pattern = new RegExp(`\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\])?`, "gi");
426
447
  return content.replace(pattern, (_, params) => {
427
- const ids = extractBareOrQuotedParam(params, "ids");
428
- const idList = ids?.split(",").map((part) => part.trim()).filter((part) => /^\d+$/.test(part));
448
+ const idList = parseGalleryAttachmentIds(params);
429
449
  if (idList?.length) {
430
- const images = idList.map((id) => `<img data-wp-attachment-id="${escapeLayoutAttr(id)}" alt="" />`).join("");
431
- return `<figure data-wp-inline-gallery>${images}</figure>`;
450
+ return emitInlineGalleryFromIds(idList);
432
451
  }
433
452
  const category = extractBareOrQuotedParam(params, "category") ?? extractBareOrQuotedParam(params, "type");
434
453
  return emitWidgetStub("portfolio", {
@@ -437,6 +456,13 @@ function flattenGalleryShortcodes(content, widgetRegistry) {
437
456
  });
438
457
  });
439
458
  }
459
+ function flattenIdBasedGalleryShortcodes(content, widgetRegistry) {
460
+ let html = content;
461
+ for (const tag of widgetRegistry.idGalleryShortcodes) {
462
+ html = flattenIdGalleryShortcode(html, tag);
463
+ }
464
+ return html;
465
+ }
440
466
  function flattenPortfolioShortcodes(content, widgetRegistry) {
441
467
  const tag = escapeRegExp(widgetRegistry.portfolioShortcode);
442
468
  const pattern = new RegExp(`\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\])?`, "gi");
@@ -471,6 +497,7 @@ function flattenVideoShortcodes(content, widgetRegistry) {
471
497
  function flattenWordPressWidgets(content, widgetRegistry = WORDPRESS_WIDGET_REGISTRY) {
472
498
  let html = content;
473
499
  html = flattenGalleryShortcodes(html, widgetRegistry);
500
+ html = flattenIdBasedGalleryShortcodes(html, widgetRegistry);
474
501
  html = flattenPortfolioShortcodes(html, widgetRegistry);
475
502
  html = flattenMapShortcodes(html, widgetRegistry);
476
503
  html = flattenContactFormShortcodes(html, widgetRegistry);
@@ -520,6 +547,7 @@ function flattenWordPressBuilders(content, options = {}) {
520
547
 
521
548
  // src/parsers/wordpress/parse-wxr.ts
522
549
  var PLATFORM = "wordpress";
550
+ var DEFAULT_WORDPRESS_PORTFOLIO_CPT_SLUGS = ["portfolio"];
523
551
  var WOOCOMMERCE_STUB_PAGE_SLUGS = /* @__PURE__ */ new Set(["cart", "checkout", "my-account"]);
524
552
  var WOOCOMMERCE_STUB_SHORTCODE = /^\[woocommerce_(?:cart|checkout|my_account)\]\s*$/i;
525
553
  function isWooCommerceStubPage(slug, contentHtml) {
@@ -559,15 +587,76 @@ function getContentEncoded(item) {
559
587
  }
560
588
  return textValue(item.encoded);
561
589
  }
562
- function sourceMeta(id, link, exportedAt) {
590
+ function sourceMeta(id, link, exportedAt, postType) {
563
591
  return {
564
592
  platform: PLATFORM,
565
593
  id,
566
594
  url: link || void 0,
567
595
  path: linkToPath(link),
568
- exportedAt
596
+ exportedAt,
597
+ ...postType ? { postType } : {}
598
+ };
599
+ }
600
+ function resolvePortfolioCptSlugs(options) {
601
+ const slugs = options.portfolioCptSlugs ?? DEFAULT_WORDPRESS_PORTFOLIO_CPT_SLUGS;
602
+ return new Set(slugs.map((slug) => slug.toLowerCase()));
603
+ }
604
+ function portfolioCptSourceId(postId) {
605
+ return `portfolio:${postId}`;
606
+ }
607
+ function isPortfolioCptPostType(postType, portfolioCptSlugs) {
608
+ return portfolioCptSlugs.has(postType.toLowerCase());
609
+ }
610
+ function countWxrPortfolioCptItems(items, portfolioCptSlugs = new Set(DEFAULT_WORDPRESS_PORTFOLIO_CPT_SLUGS)) {
611
+ return items.filter((item) => isPortfolioCptPostType(textValue(item.post_type), portfolioCptSlugs)).length;
612
+ }
613
+ function isImportableWxrPostType(postType, portfolioCptSlugs) {
614
+ const normalized = postType.toLowerCase();
615
+ return normalized === "post" || normalized === "page" || normalized === "attachment" || isPortfolioCptPostType(normalized, portfolioCptSlugs);
616
+ }
617
+ function contentForWooStubCheck(item, options) {
618
+ let html = getContentEncoded(item);
619
+ if (options.originUrlRewrite) {
620
+ html = rewriteOriginUrlsInText(html, options.originUrlRewrite);
621
+ }
622
+ if (options.flattenBuilders !== false) {
623
+ html = flattenWordPressBuilders(html).html;
624
+ }
625
+ return html;
626
+ }
627
+ function summarizeWxrImport(items, options) {
628
+ const portfolioCptSlugs = resolvePortfolioCptSlugs(options);
629
+ let importableItemCount = 0;
630
+ let skippedWooCommerceStubPages = 0;
631
+ const skippedPostTypes = {};
632
+ for (const item of items) {
633
+ const postType = textValue(item.post_type) || "unknown";
634
+ const normalizedType = postType.toLowerCase();
635
+ if (isImportableWxrPostType(normalizedType, portfolioCptSlugs)) {
636
+ if (normalizedType === "page" && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(
637
+ sanitizeSlug(textValue(item.post_name) || textValue(item.title) || textValue(item.post_id)),
638
+ contentForWooStubCheck(item, options)
639
+ )) {
640
+ skippedWooCommerceStubPages++;
641
+ continue;
642
+ }
643
+ importableItemCount++;
644
+ continue;
645
+ }
646
+ skippedPostTypes[normalizedType] = (skippedPostTypes[normalizedType] ?? 0) + 1;
647
+ }
648
+ const skippedUnsupported = Object.values(skippedPostTypes).reduce((sum, count) => sum + count, 0);
649
+ return {
650
+ importableItemCount,
651
+ unsupportedOnly: importableItemCount === 0 && skippedUnsupported > 0,
652
+ skippedPostTypes,
653
+ ...skippedWooCommerceStubPages > 0 ? { skippedWooCommerceStubPages } : {}
569
654
  };
570
655
  }
656
+ async function summarizeWxrImportFromFile(filePath, options = { filePath }) {
657
+ const xml = await readFile(filePath, "utf8");
658
+ return summarizeWxrImport(parseItems(xml), options);
659
+ }
571
660
  function getExcerpt(item) {
572
661
  const excerpt = item.excerpt;
573
662
  if (!excerpt) return "";
@@ -661,9 +750,10 @@ function collectTaxonomies(items) {
661
750
  }
662
751
  return { categories, tags };
663
752
  }
664
- function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt, originUrlRewrite) {
753
+ function collectInlineAssets(html, attachmentIndex, seenUrls, seenAttachmentIds, exportedAt, originUrlRewrite) {
665
754
  const assets = [];
666
- for (const discovered of discoverContentAssetUrls(html)) {
755
+ const discovery = discoverContentAssets(html);
756
+ for (const discovered of discovery.urls) {
667
757
  const canonical = canonicalizeInlineAssetUrl(discovered, originUrlRewrite);
668
758
  if (!canonical) continue;
669
759
  if (seenUrls.has(canonical.canonicalUrl)) continue;
@@ -683,9 +773,22 @@ function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt, origin
683
773
  mimeType: guessMime(filename)
684
774
  });
685
775
  }
686
- for (const [id, entry] of attachmentIndex) {
776
+ for (const attachmentId of discovery.unresolvedAttachmentIds) {
777
+ if (seenAttachmentIds.has(attachmentId)) continue;
778
+ seenAttachmentIds.add(attachmentId);
779
+ const entry = attachmentIndex.get(attachmentId);
780
+ if (!entry) continue;
687
781
  if (seenUrls.has(entry.sourceUrl)) continue;
688
- void id;
782
+ seenUrls.add(entry.sourceUrl);
783
+ assets.push({
784
+ type: "asset",
785
+ source: sourceMeta(attachmentId, entry.sourceUrl, exportedAt),
786
+ sourceId: attachmentId,
787
+ sourceUrl: entry.sourceUrl,
788
+ filename: entry.filename,
789
+ mimeType: entry.mimeType ?? guessMime(entry.filename),
790
+ caption: entry.title
791
+ });
689
792
  }
690
793
  return assets;
691
794
  }
@@ -740,20 +843,25 @@ async function* enumerateWxrEntities(options) {
740
843
  caption: entry.title
741
844
  };
742
845
  }
846
+ const portfolioCptSlugs = resolvePortfolioCptSlugs(options);
743
847
  for (const item of items) {
744
848
  const postType = textValue(item.post_type);
745
- if (postType !== "post" && postType !== "page") continue;
849
+ const isPost = postType === "post";
850
+ const isPage = postType === "page";
851
+ const isPortfolioCpt = isPortfolioCptPostType(postType, portfolioCptSlugs);
852
+ if (!isPost && !isPage && !isPortfolioCpt) continue;
746
853
  const id = textValue(item.post_id);
747
854
  const link = maybeRewriteUrl(textValue(item.link), options.originUrlRewrite);
748
855
  const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
749
856
  let contentHtml = preprocessContent(getContentEncoded(item), options);
750
- if (postType === "page" && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(slug, contentHtml)) {
857
+ if (isPage && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(slug, contentHtml)) {
751
858
  continue;
752
859
  }
753
860
  const inlineAssets = collectInlineAssets(
754
861
  contentHtml,
755
862
  attachmentIndex,
756
863
  seenAssetUrls,
864
+ emittedAttachmentIds,
757
865
  options.exportedAt,
758
866
  options.originUrlRewrite
759
867
  );
@@ -788,7 +896,7 @@ async function* enumerateWxrEntities(options) {
788
896
  if (domain === "category") categorySlugs.push(nicename);
789
897
  if (domain === "post_tag") tagSlugs.push(nicename);
790
898
  }
791
- if (postType === "post") {
899
+ if (isPost) {
792
900
  const thumbnailId = getPostMeta(item, "_thumbnail_id");
793
901
  const featuredAssetSourceId = resolveFeaturedAssetSourceId(
794
902
  thumbnailId,
@@ -813,11 +921,12 @@ async function* enumerateWxrEntities(options) {
813
921
  };
814
922
  yield post;
815
923
  } else {
816
- const isHomePage = getPostMeta(item, "_wp_show_on_front") === "1" || getPostMeta(item, "page_on_front") === "1";
924
+ const isHomePage = !isPortfolioCpt && (getPostMeta(item, "_wp_show_on_front") === "1" || getPostMeta(item, "page_on_front") === "1");
925
+ const pageSourceId = isPortfolioCpt ? portfolioCptSourceId(id) : id;
817
926
  const page = {
818
927
  type: "page",
819
- source: sourceMeta(id, link, options.exportedAt),
820
- sourceId: id,
928
+ source: sourceMeta(pageSourceId, link, options.exportedAt, isPortfolioCpt ? postType : void 0),
929
+ sourceId: pageSourceId,
821
930
  title: textValue(item.title) || slug,
822
931
  slug,
823
932
  contentHtml,
@@ -828,7 +937,7 @@ async function* enumerateWxrEntities(options) {
828
937
  }
829
938
  }
830
939
  }
831
- async function validateWxrFile(filePath) {
940
+ async function validateWxrFile(filePath, options = { filePath }) {
832
941
  const issues = [];
833
942
  let xml;
834
943
  try {
@@ -837,7 +946,12 @@ async function validateWxrFile(filePath) {
837
946
  return {
838
947
  ok: false,
839
948
  issues: [{ code: "file_not_found", message: `Cannot read file: ${filePath}` }],
840
- summary: {}
949
+ summary: {},
950
+ importSummary: {
951
+ importableItemCount: 0,
952
+ unsupportedOnly: false,
953
+ skippedPostTypes: {}
954
+ }
841
955
  };
842
956
  }
843
957
  const looksLikeWxr = xml.includes("<rss") && (xml.includes("wp:wxr_version") || xml.includes("xmlns:wp=") || xml.includes("WordPress eXtended RSS"));
@@ -845,18 +959,20 @@ async function validateWxrFile(filePath) {
845
959
  issues.push({ code: "invalid_wxr", message: "File does not appear to be WordPress WXR" });
846
960
  }
847
961
  const items = parseItems(xml);
962
+ const importSummary = summarizeWxrImport(items, { ...options, filePath });
848
963
  const summary = {
849
964
  posts: items.filter((i) => textValue(i.post_type) === "post").length,
850
965
  pages: items.filter((i) => textValue(i.post_type) === "page").length,
851
966
  assets: items.filter((i) => textValue(i.post_type) === "attachment").length,
852
- portfolios: 0,
967
+ portfolioCpt: countWxrPortfolioCptItems(items),
853
968
  categories: 0,
854
- tags: 0
969
+ tags: 0,
970
+ importableItemCount: importSummary.importableItemCount
855
971
  };
856
972
  const { categories, tags } = collectTaxonomies(items);
857
973
  summary.categories = categories.size;
858
974
  summary.tags = tags.size;
859
- return { ok: issues.length === 0, issues, summary };
975
+ return { ok: issues.length === 0, issues, summary, importSummary };
860
976
  }
861
977
 
862
978
  // src/parsers/wordpress/index.ts
@@ -870,24 +986,33 @@ function resolveWxrOptions(input) {
870
986
  filePath: String(obj.path),
871
987
  originUrlRewrite: obj.originUrlRewrite,
872
988
  flattenBuilders: obj.flattenBuilders,
873
- skipWooCommerceStubPages: obj.skipWooCommerceStubPages
989
+ skipWooCommerceStubPages: obj.skipWooCommerceStubPages,
990
+ portfolioCptSlugs: obj.portfolioCptSlugs
874
991
  };
875
992
  }
876
993
  throw new Error(
877
- "WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders?, skipWooCommerceStubPages? })"
994
+ "WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders?, skipWooCommerceStubPages?, portfolioCptSlugs? })"
878
995
  );
879
996
  }
880
997
  var wordpressAdapter = {
881
998
  platform: "wordpress",
882
999
  async validateInput(input) {
883
- const { filePath } = resolveWxrOptions(input);
884
- const result = await validateWxrFile(filePath);
1000
+ const options = resolveWxrOptions(input);
1001
+ const result = await validateWxrFile(options.filePath, options);
885
1002
  return {
886
1003
  ok: result.ok,
887
1004
  issues: result.issues,
888
- summary: result.summary
1005
+ summary: {
1006
+ ...result.summary,
1007
+ unsupportedOnly: result.importSummary.unsupportedOnly,
1008
+ skippedPostTypes: result.importSummary.skippedPostTypes
1009
+ }
889
1010
  };
890
1011
  },
1012
+ async getImportSummary(input) {
1013
+ const options = resolveWxrOptions(input);
1014
+ return summarizeWxrImportFromFile(options.filePath, options);
1015
+ },
891
1016
  enumerateEntities(ctx) {
892
1017
  return enumerateWxrEntities(resolveWxrOptions(ctx.input));
893
1018
  }
@@ -2865,4 +2990,4 @@ export {
2865
2990
  wixAdapter,
2866
2991
  getAdapter
2867
2992
  };
2868
- //# sourceMappingURL=chunk-CB5KRANW.js.map
2993
+ //# sourceMappingURL=chunk-YLVPZ4M3.js.map