@artinstack/migrator 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{bundle-uAAHehbv.d.ts → bundle-Do-9ikQv.d.ts} +1 -1
- package/dist/{chunk-Z3L6N63Y.js → chunk-3A2PA4P3.js} +80 -9
- package/dist/chunk-3A2PA4P3.js.map +1 -0
- package/dist/{chunk-KYNKJ4XV.js → chunk-BONZ3U3I.js} +2 -2
- package/dist/{chunk-HI7JHWZU.js → chunk-LC7CGWDN.js} +1 -1
- package/dist/chunk-LC7CGWDN.js.map +1 -0
- package/dist/{chunk-WHGUE5FC.js → chunk-S4GMDRGX.js} +39 -4
- package/dist/chunk-S4GMDRGX.js.map +1 -0
- package/dist/{chunk-ALLFBWBO.js → chunk-S4SUJT2D.js} +2 -2
- package/dist/{chunk-CB5KRANW.js → chunk-YLVPZ4M3.js} +155 -30
- package/dist/chunk-YLVPZ4M3.js.map +1 -0
- package/dist/cli/index.js +12 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +6 -6
- package/dist/index.js +12 -6
- package/dist/lib/index.d.ts +1 -1
- package/dist/lib/index.js +3 -1
- package/dist/{media-urls-w46-CWUp.d.ts → media-urls-u49RCyPn.d.ts} +15 -1
- package/dist/normalizer/index.d.ts +4 -4
- package/dist/normalizer/index.js +1 -1
- package/dist/{rewrite-inline-images-DyxKUNs3.d.ts → rewrite-inline-images-BsgSquzV.d.ts} +1 -1
- package/dist/sinks/index.d.ts +31 -6
- package/dist/sinks/index.js +8 -4
- package/dist/transformers/index.d.ts +3 -3
- package/dist/transformers/index.js +3 -3
- package/dist/{types-DWOP8Dcy.d.ts → types-TCHy3Oko.d.ts} +17 -1
- package/package.json +1 -1
- package/dist/chunk-CB5KRANW.js.map +0 -1
- package/dist/chunk-HI7JHWZU.js.map +0 -1
- package/dist/chunk-WHGUE5FC.js.map +0 -1
- package/dist/chunk-Z3L6N63Y.js.map +0 -1
- /package/dist/{chunk-KYNKJ4XV.js.map → chunk-BONZ3U3I.js.map} +0 -0
- /package/dist/{chunk-ALLFBWBO.js.map → chunk-S4SUJT2D.js.map} +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/lib/media-urls.ts"],"sourcesContent":["import * as cheerio from \"cheerio\";\n\n// --- Origin URL rewrite (gateway → public origin before parse/discovery) ---\n\nexport interface OriginUrlRewriteRule {\n /** Literal substring or regex matched against the full text block. */\n match: string | RegExp;\n replace: string;\n}\n\nexport interface OriginUrlRewriteConfig {\n rules: OriginUrlRewriteRule[];\n}\n\n/** Swap legacy gateway/staging host fragments before parse, fetch, or asset discovery. */\nexport function rewriteOriginUrlsInText(text: string, config: OriginUrlRewriteConfig): string {\n if (!text || config.rules.length === 0) return text;\n\n let result = text;\n for (const rule of config.rules) {\n if (typeof rule.match === \"string\") {\n if (!rule.match) continue;\n result = result.split(rule.match).join(rule.replace);\n continue;\n }\n result = result.replace(rule.match, rule.replace);\n }\n return result;\n}\n\n/** Build a rule that rewrites API-gateway `/prod/wp-content/` paths to a public origin. */\nexport function createWpContentGatewayRewrite(gatewayBase: string, publicOrigin: string): OriginUrlRewriteConfig {\n const normalizedGateway = gatewayBase.replace(/\\/$/, \"\");\n const normalizedPublic = publicOrigin.replace(/\\/$/, \"\");\n return {\n rules: [\n {\n match: `${normalizedGateway}/wp-content/`,\n replace: `${normalizedPublic}/wp-content/`,\n },\n ],\n };\n}\n\n// --- Content asset URL discovery & normalization ---\n\nconst IMAGE_EXTENSIONS = \"jpe?g|png|gif|webp|avif|svg\";\n/** Image file extension in a path or URL (allows trailing `?query` / `#hash`). */\nconst IMAGE_EXTENSION_PATTERN = new RegExp(String.raw`\\.(?:${IMAGE_EXTENSIONS})\\b`, \"i\");\n\n/** Captured value must contain an image extension — skips `url=\"…/about\"`, `<iframe src=\"…youtube…\">`, etc. */\nconst QUOTED_IMAGE_PATH = String.raw`[^\"']+\\.(?:${IMAGE_EXTENSIONS})(?:\\?[^\"'#]*)?(?:#.*)?`;\n\nconst SHORTCODE_IMAGE_PARAM_PATTERN = new RegExp(\n String.raw`\\b(?:image|bg_image|background_image|url)\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\n/** Bare `src=\"…jpg\"` outside `<img>` (shortcode fragments); `<img src>` handled by cheerio. */\nconst BARE_SRC_PARAM_PATTERN = new RegExp(\n String.raw`\\bsrc\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\nconst DATA_BG_IMAGE_PATTERN = /\\bdata-bg-image\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\n/** Inline CSS `background` / `background-image: url(…)` (quoted or bare). */\nconst BACKGROUND_IMAGE_URL_PATTERN =\n /background(?:-image)?\\s*:[^;]*?url\\s*\\(\\s*(['\"]?)([^'\")]+)\\1\\s*\\)/gi;\n\nconst HERO_URL_PARAM_PATTERN = new RegExp(\n String.raw`\\b(?:bg_image|background_image)\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\nconst INLINE_IMAGE_PARAM_PATTERN = new RegExp(\n String.raw`\\bimage\\s*=\\s*[\"'](${QUOTED_IMAGE_PATH})[\"']`,\n \"gi\",\n);\n\nconst IMG_TAG_SRC_PATTERN = /<img\\b[^>]*\\bsrc\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\nconst DATA_WP_ATTACHMENT_ID_PATTERN = /\\bdata-wp-attachment-id\\s*=\\s*[\"'](\\d+)[\"']/gi;\n\n/** Builder / core gallery shortcodes with explicit `ids=` lists (pre- or post-flatten). */\nconst SHORTCODE_GALLERY_IDS_PATTERN =\n /\\[(?:gallery|oshine_gallery|vc_gallery|nggallery)\\b[^\\]]*\\bids\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\nexport interface ContentAssetDiscovery {\n /** Network-resolvable image paths (`<img>`, backgrounds, shortcode `image=` attrs, …). */\n urls: string[];\n /**\n * WordPress attachment post ids referenced in content without an inline URL in this\n * file context (`data-wp-attachment-id`, `[gallery ids=…]`, `[oshine_gallery ids=…]`, …).\n */\n unresolvedAttachmentIds: string[];\n}\n\ninterface FeaturedAssetCandidate {\n url: string;\n index: number;\n tier: 0 | 1;\n}\n\nfunction ingestLikelyImageUrl(urls: Set<string>, raw: string | undefined): void {\n const normalized = normalizeAssetUrl(raw ?? \"\");\n if (normalized && isLikelyImageUrl(normalized)) {\n urls.add(normalized);\n }\n}\n\nfunction extractImgTagSrcs(content: string): string[] {\n if (!content.trim()) return [];\n const $ = cheerio.load(content, { xml: false });\n const srcs: string[] = [];\n $(\"img[src]\").each((_, el) => {\n const src = $(el).attr(\"src\")?.trim();\n if (src) srcs.push(src);\n });\n return srcs;\n}\n\nfunction hasImageExtension(value: string): boolean {\n const withoutHash = value.split(\"#\", 1)[0] ?? value;\n const withoutQuery = withoutHash.split(\"?\", 1)[0] ?? withoutHash;\n return IMAGE_EXTENSION_PATTERN.test(withoutQuery);\n}\n\nfunction extractDataBgImageUrls(content: string): string[] {\n const urls: string[] = [];\n for (const match of content.matchAll(DATA_BG_IMAGE_PATTERN)) {\n const raw = match[1]?.trim();\n if (raw) urls.push(raw);\n }\n return urls;\n}\n\nfunction extractCssBackgroundImageUrls(content: string): string[] {\n const urls: string[] = [];\n for (const match of content.matchAll(BACKGROUND_IMAGE_URL_PATTERN)) {\n const raw = match[2]?.trim();\n if (raw) urls.push(raw);\n }\n return urls;\n}\n\n/** All `<img src>` values (including those not ingested as vault assets). */\nexport function discoverRawImgSrcs(content: string): string[] {\n return extractImgTagSrcs(content).filter((src) => !src.startsWith(\"data:\"));\n}\n\n/** Normalize protocol-relative and trim; skip data URIs. */\nexport function normalizeAssetUrl(raw: string): string | undefined {\n const trimmed = raw.trim();\n if (!trimmed || trimmed.startsWith(\"data:\")) return undefined;\n if (trimmed.startsWith(\"//\")) return `https:${trimmed}`;\n return trimmed;\n}\n\n/** Heuristic: URL likely points at a raster/vector image asset, not a page link. */\nexport function isLikelyImageUrl(url: string): boolean {\n if (!url || url.startsWith(\"data:\")) return false;\n\n if (url.startsWith(\"/\")) {\n return hasImageExtension(url);\n }\n\n if (!/^https?:\\/\\//i.test(url)) return false;\n\n try {\n const { pathname } = new URL(url);\n if (hasImageExtension(pathname)) return true;\n } catch {\n // fall through — malformed absolute URL\n }\n\n return hasImageExtension(url);\n}\n\nfunction pushFeaturedCandidate(\n candidates: FeaturedAssetCandidate[],\n raw: string | undefined,\n index: number,\n tier: 0 | 1,\n): void {\n const normalized = normalizeAssetUrl(raw ?? \"\");\n if (!normalized || !isLikelyImageUrl(normalized)) return;\n candidates.push({ url: normalized, index, tier });\n}\n\nfunction collectFeaturedAssetCandidates(content: string): FeaturedAssetCandidate[] {\n const candidates: FeaturedAssetCandidate[] = [];\n\n for (const match of content.matchAll(DATA_BG_IMAGE_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 0);\n }\n for (const match of content.matchAll(BACKGROUND_IMAGE_URL_PATTERN)) {\n pushFeaturedCandidate(candidates, match[2], match.index ?? 0, 0);\n }\n for (const match of content.matchAll(HERO_URL_PARAM_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 0);\n }\n for (const match of content.matchAll(IMG_TAG_SRC_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 1);\n }\n for (const match of content.matchAll(INLINE_IMAGE_PARAM_PATTERN)) {\n pushFeaturedCandidate(candidates, match[1], match.index ?? 0, 1);\n }\n\n return candidates;\n}\n\n/**\n * Ordered featured-image candidates when `_thumbnail_id` is missing — heroes\n * (`data-bg-image`, CSS backgrounds, `bg_image=`) before inline assets; within\n * each tier, first in document order wins. Filename tokens (`_w`, `_2048`, …)\n * are not interpreted as quality signals.\n */\nexport function discoverFeaturedAssetCandidateUrls(content: string): string[] {\n if (!content.trim()) return [];\n\n const ranked = [...collectFeaturedAssetCandidates(content)].sort((left, right) => {\n if (left.tier !== right.tier) return left.tier - right.tier;\n return left.index - right.index;\n });\n\n const urls: string[] = [];\n const seen = new Set<string>();\n for (const candidate of ranked) {\n if (seen.has(candidate.url)) continue;\n seen.add(candidate.url);\n urls.push(candidate.url);\n }\n return urls;\n}\n\n/** Best featured-image URL from post/page HTML when attachment id is unavailable. */\nexport function resolveFeaturedContentAssetUrl(content: string): string | undefined {\n return discoverFeaturedAssetCandidateUrls(content)[0];\n}\n\nfunction parseAttachmentIdList(raw: string | undefined): string[] {\n if (!raw?.trim()) return [];\n return raw\n .split(\",\")\n .map((part) => part.trim())\n .filter((part) => /^\\d+$/.test(part));\n}\n\nfunction extractAttachmentIdsFromContent(content: string): string[] {\n const ids = new Set<string>();\n\n for (const match of content.matchAll(DATA_WP_ATTACHMENT_ID_PATTERN)) {\n const id = match[1]?.trim();\n if (id) ids.add(id);\n }\n\n for (const match of content.matchAll(SHORTCODE_GALLERY_IDS_PATTERN)) {\n for (const id of parseAttachmentIdList(match[1])) {\n ids.add(id);\n }\n }\n\n return [...ids];\n}\n\n/**\n * Generic content-discovery pass: collect resolvable image URLs and attachment ids\n * that still need an index / REST / crawl resolution step.\n */\nexport function discoverContentAssets(content: string): ContentAssetDiscovery {\n if (!content.trim()) {\n return { urls: [], unresolvedAttachmentIds: [] };\n }\n\n const urls = new Set<string>();\n\n for (const raw of extractImgTagSrcs(content)) {\n if (isMigrationMediaRef(raw)) {\n const sourceId = parseMigrationMediaRef(raw);\n if (sourceId?.startsWith(\"url:\")) {\n ingestLikelyImageUrl(urls, sourceId.slice(\"url:\".length));\n }\n continue;\n }\n ingestLikelyImageUrl(urls, raw);\n }\n\n for (const match of content.matchAll(SHORTCODE_IMAGE_PARAM_PATTERN)) {\n ingestLikelyImageUrl(urls, match[1]);\n }\n\n for (const match of content.matchAll(BARE_SRC_PARAM_PATTERN)) {\n ingestLikelyImageUrl(urls, match[1]);\n }\n\n for (const raw of extractDataBgImageUrls(content)) {\n ingestLikelyImageUrl(urls, raw);\n }\n\n for (const raw of extractCssBackgroundImageUrls(content)) {\n ingestLikelyImageUrl(urls, raw);\n }\n\n return {\n urls: [...urls],\n unresolvedAttachmentIds: extractAttachmentIdsFromContent(content),\n };\n}\n\n/**\n * Generic content-discovery pass: collect image URLs from HTML `<img>` tags,\n * section hero markers (`data-bg-image`), inline CSS backgrounds, and common\n * shortcode/builder attributes (`src=`, `image=`, `bg_image=`, …) without\n * parsing builder-specific structure (Tatsu, Elementor, etc.).\n */\nexport function discoverContentAssetUrls(content: string): string[] {\n return discoverContentAssets(content).urls;\n}\n\n/** @deprecated Use discoverContentAssetUrls — kept for call-site clarity during transition. */\nexport function extractInlineImageSrcs(content: string): string[] {\n return discoverContentAssetUrls(content);\n}\n\n// --- Migration media refs (`artinstack-migration://asset/{sourceId}`) ---\n\n/** Pseudo-URL scheme for portable migration asset pointers (not WordPress shortcodes). */\nexport const MIGRATION_MEDIA_REF_SCHEME = \"artinstack-migration://asset/\";\n\n/** Build `artinstack-migration://asset/{sourceId}` (percent-encodes the normalizer source id). */\nexport function formatMigrationMediaRef(sourceAssetId: string): string {\n return `${MIGRATION_MEDIA_REF_SCHEME}${encodeURIComponent(sourceAssetId)}`;\n}\n\nexport function isMigrationMediaRef(value: string): boolean {\n return value.trim().startsWith(MIGRATION_MEDIA_REF_SCHEME);\n}\n\n/** Parse a migration media ref back to the normalizer `sourceId`, or `undefined` if not a ref. */\nexport function parseMigrationMediaRef(value: string): string | undefined {\n const trimmed = value.trim();\n if (!trimmed.startsWith(MIGRATION_MEDIA_REF_SCHEME)) return undefined;\n const encoded = trimmed.slice(MIGRATION_MEDIA_REF_SCHEME.length);\n if (!encoded) return undefined;\n try {\n return decodeURIComponent(encoded);\n } catch {\n return undefined;\n }\n}\n\n/** Default `replaceWith` for `rewriteInlineImages` / `stampMigrationMediaRefs` (OSS-14). */\nexport function createMigrationMediaRefReplaceWith(): (\n ref: { sourceAssetId?: string },\n) => string {\n return (ref) => {\n if (!ref.sourceAssetId) return \"\";\n return formatMigrationMediaRef(ref.sourceAssetId);\n };\n}\n\n// --- Canonical inline keys & lookup index (OSS-15) ---\n\nexport interface CanonicalInlineAssetUrl {\n /** Canonical absolute URL stored on `NormalizedAsset.sourceUrl`. */\n canonicalUrl: string;\n /** Normalizer id: `url:{canonicalUrl}`. */\n sourceId: string;\n}\n\n/**\n * OSS-15: one canonical key for inline `url:` assets — apply origin rewrite then\n * `normalizeAssetUrl` so discovery, refs, and vault entities share the same id.\n */\nexport function canonicalizeInlineAssetUrl(\n raw: string,\n originUrlRewrite?: OriginUrlRewriteConfig,\n): CanonicalInlineAssetUrl | undefined {\n let value = raw.trim();\n if (!value || value.startsWith(\"data:\")) return undefined;\n\n if (originUrlRewrite) {\n value = rewriteOriginUrlsInText(value, originUrlRewrite);\n }\n\n const canonicalUrl = normalizeAssetUrl(value);\n if (!canonicalUrl) return undefined;\n\n return {\n canonicalUrl,\n sourceId: `url:${canonicalUrl}`,\n };\n}\n\nfunction urlPathname(url: string): string | undefined {\n try {\n return new URL(url, \"http://migration.local\").pathname;\n } catch {\n return undefined;\n }\n}\n\n/**\n * Map normalized upload URLs (and pathnames) → normalizer `sourceId`.\n * Attachment ids are WXR `post_id` strings; inline discoveries use `url:{src}`.\n */\nexport function buildMigrationMediaUrlIndex(\n entries: Iterable<{ sourceUrl: string; sourceId: string }>,\n): Map<string, string> {\n const index = new Map<string, string>();\n\n for (const entry of entries) {\n index.set(entry.sourceUrl, entry.sourceId);\n const normalized = normalizeAssetUrl(entry.sourceUrl);\n if (normalized) index.set(normalized, entry.sourceId);\n const pathname = urlPathname(entry.sourceUrl);\n if (pathname) index.set(pathname, entry.sourceId);\n }\n\n return index;\n}\n\nexport function resolveMigrationMediaSourceId(\n src: string,\n urlIndex: Map<string, string>,\n originUrlRewrite?: OriginUrlRewriteConfig,\n): string | undefined {\n const canonical = canonicalizeInlineAssetUrl(src, originUrlRewrite);\n const normalized = canonical?.canonicalUrl ?? normalizeAssetUrl(src);\n if (!normalized) return undefined;\n\n return (\n urlIndex.get(normalized) ??\n urlIndex.get(src) ??\n (urlPathname(normalized) ? urlIndex.get(urlPathname(normalized)!) : undefined)\n );\n}\n\n/** Merge attachment + inline asset rows into one stamp/lookup index (OSS-15). */\nexport function buildContentMediaUrlIndex(\n entries: Iterable<{ sourceUrl: string; sourceId: string }>,\n originUrlRewrite?: OriginUrlRewriteConfig,\n): Map<string, string> {\n const canonicalEntries: { sourceUrl: string; sourceId: string }[] = [];\n for (const entry of entries) {\n const canonical = canonicalizeInlineAssetUrl(entry.sourceUrl, originUrlRewrite);\n canonicalEntries.push({\n sourceUrl: canonical?.canonicalUrl ?? entry.sourceUrl,\n sourceId: entry.sourceId,\n });\n }\n return buildMigrationMediaUrlIndex(canonicalEntries);\n}\n"],"mappings":";AAAA,YAAY,aAAa;AAelB,SAAS,wBAAwB,MAAc,QAAwC;AAC5F,MAAI,CAAC,QAAQ,OAAO,MAAM,WAAW,EAAG,QAAO;AAE/C,MAAI,SAAS;AACb,aAAW,QAAQ,OAAO,OAAO;AAC/B,QAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAI,CAAC,KAAK,MAAO;AACjB,eAAS,OAAO,MAAM,KAAK,KAAK,EAAE,KAAK,KAAK,OAAO;AACnD;AAAA,IACF;AACA,aAAS,OAAO,QAAQ,KAAK,OAAO,KAAK,OAAO;AAAA,EAClD;AACA,SAAO;AACT;AAGO,SAAS,8BAA8B,aAAqB,cAA8C;AAC/G,QAAM,oBAAoB,YAAY,QAAQ,OAAO,EAAE;AACvD,QAAM,mBAAmB,aAAa,QAAQ,OAAO,EAAE;AACvD,SAAO;AAAA,IACL,OAAO;AAAA,MACL;AAAA,QACE,OAAO,GAAG,iBAAiB;AAAA,QAC3B,SAAS,GAAG,gBAAgB;AAAA,MAC9B;AAAA,IACF;AAAA,EACF;AACF;AAIA,IAAM,mBAAmB;AAEzB,IAAM,0BAA0B,IAAI,OAAO,OAAO,WAAW,gBAAgB,OAAO,GAAG;AAGvF,IAAM,oBAAoB,OAAO,iBAAiB,gBAAgB;AAElE,IAAM,gCAAgC,IAAI;AAAA,EACxC,OAAO,2DAA2D,iBAAiB;AAAA,EACnF;AACF;AAGA,IAAM,yBAAyB,IAAI;AAAA,EACjC,OAAO,uBAAuB,iBAAiB;AAAA,EAC/C;AACF;AAEA,IAAM,wBAAwB;AAG9B,IAAM,+BACJ;AAEF,IAAM,yBAAyB,IAAI;AAAA,EACjC,OAAO,iDAAiD,iBAAiB;AAAA,EACzE;AACF;AAEA,IAAM,6BAA6B,IAAI;AAAA,EACrC,OAAO,yBAAyB,iBAAiB;AAAA,EACjD;AACF;AAEA,IAAM,sBAAsB;AAE5B,IAAM,gCAAgC;AAGtC,IAAM,gCACJ;AAkBF,SAAS,qBAAqB,MAAmB,KAA+B;AAC9E,QAAM,aAAa,kBAAkB,OAAO,EAAE;AAC9C,MAAI,cAAc,iBAAiB,UAAU,GAAG;AAC9C,SAAK,IAAI,UAAU;AAAA,EACrB;AACF;AAEA,SAAS,kBAAkB,SAA2B;AACpD,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAC7B,QAAM,IAAY,aAAK,SAAS,EAAE,KAAK,MAAM,CAAC;AAC9C,QAAM,OAAiB,CAAC;AACxB,IAAE,UAAU,EAAE,KAAK,CAAC,GAAG,OAAO;AAC5B,UAAM,MAAM,EAAE,EAAE,EAAE,KAAK,KAAK,GAAG,KAAK;AACpC,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB,CAAC;AACD,SAAO;AACT;AAEA,SAAS,kBAAkB,OAAwB;AACjD,QAAM,cAAc,MAAM,MAAM,KAAK,CAAC,EAAE,CAAC,KAAK;AAC9C,QAAM,eAAe,YAAY,MAAM,KAAK,CAAC,EAAE,CAAC,KAAK;AACrD,SAAO,wBAAwB,KAAK,YAAY;AAClD;AAEA,SAAS,uBAAuB,SAA2B;AACzD,QAAM,OAAiB,CAAC;AACxB,aAAW,SAAS,QAAQ,SAAS,qBAAqB,GAAG;AAC3D,UAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB;AACA,SAAO;AACT;AAEA,SAAS,8BAA8B,SAA2B;AAChE,QAAM,OAAiB,CAAC;AACxB,aAAW,SAAS,QAAQ,SAAS,4BAA4B,GAAG;AAClE,UAAM,MAAM,MAAM,CAAC,GAAG,KAAK;AAC3B,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB;AACA,SAAO;AACT;AAGO,SAAS,mBAAmB,SAA2B;AAC5D,SAAO,kBAAkB,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,WAAW,OAAO,CAAC;AAC5E;AAGO,SAAS,kBAAkB,KAAiC;AACjE,QAAM,UAAU,IAAI,KAAK;AACzB,MAAI,CAAC,WAAW,QAAQ,WAAW,OAAO,EAAG,QAAO;AACpD,MAAI,QAAQ,WAAW,IAAI,EAAG,QAAO,SAAS,OAAO;AACrD,SAAO;AACT;AAGO,SAAS,iBAAiB,KAAsB;AACrD,MAAI,CAAC,OAAO,IAAI,WAAW,OAAO,EAAG,QAAO;AAE5C,MAAI,IAAI,WAAW,GAAG,GAAG;AACvB,WAAO,kBAAkB,GAAG;AAAA,EAC9B;AAEA,MAAI,CAAC,gBAAgB,KAAK,GAAG,EAAG,QAAO;AAEvC,MAAI;AACF,UAAM,EAAE,SAAS,IAAI,IAAI,IAAI,GAAG;AAChC,QAAI,kBAAkB,QAAQ,EAAG,QAAO;AAAA,EAC1C,QAAQ;AAAA,EAER;AAEA,SAAO,kBAAkB,GAAG;AAC9B;AAEA,SAAS,sBACP,YACA,KACA,OACA,MACM;AACN,QAAM,aAAa,kBAAkB,OAAO,EAAE;AAC9C,MAAI,CAAC,cAAc,CAAC,iBAAiB,UAAU,EAAG;AAClD,aAAW,KAAK,EAAE,KAAK,YAAY,OAAO,KAAK,CAAC;AAClD;AAEA,SAAS,+BAA+B,SAA2C;AACjF,QAAM,aAAuC,CAAC;AAE9C,aAAW,SAAS,QAAQ,SAAS,qBAAqB,GAAG;AAC3D,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,4BAA4B,GAAG;AAClE,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,sBAAsB,GAAG;AAC5D,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,mBAAmB,GAAG;AACzD,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AACA,aAAW,SAAS,QAAQ,SAAS,0BAA0B,GAAG;AAChE,0BAAsB,YAAY,MAAM,CAAC,GAAG,MAAM,SAAS,GAAG,CAAC;AAAA,EACjE;AAEA,SAAO;AACT;AAQO,SAAS,mCAAmC,SAA2B;AAC5E,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAE7B,QAAM,SAAS,CAAC,GAAG,+BAA+B,OAAO,CAAC,EAAE,KAAK,CAAC,MAAM,UAAU;AAChF,QAAI,KAAK,SAAS,MAAM,KAAM,QAAO,KAAK,OAAO,MAAM;AACvD,WAAO,KAAK,QAAQ,MAAM;AAAA,EAC5B,CAAC;AAED,QAAM,OAAiB,CAAC;AACxB,QAAM,OAAO,oBAAI,IAAY;AAC7B,aAAW,aAAa,QAAQ;AAC9B,QAAI,KAAK,IAAI,UAAU,GAAG,EAAG;AAC7B,SAAK,IAAI,UAAU,GAAG;AACtB,SAAK,KAAK,UAAU,GAAG;AAAA,EACzB;AACA,SAAO;AACT;AAGO,SAAS,+BAA+B,SAAqC;AAClF,SAAO,mCAAmC,OAAO,EAAE,CAAC;AACtD;AAEA,SAAS,sBAAsB,KAAmC;AAChE,MAAI,CAAC,KAAK,KAAK,EAAG,QAAO,CAAC;AAC1B,SAAO,IACJ,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,CAAC,SAAS,QAAQ,KAAK,IAAI,CAAC;AACxC;AAEA,SAAS,gCAAgC,SAA2B;AAClE,QAAM,MAAM,oBAAI,IAAY;AAE5B,aAAW,SAAS,QAAQ,SAAS,6BAA6B,GAAG;AACnE,UAAM,KAAK,MAAM,CAAC,GAAG,KAAK;AAC1B,QAAI,GAAI,KAAI,IAAI,EAAE;AAAA,EACpB;AAEA,aAAW,SAAS,QAAQ,SAAS,6BAA6B,GAAG;AACnE,eAAW,MAAM,sBAAsB,MAAM,CAAC,CAAC,GAAG;AAChD,UAAI,IAAI,EAAE;AAAA,IACZ;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,GAAG;AAChB;AAMO,SAAS,sBAAsB,SAAwC;AAC5E,MAAI,CAAC,QAAQ,KAAK,GAAG;AACnB,WAAO,EAAE,MAAM,CAAC,GAAG,yBAAyB,CAAC,EAAE;AAAA,EACjD;AAEA,QAAM,OAAO,oBAAI,IAAY;AAE7B,aAAW,OAAO,kBAAkB,OAAO,GAAG;AAC5C,QAAI,oBAAoB,GAAG,GAAG;AAC5B,YAAM,WAAW,uBAAuB,GAAG;AAC3C,UAAI,UAAU,WAAW,MAAM,GAAG;AAChC,6BAAqB,MAAM,SAAS,MAAM,OAAO,MAAM,CAAC;AAAA,MAC1D;AACA;AAAA,IACF;AACA,yBAAqB,MAAM,GAAG;AAAA,EAChC;AAEA,aAAW,SAAS,QAAQ,SAAS,6BAA6B,GAAG;AACnE,yBAAqB,MAAM,MAAM,CAAC,CAAC;AAAA,EACrC;AAEA,aAAW,SAAS,QAAQ,SAAS,sBAAsB,GAAG;AAC5D,yBAAqB,MAAM,MAAM,CAAC,CAAC;AAAA,EACrC;AAEA,aAAW,OAAO,uBAAuB,OAAO,GAAG;AACjD,yBAAqB,MAAM,GAAG;AAAA,EAChC;AAEA,aAAW,OAAO,8BAA8B,OAAO,GAAG;AACxD,yBAAqB,MAAM,GAAG;AAAA,EAChC;AAEA,SAAO;AAAA,IACL,MAAM,CAAC,GAAG,IAAI;AAAA,IACd,yBAAyB,gCAAgC,OAAO;AAAA,EAClE;AACF;AAQO,SAAS,yBAAyB,SAA2B;AAClE,SAAO,sBAAsB,OAAO,EAAE;AACxC;AAGO,SAAS,uBAAuB,SAA2B;AAChE,SAAO,yBAAyB,OAAO;AACzC;AAKO,IAAM,6BAA6B;AAGnC,SAAS,wBAAwB,eAA+B;AACrE,SAAO,GAAG,0BAA0B,GAAG,mBAAmB,aAAa,CAAC;AAC1E;AAEO,SAAS,oBAAoB,OAAwB;AAC1D,SAAO,MAAM,KAAK,EAAE,WAAW,0BAA0B;AAC3D;AAGO,SAAS,uBAAuB,OAAmC;AACxE,QAAM,UAAU,MAAM,KAAK;AAC3B,MAAI,CAAC,QAAQ,WAAW,0BAA0B,EAAG,QAAO;AAC5D,QAAM,UAAU,QAAQ,MAAM,2BAA2B,MAAM;AAC/D,MAAI,CAAC,QAAS,QAAO;AACrB,MAAI;AACF,WAAO,mBAAmB,OAAO;AAAA,EACnC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,qCAEJ;AACV,SAAO,CAAC,QAAQ;AACd,QAAI,CAAC,IAAI,cAAe,QAAO;AAC/B,WAAO,wBAAwB,IAAI,aAAa;AAAA,EAClD;AACF;AAeO,SAAS,2BACd,KACA,kBACqC;AACrC,MAAI,QAAQ,IAAI,KAAK;AACrB,MAAI,CAAC,SAAS,MAAM,WAAW,OAAO,EAAG,QAAO;AAEhD,MAAI,kBAAkB;AACpB,YAAQ,wBAAwB,OAAO,gBAAgB;AAAA,EACzD;AAEA,QAAM,eAAe,kBAAkB,KAAK;AAC5C,MAAI,CAAC,aAAc,QAAO;AAE1B,SAAO;AAAA,IACL;AAAA,IACA,UAAU,OAAO,YAAY;AAAA,EAC/B;AACF;AAEA,SAAS,YAAY,KAAiC;AACpD,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,wBAAwB,EAAE;AAAA,EAChD,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAMO,SAAS,4BACd,SACqB;AACrB,QAAM,QAAQ,oBAAI,IAAoB;AAEtC,aAAW,SAAS,SAAS;AAC3B,UAAM,IAAI,MAAM,WAAW,MAAM,QAAQ;AACzC,UAAM,aAAa,kBAAkB,MAAM,SAAS;AACpD,QAAI,WAAY,OAAM,IAAI,YAAY,MAAM,QAAQ;AACpD,UAAM,WAAW,YAAY,MAAM,SAAS;AAC5C,QAAI,SAAU,OAAM,IAAI,UAAU,MAAM,QAAQ;AAAA,EAClD;AAEA,SAAO;AACT;AAEO,SAAS,8BACd,KACA,UACA,kBACoB;AACpB,QAAM,YAAY,2BAA2B,KAAK,gBAAgB;AAClE,QAAM,aAAa,WAAW,gBAAgB,kBAAkB,GAAG;AACnE,MAAI,CAAC,WAAY,QAAO;AAExB,SACE,SAAS,IAAI,UAAU,KACvB,SAAS,IAAI,GAAG,MACf,YAAY,UAAU,IAAI,SAAS,IAAI,YAAY,UAAU,CAAE,IAAI;AAExE;AAGO,SAAS,0BACd,SACA,kBACqB;AACrB,QAAM,mBAA8D,CAAC;AACrE,aAAW,SAAS,SAAS;AAC3B,UAAM,YAAY,2BAA2B,MAAM,WAAW,gBAAgB;AAC9E,qBAAiB,KAAK;AAAA,MACpB,WAAW,WAAW,gBAAgB,MAAM;AAAA,MAC5C,UAAU,MAAM;AAAA,IAClB,CAAC;AAAA,EACH;AACA,SAAO,4BAA4B,gBAAgB;AACrD;","names":[]}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
isMigrationMediaRef,
|
|
3
3
|
parseMigrationMediaRef
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-S4GMDRGX.js";
|
|
5
5
|
|
|
6
6
|
// src/transformers/html-to-grapes/index.ts
|
|
7
7
|
import * as cheerio from "cheerio";
|
|
@@ -961,4 +961,4 @@ export {
|
|
|
961
961
|
validateTiptapDoc,
|
|
962
962
|
expandMigrationMediaRefs
|
|
963
963
|
};
|
|
964
|
-
//# sourceMappingURL=chunk-
|
|
964
|
+
//# sourceMappingURL=chunk-S4SUJT2D.js.map
|
|
@@ -6,10 +6,10 @@ import {
|
|
|
6
6
|
enumerateSquarespaceEntities,
|
|
7
7
|
summarizeSquarespaceExport,
|
|
8
8
|
validateSquarespaceExportFile
|
|
9
|
-
} from "./chunk-
|
|
9
|
+
} from "./chunk-3A2PA4P3.js";
|
|
10
10
|
import {
|
|
11
11
|
stampMigrationMediaRefs
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-BONZ3U3I.js";
|
|
13
13
|
import {
|
|
14
14
|
linkToPath,
|
|
15
15
|
sanitizeSlug
|
|
@@ -18,11 +18,12 @@ import {
|
|
|
18
18
|
buildContentMediaUrlIndex,
|
|
19
19
|
canonicalizeInlineAssetUrl,
|
|
20
20
|
discoverContentAssetUrls,
|
|
21
|
+
discoverContentAssets,
|
|
21
22
|
normalizeAssetUrl,
|
|
22
23
|
parseMigrationMediaRef,
|
|
23
24
|
resolveFeaturedContentAssetUrl,
|
|
24
25
|
rewriteOriginUrlsInText
|
|
25
|
-
} from "./chunk-
|
|
26
|
+
} from "./chunk-S4GMDRGX.js";
|
|
26
27
|
|
|
27
28
|
// src/parsers/wordpress/parse-wxr.ts
|
|
28
29
|
import { readFile } from "fs/promises";
|
|
@@ -420,15 +421,33 @@ function flattenContactFormShortcodes(content, widgetRegistry) {
|
|
|
420
421
|
}
|
|
421
422
|
return html;
|
|
422
423
|
}
|
|
424
|
+
function emitInlineGalleryFromIds(idList) {
|
|
425
|
+
const images = idList.map((id) => `<img data-wp-attachment-id="${escapeLayoutAttr(id)}" alt="" />`).join("");
|
|
426
|
+
return `<figure data-wp-inline-gallery>${images}</figure>`;
|
|
427
|
+
}
|
|
428
|
+
function parseGalleryAttachmentIds(params) {
|
|
429
|
+
const ids = extractBareOrQuotedParam(params, "ids");
|
|
430
|
+
const idList = ids?.split(",").map((part) => part.trim()).filter((part) => /^\d+$/.test(part));
|
|
431
|
+
return idList?.length ? idList : void 0;
|
|
432
|
+
}
|
|
433
|
+
function flattenIdGalleryShortcode(content, tag) {
|
|
434
|
+
const escaped = escapeRegExp(tag);
|
|
435
|
+
const pattern = new RegExp(`\\[${escaped}\\b([^\\]]*)\\](?:\\s*\\[\\/${escaped}\\])?`, "gi");
|
|
436
|
+
return content.replace(pattern, (fullMatch, params) => {
|
|
437
|
+
const idList = parseGalleryAttachmentIds(params);
|
|
438
|
+
if (idList?.length) {
|
|
439
|
+
return emitInlineGalleryFromIds(idList);
|
|
440
|
+
}
|
|
441
|
+
return fullMatch;
|
|
442
|
+
});
|
|
443
|
+
}
|
|
423
444
|
function flattenGalleryShortcodes(content, widgetRegistry) {
|
|
424
445
|
const tag = escapeRegExp(widgetRegistry.galleryShortcode);
|
|
425
446
|
const pattern = new RegExp(`\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\])?`, "gi");
|
|
426
447
|
return content.replace(pattern, (_, params) => {
|
|
427
|
-
const
|
|
428
|
-
const idList = ids?.split(",").map((part) => part.trim()).filter((part) => /^\d+$/.test(part));
|
|
448
|
+
const idList = parseGalleryAttachmentIds(params);
|
|
429
449
|
if (idList?.length) {
|
|
430
|
-
|
|
431
|
-
return `<figure data-wp-inline-gallery>${images}</figure>`;
|
|
450
|
+
return emitInlineGalleryFromIds(idList);
|
|
432
451
|
}
|
|
433
452
|
const category = extractBareOrQuotedParam(params, "category") ?? extractBareOrQuotedParam(params, "type");
|
|
434
453
|
return emitWidgetStub("portfolio", {
|
|
@@ -437,6 +456,13 @@ function flattenGalleryShortcodes(content, widgetRegistry) {
|
|
|
437
456
|
});
|
|
438
457
|
});
|
|
439
458
|
}
|
|
459
|
+
function flattenIdBasedGalleryShortcodes(content, widgetRegistry) {
|
|
460
|
+
let html = content;
|
|
461
|
+
for (const tag of widgetRegistry.idGalleryShortcodes) {
|
|
462
|
+
html = flattenIdGalleryShortcode(html, tag);
|
|
463
|
+
}
|
|
464
|
+
return html;
|
|
465
|
+
}
|
|
440
466
|
function flattenPortfolioShortcodes(content, widgetRegistry) {
|
|
441
467
|
const tag = escapeRegExp(widgetRegistry.portfolioShortcode);
|
|
442
468
|
const pattern = new RegExp(`\\[${tag}\\b([^\\]]*)\\](?:\\s*\\[\\/${tag}\\])?`, "gi");
|
|
@@ -471,6 +497,7 @@ function flattenVideoShortcodes(content, widgetRegistry) {
|
|
|
471
497
|
function flattenWordPressWidgets(content, widgetRegistry = WORDPRESS_WIDGET_REGISTRY) {
|
|
472
498
|
let html = content;
|
|
473
499
|
html = flattenGalleryShortcodes(html, widgetRegistry);
|
|
500
|
+
html = flattenIdBasedGalleryShortcodes(html, widgetRegistry);
|
|
474
501
|
html = flattenPortfolioShortcodes(html, widgetRegistry);
|
|
475
502
|
html = flattenMapShortcodes(html, widgetRegistry);
|
|
476
503
|
html = flattenContactFormShortcodes(html, widgetRegistry);
|
|
@@ -520,6 +547,7 @@ function flattenWordPressBuilders(content, options = {}) {
|
|
|
520
547
|
|
|
521
548
|
// src/parsers/wordpress/parse-wxr.ts
|
|
522
549
|
var PLATFORM = "wordpress";
|
|
550
|
+
var DEFAULT_WORDPRESS_PORTFOLIO_CPT_SLUGS = ["portfolio"];
|
|
523
551
|
var WOOCOMMERCE_STUB_PAGE_SLUGS = /* @__PURE__ */ new Set(["cart", "checkout", "my-account"]);
|
|
524
552
|
var WOOCOMMERCE_STUB_SHORTCODE = /^\[woocommerce_(?:cart|checkout|my_account)\]\s*$/i;
|
|
525
553
|
function isWooCommerceStubPage(slug, contentHtml) {
|
|
@@ -559,15 +587,76 @@ function getContentEncoded(item) {
|
|
|
559
587
|
}
|
|
560
588
|
return textValue(item.encoded);
|
|
561
589
|
}
|
|
562
|
-
function sourceMeta(id, link, exportedAt) {
|
|
590
|
+
function sourceMeta(id, link, exportedAt, postType) {
|
|
563
591
|
return {
|
|
564
592
|
platform: PLATFORM,
|
|
565
593
|
id,
|
|
566
594
|
url: link || void 0,
|
|
567
595
|
path: linkToPath(link),
|
|
568
|
-
exportedAt
|
|
596
|
+
exportedAt,
|
|
597
|
+
...postType ? { postType } : {}
|
|
598
|
+
};
|
|
599
|
+
}
|
|
600
|
+
function resolvePortfolioCptSlugs(options) {
|
|
601
|
+
const slugs = options.portfolioCptSlugs ?? DEFAULT_WORDPRESS_PORTFOLIO_CPT_SLUGS;
|
|
602
|
+
return new Set(slugs.map((slug) => slug.toLowerCase()));
|
|
603
|
+
}
|
|
604
|
+
function portfolioCptSourceId(postId) {
|
|
605
|
+
return `portfolio:${postId}`;
|
|
606
|
+
}
|
|
607
|
+
function isPortfolioCptPostType(postType, portfolioCptSlugs) {
|
|
608
|
+
return portfolioCptSlugs.has(postType.toLowerCase());
|
|
609
|
+
}
|
|
610
|
+
function countWxrPortfolioCptItems(items, portfolioCptSlugs = new Set(DEFAULT_WORDPRESS_PORTFOLIO_CPT_SLUGS)) {
|
|
611
|
+
return items.filter((item) => isPortfolioCptPostType(textValue(item.post_type), portfolioCptSlugs)).length;
|
|
612
|
+
}
|
|
613
|
+
function isImportableWxrPostType(postType, portfolioCptSlugs) {
|
|
614
|
+
const normalized = postType.toLowerCase();
|
|
615
|
+
return normalized === "post" || normalized === "page" || normalized === "attachment" || isPortfolioCptPostType(normalized, portfolioCptSlugs);
|
|
616
|
+
}
|
|
617
|
+
function contentForWooStubCheck(item, options) {
|
|
618
|
+
let html = getContentEncoded(item);
|
|
619
|
+
if (options.originUrlRewrite) {
|
|
620
|
+
html = rewriteOriginUrlsInText(html, options.originUrlRewrite);
|
|
621
|
+
}
|
|
622
|
+
if (options.flattenBuilders !== false) {
|
|
623
|
+
html = flattenWordPressBuilders(html).html;
|
|
624
|
+
}
|
|
625
|
+
return html;
|
|
626
|
+
}
|
|
627
|
+
function summarizeWxrImport(items, options) {
|
|
628
|
+
const portfolioCptSlugs = resolvePortfolioCptSlugs(options);
|
|
629
|
+
let importableItemCount = 0;
|
|
630
|
+
let skippedWooCommerceStubPages = 0;
|
|
631
|
+
const skippedPostTypes = {};
|
|
632
|
+
for (const item of items) {
|
|
633
|
+
const postType = textValue(item.post_type) || "unknown";
|
|
634
|
+
const normalizedType = postType.toLowerCase();
|
|
635
|
+
if (isImportableWxrPostType(normalizedType, portfolioCptSlugs)) {
|
|
636
|
+
if (normalizedType === "page" && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(
|
|
637
|
+
sanitizeSlug(textValue(item.post_name) || textValue(item.title) || textValue(item.post_id)),
|
|
638
|
+
contentForWooStubCheck(item, options)
|
|
639
|
+
)) {
|
|
640
|
+
skippedWooCommerceStubPages++;
|
|
641
|
+
continue;
|
|
642
|
+
}
|
|
643
|
+
importableItemCount++;
|
|
644
|
+
continue;
|
|
645
|
+
}
|
|
646
|
+
skippedPostTypes[normalizedType] = (skippedPostTypes[normalizedType] ?? 0) + 1;
|
|
647
|
+
}
|
|
648
|
+
const skippedUnsupported = Object.values(skippedPostTypes).reduce((sum, count) => sum + count, 0);
|
|
649
|
+
return {
|
|
650
|
+
importableItemCount,
|
|
651
|
+
unsupportedOnly: importableItemCount === 0 && skippedUnsupported > 0,
|
|
652
|
+
skippedPostTypes,
|
|
653
|
+
...skippedWooCommerceStubPages > 0 ? { skippedWooCommerceStubPages } : {}
|
|
569
654
|
};
|
|
570
655
|
}
|
|
656
|
+
async function summarizeWxrImportFromFile(filePath, options = { filePath }) {
|
|
657
|
+
const xml = await readFile(filePath, "utf8");
|
|
658
|
+
return summarizeWxrImport(parseItems(xml), options);
|
|
659
|
+
}
|
|
571
660
|
function getExcerpt(item) {
|
|
572
661
|
const excerpt = item.excerpt;
|
|
573
662
|
if (!excerpt) return "";
|
|
@@ -661,9 +750,10 @@ function collectTaxonomies(items) {
|
|
|
661
750
|
}
|
|
662
751
|
return { categories, tags };
|
|
663
752
|
}
|
|
664
|
-
function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt, originUrlRewrite) {
|
|
753
|
+
function collectInlineAssets(html, attachmentIndex, seenUrls, seenAttachmentIds, exportedAt, originUrlRewrite) {
|
|
665
754
|
const assets = [];
|
|
666
|
-
|
|
755
|
+
const discovery = discoverContentAssets(html);
|
|
756
|
+
for (const discovered of discovery.urls) {
|
|
667
757
|
const canonical = canonicalizeInlineAssetUrl(discovered, originUrlRewrite);
|
|
668
758
|
if (!canonical) continue;
|
|
669
759
|
if (seenUrls.has(canonical.canonicalUrl)) continue;
|
|
@@ -683,9 +773,22 @@ function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt, origin
|
|
|
683
773
|
mimeType: guessMime(filename)
|
|
684
774
|
});
|
|
685
775
|
}
|
|
686
|
-
for (const
|
|
776
|
+
for (const attachmentId of discovery.unresolvedAttachmentIds) {
|
|
777
|
+
if (seenAttachmentIds.has(attachmentId)) continue;
|
|
778
|
+
seenAttachmentIds.add(attachmentId);
|
|
779
|
+
const entry = attachmentIndex.get(attachmentId);
|
|
780
|
+
if (!entry) continue;
|
|
687
781
|
if (seenUrls.has(entry.sourceUrl)) continue;
|
|
688
|
-
|
|
782
|
+
seenUrls.add(entry.sourceUrl);
|
|
783
|
+
assets.push({
|
|
784
|
+
type: "asset",
|
|
785
|
+
source: sourceMeta(attachmentId, entry.sourceUrl, exportedAt),
|
|
786
|
+
sourceId: attachmentId,
|
|
787
|
+
sourceUrl: entry.sourceUrl,
|
|
788
|
+
filename: entry.filename,
|
|
789
|
+
mimeType: entry.mimeType ?? guessMime(entry.filename),
|
|
790
|
+
caption: entry.title
|
|
791
|
+
});
|
|
689
792
|
}
|
|
690
793
|
return assets;
|
|
691
794
|
}
|
|
@@ -740,20 +843,25 @@ async function* enumerateWxrEntities(options) {
|
|
|
740
843
|
caption: entry.title
|
|
741
844
|
};
|
|
742
845
|
}
|
|
846
|
+
const portfolioCptSlugs = resolvePortfolioCptSlugs(options);
|
|
743
847
|
for (const item of items) {
|
|
744
848
|
const postType = textValue(item.post_type);
|
|
745
|
-
|
|
849
|
+
const isPost = postType === "post";
|
|
850
|
+
const isPage = postType === "page";
|
|
851
|
+
const isPortfolioCpt = isPortfolioCptPostType(postType, portfolioCptSlugs);
|
|
852
|
+
if (!isPost && !isPage && !isPortfolioCpt) continue;
|
|
746
853
|
const id = textValue(item.post_id);
|
|
747
854
|
const link = maybeRewriteUrl(textValue(item.link), options.originUrlRewrite);
|
|
748
855
|
const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
|
|
749
856
|
let contentHtml = preprocessContent(getContentEncoded(item), options);
|
|
750
|
-
if (
|
|
857
|
+
if (isPage && options.skipWooCommerceStubPages !== false && isWooCommerceStubPage(slug, contentHtml)) {
|
|
751
858
|
continue;
|
|
752
859
|
}
|
|
753
860
|
const inlineAssets = collectInlineAssets(
|
|
754
861
|
contentHtml,
|
|
755
862
|
attachmentIndex,
|
|
756
863
|
seenAssetUrls,
|
|
864
|
+
emittedAttachmentIds,
|
|
757
865
|
options.exportedAt,
|
|
758
866
|
options.originUrlRewrite
|
|
759
867
|
);
|
|
@@ -788,7 +896,7 @@ async function* enumerateWxrEntities(options) {
|
|
|
788
896
|
if (domain === "category") categorySlugs.push(nicename);
|
|
789
897
|
if (domain === "post_tag") tagSlugs.push(nicename);
|
|
790
898
|
}
|
|
791
|
-
if (
|
|
899
|
+
if (isPost) {
|
|
792
900
|
const thumbnailId = getPostMeta(item, "_thumbnail_id");
|
|
793
901
|
const featuredAssetSourceId = resolveFeaturedAssetSourceId(
|
|
794
902
|
thumbnailId,
|
|
@@ -813,11 +921,12 @@ async function* enumerateWxrEntities(options) {
|
|
|
813
921
|
};
|
|
814
922
|
yield post;
|
|
815
923
|
} else {
|
|
816
|
-
const isHomePage = getPostMeta(item, "_wp_show_on_front") === "1" || getPostMeta(item, "page_on_front") === "1";
|
|
924
|
+
const isHomePage = !isPortfolioCpt && (getPostMeta(item, "_wp_show_on_front") === "1" || getPostMeta(item, "page_on_front") === "1");
|
|
925
|
+
const pageSourceId = isPortfolioCpt ? portfolioCptSourceId(id) : id;
|
|
817
926
|
const page = {
|
|
818
927
|
type: "page",
|
|
819
|
-
source: sourceMeta(
|
|
820
|
-
sourceId:
|
|
928
|
+
source: sourceMeta(pageSourceId, link, options.exportedAt, isPortfolioCpt ? postType : void 0),
|
|
929
|
+
sourceId: pageSourceId,
|
|
821
930
|
title: textValue(item.title) || slug,
|
|
822
931
|
slug,
|
|
823
932
|
contentHtml,
|
|
@@ -828,7 +937,7 @@ async function* enumerateWxrEntities(options) {
|
|
|
828
937
|
}
|
|
829
938
|
}
|
|
830
939
|
}
|
|
831
|
-
async function validateWxrFile(filePath) {
|
|
940
|
+
async function validateWxrFile(filePath, options = { filePath }) {
|
|
832
941
|
const issues = [];
|
|
833
942
|
let xml;
|
|
834
943
|
try {
|
|
@@ -837,7 +946,12 @@ async function validateWxrFile(filePath) {
|
|
|
837
946
|
return {
|
|
838
947
|
ok: false,
|
|
839
948
|
issues: [{ code: "file_not_found", message: `Cannot read file: ${filePath}` }],
|
|
840
|
-
summary: {}
|
|
949
|
+
summary: {},
|
|
950
|
+
importSummary: {
|
|
951
|
+
importableItemCount: 0,
|
|
952
|
+
unsupportedOnly: false,
|
|
953
|
+
skippedPostTypes: {}
|
|
954
|
+
}
|
|
841
955
|
};
|
|
842
956
|
}
|
|
843
957
|
const looksLikeWxr = xml.includes("<rss") && (xml.includes("wp:wxr_version") || xml.includes("xmlns:wp=") || xml.includes("WordPress eXtended RSS"));
|
|
@@ -845,18 +959,20 @@ async function validateWxrFile(filePath) {
|
|
|
845
959
|
issues.push({ code: "invalid_wxr", message: "File does not appear to be WordPress WXR" });
|
|
846
960
|
}
|
|
847
961
|
const items = parseItems(xml);
|
|
962
|
+
const importSummary = summarizeWxrImport(items, { ...options, filePath });
|
|
848
963
|
const summary = {
|
|
849
964
|
posts: items.filter((i) => textValue(i.post_type) === "post").length,
|
|
850
965
|
pages: items.filter((i) => textValue(i.post_type) === "page").length,
|
|
851
966
|
assets: items.filter((i) => textValue(i.post_type) === "attachment").length,
|
|
852
|
-
|
|
967
|
+
portfolioCpt: countWxrPortfolioCptItems(items),
|
|
853
968
|
categories: 0,
|
|
854
|
-
tags: 0
|
|
969
|
+
tags: 0,
|
|
970
|
+
importableItemCount: importSummary.importableItemCount
|
|
855
971
|
};
|
|
856
972
|
const { categories, tags } = collectTaxonomies(items);
|
|
857
973
|
summary.categories = categories.size;
|
|
858
974
|
summary.tags = tags.size;
|
|
859
|
-
return { ok: issues.length === 0, issues, summary };
|
|
975
|
+
return { ok: issues.length === 0, issues, summary, importSummary };
|
|
860
976
|
}
|
|
861
977
|
|
|
862
978
|
// src/parsers/wordpress/index.ts
|
|
@@ -870,24 +986,33 @@ function resolveWxrOptions(input) {
|
|
|
870
986
|
filePath: String(obj.path),
|
|
871
987
|
originUrlRewrite: obj.originUrlRewrite,
|
|
872
988
|
flattenBuilders: obj.flattenBuilders,
|
|
873
|
-
skipWooCommerceStubPages: obj.skipWooCommerceStubPages
|
|
989
|
+
skipWooCommerceStubPages: obj.skipWooCommerceStubPages,
|
|
990
|
+
portfolioCptSlugs: obj.portfolioCptSlugs
|
|
874
991
|
};
|
|
875
992
|
}
|
|
876
993
|
throw new Error(
|
|
877
|
-
"WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders?, skipWooCommerceStubPages? })"
|
|
994
|
+
"WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders?, skipWooCommerceStubPages?, portfolioCptSlugs? })"
|
|
878
995
|
);
|
|
879
996
|
}
|
|
880
997
|
var wordpressAdapter = {
|
|
881
998
|
platform: "wordpress",
|
|
882
999
|
async validateInput(input) {
|
|
883
|
-
const
|
|
884
|
-
const result = await validateWxrFile(filePath);
|
|
1000
|
+
const options = resolveWxrOptions(input);
|
|
1001
|
+
const result = await validateWxrFile(options.filePath, options);
|
|
885
1002
|
return {
|
|
886
1003
|
ok: result.ok,
|
|
887
1004
|
issues: result.issues,
|
|
888
|
-
summary:
|
|
1005
|
+
summary: {
|
|
1006
|
+
...result.summary,
|
|
1007
|
+
unsupportedOnly: result.importSummary.unsupportedOnly,
|
|
1008
|
+
skippedPostTypes: result.importSummary.skippedPostTypes
|
|
1009
|
+
}
|
|
889
1010
|
};
|
|
890
1011
|
},
|
|
1012
|
+
async getImportSummary(input) {
|
|
1013
|
+
const options = resolveWxrOptions(input);
|
|
1014
|
+
return summarizeWxrImportFromFile(options.filePath, options);
|
|
1015
|
+
},
|
|
891
1016
|
enumerateEntities(ctx) {
|
|
892
1017
|
return enumerateWxrEntities(resolveWxrOptions(ctx.input));
|
|
893
1018
|
}
|
|
@@ -2865,4 +2990,4 @@ export {
|
|
|
2865
2990
|
wixAdapter,
|
|
2866
2991
|
getAdapter
|
|
2867
2992
|
};
|
|
2868
|
-
//# sourceMappingURL=chunk-
|
|
2993
|
+
//# sourceMappingURL=chunk-YLVPZ4M3.js.map
|