@artinstack/migrator 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # @artinstack/migrator
2
2
 
3
- Stateless content normalizer and migration framework for transforming **WordPress**, **SmugMug**, **Squarespace**, and similar sources into a platform-agnostic schema.
3
+ Stateless content normalizer and migration framework for transforming **WordPress**, **SmugMug**, **Squarespace**, **Wix**, and similar sources into a platform-agnostic schema.
4
4
 
5
- **Public from day one.** Portable parsers and JSON export are useful without any specific host. Job orchestration, credentials, and UI are implemented separately via `MigrationSink`.
5
+ Portable parsers and JSON export are useful without any specific host. Job orchestration, credentials, and UI are implemented separately via `MigrationSink`.
6
6
 
7
7
  See [docs/architecture.md](./docs/architecture.md) for the high-level blueprint: data flow, DTOs, sink contract, and source mappings.
8
8
 
@@ -10,7 +10,7 @@ See [docs/architecture.md](./docs/architecture.md) for the high-level blueprint:
10
10
 
11
11
  ```
12
12
  src/
13
- parsers/ WordPress, SmugMug, Squarespace → normalizer DTOs
13
+ parsers/ WordPress, SmugMug, Squarespace, Wix → normalizer DTOs
14
14
  normalizer/ Canonical DTOs + portable idempotency types
15
15
  sinks/ filesystem export, MigrationSink interface
16
16
  cli/ artinstack-migrate
@@ -19,18 +19,21 @@ src/
19
19
 
20
20
  ## Install
21
21
 
22
- **From npm** (after publish):
22
+ **From npm:**
23
23
 
24
24
  ```bash
25
25
  pnpm add @artinstack/migrator
26
26
  # or: npm install @artinstack/migrator
27
27
  ```
28
28
 
29
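+ After a local install, run the CLI with `npx artinstack-migrate` (or `pnpm exec artinstack-migrate`); a global install (`npm install -g @artinstack/migrator`) puts the `artinstack-migrate` binary on your PATH.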
30
+
29
31
  **From source** (development):
30
32
 
31
33
  ```bash
32
34
  pnpm install
33
35
  pnpm build
36
+ pnpm link --global # optional: artinstack-migrate on PATH
34
37
  ```
35
38
 
36
39
  Requires **Node.js 20+**.
@@ -38,22 +41,76 @@ Requires **Node.js 20+**.
38
41
  ## CLI
39
42
 
40
43
  ```bash
41
- pnpm build
44
+ artinstack-migrate <platform> <export-file> [options]
45
+ artinstack-migrate validate <platform> <export-file>
46
+ ```
47
+
48
+ **Platforms:** `wordpress`, `smugmug`, `squarespace`, `wix`
49
+
50
+ **Options:**
51
+
52
+ | Flag | Description |
53
+ |------|-------------|
54
+ | `--out <dir>` | Write normalized JSON files to a directory |
55
+ | `--format json` | Print combined JSON to stdout (no files written) |
56
+ | `--dry-run` | Parse and analyze only; no export files |
57
+ | `--report <dir>` | With `--dry-run`, write `conflicts.json` and `migration-report.json` |
58
+ | `--offline` | Skip network HEAD requests for asset size estimates |
59
+ | `--sink filesystem` | Run through `MigrationSink` before writing (requires `--out`) |
60
+ | `--urls <file>` | Wix only: URL list or `sitemap.xml` for static page snapshots |
42
61
 
43
- # Installed / linked binary
62
+ **Examples:**
63
+
64
+ ```bash
65
+ # Export normalized JSON
44
66
  artinstack-migrate wordpress export.xml --out ./output
45
- artinstack-migrate validate wordpress ./export.xml
46
67
 
47
- # Local clone no global install
68
+ # Preview conflicts without writing content
69
+ artinstack-migrate wordpress export.xml --dry-run --report ./preview/
70
+
71
+ # Validate export structure (JSON result on stdout, exit 0/1)
72
+ artinstack-migrate validate wordpress export.xml
73
+
74
+ # Wix: blog feed + static pages from a URL list
75
+ artinstack-migrate wix feed.xml --urls page-urls.txt --out ./output
76
+
77
+ # Local clone
48
78
  pnpm cli wordpress export.xml --dry-run
49
- node dist/cli/index.js wordpress export.xml --dry-run
50
79
  ```
51
80
 
81
+ ### Output
82
+
83
+ **`--out ./output`** writes grouped JSON:
84
+
85
+ ```
86
+ output/
87
+ posts.json
88
+ pages.json
89
+ media.json
90
+ portfolios.json
91
+ portfolio-media.json
92
+ categories.json
93
+ tags.json
94
+ conflicts.json # when generated
95
+ migration-report.json # when generated
96
+ ```
97
+
98
+ Each file contains an array of normalized DTOs (`NormalizedPost`, `NormalizedPage`, `NormalizedAsset`, etc.). See [docs/architecture.md](./docs/architecture.md) for schema and per-platform input formats.
99
+
100
+ **`--format json`** prints the same entities as one combined JSON object to stdout.
101
+
102
+ **`validate`** prints a validation result JSON object (`ok`, `issues`, `summary` counts) and exits `0` on success, `1` on failure.
103
+
104
+ **`--dry-run`** exits `0` (clean), `2` (warnings), or `1` (blocking conflicts).
105
+
106
+ Per-platform export file formats and API client usage are documented in [docs/architecture.md](./docs/architecture.md).
107
+
52
108
  ## Development
53
109
 
54
110
  ```bash
55
111
  pnpm typecheck
56
112
  pnpm test
113
+ pnpm test:validate-fixtures # golden fixtures (wordpress, smugmug, squarespace, grapes, wix)
57
114
  pnpm dev # watch build
58
115
  ```
59
116
 
@@ -1,4 +1,4 @@
1
- type MigrationPlatform = "wordpress" | "smugmug" | "squarespace";
1
+ type MigrationPlatform = "wordpress" | "smugmug" | "squarespace" | "wix";
2
2
  type EntityType = "post" | "page" | "asset" | "portfolio" | "category" | "tag";
3
3
  type PublishStatus = "draft" | "published" | "archived";
4
4
  interface SourceMetadata {
@@ -150,4 +150,4 @@ interface BundleCounts {
150
150
  }
151
151
  declare function bundleCounts(bundle: EntityBundle): BundleCounts;
152
152
 
153
- export { type AdapterContext as A, type BundleCounts as B, type EntityBundle as E, type MigrationAdapter as M, type NormalizedAsset as N, type PortfolioMediaLink as P, type SourceMetadata as S, type ValidationIssue as V, type MigrationPlatform as a, type EntityKey as b, type EntityType as c, type MigrationCursor as d, type NormalizedAssetExif as e, type NormalizedCategory as f, type NormalizedEntity as g, type NormalizedPage as h, type NormalizedPortfolio as i, type NormalizedPost as j, type NormalizedTag as k, type PublishStatus as l, type ValidationResult as m, bundleCounts as n, collectEntities as o, emptyBundle as p, entityKey as q };
153
+ export { type AdapterContext as A, type BundleCounts as B, type EntityBundle as E, type MigrationAdapter as M, type NormalizedAsset as N, type PortfolioMediaLink as P, type SourceMetadata as S, type ValidationResult as V, type MigrationPlatform as a, type EntityKey as b, type EntityType as c, type MigrationCursor as d, type NormalizedAssetExif as e, type NormalizedCategory as f, type NormalizedEntity as g, type NormalizedPage as h, type NormalizedPortfolio as i, type NormalizedPost as j, type NormalizedTag as k, type PublishStatus as l, type ValidationIssue as m, bundleCounts as n, collectEntities as o, emptyBundle as p, entityKey as q };
@@ -0,0 +1,67 @@
1
+ // src/lib/content-asset-urls.ts
2
+ import * as cheerio from "cheerio";
3
+ var ASSET_URL_PARAM_PATTERN = /\b(?:src|image|url)\s*=\s*["']([^"']+)["']/gi;
4
+ var IMAGE_EXTENSION_PATTERN = /\.(?:jpe?g|png|gif|webp|avif|svg)(?:[?#]|$)/i;
5
+ var WP_UPLOADS_PATTERN = /\/wp-content\/uploads\//i;
6
/**
 * Collect the `src` attribute of every `<img>` element in an HTML fragment.
 *
 * The fragment is parsed with cheerio in HTML (non-XML) mode. Values are
 * trimmed, and images whose `src` is empty after trimming are skipped.
 *
 * @param {string} content - HTML markup to scan.
 * @returns {string[]} Trimmed `src` values in document order (may include `data:` URIs).
 */
function extractImgTagSrcs(content) {
  // Blank input: nothing to parse, so avoid building a DOM at all.
  if (!content.trim()) return [];

  const $ = cheerio.load(content, { xml: false });
  const collected = [];
  for (const node of $("img[src]").toArray()) {
    const value = $(node).attr("src")?.trim();
    if (value) collected.push(value);
  }
  return collected;
}
16
/**
 * List every `<img src>` value found in the content, leaving out inline
 * `data:` URIs. No image-likeness filtering is applied here.
 *
 * @param {string} content - HTML markup to scan.
 * @returns {string[]} Non-`data:` image sources in document order.
 */
function discoverRawImgSrcs(content) {
  const isDataUri = (src) => src.startsWith("data:");
  return extractImgTagSrcs(content).filter((src) => !isDataUri(src));
}
19
/**
 * Normalize a raw asset URL taken from markup or a shortcode attribute.
 *
 * Surrounding whitespace is trimmed and protocol-relative URLs (`//host/path`)
 * are upgraded to `https:`. Empty strings and `data:` URIs yield `undefined`
 * because there is no remote asset to reference.
 *
 * @param {string} raw - URL text exactly as found in the content.
 * @returns {string | undefined} The cleaned URL, or `undefined` when it should be skipped.
 */
function normalizeAssetUrl(raw) {
  const trimmed = raw.trim();
  if (!trimmed) return void 0;
  // URI schemes are case-insensitive, so `DATA:` must be skipped just like `data:`.
  if (/^data:/i.test(trimmed)) return void 0;
  if (trimmed.startsWith("//")) return `https:${trimmed}`;
  return trimmed;
}
25
/**
 * Heuristic check that a URL points at an image asset rather than a page link.
 *
 * Accepted forms are URLs starting with `/` and absolute `http(s)` URLs.
 * Either form qualifies when it contains a `/wp-content/uploads/` segment or
 * ends in a known image extension. For absolute URLs the extension is tested
 * against the parsed pathname, falling back to the raw string if parsing fails.
 *
 * @param {string} url - Candidate URL (already trimmed/normalized by the caller).
 * @returns {boolean} `true` when the URL looks like an image.
 */
function isLikelyImageUrl(url) {
  if (!url || url.startsWith("data:")) return false;

  const startsAtRoot = url.startsWith("/");
  const isHttp = /^https?:\/\//i.test(url);
  if (!startsAtRoot && !isHttp) return false;

  // The uploads-directory hint applies to both accepted URL forms.
  if (WP_UPLOADS_PATTERN.test(url)) return true;
  if (startsAtRoot) return IMAGE_EXTENSION_PATTERN.test(url);

  let extensionTarget = url;
  try {
    extensionTarget = new URL(url).pathname;
  } catch {
    // Unparseable absolute URL: test the raw string instead.
  }
  return IMAGE_EXTENSION_PATTERN.test(extensionTarget);
}
39
/**
 * Generic content-discovery pass over a block of post/page content.
 *
 * Gathers image URLs from two places: `<img src>` attributes, and any
 * `src=`, `image=` or `url=` quoted attribute matched by
 * ASSET_URL_PARAM_PATTERN (e.g. shortcode or page-builder parameters).
 * Each candidate is normalized and kept only if it looks like an image.
 *
 * @param {string} content - Raw content markup.
 * @returns {string[]} De-duplicated URLs; `<img>` hits come first, then attribute hits.
 */
function discoverContentAssetUrls(content) {
  if (!content.trim()) return [];

  const found = /* @__PURE__ */ new Set();
  const consider = (candidate) => {
    const url = normalizeAssetUrl(candidate);
    if (url && isLikelyImageUrl(url)) found.add(url);
  };

  for (const src of extractImgTagSrcs(content)) {
    consider(src);
  }
  for (const hit of content.matchAll(ASSET_URL_PARAM_PATTERN)) {
    consider(hit[1] ?? "");
  }

  return Array.from(found);
}
56
/**
 * Alias for {@link discoverContentAssetUrls}.
 *
 * @deprecated Call `discoverContentAssetUrls` directly.
 * @param {string} content - Raw content markup.
 * @returns {string[]} Whatever `discoverContentAssetUrls` returns for the same input.
 */
function extractInlineImageSrcs(content) {
  const urls = discoverContentAssetUrls(content);
  return urls;
}
59
+
60
+ export {
61
+ discoverRawImgSrcs,
62
+ normalizeAssetUrl,
63
+ isLikelyImageUrl,
64
+ discoverContentAssetUrls,
65
+ extractInlineImageSrcs
66
+ };
67
+ //# sourceMappingURL=chunk-2PNSVE5Y.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/lib/content-asset-urls.ts"],"sourcesContent":["import * as cheerio from \"cheerio\";\n\n/** Builder-agnostic attribute names that commonly hold image URLs in post_content. */\nconst ASSET_URL_PARAM_PATTERN =\n /\\b(?:src|image|url)\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\nconst IMAGE_EXTENSION_PATTERN = /\\.(?:jpe?g|png|gif|webp|avif|svg)(?:[?#]|$)/i;\nconst WP_UPLOADS_PATTERN = /\\/wp-content\\/uploads\\//i;\n\nfunction extractImgTagSrcs(content: string): string[] {\n if (!content.trim()) return [];\n const $ = cheerio.load(content, { xml: false });\n const srcs: string[] = [];\n $(\"img[src]\").each((_, el) => {\n const src = $(el).attr(\"src\")?.trim();\n if (src) srcs.push(src);\n });\n return srcs;\n}\n\n/** All `<img src>` values (including those not ingested as vault assets). */\nexport function discoverRawImgSrcs(content: string): string[] {\n return extractImgTagSrcs(content).filter((src) => !src.startsWith(\"data:\"));\n}\n\n/** Normalize protocol-relative and trim; skip data URIs. */\nexport function normalizeAssetUrl(raw: string): string | undefined {\n const trimmed = raw.trim();\n if (!trimmed || trimmed.startsWith(\"data:\")) return undefined;\n if (trimmed.startsWith(\"//\")) return `https:${trimmed}`;\n return trimmed;\n}\n\n/** Heuristic: URL likely points at a raster/vector image asset, not a page link. 
*/\nexport function isLikelyImageUrl(url: string): boolean {\n if (!url || url.startsWith(\"data:\")) return false;\n\n if (url.startsWith(\"/\")) {\n return WP_UPLOADS_PATTERN.test(url) || IMAGE_EXTENSION_PATTERN.test(url);\n }\n\n if (!/^https?:\\/\\//i.test(url)) return false;\n\n if (WP_UPLOADS_PATTERN.test(url)) return true;\n\n try {\n const pathname = new URL(url).pathname;\n return IMAGE_EXTENSION_PATTERN.test(pathname);\n } catch {\n return IMAGE_EXTENSION_PATTERN.test(url);\n }\n}\n\n/**\n * Generic content-discovery pass: collect image URLs from HTML `<img>` tags and\n * common shortcode/builder attributes (`src=`, `image=`, `url=`) without parsing\n * builder-specific structure (Tatsu, Elementor, etc.).\n */\nexport function discoverContentAssetUrls(content: string): string[] {\n if (!content.trim()) return [];\n\n const urls = new Set<string>();\n\n for (const raw of extractImgTagSrcs(content)) {\n const normalized = normalizeAssetUrl(raw);\n if (normalized && isLikelyImageUrl(normalized)) {\n urls.add(normalized);\n }\n }\n\n for (const match of content.matchAll(ASSET_URL_PARAM_PATTERN)) {\n const normalized = normalizeAssetUrl(match[1] ?? \"\");\n if (normalized && isLikelyImageUrl(normalized)) {\n urls.add(normalized);\n }\n }\n\n return [...urls];\n}\n\n/** @deprecated Use discoverContentAssetUrls — kept for call-site clarity during transition. 
*/\nexport function extractInlineImageSrcs(content: string): string[] {\n return discoverContentAssetUrls(content);\n}\n"],"mappings":";AAAA,YAAY,aAAa;AAGzB,IAAM,0BACJ;AAEF,IAAM,0BAA0B;AAChC,IAAM,qBAAqB;AAE3B,SAAS,kBAAkB,SAA2B;AACpD,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAC7B,QAAM,IAAY,aAAK,SAAS,EAAE,KAAK,MAAM,CAAC;AAC9C,QAAM,OAAiB,CAAC;AACxB,IAAE,UAAU,EAAE,KAAK,CAAC,GAAG,OAAO;AAC5B,UAAM,MAAM,EAAE,EAAE,EAAE,KAAK,KAAK,GAAG,KAAK;AACpC,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB,CAAC;AACD,SAAO;AACT;AAGO,SAAS,mBAAmB,SAA2B;AAC5D,SAAO,kBAAkB,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,WAAW,OAAO,CAAC;AAC5E;AAGO,SAAS,kBAAkB,KAAiC;AACjE,QAAM,UAAU,IAAI,KAAK;AACzB,MAAI,CAAC,WAAW,QAAQ,WAAW,OAAO,EAAG,QAAO;AACpD,MAAI,QAAQ,WAAW,IAAI,EAAG,QAAO,SAAS,OAAO;AACrD,SAAO;AACT;AAGO,SAAS,iBAAiB,KAAsB;AACrD,MAAI,CAAC,OAAO,IAAI,WAAW,OAAO,EAAG,QAAO;AAE5C,MAAI,IAAI,WAAW,GAAG,GAAG;AACvB,WAAO,mBAAmB,KAAK,GAAG,KAAK,wBAAwB,KAAK,GAAG;AAAA,EACzE;AAEA,MAAI,CAAC,gBAAgB,KAAK,GAAG,EAAG,QAAO;AAEvC,MAAI,mBAAmB,KAAK,GAAG,EAAG,QAAO;AAEzC,MAAI;AACF,UAAM,WAAW,IAAI,IAAI,GAAG,EAAE;AAC9B,WAAO,wBAAwB,KAAK,QAAQ;AAAA,EAC9C,QAAQ;AACN,WAAO,wBAAwB,KAAK,GAAG;AAAA,EACzC;AACF;AAOO,SAAS,yBAAyB,SAA2B;AAClE,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAE7B,QAAM,OAAO,oBAAI,IAAY;AAE7B,aAAW,OAAO,kBAAkB,OAAO,GAAG;AAC5C,UAAM,aAAa,kBAAkB,GAAG;AACxC,QAAI,cAAc,iBAAiB,UAAU,GAAG;AAC9C,WAAK,IAAI,UAAU;AAAA,IACrB;AAAA,EACF;AAEA,aAAW,SAAS,QAAQ,SAAS,uBAAuB,GAAG;AAC7D,UAAM,aAAa,kBAAkB,MAAM,CAAC,KAAK,EAAE;AACnD,QAAI,cAAc,iBAAiB,UAAU,GAAG;AAC9C,WAAK,IAAI,UAAU;AAAA,IACrB;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,IAAI;AACjB;AAGO,SAAS,uBAAuB,SAA2B;AAChE,SAAO,yBAAyB,OAAO;AACzC;","names":[]}
@@ -0,0 +1,147 @@
1
+ // src/normalizer/validate.ts
2
+ import { z } from "zod";
3
+ var migrationPlatformSchema = z.enum(["wordpress", "smugmug", "squarespace", "wix"]);
4
+ var publishStatusSchema = z.enum(["draft", "published", "archived"]);
5
+ var sourceMetadataSchema = z.object({
6
+ platform: migrationPlatformSchema,
7
+ id: z.string().min(1),
8
+ url: z.string().optional(),
9
+ path: z.string().optional(),
10
+ exportedAt: z.string().optional()
11
+ });
12
+ var normalizedPostSchema = z.object({
13
+ type: z.literal("post"),
14
+ source: sourceMetadataSchema,
15
+ sourceId: z.string().min(1),
16
+ title: z.string().min(1),
17
+ slug: z.string().min(1),
18
+ excerpt: z.string().optional(),
19
+ contentHtml: z.string(),
20
+ publishedAt: z.string().optional(),
21
+ status: publishStatusSchema,
22
+ categorySlugs: z.array(z.string().min(1)).optional(),
23
+ tagSlugs: z.array(z.string().min(1)).optional(),
24
+ sourceFeaturedMediaId: z.string().optional(),
25
+ featuredAssetSourceId: z.string().optional(),
26
+ seoTitle: z.string().optional(),
27
+ seoDescription: z.string().optional()
28
+ });
29
+ var normalizedPageSchema = z.object({
30
+ type: z.literal("page"),
31
+ source: sourceMetadataSchema,
32
+ sourceId: z.string().min(1),
33
+ title: z.string().min(1),
34
+ slug: z.string().min(1),
35
+ contentHtml: z.string(),
36
+ contentCss: z.string().optional(),
37
+ isHomePage: z.boolean().optional(),
38
+ status: publishStatusSchema,
39
+ seoTitle: z.string().optional(),
40
+ seoDescription: z.string().optional()
41
+ });
42
+ var normalizedAssetExifSchema = z.object({
43
+ iso: z.number().optional(),
44
+ aperture: z.number().optional(),
45
+ shutter: z.string().optional(),
46
+ focalLength: z.number().optional()
47
+ });
48
+ var normalizedAssetSchema = z.object({
49
+ type: z.literal("asset"),
50
+ source: sourceMetadataSchema,
51
+ sourceId: z.string().min(1),
52
+ sourceUrl: z.string().min(1),
53
+ filename: z.string().min(1),
54
+ mimeType: z.string().optional(),
55
+ caption: z.string().optional(),
56
+ altText: z.string().optional(),
57
+ keywords: z.array(z.string()).optional(),
58
+ exif: normalizedAssetExifSchema.optional(),
59
+ portfolioSourceId: z.string().optional(),
60
+ sort: z.number().optional()
61
+ });
62
+ var normalizedPortfolioSchema = z.object({
63
+ type: z.literal("portfolio"),
64
+ source: sourceMetadataSchema,
65
+ sourceId: z.string().min(1),
66
+ title: z.string().min(1),
67
+ slug: z.string().min(1),
68
+ description: z.string().optional(),
69
+ parentSourceId: z.string().optional()
70
+ });
71
+ var normalizedCategorySchema = z.object({
72
+ type: z.literal("category"),
73
+ source: sourceMetadataSchema,
74
+ sourceId: z.string().min(1),
75
+ name: z.string().min(1),
76
+ slug: z.string().min(1)
77
+ });
78
+ var normalizedTagSchema = z.object({
79
+ type: z.literal("tag"),
80
+ source: sourceMetadataSchema,
81
+ sourceId: z.string().min(1),
82
+ name: z.string().min(1),
83
+ slug: z.string().min(1)
84
+ });
85
+ var normalizedEntitySchema = z.discriminatedUnion("type", [
86
+ normalizedPostSchema,
87
+ normalizedPageSchema,
88
+ normalizedAssetSchema,
89
+ normalizedPortfolioSchema,
90
+ normalizedCategorySchema,
91
+ normalizedTagSchema
92
+ ]);
93
/**
 * Convert zod issues into the package's flat validation-issue shape.
 *
 * Each issue keeps its `code` and `message`. The `path` array is flattened to
 * a dot-separated string, or `undefined` when the issue is at the root.
 *
 * @param {Array<{ code: string, message: string, path: PropertyKey[] }>} issues - Issues from a failed parse.
 * @returns {Array<{ code: string, message: string, path: string | undefined }>} One entry per input issue, in order.
 */
function zodIssuesToValidationIssues(issues) {
  return issues.map(({ code, message, path }) => ({
    code,
    message,
    // Stringify each segment first: Array.prototype.join throws a TypeError
    // when a segment is a symbol, while String() handles every key type.
    path: path.length > 0 ? path.map((segment) => String(segment)).join(".") : void 0
  }));
}
100
/**
 * Run `schema.safeParse` on a value and report the outcome as a validation result.
 *
 * @param {{ safeParse: Function }} schema - Schema object exposing `safeParse`.
 * @param {unknown} value - Candidate value to check.
 * @returns {{ ok: boolean, issues: Array }} `ok: true` with no issues on success,
 *   otherwise `ok: false` with the converted parse issues.
 */
function parseToValidationResult(schema, value) {
  const outcome = schema.safeParse(value);
  return outcome.success
    ? { ok: true, issues: [] }
    : { ok: false, issues: zodIssuesToValidationIssues(outcome.error.issues) };
}
107
/** Structural check of a value against `normalizedPostSchema`. */
function validateNormalizedPost(post) {
  return parseToValidationResult(normalizedPostSchema, post);
}

/** Structural check of a value against `normalizedPageSchema`. */
function validateNormalizedPage(page) {
  return parseToValidationResult(normalizedPageSchema, page);
}

/** Structural check of a value against `normalizedAssetSchema`. */
function validateNormalizedAsset(asset) {
  return parseToValidationResult(normalizedAssetSchema, asset);
}

/** Structural check of a value against `normalizedPortfolioSchema`. */
function validateNormalizedPortfolio(portfolio) {
  return parseToValidationResult(normalizedPortfolioSchema, portfolio);
}

/** Structural check of a value against `normalizedCategorySchema`. */
function validateNormalizedCategory(category) {
  return parseToValidationResult(normalizedCategorySchema, category);
}

/** Structural check of a value against `normalizedTagSchema`. */
function validateNormalizedTag(tag) {
  return parseToValidationResult(normalizedTagSchema, tag);
}

/** Structural check of any normalized entity, dispatched on its `type` field. */
function validateNormalizedEntity(entity) {
  return parseToValidationResult(normalizedEntitySchema, entity);
}
128
+
129
+ export {
130
+ sourceMetadataSchema,
131
+ normalizedPostSchema,
132
+ normalizedPageSchema,
133
+ normalizedAssetExifSchema,
134
+ normalizedAssetSchema,
135
+ normalizedPortfolioSchema,
136
+ normalizedCategorySchema,
137
+ normalizedTagSchema,
138
+ normalizedEntitySchema,
139
+ validateNormalizedPost,
140
+ validateNormalizedPage,
141
+ validateNormalizedAsset,
142
+ validateNormalizedPortfolio,
143
+ validateNormalizedCategory,
144
+ validateNormalizedTag,
145
+ validateNormalizedEntity
146
+ };
147
+ //# sourceMappingURL=chunk-3YJFSTYR.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/normalizer/validate.ts"],"sourcesContent":["import { z } from \"zod\";\n\nimport type { ValidationIssue, ValidationResult } from \"./types.js\";\n\nconst migrationPlatformSchema = z.enum([\"wordpress\", \"smugmug\", \"squarespace\", \"wix\"]);\nconst publishStatusSchema = z.enum([\"draft\", \"published\", \"archived\"]);\n\nexport const sourceMetadataSchema = z.object({\n platform: migrationPlatformSchema,\n id: z.string().min(1),\n url: z.string().optional(),\n path: z.string().optional(),\n exportedAt: z.string().optional(),\n});\n\nexport const normalizedPostSchema = z.object({\n type: z.literal(\"post\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n title: z.string().min(1),\n slug: z.string().min(1),\n excerpt: z.string().optional(),\n contentHtml: z.string(),\n publishedAt: z.string().optional(),\n status: publishStatusSchema,\n categorySlugs: z.array(z.string().min(1)).optional(),\n tagSlugs: z.array(z.string().min(1)).optional(),\n sourceFeaturedMediaId: z.string().optional(),\n featuredAssetSourceId: z.string().optional(),\n seoTitle: z.string().optional(),\n seoDescription: z.string().optional(),\n});\n\nexport const normalizedPageSchema = z.object({\n type: z.literal(\"page\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n title: z.string().min(1),\n slug: z.string().min(1),\n contentHtml: z.string(),\n contentCss: z.string().optional(),\n isHomePage: z.boolean().optional(),\n status: publishStatusSchema,\n seoTitle: z.string().optional(),\n seoDescription: z.string().optional(),\n});\n\nexport const normalizedAssetExifSchema = z.object({\n iso: z.number().optional(),\n aperture: z.number().optional(),\n shutter: z.string().optional(),\n focalLength: z.number().optional(),\n});\n\nexport const normalizedAssetSchema = z.object({\n type: z.literal(\"asset\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n sourceUrl: z.string().min(1),\n filename: z.string().min(1),\n 
mimeType: z.string().optional(),\n caption: z.string().optional(),\n altText: z.string().optional(),\n keywords: z.array(z.string()).optional(),\n exif: normalizedAssetExifSchema.optional(),\n portfolioSourceId: z.string().optional(),\n sort: z.number().optional(),\n});\n\nexport const normalizedPortfolioSchema = z.object({\n type: z.literal(\"portfolio\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n title: z.string().min(1),\n slug: z.string().min(1),\n description: z.string().optional(),\n parentSourceId: z.string().optional(),\n});\n\nexport const normalizedCategorySchema = z.object({\n type: z.literal(\"category\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n name: z.string().min(1),\n slug: z.string().min(1),\n});\n\nexport const normalizedTagSchema = z.object({\n type: z.literal(\"tag\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n name: z.string().min(1),\n slug: z.string().min(1),\n});\n\nexport const normalizedEntitySchema = z.discriminatedUnion(\"type\", [\n normalizedPostSchema,\n normalizedPageSchema,\n normalizedAssetSchema,\n normalizedPortfolioSchema,\n normalizedCategorySchema,\n normalizedTagSchema,\n]);\n\nfunction zodIssuesToValidationIssues(issues: z.ZodIssue[]): ValidationIssue[] {\n return issues.map((issue) => ({\n code: issue.code,\n message: issue.message,\n path: issue.path.length > 0 ? issue.path.join(\".\") : undefined,\n }));\n}\n\nfunction parseToValidationResult(schema: z.ZodTypeAny, value: unknown): ValidationResult {\n const result = schema.safeParse(value);\n if (result.success) {\n return { ok: true, issues: [] };\n }\n return { ok: false, issues: zodIssuesToValidationIssues(result.error.issues) };\n}\n\n/** Opt-in structural check for a normalized post DTO (no cross-entity FK validation). 
*/\nexport function validateNormalizedPost(post: unknown): ValidationResult {\n return parseToValidationResult(normalizedPostSchema, post);\n}\n\n/** Opt-in structural check for a normalized page DTO (no cross-entity FK validation). */\nexport function validateNormalizedPage(page: unknown): ValidationResult {\n return parseToValidationResult(normalizedPageSchema, page);\n}\n\n/** Opt-in structural check for a normalized asset DTO. */\nexport function validateNormalizedAsset(asset: unknown): ValidationResult {\n return parseToValidationResult(normalizedAssetSchema, asset);\n}\n\n/** Opt-in structural check for a normalized portfolio DTO. */\nexport function validateNormalizedPortfolio(portfolio: unknown): ValidationResult {\n return parseToValidationResult(normalizedPortfolioSchema, portfolio);\n}\n\n/** Opt-in structural check for a normalized category DTO. */\nexport function validateNormalizedCategory(category: unknown): ValidationResult {\n return parseToValidationResult(normalizedCategorySchema, category);\n}\n\n/** Opt-in structural check for a normalized tag DTO. */\nexport function validateNormalizedTag(tag: unknown): ValidationResult {\n return parseToValidationResult(normalizedTagSchema, tag);\n}\n\n/** Opt-in structural check for any normalized entity discriminated by `type`. 
*/\nexport function validateNormalizedEntity(entity: unknown): ValidationResult {\n return parseToValidationResult(normalizedEntitySchema, entity);\n}\n"],"mappings":";AAAA,SAAS,SAAS;AAIlB,IAAM,0BAA0B,EAAE,KAAK,CAAC,aAAa,WAAW,eAAe,KAAK,CAAC;AACrF,IAAM,sBAAsB,EAAE,KAAK,CAAC,SAAS,aAAa,UAAU,CAAC;AAE9D,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,UAAU;AAAA,EACV,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACpB,KAAK,EAAE,OAAO,EAAE,SAAS;AAAA,EACzB,MAAM,EAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,aAAa,EAAE,OAAO;AAAA,EACtB,aAAa,EAAE,OAAO,EAAE,SAAS;AAAA,EACjC,QAAQ;AAAA,EACR,eAAe,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC,EAAE,SAAS;AAAA,EACnD,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC,EAAE,SAAS;AAAA,EAC9C,uBAAuB,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3C,uBAAuB,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3C,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,gBAAgB,EAAE,OAAO,EAAE,SAAS;AACtC,CAAC;AAEM,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,aAAa,EAAE,OAAO;AAAA,EACtB,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,YAAY,EAAE,QAAQ,EAAE,SAAS;AAAA,EACjC,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,gBAAgB,EAAE,OAAO,EAAE,SAAS;AACtC,CAAC;AAEM,IAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,KAAK,EAAE,OAAO,EAAE,SAAS;AAAA,EACzB,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,aAAa,EAAE,OAAO,EAAE,SAAS;AACnC,CAAC;AAEM,IAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,MAAM,EAAE,QAAQ,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC3B,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,UAAU,EAAE,
MAAM,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,EACvC,MAAM,0BAA0B,SAAS;AAAA,EACzC,mBAAmB,EAAE,OAAO,EAAE,SAAS;AAAA,EACvC,MAAM,EAAE,OAAO,EAAE,SAAS;AAC5B,CAAC;AAEM,IAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EAAE,QAAQ,WAAW;AAAA,EAC3B,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,aAAa,EAAE,OAAO,EAAE,SAAS;AAAA,EACjC,gBAAgB,EAAE,OAAO,EAAE,SAAS;AACtC,CAAC;AAEM,IAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,MAAM,EAAE,QAAQ,UAAU;AAAA,EAC1B,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AACxB,CAAC;AAEM,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,KAAK;AAAA,EACrB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AACxB,CAAC;AAEM,IAAM,yBAAyB,EAAE,mBAAmB,QAAQ;AAAA,EACjE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,SAAS,4BAA4B,QAAyC;AAC5E,SAAO,OAAO,IAAI,CAAC,WAAW;AAAA,IAC5B,MAAM,MAAM;AAAA,IACZ,SAAS,MAAM;AAAA,IACf,MAAM,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,KAAK,GAAG,IAAI;AAAA,EACvD,EAAE;AACJ;AAEA,SAAS,wBAAwB,QAAsB,OAAkC;AACvF,QAAM,SAAS,OAAO,UAAU,KAAK;AACrC,MAAI,OAAO,SAAS;AAClB,WAAO,EAAE,IAAI,MAAM,QAAQ,CAAC,EAAE;AAAA,EAChC;AACA,SAAO,EAAE,IAAI,OAAO,QAAQ,4BAA4B,OAAO,MAAM,MAAM,EAAE;AAC/E;AAGO,SAAS,uBAAuB,MAAiC;AACtE,SAAO,wBAAwB,sBAAsB,IAAI;AAC3D;AAGO,SAAS,uBAAuB,MAAiC;AACtE,SAAO,wBAAwB,sBAAsB,IAAI;AAC3D;AAGO,SAAS,wBAAwB,OAAkC;AACxE,SAAO,wBAAwB,uBAAuB,KAAK;AAC7D;AAGO,SAAS,4BAA4B,WAAsC;AAChF,SAAO,wBAAwB,2BAA2B,SAAS;AACrE;AAGO,SAAS,2BAA2B,UAAqC;AAC9E,SAAO,wBAAwB,0BAA0B,QAAQ;AACnE;AAGO,SAAS,sBAAsB,KAAgC;AACpE,SAAO,wBAAwB,qBAAqB,GAAG;AACzD;AAGO,SAAS,yBAAyB,QAAmC;AAC1E,SAAO,wBAAwB,wBAAwB,MAAM;AAC/D;","names":[]}
@@ -5,7 +5,12 @@ import {
5
5
  emptyBundle,
6
6
  entityKey,
7
7
  shouldProcessEntity
8
- } from "./chunk-JKDRTL24.js";
8
+ } from "./chunk-HI7JHWZU.js";
9
+ import {
10
+ discoverContentAssetUrls,
11
+ discoverRawImgSrcs,
12
+ normalizeAssetUrl
13
+ } from "./chunk-2PNSVE5Y.js";
9
14
 
10
15
  // src/sinks/types.ts
11
16
  var MIGRATION_WRITE_STAGES = [
@@ -20,64 +25,8 @@ var MIGRATION_WRITE_STAGES = [
20
25
  // src/sinks/run-migration.ts
21
26
  import { Readable } from "stream";
22
27
 
23
- // src/lib/content-asset-urls.ts
24
- import * as cheerio from "cheerio";
25
- var ASSET_URL_PARAM_PATTERN = /\b(?:src|image|url)\s*=\s*["']([^"']+)["']/gi;
26
- var IMAGE_EXTENSION_PATTERN = /\.(?:jpe?g|png|gif|webp|avif|svg)(?:[?#]|$)/i;
27
- var WP_UPLOADS_PATTERN = /\/wp-content\/uploads\//i;
28
- function extractImgTagSrcs(content) {
29
- if (!content.trim()) return [];
30
- const $ = cheerio.load(content, { xml: false });
31
- const srcs = [];
32
- $("img[src]").each((_, el) => {
33
- const src = $(el).attr("src")?.trim();
34
- if (src) srcs.push(src);
35
- });
36
- return srcs;
37
- }
38
- function discoverRawImgSrcs(content) {
39
- return extractImgTagSrcs(content).filter((src) => !src.startsWith("data:"));
40
- }
41
- function normalizeAssetUrl(raw) {
42
- const trimmed = raw.trim();
43
- if (!trimmed || trimmed.startsWith("data:")) return void 0;
44
- if (trimmed.startsWith("//")) return `https:${trimmed}`;
45
- return trimmed;
46
- }
47
- function isLikelyImageUrl(url) {
48
- if (!url || url.startsWith("data:")) return false;
49
- if (url.startsWith("/")) {
50
- return WP_UPLOADS_PATTERN.test(url) || IMAGE_EXTENSION_PATTERN.test(url);
51
- }
52
- if (!/^https?:\/\//i.test(url)) return false;
53
- if (WP_UPLOADS_PATTERN.test(url)) return true;
54
- try {
55
- const pathname = new URL(url).pathname;
56
- return IMAGE_EXTENSION_PATTERN.test(pathname);
57
- } catch {
58
- return IMAGE_EXTENSION_PATTERN.test(url);
59
- }
60
- }
61
- function discoverContentAssetUrls(content) {
62
- if (!content.trim()) return [];
63
- const urls = /* @__PURE__ */ new Set();
64
- for (const raw of extractImgTagSrcs(content)) {
65
- const normalized = normalizeAssetUrl(raw);
66
- if (normalized && isLikelyImageUrl(normalized)) {
67
- urls.add(normalized);
68
- }
69
- }
70
- for (const match of content.matchAll(ASSET_URL_PARAM_PATTERN)) {
71
- const normalized = normalizeAssetUrl(match[1] ?? "");
72
- if (normalized && isLikelyImageUrl(normalized)) {
73
- urls.add(normalized);
74
- }
75
- }
76
- return [...urls];
77
- }
78
-
79
28
  // src/transformers/rewrite-inline-images.ts
80
- import * as cheerio2 from "cheerio";
29
+ import * as cheerio from "cheerio";
81
30
  function rewriteSrcset(srcset, options, uploadedBySourceId, referencedSources, unresolved) {
82
31
  return srcset.split(",").map((entry) => {
83
32
  const trimmed = entry.trim();
@@ -104,7 +53,7 @@ function rewriteInlineImages(html, options, uploadedBySourceId) {
104
53
  if (!html.trim()) {
105
54
  return { html, referencedSources: [], unresolved: [] };
106
55
  }
107
- const $ = cheerio2.load(html, { xml: false });
56
+ const $ = cheerio.load(html, { xml: false });
108
57
  const referencedSources = /* @__PURE__ */ new Set();
109
58
  const unresolved = /* @__PURE__ */ new Set();
110
59
  $("img").each((_, element) => {
@@ -140,7 +89,7 @@ function rewriteInlineImages(html, options, uploadedBySourceId) {
140
89
  }
141
90
 
142
91
  // src/sinks/conflicts.ts
143
- import * as cheerio4 from "cheerio";
92
+ import * as cheerio3 from "cheerio";
144
93
 
145
94
  // src/parsers/squarespace/parse-export.ts
146
95
  import { readFile } from "fs/promises";
@@ -166,7 +115,7 @@ function linkToPath(link) {
166
115
  }
167
116
 
168
117
  // src/parsers/squarespace/collect.ts
169
- import * as cheerio3 from "cheerio";
118
+ import * as cheerio2 from "cheerio";
170
119
  import { z } from "zod";
171
120
  var SQUARESPACE_JSON_FORMAT = "json-pretty";
172
121
  var squarespaceClientOptionsSchema = z.object({
@@ -219,7 +168,7 @@ function inferBlockTypeFromClassName(className) {
219
168
  }
220
169
  function extractBlocksFromBodyHtml(html) {
221
170
  if (!html.trim()) return [];
222
- const $ = cheerio3.load(html, { xml: false });
171
+ const $ = cheerio2.load(html, { xml: false });
223
172
  const blocks = [];
224
173
  $(".sqs-block").each((_, element) => {
225
174
  const el = $(element);
@@ -1048,7 +997,7 @@ function analyzeHtml(html) {
1048
997
  issues.push("script_tag_present");
1049
998
  }
1050
999
  try {
1051
- const $ = cheerio4.load(html, { xml: false });
1000
+ const $ = cheerio3.load(html, { xml: false });
1052
1001
  $("p").each((_, el) => {
1053
1002
  const inner = $(el).html() ?? "";
1054
1003
  if (inner.includes("<p")) {
@@ -1546,7 +1495,6 @@ async function runDryRun(options) {
1546
1495
 
1547
1496
  export {
1548
1497
  MIGRATION_WRITE_STAGES,
1549
- discoverContentAssetUrls,
1550
1498
  rewriteInlineImages,
1551
1499
  sanitizeSlug,
1552
1500
  linkToPath,
@@ -1576,4 +1524,4 @@ export {
1576
1524
  staleUrlsFromEstimate,
1577
1525
  runDryRun
1578
1526
  };
1579
- //# sourceMappingURL=chunk-LKNIQQJO.js.map
1527
+ //# sourceMappingURL=chunk-HH7666MQ.js.map