@artinstack/migrator 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -9
- package/dist/{bundle-BfZqiKV_.d.ts → bundle-DfM_jKbq.d.ts} +2 -2
- package/dist/chunk-2PNSVE5Y.js +67 -0
- package/dist/chunk-2PNSVE5Y.js.map +1 -0
- package/dist/chunk-3YJFSTYR.js +147 -0
- package/dist/chunk-3YJFSTYR.js.map +1 -0
- package/dist/{chunk-LKNIQQJO.js → chunk-HH7666MQ.js} +13 -65
- package/dist/chunk-HH7666MQ.js.map +1 -0
- package/dist/{chunk-JKDRTL24.js → chunk-HI7JHWZU.js} +1 -1
- package/dist/chunk-HI7JHWZU.js.map +1 -0
- package/dist/chunk-VXEHAQKK.js +2290 -0
- package/dist/chunk-VXEHAQKK.js.map +1 -0
- package/dist/cli/index.js +23 -8
- package/dist/cli/index.js.map +1 -1
- package/dist/{index-DQNzrygx.d.ts → index-D88mjcF5.d.ts} +1 -1
- package/dist/index.d.ts +259 -5
- package/dist/index.js +127 -4
- package/dist/index.js.map +1 -1
- package/dist/lib/index.d.ts +16 -0
- package/dist/lib/index.js +15 -0
- package/dist/normalizer/index.d.ts +837 -3
- package/dist/normalizer/index.js +36 -3
- package/dist/sinks/index.d.ts +2 -2
- package/dist/sinks/index.js +3 -2
- package/package.json +5 -1
- package/dist/chunk-2RWAXT6O.js +0 -1
- package/dist/chunk-FXXKLYO5.js +0 -1076
- package/dist/chunk-FXXKLYO5.js.map +0 -1
- package/dist/chunk-JKDRTL24.js.map +0 -1
- package/dist/chunk-LKNIQQJO.js.map +0 -1
- /package/dist/{chunk-2RWAXT6O.js.map → lib/index.js.map} +0 -0
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# @artinstack/migrator
|
|
2
2
|
|
|
3
|
-
Stateless content normalizer and migration framework for transforming **WordPress**, **SmugMug**, **Squarespace**, and similar sources into a platform-agnostic schema.
|
|
3
|
+
Stateless content normalizer and migration framework for transforming **WordPress**, **SmugMug**, **Squarespace**, **Wix**, and similar sources into a platform-agnostic schema.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Portable parsers and JSON export are useful without any specific host. Job orchestration, credentials, and UI are implemented separately via `MigrationSink`.
|
|
6
6
|
|
|
7
7
|
See [docs/architecture.md](./docs/architecture.md) for the high-level blueprint: data flow, DTOs, sink contract, and source mappings.
|
|
8
8
|
|
|
@@ -10,7 +10,7 @@ See [docs/architecture.md](./docs/architecture.md) for the high-level blueprint:
|
|
|
10
10
|
|
|
11
11
|
```
|
|
12
12
|
src/
|
|
13
|
-
parsers/ WordPress, SmugMug, Squarespace → normalizer DTOs
|
|
13
|
+
parsers/ WordPress, SmugMug, Squarespace, Wix → normalizer DTOs
|
|
14
14
|
normalizer/ Canonical DTOs + portable idempotency types
|
|
15
15
|
sinks/ filesystem export, MigrationSink interface
|
|
16
16
|
cli/ artinstack-migrate
|
|
@@ -19,18 +19,21 @@ src/
|
|
|
19
19
|
|
|
20
20
|
## Install
|
|
21
21
|
|
|
22
|
-
**From npm
|
|
22
|
+
**From npm:**
|
|
23
23
|
|
|
24
24
|
```bash
|
|
25
25
|
pnpm add @artinstack/migrator
|
|
26
26
|
# or: npm install @artinstack/migrator
|
|
27
27
|
```
|
|
28
28
|
|
|
29
|
+
The `artinstack-migrate` binary is on your PATH after install (or use `npx artinstack-migrate`).
|
|
30
|
+
|
|
29
31
|
**From source** (development):
|
|
30
32
|
|
|
31
33
|
```bash
|
|
32
34
|
pnpm install
|
|
33
35
|
pnpm build
|
|
36
|
+
pnpm link --global # optional: artinstack-migrate on PATH
|
|
34
37
|
```
|
|
35
38
|
|
|
36
39
|
Requires **Node.js 20+**.
|
|
@@ -38,22 +41,76 @@ Requires **Node.js 20+**.
|
|
|
38
41
|
## CLI
|
|
39
42
|
|
|
40
43
|
```bash
|
|
41
|
-
|
|
44
|
+
artinstack-migrate <platform> <export-file> [options]
|
|
45
|
+
artinstack-migrate validate <platform> <export-file>
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**Platforms:** `wordpress`, `smugmug`, `squarespace`, `wix`
|
|
49
|
+
|
|
50
|
+
**Options:**
|
|
51
|
+
|
|
52
|
+
| Flag | Description |
|
|
53
|
+
|------|-------------|
|
|
54
|
+
| `--out <dir>` | Write normalized JSON files to a directory |
|
|
55
|
+
| `--format json` | Print combined JSON to stdout (no files written) |
|
|
56
|
+
| `--dry-run` | Parse and analyze only; no export files |
|
|
57
|
+
| `--report <dir>` | With `--dry-run`, write `conflicts.json` and `migration-report.json` |
|
|
58
|
+
| `--offline` | Skip network HEAD requests for asset size estimates |
|
|
59
|
+
| `--sink filesystem` | Run through `MigrationSink` before writing (requires `--out`) |
|
|
60
|
+
| `--urls <file>` | Wix only: URL list or `sitemap.xml` for static page snapshots |
|
|
42
61
|
|
|
43
|
-
|
|
62
|
+
**Examples:**
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Export normalized JSON
|
|
44
66
|
artinstack-migrate wordpress export.xml --out ./output
|
|
45
|
-
artinstack-migrate validate wordpress ./export.xml
|
|
46
67
|
|
|
47
|
-
#
|
|
68
|
+
# Preview conflicts without writing content
|
|
69
|
+
artinstack-migrate wordpress export.xml --dry-run --report ./preview/
|
|
70
|
+
|
|
71
|
+
# Validate export structure (JSON result on stdout, exit 0/1)
|
|
72
|
+
artinstack-migrate validate wordpress export.xml
|
|
73
|
+
|
|
74
|
+
# Wix: blog feed + static pages from a URL list
|
|
75
|
+
artinstack-migrate wix feed.xml --urls page-urls.txt --out ./output
|
|
76
|
+
|
|
77
|
+
# Local clone
|
|
48
78
|
pnpm cli wordpress export.xml --dry-run
|
|
49
|
-
node dist/cli/index.js wordpress export.xml --dry-run
|
|
50
79
|
```
|
|
51
80
|
|
|
81
|
+
### Output
|
|
82
|
+
|
|
83
|
+
**`--out ./output`** writes grouped JSON:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
output/
|
|
87
|
+
posts.json
|
|
88
|
+
pages.json
|
|
89
|
+
media.json
|
|
90
|
+
portfolios.json
|
|
91
|
+
portfolio-media.json
|
|
92
|
+
categories.json
|
|
93
|
+
tags.json
|
|
94
|
+
conflicts.json # when generated
|
|
95
|
+
migration-report.json # when generated
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Each file contains an array of normalized DTOs (`NormalizedPost`, `NormalizedPage`, `NormalizedAsset`, etc.). See [docs/architecture.md](./docs/architecture.md) for schema and per-platform input formats.
|
|
99
|
+
|
|
100
|
+
**`--format json`** prints the same entities as one combined JSON object to stdout.
|
|
101
|
+
|
|
102
|
+
**`validate`** prints a validation result JSON object (`ok`, `issues`, `summary` counts) and exits `0` on success, `1` on failure.
|
|
103
|
+
|
|
104
|
+
**`--dry-run`** exits `0` (clean), `2` (warnings), or `1` (blocking conflicts).
|
|
105
|
+
|
|
106
|
+
Per-platform export file formats and API client usage are documented in [docs/architecture.md](./docs/architecture.md).
|
|
107
|
+
|
|
52
108
|
## Development
|
|
53
109
|
|
|
54
110
|
```bash
|
|
55
111
|
pnpm typecheck
|
|
56
112
|
pnpm test
|
|
113
|
+
pnpm test:validate-fixtures # golden fixtures (wordpress, smugmug, squarespace, grapes, wix)
|
|
57
114
|
pnpm dev # watch build
|
|
58
115
|
```
|
|
59
116
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
type MigrationPlatform = "wordpress" | "smugmug" | "squarespace";
|
|
1
|
+
type MigrationPlatform = "wordpress" | "smugmug" | "squarespace" | "wix";
|
|
2
2
|
type EntityType = "post" | "page" | "asset" | "portfolio" | "category" | "tag";
|
|
3
3
|
type PublishStatus = "draft" | "published" | "archived";
|
|
4
4
|
interface SourceMetadata {
|
|
@@ -150,4 +150,4 @@ interface BundleCounts {
|
|
|
150
150
|
}
|
|
151
151
|
declare function bundleCounts(bundle: EntityBundle): BundleCounts;
|
|
152
152
|
|
|
153
|
-
export { type AdapterContext as A, type BundleCounts as B, type EntityBundle as E, type MigrationAdapter as M, type NormalizedAsset as N, type PortfolioMediaLink as P, type SourceMetadata as S, type
|
|
153
|
+
export { type AdapterContext as A, type BundleCounts as B, type EntityBundle as E, type MigrationAdapter as M, type NormalizedAsset as N, type PortfolioMediaLink as P, type SourceMetadata as S, type ValidationResult as V, type MigrationPlatform as a, type EntityKey as b, type EntityType as c, type MigrationCursor as d, type NormalizedAssetExif as e, type NormalizedCategory as f, type NormalizedEntity as g, type NormalizedPage as h, type NormalizedPortfolio as i, type NormalizedPost as j, type NormalizedTag as k, type PublishStatus as l, type ValidationIssue as m, bundleCounts as n, collectEntities as o, emptyBundle as p, entityKey as q };
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// src/lib/content-asset-urls.ts
|
|
2
|
+
import * as cheerio from "cheerio";
|
|
3
|
+
// Matches attribute-style assignments named `src`, `image`, or `url` whose
// value is wrapped in single or double quotes; the value is capture group 1.
// Global + case-insensitive so it can be used with `String.prototype.matchAll`.
var ASSET_URL_PARAM_PATTERN = /\b(?:src|image|url)\s*=\s*["']([^"']+)["']/gi;

// Matches a common image file extension that is immediately followed by `?`,
// `#`, or the end of the tested string (case-insensitive).
var IMAGE_EXTENSION_PATTERN = /\.(?:jpe?g|png|gif|webp|avif|svg)(?:[?#]|$)/i;

// Matches the `/wp-content/uploads/` path segment (case-insensitive).
var WP_UPLOADS_PATTERN = /\/wp-content\/uploads\//i;
|
|
6
|
+
/**
 * Collects the `src` attribute of every `<img>` element found in `content`.
 *
 * The markup is loaded with cheerio in HTML mode (`xml: false`). Each value is
 * trimmed, and values that are empty after trimming are dropped.
 *
 * @param {string} content - Markup to scan.
 * @returns {string[]} Trimmed, non-empty `src` values in document order.
 */
function extractImgTagSrcs(content) {
  if (content.trim() === "") return [];
  const $ = cheerio.load(content, { xml: false });
  return $("img[src]")
    .toArray()
    .map((el) => $(el).attr("src")?.trim())
    .filter((src) => Boolean(src));
}
|
|
16
|
+
/**
 * Returns every `<img src>` value found in `content`, excluding `data:` URIs.
 *
 * URI schemes are case-insensitive, so the `data:` prefix is matched without
 * regard to case (`DATA:image/png;...` is excluded as well).
 *
 * @param {string} content - Markup to scan.
 * @returns {string[]} `src` values as produced by `extractImgTagSrcs`, minus data URIs.
 */
function discoverRawImgSrcs(content) {
  return extractImgTagSrcs(content).filter((src) => !/^data:/i.test(src));
}
|
|
19
|
+
/**
 * Normalizes a raw asset URL taken from content.
 *
 * - Surrounding whitespace is trimmed.
 * - Empty values and `data:` URIs yield `undefined`. The scheme is matched
 *   case-insensitively because URI schemes are case-insensitive.
 * - Protocol-relative URLs (`//host/path`) are prefixed with `https:`.
 * - Anything else is returned trimmed but otherwise unchanged.
 *
 * @param {string} raw - URL as it appears in the source content.
 * @returns {string | undefined} The normalized URL, or `undefined` when the value should be skipped.
 */
function normalizeAssetUrl(raw) {
  const trimmed = raw.trim();
  if (trimmed === "" || /^data:/i.test(trimmed)) return void 0;
  if (trimmed.startsWith("//")) return `https:${trimmed}`;
  return trimmed;
}
|
|
25
|
+
/**
 * Heuristic check for whether `url` looks like it points at an image asset.
 *
 * - Empty values and values starting with `data:` are rejected.
 * - URLs starting with `/` are accepted when they contain `/wp-content/uploads/`
 *   or match the image-extension pattern (tested against the whole string).
 * - Any other URL must start with `http://` or `https://`; it is accepted when
 *   it contains `/wp-content/uploads/`, or when its parsed pathname matches
 *   the image-extension pattern. If the URL cannot be parsed, the raw string
 *   is tested instead.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} `true` when the URL is likely an image.
 */
function isLikelyImageUrl(url) {
  if (!url || url.startsWith("data:")) return false;

  const hasUploadsSegment = WP_UPLOADS_PATTERN.test(url);

  if (url.startsWith("/")) {
    return hasUploadsSegment || IMAGE_EXTENSION_PATTERN.test(url);
  }

  const isHttpUrl = /^https?:\/\//i.test(url);
  if (!isHttpUrl) return false;
  if (hasUploadsSegment) return true;

  // Prefer the pathname so query strings do not influence the extension check;
  // fall back to the raw string when the URL is not parseable.
  let target = url;
  try {
    target = new URL(url).pathname;
  } catch {
    target = url;
  }
  return IMAGE_EXTENSION_PATTERN.test(target);
}
|
|
39
|
+
/**
 * Collects likely image URLs from `content`.
 *
 * Candidates come from two passes, in this order: `<img src>` values, then
 * quoted `src=` / `image=` / `url=` attribute-style values matched by
 * `ASSET_URL_PARAM_PATTERN`. Each candidate is normalized with
 * `normalizeAssetUrl` and kept only when `isLikelyImageUrl` accepts it.
 * Duplicates are removed while preserving first-seen order.
 *
 * @param {string} content - Markup or shortcode text to scan.
 * @returns {string[]} Unique normalized image URLs.
 */
function discoverContentAssetUrls(content) {
  if (content.trim() === "") return [];

  const candidates = [
    ...extractImgTagSrcs(content),
    ...Array.from(content.matchAll(ASSET_URL_PARAM_PATTERN), (match) => match[1] ?? "")
  ];

  const found = new Set();
  for (const candidate of candidates) {
    const assetUrl = normalizeAssetUrl(candidate);
    if (assetUrl && isLikelyImageUrl(assetUrl)) {
      found.add(assetUrl);
    }
  }
  return Array.from(found);
}
|
|
56
|
+
/**
 * Alias that forwards to `discoverContentAssetUrls`.
 *
 * @deprecated Use `discoverContentAssetUrls` instead.
 * @param {string} content - Markup or shortcode text to scan.
 * @returns {string[]} Whatever `discoverContentAssetUrls(content)` returns.
 */
function extractInlineImageSrcs(content) {
  const urls = discoverContentAssetUrls(content);
  return urls;
}
|
|
59
|
+
|
|
60
|
+
export {
|
|
61
|
+
discoverRawImgSrcs,
|
|
62
|
+
normalizeAssetUrl,
|
|
63
|
+
isLikelyImageUrl,
|
|
64
|
+
discoverContentAssetUrls,
|
|
65
|
+
extractInlineImageSrcs
|
|
66
|
+
};
|
|
67
|
+
//# sourceMappingURL=chunk-2PNSVE5Y.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/lib/content-asset-urls.ts"],"sourcesContent":["import * as cheerio from \"cheerio\";\n\n/** Builder-agnostic attribute names that commonly hold image URLs in post_content. */\nconst ASSET_URL_PARAM_PATTERN =\n /\\b(?:src|image|url)\\s*=\\s*[\"']([^\"']+)[\"']/gi;\n\nconst IMAGE_EXTENSION_PATTERN = /\\.(?:jpe?g|png|gif|webp|avif|svg)(?:[?#]|$)/i;\nconst WP_UPLOADS_PATTERN = /\\/wp-content\\/uploads\\//i;\n\nfunction extractImgTagSrcs(content: string): string[] {\n if (!content.trim()) return [];\n const $ = cheerio.load(content, { xml: false });\n const srcs: string[] = [];\n $(\"img[src]\").each((_, el) => {\n const src = $(el).attr(\"src\")?.trim();\n if (src) srcs.push(src);\n });\n return srcs;\n}\n\n/** All `<img src>` values (including those not ingested as vault assets). */\nexport function discoverRawImgSrcs(content: string): string[] {\n return extractImgTagSrcs(content).filter((src) => !src.startsWith(\"data:\"));\n}\n\n/** Normalize protocol-relative and trim; skip data URIs. */\nexport function normalizeAssetUrl(raw: string): string | undefined {\n const trimmed = raw.trim();\n if (!trimmed || trimmed.startsWith(\"data:\")) return undefined;\n if (trimmed.startsWith(\"//\")) return `https:${trimmed}`;\n return trimmed;\n}\n\n/** Heuristic: URL likely points at a raster/vector image asset, not a page link. 
*/\nexport function isLikelyImageUrl(url: string): boolean {\n if (!url || url.startsWith(\"data:\")) return false;\n\n if (url.startsWith(\"/\")) {\n return WP_UPLOADS_PATTERN.test(url) || IMAGE_EXTENSION_PATTERN.test(url);\n }\n\n if (!/^https?:\\/\\//i.test(url)) return false;\n\n if (WP_UPLOADS_PATTERN.test(url)) return true;\n\n try {\n const pathname = new URL(url).pathname;\n return IMAGE_EXTENSION_PATTERN.test(pathname);\n } catch {\n return IMAGE_EXTENSION_PATTERN.test(url);\n }\n}\n\n/**\n * Generic content-discovery pass: collect image URLs from HTML `<img>` tags and\n * common shortcode/builder attributes (`src=`, `image=`, `url=`) without parsing\n * builder-specific structure (Tatsu, Elementor, etc.).\n */\nexport function discoverContentAssetUrls(content: string): string[] {\n if (!content.trim()) return [];\n\n const urls = new Set<string>();\n\n for (const raw of extractImgTagSrcs(content)) {\n const normalized = normalizeAssetUrl(raw);\n if (normalized && isLikelyImageUrl(normalized)) {\n urls.add(normalized);\n }\n }\n\n for (const match of content.matchAll(ASSET_URL_PARAM_PATTERN)) {\n const normalized = normalizeAssetUrl(match[1] ?? \"\");\n if (normalized && isLikelyImageUrl(normalized)) {\n urls.add(normalized);\n }\n }\n\n return [...urls];\n}\n\n/** @deprecated Use discoverContentAssetUrls — kept for call-site clarity during transition. 
*/\nexport function extractInlineImageSrcs(content: string): string[] {\n return discoverContentAssetUrls(content);\n}\n"],"mappings":";AAAA,YAAY,aAAa;AAGzB,IAAM,0BACJ;AAEF,IAAM,0BAA0B;AAChC,IAAM,qBAAqB;AAE3B,SAAS,kBAAkB,SAA2B;AACpD,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAC7B,QAAM,IAAY,aAAK,SAAS,EAAE,KAAK,MAAM,CAAC;AAC9C,QAAM,OAAiB,CAAC;AACxB,IAAE,UAAU,EAAE,KAAK,CAAC,GAAG,OAAO;AAC5B,UAAM,MAAM,EAAE,EAAE,EAAE,KAAK,KAAK,GAAG,KAAK;AACpC,QAAI,IAAK,MAAK,KAAK,GAAG;AAAA,EACxB,CAAC;AACD,SAAO;AACT;AAGO,SAAS,mBAAmB,SAA2B;AAC5D,SAAO,kBAAkB,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,WAAW,OAAO,CAAC;AAC5E;AAGO,SAAS,kBAAkB,KAAiC;AACjE,QAAM,UAAU,IAAI,KAAK;AACzB,MAAI,CAAC,WAAW,QAAQ,WAAW,OAAO,EAAG,QAAO;AACpD,MAAI,QAAQ,WAAW,IAAI,EAAG,QAAO,SAAS,OAAO;AACrD,SAAO;AACT;AAGO,SAAS,iBAAiB,KAAsB;AACrD,MAAI,CAAC,OAAO,IAAI,WAAW,OAAO,EAAG,QAAO;AAE5C,MAAI,IAAI,WAAW,GAAG,GAAG;AACvB,WAAO,mBAAmB,KAAK,GAAG,KAAK,wBAAwB,KAAK,GAAG;AAAA,EACzE;AAEA,MAAI,CAAC,gBAAgB,KAAK,GAAG,EAAG,QAAO;AAEvC,MAAI,mBAAmB,KAAK,GAAG,EAAG,QAAO;AAEzC,MAAI;AACF,UAAM,WAAW,IAAI,IAAI,GAAG,EAAE;AAC9B,WAAO,wBAAwB,KAAK,QAAQ;AAAA,EAC9C,QAAQ;AACN,WAAO,wBAAwB,KAAK,GAAG;AAAA,EACzC;AACF;AAOO,SAAS,yBAAyB,SAA2B;AAClE,MAAI,CAAC,QAAQ,KAAK,EAAG,QAAO,CAAC;AAE7B,QAAM,OAAO,oBAAI,IAAY;AAE7B,aAAW,OAAO,kBAAkB,OAAO,GAAG;AAC5C,UAAM,aAAa,kBAAkB,GAAG;AACxC,QAAI,cAAc,iBAAiB,UAAU,GAAG;AAC9C,WAAK,IAAI,UAAU;AAAA,IACrB;AAAA,EACF;AAEA,aAAW,SAAS,QAAQ,SAAS,uBAAuB,GAAG;AAC7D,UAAM,aAAa,kBAAkB,MAAM,CAAC,KAAK,EAAE;AACnD,QAAI,cAAc,iBAAiB,UAAU,GAAG;AAC9C,WAAK,IAAI,UAAU;AAAA,IACrB;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,IAAI;AACjB;AAGO,SAAS,uBAAuB,SAA2B;AAChE,SAAO,yBAAyB,OAAO;AACzC;","names":[]}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
// src/normalizer/validate.ts
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
// Zod schemas for the normalized DTOs. These are structural checks only.

/** Builds a fresh schema for a string with at least one character. */
function requiredText() {
  return z.string().min(1);
}

/** Builds a fresh schema for a string that may be omitted. */
function optionalText() {
  return z.string().optional();
}

/** Builds a fresh schema for a number that may be omitted. */
function optionalNumber() {
  return z.number().optional();
}

/** Builds a fresh schema for an optional array of non-empty strings. */
function optionalSlugList() {
  return z.array(requiredText()).optional();
}

/**
 * Fields shared by every normalized entity, in the order the schemas declare
 * them: the `type` discriminator literal, the `source` metadata, and `sourceId`.
 */
function entityBase(type) {
  return {
    type: z.literal(type),
    source: sourceMetadataSchema,
    sourceId: requiredText()
  };
}

// Accepted source platforms and publish states.
var migrationPlatformSchema = z.enum(["wordpress", "smugmug", "squarespace", "wix"]);
var publishStatusSchema = z.enum(["draft", "published", "archived"]);

// Provenance attached to every entity.
var sourceMetadataSchema = z.object({
  platform: migrationPlatformSchema,
  id: requiredText(),
  url: optionalText(),
  path: optionalText(),
  exportedAt: optionalText()
});

var normalizedPostSchema = z.object({
  ...entityBase("post"),
  title: requiredText(),
  slug: requiredText(),
  excerpt: optionalText(),
  contentHtml: z.string(),
  publishedAt: optionalText(),
  status: publishStatusSchema,
  categorySlugs: optionalSlugList(),
  tagSlugs: optionalSlugList(),
  sourceFeaturedMediaId: optionalText(),
  featuredAssetSourceId: optionalText(),
  seoTitle: optionalText(),
  seoDescription: optionalText()
});

var normalizedPageSchema = z.object({
  ...entityBase("page"),
  title: requiredText(),
  slug: requiredText(),
  contentHtml: z.string(),
  contentCss: optionalText(),
  isHomePage: z.boolean().optional(),
  status: publishStatusSchema,
  seoTitle: optionalText(),
  seoDescription: optionalText()
});

var normalizedAssetExifSchema = z.object({
  iso: optionalNumber(),
  aperture: optionalNumber(),
  shutter: optionalText(),
  focalLength: optionalNumber()
});

var normalizedAssetSchema = z.object({
  ...entityBase("asset"),
  sourceUrl: requiredText(),
  filename: requiredText(),
  mimeType: optionalText(),
  caption: optionalText(),
  altText: optionalText(),
  keywords: z.array(z.string()).optional(),
  exif: normalizedAssetExifSchema.optional(),
  portfolioSourceId: optionalText(),
  sort: optionalNumber()
});

var normalizedPortfolioSchema = z.object({
  ...entityBase("portfolio"),
  title: requiredText(),
  slug: requiredText(),
  description: optionalText(),
  parentSourceId: optionalText()
});

var normalizedCategorySchema = z.object({
  ...entityBase("category"),
  name: requiredText(),
  slug: requiredText()
});

var normalizedTagSchema = z.object({
  ...entityBase("tag"),
  name: requiredText(),
  slug: requiredText()
});

// Any normalized entity, selected by its `type` field.
var normalizedEntitySchema = z.discriminatedUnion("type", [
  normalizedPostSchema,
  normalizedPageSchema,
  normalizedAssetSchema,
  normalizedPortfolioSchema,
  normalizedCategorySchema,
  normalizedTagSchema
]);
|
|
93
|
+
/**
 * Converts a list of issue objects (each with `code`, `message`, and a `path`
 * array) into validation issues.
 *
 * The path segments are joined with `.`; an empty path becomes `undefined`
 * (the `path` key is still present on the result object).
 *
 * @param {Array<{code: string, message: string, path: Array<string | number>}>} issues
 * @returns {Array<{code: string, message: string, path: string | undefined}>}
 */
function zodIssuesToValidationIssues(issues) {
  const converted = [];
  for (const { code, message, path } of issues) {
    const dottedPath = path.length > 0 ? path.join(".") : void 0;
    converted.push({ code, message, path: dottedPath });
  }
  return converted;
}
|
|
100
|
+
/**
 * Runs `schema.safeParse(value)` and reshapes the outcome into a validation result.
 *
 * @param {{ safeParse: (value: unknown) => any }} schema - Schema exposing `safeParse`.
 * @param {unknown} value - Value to check.
 * @returns {{ ok: boolean, issues: Array<object> }} `{ ok: true, issues: [] }` on success;
 *   otherwise `ok: false` with the parse error's issues converted by
 *   `zodIssuesToValidationIssues`.
 */
function parseToValidationResult(schema, value) {
  const outcome = schema.safeParse(value);
  return outcome.success
    ? { ok: true, issues: [] }
    : { ok: false, issues: zodIssuesToValidationIssues(outcome.error.issues) };
}
|
|
107
|
+
/**
 * Structural check of a normalized post DTO against `normalizedPostSchema`.
 *
 * @param {unknown} post - Candidate post.
 * @returns {{ ok: boolean, issues: Array<object> }} Validation result.
 */
function validateNormalizedPost(post) {
  const result = parseToValidationResult(normalizedPostSchema, post);
  return result;
}
|
|
110
|
+
/**
 * Structural check of a normalized page DTO against `normalizedPageSchema`.
 *
 * @param {unknown} page - Candidate page.
 * @returns {{ ok: boolean, issues: Array<object> }} Validation result.
 */
function validateNormalizedPage(page) {
  const result = parseToValidationResult(normalizedPageSchema, page);
  return result;
}
|
|
113
|
+
/**
 * Structural check of a normalized asset DTO against `normalizedAssetSchema`.
 *
 * @param {unknown} asset - Candidate asset.
 * @returns {{ ok: boolean, issues: Array<object> }} Validation result.
 */
function validateNormalizedAsset(asset) {
  const result = parseToValidationResult(normalizedAssetSchema, asset);
  return result;
}
|
|
116
|
+
/**
 * Structural check of a normalized portfolio DTO against `normalizedPortfolioSchema`.
 *
 * @param {unknown} portfolio - Candidate portfolio.
 * @returns {{ ok: boolean, issues: Array<object> }} Validation result.
 */
function validateNormalizedPortfolio(portfolio) {
  const result = parseToValidationResult(normalizedPortfolioSchema, portfolio);
  return result;
}
|
|
119
|
+
/**
 * Structural check of a normalized category DTO against `normalizedCategorySchema`.
 *
 * @param {unknown} category - Candidate category.
 * @returns {{ ok: boolean, issues: Array<object> }} Validation result.
 */
function validateNormalizedCategory(category) {
  const result = parseToValidationResult(normalizedCategorySchema, category);
  return result;
}
|
|
122
|
+
/**
 * Structural check of a normalized tag DTO against `normalizedTagSchema`.
 *
 * @param {unknown} tag - Candidate tag.
 * @returns {{ ok: boolean, issues: Array<object> }} Validation result.
 */
function validateNormalizedTag(tag) {
  const result = parseToValidationResult(normalizedTagSchema, tag);
  return result;
}
|
|
125
|
+
/**
 * Structural check of any normalized entity against `normalizedEntitySchema`,
 * the union discriminated by the entity's `type` field.
 *
 * @param {unknown} entity - Candidate entity.
 * @returns {{ ok: boolean, issues: Array<object> }} Validation result.
 */
function validateNormalizedEntity(entity) {
  const result = parseToValidationResult(normalizedEntitySchema, entity);
  return result;
}
|
|
128
|
+
|
|
129
|
+
export {
|
|
130
|
+
sourceMetadataSchema,
|
|
131
|
+
normalizedPostSchema,
|
|
132
|
+
normalizedPageSchema,
|
|
133
|
+
normalizedAssetExifSchema,
|
|
134
|
+
normalizedAssetSchema,
|
|
135
|
+
normalizedPortfolioSchema,
|
|
136
|
+
normalizedCategorySchema,
|
|
137
|
+
normalizedTagSchema,
|
|
138
|
+
normalizedEntitySchema,
|
|
139
|
+
validateNormalizedPost,
|
|
140
|
+
validateNormalizedPage,
|
|
141
|
+
validateNormalizedAsset,
|
|
142
|
+
validateNormalizedPortfolio,
|
|
143
|
+
validateNormalizedCategory,
|
|
144
|
+
validateNormalizedTag,
|
|
145
|
+
validateNormalizedEntity
|
|
146
|
+
};
|
|
147
|
+
//# sourceMappingURL=chunk-3YJFSTYR.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/normalizer/validate.ts"],"sourcesContent":["import { z } from \"zod\";\n\nimport type { ValidationIssue, ValidationResult } from \"./types.js\";\n\nconst migrationPlatformSchema = z.enum([\"wordpress\", \"smugmug\", \"squarespace\", \"wix\"]);\nconst publishStatusSchema = z.enum([\"draft\", \"published\", \"archived\"]);\n\nexport const sourceMetadataSchema = z.object({\n platform: migrationPlatformSchema,\n id: z.string().min(1),\n url: z.string().optional(),\n path: z.string().optional(),\n exportedAt: z.string().optional(),\n});\n\nexport const normalizedPostSchema = z.object({\n type: z.literal(\"post\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n title: z.string().min(1),\n slug: z.string().min(1),\n excerpt: z.string().optional(),\n contentHtml: z.string(),\n publishedAt: z.string().optional(),\n status: publishStatusSchema,\n categorySlugs: z.array(z.string().min(1)).optional(),\n tagSlugs: z.array(z.string().min(1)).optional(),\n sourceFeaturedMediaId: z.string().optional(),\n featuredAssetSourceId: z.string().optional(),\n seoTitle: z.string().optional(),\n seoDescription: z.string().optional(),\n});\n\nexport const normalizedPageSchema = z.object({\n type: z.literal(\"page\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n title: z.string().min(1),\n slug: z.string().min(1),\n contentHtml: z.string(),\n contentCss: z.string().optional(),\n isHomePage: z.boolean().optional(),\n status: publishStatusSchema,\n seoTitle: z.string().optional(),\n seoDescription: z.string().optional(),\n});\n\nexport const normalizedAssetExifSchema = z.object({\n iso: z.number().optional(),\n aperture: z.number().optional(),\n shutter: z.string().optional(),\n focalLength: z.number().optional(),\n});\n\nexport const normalizedAssetSchema = z.object({\n type: z.literal(\"asset\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n sourceUrl: z.string().min(1),\n filename: z.string().min(1),\n 
mimeType: z.string().optional(),\n caption: z.string().optional(),\n altText: z.string().optional(),\n keywords: z.array(z.string()).optional(),\n exif: normalizedAssetExifSchema.optional(),\n portfolioSourceId: z.string().optional(),\n sort: z.number().optional(),\n});\n\nexport const normalizedPortfolioSchema = z.object({\n type: z.literal(\"portfolio\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n title: z.string().min(1),\n slug: z.string().min(1),\n description: z.string().optional(),\n parentSourceId: z.string().optional(),\n});\n\nexport const normalizedCategorySchema = z.object({\n type: z.literal(\"category\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n name: z.string().min(1),\n slug: z.string().min(1),\n});\n\nexport const normalizedTagSchema = z.object({\n type: z.literal(\"tag\"),\n source: sourceMetadataSchema,\n sourceId: z.string().min(1),\n name: z.string().min(1),\n slug: z.string().min(1),\n});\n\nexport const normalizedEntitySchema = z.discriminatedUnion(\"type\", [\n normalizedPostSchema,\n normalizedPageSchema,\n normalizedAssetSchema,\n normalizedPortfolioSchema,\n normalizedCategorySchema,\n normalizedTagSchema,\n]);\n\nfunction zodIssuesToValidationIssues(issues: z.ZodIssue[]): ValidationIssue[] {\n return issues.map((issue) => ({\n code: issue.code,\n message: issue.message,\n path: issue.path.length > 0 ? issue.path.join(\".\") : undefined,\n }));\n}\n\nfunction parseToValidationResult(schema: z.ZodTypeAny, value: unknown): ValidationResult {\n const result = schema.safeParse(value);\n if (result.success) {\n return { ok: true, issues: [] };\n }\n return { ok: false, issues: zodIssuesToValidationIssues(result.error.issues) };\n}\n\n/** Opt-in structural check for a normalized post DTO (no cross-entity FK validation). 
*/\nexport function validateNormalizedPost(post: unknown): ValidationResult {\n return parseToValidationResult(normalizedPostSchema, post);\n}\n\n/** Opt-in structural check for a normalized page DTO (no cross-entity FK validation). */\nexport function validateNormalizedPage(page: unknown): ValidationResult {\n return parseToValidationResult(normalizedPageSchema, page);\n}\n\n/** Opt-in structural check for a normalized asset DTO. */\nexport function validateNormalizedAsset(asset: unknown): ValidationResult {\n return parseToValidationResult(normalizedAssetSchema, asset);\n}\n\n/** Opt-in structural check for a normalized portfolio DTO. */\nexport function validateNormalizedPortfolio(portfolio: unknown): ValidationResult {\n return parseToValidationResult(normalizedPortfolioSchema, portfolio);\n}\n\n/** Opt-in structural check for a normalized category DTO. */\nexport function validateNormalizedCategory(category: unknown): ValidationResult {\n return parseToValidationResult(normalizedCategorySchema, category);\n}\n\n/** Opt-in structural check for a normalized tag DTO. */\nexport function validateNormalizedTag(tag: unknown): ValidationResult {\n return parseToValidationResult(normalizedTagSchema, tag);\n}\n\n/** Opt-in structural check for any normalized entity discriminated by `type`. 
*/\nexport function validateNormalizedEntity(entity: unknown): ValidationResult {\n return parseToValidationResult(normalizedEntitySchema, entity);\n}\n"],"mappings":";AAAA,SAAS,SAAS;AAIlB,IAAM,0BAA0B,EAAE,KAAK,CAAC,aAAa,WAAW,eAAe,KAAK,CAAC;AACrF,IAAM,sBAAsB,EAAE,KAAK,CAAC,SAAS,aAAa,UAAU,CAAC;AAE9D,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,UAAU;AAAA,EACV,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACpB,KAAK,EAAE,OAAO,EAAE,SAAS;AAAA,EACzB,MAAM,EAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,aAAa,EAAE,OAAO;AAAA,EACtB,aAAa,EAAE,OAAO,EAAE,SAAS;AAAA,EACjC,QAAQ;AAAA,EACR,eAAe,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC,EAAE,SAAS;AAAA,EACnD,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC,EAAE,SAAS;AAAA,EAC9C,uBAAuB,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3C,uBAAuB,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3C,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,gBAAgB,EAAE,OAAO,EAAE,SAAS;AACtC,CAAC;AAEM,IAAM,uBAAuB,EAAE,OAAO;AAAA,EAC3C,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,aAAa,EAAE,OAAO;AAAA,EACtB,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,YAAY,EAAE,QAAQ,EAAE,SAAS;AAAA,EACjC,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,gBAAgB,EAAE,OAAO,EAAE,SAAS;AACtC,CAAC;AAEM,IAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,KAAK,EAAE,OAAO,EAAE,SAAS;AAAA,EACzB,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,aAAa,EAAE,OAAO,EAAE,SAAS;AACnC,CAAC;AAEM,IAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,MAAM,EAAE,QAAQ,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC3B,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,UAAU,EAAE,
MAAM,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,EACvC,MAAM,0BAA0B,SAAS;AAAA,EACzC,mBAAmB,EAAE,OAAO,EAAE,SAAS;AAAA,EACvC,MAAM,EAAE,OAAO,EAAE,SAAS;AAC5B,CAAC;AAEM,IAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EAAE,QAAQ,WAAW;AAAA,EAC3B,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACvB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,aAAa,EAAE,OAAO,EAAE,SAAS;AAAA,EACjC,gBAAgB,EAAE,OAAO,EAAE,SAAS;AACtC,CAAC;AAEM,IAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,MAAM,EAAE,QAAQ,UAAU;AAAA,EAC1B,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AACxB,CAAC;AAEM,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,KAAK;AAAA,EACrB,QAAQ;AAAA,EACR,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EAC1B,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA,EACtB,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC;AACxB,CAAC;AAEM,IAAM,yBAAyB,EAAE,mBAAmB,QAAQ;AAAA,EACjE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,SAAS,4BAA4B,QAAyC;AAC5E,SAAO,OAAO,IAAI,CAAC,WAAW;AAAA,IAC5B,MAAM,MAAM;AAAA,IACZ,SAAS,MAAM;AAAA,IACf,MAAM,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,KAAK,GAAG,IAAI;AAAA,EACvD,EAAE;AACJ;AAEA,SAAS,wBAAwB,QAAsB,OAAkC;AACvF,QAAM,SAAS,OAAO,UAAU,KAAK;AACrC,MAAI,OAAO,SAAS;AAClB,WAAO,EAAE,IAAI,MAAM,QAAQ,CAAC,EAAE;AAAA,EAChC;AACA,SAAO,EAAE,IAAI,OAAO,QAAQ,4BAA4B,OAAO,MAAM,MAAM,EAAE;AAC/E;AAGO,SAAS,uBAAuB,MAAiC;AACtE,SAAO,wBAAwB,sBAAsB,IAAI;AAC3D;AAGO,SAAS,uBAAuB,MAAiC;AACtE,SAAO,wBAAwB,sBAAsB,IAAI;AAC3D;AAGO,SAAS,wBAAwB,OAAkC;AACxE,SAAO,wBAAwB,uBAAuB,KAAK;AAC7D;AAGO,SAAS,4BAA4B,WAAsC;AAChF,SAAO,wBAAwB,2BAA2B,SAAS;AACrE;AAGO,SAAS,2BAA2B,UAAqC;AAC9E,SAAO,wBAAwB,0BAA0B,QAAQ;AACnE;AAGO,SAAS,sBAAsB,KAAgC;AACpE,SAAO,wBAAwB,qBAAqB,GAAG;AACzD;AAGO,SAAS,yBAAyB,QAAmC;AAC1E,SAAO,wBAAwB,wBAAwB,MAAM;AAC/D;","names":[]}
|
|
@@ -5,7 +5,12 @@ import {
|
|
|
5
5
|
emptyBundle,
|
|
6
6
|
entityKey,
|
|
7
7
|
shouldProcessEntity
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-HI7JHWZU.js";
|
|
9
|
+
import {
|
|
10
|
+
discoverContentAssetUrls,
|
|
11
|
+
discoverRawImgSrcs,
|
|
12
|
+
normalizeAssetUrl
|
|
13
|
+
} from "./chunk-2PNSVE5Y.js";
|
|
9
14
|
|
|
10
15
|
// src/sinks/types.ts
|
|
11
16
|
var MIGRATION_WRITE_STAGES = [
|
|
@@ -20,64 +25,8 @@ var MIGRATION_WRITE_STAGES = [
|
|
|
20
25
|
// src/sinks/run-migration.ts
|
|
21
26
|
import { Readable } from "stream";
|
|
22
27
|
|
|
23
|
-
// src/lib/content-asset-urls.ts
|
|
24
|
-
import * as cheerio from "cheerio";
|
|
25
|
-
var ASSET_URL_PARAM_PATTERN = /\b(?:src|image|url)\s*=\s*["']([^"']+)["']/gi;
|
|
26
|
-
var IMAGE_EXTENSION_PATTERN = /\.(?:jpe?g|png|gif|webp|avif|svg)(?:[?#]|$)/i;
|
|
27
|
-
var WP_UPLOADS_PATTERN = /\/wp-content\/uploads\//i;
|
|
28
|
-
function extractImgTagSrcs(content) {
|
|
29
|
-
if (!content.trim()) return [];
|
|
30
|
-
const $ = cheerio.load(content, { xml: false });
|
|
31
|
-
const srcs = [];
|
|
32
|
-
$("img[src]").each((_, el) => {
|
|
33
|
-
const src = $(el).attr("src")?.trim();
|
|
34
|
-
if (src) srcs.push(src);
|
|
35
|
-
});
|
|
36
|
-
return srcs;
|
|
37
|
-
}
|
|
38
|
-
function discoverRawImgSrcs(content) {
|
|
39
|
-
return extractImgTagSrcs(content).filter((src) => !src.startsWith("data:"));
|
|
40
|
-
}
|
|
41
|
-
function normalizeAssetUrl(raw) {
|
|
42
|
-
const trimmed = raw.trim();
|
|
43
|
-
if (!trimmed || trimmed.startsWith("data:")) return void 0;
|
|
44
|
-
if (trimmed.startsWith("//")) return `https:${trimmed}`;
|
|
45
|
-
return trimmed;
|
|
46
|
-
}
|
|
47
|
-
function isLikelyImageUrl(url) {
|
|
48
|
-
if (!url || url.startsWith("data:")) return false;
|
|
49
|
-
if (url.startsWith("/")) {
|
|
50
|
-
return WP_UPLOADS_PATTERN.test(url) || IMAGE_EXTENSION_PATTERN.test(url);
|
|
51
|
-
}
|
|
52
|
-
if (!/^https?:\/\//i.test(url)) return false;
|
|
53
|
-
if (WP_UPLOADS_PATTERN.test(url)) return true;
|
|
54
|
-
try {
|
|
55
|
-
const pathname = new URL(url).pathname;
|
|
56
|
-
return IMAGE_EXTENSION_PATTERN.test(pathname);
|
|
57
|
-
} catch {
|
|
58
|
-
return IMAGE_EXTENSION_PATTERN.test(url);
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
function discoverContentAssetUrls(content) {
|
|
62
|
-
if (!content.trim()) return [];
|
|
63
|
-
const urls = /* @__PURE__ */ new Set();
|
|
64
|
-
for (const raw of extractImgTagSrcs(content)) {
|
|
65
|
-
const normalized = normalizeAssetUrl(raw);
|
|
66
|
-
if (normalized && isLikelyImageUrl(normalized)) {
|
|
67
|
-
urls.add(normalized);
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
for (const match of content.matchAll(ASSET_URL_PARAM_PATTERN)) {
|
|
71
|
-
const normalized = normalizeAssetUrl(match[1] ?? "");
|
|
72
|
-
if (normalized && isLikelyImageUrl(normalized)) {
|
|
73
|
-
urls.add(normalized);
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
return [...urls];
|
|
77
|
-
}
|
|
78
|
-
|
|
79
28
|
// src/transformers/rewrite-inline-images.ts
|
|
80
|
-
import * as
|
|
29
|
+
import * as cheerio from "cheerio";
|
|
81
30
|
function rewriteSrcset(srcset, options, uploadedBySourceId, referencedSources, unresolved) {
|
|
82
31
|
return srcset.split(",").map((entry) => {
|
|
83
32
|
const trimmed = entry.trim();
|
|
@@ -104,7 +53,7 @@ function rewriteInlineImages(html, options, uploadedBySourceId) {
|
|
|
104
53
|
if (!html.trim()) {
|
|
105
54
|
return { html, referencedSources: [], unresolved: [] };
|
|
106
55
|
}
|
|
107
|
-
const $ =
|
|
56
|
+
const $ = cheerio.load(html, { xml: false });
|
|
108
57
|
const referencedSources = /* @__PURE__ */ new Set();
|
|
109
58
|
const unresolved = /* @__PURE__ */ new Set();
|
|
110
59
|
$("img").each((_, element) => {
|
|
@@ -140,7 +89,7 @@ function rewriteInlineImages(html, options, uploadedBySourceId) {
|
|
|
140
89
|
}
|
|
141
90
|
|
|
142
91
|
// src/sinks/conflicts.ts
|
|
143
|
-
import * as
|
|
92
|
+
import * as cheerio3 from "cheerio";
|
|
144
93
|
|
|
145
94
|
// src/parsers/squarespace/parse-export.ts
|
|
146
95
|
import { readFile } from "fs/promises";
|
|
@@ -166,7 +115,7 @@ function linkToPath(link) {
|
|
|
166
115
|
}
|
|
167
116
|
|
|
168
117
|
// src/parsers/squarespace/collect.ts
|
|
169
|
-
import * as
|
|
118
|
+
import * as cheerio2 from "cheerio";
|
|
170
119
|
import { z } from "zod";
|
|
171
120
|
var SQUARESPACE_JSON_FORMAT = "json-pretty";
|
|
172
121
|
var squarespaceClientOptionsSchema = z.object({
|
|
@@ -219,7 +168,7 @@ function inferBlockTypeFromClassName(className) {
|
|
|
219
168
|
}
|
|
220
169
|
function extractBlocksFromBodyHtml(html) {
|
|
221
170
|
if (!html.trim()) return [];
|
|
222
|
-
const $ =
|
|
171
|
+
const $ = cheerio2.load(html, { xml: false });
|
|
223
172
|
const blocks = [];
|
|
224
173
|
$(".sqs-block").each((_, element) => {
|
|
225
174
|
const el = $(element);
|
|
@@ -1048,7 +997,7 @@ function analyzeHtml(html) {
|
|
|
1048
997
|
issues.push("script_tag_present");
|
|
1049
998
|
}
|
|
1050
999
|
try {
|
|
1051
|
-
const $ =
|
|
1000
|
+
const $ = cheerio3.load(html, { xml: false });
|
|
1052
1001
|
$("p").each((_, el) => {
|
|
1053
1002
|
const inner = $(el).html() ?? "";
|
|
1054
1003
|
if (inner.includes("<p")) {
|
|
@@ -1546,7 +1495,6 @@ async function runDryRun(options) {
|
|
|
1546
1495
|
|
|
1547
1496
|
export {
|
|
1548
1497
|
MIGRATION_WRITE_STAGES,
|
|
1549
|
-
discoverContentAssetUrls,
|
|
1550
1498
|
rewriteInlineImages,
|
|
1551
1499
|
sanitizeSlug,
|
|
1552
1500
|
linkToPath,
|
|
@@ -1576,4 +1524,4 @@ export {
|
|
|
1576
1524
|
staleUrlsFromEstimate,
|
|
1577
1525
|
runDryRun
|
|
1578
1526
|
};
|
|
1579
|
-
//# sourceMappingURL=chunk-
|
|
1527
|
+
//# sourceMappingURL=chunk-HH7666MQ.js.map
|