@artinstack/migrator 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2290 @@
1
+ import {
2
+ SquarespaceCollectionClient,
3
+ enumerateSquarespaceEntities,
4
+ linkToPath,
5
+ sanitizeSlug,
6
+ summarizeSquarespaceExport,
7
+ validateSquarespaceExportFile
8
+ } from "./chunk-HH7666MQ.js";
9
+ import {
10
+ discoverContentAssetUrls
11
+ } from "./chunk-2PNSVE5Y.js";
12
+
13
+ // src/parsers/wordpress/parse-wxr.ts
14
+ import { readFile } from "fs/promises";
15
+ import { basename } from "path";
16
+ import { XMLParser } from "fast-xml-parser";
17
// Platform tag written into the `source.platform` field by sourceMeta().
const PLATFORM = "wordpress";
18
/**
 * Normalize a one-or-many value into an array.
 *
 * @param {*} value - `undefined`, a single value, or an array.
 * @returns {Array} `[]` for `undefined`, the same array instance for arrays,
 *   otherwise a one-element array wrapping the value (including `null`).
 */
function asArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return value === undefined ? [] : [value];
}
22
/**
 * Coerce a parsed XML node value to a string.
 *
 * @param {*} value - A primitive, or an object that may carry its text under `#text`.
 * @returns {string} `""` for null/undefined, the `#text` member (or `""` when that
 *   member is nullish) for objects that have one, otherwise `String(value)`.
 */
function textValue(value) {
  if (value === undefined || value === null) {
    return "";
  }
  const isTextNode = typeof value === "object" && "#text" in value;
  if (isTextNode) {
    return String(value["#text"] ?? "");
  }
  return String(value);
}
30
/**
 * Translate a WordPress post status into the migrator's publish status.
 *
 * @param {string|null|undefined} wpStatus - Raw status; matched case-insensitively.
 * @returns {"published"|"draft"|"archived"} `publish` → published,
 *   `draft`/`pending` → draft, anything else (including missing) → archived.
 */
function mapPublishStatus(wpStatus) {
  const normalized = (wpStatus ?? "").toLowerCase();
  if (normalized === "publish") {
    return "published";
  }
  if (normalized === "draft" || normalized === "pending") {
    return "draft";
  }
  return "archived";
}
41
/**
 * Extract the post body HTML from a parsed WXR item.
 *
 * parseItems() configures the XML parser with `removeNSPrefix: true`, so
 * `<content:encoded>` and `<excerpt:encoded>` both land on the same `encoded`
 * key. When an item carries both, the parser yields an array in document
 * order; stringifying that array would glue body and excerpt together with a
 * comma. The body is taken from the first entry.
 * NOTE(review): relies on WordPress writing content before excerpt — confirm
 * against non-WordPress WXR producers.
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {string} Body HTML, or `""` when absent.
 */
function getContentEncoded(item) {
  const content = item.content;
  if (content !== undefined && content !== null) {
    if (typeof content === "string") return content;
    return textValue(content.encoded);
  }
  const encoded = item.encoded;
  return textValue(Array.isArray(encoded) ? encoded[0] : encoded);
}
49
/**
 * Build the provenance record attached to every WordPress entity.
 *
 * @param {string} id - Source identifier.
 * @param {string} [link] - Original URL; an empty value is stored as `undefined`.
 * @param {*} [exportedAt] - Export timestamp, passed through untouched.
 * @returns {{platform: string, id: string, url: (string|undefined), path: *, exportedAt: *}}
 */
function sourceMeta(id, link, exportedAt) {
  const url = link ? link : undefined;
  const path = linkToPath(link);
  return { platform: PLATFORM, id, url, path, exportedAt };
}
58
/**
 * Extract the excerpt text from a parsed WXR item.
 *
 * parseItems() strips namespace prefixes, so `<excerpt:encoded>` is stored
 * under the shared `encoded` key rather than under `excerpt`. When the item
 * has both `<content:encoded>` and `<excerpt:encoded>`, `encoded` is an array
 * in document order and the excerpt is read from the second entry.
 * NOTE(review): relies on WordPress writing content before excerpt — confirm
 * against non-WordPress WXR producers.
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {string} Excerpt text, or `""` when there is none.
 */
function getExcerpt(item) {
  const excerpt = item.excerpt;
  if (excerpt) {
    return typeof excerpt === "string" ? excerpt : textValue(excerpt.encoded);
  }
  const encoded = item.encoded;
  if (Array.isArray(encoded) && encoded.length > 1) {
    return textValue(encoded[1]);
  }
  return "";
}
64
/**
 * Look up a `postmeta` value on a parsed WXR item.
 *
 * @param {object} item - Parsed `<item>` node; `postmeta` may be one entry or many.
 * @param {string} key - Meta key to match exactly.
 * @returns {string|undefined} Text of the first matching entry's `meta_value`,
 *   or `undefined` when no entry matches.
 */
function getPostMeta(item, key) {
  const match = asArray(item.postmeta).find((entry) => textValue(entry.meta_key) === key);
  return match === undefined ? undefined : textValue(match.meta_value);
}
72
/**
 * Parse a WXR document and return its `<item>` nodes.
 *
 * Attributes are kept under an `@_` prefix, namespace prefixes are stripped
 * from names, and values are neither trimmed nor type-coerced.
 *
 * @param {string} xml - Raw WXR/RSS document.
 * @returns {Array<object>} Items at `rss.channel.item`; `[]` when that path is absent.
 */
function parseItems(xml) {
  const parserOptions = {
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    removeNSPrefix: true,
    trimValues: false,
    parseTagValue: false
  };
  const parsed = new XMLParser(parserOptions).parse(xml);
  const items = parsed.rss?.channel?.item;
  return asArray(items);
}
83
/**
 * Index every `attachment` item by its post id.
 *
 * Changes from the previous version:
 *  - URL parsing is guarded, mirroring collectInlineAssets(): a malformed
 *    attachment URL now falls back to `attachment-<id>` instead of throwing.
 *  - The MIME type is always guessed. Previously the guess was discarded
 *    whenever `_wp_attached_file` meta was present; that meta value is now a
 *    secondary source for the guess instead.
 *
 * @param {Array<object>} items - Parsed WXR items.
 * @returns {Map<string, {sourceUrl: string, filename: string, mimeType: (string|undefined), title: string}>}
 *   Items lacking an id or a URL are skipped.
 */
function buildAttachmentIndex(items) {
  const index = /* @__PURE__ */ new Map();
  for (const item of items) {
    if (textValue(item.post_type) !== "attachment") continue;
    const id = textValue(item.post_id);
    const url = textValue(item.attachment_url) || textValue(item.link);
    if (!id || !url) continue;
    const fallbackName = `attachment-${id}`;
    let filename;
    try {
      // The dummy base lets relative attachment URLs resolve to a pathname.
      filename = basename(new URL(url, "http://local.invalid").pathname) || fallbackName;
    } catch {
      // One unparsable URL must not abort the whole export.
      filename = fallbackName;
    }
    const attachedFile = getPostMeta(item, "_wp_attached_file");
    const mimeType = guessMime(filename) ?? (attachedFile ? guessMime(attachedFile) : undefined);
    index.set(id, {
      sourceUrl: url,
      filename,
      mimeType,
      title: textValue(item.title)
    });
  }
  return index;
}
100
/**
 * Guess a MIME type from a filename's extension.
 *
 * The lookup is restricted to the table's own keys; a bare `map[ext]` would
 * return inherited Object.prototype members for extensions such as
 * `constructor` or `__proto__`.
 *
 * @param {string} filename - File name or path; the extension is matched case-insensitively.
 * @returns {string|undefined} MIME type, or `undefined` for unknown or missing extensions.
 */
function guessMime(filename) {
  const ext = filename.split(".").pop()?.toLowerCase();
  const map = {
    jpg: "image/jpeg",
    jpeg: "image/jpeg",
    png: "image/png",
    gif: "image/gif",
    webp: "image/webp",
    pdf: "application/pdf"
  };
  return ext && Object.hasOwn(map, ext) ? map[ext] : void 0;
}
112
/**
 * Gather the distinct categories and tags referenced by posts and pages.
 *
 * @param {Array<object>} items - Parsed WXR items; other post types are ignored.
 * @returns {{categories: Map<string, object>, tags: Map<string, object>}}
 *   Maps keyed by slug. The first occurrence of a slug wins.
 */
function collectTaxonomies(items) {
  const categories = /* @__PURE__ */ new Map();
  const tags = /* @__PURE__ */ new Map();

  // Record a term once per slug; later duplicates keep the first name seen.
  const register = (store, type, prefix, slug, name) => {
    if (store.has(slug)) return;
    const sourceId = `${prefix}:${slug}`;
    store.set(slug, { type, source: sourceMeta(sourceId), sourceId, name, slug });
  };

  for (const item of items) {
    const postType = textValue(item.post_type);
    const isContent = postType === "post" || postType === "page";
    if (!isContent) continue;
    for (const term of asArray(item.category)) {
      const domain = term["@_domain"] ?? "";
      const slug = sanitizeSlug(term["@_nicename"] ?? textValue(term["#text"]));
      const name = textValue(term["#text"]) || slug;
      if (!slug) continue;
      if (domain === "category") {
        register(categories, "category", "cat", slug, name);
      } else if (domain === "post_tag") {
        register(tags, "tag", "tag", slug, name);
      }
    }
  }
  return { categories, tags };
}
148
/**
 * Turn asset URLs discovered in post HTML into asset entities, skipping URLs
 * that were already emitted.
 *
 * The previous version ended with a loop over `attachmentIndex` whose body
 * did nothing; it has been removed. The parameter stays for call-site
 * compatibility.
 *
 * @param {string} html - Post or page HTML.
 * @param {Map<string, object>} attachmentIndex - Unused; kept for interface stability.
 * @param {Set<string>} seenUrls - URLs already emitted; newly emitted URLs are added to it.
 * @param {*} exportedAt - Export timestamp forwarded into the source metadata.
 * @returns {Array<object>} One asset entity per previously unseen URL, in discovery order.
 */
function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt) {
  const assets = [];
  for (const src of discoverContentAssetUrls(html)) {
    if (seenUrls.has(src)) continue;
    seenUrls.add(src);
    let filename;
    try {
      filename = basename(new URL(src, "http://local.invalid").pathname) || "inline-asset";
    } catch {
      // Unparsable URLs still become assets, under a generic filename.
      filename = "inline-asset";
    }
    const sourceId = `url:${src}`;
    assets.push({
      type: "asset",
      source: sourceMeta(sourceId, src, exportedAt),
      sourceId,
      sourceUrl: src,
      filename,
      mimeType: guessMime(filename)
    });
  }
  return assets;
}
174
/**
 * Stream migrator entities out of a WordPress WXR export file.
 *
 * Emission order: categories, tags, attachment assets, then for each post or
 * page its newly discovered inline assets followed by the post/page itself.
 *
 * The previous version also filled an `emittedAttachmentIds` set that was
 * never read; it has been removed.
 *
 * @param {{filePath: string, exportedAt?: *}} options - File to read and an
 *   optional export timestamp copied into every entity's source metadata.
 * @yields {object} Category, tag, asset, post and page entities.
 */
async function* enumerateWxrEntities(options) {
  const xml = await readFile(options.filePath, "utf8");
  const items = parseItems(xml);
  const attachmentIndex = buildAttachmentIndex(items);
  const { categories, tags } = collectTaxonomies(items);
  // URLs already emitted as assets, so inline references are not duplicated.
  const seenAssetUrls = /* @__PURE__ */ new Set();

  for (const category of categories.values()) {
    yield category;
  }
  for (const tag of tags.values()) {
    yield tag;
  }
  for (const [id, entry] of attachmentIndex) {
    seenAssetUrls.add(entry.sourceUrl);
    yield {
      type: "asset",
      source: sourceMeta(id, entry.sourceUrl, options.exportedAt),
      sourceId: id,
      sourceUrl: entry.sourceUrl,
      filename: entry.filename,
      mimeType: entry.mimeType,
      caption: entry.title
    };
  }

  for (const item of items) {
    const postType = textValue(item.post_type);
    if (postType !== "post" && postType !== "page") continue;
    const id = textValue(item.post_id);
    const link = textValue(item.link);
    // Slug preference: explicit post_name, then title, then the numeric id.
    const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
    const rawHtml = getContentEncoded(item);

    for (const asset of collectInlineAssets(rawHtml, attachmentIndex, seenAssetUrls, options.exportedAt)) {
      yield asset;
    }

    const categorySlugs = [];
    const tagSlugs = [];
    for (const cat of asArray(item.category)) {
      const domain = cat["@_domain"] ?? "";
      const nicename = sanitizeSlug(cat["@_nicename"] ?? textValue(cat["#text"]));
      if (!nicename) continue;
      if (domain === "category") categorySlugs.push(nicename);
      if (domain === "post_tag") tagSlugs.push(nicename);
    }

    if (postType === "post") {
      const thumbnailId = getPostMeta(item, "_thumbnail_id");
      // Only link the featured asset when the attachment is part of this export.
      let featuredAssetSourceId;
      if (thumbnailId && attachmentIndex.has(thumbnailId)) {
        featuredAssetSourceId = thumbnailId;
      }
      const post = {
        type: "post",
        source: sourceMeta(id, link, options.exportedAt),
        sourceId: id,
        title: textValue(item.title) || slug,
        slug,
        excerpt: getExcerpt(item) || void 0,
        contentHtml: rawHtml,
        publishedAt: textValue(item.post_date) || void 0,
        status: mapPublishStatus(textValue(item.status)),
        categorySlugs: categorySlugs.length ? categorySlugs : void 0,
        tagSlugs: tagSlugs.length ? tagSlugs : void 0,
        sourceFeaturedMediaId: thumbnailId,
        featuredAssetSourceId
      };
      yield post;
    } else {
      const isHomePage = getPostMeta(item, "_wp_show_on_front") === "1" || getPostMeta(item, "page_on_front") === "1";
      const page = {
        type: "page",
        source: sourceMeta(id, link, options.exportedAt),
        sourceId: id,
        title: textValue(item.title) || slug,
        slug,
        contentHtml: rawHtml,
        isHomePage: isHomePage || void 0,
        status: mapPublishStatus(textValue(item.status))
      };
      yield page;
    }
  }
}
262
/**
 * Check that a file looks like a WordPress WXR export and summarize its contents.
 *
 * @param {string} filePath - Path of the export file.
 * @returns {Promise<{ok: boolean, issues: Array<{code: string, message: string}>, summary: object}>}
 *   A `file_not_found` issue with an empty summary when the file cannot be
 *   read; otherwise entity counts, plus an `invalid_wxr` issue when no WXR
 *   marker is present.
 */
async function validateWxrFile(filePath) {
  let xml;
  try {
    xml = await readFile(filePath, "utf8");
  } catch {
    const message = `Cannot read file: ${filePath}`;
    return { ok: false, issues: [{ code: "file_not_found", message }], summary: {} };
  }

  const issues = [];
  const hasRssRoot = xml.includes("<rss");
  const hasWxrMarker = xml.includes("wp:wxr_version") || xml.includes("xmlns:wp=") || xml.includes("WordPress eXtended RSS");
  if (!(hasRssRoot && hasWxrMarker)) {
    issues.push({ code: "invalid_wxr", message: "File does not appear to be WordPress WXR" });
  }

  const items = parseItems(xml);
  let posts = 0;
  let pages = 0;
  let assets = 0;
  for (const item of items) {
    switch (textValue(item.post_type)) {
      case "post":
        posts += 1;
        break;
      case "page":
        pages += 1;
        break;
      case "attachment":
        assets += 1;
        break;
      default:
        break;
    }
  }
  const { categories, tags } = collectTaxonomies(items);
  const summary = { posts, pages, assets, portfolios: 0, categories: categories.size, tags: tags.size };
  return { ok: issues.length === 0, issues, summary };
}
292
+
293
+ // src/parsers/wordpress/index.ts
294
/**
 * Extract the export file path from adapter input.
 *
 * @param {string|{path: *}} input - A path string or an object with a `path` member.
 * @returns {string} The string itself, or `String(input.path)`.
 * @throws {Error} When input is neither a string nor an object with a `path` key.
 */
function resolvePath(input) {
  if (typeof input === "string") {
    return input;
  }
  const hasPathKey = Boolean(input) && typeof input === "object" && "path" in input;
  if (!hasPathKey) {
    throw new Error("WordPress adapter requires input path (string or { path })");
  }
  return String(input.path);
}
301
/**
 * WordPress platform adapter: validates a WXR file and enumerates its entities.
 */
const wordpressAdapter = {
  platform: "wordpress",

  /** Validate the WXR file referenced by `input` and report issues plus a summary. */
  async validateInput(input) {
    const { ok, issues, summary } = await validateWxrFile(resolvePath(input));
    return { ok, issues, summary };
  },

  /** Return the async entity stream for the WXR file referenced by `ctx.input`. */
  enumerateEntities(ctx) {
    const filePath = resolvePath(ctx.input);
    return enumerateWxrEntities({ filePath });
  }
};
317
+
318
+ // src/parsers/smugmug/api.ts
319
+ import { createHmac, randomBytes } from "crypto";
320
+ import { z } from "zod";
321
// Host that root-relative API paths are resolved against (see toAbsoluteUrl).
const SMUGMUG_API_HOST = "api.smugmug.com";

// Base URL of the v2 REST API.
const SMUGMUG_API_BASE = `https://${SMUGMUG_API_HOST}/api/v2`;

// OAuth 1.0a endpoints for the request-token → authorize → access-token exchange.
const SMUGMUG_OAUTH_ENDPOINTS = {
  requestToken: "https://api.smugmug.com/services/oauth/1.0a/getRequestToken",
  authorize: "https://api.smugmug.com/services/oauth/1.0a/authorize",
  accessToken: "https://api.smugmug.com/services/oauth/1.0a/getAccessToken"
};
328
// The four OAuth 1.0a secrets; each must be a non-empty string.
const smugMugCredentialsSchema = z.object({
  consumerKey: z.string().min(1),
  consumerSecret: z.string().min(1),
  accessToken: z.string().min(1),
  accessTokenSecret: z.string().min(1)
});

// Constructor options for SmugMugApiClient, with defaults applied by zod.
const smugMugClientOptionsSchema = z.object({
  credentials: smugMugCredentialsSchema,
  // Items requested per page: 1–500, default 100.
  pageSize: z.number().int().min(1).max(500).default(100),
  // Retries after the first attempt: 0–10, default 3.
  maxRetries: z.number().int().min(0).max(10).default(3),
  // Exponential backoff starts at this delay and is capped by maxRetryDelayMs.
  retryBaseDelayMs: z.number().int().min(0).default(500),
  maxRetryDelayMs: z.number().int().min(0).default(8e3),
  // Minimum spacing between consecutive requests; 0 disables throttling.
  requestIntervalMs: z.number().int().min(0).default(200),
  // Optional fetch replacement.
  fetchImpl: z.custom().optional()
});
343
// `_config` payload sent with album-image requests (see collectAlbumImages):
// expands each AlbumImage into its Image, and that Image into its metadata
// and size details, keeping only the listed fields.
const ALBUM_IMAGES_CONFIG = (() => {
  const ImageMetadata = {
    filter: ["ISO", "Aperture", "ApertureValue", "ShutterSpeed", "ExposureTime", "FocalLength"]
  };
  const ImageSizeDetails = {
    filter: ["OriginalImageUrl"]
  };
  const Image = {
    filter: ["FileName", "Caption", "KeywordsArray"],
    filteruri: ["ImageMetadata", "ImageSizeDetails"],
    expand: { ImageMetadata, ImageSizeDetails }
  };
  return { expand: { AlbumImage: { expand: { Image } } } };
})();
363
/**
 * Percent-encode a value for OAuth 1.0a.
 *
 * Starts from `encodeURIComponent` and additionally escapes `! ' ( ) *`,
 * which that function leaves untouched.
 *
 * @param {string} value - Raw value.
 * @returns {string} Encoded value with upper-case hex escapes.
 */
function oauthPercentEncode(value) {
  const leftUnescaped = /[!'()*]/g;
  const toHexEscape = (char) => `%${char.charCodeAt(0).toString(16).toUpperCase()}`;
  const encoded = encodeURIComponent(value);
  return encoded.replace(leftUnescaped, (char) => toHexEscape(char));
}
369
/**
 * Produce the base string URI used in the OAuth signature: lower-cased scheme
 * and host, the port only when it is not the scheme default, and the path
 * without query or fragment.
 *
 * @param {URL} url - Request URL.
 * @returns {string} Normalized `scheme://host[:port]/path`.
 */
function normalizeRequestUrl(url) {
  const scheme = url.protocol.replace(/:$/, "").toLowerCase();
  const hostname = url.hostname.toLowerCase();
  const implicitPort = scheme === "http" ? "80" : "443";
  let portSuffix = "";
  if (url.port && url.port !== implicitPort) {
    portSuffix = `:${url.port}`;
  }
  return `${scheme}://${hostname}${portSuffix}${url.pathname}`;
}
376
/**
 * Serialize parameters as `key=value` pairs joined by `&`, ordered by raw key
 * (code-unit comparison), with keys and values OAuth-percent-encoded.
 *
 * @param {Object<string, string>} params - Parameters to serialize.
 * @returns {string} The normalized parameter string.
 */
function sortedParameterString(params) {
  const keys = Object.keys(params);
  keys.sort((left, right) => {
    if (left === right) return 0;
    return left < right ? -1 : 1;
  });
  const pairs = [];
  for (const key of keys) {
    pairs.push(`${oauthPercentEncode(key)}=${oauthPercentEncode(params[key])}`);
  }
  return pairs.join("&");
}
379
/**
 * Merge every parameter that takes part in the OAuth signature.
 *
 * Precedence, lowest to highest: OAuth parameters, URL query parameters, body
 * parameters. A repeated query key keeps only its last value.
 *
 * @param {URL} url - Request URL whose query parameters are included.
 * @param {Object<string, string>} oauthParams - OAuth protocol parameters.
 * @param {Object<string, string>} [bodyParams] - Form body parameters, if any.
 * @returns {Object<string, string>} A new merged object; inputs are not mutated.
 */
function collectSignatureParams(url, oauthParams, bodyParams) {
  const merged = { ...oauthParams };
  url.searchParams.forEach((value, name) => {
    merged[name] = value;
  });
  if (!bodyParams) {
    return merged;
  }
  Object.entries(bodyParams).forEach(([name, value]) => {
    merged[name] = value;
  });
  return merged;
}
391
/**
 * Compute the HMAC-SHA1 OAuth 1.0a signature for a request.
 *
 * @param {{method: string, url: string, credentials: {consumerSecret: string, accessTokenSecret: string},
 *   oauthParams: Object<string, string>, bodyParams?: Object<string, string>}} input
 * @returns {string} Base64 signature of `METHOD&encoded-url&encoded-params`,
 *   keyed with `consumerSecret&accessTokenSecret` (each percent-encoded).
 */
function signSmugMugOAuthRequest(input) {
  const { method, credentials, oauthParams, bodyParams } = input;
  const target = new URL(input.url);
  const allParams = collectSignatureParams(target, oauthParams, bodyParams);
  const encodedParams = oauthPercentEncode(sortedParameterString(allParams));
  const encodedUrl = oauthPercentEncode(normalizeRequestUrl(target));
  const signatureBase = `${method.toUpperCase()}&${encodedUrl}&${encodedParams}`;

  const consumerPart = oauthPercentEncode(credentials.consumerSecret);
  const tokenPart = oauthPercentEncode(credentials.accessTokenSecret);
  const hmac = createHmac("sha1", `${consumerPart}&${tokenPart}`);
  hmac.update(signatureBase);
  return hmac.digest("base64");
}
404
/**
 * Assemble the OAuth 1.0a protocol parameters (signature excluded).
 *
 * @param {{consumerKey: string, accessToken: string}} credentials
 * @param {string} nonce - Unique per-request nonce.
 * @param {string} timestamp - Timestamp string.
 * @returns {Object<string, string>} Parameters using the HMAC-SHA1 method and version "1.0".
 */
function buildOAuthParams(credentials, nonce, timestamp) {
  const { consumerKey, accessToken } = credentials;
  return {
    oauth_consumer_key: consumerKey,
    oauth_token: accessToken,
    oauth_signature_method: "HMAC-SHA1",
    oauth_timestamp: timestamp,
    oauth_nonce: nonce,
    oauth_version: "1.0"
  };
}
414
/**
 * Build the `Authorization: OAuth …` header value for a signed request.
 *
 * @param {{method: string, url: string, credentials: object, bodyParams?: object,
 *   nonce?: string, timestamp?: string}} input - `nonce` defaults to 16 random
 *   bytes in hex, `timestamp` to the current time in whole seconds.
 * @returns {string} `OAuth key="value", …` with keys in sorted order and both
 *   keys and values percent-encoded.
 */
function buildSmugMugAuthorizationHeader(input) {
  const { method, url, credentials, bodyParams } = input;
  const nonce = input.nonce ?? randomBytes(16).toString("hex");
  const timestamp = input.timestamp ?? String(Math.floor(Date.now() / 1e3));
  const oauthParams = buildOAuthParams(credentials, nonce, timestamp);
  const oauth_signature = signSmugMugOAuthRequest({ method, url, credentials, oauthParams, bodyParams });
  const headerParams = { ...oauthParams, oauth_signature };

  const parts = [];
  for (const key of Object.keys(headerParams).sort()) {
    parts.push(`${oauthPercentEncode(key)}="${oauthPercentEncode(headerParams[key])}"`);
  }
  return `OAuth ${parts.join(", ")}`;
}
429
/**
 * Read SmugMug OAuth credentials from environment variables and validate them
 * with `smugMugCredentialsSchema`.
 *
 * @param {Object<string, string|undefined>} [env=process.env] - Environment to read.
 * @returns {{consumerKey: string, consumerSecret: string, accessToken: string, accessTokenSecret: string}}
 *   The result of the schema's `parse` call.
 */
function readSmugMugCredentialsFromEnv(env = process.env) {
  const {
    SMUGMUG_CONSUMER_KEY: consumerKey,
    SMUGMUG_CONSUMER_SECRET: consumerSecret,
    SMUGMUG_ACCESS_TOKEN: accessToken,
    SMUGMUG_ACCESS_TOKEN_SECRET: accessTokenSecret
  } = env;
  return smugMugCredentialsSchema.parse({ consumerKey, consumerSecret, accessToken, accessTokenSecret });
}
437
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
440
/**
 * Pull the album key out of an album URI.
 *
 * @param {string} uri - URI containing `/album/<key>`; the key ends at `/`, `?` or `!`.
 * @returns {string} The album key.
 * @throws {Error} When the URI has no `/album/<key>` segment.
 */
function albumKeyFromUri(uri) {
  const [, key] = uri.match(/\/album\/([^/?!]+)/i) ?? [];
  if (!key) {
    throw new Error(`Unable to parse album key from URI: ${uri}`);
  }
  return key;
}
447
/**
 * Flatten an AlbumImage API record into the flat-export image shape.
 *
 * Values on the expanded `Image` are preferred for file name, metadata and
 * original URL; the caption prefers the AlbumImage's own value.
 *
 * @param {object} albumImage - AlbumImage record, optionally with an expanded `Image`.
 * @param {string} portfolioSourceId - Id of the album the image belongs to.
 * @param {number} sort - Position of the image within the album.
 * @returns {object} Image row; `keywords` and `exif` are `undefined` when unavailable.
 */
function mapAlbumImage(albumImage, portfolioSourceId, sort) {
  const expanded = albumImage.Image;
  const meta = expanded?.ImageMetadata ?? albumImage.ImageMetadata;

  let exif;
  if (meta) {
    exif = {
      iso: meta.ISO,
      aperture: meta.Aperture ?? meta.ApertureValue,
      shutter: meta.ShutterSpeed ?? meta.ExposureTime,
      focalLength: meta.FocalLength
    };
  }

  const keywordList = expanded?.KeywordsArray;
  return {
    sourceId: albumImage.ImageKey,
    portfolioSourceId,
    sort,
    fileName: expanded?.FileName ?? albumImage.FileName,
    originalUrl: expanded?.ImageSizeDetails?.OriginalImageUrl ?? albumImage.LargestImage?.Url ?? albumImage.WebUri,
    caption: albumImage.Caption ?? expanded?.Caption,
    keywords: keywordList?.length ? keywordList : undefined,
    exif
  };
}
468
/**
 * Unwrap a SmugMug URI reference.
 * An object carrying a string `Uri` member yields that string; every other
 * value (including plain strings) is returned unchanged.
 */
function smugMugUriString(ref) {
  return ref && typeof ref === "object" && typeof ref.Uri === "string" ? ref.Uri : ref;
}

/**
 * OAuth 1.0a-signed SmugMug API client with request throttling, retry with
 * backoff, and a crawler that flattens the user's node tree.
 *
 * Changes from the previous version:
 *  - URI references may be plain strings or `{ Uri: "…" }` objects; the
 *    object form previously interpolated as "[object Object]".
 *  - The auth-user payload is read from `Response.User` when present, falling
 *    back to `Response` itself.
 *    NOTE(review): SmugMug's documented envelope appears to use the nested
 *    and object forms by default — confirm against a live account.
 *  - The default `fetch` is called through a wrapper, not as a method of the
 *    client, so runtimes that check the receiver do not reject the call.
 */
var SmugMugApiClient = class {
  credentials;
  pageSize;
  maxRetries;
  retryBaseDelayMs;
  maxRetryDelayMs;
  requestIntervalMs;
  fetchImpl;
  // Epoch milliseconds of the most recent request slot handed out by throttle().
  lastRequestAt = 0;

  /**
   * @param {object} options - Validated by `smugMugClientOptionsSchema`, which also applies defaults.
   */
  constructor(options) {
    const parsed = smugMugClientOptionsSchema.parse(options);
    this.credentials = parsed.credentials;
    this.pageSize = parsed.pageSize;
    this.maxRetries = parsed.maxRetries;
    this.retryBaseDelayMs = parsed.retryBaseDelayMs;
    this.maxRetryDelayMs = parsed.maxRetryDelayMs;
    this.requestIntervalMs = parsed.requestIntervalMs;
    this.fetchImpl = parsed.fetchImpl ?? ((input, init) => fetch(input, init));
  }

  /** Validate credentials against `GET /user/!authuser`. */
  async validateCredentials() {
    const user = await this.getAuthUser();
    return { nick: user.NickName, rootNodeUri: smugMugUriString(user.Uris.Node) };
  }

  /** Crawl the authenticated user's node tree into flat export tables for `parse-node.ts`. */
  async crawlExport() {
    const user = await this.getAuthUser();
    const folders = [];
    const albums = [];
    const images = [];
    await this.walkNode(user.Uris.Node, void 0, folders, albums, images);
    return {
      exportVersion: 1,
      exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
      Folders: folders,
      Albums: albums,
      Images: images
    };
  }

  /** Fetch the authenticated user record. */
  async getAuthUser() {
    const envelope = await this.requestJson(`${SMUGMUG_API_BASE}/user/!authuser`);
    const response = envelope.Response;
    return response?.User ?? response;
  }

  /**
   * Depth-first walk of a node's children, appending folders, albums and
   * album images to the supplied arrays. `Page` nodes are skipped.
   */
  async walkNode(nodeUri, parentFolderId, folders, albums, images) {
    const childrenPath = `${smugMugUriString(nodeUri)}!children`;
    for await (const child of this.paginateNodes(childrenPath)) {
      if (child.Type === "Page") continue;
      if (child.Type === "Folder") {
        folders.push({
          sourceId: child.NodeID,
          name: child.Name,
          parentSourceId: parentFolderId,
          slug: child.UrlName,
          description: child.Description
        });
        await this.walkNode(child.Uri, child.NodeID, folders, albums, images);
        continue;
      }
      if (child.Type === "Album") {
        albums.push({
          sourceId: child.NodeID,
          name: child.Name,
          parentSourceId: parentFolderId,
          slug: child.UrlName,
          description: child.Description,
          url: child.WebUri
        });
        const albumUri = smugMugUriString(child.Uris?.Album);
        if (albumUri) {
          await this.collectAlbumImages(albumUri, child.NodeID, images);
        }
      }
    }
  }

  /** Append every image of an album to `images`, numbering them in API order. */
  async collectAlbumImages(albumUri, portfolioSourceId, images) {
    const albumKey = albumKeyFromUri(smugMugUriString(albumUri));
    const configQuery = `_config=${encodeURIComponent(JSON.stringify(ALBUM_IMAGES_CONFIG))}`;
    const initialPath = `${SMUGMUG_API_BASE}/album/${albumKey}!images?${configQuery}`;
    let sort = 0;
    for await (const albumImage of this.paginateAlbumImages(initialPath)) {
      images.push(mapAlbumImage(albumImage, portfolioSourceId, sort));
      sort += 1;
    }
  }

  /** Yield each entry of `Response.Node` across all pages. */
  async *paginateNodes(path) {
    for await (const page of this.paginate(path)) {
      for (const node of page.Node ?? []) {
        yield node;
      }
    }
  }

  /** Yield each entry of `Response.AlbumImage` across all pages. */
  async *paginateAlbumImages(path) {
    for await (const page of this.paginate(path)) {
      for (const albumImage of page.AlbumImage ?? []) {
        yield albumImage;
      }
    }
  }

  /** Yield `Response` page by page, following `Pages.NextPage` until it is absent. */
  async *paginate(initialPath) {
    let nextPath = appendPagination(initialPath, this.pageSize, 1);
    while (nextPath) {
      const envelope = await this.requestJson(nextPath);
      yield envelope.Response;
      nextPath = envelope.Response.Pages?.NextPage;
    }
  }

  /**
   * GET a path or URL and return the parsed JSON envelope.
   * @throws {Error} When the envelope's `Code` is not 200.
   */
  async requestJson(pathOrUrl) {
    const url = toAbsoluteUrl(pathOrUrl);
    const response = await this.requestWithRetry(url);
    const body = await response.json();
    if (body.Code !== 200) {
      throw new Error(`SmugMug API error ${body.Code}: ${body.Message}`);
    }
    return body;
  }

  /**
   * Perform a signed GET, retrying on HTTP 429 and 5xx up to `maxRetries` times.
   * A numeric `Retry-After` header is honored as seconds; otherwise the delay
   * doubles from `retryBaseDelayMs`, capped at `maxRetryDelayMs`.
   * @throws {Error} On a non-retryable status or when retries are exhausted.
   */
  async requestWithRetry(url) {
    let attempt = 0;
    while (true) {
      await this.throttle();
      // Signed per attempt: buildSmugMugAuthorizationHeader generates a fresh nonce and timestamp.
      const authorization = buildSmugMugAuthorizationHeader({
        method: "GET",
        url: url.toString(),
        credentials: this.credentials
      });
      const response = await this.fetchImpl(url, {
        method: "GET",
        headers: {
          Accept: "application/json",
          Authorization: authorization
        }
      });
      if (response.ok) {
        return response;
      }
      const retryable = response.status === 429 || response.status >= 500;
      if (!retryable || attempt >= this.maxRetries) {
        const detail = await response.text().catch(() => "");
        throw new Error(
          `SmugMug HTTP ${response.status}${detail ? `: ${detail.slice(0, 200)}` : ""}`
        );
      }
      const retryAfter = Number.parseInt(response.headers.get("retry-after") ?? "", 10);
      const delay = Number.isFinite(retryAfter) ? retryAfter * 1e3 : Math.min(this.maxRetryDelayMs, this.retryBaseDelayMs * 2 ** attempt);
      await sleep(delay);
      attempt += 1;
    }
  }

  /** Wait until at least `requestIntervalMs` has passed since the previous request. */
  async throttle() {
    if (this.requestIntervalMs <= 0) return;
    const elapsed = Date.now() - this.lastRequestAt;
    if (elapsed < this.requestIntervalMs) {
      await sleep(this.requestIntervalMs - elapsed);
    }
    this.lastRequestAt = Date.now();
  }
};
624
/**
 * Turn an API path or URL into a `URL` object.
 *
 * @param {string} pathOrUrl - An http(s) URL, a root-relative path (resolved
 *   against the SmugMug API host), or any other string `URL` can parse.
 * @returns {URL}
 * @throws {TypeError} When the string is neither absolute nor root-relative
 *   and cannot be parsed as a URL.
 */
function toAbsoluteUrl(pathOrUrl) {
  const hasHttpScheme = ["http://", "https://"].some((scheme) => pathOrUrl.startsWith(scheme));
  if (!hasHttpScheme && pathOrUrl.startsWith("/")) {
    return new URL(`https://${SMUGMUG_API_HOST}${pathOrUrl}`);
  }
  return new URL(pathOrUrl);
}
633
/**
 * Set the `count` and `start` query parameters on an API path or URL.
 *
 * @param {string} pathOrUrl - Path or URL; existing query parameters are kept.
 * @param {number} count - Page size.
 * @param {number} start - Start index of the page.
 * @returns {string} Absolute URL string with both parameters set.
 */
function appendPagination(pathOrUrl, count, start) {
  const target = toAbsoluteUrl(pathOrUrl);
  const paging = [["count", count], ["start", start]];
  for (const [name, value] of paging) {
    target.searchParams.set(name, String(value));
  }
  return target.toString();
}
639
+
640
+ // src/parsers/smugmug/parse-node.ts
641
+ import { readFile as readFile2 } from "fs/promises";
642
// Platform tag written into `source.platform` by sourceMeta2().
const PLATFORM2 = "smugmug";

// Prefix of the placeholder URL that resolveAssetUrl() builds for images
// without an original URL.
const UNRESOLVED_URL_PREFIX = "unspecified://smugmug/";
644
/**
 * Build the provenance record attached to every SmugMug entity.
 *
 * @param {string} id - Source identifier.
 * @param {string|undefined} url - Original URL, if known.
 * @param {*} exportedAt - Export timestamp, passed through untouched.
 * @returns {{platform: string, id: string, url: (string|undefined), exportedAt: *}}
 */
function sourceMeta2(id, url, exportedAt) {
  const meta = { platform: PLATFORM2 };
  meta.id = id;
  meta.url = url;
  meta.exportedAt = exportedAt;
  return meta;
}
652
/**
 * Guess an image MIME type from a filename's extension.
 *
 * The lookup is restricted to the table's own keys; a bare `map[ext]` would
 * return inherited Object.prototype members for extensions such as
 * `constructor` or `__proto__`.
 *
 * @param {string} filename - File name; the extension is matched case-insensitively.
 * @returns {string|undefined} MIME type, or `undefined` for unknown or missing extensions.
 */
function guessMime2(filename) {
  const ext = filename.split(".").pop()?.toLowerCase();
  const map = {
    jpg: "image/jpeg",
    jpeg: "image/jpeg",
    png: "image/png",
    gif: "image/gif",
    webp: "image/webp",
    tif: "image/tiff",
    tiff: "image/tiff"
  };
  return ext && Object.hasOwn(map, ext) ? map[ext] : void 0;
}
665
/**
 * Extract a number from an EXIF value.
 *
 * Finite numbers pass through. Anything else is stringified, stripped of
 * every character other than digits and `.`, and parsed as a float.
 *
 * @param {*} value - Raw EXIF value such as `100`, `"f/2.8"` or `"50 mm"`.
 * @returns {number|undefined} The parsed number, or `undefined` when nothing numeric remains.
 */
function parseExifNumber(value) {
  if (value === undefined) {
    return undefined;
  }
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  if (isFiniteNumber) {
    return value;
  }
  const numericText = String(value).replace(/[^0-9.]/g, "");
  const parsed = Number.parseFloat(numericText);
  return Number.isFinite(parsed) ? parsed : undefined;
}
671
/**
 * Normalize raw EXIF data into numeric iso/aperture/focalLength plus the
 * shutter value as given.
 *
 * @param {object|undefined} exif - Raw EXIF record.
 * @returns {{iso: (number|undefined), aperture: (number|undefined), shutter: *, focalLength: (number|undefined)}|undefined}
 *   `undefined` when the input is missing or empty, or when no field yields a usable value.
 */
function normalizeExif(exif) {
  if (!exif || Object.keys(exif).length === 0) {
    return undefined;
  }
  const iso = parseExifNumber(exif.iso);
  const aperture = parseExifNumber(exif.aperture);
  const shutter = exif.shutter;
  const focalLength = parseExifNumber(exif.focalLength);

  const hasUsableField = iso !== undefined || aperture !== undefined || Boolean(shutter) || focalLength !== undefined;
  if (!hasUsableField) {
    return undefined;
  }
  return { iso, aperture, shutter, focalLength };
}
684
/**
 * Detect the flat export shape: `exportVersion` 1 (number or string) with
 * `Folders`, `Albums` and `Images` arrays.
 *
 * @param {*} value - Candidate document.
 * @returns {boolean}
 */
function isSmugMugFlatExport(value) {
  if (!value || typeof value !== "object") {
    return false;
  }
  const { exportVersion, Folders, Albums, Images } = value;
  if (exportVersion !== 1 && exportVersion !== "1") {
    return false;
  }
  return Array.isArray(Folders) && Array.isArray(Albums) && Array.isArray(Images);
}
690
/**
 * Detect the nested export shape: `exportVersion` 1 (number or string) with a
 * lower-case `folders` array.
 *
 * @param {*} value - Candidate document.
 * @returns {boolean}
 */
function isSmugMugNestedExport(value) {
  if (!value || typeof value !== "object") {
    return false;
  }
  const { exportVersion, folders } = value;
  const isVersionOne = exportVersion === 1 || exportVersion === "1";
  return isVersionOne && Array.isArray(folders);
}
696
/**
 * Obtain a SmugMug export document from in-memory data or a JSON file.
 *
 * @param {{data?: object, filePath?: string}} options - `data` wins when truthy
 *   and is returned without validation.
 * @returns {Promise<object>} A flat or nested export document.
 * @throws {Error} When neither source is given, or the file's JSON matches neither export shape.
 */
async function loadSmugMugExport(options) {
  const { data, filePath } = options;
  if (data) {
    return data;
  }
  if (!filePath) {
    throw new Error("SmugMug parser requires filePath or data");
  }
  const text = await readFile2(filePath, "utf8");
  const raw = JSON.parse(text);
  const isRecognized = isSmugMugFlatExport(raw) || isSmugMugNestedExport(raw);
  if (!isRecognized) {
    throw new Error(
      "Invalid SmugMug export: expected exportVersion 1 with folders[] (nested) or Folders/Albums/Images (flat)"
    );
  }
  return raw;
}
709
/**
 * Pick the download URL for a flat-export image.
 *
 * @param {{originalUrl?: string, sourceId: string}} image
 * @returns {string} `originalUrl` when truthy, otherwise a placeholder built
 *   from `UNRESOLVED_URL_PREFIX` and the image's `sourceId`.
 */
function resolveAssetUrl(image) {
  return image.originalUrl || `${UNRESOLVED_URL_PREFIX}${image.sourceId}`;
}
713
/**
 * Pick the filename for a flat-export image.
 *
 * @param {{fileName?: string, sourceId: string}} image
 * @returns {string} `fileName` when truthy, otherwise `<sourceId>.jpg`.
 */
function resolveFilename(image) {
  return image.fileName || `${image.sourceId}.jpg`;
}
717
/**
 * Yield the portfolio entity for a nested-export folder.
 *
 * @param {{id: string, name: string, slug?: string, description?: string}} folder
 * @param {*} exportedAt - Export timestamp forwarded into the source metadata.
 * @yields {object} One `portfolio` entity with no parent.
 */
function* emitNestedFolderPortfolio(folder, exportedAt) {
  const { id, name, description } = folder;
  const portfolio = {
    type: "portfolio",
    source: sourceMeta2(id, void 0, exportedAt),
    sourceId: id,
    title: name,
    slug: sanitizeSlug(folder.slug ?? name),
    description
  };
  yield portfolio;
}
727
/**
 * Yield the portfolio entity for a nested-export album, parented to its folder.
 *
 * @param {{id: string}} folder - Owning folder.
 * @param {{id: string, name: string, url?: string, slug?: string, description?: string}} album
 * @param {*} exportedAt - Export timestamp forwarded into the source metadata.
 * @yields {object} One `portfolio` entity whose `parentSourceId` is the folder id.
 */
function* emitNestedAlbumPortfolio(folder, album, exportedAt) {
  const { id, name, url, description } = album;
  const portfolio = {
    type: "portfolio",
    source: sourceMeta2(id, url, exportedAt),
    sourceId: id,
    title: name,
    slug: sanitizeSlug(album.slug ?? name),
    description,
    parentSourceId: folder.id
  };
  yield portfolio;
}
738
/**
 * Yield one asset entity per image of a nested-export album.
 *
 * Missing fields are filled in the same way enumerateFlatExport() does
 * through resolveFilename()/resolveAssetUrl(): an absent `fileName` becomes
 * `<id>.jpg` and an absent `originalUrl` becomes an
 * `UNRESOLVED_URL_PREFIX` placeholder. Previously an image without
 * `fileName` made guessMime2() throw and aborted the enumeration.
 *
 * @param {{id: string, images: Array<object>}} album - Album with its images.
 * @param {*} exportedAt - Export timestamp forwarded into the source metadata.
 * @yields {object} `asset` entities, `sort` being the image's index in the album.
 */
function* emitNestedAlbumAssets(album, exportedAt) {
  for (const [index, image] of album.images.entries()) {
    const filename = image.fileName || `${image.id}.jpg`;
    const sourceUrl = image.originalUrl || `${UNRESOLVED_URL_PREFIX}${image.id}`;
    yield {
      type: "asset",
      // Source metadata keeps the raw URL, as the flat path does.
      source: sourceMeta2(image.id, image.originalUrl, exportedAt),
      sourceId: image.id,
      sourceUrl,
      filename,
      mimeType: guessMime2(filename),
      caption: image.caption,
      keywords: image.keywords?.length ? image.keywords : void 0,
      exif: normalizeExif(image.exif),
      portfolioSourceId: album.id,
      sort: index
    };
  }
}
756
/**
 * Stream entities from a nested export: each folder's portfolio, then for
 * each of its albums the album portfolio followed by that album's assets.
 *
 * @param {{exportedAt: *, folders: Array<{albums: Array<object>}>}} doc - Nested export document.
 * @yields {object} Portfolio and asset entities in document order.
 */
async function* enumerateNestedExport(doc) {
  const { exportedAt, folders } = doc;
  for (const folder of folders) {
    yield* emitNestedFolderPortfolio(folder, exportedAt);
    for (const album of folder.albums) {
      yield* emitNestedAlbumPortfolio(folder, album, exportedAt);
      yield* emitNestedAlbumAssets(album, exportedAt);
    }
  }
}
766
/**
 * Stream entities from a flat export: all folder portfolios, then all album
 * portfolios, then all image assets.
 *
 * @param {{exportedAt: *, Folders: Array<object>, Albums: Array<object>, Images: Array<object>}} doc
 * @yields {object} Portfolio and asset entities.
 */
async function* enumerateFlatExport(doc) {
  const { exportedAt, Folders, Albums, Images } = doc;

  // Folders and albums share one portfolio shape; only albums carry a URL.
  const toPortfolio = (node, url) => ({
    type: "portfolio",
    source: sourceMeta2(node.sourceId, url, exportedAt),
    sourceId: node.sourceId,
    title: node.name,
    slug: sanitizeSlug(node.slug ?? node.name),
    description: node.description,
    parentSourceId: node.parentSourceId
  });

  for (const folder of Folders) {
    yield toPortfolio(folder, void 0);
  }
  for (const album of Albums) {
    yield toPortfolio(album, album.url);
  }
  for (const image of Images) {
    const filename = resolveFilename(image);
    const { sourceId, originalUrl, caption, keywords, portfolioSourceId } = image;
    yield {
      type: "asset",
      source: sourceMeta2(sourceId, originalUrl, exportedAt),
      sourceId,
      sourceUrl: resolveAssetUrl(image),
      filename,
      mimeType: guessMime2(filename),
      caption,
      keywords: keywords?.length ? keywords : void 0,
      exif: normalizeExif(image.exif),
      portfolioSourceId,
      sort: image.sort ?? 0
    };
  }
}
807
/**
 * Obtain the export document from the first available source, in order:
 * in-memory `data`, a supplied `client`, a client built from `credentials`,
 * then the export file.
 *
 * @param {{data?: object, client?: object, credentials?: object, clientOptions?: object, filePath?: string}} options
 * @returns {Promise<object>} Export document.
 */
async function resolveSmugMugDocument(options) {
  const { data, client, credentials, clientOptions } = options;
  if (data) {
    return data;
  }
  if (client) {
    return client.crawlExport();
  }
  if (!credentials) {
    return loadSmugMugExport(options);
  }
  const liveClient = new SmugMugApiClient({ credentials, ...clientOptions });
  return liveClient.crawlExport();
}
816
/**
 * Stream entities for a SmugMug source, dispatching on the shape of the
 * resolved document.
 *
 * @param {object} options - Forwarded to resolveSmugMugDocument().
 * @yields {object} Portfolio and asset entities.
 */
async function* enumerateSmugMugEntities(options) {
  const doc = await resolveSmugMugDocument(options);
  if (!isSmugMugFlatExport(doc)) {
    yield* enumerateNestedExport(doc);
    return;
  }
  yield* enumerateFlatExport(doc);
}
824
/**
 * Count folders, albums and image assets in an export document.
 *
 * @param {object} doc - Flat or nested export document.
 * @returns {{folders: number, albums: number, assets: number, portfolios: number}}
 *   `portfolios` is folders plus albums.
 */
function summarizeSmugMugExport(doc) {
  let folders;
  let albums;
  let assets;
  if (isSmugMugFlatExport(doc)) {
    folders = doc.Folders.length;
    albums = doc.Albums.length;
    assets = doc.Images.length;
  } else {
    folders = doc.folders.length;
    albums = 0;
    assets = 0;
    for (const folder of doc.folders) {
      albums += folder.albums.length;
      for (const album of folder.albums) {
        assets += album.images.length;
      }
    }
  }
  return { folders, albums, assets, portfolios: folders + albums };
}
849
/**
 * Validate a SmugMug export file and summarize its contents.
 *
 * @param {string} filePath - Path of the JSON export.
 * @returns {Promise<{ok: boolean, issues: Array<{code: string, message: string}>, summary: object}>}
 *   An `invalid_export` issue with an empty summary when loading fails;
 *   otherwise counts, plus an `empty_export` issue when the document has no
 *   folders (nested) or no folders and albums (flat).
 */
async function validateSmugMugExportFile(filePath) {
  let doc;
  try {
    doc = await loadSmugMugExport({ filePath });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { ok: false, issues: [{ code: "invalid_export", message }], summary: {} };
  }

  const issues = [];
  const isFlat = isSmugMugFlatExport(doc);
  if (isFlat && doc.Folders.length === 0 && doc.Albums.length === 0) {
    issues.push({ code: "empty_export", message: "No folders or albums in export" });
  }
  if (!isFlat && doc.folders.length === 0) {
    issues.push({ code: "empty_export", message: "No folders in export" });
  }

  const { portfolios, assets, folders } = summarizeSmugMugExport(doc);
  return {
    ok: issues.length === 0,
    issues,
    summary: { portfolios, assets, categories: folders, posts: 0, pages: 0, tags: 0 }
  };
}
887
+
888
+ // src/parsers/smugmug/index.ts
889
/**
 * Normalize SmugMug adapter input.
 *
 * @param {string|object} input - A path string, or an object carrying one of
 *   `client`, `credentials`, `live`, `data` or `path`.
 * @returns {object} `{ path }` for strings; the input object itself when it
 *   selects a live source; otherwise `{ data }` or `{ path }`.
 * @throws {Error} When none of the supported inputs is present.
 */
function resolveInput(input) {
  if (typeof input === "string") {
    return { path: input };
  }
  if (input && typeof input === "object") {
    const { client, credentials, live, data, path } = input;
    if (client || credentials || live) return input;
    if (data) return { data };
    if (path) return { path };
  }
  throw new Error(
    "SmugMug adapter requires input path (string or { path }), { data }, { credentials }, { client }, or { live: true }"
  );
}
901
/**
 * Determine the credentials for a live crawl, if one was requested.
 *
 * @param {{credentials?: object, live?: boolean}} input - Resolved adapter input.
 * @returns {object|undefined} Explicit credentials when truthy; credentials
 *   read from the environment when `live` is set; otherwise `undefined`.
 */
function resolveLiveCredentials(input) {
  const { credentials, live } = input;
  if (credentials) {
    return credentials;
  }
  return live ? readSmugMugCredentialsFromEnv() : undefined;
}
906
/**
 * SmugMug platform adapter: validates input (in-memory data, live API, or
 * export file) and enumerates its entities.
 *
 * Changes from the previous version:
 *  - The failure result now carries `summary: {}`, matching the other
 *    failure results in this file (validateWxrFile, validateSmugMugExportFile).
 *  - The summary mapping, previously written out three times, lives in one
 *    local helper.
 */
const smugmugAdapter = {
  platform: "smugmug",

  /**
   * Validate adapter input.
   *
   * @param {string|object} input - See resolveInput() for accepted shapes.
   * @returns {Promise<{ok: boolean, issues: Array<{code: string, message: string}>, summary: object}>}
   *   Never rejects: any thrown error is reported as an `invalid_input` issue.
   */
  async validateInput(input) {
    // Map the export summary onto the adapter summary; folders are reported as categories.
    const toValidationSummary = (summary) => ({
      portfolios: summary.portfolios,
      assets: summary.assets,
      categories: summary.folders,
      posts: 0,
      pages: 0,
      tags: 0
    });
    try {
      const resolved = resolveInput(input);
      const credentials = resolveLiveCredentials(resolved);
      if (resolved.data) {
        return {
          ok: true,
          issues: [],
          summary: toValidationSummary(summarizeSmugMugExport(resolved.data))
        };
      }
      if (resolved.client || credentials) {
        const client = resolved.client ?? new SmugMugApiClient({ credentials, ...resolved.clientOptions });
        await client.validateCredentials();
        // The summary for a live source requires a full crawl.
        const doc = await client.crawlExport();
        return {
          ok: true,
          issues: [],
          summary: toValidationSummary(summarizeSmugMugExport(doc))
        };
      }
      const result = await validateSmugMugExportFile(resolved.path);
      return {
        ok: result.ok,
        issues: result.issues,
        summary: result.summary
      };
    } catch (error) {
      return {
        ok: false,
        issues: [
          {
            code: "invalid_input",
            message: error instanceof Error ? error.message : String(error)
          }
        ],
        summary: {}
      };
    }
  },

  /**
   * Return the async entity stream for the given context.
   *
   * @param {{input: string|object}} ctx - Context whose `input` is resolved by resolveInput().
   * @returns {AsyncGenerator<object>} Stream from enumerateSmugMugEntities().
   * @throws {Error} Synchronously, when the input cannot be resolved.
   */
  enumerateEntities(ctx) {
    const resolved = resolveInput(ctx.input);
    const credentials = resolveLiveCredentials(resolved);
    return enumerateSmugMugEntities({
      filePath: resolved.path,
      data: resolved.data,
      client: resolved.client,
      credentials,
      clientOptions: resolved.clientOptions
    });
  }
};
975
+
976
+ // src/parsers/squarespace/index.ts
977
/**
 * Normalise the Squarespace adapter input into a record.
 *
 * A bare string becomes `{ path }`. An object carrying `client` or
 * `collectTargets` is returned untouched; otherwise only `data` or `path`
 * is kept (checked in that order).
 *
 * @param {unknown} input
 * @returns {object} The normalised input record.
 * @throws {Error} When none of the supported shapes is present.
 */
function resolveInput2(input) {
  if (typeof input === "string") {
    return { path: input };
  }
  const isObject = typeof input === "object" && input !== null;
  if (isObject) {
    const usesLiveSource = input.client || input.collectTargets;
    if (usesLiveSource) {
      return input;
    }
    if (input.data) {
      return { data: input.data };
    }
    if (input.path) {
      return { path: input.path };
    }
  }
  throw new Error(
    "Squarespace adapter requires input path (string or { path }), { data }, { client, collectTargets }, or { collectTargets }"
  );
}
989
/**
 * Squarespace platform adapter: validates an input (inline data, live
 * collection, or export file) and enumerates its entities.
 */
var squarespaceAdapter = {
  platform: "squarespace",
  /**
   * Validate the input and summarise what it contains.
   *
   * Inline `data` is summarised directly. A `client` or a non-empty
   * `collectTargets` list triggers a live collection, which itself requires
   * non-empty `collectTargets`. Anything else is treated as a file path. Any
   * thrown error is reported as one `invalid_input` issue.
   *
   * @param {unknown} input
   * @returns {Promise<{ok: boolean, issues: Array<object>, summary?: object}>}
   */
  async validateInput(input) {
    // Shared success shape for the inline-data and live-collection paths.
    const succeedWith = (doc) => {
      const counts = summarizeSquarespaceExport(doc);
      return {
        ok: true,
        issues: [],
        summary: {
          pages: counts.pages,
          posts: counts.posts,
          categories: counts.categories,
          tags: counts.tags
        }
      };
    };
    try {
      const resolved = resolveInput2(input);
      if (resolved.data) {
        return succeedWith(resolved.data);
      }
      const wantsLive = resolved.client || resolved.collectTargets?.length;
      if (wantsLive) {
        if (!resolved.collectTargets?.length) {
          throw new Error("Squarespace live validation requires collectTargets");
        }
        const client = resolved.client ?? new SquarespaceCollectionClient(resolved.clientOptions);
        const collected = await client.collectExport(resolved.collectTargets);
        return succeedWith(collected);
      }
      const { ok, issues, summary } = await validateSquarespaceExportFile(resolved.path);
      return { ok, issues, summary };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return {
        ok: false,
        issues: [{ code: "invalid_input", message }]
      };
    }
  },
  /**
   * Resolve `ctx.input` and delegate to `enumerateSquarespaceEntities`.
   *
   * @param {{input: unknown}} ctx
   * @returns Whatever `enumerateSquarespaceEntities` returns.
   */
  enumerateEntities(ctx) {
    const { path: filePath, data, client, collectTargets, clientOptions } = resolveInput2(ctx.input);
    return enumerateSquarespaceEntities({ filePath, data, client, collectTargets, clientOptions });
  }
};
1054
+
1055
+ // src/parsers/wix/api.ts
1056
+ import { z as z2 } from "zod";
1057
+
1058
+ // src/parsers/wix/ricos-to-html.ts
1059
/**
 * Escape the four characters that are unsafe in HTML text and
 * double-quoted attribute values: `&`, `<`, `>` and `"`.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeHtml(text) {
  const entityFor = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  // One pass is enough: every match is a single source character, so the
  // ampersands introduced by the replacements are never revisited.
  return text.replace(/[&<>"]/g, (char) => entityFor[char]);
}
1062
/**
 * Render a Ricos TEXT node: escape its text, then wrap it once per
 * decoration in list order (so later decorations end up outermost).
 *
 * BOLD, ITALIC and UNDERLINE map to `<strong>`, `<em>` and `<u>`. LINK wraps
 * in `<a>` only when a URL is present. Other decoration types are ignored.
 *
 * @param {object} node
 * @returns {string} HTML fragment.
 */
function renderTextNode(node) {
  const textData = node.textData;
  const inlineTags = new Map([
    ["BOLD", "strong"],
    ["ITALIC", "em"],
    ["UNDERLINE", "u"]
  ]);
  let html = escapeHtml(textData?.text ?? "");
  for (const decoration of textData?.decorations ?? []) {
    const tag = inlineTags.get(decoration.type);
    if (tag !== void 0) {
      html = `<${tag}>${html}</${tag}>`;
    } else if (decoration.type === "LINK") {
      const href = decoration.linkData?.link?.url;
      if (href) html = `<a href="${escapeHtml(href)}">${html}</a>`;
    }
  }
  return html;
}
1087
/**
 * Render a list of Ricos nodes and concatenate the results.
 *
 * @param {Array<object>|null|undefined} nodes - A missing list renders as "".
 * @returns {string}
 */
function renderChildren(nodes) {
  const children = nodes ?? [];
  const fragments = children.map((child) => renderRicosNode(child));
  return fragments.join("");
}
1090
/**
 * Render one Ricos node (and, recursively, its children) to HTML.
 *
 * The node type is compared case-insensitively. Unknown types render only
 * their children. HTML nodes are emitted verbatim, without escaping.
 *
 * @param {object} node
 * @returns {string} HTML fragment.
 */
function renderRicosNode(node) {
  const kind = (node.type ?? "").toUpperCase();

  // Node types that are just "wrap the children in one tag".
  const wrapperTags = new Map([
    ["PARAGRAPH", "p"],
    ["BULLETED_LIST", "ul"],
    ["ORDERED_LIST", "ol"],
    ["LIST_ITEM", "li"],
    ["BLOCKQUOTE", "blockquote"]
  ]);
  const wrapperTag = wrapperTags.get(kind);
  if (wrapperTag !== void 0) {
    return `<${wrapperTag}>${renderChildren(node.nodes)}</${wrapperTag}>`;
  }

  if (kind === "TEXT") {
    return renderTextNode(node);
  }
  if (kind === "HEADING") {
    // Clamp to h1..h6; a missing level defaults to 2.
    const level = Math.min(6, Math.max(1, node.headingData?.level ?? 2));
    return `<h${level}>${renderChildren(node.nodes)}</h${level}>`;
  }
  if (kind === "HTML") {
    return node.htmlData?.html ?? "";
  }
  if (kind === "IMAGE") {
    const image = node.imageData?.image;
    const src = image?.src?.url;
    if (!src) return "";
    const altAttr = image?.altText ? ` alt="${escapeHtml(image.altText)}"` : "";
    return `<figure><img src="${escapeHtml(src)}"${altAttr} /></figure>`;
  }
  if (kind === "BUTTON") {
    const label = escapeHtml(node.buttonData?.text ?? "Link");
    const href = node.buttonData?.link?.url ?? node.linkData?.link?.url;
    return href ? `<p><a href="${escapeHtml(href)}">${label}</a></p>` : `<span>${label}</span>`;
  }
  if (kind === "CODE_BLOCK") {
    return `<pre><code>${renderChildren(node.nodes)}</code></pre>`;
  }
  if (kind === "DIVIDER") {
    return "<hr />";
  }
  return renderChildren(node.nodes);
}
1131
/**
 * Convert a Ricos rich-content document to an HTML string.
 *
 * @param {unknown} richContent - Expected to be an object with a `nodes` array.
 * @returns {string} "" when the input is not an object or has no nodes.
 */
function ricosToHtml(richContent) {
  const isObject = Boolean(richContent) && typeof richContent === "object";
  if (!isObject) {
    return "";
  }
  const { nodes } = richContent;
  const hasNodes = Array.isArray(nodes) && nodes.length > 0;
  return hasNodes ? renderChildren(nodes) : "";
}
1137
+
1138
+ // src/parsers/wix/map-wire.ts
1139
/**
 * True for non-null, non-array objects.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return !Array.isArray(value);
}
1142
/**
 * Return `value` when `isRecord` accepts it, otherwise `undefined`.
 *
 * @param {unknown} value
 * @returns {object|undefined}
 */
function asRecord(value) {
  if (isRecord(value)) {
    return value;
  }
  return void 0;
}
1145
/**
 * Return `value` when it is a non-empty string, otherwise `undefined`.
 *
 * @param {unknown} value
 * @returns {string|undefined}
 */
function asString(value) {
  if (typeof value !== "string" || value.length === 0) {
    return void 0;
  }
  return value;
}
1148
/**
 * Keep only the string entries of an array; any non-array input yields [].
 *
 * @param {unknown} value
 * @returns {string[]}
 */
function asStringArray(value) {
  return Array.isArray(value)
    ? value.filter((entry) => typeof entry === "string")
    : [];
}
1152
/**
 * Join a Wix `{ base, path }` URL record into a single string.
 *
 * With both parts present, one trailing slash is stripped from `base` and
 * `path` is given a leading slash if it lacks one. With only one part
 * present, that part is returned as-is.
 *
 * @param {object|undefined} url
 * @returns {string|undefined}
 */
function buildWixPageUrl(url) {
  if (!url) {
    return void 0;
  }
  const base = asString(url.base);
  const path = asString(url.path);
  if (!base || !path) {
    return base ?? path;
  }
  const trimmedBase = base.replace(/\/$/, "");
  const rootedPath = path.startsWith("/") ? path : `/${path}`;
  return `${trimmedBase}${rootedPath}`;
}
1161
/**
 * Read the SEO title or description out of a Wix `seoData.tags` list.
 *
 * The first `title` tag (for prop "title") or the first `meta` tag named
 * "description" (for prop "description") decides the result, even when its
 * value turns out to be missing.
 *
 * @param {unknown} seoData
 * @param {string} prop - "title" or "description"; anything else yields undefined.
 * @returns {string|undefined}
 */
function seoField(seoData, prop) {
  const tags = asRecord(seoData)?.tags;
  if (!Array.isArray(tags)) {
    return void 0;
  }
  const wantsTitle = prop === "title";
  const wantsDescription = prop === "description";
  for (const tag of tags) {
    const record = asRecord(tag);
    if (!record) continue;
    const props = asRecord(record.props);
    switch (record.type) {
      case "title":
        if (wantsTitle) {
          return asString(props?.children) ?? asString(record.children);
        }
        break;
      case "meta":
        if (wantsDescription && props?.name === "description") {
          return asString(props.content);
        }
        break;
      default:
        break;
    }
  }
  return void 0;
}
1177
/**
 * Produce the HTML body for a Wix post.
 *
 * Rendered rich content is preferred. When it is blank, the `contentText`
 * fallback is turned into paragraphs: runs of blank lines split paragraphs
 * and single newlines become `<br />`.
 *
 * @param {object} post - Wire post record (`richContent`, `contentText`).
 * @returns {string} HTML, or "" when the post has no usable content.
 */
function postContentHtml(post) {
  const richHtml = ricosToHtml(post.richContent);
  if (richHtml.trim()) return richHtml;
  const plain = asString(post.contentText);
  if (!plain) return "";
  // contentText is treated as plain text, so it is escaped before being
  // placed inside markup; otherwise a literal "<" or "&" in the post would
  // be interpreted as HTML.
  const escaped = escapeHtml(plain);
  return `<p>${escaped.replace(/\n\n+/g, "</p><p>").replace(/\n/g, "<br />")}</p>`;
}
1184
/**
 * Map one wire category to `{ id, name, slug }`.
 *
 * The name comes from `label`, falling back to `title`; the slug comes from
 * `slug`, falling back to the name, and is passed through `sanitizeSlug`.
 *
 * @param {unknown} wire
 * @param {string} [exportedAt] - Accepted but not used.
 * @returns {{id: string, name: string, slug: string}|undefined}
 *   `undefined` when the entry is not a record or lacks an id, name or slug.
 */
function mapWireCategory(wire, exportedAt) {
  const record = asRecord(wire);
  if (!record) {
    return void 0;
  }
  const id = asString(record.id);
  const name = asString(record.label) ?? asString(record.title);
  if (!id || !name) {
    return void 0;
  }
  const slug = sanitizeSlug(asString(record.slug) ?? name);
  return slug ? { id, name, slug } : void 0;
}
1195
/**
 * Map one wire tag to `{ id, name, slug }`.
 *
 * The name comes from `label`, falling back to `slug`; the slug comes from
 * `slug`, falling back to the name, and is passed through `sanitizeSlug`.
 *
 * @param {unknown} wire
 * @returns {{id: string, name: string, slug: string}|undefined}
 *   `undefined` when the entry is not a record or lacks an id, name or slug.
 */
function mapWireTag(wire) {
  const record = asRecord(wire);
  if (!record) {
    return void 0;
  }
  const id = asString(record.id);
  const name = asString(record.label) ?? asString(record.slug);
  if (!id || !name) {
    return void 0;
  }
  const slug = sanitizeSlug(asString(record.slug) ?? name);
  return slug ? { id, name, slug } : void 0;
}
1205
/**
 * Map one wire post to the export post shape.
 *
 * Category and tag ids are translated to slugs through `lookup`; ids with no
 * known slug are dropped. Hashtags are slugified and appended to the tag
 * slugs when not already present. The status is always "published".
 *
 * @param {unknown} wire
 * @param {{categorySlugsById: Map<string, string>, tagSlugsById: Map<string, string>}} lookup
 * @returns {object|undefined} `undefined` when the entry is not a record or has no id.
 */
function mapWirePost(wire, lookup) {
  const record = asRecord(wire);
  if (!record) {
    return void 0;
  }
  const id = asString(record.id);
  const title = asString(record.title) ?? "Untitled";
  if (!id) {
    return void 0;
  }

  // Translate ids to slugs, discarding ids the lookup does not know.
  const slugsFor = (ids, table) =>
    asStringArray(ids)
      .map((entryId) => table.get(entryId))
      .filter(Boolean);

  const slug = sanitizeSlug(asString(record.slug) ?? title);
  const url = buildWixPageUrl(asRecord(record.url));
  const featuredImageUrl = asString(asRecord(record.heroImage)?.url);
  const categorySlugs = slugsFor(record.categoryIds, lookup.categorySlugsById);
  const tagSlugs = slugsFor(record.tagIds, lookup.tagSlugsById);
  asStringArray(record.hashtags).forEach((hashtag) => {
    const hashtagSlug = sanitizeSlug(hashtag);
    if (hashtagSlug && !tagSlugs.includes(hashtagSlug)) {
      tagSlugs.push(hashtagSlug);
    }
  });

  return {
    id,
    title,
    slug,
    url,
    excerpt: asString(record.excerpt),
    contentHtml: postContentHtml(record),
    publishedAt: asString(record.firstPublishedDate) ?? asString(record.lastPublishedDate),
    status: "published",
    categorySlugs,
    tagSlugs,
    featuredImageUrl,
    seoTitle: seoField(record.seoData, "title"),
    seoDescription: seoField(record.seoData, "description")
  };
}
1237
/**
 * Map the `posts` array of a list response, dropping entries that
 * `mapWirePost` rejects.
 *
 * @param {unknown} wire
 * @param {object} lookup - Forwarded to `mapWirePost`.
 * @returns {object[]} [] when `posts` is missing or not an array.
 */
function mapWireListPostsResponse(wire, lookup) {
  const rawPosts = asRecord(wire)?.posts;
  if (!Array.isArray(rawPosts)) {
    return [];
  }
  const mapped = rawPosts.map((entry) => mapWirePost(entry, lookup));
  return mapped.filter(Boolean);
}
1242
/**
 * Map the `categories` array of a list response, dropping entries that
 * `mapWireCategory` rejects.
 *
 * @param {unknown} wire
 * @returns {object[]} [] when `categories` is missing or not an array.
 */
function mapWireListCategoriesResponse(wire) {
  const rawCategories = asRecord(wire)?.categories;
  if (!Array.isArray(rawCategories)) {
    return [];
  }
  const mapped = rawCategories.map((entry) => mapWireCategory(entry));
  return mapped.filter(Boolean);
}
1247
/**
 * Map the `tags` array of a list response, dropping entries that
 * `mapWireTag` rejects.
 *
 * @param {unknown} wire
 * @returns {object[]} [] when `tags` is missing or not an array.
 */
function mapWireListTagsResponse(wire) {
  const rawTags = asRecord(wire)?.tags;
  if (!Array.isArray(rawTags)) {
    return [];
  }
  const mapped = rawTags.map((entry) => mapWireTag(entry));
  return mapped.filter(Boolean);
}
1252
+
1253
+ // src/parsers/wix/api.ts
1254
// Origin prepended by WixCollectionClient.buildUrl to any path that does not start with "http".
var WIX_API_BASE = "https://www.wixapis.com";
// Credentials and site identity used to build the request headers.
var wixAuthContextSchema = z2.object({
  /** Full Authorization header value (API key or Bearer token). */
  authorization: z2.string().min(1),
  siteId: z2.string().min(1),
  accountId: z2.string().optional(),
  extraHeaders: z2.record(z2.string()).optional()
});
// Constructor options for WixCollectionClient; every field except `auth` has a default or is optional.
var wixClientOptionsSchema = z2.object({
  auth: wixAuthContextSchema,
  pageSize: z2.number().int().min(1).max(100).default(50),
  maxRetries: z2.number().int().min(0).max(10).default(3),
  retryBaseDelayMs: z2.number().int().min(0).default(500),
  maxRetryDelayMs: z2.number().int().min(0).default(8e3),
  requestIntervalMs: z2.number().int().min(0).default(200),
  fetchImpl: z2.custom().optional(),
  /** Include draft posts when the API key has permission. */
  includeDrafts: z2.boolean().default(false)
});
1273
/**
 * Resolve after `ms` milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep2(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
1276
/**
 * Collapse items sharing an `id`. The last occurrence wins, but it keeps the
 * position of the first occurrence.
 *
 * @param {Iterable<{id: unknown}>} items
 * @returns {object[]}
 */
function dedupeById(items) {
  const byId = new Map();
  for (const entry of items) {
    byId.set(entry.id, entry);
  }
  return Array.from(byId.values());
}
1281
/**
 * Collapse items sharing a `slug`. The last occurrence wins, but it keeps the
 * position of the first occurrence.
 *
 * @param {Iterable<{slug: unknown}>} items
 * @returns {object[]}
 */
function dedupeBySlug(items) {
  const bySlug = new Map();
  for (const entry of items) {
    bySlug.set(entry.slug, entry);
  }
  return Array.from(bySlug.values());
}
1286
/**
 * Decide whether the page just fetched was the last one.
 *
 * A page is final when it is missing, empty or shorter than `pageSize`.
 * Otherwise, when the paging metadata carries numeric `offset` and `total`,
 * it is final once `offset + items.length` reaches `total`. Without usable
 * metadata a full page is assumed to have a successor.
 *
 * @param {unknown[]} items - Entries on the current page.
 * @param {{total?: number, offset?: number}|undefined} paging
 * @param {number} pageSize
 * @returns {boolean}
 */
function pagingComplete(items, paging, pageSize) {
  const count = Array.isArray(items) ? items.length : 0;
  if (count === 0 || count < pageSize) {
    return true;
  }
  const total = paging?.total;
  const offset = paging?.offset;
  if (typeof total !== "number" || typeof offset !== "number") {
    return false;
  }
  return offset + count >= total;
}
1294
/**
 * Throttled, retrying reader for the Wix Blog v3 list endpoints
 * (categories, tags, posts, draft posts) that assembles a Wix export document.
 *
 * Requests are spaced at least `requestIntervalMs` apart. HTTP 429 and 5xx
 * responses are retried up to `maxRetries` times.
 */
var WixCollectionClient = class {
  auth;
  pageSize;
  maxRetries;
  retryBaseDelayMs;
  maxRetryDelayMs;
  requestIntervalMs;
  fetchImpl;
  includeDrafts;
  // Timestamp (ms) of the most recent request, used by throttle().
  lastRequestAt = 0;
  /**
   * @param {object} options - Validated against `wixClientOptionsSchema`.
   * @throws When the options fail schema validation.
   */
  constructor(options) {
    const parsed = wixClientOptionsSchema.parse(options);
    this.auth = parsed.auth;
    this.pageSize = parsed.pageSize;
    this.maxRetries = parsed.maxRetries;
    this.retryBaseDelayMs = parsed.retryBaseDelayMs;
    this.maxRetryDelayMs = parsed.maxRetryDelayMs;
    this.requestIntervalMs = parsed.requestIntervalMs;
    this.fetchImpl = parsed.fetchImpl ?? fetch;
    this.includeDrafts = parsed.includeDrafts;
  }
  /**
   * Build an absolute request URL. Paths not starting with "http" are
   * prefixed with `WIX_API_BASE`; `undefined` query values are omitted.
   *
   * @param {string} path
   * @param {Record<string, unknown>} [query]
   * @returns {string}
   */
  buildUrl(path, query) {
    const url = new URL(path.startsWith("http") ? path : `${WIX_API_BASE}${path}`);
    if (query) {
      for (const [key, value] of Object.entries(query)) {
        if (value === void 0) continue;
        url.searchParams.set(key, String(value));
      }
    }
    return url.toString();
  }
  /**
   * GET a path (with retry and throttling) and parse the body as JSON.
   *
   * @param {string} path
   * @param {Record<string, unknown>} [query]
   * @returns {Promise<any>}
   */
  async fetchJson(path, query) {
    const response = await this.requestWithRetry(this.buildUrl(path, query));
    return response.json();
  }
  /**
   * Walk every page of an offset-paginated list endpoint.
   *
   * Paging decisions use the number of raw entries the server returned under
   * `wireKey`, not the number that survived mapping: the mappers silently
   * drop malformed entries, and counting mapped entries would make a full
   * page look short (ending the walk early) and advance the offset by too
   * little.
   *
   * @param {string} path - Endpoint path.
   * @param {Record<string, unknown>} query - Extra query parameters.
   * @param {string} wireKey - Response property holding the raw entry array.
   * @param {(wire: any) => object[]} mapPage - Maps one response to export entries.
   * @returns {Promise<object[]>} Mapped entries of all pages, in fetch order.
   */
  async collectPaged(path, query, wireKey, mapPage) {
    const collected = [];
    let offset = 0;
    while (true) {
      const wire = await this.fetchJson(path, {
        "paging.limit": this.pageSize,
        "paging.offset": offset,
        ...query
      });
      collected.push(...mapPage(wire));
      const rawEntries = wire?.[wireKey];
      const rawPage = Array.isArray(rawEntries) ? rawEntries : [];
      // pagingComplete() is true for an empty page, so the offset below
      // always moves forward when the loop continues.
      if (pagingComplete(rawPage, wire?.pagingMetadata, this.pageSize)) break;
      offset += rawPage.length;
    }
    return collected;
  }
  /**
   * Fetch all blog categories, de-duplicated by slug.
   *
   * @returns {Promise<object[]>}
   */
  async listAllCategories() {
    const categories = await this.collectPaged(
      "/blog/v3/categories",
      { fieldsets: "URL" },
      "categories",
      (wire) => mapWireListCategoriesResponse(wire)
    );
    return dedupeBySlug(categories);
  }
  /**
   * Fetch all blog tags, de-duplicated by slug.
   *
   * @returns {Promise<object[]>}
   */
  async listAllTags() {
    const tags = await this.collectPaged(
      "/blog/v3/tags",
      {},
      "tags",
      (wire) => mapWireListTagsResponse(wire)
    );
    return dedupeBySlug(tags);
  }
  /**
   * Fetch all published posts and, when `includeDrafts` is set, all draft
   * posts as well; the result is de-duplicated by id (later entries win).
   *
   * @param {object} lookup - Id-to-slug maps forwarded to the post mapper.
   * @returns {Promise<object[]>}
   */
  async listAllPosts(lookup) {
    const posts = await this.collectPaged(
      "/blog/v3/posts",
      { fieldsets: "URL,RICH_CONTENT,SEO", sort: "PUBLISHED_DATE_DESC" },
      "posts",
      (wire) => mapWireListPostsResponse(wire, lookup)
    );
    if (this.includeDrafts) {
      posts.push(...await this.listDraftPosts(lookup));
    }
    return dedupeById(posts);
  }
  /**
   * Fetch all draft posts; each mapped post gets status "draft".
   *
   * @param {object} lookup - Id-to-slug maps forwarded to the post mapper.
   * @returns {Promise<object[]>}
   */
  async listDraftPosts(lookup) {
    return this.collectPaged(
      "/blog/v3/draft-posts",
      { fieldsets: "URL,RICH_CONTENT,SEO" },
      "draftPosts",
      (wire) => mapWireListPostsResponse(
        { posts: wire?.draftPosts ?? [] },
        lookup
      ).map((post) => ({ ...post, status: "draft" }))
    );
  }
  /**
   * Collect categories, tags and posts into one export document.
   * Categories and tags are fetched first so posts can resolve ids to slugs.
   * `pages` is always empty here.
   *
   * @returns {Promise<object>}
   */
  async collectExport() {
    const categories = await this.listAllCategories();
    const tags = await this.listAllTags();
    const categorySlugsById = new Map(categories.map((category) => [category.id, category.slug]));
    const tagSlugsById = new Map(tags.map((tag) => [tag.id, tag.slug]));
    const posts = await this.listAllPosts({ categorySlugsById, tagSlugsById });
    return {
      exportVersion: 1,
      exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
      site: { siteId: this.auth.siteId },
      posts,
      pages: [],
      categories,
      tags
    };
  }
  /**
   * Build the request headers. `extraHeaders` are spread last and can
   * override the defaults; `wix-account-id` is added only when configured.
   *
   * @returns {Record<string, string>}
   */
  buildHeaders() {
    const headers = {
      Accept: "application/json",
      Authorization: this.auth.authorization,
      "wix-site-id": this.auth.siteId,
      ...this.auth.extraHeaders ?? {}
    };
    if (this.auth.accountId) {
      headers["wix-account-id"] = this.auth.accountId;
    }
    return headers;
  }
  /**
   * GET `url`, retrying on HTTP 429 and 5xx.
   *
   * The wait before a retry is the integer `retry-after` header (seconds)
   * when it parses, otherwise exponential backoff capped at `maxRetryDelayMs`.
   *
   * @param {string} url
   * @returns {Promise<Response>} The first OK response.
   * @throws {Error} "Wix HTTP <status>" (plus up to 200 characters of the
   *   body) on a non-retryable status or once retries are exhausted.
   */
  async requestWithRetry(url) {
    let attempt = 0;
    while (true) {
      await this.throttle();
      const response = await this.fetchImpl(url, {
        method: "GET",
        headers: this.buildHeaders()
      });
      if (response.ok) return response;
      const retryable = response.status === 429 || response.status >= 500;
      if (!retryable || attempt >= this.maxRetries) {
        const detail = await response.text().catch(() => "");
        throw new Error(
          `Wix HTTP ${response.status}${detail ? `: ${detail.slice(0, 200)}` : ""}`
        );
      }
      const retryAfter = Number.parseInt(response.headers.get("retry-after") ?? "", 10);
      const delay = Number.isFinite(retryAfter) ? retryAfter * 1e3 : Math.min(this.maxRetryDelayMs, this.retryBaseDelayMs * 2 ** attempt);
      await sleep2(delay);
      attempt += 1;
    }
  }
  /**
   * Wait until at least `requestIntervalMs` has passed since the previous
   * request, then record the current time. A non-positive interval disables it.
   *
   * @returns {Promise<void>}
   */
  async throttle() {
    if (this.requestIntervalMs <= 0) return;
    const elapsed = Date.now() - this.lastRequestAt;
    if (elapsed < this.requestIntervalMs) {
      await sleep2(this.requestIntervalMs - elapsed);
    }
    this.lastRequestAt = Date.now();
  }
};
1466
/**
 * Shape check for a Wix export document: `exportVersion` must be 1 and at
 * least one of `posts` / `pages` must be an array.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isWixExport(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  if (value.exportVersion !== 1) {
    return false;
  }
  return Array.isArray(value.posts) || Array.isArray(value.pages);
}
1471
/**
 * Validate a Wix export document and back-fill missing slugs in place.
 *
 * Posts and pages without a slug get one derived from their title via
 * `sanitizeSlug`. The input object itself is mutated and returned.
 *
 * @param {unknown} value
 * @returns {object} The same `value`.
 * @throws {Error} When the shape check fails or there are no posts and no pages.
 */
function assertWixExport(value) {
  if (!isWixExport(value)) {
    throw new Error("Invalid Wix export: expected exportVersion 1 with posts[] and/or pages[]");
  }
  const postCount = value.posts?.length ?? 0;
  const pageCount = value.pages?.length ?? 0;
  if (postCount === 0 && pageCount === 0) {
    throw new Error("Invalid Wix export: no posts or pages");
  }
  const backfillSlugs = (entries) => {
    for (const entry of entries ?? []) {
      if (!entry.slug) entry.slug = sanitizeSlug(entry.title);
    }
  };
  backfillSlugs(value.posts);
  backfillSlugs(value.pages);
  return value;
}
1486
+
1487
+ // src/parsers/wix/snapshot.ts
1488
+ import * as cheerio from "cheerio";
1489
+ import { readFile as readFile3 } from "fs/promises";
1490
+ import { XMLParser as XMLParser2 } from "fast-xml-parser";
1491
+ import { z as z3 } from "zod";
1492
// Candidate selectors for a page's main content, tried in order by pickMainRoot; "body" is the last resort.
var MAIN_CONTENT_SELECTORS = [
  "main",
  "article",
  '[role="main"]',
  "#SITE_PAGES main",
  "#site-root main",
  "#PAGES_CONTAINER",
  "body"
];
// Constructor options for WixPageSnapshotCollector; every field has a default or is optional.
var wixSnapshotClientOptionsSchema = z3.object({
  fetchImpl: z3.custom().optional(),
  maxRetries: z3.number().int().min(0).max(5).default(2),
  retryBaseDelayMs: z3.number().int().min(0).default(300),
  requestIntervalMs: z3.number().int().min(0).default(150)
});
1507
/**
 * Resolve after `ms` milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep3(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
1510
/**
 * Derive a slug from the last non-empty path segment of `url`.
 *
 * When the URL cannot be parsed or has no path segments, the slug is derived
 * from `fallback` instead.
 *
 * @param {string} url
 * @param {string} fallback
 * @returns {string} Result of `sanitizeSlug`.
 */
function slugFromUrl(url, fallback) {
  try {
    const { pathname } = new URL(url);
    const lastSegment = pathname.split("/").filter(Boolean).at(-1);
    if (lastSegment) {
      return sanitizeSlug(lastSegment);
    }
  } catch {
    // Best effort: fall through to the fallback below.
  }
  return sanitizeSlug(fallback);
}
1519
/**
 * Heuristic check for a page that appears to sit behind a login.
 *
 * True when the raw HTML mentions "members-login" or "login-bar"
 * (case-insensitively), when the page has both a password input and a form
 * whose action contains "login", or when it has a `data-testid="sign-in"` element.
 *
 * @param {Function} $ - Selector function returning an object with `length`.
 * @param {string} html - Raw page HTML.
 * @returns {boolean}
 */
function looksLikeLoginWall($, html) {
  const haystack = html.toLowerCase();
  const textMarkers = ["members-login", "login-bar"];
  if (textMarkers.some((marker) => haystack.includes(marker))) {
    return true;
  }
  const hasPasswordLoginForm =
    $('input[type="password"]').length > 0 && $('form[action*="login"]').length > 0;
  if (hasPasswordLoginForm) {
    return true;
  }
  return $('[data-testid="sign-in"]').length > 0;
}
1526
/**
 * Return the first element matched by the first `MAIN_CONTENT_SELECTORS`
 * entry that matches anything; falls back to `$("body")`.
 *
 * @param {Function} $ - Loaded cheerio instance.
 * @returns The selected cheerio wrapper.
 */
function pickMainRoot($) {
  for (let index = 0; index < MAIN_CONTENT_SELECTORS.length; index += 1) {
    const candidate = $(MAIN_CONTENT_SELECTORS[index]).first();
    if (candidate.length > 0) {
      return candidate;
    }
  }
  return $("body");
}
1533
/**
 * Extract the main content of a page.
 *
 * The title is taken from `<title>`, then the `og:title` meta tag, then the
 * first `<h1>`. Script, style, noscript, nav, header, footer and iframe
 * elements are removed from the chosen root before its HTML is read.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{contentHtml: string, title: string|undefined, empty: boolean, loginWall: boolean}}
 *   `empty` is true when the root's whitespace-collapsed text is under 20 characters.
 */
function extractMainContentHtml(html) {
  const $ = cheerio.load(html, { xml: false });
  const loginWall = looksLikeLoginWall($, html);

  // Title candidates in priority order; the first non-empty one wins.
  const titleReaders = [
    () => $("title").first().text().trim(),
    () => $('meta[property="og:title"]').attr("content")?.trim(),
    () => $("h1").first().text().trim()
  ];
  let title;
  for (const readTitle of titleReaders) {
    const candidate = readTitle();
    if (candidate) {
      title = candidate;
      break;
    }
  }

  const root = pickMainRoot($);
  root.find("script, style, noscript, nav, header, footer, iframe").remove();
  const contentHtml = root.html()?.trim() ?? "";
  const visibleText = root.text().replace(/\s+/g, " ").trim();
  return { contentHtml, title, empty: visibleText.length < 20, loginWall };
}
1544
/**
 * Split a newline- and/or comma-separated list and keep the entries that
 * start with `http://` or `https://` (case-insensitive).
 *
 * @param {string} raw
 * @returns {string[]} Trimmed URLs in input order.
 */
function parseUrlList(raw) {
  const urls = [];
  for (const piece of raw.split(/[\n,]+/)) {
    const candidate = piece.trim();
    if (candidate.length > 0 && /^https?:\/\//i.test(candidate)) {
      urls.push(candidate);
    }
  }
  return urls;
}
1547
/**
 * Collect the `<loc>` values of a sitemap (`urlset/url`) and of a sitemap
 * index (`sitemapindex/sitemap`).
 *
 * @param {string} xml
 * @returns {string[]} Unique http(s) URLs; `urlset` entries come first.
 */
function parseSitemapUrls(xml) {
  const doc = new XMLParser2({ ignoreAttributes: true, removeNSPrefix: true }).parse(xml);
  const found = [];
  // The parser yields a single object for one child and an array for several.
  const harvest = (entries) => {
    if (!entries) return;
    const list = Array.isArray(entries) ? entries : [entries];
    for (const entry of list) {
      if (entry.loc) found.push(entry.loc.trim());
    }
  };
  harvest(doc.urlset?.url);
  harvest(doc.sitemapindex?.sitemap);
  const httpOnly = found.filter((url) => /^https?:\/\//i.test(url));
  return [...new Set(httpOnly)];
}
1567
/**
 * Read a URL list from disk. Content whose first non-whitespace character is
 * "<" is parsed as a sitemap; anything else as a plain URL list.
 *
 * @param {string} filePath
 * @returns {Promise<string[]>}
 */
async function loadUrlListFile(filePath) {
  const raw = await readFile3(filePath, "utf8");
  const looksLikeXml = raw.trim().startsWith("<");
  return looksLikeXml ? parseSitemapUrls(raw) : parseUrlList(raw);
}
1572
/**
 * Fetches public pages (or takes pre-supplied HTML) and extracts their main
 * content into page records, recording a "gap" for every page that could not
 * be captured.
 */
var WixPageSnapshotCollector = class {
  fetchImpl;
  maxRetries;
  retryBaseDelayMs;
  requestIntervalMs;
  // Timestamp (ms) of the most recent request, used by throttle().
  lastRequestAt = 0;
  /**
   * @param {object} [options] - Validated against `wixSnapshotClientOptionsSchema`.
   */
  constructor(options = {}) {
    const { fetchImpl, maxRetries, retryBaseDelayMs, requestIntervalMs } =
      wixSnapshotClientOptionsSchema.parse(options);
    this.fetchImpl = fetchImpl ?? fetch;
    this.maxRetries = maxRetries;
    this.retryBaseDelayMs = retryBaseDelayMs;
    this.requestIntervalMs = requestIntervalMs;
  }
  /**
   * Capture each target in order.
   *
   * A target with `html` is not fetched. A target is skipped and reported as
   * a gap when the fetch fails (`fetch_failed`), the page looks like a login
   * wall (`login_wall`), or too little text was extracted (`empty_extract`).
   *
   * @param {Array<{url: string, html?: string, title?: string, slug?: string, isHomePage?: boolean}>} targets
   * @returns {Promise<{pages: object[], gaps: Array<{url: string, code: string, message: string}>}>}
   */
  async collectPages(targets) {
    const pages = [];
    const gaps = [];
    const reportGap = (url, code, message) => {
      gaps.push({ url, code, message });
    };
    for (const target of targets) {
      let html;
      try {
        html = target.html ?? await this.fetchHtml(target.url);
      } catch (error) {
        reportGap(target.url, "fetch_failed", error instanceof Error ? error.message : String(error));
        continue;
      }
      const extracted = extractMainContentHtml(html);
      if (extracted.loginWall) {
        reportGap(target.url, "login_wall", "Page appears to require authentication");
        continue;
      }
      if (extracted.empty) {
        reportGap(target.url, "empty_extract", "No meaningful content found in main/article containers");
        continue;
      }
      const title = target.title ?? extracted.title ?? "Untitled";
      const slug = target.slug ?? slugFromUrl(target.url, title);
      pages.push({
        id: `page:${slug}`,
        title,
        slug,
        url: target.url,
        contentHtml: extracted.contentHtml,
        isHomePage: target.isHomePage,
        status: "published"
      });
    }
    return { pages, gaps };
  }
  /**
   * Capture a plain list of URLs. Only the first URL can be flagged as the
   * home page, and only when its path is "/".
   *
   * @param {string[]} urls
   * @returns {Promise<{pages: object[], gaps: object[]}>}
   */
  async collectFromUrlList(urls) {
    const targets = urls.map((url, index) => {
      const isHomePage = index === 0 && new URL(url).pathname === "/";
      return { url, isHomePage };
    });
    return this.collectPages(targets);
  }
  /**
   * GET a page as text, retrying HTTP 429 and 5xx with exponential backoff.
   *
   * @param {string} url
   * @returns {Promise<string>}
   * @throws {Error} On a non-retryable status or once retries are exhausted.
   */
  async fetchHtml(url) {
    for (let attempt = 0; ; attempt += 1) {
      await this.throttle();
      const response = await this.fetchImpl(url, {
        method: "GET",
        headers: { Accept: "text/html,application/xhtml+xml" }
      });
      if (response.ok) {
        return response.text();
      }
      const retryable = response.status === 429 || response.status >= 500;
      const outOfRetries = attempt >= this.maxRetries;
      if (!retryable || outOfRetries) {
        throw new Error(`Snapshot fetch HTTP ${response.status} for ${url}`);
      }
      await sleep3(this.retryBaseDelayMs * 2 ** attempt);
    }
  }
  /**
   * Wait until at least `requestIntervalMs` has passed since the previous
   * request, then record the current time. A non-positive interval disables it.
   *
   * @returns {Promise<void>}
   */
  async throttle() {
    if (this.requestIntervalMs <= 0) {
      return;
    }
    const remaining = this.requestIntervalMs - (Date.now() - this.lastRequestAt);
    if (remaining > 0) {
      await sleep3(remaining);
    }
    this.lastRequestAt = Date.now();
  }
};
1667
+
1668
+ // src/parsers/wix/parse-export.ts
1669
+ import { readFile as readFile4 } from "fs/promises";
1670
+ import { basename as basename2, extname } from "path";
1671
+ import { XMLParser as XMLParser3 } from "fast-xml-parser";
1672
// Platform identifier stamped into every source-metadata record built by sourceMeta3.
var PLATFORM3 = "wix";
1673
/**
 * Normalise a maybe-single, maybe-repeated value to an array.
 * `undefined` becomes []; arrays are returned as-is; anything else
 * (including `null`) is wrapped in a one-element array.
 *
 * @param {unknown} value
 * @returns {unknown[]}
 */
function asArray2(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return value === void 0 ? [] : [value];
}
1677
/**
 * Read the text of a parsed XML value.
 *
 * `null`/`undefined` give "". An object with a `#text` key gives that text
 * (or "" when it is nullish). Everything else is stringified with `String()`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function textValue2(value) {
  if (value === void 0 || value === null) {
    return "";
  }
  const hasTextNode = typeof value === "object" && "#text" in value;
  return hasTextNode ? String(value["#text"] ?? "") : String(value);
}
1685
/**
 * Build the source-metadata record for a Wix entity.
 *
 * @param {string} id
 * @param {string} [url] - A falsy URL is stored as `undefined`; the raw value
 *   is still passed to `linkToPath`.
 * @param {string} [exportedAt]
 * @returns {{platform: string, id: string, url: string|undefined, path: unknown, exportedAt: string|undefined}}
 */
function sourceMeta3(id, url, exportedAt) {
  const meta = { platform: PLATFORM3, id };
  meta.url = url || void 0;
  meta.path = linkToPath(url);
  meta.exportedAt = exportedAt;
  return meta;
}
1694
/**
 * Guess an image MIME type from a filename's extension (case-insensitive).
 *
 * @param {string} filename
 * @returns {string|undefined} The MIME type, or `undefined` for unknown extensions.
 */
function guessMime3(filename) {
  const ext = filename.split(".").pop()?.toLowerCase();
  const map = {
    jpg: "image/jpeg",
    jpeg: "image/jpeg",
    png: "image/png",
    gif: "image/gif",
    webp: "image/webp",
    avif: "image/avif"
  };
  // Own-property check: a bare map[ext] would also resolve inherited keys,
  // so "x.constructor" or "x.__proto__" would yield a function/object.
  return ext && Object.hasOwn(map, ext) ? map[ext] : void 0;
}
1706
/**
 * Parse an XML feed with attributes kept (prefixed "@_"), namespace prefixes
 * removed, and text left untrimmed and unconverted.
 *
 * @param {string} xml
 * @returns {object} The parsed document.
 */
function parseXmlDocument(xml) {
  const options = {
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    removeNSPrefix: true,
    trimValues: false,
    parseTagValue: false
  };
  return new XMLParser3(options).parse(xml);
}
1716
/**
 * Classify a feed document as Atom or RSS by its root element.
 *
 * Only the first element after the prologue (whitespace/BOM, XML declaration
 * or processing instructions, comments, DOCTYPE) is inspected. Searching the
 * whole document for "<feed" would misclassify RSS feeds that merely contain
 * an element such as `<feedburner:info>`.
 *
 * @param {string} xml
 * @returns {"atom"|"rss"} "atom" when the root element is `feed` (optionally
 *   namespace-prefixed); "rss" otherwise.
 */
function detectWixFeedFormat(xml) {
  const prologue = /^(?:\s+|<\?[\s\S]*?\?>|<!--[\s\S]*?-->|<!DOCTYPE[^>]*>)*/i;
  const fromRoot = xml.replace(prologue, "");
  const rootIsFeed = /^<(?:[A-Za-z_][\w.-]*:)?feed(?=[\s/>])/.test(fromRoot);
  return rootIsFeed ? "atom" : "rss";
}
1721
/**
 * Extract the HTML body of a feed item.
 *
 * Order of preference: a string `content`; `content["#text"]`;
 * `content.encoded`; the item's own `encoded`; then `description`, falling
 * back to `summary`.
 *
 * @param {object} item - Parsed RSS item or Atom entry.
 * @returns {string}
 */
function getItemContentHtml(item) {
  const { content } = item;
  if (typeof content === "string") {
    return content;
  }
  if (content !== void 0) {
    if (content["#text"] !== void 0) return textValue2(content["#text"]);
    if (content.encoded !== void 0) return textValue2(content.encoded);
  }
  if (item.encoded !== void 0) {
    return textValue2(item.encoded);
  }
  return textValue2(item.description ?? item.summary);
}
1733
/**
 * Derive a slug from the last non-empty path segment of `link`.
 *
 * When the link cannot be parsed or has no path segments, the slug is derived
 * from `title`, or from `fallbackId` when the title is empty.
 *
 * @param {string} link
 * @param {string} title
 * @param {string} fallbackId
 * @returns {string} Result of `sanitizeSlug`.
 */
function slugFromLink(link, title, fallbackId) {
  try {
    const { pathname } = new URL(link);
    const lastSegment = pathname.split("/").filter(Boolean).at(-1);
    if (lastSegment) {
      return sanitizeSlug(lastSegment);
    }
  } catch {
    // Best effort: fall through to the title/id fallback below.
  }
  return sanitizeSlug(title || fallbackId);
}
1743
/**
 * Resolve the permalink of a feed item.
 *
 * Handles both RSS (`<link>text</link>`) and Atom (`<link href="…" rel="…"/>`,
 * possibly repeated). Candidates are examined in document order:
 *  - a string or number is used directly when non-empty;
 *  - an object's `#text` is used when non-empty;
 *  - otherwise an object's `@_href` is used when `@_rel` is absent or "alternate".
 *
 * Each candidate is inspected individually. Stringifying the whole `link`
 * value would turn an Atom link object or array into "[object Object]",
 * which is truthy and would be returned as the link.
 *
 * @param {object} item - Parsed RSS item or Atom entry.
 * @returns {string} The link, or "" when none is found.
 */
function itemLink(item) {
  const raw = item.link;
  let candidates;
  if (raw === void 0 || raw === null) {
    candidates = [];
  } else {
    candidates = Array.isArray(raw) ? raw : [raw];
  }
  for (const link of candidates) {
    if (typeof link === "string" || typeof link === "number") {
      const text = String(link);
      if (text) return text;
      continue;
    }
    if (typeof link !== "object" || link === null) continue;
    const text = link["#text"];
    if (text !== void 0 && text !== null && String(text)) return String(text);
    const rel = link["@_rel"];
    const href = link["@_href"];
    if (href && (!rel || rel === "alternate")) return href;
  }
  return "";
}
1756
/**
 * Pick a stable source id for a feed item: the first non-empty of its
 * `guid`, its `id`, the resolved link, and finally the slug.
 *
 * @param {object} item
 * @param {string} link
 * @param {string} slug
 * @returns {string}
 */
function itemSourceId(item, link, slug) {
  return textValue2(item.guid) || textValue2(item.id) || link || slug;
}
1764
/**
 * Read an item's publication date: the first non-empty of `pubDate`,
 * `published` and `updated`.
 *
 * @param {object} item
 * @returns {string|undefined} The raw date text, or `undefined` when none is set.
 */
function itemPublishedAt(item) {
  const dateFields = ["pubDate", "published", "updated"];
  for (const field of dateFields) {
    const text = textValue2(item[field]);
    if (text) {
      return text;
    }
  }
  return void 0;
}
1768
/**
 * Normalise a feed `category` entry.
 *
 * A string yields `{ label }` only. An object yields its lower-cased
 * `@_domain` plus a label taken from `#text`, falling back to `@_term`.
 * Labels are trimmed.
 *
 * @param {string|object} category
 * @returns {{label: string, domain?: string}}
 */
function normalizeCategoryLabel(category) {
  if (typeof category !== "string") {
    const domain = category["@_domain"]?.toLowerCase();
    const label = textValue2(category["#text"] ?? category["@_term"]).trim();
    return { domain, label };
  }
  return { label: category.trim() };
}
1777
/**
 * Build the category and tag entities referenced by a list of feed items.
 *
 * A `category` entry whose domain is "tag" or "post_tag" becomes a tag;
 * every other entry becomes a category. The first label seen for a slug wins.
 *
 * @param {Iterable<object>} items
 * @returns {{categories: Map<string, object>, tags: Map<string, object>}} Maps keyed by slug.
 */
function collectTaxonomiesFromItems(items) {
  const categories = /* @__PURE__ */ new Map();
  const tags = /* @__PURE__ */ new Map();

  // Register an entity unless its slug is already taken in that bucket.
  const register = (bucket, type, idPrefix, label, slug) => {
    if (bucket.has(slug)) return;
    const sourceId = `${idPrefix}:${slug}`;
    bucket.set(slug, {
      type,
      source: sourceMeta3(sourceId),
      sourceId,
      name: label,
      slug
    });
  };

  for (const item of items) {
    for (const rawCategory of asArray2(item.category)) {
      const { domain, label } = normalizeCategoryLabel(rawCategory);
      if (!label) continue;
      const slug = sanitizeSlug(label);
      if (!slug) continue;
      const isTag = domain === "tag" || domain === "post_tag";
      if (isTag) {
        register(tags, "tag", "tag", label, slug);
      } else {
        register(categories, "category", "cat", label, slug);
      }
    }
  }
  return { categories, tags };
}
1809
/**
 * Slugs of an item's `category` entries that are not tags
 * (domain is neither "tag" nor "post_tag").
 *
 * @param {object} item
 * @returns {string[]} Slugs in document order; duplicates are kept.
 */
function collectCategorySlugs(item) {
  return asArray2(item.category).flatMap((rawCategory) => {
    const { domain, label } = normalizeCategoryLabel(rawCategory);
    const isTag = domain === "tag" || domain === "post_tag";
    if (!label || isTag) {
      return [];
    }
    const slug = sanitizeSlug(label);
    return slug ? [slug] : [];
  });
}
1819
/**
 * Lists the tag slugs attached to a single feed entry.
 *
 * Only nodes whose domain is `tag` or `post_tag` are considered; nodes with
 * an empty label or an empty slug are skipped. Duplicates are kept.
 *
 * @param {object} item - Parsed feed entry.
 * @returns {string[]} Tag slugs in document order.
 */
function collectTagSlugs(item) {
  const found = [];
  for (const node of asArray2(item.category)) {
    const { domain, label } = normalizeCategoryLabel(node);
    const isTag = domain === "tag" || domain === "post_tag";
    if (!label || !isTag) continue;
    const slug = sanitizeSlug(label);
    if (slug) {
      found.push(slug);
    }
  }
  return found;
}
1829
/**
 * Yields one asset entity per not-yet-seen asset URL discovered in `html`.
 *
 * Each new URL is recorded in `seenUrls` before its entity is yielded, so the
 * same set can be shared across several documents to avoid duplicates. The
 * filename is the basename of the URL path (resolved against a placeholder
 * base so relative URLs parse); `"inline-asset"` is used when that basename
 * is empty or the URL cannot be parsed.
 *
 * @param {string} html - Content to scan.
 * @param {Set<string>} seenUrls - URLs already emitted; mutated in place.
 * @param {string | undefined} exportedAt - Forwarded into the source metadata.
 * @returns {Generator<object>} Asset entities with sourceId `url:<src>`.
 */
function* collectInlineAssets2(html, seenUrls, exportedAt) {
  for (const url of discoverContentAssetUrls(html)) {
    if (!seenUrls.has(url)) {
      seenUrls.add(url);
      let filename = "inline-asset";
      try {
        const { pathname } = new URL(url, "http://local.invalid");
        filename = basename2(pathname) || "inline-asset";
      } catch {
        // Unparseable URL: the placeholder name assigned above is kept.
      }
      const sourceId = `url:${url}`;
      yield {
        type: "asset",
        source: sourceMeta3(sourceId, url, exportedAt),
        sourceId,
        sourceUrl: url,
        filename,
        mimeType: guessMime3(filename)
      };
    }
  }
}
1849
/**
 * Maps an export status string onto the migrator's publish status.
 *
 * A missing (`null`/`undefined`) status counts as `"published"`. Matching is
 * case-insensitive; anything other than `published` or `draft` maps to
 * `"archived"`.
 *
 * @param {string | null | undefined} status - Status from the export.
 * @returns {"published" | "draft" | "archived"} Normalized status.
 */
function mapPublishStatus2(status) {
  const normalized = (status ?? "published").toLowerCase();
  if (normalized === "published") return "published";
  if (normalized === "draft") return "draft";
  return "archived";
}
1859
/**
 * Yields the entities derived from one exported post: its inline assets, its
 * featured image asset (when present and not yet emitted), then the post.
 *
 * The featured image filename is the basename of the URL path. The URL is
 * resolved against a placeholder base and parsing failures fall back to
 * `<post.id>-featured.jpg`, matching how inline asset URLs are handled.
 * Previously a relative or malformed featured URL threw and aborted the
 * enumeration.
 *
 * NOTE(review): `featuredAssetSourceId` is set to `featured:<post.id>` even
 * when the URL was already in `seenAssetUrls`, in which case no asset with
 * that sourceId is yielded here (the earlier asset carries a different id).
 * Confirm downstream resolution copes with that.
 *
 * @param {object} post - Exported post record.
 * @param {string | undefined} exportedAt - Forwarded into source metadata.
 * @param {Set<string>} seenAssetUrls - URLs already emitted; mutated in place.
 * @returns {Generator<object>} Asset entities followed by the post entity.
 */
function* emitExportPost(post, exportedAt, seenAssetUrls) {
  yield* collectInlineAssets2(post.contentHtml, seenAssetUrls, exportedAt);
  let featuredAssetSourceId;
  if (post.featuredImageUrl) {
    featuredAssetSourceId = `featured:${post.id}`;
    if (!seenAssetUrls.has(post.featuredImageUrl)) {
      seenAssetUrls.add(post.featuredImageUrl);
      const fallbackName = `${post.id}-featured.jpg`;
      let filename = fallbackName;
      try {
        // The base is only used for relative URLs; absolute URLs ignore it.
        const { pathname } = new URL(post.featuredImageUrl, "http://local.invalid");
        filename = basename2(pathname) || fallbackName;
      } catch {
        // Unparseable URL: keep the fallback name instead of failing the run.
      }
      yield {
        type: "asset",
        source: sourceMeta3(featuredAssetSourceId, post.featuredImageUrl, exportedAt),
        sourceId: featuredAssetSourceId,
        sourceUrl: post.featuredImageUrl,
        filename,
        mimeType: guessMime3(filename)
      };
    }
  }
  yield {
    type: "post",
    source: sourceMeta3(post.id, post.url, exportedAt),
    sourceId: post.id,
    title: post.title,
    slug: sanitizeSlug(post.slug),
    excerpt: post.excerpt,
    contentHtml: post.contentHtml,
    publishedAt: post.publishedAt,
    status: mapPublishStatus2(post.status),
    categorySlugs: post.categorySlugs,
    tagSlugs: post.tagSlugs,
    featuredAssetSourceId,
    seoTitle: post.seoTitle,
    seoDescription: post.seoDescription
  };
}
1894
/**
 * Yields the entities derived from one exported page: every not-yet-seen
 * inline asset found in its HTML, followed by the page entity itself.
 *
 * @param {object} page - Exported page record.
 * @param {string | undefined} exportedAt - Forwarded into source metadata.
 * @param {Set<string>} seenAssetUrls - URLs already emitted; mutated in place.
 * @returns {Generator<object>} Asset entities followed by the page entity.
 */
function* emitExportPage(page, exportedAt, seenAssetUrls) {
  const { id, contentHtml } = page;
  yield* collectInlineAssets2(contentHtml, seenAssetUrls, exportedAt);
  const entity = {
    type: "page",
    source: sourceMeta3(id, page.url, exportedAt),
    sourceId: id,
    title: page.title,
    slug: sanitizeSlug(page.slug),
    contentHtml,
    isHomePage: page.isHomePage,
    status: mapPublishStatus2(page.status),
    seoTitle: page.seoTitle,
    seoDescription: page.seoDescription
  };
  yield entity;
}
1909
/**
 * Streams every entity of a Wix export document in a fixed order:
 * categories, tags, pages (with their inline assets), then posts (with their
 * inline and featured assets). One URL set is shared across pages and posts
 * so each asset URL is emitted at most once.
 *
 * @param {object} doc - Export document; missing collections count as empty.
 * @param {unknown} snapshotGaps - Accepted for the caller's convenience but
 *   not used by this function.
 * @returns {AsyncGenerator<object>} The entities in the order described.
 */
async function* enumerateWixExportEntities(doc, snapshotGaps) {
  const exportedAt = doc.exportedAt;
  const seenAssetUrls = /* @__PURE__ */ new Set();
  // Categories and tags share one shape and differ only in `type`.
  const toTaxonomyEntity = (type, entry) => ({
    type,
    source: sourceMeta3(entry.id, void 0, exportedAt),
    sourceId: entry.id,
    name: entry.name,
    slug: sanitizeSlug(entry.slug)
  });
  for (const category of doc.categories ?? []) {
    yield toTaxonomyEntity("category", category);
  }
  for (const tag of doc.tags ?? []) {
    yield toTaxonomyEntity("tag", tag);
  }
  for (const page of doc.pages ?? []) {
    yield* emitExportPage(page, exportedAt, seenAssetUrls);
  }
  for (const post of doc.posts ?? []) {
    yield* emitExportPost(post, exportedAt, seenAssetUrls);
  }
  void snapshotGaps;
}
1938
/**
 * Counts the entities a Wix export document would produce.
 *
 * Assets are counted as distinct URLs across all posts and pages: each post's
 * featured image (when set) plus every asset URL discovered in the content
 * HTML. Missing collections count as zero.
 *
 * @param {object} doc - Export document.
 * @returns {{ posts: number, pages: number, categories: number, tags: number, assets: number }}
 */
function summarizeWixExport(doc) {
  const distinctUrls = /* @__PURE__ */ new Set();
  const collect = (html, featured) => {
    if (featured) {
      distinctUrls.add(featured);
    }
    for (const src of discoverContentAssetUrls(html)) {
      distinctUrls.add(src);
    }
  };
  for (const post of doc.posts ?? []) {
    collect(post.contentHtml, post.featuredImageUrl);
  }
  for (const page of doc.pages ?? []) {
    collect(page.contentHtml);
  }
  return {
    posts: doc.posts?.length ?? 0,
    pages: doc.pages?.length ?? 0,
    categories: doc.categories?.length ?? 0,
    tags: doc.tags?.length ?? 0,
    // Set membership already de-duplicates, so its size is the asset count.
    assets: distinctUrls.size
  };
}
1966
/**
 * Parses feed XML and extracts its entries.
 *
 * When the detected format is `"atom"` the entries come from `feed.entry`;
 * otherwise they come from `rss.channel.item`. A missing container yields
 * whatever `asArray2` returns for `undefined`.
 *
 * @param {string} xml - Raw feed document.
 * @returns {{ format: string, items: object[] }} Detected format and entries.
 */
function parseFeedItems(xml) {
  const format = detectWixFeedFormat(xml);
  const doc = parseXmlDocument(xml);
  const rawItems = format === "atom" ? doc.feed?.entry : doc.rss?.channel?.item;
  return { format, items: asArray2(rawItems) };
}
1974
/**
 * Reads a feed file as UTF-8 and parses its entries.
 *
 * @param {string} filePath - Path of the feed file.
 * @returns {Promise<{ format: string, items: object[] }>} Parsed feed.
 * @throws {Error} When the document contains no entries.
 */
async function loadWixFeed(filePath) {
  const feed = parseFeedItems(await readFile4(filePath, "utf8"));
  if (feed.items.length > 0) {
    return feed;
  }
  throw new Error("Invalid Wix feed: no entries found in RSS or Atom document");
}
1982
/**
 * Obtains a Wix export document from the first available source and passes it
 * through `assertWixExport`.
 *
 * Sources are tried in order: in-memory `data`, an existing `client`, a
 * client built from `clientOptions`, then a `.json` file at `filePath`
 * (extension matched case-insensitively).
 *
 * @param {object} options - Parse options.
 * @returns {Promise<object>} The asserted export document.
 * @throws {Error} When none of the sources is usable.
 */
async function loadWixExport(options) {
  if (options.data) {
    return assertWixExport(options.data);
  }
  const client = options.client || (options.clientOptions ? new WixCollectionClient(options.clientOptions) : void 0);
  if (client) {
    const collected = await client.collectExport();
    return assertWixExport(collected);
  }
  const isJsonFile = options.filePath && extname(options.filePath).toLowerCase() === ".json";
  if (isJsonFile) {
    const text = await readFile4(options.filePath, "utf8");
    return assertWixExport(JSON.parse(text));
  }
  throw new Error("Wix parser requires filePath (.json), data, client, or clientOptions");
}
2002
/**
 * Determines which URLs should be captured as page snapshots.
 *
 * Explicit, non-empty `snapshotTargets` win. Otherwise URLs are loaded from
 * `urlsFile`, or from `filePath` when it has a `.txt` extension. Only the
 * first listed URL can be flagged as the home page, and only when its path is
 * exactly `/`.
 *
 * @param {object} options - Parse options.
 * @returns {Promise<Array<{ url: string, isHomePage: boolean }>>} Targets, or
 *   an empty array when nothing is configured.
 */
async function resolveSnapshotTargets(options) {
  if (options.snapshotTargets?.length) {
    return options.snapshotTargets;
  }
  let listPath = options.urlsFile;
  if (listPath === null || listPath === void 0) {
    const isTextFile = options.filePath && extname(options.filePath).toLowerCase() === ".txt";
    listPath = isTextFile ? options.filePath : void 0;
  }
  if (!listPath) {
    return [];
  }
  const urls = await loadUrlListFile(listPath);
  return urls.map((url, index) => {
    // Short-circuit keeps URL parsing limited to the first entry.
    const isHomePage = index === 0 && new URL(url).pathname === "/";
    return { url, isHomePage };
  });
}
2014
/**
 * Appends snapshot-collected pages to an export document.
 *
 * When no snapshot targets resolve, the document is returned unchanged with
 * an empty `gaps` list. Otherwise a `WixPageSnapshotCollector` (configured
 * with `options.snapshotOptions`) collects the targets and a shallow copy of
 * the document is returned with the collected pages appended after any
 * existing ones.
 *
 * @param {object} doc - Export document (not mutated).
 * @param {object} options - Parse options.
 * @returns {Promise<{ doc: object, gaps: unknown[] }>} Resulting document and
 *   the gaps reported by the collector.
 */
async function attachSnapshotPages(doc, options) {
  const targets = await resolveSnapshotTargets(options);
  if (targets.length === 0) {
    return { doc, gaps: [] };
  }
  const collector = new WixPageSnapshotCollector(options.snapshotOptions);
  const collected = await collector.collectPages(targets);
  const merged = {
    ...doc,
    pages: [...doc.pages ?? [], ...collected.pages]
  };
  return { doc: merged, gaps: collected.gaps };
}
2027
/**
 * Entry point that streams entities for any supported Wix input.
 *
 * Dispatch, in order:
 *  1. `filePath` ending in `.xml`, `.rss` or `.atom`: feed entities, followed
 *     by snapshot pages when snapshot targets resolve.
 *  2. `filePath` ending in `.txt` with neither `data` nor `client`: snapshot
 *     pages only.
 *  3. Snapshot targets with no `filePath`, `data`, `client` or
 *     `clientOptions`: snapshot pages only.
 *  4. Otherwise: the export loaded by `loadWixExport`, with snapshot pages
 *     attached.
 *
 * Extensions are compared case-insensitively.
 *
 * @param {object} options - Parse options.
 * @returns {AsyncGenerator<object>} Entities for the selected input.
 */
async function* enumerateWixEntities(options) {
  // Fresh empty export used whenever only snapshot pages are wanted.
  const emptyExport = () => ({ exportVersion: 1, pages: [], posts: [] });
  const ext = options.filePath ? extname(options.filePath).toLowerCase() : "";

  if ([".xml", ".rss", ".atom"].includes(ext)) {
    yield* enumerateWixFeedEntities(options);
    const feedTargets = await resolveSnapshotTargets(options);
    if (feedTargets.length > 0) {
      const snapshot = await attachSnapshotPages(
        emptyExport(),
        { ...options, snapshotTargets: feedTargets }
      );
      yield* enumerateWixExportEntities(snapshot.doc, snapshot.gaps);
    }
    return;
  }

  if (ext === ".txt" && !options.data && !options.client) {
    const snapshot = await attachSnapshotPages(emptyExport(), options);
    yield* enumerateWixExportEntities(snapshot.doc, snapshot.gaps);
    return;
  }

  const snapshotTargets = await resolveSnapshotTargets(options);
  const hasNoExportSource = !options.filePath && !options.data && !options.client && !options.clientOptions;
  if (snapshotTargets.length > 0 && hasNoExportSource) {
    const snapshot = await attachSnapshotPages(
      emptyExport(),
      { ...options, snapshotTargets }
    );
    yield* enumerateWixExportEntities(snapshot.doc, snapshot.gaps);
    return;
  }

  const loaded = await loadWixExport(options);
  const combined = await attachSnapshotPages(loaded, options);
  yield* enumerateWixExportEntities(combined.doc, combined.gaps);
}
2061
/**
 * Streams entities parsed from a feed file: all categories, all tags, then
 * for each entry its not-yet-seen inline assets followed by a post entity.
 *
 * Every post is emitted with status `"published"`; an entry without a title
 * is titled `"Untitled"`.
 *
 * @param {object} options - Parse options; `filePath` is required and
 *   `exportedAt` is forwarded into source metadata.
 * @returns {AsyncGenerator<object>} Entities in the order described.
 * @throws {Error} When `filePath` is missing.
 */
async function* enumerateWixFeedEntities(options) {
  if (!options.filePath) {
    throw new Error("Wix feed parser requires filePath");
  }
  const { items } = await loadWixFeed(options.filePath);
  const taxonomies = collectTaxonomiesFromItems(items);
  const seenAssetUrls = /* @__PURE__ */ new Set();
  yield* taxonomies.categories.values();
  yield* taxonomies.tags.values();
  for (const entry of items) {
    const title = textValue2(entry.title) || "Untitled";
    const link = itemLink(entry);
    const sourceId = itemSourceId(entry, link, sanitizeSlug(title));
    const slug = slugFromLink(link, title, sourceId);
    const contentHtml = getItemContentHtml(entry);
    yield* collectInlineAssets2(contentHtml, seenAssetUrls, options.exportedAt);
    yield {
      type: "post",
      source: sourceMeta3(sourceId, link || void 0, options.exportedAt),
      sourceId,
      title,
      slug,
      excerpt: textValue2(entry.description) || void 0,
      contentHtml,
      publishedAt: itemPublishedAt(entry),
      status: "published",
      categorySlugs: collectCategorySlugs(entry),
      tagSlugs: collectTagSlugs(entry)
    };
  }
}
2100
/**
 * Loads a feed file and counts what it would produce: one post per entry,
 * the distinct categories and tags, and the distinct asset URLs found in the
 * entries' content HTML.
 *
 * @param {string} filePath - Path of the feed file.
 * @returns {Promise<{ format: string, posts: number, categories: number, tags: number, assets: number }>}
 * @throws {Error} Propagated from `loadWixFeed` (for example, no entries).
 */
async function summarizeWixFeed(filePath) {
  const { format, items } = await loadWixFeed(filePath);
  const taxonomies = collectTaxonomiesFromItems(items);
  const distinctUrls = /* @__PURE__ */ new Set();
  for (const entry of items) {
    for (const src of discoverContentAssetUrls(getItemContentHtml(entry))) {
      distinctUrls.add(src);
    }
  }
  return {
    format,
    posts: items.length,
    categories: taxonomies.categories.size,
    tags: taxonomies.tags.size,
    assets: distinctUrls.size
  };
}
2123
/**
 * Validates a feed file by attempting to summarize it.
 *
 * Never throws: any failure is reported as a single `invalid_wix_feed` issue
 * together with an all-zero summary.
 *
 * @param {string} filePath - Path of the feed file.
 * @returns {Promise<{ ok: boolean, issues: Array<{ code: string, message: string }>, summary: object }>}
 */
async function validateWixExportFile(filePath) {
  let summary;
  try {
    summary = await summarizeWixFeed(filePath);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return {
      ok: false,
      issues: [
        {
          code: "invalid_wix_feed",
          message
        }
      ],
      summary: { posts: 0, categories: 0, tags: 0, assets: 0 }
    };
  }
  return {
    ok: true,
    issues: [],
    summary
  };
}
2144
+
2145
+ // src/parsers/wix/index.ts
2146
/**
 * Normalizes the adapter input into a record of Wix source options.
 *
 * - A string is treated as a file path.
 * - An object carrying `client`, `clientOptions`, `data` or non-empty
 *   `snapshotTargets` is passed through. The `urlsPath` alias is now folded
 *   into `urlsFile` here as well (on a shallow copy); previously the alias was
 *   honoured only in the path branch and silently ignored in this one.
 * - An object carrying `path`, `urlsFile` or `urlsPath` is reduced to
 *   `{ path, urlsFile }`.
 *
 * @param {unknown} input - Raw adapter input.
 * @returns {object} Normalized input record.
 * @throws {Error} When the input matches none of the supported shapes.
 */
function resolveInput3(input) {
  if (typeof input === "string") return { path: input };
  if (input && typeof input === "object") {
    const record = input;
    if (record.client || record.clientOptions || record.data || record.snapshotTargets?.length) {
      // `== null` deliberately matches both null and undefined, mirroring `??`.
      if (record.urlsFile == null && record.urlsPath != null) {
        return { ...record, urlsFile: record.urlsPath };
      }
      return record;
    }
    if (record.path || record.urlsFile || record.urlsPath) {
      return {
        path: record.path,
        urlsFile: record.urlsFile ?? record.urlsPath
      };
    }
  }
  throw new Error(
    "Wix adapter requires input path (string or { path }), { data }, { client }, { clientOptions }, or snapshot targets"
  );
}
2164
/**
 * Converts a normalized adapter input into parser options.
 *
 * The only rename is `path` to `filePath`; the other fields are copied under
 * the same name, and absent fields come through as `undefined`.
 *
 * @param {object} input - Normalized adapter input.
 * @returns {{ filePath: *, urlsFile: *, data: *, client: *, clientOptions: *, snapshotTargets: * }}
 */
function toParseOptions(input) {
  const { path: filePath, urlsFile, data, client, clientOptions, snapshotTargets } = input;
  return { filePath, urlsFile, data, client, clientOptions, snapshotTargets };
}
2174
/**
 * Migration adapter for Wix sources.
 *
 * `validateInput` never throws; failures are returned as
 * `{ ok: false, issues: [{ code: "invalid_input", ... }] }`.
 *
 * Changes relative to the previous implementation:
 *  - File extensions are compared case-insensitively via `extname`, the same
 *    way the loaders do, so `export.JSON` / `urls.TXT` are no longer validated
 *    as feeds.
 *  - Inputs that only carry snapshot targets (accepted by `resolveInput3` and
 *    by enumeration) now validate successfully instead of being rejected.
 *  - In-memory `data` goes through `loadWixExport`, so it is checked by the
 *    same `assertWixExport` call enumeration relies on.
 *  - The three identical export-summary branches are merged.
 */
var wixAdapter = {
  platform: "wix",
  /**
   * Checks whether `input` is usable and reports entity counts where they can
   * be computed without taking page snapshots.
   *
   * @param {unknown} input - Raw adapter input.
   * @returns {Promise<object>} Validation result (`ok`, `issues`, `summary`).
   */
  async validateInput(input) {
    try {
      const resolved = resolveInput3(input);
      const options = toParseOptions(resolved);
      const ext = typeof resolved.path === "string" ? extname(resolved.path).toLowerCase() : "";
      // Everything loadWixExport can serve: data, API client, or a JSON file.
      if (resolved.data || resolved.client || resolved.clientOptions || ext === ".json") {
        const summary = summarizeWixExport(await loadWixExport(options));
        return {
          ok: true,
          issues: [],
          summary: {
            posts: summary.posts,
            pages: summary.pages,
            assets: summary.assets,
            categories: summary.categories,
            tags: summary.tags
          }
        };
      }
      // Any other non-.txt path is treated as a feed file.
      if (resolved.path && ext !== ".txt") {
        const result = await validateWixExportFile(resolved.path);
        return {
          ok: result.ok,
          issues: result.issues,
          summary: {
            posts: result.summary.posts,
            pages: 0,
            assets: result.summary.assets,
            categories: result.summary.categories,
            tags: result.summary.tags
          }
        };
      }
      // URL lists and explicit snapshot targets are only resolved during
      // enumeration, so there is nothing to count yet.
      if (ext === ".txt" || resolved.urlsFile || resolved.snapshotTargets?.length) {
        return {
          ok: true,
          issues: [],
          summary: { pages: 0, posts: 0, assets: 0, categories: 0, tags: 0 }
        };
      }
      throw new Error("Wix validation requires export.xml, export.json, url list, or API client options");
    } catch (error) {
      return {
        ok: false,
        issues: [
          {
            code: "invalid_input",
            message: error instanceof Error ? error.message : String(error)
          }
        ]
      };
    }
  },
  /**
   * Streams the entities for the input carried by `ctx`.
   *
   * @param {{ input: unknown }} ctx - Adapter context.
   * @returns {AsyncGenerator<object>} Entity stream.
   */
  enumerateEntities(ctx) {
    return enumerateWixEntities(toParseOptions(resolveInput3(ctx.input)));
  }
};
2262
+
2263
+ // src/parsers/index.ts
2264
// Registry of platform adapters, keyed by platform identifier.
var adapters = {
  wordpress: wordpressAdapter,
  smugmug: smugmugAdapter,
  squarespace: squarespaceAdapter,
  wix: wixAdapter
};

/**
 * Looks up the adapter registered for a platform.
 *
 * Only own keys of the registry are considered. A plain property lookup would
 * also return inherited `Object.prototype` members for names such as
 * `"constructor"` or `"toString"`, which are not adapters.
 *
 * @param {string} platform - Platform identifier, e.g. `"wix"`.
 * @returns {object | undefined} The adapter, or `undefined` when the platform
 *   is not registered.
 */
function getAdapter(platform) {
  return Object.prototype.hasOwnProperty.call(adapters, platform) ? adapters[platform] : void 0;
}
2273
+
2274
+ export {
2275
+ wordpressAdapter,
2276
+ SMUGMUG_API_BASE,
2277
+ SMUGMUG_OAUTH_ENDPOINTS,
2278
+ smugMugCredentialsSchema,
2279
+ signSmugMugOAuthRequest,
2280
+ buildSmugMugAuthorizationHeader,
2281
+ readSmugMugCredentialsFromEnv,
2282
+ SmugMugApiClient,
2283
+ smugmugAdapter,
2284
+ squarespaceAdapter,
2285
+ WixCollectionClient,
2286
+ WixPageSnapshotCollector,
2287
+ wixAdapter,
2288
+ getAdapter
2289
+ };
2290
+ //# sourceMappingURL=chunk-VXEHAQKK.js.map