@artinstack/migrator 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +65 -8
- package/dist/{bundle-DYPSKSQH.d.ts → bundle-DfM_jKbq.d.ts} +1 -1
- package/dist/chunk-2PNSVE5Y.js +67 -0
- package/dist/chunk-2PNSVE5Y.js.map +1 -0
- package/dist/{chunk-5QEF5O5T.js → chunk-3YJFSTYR.js} +2 -2
- package/dist/chunk-3YJFSTYR.js.map +1 -0
- package/dist/{chunk-LKNIQQJO.js → chunk-HH7666MQ.js} +13 -65
- package/dist/chunk-HH7666MQ.js.map +1 -0
- package/dist/{chunk-JKDRTL24.js → chunk-HI7JHWZU.js} +1 -1
- package/dist/chunk-HI7JHWZU.js.map +1 -0
- package/dist/chunk-QEXTXHFG.js +2444 -0
- package/dist/chunk-QEXTXHFG.js.map +1 -0
- package/dist/cli/index.js +23 -8
- package/dist/cli/index.js.map +1 -1
- package/dist/{index-CtRzFhUw.d.ts → index-D88mjcF5.d.ts} +1 -1
- package/dist/index.d.ts +219 -4
- package/dist/index.js +26 -4
- package/dist/index.js.map +1 -1
- package/dist/lib/index.d.ts +16 -0
- package/dist/lib/index.js +15 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/normalizer/index.d.ts +65 -65
- package/dist/normalizer/index.js +2 -2
- package/dist/sinks/index.d.ts +2 -2
- package/dist/sinks/index.js +3 -2
- package/package.json +5 -1
- package/dist/chunk-5QEF5O5T.js.map +0 -1
- package/dist/chunk-FXXKLYO5.js +0 -1076
- package/dist/chunk-FXXKLYO5.js.map +0 -1
- package/dist/chunk-JKDRTL24.js.map +0 -1
- package/dist/chunk-LKNIQQJO.js.map +0 -1
|
@@ -0,0 +1,2444 @@
|
|
|
1
|
+
import {
|
|
2
|
+
SquarespaceCollectionClient,
|
|
3
|
+
enumerateSquarespaceEntities,
|
|
4
|
+
linkToPath,
|
|
5
|
+
sanitizeSlug,
|
|
6
|
+
summarizeSquarespaceExport,
|
|
7
|
+
validateSquarespaceExportFile
|
|
8
|
+
} from "./chunk-HH7666MQ.js";
|
|
9
|
+
import {
|
|
10
|
+
discoverContentAssetUrls,
|
|
11
|
+
normalizeAssetUrl
|
|
12
|
+
} from "./chunk-2PNSVE5Y.js";
|
|
13
|
+
|
|
14
|
+
// src/lib/origin-url-rewrite.ts
|
|
15
|
+
/**
 * Run every rewrite rule in `config.rules`, in order, over `text`.
 *
 * A string matcher replaces every literal occurrence; any other matcher is
 * passed to `String.prototype.replace` exactly as given.
 *
 * @param {string} text - Text to rewrite; falsy input is returned untouched.
 * @param {{ rules: Array<{ match: string | RegExp, replace: string }> }} config
 * @returns {string} The rewritten text.
 */
function rewriteOriginUrlsInText(text, config) {
  if (!text || config.rules.length === 0) return text;
  let output = text;
  for (const rule of config.rules) {
    const isLiteral = typeof rule.match === "string";
    if (!isLiteral) {
      output = output.replace(rule.match, rule.replace);
    } else if (rule.match) {
      // split/join keeps `$` sequences in the replacement literal.
      output = output.split(rule.match).join(rule.replace);
    }
    // An empty literal matcher is ignored.
  }
  return output;
}
|
|
28
|
+
/**
 * Build a rewrite config that maps `<gatewayBase>/wp-content/` URLs onto
 * `<publicOrigin>/wp-content/`.
 *
 * All trailing slashes are removed from both origins before the rule is
 * built, so inputs such as `https://host//` do not produce `//wp-content/`.
 *
 * @param {string} gatewayBase - Origin that appears in the exported content.
 * @param {string} publicOrigin - Origin that should replace it.
 * @returns {{ rules: Array<{ match: string, replace: string }> }}
 */
function createWpContentGatewayRewrite(gatewayBase, publicOrigin) {
  const stripTrailingSlashes = (origin) => origin.replace(/\/+$/, "");
  const normalizedGateway = stripTrailingSlashes(gatewayBase);
  const normalizedPublic = stripTrailingSlashes(publicOrigin);
  return {
    rules: [
      {
        match: `${normalizedGateway}/wp-content/`,
        replace: `${normalizedPublic}/wp-content/`
      }
    ]
  };
}
|
|
40
|
+
|
|
41
|
+
// src/parsers/wordpress/parse-wxr.ts
|
|
42
|
+
import { readFile } from "fs/promises";
|
|
43
|
+
import { basename } from "path";
|
|
44
|
+
import { XMLParser } from "fast-xml-parser";
|
|
45
|
+
|
|
46
|
+
// src/parsers/wordpress/builders/registry.ts
|
|
47
|
+
// Default page-builder registry used by flattenWordPressBuilders().
// Each entry has:
//   id                - label reported in `detectedThemes`
//   detect            - regex tested against the content to decide whether the builder is present
//   contentRules      - shortcodes converted into an HTML tag; `urlParams` are tried in order
//   scaffoldingPrefix - shortcodes starting with this prefix are stripped after conversion
var WORDPRESS_BUILDER_REGISTRY = [
  {
    id: "tatsu",
    detect: /\[(?:\/)?tatsu_/i,
    contentRules: [
      { shortcodePrefix: "tatsu_image", urlParams: ["image", "url", "src"], tag: "img" },
      { shortcodePrefix: "tatsu_video", urlParams: ["video", "src", "url"], tag: "video" }
    ],
    scaffoldingPrefix: "tatsu_"
  },
  {
    id: "divi",
    detect: /\[(?:\/)?et_pb_/i,
    contentRules: [{ shortcodePrefix: "et_pb_image", urlParams: ["src", "url"], tag: "img" }],
    scaffoldingPrefix: "et_pb_"
  },
  {
    id: "elementor",
    // NOTE(review): `detect` accepts both `elementor-` and `elementor_`, but
    // `scaffoldingPrefix` only covers the underscore form, so hyphenated
    // shortcodes that are not converted by a content rule are left in place.
    // Confirm whether that is intended.
    detect: /\[(?:\/)?elementor[-_]/i,
    contentRules: [
      { shortcodePrefix: "elementor-widget", urlParams: ["url", "src", "image"], tag: "img" }
    ],
    scaffoldingPrefix: "elementor_"
  }
];
|
|
72
|
+
|
|
73
|
+
// src/parsers/wordpress/builders/flatten.ts
|
|
74
|
+
/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source as a literal.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => `\\${character}`);
}
|
|
77
|
+
/**
 * Return the first non-empty quoted value among the attributes `names`,
 * tried in order, from a shortcode parameter string.
 *
 * An attribute name must start the string or follow whitespace, so `src`
 * no longer matches inside `data-src`. The value runs to the quote character
 * that opened it, so a value may contain the other quote character.
 *
 * @param {string} params - Raw text between the shortcode name and `]`.
 * @param {string[]} names - Attribute names in priority order.
 * @returns {string | undefined} Trimmed value, or `undefined` when none match.
 */
function extractShortcodeParam(params, names) {
  for (const name of names) {
    const pattern = new RegExp(
      `(?:^|\\s)${escapeRegExp(name)}\\s*=\\s*(["'])((?:(?!\\1)[\\s\\S])+)\\1`,
      "i"
    );
    const value = params.match(pattern)?.[2]?.trim();
    if (value) return value;
  }
  return void 0;
}
|
|
85
|
+
/**
 * Render a minimal HTML element of kind `tag` whose `src` is `url`.
 *
 * The URL is passed through `normalizeAssetUrl` (falling back to the raw
 * value when that returns null/undefined) and then escaped for use inside a
 * double-quoted attribute.
 *
 * @param {"img" | "video" | "iframe"} tag
 * @param {string} url
 * @returns {string | undefined} Markup, or `undefined` for an unsupported tag.
 */
function emitHtmlTag(tag, url) {
  const normalized = normalizeAssetUrl(url) ?? url;
  // `&` must be escaped first so the entities added below are not re-escaped.
  const escaped = normalized
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;");
  switch (tag) {
    case "img":
      return `<img src="${escaped}" alt="" />`;
    case "video":
      return `<video src="${escaped}" controls></video>`;
    case "iframe":
      return `<iframe src="${escaped}" loading="lazy"></iframe>`;
    default:
      // Unsupported tag: explicit so callers can decide how to fall back.
      return void 0;
  }
}
|
|
97
|
+
/**
 * Replace every shortcode whose name starts with `rule.shortcodePrefix`
 * (plus an immediately following closing shortcode, if any) with the HTML
 * tag described by `rule`.
 *
 * A shortcode is left untouched when no URL can be extracted from it, or when
 * `emitHtmlTag` produces nothing for `rule.tag`. Without the second guard
 * the replace callback would insert the literal text "undefined".
 *
 * @param {string} content
 * @param {{ shortcodePrefix: string, urlParams: string[], tag: string }} rule
 * @returns {string}
 */
function convertContentBlocker(content, rule) {
  const prefix = escapeRegExp(rule.shortcodePrefix);
  const pattern = new RegExp(
    `\\[${prefix}([^\\]]*)\\]\\s*(?:\\[\\/${prefix}[^\\]]*\\])?`,
    "gi"
  );
  return content.replace(pattern, (block, params) => {
    const url = extractShortcodeParam(params, rule.urlParams);
    if (!url) return block;
    return emitHtmlTag(rule.tag, url) ?? block;
  });
}
|
|
109
|
+
/**
 * Delete every opening and closing shortcode whose name begins with `prefix`.
 * Text between the shortcodes is kept.
 *
 * @param {string} content
 * @param {string} prefix - Literal shortcode name prefix, e.g. `tatsu_`.
 * @returns {string}
 */
function stripScaffolding(content, prefix) {
  const safePrefix = escapeRegExp(prefix);
  // Two passes on purpose: all openers are removed before any closer.
  const bracketLeads = ["\\[", "\\[\\/"];
  return bracketLeads.reduce(
    (text, lead) =>
      text.replace(new RegExp(`${lead}${safePrefix}[a-z0-9_-]*[^\\]]*\\]`, "gi"), ""),
    content
  );
}
|
|
115
|
+
/**
 * Return the registry entries whose `detect` pattern occurs in `content`.
 *
 * Uses `String.prototype.search`, which ignores and does not advance
 * `lastIndex`. With `RegExp.prototype.test`, a caller-supplied pattern carrying
 * the `g` or `y` flag would give different answers on successive calls.
 *
 * @param {string} content
 * @param {Array<{ detect: RegExp }>} registry
 * @returns {Array<{ detect: RegExp }>} Matching entries, in registry order.
 */
function detectThemes(content, registry) {
  return registry.filter((theme) => content.search(theme.detect) !== -1);
}
|
|
118
|
+
/**
 * Convert page-builder shortcodes in `content` to plain HTML.
 *
 * For each detected builder its content rules are applied, then its
 * remaining scaffolding shortcodes are stripped. Finally runs of three or more
 * newlines collapse to two and the result is trimmed. Blank content, or
 * content with no detected builder, is returned unchanged.
 *
 * @param {string} content
 * @param {{ registry?: Array<object> }} [options] - `registry` overrides the default builder list.
 * @returns {{ html: string, detectedThemes: string[] }}
 */
function flattenWordPressBuilders(content, options = {}) {
  const unchanged = () => ({ html: content, detectedThemes: [] });
  if (!content.trim()) return unchanged();

  const matched = detectThemes(content, options.registry ?? WORDPRESS_BUILDER_REGISTRY);
  if (matched.length === 0) return unchanged();

  let markup = content;
  for (const { contentRules, scaffoldingPrefix } of matched) {
    for (const rule of contentRules) {
      markup = convertContentBlocker(markup, rule);
    }
    markup = stripScaffolding(markup, scaffoldingPrefix);
  }

  return {
    html: markup.replace(/\n{3,}/g, "\n\n").trim(),
    detectedThemes: matched.map(({ id }) => id)
  };
}
|
|
140
|
+
|
|
141
|
+
// src/parsers/wordpress/parse-wxr.ts
|
|
142
|
+
// Platform identifier written into the `platform` field of source metadata.
var PLATFORM = "wordpress";
|
|
143
|
+
/**
 * Normalize a value that may be absent, single, or repeated into an array.
 *
 * `null` is treated like `undefined`, so callers that index into each element
 * never receive a `[null]` list.
 *
 * @param {unknown} value
 * @returns {unknown[]} `[]` for null/undefined, the array itself, or `[value]`.
 */
function asArray(value) {
  if (value === void 0 || value === null) return [];
  return Array.isArray(value) ? value : [value];
}
|
|
147
|
+
/**
 * Coerce a parsed XML node to its text content.
 *
 * - null/undefined give `""`.
 * - an object with a `#text` key gives that key's value as a string.
 * - an array (repeated element) gives the text of its first entry, not a
 *   comma-joined string.
 * - any other object (for example an element with only attributes) gives `""`
 *   rather than `"[object Object]"`.
 * - anything else goes through `String(value)`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function textValue(value) {
  if (value === void 0 || value === null) return "";
  if (typeof value !== "object") return String(value);
  if (Array.isArray(value)) return textValue(value[0]);
  return "#text" in value ? String(value["#text"] ?? "") : "";
}
|
|
155
|
+
/**
 * Map a WordPress post status onto the migrator's publish status.
 *
 * The comparison is case-insensitive: `publish` becomes `published`, `draft`
 * and `pending` become `draft`, and everything else (including a missing
 * status) becomes `archived`.
 *
 * @param {string | null | undefined} wpStatus
 * @returns {"published" | "draft" | "archived"}
 */
function mapPublishStatus(wpStatus) {
  const normalized = (wpStatus ?? "").toLowerCase();
  if (normalized === "publish") return "published";
  const isDraftLike = normalized === "draft" || normalized === "pending";
  return isDraftLike ? "draft" : "archived";
}
|
|
166
|
+
/**
 * Read the post body from a parsed WXR item.
 *
 * Lookup order: `item.content` (a string, or an object whose `encoded` holds
 * the text), then `item.encoded`.
 *
 * When namespace prefixes are stripped by the parser, `content:encoded` and
 * `excerpt:encoded` can both land on `item.encoded` as an array. The body is
 * then taken from the first entry instead of stringifying the whole array.
 * NOTE(review): assumes the body precedes the excerpt, as in a standard
 * WordPress export. Confirm against real export files.
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {string}
 */
function getContentEncoded(item) {
  const content = item.content;
  if (content !== void 0) {
    if (typeof content === "string") return content;
    return textValue(content?.encoded);
  }
  const encoded = item.encoded;
  return textValue(Array.isArray(encoded) ? encoded[0] : encoded);
}
|
|
174
|
+
/**
 * Build the `source` metadata record attached to every emitted entity.
 *
 * @param {string} id - Source-side identifier.
 * @param {string} [link] - Source URL; an empty or missing link gives `url: undefined`.
 * @param {string} [exportedAt] - Export timestamp, passed through unchanged.
 * @returns {{ platform: string, id: string, url: string | undefined, path: unknown, exportedAt: string | undefined }}
 */
function sourceMeta(id, link, exportedAt) {
  const url = link || void 0;
  // `linkToPath` receives the raw link, not the normalized `url`.
  const path = linkToPath(link);
  return { platform: PLATFORM, id, url, path, exportedAt };
}
|
|
183
|
+
/**
 * Read the excerpt from a parsed WXR item.
 *
 * Uses `item.excerpt` when present (a string, or an object whose `encoded`
 * holds the text). Otherwise, when `item.encoded` is an array with more than
 * one entry, the second entry is used.
 * NOTE(review): that fallback assumes `content:encoded` and `excerpt:encoded`
 * were merged under `encoded` in document order by a namespace-stripping
 * parser. Confirm against real export files.
 *
 * @param {object} item - Parsed `<item>` node.
 * @returns {string} Excerpt text, or `""` when there is none.
 */
function getExcerpt(item) {
  const excerpt = item.excerpt;
  if (excerpt) {
    return typeof excerpt === "string" ? excerpt : textValue(excerpt.encoded);
  }
  const encoded = item.encoded;
  return Array.isArray(encoded) && encoded.length > 1 ? textValue(encoded[1]) : "";
}
|
|
189
|
+
/**
 * Look up a post-meta value by key on a parsed WXR item.
 *
 * @param {object} item - Parsed `<item>` node; `postmeta` may be absent, single, or repeated.
 * @param {string} key - Exact `meta_key` to find.
 * @returns {string | undefined} Text of the first matching `meta_value`, or `undefined`.
 */
function getPostMeta(item, key) {
  const entry = asArray(item.postmeta).find((meta) => textValue(meta.meta_key) === key);
  return entry === void 0 ? void 0 : textValue(entry.meta_value);
}
|
|
197
|
+
/**
 * Parse a WXR document and return its `<item>` nodes as an array.
 *
 * Attributes are kept with an `@_` prefix, namespace prefixes are removed
 * from names, and text values are neither trimmed nor type-converted.
 *
 * @param {string} xml - Raw XML text.
 * @returns {object[]} Items under `rss.channel`, or `[]` when that path is absent.
 */
function parseItems(xml) {
  const parserOptions = {
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    removeNSPrefix: true,
    trimValues: false,
    parseTagValue: false
  };
  const parsed = new XMLParser(parserOptions).parse(xml);
  const channelItems = parsed.rss?.channel?.item;
  return asArray(channelItems);
}
|
|
208
|
+
/**
 * Index every `attachment` item by its post id.
 *
 * The URL comes from `attachment_url`, falling back to `link`, and is passed
 * through `originUrlRewrite` when one is supplied. Items with no id or no URL
 * are skipped.
 *
 * A URL that cannot be parsed no longer aborts the whole index: the filename
 * falls back to `attachment-<id>`, the same best-effort approach
 * `collectInlineAssets` takes for inline URLs.
 *
 * @param {object[]} items - Parsed WXR items.
 * @param {{ rules: object[] }} [originUrlRewrite]
 * @returns {Map<string, { sourceUrl: string, filename: string, mimeType: string | undefined, title: string }>}
 */
function buildAttachmentIndex(items, originUrlRewrite) {
  const index = /* @__PURE__ */ new Map();
  for (const item of items) {
    if (textValue(item.post_type) !== "attachment") continue;
    const id = textValue(item.post_id);
    let url = textValue(item.attachment_url) || textValue(item.link);
    if (!id || !url) continue;
    if (originUrlRewrite) {
      url = rewriteOriginUrlsInText(url, originUrlRewrite);
    }
    const fallbackName = `attachment-${id}`;
    let filename;
    try {
      // The dummy base lets relative URLs parse; only the pathname is used.
      filename = basename(new URL(url, "http://local.invalid").pathname) || fallbackName;
    } catch {
      filename = fallbackName;
    }
    index.set(id, {
      sourceUrl: url,
      filename,
      // NOTE(review): the MIME type is only guessed when the item has no
      // `_wp_attached_file` meta; otherwise it is left undefined. Behavior is
      // kept as found. Confirm this is intentional.
      mimeType: getPostMeta(item, "_wp_attached_file") ? void 0 : guessMime(filename),
      title: textValue(item.title)
    });
  }
  return index;
}
|
|
228
|
+
/**
 * Guess a MIME type from a filename's extension (case-insensitive).
 *
 * The lookup uses a `Map`, so an extension that happens to name an inherited
 * object member (for example `constructor`) gives `undefined`, not a function.
 *
 * @param {string} filename
 * @returns {string | undefined} Known MIME type, or `undefined`.
 */
function guessMime(filename) {
  const mimeByExtension = /* @__PURE__ */ new Map([
    ["jpg", "image/jpeg"],
    ["jpeg", "image/jpeg"],
    ["png", "image/png"],
    ["gif", "image/gif"],
    ["webp", "image/webp"],
    ["pdf", "application/pdf"]
  ]);
  const ext = filename.split(".").pop()?.toLowerCase();
  return ext ? mimeByExtension.get(ext) : void 0;
}
|
|
240
|
+
/**
 * Collect the distinct categories and tags referenced by posts and pages.
 *
 * Only `<category>` nodes whose `domain` attribute is `category` or
 * `post_tag` are kept. The first occurrence of each slug wins.
 *
 * @param {object[]} items - Parsed WXR items.
 * @returns {{ categories: Map<string, object>, tags: Map<string, object> }} Entities keyed by slug.
 */
function collectTaxonomies(items) {
  const categories = /* @__PURE__ */ new Map();
  const tags = /* @__PURE__ */ new Map();
  // One descriptor per supported taxonomy domain.
  const targets = /* @__PURE__ */ new Map([
    ["category", { store: categories, type: "category", idPrefix: "cat" }],
    ["post_tag", { store: tags, type: "tag", idPrefix: "tag" }]
  ]);

  for (const item of items) {
    const postType = textValue(item.post_type);
    if (!(postType === "post" || postType === "page")) continue;

    for (const term of asArray(item.category)) {
      const domain = term["@_domain"] ?? "";
      const slug = sanitizeSlug(term["@_nicename"] ?? textValue(term["#text"]));
      const name = textValue(term["#text"]) || slug;
      if (!slug) continue;

      const target = targets.get(domain);
      if (!target || target.store.has(slug)) continue;

      const sourceId = `${target.idPrefix}:${slug}`;
      target.store.set(slug, {
        type: target.type,
        source: sourceMeta(sourceId),
        sourceId,
        name,
        slug
      });
    }
  }
  return { categories, tags };
}
|
|
276
|
+
/**
 * Build asset entities for URLs discovered in `html` that have not been seen
 * before. Every newly seen URL is added to `seenUrls` (the set is mutated).
 *
 * The filename is the basename of the URL path, or `inline-asset` when the URL
 * has no usable path or cannot be parsed.
 *
 * @param {string} html - Preprocessed content HTML.
 * @param {Map<string, object>} attachmentIndex - Currently unused; kept so the
 *   call signature stays stable. The loop that iterated it had no effect and
 *   was removed.
 * @param {Set<string>} seenUrls - URLs already emitted; updated in place.
 * @param {string} [exportedAt] - Passed through to the source metadata.
 * @returns {object[]} New asset entities in discovery order.
 */
function collectInlineAssets(html, attachmentIndex, seenUrls, exportedAt) {
  const assets = [];
  for (const src of discoverContentAssetUrls(html)) {
    if (seenUrls.has(src)) continue;
    seenUrls.add(src);
    let filename;
    try {
      filename = basename(new URL(src, "http://local.invalid").pathname) || "inline-asset";
    } catch {
      // Unparseable URL: keep the asset but use a generic filename.
      filename = "inline-asset";
    }
    assets.push({
      type: "asset",
      source: sourceMeta(`url:${src}`, src, exportedAt),
      sourceId: `url:${src}`,
      sourceUrl: src,
      filename,
      mimeType: guessMime(filename)
    });
  }
  return assets;
}
|
|
302
|
+
/**
 * Prepare raw post HTML for import: apply origin URL rewrites when
 * configured, then flatten page-builder shortcodes unless
 * `options.flattenBuilders` is exactly `false`.
 *
 * @param {string} rawHtml
 * @param {{ originUrlRewrite?: object, flattenBuilders?: boolean }} options
 * @returns {string}
 */
function preprocessContent(rawHtml, options) {
  const rewritten = options.originUrlRewrite
    ? rewriteOriginUrlsInText(rawHtml, options.originUrlRewrite)
    : rawHtml;
  if (options.flattenBuilders === false) return rewritten;
  return flattenWordPressBuilders(rewritten).html;
}
|
|
312
|
+
/**
 * Choose the source id of a post's featured asset.
 *
 * Prefers the thumbnail attachment when it exists in `attachmentIndex`;
 * otherwise falls back to the first asset URL found in the content, as
 * `url:<src>`.
 *
 * @param {string | undefined} thumbnailId
 * @param {Map<string, object>} attachmentIndex
 * @param {string} contentHtml
 * @returns {string | undefined}
 */
function resolveFeaturedAssetSourceId(thumbnailId, attachmentIndex, contentHtml) {
  const thumbnailKnown = Boolean(thumbnailId) && attachmentIndex.has(thumbnailId);
  if (thumbnailKnown) return thumbnailId;

  const leadUrl = discoverContentAssetUrls(contentHtml)[0];
  if (!leadUrl) return void 0;
  return `url:${leadUrl}`;
}
|
|
319
|
+
/**
 * Stream migration entities from a WordPress WXR export file.
 *
 * Emission order: categories, tags, attachment assets, then for each post or
 * page any inline assets not yet emitted followed by the post/page itself.
 * Items of any other post type are ignored.
 *
 * The unused `emittedAttachmentIds` set (filled but never read) was removed;
 * the emitted entities are unchanged.
 *
 * @param {{ filePath: string, originUrlRewrite?: object, flattenBuilders?: boolean, exportedAt?: string }} options
 * @returns {AsyncGenerator<object>}
 */
async function* enumerateWxrEntities(options) {
  const xml = await readFile(options.filePath, "utf8");
  const items = parseItems(xml);
  const attachmentIndex = buildAttachmentIndex(items, options.originUrlRewrite);
  const { categories, tags } = collectTaxonomies(items);
  // Shared across all items so each asset URL is emitted once.
  const seenAssetUrls = /* @__PURE__ */ new Set();

  for (const category of categories.values()) {
    yield category;
  }
  for (const tag of tags.values()) {
    yield tag;
  }

  for (const [id, entry] of attachmentIndex) {
    seenAssetUrls.add(entry.sourceUrl);
    yield {
      type: "asset",
      source: sourceMeta(id, entry.sourceUrl, options.exportedAt),
      sourceId: id,
      sourceUrl: entry.sourceUrl,
      filename: entry.filename,
      mimeType: entry.mimeType,
      caption: entry.title
    };
  }

  for (const item of items) {
    const postType = textValue(item.post_type);
    if (postType !== "post" && postType !== "page") continue;

    const id = textValue(item.post_id);
    const link = textValue(item.link);
    const slug = sanitizeSlug(textValue(item.post_name) || textValue(item.title) || id);
    const contentHtml = preprocessContent(getContentEncoded(item), options);

    // Inline assets are emitted before the entity that references them.
    for (const asset of collectInlineAssets(
      contentHtml,
      attachmentIndex,
      seenAssetUrls,
      options.exportedAt
    )) {
      yield asset;
    }

    const categorySlugs = [];
    const tagSlugs = [];
    for (const cat of asArray(item.category)) {
      const domain = cat["@_domain"] ?? "";
      const nicename = sanitizeSlug(cat["@_nicename"] ?? textValue(cat["#text"]));
      if (!nicename) continue;
      if (domain === "category") categorySlugs.push(nicename);
      if (domain === "post_tag") tagSlugs.push(nicename);
    }

    if (postType === "post") {
      const thumbnailId = getPostMeta(item, "_thumbnail_id");
      const featuredAssetSourceId = resolveFeaturedAssetSourceId(
        thumbnailId,
        attachmentIndex,
        contentHtml
      );
      const post = {
        type: "post",
        source: sourceMeta(id, link, options.exportedAt),
        sourceId: id,
        title: textValue(item.title) || slug,
        slug,
        excerpt: getExcerpt(item) || void 0,
        contentHtml,
        publishedAt: textValue(item.post_date) || void 0,
        status: mapPublishStatus(textValue(item.status)),
        categorySlugs: categorySlugs.length ? categorySlugs : void 0,
        tagSlugs: tagSlugs.length ? tagSlugs : void 0,
        sourceFeaturedMediaId: thumbnailId,
        featuredAssetSourceId
      };
      yield post;
    } else {
      const isHomePage = getPostMeta(item, "_wp_show_on_front") === "1" || getPostMeta(item, "page_on_front") === "1";
      const page = {
        type: "page",
        source: sourceMeta(id, link, options.exportedAt),
        sourceId: id,
        title: textValue(item.title) || slug,
        slug,
        contentHtml,
        isHomePage: isHomePage || void 0,
        status: mapPublishStatus(textValue(item.status))
      };
      yield page;
    }
  }
}
|
|
408
|
+
/**
 * Check that `filePath` is a readable WordPress WXR export and summarize it.
 *
 * Issue codes:
 * - `file_not_found`: the file could not be read (returned immediately).
 * - `invalid_wxr`: the text lacks the `<rss` and WordPress markers looked for.
 * - `invalid_xml`: the parser threw. This is reported as an issue so the
 *   validator itself does not reject on malformed input.
 *
 * @param {string} filePath
 * @returns {Promise<{ ok: boolean, issues: Array<{ code: string, message: string }>, summary: object }>}
 */
async function validateWxrFile(filePath) {
  const issues = [];
  let xml;
  try {
    xml = await readFile(filePath, "utf8");
  } catch {
    return {
      ok: false,
      issues: [{ code: "file_not_found", message: `Cannot read file: ${filePath}` }],
      summary: {}
    };
  }

  const looksLikeWxr = xml.includes("<rss") && (xml.includes("wp:wxr_version") || xml.includes("xmlns:wp=") || xml.includes("WordPress eXtended RSS"));
  if (!looksLikeWxr) {
    issues.push({ code: "invalid_wxr", message: "File does not appear to be WordPress WXR" });
  }

  let items;
  try {
    items = parseItems(xml);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    issues.push({ code: "invalid_xml", message: `Cannot parse XML: ${reason}` });
    return { ok: false, issues, summary: {} };
  }

  const summary = {
    posts: 0,
    pages: 0,
    assets: 0,
    portfolios: 0,
    categories: 0,
    tags: 0
  };
  // Single pass instead of one filter per post type.
  for (const item of items) {
    switch (textValue(item.post_type)) {
      case "post":
        summary.posts += 1;
        break;
      case "page":
        summary.pages += 1;
        break;
      case "attachment":
        summary.assets += 1;
        break;
    }
  }
  const { categories, tags } = collectTaxonomies(items);
  summary.categories = categories.size;
  summary.tags = tags.size;
  return { ok: issues.length === 0, issues, summary };
}
|
|
438
|
+
|
|
439
|
+
// src/parsers/wordpress/index.ts
|
|
440
|
+
/**
 * Normalize adapter input into WXR parser options.
 *
 * Accepts a bare file path, or an object with `path` plus optional
 * `originUrlRewrite`, `flattenBuilders` and `exportedAt`. `exportedAt` is now
 * forwarded: the entity enumerator reads `options.exportedAt`, which was
 * previously always undefined because it was dropped here.
 *
 * @param {string | { path: string, originUrlRewrite?: object, flattenBuilders?: boolean, exportedAt?: string }} input
 * @returns {{ filePath: string, originUrlRewrite?: object, flattenBuilders?: boolean, exportedAt?: string }}
 * @throws {Error} When `input` is neither a string nor an object with `path`.
 */
function resolveWxrOptions(input) {
  if (typeof input === "string") {
    return { filePath: input };
  }
  if (input && typeof input === "object" && "path" in input) {
    const obj = input;
    return {
      filePath: String(obj.path),
      originUrlRewrite: obj.originUrlRewrite,
      flattenBuilders: obj.flattenBuilders,
      exportedAt: obj.exportedAt
    };
  }
  throw new Error("WordPress adapter requires input path (string or { path, originUrlRewrite?, flattenBuilders? })");
}
|
|
454
|
+
/**
 * Source adapter for WordPress WXR exports.
 *
 * - `validateInput(input)` resolves the file path from `input`, validates the
 *   file, and returns only `ok`, `issues` and `summary`.
 * - `enumerateEntities(ctx)` returns the entity generator for `ctx.input`.
 */
var wordpressAdapter = {
  platform: "wordpress",
  async validateInput(input) {
    const wxrOptions = resolveWxrOptions(input);
    const { ok, issues, summary } = await validateWxrFile(wxrOptions.filePath);
    return { ok, issues, summary };
  },
  enumerateEntities(ctx) {
    const wxrOptions = resolveWxrOptions(ctx.input);
    return enumerateWxrEntities(wxrOptions);
  }
};
|
|
469
|
+
|
|
470
|
+
// src/parsers/smugmug/api.ts
|
|
471
|
+
import { createHmac, randomBytes } from "crypto";
|
|
472
|
+
import { z } from "zod";
|
|
473
|
+
// Host used to turn root-relative API paths into absolute URLs.
var SMUGMUG_API_HOST = "api.smugmug.com";
// Base URL for API v2 requests.
var SMUGMUG_API_BASE = `https://${SMUGMUG_API_HOST}/api/v2`;
// OAuth 1.0a endpoint URLs.
// NOTE(review): the host is hard-coded here, not derived from SMUGMUG_API_HOST.
var SMUGMUG_OAUTH_ENDPOINTS = {
  requestToken: "https://api.smugmug.com/services/oauth/1.0a/getRequestToken",
  authorize: "https://api.smugmug.com/services/oauth/1.0a/authorize",
  accessToken: "https://api.smugmug.com/services/oauth/1.0a/getAccessToken"
};
// OAuth credentials: all four values must be non-empty strings.
var smugMugCredentialsSchema = z.object({
  consumerKey: z.string().min(1),
  consumerSecret: z.string().min(1),
  accessToken: z.string().min(1),
  accessTokenSecret: z.string().min(1)
});
// Options accepted by the SmugMugApiClient constructor, with their defaults.
var smugMugClientOptionsSchema = z.object({
  credentials: smugMugCredentialsSchema,
  // Page size requested when paginating (1-500, default 100).
  pageSize: z.number().int().min(1).max(500).default(100),
  // Retries after the first failed attempt (0-10, default 3).
  maxRetries: z.number().int().min(0).max(10).default(3),
  // Base of the exponential backoff, in milliseconds.
  retryBaseDelayMs: z.number().int().min(0).default(500),
  // Upper bound applied to the computed backoff delay, in milliseconds.
  maxRetryDelayMs: z.number().int().min(0).default(8e3),
  // Minimum spacing between consecutive requests, in milliseconds (0 disables throttling).
  requestIntervalMs: z.number().int().min(0).default(200),
  // Optional replacement for the global `fetch`.
  fetchImpl: z.custom().optional()
});
// Expansion/filter config that collectAlbumImages() serializes into the
// `_config` query parameter of the album images request.
var ALBUM_IMAGES_CONFIG = {
  expand: {
    AlbumImage: {
      expand: {
        Image: {
          filter: ["FileName", "Caption", "KeywordsArray"],
          filteruri: ["ImageMetadata", "ImageSizeDetails"],
          expand: {
            ImageMetadata: {
              filter: ["ISO", "Aperture", "ApertureValue", "ShutterSpeed", "ExposureTime", "FocalLength"]
            },
            ImageSizeDetails: {
              filter: ["OriginalImageUrl"]
            }
          }
        }
      }
    }
  }
};
|
|
515
|
+
/**
 * Percent-encode `value` for OAuth 1.0a: `encodeURIComponent`, plus the five
 * characters it leaves alone (`! ' ( ) *`), using upper-case hex.
 *
 * @param {string} value
 * @returns {string}
 */
function oauthPercentEncode(value) {
  const toUpperHexEscape = (char) => `%${char.charCodeAt(0).toString(16).toUpperCase()}`;
  const uriEncoded = encodeURIComponent(value);
  return uriEncoded.replace(/[!'()*]/g, toUpperHexEscape);
}
|
|
521
|
+
/**
 * Produce the base-string form of a request URL: lower-cased scheme and host,
 * the port only when it differs from the scheme default, and the path. Query
 * string and fragment are omitted.
 *
 * @param {URL} url
 * @returns {string}
 */
function normalizeRequestUrl(url) {
  const scheme = url.protocol.replace(/:$/, "").toLowerCase();
  const hostName = url.hostname.toLowerCase();
  // Any scheme other than plain http is treated as defaulting to 443.
  const schemeDefaultPort = scheme === "http" ? "80" : "443";
  let portSuffix = "";
  if (url.port && url.port !== schemeDefaultPort) {
    portSuffix = `:${url.port}`;
  }
  return `${scheme}://${hostName}${portSuffix}${url.pathname}`;
}
|
|
528
|
+
/**
 * Serialize parameters as `key=value` pairs joined by `&`, with keys sorted
 * in ascending code-unit order and both sides OAuth percent-encoded.
 *
 * @param {Record<string, string>} params
 * @returns {string}
 */
function sortedParameterString(params) {
  const byCodeUnit = (a, b) => {
    if (a === b) return 0;
    return a < b ? -1 : 1;
  };
  const pairs = [];
  for (const key of Object.keys(params).sort(byCodeUnit)) {
    pairs.push(`${oauthPercentEncode(key)}=${oauthPercentEncode(params[key])}`);
  }
  return pairs.join("&");
}
|
|
531
|
+
/**
 * Merge every parameter that takes part in the OAuth signature: the OAuth
 * parameters, then the URL query parameters, then the body parameters. Later
 * sources overwrite earlier ones, and a repeated query key keeps its last value.
 *
 * @param {URL} url
 * @param {Record<string, string>} oauthParams
 * @param {Record<string, string>} [bodyParams]
 * @returns {Record<string, string>} A new object; the inputs are not modified.
 */
function collectSignatureParams(url, oauthParams, bodyParams) {
  const merged = { ...oauthParams };
  url.searchParams.forEach((queryValue, queryKey) => {
    merged[queryKey] = queryValue;
  });
  if (!bodyParams) return merged;
  Object.entries(bodyParams).forEach(([bodyKey, bodyValue]) => {
    merged[bodyKey] = bodyValue;
  });
  return merged;
}
|
|
543
|
+
/**
 * Compute the HMAC-SHA1 OAuth signature for a request.
 *
 * The signature base string is `METHOD&enc(url)&enc(params)`. The key is
 * `enc(consumerSecret)&enc(accessTokenSecret)`.
 *
 * @param {{ method: string, url: string, credentials: object, oauthParams: object, bodyParams?: object }} input
 * @returns {string} Base64-encoded signature.
 */
function signSmugMugOAuthRequest(input) {
  const { credentials, oauthParams, bodyParams } = input;
  const target = new URL(input.url);
  const signedParams = collectSignatureParams(target, oauthParams, bodyParams);
  const parameterString = sortedParameterString(signedParams);

  const baseSegments = [
    input.method.toUpperCase(),
    oauthPercentEncode(normalizeRequestUrl(target)),
    oauthPercentEncode(parameterString)
  ];
  const keySegments = [
    oauthPercentEncode(credentials.consumerSecret),
    oauthPercentEncode(credentials.accessTokenSecret)
  ];

  const hmac = createHmac("sha1", keySegments.join("&"));
  hmac.update(baseSegments.join("&"));
  return hmac.digest("base64");
}
|
|
556
|
+
/**
 * Assemble the OAuth 1.0 protocol parameters for one request (without the
 * signature).
 *
 * @param {{ consumerKey: string, accessToken: string }} credentials
 * @param {string} nonce
 * @param {string} timestamp - Seconds since the epoch, as a string.
 * @returns {Record<string, string>}
 */
function buildOAuthParams(credentials, nonce, timestamp) {
  const { consumerKey, accessToken } = credentials;
  const params = {};
  params.oauth_consumer_key = consumerKey;
  params.oauth_token = accessToken;
  params.oauth_signature_method = "HMAC-SHA1";
  params.oauth_timestamp = timestamp;
  params.oauth_nonce = nonce;
  params.oauth_version = "1.0";
  return params;
}
|
|
566
|
+
/**
 * Build the `Authorization: OAuth ...` header value for a request.
 *
 * A random 16-byte hex nonce and the current Unix time are used unless
 * `input.nonce` / `input.timestamp` are supplied. Header parameters are
 * listed in sorted key order, percent-encoded and double-quoted.
 *
 * @param {{ method: string, url: string, credentials: object, bodyParams?: object, nonce?: string, timestamp?: string }} input
 * @returns {string}
 */
function buildSmugMugAuthorizationHeader(input) {
  const { method, url, credentials, bodyParams } = input;
  const nonce = input.nonce ?? randomBytes(16).toString("hex");
  const timestamp = input.timestamp ?? String(Math.floor(Date.now() / 1e3));
  const oauthParams = buildOAuthParams(credentials, nonce, timestamp);
  const oauth_signature = signSmugMugOAuthRequest({
    method,
    url,
    credentials,
    oauthParams,
    bodyParams
  });
  const headerParams = { ...oauthParams, oauth_signature };

  const parts = [];
  for (const key of Object.keys(headerParams).sort()) {
    parts.push(`${oauthPercentEncode(key)}="${oauthPercentEncode(headerParams[key])}"`);
  }
  return `OAuth ${parts.join(", ")}`;
}
|
|
581
|
+
/**
 * Read SmugMug OAuth credentials from environment variables and validate
 * them with `smugMugCredentialsSchema`.
 *
 * Variables read: `SMUGMUG_CONSUMER_KEY`, `SMUGMUG_CONSUMER_SECRET`,
 * `SMUGMUG_ACCESS_TOKEN`, `SMUGMUG_ACCESS_TOKEN_SECRET`.
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 * @returns {{ consumerKey: string, consumerSecret: string, accessToken: string, accessTokenSecret: string }}
 */
function readSmugMugCredentialsFromEnv(env = process.env) {
  const {
    SMUGMUG_CONSUMER_KEY: consumerKey,
    SMUGMUG_CONSUMER_SECRET: consumerSecret,
    SMUGMUG_ACCESS_TOKEN: accessToken,
    SMUGMUG_ACCESS_TOKEN_SECRET: accessTokenSecret
  } = env;
  const candidate = { consumerKey, consumerSecret, accessToken, accessTokenSecret };
  return smugMugCredentialsSchema.parse(candidate);
}
|
|
589
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
592
|
+
/**
 * Extract the album key from a URI containing `/album/<key>`. The key ends at
 * the first `/`, `?` or `!`.
 *
 * @param {string} uri
 * @returns {string}
 * @throws {Error} When the URI has no `/album/<key>` segment.
 */
function albumKeyFromUri(uri) {
  const [, albumKey] = uri.match(/\/album\/([^/?!]+)/i) ?? [];
  if (albumKey) return albumKey;
  throw new Error(`Unable to parse album key from URI: ${uri}`);
}
|
|
599
|
+
/**
 * Flatten an AlbumImage API record (with its optional expanded `Image`) into
 * an export image row.
 *
 * Expanded `Image` fields are preferred for metadata, original URL and file
 * name. The caption prefers the AlbumImage's own value. An empty keyword list
 * and missing metadata become `undefined`.
 *
 * @param {object} albumImage
 * @param {string} portfolioSourceId - Source id of the owning album.
 * @param {number} sort - Position of the image within the album.
 * @returns {object}
 */
function mapAlbumImage(albumImage, portfolioSourceId, sort) {
  const { Image: image } = albumImage;
  const metadata = image?.ImageMetadata ?? albumImage.ImageMetadata;
  const keywords = image?.KeywordsArray;

  let exif;
  if (metadata) {
    exif = {
      iso: metadata.ISO,
      aperture: metadata.Aperture ?? metadata.ApertureValue,
      shutter: metadata.ShutterSpeed ?? metadata.ExposureTime,
      focalLength: metadata.FocalLength
    };
  }

  return {
    sourceId: albumImage.ImageKey,
    portfolioSourceId,
    sort,
    fileName: image?.FileName ?? albumImage.FileName,
    originalUrl: image?.ImageSizeDetails?.OriginalImageUrl ?? albumImage.LargestImage?.Url ?? albumImage.WebUri,
    caption: albumImage.Caption ?? image?.Caption,
    keywords: keywords?.length ? keywords : void 0,
    exif
  };
}
|
|
620
|
+
/**
 * Minimal read-only SmugMug API v2 client: OAuth-signed GET requests with
 * request throttling, retry with backoff, and pagination helpers.
 *
 * Before a retry, the unread body of the failed response is now cancelled so
 * it is not left pending while the client backs off.
 */
var SmugMugApiClient = class {
  credentials;
  pageSize;
  maxRetries;
  retryBaseDelayMs;
  maxRetryDelayMs;
  requestIntervalMs;
  fetchImpl;
  // Timestamp (ms) of the most recent request, used by throttle().
  lastRequestAt = 0;
  /**
   * @param {object} options - Validated with `smugMugClientOptionsSchema`;
   *   `fetchImpl` defaults to the global `fetch`.
   */
  constructor(options) {
    const parsed = smugMugClientOptionsSchema.parse(options);
    this.credentials = parsed.credentials;
    this.pageSize = parsed.pageSize;
    this.maxRetries = parsed.maxRetries;
    this.retryBaseDelayMs = parsed.retryBaseDelayMs;
    this.maxRetryDelayMs = parsed.maxRetryDelayMs;
    this.requestIntervalMs = parsed.requestIntervalMs;
    this.fetchImpl = parsed.fetchImpl ?? fetch;
  }
  /** Validate credentials against `GET /user/!authuser`. */
  async validateCredentials() {
    const user = await this.getAuthUser();
    return { nick: user.NickName, rootNodeUri: user.Uris.Node };
  }
  /** Crawl the authenticated user's node tree into flat export tables for `parse-node.ts`. */
  async crawlExport() {
    const user = await this.getAuthUser();
    const folders = [];
    const albums = [];
    const images = [];
    await this.walkNode(user.Uris.Node, void 0, folders, albums, images);
    return {
      exportVersion: 1,
      exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
      Folders: folders,
      Albums: albums,
      Images: images
    };
  }
  /** Fetch the authenticated user and return the `Response` part of the envelope. */
  async getAuthUser() {
    const envelope = await this.requestJson(`${SMUGMUG_API_BASE}/user/!authuser`);
    return envelope.Response;
  }
  /**
   * Depth-first walk of a node's children, appending to the output arrays.
   * `Page` nodes are skipped, folders recurse, and albums also collect their images.
   */
  async walkNode(nodeUri, parentFolderId, folders, albums, images) {
    const childrenPath = `${nodeUri}!children`;
    for await (const child of this.paginateNodes(childrenPath)) {
      if (child.Type === "Page") continue;
      if (child.Type === "Folder") {
        folders.push({
          sourceId: child.NodeID,
          name: child.Name,
          parentSourceId: parentFolderId,
          slug: child.UrlName,
          description: child.Description
        });
        await this.walkNode(child.Uri, child.NodeID, folders, albums, images);
        continue;
      }
      if (child.Type === "Album") {
        albums.push({
          sourceId: child.NodeID,
          name: child.Name,
          parentSourceId: parentFolderId,
          slug: child.UrlName,
          description: child.Description,
          url: child.WebUri
        });
        const albumUri = child.Uris?.Album;
        if (albumUri) {
          await this.collectAlbumImages(albumUri, child.NodeID, images);
        }
      }
    }
  }
  /** Append every image of an album to `images`, numbering them from 0 in API order. */
  async collectAlbumImages(albumUri, portfolioSourceId, images) {
    const albumKey = albumKeyFromUri(albumUri);
    const configQuery = `_config=${encodeURIComponent(JSON.stringify(ALBUM_IMAGES_CONFIG))}`;
    const initialPath = `${SMUGMUG_API_BASE}/album/${albumKey}!images?${configQuery}`;
    let sort = 0;
    for await (const albumImage of this.paginateAlbumImages(initialPath)) {
      images.push(mapAlbumImage(albumImage, portfolioSourceId, sort));
      sort += 1;
    }
  }
  /** Yield each entry of `Response.Node` across all pages. */
  async *paginateNodes(path) {
    for await (const page of this.paginate(path)) {
      for (const node of page.Node ?? []) {
        yield node;
      }
    }
  }
  /** Yield each entry of `Response.AlbumImage` across all pages. */
  async *paginateAlbumImages(path) {
    for await (const page of this.paginate(path)) {
      for (const albumImage of page.AlbumImage ?? []) {
        yield albumImage;
      }
    }
  }
  /** Yield each page's `Response`, following `Pages.NextPage` until it is absent. */
  async *paginate(initialPath) {
    let nextPath = appendPagination(initialPath, this.pageSize, 1);
    while (nextPath) {
      const envelope = await this.requestJson(nextPath);
      yield envelope.Response;
      nextPath = envelope.Response.Pages?.NextPage;
    }
  }
  /**
   * GET a path or URL and return the parsed JSON envelope.
   * @throws {Error} When the envelope's `Code` is not 200.
   */
  async requestJson(pathOrUrl) {
    const url = toAbsoluteUrl(pathOrUrl);
    const response = await this.requestWithRetry(url);
    const body = await response.json();
    if (body.Code !== 200) {
      throw new Error(`SmugMug API error ${body.Code}: ${body.Message}`);
    }
    return body;
  }
  /**
   * Issue a signed GET, retrying on HTTP 429 and 5xx up to `maxRetries` times.
   * The delay is `Retry-After` seconds when that header parses as an integer,
   * otherwise exponential backoff capped at `maxRetryDelayMs`.
   * @throws {Error} On a non-retryable status or when retries are exhausted.
   */
  async requestWithRetry(url) {
    let attempt = 0;
    while (true) {
      await this.throttle();
      // Signed per attempt so each retry gets a fresh nonce and timestamp.
      const authorization = buildSmugMugAuthorizationHeader({
        method: "GET",
        url: url.toString(),
        credentials: this.credentials
      });
      const response = await this.fetchImpl(url, {
        method: "GET",
        headers: {
          Accept: "application/json",
          Authorization: authorization
        }
      });
      if (response.ok) {
        return response;
      }
      const retryable = response.status === 429 || response.status >= 500;
      if (!retryable || attempt >= this.maxRetries) {
        const detail = await response.text().catch(() => "");
        throw new Error(
          `SmugMug HTTP ${response.status}${detail ? `: ${detail.slice(0, 200)}` : ""}`
        );
      }
      try {
        // Release the unread error body before backing off.
        await response.body?.cancel();
      } catch {
        // Best effort: a body that cannot be cancelled must not block the retry.
      }
      const retryAfter = Number.parseInt(response.headers.get("retry-after") ?? "", 10);
      const delay = Number.isFinite(retryAfter) ? retryAfter * 1e3 : Math.min(this.maxRetryDelayMs, this.retryBaseDelayMs * 2 ** attempt);
      await sleep(delay);
      attempt += 1;
    }
  }
  /** Wait until at least `requestIntervalMs` has passed since the previous request. */
  async throttle() {
    if (this.requestIntervalMs <= 0) return;
    const elapsed = Date.now() - this.lastRequestAt;
    if (elapsed < this.requestIntervalMs) {
      await sleep(this.requestIntervalMs - elapsed);
    }
    this.lastRequestAt = Date.now();
  }
};
|
|
776
|
+
/**
 * Converts a path or URL string into a URL object.
 *
 * Strings starting with "/" are resolved against the SmugMug API host over
 * https; every other string is handed to the URL constructor unchanged, so it
 * must already be absolute.
 *
 * @throws {TypeError} From the URL constructor when the string is not a valid URL.
 */
function toAbsoluteUrl(pathOrUrl) {
  const isHostRelative = pathOrUrl.startsWith("/");
  const target = isHostRelative ? `https://${SMUGMUG_API_HOST}${pathOrUrl}` : pathOrUrl;
  return new URL(target);
}
|
|
785
|
+
/**
 * Returns `pathOrUrl` as an absolute URL string with its `count` and `start`
 * query parameters set (replacing any existing values).
 */
function appendPagination(pathOrUrl, count, start) {
  const paged = toAbsoluteUrl(pathOrUrl);
  const { searchParams } = paged;
  searchParams.set("count", String(count));
  searchParams.set("start", String(start));
  return paged.href;
}
|
|
791
|
+
|
|
792
|
+
// src/parsers/smugmug/parse-node.ts
|
|
793
|
+
import { readFile as readFile2 } from "fs/promises";
|
|
794
|
+
// Platform identifier written into the `platform` field of source metadata.
var PLATFORM2 = "smugmug";
// Prefix of the placeholder URL emitted for images that have no original URL.
var UNRESOLVED_URL_PREFIX = "unspecified://smugmug/";
|
|
796
|
+
/**
 * Builds the `source` metadata record attached to every emitted entity:
 * the SmugMug platform tag plus the given id, url and export timestamp.
 */
function sourceMeta2(id, url, exportedAt) {
  const meta = { platform: PLATFORM2, id, url, exportedAt };
  return meta;
}
|
|
804
|
+
/**
 * Guesses an image MIME type from a filename's extension (case-insensitive).
 *
 * @param filename - File name; only the text after the last "." is used.
 * @returns The MIME type for jpg/jpeg/png/gif/webp/tif/tiff, otherwise
 *   `undefined` (including when `filename` is not a string).
 */
function guessMime2(filename) {
  if (typeof filename !== "string") return void 0;
  const ext = filename.split(".").pop()?.toLowerCase();
  if (!ext) return void 0;
  // A Map is used instead of a plain object so that extensions such as
  // "constructor" or "__proto__" cannot resolve to Object.prototype members.
  const mimeByExtension = /* @__PURE__ */ new Map([
    ["jpg", "image/jpeg"],
    ["jpeg", "image/jpeg"],
    ["png", "image/png"],
    ["gif", "image/gif"],
    ["webp", "image/webp"],
    ["tif", "image/tiff"],
    ["tiff", "image/tiff"]
  ]);
  return mimeByExtension.get(ext);
}
|
|
817
|
+
/**
 * Extracts a finite number from an EXIF value.
 *
 * Finite numbers are returned as-is. Anything else is stringified, stripped
 * of every character other than digits and ".", and parsed as a float.
 *
 * @returns The parsed number, or `undefined` when nothing finite results.
 */
function parseExifNumber(value) {
  if (value === void 0) return void 0;
  const alreadyNumeric = typeof value === "number" && Number.isFinite(value);
  const candidate = alreadyNumeric
    ? value
    : Number.parseFloat(String(value).replace(/[^0-9.]/g, ""));
  return Number.isFinite(candidate) ? candidate : void 0;
}
|
|
823
|
+
/**
 * Reduces a raw EXIF record to `{ iso, aperture, shutter, focalLength }`.
 *
 * `iso`, `aperture` and `focalLength` go through `parseExifNumber`; `shutter`
 * is copied unchanged.
 *
 * @returns The normalized record, or `undefined` when `exif` is missing or
 *   empty, or when none of the four fields carries a usable value.
 */
function normalizeExif(exif) {
  if (!exif || Object.keys(exif).length === 0) return void 0;
  const iso = parseExifNumber(exif.iso);
  const aperture = parseExifNumber(exif.aperture);
  const shutter = exif.shutter;
  const focalLength = parseExifNumber(exif.focalLength);
  const hasValue = iso !== void 0 || aperture !== void 0 || Boolean(shutter) || focalLength !== void 0;
  return hasValue ? { iso, aperture, shutter, focalLength } : void 0;
}
|
|
836
|
+
/**
 * Tells whether `value` is a flat SmugMug export: an object whose
 * `exportVersion` is 1 (number or string) and whose `Folders`, `Albums` and
 * `Images` properties are all arrays.
 */
function isSmugMugFlatExport(value) {
  if (!value || typeof value !== "object") return false;
  const { exportVersion } = value;
  if (exportVersion !== 1 && exportVersion !== "1") return false;
  return ["Folders", "Albums", "Images"].every((key) => Array.isArray(value[key]));
}
|
|
842
|
+
/**
 * Tells whether `value` is a nested SmugMug export: an object whose
 * `exportVersion` is 1 (number or string) and whose `folders` property is an
 * array.
 */
function isSmugMugNestedExport(value) {
  if (!value || typeof value !== "object") return false;
  const { exportVersion, folders } = value;
  const versionOk = exportVersion === 1 || exportVersion === "1";
  return versionOk && Array.isArray(folders);
}
|
|
848
|
+
/**
 * Produces the export document for the parser.
 *
 * `options.data`, when truthy, is returned as-is without validation.
 * Otherwise the UTF-8 JSON file at `options.filePath` is read and must match
 * the flat or nested export shape.
 *
 * @throws {Error} When neither `data` nor `filePath` is given, or when the
 *   file content is not a recognised export.
 */
async function loadSmugMugExport(options) {
  const { data, filePath } = options;
  if (data) return data;
  if (!filePath) {
    throw new Error("SmugMug parser requires filePath or data");
  }
  const text = await readFile2(filePath, "utf8");
  const parsed = JSON.parse(text);
  const recognised = isSmugMugFlatExport(parsed) || isSmugMugNestedExport(parsed);
  if (!recognised) {
    throw new Error(
      "Invalid SmugMug export: expected exportVersion 1 with folders[] (nested) or Folders/Albums/Images (flat)"
    );
  }
  return parsed;
}
|
|
861
|
+
/**
 * Returns the image's `originalUrl`, or a placeholder URL built from the
 * unresolved-URL prefix and the image's `sourceId` when none is present.
 */
function resolveAssetUrl(image) {
  return image.originalUrl ? image.originalUrl : `${UNRESOLVED_URL_PREFIX}${image.sourceId}`;
}
|
|
865
|
+
/**
 * Returns the image's `fileName`, or `<sourceId>.jpg` when none is present.
 */
function resolveFilename(image) {
  const { fileName, sourceId } = image;
  return fileName ? fileName : `${sourceId}.jpg`;
}
|
|
869
|
+
/**
 * Yields the single portfolio entity that represents a folder of a nested
 * export. The slug is derived from `folder.slug`, falling back to the name.
 */
function* emitNestedFolderPortfolio(folder, exportedAt) {
  const { id, name, slug, description } = folder;
  const portfolio = {
    type: "portfolio",
    source: sourceMeta2(id, void 0, exportedAt),
    sourceId: id,
    title: name,
    slug: sanitizeSlug(slug ?? name),
    description
  };
  yield portfolio;
}
|
|
879
|
+
/**
 * Yields the single portfolio entity that represents an album of a nested
 * export, parented to the folder that contains it.
 */
function* emitNestedAlbumPortfolio(folder, album, exportedAt) {
  const { id, url, name, slug, description } = album;
  const portfolio = {
    type: "portfolio",
    source: sourceMeta2(id, url, exportedAt),
    sourceId: id,
    title: name,
    slug: sanitizeSlug(slug ?? name),
    description,
    parentSourceId: folder.id
  };
  yield portfolio;
}
|
|
890
|
+
/**
 * Yields one asset entity per image of a nested-export album. `sort` is the
 * image's zero-based position in `album.images`; an empty keyword list is
 * emitted as `undefined`.
 */
function* emitNestedAlbumAssets(album, exportedAt) {
  for (const [position, image] of album.images.entries()) {
    const { id, originalUrl, fileName, caption, keywords, exif } = image;
    yield {
      type: "asset",
      source: sourceMeta2(id, originalUrl, exportedAt),
      sourceId: id,
      sourceUrl: originalUrl,
      filename: fileName,
      mimeType: guessMime2(fileName),
      caption,
      keywords: keywords?.length ? keywords : void 0,
      exif: normalizeExif(exif),
      portfolioSourceId: album.id,
      sort: position
    };
  }
}
|
|
908
|
+
/**
 * Streams the entities of a nested export depth-first: each folder's
 * portfolio, then for each of its albums the album portfolio followed by the
 * album's assets.
 */
async function* enumerateNestedExport(doc) {
  const { exportedAt } = doc;
  for (const folder of doc.folders) {
    yield* emitNestedFolderPortfolio(folder, exportedAt);
    for (const album of folder.albums) {
      yield* emitNestedAlbumPortfolio(folder, album, exportedAt);
      yield* emitNestedAlbumAssets(album, exportedAt);
    }
  }
}
|
|
918
|
+
/**
 * Streams the entities of a flat export in three passes: folder portfolios,
 * album portfolios, then image assets.
 *
 * Images without a file name or original URL receive fallbacks from
 * `resolveFilename` / `resolveAssetUrl`; a missing `sort` becomes 0.
 */
async function* enumerateFlatExport(doc) {
  const { exportedAt } = doc;

  for (const { sourceId, name, slug, description, parentSourceId } of doc.Folders) {
    yield {
      type: "portfolio",
      source: sourceMeta2(sourceId, void 0, exportedAt),
      sourceId,
      title: name,
      slug: sanitizeSlug(slug ?? name),
      description,
      parentSourceId
    };
  }

  for (const { sourceId, url, name, slug, description, parentSourceId } of doc.Albums) {
    yield {
      type: "portfolio",
      source: sourceMeta2(sourceId, url, exportedAt),
      sourceId,
      title: name,
      slug: sanitizeSlug(slug ?? name),
      description,
      parentSourceId
    };
  }

  for (const image of doc.Images) {
    const filename = resolveFilename(image);
    const { sourceId, originalUrl, caption, keywords, exif, portfolioSourceId, sort } = image;
    yield {
      type: "asset",
      source: sourceMeta2(sourceId, originalUrl, exportedAt),
      sourceId,
      sourceUrl: resolveAssetUrl(image),
      filename,
      mimeType: guessMime2(filename),
      caption,
      keywords: keywords?.length ? keywords : void 0,
      exif: normalizeExif(exif),
      portfolioSourceId,
      sort: sort ?? 0
    };
  }
}
|
|
959
|
+
/**
 * Obtains the export document from the first available source, in order:
 * inline `data`, a supplied `client`, a client built from `credentials`
 * (merged with `clientOptions`), and finally the export file.
 */
async function resolveSmugMugDocument(options) {
  const { data, client, credentials, clientOptions } = options;
  if (data) return data;
  const liveClient = client || (credentials ? new SmugMugApiClient({ credentials, ...clientOptions }) : void 0);
  return liveClient ? liveClient.crawlExport() : loadSmugMugExport(options);
}
|
|
968
|
+
/**
 * Resolves the export document and streams its entities, using the flat
 * enumerator when the document has the flat shape and the nested one
 * otherwise.
 */
async function* enumerateSmugMugEntities(options) {
  const doc = await resolveSmugMugDocument(options);
  const entities = isSmugMugFlatExport(doc) ? enumerateFlatExport(doc) : enumerateNestedExport(doc);
  yield* entities;
}
|
|
976
|
+
/**
 * Counts the contents of an export document.
 *
 * @returns `{ folders, albums, assets, portfolios }`, where `portfolios` is
 *   the sum of folders and albums.
 */
function summarizeSmugMugExport(doc) {
  let folders;
  let albums;
  let assets;
  if (isSmugMugFlatExport(doc)) {
    folders = doc.Folders.length;
    albums = doc.Albums.length;
    assets = doc.Images.length;
  } else {
    folders = doc.folders.length;
    albums = 0;
    assets = 0;
    for (const folder of doc.folders) {
      albums += folder.albums.length;
      for (const album of folder.albums) {
        assets += album.images.length;
      }
    }
  }
  return { folders, albums, assets, portfolios: folders + albums };
}
|
|
1001
|
+
/**
 * Validates a SmugMug export file and summarizes its contents.
 *
 * Failures to load, parse or recognise the file — and structural problems
 * discovered while counting its contents (for example a nested folder without
 * an `albums` array) — are reported as an `invalid_export` issue rather than
 * thrown.
 *
 * @param filePath - Path of the JSON export file.
 * @returns `{ ok, issues, summary }`; `summary` is `{}` when the file could
 *   not be loaded or counted.
 */
async function validateSmugMugExportFile(filePath) {
  const invalidExport = (error, priorIssues = []) => ({
    ok: false,
    issues: [
      ...priorIssues,
      {
        code: "invalid_export",
        message: error instanceof Error ? error.message : String(error)
      }
    ],
    summary: {}
  });

  let doc;
  try {
    doc = await loadSmugMugExport({ filePath });
  } catch (error) {
    return invalidExport(error);
  }

  const issues = [];
  if (isSmugMugFlatExport(doc)) {
    if (doc.Folders.length === 0 && doc.Albums.length === 0) {
      issues.push({ code: "empty_export", message: "No folders or albums in export" });
    }
  } else if (doc.folders.length === 0) {
    issues.push({ code: "empty_export", message: "No folders in export" });
  }

  // The loader only checks the top-level shape, so counting can still hit
  // malformed folders/albums; surface that as an issue instead of a throw.
  let summary;
  try {
    summary = summarizeSmugMugExport(doc);
  } catch (error) {
    return invalidExport(error, issues);
  }

  return {
    ok: issues.length === 0,
    issues,
    summary: {
      portfolios: summary.portfolios,
      assets: summary.assets,
      categories: summary.folders,
      posts: 0,
      pages: 0,
      tags: 0
    }
  };
}
|
|
1039
|
+
|
|
1040
|
+
// src/parsers/smugmug/index.ts
|
|
1041
|
+
/**
 * Normalizes the SmugMug adapter input.
 *
 * A string becomes `{ path }`. An object carrying `client`, `credentials` or
 * `live` is returned unchanged; otherwise it is reduced to `{ data }` or
 * `{ path }`, in that order of preference.
 *
 * @throws {Error} When none of the accepted inputs is present.
 */
function resolveInput(input) {
  if (typeof input === "string") return { path: input };
  if (input && typeof input === "object") {
    const { client, credentials, live, data, path } = input;
    if (client || credentials || live) return input;
    if (data) return { data };
    if (path) return { path };
  }
  throw new Error(
    "SmugMug adapter requires input path (string or { path }), { data }, { credentials }, { client }, or { live: true }"
  );
}
|
|
1053
|
+
/**
 * Picks the credentials for a live crawl: explicit `input.credentials` first,
 * then the environment when `input.live` is set, otherwise `undefined`.
 */
function resolveLiveCredentials(input) {
  const { credentials, live } = input;
  if (credentials) return credentials;
  return live ? readSmugMugCredentialsFromEnv() : void 0;
}
|
|
1058
|
+
/**
 * Source adapter for SmugMug. Accepts an export file path, inline export
 * data, or a live API source (client, credentials, or `live: true`).
 */
var smugmugAdapter = {
  platform: "smugmug",

  /**
   * Checks the input and reports entity counts.
   *
   * Inline data is summarized directly; a live source has its credentials
   * validated and is then crawled in full; anything else is validated as an
   * export file. Any error is reported as an `invalid_input` issue.
   */
  async validateInput(input) {
    const okResult = (doc) => {
      const { portfolios, assets, folders } = summarizeSmugMugExport(doc);
      return {
        ok: true,
        issues: [],
        summary: { portfolios, assets, categories: folders, posts: 0, pages: 0, tags: 0 }
      };
    };
    try {
      const resolved = resolveInput(input);
      const credentials = resolveLiveCredentials(resolved);
      if (resolved.data) {
        return okResult(resolved.data);
      }
      if (resolved.client || credentials) {
        const client = resolved.client ?? new SmugMugApiClient({ credentials, ...resolved.clientOptions });
        await client.validateCredentials();
        return okResult(await client.crawlExport());
      }
      const { ok, issues, summary } = await validateSmugMugExportFile(resolved.path);
      return { ok, issues, summary };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return { ok: false, issues: [{ code: "invalid_input", message }] };
    }
  },

  /** Streams the entities of the source described by `ctx.input`. */
  enumerateEntities(ctx) {
    const resolved = resolveInput(ctx.input);
    const credentials = resolveLiveCredentials(resolved);
    const { path: filePath, data, client, clientOptions } = resolved;
    return enumerateSmugMugEntities({ filePath, data, client, credentials, clientOptions });
  }
};
|
|
1127
|
+
|
|
1128
|
+
// src/parsers/squarespace/index.ts
|
|
1129
|
+
/**
 * Normalizes the Squarespace adapter input.
 *
 * A string becomes `{ path }`. An object carrying `client` or
 * `collectTargets` is returned unchanged; otherwise it is reduced to
 * `{ data }` or `{ path }`, in that order of preference.
 *
 * @throws {Error} When none of the accepted inputs is present.
 */
function resolveInput2(input) {
  if (typeof input === "string") return { path: input };
  if (input && typeof input === "object") {
    const { client, collectTargets, data, path } = input;
    if (client || collectTargets) return input;
    if (data) return { data };
    if (path) return { path };
  }
  throw new Error(
    "Squarespace adapter requires input path (string or { path }), { data }, { client, collectTargets }, or { collectTargets }"
  );
}
|
|
1141
|
+
/**
 * Source adapter for Squarespace. Accepts an export file path, inline export
 * data, or a live collection source (`collectTargets`, optionally with a
 * client).
 */
var squarespaceAdapter = {
  platform: "squarespace",

  /**
   * Checks the input and reports entity counts.
   *
   * Inline data is summarized directly; a live source is collected and then
   * summarized (and requires non-empty `collectTargets`); anything else is
   * validated as an export file. Any error is reported as an `invalid_input`
   * issue.
   */
  async validateInput(input) {
    const okResult = (doc) => {
      const { pages, posts, categories, tags } = summarizeSquarespaceExport(doc);
      return { ok: true, issues: [], summary: { pages, posts, categories, tags } };
    };
    try {
      const resolved = resolveInput2(input);
      if (resolved.data) {
        return okResult(resolved.data);
      }
      const hasTargets = Boolean(resolved.collectTargets?.length);
      if (resolved.client || hasTargets) {
        if (!hasTargets) {
          throw new Error("Squarespace live validation requires collectTargets");
        }
        const client = resolved.client ?? new SquarespaceCollectionClient(resolved.clientOptions);
        return okResult(await client.collectExport(resolved.collectTargets));
      }
      const { ok, issues, summary } = await validateSquarespaceExportFile(resolved.path);
      return { ok, issues, summary };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return { ok: false, issues: [{ code: "invalid_input", message }] };
    }
  },

  /** Streams the entities of the source described by `ctx.input`. */
  enumerateEntities(ctx) {
    const { path: filePath, data, client, collectTargets, clientOptions } = resolveInput2(ctx.input);
    return enumerateSquarespaceEntities({ filePath, data, client, collectTargets, clientOptions });
  }
};
|
|
1206
|
+
|
|
1207
|
+
// src/parsers/wix/api.ts
|
|
1208
|
+
import { z as z2 } from "zod";
|
|
1209
|
+
|
|
1210
|
+
// src/parsers/wix/ricos-to-html.ts
|
|
1211
|
+
/**
 * Escapes the characters that are significant in HTML text and in
 * double-quoted attribute values: `&`, `<`, `>` and `"`.
 *
 * @param text - String to escape.
 * @returns The string with those four characters replaced by entities.
 */
function escapeHtml(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  // Single pass, so the "&" of an entity produced here is never re-escaped.
  return text.replace(/[&<>"]/g, (character) => entities[character]);
}
|
|
1214
|
+
/**
 * Renders a Ricos TEXT node: the escaped text, wrapped once per decoration in
 * list order (so later decorations end up outermost). BOLD, ITALIC and
 * UNDERLINE map to strong/em/u; LINK wraps in an anchor when it carries a
 * URL; other decoration types are ignored.
 */
function renderTextNode(node) {
  const data = node.textData;
  let html = escapeHtml(data?.text ?? "");
  for (const decoration of data?.decorations ?? []) {
    const kind = decoration.type;
    if (kind === "BOLD") {
      html = `<strong>${html}</strong>`;
    } else if (kind === "ITALIC") {
      html = `<em>${html}</em>`;
    } else if (kind === "UNDERLINE") {
      html = `<u>${html}</u>`;
    } else if (kind === "LINK") {
      const href = decoration.linkData?.link?.url;
      if (href) {
        html = `<a href="${escapeHtml(href)}">${html}</a>`;
      }
    }
  }
  return html;
}
|
|
1239
|
+
/**
 * Renders a list of Ricos nodes and concatenates the results; a missing list
 * renders as the empty string.
 */
function renderChildren(nodes) {
  if (nodes == null) return "";
  const parts = nodes.map((child) => renderRicosNode(child));
  return parts.join("");
}
|
|
1242
|
+
/**
 * Renders one Ricos node (and its descendants) to an HTML string.
 *
 * The node type is matched case-insensitively. Unknown types render only
 * their children. HTML nodes are emitted verbatim, without escaping.
 */
function renderRicosNode(node) {
  const kind = (node.type ?? "").toUpperCase();

  // Container types that simply wrap their children in one element.
  const wrapperTags = /* @__PURE__ */ new Map([
    ["PARAGRAPH", "p"],
    ["BULLETED_LIST", "ul"],
    ["ORDERED_LIST", "ol"],
    ["LIST_ITEM", "li"],
    ["BLOCKQUOTE", "blockquote"]
  ]);
  const wrapperTag = wrapperTags.get(kind);
  if (wrapperTag !== void 0) {
    return `<${wrapperTag}>${renderChildren(node.nodes)}</${wrapperTag}>`;
  }

  if (kind === "TEXT") {
    return renderTextNode(node);
  }
  if (kind === "HEADING") {
    // Clamp to h1..h6; level defaults to 2 when absent.
    const level = Math.min(6, Math.max(1, node.headingData?.level ?? 2));
    return `<h${level}>${renderChildren(node.nodes)}</h${level}>`;
  }
  if (kind === "HTML") {
    return node.htmlData?.html ?? "";
  }
  if (kind === "IMAGE") {
    const image = node.imageData?.image;
    const src = image?.src?.url;
    if (!src) return "";
    const alt = image?.altText ? ` alt="${escapeHtml(node.imageData.image.altText)}"` : "";
    return `<figure><img src="${escapeHtml(src)}"${alt} /></figure>`;
  }
  if (kind === "BUTTON") {
    const label = escapeHtml(node.buttonData?.text ?? "Link");
    const href = node.buttonData?.link?.url ?? node.linkData?.link?.url;
    return href ? `<p><a href="${escapeHtml(href)}">${label}</a></p>` : `<span>${label}</span>`;
  }
  if (kind === "CODE_BLOCK") {
    return `<pre><code>${renderChildren(node.nodes)}</code></pre>`;
  }
  if (kind === "DIVIDER") {
    return "<hr />";
  }
  return renderChildren(node.nodes);
}
|
|
1283
|
+
/**
 * Converts a Ricos rich-content document to HTML.
 *
 * @returns The rendered HTML, or "" when `richContent` is not an object or
 *   has no non-empty `nodes` array.
 */
function ricosToHtml(richContent) {
  const isObject = Boolean(richContent) && typeof richContent === "object";
  const nodes = isObject ? richContent.nodes : void 0;
  const hasNodes = Array.isArray(nodes) && nodes.length > 0;
  return hasNodes ? renderChildren(nodes) : "";
}
|
|
1289
|
+
|
|
1290
|
+
// src/parsers/wix/map-wire.ts
|
|
1291
|
+
/** True for non-null objects that are not arrays. */
function isRecord(value) {
  if (!value || typeof value !== "object") return false;
  return !Array.isArray(value);
}
|
|
1294
|
+
/** Returns `value` when it is a non-array object, otherwise `undefined`. */
function asRecord(value) {
  if (isRecord(value)) {
    return value;
  }
  return void 0;
}
|
|
1297
|
+
/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function asString(value) {
  if (typeof value !== "string") return void 0;
  return value.length > 0 ? value : void 0;
}
|
|
1300
|
+
/**
 * Returns the string elements of `value` (empty strings included), or an
 * empty array when `value` is not an array.
 */
function asStringArray(value) {
  return Array.isArray(value) ? value.filter((entry) => typeof entry === "string") : [];
}
|
|
1304
|
+
/**
 * Joins a Wix `{ base, path }` URL record into one string with exactly one
 * slash between the parts. When only one part is a non-empty string, that
 * part is returned; with neither (or no record) the result is `undefined`.
 */
function buildWixPageUrl(url) {
  if (!url) return void 0;
  const base = asString(url.base);
  const path = asString(url.path);
  if (!base || !path) {
    return base ?? path;
  }
  const trimmedBase = base.replace(/\/$/, "");
  const rootedPath = path.startsWith("/") ? path : `/${path}`;
  return `${trimmedBase}${rootedPath}`;
}
|
|
1313
|
+
/**
 * Reads one SEO value from a Wix `seoData.tags` list.
 *
 * For `prop === "title"` the first tag of type "title" decides the result
 * (`props.children`, else the tag's own `children`). For
 * `prop === "description"` the first "meta" tag whose `props.name` is
 * "description" decides it (`props.content`).
 *
 * @returns The string value, or `undefined` when absent or not a non-empty string.
 */
function seoField(seoData, prop) {
  const tags = asRecord(seoData)?.tags;
  if (!Array.isArray(tags)) return void 0;
  for (const entry of tags) {
    const tag = asRecord(entry);
    if (!tag) continue;
    const props = asRecord(tag.props);
    const isTitleTag = tag.type === "title" && prop === "title";
    if (isTitleTag) {
      return asString(props?.children) ?? asString(tag.children);
    }
    const isDescriptionTag = tag.type === "meta" && prop === "description" && props?.name === "description";
    if (isDescriptionTag) {
      return asString(props.content);
    }
  }
  return void 0;
}
|
|
1329
|
+
/**
 * Produces the HTML body of a Wix post.
 *
 * Rich content is preferred. When it renders to nothing, the plain-text
 * `contentText` is converted instead: blank-line runs become paragraph
 * breaks and single newlines become `<br />`.
 *
 * @param post - Wire post record.
 * @returns HTML string; "" when the post has neither kind of content.
 */
function postContentHtml(post) {
  const richHtml = ricosToHtml(post.richContent);
  if (richHtml.trim()) return richHtml;
  const plain = asString(post.contentText);
  if (!plain) return "";
  // contentText is plain text, so it must be escaped before being embedded in
  // markup; line endings are normalized first so CRLF paragraphs are detected.
  const escaped = escapeHtml(plain.replace(/\r\n?/g, "\n"));
  return `<p>${escaped.replace(/\n\n+/g, "</p><p>").replace(/\n/g, "<br />")}</p>`;
}
|
|
1336
|
+
/**
 * Maps a Wix wire category to `{ id, name, slug }`.
 *
 * The name comes from `label`, falling back to `title`; the slug from `slug`,
 * falling back to the name. `exportedAt` is accepted but not used.
 *
 * @returns The mapped category, or `undefined` when the record, id, name or
 *   resulting slug is missing.
 */
function mapWireCategory(wire, exportedAt) {
  const category = asRecord(wire);
  if (!category) return void 0;
  const id = asString(category.id);
  const name = asString(category.label) ?? asString(category.title);
  if (!(id && name)) return void 0;
  const slug = sanitizeSlug(asString(category.slug) ?? name);
  if (!slug) return void 0;
  void exportedAt;
  return { id, name, slug };
}
|
|
1347
|
+
/**
 * Maps a Wix wire tag to `{ id, name, slug }`.
 *
 * The name comes from `label`, falling back to `slug`; the slug from `slug`,
 * falling back to the name.
 *
 * @returns The mapped tag, or `undefined` when the record, id, name or
 *   resulting slug is missing.
 */
function mapWireTag(wire) {
  const tag = asRecord(wire);
  if (!tag) return void 0;
  const id = asString(tag.id);
  const name = asString(tag.label) ?? asString(tag.slug);
  if (!(id && name)) return void 0;
  const slug = sanitizeSlug(asString(tag.slug) ?? name);
  return slug ? { id, name, slug } : void 0;
}
|
|
1357
|
+
/**
 * Maps a Wix wire post to the export's post shape.
 *
 * Category and tag ids are translated to slugs through `lookup`; ids with no
 * known slug are dropped. Hashtags are appended to the tag slugs when not
 * already present. The status is always "published".
 *
 * @param wire - Raw post record from the API.
 * @param lookup - `{ categorySlugsById, tagSlugsById }` maps.
 * @returns The mapped post, or `undefined` when the record or its id is missing.
 */
function mapWirePost(wire, lookup) {
  const post = asRecord(wire);
  if (!post) return void 0;
  const id = asString(post.id);
  if (!id) return void 0;

  const title = asString(post.title) ?? "Untitled";
  const toSlugs = (ids, slugsById) => asStringArray(ids).map((key) => slugsById.get(key)).filter(Boolean);
  const categorySlugs = toSlugs(post.categoryIds, lookup.categorySlugsById);
  const tagSlugs = toSlugs(post.tagIds, lookup.tagSlugsById);
  for (const hashtag of asStringArray(post.hashtags)) {
    const hashtagSlug = sanitizeSlug(hashtag);
    if (hashtagSlug && !tagSlugs.includes(hashtagSlug)) {
      tagSlugs.push(hashtagSlug);
    }
  }

  return {
    id,
    title,
    slug: sanitizeSlug(asString(post.slug) ?? title),
    url: buildWixPageUrl(asRecord(post.url)),
    excerpt: asString(post.excerpt),
    contentHtml: postContentHtml(post),
    publishedAt: asString(post.firstPublishedDate) ?? asString(post.lastPublishedDate),
    status: "published",
    categorySlugs,
    tagSlugs,
    featuredImageUrl: asString(asRecord(post.heroImage)?.url),
    seoTitle: seoField(post.seoData, "title"),
    seoDescription: seoField(post.seoData, "description")
  };
}
|
|
1389
|
+
/**
 * Maps the `posts` array of a list response, dropping entries that cannot be
 * mapped. Returns an empty array when the response has no `posts` array.
 */
function mapWireListPostsResponse(wire, lookup) {
  const wirePosts = asRecord(wire)?.posts;
  if (!Array.isArray(wirePosts)) return [];
  const mapped = [];
  for (const entry of wirePosts) {
    const post = mapWirePost(entry, lookup);
    if (post) mapped.push(post);
  }
  return mapped;
}
|
|
1394
|
+
/**
 * Maps the `categories` array of a list response, dropping entries that
 * cannot be mapped. Returns an empty array when the response has no
 * `categories` array.
 */
function mapWireListCategoriesResponse(wire) {
  const wireCategories = asRecord(wire)?.categories;
  if (!Array.isArray(wireCategories)) return [];
  const mapped = [];
  for (const entry of wireCategories) {
    const category = mapWireCategory(entry);
    if (category) mapped.push(category);
  }
  return mapped;
}
|
|
1399
|
+
/**
 * Maps the `tags` array of a list response, dropping entries that cannot be
 * mapped. Returns an empty array when the response has no `tags` array.
 */
function mapWireListTagsResponse(wire) {
  const wireTags = asRecord(wire)?.tags;
  if (!Array.isArray(wireTags)) return [];
  const mapped = [];
  for (const entry of wireTags) {
    const tag = mapWireTag(entry);
    if (tag) mapped.push(tag);
  }
  return mapped;
}
|
|
1404
|
+
|
|
1405
|
+
// src/parsers/wix/api.ts
|
|
1406
|
+
// Origin that relative Wix API paths are appended to.
var WIX_API_BASE = "https://www.wixapis.com";

/** Authentication context for Wix API calls. */
var wixAuthContextSchema = z2.object({
  /** Full Authorization header value (API key or Bearer token). */
  authorization: z2.string().min(1),
  siteId: z2.string().min(1),
  accountId: z2.string().optional(),
  extraHeaders: z2.record(z2.string()).optional()
});

/**
 * Options accepted by the Wix collection client. Every field except `auth`
 * is optional; the defaults are the ones declared below.
 */
var wixClientOptionsSchema = z2.object({
  auth: wixAuthContextSchema,
  pageSize: z2.number().int().min(1).max(100).default(50),
  maxRetries: z2.number().int().min(0).max(10).default(3),
  retryBaseDelayMs: z2.number().int().min(0).default(500),
  maxRetryDelayMs: z2.number().int().min(0).default(8e3),
  requestIntervalMs: z2.number().int().min(0).default(200),
  fetchImpl: z2.custom().optional(),
  /** Include draft posts when the API key has permission. */
  includeDrafts: z2.boolean().default(false)
});
|
|
1425
|
+
/** Returns a promise that resolves after `ms` milliseconds. */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
1428
|
+
/**
 * Removes items with duplicate `id`. The last item with a given id wins but
 * takes the position of that id's first occurrence.
 */
function dedupeById(items) {
  const latestById = /* @__PURE__ */ new Map();
  for (const entry of items) {
    latestById.set(entry.id, entry);
  }
  return Array.from(latestById.values());
}
|
|
1433
|
+
/**
 * Removes items with duplicate `slug`. The last item with a given slug wins
 * but takes the position of that slug's first occurrence.
 */
function dedupeBySlug(items) {
  const latestBySlug = /* @__PURE__ */ new Map();
  for (const entry of items) {
    latestBySlug.set(entry.slug, entry);
  }
  return Array.from(latestBySlug.values());
}
|
|
1438
|
+
/**
 * Decides whether an offset-paged listing has been fully consumed.
 *
 * @param items - Items of the page just received.
 * @param paging - Paging metadata; `total` and `offset` are used when both are numbers.
 * @param pageSize - Requested page size.
 * @returns `true` when the page is missing, empty or shorter than `pageSize`,
 *   or when `offset + items.length` has reached `total`; `false` otherwise.
 */
function pagingComplete(items, paging, pageSize) {
  const received = Array.isArray(items) ? items.length : 0;
  if (received === 0 || received < pageSize) return true;
  const total = paging?.total;
  const offset = paging?.offset;
  const hasCounters = typeof total === "number" && typeof offset === "number";
  return hasCounters ? offset + received >= total : false;
}
|
|
1446
|
+
var WixCollectionClient = class {
|
|
1447
|
+
auth;
|
|
1448
|
+
pageSize;
|
|
1449
|
+
maxRetries;
|
|
1450
|
+
retryBaseDelayMs;
|
|
1451
|
+
maxRetryDelayMs;
|
|
1452
|
+
requestIntervalMs;
|
|
1453
|
+
fetchImpl;
|
|
1454
|
+
includeDrafts;
|
|
1455
|
+
lastRequestAt = 0;
|
|
1456
|
+
constructor(options) {
|
|
1457
|
+
const parsed = wixClientOptionsSchema.parse(options);
|
|
1458
|
+
this.auth = parsed.auth;
|
|
1459
|
+
this.pageSize = parsed.pageSize;
|
|
1460
|
+
this.maxRetries = parsed.maxRetries;
|
|
1461
|
+
this.retryBaseDelayMs = parsed.retryBaseDelayMs;
|
|
1462
|
+
this.maxRetryDelayMs = parsed.maxRetryDelayMs;
|
|
1463
|
+
this.requestIntervalMs = parsed.requestIntervalMs;
|
|
1464
|
+
this.fetchImpl = parsed.fetchImpl ?? fetch;
|
|
1465
|
+
this.includeDrafts = parsed.includeDrafts;
|
|
1466
|
+
}
|
|
1467
|
+
buildUrl(path, query) {
|
|
1468
|
+
const url = new URL(path.startsWith("http") ? path : `${WIX_API_BASE}${path}`);
|
|
1469
|
+
if (query) {
|
|
1470
|
+
for (const [key, value] of Object.entries(query)) {
|
|
1471
|
+
if (value === void 0) continue;
|
|
1472
|
+
url.searchParams.set(key, String(value));
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
return url.toString();
|
|
1476
|
+
}
|
|
1477
|
+
async fetchJson(path, query) {
|
|
1478
|
+
const response = await this.requestWithRetry(this.buildUrl(path, query));
|
|
1479
|
+
return response.json();
|
|
1480
|
+
}
|
|
1481
|
+
async listAllCategories() {
|
|
1482
|
+
const categories = [];
|
|
1483
|
+
let offset = 0;
|
|
1484
|
+
while (true) {
|
|
1485
|
+
const wire = await this.fetchJson("/blog/v3/categories", {
|
|
1486
|
+
"paging.limit": this.pageSize,
|
|
1487
|
+
"paging.offset": offset,
|
|
1488
|
+
fieldsets: "URL"
|
|
1489
|
+
});
|
|
1490
|
+
const batch = mapWireListCategoriesResponse(wire);
|
|
1491
|
+
categories.push(...batch);
|
|
1492
|
+
const paging = wire.pagingMetadata;
|
|
1493
|
+
if (pagingComplete(batch, paging, this.pageSize)) break;
|
|
1494
|
+
offset += batch.length;
|
|
1495
|
+
if (batch.length === 0) break;
|
|
1496
|
+
}
|
|
1497
|
+
return dedupeBySlug(categories);
|
|
1498
|
+
}
|
|
1499
|
+
async listAllTags() {
|
|
1500
|
+
const tags = [];
|
|
1501
|
+
let offset = 0;
|
|
1502
|
+
while (true) {
|
|
1503
|
+
const wire = await this.fetchJson("/blog/v3/tags", {
|
|
1504
|
+
"paging.limit": this.pageSize,
|
|
1505
|
+
"paging.offset": offset
|
|
1506
|
+
});
|
|
1507
|
+
const batch = mapWireListTagsResponse(wire);
|
|
1508
|
+
tags.push(...batch);
|
|
1509
|
+
const paging = wire.pagingMetadata;
|
|
1510
|
+
if (pagingComplete(batch, paging, this.pageSize)) break;
|
|
1511
|
+
offset += batch.length;
|
|
1512
|
+
if (batch.length === 0) break;
|
|
1513
|
+
}
|
|
1514
|
+
return dedupeBySlug(tags);
|
|
1515
|
+
}
|
|
1516
|
+
async listAllPosts(lookup) {
|
|
1517
|
+
const posts = [];
|
|
1518
|
+
let offset = 0;
|
|
1519
|
+
while (true) {
|
|
1520
|
+
const wire = await this.fetchJson("/blog/v3/posts", {
|
|
1521
|
+
"paging.limit": this.pageSize,
|
|
1522
|
+
"paging.offset": offset,
|
|
1523
|
+
fieldsets: "URL,RICH_CONTENT,SEO",
|
|
1524
|
+
sort: "PUBLISHED_DATE_DESC"
|
|
1525
|
+
});
|
|
1526
|
+
const batch = mapWireListPostsResponse(wire, lookup);
|
|
1527
|
+
posts.push(...batch);
|
|
1528
|
+
const paging = wire.pagingMetadata;
|
|
1529
|
+
if (pagingComplete(batch, paging, this.pageSize)) break;
|
|
1530
|
+
offset += batch.length;
|
|
1531
|
+
if (batch.length === 0) break;
|
|
1532
|
+
}
|
|
1533
|
+
if (this.includeDrafts) {
|
|
1534
|
+
posts.push(...await this.listDraftPosts(lookup));
|
|
1535
|
+
}
|
|
1536
|
+
return dedupeById(posts);
|
|
1537
|
+
}
|
|
1538
|
+
async listDraftPosts(lookup) {
|
|
1539
|
+
const posts = [];
|
|
1540
|
+
let offset = 0;
|
|
1541
|
+
while (true) {
|
|
1542
|
+
const wire = await this.fetchJson("/blog/v3/draft-posts", {
|
|
1543
|
+
"paging.limit": this.pageSize,
|
|
1544
|
+
"paging.offset": offset,
|
|
1545
|
+
fieldsets: "URL,RICH_CONTENT,SEO"
|
|
1546
|
+
});
|
|
1547
|
+
const batch = mapWireListPostsResponse(
|
|
1548
|
+
{ posts: wire.draftPosts ?? [] },
|
|
1549
|
+
lookup
|
|
1550
|
+
).map((post) => ({ ...post, status: "draft" }));
|
|
1551
|
+
posts.push(...batch);
|
|
1552
|
+
const paging = wire.pagingMetadata;
|
|
1553
|
+
if (pagingComplete(batch, paging, this.pageSize)) break;
|
|
1554
|
+
offset += batch.length;
|
|
1555
|
+
if (batch.length === 0) break;
|
|
1556
|
+
}
|
|
1557
|
+
return posts;
|
|
1558
|
+
}
|
|
1559
|
+
async collectExport() {
|
|
1560
|
+
const categories = await this.listAllCategories();
|
|
1561
|
+
const tags = await this.listAllTags();
|
|
1562
|
+
const categorySlugsById = new Map(categories.map((category) => [category.id, category.slug]));
|
|
1563
|
+
const tagSlugsById = new Map(tags.map((tag) => [tag.id, tag.slug]));
|
|
1564
|
+
const posts = await this.listAllPosts({ categorySlugsById, tagSlugsById });
|
|
1565
|
+
return {
|
|
1566
|
+
exportVersion: 1,
|
|
1567
|
+
exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1568
|
+
site: { siteId: this.auth.siteId },
|
|
1569
|
+
posts,
|
|
1570
|
+
pages: [],
|
|
1571
|
+
categories,
|
|
1572
|
+
tags
|
|
1573
|
+
};
|
|
1574
|
+
}
|
|
1575
|
+
buildHeaders() {
|
|
1576
|
+
const headers = {
|
|
1577
|
+
Accept: "application/json",
|
|
1578
|
+
Authorization: this.auth.authorization,
|
|
1579
|
+
"wix-site-id": this.auth.siteId,
|
|
1580
|
+
...this.auth.extraHeaders ?? {}
|
|
1581
|
+
};
|
|
1582
|
+
if (this.auth.accountId) {
|
|
1583
|
+
headers["wix-account-id"] = this.auth.accountId;
|
|
1584
|
+
}
|
|
1585
|
+
return headers;
|
|
1586
|
+
}
|
|
1587
|
+
async requestWithRetry(url) {
|
|
1588
|
+
let attempt = 0;
|
|
1589
|
+
while (true) {
|
|
1590
|
+
await this.throttle();
|
|
1591
|
+
const response = await this.fetchImpl(url, {
|
|
1592
|
+
method: "GET",
|
|
1593
|
+
headers: this.buildHeaders()
|
|
1594
|
+
});
|
|
1595
|
+
if (response.ok) return response;
|
|
1596
|
+
const retryable = response.status === 429 || response.status >= 500;
|
|
1597
|
+
if (!retryable || attempt >= this.maxRetries) {
|
|
1598
|
+
const detail = await response.text().catch(() => "");
|
|
1599
|
+
throw new Error(
|
|
1600
|
+
`Wix HTTP ${response.status}${detail ? `: ${detail.slice(0, 200)}` : ""}`
|
|
1601
|
+
);
|
|
1602
|
+
}
|
|
1603
|
+
const retryAfter = Number.parseInt(response.headers.get("retry-after") ?? "", 10);
|
|
1604
|
+
const delay = Number.isFinite(retryAfter) ? retryAfter * 1e3 : Math.min(this.maxRetryDelayMs, this.retryBaseDelayMs * 2 ** attempt);
|
|
1605
|
+
await sleep2(delay);
|
|
1606
|
+
attempt += 1;
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
async throttle() {
|
|
1610
|
+
if (this.requestIntervalMs <= 0) return;
|
|
1611
|
+
const elapsed = Date.now() - this.lastRequestAt;
|
|
1612
|
+
if (elapsed < this.requestIntervalMs) {
|
|
1613
|
+
await sleep2(this.requestIntervalMs - elapsed);
|
|
1614
|
+
}
|
|
1615
|
+
this.lastRequestAt = Date.now();
|
|
1616
|
+
}
|
|
1617
|
+
};
|
|
1618
|
+
/**
 * Structural check for a version-1 export document.
 *
 * @param {unknown} value - Candidate document.
 * @returns {boolean} True when `value` is an object with `exportVersion === 1`
 *   and at least one of `posts` / `pages` is an array.
 */
function isWixExport(value) {
  if (typeof value !== "object" || value === null) return false;
  if (value.exportVersion !== 1) return false;
  return Array.isArray(value.posts) || Array.isArray(value.pages);
}
|
|
1623
|
+
/**
 * Validate an export document and back-fill missing slugs.
 *
 * Note: this mutates the posts/pages of `value` in place — entries without a
 * slug receive one derived from their title.
 *
 * @param {unknown} value - Candidate export document.
 * @returns {object} The same `value`, once validated.
 * @throws {Error} When the shape is wrong or both posts and pages are empty.
 */
function assertWixExport(value) {
  if (!isWixExport(value)) {
    throw new Error("Invalid Wix export: expected exportVersion 1 with posts[] and/or pages[]");
  }
  const postCount = value.posts?.length ?? 0;
  const pageCount = value.pages?.length ?? 0;
  if (postCount === 0 && pageCount === 0) {
    throw new Error("Invalid Wix export: no posts or pages");
  }
  const ensureSlug = (entry) => {
    if (!entry.slug) entry.slug = sanitizeSlug(entry.title);
  };
  for (const post of value.posts ?? []) ensureSlug(post);
  for (const page of value.pages ?? []) ensureSlug(page);
  return value;
}
|
|
1638
|
+
|
|
1639
|
+
// src/parsers/wix/snapshot.ts
|
|
1640
|
+
import * as cheerio from "cheerio";
|
|
1641
|
+
import { readFile as readFile3 } from "fs/promises";
|
|
1642
|
+
import { XMLParser as XMLParser2 } from "fast-xml-parser";
|
|
1643
|
+
import { z as z3 } from "zod";
|
|
1644
|
+
// Candidate selectors for a page's main content container, listed in the
// order pickMainRoot() tries them; the first selector that matches wins.
// "body" is last so it acts as the catch-all.
var MAIN_CONTENT_SELECTORS = [
  "main",
  "article",
  '[role="main"]',
  "#SITE_PAGES main",
  "#site-root main",
  "#PAGES_CONTAINER",
  "body"
];
|
|
1653
|
+
// Options accepted by WixPageSnapshotCollector; parsing applies the defaults below.
var wixSnapshotClientOptionsSchema = z3.object({
  // Optional fetch replacement; the collector falls back to the global fetch.
  fetchImpl: z3.custom().optional(),
  // Number of retries after a retryable HTTP failure (0-5, default 2).
  maxRetries: z3.number().int().min(0).max(5).default(2),
  // Base delay in ms for the collector's exponential backoff (default 300).
  retryBaseDelayMs: z3.number().int().min(0).default(300),
  // Minimum spacing in ms between requests; 0 disables throttling (default 150).
  requestIntervalMs: z3.number().int().min(0).default(150)
});
|
|
1659
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep3(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
1662
|
+
/**
 * Derive a slug from the last non-empty path segment of `url`.
 *
 * @param {string} url - Page URL.
 * @param {string} fallback - Text to slugify when the URL cannot be parsed or has no path segment.
 * @returns {string} Result of sanitizeSlug() on the segment or the fallback.
 */
function slugFromUrl(url, fallback) {
  try {
    const { pathname } = new URL(url);
    const tail = pathname.split("/").filter((part) => part.length > 0).pop();
    if (tail) return sanitizeSlug(tail);
  } catch {
    // Unparseable URL: deliberately fall through to the fallback text.
  }
  return sanitizeSlug(fallback);
}
|
|
1671
|
+
/**
 * Heuristic: does the document look like a login page rather than content?
 *
 * Signals, checked in order: the raw markup mentions "members-login" or
 * "login-bar" (case-insensitive); a password input together with a form whose
 * action contains "login"; an element with data-testid="sign-in".
 *
 * @param {Function} $ - Selector function returning an object with `length` (cheerio root).
 * @param {string} html - Raw page markup.
 * @returns {boolean}
 */
function looksLikeLoginWall($, html) {
  const haystack = html.toLowerCase();
  const markers = ["members-login", "login-bar"];
  if (markers.some((marker) => haystack.includes(marker))) return true;

  const hasPasswordField = $('input[type="password"]').length > 0;
  if (hasPasswordField && $('form[action*="login"]').length > 0) return true;

  return $('[data-testid="sign-in"]').length > 0;
}
|
|
1678
|
+
/**
 * Return the first element matched by MAIN_CONTENT_SELECTORS, trying the
 * selectors in order; falls back to `$("body")` when nothing matches.
 *
 * @param {Function} $ - cheerio-style selector function.
 * @returns {object} The selection to treat as the content root.
 */
function pickMainRoot($) {
  for (let index = 0; index < MAIN_CONTENT_SELECTORS.length; index += 1) {
    const candidate = $(MAIN_CONTENT_SELECTORS[index]).first();
    if (candidate.length > 0) return candidate;
  }
  return $("body");
}
|
|
1685
|
+
/**
 * Parse a page and extract its main content.
 *
 * @param {string} html - Raw page markup.
 * @returns {{contentHtml: string, title: (string|undefined), empty: boolean, loginWall: boolean}}
 *   `title` comes from <title>, then og:title, then the first <h1>;
 *   `empty` is true when the root has fewer than 20 characters of text.
 */
function extractMainContentHtml(html) {
  const $ = cheerio.load(html, { xml: false });
  // The login heuristic and the title are read before chrome/script nodes are stripped below.
  const loginWall = looksLikeLoginWall($, html);

  const titleSources = [
    () => $("title").first().text().trim(),
    () => $('meta[property="og:title"]').attr("content")?.trim(),
    () => $("h1").first().text().trim()
  ];
  let title;
  for (const readTitle of titleSources) {
    const candidate = readTitle();
    if (candidate) {
      title = candidate;
      break;
    }
  }

  const root = pickMainRoot($);
  root.find("script, style, noscript, nav, header, footer, iframe").remove();
  const contentHtml = root.html()?.trim() ?? "";
  const visibleText = root.text().replace(/\s+/g, " ").trim();
  return { contentHtml, title, empty: visibleText.length < 20, loginWall };
}
|
|
1696
|
+
/**
 * Split a newline- or comma-separated list and keep only http(s) URLs.
 *
 * @param {string} raw - File contents or inline list.
 * @returns {string[]} Trimmed entries that start with http:// or https:// (case-insensitive), in input order.
 */
function parseUrlList(raw) {
  const urls = [];
  for (const piece of raw.split(/[\n,]+/)) {
    const candidate = piece.trim();
    if (candidate.length > 0 && /^https?:\/\//i.test(candidate)) {
      urls.push(candidate);
    }
  }
  return urls;
}
|
|
1699
|
+
/**
 * Extract the `<loc>` values from a sitemap (`urlset/url`) and/or a sitemap
 * index (`sitemapindex/sitemap`).
 *
 * @param {string} xml - Sitemap document.
 * @returns {string[]} Unique http(s) URLs in document order (urlset entries first).
 */
function parseSitemapUrls(xml) {
  const doc = new XMLParser2({ ignoreAttributes: true, removeNSPrefix: true }).parse(xml);
  const found = [];
  // The parser yields a single object for one child and an array for several.
  const gather = (entries) => {
    if (!entries) return;
    for (const entry of Array.isArray(entries) ? entries : [entries]) {
      if (entry.loc) found.push(entry.loc.trim());
    }
  };
  gather(doc.urlset?.url);
  gather(doc.sitemapindex?.sitemap);

  const unique = /* @__PURE__ */ new Set();
  for (const url of found) {
    if (/^https?:\/\//i.test(url)) unique.add(url);
  }
  return [...unique];
}
|
|
1719
|
+
/**
 * Read a URL list from disk. Content that starts with "<" (after trimming) is
 * parsed as a sitemap; anything else as a newline/comma separated list.
 *
 * @param {string} filePath - Path to the list or sitemap file.
 * @returns {Promise<string[]>} The URLs found.
 */
async function loadUrlListFile(filePath) {
  const contents = await readFile3(filePath, "utf8");
  const looksLikeXml = contents.trim().startsWith("<");
  return looksLikeXml ? parseSitemapUrls(contents) : parseUrlList(contents);
}
|
|
1724
|
+
/**
 * Fetches public pages over HTTP and turns their main content into page
 * records. Pages that cannot be fetched, look like a login wall, or yield no
 * meaningful text are reported as "gaps" instead of pages.
 */
var WixPageSnapshotCollector = class {
  fetchImpl;
  maxRetries;
  retryBaseDelayMs;
  requestIntervalMs;
  // Timestamp (ms) of the most recent request; 0 means "never".
  lastRequestAt = 0;

  /**
   * @param {object} [options] - Validated against wixSnapshotClientOptionsSchema.
   */
  constructor(options = {}) {
    const { fetchImpl, maxRetries, retryBaseDelayMs, requestIntervalMs } = wixSnapshotClientOptionsSchema.parse(options);
    this.fetchImpl = fetchImpl ?? fetch;
    this.maxRetries = maxRetries;
    this.retryBaseDelayMs = retryBaseDelayMs;
    this.requestIntervalMs = requestIntervalMs;
  }

  /**
   * Process targets sequentially. A target may carry pre-fetched `html`, in
   * which case no request is made.
   *
   * @param {Array<{url: string, html?: string, title?: string, slug?: string, isHomePage?: boolean}>} targets
   * @returns {Promise<{pages: object[], gaps: Array<{url: string, code: string, message: string}>}>}
   */
  async collectPages(targets) {
    const pages = [];
    const gaps = [];
    const reportGap = (url, code, message) => {
      gaps.push({ url, code, message });
    };

    for (const target of targets) {
      let html;
      try {
        html = target.html ?? await this.fetchHtml(target.url);
      } catch (error) {
        reportGap(target.url, "fetch_failed", error instanceof Error ? error.message : String(error));
        continue;
      }

      const extracted = extractMainContentHtml(html);
      if (extracted.loginWall) {
        reportGap(target.url, "login_wall", "Page appears to require authentication");
        continue;
      }
      if (extracted.empty) {
        reportGap(target.url, "empty_extract", "No meaningful content found in main/article containers");
        continue;
      }

      // Explicit target metadata wins over what was extracted from the markup.
      const title = target.title ?? extracted.title ?? "Untitled";
      const slug = target.slug ?? slugFromUrl(target.url, title);
      pages.push({
        id: `page:${slug}`,
        title,
        slug,
        url: target.url,
        contentHtml: extracted.contentHtml,
        isHomePage: target.isHomePage,
        status: "published"
      });
    }
    return { pages, gaps };
  }

  /**
   * Convenience wrapper: the first URL is flagged as the home page when its path is "/".
   *
   * @param {string[]} urls - Absolute URLs.
   */
  async collectFromUrlList(urls) {
    const targets = urls.map((url, index) => ({
      url,
      isHomePage: index === 0 && new URL(url).pathname === "/"
    }));
    return this.collectPages(targets);
  }

  /**
   * GET a page as text, retrying 429/5xx responses with exponential backoff.
   *
   * @param {string} url
   * @returns {Promise<string>} Response body.
   * @throws {Error} "Snapshot fetch HTTP <status> for <url>" when not retryable or retries are exhausted.
   */
  async fetchHtml(url) {
    for (let attempt = 0; ; attempt += 1) {
      await this.throttle();
      const response = await this.fetchImpl(url, {
        method: "GET",
        headers: { Accept: "text/html,application/xhtml+xml" }
      });
      if (response.ok) {
        return response.text();
      }
      const retryable = response.status === 429 || response.status >= 500;
      const exhausted = attempt >= this.maxRetries;
      if (!retryable || exhausted) {
        throw new Error(`Snapshot fetch HTTP ${response.status} for ${url}`);
      }
      await sleep3(this.retryBaseDelayMs * 2 ** attempt);
    }
  }

  /** Enforce the minimum spacing between requests (disabled when the interval is <= 0). */
  async throttle() {
    const interval = this.requestIntervalMs;
    if (interval <= 0) return;
    const remaining = interval - (Date.now() - this.lastRequestAt);
    if (remaining > 0) {
      await sleep3(remaining);
    }
    this.lastRequestAt = Date.now();
  }
};
|
|
1819
|
+
|
|
1820
|
+
// src/parsers/wix/parse-export.ts
|
|
1821
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
1822
|
+
import { basename as basename2, extname } from "path";
|
|
1823
|
+
import { XMLParser as XMLParser3 } from "fast-xml-parser";
|
|
1824
|
+
// Platform identifier stamped into the `source.platform` field by sourceMeta3().
var PLATFORM3 = "wix";
|
|
1825
|
+
/**
 * Normalise "zero, one or many" parser output to an array.
 *
 * @param {*} value - `undefined`, a single value, or an array.
 * @returns {Array} `[]` for `undefined`, the array itself when given one, otherwise `[value]`.
 */
function asArray2(value) {
  if (Array.isArray(value)) return value;
  return value === void 0 ? [] : [value];
}
|
|
1829
|
+
/**
 * Coerce a parsed XML node to its text.
 *
 * - `undefined` / `null` -> ""
 * - primitives -> `String(value)`
 * - element objects -> their "#text" entry, or "" when the element carries
 *   only attributes/children (previously this produced "[object Object]")
 * - arrays (repeated elements) -> comma-joined text of each entry, which
 *   matches the previous result for arrays of primitives
 *
 * @param {*} value - Node as produced by the XML parser.
 * @returns {string}
 */
function textValue2(value) {
  if (value === void 0 || value === null) return "";
  if (Array.isArray(value)) {
    return value.map((entry) => textValue2(entry)).join(",");
  }
  if (typeof value === "object") {
    // An element without text content has no usable string form.
    return "#text" in value ? String(value["#text"] ?? "") : "";
  }
  return String(value);
}
|
|
1837
|
+
/**
 * Build the `source` metadata attached to every emitted entity.
 *
 * @param {string} id - Source-side identifier.
 * @param {string} [url] - Original URL; falsy values are stored as `undefined`.
 * @param {string} [exportedAt] - Export timestamp, passed through unchanged.
 * @returns {{platform: string, id: string, url: (string|undefined), path: *, exportedAt: *}}
 */
function sourceMeta3(id, url, exportedAt) {
  const normalizedUrl = url ? url : void 0;
  // linkToPath receives the raw argument, not the normalised one.
  const path = linkToPath(url);
  return {
    platform: PLATFORM3,
    id,
    url: normalizedUrl,
    path,
    exportedAt
  };
}
|
|
1846
|
+
// Known image extensions. A Map is used so that extensions which happen to be
// Object.prototype member names ("constructor", "toString", ...) cannot
// resolve to inherited properties.
var WIX_IMAGE_MIME_BY_EXTENSION = /* @__PURE__ */ new Map([
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["png", "image/png"],
  ["gif", "image/gif"],
  ["webp", "image/webp"],
  ["avif", "image/avif"]
]);

/**
 * Guess an image MIME type from a filename's extension.
 *
 * @param {string} filename - File name; the text after the last "." is used, case-insensitively.
 * @returns {string|undefined} The MIME type, or `undefined` for unknown extensions.
 */
function guessMime3(filename) {
  const ext = filename.split(".").pop()?.toLowerCase();
  return ext ? WIX_IMAGE_MIME_BY_EXTENSION.get(ext) : void 0;
}
|
|
1858
|
+
/**
 * Parse a feed document, keeping attributes (prefixed "@_"), stripping
 * namespace prefixes, and leaving text untrimmed and untyped.
 *
 * @param {string} xml - RSS or Atom markup.
 * @returns {object} Parsed document tree.
 */
function parseXmlDocument(xml) {
  const parserOptions = {
    ignoreAttributes: false,
    attributeNamePrefix: "@_",
    removeNSPrefix: true,
    trimValues: false,
    parseTagValue: false
  };
  return new XMLParser3(parserOptions).parse(xml);
}
|
|
1868
|
+
/**
 * Decide whether a feed document is Atom or RSS by looking at its root element.
 *
 * The previous substring test (`includes("<feed")`) classified any RSS feed
 * that merely contained "<feed..." somewhere (for example `<feedburner:info>`
 * or `<feedback>`) as Atom, which then produced zero entries.
 *
 * @param {string} xml - Feed markup.
 * @returns {"atom"|"rss"} "atom" when the root element's local name is `feed`, otherwise "rss".
 */
function detectWixFeedFormat(xml) {
  // Drop comments so commented-out markup cannot be mistaken for the root.
  const withoutComments = xml.replace(/<!--[\s\S]*?-->/g, "");
  // The first "<name" is the root element: the XML declaration, processing
  // instructions and DOCTYPE all start with "<?" or "<!".
  const root = /<(?:[A-Za-z_][\w.-]*:)?([A-Za-z_][\w.-]*)/.exec(withoutComments);
  return root !== null && root[1] === "feed" ? "atom" : "rss";
}
|
|
1873
|
+
/**
 * Pick the HTML body of a feed item.
 *
 * Order of preference: `content` (string, or element with "#text" / nested
 * `encoded`), then `encoded` on the item itself, then `description`, then
 * `summary`.
 *
 * @param {object} item - Parsed RSS item or Atom entry.
 * @returns {string} HTML/text body, possibly empty.
 */
function getItemContentHtml(item) {
  const content = item.content;
  if (typeof content === "string") return content;
  // A null `content` used to crash on property access; it is now treated like a missing one.
  if (content !== void 0 && content !== null && typeof content === "object") {
    if (content["#text"] !== void 0) return textValue2(content["#text"]);
    if (content.encoded !== void 0) return textValue2(content.encoded);
  }
  if (item.encoded !== void 0) return textValue2(item.encoded);
  return textValue2(item.description ?? item.summary);
}
|
|
1885
|
+
/**
 * Derive a post slug from the last path segment of its link.
 *
 * @param {string} link - Item URL; may be empty or unparseable.
 * @param {string} title - Used when the link gives no segment.
 * @param {string} fallbackId - Used when the title is empty too.
 * @returns {string} Result of sanitizeSlug().
 */
function slugFromLink(link, title, fallbackId) {
  try {
    const { pathname } = new URL(link);
    const tail = pathname.split("/").filter((part) => part.length > 0).pop();
    if (tail) return sanitizeSlug(tail);
  } catch {
    // Not a parseable absolute URL: deliberately fall back to title / id.
  }
  return sanitizeSlug(title || fallbackId);
}
|
|
1895
|
+
/**
 * Resolve the canonical URL of a feed item.
 *
 * Handles both RSS (`<link>url</link>`) and Atom (`<link href="..." rel="..."/>`),
 * including items that carry several `link` elements. The first usable
 * candidate wins: a non-empty text link, or an attribute link whose `rel` is
 * absent or "alternate".
 *
 * Previously the whole `item.link` value was stringified first, so an Atom
 * link object (or any array of links) became "[object Object]" and the
 * href lookup was never reached.
 *
 * @param {object} item - Parsed RSS item or Atom entry.
 * @returns {string} The link, or "" when none is found.
 */
function itemLink(item) {
  for (const link of asArray2(item.link)) {
    if (link === void 0 || link === null) continue;
    if (typeof link === "string" || typeof link === "number") {
      const text = String(link);
      if (text) return text;
      continue;
    }
    if (typeof link === "object") {
      const rel = link["@_rel"];
      const href = link["@_href"];
      if (href && (!rel || rel === "alternate")) return href;
      // RSS link that also carries attributes: the URL is the element text.
      const text = link["#text"];
      if ((typeof text === "string" || typeof text === "number") && String(text)) return String(text);
    }
  }
  return "";
}
|
|
1908
|
+
/**
 * Choose a stable identifier for a feed item: RSS `guid`, then Atom `id`,
 * then the item link, then the supplied slug.
 *
 * @param {object} item - Parsed RSS item or Atom entry.
 * @param {string} link - Resolved item link (may be empty).
 * @param {string} slug - Last-resort identifier.
 * @returns {string}
 */
function itemSourceId(item, link, slug) {
  return textValue2(item.guid) || textValue2(item.id) || link || slug;
}
|
|
1916
|
+
/**
 * Publication timestamp of a feed item, as the raw string from the feed.
 *
 * Order of preference: RSS `pubDate`, Atom `published`, Atom `updated`.
 * (A second, unreachable `published` fallback was removed.)
 *
 * @param {object} item - Parsed RSS item or Atom entry.
 * @returns {string|undefined} The first non-empty value, or `undefined`.
 */
function itemPublishedAt(item) {
  const published = textValue2(item.pubDate) || textValue2(item.published) || textValue2(item.updated);
  return published || void 0;
}
|
|
1920
|
+
/**
 * Normalise a feed `category` node to `{ domain?, label }`.
 *
 * Plain strings become `{ label }`. Element objects yield the lower-cased
 * `domain` attribute and the label taken from the element text or, failing
 * that, the `term` attribute.
 *
 * @param {string|object} category - Parsed category node.
 * @returns {{domain?: string, label: string}}
 */
function normalizeCategoryLabel(category) {
  if (typeof category !== "string") {
    const domain = category["@_domain"]?.toLowerCase();
    const label = textValue2(category["#text"] ?? category["@_term"]).trim();
    return { domain, label };
  }
  return { label: category.trim() };
}
|
|
1929
|
+
/**
 * Build the category and tag entities referenced by a set of feed items.
 *
 * A `category` node whose domain is "tag" or "post_tag" is a tag; everything
 * else is a category. The first label seen for a slug wins.
 *
 * @param {object[]} items - Parsed feed items.
 * @returns {{categories: Map<string, object>, tags: Map<string, object>}} Entities keyed by slug, in first-seen order.
 */
function collectTaxonomiesFromItems(items) {
  const categories = /* @__PURE__ */ new Map();
  const tags = /* @__PURE__ */ new Map();

  const register = (bucket, type, idPrefix, slug, label) => {
    if (bucket.has(slug)) return;
    const sourceId = `${idPrefix}:${slug}`;
    bucket.set(slug, {
      type,
      source: sourceMeta3(sourceId),
      sourceId,
      name: label,
      slug
    });
  };

  for (const item of items) {
    for (const rawCategory of asArray2(item.category)) {
      const { domain, label } = normalizeCategoryLabel(rawCategory);
      if (!label) continue;
      const slug = sanitizeSlug(label);
      if (!slug) continue;
      if (domain === "tag" || domain === "post_tag") {
        register(tags, "tag", "tag", slug, label);
      } else {
        register(categories, "category", "cat", slug, label);
      }
    }
  }
  return { categories, tags };
}
|
|
1961
|
+
/**
 * Slugs of the item's categories, i.e. `category` nodes whose domain is
 * neither "tag" nor "post_tag". Nodes with an empty label or slug are skipped.
 *
 * @param {object} item - Parsed feed item.
 * @returns {string[]} Slugs in document order (duplicates are kept).
 */
function collectCategorySlugs(item) {
  return asArray2(item.category)
    .map((rawCategory) => normalizeCategoryLabel(rawCategory))
    .filter(({ domain, label }) => Boolean(label) && domain !== "tag" && domain !== "post_tag")
    .map(({ label }) => sanitizeSlug(label))
    .filter((slug) => Boolean(slug));
}
|
|
1971
|
+
/**
 * Slugs of the item's tags, i.e. `category` nodes whose domain is "tag" or
 * "post_tag". Nodes with an empty label or slug are skipped.
 *
 * @param {object} item - Parsed feed item.
 * @returns {string[]} Slugs in document order (duplicates are kept).
 */
function collectTagSlugs(item) {
  return asArray2(item.category)
    .map((rawCategory) => normalizeCategoryLabel(rawCategory))
    .filter(({ domain, label }) => Boolean(label) && (domain === "tag" || domain === "post_tag"))
    .map(({ label }) => sanitizeSlug(label))
    .filter((slug) => Boolean(slug));
}
|
|
1981
|
+
/**
 * Yield one asset entity per not-yet-seen asset URL found in `html`.
 *
 * @param {string} html - Content markup to scan.
 * @param {Set<string>} seenUrls - Shared de-duplication set; each emitted URL is added to it.
 * @param {string} [exportedAt] - Export timestamp stored in the asset's source metadata.
 * @yields {object} Asset entity with sourceId `url:<src>`.
 */
function* collectInlineAssets2(html, seenUrls, exportedAt) {
  // Relative sources are resolved against a dummy base just to extract a file name.
  const filenameFor = (src) => {
    try {
      return basename2(new URL(src, "http://local.invalid").pathname) || "inline-asset";
    } catch {
      return "inline-asset";
    }
  };

  for (const src of discoverContentAssetUrls(html)) {
    if (!seenUrls.has(src)) {
      seenUrls.add(src);
      const filename = filenameFor(src);
      const sourceId = `url:${src}`;
      yield {
        type: "asset",
        source: sourceMeta3(sourceId, src, exportedAt),
        sourceId,
        sourceUrl: src,
        filename,
        mimeType: guessMime3(filename)
      };
    }
  }
}
|
|
2001
|
+
/**
 * Map an export status to the normalised publish status.
 *
 * @param {string} [status] - Source status; a missing value counts as "published".
 * @returns {"published"|"draft"|"archived"} Anything other than published/draft (case-insensitive) is "archived".
 */
function mapPublishStatus2(status) {
  const normalized = (status ?? "published").toLowerCase();
  if (normalized === "published" || normalized === "draft") return normalized;
  return "archived";
}
|
|
2011
|
+
// For each shared "seen URLs" set: featured-image URL -> sourceId of the asset
// that was emitted for it. Lets later posts that reuse an image point at the
// asset that actually exists.
var featuredAssetIdsBySeenSet = /* @__PURE__ */ new WeakMap();

/**
 * Emit the entities for one exported post: inline assets, the featured image
 * asset (if not emitted before) and finally the post itself.
 *
 * Fixes over the previous version:
 * - `featuredAssetSourceId` always names an asset that was really emitted.
 *   Before, a featured image that had already been seen (inline in the body,
 *   or as another post's featured image) still produced `featured:<post.id>`
 *   with no matching asset entity.
 * - A malformed `featuredImageUrl` no longer throws; the fallback file name
 *   is used, matching how inline assets are handled.
 *
 * @param {object} post - Export post record.
 * @param {string} [exportedAt] - Export timestamp for source metadata.
 * @param {Set<string>} seenAssetUrls - Shared de-duplication set of asset URLs.
 * @yields {object} Asset entities followed by the post entity.
 */
function* emitExportPost(post, exportedAt, seenAssetUrls) {
  yield* collectInlineAssets2(post.contentHtml, seenAssetUrls, exportedAt);

  let featuredAssetSourceId;
  const featuredUrl = post.featuredImageUrl;
  if (featuredUrl) {
    let featuredIds = featuredAssetIdsBySeenSet.get(seenAssetUrls);
    if (!featuredIds) {
      featuredIds = /* @__PURE__ */ new Map();
      featuredAssetIdsBySeenSet.set(seenAssetUrls, featuredIds);
    }

    if (seenAssetUrls.has(featuredUrl)) {
      // Already emitted: either as an earlier featured image, or as an inline
      // asset, whose sourceId follows the `url:<src>` convention.
      featuredAssetSourceId = featuredIds.get(featuredUrl) ?? `url:${featuredUrl}`;
    } else {
      seenAssetUrls.add(featuredUrl);
      featuredAssetSourceId = `featured:${post.id}`;
      featuredIds.set(featuredUrl, featuredAssetSourceId);

      const fallbackName = `${post.id}-featured.jpg`;
      let filename;
      try {
        filename = basename2(new URL(featuredUrl).pathname) || fallbackName;
      } catch {
        filename = fallbackName;
      }
      yield {
        type: "asset",
        source: sourceMeta3(featuredAssetSourceId, featuredUrl, exportedAt),
        sourceId: featuredAssetSourceId,
        sourceUrl: featuredUrl,
        filename,
        mimeType: guessMime3(filename)
      };
    }
  }

  yield {
    type: "post",
    source: sourceMeta3(post.id, post.url, exportedAt),
    sourceId: post.id,
    title: post.title,
    slug: sanitizeSlug(post.slug),
    excerpt: post.excerpt,
    contentHtml: post.contentHtml,
    publishedAt: post.publishedAt,
    status: mapPublishStatus2(post.status),
    categorySlugs: post.categorySlugs,
    tagSlugs: post.tagSlugs,
    featuredAssetSourceId,
    seoTitle: post.seoTitle,
    seoDescription: post.seoDescription
  };
}
|
|
2046
|
+
/**
 * Emit the entities for one exported page: its inline assets, then the page.
 *
 * @param {object} page - Export page record.
 * @param {string} [exportedAt] - Export timestamp for source metadata.
 * @param {Set<string>} seenAssetUrls - Shared de-duplication set of asset URLs.
 * @yields {object} Asset entities followed by the page entity.
 */
function* emitExportPage(page, exportedAt, seenAssetUrls) {
  const { id, url, title, slug, contentHtml, isHomePage, status, seoTitle, seoDescription } = page;
  yield* collectInlineAssets2(contentHtml, seenAssetUrls, exportedAt);
  yield {
    type: "page",
    source: sourceMeta3(id, url, exportedAt),
    sourceId: id,
    title,
    slug: sanitizeSlug(slug),
    contentHtml,
    isHomePage,
    status: mapPublishStatus2(status),
    seoTitle,
    seoDescription
  };
}
|
|
2061
|
+
/**
 * Stream every entity of an export document in dependency order:
 * categories, tags, pages (with their assets), then posts (with their assets).
 * Asset URLs are de-duplicated across the whole document.
 *
 * @param {object} doc - Export document.
 * @param {Array} [snapshotGaps] - Accepted for signature compatibility; not used here.
 * @yields {object} Normalised entities.
 */
async function* enumerateWixExportEntities(doc, snapshotGaps) {
  const { exportedAt } = doc;
  const seenAssetUrls = /* @__PURE__ */ new Set();

  function* emitTerms(type, terms) {
    for (const term of terms) {
      yield {
        type,
        source: sourceMeta3(term.id, void 0, exportedAt),
        sourceId: term.id,
        name: term.name,
        slug: sanitizeSlug(term.slug)
      };
    }
  }

  yield* emitTerms("category", doc.categories ?? []);
  yield* emitTerms("tag", doc.tags ?? []);
  for (const page of doc.pages ?? []) {
    yield* emitExportPage(page, exportedAt, seenAssetUrls);
  }
  for (const post of doc.posts ?? []) {
    yield* emitExportPost(post, exportedAt, seenAssetUrls);
  }
  // Explicit no-op: the parameter is intentionally unused.
  void snapshotGaps;
}
|
|
2090
|
+
/**
 * Count the entities an export document would produce.
 *
 * `assets` is the number of distinct asset URLs across post bodies, post
 * featured images and page bodies.
 *
 * @param {object} doc - Export document.
 * @returns {{posts: number, pages: number, categories: number, tags: number, assets: number}}
 */
function summarizeWixExport(doc) {
  const uniqueAssetUrls = /* @__PURE__ */ new Set();
  const track = (html, featured) => {
    if (featured) uniqueAssetUrls.add(featured);
    for (const src of discoverContentAssetUrls(html)) {
      uniqueAssetUrls.add(src);
    }
  };

  for (const post of doc.posts ?? []) {
    track(post.contentHtml, post.featuredImageUrl);
  }
  for (const page of doc.pages ?? []) {
    track(page.contentHtml);
  }

  return {
    posts: doc.posts?.length ?? 0,
    pages: doc.pages?.length ?? 0,
    categories: doc.categories?.length ?? 0,
    tags: doc.tags?.length ?? 0,
    assets: uniqueAssetUrls.size
  };
}
|
|
2118
|
+
/**
 * Parse a feed and return its format together with its raw entries
 * (`feed.entry` for Atom, `rss.channel.item` for RSS).
 *
 * @param {string} xml - Feed markup.
 * @returns {{format: string, items: object[]}}
 */
function parseFeedItems(xml) {
  const format = detectWixFeedFormat(xml);
  const doc = parseXmlDocument(xml);
  const rawItems = format === "atom" ? doc.feed?.entry : doc.rss?.channel?.item;
  return { format, items: asArray2(rawItems) };
}
|
|
2126
|
+
/**
 * Read and parse a feed file, rejecting feeds with no entries.
 *
 * @param {string} filePath - Path to an RSS or Atom file.
 * @returns {Promise<{format: string, items: object[]}>}
 * @throws {Error} When the document contains no entries.
 */
async function loadWixFeed(filePath) {
  const feed = parseFeedItems(await readFile4(filePath, "utf8"));
  if (feed.items.length > 0) return feed;
  throw new Error("Invalid Wix feed: no entries found in RSS or Atom document");
}
|
|
2134
|
+
/**
 * Obtain a validated export document from the first available source:
 * in-memory `data`, an existing `client`, a client built from
 * `clientOptions`, or a `.json` file at `filePath`.
 *
 * @param {object} options - Parse options.
 * @returns {Promise<object>} Document as returned by assertWixExport().
 * @throws {Error} When no usable source is present (including a non-.json `filePath`).
 */
async function loadWixExport(options) {
  const { data, client, clientOptions, filePath } = options;
  if (data) return assertWixExport(data);

  const collector = client || (clientOptions ? new WixCollectionClient(clientOptions) : void 0);
  if (collector) {
    return assertWixExport(await collector.collectExport());
  }

  if (filePath && extname(filePath).toLowerCase() === ".json") {
    const raw = JSON.parse(await readFile4(filePath, "utf8"));
    return assertWixExport(raw);
  }
  throw new Error("Wix parser requires filePath (.json), data, client, or clientOptions");
}
|
|
2154
|
+
/**
 * Work out which pages should be snapshotted.
 *
 * Explicit `snapshotTargets` win. Otherwise URLs are loaded from `urlsFile`,
 * or from `filePath` when it is a `.txt` file. The first URL is flagged as
 * the home page when its path is "/".
 *
 * @param {object} options - Parse options.
 * @returns {Promise<Array<{url: string, isHomePage?: boolean}>>} Possibly empty.
 */
async function resolveSnapshotTargets(options) {
  const explicit = options.snapshotTargets;
  if (explicit?.length) return explicit;

  let listPath = options.urlsFile;
  if (listPath === void 0 || listPath === null) {
    const { filePath } = options;
    listPath = filePath && extname(filePath).toLowerCase() === ".txt" ? filePath : void 0;
  }
  if (!listPath) return [];

  const urls = await loadUrlListFile(listPath);
  return urls.map((url, index) => ({
    url,
    isHomePage: index === 0 && new URL(url).pathname === "/"
  }));
}
|
|
2166
|
+
/**
 * Snapshot the configured target pages and append them to the document's pages.
 *
 * @param {object} doc - Export document; not mutated.
 * @param {object} options - Parse options (`snapshotOptions` configures the collector).
 * @returns {Promise<{doc: object, gaps: Array}>} The original `doc` and no
 *   gaps when there is nothing to snapshot; otherwise a copy with the extra
 *   pages plus the collector's gaps.
 */
async function attachSnapshotPages(doc, options) {
  const targets = await resolveSnapshotTargets(options);
  if (targets.length === 0) return { doc, gaps: [] };

  const collector = new WixPageSnapshotCollector(options.snapshotOptions);
  const snapshot = await collector.collectPages(targets);
  const existingPages = doc.pages ?? [];
  const merged = { ...doc, pages: [...existingPages, ...snapshot.pages] };
  return { doc: merged, gaps: snapshot.gaps };
}
|
|
2179
|
+
/**
 * Entry point: stream entities for whichever Wix source the options describe.
 *
 * 1. `.xml` / `.rss` / `.atom` file: feed entities, then snapshot pages if any targets resolve.
 * 2. `.txt` file (without `data` / `client`): snapshot pages only.
 * 3. Snapshot targets with no other source at all: snapshot pages only.
 * 4. Otherwise: a full export (file, data or API client) plus any snapshot pages.
 *
 * @param {object} options - Parse options.
 * @yields {object} Normalised entities.
 */
async function* enumerateWixEntities(options) {
  const emptyDoc = () => ({ exportVersion: 1, pages: [], posts: [] });
  async function* emitSnapshotsOnly(snapshotOptions) {
    const { doc, gaps } = await attachSnapshotPages(emptyDoc(), snapshotOptions);
    yield* enumerateWixExportEntities(doc, gaps);
  }

  const ext = options.filePath ? extname(options.filePath).toLowerCase() : void 0;

  if (ext === ".xml" || ext === ".rss" || ext === ".atom") {
    yield* enumerateWixFeedEntities(options);
    const feedTargets = await resolveSnapshotTargets(options);
    if (feedTargets.length > 0) {
      yield* emitSnapshotsOnly({ ...options, snapshotTargets: feedTargets });
    }
    return;
  }

  if (ext === ".txt" && !options.data && !options.client) {
    yield* emitSnapshotsOnly(options);
    return;
  }

  const snapshotTargets = await resolveSnapshotTargets(options);
  const hasOtherSource = options.filePath || options.data || options.client || options.clientOptions;
  if (snapshotTargets.length > 0 && !hasOtherSource) {
    yield* emitSnapshotsOnly({ ...options, snapshotTargets });
    return;
  }

  const doc = await loadWixExport(options);
  const { doc: withSnapshots, gaps } = await attachSnapshotPages(doc, options);
  yield* enumerateWixExportEntities(withSnapshots, gaps);
}
|
|
2213
|
+
/**
 * Stream entities from an RSS/Atom feed file: categories, tags, then for each
 * item its inline assets followed by the post. Every feed item is emitted as
 * a published post.
 *
 * @param {object} options - Parse options; `filePath` is required, `exportedAt` is optional.
 * @yields {object} Normalised entities.
 * @throws {Error} When `filePath` is missing.
 */
async function* enumerateWixFeedEntities(options) {
  const { filePath, exportedAt } = options;
  if (!filePath) {
    throw new Error("Wix feed parser requires filePath");
  }

  const { items } = await loadWixFeed(filePath);
  const taxonomies = collectTaxonomiesFromItems(items);
  const seenAssetUrls = /* @__PURE__ */ new Set();

  yield* taxonomies.categories.values();
  yield* taxonomies.tags.values();

  for (const item of items) {
    const title = textValue2(item.title) || "Untitled";
    const link = itemLink(item);
    const sourceId = itemSourceId(item, link, sanitizeSlug(title));
    const slug = slugFromLink(link, title, sourceId);
    const contentHtml = getItemContentHtml(item);

    yield* collectInlineAssets2(contentHtml, seenAssetUrls, exportedAt);
    yield {
      type: "post",
      source: sourceMeta3(sourceId, link || void 0, exportedAt),
      sourceId,
      title,
      slug,
      excerpt: textValue2(item.description) || void 0,
      contentHtml,
      publishedAt: itemPublishedAt(item),
      status: "published",
      categorySlugs: collectCategorySlugs(item),
      tagSlugs: collectTagSlugs(item)
    };
  }
}
|
|
2252
|
+
/**
 * Count what a feed file would produce without emitting entities.
 *
 * @param {string} filePath - Path to an RSS or Atom file.
 * @returns {Promise<{format: string, posts: number, categories: number, tags: number, assets: number}>}
 *   `assets` counts distinct inline asset URLs across all items.
 */
async function summarizeWixFeed(filePath) {
  const { format, items } = await loadWixFeed(filePath);
  const { categories, tags } = collectTaxonomiesFromItems(items);

  const uniqueAssetUrls = /* @__PURE__ */ new Set();
  for (const item of items) {
    for (const src of discoverContentAssetUrls(getItemContentHtml(item))) {
      uniqueAssetUrls.add(src);
    }
  }

  return {
    format,
    posts: items.length,
    categories: categories.size,
    tags: tags.size,
    assets: uniqueAssetUrls.size
  };
}
|
|
2275
|
+
/**
 * Validate a feed file by attempting to summarise it.
 *
 * Never rejects: any failure is reported as a single `invalid_wix_feed`
 * issue with a zeroed summary.
 *
 * @param {string} filePath - Path to the feed file.
 * @returns {Promise<{ok: boolean, issues: Array<{code: string, message: string}>, summary: object}>}
 */
async function validateWixExportFile(filePath) {
  let summary;
  try {
    summary = await summarizeWixFeed(filePath);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return {
      ok: false,
      issues: [{ code: "invalid_wix_feed", message }],
      summary: { posts: 0, categories: 0, tags: 0, assets: 0 }
    };
  }
  return { ok: true, issues: [], summary };
}
|
|
2296
|
+
|
|
2297
|
+
// src/parsers/wix/index.ts
|
|
2298
|
+
/**
 * Normalize the raw adapter input into a record the Wix parser can consume.
 *
 * Accepted shapes:
 *  - a string, treated as `{ path }`;
 *  - an object carrying `client`, `clientOptions`, `data`, or a non-empty
 *    `snapshotTargets` array, which is passed through;
 *  - an object carrying `path`, `urlsFile`, or the `urlsPath` alias, which is
 *    reduced to `{ path, urlsFile }`.
 *
 * The `urlsPath` alias is resolved to `urlsFile` in every object shape, so a
 * URL list supplied next to `data` / `client` / `snapshotTargets` is not lost
 * by consumers that only read `urlsFile`.
 *
 * @param {string|object} input Raw adapter input.
 * @returns {object} The normalized record. A pass-through object is returned
 *   as-is unless the alias has to be filled in, in which case a shallow copy
 *   is returned and the caller's object is left unmodified.
 * @throws {Error} When `input` matches none of the accepted shapes.
 */
function resolveInput3(input) {
  if (typeof input === "string") return { path: input };
  if (input && typeof input === "object") {
    const record = input;
    if (record.client || record.clientOptions || record.data || record.snapshotTargets?.length) {
      // Copy only when the alias actually needs resolving, so the common case
      // keeps returning the very same object as before.
      if (record.urlsFile == null && record.urlsPath != null) {
        return { ...record, urlsFile: record.urlsPath };
      }
      return record;
    }
    if (record.path || record.urlsFile || record.urlsPath) {
      return {
        path: record.path,
        urlsFile: record.urlsFile ?? record.urlsPath
      };
    }
  }
  throw new Error(
    "Wix adapter requires input path (string or { path }), { data }, { client }, { clientOptions }, or snapshot targets"
  );
}
|
|
2316
|
+
/**
 * Project a resolved input record onto the option names used by the Wix
 * loader and enumerator.
 *
 * `path` is renamed to `filePath`; the remaining five fields are copied under
 * their own names. Fields missing from `input` come through as `undefined`,
 * and any other properties on `input` are not carried over.
 *
 * @param {object} input Resolved input record.
 * @returns {{filePath: *, urlsFile: *, data: *, client: *, clientOptions: *, snapshotTargets: *}}
 */
function toParseOptions(input) {
  const {
    path: filePath,
    urlsFile,
    data,
    client,
    clientOptions,
    snapshotTargets
  } = input;
  return { filePath, urlsFile, data, client, clientOptions, snapshotTargets };
}
|
|
2326
|
+
/**
 * Adapter object for the "wix" platform: validates an input and enumerates
 * its entities.
 */
var wixAdapter = {
  platform: "wix",
  /**
   * Check that `input` is usable and, where possible, report entity counts.
   *
   * Branches are tried in this order:
   *  1. in-memory `data` is summarized directly;
   *  2. `client` / `clientOptions`, or a path ending in ".json", is loaded via
   *     `loadWixExport` and summarized;
   *  3. any other path not ending in ".txt" is checked by
   *     `validateWixExportFile` (`pages` is reported as 0 on this branch);
   *  4. a ".txt" path, a `urlsFile`, or a non-empty `snapshotTargets` list is
   *     accepted with an all-zero summary.
   *
   * Snapshot-only input is accepted in step 4 because `resolveInput3` treats
   * it as a valid input shape; without that it would always be reported as
   * `invalid_input`.
   *
   * Anything thrown along the way is converted into a single `invalid_input`
   * issue; this method does not reject for those failures.
   *
   * @param {string|object} input Raw adapter input (see `resolveInput3`).
   * @returns {Promise<{ok: boolean, issues: Array, summary?: object}>} The
   *   failure result produced by the catch block carries no `summary`.
   */
  async validateInput(input) {
    // Shared result shape for the branches that summarize a loaded document;
    // only the five counters are copied, in this fixed key order.
    const summarized = (counts) => ({
      ok: true,
      issues: [],
      summary: {
        posts: counts.posts,
        pages: counts.pages,
        assets: counts.assets,
        categories: counts.categories,
        tags: counts.tags
      }
    });
    try {
      const resolved = resolveInput3(input);
      const options = toParseOptions(resolved);
      if (resolved.data) {
        return summarized(summarizeWixExport(resolved.data));
      }
      if (resolved.client || resolved.clientOptions || resolved.path?.endsWith(".json")) {
        const doc = await loadWixExport(options);
        return summarized(summarizeWixExport(doc));
      }
      if (resolved.path && !resolved.path.endsWith(".txt")) {
        const result = await validateWixExportFile(resolved.path);
        return {
          ok: result.ok,
          issues: result.issues,
          summary: {
            posts: result.summary.posts,
            pages: 0,
            assets: result.summary.assets,
            categories: result.summary.categories,
            tags: result.summary.tags
          }
        };
      }
      if (resolved.path?.endsWith(".txt") || resolved.urlsFile || resolved.snapshotTargets?.length) {
        // Nothing is loaded on this branch, so every count is reported as zero.
        return {
          ok: true,
          issues: [],
          summary: { pages: 0, posts: 0, assets: 0, categories: 0, tags: 0 }
        };
      }
      throw new Error("Wix validation requires export.xml, export.json, url list, or API client options");
    } catch (error) {
      return {
        ok: false,
        issues: [
          {
            code: "invalid_input",
            message: error instanceof Error ? error.message : String(error)
          }
        ]
      };
    }
  },
  /**
   * Enumerate entities for the input found on `ctx.input`.
   *
   * @param {{input: string|object}} ctx Context whose `input` is resolved with
   *   `resolveInput3` and mapped with `toParseOptions`.
   * @returns Whatever `enumerateWixEntities` returns for those options.
   */
  enumerateEntities(ctx) {
    return enumerateWixEntities(toParseOptions(resolveInput3(ctx.input)));
  }
};
|
|
2414
|
+
|
|
2415
|
+
// src/parsers/index.ts
|
|
2416
|
+
// Registry of platform adapters, keyed by platform identifier.
var adapters = {
  wordpress: wordpressAdapter,
  smugmug: smugmugAdapter,
  squarespace: squarespaceAdapter,
  wix: wixAdapter
};
/**
 * Look up the adapter registered for `platform`.
 *
 * Only keys defined directly on the registry are honoured, so names inherited
 * from `Object.prototype` ("constructor", "toString", "__proto__", ...) are
 * treated as unknown platforms rather than resolving to built-in members.
 *
 * @param {string} platform Platform identifier, e.g. "wix".
 * @returns The registered adapter, or `undefined` when none is registered.
 */
function getAdapter(platform) {
  return Object.prototype.hasOwnProperty.call(adapters, platform) ? adapters[platform] : void 0;
}
|
|
2425
|
+
|
|
2426
|
+
export {
|
|
2427
|
+
rewriteOriginUrlsInText,
|
|
2428
|
+
createWpContentGatewayRewrite,
|
|
2429
|
+
wordpressAdapter,
|
|
2430
|
+
SMUGMUG_API_BASE,
|
|
2431
|
+
SMUGMUG_OAUTH_ENDPOINTS,
|
|
2432
|
+
smugMugCredentialsSchema,
|
|
2433
|
+
signSmugMugOAuthRequest,
|
|
2434
|
+
buildSmugMugAuthorizationHeader,
|
|
2435
|
+
readSmugMugCredentialsFromEnv,
|
|
2436
|
+
SmugMugApiClient,
|
|
2437
|
+
smugmugAdapter,
|
|
2438
|
+
squarespaceAdapter,
|
|
2439
|
+
WixCollectionClient,
|
|
2440
|
+
WixPageSnapshotCollector,
|
|
2441
|
+
wixAdapter,
|
|
2442
|
+
getAdapter
|
|
2443
|
+
};
|
|
2444
|
+
//# sourceMappingURL=chunk-QEXTXHFG.js.map
|