@a83/orbiter-admin 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,299 @@
1
+ /**
2
+ * wp-importer.js
3
+ * WordPress WXR import logic for Orbiter.
4
+ *
5
+ * Exports:
6
+ * parseWXR(xmlText) → { site, postTypes, attachmentMap }
7
+ * buildImportPlan(parsed) → summary for preview UI
8
+ * executeImport(db, parsed, options) → { collections, imported, skipped, overwritten, mediaOk, mediaFailed, errors }
9
+ */
10
+
11
+ import { XMLParser } from 'fast-xml-parser';
12
+ import { createRequire } from 'node:module';
13
+ import { randomUUID } from 'node:crypto';
14
+
15
+ const _require = createRequire(import.meta.url);
16
+ const TurndownService = _require('turndown');
17
+
18
// ── Turndown (HTML → Markdown) ────────────────────────────────────────────
// Shared converter: ATX headings, '-' bullets, fenced code blocks, '---' rules.
const td = new TurndownService({
  headingStyle: 'atx',
  bulletListMarker: '-',
  codeBlockStyle: 'fenced',
  hr: '---',
});

// Pass text containing WordPress shortcodes ([gallery …], [caption]…) through
// unchanged instead of dropping it.
// NOTE(review): Turndown appears to apply rules to element nodes only, so a
// filter on '#text' may never match — verify that shortcodes survive conversion.
td.addRule('shortcode', {
  filter(node) {
    if (node.nodeName !== '#text') return false;
    return /\[.+?\]/.test(node.nodeValue);
  },
  replacement(content) {
    return content;
  },
});
31
+
32
// ── XML parser config ─────────────────────────────────────────────────────
// Elements that may repeat; they are always parsed as arrays, even when only
// one occurrence is present, so consumers never have to special-case that.
const ALWAYS_ARRAY_TAGS = new Set([
  'item',
  'wp:postmeta',
  'category',
  'wp:author',
  'wp:category',
  'wp:tag',
]);

const xmlParser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '@_',
  cdataPropName: '__cdata',
  isArray: (name) => ALWAYS_ARRAY_TAGS.has(name),
  // Keep element text as strings. With the default (true), numeric-looking
  // text — e.g. a post titled "007" or "1.50" — would be coerced to a number
  // and lose its original spelling. Every consumer stringifies via v() anyway.
  parseTagValue: false,
  parseAttributeValue: true,
  trimValues: true,
});
41
+
42
// Helper: unwrap CDATA or plain text value
/**
 * Normalise a value produced by the XML parser to a plain string.
 *
 * - null / undefined           → ''
 * - primitives                 → String(val)
 * - { __cdata: … }             → the CDATA payload
 * - { '#text': …, '@_x': … }   → the element text (element that carries
 *                                attributes but no CDATA section)
 * - other non-array objects    → '' (no text content; avoids "[object Object]")
 * - arrays                     → String(val), as before
 *
 * @param {*} val Parsed node value.
 * @returns {string}
 */
function v(val) {
  if (val === undefined || val === null) return '';
  if (typeof val !== 'object') return String(val);
  if (val.__cdata !== undefined) return String(val.__cdata ?? '');
  // '#text' is the parser's default property name for an element's text when
  // the element also has attributes.
  if (val['#text'] !== undefined) return String(val['#text'] ?? '');
  return Array.isArray(val) ? String(val) : '';
}
48
+
49
// Helper: safe slug from title
/**
 * Turn arbitrary text into a lowercase, dash-separated ASCII slug.
 * German umlauts and ß are transliterated (ä→ae, ö→oe, ü→ue, ß→ss); every other
 * run of characters outside [a-z0-9] becomes a single '-', and leading/trailing
 * dashes are trimmed. Falls back to 'untitled' when nothing is left.
 *
 * @param {string} str Source text (must be a string).
 * @returns {string}
 */
function slugify(str) {
  const transliteration = {
    'ä': 'ae', 'Ä': 'ae',
    'ö': 'oe', 'Ö': 'oe',
    'ü': 'ue', 'Ü': 'ue',
    'ß': 'ss',
  };
  const ascii = str
    .toLowerCase()
    .replace(/[äÄöÖüÜß]/g, (ch) => transliteration[ch]);
  const dashed = ascii.replace(/[^a-z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  return trimmed || 'untitled';
}
55
+
56
// WordPress post status → Orbiter status
/**
 * Only 'publish' maps to 'published'; every other WordPress status
 * (draft, pending, private, future, trash, …) is imported as 'draft'.
 *
 * @param {string} wpStatus Value of <wp:status>.
 * @returns {'published'|'draft'}
 */
function mapStatus(wpStatus) {
  if (wpStatus === 'publish') {
    return 'published';
  }
  return 'draft';
}
60
+
61
// Default schema per post type
/**
 * Build the default collection schema for a WordPress post type.
 * Pages get title/body/date only; posts and every custom post type get the
 * full set of fields.
 *
 * @param {string} postType WordPress post type.
 * @returns {Object<string, {type: string, required: boolean, label: string}>}
 */
function schemaFor(postType) {
  const field = (type, label, required = false) => ({ type, required, label });

  const title = field('string', 'Title', true);
  const body = field('richtext', 'Content');
  const date = field('date', 'Date');

  if (postType === 'page') {
    return { title, body, date };
  }

  // posts + all custom post types
  return {
    title,
    excerpt: field('string', 'Excerpt'),
    body,
    tags: field('array', 'Tags & Categories'),
    date,
    image: field('media', 'Featured Image'),
    author: field('string', 'Author'),
  };
}
81
+
82
// Collection ID from post type (safe slug)
/**
 * Map a WordPress post type to an Orbiter collection id.
 * The built-in types are pluralised ('post' → 'posts', 'page' → 'pages');
 * any other type is slugified with dashes turned into underscores.
 *
 * @param {string} postType WordPress post type.
 * @returns {string}
 */
function collectionId(postType) {
  switch (postType) {
    case 'post':
      return 'posts';
    case 'page':
      return 'pages';
    default: {
      const slug = slugify(postType);
      return slug.replace(/-/g, '_');
    }
  }
}
88
+
89
// ── parseWXR ─────────────────────────────────────────────────────────────
/**
 * Parse a WordPress WXR export into plain data for the importer.
 *
 * Attachments are collected into `attachmentMap` (keyed by wp:post_id);
 * nav_menu_item, revision and untyped items are ignored; everything else is
 * normalised and grouped by post type.
 *
 * Grouping is done in Maps and converted with Object.fromEntries, so a post
 * type or id named like an Object.prototype member ("constructor",
 * "__proto__") becomes an ordinary own key instead of colliding with the
 * inherited member (which previously crashed on `.push`).
 *
 * @param {string} xmlText Raw WXR document.
 * @returns {{
 *   site: { title: string, url: string },
 *   postTypes: Object<string, Array<Object>>,
 *   attachmentMap: Object<string, { url: string, filename: string, mimeType: string }>
 * }}
 */
export function parseWXR(xmlText) {
  const root = xmlParser.parse(xmlText);
  const channel = root?.rss?.channel ?? {};
  const rawItems = channel.item;
  const items = Array.isArray(rawItems) ? rawItems : (rawItems ? [rawItems] : []);

  const site = {
    title: v(channel.title),
    url: v(channel.link),
  };

  const attachments = new Map(); // wp:post_id → attachment info
  const byType = new Map();      // post type  → normalised items

  for (const item of items) {
    const type = v(item['wp:post_type']);
    if (!type) continue;

    if (type === 'attachment') {
      attachments.set(v(item['wp:post_id']), {
        url: v(item['wp:attachment_url']),
        filename: v(item['wp:post_name']) || v(item.title),
        mimeType: v(item['wp:attachment_metadata']?.mime_type) || '',
      });
      continue;
    }
    if (type === 'nav_menu_item' || type === 'revision') continue;

    // Featured image: the _thumbnail_id meta holds the attachment's wp:post_id.
    const meta = Array.isArray(item['wp:postmeta']) ? item['wp:postmeta'] : [];
    const thumbMeta = meta.find((m) => v(m['wp:meta_key']) === '_thumbnail_id');
    const thumbId = v(thumbMeta?.['wp:meta_value'] ?? '');

    // <category> elements carry both categories and tags, told apart by @domain.
    const terms = Array.isArray(item.category)
      ? item.category
      : (item.category ? [item.category] : []);
    const termNames = (domain) => terms
      .filter((term) => term['@_domain'] === domain)
      .map((term) => v(term));
    const categories = termNames('category');
    const tags = termNames('post_tag');

    const entry = {
      postType: type,
      wpId: v(item['wp:post_id']),
      title: v(item.title),
      slug: slugify(v(item['wp:post_name']) || v(item.title)),
      status: mapStatus(v(item['wp:status'])),
      rawHtml: v(item['content:encoded']),
      rawExcerpt: v(item['excerpt:encoded']),
      tags: [...new Set([...categories, ...tags])].filter(Boolean),
      // wp:post_date is "YYYY-MM-DD HH:MM:SS"; only the date part is kept.
      date: v(item['wp:post_date']).split(' ')[0] || '',
      author: v(item['dc:creator']),
      thumbId, // WP attachment ID → resolve via attachmentMap
      pubDate: v(item.pubDate),
    };

    const bucket = byType.get(type);
    if (bucket) {
      bucket.push(entry);
    } else {
      byType.set(type, [entry]);
    }
  }

  return {
    site,
    postTypes: Object.fromEntries(byType),
    attachmentMap: Object.fromEntries(attachments),
  };
}
160
+
161
// ── buildImportPlan ───────────────────────────────────────────────────────
/**
 * Summarise a parseWXR() result for the preview UI.
 *
 * @param {{ site: Object, postTypes: Object<string, Array<Object>>, attachmentMap: Object }} parsed
 * @returns {{ site: Object, types: Array<Object>, mediaCount: number, attachmentCount: number }}
 *   One `types` entry per post type, with its target collection id, item
 *   counts per status and default schema. `attachmentCount` mirrors `mediaCount`.
 */
export function buildImportPlan(parsed) {
  const { site, postTypes, attachmentMap } = parsed;
  const mediaCount = Object.keys(attachmentMap).length;

  const types = [];
  for (const [postType, entries] of Object.entries(postTypes)) {
    const targetCollection = collectionId(postType);

    let published = 0;
    let drafts = 0;
    for (const entry of entries) {
      if (entry.status === 'published') {
        published += 1;
      } else if (entry.status === 'draft') {
        drafts += 1;
      }
    }

    types.push({
      postType,
      collectionId: targetCollection,
      count: entries.length,
      published,
      drafts,
      schema: schemaFor(postType),
    });
  }

  return { site, types, mediaCount, attachmentCount: mediaCount };
}
177
+
178
// ── executeImport ─────────────────────────────────────────────────────────
// options: { selectedTypes: string[], downloadMedia: boolean, onDuplicate: 'skip'|'overwrite', podPath: string }

// Current UTC time as "YYYY-MM-DD HH:MM:SS" (the timestamp format written to the DB).
function sqlTimestamp() {
  return new Date().toISOString().replace('T', ' ').replace(/\.\d{3}Z$/, '');
}

// Last path segment of a URL string, ignoring any query string or fragment.
function mediaFilenameFromUrl(url, fallback) {
  const path = url.split(/[?#]/)[0];
  return path.split('/').pop() || fallback;
}

/**
 * Write a parsed WXR export into the Orbiter database.
 *
 * Steps: (1) create missing collections for the selected post types,
 * (2) optionally download attachments into `_media`, (3) insert or overwrite
 * entries, converting HTML to Markdown. Per-item failures in steps 2 and 3 are
 * collected in `errors` rather than thrown; step 1 failures propagate.
 *
 * @param {Object} db Wrapper exposing `getCollection(id)` and `db.prepare(sql)`
 *   (statements are used via `.run(...)` / `.get(...)`).
 * @param {{ postTypes: Object, attachmentMap: Object }} parsed Result of parseWXR().
 * @param {Object} [options]
 * @param {string[]} [options.selectedTypes=[]] WordPress post types to import.
 * @param {boolean} [options.downloadMedia=false] Fetch attachments over HTTP(S).
 * @param {'skip'|'overwrite'} [options.onDuplicate='skip'] What to do when an
 *   entry with the same slug already exists in the target collection.
 * @returns {Promise<{ collections: string[], imported: number, skipped: number,
 *   overwritten: number, mediaOk: number, mediaFailed: number, errors: string[] }>}
 */
export async function executeImport(db, parsed, options) {
  // Missing options must not crash the import; an unspecified duplicate policy
  // falls back to the non-destructive 'skip'.
  const selectedTypes = options?.selectedTypes ?? [];
  const downloadMedia = Boolean(options?.downloadMedia);
  const onDuplicate = options?.onDuplicate ?? 'skip';
  const postTypes = parsed?.postTypes ?? {};
  const attachmentMap = parsed?.attachmentMap ?? {};

  const results = {
    collections: [],
    imported: 0,
    skipped: 0,
    overwritten: 0,
    mediaOk: 0,
    mediaFailed: 0,
    errors: [],
  };

  // ── 1. Ensure collections exist ─────────────────────────────────────────
  for (const type of selectedTypes) {
    const colId = collectionId(type);
    if (db.getCollection(colId)) continue;

    let label;
    if (type === 'post') {
      label = 'Posts';
    } else if (type === 'page') {
      label = 'Pages';
    } else {
      label = type.charAt(0).toUpperCase() + type.slice(1).replace(/_/g, ' ');
    }
    db.db.prepare('INSERT INTO _collections (id, label, schema) VALUES (?, ?, ?)')
      .run(colId, label, JSON.stringify(schemaFor(type)));
    results.collections.push(colId);
  }

  // ── 2. Download media (optional) ─────────────────────────────────────────
  // orbiterMediaId map: wpAttachmentId → orbiter media UUID
  const orbiterMediaId = {};
  if (downloadMedia) {
    for (const [wpId, att] of Object.entries(attachmentMap)) {
      if (!att?.url) continue;
      try {
        // The URL comes from the uploaded export file: only fetch web URLs.
        const { protocol } = new URL(att.url);
        if (protocol !== 'http:' && protocol !== 'https:') {
          throw new Error(`Unsupported URL protocol "${protocol}"`);
        }

        const res = await fetch(att.url, { signal: AbortSignal.timeout(15000) });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const buf = Buffer.from(await res.arrayBuffer());
        const mime = res.headers.get('content-type')?.split(';')[0]?.trim()
          || att.mimeType || 'application/octet-stream';
        const filename = mediaFilenameFromUrl(att.url, `media-${wpId}`);
        const id = randomUUID();
        // NOTE(review): with OR IGNORE a row rejected by a constraint is dropped
        // silently while `id` is still recorded below — confirm against the
        // _media schema whether that can happen.
        db.db.prepare(
          'INSERT OR IGNORE INTO _media (id, filename, mime_type, size, data, alt, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)'
        ).run(id, filename, mime, buf.length, buf, att.filename || filename, sqlTimestamp());
        orbiterMediaId[wpId] = id;
        results.mediaOk++;
      } catch (err) {
        results.mediaFailed++;
        results.errors.push(`Media ${att.url}: ${err.message}`);
      }
    }
  }

  // ── 3. Import entries ────────────────────────────────────────────────────
  const now = sqlTimestamp();

  for (const type of selectedTypes) {
    const colId = collectionId(type);
    // Own-property check: a selected type such as "constructor" must not
    // resolve to an inherited Object.prototype member.
    const items = Object.hasOwn(postTypes, type) ? (postTypes[type] ?? []) : [];
    const isPage = type === 'page';

    for (const item of items) {
      try {
        // Check for duplicate slug
        const existing = db.db
          .prepare('SELECT id FROM _entries WHERE collection_id = ? AND slug = ?')
          .get(colId, item.slug);

        if (existing && onDuplicate === 'skip') {
          results.skipped++;
          continue;
        }

        // Convert HTML → Markdown
        const bodyMd = item.rawHtml ? td.turndown(item.rawHtml) : '';
        const excerptMd = item.rawExcerpt ? td.turndown(item.rawExcerpt) : '';

        // Resolve featured image (null when the attachment was not downloaded)
        const imageId = item.thumbId ? (orbiterMediaId[item.thumbId] ?? null) : null;

        const title = item.title || 'Untitled';
        // Pages only carry the fields present in the page schema.
        const data = isPage
          ? { title, body: bodyMd, date: item.date }
          : {
            title,
            body: bodyMd,
            excerpt: excerptMd,
            tags: item.tags,
            date: item.date,
            author: item.author,
            image: imageId,
          };

        if (existing && onDuplicate === 'overwrite') {
          db.db.prepare(
            'UPDATE _entries SET data = ?, status = ?, updated_at = ? WHERE id = ?'
          ).run(JSON.stringify(data), item.status, now, existing.id);
          results.overwritten++;
        } else {
          const created = item.date || now;
          db.db.prepare(
            'INSERT INTO _entries (id, collection_id, slug, data, status, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)'
          ).run(randomUUID(), colId, item.slug, JSON.stringify(data), item.status, created, now);
          results.imported++;
        }
      } catch (err) {
        results.errors.push(`Entry "${item.slug}": ${err.message}`);
      }
    }
  }

  return results;
}