@openparachute/vault 0.4.3 → 0.4.4-rc.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1409 @@
1
+ /**
2
+ * Portable markdown knowledge-base format — lossless export/import for any
3
+ * markdown+frontmatter consumer (Obsidian, Logseq, Foam, Quartz, Dendron,
4
+ * static-site generators).
5
+ *
6
+ * The format is **not Obsidian-specific** — Obsidian happens to consume it
7
+ * cleanly because Obsidian's `.md + YAML frontmatter` shape is the
8
+ * de-facto knowledge-base interchange format. Anchoring the function name
9
+ * to the format (rather than to one consumer) keeps the door open as
10
+ * other consumers adopt the same shape.
11
+ *
12
+ * ## Why this exists separately from `obsidian.ts`
13
+ *
14
+ * `obsidian.ts` ships a lossy export (no IDs, no typed links, no
15
+ * schemas, no attachments, no idempotency). That's fine for one-shot
16
+ * "give me an Obsidian copy" but it can't round-trip back to byte-equivalent
17
+ * vault state. Several real use cases want round-trip:
18
+ * - Gitcoin Brain's vault-as-primary + git-as-projection architecture.
19
+ * - Disaster recovery (backups that restore exact state).
20
+ * - Audit trails not dependent on vault's internal storage.
21
+ * - Migrations between vault hosts.
22
+ *
23
+ * This module implements the lossless format. `obsidian.ts` stays as a
24
+ * deprecated back-compat shim so existing callers don't break.
25
+ *
26
+ * ## Format
27
+ *
28
+ * ```
29
+ * <export-dir>/
30
+ * .parachute/
31
+ * vault.yaml # vault meta + export format version
32
+ * schemas/<tag>.yaml # per-tag: description, fields, relationships, parent_names
33
+ * attachments/<att-id>/<filename> # binary files (PR 2; #308)
34
+ * <note.path>.md # one file per note
35
+ * ```
36
+ *
37
+ * ## Frontmatter (per-note, fixed top-level key order)
38
+ *
39
+ * ```yaml
40
+ * ---
41
+ * id: 01HGZ9...
42
+ * path: Inbox/2026-05-12-meeting
43
+ * tags:
44
+ * - meeting
45
+ * - donor-pipeline
46
+ * metadata: # alpha-sorted keys
47
+ * priority: high
48
+ * links: # typed links (non-wikilink)
49
+ * - target: 01HGZA...
50
+ * relationship: derived-from
51
+ * metadata: { source: git://... }
52
+ * attachments: # PR 2 (#308)
53
+ * - id: att_01HGZB...
54
+ * path: 2026-05-12/audio.m4a
55
+ * mime_type: audio/mp4
56
+ * created_at: 2026-05-12T10:00:00.000Z
57
+ * updated_at: 2026-05-12T11:23:45.123Z
58
+ * ---
59
+ * <note content with [[wikilinks]] preserved verbatim>
60
+ * ```
61
+ *
62
+ * ## Idempotency
63
+ *
64
+ * - Top-level frontmatter keys emitted in fixed order (above).
65
+ * - Nested object keys alpha-sorted.
66
+ * - Booleans `true`/`false`; numbers as-is; strings bare unless they
67
+ * need quoting, then single-quoted (or double-quoted with escapes when they contain newlines or control characters).
68
+ * - Trailing newline after closing `---`.
69
+ *
70
+ * Pin: vault → export → re-export → byte-identical bytes.
71
+ *
72
+ * See vault#308.
73
+ */
74
+
75
+ import { readdirSync, readFileSync, statSync, mkdirSync, writeFileSync, copyFileSync, existsSync } from "fs";
76
+ import { basename, join, relative, extname, dirname, resolve as resolvePath, sep as pathSep } from "path";
77
+ import type { Store, Note, Link, Attachment } from "./types.js";
78
+ import type { TagRecord } from "./tag-schemas.js";
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // Format constants
82
+ // ---------------------------------------------------------------------------
83
+
84
/**
 * Version stamp written into `vault.yaml`. Increment only when the export
 * layout changes in a backward-incompatible way.
 */
export const EXPORT_FORMAT_VERSION = 1;

/**
 * Name of the sidecar directory that holds vault meta, schemas and
 * attachment binaries. Dot-prefixed (like Obsidian's `.obsidian/`) so that
 * directory walkers which skip dot-directories do not pick the sidecar files
 * up as notes.
 */
export const SIDECAR_DIR = ".parachute";

/**
 * Emission order of the top-level frontmatter keys. The order is fixed so
 * that re-exporting unchanged vault state yields byte-identical files.
 */
const FRONTMATTER_KEY_ORDER = [
  "id", "path", "tags", "metadata",
  "links", "attachments", "created_at", "updated_at",
] as const;
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // Types
108
+ // ---------------------------------------------------------------------------
109
+
110
/**
 * One exported note: everything that ends up in a single `.md` file
 * (YAML frontmatter plus the markdown body).
 */
export interface PortableNote {
  /** Note identifier; always emitted as the first frontmatter key. */
  id: string;
  /** Vault path without extension; absent for pathless notes. */
  path?: string;
  /** Markdown body written after the closing `---`. */
  content: string;
  /** Free-form note metadata. */
  metadata?: Record<string, unknown>;
  /** Tag names attached to the note. */
  tags?: string[];
  /** Typed (non-wikilink) outbound links. */
  links?: PortableLink[];
  /** References to the note's attachments. */
  attachments?: PortableAttachmentRef[];
  /** Creation timestamp string. */
  created_at: string;
  /** Last-update timestamp string, when known. */
  updated_at?: string;
}
122
+
123
/**
 * A typed relationship between two notes as serialized in frontmatter.
 * `target` is a note ID, which stays stable across renames. Per the format
 * notes, a target that is missing at import time is skipped with a warning
 * instead of aborting the import.
 */
export interface PortableLink {
  /** ID of the note the link points at. */
  target: string;
  /** Relationship name (for example `derived-from`). */
  relationship: string;
  /** Optional free-form metadata attached to the link. */
  metadata?: Record<string, unknown>;
}
131
+
132
/**
 * Frontmatter reference to an attachment. The binary itself is stored in
 * the sidecar at `.parachute/attachments/<id>/<filename>`; this record
 * carries only the identifying fields.
 */
export interface PortableAttachmentRef {
  /** Attachment identifier; also the sidecar sub-directory name. */
  id: string;
  /** Original vault-internal path of the binary (relative to the assets dir). */
  path: string;
  /** MIME type recorded for the attachment. */
  mime_type: string;
  /** Optional free-form metadata attached to the attachment. */
  metadata?: Record<string, unknown>;
}
141
+
142
/** Contents of `.parachute/vault.yaml`: vault-level metadata for one export. */
export interface PortableVaultMeta {
  /** Vault name, when the exporter was given one. */
  name?: string;
  /** Free-text vault description, when the exporter was given one. */
  description?: string;
  /** Format version of the export (see `EXPORT_FORMAT_VERSION`). */
  export_format_version: number;
  /** Timestamp string recording when the export was produced. */
  exported_at: string;
}
149
+
150
/** Summary returned by `exportVaultToDir`. */
export interface ExportStats {
  /** Number of note files written. */
  notes: number;
  /** Number of tag-schema files written. */
  schemas: number;
  /** Number of attachment binaries copied into the sidecar. */
  attachments: number;
  /** True when the caller passed `since` (the export may be partial). */
  filtered_by_since: boolean;
  /**
   * How many notes were skipped because their resolved write target fell
   * outside the export root. Callers inspect this to decide whether the
   * export can be treated as complete; the individual entries are listed in
   * `skipped_notes`. vault#318.
   */
  skipped_traversal: number;
  /**
   * One entry per skipped note: the offending note's path together with a
   * human-readable reason. Empty when nothing was skipped.
   */
  skipped_notes: { path: string | undefined; reason: string }[];
  /**
   * One entry per skipped attachment, with the reason it was skipped (for
   * example a missing source file, or a source/destination path escaping
   * its root). Lets a consumer tell expected-missing files apart from
   * data corruption.
   */
  skipped_attachments: { note_id: string; attachment_id: string; path: string; reason: string }[];
}
178
+
179
+ // ---------------------------------------------------------------------------
180
+ // YAML emitter (idempotent, hand-rolled — no new dep)
181
+ // ---------------------------------------------------------------------------
182
+
183
/** Indentation emitted per nesting level. */
const YAML_INDENT_UNIT = "  ";

/** Characters that force the double-quoted (escaped) scalar form. */
// eslint-disable-next-line no-control-regex
const YAML_CONTROL_CHAR = /[\n\r\t\v\f\x00-\x08\x0e-\x1f]/;

/** Boolean / null spellings (YAML 1.2 core schema plus common YAML 1.1 forms). */
const YAML_KEYWORD = /^(?:true|false|null|yes|no|on|off|~)$/i;

/** Decimal, exponent, hex, octal, infinity and NaN spellings. */
const YAML_NUMERIC =
  /^(?:[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?|0x[0-9a-fA-F]+|0o[0-7]+|[-+]?\.(?:inf|Inf|INF)|\.(?:nan|NaN|NAN))$/;

/** Indicator characters that may not start a plain scalar. */
const YAML_INDICATOR_START = /^[-?:&*!|>%@`#[\]{},"']/;

/** Named escapes of the double-quoted form; other control chars use `\xNN`. */
const YAML_NAMED_ESCAPES: Record<string, string> = {
  "\\": "\\\\",
  '"': '\\"',
  "\n": "\\n",
  "\r": "\\r",
  "\t": "\\t",
  "\v": "\\v",
  "\f": "\\f",
};

/**
 * Decide whether a string value must be quoted to survive a YAML round
 * trip as the same string.
 *
 * Quoting is required for: the empty string; leading/trailing whitespace;
 * anything a parser would resolve to a boolean, null or number; a leading
 * indicator character (including the flow/quote indicators `[ ] { } , " '`);
 * an embedded `: ` or ` #`; a trailing `:`; and newlines / control
 * characters (vault#317 F1 — those must use the escaped double-quoted form,
 * see `quoteString`).
 *
 * @param s String value about to be emitted as a scalar.
 * @returns `true` when the value cannot be emitted bare.
 */
function needsQuote(s: string): boolean {
  if (s === "" || s !== s.trim()) return true;
  // Would be read back as a different type.
  if (YAML_KEYWORD.test(s) || YAML_NUMERIC.test(s)) return true;
  if (YAML_INDICATOR_START.test(s)) return true;
  // `: ` starts a mapping value, ` #` starts a comment, a trailing `:`
  // reads as a key with no value.
  if (s.includes(": ") || s.includes(" #") || s.endsWith(":")) return true;
  return YAML_CONTROL_CHAR.test(s);
}

/**
 * Quote a string for YAML emission.
 *
 * Values containing newlines or other control characters use the
 * double-quoted form with escape sequences so the value stays on one
 * physical line (vault#317 F1). Everything else uses the single-quoted
 * form, where the only escape is doubling `'`.
 *
 * @param s String to quote.
 * @returns The quoted YAML scalar, including its surrounding quotes.
 */
function quoteString(s: string): string {
  if (!YAML_CONTROL_CHAR.test(s)) return `'${s.replace(/'/g, "''")}'`;
  // Single pass over backslash, double quote and every C0 control char, so
  // an escape produced for one character is never escaped again.
  // eslint-disable-next-line no-control-regex
  const escaped = s.replace(/[\\"\x00-\x1f]/g, (ch) => {
    const named = YAML_NAMED_ESCAPES[ch];
    return named ?? `\\x${ch.charCodeAt(0).toString(16).padStart(2, "0")}`;
  });
  return `"${escaped}"`;
}

/**
 * Emit a primitive as a YAML scalar. `null`, `undefined` and non-finite
 * numbers become `null`; strings are quoted only when `needsQuote` says so.
 * Any other value (bigint, symbol, function) is stringified and quoted.
 */
function emitScalar(value: unknown): string {
  if (value === null || value === undefined) return "null";
  switch (typeof value) {
    case "boolean":
      return value ? "true" : "false";
    case "number":
      return Number.isFinite(value) ? String(value) : "null";
    case "string":
      return needsQuote(value) ? quoteString(value) : value;
    default:
      // Defensive fallback — containers are handled by the callers.
      return quoteString(String(value));
  }
}

/**
 * Emit a mapping key. Only keys whose bare form cannot be read back at all
 * (empty, padded with whitespace, comment- or sequence-like start, embedded
 * `: ` / ` #`, control characters) are quoted; every other key is emitted
 * exactly as before.
 * NOTE(review): confirm the importer's frontmatter parser accepts quoted keys.
 */
function emitKey(key: string): string {
  const unreadable =
    key === "" ||
    key !== key.trim() ||
    key.startsWith("#") ||
    /^[-?](?:\s|$)/.test(key) ||
    key.includes(": ") ||
    key.includes(" #") ||
    YAML_CONTROL_CHAR.test(key);
  return unreadable ? quoteString(key) : key;
}

/**
 * Append one `key: value` entry to `lines`: a single line when the value has
 * an inline form, otherwise `key:` followed by the value's block form.
 *
 * @param lines       Output accumulator.
 * @param head        Everything before the colon (indent, optional `- `, key).
 * @param value       Value to emit.
 * @param childIndent Indent depth used for the value's block form.
 */
function pushEntry(lines: string[], head: string, value: unknown, childIndent: number): void {
  const inline = emitValueInline(value, childIndent);
  if (inline !== null) {
    lines.push(`${head}: ${inline}`);
    return;
  }
  lines.push(`${head}:`);
  const block = emitValueBlock(value, childIndent);
  if (block !== null) lines.push(block);
}

/**
 * Emit an object as block-style YAML at the given indent depth. Keys are
 * alpha-sorted for idempotency; nested objects and arrays recurse one level
 * deeper. An empty object is emitted as `{}`.
 */
function emitObject(obj: Record<string, unknown>, indent: number): string {
  const keys = Object.keys(obj).sort();
  if (keys.length === 0) return "{}";
  const pad = YAML_INDENT_UNIT.repeat(indent);
  const lines: string[] = [];
  for (const key of keys) {
    pushEntry(lines, `${pad}${emitKey(key)}`, obj[key], indent + 1);
  }
  return lines.join("\n");
}

/**
 * Inline form of a value when it fits on one line (scalars, `[]`, `{}`);
 * `null` when block form must be used instead (the caller emits `key:` and
 * then the block on the following lines).
 */
function emitValueInline(value: unknown, indent: number): string | null {
  if (value === null || value === undefined) return "null";
  if (Array.isArray(value)) return value.length === 0 ? "[]" : null;
  if (typeof value === "object") {
    return Object.keys(value as object).length === 0 ? "{}" : null;
  }
  return emitScalar(value);
}

/**
 * Block form of a non-empty array or object at the given indent depth;
 * `null` for everything that has an inline form.
 *
 * Array items: scalars are emitted as `- value`; objects put their first key
 * on the `- ` line and hang the remaining keys two columns in so they align;
 * nested arrays are emitted as a bare `-` followed by the nested sequence
 * one level deeper. Empty containers are kept as `- {}` / `- []` so the
 * array length survives a round trip.
 */
function emitValueBlock(value: unknown, indent: number): string | null {
  if (Array.isArray(value)) {
    if (value.length === 0) return null;
    const pad = YAML_INDENT_UNIT.repeat(indent);
    const lines: string[] = [];
    for (const item of value) {
      if (Array.isArray(item)) {
        if (item.length === 0) {
          lines.push(`${pad}- []`);
          continue;
        }
        lines.push(`${pad}-`);
        const nested = emitValueBlock(item, indent + 1);
        if (nested !== null) lines.push(nested);
      } else if (item !== null && typeof item === "object") {
        const obj = item as Record<string, unknown>;
        const keys = Object.keys(obj).sort();
        if (keys.length === 0) {
          lines.push(`${pad}- {}`);
          continue;
        }
        keys.forEach((key, position) => {
          // Hanging indent is as wide as `- ` so sibling keys line up.
          const lead = position === 0 ? `${pad}- ` : `${pad}  `;
          pushEntry(lines, `${lead}${emitKey(key)}`, obj[key], indent + 2);
        });
      } else {
        lines.push(`${pad}- ${emitScalar(item)}`);
      }
    }
    return lines.join("\n");
  }
  if (value !== null && typeof value === "object") {
    if (Object.keys(value as object).length === 0) return null;
    return emitObject(value as Record<string, unknown>, indent);
  }
  return null;
}

/**
 * Emit a complete YAML document. Used for sidecar files
 * (`vault.yaml`, `schemas/<tag>.yaml`). Trailing newline included.
 */
export function emitYamlDoc(obj: Record<string, unknown>): string {
  return emitObject(obj, 0) + "\n";
}
345
+
346
+ // ---------------------------------------------------------------------------
347
+ // Frontmatter emitter (note-level)
348
+ // ---------------------------------------------------------------------------
349
+
350
/**
 * Assemble the frontmatter object for a note. `id` and `created_at` are
 * always present; every other key is included only when it carries data
 * (no `metadata: {}` or `links: []` lines), which keeps diffs of unchanged
 * vaults minimal. Tags are copied and sorted; the note is not mutated.
 */
function buildFrontmatter(note: PortableNote): Record<string, unknown> {
  const { id, path, tags, metadata, links, attachments, created_at, updated_at } = note;
  return {
    id,
    ...(path ? { path } : {}),
    ...(tags && tags.length > 0 ? { tags: [...tags].sort() } : {}),
    ...(metadata && Object.keys(metadata).length > 0 ? { metadata } : {}),
    ...(links && links.length > 0 ? { links } : {}),
    ...(attachments && attachments.length > 0 ? { attachments } : {}),
    created_at,
    ...(updated_at ? { updated_at } : {}),
  };
}
367
+
368
/**
 * Render a note as portable markdown: a `---`-delimited YAML frontmatter
 * block followed by the note content.
 *
 * Top-level keys follow `FRONTMATTER_KEY_ORDER`; keys absent from the built
 * frontmatter are skipped. The content is appended verbatim, and a single
 * newline is added only when the result does not already end with one.
 *
 * @param note Note to render.
 * @returns The full file text.
 */
export function toPortableMarkdown(note: PortableNote): string {
  const frontmatter = buildFrontmatter(note);
  const chunks: string[] = ["---\n"];
  for (const field of FRONTMATTER_KEY_ORDER) {
    if (!(field in frontmatter)) continue;
    const fieldValue = frontmatter[field];
    const oneLine = emitValueInline(fieldValue, 1);
    if (oneLine !== null) {
      chunks.push(`${field}: ${oneLine}\n`);
      continue;
    }
    // No inline form: key on its own line, then the block form beneath it.
    chunks.push(`${field}:\n`);
    const multiLine = emitValueBlock(fieldValue, 1);
    if (multiLine !== null) chunks.push(`${multiLine}\n`);
  }
  const rendered = chunks.join("") + "---\n" + note.content;
  return rendered.endsWith("\n") ? rendered : rendered + "\n";
}
394
+
395
/**
 * Relative file path of a note inside an export directory. A note with a
 * `path` is written to `<path>.md`; a pathless note goes to
 * `_unpathed/<id>.md`.
 */
export function portableExportFilePath(note: PortableNote): string {
  const stem = note.path ? note.path : `_unpathed/${note.id}`;
  return `${stem}.md`;
}
404
+
405
+ // ---------------------------------------------------------------------------
406
+ // Pull-from-store: build PortableNote shapes
407
+ // ---------------------------------------------------------------------------
408
+
409
/**
 * Convert a store `Note`, its outbound typed `Link`s and its `Attachment`s
 * into the `PortableNote` shape.
 *
 * Links whose relationship is `wikilink` are left out of the `links` block;
 * they remain in the content text. Orderings are deterministic: tags sorted,
 * links sorted by `(relationship, target)`, attachments sorted by `id`. All
 * comparisons use UTF-16 code-unit order rather than `localeCompare`, so the
 * output does not depend on the host's locale — a precondition for
 * byte-identical re-exports across machines.
 *
 * @param note  Note to convert.
 * @param store Store queried for the note's links and attachments.
 * @returns The portable representation; empty collections are omitted.
 */
export async function noteToPortable(
  note: Note,
  store: Store,
): Promise<PortableNote> {
  // Locale-independent string ordering.
  const byCodeUnit = (a: string, b: string): number => (a < b ? -1 : a > b ? 1 : 0);

  // Typed links only (exclude wikilink — that's the content's job).
  const allLinks = await store.getLinks(note.id, { direction: "outbound" });
  const typedLinks: PortableLink[] = allLinks
    .filter((l) => l.relationship !== "wikilink")
    .map((l) => ({
      target: l.targetId,
      relationship: l.relationship,
      ...(l.metadata && Object.keys(l.metadata).length > 0 ? { metadata: l.metadata } : {}),
    }))
    .sort((a, b) => byCodeUnit(a.relationship, b.relationship) || byCodeUnit(a.target, b.target));

  const atts = await store.getAttachments(note.id);
  const attachments: PortableAttachmentRef[] = atts
    .map((a) => ({
      id: a.id,
      path: a.path,
      mime_type: a.mimeType,
      ...(a.metadata && Object.keys(a.metadata).length > 0 ? { metadata: a.metadata } : {}),
    }))
    .sort((a, b) => byCodeUnit(a.id, b.id));

  const result: PortableNote = {
    id: note.id,
    ...(note.path ? { path: note.path } : {}),
    content: note.content,
    ...(note.metadata && Object.keys(note.metadata).length > 0 ? { metadata: note.metadata } : {}),
    ...(note.tags && note.tags.length > 0 ? { tags: [...note.tags].sort() } : {}),
    ...(typedLinks.length > 0 ? { links: typedLinks } : {}),
    ...(attachments.length > 0 ? { attachments } : {}),
    created_at: note.createdAt,
    ...(note.updatedAt ? { updated_at: note.updatedAt } : {}),
  };
  return result;
}
456
+
457
+ // ---------------------------------------------------------------------------
458
+ // Vault-level export
459
+ // ---------------------------------------------------------------------------
460
+
461
/** Options accepted by `exportVaultToDir`. */
export interface ExportOptions {
  /** Directory to export into. Created when missing; existing files are overwritten. */
  outDir: string;
  /**
   * Vault name recorded as `name` in the sidecar's `vault.yaml`. The
   * exporter omits the key when this is unset or empty; any default name
   * has to be supplied by the caller.
   */
  vaultName?: string;
  /** Free-text vault description recorded in the sidecar's `vault.yaml`. */
  vaultDescription?: string;
  /** Incremental export: only notes with `updated_at >= since` are written. */
  since?: string;
  /**
   * Fixed value for `exported_at` instead of the current time. A test seam:
   * it keeps repeated exports of the same state byte-identical.
   */
  exportedAt?: string;
  /**
   * Absolute path of the vault's assets directory, where attachment bytes
   * live (one file per `attachments.path` row).
   *
   * - Set: attachment binaries are copied to
   *   `<outDir>/.parachute/attachments/<id>/<basename>` in addition to the
   *   frontmatter reference.
   * - Unset: the references are still emitted but no binaries are copied —
   *   useful for partial exports / git projections that only check in the
   *   markdown.
   *
   * The CLI computes this via `src/routes.ts:assetsDir(vault)`; core stays
   * pure (no dependency on server-side path resolution).
   */
  assetsDir?: string;
}
484
+
485
/**
 * Export a vault to a portable-markdown directory. Writes:
 *   - `<outDir>/.parachute/vault.yaml`
 *   - `<outDir>/.parachute/schemas/<tag>.yaml` for each tag that declares
 *     description/fields/relationships/parent_names.
 *   - `<outDir>/<note.path>.md` for each note (or `_unpathed/<id>.md`).
 *   - `<outDir>/.parachute/attachments/<id>/<basename>` for each attachment
 *     (only when `opts.assetsDir` is set).
 *
 * Nothing is written outside `outDir`: a note whose resolved target escapes
 * the export root is skipped and reported, and an attachment is skipped when
 * its source escapes `assetsDir`, its destination escapes the sidecar
 * attachments root, the source is missing or not a regular file, or the
 * copy itself fails. Skips are logged with `console.warn` and returned in
 * the stats; they never abort the export.
 *
 * The frontmatter `attachments[].path` value preserves the original
 * vault-internal path (relative to `assetsDir`); the sidecar location is
 * derived from the attachment `id`.
 *
 * @param store Store to read tag records, notes, links and attachments from.
 * @param opts  Export options; see `ExportOptions`.
 * @returns Counts of what was written plus per-item skip details.
 */
export async function exportVaultToDir(
  store: Store,
  opts: ExportOptions,
): Promise<ExportStats> {
  const outDir = opts.outDir;
  const sidecar = join(outDir, SIDECAR_DIR);
  const schemasDir = join(sidecar, "schemas");
  const attachmentsRoot = join(sidecar, "attachments");
  mkdirSync(outDir, { recursive: true });
  mkdirSync(sidecar, { recursive: true });
  mkdirSync(schemasDir, { recursive: true });
  // attachments dir only when assetsDir is wired (caller opted in).
  if (opts.assetsDir) {
    mkdirSync(attachmentsRoot, { recursive: true });
  }

  // 1. vault.yaml — vault meta + export format version. The export-time
  // timestamp is the one place where re-exports legitimately produce
  // different bytes; callers wanting byte-equivalent re-exports pass
  // `exportedAt` explicitly.
  const vaultMeta: PortableVaultMeta = {
    export_format_version: EXPORT_FORMAT_VERSION,
    exported_at: opts.exportedAt ?? new Date().toISOString(),
    ...(opts.vaultName ? { name: opts.vaultName } : {}),
    ...(opts.vaultDescription ? { description: opts.vaultDescription } : {}),
  };
  writeFileSync(join(sidecar, "vault.yaml"), emitYamlDoc({ ...vaultMeta }));

  // 2. Per-tag schemas. Only tags carrying at least one schema-shaped field
  // get a file; tags that are just a name don't pollute the sidecar.
  const tagRecords = await store.listTagRecords();
  let schemasWritten = 0;
  for (const tag of tagRecords) {
    if (!hasSchemaContent(tag)) continue;
    const doc: Record<string, unknown> = { name: tag.tag };
    if (tag.description !== undefined) doc.description = tag.description;
    if (tag.fields !== undefined) doc.fields = tag.fields;
    if (tag.relationships !== undefined) doc.relationships = tag.relationships;
    if (tag.parent_names !== undefined && tag.parent_names.length > 0) {
      doc.parent_names = tag.parent_names;
    }
    writeFileSync(join(schemasDir, sanitizeTagFilename(tag.tag) + ".yaml"), emitYamlDoc(doc));
    schemasWritten++;
  }

  // 3. Per-note files. The whole vault is loaded in memory; the 1M limit is
  // a defensive ceiling. Very large vaults would need a cursor / streaming
  // query instead (vault#317 F5).
  const allNotes = await store.queryNotes({ limit: 1_000_000, sort: "asc" });
  const since = opts.since;
  const outDirResolved = resolvePath(outDir);
  const assetsDirResolved = opts.assetsDir ? resolvePath(opts.assetsDir) : undefined;
  const attachmentsRootResolved = resolvePath(attachmentsRoot);
  let notesWritten = 0;
  let attachmentsWritten = 0;
  const skipped: ExportStats["skipped_notes"] = [];
  const skippedAttachments: ExportStats["skipped_attachments"] = [];

  for (const note of allNotes) {
    if (since && !shouldIncludeForSince(note, since)) continue;
    const portable = await noteToPortable(note, store);
    const fullPath = join(outDir, portableExportFilePath(portable));

    // vault#317 F3 — path-traversal guard. A note path such as
    // "../../.ssh/authorized_keys" would otherwise write outside outDir.
    // Refuse the write and report the offending path so it can be fixed.
    const fullPathResolved = resolvePath(fullPath);
    if (!isWithinDir(fullPathResolved, outDirResolved)) {
      skipped.push({
        path: portable.path,
        reason: `path-traversal: resolved write target "${fullPathResolved}" escapes export root "${outDirResolved}"`,
      });
      continue;
    }
    mkdirSync(dirname(fullPath), { recursive: true });
    writeFileSync(fullPath, toPortableMarkdown(portable));
    notesWritten++;

    // Copy attachment binaries when assetsDir is wired. A failure affects
    // only that attachment: assetsDir state may legitimately lag the DB
    // (e.g. file evicted while the row persists).
    if (!assetsDirResolved || !portable.attachments) continue;
    for (const att of portable.attachments) {
      const skipReason = copyAttachmentBinary(att, assetsDirResolved, attachmentsRoot, attachmentsRootResolved);
      if (skipReason === null) {
        attachmentsWritten++;
      } else {
        skippedAttachments.push({
          note_id: portable.id,
          attachment_id: att.id,
          path: att.path,
          reason: skipReason,
        });
      }
    }
  }

  // Surface skips without aborting — a partial export is more useful than
  // no export. Programmatic callers inspect the return value instead.
  for (const s of skipped) {
    // eslint-disable-next-line no-console
    console.warn(`[export] skipped note (path="${s.path ?? "<unpathed>"}"): ${s.reason}`);
  }
  for (const s of skippedAttachments) {
    // eslint-disable-next-line no-console
    console.warn(`[export] skipped attachment (note=${s.note_id}, path="${s.path}"): ${s.reason}`);
  }

  return {
    notes: notesWritten,
    schemas: schemasWritten,
    attachments: attachmentsWritten,
    filtered_by_since: since !== undefined,
    skipped_traversal: skipped.length,
    skipped_notes: skipped,
    skipped_attachments: skippedAttachments,
  };
}

/**
 * Copy one attachment binary from the assets directory into the sidecar at
 * `<attachmentsRoot>/<att.id>/<basename(att.path)>`. Using the attachment id
 * as the directory name keeps equal basenames from colliding.
 *
 * @param att                     Attachment reference to copy.
 * @param assetsDirResolved       Resolved assets directory (source root).
 * @param attachmentsRoot         Sidecar attachments directory (dest root).
 * @param attachmentsRootResolved Resolved form of `attachmentsRoot`.
 * @returns `null` when the file was copied, otherwise the reason it was skipped.
 */
function copyAttachmentBinary(
  att: PortableAttachmentRef,
  assetsDirResolved: string,
  attachmentsRoot: string,
  attachmentsRootResolved: string,
): string | null {
  const srcResolved = resolvePath(join(assetsDirResolved, att.path));
  if (!isWithinDir(srcResolved, assetsDirResolved)) {
    return `path-traversal: source "${srcResolved}" escapes assetsDir "${assetsDirResolved}"`;
  }
  if (!existsSync(srcResolved)) {
    return `source file missing at "${srcResolved}"`;
  }
  // An empty or directory-valued path passes the containment check but
  // cannot be copied; skip it instead of letting copyFileSync throw.
  let isRegularFile: boolean;
  try {
    isRegularFile = statSync(srcResolved).isFile();
  } catch {
    isRegularFile = false;
  }
  if (!isRegularFile) {
    return `source is not a regular file at "${srcResolved}"`;
  }

  const destDir = join(attachmentsRoot, att.id);
  const destResolved = resolvePath(join(destDir, basename(att.path)));
  if (!isWithinDir(destResolved, attachmentsRootResolved)) {
    return `path-traversal: dest "${destResolved}" escapes attachments root "${attachmentsRootResolved}"`;
  }
  try {
    mkdirSync(destDir, { recursive: true });
    copyFileSync(srcResolved, destResolved);
  } catch (err: unknown) {
    const detail = err instanceof Error ? err.message : String(err);
    return `copy failed: ${detail}`;
  }
  return null;
}
661
+
662
/**
 * True when a tag record carries at least one schema-shaped field: a
 * non-empty description, at least one field, at least one relationship, or
 * at least one parent name.
 */
function hasSchemaContent(tag: TagRecord): boolean {
  return (
    (tag.description !== undefined && tag.description.length > 0) ||
    (tag.fields ? Object.keys(tag.fields).length > 0 : false) ||
    (tag.relationships ? Object.keys(tag.relationships).length > 0 : false) ||
    (tag.parent_names ? tag.parent_names.length > 0 : false)
  );
}
669
+
670
/**
 * Flatten a tag name into a schema filename stem. Tag names may contain `/`
 * (sub-tag hierarchy); every `/` and `\` becomes `__` so the sidecar stays
 * a flat directory: `.parachute/schemas/<safe>.yaml`. The real tag name is
 * stored in the file's `name:` key, so the filename is not the source of
 * truth for it.
 */
function sanitizeTagFilename(tag: string): string {
  return tag.split(/[/\\]/).join("__");
}
679
+
680
/**
 * Path-traversal guard (vault#317 F3). Returns true iff `candidate` is
 * exactly `root` or sits beneath it. Both inputs must be **already
 * resolved** (no `..` segments).
 *
 * The comparison is against `root` plus a trailing separator rather than a
 * bare `startsWith(root)`, so a sibling such as `/out-evil/x` is not treated
 * as being inside `/out`. A root that already ends with the separator (the
 * filesystem root, e.g. `/`) is used as-is — appending a second separator
 * would reject every child.
 *
 * @param candidate Resolved path to test.
 * @param root      Resolved directory that must contain `candidate`.
 */
function isWithinDir(candidate: string, root: string): boolean {
  if (candidate === root) return true;
  const prefix = root.endsWith(pathSep) ? root : root + pathSep;
  return candidate.startsWith(prefix);
}
691
+
692
/**
 * Incremental-export filter: true when the note's last change is at or after
 * `since`. The note's `updatedAt` is used, falling back to `createdAt`.
 *
 * Timestamps are compared as instants when both parse as dates, so differing
 * UTC offsets or fractional-second precision do not skew the result. When
 * either side cannot be parsed, the original lexicographic string comparison
 * is used.
 *
 * @param note  Note under consideration.
 * @param since Lower bound (inclusive) supplied by the caller.
 */
function shouldIncludeForSince(note: Note, since: string): boolean {
  const stamp = note.updatedAt ?? note.createdAt;
  const stampMs = Date.parse(stamp);
  const sinceMs = Date.parse(since);
  if (Number.isNaN(stampMs) || Number.isNaN(sinceMs)) return stamp >= since;
  return stampMs >= sinceMs;
}
696
+
697
+ // ---------------------------------------------------------------------------
698
+ // Vault-level import — read a portable-md directory back into a vault
699
+ // ---------------------------------------------------------------------------
700
+
701
/** Options accepted by `importPortableVault`. */
export interface ImportOptions {
  /** Directory to read from (the output of `exportVaultToDir`). */
  inDir: string;
  /**
   * Wipe the vault before importing (DELETE FROM notes; DELETE FROM tags;
   * cascade-deletes flow through note_tags/links/attachments). This is the
   * disaster-recovery path; the CLI gates it behind an explicit confirm
   * prompt. Defaults to `false`, which gives upsert-by-id semantics:
   * existing notes are updated and new ones created.
   */
  blowAway?: boolean;
  /**
   * Assets directory to restore attachment binaries into.
   *
   * - Set: binaries are restored from
   *   `<inDir>/.parachute/attachments/<id>/<basename>` to
   *   `<assetsDir>/<frontmatter-path>`.
   * - Unset: the DB rows are created but binaries are left untouched
   *   (handy when assetsDir isn't relevant or attachments weren't exported).
   */
  assetsDir?: string;
  /**
   * Dry run: parse the export and report what would happen without writing
   * to the store or the filesystem. The returned stats still count
   * "would-create" / "would-update" so the operator can audit.
   */
  dryRun?: boolean;
}
727
+
728
/** Summary returned by `importPortableVault`. */
export interface ImportStats {
  /** Notes created by the import. */
  notes_created: number;
  /** Existing notes updated by the import. */
  notes_updated: number;
  /** Tag schemas restored. */
  schemas_restored: number;
  /** Typed links restored. */
  links_restored: number;
  /** Attachments restored. */
  attachments_restored: number;
  /**
   * One entry per skipped link: the target note ID was missing after the
   * import. Common when a forward reference points at a note that was not
   * part of the export.
   */
  skipped_links: { source_id: string; target_id: string; relationship: string; reason: string }[];
  /** One entry per skipped attachment, with the reason. */
  skipped_attachments: { note_id: string; attachment_id: string; reason: string }[];
  /** Number of notes removed; set when the caller passed `blowAway: true`. */
  notes_wiped: number;
}
742
+
743
/**
 * Read a portable-md export directory back into a vault. Lossless
 * counterpart to `exportVaultToDir`. With `blowAway: true`, replaces
 * vault state byte-equivalent to the export (the disaster-recovery
 * path). Without it, upserts by frontmatter `id` — existing notes
 * updated in place, new notes created.
 *
 * Restoration order (matters for forward refs):
 *   1. Tag schemas (so notes with schema-bearing tags validate cleanly).
 *   2. Notes — content/path/metadata/tags. `restoreNoteTimestamps` is
 *      called after each write so created_at AND updated_at land at
 *      their exported values.
 *   3. Typed links — only once all target notes exist (forward-ref
 *      pattern). Wikilinks rebuild themselves from `[[brackets]]` in
 *      content via the final `syncAllWikilinks` pass.
 *   4. Attachments — DB row first, then file copy from the sidecar to
 *      `<assetsDir>/<path>` when `assetsDir` is set.
 *
 * Dry-run (`opts.dryRun`) performs no store writes and no filesystem
 * writes. Its counts are computed against the state the vault *would*
 * be in: a note counts as "updated" when its id was already seen in this
 * export or exists in the store (ignored when `blowAway` is also set,
 * since the store would have been emptied), and a link target counts as
 * present when it is part of the export or would survive in the store.
 * `notes_wiped` is not simulated and stays 0 in a dry run.
 *
 * See vault#308 PR 2.
 *
 * @param store Vault store to import into.
 * @param opts  Source directory plus import switches (see `ImportOptions`).
 * @returns Counters and per-item skip details for the run.
 * @throws Error when `<inDir>/<SIDECAR_DIR>/vault.yaml` is missing, i.e.
 *         the directory is not a portable-md export.
 */
export async function importPortableVault(
  store: Store,
  opts: ImportOptions,
): Promise<ImportStats> {
  const inDir = opts.inDir;
  const inDirResolved = resolvePath(inDir);
  const sidecar = join(inDir, SIDECAR_DIR);
  if (!existsSync(join(sidecar, "vault.yaml"))) {
    throw new Error(
      `not a portable-md export: missing ${join(SIDECAR_DIR, "vault.yaml")} in "${inDir}". ` +
      `If this is a legacy Obsidian-shape directory, use the obsidian.ts \`parseObsidianVault\` ` +
      `path instead — vault#308 importer only handles the portable-md format.`,
    );
  }

  const stats: ImportStats = {
    notes_created: 0,
    notes_updated: 0,
    schemas_restored: 0,
    links_restored: 0,
    attachments_restored: 0,
    skipped_links: [],
    skipped_attachments: [],
    notes_wiped: 0,
  };

  // In a dry run nothing is wiped, so store lookups must be ignored when
  // the real run would have emptied the vault first.
  const storeSurvives = !opts.blowAway;

  // 1. Optional wipe. Notes are deleted via the public Store API so
  //    hooks fire (callers depend on `attachment.deleted` hooks for
  //    assets-dir cleanup; we don't bypass that on blow-away).
  if (opts.blowAway && !opts.dryRun) {
    const existing = await store.queryNotes({ limit: 1_000_000 });
    for (const note of existing) {
      await store.deleteNote(note.id);
    }
    stats.notes_wiped = existing.length;
    // Clear tag rows too — `deleteNote` clears note_tags via FK cascade
    // but leaves the `tags` table rows in place (orphaned schemas).
    const tagRecords = await store.listTagRecords();
    for (const tag of tagRecords) {
      await store.deleteTag(tag.tag);
    }
  }

  // 2. Tag schemas — restore before notes so any tag a note carries can
  //    validate against its schema on insert.
  const schemasDir = join(sidecar, "schemas");
  if (existsSync(schemasDir)) {
    const schemasDirResolved = resolvePath(schemasDir);
    for (const entry of readdirSync(schemasDir)) {
      if (!entry.endsWith(".yaml")) continue;
      const fullPath = join(schemasDir, entry);
      // Belt-and-suspenders containment check on the read side.
      // NOTE(review): if `resolvePath` is a purely lexical resolve it will
      // not see through symlinks — confirm whether a realpath is wanted.
      if (!isWithinDir(resolvePath(fullPath), schemasDirResolved)) continue;
      const text = readFileSync(fullPath, "utf-8");
      // The schema file is a bare YAML doc (no `---` markers); wrap it in
      // fences so `parseFrontmatter` can handle it via the same code path.
      const wrapped = `---\n${text}${text.endsWith("\n") ? "" : "\n"}---\n`;
      const { frontmatter } = parseFrontmatter(wrapped);
      const tagName = typeof frontmatter.name === "string" ? frontmatter.name : null;
      if (!tagName) continue;
      if (opts.dryRun) {
        stats.schemas_restored++;
        continue;
      }
      await store.upsertTagRecord(tagName, {
        description: (frontmatter.description as string | null | undefined) ?? null,
        fields: (frontmatter.fields as Record<string, unknown> | null | undefined) as any ?? null,
        relationships: (frontmatter.relationships as Record<string, unknown> | null | undefined) as any ?? null,
        parent_names: (frontmatter.parent_names as string[] | null | undefined) ?? null,
      });
      stats.schemas_restored++;
    }
  }

  // 3. Notes. Walk every .md file under inDir (dot-dirs already
  //    excluded), parse, upsert. Track (id → portable) so typed links and
  //    attachments can be replayed once every note exists.
  const seenNotes = new Map<string, PortableNote>();
  for (const filePath of walkMarkdownFiles(inDir)) {
    // Containment check on the resolved path (same caveat as above).
    const resolved = resolvePath(filePath);
    if (!isWithinDir(resolved, inDirResolved)) continue;

    const raw = readFileSync(filePath, "utf-8");
    const { frontmatter, content } = parseFrontmatter(raw);

    const id = typeof frontmatter.id === "string" ? frontmatter.id : null;
    if (!id) {
      // No `id` → legacy obsidian-style note. Skip with a warning; the
      // importer is for the portable-md format, the legacy path stays
      // on the obsidian.ts parseObsidianVault flow.
      // eslint-disable-next-line no-console
      console.warn(`[import] skipped "${filePath}": no \`id\` in frontmatter (legacy obsidian format — use parseObsidianVault)`);
      continue;
    }
    const created_at = typeof frontmatter.created_at === "string" ? frontmatter.created_at : new Date().toISOString();
    const updated_at = typeof frontmatter.updated_at === "string" ? frontmatter.updated_at : created_at;
    const path = typeof frontmatter.path === "string" ? frontmatter.path : undefined;
    const tags = Array.isArray(frontmatter.tags) ? frontmatter.tags.filter((t): t is string => typeof t === "string") : undefined;
    const metadata = (frontmatter.metadata && typeof frontmatter.metadata === "object" && !Array.isArray(frontmatter.metadata))
      ? frontmatter.metadata as Record<string, unknown>
      : undefined;
    const links = Array.isArray(frontmatter.links) ? frontmatter.links : undefined;
    const attachments = Array.isArray(frontmatter.attachments) ? frontmatter.attachments : undefined;

    const portable: PortableNote = {
      id,
      content,
      created_at,
      updated_at,
      ...(path ? { path } : {}),
      ...(tags && tags.length > 0 ? { tags } : {}),
      ...(metadata ? { metadata } : {}),
      ...(links ? { links: links as PortableLink[] } : {}),
      ...(attachments ? { attachments: attachments as PortableAttachmentRef[] } : {}),
    };
    // Must be read before the `set` below: a second file carrying the same
    // id hits the update path in a real run, so dry-run mirrors that.
    const seenEarlierInExport = seenNotes.has(id);
    seenNotes.set(id, portable);

    if (opts.dryRun) {
      const wouldExist =
        seenEarlierInExport || (storeSurvives && Boolean(await store.getNote(id)));
      if (wouldExist) stats.notes_updated++; else stats.notes_created++;
      continue;
    }

    // Upsert by id. createNote will throw on duplicate id; check first.
    const existing = await store.getNote(id);
    if (existing) {
      // **Upsert merge policy** (vault#319 F2 — pinned here so future
      // edits don't drift):
      //
      //   - `content`:  ALWAYS replaced from the import (the import always
      //                 has content, even if empty string).
      //   - `tags`:     REPLACED WHOLESALE — existing tags removed,
      //                 imported set applied.
      //   - `path`:     REPLACED if the frontmatter declares one;
      //                 otherwise the existing vault path is preserved
      //                 (upsert-by-field, NOT replace-by-id).
      //   - `metadata`: REPLACED if the frontmatter declares one;
      //                 otherwise existing metadata is preserved.
      //
      // For a strict replace-by-id ("the vault should look exactly like
      // the export, no surviving fields"), use `--blow-away`.
      //
      // Store-level updateNote is called without `if_updated_at`; per the
      // original note here, the precondition gate lives at the HTTP/MCP
      // layer and the Store accepts unconditional writes.
      await store.updateNote(id, {
        content,
        ...(path !== undefined ? { path } : {}),
        ...(metadata ? { metadata } : {}),
      });
      // Tags: delete existing, re-tag with imported set.
      if (existing.tags && existing.tags.length > 0) {
        await store.untagNote(id, existing.tags);
      }
      if (tags && tags.length > 0) {
        await store.tagNote(id, tags);
      }
      stats.notes_updated++;
    } else {
      await store.createNote(content, {
        id,
        ...(path ? { path } : {}),
        ...(tags && tags.length > 0 ? { tags } : {}),
        ...(metadata ? { metadata } : {}),
        created_at,
      });
      stats.notes_created++;
    }
    // Restore both timestamps explicitly:
    //   1. createNote sets updated_at = created_at; we want the exported
    //      updated_at (may differ if the note was edited).
    //   2. the update path bumped updated_at to now(); peg it back.
    await store.restoreNoteTimestamps(id, created_at, updated_at);
  }

  // 4. Typed links — replay only now that all notes exist. Wikilinks
  //    (which the exporter excludes from `links:`) rebuild from content
  //    brackets via syncAllWikilinks.
  for (const [sourceId, portable] of seenNotes) {
    if (!portable.links) continue;
    for (const link of portable.links) {
      // Frontmatter is external input: the array may contain entries that
      // are not `{ target, relationship }` objects. Skip those instead of
      // handing garbage to the store.
      const target: unknown = (link as { target?: unknown } | null)?.target;
      const relationship: unknown = (link as { relationship?: unknown } | null)?.relationship;
      if (typeof target !== "string" || typeof relationship !== "string") {
        stats.skipped_links.push({
          source_id: sourceId,
          target_id: typeof target === "string" ? target : "",
          relationship: typeof relationship === "string" ? relationship : "",
          reason: "malformed link entry: `target` and `relationship` must be strings",
        });
        continue;
      }
      // Confirm target exists. Forward refs to notes the export didn't
      // include (subset export) are skipped rather than aborting. In a
      // dry run the notes were never written, so membership in the export
      // itself is what decides.
      const targetPresent = opts.dryRun
        ? seenNotes.has(target) || (storeSurvives && Boolean(await store.getNote(target)))
        : Boolean(await store.getNote(target));
      if (!targetPresent) {
        stats.skipped_links.push({
          source_id: sourceId,
          target_id: target,
          relationship,
          reason: `target note ${target} not present in vault after import`,
        });
        continue;
      }
      if (opts.dryRun) {
        stats.links_restored++;
        continue;
      }
      await store.createLink(sourceId, target, relationship, link.metadata);
      stats.links_restored++;
    }
  }

  // 5. Attachments — DB row then file copy (when assetsDir wired).
  //    Without assetsDir the DB row is still restored.
  const assetsDirResolved = opts.assetsDir ? resolvePath(opts.assetsDir) : undefined;
  const attachmentsRootResolved = resolvePath(join(sidecar, "attachments"));
  for (const [noteId, portable] of seenNotes) {
    if (!portable.attachments) continue;
    for (const att of portable.attachments) {
      // Same input-hygiene rule as for links: a non-object entry or one
      // without a usable `path` cannot be restored.
      const rawId: unknown = (att as { id?: unknown } | null)?.id;
      const exportedAttId = typeof rawId === "string" ? rawId : String(rawId ?? "");
      const attPath: unknown = (att as { path?: unknown } | null)?.path;
      if (typeof attPath !== "string" || attPath === "") {
        stats.skipped_attachments.push({
          note_id: noteId,
          attachment_id: exportedAttId,
          reason: "malformed attachment entry: `path` must be a non-empty string",
        });
        continue;
      }
      if (opts.dryRun) {
        stats.attachments_restored++;
        continue;
      }
      // Known limitation (PR 2 scope, called out in CHANGELOG): the public
      // `addAttachment` mints a fresh id, so exported attachment ids are
      // NOT preserved. Refs still resolve by (note_id, path); only the
      // att.id values change, which makes a re-export byte-different.
      // TODO: surface a `restoreAttachment(id, noteId, path, mimeType, metadata, createdAt)`
      //   import-only method on the Store interface (parallel to
      //   restoreNoteTimestamps).
      const attachment = await store.addAttachment(
        noteId,
        attPath,
        att.mime_type,
        att.metadata,
      );

      // File copy: from sidecar to assetsDir.
      if (assetsDirResolved) {
        // Source lives under the EXPORTED id — that's what the exporter
        // wrote, even though the new DB row has a fresh `attachment.id`.
        const srcFile = join(sidecar, "attachments", exportedAttId, basename(attPath));
        const srcResolved = resolvePath(srcFile);
        if (!isWithinDir(srcResolved, attachmentsRootResolved)) {
          stats.skipped_attachments.push({
            note_id: noteId,
            attachment_id: attachment.id,
            reason: `path-traversal on source: "${srcResolved}" escapes attachments root`,
          });
          continue;
        }
        if (!existsSync(srcResolved)) {
          stats.skipped_attachments.push({
            note_id: noteId,
            attachment_id: attachment.id,
            reason: `source attachment file missing at "${srcResolved}"`,
          });
          continue;
        }
        const destFile = join(assetsDirResolved, attPath);
        const destResolved = resolvePath(destFile);
        if (!isWithinDir(destResolved, assetsDirResolved)) {
          stats.skipped_attachments.push({
            note_id: noteId,
            attachment_id: attachment.id,
            reason: `path-traversal on dest: "${destResolved}" escapes assetsDir`,
          });
          continue;
        }
        mkdirSync(dirname(destResolved), { recursive: true });
        copyFileSync(srcResolved, destResolved);
      }
      stats.attachments_restored++;
    }
  }

  // 6. Sync wikilinks across the imported set so `[[brackets]]` in
  //    content rebuild link rows for the imported notes.
  if (!opts.dryRun) {
    await store.syncAllWikilinks();
  }

  return stats;
}
1073
+
1074
+ // ---------------------------------------------------------------------------
1075
+ // Parser — shared with `obsidian.ts` (legacy back-compat) via re-export
1076
+ // ---------------------------------------------------------------------------
1077
+
1078
/**
 * Parse YAML frontmatter from markdown content. Returns
 * `{ frontmatter, content }` where `content` has the frontmatter stripped.
 *
 * Hand-rolled parser — no YAML library dep. Handles the subset of YAML
 * the emitter produces plus the legacy obsidian shapes the importer has
 * to accept: bare strings, single-quoted strings, booleans, integers,
 * floats, inline arrays `[a, b]`, block arrays, block objects, and the
 * `key: { inline }` form.
 *
 * Both LF and CRLF line endings are accepted inside the frontmatter; the
 * YAML block is normalised to LF before parsing. The body (`content`) is
 * returned with its original line endings, minus the single line break
 * that follows the closing fence.
 *
 * This is the canonical parser for the portable-md format. The legacy
 * `parseFrontmatter` in `obsidian.ts` delegates here.
 *
 * @param raw Full file text.
 * @returns Parsed frontmatter (empty object when `raw` has no opening
 *          `---` or no closing fence) and the remaining body text.
 */
export function parseFrontmatter(raw: string): {
  frontmatter: Record<string, unknown>;
  content: string;
} {
  if (!raw.startsWith("---")) return { frontmatter: {}, content: raw };
  const endIdx = raw.indexOf("\n---", 3);
  if (endIdx === -1) return { frontmatter: {}, content: raw };
  // Skip the opening fence together with its line terminator.
  const yamlStart = raw.startsWith("---\r\n") ? 5 : 4;
  // Drop the `\r` of every CRLF pair (the last line's `\r` sits right
  // before `endIdx`, hence the `$` alternative). Without this the
  // line-oriented parser sees `key: value\r` and matches nothing.
  const yamlBlock = raw.slice(yamlStart, endIdx).replace(/\r(?=\n)|\r$/g, "");
  const content = raw.slice(endIdx + 4).replace(/^\r?\n/, "");
  return { frontmatter: parseBlock(yamlBlock, 0).value, content };
}
1102
+
1103
/** Outcome of parsing one YAML block. */
interface ParseResult {
  /** Key/value pairs found in the block. */
  value: Record<string, unknown>;
  /** Number of lines consumed — lets a caller step past a nested block. */
  consumed: number;
}
1107
+
1108
/**
 * Parse a YAML block at a given indent depth. Returns the parsed object
 * plus the line count consumed (so callers can advance past nested blocks).
 *
 * The parser handles the shapes the emitter produces. Unknown shapes
 * (anchors, references, multi-document streams, multi-line strings) are
 * not supported — out of scope for the export format.
 *
 * @param text       Block text; split on `\n`.
 * @param baseIndent Number of leading spaces the block's keys sit at.
 */
function parseBlock(text: string, baseIndent: number): ParseResult {
  return parseBlockLines(text.split("\n"), 0, baseIndent);
}

/**
 * Index-based worker behind `parseBlock`: parses `lines[start..]` as a
 * mapping whose keys sit at `baseIndent`, stopping at the first line that
 * is indented less. Working on the shared `lines` array lets nested
 * blocks recurse without re-joining and re-splitting the remaining text.
 */
function parseBlockLines(lines: string[], start: number, baseIndent: number): ParseResult {
  const result: Record<string, unknown> = {};
  let i = start;
  while (i < lines.length) {
    const line = lines[i]!;
    if (line.trim() === "") { i++; continue; }
    const indent = countLeadingSpaces(line);
    if (indent < baseIndent) break; // dedent → the enclosing block resumes
    if (indent > baseIndent) { i++; continue; } // stray over-indented line; ignore

    const kv = line.slice(baseIndent).match(/^([\w][\w-]*):\s*(.*)$/);
    if (!kv) { i++; continue; }
    const key = kv[1]!;
    const valueText = kv[2]!.trim();

    if (valueText !== "") {
      result[key] = parseScalarOrInline(valueText);
      i++;
      continue;
    }

    // `key:` with nothing after it — the value (if any) is a block that
    // follows. Peek at the next non-blank line to decide what kind.
    const peekIdx = peekNextContent(lines, i + 1);
    if (peekIdx === -1) {
      result[key] = "";
      i++;
      continue;
    }
    const peekLine = lines[peekIdx]!;
    const peekIndent = countLeadingSpaces(peekLine);
    const startsSequence = peekLine.slice(peekIndent).startsWith("- ");

    if (startsSequence && peekIndent >= baseIndent) {
      // Block sequence. YAML allows the `- ` items to sit at the SAME
      // indent as the parent key (`tags:\n- a\n- b`), not only deeper.
      const { value, consumed } = parseArrayBlock(lines, i + 1, peekIndent);
      result[key] = value;
      i = i + 1 + consumed;
    } else if (peekIndent > baseIndent) {
      // Nested mapping.
      const { value, consumed } = parseBlockLines(lines, i + 1, peekIndent);
      result[key] = value;
      i = i + 1 + consumed;
    } else {
      // Next line is a sibling key (or a dedent) — empty value.
      result[key] = "";
      i++;
    }
  }
  return { value: result, consumed: i - start };
}
1167
+
1168
/**
 * Find the first line at or after `from` that is not blank
 * (i.e. is non-empty after trimming).
 *
 * @returns The index of that line, or `-1` when only blank lines remain.
 */
function peekNextContent(lines: string[], from: number): number {
  let cursor = from;
  while (cursor < lines.length) {
    if (lines[cursor]!.trim() !== "") return cursor;
    cursor += 1;
  }
  return -1;
}
1174
+
1175
/**
 * Count the run of space characters (U+0020 only — tabs do not count)
 * at the start of `s`.
 */
function countLeadingSpaces(s: string): number {
  const firstOther = s.search(/[^ ]/);
  return firstOther === -1 ? s.length : firstOther;
}
1180
+
1181
/**
 * Parse an array block: lines starting with `- ` at `arrayIndent`.
 * Returns the array + lines consumed (counted from `start`).
 *
 * Each `- ` introduces an item:
 *   - `- value`       → scalar item.
 *   - `- key: value`  (with possible following indented keys) → object item.
 *
 * An item is only treated as an object when the colon is followed by
 * whitespace or ends the line, as YAML requires. `- https://example.com`
 * and `- 12:30` are therefore scalars, not `{ https: "//example.com" }`.
 *
 * @param lines       All lines of the enclosing document/block.
 * @param start       Index of the first line to examine.
 * @param arrayIndent Leading-space count of the `- ` markers.
 */
function parseArrayBlock(lines: string[], start: number, arrayIndent: number): { value: unknown[]; consumed: number } {
  const result: unknown[] = [];

  // Resolve the value of a key found on `keyLine` inside an object item
  // whose keys sit at `itemIndent`. Shared by the item's first key (on the
  // `- ` line) and its sibling keys. `next` is the index of the first line
  // after the value.
  const readValue = (
    valueText: string,
    keyLine: number,
    itemIndent: number,
  ): { value: unknown; next: number } => {
    if (valueText !== "") {
      return { value: parseScalarOrInline(valueText), next: keyLine + 1 };
    }
    const peekIdx = peekNextContent(lines, keyLine + 1);
    if (peekIdx === -1) return { value: "", next: keyLine + 1 };
    const peekLine = lines[peekIdx]!;
    const peekIndent = countLeadingSpaces(peekLine);
    const startsSequence = peekLine.slice(peekIndent).startsWith("- ");
    if (startsSequence && peekIndent >= itemIndent) {
      // Nested sequence — either indented deeper or, as YAML permits,
      // aligned with the key that owns it.
      const { value, consumed } = parseArrayBlock(lines, keyLine + 1, peekIndent);
      return { value, next: keyLine + 1 + consumed };
    }
    if (peekIndent > itemIndent) {
      // Nested mapping under this key.
      const block = lines.slice(keyLine + 1).join("\n");
      const { value, consumed } = parseBlock(block, peekIndent);
      return { value, next: keyLine + 1 + consumed };
    }
    return { value: "", next: keyLine + 1 };
  };

  let i = start;
  while (i < lines.length) {
    const line = lines[i]!;
    if (line.trim() === "") { i++; continue; }
    const indent = countLeadingSpaces(line);
    if (indent < arrayIndent) break; // dedent → array is over
    if (indent > arrayIndent) { i++; continue; } // stray over-indented line
    if (!line.slice(indent).startsWith("- ")) break; // sibling key of the parent

    // First content after `- `.
    const after = line.slice(indent + 2).trim();
    // Scalar item (`- foo`) or object item (`- key: value` / `- key:`)?
    const objMatch = after.match(/^([\w][\w-]*):(?:\s+(.*))?$/);
    if (!objMatch) {
      result.push(parseScalarOrInline(after));
      i++;
      continue;
    }

    // Object item: the first key lives on the `- ` line, further keys are
    // the following lines at exactly `indent + 2`.
    const itemIndent = indent + 2;
    const itemObj: Record<string, unknown> = {};
    const first = readValue((objMatch[2] ?? "").trim(), i, itemIndent);
    itemObj[objMatch[1]!] = first.value;
    i = first.next;

    while (i < lines.length) {
      const sib = lines[i]!;
      if (sib.trim() === "") { i++; continue; }
      const sibIndent = countLeadingSpaces(sib);
      if (sibIndent !== itemIndent) break;
      const sibText = sib.slice(sibIndent);
      if (sibText.startsWith("- ")) break;
      const sibKv = sibText.match(/^([\w][\w-]*):\s*(.*)$/);
      if (!sibKv) break;
      const parsed = readValue(sibKv[2]!.trim(), i, itemIndent);
      itemObj[sibKv[1]!] = parsed.value;
      i = parsed.next;
    }

    result.push(itemObj);
  }
  return { value: result, consumed: i - start };
}
1290
+
1291
/**
 * Parse a scalar or inline form (`[a, b]`, `{ k: v }`). Used for the
 * value portion of `key: value` lines.
 *
 * Inline collections are split on top-level commas only: commas inside a
 * quoted item (`['a, b', c]`) or inside a nested inline collection
 * (`[[1, 2], [3]]`, `{ k: [1, 2] }`) do not separate items.
 */
function parseScalarOrInline(s: string): unknown {
  if (s.startsWith("[") && s.endsWith("]")) {
    const inner = s.slice(1, -1).trim();
    if (inner === "") return [];
    return splitInlineItems(inner).map((part) => parseScalarOrInline(part.trim()));
  }
  if (s.startsWith("{") && s.endsWith("}")) {
    const inner = s.slice(1, -1).trim();
    if (inner === "") return {};
    const out: Record<string, unknown> = {};
    for (const part of splitInlineItems(inner)) {
      const m = part.trim().match(/^([\w][\w-]*):\s*(.*)$/);
      if (m) out[m[1]!] = parseScalarOrInline(m[2]!.trim());
    }
    return out;
  }
  return unquote(s);
}

/**
 * Split the inside of an inline `[...]` / `{...}` on commas that are
 * neither inside a quoted scalar nor inside a nested bracket pair.
 *
 * A quote character only opens a quoted scalar at the start of a token
 * (after `,` `[` `{` `:` or at the very beginning), so an apostrophe in
 * the middle of a plain word (`it's`) is left alone. For input without
 * quotes or brackets the result equals `inner.split(",")`.
 */
function splitInlineItems(inner: string): string[] {
  const items: string[] = [];
  let current = "";
  let quote: "'" | '"' | null = null;
  let depth = 0;
  // Last non-blank character seen outside quotes; "," doubles as the
  // "start of token" marker.
  let prev = ",";
  for (let i = 0; i < inner.length; i++) {
    const ch = inner[i]!;
    if (quote !== null) {
      current += ch;
      if (quote === '"' && ch === "\\" && i + 1 < inner.length) {
        // Backslash escape inside double quotes: take the next char verbatim.
        current += inner[++i]!;
      } else if (ch === quote) {
        if (quote === "'" && inner[i + 1] === "'") {
          // `''` is an escaped apostrophe inside single quotes.
          current += inner[++i]!;
        } else {
          quote = null;
          prev = ch;
        }
      }
      continue;
    }
    if ((ch === "'" || ch === '"') && ",[{:".includes(prev)) {
      quote = ch;
      current += ch;
      continue;
    }
    if (ch === "[" || ch === "{") depth++;
    else if ((ch === "]" || ch === "}") && depth > 0) depth--;
    if (ch === "," && depth === 0) {
      items.push(current);
      current = "";
      prev = ",";
      continue;
    }
    current += ch;
    if (ch !== " " && ch !== "\t") prev = ch;
  }
  items.push(current);
  return items;
}
1315
+
1316
/** Single-character escapes of YAML 1.2 double-quoted scalars → decoded text. */
const DOUBLE_QUOTED_ESCAPES: ReadonlyMap<string, string> = new Map([
  ["\\", "\\"],
  ["\"", "\""],
  ["/", "/"],
  [" ", " "],
  ["\t", "\t"],
  ["0", "\0"],
  ["a", "\x07"],
  ["b", "\b"],
  ["t", "\t"],
  ["n", "\n"],
  ["v", "\v"],
  ["f", "\f"],
  ["r", "\r"],
  ["e", "\x1b"],
  ["N", "\x85"],
  ["_", "\xa0"],
  ["L", "\u2028"],
  ["P", "\u2029"],
]);

/** Hex escapes of YAML 1.2 double-quoted scalars → number of hex digits. */
const HEX_ESCAPE_WIDTHS: ReadonlyMap<string, number> = new Map([
  ["x", 2],
  ["u", 4],
  ["U", 8],
]);

/**
 * Decode a single YAML scalar token.
 *
 *   - `'...'`  → string, with `''` collapsed to `'`.
 *   - `"..."`  → string, with backslash escapes decoded: the set the
 *                emitter produces (`\\ \" \n \r \t \v \f \xNN`) plus the
 *                remaining YAML 1.2 escapes (`\0 \a \b \e \/ \N \_ \L \P
 *                \uXXXX \UXXXXXXXX`). An unknown or malformed escape is
 *                kept literally.
 *   - `true` / `false` / `null` → the corresponding JS value.
 *   - integer / decimal literal → number.
 *   - anything else → the input string unchanged.
 *
 * A token must be at least two characters long to count as quoted, so a
 * lone `'` or `"` is returned as-is rather than as an empty string.
 */
function unquote(s: string): unknown {
  const isWrappedIn = (q: string): boolean => s.length >= 2 && s.startsWith(q) && s.endsWith(q);

  if (isWrappedIn("'")) {
    return s.slice(1, -1).replace(/''/g, "'");
  }
  if (isWrappedIn('"')) {
    const body = s.slice(1, -1);
    let out = "";
    let i = 0;
    while (i < body.length) {
      const ch = body[i]!;
      if (ch !== "\\" || i + 1 >= body.length) {
        out += ch;
        i += 1;
        continue;
      }
      const next = body[i + 1]!;

      const simple = DOUBLE_QUOTED_ESCAPES.get(next);
      if (simple !== undefined) {
        out += simple;
        i += 2;
        continue;
      }

      const width = HEX_ESCAPE_WIDTHS.get(next);
      if (width !== undefined) {
        const hex = body.slice(i + 2, i + 2 + width);
        if (hex.length === width && /^[0-9a-fA-F]+$/.test(hex)) {
          const codePoint = parseInt(hex, 16);
          if (codePoint <= 0x10ffff) {
            out += String.fromCodePoint(codePoint);
            i += 2 + width;
            continue;
          }
        }
        // Malformed hex escape — keep the backslash literally and let the
        // following characters be copied through as ordinary text.
        out += ch;
        i += 1;
        continue;
      }

      // Unknown escape — preserve both characters literally.
      out += ch + next;
      i += 2;
    }
    return out;
  }
  if (s === "true") return true;
  if (s === "false") return false;
  if (s === "null") return null;
  if (/^-?\d+$/.test(s)) return parseInt(s, 10);
  if (/^-?\d+\.\d+$/.test(s)) return parseFloat(s);
  return s;
}
1375
+
1376
+ // ---------------------------------------------------------------------------
1377
+ // Directory walking — shared with obsidian.ts
1378
+ // ---------------------------------------------------------------------------
1379
+
1380
/**
 * Recursively list all `.md` files (extension matched case-insensitively)
 * under `dir`, sorted. Entries whose name starts with `.` (which covers
 * `.parachute/` and `.obsidian/`) and `node_modules` are skipped.
 *
 * Entries that cannot be stat'ed because they no longer exist — a
 * dangling symlink, or a file removed while the walk is running — are
 * skipped instead of aborting the whole walk. Any other stat error is
 * rethrown.
 */
export function walkMarkdownFiles(dir: string): string[] {
  const results: string[] = [];
  function walk(current: string) {
    for (const entry of readdirSync(current)) {
      if (entry.startsWith(".")) continue;
      if (entry === "node_modules") continue;
      const full = join(current, entry);
      let stat;
      try {
        stat = statSync(full);
      } catch (err: unknown) {
        const code = typeof err === "object" && err !== null ? (err as { code?: unknown }).code : undefined;
        if (code === "ENOENT") continue;
        throw err;
      }
      if (stat.isDirectory()) walk(full);
      else if (stat.isFile() && extname(entry).toLowerCase() === ".md") results.push(full);
    }
  }
  walk(dir);
  return results.sort();
}
1397
+
1398
/**
 * Extract inline `#tags` from markdown content, lower-cased and
 * de-duplicated in order of first appearance.
 *
 * Text inside fenced code blocks and inline code spans is removed before
 * scanning. A tag must be preceded by start-of-line or whitespace, and
 * consists of letters, combining marks, digits and `_`, with `/` and `-`
 * allowed in the middle. Letters are matched with Unicode properties, so
 * `#café` and non-Latin tags are captured in full instead of being cut
 * at the first non-ASCII character.
 */
export function extractInlineTags(content: string): string[] {
  let stripped = content.replace(/```[\s\S]*?```/g, "");
  stripped = stripped.replace(/`[^`\n]+`/g, "");
  const tags = new Set<string>();
  const regex = /(?:^|\s)#([\p{L}\p{M}\p{N}_][\p{L}\p{M}\p{N}_/-]*[\p{L}\p{M}\p{N}_]|[\p{L}\p{M}\p{N}_])/gmu;
  let match: RegExpExecArray | null;
  while ((match = regex.exec(stripped)) !== null) {
    tags.add(match[1]!.toLowerCase());
  }
  return [...tags];
}