@openparachute/vault 0.4.4-rc.12 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -72,7 +72,7 @@
72
72
  * See vault#308.
73
73
  */
74
74
 
75
- import { readdirSync, readFileSync, statSync, mkdirSync, writeFileSync, copyFileSync, existsSync } from "fs";
75
+ import { readdirSync, readFileSync, statSync, mkdirSync, writeFileSync, copyFileSync, existsSync, rmSync } from "fs";
76
76
  import { basename, join, relative, extname, dirname, resolve as resolvePath, sep as pathSep } from "path";
77
77
  import type { Store, Note, Link, Attachment } from "./types.js";
78
78
  import type { TagRecord } from "./tag-schemas.js";
@@ -90,11 +90,42 @@ export const EXPORT_FORMAT_VERSION = 1;
90
90
  * as notes; consumers like Logseq/Foam/Quartz don't see the sidecar. */
91
91
  export const SIDECAR_DIR = ".parachute";
92
92
 
93
+ /**
94
+ * Subdirectory under the sidecar where per-note metadata sidecars live
95
+ * for non-frontmatter-compatible extensions (vault#328). One YAML file
96
+ * per note, keyed by note id: `.parachute/notes-meta/<note-id>.yaml`.
97
+ * Mirrors the inline-frontmatter shape so the parser can chew either.
98
+ */
99
+ export const NOTES_META_DIR = "notes-meta";
100
+
101
+ /**
102
+ * Extensions whose serialized form carries metadata as inline YAML
103
+ * frontmatter at the top of the content file. `.md` is the canonical
104
+ * case; `.mdx` joins it because MDX's parser accepts the same `---`
105
+ * delimited preamble (and Aaron's planning to use MDX in notes).
106
+ * Anything else is sidecar-required — see `core/src/portable-md.ts`
107
+ * write/read paths.
108
+ *
109
+ * Easy to extend later — e.g. `.org` if/when a real workflow demands
110
+ * it. Today's set is conservative: only formats whose parser semantics
111
+ * we've explicitly validated.
112
+ */
113
+ const FRONTMATTER_COMPAT_EXTENSIONS = new Set(["md", "mdx"]);
114
+
115
+ export function supportsInlineFrontmatter(extension: string): boolean {
116
+ return FRONTMATTER_COMPAT_EXTENSIONS.has(extension.toLowerCase());
117
+ }
118
+
93
119
  /** Order in which top-level frontmatter keys are emitted. Fixed — required
94
- * for byte-identical re-exports of unchanged vault state. */
120
+ * for byte-identical re-exports of unchanged vault state. `extension` is
121
+ * emitted right after `path` so the suffix story sits with the
122
+ * filesystem-location story; emitted only when non-default ("md"
123
+ * doesn't get a frontmatter line so the existing `.md`-only corpus
124
+ * diffs cleanly against pre-vault#328 exports). */
95
125
  const FRONTMATTER_KEY_ORDER = [
96
126
  "id",
97
127
  "path",
128
+ "extension",
98
129
  "tags",
99
130
  "metadata",
100
131
  "links",
@@ -107,11 +138,20 @@ const FRONTMATTER_KEY_ORDER = [
107
138
  // Types
108
139
  // ---------------------------------------------------------------------------
109
140
 
110
- /** Per-note shape written into one .md file (frontmatter + content). */
141
+ /** Per-note shape written into one file (frontmatter + content for
142
+ * `.md`/`.mdx`; raw content + sidecar metadata for everything else). */
111
143
  export interface PortableNote {
112
144
  id: string;
113
145
  path?: string;
114
146
  content: string;
147
+ /**
148
+ * File extension (vault#328). Defaults to "md" when omitted —
149
+ * back-compat with PR1/PR2 exports that predate the extension axis.
150
+ * Controls both the file suffix on disk AND whether metadata goes
151
+ * inline as frontmatter (`md`, `mdx`) or in a sidecar
152
+ * (`csv`/`yaml`/`json`/etc).
153
+ */
154
+ extension?: string;
115
155
  metadata?: Record<string, unknown>;
116
156
  tags?: string[];
117
157
  links?: PortableLink[];
@@ -152,6 +192,31 @@ export interface ExportStats {
152
192
  notes: number;
153
193
  schemas: number;
154
194
  attachments: number;
195
+ /**
196
+ * Per-note metadata sidecars written for non-frontmatter-compatible
197
+ * extensions (vault#328). For `.md`/`.mdx` notes this stays 0 —
198
+ * metadata is inline. For `.csv`/`.yaml`/`.json` notes, one sidecar
199
+ * per note.
200
+ */
201
+ sidecars: number;
202
+ /**
203
+ * True when the export ran on a case-insensitive filesystem (macOS
204
+ * APFS default, Windows NTFS, FAT/exFAT) — whether or not any
205
+ * collision actually occurred. The probe runs once per export
206
+ * regardless of the note set; this field reflects the probe's
207
+ * outcome. To detect actual collisions, check
208
+ * `disambiguated_paths.length > 0`. See vault#327.
209
+ */
210
+ case_insensitive_fs: boolean;
211
+ /**
212
+ * Per-note disambiguation detail when the export's case-insensitive
213
+ * filesystem would otherwise silently collapse notes whose paths
214
+ * differ only by case (vault#327). Each entry records the original
215
+ * path + the suffixed on-disk filename so the operator can audit.
216
+ * Empty on case-sensitive filesystems and on case-insensitive
217
+ * filesystems with no collisions.
218
+ */
219
+ disambiguated_paths: Array<{ note_id: string; original_path: string; disambiguated_filename: string }>;
155
220
  /** Set when caller passed `since`; counts notes whose `updated_at >= since`. */
156
221
  filtered_by_since: boolean;
157
222
  /**
@@ -356,6 +421,9 @@ function buildFrontmatter(note: PortableNote): Record<string, unknown> {
356
421
  const fm: Record<string, unknown> = {};
357
422
  fm.id = note.id;
358
423
  if (note.path) fm.path = note.path;
424
+ // Emit only when non-default ("md") so legacy markdown-only exports
425
+ // produce byte-identical bytes pre- and post-vault#328.
426
+ if (note.extension && note.extension !== "md") fm.extension = note.extension;
359
427
  if (note.tags && note.tags.length > 0) fm.tags = [...note.tags].sort();
360
428
  if (note.metadata && Object.keys(note.metadata).length > 0) fm.metadata = note.metadata;
361
429
  if (note.links && note.links.length > 0) fm.links = note.links;
@@ -366,13 +434,18 @@ function buildFrontmatter(note: PortableNote): Record<string, unknown> {
366
434
  }
367
435
 
368
436
  /**
369
- * Render a note as portable markdown: `--- <frontmatter> --- <content>`.
370
- * Frontmatter keys in `FRONTMATTER_KEY_ORDER`; nested objects alpha-sorted.
371
- * Trailing newline preserved from `content` (or one is added if absent).
437
+ * Emit a frontmatter-shape object using `FRONTMATTER_KEY_ORDER`: each
438
+ * present key gets one or more lines (inline form for scalars + empty
439
+ * collections, block form otherwise). Every line ends in `\n`. The
440
+ * output does NOT include the `---` wrapper — that's the caller's
441
+ * concern (only `toPortableMarkdown` wraps; `toSidecarYaml` doesn't).
442
+ *
443
+ * Single source of truth for the per-key emit loop shared by
444
+ * `toPortableMarkdown` and `toSidecarYaml` (vault#330 F3 — pure
445
+ * refactor, behavior unchanged).
372
446
  */
373
- export function toPortableMarkdown(note: PortableNote): string {
374
- const fm = buildFrontmatter(note);
375
- let out = "---\n";
447
+ function emitFrontmatterKeys(fm: Record<string, unknown>): string {
448
+ let out = "";
376
449
  for (const key of FRONTMATTER_KEY_ORDER) {
377
450
  if (!(key in fm)) continue;
378
451
  const value = fm[key];
@@ -385,6 +458,36 @@ export function toPortableMarkdown(note: PortableNote): string {
385
458
  if (block !== null) out += `${block}\n`;
386
459
  }
387
460
  }
461
+ return out;
462
+ }
463
+
464
+ /**
465
+ * Render a note's content-file bytes. Behavior depends on the note's
466
+ * `extension`:
467
+ *
468
+ * - Frontmatter-compatible (`.md`, `.mdx`): emits `--- <frontmatter>
469
+ * --- <content>`. Today's behavior, generalized to also handle MDX.
470
+ * - Sidecar-required (`.csv`, `.yaml`, `.json`, etc.): emits the
471
+ * raw content as-is (no frontmatter prepend). The metadata lives in
472
+ * `.parachute/notes-meta/<id>.yaml`; see `toSidecarYaml`.
473
+ *
474
+ * Trailing-newline: frontmatter form always ends with `\n`. Raw content
475
+ * form is returned verbatim — if the note's content has no trailing
476
+ * newline, the file ends without one. Callers wanting strict
477
+ * trailing-newline normalization (re-emit invariant) should add it
478
+ * outside this function.
479
+ */
480
+ export function toPortableMarkdown(note: PortableNote): string {
481
+ const ext = note.extension ?? "md";
482
+ if (!supportsInlineFrontmatter(ext)) {
483
+ // Sidecar-required: content goes out as-is. No frontmatter, no
484
+ // synthetic trailing newline — the file is whatever the caller
485
+ // stored as `content`.
486
+ return note.content;
487
+ }
488
+ const fm = buildFrontmatter(note);
489
+ let out = "---\n";
490
+ out += emitFrontmatterKeys(fm);
388
491
  out += "---\n";
389
492
  // Preserve content as-is; ensure exactly one trailing newline if missing.
390
493
  out += note.content;
@@ -392,14 +495,47 @@ export function toPortableMarkdown(note: PortableNote): string {
392
495
  return out;
393
496
  }
394
497
 
498
+ /**
499
+ * Render a note's sidecar metadata bytes (vault#328). Same key set as
500
+ * the inline frontmatter — just lifted out of the content file. Used
501
+ * for sidecar-required extensions (`.csv`/`.yaml`/`.json`/etc.) where
502
+ * the content file can't host YAML. Order of keys is fixed
503
+ * (`FRONTMATTER_KEY_ORDER`) so the sidecar bytes are byte-identical
504
+ * across re-exports of unchanged vault state.
505
+ *
506
+ * Always includes the `extension` field (unlike `buildFrontmatter`,
507
+ * which omits it for `md` to keep legacy diffs clean) — the sidecar is
508
+ * a new artifact, the omit-default optimization buys nothing.
509
+ */
510
+ export function toSidecarYaml(note: PortableNote): string {
511
+ // Build a "full" frontmatter — same as buildFrontmatter, but always
512
+ // emit the extension. The sidecar's purpose is exactly to record
513
+ // the extension that's not in the filename.
514
+ const fm: Record<string, unknown> = {};
515
+ fm.id = note.id;
516
+ if (note.path) fm.path = note.path;
517
+ fm.extension = note.extension ?? "md";
518
+ if (note.tags && note.tags.length > 0) fm.tags = [...note.tags].sort();
519
+ if (note.metadata && Object.keys(note.metadata).length > 0) fm.metadata = note.metadata;
520
+ if (note.links && note.links.length > 0) fm.links = note.links;
521
+ if (note.attachments && note.attachments.length > 0) fm.attachments = note.attachments;
522
+ fm.created_at = note.created_at;
523
+ if (note.updated_at) fm.updated_at = note.updated_at;
524
+
525
+ return emitFrontmatterKeys(fm);
526
+ }
527
+
395
528
  /**
396
529
  * Determine the file path for an exported portable-md note. Notes with a
397
- * `path` use it; pathless notes use `_unpathed/<id>.md` (no date-prefix
398
- * coincidence with user content).
530
+ * `path` use it + their `extension`; pathless notes use
531
+ * `_unpathed/<id>.<extension>` (no date-prefix coincidence with user
532
+ * content). Default extension is "md" so pre-vault#328 exports produce
533
+ * the same filenames.
399
534
  */
400
535
  export function portableExportFilePath(note: PortableNote): string {
401
- if (note.path) return note.path + ".md";
402
- return `_unpathed/${note.id}.md`;
536
+ const ext = note.extension ?? "md";
537
+ if (note.path) return `${note.path}.${ext}`;
538
+ return `_unpathed/${note.id}.${ext}`;
403
539
  }
404
540
 
405
541
  // ---------------------------------------------------------------------------
@@ -440,10 +576,16 @@ export async function noteToPortable(
440
576
  }))
441
577
  .sort((a, b) => a.id.localeCompare(b.id));
442
578
 
579
+ // Default to "md" so old PR1/PR2 callers (or callers that didn't get
580
+ // the v18 column from a fresh-DB read) keep producing identical
581
+ // frontmatter shape.
582
+ const extension = note.extension ?? "md";
583
+
443
584
  const result: PortableNote = {
444
585
  id: note.id,
445
586
  ...(note.path ? { path: note.path } : {}),
446
587
  content: note.content,
588
+ ...(extension ? { extension } : {}),
447
589
  ...(note.metadata && Object.keys(note.metadata).length > 0 ? { metadata: note.metadata } : {}),
448
590
  ...(note.tags && note.tags.length > 0 ? { tags: [...note.tags].sort() } : {}),
449
591
  ...(typedLinks.length > 0 ? { links: typedLinks } : {}),
@@ -480,6 +622,15 @@ export interface ExportOptions {
480
622
  * stays pure (no dep on server-side path resolution).
481
623
  */
482
624
  assetsDir?: string;
625
+ /**
626
+ * Override the filesystem case-sensitivity probe (vault#327). Pass
627
+ * `false` to force the case-insensitive code path (with auto-
628
+ * disambiguation) regardless of the real filesystem; pass `true` to
629
+ * force the case-sensitive code path. Test seam — lets the round-trip
630
+ * suite exercise both branches on whatever FS the test happens to
631
+ * run on. When unset (the production default), the probe runs.
632
+ */
633
+ caseSensitiveOverride?: boolean;
483
634
  }
484
635
 
485
636
  /**
@@ -556,14 +707,63 @@ export async function exportVaultToDir(
556
707
  const assetsDirResolved = opts.assetsDir ? resolvePath(opts.assetsDir) : undefined;
557
708
  const attachmentsRoot = join(sidecar, "attachments");
558
709
  const attachmentsRootResolved = resolvePath(attachmentsRoot);
710
+ const notesMetaRoot = join(sidecar, NOTES_META_DIR);
711
+ const notesMetaRootResolved = resolvePath(notesMetaRoot);
559
712
  let notesWritten = 0;
560
713
  let attachmentsWritten = 0;
714
+ let sidecarsWritten = 0;
561
715
  const skipped: { path: string | undefined; reason: string }[] = [];
562
716
  const skippedAttachments: { note_id: string; attachment_id: string; path: string; reason: string }[] = [];
717
+ const disambiguatedPaths: ExportStats["disambiguated_paths"] = [];
718
+
719
+ // Case-collision detection (vault#327). On case-insensitive
720
+ // filesystems (macOS APFS-default, Windows NTFS-default, FAT/exFAT),
721
+ // two notes whose paths differ only by case collapse into one file
722
+ // on write — silent data loss. We probe the export dir's filesystem
723
+ // once, then either ship as-is (case-sensitive) or build a lowercase
724
+ // `(path, extension)` index during the walk and disambiguate
725
+ // colliding notes with an `__<id-short>` filename suffix.
726
+ //
727
+ // The note's stored `path` (in frontmatter + sidecar) stays canonical;
728
+ // only the on-disk filename is suffixed. Import recovers the
729
+ // canonical path from frontmatter/sidecar, not from the filename.
730
+ const caseSensitive = opts.caseSensitiveOverride ?? probeCaseSensitive(outDir);
731
+ // Lowercased `<path>|<ext>` → first-write note-id. Subsequent matches
732
+ // on the same key trigger disambiguation. Only populated on
733
+ // case-insensitive filesystems.
734
+ const seenLowerKeys = new Map<string, string>();
735
+
563
736
  for (const note of allNotes) {
564
737
  if (since && !shouldIncludeForSince(note, since)) continue;
565
738
  const portable = await noteToPortable(note, store);
566
- const relPath = portableExportFilePath(portable);
739
+ let relPath = portableExportFilePath(portable);
740
+
741
+ // Decide whether this note's filename needs disambiguation
742
+ // (vault#327). Only meaningful when the FS is case-insensitive AND
743
+ // a prior note's (path, ext) tuple already claimed the same
744
+ // lowercased filename slot. Pathless notes (`_unpathed/<id>.<ext>`)
745
+ // are immune by construction — their filename embeds the id, which
746
+ // is case-stable already.
747
+ if (!caseSensitive && portable.path) {
748
+ const ext = portable.extension ?? "md";
749
+ const key = `${portable.path.toLowerCase()}|${ext.toLowerCase()}`;
750
+ const prior = seenLowerKeys.get(key);
751
+ if (prior !== undefined && prior !== portable.id) {
752
+ // Collision: emit the disambiguated form. The frontmatter /
753
+ // sidecar `path:` still holds the canonical (original) path so
754
+ // import recovers the truth.
755
+ const disambig = disambiguateFilename(portable.path, ext, portable.id);
756
+ relPath = disambig;
757
+ disambiguatedPaths.push({
758
+ note_id: portable.id,
759
+ original_path: portable.path,
760
+ disambiguated_filename: disambig,
761
+ });
762
+ } else if (prior === undefined) {
763
+ seenLowerKeys.set(key, portable.id);
764
+ }
765
+ }
766
+
567
767
  const fullPath = join(outDir, relPath);
568
768
  // vault#317 F3 — path-traversal guard. A note with
569
769
  // `path: "../../.ssh/authorized_keys"` would otherwise write outside
@@ -583,6 +783,28 @@ export async function exportVaultToDir(
583
783
  writeFileSync(fullPath, toPortableMarkdown(portable));
584
784
  notesWritten++;
585
785
 
786
+ // Sidecar metadata write for non-frontmatter-compat extensions
787
+ // (vault#328). The content file holds raw bytes (no YAML); the
788
+ // sidecar at .parachute/notes-meta/<id>.yaml carries id/path/tags/
789
+ // metadata/links/attachments/timestamps.
790
+ const portableExt = portable.extension ?? "md";
791
+ if (!supportsInlineFrontmatter(portableExt)) {
792
+ const sidecarFile = join(notesMetaRoot, `${portable.id}.yaml`);
793
+ const sidecarResolved = resolvePath(sidecarFile);
794
+ // Path-traversal guard symmetric with the attachments path: the
795
+ // sidecar lives under the .parachute/notes-meta/ subtree, period.
796
+ if (!isWithinDir(sidecarResolved, notesMetaRootResolved)) {
797
+ skipped.push({
798
+ path: portable.path,
799
+ reason: `path-traversal: sidecar write target "${sidecarResolved}" escapes notes-meta root "${notesMetaRootResolved}"`,
800
+ });
801
+ } else {
802
+ mkdirSync(notesMetaRoot, { recursive: true });
803
+ writeFileSync(sidecarResolved, toSidecarYaml(portable));
804
+ sidecarsWritten++;
805
+ }
806
+ }
807
+
586
808
  // Copy attachment binaries when assetsDir is wired. Each attachment
587
809
  // is path-traversal-guarded on both ends: source under assetsDir,
588
810
  // dest under outDir's sidecar attachments root. Missing source files
@@ -652,6 +874,9 @@ export async function exportVaultToDir(
652
874
  notes: notesWritten,
653
875
  schemas: schemasWritten,
654
876
  attachments: attachmentsWritten,
877
+ sidecars: sidecarsWritten,
878
+ case_insensitive_fs: !caseSensitive,
879
+ disambiguated_paths: disambiguatedPaths,
655
880
  filtered_by_since: since !== undefined,
656
881
  skipped_traversal: skipped.length,
657
882
  skipped_notes: skipped,
@@ -689,6 +914,85 @@ function isWithinDir(candidate: string, root: string): boolean {
689
914
  return candidate.startsWith(root + pathSep);
690
915
  }
691
916
 
917
+ /**
918
+ * Probe whether `dir` lives on a case-sensitive filesystem (vault#327).
919
+ *
920
+ * The check writes a hidden tempfile, then tests whether the same file
921
+ * is reachable via its uppercased name. macOS APFS-default and Windows
922
+ * NTFS-default are case-insensitive (the file IS reachable); Linux
923
+ * ext4 and macOS APFS-CS are case-sensitive (the file is NOT). Other
924
+ * filesystems (FAT32, exFAT, network shares to either) collapse to the
925
+ * same probe result.
926
+ *
927
+ * Returns true when the filesystem distinguishes case, false otherwise.
928
+ * Defaults to true (case-sensitive — current export behavior) if the
929
+ * probe fails for any reason — we'd rather ship without disambiguation
930
+ * than fail-closed on an unexpected I/O error mid-export.
931
+ *
932
+ * The probe cleans up after itself: tempfile is removed before return.
933
+ * Tempfile name uses a UUID-ish suffix so concurrent probes don't
934
+ * collide in shared directories.
935
+ */
936
+ export function probeCaseSensitive(dir: string): boolean {
937
+ const suffix = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
938
+ const lowerName = `._parachute_cs_probe_${suffix}`;
939
+ const upperName = `._PARACHUTE_CS_PROBE_${suffix.toUpperCase()}`;
940
+ const lowerPath = join(dir, lowerName);
941
+ const upperPath = join(dir, upperName);
942
+ try {
943
+ writeFileSync(lowerPath, "");
944
+ // On a case-insensitive FS, `existsSync(upperPath)` returns true
945
+ // because upperPath references the same inode. On a case-sensitive
946
+ // FS, the file at upperPath doesn't exist.
947
+ const caseInsensitive = existsSync(upperPath);
948
+ return !caseInsensitive;
949
+ } catch {
950
+ // Probe failed — assume case-sensitive (the conservative default
951
+ // that matches today's export behavior).
952
+ return true;
953
+ } finally {
954
+ try { rmSync(lowerPath, { force: true }); } catch {}
955
+ // Defensive: if some FS variant created a distinct file at
956
+ // upperPath, clean that too. No-op when it was the same inode.
957
+ try { rmSync(upperPath, { force: true }); } catch {}
958
+ }
959
+ }
960
+
961
+ /**
962
+ * Compute the disambiguated filename for a colliding note (vault#327).
963
+ * Appends `__<id-short>` to the path's basename, before the extension.
964
+ * The id-short is the first 8 chars of the note ID — short enough to
965
+ * stay readable, unique enough to avoid secondary collisions in
966
+ * practice (vault IDs are timestamp-prefixed YYYY-MM-DD-HH-MM-SS-ffffff,
967
+ * so the first 8 chars cover only the year + month, i.e. `YYYY-MM-`).
968
+ *
969
+ * Example: `Journal/2025-05-26 Technology in Balance` (path) with
970
+ * extension `md` + id `2025-05-26-09-15-42-123456` becomes
971
+ * `Journal/2025-05-26 Technology in Balance__2025-05-.md`.
972
+ *
973
+ * The original path is preserved in the note's frontmatter/sidecar
974
+ * `path:` field — import recovers the canonical path from there, not
975
+ * from the disambiguated filename.
976
+ *
977
+ * **Assumption** (vault#331 N4): two notes created in the same month
978
+ * share the first 8 chars of the id-prefix (`YYYY-MM-`). For import,
979
+ * this is harmless because the resolver runs three tiers in order
980
+ * (exact-case canonical path → leftover-bucket pick → id-prefix
981
+ * scan), and sidecars are removed from the leftover map as they're
982
+ * consumed. By the time the id-prefix scan runs for a disambiguated
983
+ * filename, the only sidecars that could match are still-orphaned,
984
+ * so there's at most one candidate per scan. If two notes in the
985
+ * same month BOTH had disambiguated filenames AND the resolver had
986
+ * to fall through to the prefix scan for both, the first match wins
987
+ * deterministically (sorted dir walk). Bumping the slice to 12 or 14
988
+ * chars would tighten this further but adds noise to filenames —
989
+ * skip it until a real workload demands the change.
990
+ */
991
+ function disambiguateFilename(path: string, extension: string, noteId: string): string {
992
+ const idShort = noteId.slice(0, 8);
993
+ return `${path}__${idShort}.${extension}`;
994
+ }
995
+
692
996
  function shouldIncludeForSince(note: Note, since: string): boolean {
693
997
  const stamp = note.updatedAt ?? note.createdAt;
694
998
  return stamp >= since;
@@ -736,6 +1040,14 @@ export interface ImportStats {
736
1040
  skipped_links: Array<{ source_id: string; target_id: string; relationship: string; reason: string }>;
737
1041
  /** Per-skipped-attachment detail. */
738
1042
  skipped_attachments: Array<{ note_id: string; attachment_id: string; reason: string }>;
1043
+ /**
1044
+ * Sidecars under `.parachute/notes-meta/` with no matching content
1045
+ * file on disk (vault#330 S2). Each entry records the sidecar's id +
1046
+ * the expected `(path, extension)` it claimed so the operator can
1047
+ * see what's orphaned. Empty when every sidecar paired with a
1048
+ * content file during the walk.
1049
+ */
1050
+ skipped_sidecars: Array<{ sidecar_id: string; expected_path: string | null; expected_extension: string | null; reason: string }>;
739
1051
  /** Set when the caller passed `blowAway: true`; counts notes removed. */
740
1052
  notes_wiped: number;
741
1053
  }
@@ -783,6 +1095,7 @@ export async function importPortableVault(
783
1095
  attachments_restored: 0,
784
1096
  skipped_links: [],
785
1097
  skipped_attachments: [],
1098
+ skipped_sidecars: [],
786
1099
  notes_wiped: 0,
787
1100
  };
788
1101
 
@@ -837,19 +1150,142 @@ export async function importPortableVault(
837
1150
  }
838
1151
  }
839
1152
 
840
- // 3. Notes. Walk every .md file under inDir (dot-dirs already
841
- // excluded), parse, upsert.
1153
+ // 3. Notes. Walk every content file under inDir (dot-dirs already
1154
+ // excluded). For frontmatter-compatible extensions (md, mdx) parse
1155
+ // inline metadata. For sidecar-required extensions (csv, yaml, json,
1156
+ // etc.) look up metadata in `.parachute/notes-meta/<id>.yaml`.
1157
+ //
1158
+ // The sidecar's `path` + `extension` are the source of truth for the
1159
+ // user-visible filename — but we walk filenames first and INDEX
1160
+ // sidecars by `(path, extension)` for O(1) lookup per content file.
1161
+ // Sidecars with no matching content file are warned about (and
1162
+ // skipped) rather than triggering a write — preserves the
1163
+ // export-bytes-are-truth invariant.
1164
+ // Sidecar index: keyed by lowercased `<path>|<ext>` so the walker's
1165
+ // filename-derived key (also lowered) matches. The bucket holds the
1166
+ // full sidecar(s) — multiple entries occur when two notes share a
1167
+ // case-insensitive path (vault#327 collisions). The lookup logic
1168
+ // picks the right sidecar from the bucket using the walker's
1169
+ // case-preserved filename + the disambiguated-id-suffix heuristic.
1170
+ const notesMetaDir = join(sidecar, NOTES_META_DIR);
1171
+ const sidecarByKey = new Map<string, Record<string, unknown>[]>();
1172
+ const sidecarByIdLeftover = new Map<string, Record<string, unknown>>();
1173
+ if (existsSync(notesMetaDir)) {
1174
+ const notesMetaRootResolved = resolvePath(notesMetaDir);
1175
+ for (const entry of readdirSync(notesMetaDir)) {
1176
+ if (!entry.endsWith(".yaml")) continue;
1177
+ if (entry.startsWith(".")) continue;
1178
+ const fullPath = join(notesMetaDir, entry);
1179
+ const resolved = resolvePath(fullPath);
1180
+ if (!isWithinDir(resolved, notesMetaRootResolved)) continue;
1181
+ const text = readFileSync(fullPath, "utf-8");
1182
+ // The sidecar is a bare YAML doc (no `---`); wrap to reuse the
1183
+ // shared frontmatter parser.
1184
+ const wrapped = `---\n${text}${text.endsWith("\n") ? "" : "\n"}---\n`;
1185
+ const { frontmatter } = parseFrontmatter(wrapped);
1186
+ const sidecarId = typeof frontmatter.id === "string" ? frontmatter.id : null;
1187
+ const sidecarPath = typeof frontmatter.path === "string" ? frontmatter.path : null;
1188
+ const sidecarExt = typeof frontmatter.extension === "string" ? frontmatter.extension : null;
1189
+ if (!sidecarId) continue;
1190
+ // Index by (path, ext) tuple lowered so case-insensitive walks
1191
+ // match. Multi-value bucket lets two case-collided sidecars coexist
1192
+ // until the per-file lookup picks the right one.
1193
+ if (sidecarPath && sidecarExt) {
1194
+ const key = `${sidecarPath.toLowerCase()}|${sidecarExt.toLowerCase()}`;
1195
+ const bucket = sidecarByKey.get(key);
1196
+ if (bucket) bucket.push(frontmatter);
1197
+ else sidecarByKey.set(key, [frontmatter]);
1198
+ }
1199
+ sidecarByIdLeftover.set(sidecarId, frontmatter);
1200
+ }
1201
+ }
1202
+
842
1203
  // Track per-import (id → portable) so we can replay typed links
843
1204
  // after all notes exist.
844
1205
  const seenNotes = new Map<string, PortableNote>();
845
- for (const filePath of walkMarkdownFiles(inDir)) {
1206
+ for (const filePath of walkContentFiles(inDir)) {
846
1207
  // Containment check — readdirSync should already be safe, but
847
1208
  // verify the resolved path is inside inDir (symlinks).
848
1209
  const resolved = resolvePath(filePath);
849
1210
  if (!isWithinDir(resolved, inDirResolved)) continue;
850
1211
 
851
- const raw = readFileSync(filePath, "utf-8");
852
- const { frontmatter, content } = parseFrontmatter(raw);
1212
+ // Derive the file's extension (lowercased, no leading dot) and the
1213
+ // user-visible note path (everything between inDir and the
1214
+ // extension).
1215
+ const extWithDot = extname(filePath); // e.g. ".csv"
1216
+ const fileExt = extWithDot.slice(1).toLowerCase();
1217
+ if (fileExt.length === 0) continue;
1218
+ const relWithExt = relative(inDir, filePath);
1219
+ const relNoExt = relWithExt.slice(0, relWithExt.length - extWithDot.length);
1220
+ // Normalize path separators for cross-platform exports.
1221
+ const userPath = relNoExt.split(pathSep).join("/");
1222
+
1223
+ let frontmatter: Record<string, unknown>;
1224
+ let content: string;
1225
+ if (supportsInlineFrontmatter(fileExt)) {
1226
+ const raw = readFileSync(filePath, "utf-8");
1227
+ const parsed = parseFrontmatter(raw);
1228
+ frontmatter = parsed.frontmatter;
1229
+ content = parsed.content;
1230
+ } else {
1231
+ // Resolve which sidecar this content file belongs to. Two
1232
+ // sources of ambiguity to handle:
1233
+ // 1. vault#327 case-collisions on a case-insensitive FS —
1234
+ // multiple sidecars share the same lowered (path, ext) key.
1235
+ // 2. Disambiguated filenames — `<base>__<id-prefix>.<ext>` —
1236
+ // where the walker's path doesn't match any sidecar's
1237
+ // canonical path.
1238
+ // Resolution order: exact-case match within the bucket → first
1239
+ // sidecar in the bucket that is still unconsumed (present in the
1240
+ // leftover map) → fall through to id-prefix scan of all
1241
+ // leftovers.
1242
+ let found: Record<string, unknown> | undefined;
1243
+ const key = `${userPath.toLowerCase()}|${fileExt}`;
1244
+ const bucket = sidecarByKey.get(key);
1245
+ if (bucket && bucket.length > 0) {
1246
+ // Try exact-case match on canonical path first — distinguishes
1247
+ // case-collided notes when the FS preserves filename case but
1248
+ // not equality.
1249
+ found = bucket.find((s) => typeof s.path === "string" && s.path === userPath);
1250
+ if (!found) {
1251
+ // No exact-case match. Pick a sidecar from the bucket whose
1252
+ // id is still in `leftover` (i.e. not yet consumed by a prior
1253
+ // file in the walk). This keeps two case-collided notes on a
1254
+ // case-sensitive replay from claiming the same sidecar twice.
1255
+ found = bucket.find((s) => typeof s.id === "string" && sidecarByIdLeftover.has(s.id));
1256
+ }
1257
+ }
1258
+ if (!found) {
1259
+ // Disambiguated filename fallback: `<base>__<id-prefix>.<ext>`.
1260
+ // Strip the suffix, then find a leftover sidecar whose id
1261
+ // starts with that prefix.
1262
+ const disambigMatch = userPath.match(/^(.*)__([A-Za-z0-9-]{6,})$/);
1263
+ if (disambigMatch) {
1264
+ const idPrefix = disambigMatch[2]!;
1265
+ for (const [sidecarId, sidecar] of sidecarByIdLeftover) {
1266
+ if (sidecarId.startsWith(idPrefix)) {
1267
+ found = sidecar;
1268
+ break;
1269
+ }
1270
+ }
1271
+ }
1272
+ }
1273
+ if (!found) {
1274
+ // No sidecar — log and skip. Importing the raw bytes with no
1275
+ // metadata would orphan the row (no id, no path, no
1276
+ // timestamps). Better to surface the gap than silently lose
1277
+ // shape.
1278
+ // eslint-disable-next-line no-console
1279
+ console.warn(`[import] skipped "${filePath}": no matching sidecar at ${NOTES_META_DIR}/<id>.yaml (path="${userPath}", extension="${fileExt}")`);
1280
+ continue;
1281
+ }
1282
+ frontmatter = found;
1283
+ content = readFileSync(filePath, "utf-8");
1284
+ // Mark this sidecar as consumed so subsequent files (and the
1285
+ // stale-sidecar pass) don't double-count.
1286
+ const sidecarId = typeof found.id === "string" ? found.id : null;
1287
+ if (sidecarId) sidecarByIdLeftover.delete(sidecarId);
1288
+ }
853
1289
 
854
1290
  const id = typeof frontmatter.id === "string" ? frontmatter.id : null;
855
1291
  if (!id) {
@@ -863,6 +1299,12 @@ export async function importPortableVault(
863
1299
  const created_at = typeof frontmatter.created_at === "string" ? frontmatter.created_at : new Date().toISOString();
864
1300
  const updated_at = typeof frontmatter.updated_at === "string" ? frontmatter.updated_at : created_at;
865
1301
  const path = typeof frontmatter.path === "string" ? frontmatter.path : undefined;
1302
+ // Trust the frontmatter/sidecar extension first; fall back to the
1303
+ // filename extension. Notes without an explicit extension default
1304
+ // to "md" (back-compat with pre-vault#328 exports).
1305
+ const extension = typeof frontmatter.extension === "string"
1306
+ ? frontmatter.extension
1307
+ : fileExt || "md";
866
1308
  const tags = Array.isArray(frontmatter.tags) ? frontmatter.tags.filter((t): t is string => typeof t === "string") : undefined;
867
1309
  const metadata = (frontmatter.metadata && typeof frontmatter.metadata === "object" && !Array.isArray(frontmatter.metadata))
868
1310
  ? frontmatter.metadata as Record<string, unknown>
@@ -875,6 +1317,7 @@ export async function importPortableVault(
875
1317
  content,
876
1318
  created_at,
877
1319
  updated_at,
1320
+ extension,
878
1321
  ...(path ? { path } : {}),
879
1322
  ...(tags && tags.length > 0 ? { tags } : {}),
880
1323
  ...(metadata ? { metadata } : {}),
@@ -925,6 +1368,7 @@ export async function importPortableVault(
925
1368
  content,
926
1369
  ...(path !== undefined ? { path } : {}),
927
1370
  ...(metadata ? { metadata } : {}),
1371
+ extension,
928
1372
  });
929
1373
  // Tags: delete existing, re-tag with imported set.
930
1374
  if (existing.tags && existing.tags.length > 0) {
@@ -941,6 +1385,7 @@ export async function importPortableVault(
941
1385
  ...(tags && tags.length > 0 ? { tags } : {}),
942
1386
  ...(metadata ? { metadata } : {}),
943
1387
  created_at,
1388
+ extension,
944
1389
  });
945
1390
  stats.notes_created++;
946
1391
  }
@@ -952,6 +1397,27 @@ export async function importPortableVault(
952
1397
  await store.restoreNoteTimestamps(id, created_at, updated_at);
953
1398
  }
954
1399
 
1400
+ // 3b. Drain remaining sidecars (vault#330 S2). Any entry still in
1401
+ // `sidecarByIdLeftover` after the content-file walk is orphaned —
1402
+ // its expected content file wasn't on disk. Record the gap in
1403
+ // `skipped_sidecars` so programmatic callers can surface or repair.
1404
+ // Common cause: an operator removed a content file by hand without
1405
+ // deleting the matching sidecar.
1406
+ for (const [sidecarId, sidecar] of sidecarByIdLeftover) {
1407
+ const expectedPath = typeof sidecar.path === "string" ? sidecar.path : null;
1408
+ const expectedExt = typeof sidecar.extension === "string" ? sidecar.extension : null;
1409
+ stats.skipped_sidecars.push({
1410
+ sidecar_id: sidecarId,
1411
+ expected_path: expectedPath,
1412
+ expected_extension: expectedExt,
1413
+ reason: expectedPath
1414
+ ? `no content file at "${expectedPath}.${expectedExt ?? "md"}"`
1415
+ : "sidecar has no `path:` field; orphaned by construction",
1416
+ });
1417
+ // eslint-disable-next-line no-console
1418
+ console.warn(`[import] orphaned sidecar "${sidecarId}.yaml": ${stats.skipped_sidecars[stats.skipped_sidecars.length - 1]!.reason}`);
1419
+ }
1420
+
955
1421
  // 4. Typed links — replay only now that all notes exist. Wikilinks
956
1422
  // (which the exporter excludes from `links:`) rebuild from
957
1423
  // content brackets via syncAllWikilinks (a callable Store method).
@@ -1395,6 +1861,29 @@ export function walkMarkdownFiles(dir: string): string[] {
1395
1861
  return results.sort();
1396
1862
  }
1397
1863
 
1864
/**
 * Recursively list all content files in a portable-md export (vault#328).
 *
 * Entries whose name starts with "." (files and directories alike) and
 * any entry named `node_modules` are excluded, so sidecar metadata under
 * `.parachute/` is never returned by this walk. Files with no extension
 * are skipped (no way to tell what they are; vault doesn't write
 * extensionless notes by design).
 *
 * Entries that cannot be stat'ed because they no longer resolve — a
 * dangling symlink, a file removed between the directory listing and the
 * stat call, or a self-referential symlink — are skipped instead of
 * aborting the whole walk. Any other filesystem error still propagates.
 *
 * @param dir Root directory to walk.
 * @returns Paths of every matching file (each built by joining onto
 *   `dir`), sorted with the default string ordering.
 * @throws Whatever `readdirSync` / `statSync` throw for errors other
 *   than `ENOENT` / `ELOOP` on an individual entry.
 */
export function walkContentFiles(dir: string): string[] {
  const results: string[] = [];

  // Stat a path, returning null when the entry does not resolve to a
  // real file or directory. `statSync` follows symlinks, so a broken
  // link surfaces as ENOENT and a link pointing at itself as ELOOP.
  const statIfResolvable = (target: string) => {
    try {
      return statSync(target);
    } catch (err) {
      const code = (err as { code?: unknown } | null)?.code;
      if (code === "ENOENT" || code === "ELOOP") return null;
      throw err;
    }
  };

  function walk(current: string): void {
    for (const entry of readdirSync(current)) {
      // Dot-prefixed entries cover the sidecar dir and other hidden files.
      if (entry.startsWith(".")) continue;
      if (entry === "node_modules") continue;
      const full = join(current, entry);
      const stat = statIfResolvable(full);
      if (stat === null) continue;
      if (stat.isDirectory()) walk(full);
      else if (stat.isFile() && extname(entry).length > 0) results.push(full);
    }
  }

  walk(dir);
  return results.sort();
}
1886
+
1398
1887
  /** Extract inline #tags from markdown content. Excludes tags in code blocks. */
1399
1888
  export function extractInlineTags(content: string): string[] {
1400
1889
  let stripped = content.replace(/```[\s\S]*?```/g, "");