@openparachute/vault 0.4.7-rc.1 → 0.4.8-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +44 -10
  2. package/core/src/connection-pragmas.test.ts +232 -0
  3. package/core/src/core.test.ts +257 -0
  4. package/core/src/cursor.test.ts +160 -0
  5. package/core/src/cursor.ts +272 -0
  6. package/core/src/mcp.ts +51 -7
  7. package/core/src/notes.ts +164 -2
  8. package/core/src/portable-md.test.ts +247 -0
  9. package/core/src/portable-md.ts +118 -1
  10. package/core/src/schema.ts +98 -2
  11. package/core/src/store.ts +11 -1
  12. package/core/src/types.ts +32 -0
  13. package/package.json +1 -1
  14. package/src/auth-status.ts +4 -0
  15. package/src/auto-transcribe.test.ts +116 -0
  16. package/src/auto-transcribe.ts +48 -0
  17. package/src/cli.ts +151 -50
  18. package/src/config.test.ts +26 -0
  19. package/src/config.ts +53 -1
  20. package/src/db.ts +15 -2
  21. package/src/export-watch.test.ts +99 -0
  22. package/src/mcp-install-interactive.test.ts +23 -2
  23. package/src/mcp-install-interactive.ts +21 -2
  24. package/src/mcp-install.test.ts +40 -0
  25. package/src/mcp-tools.ts +17 -1
  26. package/src/module-config.ts +70 -14
  27. package/src/module-manifest.test.ts +93 -0
  28. package/src/module-manifest.ts +94 -0
  29. package/src/routes.ts +267 -50
  30. package/src/scribe-discovery.test.ts +77 -0
  31. package/src/scribe-discovery.ts +91 -0
  32. package/src/scribe-env.test.ts +66 -1
  33. package/src/scribe-env.ts +42 -1
  34. package/src/self-register.test.ts +380 -0
  35. package/src/self-register.ts +234 -0
  36. package/src/server.ts +46 -11
  37. package/src/transcript-note.test.ts +171 -0
  38. package/src/transcript-note.ts +189 -0
  39. package/src/transcription-registry.ts +22 -0
  40. package/src/transcription-worker.test.ts +250 -0
  41. package/src/transcription-worker.ts +186 -27
  42. package/src/vault.test.ts +347 -0
package/core/src/notes.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { Database, type SQLQueryBindings } from "bun:sqlite";
2
- import type { Note, NoteIndex, QueryOpts, VaultStats } from "./types.js";
2
+ import type { Note, NoteIndex, QueryOpts, QueryNotesPage, VaultStats } from "./types.js";
3
3
  import { normalizePath } from "./paths.js";
4
4
  import {
5
5
  buildOperatorClause,
@@ -7,6 +7,17 @@ import {
7
7
  QueryError,
8
8
  requireIndexedField,
9
9
  } from "./query-operators.js";
10
+ import {
11
+ CURSOR_VERSION,
12
+ CursorError,
13
+ computeQueryHash,
14
+ decodeCursor,
15
+ encodeCursor,
16
+ isoToMillis,
17
+ millisToIso,
18
+ type CursorPayload,
19
+ type QueryHashInputs,
20
+ } from "./cursor.js";
10
21
 
11
22
  let idCounter = 0;
12
23
 
@@ -663,9 +674,68 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
663
674
  }
664
675
  }
665
676
 
677
+ // ---- Cursor predicate (vault#313) ----
678
+ //
679
+ // When a cursor is present, decode it, verify its query_hash matches the
680
+ // current query, and add a keyset predicate of the form:
681
+ //
682
+ // (updated_at > last_updated_at)
683
+ // OR (updated_at = last_updated_at AND id > last_id)
684
+ //
685
+ // The cursor also forces ORDER BY n.updated_at ASC, n.id ASC so the
686
+ // watermark math is sound — paginating by updated_at while ordering
687
+ // by created_at would skip rows whose update timestamp differs from
688
+ // their creation timestamp. `orderBy` and `sort: "desc"` are mutually
689
+ // exclusive with cursor mode (a "since last checked" loop wants
690
+ // ascending updated_at, full stop); we reject with INVALID_QUERY so
691
+ // callers don't silently get a broken iteration.
692
+ let cursorPayload: CursorPayload | null = null;
693
+ if (opts.cursor) {
694
+ if (opts.orderBy) {
695
+ throw new QueryError(
696
+ `cursor and order_by are mutually exclusive — cursor pagination forces order by updated_at`,
697
+ "INVALID_QUERY",
698
+ );
699
+ }
700
+ if (opts.sort === "desc") {
701
+ throw new QueryError(
702
+ `cursor pagination requires ascending sort by updated_at — descending sort with a cursor would skip newly-written rows`,
703
+ "INVALID_QUERY",
704
+ );
705
+ }
706
+ cursorPayload = decodeCursor(opts.cursor);
707
+ const expectedHash = computeQueryHash(toQueryHashInputs(opts));
708
+ if (cursorPayload.query_hash !== expectedHash) {
709
+ throw new CursorError(
710
+ `cursor was minted for a different query — drop the cursor and restart iteration`,
711
+ "cursor_query_mismatch",
712
+ );
713
+ }
714
+ // Translate the millis watermark back to an ISO string for the SQL
715
+ // comparison. SQLite's `n.updated_at` is TEXT in canonical ISO form
716
+ // (the store's `toISOString()` output), and ISO timestamps sort
717
+ // lexicographically in the same order as their millisecond epochs
718
+ // when they all use the same canonical form — which every timestamp
719
+ // vault mints does. Cursors minted on heterogeneous timestamps
720
+ // (e.g. an import that preserved unusual formatting) are still
721
+ // safe: we round-trip the cursor's millis through `new Date()`'s
722
+ // canonical ISO so the comparison is apples-to-apples.
723
+ const cursorIso = millisToIso(cursorPayload.last_updated_at);
724
+ conditions.push(
725
+ "(n.updated_at > ? OR (n.updated_at = ? AND n.id > ?))",
726
+ );
727
+ params.push(cursorIso, cursorIso, cursorPayload.last_id);
728
+ }
729
+
666
730
  const direction = opts.sort === "desc" ? "DESC" : "ASC";
667
731
  let orderBy: string;
668
- if (opts.orderBy) {
732
+ if (opts.cursor) {
733
+ // Cursor mode forces a deterministic keyset order. `id` is the
734
+ // tiebreaker — without it, two notes sharing an `updated_at` would
735
+ // be at the mercy of SQLite's row order and the next page could
736
+ // miss or duplicate one.
737
+ orderBy = "n.updated_at ASC, n.id ASC";
738
+ } else if (opts.orderBy) {
669
739
  requireIndexedField(db, opts.orderBy);
670
740
  // `orderBy` came from indexed_fields (validated on declaration), so
671
741
  // the column name is safe to interpolate. Append created_at as a
@@ -697,6 +767,98 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
697
767
  });
698
768
  }
699
769
 
770
+ /**
771
+ * Extract the result-set-affecting subset of `QueryOpts` for cursor hashing.
772
+ *
773
+ * `cursor`, `limit`, `offset`, `_tagsExpanded` (internal cache key) are
774
+ * excluded — they don't change which rows match, just how many or how
775
+ * the iteration advances. See `core/src/cursor.ts` for the rationale.
776
+ */
777
+ function toQueryHashInputs(opts: QueryOpts): QueryHashInputs {
778
+ return {
779
+ tags: opts.tags,
780
+ tagMatch: opts.tagMatch,
781
+ excludeTags: opts.excludeTags,
782
+ hasTags: opts.hasTags,
783
+ hasLinks: opts.hasLinks,
784
+ path: opts.path,
785
+ pathPrefix: opts.pathPrefix,
786
+ extension: opts.extension,
787
+ ids: opts.ids,
788
+ metadata: opts.metadata,
789
+ dateFrom: opts.dateFrom,
790
+ dateTo: opts.dateTo,
791
+ dateFilter: opts.dateFilter,
792
+ sort: opts.sort,
793
+ orderBy: opts.orderBy,
794
+ };
795
+ }
796
+
797
+ /**
798
+ * Cursor-paginated wrapper around `queryNotes` (vault#313).
799
+ *
800
+ * Always returns `{ notes, next_cursor }`. `next_cursor` advances even on
801
+ * an empty result page — the caller can persist a single watermark and
802
+ * keep polling without special-casing the empty-page condition. The
803
+ * empty-page cursor's `last_updated_at` is either:
804
+ * - the prior cursor's `last_updated_at` (when `opts.cursor` was set), or
805
+ * - the sentinel `0`, paired with `last_id = ""` (when no cursor was passed).
806
+ *
807
+ * Holding the watermark at the prior value on an empty page is the
808
+ * conservative choice: if a note is written between this call and the
809
+ * next at a timestamp BEFORE wall-clock-now (clock skew, batch import
810
+ * with explicit `created_at`), advancing the watermark to `now()` would
811
+ * skip it. The watermark advances only when actual rows are returned.
812
+ *
813
+ * First-call semantics (`opts.cursor` absent): query_hash is computed
814
+ * from the result-set-affecting opts and bound into the minted cursor.
815
+ * If zero rows match, the returned cursor encodes
816
+ * `last_updated_at = 0, last_id = ""` so the next call returns
817
+ * everything written since (the keyset predicate
818
+ * `updated_at > 0 OR (updated_at = 0 AND id > "")` matches every row
819
+ * with a non-null `updated_at` greater than the unix epoch).
820
+ */
821
+ export function queryNotesPaged(db: Database, opts: QueryOpts): QueryNotesPage {
822
+ const notes = queryNotes(db, opts);
823
+ const queryHash = computeQueryHash(toQueryHashInputs(opts));
824
+
825
+ // Watermark math: pick the larger of (last returned row, prior cursor
826
+ // watermark, sentinel). When the page is empty, fall back to the prior
827
+ // cursor's watermark — see the JSDoc rationale above.
828
+ let lastUpdatedAt = 0;
829
+ let lastId = "";
830
+ if (opts.cursor) {
831
+ // Re-decode (we already validated in queryNotes); this is cheap.
832
+ const prior = decodeCursor(opts.cursor);
833
+ lastUpdatedAt = prior.last_updated_at;
834
+ lastId = prior.last_id;
835
+ }
836
+ if (notes.length > 0) {
837
+ // queryNotes with a cursor orders by (updated_at ASC, id ASC), so
838
+ // the last note in the array is the new watermark. When no cursor
839
+ // was passed, the SQL is ordered by created_at; we still want the
840
+ // cursor to advance to the MAX (updated_at, id) of this page so
841
+ // the next call resumes correctly. Compute the max explicitly.
842
+ for (const note of notes) {
843
+ const updatedIso = note.updatedAt ?? note.createdAt;
844
+ const ms = isoToMillis(updatedIso);
845
+ if (ms > lastUpdatedAt || (ms === lastUpdatedAt && note.id > lastId)) {
846
+ lastUpdatedAt = ms;
847
+ lastId = note.id;
848
+ }
849
+ }
850
+ }
851
+
852
+ const next_cursor = encodeCursor({
853
+ v: CURSOR_VERSION,
854
+ last_updated_at: lastUpdatedAt,
855
+ last_id: lastId,
856
+ query_hash: queryHash,
857
+ });
858
+
859
+ return { notes, next_cursor };
860
+ }
861
+
700
862
  export function searchNotes(
701
863
  db: Database,
702
864
  query: string,
@@ -25,6 +25,7 @@ import { tmpdir } from "os";
25
25
 
26
26
  import { SqliteStore } from "./store.js";
27
27
  import {
28
+ CaseCollisionError,
28
29
  emitYamlDoc,
29
30
  exportVaultToDir,
30
31
  importPortableVault,
@@ -1551,4 +1552,250 @@ describe("case-collision detection (vault#327)", async () => {
1551
1552
  expect(lower!.path).toBe("Tabular/budget-2026");
1552
1553
  expect(lower!.content).toBe("month,total\n2026-01,1");
1553
1554
  });
1555
+
1556
+ // ---------------------------------------------------------------------------
1557
+ // failOnCaseCollision — strict mode (vault#327 Phase 2)
1558
+ // ---------------------------------------------------------------------------
1559
+ //
1560
+ // The default behavior (auto-disambiguate) is lossless but silent on the
1561
+ // wire — the CLI didn't surface it before #vault-rc.2. Strict mode is the
1562
+ // opt-in fail-fast path: throws `CaseCollisionError` with every colliding
1563
+ // path enumerated, so the operator can rename one of each pair in the
1564
+ // vault before re-exporting.
1565
+
1566
+ it("failOnCaseCollision throws CaseCollisionError on case-insensitive FS", async () => {
1567
+ await store.createNote("# in Balance", {
1568
+ id: "2025-05-26-09-15-42-aaaaaa",
1569
+ path: "Journal/2025-05-26 Technology in Balance",
1570
+ });
1571
+ await store.createNote("# in balance", {
1572
+ id: "2025-05-26-09-15-42-bbbbbb",
1573
+ path: "Journal/2025-05-26 Technology in balance",
1574
+ });
1575
+
1576
+ const outDir = join(tmpBase, "strict-throw");
1577
+ let thrown: unknown;
1578
+ try {
1579
+ await exportVaultToDir(store, {
1580
+ outDir,
1581
+ vaultName: "test",
1582
+ exportedAt: "2026-05-15T00:00:00.000Z",
1583
+ caseSensitiveOverride: false,
1584
+ failOnCaseCollision: true,
1585
+ });
1586
+ } catch (err) {
1587
+ thrown = err;
1588
+ }
1589
+ expect(thrown).toBeInstanceOf(CaseCollisionError);
1590
+ const err = thrown as CaseCollisionError;
1591
+ expect(err.collisions).toHaveLength(1);
1592
+ expect(err.collisions[0]).toHaveLength(2);
1593
+ const ids = err.collisions[0]!.map((c) => c.note_id).sort();
1594
+ expect(ids).toEqual(["2025-05-26-09-15-42-aaaaaa", "2025-05-26-09-15-42-bbbbbb"]);
1595
+ // Error message names BOTH paths + the actionable instruction.
1596
+ expect(err.message).toContain("Journal/2025-05-26 Technology in Balance");
1597
+ expect(err.message).toContain("Journal/2025-05-26 Technology in balance");
1598
+ expect(err.message).toContain("Rename one of them");
1599
+ // Pre-scan throws BEFORE any per-note file write. The .parachute/
1600
+ // sidecar dir is still created (cheap, idempotent), but no per-note
1601
+ // .md file landed.
1602
+ expect(existsSync(join(outDir, "Journal/2025-05-26 Technology in Balance.md"))).toBe(false);
1603
+ });
1604
+
1605
+ it("failOnCaseCollision is a no-op when FS is case-sensitive", async () => {
1606
+ // Same fixture as above, but force the case-sensitive code path. The
1607
+ // strict flag becomes a no-op — both files land at their canonical
1608
+ // paths, no error.
1609
+ await store.createNote("# in Balance", {
1610
+ id: "2025-05-26-09-15-42-aaaaaa",
1611
+ path: "Journal/2025-05-26 Technology in Balance",
1612
+ });
1613
+ await store.createNote("# in balance", {
1614
+ id: "2025-05-26-09-15-42-bbbbbb",
1615
+ path: "Journal/2025-05-26 Technology in balance",
1616
+ });
1617
+
1618
+ const outDir = join(tmpBase, "strict-cs");
1619
+ const stats = await exportVaultToDir(store, {
1620
+ outDir,
1621
+ vaultName: "test",
1622
+ exportedAt: "2026-05-15T00:00:00.000Z",
1623
+ caseSensitiveOverride: true,
1624
+ failOnCaseCollision: true,
1625
+ });
1626
+ expect(stats.notes).toBe(2);
1627
+ expect(stats.disambiguated_paths).toHaveLength(0);
1628
+ });
1629
+
1630
+ it("failOnCaseCollision is a no-op on case-insensitive FS when nothing collides", async () => {
1631
+ // One note. Strict mode + case-insensitive FS — should not throw,
1632
+ // should ship clean. Pre-scan walks the (single-note) list and
1633
+ // finds no collision groups.
1634
+ await store.createNote("# solo", {
1635
+ id: "2025-05-26-09-15-42-aaaaaa",
1636
+ path: "Journal/Solo Note",
1637
+ });
1638
+
1639
+ const outDir = join(tmpBase, "strict-solo");
1640
+ const stats = await exportVaultToDir(store, {
1641
+ outDir,
1642
+ vaultName: "test",
1643
+ exportedAt: "2026-05-15T00:00:00.000Z",
1644
+ caseSensitiveOverride: false,
1645
+ failOnCaseCollision: true,
1646
+ });
1647
+ expect(stats.notes).toBe(1);
1648
+ expect(stats.disambiguated_paths).toHaveLength(0);
1649
+ expect(existsSync(join(outDir, "Journal/Solo Note.md"))).toBe(true);
1650
+ });
1651
+
1652
+ it("three-way collision lists all three paths in the error", async () => {
1653
+ // Foo.md + foo.md + FOO.md — all share the same lowercased
1654
+ // `(path, ext)` slot. CaseCollisionError.collisions[0] should
1655
+ // include every one of them so the operator sees the full set in
1656
+ // a single error report, not a paint-by-numbers re-export cycle.
1657
+ await store.createNote("# upper-camel", {
1658
+ id: "2025-05-26-09-15-42-aaaaaa",
1659
+ path: "Journal/Foo",
1660
+ });
1661
+ await store.createNote("# lower", {
1662
+ id: "2025-05-26-09-15-42-bbbbbb",
1663
+ path: "Journal/foo",
1664
+ });
1665
+ await store.createNote("# all-caps", {
1666
+ id: "2025-05-26-09-15-42-cccccc",
1667
+ path: "Journal/FOO",
1668
+ });
1669
+
1670
+ const outDir = join(tmpBase, "strict-3way");
1671
+ let thrown: unknown;
1672
+ try {
1673
+ await exportVaultToDir(store, {
1674
+ outDir,
1675
+ vaultName: "test",
1676
+ exportedAt: "2026-05-15T00:00:00.000Z",
1677
+ caseSensitiveOverride: false,
1678
+ failOnCaseCollision: true,
1679
+ });
1680
+ } catch (err) {
1681
+ thrown = err;
1682
+ }
1683
+ expect(thrown).toBeInstanceOf(CaseCollisionError);
1684
+ const err = thrown as CaseCollisionError;
1685
+ expect(err.collisions).toHaveLength(1);
1686
+ expect(err.collisions[0]).toHaveLength(3);
1687
+ const paths = err.collisions[0]!.map((c) => c.path).sort();
1688
+ expect(paths).toEqual(["Journal/FOO", "Journal/Foo", "Journal/foo"]);
1689
+ expect(err.message).toContain("Journal/FOO");
1690
+ expect(err.message).toContain("Journal/Foo");
1691
+ expect(err.message).toContain("Journal/foo");
1692
+ });
1693
+
1694
+ it("multiple independent collision groups all surface", async () => {
1695
+ // Two distinct collision groups: (Bar.md, bar.md) and (Baz.md,
1696
+ // baz.md). Both groups should appear in the error so the operator
1697
+ // doesn't have to fix-rebuild-fix in a loop. Pairs are independent —
1698
+ // resolving one doesn't reveal the other.
1699
+ await store.createNote("# bar-upper", {
1700
+ id: "2025-05-26-09-15-42-aaaaaa",
1701
+ path: "Bar",
1702
+ });
1703
+ await store.createNote("# bar-lower", {
1704
+ id: "2025-05-26-09-15-42-bbbbbb",
1705
+ path: "bar",
1706
+ });
1707
+ await store.createNote("# baz-upper", {
1708
+ id: "2025-05-26-09-15-42-cccccc",
1709
+ path: "Baz",
1710
+ });
1711
+ await store.createNote("# baz-lower", {
1712
+ id: "2025-05-26-09-15-42-dddddd",
1713
+ path: "baz",
1714
+ });
1715
+
1716
+ const outDir = join(tmpBase, "strict-multi-group");
1717
+ let thrown: unknown;
1718
+ try {
1719
+ await exportVaultToDir(store, {
1720
+ outDir,
1721
+ vaultName: "test",
1722
+ exportedAt: "2026-05-15T00:00:00.000Z",
1723
+ caseSensitiveOverride: false,
1724
+ failOnCaseCollision: true,
1725
+ });
1726
+ } catch (err) {
1727
+ thrown = err;
1728
+ }
1729
+ expect(thrown).toBeInstanceOf(CaseCollisionError);
1730
+ const err = thrown as CaseCollisionError;
1731
+ expect(err.collisions).toHaveLength(2);
1732
+ const allIds = err.collisions.flat().map((c) => c.note_id).sort();
1733
+ expect(allIds).toEqual([
1734
+ "2025-05-26-09-15-42-aaaaaa",
1735
+ "2025-05-26-09-15-42-bbbbbb",
1736
+ "2025-05-26-09-15-42-cccccc",
1737
+ "2025-05-26-09-15-42-dddddd",
1738
+ ]);
1739
+ });
1740
+
1741
+ it("directory-level case difference triggers collision detection", async () => {
1742
+ // Two notes at `Notes/foo` and `notes/foo` — the basename matches
1743
+ // but the parent dir differs only by case. On a case-insensitive
1744
+ // FS, both files would land in the same directory because
1745
+ // `Notes/` and `notes/` resolve to the same inode. Verify the
1746
+ // lowercased-path key catches this: `notes/foo.md`.
1747
+ await store.createNote("# notes-upper", {
1748
+ id: "2025-05-26-09-15-42-aaaaaa",
1749
+ path: "Notes/foo",
1750
+ });
1751
+ await store.createNote("# notes-lower", {
1752
+ id: "2025-05-26-09-15-42-bbbbbb",
1753
+ path: "notes/foo",
1754
+ });
1755
+
1756
+ const outDir = join(tmpBase, "strict-dir-case");
1757
+ let thrown: unknown;
1758
+ try {
1759
+ await exportVaultToDir(store, {
1760
+ outDir,
1761
+ vaultName: "test",
1762
+ exportedAt: "2026-05-15T00:00:00.000Z",
1763
+ caseSensitiveOverride: false,
1764
+ failOnCaseCollision: true,
1765
+ });
1766
+ } catch (err) {
1767
+ thrown = err;
1768
+ }
1769
+ expect(thrown).toBeInstanceOf(CaseCollisionError);
1770
+ const err = thrown as CaseCollisionError;
1771
+ expect(err.collisions).toHaveLength(1);
1772
+ expect(err.collisions[0]).toHaveLength(2);
1773
+ });
1774
+
1775
+ it("default (no failOnCaseCollision) still auto-disambiguates — back-compat", async () => {
1776
+ // The new strict mode is opt-in. Leaving failOnCaseCollision unset
1777
+ // (or false) keeps the existing lossless auto-disambiguation path
1778
+ // unchanged. This pins the back-compat contract — watch/mirror
1779
+ // loops that don't opt in to strict mode never see a thrown
1780
+ // CaseCollisionError on a colliding vault.
1781
+ await store.createNote("# upper", {
1782
+ id: "2025-05-26-09-15-42-aaaaaa",
1783
+ path: "Journal/Note",
1784
+ });
1785
+ await store.createNote("# lower", {
1786
+ id: "2025-05-26-09-15-42-bbbbbb",
1787
+ path: "Journal/note",
1788
+ });
1789
+
1790
+ const outDir = join(tmpBase, "default-disambig");
1791
+ const stats = await exportVaultToDir(store, {
1792
+ outDir,
1793
+ vaultName: "test",
1794
+ exportedAt: "2026-05-15T00:00:00.000Z",
1795
+ caseSensitiveOverride: false,
1796
+ // failOnCaseCollision deliberately omitted — default behavior.
1797
+ });
1798
+ expect(stats.notes).toBe(2);
1799
+ expect(stats.disambiguated_paths).toHaveLength(1);
1800
+ });
1554
1801
  });
@@ -631,6 +631,78 @@ export interface ExportOptions {
631
631
  * run on. When unset (the production default), the probe runs.
632
632
  */
633
633
  caseSensitiveOverride?: boolean;
634
+ /**
635
+ * Strict mode for case-collision handling on case-insensitive
636
+ * filesystems (vault#327 Phase 2). When `true`, any detected
637
+ * collision aborts the export by throwing a single `CaseCollisionError`
638
+ * naming every colliding path (all groups). When `false` (the default), the
639
+ * existing lossless behavior is preserved — the colliding note is
640
+ * written to a disambiguated filename (`<base>__<id-short>.<ext>`)
641
+ * and recorded in `ExportStats.disambiguated_paths`.
642
+ *
643
+ * Use `true` for one-shot CLI flows where the operator wants to be
644
+ * forced to fix the source-of-truth (rename one of the colliding
645
+ * notes in the vault before re-exporting). Leave `false` for
646
+ * long-running watch / mirror loops where a hard failure mid-loop
647
+ * would block the operator's actual work.
648
+ */
649
+ failOnCaseCollision?: boolean;
650
+ }
651
+
652
+ /**
653
+ * Thrown by `exportVaultToDir` when `failOnCaseCollision: true` is set
654
+ * and the export detects two-or-more notes whose paths differ only by
655
+ * case on a case-insensitive filesystem (vault#327).
656
+ *
657
+ * The error names every colliding path (full N-way group, not just
658
+ * the first pair) so the operator can audit the whole set in one
659
+ * pass. Caller catches by type and surfaces `.collisions` for a
660
+ * clean error report:
661
+ *
662
+ * ```ts
663
+ * try {
664
+ * await exportVaultToDir(store, { ..., failOnCaseCollision: true });
665
+ * } catch (err) {
666
+ * if (err instanceof CaseCollisionError) {
667
+ * for (const group of err.collisions) {
668
+ * console.error(`collision: ${group.map((g) => g.path).join(", ")}`);
669
+ * }
670
+ * }
671
+ * }
672
+ * ```
673
+ */
674
+ export class CaseCollisionError extends Error {
675
+ /**
676
+ * Each entry is one collision group — every note that shares the same
677
+ * lowercased `(path, extension)` slot. Two notes per group is the
678
+ * common case; three-or-more (`Foo.md` + `foo.md` + `FOO.md`) is rare
679
+ * but supported. Notes are listed in the order they were encountered
680
+ * during the export walk (deterministic — `queryNotes` sorts ASC).
681
+ */
682
+ readonly collisions: Array<Array<{ note_id: string; path: string; extension: string }>>;
683
+ constructor(collisions: CaseCollisionError["collisions"]) {
684
+ const lines: string[] = [
685
+ "Export failed: case-collision detected on case-insensitive filesystem.",
686
+ "The following notes have paths that differ only by case:",
687
+ ];
688
+ // Separate distinct collision groups with ` ---` so operators
689
+ // reading the error in a terminal can tell where one (Foo.md /
690
+ // foo.md) pair ends and the next (Bar.md / bar.md) begins. Without
691
+ // a separator, multi-group output runs together as an unbroken
692
+ // bullet list. vault#350.
693
+ collisions.forEach((group, idx) => {
694
+ if (idx > 0) lines.push(" ---");
695
+ for (const entry of group) {
696
+ lines.push(` - ${entry.path}.${entry.extension} (note id: ${entry.note_id})`);
697
+ }
698
+ });
699
+ lines.push(
700
+ "Rename one of them in the vault before re-exporting, or run from a case-sensitive filesystem.",
701
+ );
702
+ super(lines.join("\n"));
703
+ this.name = "CaseCollisionError";
704
+ this.collisions = collisions;
705
+ }
634
706
  }
635
707
 
636
708
  /**
@@ -733,9 +805,54 @@ export async function exportVaultToDir(
733
805
  // case-insensitive filesystems.
734
806
  const seenLowerKeys = new Map<string, string>();
735
807
 
808
+ // Strict-mode pre-scan (vault#327 Phase 2). When the caller passes
809
+ // `failOnCaseCollision: true`, surface every collision group in one
810
+ // typed error BEFORE any write lands on disk — partial-export-then-
811
+ // throw would leave the operator with a half-mirrored output dir to
812
+ // clean up. The pre-scan walks every note in the vault (NOT
813
+ // since-filtered: a since-filter belongs in the write loop —
814
+ // collisions can involve one old + one new path, and a since-only
815
+ // pre-scan would miss those entirely, silently degrading the strict
816
+ // guarantee on every poll cycle after the initial export). When no
817
+ // collisions exist on a case-insensitive FS, the pre-scan only fills
818
+ // the portable-note cache below; on a case-sensitive FS it's skipped entirely.
819
+ //
820
+ // Perf: the pre-scan calls `noteToPortable` (3 DB queries per note:
821
+ // links, attachments, content). The main loop below also calls
822
+ // `noteToPortable` — without caching, every note that ALSO passes
823
+ // the since-filter would be serialized twice (~2x the DB round-
824
+ // trips on a large strict-mode export). Stash every pre-scan result
825
+ // in `prescanPortables` and reuse it below; cache-miss falls back
826
+ // to a fresh `noteToPortable` for safety. vault#350.
827
+ const prescanPortables = new Map<string, PortableNote>();
828
+ if (opts.failOnCaseCollision && !caseSensitive) {
829
+ const groups = new Map<string, Array<{ note_id: string; path: string; extension: string }>>();
830
+ for (const note of allNotes) {
831
+ const portable = await noteToPortable(note, store);
832
+ prescanPortables.set(portable.id, portable);
833
+ if (!portable.path) continue; // _unpathed/<id>.<ext> is case-stable
834
+ const ext = portable.extension ?? "md";
835
+ const key = `${portable.path.toLowerCase()}|${ext.toLowerCase()}`;
836
+ const entry = { note_id: portable.id, path: portable.path, extension: ext };
837
+ const existing = groups.get(key);
838
+ if (existing) {
839
+ existing.push(entry);
840
+ } else {
841
+ groups.set(key, [entry]);
842
+ }
843
+ }
844
+ const collisions = Array.from(groups.values()).filter((g) => g.length > 1);
845
+ if (collisions.length > 0) {
846
+ throw new CaseCollisionError(collisions);
847
+ }
848
+ }
849
+
736
850
  for (const note of allNotes) {
737
851
  if (since && !shouldIncludeForSince(note, since)) continue;
738
- const portable = await noteToPortable(note, store);
852
+ // Reuse a pre-scan result when strict-mode populated the cache;
853
+ // otherwise serialize fresh. Same PortableNote shape either way,
854
+ // so the rest of the loop is untouched. vault#350.
855
+ const portable = prescanPortables.get(note.id) ?? (await noteToPortable(note, store));
739
856
  let relPath = portableExportFilePath(portable);
740
857
 
741
858
  // Decide whether this note's filename needs disambiguation