@openparachute/vault 0.4.8 → 0.4.9-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/core/src/hooks.test.ts +320 -1
  2. package/core/src/hooks.ts +243 -38
  3. package/core/src/mcp.ts +35 -0
  4. package/core/src/portable-md.test.ts +252 -1
  5. package/core/src/portable-md.ts +370 -2
  6. package/core/src/schema.ts +51 -2
  7. package/core/src/store.ts +68 -2
  8. package/package.json +1 -1
  9. package/src/auth.ts +29 -1
  10. package/src/auto-transcribe.test.ts +7 -2
  11. package/src/auto-transcribe.ts +6 -2
  12. package/src/export-watch.test.ts +74 -0
  13. package/src/export-watch.ts +108 -7
  14. package/src/github-device-flow.test.ts +404 -0
  15. package/src/github-device-flow.ts +415 -0
  16. package/src/mcp-http.ts +24 -36
  17. package/src/mcp-tools.ts +286 -2
  18. package/src/mirror-config.test.ts +184 -14
  19. package/src/mirror-config.ts +220 -24
  20. package/src/mirror-credentials.test.ts +450 -0
  21. package/src/mirror-credentials.ts +577 -0
  22. package/src/mirror-deps.ts +42 -1
  23. package/src/mirror-import.test.ts +550 -0
  24. package/src/mirror-import.ts +484 -0
  25. package/src/mirror-manager.test.ts +423 -12
  26. package/src/mirror-manager.ts +579 -62
  27. package/src/mirror-routes.test.ts +966 -10
  28. package/src/mirror-routes.ts +1096 -5
  29. package/src/module-config.ts +11 -5
  30. package/src/routing.test.ts +92 -1
  31. package/src/routing.ts +165 -1
  32. package/src/server.ts +21 -8
  33. package/src/token-store.ts +158 -5
  34. package/src/transcription-worker.ts +9 -4
  35. package/src/triggers.ts +16 -3
  36. package/src/vault.test.ts +380 -1
  37. package/web/ui/dist/assets/{index-BOa-JJtV.css → index-DBe8Xiah.css} +1 -1
  38. package/web/ui/dist/assets/index-DE18QJMx.js +60 -0
  39. package/web/ui/dist/index.html +2 -2
  40. package/web/ui/dist/assets/index-BzA5LgE3.js +0 -60
@@ -19,7 +19,7 @@
19
19
 
20
20
  import { describe, it, expect, beforeEach } from "bun:test";
21
21
  import { Database } from "bun:sqlite";
22
- import { mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync, existsSync, statSync } from "fs";
22
+ import { mkdirSync, readFileSync, readdirSync, rmSync, symlinkSync, writeFileSync, existsSync, statSync } from "fs";
23
23
  import { join } from "path";
24
24
  import { tmpdir } from "os";
25
25
 
@@ -33,6 +33,7 @@ import {
33
33
  parseFrontmatter,
34
34
  portableExportFilePath,
35
35
  probeCaseSensitive,
36
+ pruneOrphans,
36
37
  SIDECAR_DIR,
37
38
  NOTES_META_DIR,
38
39
  supportsInlineFrontmatter,
@@ -1799,3 +1800,253 @@ describe("case-collision detection (vault#327)", async () => {
1799
1800
  expect(stats.disambiguated_paths).toHaveLength(1);
1800
1801
  });
1801
1802
  });
1803
+
1804
+ // ---------------------------------------------------------------------------
1805
+ // pruneOrphans (vault#382 — event-driven mirror delete propagation)
1806
+ // ---------------------------------------------------------------------------
1807
+
1808
+ describe("pruneOrphans", async () => {
1809
+ let tmpBase: string;
1810
+ beforeEach(() => {
1811
+ tmpBase = join(tmpdir(), `parachute-prune-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`);
1812
+ mkdirSync(tmpBase, { recursive: true });
1813
+ });
1814
+
1815
+ it("no-op on non-existent directory", () => {
1816
+ const stats = pruneOrphans({
1817
+ outDir: join(tmpBase, "doesnt-exist"),
1818
+ validNoteIds: new Set(),
1819
+ validTagNames: new Set(),
1820
+ validAttachmentIds: new Set(),
1821
+ });
1822
+ expect(stats.notes_removed).toBe(0);
1823
+ expect(stats.unparseable_files).toHaveLength(0);
1824
+ });
1825
+
1826
+ it("removes orphaned note .md file", async () => {
1827
+ const outDir = join(tmpBase, "orphan-note");
1828
+ // First do a real export so the structure is realistic.
1829
+ const db = new Database(":memory:");
1830
+ const store = new SqliteStore(db);
1831
+ await store.createNote("alive", { id: "01HFAA", path: "alive" });
1832
+ await store.createNote("doomed", { id: "01HFBB", path: "doomed" });
1833
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1834
+ expect(existsSync(join(outDir, "alive.md"))).toBe(true);
1835
+ expect(existsSync(join(outDir, "doomed.md"))).toBe(true);
1836
+
1837
+ // Now prune with only "alive" in the valid set.
1838
+ const stats = pruneOrphans({
1839
+ outDir,
1840
+ validNoteIds: new Set(["01HFAA"]),
1841
+ validTagNames: new Set(),
1842
+ validAttachmentIds: new Set(),
1843
+ });
1844
+ expect(stats.notes_removed).toBe(1);
1845
+ expect(existsSync(join(outDir, "alive.md"))).toBe(true);
1846
+ expect(existsSync(join(outDir, "doomed.md"))).toBe(false);
1847
+ });
1848
+
1849
+ it("removes orphaned schema sidecar", async () => {
1850
+ const outDir = join(tmpBase, "orphan-schema");
1851
+ const db = new Database(":memory:");
1852
+ const store = new SqliteStore(db);
1853
+ await store.upsertTagRecord("alive-tag", { description: "stays" });
1854
+ await store.upsertTagRecord("doomed-tag", { description: "goes" });
1855
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1856
+ const schemasDir = join(outDir, SIDECAR_DIR, "schemas");
1857
+ expect(existsSync(join(schemasDir, "alive-tag.yaml"))).toBe(true);
1858
+ expect(existsSync(join(schemasDir, "doomed-tag.yaml"))).toBe(true);
1859
+
1860
+ const stats = pruneOrphans({
1861
+ outDir,
1862
+ validNoteIds: new Set(),
1863
+ validTagNames: new Set(["alive-tag"]),
1864
+ validAttachmentIds: new Set(),
1865
+ });
1866
+ expect(stats.schemas_removed).toBe(1);
1867
+ expect(existsSync(join(schemasDir, "alive-tag.yaml"))).toBe(true);
1868
+ expect(existsSync(join(schemasDir, "doomed-tag.yaml"))).toBe(false);
1869
+ });
1870
+
1871
+ it("removes orphaned attachment directories", async () => {
1872
+ const outDir = join(tmpBase, "orphan-att");
1873
+ // Build the export structure by hand (attachment binaries need
1874
+ // assetsDir wiring; cheaper to just create the dirs).
1875
+ const attachmentsDir = join(outDir, SIDECAR_DIR, "attachments");
1876
+ mkdirSync(attachmentsDir, { recursive: true });
1877
+ mkdirSync(join(attachmentsDir, "att-alive"), { recursive: true });
1878
+ writeFileSync(join(attachmentsDir, "att-alive", "voice.m4a"), "");
1879
+ mkdirSync(join(attachmentsDir, "att-doomed"), { recursive: true });
1880
+ writeFileSync(join(attachmentsDir, "att-doomed", "voice.m4a"), "");
1881
+ // Need .parachute/vault.yaml so the structure is recognized (cheap to fake)
1882
+ writeFileSync(join(outDir, SIDECAR_DIR, "vault.yaml"), "name: t\n");
1883
+
1884
+ const stats = pruneOrphans({
1885
+ outDir,
1886
+ validNoteIds: new Set(),
1887
+ validTagNames: new Set(),
1888
+ validAttachmentIds: new Set(["att-alive"]),
1889
+ });
1890
+ expect(stats.attachment_dirs_removed).toBe(1);
1891
+ expect(existsSync(join(attachmentsDir, "att-alive"))).toBe(true);
1892
+ expect(existsSync(join(attachmentsDir, "att-doomed"))).toBe(false);
1893
+ });
1894
+
1895
+ it("skips unparseable .md files without crashing", async () => {
1896
+ const outDir = join(tmpBase, "unparseable");
1897
+ mkdirSync(outDir, { recursive: true });
1898
+ writeFileSync(join(outDir, "no-frontmatter.md"), "just content, no frontmatter\n");
1899
+ writeFileSync(join(outDir, "garbage.md"), "---\nnot-real-yaml\n");
1900
+ const stats = pruneOrphans({
1901
+ outDir,
1902
+ validNoteIds: new Set(),
1903
+ validTagNames: new Set(),
1904
+ validAttachmentIds: new Set(),
1905
+ });
1906
+ // Both files lacked an `id`, so we record them but don't remove.
1907
+ expect(stats.notes_removed).toBe(0);
1908
+ expect(stats.unparseable_files.length).toBeGreaterThanOrEqual(2);
1909
+ expect(existsSync(join(outDir, "no-frontmatter.md"))).toBe(true);
1910
+ expect(existsSync(join(outDir, "garbage.md"))).toBe(true);
1911
+ });
1912
+
1913
+ it("preserves all files when everything is in the valid sets", async () => {
1914
+ const outDir = join(tmpBase, "happy-path");
1915
+ const db = new Database(":memory:");
1916
+ const store = new SqliteStore(db);
1917
+ const a = await store.createNote("a", { path: "a" });
1918
+ const b = await store.createNote("b", { path: "b" });
1919
+ await store.upsertTagRecord("tag1", { description: "x" });
1920
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1921
+ const stats = pruneOrphans({
1922
+ outDir,
1923
+ validNoteIds: new Set([a.id, b.id]),
1924
+ validTagNames: new Set(["tag1"]),
1925
+ validAttachmentIds: new Set(),
1926
+ });
1927
+ expect(stats.notes_removed).toBe(0);
1928
+ expect(stats.schemas_removed).toBe(0);
1929
+ expect(stats.attachment_dirs_removed).toBe(0);
1930
+ expect(existsSync(join(outDir, "a.md"))).toBe(true);
1931
+ expect(existsSync(join(outDir, "b.md"))).toBe(true);
1932
+ });
1933
+
1934
+ it("removes orphan note + corresponding notes-meta sidecar for csv/yaml notes", async () => {
1935
+ // For non-frontmatter extensions, the sidecar lives at
1936
+ // .parachute/notes-meta/<id>.yaml. Pruning the note should remove
1937
+ // both files.
1938
+ const outDir = join(tmpBase, "orphan-csv");
1939
+ const db = new Database(":memory:");
1940
+ const store = new SqliteStore(db);
1941
+ await store.createNote("col1,col2\n1,2\n", {
1942
+ id: "01CSV-DEL",
1943
+ path: "data/table",
1944
+ extension: "csv",
1945
+ });
1946
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1947
+ const contentFile = join(outDir, "data", "table.csv");
1948
+ const sidecarFile = join(outDir, SIDECAR_DIR, "notes-meta", "01CSV-DEL.yaml");
1949
+ expect(existsSync(contentFile)).toBe(true);
1950
+ expect(existsSync(sidecarFile)).toBe(true);
1951
+
1952
+ const stats = pruneOrphans({
1953
+ outDir,
1954
+ validNoteIds: new Set(), // doom it
1955
+ validTagNames: new Set(),
1956
+ validAttachmentIds: new Set(),
1957
+ });
1958
+ expect(stats.notes_removed).toBe(1);
1959
+ expect(stats.sidecars_removed).toBeGreaterThanOrEqual(1);
1960
+ expect(existsSync(contentFile)).toBe(false);
1961
+ expect(existsSync(sidecarFile)).toBe(false);
1962
+ });
1963
+
1964
+ // Reviewer-flagged regression on vault#382 Critical #2 — pruneOrphans
1965
+ // walks via statSync (follows symlinks); without the safeRm guard a
1966
+ // symlink inside the mirror pointing OUTSIDE outDir would resurface
1967
+ // its target's files as orphans and rmSync would happily delete them
1968
+ // off-tree. The guard resolves each candidate and refuses anything
1969
+ // not under outDir; refusals get recorded in `unparseable_files` so
1970
+ // an operator can see what was skipped.
1971
+ it("refuses to delete files reached via a symlink pointing outside outDir", async () => {
1972
+ const outDir = join(tmpBase, "symlink-attack");
1973
+ const outside = join(tmpBase, "outside");
1974
+ mkdirSync(outside, { recursive: true });
1975
+ mkdirSync(outDir, { recursive: true });
1976
+ // A real, sensitive file in `outside/` we don't want pruneOrphans
1977
+ // to touch under any circumstance.
1978
+ const externalFile = join(outside, "do-not-touch.md");
1979
+ writeFileSync(externalFile, "---\nid: 01EXTERNAL\n---\nimportant\n");
1980
+ // A symlink inside outDir pointing at outside/ — walkContentFiles
1981
+ // would normally surface outside/do-not-touch.md as a candidate.
1982
+ try {
1983
+ symlinkSync(outside, join(outDir, "via-link"));
1984
+ } catch {
1985
+ // Some CI sandboxes refuse symlink creation. Skip the test in
1986
+ // that case rather than fail spuriously.
1987
+ return;
1988
+ }
1989
+
1990
+ const stats = pruneOrphans({
1991
+ outDir,
1992
+ validNoteIds: new Set(), // doom every id we see
1993
+ validTagNames: new Set(),
1994
+ validAttachmentIds: new Set(),
1995
+ });
1996
+
1997
+ // Critical assertion: the external file MUST survive.
1998
+ expect(existsSync(externalFile)).toBe(true);
1999
+ // And the refusal MUST be recorded so the operator sees it.
2000
+ expect(
2001
+ stats.unparseable_files.some(
2002
+ (u) => u.path.includes("via-link") || u.reason.includes("outside"),
2003
+ ),
2004
+ ).toBe(true);
2005
+ });
2006
+
2007
+ // Reviewer-flagged regression on vault#382 Critical #1 — pruneOrphans
2008
+ // builds `validTagNames` from ALL tag-table rows in mirror-deps.ts.
2009
+ // After `deleteTagSchema(t)` the schema fields are cleared but the
2010
+ // tag row persists with the bare name, so the sidecar lingers
2011
+ // forever. The fix routes validTagNames through `hasSchemaContent`
2012
+ // before passing into pruneOrphans, and exports the predicate so
2013
+ // mirror-deps can reuse the single source of truth.
2014
+ it("considers a schema-content-free tag the same as a deleted tag for sidecar pruning", async () => {
2015
+ const outDir = join(tmpBase, "stale-schema");
2016
+ const db = new Database(":memory:");
2017
+ const store = new SqliteStore(db);
2018
+ await store.upsertTagRecord("bare", {}); // bare-name only
2019
+ await store.upsertTagRecord("with-schema", { description: "real" });
2020
+ await exportVaultToDir(store, {
2021
+ outDir,
2022
+ vaultName: "t",
2023
+ exportedAt: "2026-01-01T00:00:00.000Z",
2024
+ });
2025
+ const schemasDir = join(outDir, SIDECAR_DIR, "schemas");
2026
+ // The bare tag SHOULDN'T have a sidecar; the schema-bearing one
2027
+ // SHOULD. This confirms the export-writer's contract before the
2028
+ // prune step.
2029
+ expect(existsSync(join(schemasDir, "bare.yaml"))).toBe(false);
2030
+ expect(existsSync(join(schemasDir, "with-schema.yaml"))).toBe(true);
2031
+
2032
+ // Now seed a stale sidecar for `bare` (simulating "the operator
2033
+ // previously had a schema for `bare`, then cleared it via
2034
+ // `deleteTagSchema`"). pruneOrphans should remove this iff the
2035
+ // caller correctly filtered validTagNames by hasSchemaContent.
2036
+ writeFileSync(join(schemasDir, "bare.yaml"), 'name: "bare"\ndescription: "stale"\n');
2037
+ expect(existsSync(join(schemasDir, "bare.yaml"))).toBe(true);
2038
+
2039
+ // Filtered set — only `with-schema` has hasSchemaContent === true.
2040
+ const validTagNames = new Set(["with-schema"]);
2041
+ const stats = pruneOrphans({
2042
+ outDir,
2043
+ validNoteIds: new Set(),
2044
+ validTagNames,
2045
+ validAttachmentIds: new Set(),
2046
+ });
2047
+
2048
+ expect(stats.schemas_removed).toBe(1);
2049
+ expect(existsSync(join(schemasDir, "bare.yaml"))).toBe(false);
2050
+ expect(existsSync(join(schemasDir, "with-schema.yaml"))).toBe(true);
2051
+ });
2052
+ });
@@ -72,7 +72,7 @@
72
72
  * See vault#308.
73
73
  */
74
74
 
75
- import { readdirSync, readFileSync, statSync, mkdirSync, writeFileSync, copyFileSync, existsSync, rmSync } from "fs";
75
+ import { readdirSync, readFileSync, realpathSync, statSync, mkdirSync, writeFileSync, copyFileSync, existsSync, rmSync } from "fs";
76
76
  import { basename, join, relative, extname, dirname, resolve as resolvePath, sep as pathSep } from "path";
77
77
  import type { Store, Note, Link, Attachment } from "./types.js";
78
78
  import type { TagRecord } from "./tag-schemas.js";
@@ -1001,7 +1001,375 @@ export async function exportVaultToDir(
1001
1001
  };
1002
1002
  }
1003
1003
 
1004
- function hasSchemaContent(tag: TagRecord): boolean {
1004
+ // ---------------------------------------------------------------------------
1005
+ // Orphan sweep — for git-mirror's delete propagation
1006
+ // ---------------------------------------------------------------------------
1007
+
1008
+ /**
1009
+ * Result of a `pruneOrphans` pass.
1010
+ */
1011
+ export interface PruneOrphansStats {
1012
+ /** Note content files removed (frontmatter id not in `validNoteIds`). */
1013
+ notes_removed: number;
1014
+ /** Sidecar metadata files removed (under `.parachute/notes-meta/`). */
1015
+ sidecars_removed: number;
1016
+ /** Schema sidecars removed (under `.parachute/schemas/`). */
1017
+ schemas_removed: number;
1018
+ /** Attachment directories removed (under `.parachute/attachments/`). */
1019
+ attachment_dirs_removed: number;
1020
+ /**
1021
+ * Files we couldn't parse / classify. Surfaced for operator audit;
1022
+ * the sweep doesn't touch them. Empty when everything classified
1023
+ * cleanly.
1024
+ */
1025
+ unparseable_files: Array<{ path: string; reason: string }>;
1026
+ }
1027
+
1028
+ /**
1029
+ * Options for the orphan-sweep pass. Run periodically (the mirror manager
1030
+ * arms a safety-net poll, default 1h) and after operator-visible deletions
1031
+ * (the mirror manager's targeted-deletion fast path also calls this for the
1032
+ * touched-set).
1033
+ *
1034
+ * The sweep is the bookkeeping cousin of the event-driven fast path: events
1035
+ * cover the common case (a single note deletion fires "deleted" → mirror
1036
+ * removes that file); the sweep covers anything the fast path missed
1037
+ * (direct SQL writes, app crashes between dispatch and handler, restart
1038
+ * gaps).
1039
+ */
1040
+ export interface PruneOrphansOptions {
1041
+ /** Directory to sweep — same shape as an `exportVaultToDir` outDir. */
1042
+ outDir: string;
1043
+ /** Note IDs that should be kept (everything else under the export gets removed). */
1044
+ validNoteIds: Set<string>;
1045
+ /** Tag names that should be kept (other schema sidecars under `.parachute/schemas/` get removed). */
1046
+ validTagNames: Set<string>;
1047
+ /** Attachment IDs that should be kept (other dirs under `.parachute/attachments/` get removed). */
1048
+ validAttachmentIds: Set<string>;
1049
+ /**
1050
+ * Override `extension`-based content-file extension recognition. The
1051
+ * default treats `.md` and `.mdx` as having inline frontmatter (with
1052
+ * `id:` reachable via the file head); everything else needs a sidecar
1053
+ * lookup to know what id owns it.
1054
+ */
1055
+ supportsInlineFrontmatter?: (ext: string) => boolean;
1056
+ }
1057
+
1058
+ /**
1059
+ * Sweep the export directory for files belonging to notes / tags /
1060
+ * attachments that no longer exist in the vault. Removes them so the
1061
+ * mirror's `git diff` reflects what vault actually has.
1062
+ *
1063
+ * Strategy:
1064
+ * 1. Walk content files. For each `.md` / `.mdx`, parse the frontmatter
1065
+ * `id` and compare against `validNoteIds`. Mismatch → remove the
1066
+ * file. Files we can't parse are recorded in `unparseable_files`
1067
+ * and left alone (best-effort, no destructive guesses).
1068
+ * 2. For non-inline-frontmatter extensions (`.csv`, `.yaml`, etc.),
1069
+ * check the matching `.parachute/notes-meta/<id>.yaml` sidecar — the
1070
+ * sidecar carries the canonical `id` + `path` + `extension` triple.
1071
+ * An orphaned sidecar (no matching content file) is also removed,
1072
+ * and an orphaned content file (no matching sidecar) without a
1073
+ * parseable frontmatter is left as unparseable.
1074
+ * 3. Walk `.parachute/schemas/`. Each file is `<tag>.yaml` (after
1075
+ * filename sanitization). Parse the `name:` field; compare against
1076
+ * `validTagNames`. Mismatch → remove.
1077
+ * 4. Walk `.parachute/attachments/`. Each subdir name IS the
1078
+ * attachment id (per `exportVaultToDir`'s layout). Compare against
1079
+ * `validAttachmentIds`. Mismatch → recursive remove.
1080
+ *
1081
+ * Returns counts so callers can log + decide whether to commit.
1082
+ *
1083
+ * Safe to call on a directory that's never been exported to (returns
1084
+ * zero counts; doesn't create anything).
1085
+ */
1086
+ export function pruneOrphans(opts: PruneOrphansOptions): PruneOrphansStats {
1087
+ const stats: PruneOrphansStats = {
1088
+ notes_removed: 0,
1089
+ sidecars_removed: 0,
1090
+ schemas_removed: 0,
1091
+ attachment_dirs_removed: 0,
1092
+ unparseable_files: [],
1093
+ };
1094
+
1095
+ const outDir = opts.outDir;
1096
+ if (!existsSync(outDir)) return stats;
1097
+
1098
+ const supportsInline = opts.supportsInlineFrontmatter ?? supportsInlineFrontmatter;
1099
+ const sidecarRoot = join(outDir, SIDECAR_DIR);
1100
+ const notesMetaRoot = join(sidecarRoot, NOTES_META_DIR);
1101
+ const schemasRoot = join(sidecarRoot, "schemas");
1102
+ const attachmentsRoot = join(sidecarRoot, "attachments");
1103
+
1104
+ // Path-traversal guard. `walkContentFiles` uses `statSync` which
1105
+ // follows symlinks — a symlink inside the mirror pointing OUTSIDE
1106
+ // `outDir` would resurface its target's files in the prune sweep,
1107
+ // and a bare `rmSync(filepath)` would delete them off-tree. Every
1108
+ // deletion in this function routes through `safeRm`, which calls
1109
+ // `realpathSync` (resolves through symlinks, unlike syntactic-only
1110
+ // `path.resolve`) and refuses to delete anything that isn't `outDir`
1111
+ // or beneath it after symlink resolution. Refusals get recorded in
1112
+ // `unparseable_files` so an operator can see what was skipped.
1113
+ // Reviewer-flagged on vault#382 (Critical #2).
1114
+ const outDirReal = realpathSync(outDir);
1115
+ const safeRm = (
1116
+ candidate: string,
1117
+ onSuccess: () => void,
1118
+ options: { recursive?: boolean } = {},
1119
+ ): void => {
1120
+ let real: string;
1121
+ try {
1122
+ real = realpathSync(candidate);
1123
+ } catch (err) {
1124
+ // realpathSync throws if the path doesn't exist or is unreadable.
1125
+ // Don't delete what we can't fully resolve.
1126
+ stats.unparseable_files.push({
1127
+ path: candidate,
1128
+ reason: `realpath failed: ${(err as Error).message ?? err}`,
1129
+ });
1130
+ return;
1131
+ }
1132
+ if (!isWithinDir(real, outDirReal)) {
1133
+ stats.unparseable_files.push({
1134
+ path: candidate,
1135
+ reason: "real path resolved outside mirror outDir — refusing to delete (symlink?)",
1136
+ });
1137
+ return;
1138
+ }
1139
+ try {
1140
+ // Delete via the resolved real path; never via the unresolved
1141
+ // candidate (which could route through a symlink we already
1142
+ // determined would escape).
1143
+ rmSync(real, { force: true, recursive: options.recursive ?? false });
1144
+ onSuccess();
1145
+ } catch (err) {
1146
+ stats.unparseable_files.push({
1147
+ path: candidate,
1148
+ reason: `unlink failed: ${(err as Error).message ?? err}`,
1149
+ });
1150
+ }
1151
+ };
1152
+
1153
+ // ---- 1 + 2. Notes + sidecars ----
1154
+ //
1155
+ // First pass: build the sidecar id → { path, extension } map so we can
1156
+ // resolve non-inline-frontmatter content files via their sidecar.
1157
+ // Sidecars whose claimed (path, extension) doesn't map to an existing
1158
+ // content file are tracked as "orphaned sidecar" candidates.
1159
+ const sidecarById = new Map<string, { path: string | null; extension: string | null }>();
1160
+ const sidecarFilesById = new Map<string, string>(); // id → absolute filepath
1161
+ if (existsSync(notesMetaRoot)) {
1162
+ try {
1163
+ for (const entry of readdirSync(notesMetaRoot)) {
1164
+ if (!entry.endsWith(".yaml")) continue;
1165
+ const id = entry.slice(0, -5);
1166
+ const full = join(notesMetaRoot, entry);
1167
+ sidecarFilesById.set(id, full);
1168
+ try {
1169
+ const text = readFileSync(full, "utf-8");
1170
+ const { frontmatter } = parseFrontmatter(`---\n${text}---\n`);
1171
+ sidecarById.set(id, {
1172
+ path: typeof frontmatter.path === "string" ? (frontmatter.path as string) : null,
1173
+ extension: typeof frontmatter.extension === "string" ? (frontmatter.extension as string) : null,
1174
+ });
1175
+ } catch (err) {
1176
+ stats.unparseable_files.push({
1177
+ path: full,
1178
+ reason: `failed to parse sidecar: ${(err as Error).message ?? err}`,
1179
+ });
1180
+ }
1181
+ }
1182
+ } catch (err) {
1183
+ // Notes-meta dir read-error is non-fatal — record + carry on.
1184
+ stats.unparseable_files.push({
1185
+ path: notesMetaRoot,
1186
+ reason: `notes-meta walk failed: ${(err as Error).message ?? err}`,
1187
+ });
1188
+ }
1189
+ }
1190
+
1191
+ // Now walk content files (everything outside the .parachute sidecar).
1192
+ const contentFiles = walkContentFiles(outDir);
1193
+ // Track which sidecars matched a content file so we can also remove
1194
+ // orphaned sidecars (sidecar present but content file gone).
1195
+ const pairedSidecarIds = new Set<string>();
1196
+ for (const filepath of contentFiles) {
1197
+ const ext = extname(filepath).slice(1).toLowerCase();
1198
+ if (supportsInline(ext)) {
1199
+ // Parse frontmatter, read id.
1200
+ try {
1201
+ const raw = readFileSync(filepath, "utf-8");
1202
+ const { frontmatter } = parseFrontmatter(raw);
1203
+ const id = typeof frontmatter.id === "string" ? (frontmatter.id as string) : null;
1204
+ if (!id) {
1205
+ stats.unparseable_files.push({
1206
+ path: filepath,
1207
+ reason: "no `id` in frontmatter",
1208
+ });
1209
+ continue;
1210
+ }
1211
+ if (!opts.validNoteIds.has(id)) {
1212
+ safeRm(filepath, () => {
1213
+ stats.notes_removed++;
1214
+ });
1215
+ }
1216
+ } catch (err) {
1217
+ stats.unparseable_files.push({
1218
+ path: filepath,
1219
+ reason: `read failed: ${(err as Error).message ?? err}`,
1220
+ });
1221
+ }
1222
+ } else {
1223
+ // Sidecar-required extension. Find the matching sidecar by
1224
+ // (path, extension) — sidecars are keyed by id, so we sweep the
1225
+ // sidecarById map.
1226
+ const relPath = relative(outDir, filepath).replace(/\\/g, "/");
1227
+ // Strip extension to get the canonical path stored in the sidecar
1228
+ // (vault paths don't carry extensions).
1229
+ const pathNoExt = relPath.slice(0, -(ext.length + 1));
1230
+ let foundId: string | null = null;
1231
+ for (const [id, info] of sidecarById.entries()) {
1232
+ if (
1233
+ info.path === pathNoExt &&
1234
+ (info.extension ?? "md").toLowerCase() === ext
1235
+ ) {
1236
+ foundId = id;
1237
+ break;
1238
+ }
1239
+ }
1240
+ if (!foundId) {
1241
+ // Content file with no sidecar — can't tell which note owns it.
1242
+ // Conservative: leave alone, record as unparseable.
1243
+ stats.unparseable_files.push({
1244
+ path: filepath,
1245
+ reason: "no sidecar metadata could be matched by (path, extension)",
1246
+ });
1247
+ continue;
1248
+ }
1249
+ pairedSidecarIds.add(foundId);
1250
+ if (!opts.validNoteIds.has(foundId)) {
1251
+ // Note is orphaned — remove both content and sidecar.
1252
+ safeRm(filepath, () => {
1253
+ stats.notes_removed++;
1254
+ });
1255
+ const sidecarPath = sidecarFilesById.get(foundId);
1256
+ if (sidecarPath) {
1257
+ safeRm(sidecarPath, () => {
1258
+ stats.sidecars_removed++;
1259
+ });
1260
+ }
1261
+ }
1262
+ }
1263
+ }
1264
+
1265
+ // Sweep up orphaned sidecars (sidecar exists but no content file
1266
+ // matched OR sidecar's id isn't in validNoteIds).
1267
+ for (const [id, sidecarPath] of sidecarFilesById.entries()) {
1268
+ if (opts.validNoteIds.has(id) && pairedSidecarIds.has(id)) continue;
1269
+ if (opts.validNoteIds.has(id) && !pairedSidecarIds.has(id)) {
1270
+ // Sidecar refers to a valid note but the content file is gone —
1271
+ // that's an inconsistency, not an orphan. Leave the sidecar so
1272
+ // the next export can rewrite the content file alongside it.
1273
+ continue;
1274
+ }
1275
+ safeRm(sidecarPath, () => {
1276
+ stats.sidecars_removed++;
1277
+ });
1278
+ }
1279
+
1280
+ // ---- 3. Schema sidecars ----
1281
+ if (existsSync(schemasRoot)) {
1282
+ try {
1283
+ for (const entry of readdirSync(schemasRoot)) {
1284
+ if (!entry.endsWith(".yaml")) continue;
1285
+ const full = join(schemasRoot, entry);
1286
+ try {
1287
+ const text = readFileSync(full, "utf-8");
1288
+ const { frontmatter } = parseFrontmatter(`---\n${text}---\n`);
1289
+ const name = typeof frontmatter.name === "string" ? (frontmatter.name as string) : null;
1290
+ if (!name) {
1291
+ // Fall back to filename — sanitizeTagFilename replaces `/`
1292
+ // with `__`, so reverse for the lookup.
1293
+ const fromFilename = entry.slice(0, -5).replace(/__/g, "/");
1294
+ if (!opts.validTagNames.has(fromFilename)) {
1295
+ safeRm(full, () => {
1296
+ stats.schemas_removed++;
1297
+ });
1298
+ }
1299
+ continue;
1300
+ }
1301
+ if (!opts.validTagNames.has(name)) {
1302
+ safeRm(full, () => {
1303
+ stats.schemas_removed++;
1304
+ });
1305
+ }
1306
+ } catch (err) {
1307
+ stats.unparseable_files.push({
1308
+ path: full,
1309
+ reason: `schema sweep failed: ${(err as Error).message ?? err}`,
1310
+ });
1311
+ }
1312
+ }
1313
+ } catch (err) {
1314
+ stats.unparseable_files.push({
1315
+ path: schemasRoot,
1316
+ reason: `schemas walk failed: ${(err as Error).message ?? err}`,
1317
+ });
1318
+ }
1319
+ }
1320
+
1321
+ // ---- 4. Attachment directories ----
1322
+ if (existsSync(attachmentsRoot)) {
1323
+ try {
1324
+ for (const entry of readdirSync(attachmentsRoot)) {
1325
+ const full = join(attachmentsRoot, entry);
1326
+ let stat;
1327
+ try {
1328
+ stat = statSync(full);
1329
+ } catch (err) {
1330
+ stats.unparseable_files.push({
1331
+ path: full,
1332
+ reason: `stat failed: ${(err as Error).message ?? err}`,
1333
+ });
1334
+ continue;
1335
+ }
1336
+ if (!stat.isDirectory()) continue;
1337
+ // The directory name IS the attachment id (per the export layout).
1338
+ if (!opts.validAttachmentIds.has(entry)) {
1339
+ safeRm(
1340
+ full,
1341
+ () => {
1342
+ stats.attachment_dirs_removed++;
1343
+ },
1344
+ { recursive: true },
1345
+ );
1346
+ }
1347
+ }
1348
+ } catch (err) {
1349
+ stats.unparseable_files.push({
1350
+ path: attachmentsRoot,
1351
+ reason: `attachments walk failed: ${(err as Error).message ?? err}`,
1352
+ });
1353
+ }
1354
+ }
1355
+
1356
+ return stats;
1357
+ }
1358
+
1359
+ /**
1360
+ * True iff the tag carries content the export writer will emit as a
1361
+ * schema sidecar (`.parachute/schemas/<tag>.yaml`). Bare-name tags
1362
+ * (the `tags` table has a row but description/fields/relationships/
1363
+ * parents are all empty) get no sidecar — and crucially, after
1364
+ * `deleteTagSchema` clears those fields the row persists with the
1365
+ * bare name. Callers building the `validTagNames` set for
1366
+ * `pruneOrphans` MUST filter through this predicate, otherwise the
1367
+ * stale sidecar lingers indefinitely.
1368
+ *
1369
+ * Reviewer-flagged on vault#382: without this filter, a cleared
1370
+ * schema's sidecar never gets pruned.
1371
+ */
1372
+ export function hasSchemaContent(tag: TagRecord): boolean {
1005
1373
  if (tag.description !== undefined && tag.description.length > 0) return true;
1006
1374
  if (tag.fields && Object.keys(tag.fields).length > 0) return true;
1007
1375
  if (tag.relationships && Object.keys(tag.relationships).length > 0) return true;