@openparachute/vault 0.4.9-rc.4 → 0.4.9-rc.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/src/hooks.ts CHANGED
@@ -47,7 +47,34 @@
47
47
 
48
48
  import type { Note, Store, Attachment } from "./types.js";
49
49
 
50
- export type HookEvent = "created" | "updated";
50
+ /**
51
+ * Note-mutation events. `"created"` and `"updated"` carry the full post-write
52
+ * `Note`; `"deleted"` carries only `{ id, path }` — the row is gone by
53
+ * dispatch time, so handlers can't re-read it. Down-stream consumers (e.g.
54
+ * the git-mirror) match by id/path and react (remove file, etc.). Predicate
55
+ * authors writing a `when(note)` for `"deleted"` should rely on `note.id` /
56
+ * `note.path` only; everything else is undefined for the deleted shape.
57
+ */
58
+ export type HookEvent = "created" | "updated" | "deleted";
59
+
60
+ /**
61
+ * Minimal identity payload for a deleted note. The row is gone by dispatch
62
+ * time, so handlers can't go back to the store for the rest. Path is
63
+ * optional because notes without a path are legal (e.g. fragments captured
64
+ * via API without a target slot).
65
+ */
66
+ export interface DeletedNoteRef {
67
+ id: string;
68
+ path?: string;
69
+ }
70
+
71
+ /**
72
+ * What a hook handler receives. For `"created"` / `"updated"` it's the full
73
+ * `Note` row; for `"deleted"` only the id/path remain. This union keeps the
74
+ * common predicates (path-prefix gates, tag checks for non-deleted shapes)
75
+ * working unchanged while making the "deleted has less" reality type-safe.
76
+ */
77
+ export type NoteHookPayload = Note | DeletedNoteRef;
51
78
 
52
79
  export interface NoteHook {
53
80
  /** Events this hook listens for. Defaults to ["created", "updated"]. */
@@ -57,10 +84,26 @@ export interface NoteHook {
57
84
  * Should be cheap and synchronous. Idempotency lives here: check
58
85
  * whether a marker (e.g. `metadata.audio_rendered_at`) is already set
59
86
  * and return false if so.
87
+ *
88
+ * For `"deleted"` events the payload is a `DeletedNoteRef` — only
89
+ * `id` / `path` are populated; tag/metadata/content fields are
90
+ * undefined. Predicates that read those fields effectively skip
91
+ * the deleted shape unless they handle the narrower payload.
92
+ */
93
+ when?: (note: NoteHookPayload) => boolean;
94
+ /**
95
+ * Handler — runs async, off the request path. Third arg is the event
96
+ * type. For `"deleted"` the payload is a `DeletedNoteRef`, not a full
97
+ * `Note` — the row is gone by dispatch time, so the store can't be
98
+ * queried for it. Handlers needing more context should stash it ahead
99
+ * of time on the note's `metadata` and read off the predicate / the
100
+ * tracking shape rather than re-querying.
60
101
  */
61
- when?: (note: Note) => boolean;
62
- /** Handler — runs async, off the request path. Third arg is the event type. */
63
- handler: (note: Note, store: Store, event?: HookEvent) => Promise<void> | void;
102
+ handler: (
103
+ note: NoteHookPayload,
104
+ store: Store,
105
+ event?: HookEvent,
106
+ ) => Promise<void> | void;
64
107
  /** Optional label for logs. */
65
108
  name?: string;
66
109
  }
@@ -70,22 +113,46 @@ interface RegisteredHook extends NoteHook {
70
113
  }
71
114
 
72
115
  /**
73
- * Attachment-mutation events. Today only `"created"` is dispatched — the
74
- * transcription worker (and any future attachment-aware feature) registers
75
- * here to move off its poll-driven steady state and onto the same event bus
76
- * that note hooks use. Keeping attachments separate from notes means a
77
- * `NoteHook` predicate doesn't have to learn a second argument shape.
116
+ * Attachment-mutation events. `"created"` carries the full attachment;
117
+ * `"deleted"` carries only `{ id, noteId, path }` (the row is gone by
118
+ * dispatch time, same shape rule as deleted notes).
119
+ *
120
+ * Consumers (transcription worker, git-mirror) subscribe here to react to
121
+ * lifecycle changes without polling. Keeping attachments separate from
122
+ * notes means a `NoteHook` predicate doesn't have to learn a second
123
+ * argument shape.
124
+ */
125
+ export type AttachmentHookEvent = "created" | "deleted";
126
+
127
+ /**
128
+ * Identity payload for a deleted attachment. The DB row is gone by
129
+ * dispatch time, so handlers can only react to id/noteId/path; metadata
130
+ * and timestamps are not preserved.
78
131
  */
79
- export type AttachmentHookEvent = "created";
132
+ export interface DeletedAttachmentRef {
133
+ id: string;
134
+ noteId: string;
135
+ path: string;
136
+ }
137
+
138
+ /** Union over what an attachment hook handler may receive. */
139
+ export type AttachmentHookPayload = Attachment | DeletedAttachmentRef;
80
140
 
81
141
  export interface AttachmentHook {
82
142
  /** Events this hook listens for. Defaults to ["created"]. */
83
143
  event?: AttachmentHookEvent | AttachmentHookEvent[];
84
- /** Sync predicate. Same idempotency contract as `NoteHook.when`. */
85
- when?: (attachment: Attachment) => boolean;
86
- /** Handler runs async, off the request path. */
144
+ /**
145
+ * Sync predicate. Same idempotency contract as `NoteHook.when`. For
146
+ * `"deleted"` the payload is a `DeletedAttachmentRef` — predicates
147
+ * relying on `metadata` / `createdAt` will see `undefined`.
148
+ */
149
+ when?: (attachment: AttachmentHookPayload) => boolean;
150
+ /**
151
+ * Handler — runs async, off the request path. For `"deleted"` the
152
+ * payload is a `DeletedAttachmentRef`, not a full `Attachment`.
153
+ */
87
154
  handler: (
88
- attachment: Attachment,
155
+ attachment: AttachmentHookPayload,
89
156
  store: Store,
90
157
  event?: AttachmentHookEvent,
91
158
  ) => Promise<void> | void;
@@ -97,6 +164,37 @@ interface RegisteredAttachmentHook extends AttachmentHook {
97
164
  events: Set<AttachmentHookEvent>;
98
165
  }
99
166
 
167
+ /**
168
+ * Tag-mutation events. `"upserted"` fires on tag-record create/update
169
+ * (description, fields, relationships, parent_names — any mutation that
170
+ * could change the schema sidecar that the git-mirror writes). `"deleted"`
171
+ * fires when the tag row is removed.
172
+ *
173
+ * Why this exists: the export sidecars at `.parachute/schemas/<tag>.yaml`
174
+ * are part of the mirror's output. Without a tag-mutation event the mirror
175
+ * has no signal that those files might need to change.
176
+ */
177
+ export type TagHookEvent = "upserted" | "deleted";
178
+
179
+ export interface TagHook {
180
+ /** Events this hook listens for. Defaults to ["upserted", "deleted"]. */
181
+ event?: TagHookEvent | TagHookEvent[];
182
+ /** Sync predicate keyed on the tag name. */
183
+ when?: (tag: string) => boolean;
184
+ /** Handler — runs async, off the request path. */
185
+ handler: (
186
+ tag: string,
187
+ store: Store,
188
+ event?: TagHookEvent,
189
+ ) => Promise<void> | void;
190
+ /** Optional label for logs. */
191
+ name?: string;
192
+ }
193
+
194
+ interface RegisteredTagHook extends TagHook {
195
+ events: Set<TagHookEvent>;
196
+ }
197
+
100
198
  /**
101
199
  * Tiny async semaphore — FIFO waiters, no dependencies.
102
200
  * Used to cap concurrent handler execution across all hooks.
@@ -139,6 +237,7 @@ export interface HookRegistryOptions {
139
237
  export class HookRegistry {
140
238
  private hooks: RegisteredHook[] = [];
141
239
  private attachmentHooks: RegisteredAttachmentHook[] = [];
240
+ private tagHooks: RegisteredTagHook[] = [];
142
241
  private semaphore: Semaphore;
143
242
  private inFlight = new Set<Promise<void>>();
144
243
  private logger: { error: (...args: unknown[]) => void };
@@ -150,7 +249,15 @@ export class HookRegistry {
150
249
  this.logger = opts.logger ?? console;
151
250
  }
152
251
 
153
- /** Register a hook. Returns an unregister function. */
252
+ /**
253
+ * Register a note-mutation hook. Returns an unregister function.
254
+ *
255
+ * The default event set is `["created", "updated"]` — explicit
256
+ * `event: "deleted"` (or include it in the array) is required to
257
+ * subscribe to deletions. This keeps existing hooks that pre-date the
258
+ * `"deleted"` event from suddenly receiving a payload shape
259
+ * (`DeletedNoteRef`) they weren't typed for.
260
+ */
154
261
  onNote(hook: NoteHook): () => void {
155
262
  const events = new Set<HookEvent>(
156
263
  Array.isArray(hook.event)
@@ -167,7 +274,13 @@ export class HookRegistry {
167
274
  };
168
275
  }
169
276
 
170
- /** Register an attachment-mutation hook. Returns an unregister function. */
277
+ /**
278
+ * Register an attachment-mutation hook. Returns an unregister function.
279
+ *
280
+ * The default event set is `["created"]` only (matches the historical
281
+ * shape pre-deletion-events). Subscribe to `"deleted"` explicitly when
282
+ * the handler needs to react to attachment removal.
283
+ */
171
284
  onAttachment(hook: AttachmentHook): () => void {
172
285
  const events = new Set<AttachmentHookEvent>(
173
286
  Array.isArray(hook.event)
@@ -184,15 +297,38 @@ export class HookRegistry {
184
297
  };
185
298
  }
186
299
 
300
+ /**
301
+ * Register a tag-mutation hook. Returns an unregister function. Default
302
+ * event set is both `"upserted"` and `"deleted"` — symmetric with the
303
+ * sole consumer's needs (the git-mirror reacts to either to refresh its
304
+ * schema sidecars).
305
+ */
306
+ onTag(hook: TagHook): () => void {
307
+ const events = new Set<TagHookEvent>(
308
+ Array.isArray(hook.event)
309
+ ? hook.event
310
+ : hook.event
311
+ ? [hook.event]
312
+ : (["upserted", "deleted"] as TagHookEvent[]),
313
+ );
314
+ const entry: RegisteredTagHook = { ...hook, events };
315
+ this.tagHooks.push(entry);
316
+ return () => {
317
+ const idx = this.tagHooks.indexOf(entry);
318
+ if (idx >= 0) this.tagHooks.splice(idx, 1);
319
+ };
320
+ }
321
+
187
322
  /** Remove all registered hooks. Mostly for tests. */
188
323
  clear(): void {
189
324
  this.hooks = [];
190
325
  this.attachmentHooks = [];
326
+ this.tagHooks = [];
191
327
  }
192
328
 
193
- /** Count of currently registered hooks (notes + attachments). */
329
+ /** Count of currently registered hooks (notes + attachments + tags). */
194
330
  get size(): number {
195
- return this.hooks.length + this.attachmentHooks.length;
331
+ return this.hooks.length + this.attachmentHooks.length + this.tagHooks.length;
196
332
  }
197
333
 
198
334
  /** Count of currently in-flight handler executions. */
@@ -201,13 +337,19 @@ export class HookRegistry {
201
337
  }
202
338
 
203
339
  /**
204
- * Dispatch a mutation event. Matches hooks, schedules their handlers
205
- * onto a microtask, and returns immediately. The caller is never
206
- * blocked on handler execution.
340
+ * Dispatch a note-mutation event. Matches hooks, schedules their
341
+ * handlers onto a microtask, and returns immediately. The caller is
342
+ * never blocked on handler execution.
207
343
  *
208
344
  * Must only be called after the triggering SQLite write has committed.
345
+ *
346
+ * For `"deleted"` the `note` argument is a `DeletedNoteRef` ({ id,
347
+ * path }) — the row is gone, so the runtime can't re-read it before
348
+ * dispatching to handlers. For `"created"` / `"updated"` the full
349
+ * `Note` is expected; `runHandler` will re-read from the store for
350
+ * non-deleted events to pick up the latest committed state.
209
351
  */
210
- dispatch(event: HookEvent, note: Note, store: Store): void {
352
+ dispatch(event: HookEvent, note: NoteHookPayload, store: Store): void {
211
353
  if (this.hooks.length === 0) return;
212
354
 
213
355
  // Snapshot matches synchronously so subsequent hook registration
@@ -241,13 +383,12 @@ export class HookRegistry {
241
383
 
242
384
  /**
243
385
  * Dispatch an attachment-mutation event. Same post-commit/microtask
244
- * contract as `dispatch()` for notes — callers are never blocked on
245
- * handler execution, and the triggering SQLite write must already be
246
- * committed.
386
+ * contract as `dispatch()` for notes. For `"deleted"` the payload is
387
+ * a `DeletedAttachmentRef`; for `"created"` it's the full `Attachment`.
247
388
  */
248
389
  dispatchAttachment(
249
390
  event: AttachmentHookEvent,
250
- attachment: Attachment,
391
+ attachment: AttachmentHookPayload,
251
392
  store: Store,
252
393
  ): void {
253
394
  if (this.attachmentHooks.length === 0) return;
@@ -277,19 +418,61 @@ export class HookRegistry {
277
418
  });
278
419
  }
279
420
 
421
+ /**
422
+ * Dispatch a tag-mutation event. Same post-commit / microtask contract
423
+ * as the note + attachment dispatchers. `tag` is the bare tag name; the
424
+ * git-mirror handler matches on it to identify which `.parachute/schemas/<tag>.yaml`
425
+ * sidecar to rewrite or remove.
426
+ */
427
+ dispatchTag(event: TagHookEvent, tag: string, store: Store): void {
428
+ if (this.tagHooks.length === 0) return;
429
+
430
+ const matches: RegisteredTagHook[] = [];
431
+ for (const hook of this.tagHooks) {
432
+ if (!hook.events.has(event)) continue;
433
+ try {
434
+ if (hook.when && !hook.when(tag)) continue;
435
+ } catch (err) {
436
+ this.logger.error(
437
+ `[hooks] predicate threw for ${hook.name ?? "anonymous"} on tag ${tag}:`,
438
+ err,
439
+ );
440
+ continue;
441
+ }
442
+ matches.push(hook);
443
+ }
444
+ if (matches.length === 0) return;
445
+
446
+ queueMicrotask(() => {
447
+ for (const hook of matches) {
448
+ const task = this.runTagHandler(hook, event, tag, store);
449
+ this.inFlight.add(task);
450
+ task.finally(() => this.inFlight.delete(task));
451
+ }
452
+ });
453
+ }
454
+
280
455
  private async runHandler(
281
456
  hook: RegisteredHook,
282
457
  event: HookEvent,
283
- note: Note,
458
+ note: NoteHookPayload,
284
459
  store: Store,
285
460
  ): Promise<void> {
286
461
  const release = await this.semaphore.acquire();
287
462
  try {
288
- // Re-read the note so the handler sees the latest state (another
289
- // handler may have written back in between). If the note was
290
- // deleted, silently drop.
291
- const fresh = (await store.getNote(note.id)) ?? note;
292
- await hook.handler(fresh, store, event);
463
+ // For non-deleted events, re-read the note so the handler sees the
464
+ // latest committed state (another handler may have written back
465
+ // between dispatch and this acquisition). For "deleted" events the
466
+ // row is gone — pass the DeletedNoteRef payload straight through.
467
+ let payload: NoteHookPayload = note;
468
+ if (event !== "deleted") {
469
+ const fresh = await store.getNote(note.id);
470
+ if (fresh) payload = fresh;
471
+ // If the note was deleted between dispatch and re-read, fall
472
+ // back to the dispatch-time payload — same shape as before; the
473
+ // handler can sense the disappearance via its own predicate.
474
+ }
475
+ await hook.handler(payload, store, event);
293
476
  } catch (err) {
294
477
  this.logger.error(
295
478
  `[hooks] handler ${hook.name ?? "anonymous"} threw on ${event} ${note.id}:`,
@@ -303,16 +486,19 @@ export class HookRegistry {
303
486
  private async runAttachmentHandler(
304
487
  hook: RegisteredAttachmentHook,
305
488
  event: AttachmentHookEvent,
306
- attachment: Attachment,
489
+ attachment: AttachmentHookPayload,
307
490
  store: Store,
308
491
  ): Promise<void> {
309
492
  const release = await this.semaphore.acquire();
310
493
  try {
311
- // Re-read the attachment so the handler sees the latest metadata
312
- // (another handler may have written back in between). If the
313
- // attachment was deleted, silently drop.
314
- const fresh = (await store.getAttachment(attachment.id)) ?? attachment;
315
- await hook.handler(fresh, store, event);
494
+ // Symmetric with runHandler: re-read for non-deleted events,
495
+ // pass straight through on delete.
496
+ let payload: AttachmentHookPayload = attachment;
497
+ if (event !== "deleted") {
498
+ const fresh = await store.getAttachment(attachment.id);
499
+ if (fresh) payload = fresh;
500
+ }
501
+ await hook.handler(payload, store, event);
316
502
  } catch (err) {
317
503
  this.logger.error(
318
504
  `[hooks] attachment handler ${hook.name ?? "anonymous"} threw on ${event} ${attachment.id}:`,
@@ -323,6 +509,25 @@ export class HookRegistry {
323
509
  }
324
510
  }
325
511
 
512
+ private async runTagHandler(
513
+ hook: RegisteredTagHook,
514
+ event: TagHookEvent,
515
+ tag: string,
516
+ store: Store,
517
+ ): Promise<void> {
518
+ const release = await this.semaphore.acquire();
519
+ try {
520
+ await hook.handler(tag, store, event);
521
+ } catch (err) {
522
+ this.logger.error(
523
+ `[hooks] tag handler ${hook.name ?? "anonymous"} threw on ${event} ${tag}:`,
524
+ err,
525
+ );
526
+ } finally {
527
+ release();
528
+ }
529
+ }
530
+
326
531
  /**
327
532
  * Wait for all currently in-flight handlers to settle. Best-effort
328
533
  * drain for graceful shutdown. New hooks dispatched during the drain
@@ -19,7 +19,7 @@
19
19
 
20
20
  import { describe, it, expect, beforeEach } from "bun:test";
21
21
  import { Database } from "bun:sqlite";
22
- import { mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync, existsSync, statSync } from "fs";
22
+ import { mkdirSync, readFileSync, readdirSync, rmSync, symlinkSync, writeFileSync, existsSync, statSync } from "fs";
23
23
  import { join } from "path";
24
24
  import { tmpdir } from "os";
25
25
 
@@ -33,6 +33,7 @@ import {
33
33
  parseFrontmatter,
34
34
  portableExportFilePath,
35
35
  probeCaseSensitive,
36
+ pruneOrphans,
36
37
  SIDECAR_DIR,
37
38
  NOTES_META_DIR,
38
39
  supportsInlineFrontmatter,
@@ -1799,3 +1800,253 @@ describe("case-collision detection (vault#327)", async () => {
1799
1800
  expect(stats.disambiguated_paths).toHaveLength(1);
1800
1801
  });
1801
1802
  });
1803
+
1804
+ // ---------------------------------------------------------------------------
1805
+ // pruneOrphans (vault#382 — event-driven mirror delete propagation)
1806
+ // ---------------------------------------------------------------------------
1807
+
1808
+ describe("pruneOrphans", async () => {
1809
+ let tmpBase: string;
1810
+ beforeEach(() => {
1811
+ tmpBase = join(tmpdir(), `parachute-prune-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`);
1812
+ mkdirSync(tmpBase, { recursive: true });
1813
+ });
1814
+
1815
+ it("no-op on non-existent directory", () => {
1816
+ const stats = pruneOrphans({
1817
+ outDir: join(tmpBase, "doesnt-exist"),
1818
+ validNoteIds: new Set(),
1819
+ validTagNames: new Set(),
1820
+ validAttachmentIds: new Set(),
1821
+ });
1822
+ expect(stats.notes_removed).toBe(0);
1823
+ expect(stats.unparseable_files).toHaveLength(0);
1824
+ });
1825
+
1826
+ it("removes orphaned note .md file", async () => {
1827
+ const outDir = join(tmpBase, "orphan-note");
1828
+ // First do a real export so the structure is realistic.
1829
+ const db = new Database(":memory:");
1830
+ const store = new SqliteStore(db);
1831
+ await store.createNote("alive", { id: "01HFAA", path: "alive" });
1832
+ await store.createNote("doomed", { id: "01HFBB", path: "doomed" });
1833
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1834
+ expect(existsSync(join(outDir, "alive.md"))).toBe(true);
1835
+ expect(existsSync(join(outDir, "doomed.md"))).toBe(true);
1836
+
1837
+ // Now prune with only "alive" in the valid set.
1838
+ const stats = pruneOrphans({
1839
+ outDir,
1840
+ validNoteIds: new Set(["01HFAA"]),
1841
+ validTagNames: new Set(),
1842
+ validAttachmentIds: new Set(),
1843
+ });
1844
+ expect(stats.notes_removed).toBe(1);
1845
+ expect(existsSync(join(outDir, "alive.md"))).toBe(true);
1846
+ expect(existsSync(join(outDir, "doomed.md"))).toBe(false);
1847
+ });
1848
+
1849
+ it("removes orphaned schema sidecar", async () => {
1850
+ const outDir = join(tmpBase, "orphan-schema");
1851
+ const db = new Database(":memory:");
1852
+ const store = new SqliteStore(db);
1853
+ await store.upsertTagRecord("alive-tag", { description: "stays" });
1854
+ await store.upsertTagRecord("doomed-tag", { description: "goes" });
1855
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1856
+ const schemasDir = join(outDir, SIDECAR_DIR, "schemas");
1857
+ expect(existsSync(join(schemasDir, "alive-tag.yaml"))).toBe(true);
1858
+ expect(existsSync(join(schemasDir, "doomed-tag.yaml"))).toBe(true);
1859
+
1860
+ const stats = pruneOrphans({
1861
+ outDir,
1862
+ validNoteIds: new Set(),
1863
+ validTagNames: new Set(["alive-tag"]),
1864
+ validAttachmentIds: new Set(),
1865
+ });
1866
+ expect(stats.schemas_removed).toBe(1);
1867
+ expect(existsSync(join(schemasDir, "alive-tag.yaml"))).toBe(true);
1868
+ expect(existsSync(join(schemasDir, "doomed-tag.yaml"))).toBe(false);
1869
+ });
1870
+
1871
+ it("removes orphaned attachment directories", async () => {
1872
+ const outDir = join(tmpBase, "orphan-att");
1873
+ // Build the export structure by hand (attachment binaries need
1874
+ // assetsDir wiring; cheaper to just create the dirs).
1875
+ const attachmentsDir = join(outDir, SIDECAR_DIR, "attachments");
1876
+ mkdirSync(attachmentsDir, { recursive: true });
1877
+ mkdirSync(join(attachmentsDir, "att-alive"), { recursive: true });
1878
+ writeFileSync(join(attachmentsDir, "att-alive", "voice.m4a"), "");
1879
+ mkdirSync(join(attachmentsDir, "att-doomed"), { recursive: true });
1880
+ writeFileSync(join(attachmentsDir, "att-doomed", "voice.m4a"), "");
1881
+ // Need .parachute/vault.yaml so the structure is recognized (cheap to fake)
1882
+ writeFileSync(join(outDir, SIDECAR_DIR, "vault.yaml"), "name: t\n");
1883
+
1884
+ const stats = pruneOrphans({
1885
+ outDir,
1886
+ validNoteIds: new Set(),
1887
+ validTagNames: new Set(),
1888
+ validAttachmentIds: new Set(["att-alive"]),
1889
+ });
1890
+ expect(stats.attachment_dirs_removed).toBe(1);
1891
+ expect(existsSync(join(attachmentsDir, "att-alive"))).toBe(true);
1892
+ expect(existsSync(join(attachmentsDir, "att-doomed"))).toBe(false);
1893
+ });
1894
+
1895
+ it("skips unparseable .md files without crashing", async () => {
1896
+ const outDir = join(tmpBase, "unparseable");
1897
+ mkdirSync(outDir, { recursive: true });
1898
+ writeFileSync(join(outDir, "no-frontmatter.md"), "just content, no frontmatter\n");
1899
+ writeFileSync(join(outDir, "garbage.md"), "---\nnot-real-yaml\n");
1900
+ const stats = pruneOrphans({
1901
+ outDir,
1902
+ validNoteIds: new Set(),
1903
+ validTagNames: new Set(),
1904
+ validAttachmentIds: new Set(),
1905
+ });
1906
+ // Both files lacked an `id`, so we record them but don't remove.
1907
+ expect(stats.notes_removed).toBe(0);
1908
+ expect(stats.unparseable_files.length).toBeGreaterThanOrEqual(2);
1909
+ expect(existsSync(join(outDir, "no-frontmatter.md"))).toBe(true);
1910
+ expect(existsSync(join(outDir, "garbage.md"))).toBe(true);
1911
+ });
1912
+
1913
+ it("preserves all files when everything is in the valid sets", async () => {
1914
+ const outDir = join(tmpBase, "happy-path");
1915
+ const db = new Database(":memory:");
1916
+ const store = new SqliteStore(db);
1917
+ const a = await store.createNote("a", { path: "a" });
1918
+ const b = await store.createNote("b", { path: "b" });
1919
+ await store.upsertTagRecord("tag1", { description: "x" });
1920
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1921
+ const stats = pruneOrphans({
1922
+ outDir,
1923
+ validNoteIds: new Set([a.id, b.id]),
1924
+ validTagNames: new Set(["tag1"]),
1925
+ validAttachmentIds: new Set(),
1926
+ });
1927
+ expect(stats.notes_removed).toBe(0);
1928
+ expect(stats.schemas_removed).toBe(0);
1929
+ expect(stats.attachment_dirs_removed).toBe(0);
1930
+ expect(existsSync(join(outDir, "a.md"))).toBe(true);
1931
+ expect(existsSync(join(outDir, "b.md"))).toBe(true);
1932
+ });
1933
+
1934
+ it("removes orphan note + corresponding notes-meta sidecar for csv/yaml notes", async () => {
1935
+ // For non-frontmatter extensions, the sidecar lives at
1936
+ // .parachute/notes-meta/<id>.yaml. Pruning the note should remove
1937
+ // both files.
1938
+ const outDir = join(tmpBase, "orphan-csv");
1939
+ const db = new Database(":memory:");
1940
+ const store = new SqliteStore(db);
1941
+ await store.createNote("col1,col2\n1,2\n", {
1942
+ id: "01CSV-DEL",
1943
+ path: "data/table",
1944
+ extension: "csv",
1945
+ });
1946
+ await exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: "2026-01-01T00:00:00.000Z" });
1947
+ const contentFile = join(outDir, "data", "table.csv");
1948
+ const sidecarFile = join(outDir, SIDECAR_DIR, "notes-meta", "01CSV-DEL.yaml");
1949
+ expect(existsSync(contentFile)).toBe(true);
1950
+ expect(existsSync(sidecarFile)).toBe(true);
1951
+
1952
+ const stats = pruneOrphans({
1953
+ outDir,
1954
+ validNoteIds: new Set(), // doom it
1955
+ validTagNames: new Set(),
1956
+ validAttachmentIds: new Set(),
1957
+ });
1958
+ expect(stats.notes_removed).toBe(1);
1959
+ expect(stats.sidecars_removed).toBeGreaterThanOrEqual(1);
1960
+ expect(existsSync(contentFile)).toBe(false);
1961
+ expect(existsSync(sidecarFile)).toBe(false);
1962
+ });
1963
+
1964
+ // Reviewer-flagged regression on vault#382 Critical #2 — pruneOrphans
1965
+ // walks via statSync (follows symlinks); without the safeRm guard a
1966
+ // symlink inside the mirror pointing OUTSIDE outDir would resurface
1967
+ // its target's files as orphans and rmSync would happily delete them
1968
+ // off-tree. The guard resolves each candidate and refuses anything
1969
+ // not under outDir; refusals get recorded in `unparseable_files` so
1970
+ // an operator can see what was skipped.
1971
+ it("refuses to delete files reached via a symlink pointing outside outDir", async () => {
1972
+ const outDir = join(tmpBase, "symlink-attack");
1973
+ const outside = join(tmpBase, "outside");
1974
+ mkdirSync(outside, { recursive: true });
1975
+ mkdirSync(outDir, { recursive: true });
1976
+ // A real, sensitive file in `outside/` we don't want pruneOrphans
1977
+ // to touch under any circumstance.
1978
+ const externalFile = join(outside, "do-not-touch.md");
1979
+ writeFileSync(externalFile, "---\nid: 01EXTERNAL\n---\nimportant\n");
1980
+ // A symlink inside outDir pointing at outside/ — walkContentFiles
1981
+ // would normally surface outside/do-not-touch.md as a candidate.
1982
+ try {
1983
+ symlinkSync(outside, join(outDir, "via-link"));
1984
+ } catch {
1985
+ // Some CI sandboxes refuse symlink creation. Skip the test in
1986
+ // that case rather than fail spuriously.
1987
+ return;
1988
+ }
1989
+
1990
+ const stats = pruneOrphans({
1991
+ outDir,
1992
+ validNoteIds: new Set(), // doom every id we see
1993
+ validTagNames: new Set(),
1994
+ validAttachmentIds: new Set(),
1995
+ });
1996
+
1997
+ // Critical assertion: the external file MUST survive.
1998
+ expect(existsSync(externalFile)).toBe(true);
1999
+ // And the refusal MUST be recorded so the operator sees it.
2000
+ expect(
2001
+ stats.unparseable_files.some(
2002
+ (u) => u.path.includes("via-link") || u.reason.includes("outside"),
2003
+ ),
2004
+ ).toBe(true);
2005
+ });
2006
+
2007
+ // Reviewer-flagged regression on vault#382 Critical #1 — pruneOrphans
2008
+ // builds `validTagNames` from ALL tag-table rows in mirror-deps.ts.
2009
+ // After `deleteTagSchema(t)` the schema fields are cleared but the
2010
+ // tag row persists with the bare name, so the sidecar lingers
2011
+ // forever. The fix routes validTagNames through `hasSchemaContent`
2012
+ // before passing into pruneOrphans, and exports the predicate so
2013
+ // mirror-deps can reuse the single source of truth.
2014
+ it("considers a schema-content-free tag the same as a deleted tag for sidecar pruning", async () => {
2015
+ const outDir = join(tmpBase, "stale-schema");
2016
+ const db = new Database(":memory:");
2017
+ const store = new SqliteStore(db);
2018
+ await store.upsertTagRecord("bare", {}); // bare-name only
2019
+ await store.upsertTagRecord("with-schema", { description: "real" });
2020
+ await exportVaultToDir(store, {
2021
+ outDir,
2022
+ vaultName: "t",
2023
+ exportedAt: "2026-01-01T00:00:00.000Z",
2024
+ });
2025
+ const schemasDir = join(outDir, SIDECAR_DIR, "schemas");
2026
+ // The bare tag SHOULDN'T have a sidecar; the schema-bearing one
2027
+ // SHOULD. This confirms the export-writer's contract before the
2028
+ // prune step.
2029
+ expect(existsSync(join(schemasDir, "bare.yaml"))).toBe(false);
2030
+ expect(existsSync(join(schemasDir, "with-schema.yaml"))).toBe(true);
2031
+
2032
+ // Now seed a stale sidecar for `bare` (simulating "the operator
2033
+ // previously had a schema for `bare`, then cleared it via
2034
+ // `deleteTagSchema`"). pruneOrphans should remove this iff the
2035
+ // caller correctly filtered validTagNames by hasSchemaContent.
2036
+ writeFileSync(join(schemasDir, "bare.yaml"), 'name: "bare"\ndescription: "stale"\n');
2037
+ expect(existsSync(join(schemasDir, "bare.yaml"))).toBe(true);
2038
+
2039
+ // Filtered set — only `with-schema` has hasSchemaContent === true.
2040
+ const validTagNames = new Set(["with-schema"]);
2041
+ const stats = pruneOrphans({
2042
+ outDir,
2043
+ validNoteIds: new Set(),
2044
+ validTagNames,
2045
+ validAttachmentIds: new Set(),
2046
+ });
2047
+
2048
+ expect(stats.schemas_removed).toBe(1);
2049
+ expect(existsSync(join(schemasDir, "bare.yaml"))).toBe(false);
2050
+ expect(existsSync(join(schemasDir, "with-schema.yaml"))).toBe(true);
2051
+ });
2052
+ });