@openparachute/vault 0.5.1 → 0.5.2-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/src/core.test.ts +183 -26
- package/core/src/expand-visibility.test.ts +102 -0
- package/core/src/expand.ts +31 -3
- package/core/src/link-count.test.ts +301 -0
- package/core/src/links.ts +77 -0
- package/core/src/mcp.ts +130 -22
- package/core/src/notes.ts +36 -0
- package/core/src/portable-md.test.ts +40 -0
- package/core/src/schema.ts +7 -4
- package/core/src/store.ts +1 -1
- package/core/src/tag-schemas.ts +59 -44
- package/core/src/types.ts +31 -3
- package/package.json +1 -1
- package/src/auth.test.ts +37 -1
- package/src/auth.ts +29 -0
- package/src/cli.ts +286 -68
- package/src/config.test.ts +16 -0
- package/src/config.ts +39 -0
- package/src/init-summary.test.ts +77 -5
- package/src/init-summary.ts +37 -19
- package/src/mcp-tools.ts +60 -6
- package/src/routes.ts +486 -53
- package/src/routing.test.ts +185 -0
- package/src/routing.ts +32 -2
- package/src/server.ts +7 -0
- package/src/storage.test.ts +162 -0
- package/src/tag-scope.ts +68 -1
- package/src/transcription-worker.test.ts +471 -5
- package/src/transcription-worker.ts +212 -44
- package/src/usage.test.ts +362 -0
- package/src/usage.ts +318 -0
- package/src/vault-create.test.ts +298 -11
- package/src/vault.test.ts +1064 -7
|
@@ -25,8 +25,11 @@
|
|
|
25
25
|
* (Whisper API shape). Response is `{ text: string }`.
|
|
26
26
|
* 3. On success:
|
|
27
27
|
* - If `note.metadata.transcribe_stub === true`, replace the
|
|
28
|
-
* `_Transcript pending._` placeholder
|
|
29
|
-
*
|
|
28
|
+
* `_Transcript pending._` placeholder (or a prior `_Transcription
|
|
29
|
+
* unavailable._` failure marker, on a retry) with the transcript. If
|
|
30
|
+
* neither marker is present (user edited the note while pending),
|
|
31
|
+
* APPEND the transcript rather than overwriting the body. Clear the
|
|
32
|
+
* stub marker.
|
|
30
33
|
* - Mark `attachment.metadata.transcribe_status = "done"` and record
|
|
31
34
|
* `transcript` + `transcribe_done_at`.
|
|
32
35
|
* - If the vault's `audio_retention` is `"until_transcribed"`, unlink
|
|
@@ -50,13 +53,13 @@
|
|
|
50
53
|
|
|
51
54
|
import { join, normalize } from "path";
|
|
52
55
|
import { existsSync, readFileSync, unlinkSync } from "fs";
|
|
53
|
-
import type { Store, Attachment } from "../core/src/types.ts";
|
|
56
|
+
import type { Store, Attachment, Note } from "../core/src/types.ts";
|
|
54
57
|
import type { HookRegistry } from "../core/src/hooks.ts";
|
|
55
58
|
import { appendContextPart, fetchContextEntries, type ContextPayload } from "./context.ts";
|
|
56
59
|
import type { TriggerIncludeContext } from "./config.ts";
|
|
57
60
|
import { upsertTranscriptNote } from "./transcript-note.ts";
|
|
58
61
|
|
|
59
|
-
/** Placeholder pattern written by
|
|
62
|
+
/** Placeholder pattern written by the voice-memo capture stub. */
|
|
60
63
|
const TRANSCRIPT_PLACEHOLDER = /_Transcript pending\._/;
|
|
61
64
|
|
|
62
65
|
/**
|
|
@@ -65,9 +68,33 @@ const TRANSCRIPT_PLACEHOLDER = /_Transcript pending\._/;
|
|
|
65
68
|
* Lens's now-removed scribe client; owning it here means a failed upload
|
|
66
69
|
* stops reading "Transcript pending" forever regardless of which client
|
|
67
70
|
* uploaded the audio.
|
|
71
|
+
*
|
|
72
|
+
* NOTE: the notes-ui status chip (parachute-surface TranscriptionStatus.tsx)
|
|
73
|
+
* keys off this exact string, so don't change the copy without a coordinated
|
|
74
|
+
* change there. A friendlier "retry available" copy + chip affordance is a
|
|
75
|
+
* tracked parachute-surface follow-up.
|
|
68
76
|
*/
|
|
69
77
|
const TRANSCRIPT_UNAVAILABLE = "_Transcription unavailable._";
|
|
70
78
|
|
|
79
|
+
/**
|
|
80
|
+
* On a successful (re)transcription of a legacy in-body memo, the transcript
|
|
81
|
+
* replaces whichever marker is currently in the body — the original
|
|
82
|
+
* `_Transcript pending._` on a first-try success, OR `_Transcription
|
|
83
|
+
* unavailable._` if a prior attempt failed and we're now retrying. Matching
|
|
84
|
+
* both means a retried success lands in the same spot a first-try success
|
|
85
|
+
* would, preserving the surrounding capture body (the `![[memo]]` embed,
|
|
86
|
+
* the `_Recorded …_` line, the header).
|
|
87
|
+
*
|
|
88
|
+
* Deliberately NO `/g` flag — `.replace` swaps only the FIRST match. A
|
|
89
|
+
* canonical capture body holds exactly one marker, so first-match is the
|
|
90
|
+
* correct target. `applyFailureMarker`'s includes-guard (no-op when the
|
|
91
|
+
* marker is already present) prevents markers accumulating across repeated
|
|
92
|
+
* terminal failures, so the body never carries two of the same marker. A
|
|
93
|
+
* hand-edited body that somehow contains both markers patches only the
|
|
94
|
+
* first — accepted (degenerate, operator-induced).
|
|
95
|
+
*/
|
|
96
|
+
const TRANSCRIPT_SUCCESS_TARGET = /_Transcript pending\._|_Transcription unavailable\._/;
|
|
97
|
+
|
|
71
98
|
/**
|
|
72
99
|
* Default sweep cadence (ms). The sweep is the safety net for backoff-
|
|
73
100
|
* queued items, items that arrived while the server was down, or dispatches
|
|
@@ -202,6 +229,100 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
|
|
|
202
229
|
*/
|
|
203
230
|
const inFlightAttachments = new Set<string>();
|
|
204
231
|
|
|
232
|
+
/**
 * Apply a surgical note transform under optimistic concurrency (vault#435).
 *
 * The worker's marker/transcript writes are read-modify-write cycles
 * (`getNote` → transform → `updateNote`). Without a precondition, a user edit
 * landing between the read and the write would be silently clobbered.
 *
 * Sequence:
 *   1. Up to two guarded attempts. Each attempt reads the note, runs
 *      `transform` on that read, and writes with `if_updated_at` set to the
 *      `updatedAt` that was just read. A write failing with
 *      `code === "CONFLICT"` moves on to the next attempt; any other error is
 *      rethrown to the outer handler.
 *   2. If both guarded writes conflict, the note is read once more and written
 *      WITHOUT a precondition — but only when `safeWithoutPrecondition(latest)`
 *      returns true. Otherwise the write is skipped and the skip is logged, so
 *      a third concurrent edit is never blind-overwritten.
 *
 * Every write passes `skipUpdatedAt: true`. A missing note, or a `null` result
 * from `transform`, ends the operation quietly at whichever step it occurs.
 *
 * Errors thrown while reading, transforming or writing are caught and logged
 * rather than propagated: a note-write failure must not mask the
 * attachment-level result already recorded, nor crash the sweep.
 *
 * @param store Store used for `getNote` / `updateNote`.
 * @param noteId Id of the note to patch.
 * @param op Short operation label used as a prefix in log messages.
 * @param transform Maps a freshly read note to the update to apply (`content`
 *   and/or `metadata`), or `null` when there is nothing to do. It is invoked
 *   once per read, so it must be pure with respect to the note it is handed.
 * @param safeWithoutPrecondition Decides, against the latest read, whether the
 *   last-resort precondition-less write is still acceptable.
 */
async function applyNoteTransformWithOC(
  store: Store,
  noteId: string,
  op: string,
  transform: (note: Note) => { content?: string; metadata?: Record<string, unknown> } | null,
  safeWithoutPrecondition: (note: Note) => boolean,
): Promise<void> {
  // Number of precondition-guarded writes tried before the last-resort path.
  const guardedAttempts = 2;

  // A rejected precondition surfaces as a truthy error value whose `code` is
  // "CONFLICT"; everything else is treated as a real failure.
  const isConflict = (err: unknown): boolean =>
    !!err && (err as { code?: unknown }).code === "CONFLICT";

  try {
    for (let attempt = 0; attempt < guardedAttempts; attempt++) {
      // Re-read on every attempt so the transform and the precondition are
      // both based on the freshest state we can observe.
      const current = await store.getNote(noteId);
      if (!current) return;

      const update = transform(current);
      if (update === null) return;

      try {
        await store.updateNote(current.id, {
          ...update,
          skipUpdatedAt: true,
          if_updated_at: current.updatedAt,
        });
        return;
      } catch (err: unknown) {
        if (!isConflict(err)) throw err;
        // Conflict — an edit landed between our read and write. Loop to
        // re-read and re-apply (or fall through once attempts are spent).
      }
    }

    // Double conflict (a further edit raced the retry). Last resort: apply
    // without a precondition ONLY if the transform is still safe against the
    // latest content. Otherwise skip — don't clobber the user.
    const latest = await store.getNote(noteId);
    if (!latest) return;

    if (!safeWithoutPrecondition(latest)) {
      logger.error(
        `[transcribe] ${op}: note ${noteId} kept changing under us (double conflict); skipping to avoid clobbering a concurrent edit`,
      );
      return;
    }

    const finalUpdate = transform(latest);
    if (finalUpdate === null) return;

    await store.updateNote(latest.id, {
      ...finalUpdate,
      skipUpdatedAt: true,
    });
  } catch (err) {
    logger.error(`[transcribe] ${op}: failed to apply to note ${noteId}:`, err);
  }
}
|
|
325
|
+
|
|
205
326
|
async function processOne(vault: string, attachment: Attachment): Promise<void> {
|
|
206
327
|
// Dedupe: another path (sweep vs hook kick, or a duplicate dispatch)
|
|
207
328
|
// is already working this attachment. Drop — its result is durable
|
|
@@ -217,33 +338,58 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
|
|
|
217
338
|
|
|
218
339
|
/**
 * Terminal-failure handler (maxAttempts exhausted, or audio file missing):
 * stamp the "unavailable" marker on the note so the voice memo does not keep
 * reading "Transcript pending" forever.
 *
 * The note is only touched while `metadata.transcribe_stub === true`; the
 * update drops that key from the metadata. The write goes through
 * `applyNoteTransformWithOC`, which re-runs the transform on a fresh read when
 * a write conflicts and logs/swallows errors, so a note-write failure cannot
 * mask the attachment failure being recorded.
 *
 * Body policy (never destroy content):
 *   - placeholder present → replace the first `_Transcript pending._` with
 *     the marker, leaving the surrounding text in place;
 *   - marker already present → body is passed through unchanged, so repeated
 *     terminal failures never stack markers;
 *   - neither present (the note was edited while pending) → append a blank
 *     line plus the marker; an empty body becomes just the marker.
 */
async function applyFailureMarker(store: Store, noteId: string): Promise<void> {
  await applyNoteTransformWithOC(
    store,
    noteId,
    "apply-failure-marker",
    // Transform — evaluated against whichever read it is handed, so the
    // stub check and the marker-already-present check re-run after a conflict.
    (note) => {
      const meta = (note.metadata as Record<string, unknown> | undefined) ?? {};
      if (meta.transcribe_stub !== true) {
        return null;
      }

      const current = note.content;
      // Default covers the "marker already present" case: body left as-is.
      let nextBody = current;
      if (TRANSCRIPT_PLACEHOLDER.test(current)) {
        nextBody = current.replace(TRANSCRIPT_PLACEHOLDER, TRANSCRIPT_UNAVAILABLE);
      } else if (!current.includes(TRANSCRIPT_UNAVAILABLE)) {
        if (current.length > 0) {
          nextBody = `${current}\n\n${TRANSCRIPT_UNAVAILABLE}`;
        } else {
          nextBody = TRANSCRIPT_UNAVAILABLE;
        }
      }

      // Copy the metadata and drop the stub opt-in from the copy.
      const cleanedMeta: Record<string, unknown> = { ...meta };
      delete cleanedMeta.transcribe_stub;
      return { content: nextBody, metadata: cleanedMeta };
    },
    // Last-resort (double-conflict) gate: a precondition-less write is only
    // allowed while the note still carries the stub opt-in. If a racing edit
    // cleared it, skip rather than re-stamp the marker.
    (note) => {
      const meta = note.metadata as Record<string, unknown> | undefined;
      return meta?.transcribe_stub === true;
    },
  );
}
|
|
248
394
|
|
|
249
395
|
/**
|
|
@@ -411,26 +557,48 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
|
|
|
411
557
|
logger.error(`[transcribe] failed to write transcript note for attachment ${attachment.id}:`, err);
|
|
412
558
|
}
|
|
413
559
|
} else {
|
|
414
|
-
// Legacy stub-patching path (
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
560
|
+
// Legacy stub-patching path (voice memo flow). Only acts when the note
|
|
561
|
+
// still carries the `transcribe_stub` opt-in — a user edit clearing it
|
|
562
|
+
// before the transcript arrives opts out of the overwrite. OC-guarded
|
|
563
|
+
// (vault#435): re-applied against fresh content on a conflict so a
|
|
564
|
+
// concurrent user edit isn't clobbered.
|
|
565
|
+
await applyNoteTransformWithOC(
|
|
566
|
+
store,
|
|
567
|
+
attachment.noteId,
|
|
568
|
+
"apply-transcript",
|
|
569
|
+
(note) => {
|
|
570
|
+
const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
|
|
571
|
+
if (noteMeta.transcribe_stub !== true) return null;
|
|
572
|
+
// Body policy (finding F — never destroy content):
|
|
573
|
+
// - placeholder OR failure-marker present → surgical replace in
|
|
574
|
+
// place (a retried success replaces the `_Transcription
|
|
575
|
+
// unavailable._` marker, landing exactly where a first-try
|
|
576
|
+
// success would). The embed + surrounding capture body survive.
|
|
577
|
+
// - neither present (user edited the note while pending) → APPEND
|
|
578
|
+
// the transcript instead of full-replacing the body, so the
|
|
579
|
+
// user's edits + the `![[memo]]` embed are preserved. The old
|
|
580
|
+
// code full-replaced here, which destroyed both.
|
|
581
|
+
let body: string;
|
|
582
|
+
if (TRANSCRIPT_SUCCESS_TARGET.test(note.content)) {
|
|
583
|
+
// Function replacer, NOT a string — speech-to-text is arbitrary
|
|
584
|
+
// user content, and String.replace treats `$&`, `$\``, `$'`,
|
|
585
|
+
// `$1`-`$9` as special patterns in a string replacement. A
|
|
586
|
+
// transcript containing `$&` would otherwise inject the matched
|
|
587
|
+
// marker text into the body. `() => transcript` returns the text
|
|
588
|
+
// verbatim.
|
|
589
|
+
body = note.content.replace(TRANSCRIPT_SUCCESS_TARGET, () => transcript);
|
|
590
|
+
} else {
|
|
591
|
+
body = note.content.length > 0
|
|
592
|
+
? `${note.content}\n\n${transcript}`
|
|
593
|
+
: transcript;
|
|
431
594
|
}
|
|
432
|
-
|
|
433
|
-
|
|
595
|
+
const { transcribe_stub: _drop, ...restMeta } = noteMeta;
|
|
596
|
+
return { content: body, metadata: restMeta };
|
|
597
|
+
},
|
|
598
|
+
// Last-resort (double-conflict) safety: only blind-write while the
|
|
599
|
+
// stub opt-in survives. A racing edit that cleared it opts out.
|
|
600
|
+
(note) => ((note.metadata as Record<string, unknown> | undefined)?.transcribe_stub === true),
|
|
601
|
+
);
|
|
434
602
|
}
|
|
435
603
|
|
|
436
604
|
// Always record the transcript on the attachment, even if the note
|
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the usage helpers (src/usage.ts).
|
|
3
|
+
*
|
|
4
|
+
* Everything here runs against the injectable `UsageFs` seam — no real disk
|
|
5
|
+
* I/O — so we can (a) synthesize trees with symlinks/missing dirs and (b)
|
|
6
|
+
* count how many times the dir-walk actually runs, which is how we prove the
|
|
7
|
+
* TTL cache skips the walk on a hit.
|
|
8
|
+
*
|
|
9
|
+
* The path helpers (`vaultDir`, `assetsDir`, mirror resolution) DO read
|
|
10
|
+
* `process.env.PARACHUTE_HOME`; we point it at a tmp dir so the resolved paths
|
|
11
|
+
* are deterministic, but no files are written there — the fake fs intercepts
|
|
12
|
+
* every stat/readdir.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { describe, test, expect, beforeEach } from "bun:test";
|
|
16
|
+
import { join } from "path";
|
|
17
|
+
import { tmpdir } from "os";
|
|
18
|
+
|
|
19
|
+
const testDir = join(
|
|
20
|
+
tmpdir(),
|
|
21
|
+
`vault-usage-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
|
22
|
+
);
|
|
23
|
+
process.env.PARACHUTE_HOME = testDir;
|
|
24
|
+
|
|
25
|
+
const {
|
|
26
|
+
dbBytes,
|
|
27
|
+
dirSize,
|
|
28
|
+
UsageCache,
|
|
29
|
+
buildUsageReport,
|
|
30
|
+
} = await import("./usage.ts");
|
|
31
|
+
const { vaultDir, assetsDir } = await import("./config.ts");
|
|
32
|
+
|
|
33
|
+
import type { UsageFs } from "./usage.ts";
|
|
34
|
+
import type { VaultStats } from "../core/src/types.ts";
|
|
35
|
+
import type { Dirent } from "fs";
|
|
36
|
+
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// Fake filesystem builder
|
|
39
|
+
//
|
|
40
|
+
// A node is either a file (number = size in bytes), a dir (object mapping
|
|
41
|
+
// names → nodes), or a symlink (special marker, never followed).
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
/** A regular file; `size` is its length in bytes. */
interface FileNode {
  kind: "file";
  size: number;
}

/** A directory mapping entry names to child nodes. */
interface DirNode {
  kind: "dir";
  children: Record<string, FsNode>;
}

/** A symbolic link marker; it carries no target. */
interface LinkNode {
  kind: "link";
}

/** Any node of the synthetic tree. */
type FsNode = FileNode | DirNode | LinkNode;

/** Create a file node of `size` bytes. */
function file(size: number): FileNode {
  return { kind: "file", size };
}

/** Create a directory node holding `children` (stored by reference). */
function dir(children: Record<string, FsNode>): DirNode {
  return { kind: "dir", children };
}

/** Create a symlink node. */
function link(): LinkNode {
  return { kind: "link" };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Wrap a fake node in a `Dirent`-shaped object named `name`.
 *
 * `isFile` / `isDirectory` / `isSymbolicLink` report the node's `kind` (read
 * at call time); the block/char-device, FIFO and socket predicates always
 * answer `false`. The object is cast to `Dirent` — only the members listed
 * here are provided.
 */
function makeDirent(name: string, node: FsNode): Dirent {
  // Builds a predicate that compares the node's kind when it is invoked.
  const kindIs = (kind: string) => (): boolean => node.kind === kind;
  const never = (): boolean => false;

  const entry = {
    name,
    isFile: kindIs("file"),
    isDirectory: kindIs("dir"),
    isSymbolicLink: kindIs("link"),
    isBlockDevice: never,
    isCharacterDevice: never,
    isFIFO: never,
    isSocket: never,
  };
  return entry as unknown as Dirent;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Build a fake `UsageFs` rooted at a set of absolute paths.
 *
 * `roots` maps an absolute path → the node that lives there. A requested path
 * resolves to a root on an exact match, or by walking `/`-separated segments
 * down from a root that prefixes it. Roots are tried most-specific (longest)
 * first, and a root that fails to resolve the path does not stop the search,
 * so a root registered beneath another root (e.g. the "target" of a symlink
 * entry) stays reachable.
 *
 * - `statFile(path)` throws `ENOENT: <path>` for an unknown path; otherwise it
 *   returns the file's size (0 for non-files) plus `isDirectory` /
 *   `isSymbolicLink` predicates.
 * - `readDir(path)` throws `ENOTDIR: <path>` unless the path resolves to a
 *   directory; otherwise it returns one `Dirent` per child.
 * - `readCount` is the number of `readDir` calls made so far, including calls
 *   that threw (used for cache assertions).
 */
function makeFakeFs(roots: Record<string, FsNode>): UsageFs & { readCount: number } {
  // Walk `segments` down from `start`; undefined as soon as a non-directory
  // is asked for a child or a child is missing.
  function descend(start: FsNode, segments: string[]): FsNode | undefined {
    let cur: FsNode | undefined = start;
    for (const seg of segments) {
      if (!cur || cur.kind !== "dir") return undefined;
      cur = cur.children[seg];
    }
    return cur;
  }

  function resolve(path: string): FsNode | undefined {
    // Longest root first: an exact match is always met before any of its
    // prefixes, and a nested root wins over descending through an outer one.
    // Keys are re-read on each call so the lookup reflects the current map.
    const rootPaths = Object.keys(roots).sort((a, b) => b.length - a.length);
    for (const rootPath of rootPaths) {
      const rootNode = roots[rootPath];
      if (!rootNode) continue;
      if (path === rootPath) return rootNode;
      if (!path.startsWith(rootPath + "/")) continue;

      const found = descend(rootNode, path.slice(rootPath.length + 1).split("/"));
      // Keep looking when this root cannot resolve the path — another
      // (shorter) root may still contain it.
      if (found) return found;
    }
    return undefined;
  }

  const fake = {
    readCount: 0,
    statFile(path: string) {
      const node = resolve(path);
      if (!node) throw new Error(`ENOENT: ${path}`);
      return {
        size: node.kind === "file" ? node.size : 0,
        isDirectory: () => node.kind === "dir",
        isSymbolicLink: () => node.kind === "link",
      };
    },
    readDir(path: string): Dirent[] {
      // Count first so failed walks are visible to call-count assertions too.
      fake.readCount++;
      const node = resolve(path);
      if (!node || node.kind !== "dir") throw new Error(`ENOTDIR: ${path}`);
      return Object.entries(node.children).map(([name, child]) => makeDirent(name, child));
    },
  };
  return fake;
}
|
|
112
|
+
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
// dbBytes — sums the WAL trio (vault.db + -wal + -shm)
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
describe("dbBytes (WAL-aware DB file sizing)", () => {
|
|
118
|
+
const VAULT = "journal";
|
|
119
|
+
const dbBase = join(vaultDir(VAULT), "vault.db");
|
|
120
|
+
|
|
121
|
+
test("sums vault.db + vault.db-wal + vault.db-shm", () => {
|
|
122
|
+
const fs = makeFakeFs({
|
|
123
|
+
[dbBase]: file(4096),
|
|
124
|
+
[`${dbBase}-wal`]: file(800),
|
|
125
|
+
[`${dbBase}-shm`]: file(32),
|
|
126
|
+
});
|
|
127
|
+
expect(dbBytes(VAULT, fs)).toBe(4096 + 800 + 32);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
test("tolerates missing -wal/-shm (checkpointed at rest)", () => {
|
|
131
|
+
const fs = makeFakeFs({ [dbBase]: file(4096) });
|
|
132
|
+
// -wal and -shm absent → contribute 0, not an error.
|
|
133
|
+
expect(dbBytes(VAULT, fs)).toBe(4096);
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
test("missing DB entirely → 0", () => {
|
|
137
|
+
const fs = makeFakeFs({});
|
|
138
|
+
expect(dbBytes(VAULT, fs)).toBe(0);
|
|
139
|
+
});
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
// dirSize — recursive, missing-dir tolerant, symlink-safe
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
describe("dirSize (recursive directory byte sum)", () => {
|
|
147
|
+
const ROOT = "/fake/assets";
|
|
148
|
+
|
|
149
|
+
test("sums files across nested directories", () => {
|
|
150
|
+
const fs = makeFakeFs({
|
|
151
|
+
[ROOT]: dir({
|
|
152
|
+
"a.png": file(100),
|
|
153
|
+
"2026-06-03": dir({
|
|
154
|
+
"x.jpg": file(250),
|
|
155
|
+
nested: dir({ "y.pdf": file(50) }),
|
|
156
|
+
}),
|
|
157
|
+
}),
|
|
158
|
+
});
|
|
159
|
+
expect(dirSize(ROOT, fs)).toBe(100 + 250 + 50);
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
test("empty directory → 0", () => {
|
|
163
|
+
const fs = makeFakeFs({ [ROOT]: dir({}) });
|
|
164
|
+
expect(dirSize(ROOT, fs)).toBe(0);
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
test("missing directory → 0 (no throw)", () => {
|
|
168
|
+
const fs = makeFakeFs({});
|
|
169
|
+
expect(dirSize(ROOT, fs)).toBe(0);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
test("does NOT follow symlinks (file or dir)", () => {
|
|
173
|
+
const fs = makeFakeFs({
|
|
174
|
+
[ROOT]: dir({
|
|
175
|
+
"real.png": file(100),
|
|
176
|
+
"linked-file": link(), // would be a file if followed
|
|
177
|
+
"linked-dir": link(), // would be a dir if followed
|
|
178
|
+
}),
|
|
179
|
+
// A target tree the symlink "points at" — if dirSize followed the link
|
|
180
|
+
// it would walk this and add 9999. It must NOT.
|
|
181
|
+
[join(ROOT, "linked-dir")]: dir({ "huge.bin": file(9999) }),
|
|
182
|
+
});
|
|
183
|
+
expect(dirSize(ROOT, fs)).toBe(100);
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
test("symlink loop does not hang (link is skipped, never descended)", () => {
|
|
187
|
+
// The classic infinite-walk trap: a dir containing a symlink to itself.
|
|
188
|
+
// Because we skip symlinks outright, this terminates immediately.
|
|
189
|
+
const fs = makeFakeFs({
|
|
190
|
+
[ROOT]: dir({
|
|
191
|
+
"f.png": file(10),
|
|
192
|
+
loop: link(),
|
|
193
|
+
}),
|
|
194
|
+
});
|
|
195
|
+
expect(dirSize(ROOT, fs)).toBe(10);
|
|
196
|
+
});
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
// ---------------------------------------------------------------------------
|
|
200
|
+
// UsageCache — 60s TTL, fresh bypass, invalidation, call-count proof
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
|
|
203
|
+
describe("UsageCache (dir-walk TTL cache)", () => {
|
|
204
|
+
const VAULT = "journal";
|
|
205
|
+
const assets = assetsDir(VAULT);
|
|
206
|
+
|
|
207
|
+
function fsWith(assetsBytes: number) {
|
|
208
|
+
return makeFakeFs({ [assets]: dir({ "a.png": file(assetsBytes) }) });
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
test("first read walks (cached:false); second read within TTL is cached (no walk)", () => {
|
|
212
|
+
const fs = fsWith(500);
|
|
213
|
+
let clock = 1_000;
|
|
214
|
+
const cache = new UsageCache(fs, () => clock, 60_000);
|
|
215
|
+
|
|
216
|
+
const first = cache.get(VAULT);
|
|
217
|
+
expect(first.cached).toBe(false);
|
|
218
|
+
expect(first.result.assets).toBe(500);
|
|
219
|
+
const afterFirst = fs.readCount;
|
|
220
|
+
expect(afterFirst).toBeGreaterThan(0);
|
|
221
|
+
|
|
222
|
+
clock += 30_000; // within the 60s TTL
|
|
223
|
+
const second = cache.get(VAULT);
|
|
224
|
+
expect(second.cached).toBe(true);
|
|
225
|
+
expect(second.result.assets).toBe(500);
|
|
226
|
+
// The cache MUST NOT have re-walked — call count is unchanged.
|
|
227
|
+
expect(fs.readCount).toBe(afterFirst);
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
test("entry expires after TTL → re-walks (cached:false)", () => {
|
|
231
|
+
const fs = fsWith(500);
|
|
232
|
+
let clock = 1_000;
|
|
233
|
+
const cache = new UsageCache(fs, () => clock, 60_000);
|
|
234
|
+
|
|
235
|
+
cache.get(VAULT); // prime
|
|
236
|
+
const afterPrime = fs.readCount;
|
|
237
|
+
|
|
238
|
+
clock += 60_001; // just past TTL
|
|
239
|
+
const stale = cache.get(VAULT);
|
|
240
|
+
expect(stale.cached).toBe(false);
|
|
241
|
+
expect(fs.readCount).toBeGreaterThan(afterPrime);
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
test("fresh:true bypasses a valid cache entry and re-walks", () => {
|
|
245
|
+
const fs = fsWith(500);
|
|
246
|
+
let clock = 1_000;
|
|
247
|
+
const cache = new UsageCache(fs, () => clock, 60_000);
|
|
248
|
+
|
|
249
|
+
cache.get(VAULT); // prime
|
|
250
|
+
const afterPrime = fs.readCount;
|
|
251
|
+
|
|
252
|
+
clock += 1_000; // well within TTL — a normal read would be cached
|
|
253
|
+
const forced = cache.get(VAULT, { fresh: true });
|
|
254
|
+
expect(forced.cached).toBe(false);
|
|
255
|
+
expect(fs.readCount).toBeGreaterThan(afterPrime);
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
test("invalidate() forces the next read to re-walk", () => {
|
|
259
|
+
const fs = fsWith(500);
|
|
260
|
+
let clock = 1_000;
|
|
261
|
+
const cache = new UsageCache(fs, () => clock, 60_000);
|
|
262
|
+
|
|
263
|
+
cache.get(VAULT); // prime
|
|
264
|
+
const afterPrime = fs.readCount;
|
|
265
|
+
|
|
266
|
+
cache.invalidate(VAULT);
|
|
267
|
+
clock += 1_000; // within TTL, but the entry is gone
|
|
268
|
+
const after = cache.get(VAULT);
|
|
269
|
+
expect(after.cached).toBe(false);
|
|
270
|
+
expect(fs.readCount).toBeGreaterThan(afterPrime);
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
test("no mirror configured → mirror:null (omitted from report)", () => {
|
|
274
|
+
// No mirror-config.yaml written for this vault → resolveVaultMirrorDir
|
|
275
|
+
// returns null → mirror is null.
|
|
276
|
+
const fs = fsWith(500);
|
|
277
|
+
const cache = new UsageCache(fs, () => 1_000, 60_000);
|
|
278
|
+
const { result } = cache.get(VAULT);
|
|
279
|
+
expect(result.mirror).toBeNull();
|
|
280
|
+
});
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
// buildUsageReport — shape + total math + mirror handling
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
describe("buildUsageReport", () => {
|
|
288
|
+
const VAULT = "journal";
|
|
289
|
+
const dbBase = join(vaultDir(VAULT), "vault.db");
|
|
290
|
+
const assets = assetsDir(VAULT);
|
|
291
|
+
|
|
292
|
+
function makeStats(overrides: Partial<VaultStats> = {}): VaultStats {
|
|
293
|
+
return {
|
|
294
|
+
totalNotes: 12,
|
|
295
|
+
earliestNote: null,
|
|
296
|
+
latestNote: null,
|
|
297
|
+
notesByMonth: [],
|
|
298
|
+
topTags: [],
|
|
299
|
+
tagCount: 4,
|
|
300
|
+
attachmentCount: 3,
|
|
301
|
+
linkCount: 7,
|
|
302
|
+
contentBytes: 1234,
|
|
303
|
+
...overrides,
|
|
304
|
+
};
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
test("full shape: counts, bytes, total = db + assets, mirror omitted when none", () => {
|
|
308
|
+
const fs = makeFakeFs({
|
|
309
|
+
[dbBase]: file(4096),
|
|
310
|
+
[`${dbBase}-wal`]: file(900),
|
|
311
|
+
[assets]: dir({ "a.png": file(2000) }),
|
|
312
|
+
});
|
|
313
|
+
const cache = new UsageCache(fs, () => 1_000, 60_000);
|
|
314
|
+
const report = buildUsageReport(VAULT, makeStats(), { cache, fs, now: () => 1_700_000_000_000 });
|
|
315
|
+
|
|
316
|
+
expect(report.counts).toEqual({ notes: 12, attachments: 3, links: 7, tags: 4 });
|
|
317
|
+
expect(report.bytes.content).toBe(1234);
|
|
318
|
+
expect(report.bytes.db).toBe(4096 + 900);
|
|
319
|
+
expect(report.bytes.assets).toBe(2000);
|
|
320
|
+
// total = db + assets only. NOT content (logical, already inside db) and
|
|
321
|
+
// NOT mirror (projection).
|
|
322
|
+
expect(report.bytes.total).toBe(4096 + 900 + 2000);
|
|
323
|
+
expect(report.bytes).not.toHaveProperty("mirror");
|
|
324
|
+
expect(report.cached).toBe(false);
|
|
325
|
+
expect(report.computedAt).toBe(new Date(1_700_000_000_000).toISOString());
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
test("mirror is a separate line item, NOT added to total", () => {
|
|
329
|
+
// Configure an internal mirror so resolveVaultMirrorDir returns a dir.
|
|
330
|
+
const { writeMirrorConfigForVault, defaultMirrorConfig } = require("./mirror-config.ts");
|
|
331
|
+
writeMirrorConfigForVault(VAULT, { ...defaultMirrorConfig(), location: "internal", enabled: true });
|
|
332
|
+
const mirrorDir = join(vaultDir(VAULT), "mirror");
|
|
333
|
+
|
|
334
|
+
const fs = makeFakeFs({
|
|
335
|
+
[dbBase]: file(1000),
|
|
336
|
+
[assets]: dir({ "a.png": file(500) }),
|
|
337
|
+
[mirrorDir]: dir({ "note.md": file(8000) }),
|
|
338
|
+
});
|
|
339
|
+
const cache = new UsageCache(fs, () => 1_000, 60_000);
|
|
340
|
+
const report = buildUsageReport(VAULT, makeStats(), { cache, fs });
|
|
341
|
+
|
|
342
|
+
expect(report.bytes.mirror).toBe(8000);
|
|
343
|
+
// total stays db + assets — the 8000-byte mirror does not inflate it.
|
|
344
|
+
expect(report.bytes.total).toBe(1000 + 500);
|
|
345
|
+
});
|
|
346
|
+
|
|
347
|
+
test("cached flag reflects a cache hit", () => {
|
|
348
|
+
const fs = makeFakeFs({
|
|
349
|
+
[dbBase]: file(100),
|
|
350
|
+
[assets]: dir({}),
|
|
351
|
+
});
|
|
352
|
+
let clock = 1_000;
|
|
353
|
+
const cache = new UsageCache(fs, () => clock, 60_000);
|
|
354
|
+
|
|
355
|
+
const first = buildUsageReport(VAULT, makeStats(), { cache, fs });
|
|
356
|
+
expect(first.cached).toBe(false);
|
|
357
|
+
|
|
358
|
+
clock += 5_000;
|
|
359
|
+
const second = buildUsageReport(VAULT, makeStats(), { cache, fs });
|
|
360
|
+
expect(second.cached).toBe(true);
|
|
361
|
+
});
|
|
362
|
+
});
|