@openparachute/vault 0.4.7-rc.1 → 0.4.8-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +44 -10
  2. package/core/src/connection-pragmas.test.ts +232 -0
  3. package/core/src/core.test.ts +257 -0
  4. package/core/src/cursor.test.ts +160 -0
  5. package/core/src/cursor.ts +272 -0
  6. package/core/src/mcp.ts +51 -7
  7. package/core/src/notes.ts +164 -2
  8. package/core/src/portable-md.test.ts +247 -0
  9. package/core/src/portable-md.ts +118 -1
  10. package/core/src/schema.ts +98 -2
  11. package/core/src/store.ts +11 -1
  12. package/core/src/types.ts +32 -0
  13. package/package.json +1 -1
  14. package/src/auth-status.ts +4 -0
  15. package/src/auto-transcribe.test.ts +116 -0
  16. package/src/auto-transcribe.ts +48 -0
  17. package/src/cli.ts +151 -50
  18. package/src/config.test.ts +26 -0
  19. package/src/config.ts +53 -1
  20. package/src/db.ts +15 -2
  21. package/src/export-watch.test.ts +99 -0
  22. package/src/mcp-install-interactive.test.ts +23 -2
  23. package/src/mcp-install-interactive.ts +21 -2
  24. package/src/mcp-install.test.ts +40 -0
  25. package/src/mcp-tools.ts +17 -1
  26. package/src/module-config.ts +70 -14
  27. package/src/module-manifest.test.ts +93 -0
  28. package/src/module-manifest.ts +94 -0
  29. package/src/routes.ts +267 -50
  30. package/src/scribe-discovery.test.ts +77 -0
  31. package/src/scribe-discovery.ts +91 -0
  32. package/src/scribe-env.test.ts +66 -1
  33. package/src/scribe-env.ts +42 -1
  34. package/src/self-register.test.ts +380 -0
  35. package/src/self-register.ts +234 -0
  36. package/src/server.ts +46 -11
  37. package/src/transcript-note.test.ts +171 -0
  38. package/src/transcript-note.ts +189 -0
  39. package/src/transcription-registry.ts +22 -0
  40. package/src/transcription-worker.test.ts +250 -0
  41. package/src/transcription-worker.ts +186 -27
  42. package/src/vault.test.ts +347 -0
@@ -200,12 +200,108 @@ CREATE INDEX IF NOT EXISTS idx_links_target ON links(target_id);
200
200
  -- because migrateToV16 also runs the unconditional CREATE INDEX path.
201
201
  `;
202
202
 
203
+ /**
204
+ * Connection-level pragmas applied on every Database open, in the order they
205
+ * appear here.
206
+ *
207
+ * `journal_mode = WAL` is a persistent, DB-level setting (lives in the SQLite
208
+ * header). Once any writer flips a DB into WAL it stays in WAL across opens
209
+ * and processes — so daemon + CLI + parachute-runner + any read-side tool
210
+ * see the same mode. Re-applying on every open is cheap and idempotent;
211
+ * SQLite returns the current mode either way.
212
+ *
213
+ * `synchronous = NORMAL` is the safe, recommended pairing with WAL per the
214
+ * SQLite docs: fsync only at checkpoint rather than on every commit. Crash
215
+ * safety is preserved (WAL frames are still ordered + checksummed); the only
216
+ * cost vs FULL is that an OS-level crash *between* checkpoints might lose
217
+ * recently committed transactions. Acceptable for a knowledge graph that's snapshotted
218
+ * by `VACUUM INTO` for backups.
219
+ *
220
+ * `wal_autocheckpoint = 1000` is SQLite's default; we set it explicitly so
221
+ * the contract is visible in code rather than implicit. 1000 pages ≈ 4MB
222
+ * before a passive checkpoint is triggered on the next write.
223
+ *
224
+ * `foreign_keys = ON` is per-connection (not persistent) — must be re-applied
225
+ * on every open. Migrations occasionally disable it transiently (see
226
+ * migrateToV14's BEGIN IMMEDIATE block); the boot path re-enables.
227
+ *
228
+ * WAL requires a filesystem that supports memory-mapped shared-memory
229
+ * (the `-shm` sidecar). NFS, some FUSE mounts, and a few Docker volume
230
+ * drivers don't qualify and silently fall back to the prior journal mode
231
+ * (typically `delete`). `applyConnectionPragmas` detects this and returns
232
+ * `wal: false` so the caller can log a warning — operators on those
233
+ * filesystems should know they've lost multi-process concurrency.
234
+ */
235
+ const APPLY_PRAGMAS_LOGGED = new WeakSet<Database>();
236
+
237
+ export interface ConnectionPragmaResult {
238
+ /** True when the connection ended up in WAL mode. False means WAL is not active (unsupported FS, `:memory:` DB, or a non-WAL DB opened read-only). */
239
+ wal: boolean;
240
+ /** The actual journal_mode SQLite reports — "wal", "delete", "memory", etc. */
241
+ journalMode: string;
242
+ }
243
+
244
+ /**
245
+ * Apply connection-level pragmas (journal mode, synchronous, WAL autocheckpoint, FK enforcement)
246
+ * and verify WAL took effect. Idempotent — safe to call multiple times on
247
+ * the same connection. Logs a one-time warning per connection when WAL
248
+ * couldn't be applied (suppressed for `:memory:` databases, which report journal_mode=memory).
249
+ *
250
+ * Exported for read-side callers (e.g. mirror-manager; auth-status opts out) that
251
+ * open a Database directly without going through initSchema. Setting
252
+ * `journal_mode` on a read-only handle is a no-op (or a caught throw) and harmless; the
253
+ * useful state is set by whichever writer opens first.
254
+ */
255
+ export function applyConnectionPragmas(db: Database): ConnectionPragmaResult {
256
+ // PRAGMA journal_mode returns a row { journal_mode: "wal" } on success.
257
+ // Use `.get()` (not `.exec()`) so we capture the result. Some bun:sqlite
258
+ // versions throw on readonly handles attempting to set journal_mode; treat
259
+ // that as "we couldn't set it, just read the current value" and recover.
260
+ let journalMode: string;
261
+ try {
262
+ const row = db.prepare("PRAGMA journal_mode = WAL").get() as { journal_mode?: string } | null;
263
+ journalMode = (row?.journal_mode ?? "").toLowerCase();
264
+ } catch {
265
+ // Most likely: readonly handle. Read-only opens never write the DB
266
+ // header, so they can't change journal_mode — but they can still query
267
+ // the current mode, which is set by the most recent writer.
268
+ const row = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null;
269
+ journalMode = (row?.journal_mode ?? "").toLowerCase();
270
+ }
271
+ const wal = journalMode === "wal";
272
+
273
+ // synchronous + wal_autocheckpoint only matter when WAL is active. They're
274
+ // harmless under DELETE mode but the rationale is WAL-specific, so gate
275
+ // them on the success path. Both are best-effort — wrap in try to keep
276
+ // readonly handles (which reject writes) from failing the whole open.
277
+ if (wal) {
278
+ try { db.exec("PRAGMA synchronous = NORMAL"); } catch {}
279
+ try { db.exec("PRAGMA wal_autocheckpoint = 1000"); } catch {}
280
+ } else if (journalMode !== "memory" && !APPLY_PRAGMAS_LOGGED.has(db)) {
281
+ // `journalMode === "memory"` ⇒ this is a `:memory:` database, an
282
+ // explicit choice (tests, ephemeral probes) rather than a filesystem
283
+ // limitation. Suppress the warning so the test suite stays quiet;
284
+ // real on-disk vaults that can't host WAL (NFS, some FUSE/Docker
285
+ // volume drivers) still surface the diagnostic.
286
+ APPLY_PRAGMAS_LOGGED.add(db);
287
+ // eslint-disable-next-line no-console
288
+ console.warn(
289
+ `[vault] WAL mode could not be enabled (journal_mode=${journalMode || "unknown"}). ` +
290
+ `The underlying filesystem may not support WAL (NFS, some FUSE/Docker volume drivers). ` +
291
+ `Multi-process concurrent access will be limited to a single writer at a time.`,
292
+ );
293
+ }
294
+
295
+ try { db.exec("PRAGMA foreign_keys = ON"); } catch {}
296
+
297
+ return { wal, journalMode };
298
+ }
299
+
203
300
  /**
204
301
  * Initialize database schema. Idempotent — safe to call on every startup.
205
302
  */
206
303
  export function initSchema(db: Database): void {
207
- db.exec("PRAGMA journal_mode = WAL");
208
- db.exec("PRAGMA foreign_keys = ON");
304
+ applyConnectionPragmas(db);
209
305
 
210
306
  // Check if we need to migrate from v2
211
307
  const hasOldTables = hasTable(db, "things");
package/core/src/store.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { Database } from "bun:sqlite";
2
- import type { Store, Note, Link, Attachment, QueryOpts } from "./types.js";
2
+ import type { Store, Note, Link, Attachment, QueryOpts, QueryNotesPage } from "./types.js";
3
3
  import { initSchema } from "./schema.js";
4
4
  import * as noteOps from "./notes.js";
5
5
  import * as linkOps from "./links.js";
@@ -227,6 +227,16 @@ export class BunSqliteStore implements Store {
227
227
  return noteOps.queryNotes(this.db, this.expandQueryTags(opts));
228
228
  }
229
229
 
230
+ async queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage> {
231
+ // Hierarchy expansion happens internally — but importantly the cursor's
232
+ // query_hash is computed from the CALLER'S opts (pre-expansion), so a
233
+ // tag hierarchy edit between calls invalidates the cursor (different
234
+ // descendant set → different rows match → caller should restart). The
235
+ // alternative — hash the expanded set — would silently keep returning
236
+ // stale results from a hierarchy snapshot the caller never saw.
237
+ return noteOps.queryNotesPaged(this.db, this.expandQueryTags(opts));
238
+ }
239
+
230
240
  /**
231
241
  * If `tags` are present, attach a parallel `_tagsExpanded` array where
232
242
  * each input tag is replaced with `{tag} ∪ descendants(tag)`. The SQL
package/core/src/types.ts CHANGED
@@ -116,6 +116,30 @@ export interface QueryOpts {
116
116
  orderBy?: string;
117
117
  limit?: number;
118
118
  offset?: number;
119
+ /**
120
+ * Opaque cursor for "since last checked" agent loops (vault#313).
121
+ * When passed, the engine decodes it, verifies its `query_hash` matches
122
+ * the current query (mismatch → CursorError `cursor_query_mismatch`),
123
+ * and adds a keyset predicate that returns only rows newer than the
124
+ * cursor's `updated_at`/`id` watermark. Forces `orderBy = updated_at`
125
+ * (with `id` as a stable tiebreaker) so the watermark math is sound.
126
+ *
127
+ * Cursors are minted by `queryNotesPaged` (engine) and surfaced via
128
+ * the `query-notes` MCP tool's `next_cursor` field; callers should
129
+ * treat the string as opaque.
130
+ */
131
+ cursor?: string;
132
+ }
133
+
134
+ /**
135
+ * Cursor-paginated query result (vault#313). Returned by
136
+ * `queryNotesPaged`/`storeQueryNotesPaged`. `next_cursor` always advances —
137
+ * even on an empty result page — so an agent loop can persist a single
138
+ * watermark and keep polling.
139
+ */
140
+ export interface QueryNotesPage {
141
+ notes: Note[];
142
+ next_cursor: string;
119
143
  }
120
144
 
121
145
  /** Note summary — everything except content. Used in link results. */
@@ -184,6 +208,14 @@ export interface Store {
184
208
  syncAllWikilinks(): Promise<{ synced: number; totalAdded: number; totalRemoved: number }>;
185
209
  deleteNote(id: string): Promise<void>;
186
210
  queryNotes(opts: QueryOpts): Promise<Note[]>;
211
+ /**
212
+ * Cursor-paginated `queryNotes` (vault#313). Returns the same notes plus
213
+ * an opaque `next_cursor` string the caller can pass on the next call
214
+ * to resume from the watermark of the LAST returned row. The cursor is
215
+ * always present in the response — even on an empty page — so an
216
+ * agent loop can persist a single watermark and keep polling.
217
+ */
218
+ queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage>;
187
219
  searchNotes(query: string, opts?: { tags?: string[]; limit?: number }): Promise<Note[]>;
188
220
 
189
221
  // Tags
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openparachute/vault",
3
- "version": "0.4.7-rc.1",
3
+ "version": "0.4.8-rc.4",
4
4
  "description": "Agent-native knowledge graph. Notes, tags, links over MCP.",
5
5
  "module": "src/cli.ts",
6
6
  "type": "module",
@@ -40,6 +40,10 @@ export interface AuthStatusResponse {
40
40
  * caller's signal to degrade `hasTokens` to `null`.
41
41
  */
42
42
  function vaultHasTokens(dbPath: string): boolean {
43
+ // Readonly handle — no pragma application here. Journal mode is a
44
+ // persistent DB-header setting written by the first writer (the daemon's
45
+ // BunSqliteStore via openVaultDb), so this probe sees WAL automatically
46
+ // and is safe under concurrent writes.
43
47
  const db = new Database(dbPath, { readonly: true });
44
48
  try {
45
49
  const row = db.prepare("SELECT 1 FROM tokens LIMIT 1").get();
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Auto-transcribe gating decisions (vault#353).
3
+ *
4
+ * Three independent guards: mime-type prefix, enabled toggle, scribe URL
5
+ * present. Pure function — exercise the truth table.
6
+ */
7
+
8
+ import { describe, test, expect } from "bun:test";
9
+ import { shouldAutoTranscribe } from "./auto-transcribe.ts";
10
+
11
+ function readGlobalConfig(enabled: boolean | undefined) {
12
+ return () => ({
13
+ port: 1940,
14
+ ...(enabled !== undefined ? { auto_transcribe: { enabled } } : {}),
15
+ }) as any;
16
+ }
17
+
18
+ describe("shouldAutoTranscribe", () => {
19
+ const scribePresent = () => "http://127.0.0.1:1943";
20
+ const scribeAbsent = () => undefined;
21
+
22
+ test("triggers on audio/* mime-type when enabled + scribe reachable", () => {
23
+ expect(shouldAutoTranscribe("audio/wav", {
24
+ readGlobalConfigImpl: readGlobalConfig(true),
25
+ getCachedScribeUrlImpl: scribePresent,
26
+ })).toBe(true);
27
+ });
28
+
29
+ test("triggers on audio/mp4 (m4a)", () => {
30
+ expect(shouldAutoTranscribe("audio/mp4", {
31
+ readGlobalConfigImpl: readGlobalConfig(true),
32
+ getCachedScribeUrlImpl: scribePresent,
33
+ })).toBe(true);
34
+ });
35
+
36
+ test("triggers on audio/webm", () => {
37
+ expect(shouldAutoTranscribe("audio/webm", {
38
+ readGlobalConfigImpl: readGlobalConfig(true),
39
+ getCachedScribeUrlImpl: scribePresent,
40
+ })).toBe(true);
41
+ });
42
+
43
+ test("triggers case-insensitively (AUDIO/WAV)", () => {
44
+ expect(shouldAutoTranscribe("AUDIO/WAV", {
45
+ readGlobalConfigImpl: readGlobalConfig(true),
46
+ getCachedScribeUrlImpl: scribePresent,
47
+ })).toBe(true);
48
+ });
49
+
50
+ test("skips non-audio mime-types (image/png, application/pdf, video/mp4)", () => {
51
+ expect(shouldAutoTranscribe("image/png", {
52
+ readGlobalConfigImpl: readGlobalConfig(true),
53
+ getCachedScribeUrlImpl: scribePresent,
54
+ })).toBe(false);
55
+ expect(shouldAutoTranscribe("application/pdf", {
56
+ readGlobalConfigImpl: readGlobalConfig(true),
57
+ getCachedScribeUrlImpl: scribePresent,
58
+ })).toBe(false);
59
+ expect(shouldAutoTranscribe("video/mp4", {
60
+ readGlobalConfigImpl: readGlobalConfig(true),
61
+ getCachedScribeUrlImpl: scribePresent,
62
+ })).toBe(false);
63
+ });
64
+
65
+ test("skips when enabled is false (default off)", () => {
66
+ expect(shouldAutoTranscribe("audio/wav", {
67
+ readGlobalConfigImpl: readGlobalConfig(false),
68
+ getCachedScribeUrlImpl: scribePresent,
69
+ })).toBe(false);
70
+ });
71
+
72
+ test("skips when enabled is unset (no auto_transcribe block in config)", () => {
73
+ expect(shouldAutoTranscribe("audio/wav", {
74
+ readGlobalConfigImpl: readGlobalConfig(undefined),
75
+ getCachedScribeUrlImpl: scribePresent,
76
+ })).toBe(false);
77
+ });
78
+
79
+ test("skips when scribe URL is undefined (no services.json entry, no env)", () => {
80
+ expect(shouldAutoTranscribe("audio/wav", {
81
+ readGlobalConfigImpl: readGlobalConfig(true),
82
+ getCachedScribeUrlImpl: scribeAbsent,
83
+ })).toBe(false);
84
+ });
85
+
86
+ test("skips when scribe URL is empty string", () => {
87
+ expect(shouldAutoTranscribe("audio/wav", {
88
+ readGlobalConfigImpl: readGlobalConfig(true),
89
+ getCachedScribeUrlImpl: () => "",
90
+ })).toBe(false);
91
+ });
92
+
93
+ test("skips on garbage mime-type input", () => {
94
+ expect(shouldAutoTranscribe("", {
95
+ readGlobalConfigImpl: readGlobalConfig(true),
96
+ getCachedScribeUrlImpl: scribePresent,
97
+ })).toBe(false);
98
+ expect(shouldAutoTranscribe("not-a-mime", {
99
+ readGlobalConfigImpl: readGlobalConfig(true),
100
+ getCachedScribeUrlImpl: scribePresent,
101
+ })).toBe(false);
102
+ });
103
+
104
+ test("respects enabledOverride when present", () => {
105
+ expect(shouldAutoTranscribe("audio/wav", {
106
+ readGlobalConfigImpl: readGlobalConfig(false),
107
+ getCachedScribeUrlImpl: scribePresent,
108
+ enabledOverride: true,
109
+ })).toBe(true);
110
+ expect(shouldAutoTranscribe("audio/wav", {
111
+ readGlobalConfigImpl: readGlobalConfig(true),
112
+ getCachedScribeUrlImpl: scribePresent,
113
+ enabledOverride: false,
114
+ })).toBe(false);
115
+ });
116
+ });
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Auto-transcribe trigger decision (vault#353, design 2026-05-21 Part 2).
3
+ *
4
+ * One pure function: given an attachment's mime-type + the operator's
5
+ * settings + whether scribe is reachable, decide whether to enqueue the
6
+ * attachment for the transcription worker. Lives in its own module so the
7
+ * attachment-write code path (`routes.ts`) and the retry endpoint share the
8
+ * same gate without duplicating logic.
9
+ */
10
+
11
+ import { readGlobalConfig } from "./config.ts";
12
+ import { getCachedScribeUrl } from "./scribe-discovery.ts";
13
+
14
+ /**
15
+ * Pre-vault#353 callers passed `transcribe: true` explicitly on the
16
+ * attachment POST. The auto-transcribe path inlines the decision: if the
17
+ * upload is an audio mime-type AND the toggle is on AND scribe is reachable,
18
+ * the worker is enqueued. This function is the single decision site.
19
+ *
20
+ * Returns `true` only when ALL three conditions hold:
21
+ * 1. mime-type starts with `audio/` (case-insensitive).
22
+ * 2. `opts.enabledOverride` if provided, else `globalConfig.auto_transcribe?.enabled`, is truthy.
23
+ * 3. Scribe is discoverable (services.json entry OR SCRIBE_URL env).
24
+ *
25
+ * The three conditions are independent guards: a single `false` is sufficient
26
+ * to skip enqueuing. The audio stays as a regular attachment in that case.
27
+ */
28
+ export function shouldAutoTranscribe(
29
+ mimeType: string,
30
+ opts: {
31
+ /** Injection seam for tests — defaults to live globals. */
32
+ readGlobalConfigImpl?: typeof readGlobalConfig;
33
+ getCachedScribeUrlImpl?: () => string | undefined;
34
+ /** Allow per-call enabled override — used by the explicit-opt-in path. */
35
+ enabledOverride?: boolean;
36
+ } = {},
37
+ ): boolean {
38
+ if (typeof mimeType !== "string" || !mimeType.toLowerCase().startsWith("audio/")) {
39
+ return false;
40
+ }
41
+ const enabled = opts.enabledOverride
42
+ ?? (opts.readGlobalConfigImpl ?? readGlobalConfig)().auto_transcribe?.enabled
43
+ ?? false;
44
+ if (!enabled) return false;
45
+ const url = (opts.getCachedScribeUrlImpl ?? getCachedScribeUrl)();
46
+ if (!url || !url.trim()) return false;
47
+ return true;
48
+ }