@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.parachute/module.json +1 -1
  2. package/README.md +78 -41
  3. package/core/src/connection-pragmas.test.ts +232 -0
  4. package/core/src/core.test.ts +257 -0
  5. package/core/src/cursor.test.ts +160 -0
  6. package/core/src/cursor.ts +272 -0
  7. package/core/src/mcp.ts +51 -7
  8. package/core/src/notes.ts +164 -2
  9. package/core/src/schema.ts +106 -5
  10. package/core/src/store.ts +11 -1
  11. package/core/src/types.ts +32 -0
  12. package/package.json +7 -3
  13. package/src/auth-status.ts +4 -0
  14. package/src/auth.test.ts +5 -112
  15. package/src/auto-transcribe.test.ts +116 -0
  16. package/src/auto-transcribe.ts +48 -0
  17. package/src/backup.ts +17 -3
  18. package/src/cli.ts +95 -66
  19. package/src/config.test.ts +26 -0
  20. package/src/config.ts +53 -1
  21. package/src/db.ts +15 -2
  22. package/src/export-watch.test.ts +21 -0
  23. package/src/mcp-install-interactive.test.ts +23 -2
  24. package/src/mcp-install-interactive.ts +21 -2
  25. package/src/mcp-install.test.ts +40 -0
  26. package/src/mcp-tools.ts +17 -1
  27. package/src/module-config.ts +70 -14
  28. package/src/module-manifest.test.ts +114 -0
  29. package/src/module-manifest.ts +104 -0
  30. package/src/oauth-discovery.ts +95 -0
  31. package/src/owner-auth.ts +22 -149
  32. package/src/routes.ts +268 -51
  33. package/src/routing.test.ts +102 -99
  34. package/src/routing.ts +33 -47
  35. package/src/scribe-discovery.test.ts +77 -0
  36. package/src/scribe-discovery.ts +91 -0
  37. package/src/scribe-env.test.ts +66 -1
  38. package/src/scribe-env.ts +42 -1
  39. package/src/self-register.test.ts +412 -0
  40. package/src/self-register.ts +247 -0
  41. package/src/server.ts +47 -23
  42. package/src/transcript-note.test.ts +171 -0
  43. package/src/transcript-note.ts +189 -0
  44. package/src/transcription-registry.ts +22 -0
  45. package/src/transcription-worker.test.ts +250 -0
  46. package/src/transcription-worker.ts +186 -27
  47. package/src/vault-name.ts +3 -2
  48. package/src/vault.test.ts +347 -0
  49. package/web/ui/dist/assets/index-BOa-JJtV.css +1 -0
  50. package/web/ui/dist/assets/index-BzA5LgE3.js +60 -0
  51. package/web/ui/dist/index.html +14 -0
  52. package/web/ui/tsconfig.json +21 -0
  53. package/src/oauth.test.ts +0 -2156
  54. package/src/oauth.ts +0 -973
@@ -133,6 +133,11 @@ CREATE TABLE IF NOT EXISTS tokens (
133
133
  );
134
134
 
135
135
  -- OAuth: registered clients (Dynamic Client Registration)
136
+ -- VESTIGIAL after vault 0.4.x workstream E (2026-05-25). The standalone
137
+ -- OAuth issuer that wrote these rows was retired (hub is the issuer now;
138
+ -- vault is resource-server-only). The tables are left in place so an
139
+ -- upgrade doesn't trip on a missing column for any operator who still
140
+ -- has rows mid-upgrade. A future migration will drop them.
136
141
  CREATE TABLE IF NOT EXISTS oauth_clients (
137
142
  client_id TEXT PRIMARY KEY,
138
143
  client_name TEXT,
@@ -141,9 +146,9 @@ CREATE TABLE IF NOT EXISTS oauth_clients (
141
146
  );
142
147
 
143
148
  -- OAuth: authorization codes (single-use, short-lived)
144
- -- vault_name pins the code to the vault it was issued for. handleToken
145
- -- must verify it matches the requested vault otherwise a code issued
146
- -- under /vaults/A/oauth/authorize could be redeemed at /vaults/B/oauth/token.
149
+ -- VESTIGIAL — see oauth_clients above. The vault_name column survives
150
+ -- as a sentinel of the per-vault-pinning invariant that used to apply
151
+ -- when vault was the issuer.
147
152
  CREATE TABLE IF NOT EXISTS oauth_codes (
148
153
  code TEXT PRIMARY KEY,
149
154
  client_id TEXT NOT NULL,
@@ -200,12 +205,108 @@ CREATE INDEX IF NOT EXISTS idx_links_target ON links(target_id);
200
205
  -- because migrateToV16 also runs the unconditional CREATE INDEX path.
201
206
  `;
202
207
 
208
+ /**
209
+ * Connection-level pragmas applied on every Database open, in the order they
210
+ * appear here.
211
+ *
212
+ * `journal_mode = WAL` is a persistent, DB-level setting (lives in the SQLite
213
+ * header). Once any writer flips a DB into WAL it stays in WAL across opens
214
+ * and processes — so daemon + CLI + parachute-runner + any read-side tool
215
+ * see the same mode. Re-applying on every open is cheap and idempotent;
216
+ * SQLite returns the current mode either way.
217
+ *
218
+ * `synchronous = NORMAL` is the safe, recommended pairing with WAL per the
219
+ * SQLite docs: fsync only at checkpoint rather than on every commit. Crash
220
+ * safety is preserved (WAL frames are still ordered + checksummed); the only
221
+ * cost vs FULL is that an OS-level crash *between* checkpoints might lose
222
+ * the most recently committed transactions. Acceptable for a knowledge graph that's snapshotted
223
+ * by `VACUUM INTO` for backups.
224
+ *
225
+ * `wal_autocheckpoint = 1000` is SQLite's default; we set it explicitly so
226
+ * the contract is visible in code rather than implicit. 1000 pages ≈ 4MB
227
+ * before a passive checkpoint is triggered on the next write.
228
+ *
229
+ * `foreign_keys = ON` is per-connection (not persistent) — must be re-applied
230
+ * on every open. Migrations occasionally disable it transiently (see
231
+ * migrateToV14's BEGIN IMMEDIATE block); the boot path re-enables.
232
+ *
233
+ * WAL requires a filesystem that supports memory-mapped shared-memory
234
+ * (the `-shm` sidecar). NFS, some FUSE mounts, and a few Docker volume
235
+ * drivers don't qualify and silently fall back to the prior journal mode
236
+ * (typically `delete`). `applyConnectionPragmas` detects this and returns
237
+ * `wal: false` so the caller can log a warning — operators on those
238
+ * filesystems should know they've lost multi-process concurrency.
239
+ */
240
+ const APPLY_PRAGMAS_LOGGED = new WeakSet<Database>();
241
+
242
+ export interface ConnectionPragmaResult {
243
+ /** True when the connection ended up in WAL mode. False usually means the FS doesn't support WAL (a `:memory:` database also reports false). */
244
+ wal: boolean;
245
+ /** The actual journal_mode SQLite reports — "wal", "delete", "memory", etc. */
246
+ journalMode: string;
247
+ }
248
+
249
+ /**
250
+ * Apply connection-level pragmas (journal mode, synchronous, WAL autocheckpoint, FK enforcement)
251
+ * and verify WAL took effect. Idempotent — safe to call multiple times on
252
+ * the same connection. Logs a one-time warning per connection when WAL
253
+ * couldn't be applied.
254
+ *
255
+ * Exported for read-side callers (auth-status, mirror-manager, etc.) that
256
+ * open a Database directly without going through initSchema. Setting
257
+ * `journal_mode` on a read-only handle is a no-op (or throws and is caught below) but harmless; the
258
+ * useful state is set by whichever writer opens first.
259
+ */
260
+ export function applyConnectionPragmas(db: Database): ConnectionPragmaResult {
261
+ // PRAGMA journal_mode returns a row { journal_mode: "wal" } on success.
262
+ // Use `.get()` (not `.exec()`) so we capture the result. Some bun:sqlite
263
+ // versions throw on readonly handles attempting to set journal_mode; treat
264
+ // that as "we couldn't set it, just read the current value" and recover.
265
+ let journalMode: string;
266
+ try {
267
+ const row = db.prepare("PRAGMA journal_mode = WAL").get() as { journal_mode?: string } | null;
268
+ journalMode = (row?.journal_mode ?? "").toLowerCase();
269
+ } catch {
270
+ // Most likely: readonly handle. Read-only opens never write the DB
271
+ // header, so they can't change journal_mode — but they can still query
272
+ // the current mode, which is set by the most recent writer.
273
+ const row = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null;
274
+ journalMode = (row?.journal_mode ?? "").toLowerCase();
275
+ }
276
+ const wal = journalMode === "wal";
277
+
278
+ // synchronous + wal_autocheckpoint only matter when WAL is active. They're
279
+ // harmless under DELETE mode but the rationale is WAL-specific, so gate
280
+ // them on the success path. Both are best-effort — wrap in try to keep
281
+ // readonly handles (which reject writes) from failing the whole open.
282
+ if (wal) {
283
+ try { db.exec("PRAGMA synchronous = NORMAL"); } catch {}
284
+ try { db.exec("PRAGMA wal_autocheckpoint = 1000"); } catch {}
285
+ } else if (journalMode !== "memory" && !APPLY_PRAGMAS_LOGGED.has(db)) {
286
+ // `journalMode === "memory"` ⇒ this is a `:memory:` database, an
287
+ // explicit choice (tests, ephemeral probes) rather than a filesystem
288
+ // limitation. Suppress the warning so the test suite stays quiet;
289
+ // real on-disk vaults that can't host WAL (NFS, some FUSE/Docker
290
+ // volume drivers) still surface the diagnostic.
291
+ APPLY_PRAGMAS_LOGGED.add(db);
292
+ // eslint-disable-next-line no-console
293
+ console.warn(
294
+ `[vault] WAL mode could not be enabled (journal_mode=${journalMode || "unknown"}). ` +
295
+ `The underlying filesystem may not support WAL (NFS, some FUSE/Docker volume drivers). ` +
296
+ `Multi-process concurrent access will be limited to a single writer at a time.`,
297
+ );
298
+ }
299
+
300
+ try { db.exec("PRAGMA foreign_keys = ON"); } catch {}
301
+
302
+ return { wal, journalMode };
303
+ }
304
+
203
305
  /**
204
306
  * Initialize database schema. Idempotent — safe to call on every startup.
205
307
  */
206
308
  export function initSchema(db: Database): void {
207
- db.exec("PRAGMA journal_mode = WAL");
208
- db.exec("PRAGMA foreign_keys = ON");
309
+ applyConnectionPragmas(db);
209
310
 
210
311
  // Check if we need to migrate from v2
211
312
  const hasOldTables = hasTable(db, "things");
package/core/src/store.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { Database } from "bun:sqlite";
2
- import type { Store, Note, Link, Attachment, QueryOpts } from "./types.js";
2
+ import type { Store, Note, Link, Attachment, QueryOpts, QueryNotesPage } from "./types.js";
3
3
  import { initSchema } from "./schema.js";
4
4
  import * as noteOps from "./notes.js";
5
5
  import * as linkOps from "./links.js";
@@ -227,6 +227,16 @@ export class BunSqliteStore implements Store {
227
227
  return noteOps.queryNotes(this.db, this.expandQueryTags(opts));
228
228
  }
229
229
 
230
+ async queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage> {
231
+ // Hierarchy expansion happens internally — but importantly the cursor's
232
+ // query_hash is computed from the CALLER'S opts (pre-expansion), so a
233
+ // tag hierarchy edit between calls invalidates the cursor (different
234
+ // descendant set → different rows match → caller should restart). The
235
+ // alternative — hash the expanded set — would silently keep returning
236
+ // stale results from a hierarchy snapshot the caller never saw.
237
+ return noteOps.queryNotesPaged(this.db, this.expandQueryTags(opts));
238
+ }
239
+
230
240
  /**
231
241
  * If `tags` are present, attach a parallel `_tagsExpanded` array where
232
242
  * each input tag is replaced with `{tag} ∪ descendants(tag)`. The SQL
package/core/src/types.ts CHANGED
@@ -116,6 +116,30 @@ export interface QueryOpts {
116
116
  orderBy?: string;
117
117
  limit?: number;
118
118
  offset?: number;
119
+ /**
120
+ * Opaque cursor for "since last checked" agent loops (vault#313).
121
+ * When passed, the engine decodes it, verifies its `query_hash` matches
122
+ * the current query (mismatch → CursorError `cursor_query_mismatch`),
123
+ * and adds a keyset predicate that returns only rows newer than the
124
+ * cursor's `updated_at`/`id` watermark. Forces `orderBy = updated_at`
125
+ * (with `id` as a stable tiebreaker) so the watermark math is sound.
126
+ *
127
+ * Cursors are minted by `queryNotesPaged` (engine) and surfaced via
128
+ * the `query-notes` MCP tool's `next_cursor` field; callers should
129
+ * treat the string as opaque.
130
+ */
131
+ cursor?: string;
132
+ }
133
+
134
+ /**
135
+ * Cursor-paginated query result (vault#313). Returned by
136
+ * `queryNotesPaged`/`storeQueryNotesPaged`. `next_cursor` always advances —
137
+ * even on an empty result page — so an agent loop can persist a single
138
+ * watermark and keep polling.
139
+ */
140
+ export interface QueryNotesPage {
141
+ notes: Note[];
142
+ next_cursor: string;
119
143
  }
120
144
 
121
145
  /** Note summary — everything except content. Used in link results. */
@@ -184,6 +208,14 @@ export interface Store {
184
208
  syncAllWikilinks(): Promise<{ synced: number; totalAdded: number; totalRemoved: number }>;
185
209
  deleteNote(id: string): Promise<void>;
186
210
  queryNotes(opts: QueryOpts): Promise<Note[]>;
211
+ /**
212
+ * Cursor-paginated `queryNotes` (vault#313). Returns the same notes plus
213
+ * an opaque `next_cursor` string the caller can pass on the next call
214
+ * to resume from the watermark of the LAST returned row. The cursor is
215
+ * always present in the response — even on an empty page — so an
216
+ * agent loop can persist a single watermark and keep polling.
217
+ */
218
+ queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage>;
187
219
  searchNotes(query: string, opts?: { tags?: string[]; limit?: number }): Promise<Note[]>;
188
220
 
189
221
  // Tags
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openparachute/vault",
3
- "version": "0.4.7-rc.2",
3
+ "version": "0.4.8-rc.10",
4
4
  "description": "Agent-native knowledge graph. Notes, tags, links over MCP.",
5
5
  "module": "src/cli.ts",
6
6
  "type": "module",
@@ -12,14 +12,18 @@
12
12
  "core/src",
13
13
  "core/package.json",
14
14
  ".parachute",
15
- "tsconfig.json"
15
+ "tsconfig.json",
16
+ "web/ui/dist"
16
17
  ],
17
18
  "scripts": {
18
19
  "start": "bun src/server.ts",
19
20
  "cli": "bun src/cli.ts",
20
21
  "test": "bun test ./src/",
21
22
  "test:core": "cd core && node --experimental-vm-modules node_modules/vitest/dist/cli.js run",
22
- "typecheck": "tsc --noEmit"
23
+ "typecheck": "tsc --noEmit",
24
+ "build:spa": "cd web/ui && bun install --frozen-lockfile && bun run build",
25
+ "postinstall": "if [ -d web/ui ]; then bun run build:spa; fi",
26
+ "prepack": "bun run build:spa"
23
27
  },
24
28
  "dependencies": {
25
29
  "@modelcontextprotocol/sdk": "^1.12.1",
@@ -40,6 +40,10 @@ export interface AuthStatusResponse {
40
40
  * caller's signal to degrade `hasTokens` to `null`.
41
41
  */
42
42
  function vaultHasTokens(dbPath: string): boolean {
43
+ // Readonly handle — no pragma application here. Journal mode is a
44
+ // persistent DB-header setting written by the first writer (the daemon's
45
+ // BunSqliteStore via openVaultDb), so this probe sees WAL automatically
46
+ // and is safe under concurrent writes.
43
47
  const db = new Database(dbPath, { readonly: true });
44
48
  try {
45
49
  const row = db.prepare("SELECT 1 FROM tokens LIMIT 1").get();
package/src/auth.test.ts CHANGED
@@ -25,8 +25,6 @@ import {
25
25
  import { getVaultStore, clearVaultStoreCache } from "./vault-store.ts";
26
26
  import { generateToken, createToken } from "./token-store.ts";
27
27
  import { authenticateVaultRequest, authenticateGlobalRequest } from "./auth.ts";
28
- import { handleRegister, handleAuthorizePost, handleToken } from "./oauth.ts";
29
- import crypto from "node:crypto";
30
28
 
31
29
  let tmpHome: string;
32
30
  let prevHome: string | undefined;
@@ -235,116 +233,11 @@ describe("auth — cross-vault isolation", () => {
235
233
  });
236
234
  });
237
235
 
238
- // ---------------------------------------------------------------------------
239
- // End-to-end: OAuth flow resulting token authenticates against its vault
240
- // ---------------------------------------------------------------------------
241
-
242
- describe("OAuth-minted tokens per-vault coherence", () => {
243
- // These tests drive the OAuth handlers directly (no HTTP), then take the
244
- // resulting access_token and verify it resolves at endpoints addressing
245
- // its issuing vault — and only its issuing vault.
246
-
247
- async function runOAuthFlow(vaultName: string): Promise<string> {
248
- const store = getVaultStore(vaultName);
249
- const db = store.db;
250
-
251
- // Seed an owner token so consent passes in legacy-token mode.
252
- const { fullToken: ownerToken } = generateToken();
253
- createToken(db, ownerToken, { label: "owner", permission: "full" });
254
-
255
- // 1. Register client
256
- const regRes = await handleRegister(
257
- new Request(`https://vault.test/vault/${vaultName}/oauth/register`, {
258
- method: "POST",
259
- headers: { "Content-Type": "application/json" },
260
- body: JSON.stringify({
261
- client_name: "Daily",
262
- redirect_uris: ["parachute://oauth/callback"],
263
- }),
264
- }),
265
- db,
266
- );
267
- const { client_id } = (await regRes.json()) as { client_id: string };
268
-
269
- // 2. PKCE + authorize
270
- const codeVerifier = crypto.randomBytes(32).toString("base64url");
271
- const codeChallenge = crypto.createHash("sha256").update(codeVerifier).digest("base64url");
272
- const authRes = await handleAuthorizePost(
273
- new Request(`https://vault.test/vault/${vaultName}/oauth/authorize`, {
274
- method: "POST",
275
- body: new URLSearchParams({
276
- action: "authorize",
277
- client_id,
278
- redirect_uri: "parachute://oauth/callback",
279
- code_challenge: codeChallenge,
280
- code_challenge_method: "S256",
281
- scope: "full",
282
- owner_token: ownerToken,
283
- }),
284
- }),
285
- db,
286
- { vaultName },
287
- );
288
- const code = new URL(authRes.headers.get("location")!).searchParams.get("code")!;
289
-
290
- // 3. Token exchange
291
- const tokRes = await handleToken(
292
- new Request(`https://vault.test/vault/${vaultName}/oauth/token`, {
293
- method: "POST",
294
- headers: { "Content-Type": "application/x-www-form-urlencoded" },
295
- body: new URLSearchParams({
296
- grant_type: "authorization_code",
297
- code,
298
- code_verifier: codeVerifier,
299
- client_id,
300
- redirect_uri: "parachute://oauth/callback",
301
- }).toString(),
302
- }),
303
- db,
304
- vaultName,
305
- );
306
- const tokBody = (await tokRes.json()) as { access_token: string; vault: string };
307
- expect(tokBody.vault).toBe(vaultName);
308
- return tokBody.access_token;
309
- }
310
-
311
- test("OAuth-minted token works at /vault/<name>/api/* and /vault/<name>/mcp", async () => {
312
- seedVault("journal", { isDefault: true });
313
- const token = await runOAuthFlow("journal");
314
- const cfg = readVaultConfig("journal")!;
315
- const store = getVaultStore("journal");
316
-
317
- // /vault/journal/api/* and /vault/journal/mcp both reach this auth call.
318
- const vaultAuth = await authenticateVaultRequest(bearer(token), cfg, store.db);
319
- expect("error" in vaultAuth).toBe(false);
320
-
321
- // /vaults (authenticated listing) uses authenticateGlobalRequest.
322
- const global = await authenticateGlobalRequest(bearer(token));
323
- expect("error" in global).toBe(false);
324
- });
325
-
326
- test("named-vault OAuth: token works for its vault, rejected by others", async () => {
327
- seedVault("journal", { isDefault: true });
328
- seedVault("work");
329
- const token = await runOAuthFlow("work");
330
- const workCfg = readVaultConfig("work")!;
331
- const workStore = getVaultStore("work");
332
-
333
- // Valid at work's own endpoints.
334
- const scoped = await authenticateVaultRequest(bearer(token), workCfg, workStore.db);
335
- expect("error" in scoped).toBe(false);
336
-
337
- // Global auth finds the token in work's DB.
338
- const global = await authenticateGlobalRequest(bearer(token));
339
- expect("error" in global).toBe(false);
340
-
341
- // Isolation: the token is NOT usable against the journal vault.
342
- const journalCfg = readVaultConfig("journal")!;
343
- const journalStore = getVaultStore("journal");
344
- const crossCheck = await authenticateVaultRequest(bearer(token), journalCfg, journalStore.db);
345
- expect("error" in crossCheck).toBe(true);
346
- });
347
- });
236
+ // The "End-to-end OAuth flow" suite was retired alongside the standalone
237
+ // OAuth issuer in workstream E (vault#366). Per-vault token coherence is
238
+ // still pinned by the v16 binding tests above and by `tokens-routes.test.ts`
239
+ // (mint-via-CLI → present at /vault/<name>/* surfaces); the OAuth handshake
240
+ // itself has moved entirely to the hub.
348
241
 
349
242
  // ---------------------------------------------------------------------------
350
243
  // Legacy YAML global keys — scope must round-trip through the parser
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Auto-transcribe gating decisions (vault#353).
3
+ *
4
+ * Three independent guards: mime-type prefix, enabled toggle, scribe URL
5
+ * present. Pure function — exercise the truth table.
6
+ */
7
+
8
+ import { describe, test, expect } from "bun:test";
9
+ import { shouldAutoTranscribe } from "./auto-transcribe.ts";
10
+
11
+ function readGlobalConfig(enabled: boolean | undefined) {
12
+ return () => ({
13
+ port: 1940,
14
+ ...(enabled !== undefined ? { auto_transcribe: { enabled } } : {}),
15
+ }) as any;
16
+ }
17
+
18
+ describe("shouldAutoTranscribe", () => {
19
+ const scribePresent = () => "http://127.0.0.1:1943";
20
+ const scribeAbsent = () => undefined;
21
+
22
+ test("triggers on audio/* mime-type when enabled + scribe reachable", () => {
23
+ expect(shouldAutoTranscribe("audio/wav", {
24
+ readGlobalConfigImpl: readGlobalConfig(true),
25
+ getCachedScribeUrlImpl: scribePresent,
26
+ })).toBe(true);
27
+ });
28
+
29
+ test("triggers on audio/mp4 (m4a)", () => {
30
+ expect(shouldAutoTranscribe("audio/mp4", {
31
+ readGlobalConfigImpl: readGlobalConfig(true),
32
+ getCachedScribeUrlImpl: scribePresent,
33
+ })).toBe(true);
34
+ });
35
+
36
+ test("triggers on audio/webm", () => {
37
+ expect(shouldAutoTranscribe("audio/webm", {
38
+ readGlobalConfigImpl: readGlobalConfig(true),
39
+ getCachedScribeUrlImpl: scribePresent,
40
+ })).toBe(true);
41
+ });
42
+
43
+ test("triggers case-insensitively (AUDIO/WAV)", () => {
44
+ expect(shouldAutoTranscribe("AUDIO/WAV", {
45
+ readGlobalConfigImpl: readGlobalConfig(true),
46
+ getCachedScribeUrlImpl: scribePresent,
47
+ })).toBe(true);
48
+ });
49
+
50
+ test("skips non-audio mime-types (image/png, application/pdf, video/mp4)", () => {
51
+ expect(shouldAutoTranscribe("image/png", {
52
+ readGlobalConfigImpl: readGlobalConfig(true),
53
+ getCachedScribeUrlImpl: scribePresent,
54
+ })).toBe(false);
55
+ expect(shouldAutoTranscribe("application/pdf", {
56
+ readGlobalConfigImpl: readGlobalConfig(true),
57
+ getCachedScribeUrlImpl: scribePresent,
58
+ })).toBe(false);
59
+ expect(shouldAutoTranscribe("video/mp4", {
60
+ readGlobalConfigImpl: readGlobalConfig(true),
61
+ getCachedScribeUrlImpl: scribePresent,
62
+ })).toBe(false);
63
+ });
64
+
65
+ test("skips when enabled is false (default off)", () => {
66
+ expect(shouldAutoTranscribe("audio/wav", {
67
+ readGlobalConfigImpl: readGlobalConfig(false),
68
+ getCachedScribeUrlImpl: scribePresent,
69
+ })).toBe(false);
70
+ });
71
+
72
+ test("skips when enabled is unset (no auto_transcribe block in config)", () => {
73
+ expect(shouldAutoTranscribe("audio/wav", {
74
+ readGlobalConfigImpl: readGlobalConfig(undefined),
75
+ getCachedScribeUrlImpl: scribePresent,
76
+ })).toBe(false);
77
+ });
78
+
79
+ test("skips when scribe URL is undefined (no services.json entry, no env)", () => {
80
+ expect(shouldAutoTranscribe("audio/wav", {
81
+ readGlobalConfigImpl: readGlobalConfig(true),
82
+ getCachedScribeUrlImpl: scribeAbsent,
83
+ })).toBe(false);
84
+ });
85
+
86
+ test("skips when scribe URL is empty string", () => {
87
+ expect(shouldAutoTranscribe("audio/wav", {
88
+ readGlobalConfigImpl: readGlobalConfig(true),
89
+ getCachedScribeUrlImpl: () => "",
90
+ })).toBe(false);
91
+ });
92
+
93
+ test("skips on garbage mime-type input", () => {
94
+ expect(shouldAutoTranscribe("", {
95
+ readGlobalConfigImpl: readGlobalConfig(true),
96
+ getCachedScribeUrlImpl: scribePresent,
97
+ })).toBe(false);
98
+ expect(shouldAutoTranscribe("not-a-mime", {
99
+ readGlobalConfigImpl: readGlobalConfig(true),
100
+ getCachedScribeUrlImpl: scribePresent,
101
+ })).toBe(false);
102
+ });
103
+
104
+ test("respects enabledOverride when present", () => {
105
+ expect(shouldAutoTranscribe("audio/wav", {
106
+ readGlobalConfigImpl: readGlobalConfig(false),
107
+ getCachedScribeUrlImpl: scribePresent,
108
+ enabledOverride: true,
109
+ })).toBe(true);
110
+ expect(shouldAutoTranscribe("audio/wav", {
111
+ readGlobalConfigImpl: readGlobalConfig(true),
112
+ getCachedScribeUrlImpl: scribePresent,
113
+ enabledOverride: false,
114
+ })).toBe(false);
115
+ });
116
+ });
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Auto-transcribe trigger decision (vault#353, design 2026-05-21 Part 2).
3
+ *
4
+ * One pure function: given an attachment's mime-type + the operator's
5
+ * settings + whether scribe is reachable, decide whether to enqueue the
6
+ * attachment for the transcription worker. Lives in its own module so the
7
+ * attachment-write code path (`routes.ts`) and the retry endpoint share the
8
+ * same gate without duplicating logic.
9
+ */
10
+
11
+ import { readGlobalConfig } from "./config.ts";
12
+ import { getCachedScribeUrl } from "./scribe-discovery.ts";
13
+
14
+ /**
15
+ * Pre-vault#353 callers passed `transcribe: true` explicitly on the
16
+ * attachment POST. The auto-transcribe path inlines the decision: if the
17
+ * upload is an audio mime-type AND the toggle is on AND scribe is reachable,
18
+ * the worker is enqueued. This function is the single decision site.
19
+ *
20
+ * Returns `true` only when ALL three conditions hold:
21
+ * 1. mime-type starts with `audio/` (case-insensitive).
22
+ * 2. `opts.enabledOverride` when provided, otherwise `globalConfig.auto_transcribe?.enabled`, is truthy.
23
+ * 3. Scribe is discoverable (services.json entry OR SCRIBE_URL env).
24
+ *
25
+ * The three conditions are independent guards: a single `false` is sufficient
26
+ * to skip enqueuing. The audio stays as a regular attachment in that case.
27
+ */
28
+ export function shouldAutoTranscribe(
29
+ mimeType: string,
30
+ opts: {
31
+ /** Injection seam for tests — defaults to live globals. */
32
+ readGlobalConfigImpl?: typeof readGlobalConfig;
33
+ getCachedScribeUrlImpl?: () => string | undefined;
34
+ /** Allow per-call enabled override — used by the explicit-opt-in path. */
35
+ enabledOverride?: boolean;
36
+ } = {},
37
+ ): boolean {
38
+ if (typeof mimeType !== "string" || !mimeType.toLowerCase().startsWith("audio/")) {
39
+ return false;
40
+ }
41
+ const enabled = opts.enabledOverride
42
+ ?? (opts.readGlobalConfigImpl ?? readGlobalConfig)().auto_transcribe?.enabled
43
+ ?? false;
44
+ if (!enabled) return false;
45
+ const url = (opts.getCachedScribeUrlImpl ?? getCachedScribeUrl)();
46
+ if (!url || !url.trim()) return false;
47
+ return true;
48
+ }
package/src/backup.ts CHANGED
@@ -572,9 +572,21 @@ export async function runBackup(opts?: {
572
572
  vaultsDir: opts?.vaultsDir,
573
573
  });
574
574
 
575
+ // Write the tarball to a SIBLING tempdir, not inside stagingDir.
576
+ //
577
+ // Why: `assembleTarball` runs `tar -czf <out> -C <stagingDir> <entries>`
578
+ // where `entries = readdirSync(stagingDir)`. If the output path lives
579
+ // inside stagingDir (e.g. `stagingDir/__out__/...`), that subdir shows
580
+ // up in `entries` and tar enumerates it while ALSO writing to it.
581
+ // GNU tar (Linux) treats "file changed as we read it" as fatal and
582
+ // aborts; BSD tar (macOS) tolerates it. The sibling-tempdir layout
583
+ // keeps the output completely out of tar's input set on both platforms.
584
+ // See vault#363.
585
+ const outDir = mkdtempSync(join(tmpdir(), "parachute-backup-out-"));
586
+
575
587
  try {
576
588
  const tarName = backupFilename(timestamp);
577
- const tarballPath = join(stagingDir, "__out__", tarName);
589
+ const tarballPath = join(outDir, tarName);
578
590
  await assembleTarball(stagingDir, tarballPath);
579
591
  const bytes = statSync(tarballPath).size;
580
592
 
@@ -594,9 +606,11 @@ export async function runBackup(opts?: {
594
606
 
595
607
  return { tarballPath, timestamp, bytes, destinations: results, contents };
596
608
  } finally {
597
- // The staging dir has the only copy of the tarball that isn't at a
598
- // destination; destinations have already been written. Safe to clean.
609
+ // The staging dir (tar inputs) + out dir (the tarball) hold the only local copies that
610
+ // aren't at a destination; destinations have already been written. Safe
611
+ // to clean both.
599
612
  try { rmSync(stagingDir, { recursive: true, force: true }); } catch {}
613
+ try { rmSync(outDir, { recursive: true, force: true }); } catch {}
600
614
  }
601
615
  }
602
616