npm - @openparachute/vault - Versions diffs - 0.4.7-rc.2 → 0.4.8-rc.10 - Mend

@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/.parachute/module.json +1 -1
package/README.md +78 -41
package/core/src/connection-pragmas.test.ts +232 -0
package/core/src/core.test.ts +257 -0
package/core/src/cursor.test.ts +160 -0
package/core/src/cursor.ts +272 -0
package/core/src/mcp.ts +51 -7
package/core/src/notes.ts +164 -2
package/core/src/schema.ts +106 -5
package/core/src/store.ts +11 -1
package/core/src/types.ts +32 -0
package/package.json +7 -3
package/src/auth-status.ts +4 -0
package/src/auth.test.ts +5 -112
package/src/auto-transcribe.test.ts +116 -0
package/src/auto-transcribe.ts +48 -0
package/src/backup.ts +17 -3
package/src/cli.ts +95 -66
package/src/config.test.ts +26 -0
package/src/config.ts +53 -1
package/src/db.ts +15 -2
package/src/export-watch.test.ts +21 -0
package/src/mcp-install-interactive.test.ts +23 -2
package/src/mcp-install-interactive.ts +21 -2
package/src/mcp-install.test.ts +40 -0
package/src/mcp-tools.ts +17 -1
package/src/module-config.ts +70 -14
package/src/module-manifest.test.ts +114 -0
package/src/module-manifest.ts +104 -0
package/src/oauth-discovery.ts +95 -0
package/src/owner-auth.ts +22 -149
package/src/routes.ts +268 -51
package/src/routing.test.ts +102 -99
package/src/routing.ts +33 -47
package/src/scribe-discovery.test.ts +77 -0
package/src/scribe-discovery.ts +91 -0
package/src/scribe-env.test.ts +66 -1
package/src/scribe-env.ts +42 -1
package/src/self-register.test.ts +412 -0
package/src/self-register.ts +247 -0
package/src/server.ts +47 -23
package/src/transcript-note.test.ts +171 -0
package/src/transcript-note.ts +189 -0
package/src/transcription-registry.ts +22 -0
package/src/transcription-worker.test.ts +250 -0
package/src/transcription-worker.ts +186 -27
package/src/vault-name.ts +3 -2
package/src/vault.test.ts +347 -0
package/web/ui/dist/assets/index-BOa-JJtV.css +1 -0
package/web/ui/dist/assets/index-BzA5LgE3.js +60 -0
package/web/ui/dist/index.html +14 -0
package/web/ui/tsconfig.json +21 -0
package/src/oauth.test.ts +0 -2156
package/src/oauth.ts +0 -973

package/core/src/schema.ts CHANGED Viewed

@@ -133,6 +133,11 @@ CREATE TABLE IF NOT EXISTS tokens (
 );
 -- OAuth: registered clients (Dynamic Client Registration)
+-- VESTIGIAL after vault 0.4.x workstream E (2026-05-25). The standalone
+-- OAuth issuer that wrote these rows was retired (hub is the issuer now;
+-- vault is resource-server-only). The tables are left in place so an
+-- upgrade doesn't trip on a missing column for any operator who still
+-- has rows mid-upgrade. A future migration will drop them.
 CREATE TABLE IF NOT EXISTS oauth_clients (
   client_id TEXT PRIMARY KEY,
   client_name TEXT,
@@ -141,9 +146,9 @@ CREATE TABLE IF NOT EXISTS oauth_clients (
 );
 -- OAuth: authorization codes (single-use, short-lived)
--- vault_name pins the code to the vault it was issued for. handleToken
--- must verify it matches the requested vault — otherwise a code issued
--- under /vaults/A/oauth/authorize could be redeemed at /vaults/B/oauth/token.
+-- VESTIGIAL — see oauth_clients above. The vault_name column survives
+-- as a sentinel of the per-vault-pinning invariant that used to apply
+-- when vault was the issuer.
 CREATE TABLE IF NOT EXISTS oauth_codes (
   code TEXT PRIMARY KEY,
   client_id TEXT NOT NULL,
@@ -200,12 +205,108 @@ CREATE INDEX IF NOT EXISTS idx_links_target ON links(target_id);
 -- because migrateToV16 also runs the unconditional CREATE INDEX path.
 `;
+/**
+ * Connection-level pragmas applied on every Database open, in the order they
+ * appear here.
+ *
+ * `journal_mode = WAL` is a persistent, DB-level setting (lives in the SQLite
+ * header). Once any writer flips a DB into WAL it stays in WAL across opens
+ * and processes — so daemon + CLI + parachute-runner + any read-side tool
+ * see the same mode. Re-applying on every open is cheap and idempotent;
+ * SQLite returns the current mode either way.
+ *
+ * `synchronous = NORMAL` is the safe, recommended pairing with WAL per the
+ * SQLite docs: fsync only at checkpoint rather than on every commit. The DB
+ * stays consistent (WAL frames are still ordered + checksummed); the cost vs
+ * FULL is durability: a power loss or OS crash *between* checkpoints may roll
+ * back commits made since the last sync. Acceptable for a knowledge graph
+ * that's snapshotted by `VACUUM INTO` for backups.
+ *
+ * `wal_autocheckpoint = 1000` is SQLite's default; we set it explicitly so
+ * the contract is visible in code rather than implicit. 1000 pages ≈ 4MB
+ * before a passive checkpoint is triggered on the next write.
+ *
+ * `foreign_keys = ON` is per-connection (not persistent) — must be re-applied
+ * on every open. Migrations occasionally disable it transiently (see
+ * migrateToV14's BEGIN IMMEDIATE block); the boot path re-enables.
+ *
+ * WAL requires a filesystem that supports memory-mapped shared-memory
+ * (the `-shm` sidecar). NFS, some FUSE mounts, and a few Docker volume
+ * drivers don't qualify and silently fall back to the prior journal mode
+ * (typically `delete`). `applyConnectionPragmas` detects this and returns
+ * `wal: false` so the caller can log a warning — operators on those
+ * filesystems should know they've lost multi-process concurrency.
+ */
+const APPLY_PRAGMAS_LOGGED = new WeakSet<Database>();
+export interface ConnectionPragmaResult {
+  /** True when the connection ended up in WAL mode. False means the FS doesn't support WAL. */
+  wal: boolean;
+  /** The actual journal_mode SQLite reports — "wal", "delete", "memory", etc. */
+  journalMode: string;
+}
+/**
+ * Apply connection-level pragmas (journal mode, synchronous, FK enforcement)
+ * and report whether WAL took effect. Idempotent — safe to call multiple
+ * times on the same connection. Logs a one-time warning per connection when
+ * WAL couldn't be applied (suppressed when journal_mode reports "memory").
+ *
+ * Exported for read-side callers that open a Database directly without
+ * going through initSchema. On a read-only handle, setting `journal_mode`
+ * may throw; that is caught and the current mode is read back instead, so
+ * the returned `journalMode` is whatever the connection actually reports.
+ */
+export function applyConnectionPragmas(db: Database): ConnectionPragmaResult {
+  // PRAGMA journal_mode returns a row { journal_mode: "wal" } on success.
+  // Use `.get()` (not `.exec()`) so we capture the result. Some bun:sqlite
+  // versions throw on readonly handles attempting to set journal_mode; treat
+  // that as "we couldn't set it, just read the current value" and recover.
+  let journalMode: string;
+  try {
+    const row = db.prepare("PRAGMA journal_mode = WAL").get() as { journal_mode?: string } | null;
+    journalMode = (row?.journal_mode ?? "").toLowerCase();
+  } catch {
+    // Most likely: readonly handle. Read-only opens never write the DB
+    // header, so they can't change journal_mode — but they can still query
+    // the current mode, which is set by the most recent writer.
+    const row = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null;
+    journalMode = (row?.journal_mode ?? "").toLowerCase();
+  }
+  const wal = journalMode === "wal";
+  // synchronous = NORMAL + wal_autocheckpoint are applied only when WAL is
+  // active: the rationale is WAL-specific, and NORMAL gives weaker guarantees
+  // under rollback-journal modes. Both are best-effort — wrapped in try so
+  // readonly handles (which reject writes) don't fail the whole open.
+  if (wal) {
+    try { db.exec("PRAGMA synchronous = NORMAL"); } catch {}
+    try { db.exec("PRAGMA wal_autocheckpoint = 1000"); } catch {}
+  } else if (journalMode !== "memory" && !APPLY_PRAGMAS_LOGGED.has(db)) {
+    // `journalMode === "memory"` ⇒ this is a `:memory:` database, an
+    // explicit choice (tests, ephemeral probes) rather than a filesystem
+    // limitation. Suppress the warning so the test suite stays quiet;
+    // real on-disk vaults that can't host WAL (NFS, some FUSE/Docker
+    // volume drivers) still surface the diagnostic.
+    APPLY_PRAGMAS_LOGGED.add(db);
+    // eslint-disable-next-line no-console
+    console.warn(
+      `[vault] WAL mode could not be enabled (journal_mode=${journalMode || "unknown"}). ` +
+      `The underlying filesystem may not support WAL (NFS, some FUSE/Docker volume drivers). ` +
+      `Multi-process concurrent access will be limited to a single writer at a time.`,
+    );
+  }
+  try { db.exec("PRAGMA foreign_keys = ON"); } catch {}
+  return { wal, journalMode };
+}
 /**
  * Initialize database schema. Idempotent — safe to call on every startup.
  */
 export function initSchema(db: Database): void {
-  db.exec("PRAGMA journal_mode = WAL");
-  db.exec("PRAGMA foreign_keys = ON");
+  applyConnectionPragmas(db);
   // Check if we need to migrate from v2
   const hasOldTables = hasTable(db, "things");

package/core/src/store.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Database } from "bun:sqlite";
-import type { Store, Note, Link, Attachment, QueryOpts } from "./types.js";
+import type { Store, Note, Link, Attachment, QueryOpts, QueryNotesPage } from "./types.js";
 import { initSchema } from "./schema.js";
 import * as noteOps from "./notes.js";
 import * as linkOps from "./links.js";
@@ -227,6 +227,16 @@ export class BunSqliteStore implements Store {
     return noteOps.queryNotes(this.db, this.expandQueryTags(opts));
   }
+  async queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage> {
+    // expandQueryTags attaches a parallel descendant-expanded tag set next to
+    // the caller's original tags before the query is handed to the engine.
+    // NOTE(review): the cursor's query_hash is meant to come from the
+    // caller's pre-expansion opts. If so, a tag-hierarchy edit between calls
+    // leaves the hash unchanged and does NOT invalidate the cursor; only
+    // hashing the expanded set would. Confirm the intent against cursor.ts.
+    return noteOps.queryNotesPaged(this.db, this.expandQueryTags(opts));
+  }
   /**
    * If `tags` are present, attach a parallel `_tagsExpanded` array where
    * each input tag is replaced with `{tag} ∪ descendants(tag)`. The SQL

package/core/src/types.ts CHANGED Viewed

@@ -116,6 +116,30 @@ export interface QueryOpts {
   orderBy?: string;
   limit?: number;
   offset?: number;
+  /**
+   * Opaque cursor for "since last checked" agent loops (vault#313).
+   * When passed, the engine decodes it, verifies its `query_hash` matches
+   * the current query (mismatch → CursorError `cursor_query_mismatch`),
+   * and adds a keyset predicate that returns only rows newer than the
+   * cursor's `updated_at`/`id` watermark. Forces `orderBy = updated_at`
+   * (with `id` as a stable tiebreaker) so the watermark math is sound.
+   *
+   * Cursors are minted by `queryNotesPaged` (engine) and surfaced via
+   * the `query-notes` MCP tool's `next_cursor` field; callers should
+   * treat the string as opaque.
+   */
+  cursor?: string;
+}
+/**
+ * Cursor-paginated query result (vault#313). Returned by
+ * `queryNotesPaged`/`storeQueryNotesPaged`. `next_cursor` always advances —
+ * even on an empty result page — so an agent loop can persist a single
+ * watermark and keep polling.
+ */
+export interface QueryNotesPage {
+  notes: Note[];
+  next_cursor: string;
 }
 /** Note summary — everything except content. Used in link results. */
@@ -184,6 +208,14 @@ export interface Store {
   syncAllWikilinks(): Promise<{ synced: number; totalAdded: number; totalRemoved: number }>;
   deleteNote(id: string): Promise<void>;
   queryNotes(opts: QueryOpts): Promise<Note[]>;
+  /**
+   * Cursor-paginated `queryNotes` (vault#313). Returns the same notes plus
+   * an opaque `next_cursor` string the caller can pass on the next call
+   * to resume from the watermark of the LAST returned row. The cursor is
+   * always present in the response — even on an empty page — so an
+   * agent loop can persist a single watermark and keep polling.
+   */
+  queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage>;
   searchNotes(query: string, opts?: { tags?: string[]; limit?: number }): Promise<Note[]>;
   // Tags

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@openparachute/vault",
-  "version": "0.4.7-rc.2",
+  "version": "0.4.8-rc.10",
   "description": "Agent-native knowledge graph. Notes, tags, links over MCP.",
   "module": "src/cli.ts",
   "type": "module",
@@ -12,14 +12,18 @@
     "core/src",
     "core/package.json",
     ".parachute",
-    "tsconfig.json"
+    "tsconfig.json",
+    "web/ui/dist"
   ],
   "scripts": {
     "start": "bun src/server.ts",
     "cli": "bun src/cli.ts",
     "test": "bun test ./src/",
     "test:core": "cd core && node --experimental-vm-modules node_modules/vitest/dist/cli.js run",
-    "typecheck": "tsc --noEmit"
+    "typecheck": "tsc --noEmit",
+    "build:spa": "cd web/ui && bun install --frozen-lockfile && bun run build",
+    "postinstall": "if [ -d web/ui ]; then bun run build:spa; fi",
+    "prepack": "bun run build:spa"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.12.1",

package/src/auth-status.ts CHANGED Viewed

@@ -40,6 +40,10 @@ export interface AuthStatusResponse {
  * caller's signal to degrade `hasTokens` to `null`.
  */
 function vaultHasTokens(dbPath: string): boolean {
+  // Readonly handle — no pragma application here. Journal mode is a
+  // persistent DB-header setting written by the first writer (the daemon's
+  // BunSqliteStore via openVaultDb), so this probe sees WAL automatically
+  // and is safe under concurrent writes.
   const db = new Database(dbPath, { readonly: true });
   try {
     const row = db.prepare("SELECT 1 FROM tokens LIMIT 1").get();

package/src/auth.test.ts CHANGED Viewed

@@ -25,8 +25,6 @@ import {
 import { getVaultStore, clearVaultStoreCache } from "./vault-store.ts";
 import { generateToken, createToken } from "./token-store.ts";
 import { authenticateVaultRequest, authenticateGlobalRequest } from "./auth.ts";
-import { handleRegister, handleAuthorizePost, handleToken } from "./oauth.ts";
-import crypto from "node:crypto";
 let tmpHome: string;
 let prevHome: string | undefined;
@@ -235,116 +233,11 @@ describe("auth — cross-vault isolation", () => {
   });
 });
-// ---------------------------------------------------------------------------
-// End-to-end: OAuth flow → resulting token authenticates against its vault
-// ---------------------------------------------------------------------------
-describe("OAuth-minted tokens — per-vault coherence", () => {
-  // These tests drive the OAuth handlers directly (no HTTP), then take the
-  // resulting access_token and verify it resolves at endpoints addressing
-  // its issuing vault — and only its issuing vault.
-  async function runOAuthFlow(vaultName: string): Promise<string> {
-    const store = getVaultStore(vaultName);
-    const db = store.db;
-    // Seed an owner token so consent passes in legacy-token mode.
-    const { fullToken: ownerToken } = generateToken();
-    createToken(db, ownerToken, { label: "owner", permission: "full" });
-    // 1. Register client
-    const regRes = await handleRegister(
-      new Request(`https://vault.test/vault/${vaultName}/oauth/register`, {
-        method: "POST",
-        headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({
-          client_name: "Daily",
-          redirect_uris: ["parachute://oauth/callback"],
-        }),
-      }),
-      db,
-    );
-    const { client_id } = (await regRes.json()) as { client_id: string };
-    // 2. PKCE + authorize
-    const codeVerifier = crypto.randomBytes(32).toString("base64url");
-    const codeChallenge = crypto.createHash("sha256").update(codeVerifier).digest("base64url");
-    const authRes = await handleAuthorizePost(
-      new Request(`https://vault.test/vault/${vaultName}/oauth/authorize`, {
-        method: "POST",
-        body: new URLSearchParams({
-          action: "authorize",
-          client_id,
-          redirect_uri: "parachute://oauth/callback",
-          code_challenge: codeChallenge,
-          code_challenge_method: "S256",
-          scope: "full",
-          owner_token: ownerToken,
-        }),
-      }),
-      db,
-      { vaultName },
-    );
-    const code = new URL(authRes.headers.get("location")!).searchParams.get("code")!;
-    // 3. Token exchange
-    const tokRes = await handleToken(
-      new Request(`https://vault.test/vault/${vaultName}/oauth/token`, {
-        method: "POST",
-        headers: { "Content-Type": "application/x-www-form-urlencoded" },
-        body: new URLSearchParams({
-          grant_type: "authorization_code",
-          code,
-          code_verifier: codeVerifier,
-          client_id,
-          redirect_uri: "parachute://oauth/callback",
-        }).toString(),
-      }),
-      db,
-      vaultName,
-    );
-    const tokBody = (await tokRes.json()) as { access_token: string; vault: string };
-    expect(tokBody.vault).toBe(vaultName);
-    return tokBody.access_token;
-  }
-  test("OAuth-minted token works at /vault/<name>/api/* and /vault/<name>/mcp", async () => {
-    seedVault("journal", { isDefault: true });
-    const token = await runOAuthFlow("journal");
-    const cfg = readVaultConfig("journal")!;
-    const store = getVaultStore("journal");
-    // /vault/journal/api/* and /vault/journal/mcp both reach this auth call.
-    const vaultAuth = await authenticateVaultRequest(bearer(token), cfg, store.db);
-    expect("error" in vaultAuth).toBe(false);
-    // /vaults (authenticated listing) uses authenticateGlobalRequest.
-    const global = await authenticateGlobalRequest(bearer(token));
-    expect("error" in global).toBe(false);
-  });
-  test("named-vault OAuth: token works for its vault, rejected by others", async () => {
-    seedVault("journal", { isDefault: true });
-    seedVault("work");
-    const token = await runOAuthFlow("work");
-    const workCfg = readVaultConfig("work")!;
-    const workStore = getVaultStore("work");
-    // Valid at work's own endpoints.
-    const scoped = await authenticateVaultRequest(bearer(token), workCfg, workStore.db);
-    expect("error" in scoped).toBe(false);
-    // Global auth finds the token in work's DB.
-    const global = await authenticateGlobalRequest(bearer(token));
-    expect("error" in global).toBe(false);
-    // Isolation: the token is NOT usable against the journal vault.
-    const journalCfg = readVaultConfig("journal")!;
-    const journalStore = getVaultStore("journal");
-    const crossCheck = await authenticateVaultRequest(bearer(token), journalCfg, journalStore.db);
-    expect("error" in crossCheck).toBe(true);
-  });
-});
+// The "End-to-end OAuth flow" suite was retired alongside the standalone
+// OAuth issuer in workstream E (vault#366). Per-vault token coherence is
+// still pinned by the v16 binding tests above and by `tokens-routes.test.ts`
+// (mint-via-CLI → present at /vault/<name>/* surfaces); the OAuth handshake
+// itself has moved entirely to the hub.
 // ---------------------------------------------------------------------------
 // Legacy YAML global keys — scope must round-trip through the parser

package/src/auto-transcribe.test.ts ADDED Viewed

@@ -0,0 +1,116 @@
+/**
+ * Auto-transcribe gating decisions (vault#353).
+ *
+ * Three independent guards: mime-type prefix, enabled toggle, scribe URL
+ * present. Pure function — exercise the truth table.
+ */
+import { describe, test, expect } from "bun:test";
+import { shouldAutoTranscribe } from "./auto-transcribe.ts";
+function readGlobalConfig(enabled: boolean | undefined) {
+  return () => ({
+    port: 1940,
+    ...(enabled !== undefined ? { auto_transcribe: { enabled } } : {}),
+  }) as any;
+}
+describe("shouldAutoTranscribe", () => {
+  const scribePresent = () => "http://127.0.0.1:1943";
+  const scribeAbsent = () => undefined;
+  test("triggers on audio/* mime-type when enabled + scribe reachable", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("triggers on audio/mp4 (m4a)", () => {
+    expect(shouldAutoTranscribe("audio/mp4", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("triggers on audio/webm", () => {
+    expect(shouldAutoTranscribe("audio/webm", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("triggers case-insensitively (AUDIO/WAV)", () => {
+    expect(shouldAutoTranscribe("AUDIO/WAV", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("skips non-audio mime-types (image/png, application/pdf, video/mp4)", () => {
+    expect(shouldAutoTranscribe("image/png", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+    expect(shouldAutoTranscribe("application/pdf", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+    expect(shouldAutoTranscribe("video/mp4", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("skips when enabled is false (default off)", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(false),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("skips when enabled is unset (no auto_transcribe block in config)", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(undefined),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("skips when scribe URL is undefined (no services.json entry, no env)", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribeAbsent,
+    })).toBe(false);
+  });
+  test("skips when scribe URL is empty string", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: () => "",
+    })).toBe(false);
+  });
+  test("skips on garbage mime-type input", () => {
+    expect(shouldAutoTranscribe("", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+    expect(shouldAutoTranscribe("not-a-mime", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("respects enabledOverride when present", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(false),
+      getCachedScribeUrlImpl: scribePresent,
+      enabledOverride: true,
+    })).toBe(true);
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+      enabledOverride: false,
+    })).toBe(false);
+  });
+});

package/src/auto-transcribe.ts ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * Auto-transcribe trigger decision (vault#353, design 2026-05-21 Part 2).
+ *
+ * One pure function: given an attachment's mime-type + the operator's
+ * settings + whether scribe is reachable, decide whether to enqueue the
+ * attachment for the transcription worker. Lives in its own module so the
+ * attachment-write code path (`routes.ts`) and the retry endpoint share the
+ * same gate without duplicating logic.
+ */
+import { readGlobalConfig } from "./config.ts";
+import { getCachedScribeUrl } from "./scribe-discovery.ts";
+/**
+ * Pre-vault#353 callers passed `transcribe: true` explicitly on the
+ * attachment POST. The auto-transcribe path inlines the decision: if the
+ * upload is an audio mime-type AND the toggle is on AND a scribe URL is
+ * known, the worker is enqueued. This function is the single decision site.
+ *
+ * Returns `true` only when ALL three conditions hold:
+ *   1. mime-type starts with `audio/` (case-insensitive).
+ *   2. `opts.enabledOverride` ?? config `auto_transcribe?.enabled` is truthy.
+ *   3. The cached scribe URL is a non-blank string.
+ *
+ * Guards run in that order and short-circuit: the config is not read for
+ * non-audio input, and the scribe URL is not looked up when disabled.
+ */
+export function shouldAutoTranscribe(
+  mimeType: string,
+  opts: {
+    /** Injection seam for tests — defaults to live globals. */
+    readGlobalConfigImpl?: typeof readGlobalConfig;
+    getCachedScribeUrlImpl?: () => string | undefined;
+    /** Allow per-call enabled override — used by the explicit-opt-in path. */
+    enabledOverride?: boolean;
+  } = {},
+): boolean {
+  if (typeof mimeType !== "string" || !mimeType.toLowerCase().startsWith("audio/")) {
+    return false;
+  }
+  const enabled = opts.enabledOverride
+    ?? (opts.readGlobalConfigImpl ?? readGlobalConfig)().auto_transcribe?.enabled
+    ?? false;
+  if (!enabled) return false;
+  const url = (opts.getCachedScribeUrlImpl ?? getCachedScribeUrl)();
+  if (!url || !url.trim()) return false;
+  return true;
+}

package/src/backup.ts CHANGED Viewed

@@ -572,9 +572,21 @@ export async function runBackup(opts?: {
     vaultsDir: opts?.vaultsDir,
   });
+  // Write the tarball to a SIBLING tempdir, not inside stagingDir.
+  //
+  // Why: `assembleTarball` runs `tar -czf <out> -C <stagingDir> <entries>`
+  // where `entries = readdirSync(stagingDir)`. If the output path lives
+  // inside stagingDir (e.g. `stagingDir/__out__/...`), that subdir shows
+  // up in `entries` and tar enumerates it while ALSO writing to it.
+  // GNU tar (Linux) treats "file changed as we read it" as fatal and
+  // aborts; BSD tar (macOS) tolerates it. The sibling-tempdir layout
+  // keeps the output completely out of tar's input set on both platforms.
+  // See vault#363.
+  const outDir = mkdtempSync(join(tmpdir(), "parachute-backup-out-"));
   try {
     const tarName = backupFilename(timestamp);
-    const tarballPath = join(stagingDir, "__out__", tarName);
+    const tarballPath = join(outDir, tarName);
     await assembleTarball(stagingDir, tarballPath);
     const bytes = statSync(tarballPath).size;
@@ -594,9 +606,11 @@ export async function runBackup(opts?: {
     return { tarballPath, timestamp, bytes, destinations: results, contents };
   } finally {
-    // The staging dir has the only copy of the tarball that isn't at a
-    // destination; destinations have already been written. Safe to clean.
+    // Only the out dir holds a copy of the tarball that isn't at a
+    // destination (the staging dir holds just its inputs); destinations are
+    // already written, so clean both. The returned tarballPath is removed too.
     try { rmSync(stagingDir, { recursive: true, force: true }); } catch {}
+    try { rmSync(outDir, { recursive: true, force: true }); } catch {}
   }
 }