npm - @openparachute/vault - Versions diffs - 0.4.7-rc.2 → 0.4.8-rc.6 - Mend

@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/.parachute/module.json +0 -1
package/README.md +44 -10
package/core/src/connection-pragmas.test.ts +232 -0
package/core/src/core.test.ts +257 -0
package/core/src/cursor.test.ts +160 -0
package/core/src/cursor.ts +272 -0
package/core/src/mcp.ts +51 -7
package/core/src/notes.ts +164 -2
package/core/src/schema.ts +98 -2
package/core/src/store.ts +11 -1
package/core/src/types.ts +32 -0
package/package.json +1 -1
package/src/auth-status.ts +4 -0
package/src/auto-transcribe.test.ts +116 -0
package/src/auto-transcribe.ts +48 -0
package/src/cli.ts +57 -48
package/src/config.test.ts +26 -0
package/src/config.ts +53 -1
package/src/db.ts +15 -2
package/src/mcp-install-interactive.test.ts +23 -2
package/src/mcp-install-interactive.ts +21 -2
package/src/mcp-install.test.ts +40 -0
package/src/mcp-tools.ts +17 -1
package/src/module-config.ts +70 -14
package/src/module-manifest.test.ts +114 -0
package/src/module-manifest.ts +104 -0
package/src/routes.ts +268 -51
package/src/routing.test.ts +4 -2
package/src/routing.ts +4 -4
package/src/scribe-discovery.test.ts +77 -0
package/src/scribe-discovery.ts +91 -0
package/src/scribe-env.test.ts +66 -1
package/src/scribe-env.ts +42 -1
package/src/self-register.test.ts +379 -0
package/src/self-register.ts +234 -0
package/src/server.ts +46 -11
package/src/transcript-note.test.ts +171 -0
package/src/transcript-note.ts +189 -0
package/src/transcription-registry.ts +22 -0
package/src/transcription-worker.test.ts +250 -0
package/src/transcription-worker.ts +186 -27
package/src/vault.test.ts +347 -0

package/core/src/notes.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Database, type SQLQueryBindings } from "bun:sqlite";
-import type { Note, NoteIndex, QueryOpts, VaultStats } from "./types.js";
+import type { Note, NoteIndex, QueryOpts, QueryNotesPage, VaultStats } from "./types.js";
 import { normalizePath } from "./paths.js";
 import {
   buildOperatorClause,
@@ -7,6 +7,17 @@ import {
   QueryError,
   requireIndexedField,
 } from "./query-operators.js";
+import {
+  CURSOR_VERSION,
+  CursorError,
+  computeQueryHash,
+  decodeCursor,
+  encodeCursor,
+  isoToMillis,
+  millisToIso,
+  type CursorPayload,
+  type QueryHashInputs,
+} from "./cursor.js";
 let idCounter = 0;
@@ -663,9 +674,68 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
     }
   }
+  // ---- Cursor predicate (vault#313) ----
+  //
+  // When a cursor is present, decode it, verify its query_hash matches the
+  // current query, and add a keyset predicate of the form:
+  //
+  //   (updated_at > last_updated_at)
+  //     OR (updated_at = last_updated_at AND id > last_id)
+  //
+  // The cursor also forces ORDER BY n.updated_at ASC, n.id ASC so the
+  // watermark math is sound — paginating by updated_at while ordering
+  // by created_at would skip rows whose update timestamp differs from
+  // their creation timestamp. `orderBy` and `sort: "desc"` are mutually
+  // exclusive with cursor mode (a "since last checked" loop wants
+  // ascending updated_at, full stop); we reject with INVALID_QUERY so
+  // callers don't silently get a broken iteration.
+  let cursorPayload: CursorPayload | null = null;
+  if (opts.cursor) {
+    if (opts.orderBy) {
+      throw new QueryError(
+        `cursor and order_by are mutually exclusive — cursor pagination forces order by updated_at`,
+        "INVALID_QUERY",
+      );
+    }
+    if (opts.sort === "desc") {
+      throw new QueryError(
+        `cursor pagination requires ascending sort by updated_at — descending sort with a cursor would skip newly-written rows`,
+        "INVALID_QUERY",
+      );
+    }
+    cursorPayload = decodeCursor(opts.cursor);
+    const expectedHash = computeQueryHash(toQueryHashInputs(opts));
+    if (cursorPayload.query_hash !== expectedHash) {
+      throw new CursorError(
+        `cursor was minted for a different query — drop the cursor and restart iteration`,
+        "cursor_query_mismatch",
+      );
+    }
+    // Translate the millis watermark back to an ISO string for the SQL
+    // comparison. SQLite's `n.updated_at` is TEXT in canonical ISO form
+    // (the store's `toISOString()` output), and ISO timestamps sort
+    // lexicographically in the same order as their millisecond epochs
+    // when they all use the same canonical form — which every timestamp
+    // vault mints does. Cursors minted on heterogeneous timestamps
+    // (e.g. an import that preserved unusual formatting) are still
+    // safe: we round-trip the cursor's millis through `new Date()`'s
+    // canonical ISO so the comparison is apples-to-apples.
+    const cursorIso = millisToIso(cursorPayload.last_updated_at);
+    conditions.push(
+      "(n.updated_at > ? OR (n.updated_at = ? AND n.id > ?))",
+    );
+    params.push(cursorIso, cursorIso, cursorPayload.last_id);
+  }
   const direction = opts.sort === "desc" ? "DESC" : "ASC";
   let orderBy: string;
-  if (opts.orderBy) {
+  if (opts.cursor) {
+    // Cursor mode forces a deterministic keyset order. `id` is the
+    // tiebreaker — without it, two notes sharing an `updated_at` would
+    // be at the mercy of SQLite's row order and the next page could
+    // miss or duplicate one.
+    orderBy = "n.updated_at ASC, n.id ASC";
+  } else if (opts.orderBy) {
     requireIndexedField(db, opts.orderBy);
     // `orderBy` came from indexed_fields (validated on declaration), so
     // the column name is safe to interpolate. Append created_at as a
@@ -697,6 +767,98 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
   });
 }
+/**
+ * Extract the result-set-affecting subset of `QueryOpts` for cursor hashing.
+ *
+ * `cursor`, `limit`, `offset`, `_tagsExpanded` (internal cache key) are
+ * excluded — they don't change which rows match, just how many or how
+ * the iteration advances. See `core/src/cursor.ts` for the rationale.
+ */
+function toQueryHashInputs(opts: QueryOpts): QueryHashInputs {
+  return {
+    tags: opts.tags,
+    tagMatch: opts.tagMatch,
+    excludeTags: opts.excludeTags,
+    hasTags: opts.hasTags,
+    hasLinks: opts.hasLinks,
+    path: opts.path,
+    pathPrefix: opts.pathPrefix,
+    extension: opts.extension,
+    ids: opts.ids,
+    metadata: opts.metadata,
+    dateFrom: opts.dateFrom,
+    dateTo: opts.dateTo,
+    dateFilter: opts.dateFilter,
+    sort: opts.sort,
+    orderBy: opts.orderBy,
+  };
+}
+/**
+ * Cursor-paginated wrapper around `queryNotes` (vault#313).
+ *
+ * Always returns `{ notes, next_cursor }`. `next_cursor` is minted even on
+ * an empty result page — the caller can persist a single watermark and
+ * keep polling without special-casing the empty-page condition. The
+ * empty-page cursor's `last_updated_at` is:
+ *   - the prior cursor's `last_updated_at` (when `opts.cursor` was set), or
+ *   - 0 (when no cursor was passed).
+ *
+ * Holding the watermark at the prior value on an empty page is the
+ * conservative choice: if a note is written between this call and the
+ * next at a timestamp BEFORE wall-clock-now (clock skew, batch import
+ * with explicit `created_at`), advancing the watermark to `now()` would
+ * skip it. The watermark advances only when actual rows are returned.
+ *
+ * First-call semantics (`opts.cursor` absent): query_hash is computed
+ * from the result-set-affecting opts and bound into the minted cursor.
+ * If zero rows match, the returned cursor encodes
+ * `last_updated_at = 0, last_id = ""` so the next call returns
+ * everything written since (the keyset predicate
+ * `updated_at > 0 OR (updated_at = 0 AND id > "")` matches every row
+ * with a non-null `updated_at` greater than the unix epoch).
+ */
+export function queryNotesPaged(db: Database, opts: QueryOpts): QueryNotesPage {
+  const notes = queryNotes(db, opts);
+  const queryHash = computeQueryHash(toQueryHashInputs(opts));
+  // Watermark math: pick the larger of (last returned row, prior cursor
+  // watermark, sentinel). When the page is empty, fall back to the prior
+  // cursor's watermark — see the JSDoc rationale above.
+  let lastUpdatedAt = 0;
+  let lastId = "";
+  if (opts.cursor) {
+    // Re-decode (we already validated in queryNotes); this is cheap.
+    const prior = decodeCursor(opts.cursor);
+    lastUpdatedAt = prior.last_updated_at;
+    lastId = prior.last_id;
+  }
+  if (notes.length > 0) {
+    // queryNotes with a cursor orders by (updated_at ASC, id ASC), so
+    // the last note in the array is the new watermark. When no cursor
+    // was passed, the SQL is ordered by created_at; we still want the
+    // cursor to advance to the MAX (updated_at, id) of this page so
+    // the next call resumes correctly. Compute the max explicitly.
+    for (const note of notes) {
+      const updatedIso = note.updatedAt ?? note.createdAt;
+      const ms = isoToMillis(updatedIso);
+      if (ms > lastUpdatedAt || (ms === lastUpdatedAt && note.id > lastId)) {
+        lastUpdatedAt = ms;
+        lastId = note.id;
+      }
+    }
+  }
+  const next_cursor = encodeCursor({
+    v: CURSOR_VERSION,
+    last_updated_at: lastUpdatedAt,
+    last_id: lastId,
+    query_hash: queryHash,
+  });
+  return { notes, next_cursor };
+}
 export function searchNotes(
   db: Database,
   query: string,

package/core/src/schema.ts CHANGED Viewed

@@ -200,12 +200,108 @@ CREATE INDEX IF NOT EXISTS idx_links_target ON links(target_id);
 -- because migrateToV16 also runs the unconditional CREATE INDEX path.
 `;
+/**
+ * Connection-level pragmas applied on every Database open, in the order they
+ * appear here.
+ *
+ * `journal_mode = WAL` is a persistent, DB-level setting (lives in the SQLite
+ * header). Once any writer flips a DB into WAL it stays in WAL across opens
+ * and processes — so daemon + CLI + parachute-runner + any read-side tool
+ * see the same mode. Re-applying on every open is cheap and idempotent;
+ * SQLite returns the current mode either way.
+ *
+ * `synchronous = NORMAL` is the safe, recommended pairing with WAL per the
+ * SQLite docs: fsync only at checkpoint rather than on every commit. The
+ * database stays consistent (WAL frames are still ordered + checksummed); the
+ * cost vs FULL is durability — after a power loss or OS-level crash,
+ * transactions committed since the last WAL sync may roll back. Acceptable
+ * for a knowledge graph that's snapshotted by `VACUUM INTO` for backups.
+ *
+ * `wal_autocheckpoint = 1000` is SQLite's default; we set it explicitly so
+ * the contract is visible in code rather than implicit. 1000 pages ≈ 4MB
+ * before a passive checkpoint is triggered on the next write.
+ *
+ * `foreign_keys = ON` is per-connection (not persistent) — must be re-applied
+ * on every open. Migrations occasionally disable it transiently (see
+ * migrateToV14's BEGIN IMMEDIATE block); the boot path re-enables.
+ *
+ * WAL requires a filesystem that supports memory-mapped shared-memory
+ * (the `-shm` sidecar). NFS, some FUSE mounts, and a few Docker volume
+ * drivers don't qualify and silently fall back to the prior journal mode
+ * (typically `delete`). `applyConnectionPragmas` detects this and returns
+ * `wal: false` so the caller can log a warning — operators on those
+ * filesystems should know they've lost multi-process concurrency.
+ */
+const APPLY_PRAGMAS_LOGGED = new WeakSet<Database>();
+export interface ConnectionPragmaResult {
+  /** True when the connection ended up in WAL mode. False means the FS doesn't support WAL. */
+  wal: boolean;
+  /** The actual journal_mode SQLite reports — "wal", "delete", "memory", etc. */
+  journalMode: string;
+}
+/**
+ * Apply connection-level pragmas (journal mode, synchronous, FK enforcement)
+ * and verify WAL took effect. Idempotent — safe to call multiple times on
+ * the same connection. Logs a one-time warning per connection when WAL
+ * couldn't be applied.
+ *
+ * Exported for read-side callers (auth-status, mirror-manager, etc.) that
+ * open a Database directly without going through initSchema. Setting
+ * `journal_mode` on a read-only handle is a no-op but harmless; the
+ * useful state is set by whichever writer opens first.
+ */
+export function applyConnectionPragmas(db: Database): ConnectionPragmaResult {
+  // PRAGMA journal_mode returns a row { journal_mode: "wal" } on success.
+  // Use `.get()` (not `.exec()`) so we capture the result. Some bun:sqlite
+  // versions throw on readonly handles attempting to set journal_mode; treat
+  // that as "we couldn't set it, just read the current value" and recover.
+  let journalMode: string;
+  try {
+    const row = db.prepare("PRAGMA journal_mode = WAL").get() as { journal_mode?: string } | null;
+    journalMode = (row?.journal_mode ?? "").toLowerCase();
+  } catch {
+    // Most likely: readonly handle. Read-only opens never write the DB
+    // header, so they can't change journal_mode — but they can still query
+    // the current mode, which is set by the most recent writer.
+    const row = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null;
+    journalMode = (row?.journal_mode ?? "").toLowerCase();
+  }
+  const wal = journalMode === "wal";
+  // synchronous + wal_autocheckpoint only matter when WAL is active. They're
+  // harmless under DELETE mode but the rationale is WAL-specific, so gate
+  // them on the success path. Both are best-effort — wrap in try to keep
+  // readonly handles (which reject writes) from failing the whole open.
+  if (wal) {
+    try { db.exec("PRAGMA synchronous = NORMAL"); } catch {}
+    try { db.exec("PRAGMA wal_autocheckpoint = 1000"); } catch {}
+  } else if (journalMode !== "memory" && !APPLY_PRAGMAS_LOGGED.has(db)) {
+    // `journalMode === "memory"` ⇒ this is a `:memory:` database, an
+    // explicit choice (tests, ephemeral probes) rather than a filesystem
+    // limitation. Suppress the warning so the test suite stays quiet;
+    // real on-disk vaults that can't host WAL (NFS, some FUSE/Docker
+    // volume drivers) still surface the diagnostic.
+    APPLY_PRAGMAS_LOGGED.add(db);
+    // eslint-disable-next-line no-console
+    console.warn(
+      `[vault] WAL mode could not be enabled (journal_mode=${journalMode || "unknown"}). ` +
+      `The underlying filesystem may not support WAL (NFS, some FUSE/Docker volume drivers). ` +
+      `Multi-process concurrent access will be limited to a single writer at a time.`,
+    );
+  }
+  try { db.exec("PRAGMA foreign_keys = ON"); } catch {}
+  return { wal, journalMode };
+}
 /**
  * Initialize database schema. Idempotent — safe to call on every startup.
  */
 export function initSchema(db: Database): void {
-  db.exec("PRAGMA journal_mode = WAL");
-  db.exec("PRAGMA foreign_keys = ON");
+  applyConnectionPragmas(db);
   // Check if we need to migrate from v2
   const hasOldTables = hasTable(db, "things");

package/core/src/store.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Database } from "bun:sqlite";
-import type { Store, Note, Link, Attachment, QueryOpts } from "./types.js";
+import type { Store, Note, Link, Attachment, QueryOpts, QueryNotesPage } from "./types.js";
 import { initSchema } from "./schema.js";
 import * as noteOps from "./notes.js";
 import * as linkOps from "./links.js";
@@ -227,6 +227,16 @@ export class BunSqliteStore implements Store {
     return noteOps.queryNotes(this.db, this.expandQueryTags(opts));
   }
+  async queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage> {
+    // Hierarchy expansion happens internally — but importantly the cursor's
+    // query_hash is computed from the CALLER'S opts (pre-expansion:
+    // `toQueryHashInputs` hashes `tags` and leaves out `_tagsExpanded`), so a
+    // tag hierarchy edit between calls does NOT invalidate the cursor — the
+    // next page is filtered with whatever descendant set exists at that call.
+    // Hashing the expanded set instead would reject the cursor after such an edit.
+    return noteOps.queryNotesPaged(this.db, this.expandQueryTags(opts));
+  }
   /**
    * If `tags` are present, attach a parallel `_tagsExpanded` array where
    * each input tag is replaced with `{tag} ∪ descendants(tag)`. The SQL

package/core/src/types.ts CHANGED Viewed

@@ -116,6 +116,30 @@ export interface QueryOpts {
   orderBy?: string;
   limit?: number;
   offset?: number;
+  /**
+   * Opaque cursor for "since last checked" agent loops (vault#313).
+   * When passed, the engine decodes it, verifies its `query_hash` matches
+   * the current query (mismatch → CursorError `cursor_query_mismatch`),
+   * and adds a keyset predicate that returns only rows newer than the
+   * cursor's `updated_at`/`id` watermark. Forces `orderBy = updated_at`
+   * (with `id` as a stable tiebreaker) so the watermark math is sound.
+   *
+   * Cursors are minted by `queryNotesPaged` (engine) and surfaced via
+   * the `query-notes` MCP tool's `next_cursor` field; callers should
+   * treat the string as opaque.
+   */
+  cursor?: string;
+}
+/**
+ * Cursor-paginated query result (vault#313). Returned by
+ * `queryNotesPaged`/`storeQueryNotesPaged`. `next_cursor` is always present —
+ * even on an empty result page, where it carries the prior watermark
+ * forward — so an agent loop can persist a single watermark and keep polling.
+ */
+export interface QueryNotesPage {
+  notes: Note[];
+  next_cursor: string;
 }
 /** Note summary — everything except content. Used in link results. */
@@ -184,6 +208,14 @@ export interface Store {
   syncAllWikilinks(): Promise<{ synced: number; totalAdded: number; totalRemoved: number }>;
   deleteNote(id: string): Promise<void>;
   queryNotes(opts: QueryOpts): Promise<Note[]>;
+  /**
+   * Cursor-paginated `queryNotes` (vault#313). Returns the same notes plus
+   * an opaque `next_cursor` string the caller can pass on the next call
+   * to resume from the watermark of the LAST returned row. The cursor is
+   * always present in the response — even on an empty page — so an
+   * agent loop can persist a single watermark and keep polling.
+   */
+  queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage>;
   searchNotes(query: string, opts?: { tags?: string[]; limit?: number }): Promise<Note[]>;
   // Tags

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@openparachute/vault",
-  "version": "0.4.7-rc.2",
+  "version": "0.4.8-rc.6",
   "description": "Agent-native knowledge graph. Notes, tags, links over MCP.",
   "module": "src/cli.ts",
   "type": "module",

package/src/auth-status.ts CHANGED Viewed

@@ -40,6 +40,10 @@ export interface AuthStatusResponse {
  * caller's signal to degrade `hasTokens` to `null`.
  */
 function vaultHasTokens(dbPath: string): boolean {
+  // Readonly handle — no pragma application here. Journal mode is a
+  // persistent DB-header setting written by the first writer (the daemon's
+  // BunSqliteStore via openVaultDb), so this probe sees WAL automatically
+  // and is safe under concurrent writes.
   const db = new Database(dbPath, { readonly: true });
   try {
     const row = db.prepare("SELECT 1 FROM tokens LIMIT 1").get();

package/src/auto-transcribe.test.ts ADDED Viewed

@@ -0,0 +1,116 @@
+/**
+ * Auto-transcribe gating decisions (vault#353).
+ *
+ * Three independent guards: mime-type prefix, enabled toggle, scribe URL
+ * present. Pure function — exercise the truth table.
+ */
+import { describe, test, expect } from "bun:test";
+import { shouldAutoTranscribe } from "./auto-transcribe.ts";
+function readGlobalConfig(enabled: boolean | undefined) {
+  return () => ({
+    port: 1940,
+    ...(enabled !== undefined ? { auto_transcribe: { enabled } } : {}),
+  }) as any;
+}
+describe("shouldAutoTranscribe", () => {
+  const scribePresent = () => "http://127.0.0.1:1943";
+  const scribeAbsent = () => undefined;
+  test("triggers on audio/* mime-type when enabled + scribe reachable", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("triggers on audio/mp4 (m4a)", () => {
+    expect(shouldAutoTranscribe("audio/mp4", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("triggers on audio/webm", () => {
+    expect(shouldAutoTranscribe("audio/webm", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("triggers case-insensitively (AUDIO/WAV)", () => {
+    expect(shouldAutoTranscribe("AUDIO/WAV", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(true);
+  });
+  test("skips non-audio mime-types (image/png, application/pdf, video/mp4)", () => {
+    expect(shouldAutoTranscribe("image/png", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+    expect(shouldAutoTranscribe("application/pdf", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+    expect(shouldAutoTranscribe("video/mp4", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("skips when enabled is false (default off)", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(false),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("skips when enabled is unset (no auto_transcribe block in config)", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(undefined),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("skips when scribe URL is undefined (no services.json entry, no env)", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribeAbsent,
+    })).toBe(false);
+  });
+  test("skips when scribe URL is empty string", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: () => "",
+    })).toBe(false);
+  });
+  test("skips on garbage mime-type input", () => {
+    expect(shouldAutoTranscribe("", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+    expect(shouldAutoTranscribe("not-a-mime", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+    })).toBe(false);
+  });
+  test("respects enabledOverride when present", () => {
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(false),
+      getCachedScribeUrlImpl: scribePresent,
+      enabledOverride: true,
+    })).toBe(true);
+    expect(shouldAutoTranscribe("audio/wav", {
+      readGlobalConfigImpl: readGlobalConfig(true),
+      getCachedScribeUrlImpl: scribePresent,
+      enabledOverride: false,
+    })).toBe(false);
+  });
+});

package/src/auto-transcribe.ts ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * Auto-transcribe trigger decision (vault#353, design 2026-05-21 Part 2).
+ *
+ * One pure function: given an attachment's mime-type + the operator's
+ * settings + whether scribe is reachable, decide whether to enqueue the
+ * attachment for the transcription worker. Lives in its own module so the
+ * attachment-write code path (`routes.ts`) and the retry endpoint share the
+ * same gate without duplicating logic.
+ */
+import { readGlobalConfig } from "./config.ts";
+import { getCachedScribeUrl } from "./scribe-discovery.ts";
+/**
+ * Pre-vault#353 callers passed `transcribe: true` explicitly on the
+ * attachment POST. The auto-transcribe path inlines the decision: if the
+ * upload is an audio mime-type AND the toggle is on AND scribe is reachable,
+ * the worker is enqueued. This function is the single decision site.
+ *
+ * Returns `true` only when ALL three conditions hold:
+ *   1. mime-type starts with `audio/` (case-insensitive).
+ *   2. `opts.enabledOverride` when provided, otherwise `globalConfig.auto_transcribe?.enabled`, is truthy.
+ *   3. Scribe is discoverable (services.json entry OR SCRIBE_URL env).
+ *
+ * The three conditions are independent guards: a single `false` is sufficient
+ * to skip enqueuing. The audio stays as a regular attachment in that case.
+ */
+export function shouldAutoTranscribe(
+  mimeType: string,
+  opts: {
+    /** Injection seam for tests — defaults to live globals. */
+    readGlobalConfigImpl?: typeof readGlobalConfig;
+    getCachedScribeUrlImpl?: () => string | undefined;
+    /** Allow per-call enabled override — used by the explicit-opt-in path. */
+    enabledOverride?: boolean;
+  } = {},
+): boolean {
+  if (typeof mimeType !== "string" || !mimeType.toLowerCase().startsWith("audio/")) {
+    return false;
+  }
+  const enabled = opts.enabledOverride
+    ?? (opts.readGlobalConfigImpl ?? readGlobalConfig)().auto_transcribe?.enabled
+    ?? false;
+  if (!enabled) return false;
+  const url = (opts.getCachedScribeUrlImpl ?? getCachedScribeUrl)();
+  if (!url || !url.trim()) return false;
+  return true;
+}