@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/.parachute/module.json +0 -1
  2. package/README.md +44 -10
  3. package/core/src/connection-pragmas.test.ts +232 -0
  4. package/core/src/core.test.ts +257 -0
  5. package/core/src/cursor.test.ts +160 -0
  6. package/core/src/cursor.ts +272 -0
  7. package/core/src/mcp.ts +51 -7
  8. package/core/src/notes.ts +164 -2
  9. package/core/src/schema.ts +98 -2
  10. package/core/src/store.ts +11 -1
  11. package/core/src/types.ts +32 -0
  12. package/package.json +1 -1
  13. package/src/auth-status.ts +4 -0
  14. package/src/auto-transcribe.test.ts +116 -0
  15. package/src/auto-transcribe.ts +48 -0
  16. package/src/cli.ts +57 -48
  17. package/src/config.test.ts +26 -0
  18. package/src/config.ts +53 -1
  19. package/src/db.ts +15 -2
  20. package/src/mcp-install-interactive.test.ts +23 -2
  21. package/src/mcp-install-interactive.ts +21 -2
  22. package/src/mcp-install.test.ts +40 -0
  23. package/src/mcp-tools.ts +17 -1
  24. package/src/module-config.ts +70 -14
  25. package/src/module-manifest.test.ts +114 -0
  26. package/src/module-manifest.ts +104 -0
  27. package/src/routes.ts +268 -51
  28. package/src/routing.test.ts +4 -2
  29. package/src/routing.ts +4 -4
  30. package/src/scribe-discovery.test.ts +77 -0
  31. package/src/scribe-discovery.ts +91 -0
  32. package/src/scribe-env.test.ts +66 -1
  33. package/src/scribe-env.ts +42 -1
  34. package/src/self-register.test.ts +379 -0
  35. package/src/self-register.ts +234 -0
  36. package/src/server.ts +46 -11
  37. package/src/transcript-note.test.ts +171 -0
  38. package/src/transcript-note.ts +189 -0
  39. package/src/transcription-registry.ts +22 -0
  40. package/src/transcription-worker.test.ts +250 -0
  41. package/src/transcription-worker.ts +186 -27
  42. package/src/vault.test.ts +347 -0
package/core/src/notes.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { Database, type SQLQueryBindings } from "bun:sqlite";
2
- import type { Note, NoteIndex, QueryOpts, VaultStats } from "./types.js";
2
+ import type { Note, NoteIndex, QueryOpts, QueryNotesPage, VaultStats } from "./types.js";
3
3
  import { normalizePath } from "./paths.js";
4
4
  import {
5
5
  buildOperatorClause,
@@ -7,6 +7,17 @@ import {
7
7
  QueryError,
8
8
  requireIndexedField,
9
9
  } from "./query-operators.js";
10
+ import {
11
+ CURSOR_VERSION,
12
+ CursorError,
13
+ computeQueryHash,
14
+ decodeCursor,
15
+ encodeCursor,
16
+ isoToMillis,
17
+ millisToIso,
18
+ type CursorPayload,
19
+ type QueryHashInputs,
20
+ } from "./cursor.js";
10
21
 
11
22
  let idCounter = 0;
12
23
 
@@ -663,9 +674,68 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
663
674
  }
664
675
  }
665
676
 
677
+ // ---- Cursor predicate (vault#313) ----
678
+ //
679
+ // When a cursor is present, decode it, verify its query_hash matches the
680
+ // current query, and add a keyset predicate of the form:
681
+ //
682
+ // (updated_at > last_updated_at)
683
+ // OR (updated_at = last_updated_at AND id > last_id)
684
+ //
685
+ // The cursor also forces ORDER BY n.updated_at ASC, n.id ASC so the
686
+ // watermark math is sound — paginating by updated_at while ordering
687
+ // by created_at would skip rows whose update timestamp differs from
688
+ // their creation timestamp. `orderBy` and `sort: "desc"` are mutually
689
+ // exclusive with cursor mode (a "since last checked" loop wants
690
+ // ascending updated_at, full stop); we reject with INVALID_QUERY so
691
+ // callers don't silently get a broken iteration.
692
+ let cursorPayload: CursorPayload | null = null;
693
+ if (opts.cursor) {
694
+ if (opts.orderBy) {
695
+ throw new QueryError(
696
+ `cursor and order_by are mutually exclusive — cursor pagination forces order by updated_at`,
697
+ "INVALID_QUERY",
698
+ );
699
+ }
700
+ if (opts.sort === "desc") {
701
+ throw new QueryError(
702
+ `cursor pagination requires ascending sort by updated_at — descending sort with a cursor would skip newly-written rows`,
703
+ "INVALID_QUERY",
704
+ );
705
+ }
706
+ cursorPayload = decodeCursor(opts.cursor);
707
+ const expectedHash = computeQueryHash(toQueryHashInputs(opts));
708
+ if (cursorPayload.query_hash !== expectedHash) {
709
+ throw new CursorError(
710
+ `cursor was minted for a different query — drop the cursor and restart iteration`,
711
+ "cursor_query_mismatch",
712
+ );
713
+ }
714
+ // Translate the millis watermark back to an ISO string for the SQL
715
+ // comparison. SQLite's `n.updated_at` is TEXT in canonical ISO form
716
+ // (the store's `toISOString()` output), and ISO timestamps sort
717
+ // lexicographically in the same order as their millisecond epochs
718
+ // when they all use the same canonical form — which every timestamp
719
+ // vault mints does. Cursors minted on heterogeneous timestamps
720
+ // (e.g. an import that preserved unusual formatting) are still
721
+ // safe: we round-trip the cursor's millis through `new Date()`'s
722
+ // canonical ISO so the comparison is apples-to-apples.
723
+ const cursorIso = millisToIso(cursorPayload.last_updated_at);
724
+ conditions.push(
725
+ "(n.updated_at > ? OR (n.updated_at = ? AND n.id > ?))",
726
+ );
727
+ params.push(cursorIso, cursorIso, cursorPayload.last_id);
728
+ }
729
+
666
730
  const direction = opts.sort === "desc" ? "DESC" : "ASC";
667
731
  let orderBy: string;
668
- if (opts.orderBy) {
732
+ if (opts.cursor) {
733
+ // Cursor mode forces a deterministic keyset order. `id` is the
734
+ // tiebreaker — without it, two notes sharing an `updated_at` would
735
+ // be at the mercy of SQLite's row order and the next page could
736
+ // miss or duplicate one.
737
+ orderBy = "n.updated_at ASC, n.id ASC";
738
+ } else if (opts.orderBy) {
669
739
  requireIndexedField(db, opts.orderBy);
670
740
  // `orderBy` came from indexed_fields (validated on declaration), so
671
741
  // the column name is safe to interpolate. Append created_at as a
@@ -697,6 +767,98 @@ export function queryNotes(db: Database, opts: QueryOpts): Note[] {
697
767
  });
698
768
  }
699
769
 
770
+ /**
771
+ * Project `QueryOpts` onto the fields that feed the cursor's query hash.
772
+ *
773
+ * `cursor`, `limit`, `offset`, `_tagsExpanded` (internal cache key) are
774
+ * excluded — they don't change which rows match, just how many or how
775
+ * the iteration advances. See `core/src/cursor.ts` for the rationale.
+ *
+ * `sort` and `orderBy` are copied through as well, although they only
+ * affect ordering. NOTE(review): that presumably means changing either
+ * between calls changes the hash — confirm in `computeQueryHash`.
776
+ */
777
+ function toQueryHashInputs(opts: QueryOpts): QueryHashInputs {
778
+ return {
779
+ tags: opts.tags,
780
+ tagMatch: opts.tagMatch,
781
+ excludeTags: opts.excludeTags,
782
+ hasTags: opts.hasTags,
783
+ hasLinks: opts.hasLinks,
784
+ path: opts.path,
785
+ pathPrefix: opts.pathPrefix,
786
+ extension: opts.extension,
787
+ ids: opts.ids,
788
+ metadata: opts.metadata,
789
+ dateFrom: opts.dateFrom,
790
+ dateTo: opts.dateTo,
791
+ dateFilter: opts.dateFilter,
792
+ sort: opts.sort,
793
+ orderBy: opts.orderBy,
794
+ };
795
+ }
796
+
797
+ /**
798
+ * Cursor-paginated wrapper around `queryNotes` (vault#313).
799
+ *
800
+ * Always returns `{ notes, next_cursor }`. `next_cursor` is minted even on
801
+ * an empty result page — the caller can persist a single watermark and
802
+ * keep polling without special-casing the empty-page condition. The
803
+ * empty-page cursor's watermark is:
804
+ * - the prior cursor's `last_updated_at` / `last_id`, unchanged (when `opts.cursor` was set), or
805
+ * - the sentinel `last_updated_at = 0, last_id = ""` (when it was not).
806
+ *
807
+ * Holding the watermark at the prior value on an empty page is the
808
+ * conservative choice: if a note is written between this call and the
809
+ * next at a timestamp BEFORE wall-clock-now (clock skew, batch import
810
+ * with explicit `created_at`), advancing the watermark to `now()` would
811
+ * skip it. The watermark advances only when actual rows are returned.
812
+ *
813
+ * First-call semantics (`opts.cursor` absent): query_hash is computed
814
+ * from the result-set-affecting opts and bound into the minted cursor.
815
+ * If zero rows match, the returned cursor encodes
816
+ * `last_updated_at = 0, last_id = ""` so the next call returns
817
+ * everything written since (the keyset predicate
818
+ * `updated_at > 0 OR (updated_at = 0 AND id > "")` matches every row
819
+ * with a non-null `updated_at` greater than the unix epoch).
+ *
+ * NOTE(review): on a first call (no cursor) the page comes back in
+ * `queryNotes`' non-cursor order, yet the minted watermark is the MAX
+ * (updated_at, id) of that page. If the page was truncated by `limit`,
+ * matching rows outside it with a smaller `updated_at` would presumably
+ * never be returned by later cursor calls — verify against `queryNotes`.
+ *
+ * Nothing is caught here: errors raised by `queryNotes` (e.g. `QueryError`
+ * when `cursor` is combined with `orderBy` or `sort: "desc"`, `CursorError`
+ * on a query-hash mismatch) propagate to the caller unchanged.
+ *
+ * @param db Open database handle, passed straight to `queryNotes`.
+ * @param opts Query options; `opts.cursor`, when set, is a cursor string from a previous page.
+ * @returns The page of notes plus the cursor to pass on the next call.
820
+ */
821
+ export function queryNotesPaged(db: Database, opts: QueryOpts): QueryNotesPage {
822
+ const notes = queryNotes(db, opts);
823
+ const queryHash = computeQueryHash(toQueryHashInputs(opts));
824
+
825
+ // Watermark math: pick the larger of (last returned row, prior cursor
826
+ // watermark, sentinel). When the page is empty, fall back to the prior
827
+ // cursor's watermark — see the JSDoc rationale above.
828
+ let lastUpdatedAt = 0;
829
+ let lastId = "";
830
+ if (opts.cursor) {
831
+ // Re-decode (we already validated in queryNotes); this is cheap.
832
+ const prior = decodeCursor(opts.cursor);
833
+ lastUpdatedAt = prior.last_updated_at;
834
+ lastId = prior.last_id;
835
+ }
836
+ if (notes.length > 0) {
837
+ // queryNotes with a cursor orders by (updated_at ASC, id ASC), so
838
+ // the last note in the array is the new watermark. When no cursor
839
+ // was passed, the SQL is ordered by created_at; we still want the
840
+ // cursor to advance to the MAX (updated_at, id) of this page so
841
+ // the next call resumes correctly. Compute the max explicitly.
842
+ for (const note of notes) {
843
+ const updatedIso = note.updatedAt ?? note.createdAt;
844
+ const ms = isoToMillis(updatedIso);
845
+ if (ms > lastUpdatedAt || (ms === lastUpdatedAt && note.id > lastId)) {
846
+ lastUpdatedAt = ms;
847
+ lastId = note.id;
848
+ }
849
+ }
850
+ }
851
+
852
+ const next_cursor = encodeCursor({
853
+ v: CURSOR_VERSION,
854
+ last_updated_at: lastUpdatedAt,
855
+ last_id: lastId,
856
+ query_hash: queryHash,
857
+ });
858
+
859
+ return { notes, next_cursor };
860
+ }
861
+
700
862
  export function searchNotes(
701
863
  db: Database,
702
864
  query: string,
@@ -200,12 +200,108 @@ CREATE INDEX IF NOT EXISTS idx_links_target ON links(target_id);
200
200
  -- because migrateToV16 also runs the unconditional CREATE INDEX path.
201
201
  `;
202
202
 
203
+ /**
204
+ * Connection-level pragmas applied on every Database open, in the order they
205
+ * appear here.
206
+ *
207
+ * `journal_mode = WAL` is a persistent, DB-level setting (lives in the SQLite
208
+ * header). Once any writer flips a DB into WAL it stays in WAL across opens
209
+ * and processes — so daemon + CLI + parachute-runner + any read-side tool
210
+ * see the same mode. Re-applying on every open is cheap and idempotent;
211
+ * SQLite returns the current mode either way.
212
+ *
213
+ * `synchronous = NORMAL` is the safe, recommended pairing with WAL per the
214
+ * SQLite docs: fsync only at checkpoint rather than on every commit. Crash
215
+ * safety is preserved (WAL frames are still ordered + checksummed); the only
216
+ * cost vs FULL is that an OS-level crash *between* checkpoints might lose
217
+ * the last transaction. Acceptable for a knowledge graph that's snapshotted
218
+ * by `VACUUM INTO` for backups.
219
+ *
220
+ * `wal_autocheckpoint = 1000` is SQLite's default; we set it explicitly so
221
+ * the contract is visible in code rather than implicit. 1000 pages ≈ 4MB
222
+ * before a passive checkpoint is triggered on the next write.
223
+ *
224
+ * `foreign_keys = ON` is per-connection (not persistent) — must be re-applied
225
+ * on every open. Migrations occasionally disable it transiently (see
226
+ * migrateToV14's BEGIN IMMEDIATE block); the boot path re-enables.
227
+ *
228
+ * WAL requires a filesystem that supports memory-mapped shared-memory
229
+ * (the `-shm` sidecar). NFS, some FUSE mounts, and a few Docker volume
230
+ * drivers don't qualify and silently fall back to the prior journal mode
231
+ * (typically `delete`). `applyConnectionPragmas` detects this and returns
232
+ * `wal: false` so the caller can log a warning — operators on those
233
+ * filesystems should know they've lost multi-process concurrency.
+ *
+ * The constant declared directly below is not itself a pragma: it is the
+ * set of connections for which the "WAL could not be enabled" warning has
+ * already been printed, which is how `applyConnectionPragmas` warns at
+ * most once per `Database` object.
234
+ */
235
+ const APPLY_PRAGMAS_LOGGED = new WeakSet<Database>();
236
+
237
+ export interface ConnectionPragmaResult {
238
+ /** True when the connection ended up in WAL mode. False means WAL is not active — e.g. the filesystem can't host it, or this is a `:memory:` database. */
239
+ wal: boolean;
240
+ /** The actual journal_mode SQLite reports, lower-cased — "wal", "delete", "memory", etc. Empty string when the pragma returned no row. */
241
+ journalMode: string;
242
+ }
243
+
244
+ /**
245
+ * Apply connection-level pragmas (journal mode, synchronous,
246
+ * wal_autocheckpoint, FK enforcement) and report whether WAL took effect.
247
+ * Idempotent — safe to call multiple times on the same connection. Logs a
248
+ * warning at most once per connection when WAL couldn't be applied; the
+ * warning is suppressed when the reported mode is "memory".
249
+ *
250
+ * Exported for read-side callers (auth-status, mirror-manager, etc.) that
251
+ * open a Database directly without going through initSchema. Setting
252
+ * `journal_mode` on a read-only handle either changes nothing or throws
+ * (the throw is caught and the current mode is read back instead); the
253
+ * useful state is set by whichever writer opens first.
+ *
+ * `synchronous`, `wal_autocheckpoint` and `foreign_keys` are best-effort:
+ * a failure to set any of them is swallowed and does not affect the result.
+ *
+ * @param db Open connection to configure.
+ * @returns `journalMode` as reported by SQLite (lower-cased, "" when no row came back) and `wal`, which is `journalMode === "wal"`.
254
+ */
255
+ export function applyConnectionPragmas(db: Database): ConnectionPragmaResult {
256
+ // PRAGMA journal_mode returns a row { journal_mode: "wal" } on success.
257
+ // Use `.get()` (not `.exec()`) so we capture the result. Some bun:sqlite
258
+ // versions throw on readonly handles attempting to set journal_mode; treat
259
+ // that as "we couldn't set it, just read the current value" and recover.
260
+ let journalMode: string;
261
+ try {
262
+ const row = db.prepare("PRAGMA journal_mode = WAL").get() as { journal_mode?: string } | null;
263
+ journalMode = (row?.journal_mode ?? "").toLowerCase();
264
+ } catch {
265
+ // Most likely: readonly handle. Read-only opens never write the DB
266
+ // header, so they can't change journal_mode — but they can still query
267
+ // the current mode, which is set by the most recent writer.
268
+ const row = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null;
269
+ journalMode = (row?.journal_mode ?? "").toLowerCase();
270
+ }
271
+ const wal = journalMode === "wal";
272
+
273
+ // synchronous + wal_autocheckpoint only matter when WAL is active. They're
274
+ // harmless under DELETE mode but the rationale is WAL-specific, so gate
275
+ // them on the success path. Both are best-effort — wrap in try to keep
276
+ // readonly handles (which reject writes) from failing the whole open.
277
+ if (wal) {
278
+ try { db.exec("PRAGMA synchronous = NORMAL"); } catch {}
279
+ try { db.exec("PRAGMA wal_autocheckpoint = 1000"); } catch {}
280
+ } else if (journalMode !== "memory" && !APPLY_PRAGMAS_LOGGED.has(db)) {
281
+ // `journalMode === "memory"` ⇒ this is a `:memory:` database, an
282
+ // explicit choice (tests, ephemeral probes) rather than a filesystem
283
+ // limitation. Suppress the warning so the test suite stays quiet;
284
+ // real on-disk vaults that can't host WAL (NFS, some FUSE/Docker
285
+ // volume drivers) still surface the diagnostic.
286
+ APPLY_PRAGMAS_LOGGED.add(db);
287
+ // eslint-disable-next-line no-console
288
+ console.warn(
289
+ `[vault] WAL mode could not be enabled (journal_mode=${journalMode || "unknown"}). ` +
290
+ `The underlying filesystem may not support WAL (NFS, some FUSE/Docker volume drivers). ` +
291
+ `Multi-process concurrent access will be limited to a single writer at a time.`,
292
+ );
293
+ }
294
+
295
+ try { db.exec("PRAGMA foreign_keys = ON"); } catch {}
296
+
297
+ return { wal, journalMode };
298
+ }
299
+
203
300
  /**
204
301
  * Initialize database schema. Idempotent — safe to call on every startup.
205
302
  */
206
303
  export function initSchema(db: Database): void {
207
- db.exec("PRAGMA journal_mode = WAL");
208
- db.exec("PRAGMA foreign_keys = ON");
304
+ applyConnectionPragmas(db);
209
305
 
210
306
  // Check if we need to migrate from v2
211
307
  const hasOldTables = hasTable(db, "things");
package/core/src/store.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { Database } from "bun:sqlite";
2
- import type { Store, Note, Link, Attachment, QueryOpts } from "./types.js";
2
+ import type { Store, Note, Link, Attachment, QueryOpts, QueryNotesPage } from "./types.js";
3
3
  import { initSchema } from "./schema.js";
4
4
  import * as noteOps from "./notes.js";
5
5
  import * as linkOps from "./links.js";
@@ -227,6 +227,16 @@ export class BunSqliteStore implements Store {
227
227
  return noteOps.queryNotes(this.db, this.expandQueryTags(opts));
228
228
  }
229
229
 
230
+ async queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage> {
231
+ // Hierarchy expansion happens internally. The cursor's query_hash is built
232
+ // from the caller-visible fields only: `toQueryHashInputs` (notes.ts) reads
233
+ // `tags` and leaves `_tagsExpanded` out. A tag hierarchy edit between calls
234
+ // therefore does NOT change the hash — the cursor stays valid and later pages
235
+ // match the current descendant set. NOTE(review): if such an edit is meant to
236
+ // invalidate the cursor, the expanded set would have to be hashed — confirm.
237
+ return noteOps.queryNotesPaged(this.db, this.expandQueryTags(opts));
238
+ }
239
+
230
240
  /**
231
241
  * If `tags` are present, attach a parallel `_tagsExpanded` array where
232
242
  * each input tag is replaced with `{tag} ∪ descendants(tag)`. The SQL
package/core/src/types.ts CHANGED
@@ -116,6 +116,30 @@ export interface QueryOpts {
116
116
  orderBy?: string;
117
117
  limit?: number;
118
118
  offset?: number;
119
+ /**
120
+ * Opaque cursor for "since last checked" agent loops (vault#313).
121
+ * When passed, the engine decodes it, verifies its `query_hash` matches
122
+ * the current query (mismatch → CursorError `cursor_query_mismatch`),
123
+ * and adds a keyset predicate that returns only rows newer than the
124
+ * cursor's `updated_at`/`id` watermark. Forces `orderBy = updated_at`
125
+ * (with `id` as a stable tiebreaker) so the watermark math is sound.
126
+ *
127
+ * Cursors are minted by `queryNotesPaged` (engine) and surfaced via
128
+ * the `query-notes` MCP tool's `next_cursor` field; callers should
129
+ * treat the string as opaque.
130
+ */
131
+ cursor?: string;
132
+ }
133
+
134
+ /**
135
+ * Cursor-paginated query result (vault#313). Returned by
136
+ * `queryNotesPaged`/`storeQueryNotesPaged`. `next_cursor` is always present —
137
+ * even on an empty result page, where it carries the previous watermark
138
+ * forward unchanged — so an agent loop can persist a single watermark and keep polling.
139
+ */
140
+ export interface QueryNotesPage {
141
+ notes: Note[];
142
+ next_cursor: string;
119
143
  }
120
144
 
121
145
  /** Note summary — everything except content. Used in link results. */
@@ -184,6 +208,14 @@ export interface Store {
184
208
  syncAllWikilinks(): Promise<{ synced: number; totalAdded: number; totalRemoved: number }>;
185
209
  deleteNote(id: string): Promise<void>;
186
210
  queryNotes(opts: QueryOpts): Promise<Note[]>;
211
+ /**
212
+ * Cursor-paginated `queryNotes` (vault#313). Returns the same notes plus
213
+ * an opaque `next_cursor` string the caller can pass on the next call
214
+ * to resume from the watermark of the LAST returned row. The cursor is
215
+ * always present in the response — even on an empty page — so an
216
+ * agent loop can persist a single watermark and keep polling.
217
+ */
218
+ queryNotesPaged(opts: QueryOpts): Promise<QueryNotesPage>;
187
219
  searchNotes(query: string, opts?: { tags?: string[]; limit?: number }): Promise<Note[]>;
188
220
 
189
221
  // Tags
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openparachute/vault",
3
- "version": "0.4.7-rc.2",
3
+ "version": "0.4.8-rc.6",
4
4
  "description": "Agent-native knowledge graph. Notes, tags, links over MCP.",
5
5
  "module": "src/cli.ts",
6
6
  "type": "module",
@@ -40,6 +40,10 @@ export interface AuthStatusResponse {
40
40
  * caller's signal to degrade `hasTokens` to `null`.
41
41
  */
42
42
  function vaultHasTokens(dbPath: string): boolean {
43
+ // Readonly handle — no pragma application here. Journal mode is a
44
+ // persistent DB-header setting written by the first writer (the daemon's
45
+ // BunSqliteStore via openVaultDb), so this probe sees WAL automatically
46
+ // and is safe under concurrent writes.
43
47
  const db = new Database(dbPath, { readonly: true });
44
48
  try {
45
49
  const row = db.prepare("SELECT 1 FROM tokens LIMIT 1").get();
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Auto-transcribe gating decisions (vault#353).
3
+ *
4
+ * Three independent guards: mime-type prefix, enabled toggle, scribe URL
5
+ * present. Pure function — exercise the truth table.
+ *
+ * Config and scribe discovery are stubbed through the
+ * `readGlobalConfigImpl` / `getCachedScribeUrlImpl` options, so every case
+ * passes its own config and scribe URL. The final case checks that
+ * `enabledOverride` wins over the config toggle in both directions.
6
+ */
7
+
8
+ import { describe, test, expect } from "bun:test";
9
+ import { shouldAutoTranscribe } from "./auto-transcribe.ts";
10
+
11
+ function readGlobalConfig(enabled: boolean | undefined) {
12
+ return () => ({
13
+ port: 1940,
14
+ ...(enabled !== undefined ? { auto_transcribe: { enabled } } : {}),
15
+ }) as any;
16
+ }
17
+
18
+ describe("shouldAutoTranscribe", () => {
19
+ const scribePresent = () => "http://127.0.0.1:1943";
20
+ const scribeAbsent = () => undefined;
21
+
22
+ test("triggers on audio/* mime-type when enabled + scribe reachable", () => {
23
+ expect(shouldAutoTranscribe("audio/wav", {
24
+ readGlobalConfigImpl: readGlobalConfig(true),
25
+ getCachedScribeUrlImpl: scribePresent,
26
+ })).toBe(true);
27
+ });
28
+
29
+ test("triggers on audio/mp4 (m4a)", () => {
30
+ expect(shouldAutoTranscribe("audio/mp4", {
31
+ readGlobalConfigImpl: readGlobalConfig(true),
32
+ getCachedScribeUrlImpl: scribePresent,
33
+ })).toBe(true);
34
+ });
35
+
36
+ test("triggers on audio/webm", () => {
37
+ expect(shouldAutoTranscribe("audio/webm", {
38
+ readGlobalConfigImpl: readGlobalConfig(true),
39
+ getCachedScribeUrlImpl: scribePresent,
40
+ })).toBe(true);
41
+ });
42
+
43
+ test("triggers case-insensitively (AUDIO/WAV)", () => {
44
+ expect(shouldAutoTranscribe("AUDIO/WAV", {
45
+ readGlobalConfigImpl: readGlobalConfig(true),
46
+ getCachedScribeUrlImpl: scribePresent,
47
+ })).toBe(true);
48
+ });
49
+
50
+ test("skips non-audio mime-types (image/png, application/pdf, video/mp4)", () => {
51
+ expect(shouldAutoTranscribe("image/png", {
52
+ readGlobalConfigImpl: readGlobalConfig(true),
53
+ getCachedScribeUrlImpl: scribePresent,
54
+ })).toBe(false);
55
+ expect(shouldAutoTranscribe("application/pdf", {
56
+ readGlobalConfigImpl: readGlobalConfig(true),
57
+ getCachedScribeUrlImpl: scribePresent,
58
+ })).toBe(false);
59
+ expect(shouldAutoTranscribe("video/mp4", {
60
+ readGlobalConfigImpl: readGlobalConfig(true),
61
+ getCachedScribeUrlImpl: scribePresent,
62
+ })).toBe(false);
63
+ });
64
+
65
+ test("skips when enabled is false (default off)", () => {
66
+ expect(shouldAutoTranscribe("audio/wav", {
67
+ readGlobalConfigImpl: readGlobalConfig(false),
68
+ getCachedScribeUrlImpl: scribePresent,
69
+ })).toBe(false);
70
+ });
71
+
72
+ test("skips when enabled is unset (no auto_transcribe block in config)", () => {
73
+ expect(shouldAutoTranscribe("audio/wav", {
74
+ readGlobalConfigImpl: readGlobalConfig(undefined),
75
+ getCachedScribeUrlImpl: scribePresent,
76
+ })).toBe(false);
77
+ });
78
+
79
+ test("skips when scribe URL is undefined (no services.json entry, no env)", () => {
80
+ expect(shouldAutoTranscribe("audio/wav", {
81
+ readGlobalConfigImpl: readGlobalConfig(true),
82
+ getCachedScribeUrlImpl: scribeAbsent,
83
+ })).toBe(false);
84
+ });
85
+
86
+ test("skips when scribe URL is empty string", () => {
87
+ expect(shouldAutoTranscribe("audio/wav", {
88
+ readGlobalConfigImpl: readGlobalConfig(true),
89
+ getCachedScribeUrlImpl: () => "",
90
+ })).toBe(false);
91
+ });
92
+
93
+ test("skips on garbage mime-type input", () => {
94
+ expect(shouldAutoTranscribe("", {
95
+ readGlobalConfigImpl: readGlobalConfig(true),
96
+ getCachedScribeUrlImpl: scribePresent,
97
+ })).toBe(false);
98
+ expect(shouldAutoTranscribe("not-a-mime", {
99
+ readGlobalConfigImpl: readGlobalConfig(true),
100
+ getCachedScribeUrlImpl: scribePresent,
101
+ })).toBe(false);
102
+ });
103
+
104
+ test("respects enabledOverride when present", () => {
105
+ expect(shouldAutoTranscribe("audio/wav", {
106
+ readGlobalConfigImpl: readGlobalConfig(false),
107
+ getCachedScribeUrlImpl: scribePresent,
108
+ enabledOverride: true,
109
+ })).toBe(true);
110
+ expect(shouldAutoTranscribe("audio/wav", {
111
+ readGlobalConfigImpl: readGlobalConfig(true),
112
+ getCachedScribeUrlImpl: scribePresent,
113
+ enabledOverride: false,
114
+ })).toBe(false);
115
+ });
116
+ });
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Auto-transcribe trigger decision (vault#353, design 2026-05-21 Part 2).
3
+ *
4
+ * One pure function: given an attachment's mime-type + the operator's
5
+ * settings + whether scribe is reachable, decide whether to enqueue the
6
+ * attachment for the transcription worker. Lives in its own module so the
7
+ * attachment-write code path (`routes.ts`) and the retry endpoint share the
8
+ * same gate without duplicating logic.
9
+ */
10
+
11
+ import { readGlobalConfig } from "./config.ts";
12
+ import { getCachedScribeUrl } from "./scribe-discovery.ts";
13
+
/**
 * Decide whether an uploaded attachment should be enqueued for the
 * transcription worker.
 *
 * Pre-vault#353 callers passed `transcribe: true` explicitly on the
 * attachment POST. The auto-transcribe path inlines the decision, and this
 * function is the single decision site.
 *
 * Returns `true` only when ALL three conditions hold:
 *   1. `mimeType` is a string starting with `audio/` (case-insensitive).
 *   2. The enabled flag is exactly `true`. The flag is `opts.enabledOverride`
 *      when that is set, otherwise `auto_transcribe?.enabled` from the global
 *      config, otherwise `false`.
 *   3. The scribe URL lookup yields a non-blank string.
 *
 * The guards run in that order and short-circuit: the config is not read for
 * a non-audio mime-type (nor when `enabledOverride` is set), and the scribe
 * URL is not looked up unless the flag is on.
 *
 * @param mimeType Mime-type of the uploaded attachment.
 * @param opts.readGlobalConfigImpl Injection seam for tests — defaults to `readGlobalConfig`.
 * @param opts.getCachedScribeUrlImpl Injection seam for tests — defaults to `getCachedScribeUrl`.
 * @param opts.enabledOverride Per-call replacement for the config toggle (explicit opt-in path).
 * @returns Whether the attachment should be enqueued for transcription.
 */
export function shouldAutoTranscribe(
  mimeType: string,
  opts: {
    /** Injection seam for tests — defaults to live globals. */
    readGlobalConfigImpl?: typeof readGlobalConfig;
    getCachedScribeUrlImpl?: () => string | undefined;
    /** Allow per-call enabled override — used by the explicit-opt-in path. */
    enabledOverride?: boolean;
  } = {},
): boolean {
  if (typeof mimeType !== "string" || !mimeType.toLowerCase().startsWith("audio/")) {
    return false;
  }

  const enabled: unknown =
    opts.enabledOverride ??
    (opts.readGlobalConfigImpl ?? readGlobalConfig)().auto_transcribe?.enabled ??
    false;
  // Strict comparison, matching the documented `=== true` contract: a
  // hand-edited config holding a truthy non-boolean (e.g. the string
  // "false") must not switch transcription on.
  if (enabled !== true) return false;

  const url: unknown = (opts.getCachedScribeUrlImpl ?? getCachedScribeUrl)();
  // The typeof guard keeps a malformed (non-string) value from throwing on `.trim()`.
  return typeof url === "string" && url.trim() !== "";
}