@openparachute/vault 0.3.0-rc.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,9 @@ import { mkdirSync, rmSync, writeFileSync, existsSync } from "fs";
4
4
  import { join } from "path";
5
5
  import { tmpdir } from "os";
6
6
  import { BunStore } from "./vault-store.ts";
7
- import { startTranscriptionWorker } from "./transcription-worker.ts";
7
+ import { startTranscriptionWorker, registerTranscriptionHook } from "./transcription-worker.ts";
8
+ import { HookRegistry } from "../core/src/hooks.ts";
9
+ import { SqliteStore } from "../core/src/store.ts";
8
10
  import type { Store } from "../core/src/types.ts";
9
11
 
10
12
  let db: Database;
@@ -202,6 +204,101 @@ describe("transcription worker", () => {
202
204
  expect(att.metadata?.transcribe_error).toContain("boom");
203
205
  });
204
206
 
207
+ test("terminal failure with stub=true → note shows 'Transcription unavailable' and stub is cleared", async () => {
208
+ // Mirrors Lens's voice-memo stub shape: note with placeholder body and
209
+ // transcribe_stub marker, attachment pre-loaded near the retry limit.
210
+ await store.createNote(
211
+ "# 🎙️ Voice memo\n\n_Transcript pending._\n",
212
+ { id: "unavail1", metadata: { transcribe_stub: true } },
213
+ );
214
+ seedAudio("memos/unavail1.webm");
215
+ await store.addAttachment("unavail1", "memos/unavail1.webm", "audio/webm", {
216
+ transcribe_status: "pending",
217
+ transcribe_attempts: 2,
218
+ });
219
+
220
+ const worker = makeWorker({
221
+ fetchImpl: mkFetchMock([{ error: "scribe down hard", status: 500 }]),
222
+ maxAttempts: 3,
223
+ });
224
+ try {
225
+ await worker.tick();
226
+ } finally {
227
+ await worker.stop();
228
+ }
229
+
230
+ const note = await store.getNote("unavail1");
231
+ expect(note!.content).toBe("# 🎙️ Voice memo\n\n_Transcription unavailable._\n");
232
+ expect((note!.metadata as any)?.transcribe_stub).toBeUndefined();
233
+
234
+ const [att] = await store.getAttachments("unavail1");
235
+ expect(att!.metadata?.transcribe_status).toBe("failed");
236
+ expect(att!.metadata?.transcribe_error).toContain("scribe down hard");
237
+ });
238
+
239
+ test("audio-not-found with stub=true → note shows 'Transcription unavailable' and stub is cleared", async () => {
240
+ await store.createNote(
241
+ "# 🎙️ Voice memo\n\n_Transcript pending._\n",
242
+ { id: "unavail2", metadata: { transcribe_stub: true } },
243
+ );
244
+ // No seedAudio — the file is deliberately missing.
245
+ await store.addAttachment("unavail2", "memos/gone.webm", "audio/webm", {
246
+ transcribe_status: "pending",
247
+ });
248
+
249
+ let called = 0;
250
+ const worker = makeWorker({
251
+ fetchImpl: (async () => {
252
+ called++;
253
+ return new Response("x", { status: 200 });
254
+ }) as typeof fetch,
255
+ });
256
+ try {
257
+ await worker.tick();
258
+ } finally {
259
+ await worker.stop();
260
+ }
261
+
262
+ // Scribe was never called — audio-missing check short-circuits before
263
+ // the network call, same as before. What's new is the note rewrite.
264
+ expect(called).toBe(0);
265
+
266
+ const note = await store.getNote("unavail2");
267
+ expect(note!.content).toBe("# 🎙️ Voice memo\n\n_Transcription unavailable._\n");
268
+ expect((note!.metadata as any)?.transcribe_stub).toBeUndefined();
269
+
270
+ const [att] = await store.getAttachments("unavail2");
271
+ expect(att!.metadata?.transcribe_status).toBe("failed");
272
+ expect(att!.metadata?.transcribe_error).toContain("audio file not found");
273
+ });
274
+
275
+ test("terminal failure with stub=false → note content is NOT touched", async () => {
276
+ // User edited the note after upload, which cleared the stub marker.
277
+ // Worker must not clobber their edit even though transcription failed.
278
+ await store.createNote("my own words", { id: "unavail3" });
279
+ seedAudio("memos/unavail3.webm");
280
+ await store.addAttachment("unavail3", "memos/unavail3.webm", "audio/webm", {
281
+ transcribe_status: "pending",
282
+ transcribe_attempts: 2,
283
+ });
284
+
285
+ const worker = makeWorker({
286
+ fetchImpl: mkFetchMock([{ error: "boom", status: 500 }]),
287
+ maxAttempts: 3,
288
+ });
289
+ try {
290
+ await worker.tick();
291
+ } finally {
292
+ await worker.stop();
293
+ }
294
+
295
+ const note = await store.getNote("unavail3");
296
+ expect(note!.content).toBe("my own words");
297
+
298
+ const [att] = await store.getAttachments("unavail3");
299
+ expect(att!.metadata?.transcribe_status).toBe("failed");
300
+ });
301
+
205
302
  test("FIFO: oldest pending is processed first", async () => {
206
303
  await store.createNote("s", { id: "f1", metadata: { transcribe_stub: true } });
207
304
  await store.createNote("s", { id: "f2", metadata: { transcribe_stub: true } });
@@ -581,3 +678,187 @@ describe("store.listAttachmentsByTranscribeStatus", () => {
581
678
  expect(done[0]!.path).toBe("a.webm");
582
679
  });
583
680
  });
681
+
682
+ describe("transcription worker — hook-driven", () => {
683
+ // These tests use a private HookRegistry so they don't collide with
684
+ // defaultHookRegistry state or other test files.
685
+ let hooks: HookRegistry;
686
+ let hookedStore: SqliteStore;
687
+ let hookedDb: Database;
688
+
689
+ beforeEach(() => {
690
+ hookedDb = new Database(":memory:");
691
+ hooks = new HookRegistry({ concurrency: 4, logger: silentLogger });
692
+ hookedStore = new SqliteStore(hookedDb, { hooks });
693
+ });
694
+
695
+ afterEach(() => {
696
+ hookedDb.close();
697
+ });
698
+
699
+ test("attachment:created event triggers a cycle before the sweep fires", async () => {
700
+ await hookedStore.createNote("stub", { id: "h1", metadata: { transcribe_stub: true } });
701
+ seedAudio("memos/h1.webm");
702
+
703
+ let callCount = 0;
704
+ const fetchImpl = (async () => {
705
+ callCount++;
706
+ return new Response(JSON.stringify({ text: "hook-path" }), {
707
+ status: 200,
708
+ headers: { "content-type": "application/json" },
709
+ });
710
+ }) as unknown as typeof fetch;
711
+
712
+ const worker = startTranscriptionWorker({
713
+ vaultList: () => ["default"],
714
+ getStore: () => hookedStore as unknown as Store,
715
+ scribeUrl: "http://scribe.test",
716
+ resolveAssetsDir: () => assetsRoot,
717
+ // Sweep would never fire within the test window — we prove the hook
718
+ // path is what drives processing.
719
+ pollIntervalMs: 10_000_000,
720
+ fetchImpl,
721
+ logger: silentLogger,
722
+ });
723
+ registerTranscriptionHook(hooks, worker, () => "default");
724
+
725
+ try {
726
+ const start = Date.now();
727
+ await hookedStore.addAttachment("h1", "memos/h1.webm", "audio/webm", {
728
+ transcribe_status: "pending",
729
+ });
730
+
731
+ // Poll for completion rather than sleep-and-hope — `queueMicrotask` +
732
+ // semaphore acquire + a faked fetch round-trip is well under 50ms but
733
+ // not zero.
734
+ const deadline = start + 500;
735
+ while (Date.now() < deadline) {
736
+ const [att] = await hookedStore.getAttachments("h1");
737
+ if (att?.metadata?.transcribe_status === "done") break;
738
+ await new Promise((r) => setTimeout(r, 5));
739
+ }
740
+ const elapsed = Date.now() - start;
741
+
742
+ expect(callCount).toBe(1);
743
+ expect(elapsed).toBeLessThan(500);
744
+
745
+ const [att] = await hookedStore.getAttachments("h1");
746
+ expect(att!.metadata?.transcribe_status).toBe("done");
747
+ expect(att!.metadata?.transcript).toBe("hook-path");
748
+
749
+ const note = await hookedStore.getNote("h1");
750
+ expect(note!.content).toBe("hook-path");
751
+ } finally {
752
+ await worker.stop();
753
+ await hooks.drain();
754
+ }
755
+ });
756
+
757
+ test("sweep still catches a backoff-queued item after its backoff elapses", async () => {
758
+ await hookedStore.createNote("stub", { id: "h2", metadata: { transcribe_stub: true } });
759
+ seedAudio("memos/h2.webm");
760
+
761
+ // Seed an attachment already in backoff, but with a backoff window that
762
+ // has already elapsed — the sweep should pick it up on the next tick.
763
+ // The hook is registered below, AFTER this insert, so the dispatch at
764
+ // addAttachment time has no subscribers and the event-driven path is
765
+ // never taken. What drives the completion is `worker.tick()` alone.
766
+ const past = new Date(Date.now() - 1_000).toISOString();
767
+ await hookedStore.addAttachment("h2", "memos/h2.webm", "audio/webm", {
768
+ transcribe_status: "pending",
769
+ transcribe_attempts: 1,
770
+ transcribe_backoff_until: past,
771
+ });
772
+
773
+ let calls = 0;
774
+ const fetchImpl = (async () => {
775
+ calls++;
776
+ return new Response(JSON.stringify({ text: "sweep-recovered" }), { status: 200 });
777
+ }) as unknown as typeof fetch;
778
+
779
+ const worker = startTranscriptionWorker({
780
+ vaultList: () => ["default"],
781
+ getStore: () => hookedStore as unknown as Store,
782
+ scribeUrl: "http://scribe.test",
783
+ resolveAssetsDir: () => assetsRoot,
784
+ pollIntervalMs: 10_000_000,
785
+ fetchImpl,
786
+ logger: silentLogger,
787
+ });
788
+ // Hook is registered but won't fire (no new addAttachment inside this
789
+ // test window). The sweep is what we're exercising.
790
+ registerTranscriptionHook(hooks, worker, () => "default");
791
+
792
+ try {
793
+ const processed = await worker.tick();
794
+ expect(processed).toBe(1);
795
+ expect(calls).toBe(1);
796
+
797
+ const [att] = await hookedStore.getAttachments("h2");
798
+ expect(att!.metadata?.transcribe_status).toBe("done");
799
+ expect(att!.metadata?.transcript).toBe("sweep-recovered");
800
+ } finally {
801
+ await worker.stop();
802
+ await hooks.drain();
803
+ }
804
+ });
805
+
806
+ test("back-compat: pending status set without dispatching a hook is picked up by the sweep", async () => {
807
+ // Simulate a row inserted by something other than the hooked store —
808
+ // e.g., a restart resumes with a pre-existing pending attachment, or a
809
+ // migration/backfill that writes directly. The sweep must still drain
810
+ // it even though no `attachment:created` event was dispatched.
811
+ await hookedStore.createNote("stub", { id: "h3", metadata: { transcribe_stub: true } });
812
+ seedAudio("memos/h3.webm");
813
+
814
+ // Insert the attachment directly via raw SQL so no hook dispatches.
815
+ const now = new Date().toISOString();
816
+ hookedDb
817
+ .prepare(
818
+ "INSERT INTO attachments (id, note_id, path, mime_type, metadata, created_at) VALUES (?, ?, ?, ?, ?, ?)",
819
+ )
820
+ .run(
821
+ "att-h3",
822
+ "h3",
823
+ "memos/h3.webm",
824
+ "audio/webm",
825
+ JSON.stringify({ transcribe_status: "pending" }),
826
+ now,
827
+ );
828
+
829
+ let calls = 0;
830
+ const fetchImpl = (async () => {
831
+ calls++;
832
+ return new Response(JSON.stringify({ text: "back-compat-sweep" }), { status: 200 });
833
+ }) as unknown as typeof fetch;
834
+
835
+ const worker = startTranscriptionWorker({
836
+ vaultList: () => ["default"],
837
+ getStore: () => hookedStore as unknown as Store,
838
+ scribeUrl: "http://scribe.test",
839
+ resolveAssetsDir: () => assetsRoot,
840
+ pollIntervalMs: 10_000_000,
841
+ fetchImpl,
842
+ logger: silentLogger,
843
+ });
844
+ registerTranscriptionHook(hooks, worker, () => "default");
845
+
846
+ try {
847
+ // No hook fires — row was inserted via raw SQL. Prove the hook is idle.
848
+ await new Promise((r) => setTimeout(r, 30));
849
+ expect(calls).toBe(0);
850
+
851
+ // Sweep tick drains it.
852
+ const processed = await worker.tick();
853
+ expect(processed).toBe(1);
854
+ expect(calls).toBe(1);
855
+
856
+ const [att] = await hookedStore.getAttachments("h3");
857
+ expect(att!.metadata?.transcribe_status).toBe("done");
858
+ expect(att!.metadata?.transcript).toBe("back-compat-sweep");
859
+ } finally {
860
+ await worker.stop();
861
+ await hooks.drain();
862
+ }
863
+ });
864
+ });
@@ -1,13 +1,22 @@
1
1
  /**
2
- * Background worker that drains pending transcription requests.
2
+ * Event-driven transcription with a safety-net sweep.
3
3
  *
4
- * ## How a request enters the queue
4
+ * ## Shape (event-driven happy path, timer-driven failure path)
5
5
  *
6
- * The caller `POST /api/notes/:id/attachments` with `{transcribe: true}`.
7
- * The route writes `attachment.metadata.transcribe_status = "pending"` and
8
- * sets `note.metadata.transcribe_stub = true` as the opt-in to overwrite.
9
- * The DB is the queue — a server restart resumes the scan without losing
10
- * requests.
6
+ * - **Event path (hot):** `POST /api/notes/:id/attachments` with
7
+ * `{transcribe: true}` writes `attachment.metadata.transcribe_status =
8
+ * "pending"` via `store.addAttachment`, which dispatches an
9
+ * `attachment:created` hook. A handler registered via
10
+ * `registerTranscriptionHook` calls `worker.kick()` on the owning vault,
11
+ * so the cycle begins in the microtask after the HTTP response returns —
12
+ * upload latency is not gated on transcription latency.
13
+ * - **Sweep path (safety net):** Every `pollIntervalMs` (default 30s), the
14
+ * worker lists pending attachments across all vaults and runs them. This
15
+ * catches items queued during a server restart, items whose backoff just
16
+ * elapsed, and anything that got orphaned by a dropped hook dispatch.
17
+ *
18
+ * The DB remains the queue — `metadata.transcribe_status = "pending"` is
19
+ * the source of truth; the hook is only a latency shortcut.
11
20
  *
12
21
  * ## What the worker does per pending attachment
13
22
  *
@@ -25,28 +34,52 @@
25
34
  * metadata is still addressable).
26
35
  * 4. On failure:
27
36
  * - Up to `maxAttempts` retries with exponential backoff encoded as
28
- * `transcribe_backoff_until`. Status stays `"pending"`; we simply skip
29
- * ones whose backoff hasn't expired.
37
+ * `transcribe_backoff_until`. Status stays `"pending"`; the sweep
38
+ * skips ones whose backoff hasn't expired.
30
39
  * - After `maxAttempts`, flip status to `"failed"` with `transcribe_error`.
31
40
  *
32
41
  * ## Concurrency
33
42
  *
34
- * FIFO, one at a time, across all vaults. The poll-then-process loop is
35
- * intentionally simple — transcription is already seconds-long and scribe
36
- * is not designed for high concurrency. Scaling to multiple in-flight
37
- * jobs can be added later without changing the wire contract.
43
+ * FIFO across all vaults. Hook-driven and sweep-driven paths race on the
44
+ * same attachment if an upload arrives just before a sweep runs; an
45
+ * in-memory `inFlight` set dedupes within the process so we don't double-
46
+ * POST to scribe. Cross-process guarantees still live in the DB — a sweep
47
+ * on another process would see `transcribe_status = "pending"` and try
48
+ * again, which scribe and the metadata writes handle idempotently.
38
49
  */
39
50
 
40
51
  import { join, normalize } from "path";
41
52
  import { existsSync, readFileSync, unlinkSync } from "fs";
42
53
  import type { Store, Attachment } from "../core/src/types.ts";
54
+ import type { HookRegistry } from "../core/src/hooks.ts";
43
55
  import { appendContextPart, fetchContextEntries, type ContextPayload } from "./context.ts";
44
56
  import type { TriggerIncludeContext } from "./config.ts";
45
57
 
46
58
  /** Placeholder pattern written by Lens's voice-memo stub. */
47
59
  const TRANSCRIPT_PLACEHOLDER = /_Transcript pending\._/;
48
60
 
49
- const DEFAULT_POLL_MS = 5_000;
61
+ /**
62
+ * Body written when transcription reaches a terminal failure (maxAttempts
63
+ * exhausted, or the audio file is missing). This used to be written by
64
+ * Lens's now-removed scribe client; owning it here means a failed upload
65
+ * stops reading "Transcript pending" forever regardless of which client
66
+ * uploaded the audio.
67
+ */
68
+ const TRANSCRIPT_UNAVAILABLE = "_Transcription unavailable._";
69
+
70
+ /**
71
+ * Default sweep cadence (ms). The sweep is the safety net for backoff-
72
+ * queued items, items that arrived while the server was down, or dispatches
73
+ * that got dropped — not the hot path. Fresh uploads land in single-digit
74
+ * ms via the `attachment:created` hook (see `registerTranscriptionHook`).
75
+ *
76
+ * Operators can override this with the `TRANSCRIPTION_SWEEP_MS` env var
77
+ * (read at `startTranscriptionWorker()` time, not module load, so values
78
+ * in `~/.parachute/vault/.env` apply — ES module import happens before
79
+ * `loadEnvFile()` in server.ts). Per-caller override via the
80
+ * `pollIntervalMs` opt wins over both.
81
+ */
82
+ const DEFAULT_POLL_MS = 30_000;
50
83
  const DEFAULT_MAX_ATTEMPTS = 3;
51
84
  const DEFAULT_TIMEOUT_MS = 120_000;
52
85
 
@@ -85,6 +118,17 @@ export interface TranscriptionWorker {
85
118
  stop(): Promise<void>;
86
119
  /** Run one poll cycle now. Returns number of attachments processed. */
87
120
  tick(): Promise<number>;
121
+ /**
122
+ * Process a single attachment immediately. Called by the
123
+ * `attachment:created` hook to short-circuit the sweep wait.
124
+ *
125
+ * Safe to race with `tick()` — an in-memory `inFlight` guard dedupes
126
+ * same-attachment requests within this process. The handler returns
127
+ * once processing finishes (or is skipped as a dup / backoff / non-
128
+ * pending status). Errors are logged and swallowed so a thrown hook
129
+ * handler never crashes the dispatcher.
130
+ */
131
+ kick(vault: string, attachment: Attachment): Promise<void>;
88
132
  }
89
133
 
90
134
  interface PendingMeta {
@@ -106,7 +150,12 @@ interface PendingMeta {
106
150
  export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): TranscriptionWorker {
107
151
  const logger = opts.logger ?? console;
108
152
  const fetchImpl = opts.fetchImpl ?? fetch;
109
- const pollMs = opts.pollIntervalMs ?? DEFAULT_POLL_MS;
153
+ // Precedence: opts.pollIntervalMs > TRANSCRIPTION_SWEEP_MS env > DEFAULT_POLL_MS.
154
+ // Reading env here (not at module scope) means `~/.parachute/vault/.env`
155
+ // values loaded by server.ts still apply, matching how SCRIBE_URL works.
156
+ const envPoll = Number(process.env.TRANSCRIPTION_SWEEP_MS);
157
+ const defaultPollMs = Number.isFinite(envPoll) && envPoll > 0 ? envPoll : DEFAULT_POLL_MS;
158
+ const pollMs = opts.pollIntervalMs ?? defaultPollMs;
110
159
  const maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
111
160
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
112
161
  const retentionFor = opts.getAudioRetention ?? (() => "keep" as const);
@@ -115,9 +164,67 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
115
164
  let inflight: Promise<void> = Promise.resolve();
116
165
  let timer: ReturnType<typeof setTimeout> | null = null;
117
166
 
167
+ /**
168
+ * In-process dedupe: holds attachment IDs currently being worked. The
169
+ * event-driven `kick()` path can race the sweep on the same attachment
170
+ * when an upload lands moments before a tick starts. Without this guard
171
+ * both paths would fetch the audio and POST to scribe twice.
172
+ */
173
+ const inFlightAttachments = new Set<string>();
174
+
118
175
  async function processOne(vault: string, attachment: Attachment): Promise<void> {
176
+ // Dedupe: another path (sweep vs hook kick, or a duplicate dispatch)
177
+ // is already working this attachment. Drop — its result is durable
178
+ // in the DB, and the sweep will re-pick anything that truly needs it.
179
+ if (inFlightAttachments.has(attachment.id)) return;
180
+ inFlightAttachments.add(attachment.id);
181
+ try {
182
+ await processOneLocked(vault, attachment);
183
+ } finally {
184
+ inFlightAttachments.delete(attachment.id);
185
+ }
186
+ }
187
+
188
+ /**
189
+ * On a terminal failure (maxAttempts exhausted, or audio file missing),
190
+ * swap the stub placeholder for the "unavailable" marker — otherwise
191
+ * Lens's voice memo sits reading "Transcript pending" forever. Mirrors
192
+ * the success-path note write in shape: only touches the note when
193
+ * `transcribe_stub === true`, clears the stub marker, uses `skipUpdatedAt`
194
+ * so the note's modification time still reflects user intent. Errors
195
+ * are logged and swallowed so a note-write failure doesn't mask the
196
+ * attachment failure we're trying to record.
197
+ */
198
+ async function applyFailureMarker(store: Store, noteId: string): Promise<void> {
199
+ const note = await store.getNote(noteId);
200
+ if (!note) return;
201
+ const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
202
+ if (noteMeta.transcribe_stub !== true) return;
203
+
204
+ const body = TRANSCRIPT_PLACEHOLDER.test(note.content)
205
+ ? note.content.replace(TRANSCRIPT_PLACEHOLDER, TRANSCRIPT_UNAVAILABLE)
206
+ : TRANSCRIPT_UNAVAILABLE;
207
+ const { transcribe_stub: _drop, ...restMeta } = noteMeta;
208
+ try {
209
+ await store.updateNote(note.id, {
210
+ content: body,
211
+ metadata: restMeta,
212
+ skipUpdatedAt: true,
213
+ });
214
+ } catch (err) {
215
+ logger.error(`[transcribe] failed to apply failure marker to note ${note.id}:`, err);
216
+ }
217
+ }
218
+
219
+ async function processOneLocked(vault: string, attachment: Attachment): Promise<void> {
119
220
  const store = opts.getStore(vault);
120
- const meta: PendingMeta = { ...(attachment.metadata ?? {}) };
221
+ // Re-read metadata — the in-memory `attachment` may be stale (the hook
222
+ // path hands us the row from just after insert; a concurrent completion
223
+ // in another path may have already flipped status). Skip if not pending.
224
+ const fresh = (await store.getAttachment(attachment.id)) ?? attachment;
225
+ const meta: PendingMeta = { ...(fresh.metadata ?? {}) };
226
+ if (meta.transcribe_status !== "pending") return;
227
+
121
228
  const attempts = (meta.transcribe_attempts as number | undefined) ?? 0;
122
229
 
123
230
  // Honor backoff — we re-check here in case another tick queued this
@@ -136,6 +243,7 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
136
243
  transcribe_status: "failed",
137
244
  transcribe_error: "audio file not found",
138
245
  });
246
+ await applyFailureMarker(store, attachment.noteId);
139
247
  return;
140
248
  }
141
249
 
@@ -171,6 +279,7 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
171
279
  transcribe_attempts: nextAttempts,
172
280
  transcribe_error: errMsg,
173
281
  });
282
+ await applyFailureMarker(store, attachment.noteId);
174
283
  // retention=never drops the audio on any terminal state, including
175
284
  // failure. The user opted in to "I don't want the audio kept around
176
285
  // regardless of outcome" — honor it.
@@ -293,6 +402,15 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
293
402
 
294
403
  schedule();
295
404
 
405
+ async function kick(vault: string, attachment: Attachment): Promise<void> {
406
+ if (stopped) return;
407
+ try {
408
+ await processOne(vault, attachment);
409
+ } catch (err) {
410
+ logger.error(`[transcribe] kick error on attachment ${attachment.id}:`, err);
411
+ }
412
+ }
413
+
296
414
  return {
297
415
  async stop() {
298
416
  stopped = true;
@@ -300,9 +418,46 @@ export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): Transcr
300
418
  await inflight;
301
419
  },
302
420
  tick,
421
+ kick,
303
422
  };
304
423
  }
305
424
 
425
+ /**
426
+ * Wire the transcription worker up as an `attachment:created` hook. This
427
+ * is the event-driven fast path — when a new attachment is inserted with
428
+ * `transcribe_status = "pending"`, the hook fires within a microtask and
429
+ * the worker begins processing without waiting for the next sweep.
430
+ *
431
+ * `resolveVault(store)` maps the store handle delivered to the hook back
432
+ * to its vault name (needed so the worker can resolve the assets dir,
433
+ * retention policy, and context predicates). Returns an unregister
434
+ * function so tests can tear down cleanly.
435
+ */
436
+ export function registerTranscriptionHook(
437
+ registry: HookRegistry,
438
+ worker: TranscriptionWorker,
439
+ resolveVault: (store: Store) => string | undefined,
440
+ logger: { error: (...args: unknown[]) => void } = console,
441
+ ): () => void {
442
+ return registry.onAttachment({
443
+ name: "transcription-kickoff",
444
+ event: "created",
445
+ when: (att) =>
446
+ (att.metadata as { transcribe_status?: string } | undefined)
447
+ ?.transcribe_status === "pending",
448
+ handler: async (attachment, store) => {
449
+ const vault = resolveVault(store);
450
+ if (!vault) {
451
+ logger.error(
452
+ `[transcribe] could not resolve vault for attachment ${attachment.id}; sweep will pick it up`,
453
+ );
454
+ return;
455
+ }
456
+ await worker.kick(vault, attachment);
457
+ },
458
+ });
459
+ }
460
+
306
461
  async function callScribe(args: {
307
462
  url: string;
308
463
  token?: string;