@openparachute/vault 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/.claude/settings.local.json +2 -25
  2. package/CHANGELOG.md +64 -0
  3. package/CLAUDE.md +17 -7
  4. package/README.md +169 -136
  5. package/core/src/core.test.ts +591 -19
  6. package/core/src/hooks.ts +111 -3
  7. package/core/src/indexed-fields.test.ts +285 -0
  8. package/core/src/indexed-fields.ts +238 -0
  9. package/core/src/mcp.ts +127 -6
  10. package/core/src/notes.ts +153 -11
  11. package/core/src/query-operators.ts +174 -0
  12. package/core/src/schema.ts +69 -2
  13. package/core/src/store.ts +95 -1
  14. package/core/src/tag-schemas.ts +5 -0
  15. package/core/src/types.ts +28 -1
  16. package/docs/HTTP_API.md +105 -1
  17. package/docs/auth-model.md +340 -0
  18. package/package/package.json +32 -0
  19. package/package.json +2 -2
  20. package/src/auth.test.ts +83 -114
  21. package/src/auth.ts +68 -6
  22. package/src/backup-launchd.ts +1 -1
  23. package/src/backup.test.ts +1 -1
  24. package/src/backup.ts +18 -17
  25. package/src/bind.test.ts +28 -0
  26. package/src/bind.ts +19 -0
  27. package/src/cli.ts +228 -133
  28. package/src/config-triggers.test.ts +49 -0
  29. package/src/config.test.ts +317 -2
  30. package/src/config.ts +420 -40
  31. package/src/context.test.ts +136 -0
  32. package/src/context.ts +115 -0
  33. package/src/daemon.ts +17 -16
  34. package/src/doctor.test.ts +9 -7
  35. package/src/launchd.test.ts +1 -1
  36. package/src/launchd.ts +6 -6
  37. package/src/mcp-http.ts +75 -21
  38. package/src/mcp-install.test.ts +125 -0
  39. package/src/mcp-install.ts +60 -0
  40. package/src/mcp-tools.ts +34 -96
  41. package/src/module-config.ts +109 -0
  42. package/src/oauth.test.ts +345 -57
  43. package/src/oauth.ts +155 -35
  44. package/src/published.test.ts +2 -2
  45. package/src/routes.ts +209 -33
  46. package/src/routing.test.ts +817 -300
  47. package/src/routing.ts +204 -202
  48. package/src/scopes.test.ts +294 -0
  49. package/src/scopes.ts +253 -0
  50. package/src/scribe-env.test.ts +49 -0
  51. package/src/scribe-env.ts +33 -0
  52. package/src/server.ts +73 -9
  53. package/src/services-manifest.test.ts +140 -0
  54. package/src/services-manifest.ts +99 -0
  55. package/src/systemd.ts +3 -3
  56. package/src/token-store.ts +42 -9
  57. package/src/transcription-worker.test.ts +864 -0
  58. package/src/transcription-worker.ts +501 -0
  59. package/src/triggers.test.ts +191 -1
  60. package/src/triggers.ts +17 -2
  61. package/src/vault.test.ts +693 -77
  62. package/src/version.test.ts +1 -1
  63. package/.playwright-mcp/console-2026-04-14T04-17-25-395Z.log +0 -2
  64. package/.playwright-mcp/console-2026-04-14T04-18-11-767Z.log +0 -1
  65. package/.playwright-mcp/console-2026-04-14T04-19-07-733Z.log +0 -2
  66. package/.playwright-mcp/console-2026-04-14T04-20-45-440Z.log +0 -2
  67. package/.playwright-mcp/page-2026-04-14T04-17-25-536Z.yml +0 -1
  68. package/.playwright-mcp/page-2026-04-14T04-18-11-816Z.yml +0 -1
  69. package/.playwright-mcp/page-2026-04-14T04-18-31-674Z.yml +0 -211
  70. package/.playwright-mcp/page-2026-04-14T04-19-07-795Z.yml +0 -59
  71. package/.playwright-mcp/page-2026-04-14T04-19-36-239Z.yml +0 -232
  72. package/.playwright-mcp/page-2026-04-14T04-19-58-327Z.yml +0 -182
  73. package/.playwright-mcp/page-2026-04-14T04-20-10-517Z.yml +0 -91
  74. package/.playwright-mcp/page-2026-04-14T04-20-14-796Z.yml +0 -70
  75. package/.playwright-mcp/page-2026-04-14T04-20-45-509Z.yml +0 -59
  76. package/religions-abrahamic-filter.png +0 -0
  77. package/religions-buddhism-v2.png +0 -0
  78. package/religions-buddhism.png +0 -0
  79. package/religions-final.png +0 -0
  80. package/religions-v1.png +0 -0
  81. package/religions-v2.png +0 -0
  82. package/religions-zen.png +0 -0
  83. package/web/README.md +0 -73
  84. package/web/bun.lock +0 -827
  85. package/web/eslint.config.js +0 -23
  86. package/web/index.html +0 -15
  87. package/web/package.json +0 -36
  88. package/web/public/favicon.svg +0 -1
  89. package/web/public/icons.svg +0 -24
  90. package/web/src/App.tsx +0 -149
  91. package/web/src/Graph.tsx +0 -200
  92. package/web/src/NoteView.tsx +0 -155
  93. package/web/src/Sidebar.tsx +0 -186
  94. package/web/src/api.ts +0 -21
  95. package/web/src/index.css +0 -50
  96. package/web/src/main.tsx +0 -10
  97. package/web/src/types.ts +0 -37
  98. package/web/src/utils.ts +0 -107
  99. package/web/tsconfig.app.json +0 -25
  100. package/web/tsconfig.json +0 -7
  101. package/web/tsconfig.node.json +0 -24
  102. package/web/vite.config.ts +0 -16
@@ -0,0 +1,501 @@
1
+ /**
2
+ * Event-driven transcription with a safety-net sweep.
3
+ *
4
+ * ## Shape (event-driven happy path, timer-driven failure path)
5
+ *
6
+ * - **Event path (hot):** `POST /api/notes/:id/attachments` with
7
+ * `{transcribe: true}` writes `attachment.metadata.transcribe_status =
8
+ * "pending"` via `store.addAttachment`, which dispatches an
9
+ * `attachment:created` hook. A handler registered via
10
+ * `registerTranscriptionHook` calls `worker.kick()` on the owning vault,
11
+ * so the cycle begins in the microtask after the HTTP response returns —
12
+ * upload latency is not gated on transcription latency.
13
+ * - **Sweep path (safety net):** Every `pollIntervalMs` (default 30s), the
14
+ * worker lists pending attachments across all vaults and runs them. This
15
+ * catches items queued during a server restart, items whose backoff just
16
+ * elapsed, and anything that got orphaned by a dropped hook dispatch.
17
+ *
18
+ * The DB remains the queue — `metadata.transcribe_status = "pending"` is
19
+ * the source of truth; the hook is only a latency shortcut for fresh uploads.
20
+ *
21
+ * ## What the worker does per pending attachment
22
+ *
23
+ * 1. Read the audio file from the vault's assets dir.
24
+ * 2. POST it as multipart/form-data to `SCRIBE_URL/v1/audio/transcriptions`
25
+ * (Whisper API shape). Response is `{ text: string }`.
26
+ * 3. On success:
27
+ * - If `note.metadata.transcribe_stub === true`, replace the
28
+ * `_Transcript pending._` placeholder with the transcript, or the
29
+ * whole note body if the placeholder is absent. Clear the stub marker.
30
+ * - Mark `attachment.metadata.transcribe_status = "done"` and record
31
+ * `transcript` + `transcribe_done_at`.
32
+ * - If the vault's `audio_retention` is `"until_transcribed"`, unlink
33
+ * the audio file on disk (the attachment row stays, so the transcript
34
+ * metadata is still addressable).
35
+ * 4. On failure:
36
+ * - Up to `maxAttempts` retries with exponential backoff encoded as
37
+ * `transcribe_backoff_until`. Status stays `"pending"`; the sweep
38
+ * skips ones whose backoff hasn't expired.
39
+ * - After `maxAttempts`, flip status to `"failed"` with `transcribe_error`.
40
+ *
41
+ * ## Concurrency
42
+ *
43
+ * FIFO across all vaults. Hook-driven and sweep-driven paths race on the
44
+ * same attachment if an upload arrives just before a sweep runs; an
45
+ * in-memory `inFlight` set dedupes within the process so we don't double-
46
+ * POST to scribe. Cross-process guarantees still live in the DB — a sweep
47
+ * on another process would see `transcribe_status = "pending"` and try
48
+ * again, which scribe and the metadata writes handle idempotently.
49
+ */
50
+
51
+ import { join, normalize } from "path";
52
+ import { existsSync, readFileSync, unlinkSync } from "fs";
53
+ import type { Store, Attachment } from "../core/src/types.ts";
54
+ import type { HookRegistry } from "../core/src/hooks.ts";
55
+ import { appendContextPart, fetchContextEntries, type ContextPayload } from "./context.ts";
56
+ import type { TriggerIncludeContext } from "./config.ts";
57
+
58
/**
 * Matches the `_Transcript pending._` marker that Lens's voice-memo stub
 * writes into a note body. Deliberately non-global: it is used both with
 * `.test()` and as the search pattern of a single `.replace()`.
 */
const TRANSCRIPT_PLACEHOLDER = /_Transcript pending\._/;

/**
 * Note body marker for a terminal transcription failure — either
 * `maxAttempts` has been exhausted or the audio file is missing.
 *
 * Lens's scribe client (since removed) used to write this. Owning it in the
 * worker means a failed upload stops reading "Transcript pending" forever,
 * no matter which client uploaded the audio.
 */
const TRANSCRIPT_UNAVAILABLE = "_Transcription unavailable._";
69
+
70
/**
 * Default interval between safety-net sweeps, in milliseconds.
 *
 * The sweep is not the hot path: fresh uploads are picked up within
 * single-digit milliseconds by the `attachment:created` hook (see
 * `registerTranscriptionHook`). The sweep exists for items waiting out a
 * backoff, items queued while the server was down, and dropped dispatches.
 *
 * Override precedence, highest first:
 *   1. the `pollIntervalMs` option passed by the caller;
 *   2. the `TRANSCRIPTION_SWEEP_MS` env var — read inside
 *      `startTranscriptionWorker()` rather than at module load, because ES
 *      module import runs before `loadEnvFile()` in server.ts and values in
 *      `~/.parachute/vault/.env` would otherwise be missed;
 *   3. this constant.
 */
const DEFAULT_POLL_MS = 30 * 1_000;

/** Default number of scribe attempts before an attachment is marked failed. */
const DEFAULT_MAX_ATTEMPTS = 3;

/** Default per-request timeout for the scribe POST, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 2 * 60 * 1_000;
85
+
86
/**
 * Per-vault policy for the audio file once transcription reaches an outcome:
 * - `"keep"`: the worker never deletes the file.
 * - `"until_transcribed"`: the file is deleted after a successful transcription.
 * - `"never"`: the file is deleted on success and on terminal scribe failure.
 */
export type AudioRetention =
  | "keep"
  | "until_transcribed"
  | "never";
87
+
88
/** Dependencies and tuning knobs accepted by `startTranscriptionWorker`. */
export interface TranscriptionWorkerOpts {
  /** Returns the names of the vaults to scan on each sweep. */
  vaultList: () => string[];
  /** Returns the store backing the named vault. */
  getStore: (name: string) => Store;
  /** Base URL of the scribe service (no trailing slash). */
  scribeUrl: string;
  /** Bearer token sent to scribe, when one is required. */
  scribeToken?: string;
  /** Returns the assets root directory for the named vault. */
  resolveAssetsDir: (vault: string) => string;
  /** Audio retention policy for the named vault. Defaults to "keep". */
  getAudioRetention?: (vault: string) => AudioRetention;
  /**
   * Context predicates for the named vault, used to enrich the scribe POST.
   * When a non-empty list is returned, the worker evaluates the predicates
   * against the vault store and attaches the resulting entries as a
   * `context` multipart part — the same shape triggers produce through
   * `action.include_context`. `undefined` or `[]` attaches nothing.
   */
  getContextPredicates?: (vault: string) => TriggerIncludeContext[] | undefined;
  /** Sweep interval in milliseconds; wins over the env var and the default. */
  pollIntervalMs?: number;
  /** Number of scribe attempts before an attachment is marked failed. */
  maxAttempts?: number;
  /** Timeout for a single scribe request, in milliseconds. */
  timeoutMs?: number;
  /** `fetch` replacement, mainly for tests. */
  fetchImpl?: typeof fetch;
  /** Log sink; falls back to `console` when omitted. */
  logger?: {
    info?: (...args: unknown[]) => void;
    error: (...args: unknown[]) => void;
  };
}
115
+
116
/** Handle returned by `startTranscriptionWorker`. */
export interface TranscriptionWorker {
  /** Halts the sweep loop and resolves once in-flight work has finished. */
  stop(): Promise<void>;
  /** Runs a single sweep right away; resolves to the number of attachments processed. */
  tick(): Promise<number>;
  /**
   * Processes one attachment immediately. The `attachment:created` hook
   * calls this so a fresh upload does not wait for the next sweep.
   *
   * Racing `tick()` is safe: an in-memory `inFlight` guard drops duplicate
   * requests for the same attachment within this process. The returned
   * promise settles when processing finishes or is skipped (duplicate,
   * backoff not elapsed, status no longer pending). Errors are logged and
   * swallowed, so a failing handler never crashes the hook dispatcher.
   */
  kick(vault: string, attachment: Attachment): Promise<void>;
}
133
+
134
/**
 * Transcription bookkeeping kept in `attachment.metadata`. Every field is
 * optional; unrelated metadata keys pass through the index signature.
 */
interface PendingMeta {
  /** Lifecycle state: the worker reads "pending" and writes "pending" / "done" / "failed". */
  transcribe_status?: string;
  /** Number of scribe attempts made so far. */
  transcribe_attempts?: number;
  /** ISO timestamp before which a pending item must not be retried. */
  transcribe_backoff_until?: string;
  /** Not read or written by the worker — presumably set by the uploader; TODO confirm. */
  transcribe_requested_at?: string;
  /** Message of the most recent failure. */
  transcribe_error?: string;
  /** Transcript text recorded on success. */
  transcript?: string;
  /** ISO timestamp recorded when the transcript was stored. */
  transcribe_done_at?: string;
  [k: string]: unknown;
}
144
+
145
/**
 * Start the transcription worker and arm its first safety-net sweep.
 *
 * Constructing the worker schedules a sweep `pollMs` from now; each sweep
 * re-arms the timer only after it has finished, so sweeps never overlap.
 * Tests typically drive the returned handle via `tick()` / `kick()` instead
 * of waiting for the timer.
 *
 * @param opts Dependencies and tuning knobs (see `TranscriptionWorkerOpts`).
 * @returns A handle exposing `stop()`, `tick()` and `kick()`. `stop()` waits
 *   for the running sweep and for every attachment still being processed,
 *   whichever path (sweep, manual `tick()`, or `kick()`) started it.
 */
export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): TranscriptionWorker {
  const logger = opts.logger ?? console;
  const fetchImpl = opts.fetchImpl ?? fetch;
  // Precedence: opts.pollIntervalMs > TRANSCRIPTION_SWEEP_MS env > DEFAULT_POLL_MS.
  // The env var is read here (not at module scope) so values that server.ts
  // loads from `~/.parachute/vault/.env` still apply.
  const envPoll = Number(process.env.TRANSCRIPTION_SWEEP_MS);
  const defaultPollMs = Number.isFinite(envPoll) && envPoll > 0 ? envPoll : DEFAULT_POLL_MS;
  const pollMs = opts.pollIntervalMs ?? defaultPollMs;
  const maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const retentionFor = opts.getAudioRetention ?? (() => "keep" as const);

  let stopped = false;
  // Promise of the most recent timer-driven sweep.
  let inflight: Promise<void> = Promise.resolve();
  let timer: ReturnType<typeof setTimeout> | null = null;

  /**
   * In-process dedupe: IDs of attachments currently being worked. The
   * event-driven `kick()` path can race the sweep on the same attachment
   * when an upload lands just before a tick starts; without this guard both
   * paths would read the audio and POST it to scribe twice.
   */
  const inFlightAttachments = new Set<string>();

  /**
   * Promises of every attachment currently being processed, regardless of
   * which path started it. `stop()` drains this set so kick-driven work is
   * awaited too, not just timer-driven sweeps.
   */
  const activeWork = new Set<Promise<void>>();

  /**
   * True when `filePath` lies strictly inside `assetsRoot`. The comparison
   * is made against the root *with a trailing separator*: a bare prefix
   * check would also accept sibling directories such as `<root>-backup/...`.
   */
  function isInsideAssets(filePath: string, assetsRoot: string): boolean {
    const root = normalize(assetsRoot);
    // `normalize("/")` yields the platform separator ("/" or "\\") using only
    // what this file already imports.
    const separator = normalize("/");
    const prefix = root.endsWith(separator) ? root : root + separator;
    return filePath.startsWith(prefix);
  }

  /** True while the attachment's `transcribe_backoff_until` lies in the future. */
  function isBackedOff(meta: PendingMeta): boolean {
    if (!meta.transcribe_backoff_until) return false;
    const until = Date.parse(String(meta.transcribe_backoff_until));
    return Number.isFinite(until) && until > Date.now();
  }

  /**
   * Dedupe wrapper around `processOneLocked`. If another path is already
   * working this attachment the call is dropped — that path's result is
   * durable in the DB and the sweep re-picks anything that still needs work.
   */
  function processOne(vault: string, attachment: Attachment): Promise<void> {
    if (inFlightAttachments.has(attachment.id)) return Promise.resolve();
    inFlightAttachments.add(attachment.id);
    const work: Promise<void> = processOneLocked(vault, attachment).finally(() => {
      inFlightAttachments.delete(attachment.id);
      activeWork.delete(work);
    });
    activeWork.add(work);
    return work;
  }

  /**
   * On a terminal failure (maxAttempts exhausted, or audio file missing),
   * swap the stub placeholder for the "unavailable" marker so the note does
   * not read "Transcript pending" forever. Mirrors the success-path note
   * write: only touches notes with `transcribe_stub === true`, clears the
   * stub marker, and passes `skipUpdatedAt`. Every store error — lookup
   * included — is logged and swallowed so a note problem never masks the
   * attachment failure being recorded.
   */
  async function applyFailureMarker(store: Store, noteId: string): Promise<void> {
    try {
      const note = await store.getNote(noteId);
      if (!note) return;
      const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
      if (noteMeta.transcribe_stub !== true) return;

      const body = TRANSCRIPT_PLACEHOLDER.test(note.content)
        ? note.content.replace(TRANSCRIPT_PLACEHOLDER, () => TRANSCRIPT_UNAVAILABLE)
        : TRANSCRIPT_UNAVAILABLE;
      const { transcribe_stub: _drop, ...restMeta } = noteMeta;
      await store.updateNote(note.id, {
        content: body,
        metadata: restMeta,
        skipUpdatedAt: true,
      });
    } catch (err) {
      logger.error(`[transcribe] failed to apply failure marker to note ${noteId}:`, err);
    }
  }

  /**
   * Write the transcript into the owning note when it is still a stub.
   * Errors are logged and swallowed: the transcript must still be recorded
   * on the attachment afterwards, otherwise it is lost and the row stays
   * "pending", causing scribe to be re-POSTed on every sweep.
   */
  async function applyTranscript(store: Store, noteId: string, transcript: string): Promise<void> {
    try {
      const note = await store.getNote(noteId);
      if (!note) return;
      const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
      if (noteMeta.transcribe_stub !== true) return;

      // A replacer function is used so `$&`, `$1`, `$$` … inside the
      // transcript are inserted literally instead of being interpreted as
      // replacement patterns.
      const body = TRANSCRIPT_PLACEHOLDER.test(note.content)
        ? note.content.replace(TRANSCRIPT_PLACEHOLDER, () => transcript)
        : transcript;
      const { transcribe_stub: _drop, ...restMeta } = noteMeta;
      await store.updateNote(note.id, {
        content: body,
        metadata: restMeta,
        skipUpdatedAt: true,
      });
    } catch (err) {
      logger.error(`[transcribe] failed to apply transcript to note ${noteId}:`, err);
    }
  }

  /** Process one attachment; the caller holds the in-process dedupe guard. */
  async function processOneLocked(vault: string, attachment: Attachment): Promise<void> {
    const store = opts.getStore(vault);
    // Re-read metadata — the in-memory `attachment` may be stale (the hook
    // path hands us the row from just after insert; a concurrent completion
    // in another path may already have flipped the status).
    const fresh = (await store.getAttachment(attachment.id)) ?? attachment;
    const meta: PendingMeta = { ...(fresh.metadata ?? {}) };
    if (meta.transcribe_status !== "pending") return;

    const attempts = meta.transcribe_attempts ?? 0;

    // Honor backoff — re-checked here in case another tick queued this
    // attachment between the listing and now.
    if (isBackedOff(meta)) return;

    const assetsRoot = opts.resolveAssetsDir(vault);
    const filePath = normalize(join(assetsRoot, attachment.path));
    if (!isInsideAssets(filePath, assetsRoot) || !existsSync(filePath)) {
      // Audio gone (or path escapes the assets root) — nothing to
      // transcribe. Mark failed so we don't loop.
      await store.setAttachmentMetadata(attachment.id, {
        ...meta,
        transcribe_status: "failed",
        transcribe_error: "audio file not found",
      });
      await applyFailureMarker(store, attachment.noteId);
      return;
    }

    // Context predicates for this vault. Only a non-empty list triggers a
    // lookup; otherwise no context part is attached.
    let context: ContextPayload | null = null;
    const predicates = opts.getContextPredicates?.(vault);
    if (predicates && predicates.length) {
      context = await fetchContextEntries(store, predicates, logger);
    }

    let transcript: string;
    try {
      transcript = await callScribe({
        url: opts.scribeUrl,
        token: opts.scribeToken,
        filePath,
        filename: attachment.path.split("/").pop() ?? "audio",
        mimeType: attachment.mimeType,
        context,
        timeoutMs,
        fetchImpl,
      });
    } catch (err) {
      const nextAttempts = attempts + 1;
      const errMsg = err instanceof Error ? err.message : String(err);
      if (nextAttempts >= maxAttempts) {
        logger.error(`[transcribe] giving up on attachment ${attachment.id} after ${nextAttempts} attempts:`, errMsg);
        await store.setAttachmentMetadata(attachment.id, {
          ...meta,
          transcribe_status: "failed",
          transcribe_attempts: nextAttempts,
          transcribe_error: errMsg,
        });
        await applyFailureMarker(store, attachment.noteId);
        // retention=never drops the audio on any terminal state, including
        // failure — the user opted out of keeping audio regardless of outcome.
        if (retentionFor(vault) === "never") {
          unlinkIfSafe(filePath, assetsRoot, logger);
        }
        return;
      }
      // Exponential backoff: 30s, 2m, 8m, ...
      const backoffMs = 30_000 * Math.pow(4, nextAttempts - 1);
      const backoffUntil = new Date(Date.now() + backoffMs).toISOString();
      logger.error(`[transcribe] attachment ${attachment.id} attempt ${nextAttempts} failed; retrying at ${backoffUntil}:`, errMsg);
      await store.setAttachmentMetadata(attachment.id, {
        ...meta,
        transcribe_status: "pending",
        transcribe_attempts: nextAttempts,
        transcribe_backoff_until: backoffUntil,
        transcribe_error: errMsg,
      });
      return;
    }

    // Success. Apply to the note if it is still a stub.
    await applyTranscript(store, attachment.noteId, transcript);

    // Always record the transcript on the attachment, even if the note
    // already moved on — the transcript is otherwise discarded.
    const doneMeta: PendingMeta = {
      ...meta,
      transcribe_status: "done",
      transcribe_attempts: attempts + 1,
      transcribe_done_at: new Date().toISOString(),
      transcript,
    };
    delete doneMeta.transcribe_backoff_until;
    delete doneMeta.transcribe_error;
    await store.setAttachmentMetadata(attachment.id, doneMeta);

    // Retention: drop the file but keep the row so the transcript stays
    // addressable. "until_transcribed" and "never" both unlink on success.
    const retention = retentionFor(vault);
    if (retention === "until_transcribed" || retention === "never") {
      unlinkIfSafe(filePath, assetsRoot, logger);
    }
  }

  /**
   * Delete `filePath` only when it lies inside `assetsRoot` and exists.
   * Failures are logged, never thrown — retention cleanup is best-effort.
   */
  function unlinkIfSafe(
    filePath: string,
    assetsRoot: string,
    logger: { error: (...args: unknown[]) => void },
  ): void {
    try {
      if (isInsideAssets(filePath, assetsRoot) && existsSync(filePath)) {
        unlinkSync(filePath);
      }
    } catch (err) {
      logger.error(`[transcribe] retention unlink failed for ${filePath}:`, err);
    }
  }

  /**
   * Run one sweep: for each vault, list up to 50 pending attachments and
   * process them sequentially, skipping any still in backoff. A listing
   * failure skips that vault; a processing failure skips that attachment.
   *
   * @returns How many `processOne` calls completed without throwing.
   */
  async function tick(): Promise<number> {
    let processed = 0;
    for (const vault of opts.vaultList()) {
      const store = opts.getStore(vault);
      let pending: Attachment[];
      try {
        pending = await store.listAttachmentsByTranscribeStatus("pending", 50);
      } catch (err) {
        logger.error(`[transcribe] list failed for vault "${vault}":`, err);
        continue;
      }

      for (const attachment of pending) {
        if (stopped) return processed;
        // Backoff gate — skip without touching.
        const meta = (attachment.metadata as PendingMeta | undefined) ?? {};
        if (isBackedOff(meta)) continue;
        try {
          await processOne(vault, attachment);
          processed++;
        } catch (err) {
          logger.error(`[transcribe] unexpected error on attachment ${attachment.id}:`, err);
        }
      }
    }
    return processed;
  }

  /** Arm the next sweep; re-arms itself after each sweep until stopped. */
  function schedule(): void {
    if (stopped) return;
    timer = setTimeout(() => {
      inflight = tick().catch((err) => {
        logger.error("[transcribe] tick error:", err);
      }).then(() => {
        schedule();
      });
    }, pollMs);
  }

  schedule();

  /** Hook entry point: process one attachment now; never throws. */
  async function kick(vault: string, attachment: Attachment): Promise<void> {
    if (stopped) return;
    try {
      await processOne(vault, attachment);
    } catch (err) {
      logger.error(`[transcribe] kick error on attachment ${attachment.id}:`, err);
    }
  }

  return {
    async stop() {
      stopped = true;
      if (timer) { clearTimeout(timer); timer = null; }
      // Wait for the running sweep (it exits at its next `stopped` check) …
      await inflight;
      // … and for anything started by kick() or a manual tick().
      await Promise.allSettled([...activeWork]);
    },
    tick,
    kick,
  };
}
424
+
425
/**
 * Register the worker as an `attachment:created` hook — the event-driven
 * fast path. An attachment created with `transcribe_status = "pending"` is
 * handed straight to `worker.kick()` instead of waiting for the next sweep.
 *
 * @param registry Hook registry to register with.
 * @param worker Worker whose `kick()` is invoked for matching attachments.
 * @param resolveVault Maps the store delivered to the hook back to its vault
 *   name (the worker needs it to resolve the assets dir, retention policy
 *   and context predicates). Returning `undefined` logs an error and leaves
 *   the attachment for the sweep.
 * @param logger Error sink; defaults to `console`.
 * @returns The unregister function returned by the registry, so tests can
 *   tear down cleanly.
 */
export function registerTranscriptionHook(
  registry: HookRegistry,
  worker: TranscriptionWorker,
  resolveVault: (store: Store) => string | undefined,
  logger: { error: (...args: unknown[]) => void } = console,
): () => void {
  const unregister = registry.onAttachment({
    name: "transcription-kickoff",
    event: "created",
    // Only attachments that were queued for transcription are of interest.
    when: (att) => {
      const meta = att.metadata as { transcribe_status?: string } | undefined;
      return meta?.transcribe_status === "pending";
    },
    handler: async (attachment, store) => {
      const vault = resolveVault(store);
      if (vault) {
        await worker.kick(vault, attachment);
        return;
      }
      logger.error(
        `[transcribe] could not resolve vault for attachment ${attachment.id}; sweep will pick it up`,
      );
    },
  });
  return unregister;
}
460
+
461
/**
 * POST one audio file to scribe's Whisper-shaped transcription endpoint and
 * return the transcript text.
 *
 * The file is read from disk, wrapped in a multipart form under the `file`
 * field (plus a context part when `context` is given), and sent to
 * `<url>/v1/audio/transcriptions`. Any number of trailing slashes on `url`
 * is tolerated. The request is aborted after `timeoutMs`.
 *
 * @param args.url Scribe base URL.
 * @param args.token Optional bearer token for the `Authorization` header.
 * @param args.filePath Path of the audio file on disk.
 * @param args.filename Filename reported in the multipart part.
 * @param args.mimeType MIME type reported in the multipart part.
 * @param args.context Optional context payload to attach; `null` for none.
 * @param args.timeoutMs Abort the request after this many milliseconds.
 * @param args.fetchImpl `fetch` implementation to use.
 * @returns The `text` field of scribe's JSON response.
 * @throws Error when the file cannot be read, the request times out or
 *   fails, scribe answers with a non-2xx status, or the response has no
 *   string `text` field.
 */
async function callScribe(args: {
  url: string;
  token?: string;
  filePath: string;
  filename: string;
  mimeType: string;
  context: ContextPayload | null;
  timeoutMs: number;
  fetchImpl: typeof fetch;
}): Promise<string> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), args.timeoutMs);
  try {
    const audio = readFileSync(args.filePath);
    const form = new FormData();
    form.append("file", new File([audio], args.filename, { type: args.mimeType }));
    if (args.context) appendContextPart(form, args.context);

    // Strip every trailing slash so "http://host//" cannot yield "//v1/…".
    const endpoint = `${args.url.replace(/\/+$/, "")}/v1/audio/transcriptions`;
    const headers: Record<string, string> = {};
    if (args.token) headers["Authorization"] = `Bearer ${args.token}`;

    const resp = await args.fetchImpl(endpoint, {
      method: "POST",
      headers,
      body: form,
      signal: controller.signal,
    });
    if (!resp.ok) {
      throw new Error(`scribe returned ${resp.status}: ${await resp.text().catch(() => "")}`);
    }
    // `?.` guards against a JSON `null` body, which would otherwise raise a
    // TypeError instead of the intended error below.
    const result = (await resp.json()) as { text?: string } | null;
    if (typeof result?.text !== "string") {
      throw new Error("scribe response missing text field");
    }
    return result.text;
  } catch (err) {
    // The only thing that aborts the signal is our own timer, so an aborted
    // signal means the timeout fired. Say so, rather than recording the
    // runtime's generic abort message as the transcription error.
    if (controller.signal.aborted) {
      throw new Error(`scribe request timed out after ${args.timeoutMs}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}