@openparachute/vault 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +2 -25
- package/CHANGELOG.md +64 -0
- package/CLAUDE.md +17 -7
- package/README.md +169 -136
- package/core/src/core.test.ts +591 -19
- package/core/src/hooks.ts +111 -3
- package/core/src/indexed-fields.test.ts +285 -0
- package/core/src/indexed-fields.ts +238 -0
- package/core/src/mcp.ts +127 -6
- package/core/src/notes.ts +153 -11
- package/core/src/query-operators.ts +174 -0
- package/core/src/schema.ts +69 -2
- package/core/src/store.ts +95 -1
- package/core/src/tag-schemas.ts +5 -0
- package/core/src/types.ts +28 -1
- package/docs/HTTP_API.md +105 -1
- package/docs/auth-model.md +340 -0
- package/package/package.json +32 -0
- package/package.json +2 -2
- package/src/auth.test.ts +83 -114
- package/src/auth.ts +68 -6
- package/src/backup-launchd.ts +1 -1
- package/src/backup.test.ts +1 -1
- package/src/backup.ts +18 -17
- package/src/bind.test.ts +28 -0
- package/src/bind.ts +19 -0
- package/src/cli.ts +228 -133
- package/src/config-triggers.test.ts +49 -0
- package/src/config.test.ts +317 -2
- package/src/config.ts +420 -40
- package/src/context.test.ts +136 -0
- package/src/context.ts +115 -0
- package/src/daemon.ts +17 -16
- package/src/doctor.test.ts +9 -7
- package/src/launchd.test.ts +1 -1
- package/src/launchd.ts +6 -6
- package/src/mcp-http.ts +75 -21
- package/src/mcp-install.test.ts +125 -0
- package/src/mcp-install.ts +60 -0
- package/src/mcp-tools.ts +34 -96
- package/src/module-config.ts +109 -0
- package/src/oauth.test.ts +345 -57
- package/src/oauth.ts +155 -35
- package/src/published.test.ts +2 -2
- package/src/routes.ts +209 -33
- package/src/routing.test.ts +817 -300
- package/src/routing.ts +204 -202
- package/src/scopes.test.ts +294 -0
- package/src/scopes.ts +253 -0
- package/src/scribe-env.test.ts +49 -0
- package/src/scribe-env.ts +33 -0
- package/src/server.ts +73 -9
- package/src/services-manifest.test.ts +140 -0
- package/src/services-manifest.ts +99 -0
- package/src/systemd.ts +3 -3
- package/src/token-store.ts +42 -9
- package/src/transcription-worker.test.ts +864 -0
- package/src/transcription-worker.ts +501 -0
- package/src/triggers.test.ts +191 -1
- package/src/triggers.ts +17 -2
- package/src/vault.test.ts +693 -77
- package/src/version.test.ts +1 -1
- package/.playwright-mcp/console-2026-04-14T04-17-25-395Z.log +0 -2
- package/.playwright-mcp/console-2026-04-14T04-18-11-767Z.log +0 -1
- package/.playwright-mcp/console-2026-04-14T04-19-07-733Z.log +0 -2
- package/.playwright-mcp/console-2026-04-14T04-20-45-440Z.log +0 -2
- package/.playwright-mcp/page-2026-04-14T04-17-25-536Z.yml +0 -1
- package/.playwright-mcp/page-2026-04-14T04-18-11-816Z.yml +0 -1
- package/.playwright-mcp/page-2026-04-14T04-18-31-674Z.yml +0 -211
- package/.playwright-mcp/page-2026-04-14T04-19-07-795Z.yml +0 -59
- package/.playwright-mcp/page-2026-04-14T04-19-36-239Z.yml +0 -232
- package/.playwright-mcp/page-2026-04-14T04-19-58-327Z.yml +0 -182
- package/.playwright-mcp/page-2026-04-14T04-20-10-517Z.yml +0 -91
- package/.playwright-mcp/page-2026-04-14T04-20-14-796Z.yml +0 -70
- package/.playwright-mcp/page-2026-04-14T04-20-45-509Z.yml +0 -59
- package/religions-abrahamic-filter.png +0 -0
- package/religions-buddhism-v2.png +0 -0
- package/religions-buddhism.png +0 -0
- package/religions-final.png +0 -0
- package/religions-v1.png +0 -0
- package/religions-v2.png +0 -0
- package/religions-zen.png +0 -0
- package/web/README.md +0 -73
- package/web/bun.lock +0 -827
- package/web/eslint.config.js +0 -23
- package/web/index.html +0 -15
- package/web/package.json +0 -36
- package/web/public/favicon.svg +0 -1
- package/web/public/icons.svg +0 -24
- package/web/src/App.tsx +0 -149
- package/web/src/Graph.tsx +0 -200
- package/web/src/NoteView.tsx +0 -155
- package/web/src/Sidebar.tsx +0 -186
- package/web/src/api.ts +0 -21
- package/web/src/index.css +0 -50
- package/web/src/main.tsx +0 -10
- package/web/src/types.ts +0 -37
- package/web/src/utils.ts +0 -107
- package/web/tsconfig.app.json +0 -25
- package/web/tsconfig.json +0 -7
- package/web/tsconfig.node.json +0 -24
- package/web/vite.config.ts +0 -16
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Event-driven transcription with a safety-net sweep.
|
|
3
|
+
*
|
|
4
|
+
* ## Shape (event-driven happy path, timer-driven failure path)
|
|
5
|
+
*
|
|
6
|
+
* - **Event path (hot):** `POST /api/notes/:id/attachments` with
|
|
7
|
+
* `{transcribe: true}` writes `attachment.metadata.transcribe_status =
|
|
8
|
+
* "pending"` via `store.addAttachment`, which dispatches an
|
|
9
|
+
* `attachment:created` hook. A handler registered via
|
|
10
|
+
* `registerTranscriptionHook` calls `worker.kick()` on the owning vault,
|
|
11
|
+
* so the cycle begins in the microtask after the HTTP response returns —
|
|
12
|
+
* upload latency is not gated on transcription latency.
|
|
13
|
+
* - **Sweep path (safety net):** Every `pollIntervalMs` (default 30s), the
|
|
14
|
+
* worker lists pending attachments across all vaults and runs them. This
|
|
15
|
+
* catches items queued during a server restart, items whose backoff just
|
|
16
|
+
* elapsed, and anything that got orphaned by a dropped hook dispatch.
|
|
17
|
+
*
|
|
18
|
+
* The DB remains the queue — `metadata.transcribe_status = "pending"` is
|
|
19
|
+
* the source of truth; the hook is a shortcut for cache warmth.
|
|
20
|
+
*
|
|
21
|
+
* ## What the worker does per pending attachment
|
|
22
|
+
*
|
|
23
|
+
* 1. Read the audio file from the vault's assets dir.
|
|
24
|
+
* 2. POST it as multipart/form-data to `SCRIBE_URL/v1/audio/transcriptions`
|
|
25
|
+
* (Whisper API shape). Response is `{ text: string }`.
|
|
26
|
+
* 3. On success:
|
|
27
|
+
* - If `note.metadata.transcribe_stub === true`, replace the
|
|
28
|
+
* `_Transcript pending._` placeholder with the transcript, or the
|
|
29
|
+
* whole note body if the placeholder is absent. Clear the stub marker.
|
|
30
|
+
* - Mark `attachment.metadata.transcribe_status = "done"` and record
|
|
31
|
+
* `transcript` + `transcribe_done_at`.
|
|
32
|
+
* - If the vault's `audio_retention` is `"until_transcribed"` or `"never"`, unlink
|
|
33
|
+
* the audio file on disk (the attachment row stays, so the transcript
|
|
34
|
+
* metadata is still addressable).
|
|
35
|
+
* 4. On failure:
|
|
36
|
+
* - Up to `maxAttempts` retries with exponential backoff encoded as
|
|
37
|
+
* `transcribe_backoff_until`. Status stays `"pending"`; the sweep
|
|
38
|
+
* skips ones whose backoff hasn't expired.
|
|
39
|
+
* - After `maxAttempts`, flip status to `"failed"` with `transcribe_error`.
|
|
40
|
+
*
|
|
41
|
+
* ## Concurrency
|
|
42
|
+
*
|
|
43
|
+
* FIFO across all vaults. Hook-driven and sweep-driven paths race on the
|
|
44
|
+
* same attachment if an upload arrives just before a sweep runs; an
|
|
45
|
+
* in-memory `inFlight` set dedupes within the process so we don't double-
|
|
46
|
+
* POST to scribe. Cross-process guarantees still live in the DB — a sweep
|
|
47
|
+
* on another process would see `transcribe_status = "pending"` and try
|
|
48
|
+
* again, which scribe and the metadata writes handle idempotently.
|
|
49
|
+
*/
|
|
50
|
+
|
|
51
|
+
import { join, normalize, sep } from "path";
import { existsSync, readFileSync, unlinkSync } from "fs";
import type { Store, Attachment } from "../core/src/types.ts";
import type { HookRegistry } from "../core/src/hooks.ts";
import { appendContextPart, fetchContextEntries, type ContextPayload } from "./context.ts";
import type { TriggerIncludeContext } from "./config.ts";
|
|
57
|
+
|
|
58
|
+
/**
 * Matches the placeholder text that Lens's voice-memo stub leaves in a note
 * body while a transcript is still outstanding.
 */
const TRANSCRIPT_PLACEHOLDER = /_Transcript pending\._/;

/**
 * Note body text used once transcription has terminally failed (attempts
 * exhausted, or the audio file is gone). The worker owns this marker so a
 * failed upload does not keep reading "Transcript pending" indefinitely,
 * whichever client uploaded the audio.
 */
const TRANSCRIPT_UNAVAILABLE = "_Transcription unavailable._";

/**
 * Fallback sweep interval in milliseconds (30 seconds).
 *
 * The sweep is the safety net, not the hot path: it picks up items whose
 * backoff has elapsed, items queued while the server was down, and anything
 * a dropped hook dispatch left behind. Fresh uploads are handled through the
 * `attachment:created` hook (see `registerTranscriptionHook`).
 *
 * Precedence when the worker starts: the `pollIntervalMs` option, then the
 * `TRANSCRIPTION_SWEEP_MS` environment variable, then this constant. The
 * environment variable is read inside `startTranscriptionWorker()` rather
 * than at module load.
 */
const DEFAULT_POLL_MS = 30 * 1_000;

/** Fallback number of scribe attempts before an attachment is marked failed. */
const DEFAULT_MAX_ATTEMPTS = 3;

/** Fallback per-request scribe timeout in milliseconds (two minutes). */
const DEFAULT_TIMEOUT_MS = 120 * 1_000;
|
|
85
|
+
|
|
86
|
+
/**
 * Per-vault policy for the audio file behind a transcribed attachment.
 *
 * - `"keep"`: the worker never unlinks the file.
 * - `"until_transcribed"`: the file is unlinked after a successful transcription.
 * - `"never"`: the file is unlinked after success and after a terminal failure
 *   caused by exhausted attempts.
 */
export type AudioRetention =
  | "keep"
  | "until_transcribed"
  | "never";
|
|
87
|
+
|
|
88
|
+
/** Dependencies and tunables accepted by `startTranscriptionWorker`. */
export interface TranscriptionWorkerOpts {
  /** Returns the vault names the sweep should scan on each cycle. */
  vaultList: () => string[];

  /** Returns the store backing the named vault. */
  getStore: (name: string) => Store;

  /** Base URL of the scribe service; the worker appends `/v1/audio/transcriptions`. */
  scribeUrl: string;

  /** Bearer token sent to scribe in the `Authorization` header, when provided. */
  scribeToken?: string;

  /** Returns the assets root directory for the named vault. */
  resolveAssetsDir: (vault: string) => string;

  /** Returns the audio retention policy for a vault. Treated as `"keep"` when omitted. */
  getAudioRetention?: (vault: string) => AudioRetention;

  /**
   * Returns the context predicates for a vault. When a non-empty list comes
   * back, the worker evaluates the predicates against the vault store and
   * sends the resulting entries to scribe as a `context` multipart part
   * (the same shape triggers produce via `action.include_context`).
   * `undefined` or `[]` means no context is attached.
   */
  getContextPredicates?: (vault: string) => TriggerIncludeContext[] | undefined;

  /** Sweep interval in milliseconds; overrides the environment variable and the built-in default. */
  pollIntervalMs?: number;

  /** Number of failed scribe attempts after which an attachment is marked `"failed"`. */
  maxAttempts?: number;

  /** Abort a single scribe request after this many milliseconds. */
  timeoutMs?: number;

  /** Replacement for the global `fetch`, e.g. a stub in tests. */
  fetchImpl?: typeof fetch;

  /** Log sink; `console` is used when omitted. */
  logger?: {
    info?: (...args: unknown[]) => void;
    error: (...args: unknown[]) => void;
  };
}
|
|
115
|
+
|
|
116
|
+
/** Handle returned by `startTranscriptionWorker`. */
export interface TranscriptionWorker {
  /** Stop the sweep loop and wait for in-flight work to finish. */
  stop(): Promise<void>;

  /** Run one sweep cycle immediately. Resolves to the number of attachments processed. */
  tick(): Promise<number>;

  /**
   * Process one attachment right away instead of waiting for the next sweep.
   * This is what the `attachment:created` hook calls.
   *
   * Racing with `tick()` is safe: an in-memory `inFlight` guard drops
   * duplicate requests for the same attachment within this process. The
   * promise resolves once the attachment has been processed, or skipped
   * because it is a duplicate, is backing off, or is no longer pending.
   * Errors are logged and swallowed so a throwing hook handler cannot crash
   * the dispatcher.
   */
  kick(vault: string, attachment: Attachment): Promise<void>;
}
|
|
133
|
+
|
|
134
|
+
/**
 * The transcription-related keys the worker reads and writes on
 * `attachment.metadata`. Any other metadata keys are carried through
 * untouched via the index signature.
 */
interface PendingMeta {
  /** Queue state; the worker writes `"pending"`, `"done"` or `"failed"`. */
  transcribe_status?: string;
  /** Number of scribe attempts made so far. */
  transcribe_attempts?: number;
  /** ISO timestamp before which a pending item must not be retried. */
  transcribe_backoff_until?: string;
  /** Not written by the worker; presumably set by the uploader (TODO confirm). */
  transcribe_requested_at?: string;
  /** Message from the most recent failure. */
  transcribe_error?: string;
  /** Transcript text recorded on success. */
  transcript?: string;
  /** ISO timestamp recorded on success. */
  transcribe_done_at?: string;
  [k: string]: unknown;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Report whether `filePath` lies strictly inside the directory `assetsRoot`.
 *
 * The root is compared with a trailing path separator, so a sibling
 * directory that merely shares the prefix (`/data/assets-old` next to
 * `/data/assets`) is not mistaken for a child. `filePath` must already be
 * normalized; the root directory itself does not count as "inside".
 */
function isInsideAssetsRoot(filePath: string, assetsRoot: string): boolean {
  const root = normalize(assetsRoot);
  const prefix = root.endsWith(sep) ? root : root + sep;
  return filePath.startsWith(prefix);
}

/**
 * Produce the note body for a stub note: the first placeholder in `content`
 * is swapped for `replacement`, or `replacement` becomes the whole body when
 * no placeholder is present.
 */
function fillPlaceholder(content: string, replacement: string): string {
  if (!TRANSCRIPT_PLACEHOLDER.test(content)) return replacement;
  // A function replacer inserts the text verbatim. A string replacement would
  // expand `$&`, `$$`, "$`" and `$'` sequences that occur in transcript text.
  return content.replace(TRANSCRIPT_PLACEHOLDER, () => replacement);
}

/** True while `transcribe_backoff_until` holds a parseable timestamp in the future. */
function isBackingOff(meta: PendingMeta): boolean {
  if (!meta.transcribe_backoff_until) return false;
  const until = Date.parse(String(meta.transcribe_backoff_until));
  return Number.isFinite(until) && until > Date.now();
}

/**
 * Start the worker loop. Returns a handle with `stop()`, `tick()` and
 * `kick()`. Tests should build the worker and call `tick()` / `kick()`
 * directly; production starts the sweep implicitly by constructing the
 * worker (the first sweep is scheduled before this function returns).
 *
 * @param opts Stores, scribe endpoint, assets resolution and tunables.
 * @returns The worker handle. `stop()` cancels the sweep timer and waits for
 *   the current sweep as well as any attachment still being processed.
 */
export function startTranscriptionWorker(opts: TranscriptionWorkerOpts): TranscriptionWorker {
  const logger = opts.logger ?? console;
  const fetchImpl = opts.fetchImpl ?? fetch;
  // Precedence: opts.pollIntervalMs > TRANSCRIPTION_SWEEP_MS env > DEFAULT_POLL_MS.
  // The env var is read here (not at module scope) so values loaded into
  // process.env after import still apply.
  const envPoll = Number(process.env.TRANSCRIPTION_SWEEP_MS);
  const defaultPollMs = Number.isFinite(envPoll) && envPoll > 0 ? envPoll : DEFAULT_POLL_MS;
  const pollMs = opts.pollIntervalMs ?? defaultPollMs;
  const maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const retentionFor = opts.getAudioRetention ?? (() => "keep" as const);

  let stopped = false;
  // Promise of the most recent timer-driven sweep; never rejects.
  let inflight: Promise<void> = Promise.resolve();
  let timer: ReturnType<typeof setTimeout> | null = null;

  /**
   * In-process dedupe: IDs of attachments currently being worked. The
   * event-driven `kick()` path can race the sweep on the same attachment;
   * without this guard both would POST the audio to scribe.
   */
  const inFlightAttachments = new Set<string>();

  /**
   * Non-rejecting promises for every attachment currently being processed,
   * whichever path started it. `stop()` drains this set so work started by
   * `kick()` or a manual `tick()` is awaited too.
   */
  const activeJobs = new Set<Promise<void>>();

  /**
   * Process one attachment unless another path is already working on it.
   * Rejects with whatever `processOneLocked` throws.
   */
  async function processOne(vault: string, attachment: Attachment): Promise<void> {
    // Duplicate dispatch: drop it. The other path's result is durable in the
    // DB, and the sweep re-picks anything that still needs work.
    if (inFlightAttachments.has(attachment.id)) return;
    inFlightAttachments.add(attachment.id);

    const job = (async () => {
      try {
        await processOneLocked(vault, attachment);
      } finally {
        inFlightAttachments.delete(attachment.id);
      }
    })();

    const tracked: Promise<void> = job
      .catch(() => undefined)
      .finally(() => {
        activeJobs.delete(tracked);
      });
    activeJobs.add(tracked);

    await job;
  }

  /**
   * On a terminal failure, swap the stub placeholder for the "unavailable"
   * marker so the note stops reading "Transcript pending". Only touches notes
   * with `transcribe_stub === true`, clears that marker, and passes
   * `skipUpdatedAt`. A failed note write is logged and swallowed so it does
   * not mask the attachment failure being recorded.
   */
  async function applyFailureMarker(store: Store, noteId: string): Promise<void> {
    const note = await store.getNote(noteId);
    if (!note) return;
    const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
    if (noteMeta.transcribe_stub !== true) return;

    const body = fillPlaceholder(note.content, TRANSCRIPT_UNAVAILABLE);
    const { transcribe_stub: _drop, ...restMeta } = noteMeta;
    try {
      await store.updateNote(note.id, {
        content: body,
        metadata: restMeta,
        skipUpdatedAt: true,
      });
    } catch (err) {
      logger.error(`[transcribe] failed to apply failure marker to note ${note.id}:`, err);
    }
  }

  /**
   * Transcribe one attachment and persist the outcome. The caller must
   * already hold the in-process dedupe slot for `attachment.id`.
   */
  async function processOneLocked(vault: string, attachment: Attachment): Promise<void> {
    const store = opts.getStore(vault);
    // Re-read metadata: the row handed in may be stale (the hook delivers the
    // row from just after insert; another path may have finished already).
    const fresh = (await store.getAttachment(attachment.id)) ?? attachment;
    const meta: PendingMeta = { ...(fresh.metadata ?? {}) };
    if (meta.transcribe_status !== "pending") return;

    const attempts = (meta.transcribe_attempts as number | undefined) ?? 0;

    // Honor backoff against the fresh metadata as well.
    if (isBackingOff(meta)) return;

    const assetsRoot = opts.resolveAssetsDir(vault);
    const filePath = normalize(join(assetsRoot, attachment.path));
    if (!isInsideAssetsRoot(filePath, assetsRoot) || !existsSync(filePath)) {
      // Audio gone (or the path escapes the assets root): nothing to
      // transcribe. Mark failed so the sweep does not loop on it.
      await store.setAttachmentMetadata(attachment.id, {
        ...meta,
        transcribe_status: "failed",
        transcribe_error: "audio file not found",
      });
      await applyFailureMarker(store, attachment.noteId);
      return;
    }

    // Optional context for the scribe POST, built from the vault's predicates.
    let context: ContextPayload | null = null;
    const predicates = opts.getContextPredicates?.(vault);
    if (predicates && predicates.length) {
      context = await fetchContextEntries(store, predicates, logger);
    }

    let transcript: string;
    try {
      transcript = await callScribe({
        url: opts.scribeUrl,
        token: opts.scribeToken,
        filePath,
        filename: attachment.path.split("/").pop() ?? "audio",
        mimeType: attachment.mimeType,
        context,
        timeoutMs,
        fetchImpl,
      });
    } catch (err) {
      const nextAttempts = attempts + 1;
      const errMsg = err instanceof Error ? err.message : String(err);
      if (nextAttempts >= maxAttempts) {
        logger.error(`[transcribe] giving up on attachment ${attachment.id} after ${nextAttempts} attempts:`, errMsg);
        await store.setAttachmentMetadata(attachment.id, {
          ...meta,
          transcribe_status: "failed",
          transcribe_attempts: nextAttempts,
          transcribe_error: errMsg,
        });
        await applyFailureMarker(store, attachment.noteId);
        // retention=never drops the audio on any terminal state, including
        // failure.
        if (retentionFor(vault) === "never") {
          unlinkIfSafe(filePath, assetsRoot, logger);
        }
        return;
      }
      // Exponential backoff: 30s, 2m, 8m, ...
      const backoffMs = 30_000 * Math.pow(4, nextAttempts - 1);
      const backoffUntil = new Date(Date.now() + backoffMs).toISOString();
      logger.error(`[transcribe] attachment ${attachment.id} attempt ${nextAttempts} failed; retrying at ${backoffUntil}:`, errMsg);
      await store.setAttachmentMetadata(attachment.id, {
        ...meta,
        transcribe_status: "pending",
        transcribe_attempts: nextAttempts,
        transcribe_backoff_until: backoffUntil,
        transcribe_error: errMsg,
      });
      return;
    }

    // Success. Write the transcript into the note only while it is still a stub.
    const note = await store.getNote(attachment.noteId);
    if (note) {
      const noteMeta = (note.metadata as Record<string, unknown> | undefined) ?? {};
      if (noteMeta.transcribe_stub === true) {
        const body = fillPlaceholder(note.content, transcript);
        const { transcribe_stub: _drop, ...restMeta } = noteMeta;
        try {
          await store.updateNote(note.id, {
            content: body,
            metadata: restMeta,
            skipUpdatedAt: true,
          });
        } catch (err) {
          logger.error(`[transcribe] failed to apply transcript to note ${note.id}:`, err);
        }
      }
    }

    // Always record the transcript on the attachment, even when the note was
    // not updated; otherwise the transcript would be discarded.
    const doneMeta: PendingMeta = {
      ...meta,
      transcribe_status: "done",
      transcribe_attempts: attempts + 1,
      transcribe_done_at: new Date().toISOString(),
      transcript,
    };
    delete doneMeta.transcribe_backoff_until;
    delete doneMeta.transcribe_error;
    await store.setAttachmentMetadata(attachment.id, doneMeta);

    // Retention: drop the file but keep the row so the transcript stays
    // addressable. "until_transcribed" and "never" both unlink on success.
    const retention = retentionFor(vault);
    if (retention === "until_transcribed" || retention === "never") {
      unlinkIfSafe(filePath, assetsRoot, logger);
    }
  }

  /**
   * Delete `filePath` if it exists inside `assetsRoot`. Failures are logged,
   * never thrown.
   */
  function unlinkIfSafe(
    filePath: string,
    assetsRoot: string,
    logger: { error: (...args: unknown[]) => void },
  ): void {
    try {
      if (isInsideAssetsRoot(filePath, assetsRoot) && existsSync(filePath)) {
        unlinkSync(filePath);
      }
    } catch (err) {
      logger.error(`[transcribe] retention unlink failed for ${filePath}:`, err);
    }
  }

  /**
   * Run one sweep: for each vault, list up to 50 pending attachments and
   * process them one at a time. Returns how many `processOne` calls completed
   * without throwing.
   */
  async function tick(): Promise<number> {
    let processed = 0;
    for (const vault of opts.vaultList()) {
      const store = opts.getStore(vault);
      let pending: Attachment[];
      try {
        pending = await store.listAttachmentsByTranscribeStatus("pending", 50);
      } catch (err) {
        logger.error(`[transcribe] list failed for vault "${vault}":`, err);
        continue;
      }

      for (const attachment of pending) {
        if (stopped) return processed;
        // Backoff gate: skip without touching.
        const meta = (attachment.metadata as PendingMeta | undefined) ?? {};
        if (isBackingOff(meta)) continue;
        try {
          await processOne(vault, attachment);
          processed++;
        } catch (err) {
          logger.error(`[transcribe] unexpected error on attachment ${attachment.id}:`, err);
        }
      }
    }
    return processed;
  }

  /** Arm the next sweep; each sweep re-arms the timer once it has finished. */
  function schedule(): void {
    if (stopped) return;
    timer = setTimeout(() => {
      inflight = tick().catch((err) => {
        logger.error("[transcribe] tick error:", err);
      }).then(() => {
        schedule();
      });
    }, pollMs);
  }

  schedule();

  /** Event-driven entry point: process one attachment now; never rejects. */
  async function kick(vault: string, attachment: Attachment): Promise<void> {
    if (stopped) return;
    try {
      await processOne(vault, attachment);
    } catch (err) {
      logger.error(`[transcribe] kick error on attachment ${attachment.id}:`, err);
    }
  }

  return {
    async stop() {
      stopped = true;
      if (timer) { clearTimeout(timer); timer = null; }
      await inflight;
      // Also drain work started by kick() or a manual tick().
      await Promise.all(activeJobs);
    },
    tick,
    kick,
  };
}
|
|
424
|
+
|
|
425
|
+
/**
 * Register the worker as an `attachment:created` hook handler, giving
 * uploads an event-driven fast path: an attachment created with
 * `transcribe_status = "pending"` is handed to `worker.kick()` instead of
 * waiting for the next sweep.
 *
 * @param registry Hook registry to register on.
 * @param worker Worker whose `kick()` is invoked for matching attachments.
 * @param resolveVault Maps the store passed to the hook back to its vault
 *   name. When it returns nothing, the attachment is left for the sweep and
 *   an error is logged.
 * @param logger Error sink; defaults to `console`.
 * @returns Whatever `registry.onAttachment` returns, which callers use as an
 *   unregister function.
 */
export function registerTranscriptionHook(
  registry: HookRegistry,
  worker: TranscriptionWorker,
  resolveVault: (store: Store) => string | undefined,
  logger: { error: (...args: unknown[]) => void } = console,
): () => void {
  return registry.onAttachment({
    name: "transcription-kickoff",
    event: "created",
    when: (att) => {
      const meta = att.metadata as { transcribe_status?: string } | undefined;
      return meta?.transcribe_status === "pending";
    },
    handler: async (attachment, store) => {
      const vault = resolveVault(store);
      if (vault) {
        await worker.kick(vault, attachment);
        return;
      }
      logger.error(
        `[transcribe] could not resolve vault for attachment ${attachment.id}; sweep will pick it up`,
      );
    },
  });
}
|
|
460
|
+
|
|
461
|
+
/**
 * POST an audio file to scribe's Whisper-style transcription endpoint and
 * return the transcript text.
 *
 * @param args.url Scribe base URL; trailing slashes are stripped before
 *   `/v1/audio/transcriptions` is appended.
 * @param args.token Optional bearer token for the `Authorization` header.
 * @param args.filePath Path of the audio file to read and upload.
 * @param args.filename File name reported in the multipart `file` part.
 * @param args.mimeType MIME type reported for the file part.
 * @param args.context Optional context payload, added as an extra multipart part.
 * @param args.timeoutMs Abort the request after this many milliseconds.
 * @param args.fetchImpl `fetch` implementation to use.
 * @returns The `text` field of scribe's JSON response.
 * @throws Error when the file cannot be read, the request fails or times
 *   out, scribe answers with a non-2xx status, or the response has no string
 *   `text` field.
 */
async function callScribe(args: {
  url: string;
  token?: string;
  filePath: string;
  filename: string;
  mimeType: string;
  context: ContextPayload | null;
  timeoutMs: number;
  fetchImpl: typeof fetch;
}): Promise<string> {
  const controller = new AbortController();
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, args.timeoutMs);
  try {
    const fileBuffer = readFileSync(args.filePath);
    const file = new File([fileBuffer], args.filename, { type: args.mimeType });
    const form = new FormData();
    form.append("file", file);
    if (args.context) appendContextPart(form, args.context);

    const endpoint = `${args.url.replace(/\/+$/, "")}/v1/audio/transcriptions`;
    const headers: Record<string, string> = {};
    if (args.token) headers["Authorization"] = `Bearer ${args.token}`;

    const resp = await args.fetchImpl(endpoint, {
      method: "POST",
      headers,
      body: form,
      signal: controller.signal,
    });
    if (!resp.ok) {
      throw new Error(`scribe returned ${resp.status}: ${await resp.text().catch(() => "")}`);
    }
    // A JSON body of `null` is valid JSON; guard it so the caller sees the
    // descriptive error below rather than a TypeError.
    const result = (await resp.json()) as { text?: string } | null;
    if (typeof result?.text !== "string") {
      throw new Error("scribe response missing text field");
    }
    return result.text;
  } catch (err) {
    if (timedOut) {
      // The abort was ours: report it as a timeout, keeping the underlying
      // message for diagnosis.
      const detail = err instanceof Error ? err.message : String(err);
      throw new Error(`scribe request timed out after ${args.timeoutMs}ms (${detail})`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
|