@vellumai/assistant 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/Dockerfile +17 -27
  2. package/node_modules/@vellumai/ces-contracts/src/index.ts +1 -0
  3. package/node_modules/@vellumai/ces-contracts/src/trust-rules.ts +42 -0
  4. package/package.json +1 -1
  5. package/src/__tests__/actor-token-service.test.ts +113 -0
  6. package/src/__tests__/config-schema.test.ts +2 -2
  7. package/src/__tests__/context-window-manager.test.ts +78 -0
  8. package/src/__tests__/conversation-title-service.test.ts +30 -1
  9. package/src/__tests__/credential-security-invariants.test.ts +2 -0
  10. package/src/__tests__/docker-signing-key-bootstrap.test.ts +207 -0
  11. package/src/__tests__/memory-regressions.test.ts +8 -30
  12. package/src/__tests__/openai-whisper.test.ts +93 -0
  13. package/src/__tests__/require-fresh-approval.test.ts +4 -0
  14. package/src/__tests__/slack-messaging-token-resolution.test.ts +319 -0
  15. package/src/__tests__/tool-executor-lifecycle-events.test.ts +4 -0
  16. package/src/__tests__/tool-executor.test.ts +4 -0
  17. package/src/__tests__/volume-security-guard.test.ts +155 -0
  18. package/src/cli/commands/conversations.ts +0 -18
  19. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -0
  20. package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +16 -37
  21. package/src/config/env-registry.ts +9 -0
  22. package/src/config/env.ts +8 -2
  23. package/src/config/feature-flag-registry.json +8 -8
  24. package/src/config/schema.ts +0 -12
  25. package/src/config/schemas/memory.ts +0 -4
  26. package/src/config/schemas/platform.ts +1 -1
  27. package/src/config/schemas/security.ts +4 -0
  28. package/src/context/window-manager.ts +53 -2
  29. package/src/credential-execution/managed-catalog.ts +5 -15
  30. package/src/daemon/conversation-agent-loop.ts +0 -60
  31. package/src/daemon/conversation-memory.ts +0 -117
  32. package/src/daemon/conversation-runtime-assembly.ts +0 -2
  33. package/src/daemon/daemon-control.ts +7 -0
  34. package/src/daemon/handlers/conversations.ts +0 -11
  35. package/src/daemon/lifecycle.ts +10 -47
  36. package/src/daemon/providers-setup.ts +2 -1
  37. package/src/followups/followup-store.ts +5 -2
  38. package/src/hooks/manager.ts +7 -0
  39. package/src/instrument.ts +33 -1
  40. package/src/memory/conversation-crud.ts +0 -236
  41. package/src/memory/conversation-title-service.ts +26 -10
  42. package/src/memory/db-init.ts +5 -13
  43. package/src/memory/embedding-local.ts +11 -5
  44. package/src/memory/indexer.ts +15 -106
  45. package/src/memory/job-handlers/conversation-starters.ts +24 -36
  46. package/src/memory/job-handlers/embedding.ts +0 -79
  47. package/src/memory/job-utils.ts +1 -1
  48. package/src/memory/jobs-store.ts +0 -8
  49. package/src/memory/jobs-worker.ts +0 -20
  50. package/src/memory/migrations/189-drop-simplified-memory.ts +42 -0
  51. package/src/memory/migrations/index.ts +1 -3
  52. package/src/memory/qdrant-client.ts +4 -6
  53. package/src/memory/schema/conversations.ts +0 -3
  54. package/src/memory/schema/index.ts +0 -2
  55. package/src/messaging/draft-store.ts +2 -2
  56. package/src/messaging/provider.ts +9 -0
  57. package/src/messaging/providers/slack/adapter.ts +29 -2
  58. package/src/oauth/connection-resolver.test.ts +22 -18
  59. package/src/oauth/connection-resolver.ts +92 -7
  60. package/src/oauth/platform-connection.test.ts +78 -69
  61. package/src/oauth/platform-connection.ts +12 -19
  62. package/src/permissions/defaults.ts +3 -3
  63. package/src/permissions/trust-client.ts +332 -0
  64. package/src/permissions/trust-store-interface.ts +105 -0
  65. package/src/permissions/trust-store.ts +531 -39
  66. package/src/platform/client.test.ts +148 -0
  67. package/src/platform/client.ts +71 -0
  68. package/src/providers/speech-to-text/openai-whisper.test.ts +190 -0
  69. package/src/providers/speech-to-text/openai-whisper.ts +68 -0
  70. package/src/providers/speech-to-text/resolve.ts +9 -0
  71. package/src/providers/speech-to-text/types.ts +17 -0
  72. package/src/runtime/auth/route-policy.ts +14 -0
  73. package/src/runtime/auth/token-service.ts +133 -0
  74. package/src/runtime/http-server.ts +4 -2
  75. package/src/runtime/routes/conversation-management-routes.ts +0 -36
  76. package/src/runtime/routes/conversation-query-routes.ts +44 -2
  77. package/src/runtime/routes/conversation-routes.ts +2 -1
  78. package/src/runtime/routes/inbound-message-handler.ts +27 -3
  79. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +16 -1
  80. package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +287 -0
  81. package/src/runtime/routes/inbound-stages/transcribe-audio.ts +122 -0
  82. package/src/runtime/routes/log-export-routes.ts +1 -0
  83. package/src/runtime/routes/memory-item-routes.test.ts +221 -3
  84. package/src/runtime/routes/memory-item-routes.ts +124 -2
  85. package/src/runtime/routes/secret-routes.ts +4 -1
  86. package/src/runtime/routes/upgrade-broadcast-routes.ts +151 -0
  87. package/src/schedule/schedule-store.ts +0 -21
  88. package/src/security/ces-credential-client.ts +173 -0
  89. package/src/security/secure-keys.ts +65 -22
  90. package/src/signals/bash.ts +3 -0
  91. package/src/signals/cancel.ts +3 -0
  92. package/src/signals/confirm.ts +3 -0
  93. package/src/signals/conversation-undo.ts +3 -0
  94. package/src/signals/event-stream.ts +7 -0
  95. package/src/signals/shotgun.ts +3 -0
  96. package/src/signals/trust-rule.ts +3 -0
  97. package/src/skills/inline-command-render.ts +5 -1
  98. package/src/skills/inline-command-runner.ts +30 -2
  99. package/src/telemetry/usage-telemetry-reporter.test.ts +23 -36
  100. package/src/telemetry/usage-telemetry-reporter.ts +21 -19
  101. package/src/tools/memory/handlers.ts +1 -129
  102. package/src/tools/permission-checker.ts +18 -0
  103. package/src/tools/skills/load.ts +9 -2
  104. package/src/util/device-id.ts +70 -7
  105. package/src/util/logger.ts +35 -9
  106. package/src/util/platform.ts +29 -5
  107. package/src/util/xml.ts +8 -0
  108. package/src/workspace/heartbeat-service.ts +5 -24
  109. package/src/workspace/migrations/migrate-to-workspace-volume.ts +113 -0
  110. package/src/workspace/migrations/registry.ts +2 -0
  111. package/src/__tests__/archive-recall.test.ts +0 -560
  112. package/src/__tests__/conversation-memory-dirty-tail.test.ts +0 -150
  113. package/src/__tests__/conversation-switch-memory-reduction.test.ts +0 -474
  114. package/src/__tests__/db-memory-archive-migration.test.ts +0 -372
  115. package/src/__tests__/db-memory-brief-state-migration.test.ts +0 -213
  116. package/src/__tests__/db-memory-reducer-checkpoints.test.ts +0 -273
  117. package/src/__tests__/memory-brief-open-loops.test.ts +0 -530
  118. package/src/__tests__/memory-brief-time.test.ts +0 -285
  119. package/src/__tests__/memory-brief-wrapper.test.ts +0 -311
  120. package/src/__tests__/memory-chunk-archive.test.ts +0 -400
  121. package/src/__tests__/memory-chunk-dual-write.test.ts +0 -453
  122. package/src/__tests__/memory-episode-archive.test.ts +0 -370
  123. package/src/__tests__/memory-episode-dual-write.test.ts +0 -626
  124. package/src/__tests__/memory-observation-archive.test.ts +0 -375
  125. package/src/__tests__/memory-observation-dual-write.test.ts +0 -318
  126. package/src/__tests__/memory-reducer-job.test.ts +0 -538
  127. package/src/__tests__/memory-reducer-scheduling.test.ts +0 -473
  128. package/src/__tests__/memory-reducer-store.test.ts +0 -728
  129. package/src/__tests__/memory-reducer-types.test.ts +0 -707
  130. package/src/__tests__/memory-reducer.test.ts +0 -704
  131. package/src/__tests__/memory-simplified-config.test.ts +0 -281
  132. package/src/__tests__/simplified-memory-e2e.test.ts +0 -666
  133. package/src/__tests__/simplified-memory-runtime.test.ts +0 -616
  134. package/src/config/schemas/memory-simplified.ts +0 -101
  135. package/src/memory/archive-recall.ts +0 -516
  136. package/src/memory/archive-store.ts +0 -400
  137. package/src/memory/brief-formatting.ts +0 -33
  138. package/src/memory/brief-open-loops.ts +0 -266
  139. package/src/memory/brief-time.ts +0 -162
  140. package/src/memory/brief.ts +0 -75
  141. package/src/memory/job-handlers/backfill-simplified-memory.ts +0 -462
  142. package/src/memory/job-handlers/reduce-conversation-memory.ts +0 -229
  143. package/src/memory/migrations/185-memory-brief-state.ts +0 -52
  144. package/src/memory/migrations/186-memory-archive.ts +0 -109
  145. package/src/memory/migrations/187-memory-reducer-checkpoints.ts +0 -19
  146. package/src/memory/reducer-scheduler.ts +0 -242
  147. package/src/memory/reducer-store.ts +0 -271
  148. package/src/memory/reducer-types.ts +0 -106
  149. package/src/memory/reducer.ts +0 -467
  150. package/src/memory/schema/memory-archive.ts +0 -121
  151. package/src/memory/schema/memory-brief.ts +0 -55
@@ -172,6 +172,7 @@ import { surfaceContentRouteDefinitions } from "./routes/surface-content-routes.
172
172
  import { telemetryRouteDefinitions } from "./routes/telemetry-routes.js";
173
173
  import { traceEventRouteDefinitions } from "./routes/trace-event-routes.js";
174
174
  import { trustRulesRouteDefinitions } from "./routes/trust-rules-routes.js";
175
+ import { upgradeBroadcastRouteDefinitions } from "./routes/upgrade-broadcast-routes.js";
175
176
  import { usageRouteDefinitions } from "./routes/usage-routes.js";
176
177
  import { watchRouteDefinitions } from "./routes/watch-routes.js";
177
178
  import { workItemRouteDefinitions } from "./routes/work-items-routes.js";
@@ -208,8 +209,8 @@ const log = getLogger("runtime-http");
208
209
  const DEFAULT_PORT = 7821;
209
210
  const DEFAULT_HOSTNAME = "127.0.0.1";
210
211
 
211
- /** Global hard cap on request body size (150 MB — accommodates base64-encoded 100 MB attachments). */
212
- const MAX_REQUEST_BODY_BYTES = 150 * 1024 * 1024;
212
+ /** Global hard cap on request body size (512 MB — accommodates large .vbundle backup imports). */
213
+ const MAX_REQUEST_BODY_BYTES = 512 * 1024 * 1024;
213
214
 
214
215
  export class RuntimeHttpServer {
215
216
  private server: ReturnType<typeof Bun.serve> | null = null;
@@ -918,6 +919,7 @@ export class RuntimeHttpServer {
918
919
  getCesClient: this.getCesClient,
919
920
  }),
920
921
  ...identityRouteDefinitions(),
922
+ ...upgradeBroadcastRouteDefinitions(),
921
923
  ...debugRouteDefinitions(),
922
924
  ...usageRouteDefinitions(),
923
925
  ...telemetryRouteDefinitions(),
@@ -275,24 +275,6 @@ export function conversationManagementRouteDefinitions(
275
275
  targetId: summaryId,
276
276
  });
277
277
  }
278
- for (const obsId of result.deletedObservationIds) {
279
- enqueueMemoryJob("delete_qdrant_vectors", {
280
- targetType: "observation",
281
- targetId: obsId,
282
- });
283
- }
284
- for (const chunkId of result.deletedChunkIds) {
285
- enqueueMemoryJob("delete_qdrant_vectors", {
286
- targetType: "chunk",
287
- targetId: chunkId,
288
- });
289
- }
290
- for (const episodeId of result.deletedEpisodeIds) {
291
- enqueueMemoryJob("delete_qdrant_vectors", {
292
- targetType: "episode",
293
- targetId: episodeId,
294
- });
295
- }
296
278
  log.info(
297
279
  {
298
280
  conversationId: resolvedId,
@@ -349,24 +331,6 @@ export function conversationManagementRouteDefinitions(
349
331
  targetId: summaryId,
350
332
  });
351
333
  }
352
- for (const obsId of deleted.deletedObservationIds) {
353
- enqueueMemoryJob("delete_qdrant_vectors", {
354
- targetType: "observation",
355
- targetId: obsId,
356
- });
357
- }
358
- for (const chunkId of deleted.deletedChunkIds) {
359
- enqueueMemoryJob("delete_qdrant_vectors", {
360
- targetType: "chunk",
361
- targetId: chunkId,
362
- });
363
- }
364
- for (const episodeId of deleted.deletedEpisodeIds) {
365
- enqueueMemoryJob("delete_qdrant_vectors", {
366
- targetType: "episode",
367
- targetId: episodeId,
368
- });
369
- }
370
334
  log.info({ conversationId: resolvedId }, "Deleted conversation");
371
335
  return new Response(null, { status: 204 });
372
336
  },
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * HTTP route definitions for model configuration, embedding configuration,
3
- * conversation search, message content, LLM context inspection, and queued
4
- * message deletion.
3
+ * permissions configuration, conversation search, message content, LLM
4
+ * context inspection, and queued message deletion.
5
5
  *
6
6
  * These routes expose conversation query functionality over the HTTP API.
7
7
  *
@@ -10,12 +10,15 @@
10
10
  * PUT /v1/model/image-gen — set image-gen model
11
11
  * GET /v1/config/embeddings — current embedding config
12
12
  * PUT /v1/config/embeddings — set embedding provider/model
13
+ * GET /v1/config/permissions/skip — dangerouslySkipPermissions status
14
+ * PUT /v1/config/permissions/skip — toggle dangerouslySkipPermissions
13
15
  * GET /v1/conversations/search — search conversations
14
16
  * GET /v1/messages/:id/content — full message content
15
17
  * GET /v1/messages/:id/llm-context — LLM request logs for a message
16
18
  * DELETE /v1/messages/queued/:id — delete queued message
17
19
  */
18
20
 
21
+ import { getConfig, loadRawConfig, saveRawConfig } from "../../config/loader.js";
19
22
  import { VALID_MEMORY_EMBEDDING_PROVIDERS } from "../../config/schemas/memory-storage.js";
20
23
  import { VALID_INFERENCE_PROVIDERS } from "../../config/schemas/services.js";
21
24
  import {
@@ -250,6 +253,45 @@ export function conversationQueryRouteDefinitions(
250
253
  },
251
254
  },
252
255
 
256
+ // ── Permissions config ─────────────────────────────────────────────
257
+ {
258
+ endpoint: "config/permissions/skip",
259
+ method: "GET",
260
+ policyKey: "config/permissions/skip",
261
+ handler: () => {
262
+ const config = getConfig();
263
+ return Response.json({
264
+ enabled: config.permissions.dangerouslySkipPermissions,
265
+ });
266
+ },
267
+ },
268
+ {
269
+ endpoint: "config/permissions/skip",
270
+ method: "PUT",
271
+ policyKey: "config/permissions/skip",
272
+ handler: async ({ req }) => {
273
+ const body = (await req.json()) as { enabled?: unknown };
274
+ if (typeof body.enabled !== "boolean") {
275
+ return httpError(
276
+ "BAD_REQUEST",
277
+ "Missing or invalid field: enabled (boolean)",
278
+ 400,
279
+ );
280
+ }
281
+ const raw = loadRawConfig();
282
+ const permissions: Record<string, unknown> =
283
+ raw.permissions != null &&
284
+ typeof raw.permissions === "object" &&
285
+ !Array.isArray(raw.permissions)
286
+ ? (raw.permissions as Record<string, unknown>)
287
+ : {};
288
+ permissions.dangerouslySkipPermissions = body.enabled;
289
+ raw.permissions = permissions;
290
+ saveRawConfig(raw);
291
+ return Response.json({ enabled: body.enabled });
292
+ },
293
+ },
294
+
253
295
  // ── Conversation search ───────────────────────────────────────────
254
296
  {
255
297
  endpoint: "conversations/search",
@@ -637,6 +637,7 @@ export async function handleSendMessage(
637
637
  interface?: string;
638
638
  conversationType?: string;
639
639
  automated?: boolean;
640
+ bypassSecretCheck?: boolean;
640
641
  };
641
642
 
642
643
  const { conversationKey, content, attachmentIds } = body;
@@ -708,7 +709,7 @@ export async function handleSendMessage(
708
709
  // This mirrors the legacy handleUserMessage behavior: secrets are
709
710
  // detected and the message is rejected with a safe notice. The client
710
711
  // should prompt the user to use the secure credential flow instead.
711
- if (trimmedContent.length > 0) {
712
+ if (trimmedContent.length > 0 && !body.bypassSecretCheck) {
712
713
  const ingressCheck = checkIngressForSecrets(trimmedContent);
713
714
  if (ingressCheck.blocked) {
714
715
  log.warn(
@@ -44,6 +44,7 @@ import { handleEditIntercept } from "./inbound-stages/edit-intercept.js";
44
44
  import { handleEscalationIntercept } from "./inbound-stages/escalation-intercept.js";
45
45
  import { handleGuardianReplyIntercept } from "./inbound-stages/guardian-reply-intercept.js";
46
46
  import { runSecretIngressCheck } from "./inbound-stages/secret-ingress-check.js";
47
+ import { tryTranscribeAudioAttachments } from "./inbound-stages/transcribe-audio.js";
47
48
  import { handleVerificationIntercept } from "./inbound-stages/verification-intercept.js";
48
49
 
49
50
  const log = getLogger("runtime-http");
@@ -144,7 +145,7 @@ export async function handleChannelInbound(
144
145
  return httpError("BAD_REQUEST", "content must be a string", 400);
145
146
  }
146
147
 
147
- const trimmedContent = typeof content === "string" ? content.trim() : "";
148
+ let trimmedContent = typeof content === "string" ? content.trim() : "";
148
149
  const hasAttachments =
149
150
  Array.isArray(attachmentIds) && attachmentIds.length > 0;
150
151
 
@@ -227,6 +228,29 @@ export async function handleChannelInbound(
227
228
  }
228
229
  }
229
230
 
231
+ // Auto-transcribe audio attachments from channel messages
232
+ if (hasAttachments && sourceChannel) {
233
+ const transcribeResult = await tryTranscribeAudioAttachments(attachmentIds);
234
+ switch (transcribeResult.status) {
235
+ case "transcribed":
236
+ // For voice-only messages (empty content), this becomes the message text.
237
+ // For audio+caption, both are preserved.
238
+ trimmedContent =
239
+ transcribeResult.text +
240
+ (trimmedContent ? `\n\n${trimmedContent}` : "");
241
+ break;
242
+ case "no_provider":
243
+ case "error":
244
+ // Inject a hint so the assistant knows the user sent audio and why
245
+ // transcription failed — it can then guide the user (e.g. set up API key).
246
+ trimmedContent =
247
+ `[Voice message received — ${transcribeResult.reason}]` +
248
+ (trimmedContent ? `\n\n${trimmedContent}` : "");
249
+ break;
250
+ // "no_audio", "disabled" — no action needed
251
+ }
252
+ }
253
+
230
254
  const sourceMessageId =
231
255
  typeof sourceMetadata?.messageId === "string"
232
256
  ? sourceMetadata.messageId
@@ -333,7 +357,7 @@ export async function handleChannelInbound(
333
357
  externalMessageId,
334
358
  conversationId: result.conversationId,
335
359
  eventId: result.eventId,
336
- content,
360
+ content: trimmedContent,
337
361
  attachmentIds,
338
362
  sourceMetadata: body.sourceMetadata,
339
363
  actorDisplayName: body.actorDisplayName,
@@ -612,7 +636,7 @@ export async function handleChannelInbound(
612
636
  processMessage,
613
637
  conversationId: result.conversationId,
614
638
  eventId: result.eventId,
615
- content: content ?? "",
639
+ content: trimmedContent,
616
640
  attachmentIds: hasAttachments ? attachmentIds : undefined,
617
641
  sourceChannel,
618
642
  sourceInterface,
@@ -79,14 +79,29 @@ export interface AclResult {
79
79
  guardianVerifyCode: string | undefined;
80
80
  }
81
81
 
/**
 * Strip Slack/Telegram mrkdwn formatting wrappers from a raw message.
 *
 * When users copy-paste a verification code from the desktop app with
 * rich-text formatting (e.g. bold), Slack preserves it as `*code*` in
 * the message text, which would otherwise fail the strict bare-code regex.
 *
 * Leading and trailing whitespace is stripped together with the formatting
 * characters, so a code wrapped in a fenced code block (backticks followed
 * by a newline) or padded with spaces is reduced to the bare code as well.
 * Characters in the middle of the text are never touched.
 *
 * @param text Raw message text.
 * @returns `text` without leading/trailing runs of `*`, `_`, `~`, backticks
 *   and whitespace.
 */
function stripMrkdwnFormatting(text: string): string {
  // Bold (*…*), italic (_…_), strikethrough (~…~), inline/fenced code (`…`),
  // plus any whitespace interleaved with those wrappers at either end.
  return text.replace(/^[\s*_~`]+|[\s*_~`]+$/g, "");
}
92
+
82
93
  /**
83
94
  * Parse a guardian verification code from message content.
84
95
  * Accepts a bare code as the entire message: 6-digit numeric OR 64-char hex
85
96
  * (hex is retained for compatibility with unbound inbound/bootstrap sessions
86
97
  * that intentionally use high-entropy secrets).
98
+ *
99
+ * Strips surrounding mrkdwn formatting characters first so that codes
100
+ * pasted with bold/italic/code formatting are still recognized.
87
101
  */
88
102
  function parseGuardianVerifyCode(content: string): string | undefined {
89
- const bareMatch = content.match(/^([0-9a-fA-F]{64}|\d{6})$/);
103
+ const stripped = stripMrkdwnFormatting(content);
104
+ const bareMatch = stripped.match(/^([0-9a-fA-F]{64}|\d{6})$/);
90
105
  if (bareMatch) return bareMatch[1];
91
106
 
92
107
  return undefined;
@@ -0,0 +1,287 @@
1
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
+
3
+ import type { SpeechToTextProvider } from "../../../providers/speech-to-text/types.js";
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Mocks — must be set up before importing the module under test
7
+ // ---------------------------------------------------------------------------
8
+
/** Shape of the attachment rows served by the mocked attachments store. */
type MockAttachment = {
  id: string;
  mimeType: string;
  dataBase64: string;
  originalFilename: string;
  sizeBytes: number;
  kind: string;
  thumbnailBase64: string | null;
  createdAt: number;
};

// Mutable state read by the module mocks below; each test sets these before
// calling the function under test.
let mockFeatureFlagEnabled = true;
let mockAttachments: MockAttachment[] = [];
let mockProvider: SpeechToTextProvider | null = null;

// Feature-flag lookup always answers with the current test-controlled value.
mock.module("../../../config/assistant-feature-flags.js", () => ({
  isAssistantFeatureFlagEnabled: () => mockFeatureFlagEnabled,
}));

mock.module("../../../config/loader.js", () => ({
  getConfig: () => ({ assistantFeatureFlagValues: {} }),
}));

// Attachment store backed by the in-memory `mockAttachments` array.
mock.module("../../../memory/attachments-store.js", () => ({
  getAttachmentsByIds: (ids: string[]) =>
    mockAttachments.filter((candidate) => ids.includes(candidate.id)),
  getAttachmentById: (id: string, _opts?: { hydrateFileData?: boolean }) =>
    mockAttachments.find((candidate) => candidate.id === id) ?? null,
}));

mock.module("../../../providers/speech-to-text/resolve.js", () => ({
  resolveSpeechToTextProvider: async () => mockProvider,
}));

// Silence all log output from the module under test.
const noop = () => {};
mock.module("../../../util/logger.js", () => ({
  getLogger: () => ({
    debug: noop,
    info: noop,
    warn: noop,
    error: noop,
  }),
}));

// Import after mocks are installed
const { tryTranscribeAudioAttachments } = await import("./transcribe-audio.js");
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Helpers
55
+ // ---------------------------------------------------------------------------
56
+
57
+ function makeAudioAttachment(
58
+ id: string,
59
+ mimeType = "audio/ogg",
60
+ dataBase64 = Buffer.from("fake-audio-data").toString("base64"),
61
+ ) {
62
+ return {
63
+ id,
64
+ mimeType,
65
+ dataBase64,
66
+ originalFilename: `voice-${id}.ogg`,
67
+ sizeBytes: Buffer.from(dataBase64, "base64").length,
68
+ kind: "document" as const,
69
+ thumbnailBase64: null,
70
+ createdAt: Date.now(),
71
+ };
72
+ }
73
+
/**
 * Build a non-audio (PDF) attachment fixture.
 *
 * @param id Attachment id; also embedded in the generated filename.
 * @returns An `application/pdf` attachment row whose `sizeBytes` is computed
 *   from the payload rather than hand-counted.
 */
function makeDocumentAttachment(id: string) {
  const payload = Buffer.from("fake-pdf");
  return {
    id,
    mimeType: "application/pdf",
    dataBase64: payload.toString("base64"),
    originalFilename: `doc-${id}.pdf`,
    sizeBytes: payload.length,
    kind: "document" as const,
    thumbnailBase64: null,
    createdAt: Date.now(),
  };
}
86
+
/**
 * Build a non-audio (PNG image) attachment fixture.
 *
 * @param id Attachment id; also embedded in the generated filename.
 * @returns An `image/png` attachment row whose `sizeBytes` is computed from
 *   the payload rather than hand-counted.
 */
function makeImageAttachment(id: string) {
  const payload = Buffer.from("fake-image");
  return {
    id,
    mimeType: "image/png",
    dataBase64: payload.toString("base64"),
    originalFilename: `photo-${id}.png`,
    sizeBytes: payload.length,
    kind: "image" as const,
    thumbnailBase64: null,
    createdAt: Date.now(),
  };
}
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Tests
102
+ // ---------------------------------------------------------------------------
103
+
describe("tryTranscribeAudioAttachments", () => {
  // Start every test from the same baseline: flag on, no attachments stored,
  // no speech-to-text provider resolvable.
  beforeEach(() => {
    mockFeatureFlagEnabled = true;
    mockAttachments = [];
    mockProvider = null;
  });

  afterEach(() => {
    mockAttachments = [];
  });

  test("audio attachment is transcribed and returns transcribed result", async () => {
    mockAttachments = [makeAudioAttachment("a1")];
    mockProvider = {
      transcribe: async () => ({ text: "Hello, how are you?" }),
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({
      status: "transcribed",
      text: "Hello, how are you?",
    });
  });

  test("non-audio attachments return no_audio", async () => {
    mockAttachments = [makeDocumentAttachment("d1"), makeImageAttachment("i1")];
    mockProvider = {
      transcribe: async () => ({ text: "should not be called" }),
    };

    const result = await tryTranscribeAudioAttachments(["d1", "i1"]);

    expect(result).toEqual({ status: "no_audio" });
  });

  test("no API key returns no_provider with helpful reason string", async () => {
    mockAttachments = [makeAudioAttachment("a1")];
    mockProvider = null; // No provider resolved

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result.status).toBe("no_provider");
    // Narrow on the property instead of casting so a wrong variant fails here.
    expect("reason" in result ? result.reason : "").toContain(
      "No OpenAI API key configured",
    );
  });

  test("API failure returns error with reason", async () => {
    mockAttachments = [makeAudioAttachment("a1")];
    mockProvider = {
      transcribe: async () => {
        throw new Error("API rate limit exceeded");
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({
      status: "error",
      reason: "API rate limit exceeded",
    });
  });

  test("feature flag disabled returns disabled", async () => {
    mockFeatureFlagEnabled = false;
    mockAttachments = [makeAudioAttachment("a1")];

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({ status: "disabled" });
  });

  test("provider AbortError is reported as a transcription timeout", async () => {
    mockAttachments = [makeAudioAttachment("a1")];
    // This does not wait for the real timeout to elapse. It pins how an
    // AbortError raised by the provider — the error an aborted request
    // surfaces — is mapped onto the result.
    mockProvider = {
      transcribe: async () => {
        throw new DOMException("The operation was aborted", "AbortError");
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({
      status: "error",
      reason: "Transcription timed out",
    });
  });

  test("multiple audio attachments are transcribed and concatenated", async () => {
    mockAttachments = [
      makeAudioAttachment("a1"),
      makeAudioAttachment("a2", "audio/mpeg"),
    ];

    let callCount = 0;
    mockProvider = {
      transcribe: async () => {
        callCount++;
        return { text: callCount === 1 ? "First message" : "Second message" };
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1", "a2"]);

    expect(result).toEqual({
      status: "transcribed",
      text: "First message\n\nSecond message",
    });
    expect(callCount).toBe(2);
  });

  test("mixed audio and non-audio attachments: only audio is transcribed", async () => {
    mockAttachments = [makeAudioAttachment("a1"), makeDocumentAttachment("d1")];

    let transcribeCallCount = 0;
    mockProvider = {
      transcribe: async () => {
        transcribeCallCount++;
        return { text: "Voice transcription" };
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1", "d1"]);

    expect(result).toEqual({
      status: "transcribed",
      text: "Voice transcription",
    });
    expect(transcribeCallCount).toBe(1);
  });

  test("empty attachment IDs returns no_audio", async () => {
    mockProvider = {
      transcribe: async () => ({ text: "should not be called" }),
    };

    const result = await tryTranscribeAudioAttachments([]);

    expect(result).toEqual({ status: "no_audio" });
  });

  test("attachment with empty transcription returns no_audio", async () => {
    mockAttachments = [makeAudioAttachment("a1")];
    mockProvider = {
      transcribe: async () => ({ text: "   " }), // whitespace-only
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({ status: "no_audio" });
  });
});
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Auto-transcribe audio attachments from channel inbound messages.
3
+ *
4
+ * Returns a discriminated result type so callers can handle each outcome
5
+ * (transcribed, no audio, disabled, no provider, error) without exceptions.
6
+ * Never throws — failures are represented as result variants so that message
7
+ * delivery is never blocked by transcription issues.
8
+ */
9
+
10
+ import { isAssistantFeatureFlagEnabled } from "../../../config/assistant-feature-flags.js";
11
+ import { getConfig } from "../../../config/loader.js";
12
+ import * as attachmentsStore from "../../../memory/attachments-store.js";
13
+ import { resolveSpeechToTextProvider } from "../../../providers/speech-to-text/resolve.js";
14
+ import { getLogger } from "../../../util/logger.js";
15
+
const log = getLogger("transcribe-audio");

const VOICE_TRANSCRIPTION_FLAG_KEY =
  "feature_flags.channel-voice-transcription.enabled" as const;

/** Default timeout for the entire transcription pipeline (all attachments). */
const TRANSCRIPTION_TIMEOUT_MS = 30_000;

// ---------------------------------------------------------------------------
// Result type
// ---------------------------------------------------------------------------

/** Outcome of an auto-transcription attempt, discriminated by `status`. */
export type TranscribeResult =
  | { status: "transcribed"; text: string }
  | { status: "no_audio" }
  | { status: "disabled" }
  | { status: "no_provider"; reason: string }
  | { status: "error"; reason: string };

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Transcribe every audio attachment among `attachmentIds` and join the
 * non-empty transcripts with blank lines.
 *
 * Failures inside the pipeline are caught and returned as result variants
 * rather than thrown:
 * - `disabled`    — the voice-transcription feature flag is off.
 * - `no_audio`    — no attachment has an `audio/*` MIME type, or nothing
 *                   produced a non-empty transcript.
 * - `no_provider` — no speech-to-text provider could be resolved.
 * - `error`       — any thrown error; an `AbortError` is reported as
 *                   "Transcription timed out".
 *
 * The timeout is enforced here as well as via the abort signal handed to the
 * provider: each provider call is raced against the signal, so a provider
 * that ignores the signal cannot hold the caller past the deadline.
 *
 * @param attachmentIds Ids of the attachments to inspect.
 * @param timeoutMs Budget for transcribing all attachments, in milliseconds.
 *   Defaults to `TRANSCRIPTION_TIMEOUT_MS` (30 seconds).
 * @returns The discriminated outcome described above.
 */
export async function tryTranscribeAudioAttachments(
  attachmentIds: string[],
  timeoutMs: number = TRANSCRIPTION_TIMEOUT_MS,
): Promise<TranscribeResult> {
  try {
    // Check feature flag
    const config = getConfig();
    if (!isAssistantFeatureFlagEnabled(VOICE_TRANSCRIPTION_FLAG_KEY, config)) {
      return { status: "disabled" };
    }

    // Look up attachments and filter to audio MIME types
    const resolved = attachmentsStore.getAttachmentsByIds(attachmentIds);
    const audioAttachments = resolved.filter((a) =>
      a.mimeType.startsWith("audio/"),
    );

    if (audioAttachments.length === 0) {
      return { status: "no_audio" };
    }

    // Resolve STT provider
    const provider = await resolveSpeechToTextProvider();
    if (!provider) {
      return {
        status: "no_provider",
        reason:
          "No OpenAI API key configured. Set one up to enable voice message transcription.",
      };
    }

    // One abort signal and one deadline are shared by all attachments.
    const abortController = new AbortController();

    // Rejects with an AbortError as soon as the shared signal fires. Racing
    // provider calls against it enforces the deadline even when the provider
    // does not observe the signal itself.
    const deadline = new Promise<never>((_resolve, reject) => {
      abortController.signal.addEventListener(
        "abort",
        () =>
          reject(new DOMException("The operation was aborted", "AbortError")),
        { once: true },
      );
    });
    // Keep a late rejection from surfacing as an unhandled rejection.
    deadline.catch(() => {});

    const timeoutId = setTimeout(() => abortController.abort(), timeoutMs);

    try {
      const transcriptions: string[] = [];

      for (const attachment of audioAttachments) {
        // Hydrate the base64 data for the attachment
        const hydrated = attachmentsStore.getAttachmentById(attachment.id, {
          hydrateFileData: true,
        });
        if (!hydrated || !hydrated.dataBase64) {
          log.warn(
            { attachmentId: attachment.id },
            "Could not hydrate audio attachment data; skipping",
          );
          continue;
        }

        const buffer = Buffer.from(hydrated.dataBase64, "base64");
        const result = await Promise.race([
          provider.transcribe(
            buffer,
            attachment.mimeType,
            abortController.signal,
          ),
          deadline,
        ]);

        // Whitespace-only transcripts are dropped.
        const text = result.text.trim();
        if (text) {
          transcriptions.push(text);
        }
      }

      if (transcriptions.length === 0) {
        return { status: "no_audio" };
      }

      return { status: "transcribed", text: transcriptions.join("\n\n") };
    } finally {
      clearTimeout(timeoutId);
    }
  } catch (err: unknown) {
    const reason =
      err instanceof Error
        ? err.name === "AbortError"
          ? "Transcription timed out"
          : err.message
        : String(err);
    log.warn({ err }, "Audio transcription failed");
    return { status: "error", reason };
  }
}
@@ -444,6 +444,7 @@ const WORKSPACE_SKIP_DIRS = new Set([
444
444
  "embedding-models",
445
445
  "data/qdrant",
446
446
  "data/attachments",
447
+ "data/sounds",
447
448
  "conversations",
448
449
  ]);
449
450