@vellumai/assistant 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/Dockerfile +18 -27
  2. package/docs/architecture/memory.md +105 -0
  3. package/node_modules/@vellumai/ces-contracts/src/index.ts +1 -0
  4. package/node_modules/@vellumai/ces-contracts/src/trust-rules.ts +42 -0
  5. package/package.json +1 -1
  6. package/src/__tests__/archive-recall.test.ts +560 -0
  7. package/src/__tests__/conversation-clear-safety.test.ts +259 -0
  8. package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
  9. package/src/__tests__/credential-security-invariants.test.ts +2 -0
  10. package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
  11. package/src/__tests__/memory-reducer-job.test.ts +538 -0
  12. package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
  13. package/src/__tests__/memory-reducer-types.test.ts +12 -4
  14. package/src/__tests__/memory-reducer.test.ts +7 -1
  15. package/src/__tests__/memory-regressions.test.ts +24 -4
  16. package/src/__tests__/memory-simplified-config.test.ts +4 -4
  17. package/src/__tests__/openai-whisper.test.ts +93 -0
  18. package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
  19. package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
  20. package/src/__tests__/slack-messaging-token-resolution.test.ts +319 -0
  21. package/src/__tests__/volume-security-guard.test.ts +155 -0
  22. package/src/cli/commands/conversations.ts +18 -0
  23. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -0
  24. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  25. package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +16 -37
  26. package/src/config/env-registry.ts +9 -0
  27. package/src/config/feature-flag-registry.json +8 -0
  28. package/src/config/loader.ts +0 -1
  29. package/src/config/schemas/memory-simplified.ts +1 -1
  30. package/src/credential-execution/managed-catalog.ts +5 -15
  31. package/src/daemon/config-watcher.ts +4 -1
  32. package/src/daemon/conversation-memory.ts +117 -0
  33. package/src/daemon/conversation-runtime-assembly.ts +1 -0
  34. package/src/daemon/daemon-control.ts +7 -0
  35. package/src/daemon/handlers/conversations.ts +11 -0
  36. package/src/daemon/lifecycle.ts +51 -2
  37. package/src/daemon/providers-setup.ts +2 -1
  38. package/src/hooks/manager.ts +7 -0
  39. package/src/instrument.ts +33 -1
  40. package/src/memory/archive-recall.ts +516 -0
  41. package/src/memory/brief-time.ts +5 -4
  42. package/src/memory/conversation-crud.ts +210 -0
  43. package/src/memory/conversation-key-store.ts +33 -4
  44. package/src/memory/db-init.ts +4 -0
  45. package/src/memory/embedding-local.ts +11 -5
  46. package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
  47. package/src/memory/job-handlers/conversation-starters.ts +24 -30
  48. package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
  49. package/src/memory/jobs-store.ts +2 -0
  50. package/src/memory/jobs-worker.ts +8 -0
  51. package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
  52. package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
  53. package/src/memory/migrations/141-rename-verification-table.ts +8 -0
  54. package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
  55. package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
  56. package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
  57. package/src/memory/migrations/index.ts +1 -0
  58. package/src/memory/reducer-scheduler.ts +242 -0
  59. package/src/memory/reducer-types.ts +9 -2
  60. package/src/memory/reducer.ts +25 -11
  61. package/src/memory/schema/infrastructure.ts +1 -0
  62. package/src/messaging/provider.ts +9 -0
  63. package/src/messaging/providers/slack/adapter.ts +29 -2
  64. package/src/oauth/connection-resolver.test.ts +22 -18
  65. package/src/oauth/connection-resolver.ts +92 -7
  66. package/src/oauth/platform-connection.test.ts +78 -69
  67. package/src/oauth/platform-connection.ts +12 -19
  68. package/src/permissions/trust-client.ts +343 -0
  69. package/src/permissions/trust-store-interface.ts +105 -0
  70. package/src/permissions/trust-store.ts +523 -36
  71. package/src/platform/client.test.ts +148 -0
  72. package/src/platform/client.ts +71 -0
  73. package/src/providers/speech-to-text/openai-whisper.test.ts +190 -0
  74. package/src/providers/speech-to-text/openai-whisper.ts +68 -0
  75. package/src/providers/speech-to-text/resolve.ts +9 -0
  76. package/src/providers/speech-to-text/types.ts +17 -0
  77. package/src/runtime/auth/route-policy.ts +10 -1
  78. package/src/runtime/http-server.ts +2 -2
  79. package/src/runtime/routes/conversation-management-routes.ts +88 -2
  80. package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
  81. package/src/runtime/routes/inbound-message-handler.ts +27 -3
  82. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +16 -1
  83. package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +287 -0
  84. package/src/runtime/routes/inbound-stages/transcribe-audio.ts +122 -0
  85. package/src/runtime/routes/log-export-routes.ts +1 -0
  86. package/src/runtime/routes/secret-routes.ts +5 -1
  87. package/src/schedule/schedule-store.ts +7 -0
  88. package/src/schedule/scheduler.ts +6 -2
  89. package/src/security/ces-credential-client.ts +173 -0
  90. package/src/security/secure-keys.ts +65 -22
  91. package/src/signals/bash.ts +3 -0
  92. package/src/signals/cancel.ts +3 -0
  93. package/src/signals/confirm.ts +3 -0
  94. package/src/signals/conversation-undo.ts +3 -0
  95. package/src/signals/event-stream.ts +7 -0
  96. package/src/signals/shotgun.ts +3 -0
  97. package/src/signals/trust-rule.ts +3 -0
  98. package/src/telemetry/usage-telemetry-reporter.test.ts +23 -36
  99. package/src/telemetry/usage-telemetry-reporter.ts +22 -20
  100. package/src/tools/filesystem/edit.ts +6 -1
  101. package/src/tools/filesystem/read.ts +6 -1
  102. package/src/tools/filesystem/write.ts +6 -1
  103. package/src/tools/memory/handlers.ts +129 -1
  104. package/src/tools/schedule/create.ts +3 -0
  105. package/src/tools/schedule/list.ts +5 -1
  106. package/src/tools/schedule/update.ts +6 -0
  107. package/src/util/device-id.ts +70 -7
  108. package/src/util/logger.ts +35 -9
  109. package/src/util/platform.ts +29 -5
  110. package/src/workspace/migrations/migrate-to-workspace-volume.ts +113 -0
  111. package/src/workspace/migrations/registry.ts +2 -0
@@ -28,6 +28,7 @@ type ServerWithRequestIP = {
28
28
  ): { address: string; family: string; port: number } | null;
29
29
  };
30
30
  import { isHttpAuthDisabled } from "../../config/env.js";
31
+ import { getIsContainerized } from "../../config/env-registry.js";
31
32
 
32
33
  const log = getLogger("guardian-bootstrap");
33
34
 
@@ -86,19 +87,30 @@ export async function handleGuardianBootstrap(
86
87
  req: Request,
87
88
  server: ServerWithRequestIP,
88
89
  ): Promise<Response> {
90
+ // Reject non-private-network peers (allows loopback, Docker bridge, etc.)
91
+ const peerIp = server.requestIP(req)?.address;
92
+ if ((!peerIp || !isPrivateAddress(peerIp)) && !isHttpAuthDisabled()) {
93
+ return httpError("FORBIDDEN", "Bootstrap endpoint is local-only", 403);
94
+ }
95
+
89
96
  // Reject requests forwarded from public networks. The gateway sets
90
97
  // x-forwarded-for to the real client IP; if that IP is on a private
91
98
  // network (loopback, Docker bridge, RFC 1918) the request is still
92
99
  // considered local. Only reject when the forwarded IP is public.
100
+ //
101
+ // Skip this check when running in a container: the peer IP was already
102
+ // validated above (Docker bridge network = private), so the request
103
+ // reached us through a co-located gateway. The x-forwarded-for header
104
+ // reflects the original external client (e.g. platform proxy) and is
105
+ // not meaningful for local-only enforcement in this topology.
93
106
  const forwarded = req.headers.get("x-forwarded-for");
94
107
  const forwardedIp = forwarded ? forwarded.split(",")[0].trim() : null;
95
- if (forwardedIp && !isPrivateAddress(forwardedIp) && !isHttpAuthDisabled()) {
96
- return httpError("FORBIDDEN", "Bootstrap endpoint is local-only", 403);
97
- }
98
-
99
- // Reject non-private-network peers (allows loopback, Docker bridge, etc.)
100
- const peerIp = server.requestIP(req)?.address;
101
- if ((!peerIp || !isPrivateAddress(peerIp)) && !isHttpAuthDisabled()) {
108
+ if (
109
+ forwardedIp &&
110
+ !isPrivateAddress(forwardedIp) &&
111
+ !isHttpAuthDisabled() &&
112
+ !getIsContainerized()
113
+ ) {
102
114
  return httpError("FORBIDDEN", "Bootstrap endpoint is local-only", 403);
103
115
  }
104
116
 
@@ -44,6 +44,7 @@ import { handleEditIntercept } from "./inbound-stages/edit-intercept.js";
44
44
  import { handleEscalationIntercept } from "./inbound-stages/escalation-intercept.js";
45
45
  import { handleGuardianReplyIntercept } from "./inbound-stages/guardian-reply-intercept.js";
46
46
  import { runSecretIngressCheck } from "./inbound-stages/secret-ingress-check.js";
47
+ import { tryTranscribeAudioAttachments } from "./inbound-stages/transcribe-audio.js";
47
48
  import { handleVerificationIntercept } from "./inbound-stages/verification-intercept.js";
48
49
 
49
50
  const log = getLogger("runtime-http");
@@ -144,7 +145,7 @@ export async function handleChannelInbound(
144
145
  return httpError("BAD_REQUEST", "content must be a string", 400);
145
146
  }
146
147
 
147
- const trimmedContent = typeof content === "string" ? content.trim() : "";
148
+ let trimmedContent = typeof content === "string" ? content.trim() : "";
148
149
  const hasAttachments =
149
150
  Array.isArray(attachmentIds) && attachmentIds.length > 0;
150
151
 
@@ -227,6 +228,29 @@ export async function handleChannelInbound(
227
228
  }
228
229
  }
229
230
 
231
+ // Auto-transcribe audio attachments from channel messages
232
+ if (hasAttachments && sourceChannel) {
233
+ const transcribeResult = await tryTranscribeAudioAttachments(attachmentIds);
234
+ switch (transcribeResult.status) {
235
+ case "transcribed":
236
+ // For voice-only messages (empty content), this becomes the message text.
237
+ // For audio+caption, both are preserved.
238
+ trimmedContent =
239
+ transcribeResult.text +
240
+ (trimmedContent ? `\n\n${trimmedContent}` : "");
241
+ break;
242
+ case "no_provider":
243
+ case "error":
244
+ // Inject a hint so the assistant knows the user sent audio and why
245
+ // transcription failed — it can then guide the user (e.g. set up API key).
246
+ trimmedContent =
247
+ `[Voice message received — ${transcribeResult.reason}]` +
248
+ (trimmedContent ? `\n\n${trimmedContent}` : "");
249
+ break;
250
+ // "no_audio", "disabled" — no action needed
251
+ }
252
+ }
253
+
230
254
  const sourceMessageId =
231
255
  typeof sourceMetadata?.messageId === "string"
232
256
  ? sourceMetadata.messageId
@@ -333,7 +357,7 @@ export async function handleChannelInbound(
333
357
  externalMessageId,
334
358
  conversationId: result.conversationId,
335
359
  eventId: result.eventId,
336
- content,
360
+ content: trimmedContent,
337
361
  attachmentIds,
338
362
  sourceMetadata: body.sourceMetadata,
339
363
  actorDisplayName: body.actorDisplayName,
@@ -612,7 +636,7 @@ export async function handleChannelInbound(
612
636
  processMessage,
613
637
  conversationId: result.conversationId,
614
638
  eventId: result.eventId,
615
- content: content ?? "",
639
+ content: trimmedContent,
616
640
  attachmentIds: hasAttachments ? attachmentIds : undefined,
617
641
  sourceChannel,
618
642
  sourceInterface,
@@ -79,14 +79,29 @@ export interface AclResult {
79
79
  guardianVerifyCode: string | undefined;
80
80
  }
81
81
 
82
+ /**
83
+ * Strip Slack/Telegram mrkdwn formatting wrappers from a raw message.
84
+ * When users copy-paste a verification code from the desktop app with
85
+ * rich-text formatting (e.g. bold), Slack preserves it as `*code*` in
86
+ * the message text, which would otherwise fail the strict bare-code regex.
87
+ */
88
+ function stripMrkdwnFormatting(text: string): string {
89
+ // Bold (*…*), italic (_…_), strikethrough (~…~), inline code (`…`)
90
+ return text.replace(/^[*_~`]+/, "").replace(/[*_~`]+$/, "");
91
+ }
92
+
82
93
  /**
83
94
  * Parse a guardian verification code from message content.
84
95
  * Accepts a bare code as the entire message: 6-digit numeric OR 64-char hex
85
96
  * (hex is retained for compatibility with unbound inbound/bootstrap sessions
86
97
  * that intentionally use high-entropy secrets).
98
+ *
99
+ * Strips surrounding mrkdwn formatting characters first so that codes
100
+ * pasted with bold/italic/code formatting are still recognized.
87
101
  */
88
102
  function parseGuardianVerifyCode(content: string): string | undefined {
89
- const bareMatch = content.match(/^([0-9a-fA-F]{64}|\d{6})$/);
103
+ const stripped = stripMrkdwnFormatting(content);
104
+ const bareMatch = stripped.match(/^([0-9a-fA-F]{64}|\d{6})$/);
90
105
  if (bareMatch) return bareMatch[1];
91
106
 
92
107
  return undefined;
@@ -0,0 +1,287 @@
1
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
+
3
+ import type { SpeechToTextProvider } from "../../../providers/speech-to-text/types.js";
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Mocks — must be set up before importing the module under test
7
+ // ---------------------------------------------------------------------------
8
+
9
+ let mockFeatureFlagEnabled = true;
10
+ let mockAttachments: Array<{
11
+ id: string;
12
+ mimeType: string;
13
+ dataBase64: string;
14
+ originalFilename: string;
15
+ sizeBytes: number;
16
+ kind: string;
17
+ thumbnailBase64: string | null;
18
+ createdAt: number;
19
+ }> = [];
20
+ let mockProvider: SpeechToTextProvider | null = null;
21
+
22
+ mock.module("../../../config/assistant-feature-flags.js", () => ({
23
+ isAssistantFeatureFlagEnabled: () => mockFeatureFlagEnabled,
24
+ }));
25
+
26
+ mock.module("../../../config/loader.js", () => ({
27
+ getConfig: () => ({ assistantFeatureFlagValues: {} }),
28
+ }));
29
+
30
+ mock.module("../../../memory/attachments-store.js", () => ({
31
+ getAttachmentsByIds: (ids: string[]) =>
32
+ mockAttachments.filter((a) => ids.includes(a.id)),
33
+ getAttachmentById: (id: string, _opts?: { hydrateFileData?: boolean }) =>
34
+ mockAttachments.find((a) => a.id === id) ?? null,
35
+ }));
36
+
37
+ mock.module("../../../providers/speech-to-text/resolve.js", () => ({
38
+ resolveSpeechToTextProvider: async () => mockProvider,
39
+ }));
40
+
41
+ mock.module("../../../util/logger.js", () => ({
42
+ getLogger: () => ({
43
+ debug: () => {},
44
+ info: () => {},
45
+ warn: () => {},
46
+ error: () => {},
47
+ }),
48
+ }));
49
+
50
+ // Import after mocks are installed
51
+ const { tryTranscribeAudioAttachments } = await import("./transcribe-audio.js");
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Helpers
55
+ // ---------------------------------------------------------------------------
56
+
57
+ function makeAudioAttachment(
58
+ id: string,
59
+ mimeType = "audio/ogg",
60
+ dataBase64 = Buffer.from("fake-audio-data").toString("base64"),
61
+ ) {
62
+ return {
63
+ id,
64
+ mimeType,
65
+ dataBase64,
66
+ originalFilename: `voice-${id}.ogg`,
67
+ sizeBytes: Buffer.from(dataBase64, "base64").length,
68
+ kind: "document" as const,
69
+ thumbnailBase64: null,
70
+ createdAt: Date.now(),
71
+ };
72
+ }
73
+
74
+ function makeDocumentAttachment(id: string) {
75
+ return {
76
+ id,
77
+ mimeType: "application/pdf",
78
+ dataBase64: Buffer.from("fake-pdf").toString("base64"),
79
+ originalFilename: `doc-${id}.pdf`,
80
+ sizeBytes: 8,
81
+ kind: "document" as const,
82
+ thumbnailBase64: null,
83
+ createdAt: Date.now(),
84
+ };
85
+ }
86
+
87
+ function makeImageAttachment(id: string) {
88
+ return {
89
+ id,
90
+ mimeType: "image/png",
91
+ dataBase64: Buffer.from("fake-image").toString("base64"),
92
+ originalFilename: `photo-${id}.png`,
93
+ sizeBytes: 10,
94
+ kind: "image" as const,
95
+ thumbnailBase64: null,
96
+ createdAt: Date.now(),
97
+ };
98
+ }
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Tests
102
+ // ---------------------------------------------------------------------------
103
+
104
+ describe("tryTranscribeAudioAttachments", () => {
105
+ beforeEach(() => {
106
+ mockFeatureFlagEnabled = true;
107
+ mockAttachments = [];
108
+ mockProvider = null;
109
+ });
110
+
111
+ afterEach(() => {
112
+ mockAttachments = [];
113
+ });
114
+
115
+ test("audio attachment is transcribed and returns transcribed result", async () => {
116
+ const audio = makeAudioAttachment("a1");
117
+ mockAttachments = [audio];
118
+ mockProvider = {
119
+ transcribe: async () => ({ text: "Hello, how are you?" }),
120
+ };
121
+
122
+ const result = await tryTranscribeAudioAttachments(["a1"]);
123
+
124
+ expect(result).toEqual({
125
+ status: "transcribed",
126
+ text: "Hello, how are you?",
127
+ });
128
+ });
129
+
130
+ test("non-audio attachments return no_audio", async () => {
131
+ const doc = makeDocumentAttachment("d1");
132
+ const img = makeImageAttachment("i1");
133
+ mockAttachments = [doc, img];
134
+ mockProvider = {
135
+ transcribe: async () => ({ text: "should not be called" }),
136
+ };
137
+
138
+ const result = await tryTranscribeAudioAttachments(["d1", "i1"]);
139
+
140
+ expect(result).toEqual({ status: "no_audio" });
141
+ });
142
+
143
+ test("no API key returns no_provider with helpful reason string", async () => {
144
+ const audio = makeAudioAttachment("a1");
145
+ mockAttachments = [audio];
146
+ mockProvider = null; // No provider resolved
147
+
148
+ const result = await tryTranscribeAudioAttachments(["a1"]);
149
+
150
+ expect(result.status).toBe("no_provider");
151
+ expect((result as { reason: string }).reason).toContain(
152
+ "No OpenAI API key configured",
153
+ );
154
+ });
155
+
156
+ test("API failure returns error with reason", async () => {
157
+ const audio = makeAudioAttachment("a1");
158
+ mockAttachments = [audio];
159
+ mockProvider = {
160
+ transcribe: async () => {
161
+ throw new Error("API rate limit exceeded");
162
+ },
163
+ };
164
+
165
+ const result = await tryTranscribeAudioAttachments(["a1"]);
166
+
167
+ expect(result.status).toBe("error");
168
+ expect((result as { reason: string }).reason).toBe(
169
+ "API rate limit exceeded",
170
+ );
171
+ });
172
+
173
+ test("feature flag disabled returns disabled", async () => {
174
+ mockFeatureFlagEnabled = false;
175
+ const audio = makeAudioAttachment("a1");
176
+ mockAttachments = [audio];
177
+
178
+ const result = await tryTranscribeAudioAttachments(["a1"]);
179
+
180
+ expect(result).toEqual({ status: "disabled" });
181
+ });
182
+
183
+ test("30-second timeout fires and returns error without blocking", async () => {
184
+ const audio = makeAudioAttachment("a1");
185
+ mockAttachments = [audio];
186
+ mockProvider = {
187
+ transcribe: async (_audio, _mime, signal) => {
188
+ // Simulate a provider that respects the abort signal
189
+ return new Promise((_resolve, reject) => {
190
+ if (signal?.aborted) {
191
+ reject(new DOMException("The operation was aborted", "AbortError"));
192
+ return;
193
+ }
194
+ const onAbort = () => {
195
+ reject(new DOMException("The operation was aborted", "AbortError"));
196
+ };
197
+ signal?.addEventListener("abort", onAbort, { once: true });
198
+ });
199
+ },
200
+ };
201
+
202
+ // The timeout is 30s in the real code, but the test's mock provider
203
+ // aborts immediately when signaled. We verify the error path works
204
+ // by checking the result type. For a true timeout test we'd need
205
+ // to override the timeout constant, but this confirms the abort
206
+ // path produces the correct result.
207
+ // Instead, let's test with a provider that checks signal state:
208
+ mockProvider = {
209
+ transcribe: async () => {
210
+ throw new DOMException("The operation was aborted", "AbortError");
211
+ },
212
+ };
213
+
214
+ const result = await tryTranscribeAudioAttachments(["a1"]);
215
+
216
+ expect(result.status).toBe("error");
217
+ expect((result as { reason: string }).reason).toBe(
218
+ "Transcription timed out",
219
+ );
220
+ });
221
+
222
+ test("multiple audio attachments are transcribed and concatenated", async () => {
223
+ const a1 = makeAudioAttachment("a1");
224
+ const a2 = makeAudioAttachment("a2", "audio/mpeg");
225
+ mockAttachments = [a1, a2];
226
+
227
+ let callCount = 0;
228
+ mockProvider = {
229
+ transcribe: async () => {
230
+ callCount++;
231
+ return { text: callCount === 1 ? "First message" : "Second message" };
232
+ },
233
+ };
234
+
235
+ const result = await tryTranscribeAudioAttachments(["a1", "a2"]);
236
+
237
+ expect(result).toEqual({
238
+ status: "transcribed",
239
+ text: "First message\n\nSecond message",
240
+ });
241
+ expect(callCount).toBe(2);
242
+ });
243
+
244
+ test("mixed audio and non-audio attachments: only audio is transcribed", async () => {
245
+ const audio = makeAudioAttachment("a1");
246
+ const doc = makeDocumentAttachment("d1");
247
+ mockAttachments = [audio, doc];
248
+
249
+ let transcribeCallCount = 0;
250
+ mockProvider = {
251
+ transcribe: async () => {
252
+ transcribeCallCount++;
253
+ return { text: "Voice transcription" };
254
+ },
255
+ };
256
+
257
+ const result = await tryTranscribeAudioAttachments(["a1", "d1"]);
258
+
259
+ expect(result).toEqual({
260
+ status: "transcribed",
261
+ text: "Voice transcription",
262
+ });
263
+ expect(transcribeCallCount).toBe(1);
264
+ });
265
+
266
+ test("empty attachment IDs returns no_audio", async () => {
267
+ mockProvider = {
268
+ transcribe: async () => ({ text: "should not be called" }),
269
+ };
270
+
271
+ const result = await tryTranscribeAudioAttachments([]);
272
+
273
+ expect(result).toEqual({ status: "no_audio" });
274
+ });
275
+
276
+ test("attachment with empty transcription returns no_audio", async () => {
277
+ const audio = makeAudioAttachment("a1");
278
+ mockAttachments = [audio];
279
+ mockProvider = {
280
+ transcribe: async () => ({ text: " " }), // whitespace-only
281
+ };
282
+
283
+ const result = await tryTranscribeAudioAttachments(["a1"]);
284
+
285
+ expect(result).toEqual({ status: "no_audio" });
286
+ });
287
+ });
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Auto-transcribe audio attachments from channel inbound messages.
3
+ *
4
+ * Returns a discriminated result type so callers can handle each outcome
5
+ * (transcribed, no audio, disabled, no provider, error) without exceptions.
6
+ * Never throws — failures are represented as result variants so that message
7
+ * delivery is never blocked by transcription issues.
8
+ */
9
+
10
+ import { isAssistantFeatureFlagEnabled } from "../../../config/assistant-feature-flags.js";
11
+ import { getConfig } from "../../../config/loader.js";
12
+ import * as attachmentsStore from "../../../memory/attachments-store.js";
13
+ import { resolveSpeechToTextProvider } from "../../../providers/speech-to-text/resolve.js";
14
+ import { getLogger } from "../../../util/logger.js";
15
+
16
+ const log = getLogger("transcribe-audio");
17
+
18
+ const VOICE_TRANSCRIPTION_FLAG_KEY =
19
+ "feature_flags.channel-voice-transcription.enabled" as const;
20
+
21
+ /** Timeout for the entire transcription pipeline (all attachments). */
22
+ const TRANSCRIPTION_TIMEOUT_MS = 30_000;
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Result type
26
+ // ---------------------------------------------------------------------------
27
+
28
+ export type TranscribeResult =
29
+ | { status: "transcribed"; text: string }
30
+ | { status: "no_audio" }
31
+ | { status: "disabled" }
32
+ | { status: "no_provider"; reason: string }
33
+ | { status: "error"; reason: string };
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // Public API
37
+ // ---------------------------------------------------------------------------
38
+
39
+ export async function tryTranscribeAudioAttachments(
40
+ attachmentIds: string[],
41
+ ): Promise<TranscribeResult> {
42
+ try {
43
+ // Check feature flag
44
+ const config = getConfig();
45
+ if (!isAssistantFeatureFlagEnabled(VOICE_TRANSCRIPTION_FLAG_KEY, config)) {
46
+ return { status: "disabled" };
47
+ }
48
+
49
+ // Look up attachments and filter to audio MIME types
50
+ const resolved = attachmentsStore.getAttachmentsByIds(attachmentIds);
51
+ const audioAttachments = resolved.filter((a) =>
52
+ a.mimeType.startsWith("audio/"),
53
+ );
54
+
55
+ if (audioAttachments.length === 0) {
56
+ return { status: "no_audio" };
57
+ }
58
+
59
+ // Resolve STT provider
60
+ const provider = await resolveSpeechToTextProvider();
61
+ if (!provider) {
62
+ return {
63
+ status: "no_provider",
64
+ reason:
65
+ "No OpenAI API key configured. Set one up to enable voice message transcription.",
66
+ };
67
+ }
68
+
69
+ // Transcribe each audio attachment with a shared timeout
70
+ const abortController = new AbortController();
71
+ const timeoutId = setTimeout(
72
+ () => abortController.abort(),
73
+ TRANSCRIPTION_TIMEOUT_MS,
74
+ );
75
+
76
+ try {
77
+ const transcriptions: string[] = [];
78
+
79
+ for (const attachment of audioAttachments) {
80
+ // Hydrate the base64 data for the attachment
81
+ const hydrated = attachmentsStore.getAttachmentById(attachment.id, {
82
+ hydrateFileData: true,
83
+ });
84
+ if (!hydrated || !hydrated.dataBase64) {
85
+ log.warn(
86
+ { attachmentId: attachment.id },
87
+ "Could not hydrate audio attachment data; skipping",
88
+ );
89
+ continue;
90
+ }
91
+
92
+ const buffer = Buffer.from(hydrated.dataBase64, "base64");
93
+ const result = await provider.transcribe(
94
+ buffer,
95
+ attachment.mimeType,
96
+ abortController.signal,
97
+ );
98
+
99
+ if (result.text.trim()) {
100
+ transcriptions.push(result.text.trim());
101
+ }
102
+ }
103
+
104
+ if (transcriptions.length === 0) {
105
+ return { status: "no_audio" };
106
+ }
107
+
108
+ return { status: "transcribed", text: transcriptions.join("\n\n") };
109
+ } finally {
110
+ clearTimeout(timeoutId);
111
+ }
112
+ } catch (err: unknown) {
113
+ const reason =
114
+ err instanceof Error
115
+ ? err.name === "AbortError"
116
+ ? "Transcription timed out"
117
+ : err.message
118
+ : String(err);
119
+ log.warn({ err }, "Audio transcription failed");
120
+ return { status: "error", reason };
121
+ }
122
+ }
@@ -444,6 +444,7 @@ const WORKSPACE_SKIP_DIRS = new Set([
444
444
  "embedding-models",
445
445
  "data/qdrant",
446
446
  "data/attachments",
447
+ "data/sounds",
447
448
  "conversations",
448
449
  ]);
449
450
 
@@ -10,7 +10,7 @@ import {
10
10
  invalidateConfigCache,
11
11
  } from "../../config/loader.js";
12
12
  import type { CesClient } from "../../credential-execution/client.js";
13
- import { setSentryOrganizationId } from "../../instrument.js";
13
+ import { setSentryOrganizationId, setSentryUserId } from "../../instrument.js";
14
14
  import { clearEmbeddingBackendCache } from "../../memory/embedding-backend.js";
15
15
  import { syncManualTokenConnection } from "../../oauth/manual-token-connection.js";
16
16
  import { validateAnthropicApiKey } from "../../providers/anthropic/client.js";
@@ -182,6 +182,7 @@ export async function handleAddSecret(
182
182
  500,
183
183
  );
184
184
  }
185
+ clearEmbeddingBackendCache();
185
186
  invalidateConfigCache();
186
187
  await initializeProviders(getConfig());
187
188
  log.info({ provider: name }, "API key updated via HTTP");
@@ -234,6 +235,7 @@ export async function handleAddSecret(
234
235
  setSentryOrganizationId(undefined);
235
236
  } else if (field === "platform_user_id") {
236
237
  setPlatformUserId(undefined);
238
+ setSentryUserId(undefined);
237
239
  }
238
240
  deleteCredentialMetadata(service, field);
239
241
  } else {
@@ -259,6 +261,7 @@ export async function handleAddSecret(
259
261
  }
260
262
  if (service === "vellum" && field === "platform_user_id") {
261
263
  setPlatformUserId(effectiveValue || undefined);
264
+ setSentryUserId(effectiveValue || undefined);
262
265
  }
263
266
  }
264
267
  if (isManagedProxyCredential(service, field)) {
@@ -394,6 +397,7 @@ export async function handleDeleteSecret(req: Request): Promise<Response> {
394
397
  }
395
398
  if (service === "vellum" && field === "platform_user_id") {
396
399
  setPlatformUserId(undefined);
400
+ setSentryUserId(undefined);
397
401
  }
398
402
  if (isManagedProxyCredential(service, field)) {
399
403
  await initializeProviders(getConfig());
@@ -35,6 +35,7 @@ export interface ScheduleJob {
35
35
  mode: ScheduleMode;
36
36
  routingIntent: RoutingIntent;
37
37
  routingHints: Record<string, unknown>;
38
+ quiet: boolean;
38
39
  status: ScheduleStatus;
39
40
  createdAt: number;
40
41
  updatedAt: number;
@@ -91,6 +92,7 @@ export function createSchedule(params: {
91
92
  mode?: ScheduleMode;
92
93
  routingIntent?: RoutingIntent;
93
94
  routingHints?: Record<string, unknown>;
95
+ quiet?: boolean;
94
96
  }): ScheduleJob {
95
97
  const expression = params.expression ?? params.cronExpression ?? null;
96
98
  const isOneShot = expression == null;
@@ -118,6 +120,7 @@ export function createSchedule(params: {
118
120
  const mode = params.mode ?? "execute";
119
121
  const routingIntent = params.routingIntent ?? "all_channels";
120
122
  const routingHints = params.routingHints ?? {};
123
+ const quiet = params.quiet ?? false;
121
124
 
122
125
  let nextRunAt: number;
123
126
  if (isOneShot) {
@@ -144,6 +147,7 @@ export function createSchedule(params: {
144
147
  mode,
145
148
  routingIntent,
146
149
  routingHintsJson: JSON.stringify(routingHints),
150
+ quiet,
147
151
  status: "active" as ScheduleStatus,
148
152
  createdAt: now,
149
153
  updatedAt: now,
@@ -236,6 +240,7 @@ export function updateSchedule(
236
240
  mode?: ScheduleMode;
237
241
  routingIntent?: RoutingIntent;
238
242
  routingHints?: Record<string, unknown>;
243
+ quiet?: boolean;
239
244
  },
240
245
  ): ScheduleJob | null {
241
246
  const db = getDb();
@@ -290,6 +295,7 @@ export function updateSchedule(
290
295
  set.routingIntent = updates.routingIntent;
291
296
  if (updates.routingHints !== undefined)
292
297
  set.routingHintsJson = JSON.stringify(updates.routingHints);
298
+ if (updates.quiet !== undefined) set.quiet = updates.quiet;
293
299
 
294
300
  // Recompute nextRunAt if schedule timing may have changed (only for recurring)
295
301
  if (
@@ -771,6 +777,7 @@ function parseJobRow(row: typeof scheduleJobs.$inferSelect): ScheduleJob {
771
777
  mode: (row.mode ?? "execute") as ScheduleMode,
772
778
  routingIntent: (row.routingIntent ?? "all_channels") as RoutingIntent,
773
779
  routingHints: safeParseJson(row.routingHintsJson),
780
+ quiet: row.quiet ?? false,
774
781
  status: (row.status ?? "active") as ScheduleStatus,
775
782
  createdAt: row.createdAt,
776
783
  updatedAt: row.updatedAt,
@@ -206,7 +206,9 @@ async function runScheduleOnce(
206
206
  if (isOneShot) failOneShot(job.id);
207
207
  } else {
208
208
  completeScheduleRun(runId, { status: "ok" });
209
- notifySchedule({ id: job.id, name: job.name });
209
+ if (!job.quiet) {
210
+ notifySchedule({ id: job.id, name: job.name });
211
+ }
210
212
  if (isOneShot) completeOneShot(job.id);
211
213
  }
212
214
  processed += 1;
@@ -278,7 +280,9 @@ async function runScheduleOnce(
278
280
  trustClass: "guardian",
279
281
  });
280
282
  completeScheduleRun(runId, { status: "ok" });
281
- notifySchedule({ id: job.id, name: job.name });
283
+ if (!job.quiet) {
284
+ notifySchedule({ id: job.id, name: job.name });
285
+ }
282
286
  if (isOneShot) completeOneShot(job.id);
283
287
  processed += 1;
284
288
  } catch (err) {