@vellumai/assistant 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +18 -27
- package/docs/architecture/memory.md +105 -0
- package/node_modules/@vellumai/ces-contracts/src/index.ts +1 -0
- package/node_modules/@vellumai/ces-contracts/src/trust-rules.ts +42 -0
- package/package.json +1 -1
- package/src/__tests__/archive-recall.test.ts +560 -0
- package/src/__tests__/conversation-clear-safety.test.ts +259 -0
- package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
- package/src/__tests__/credential-security-invariants.test.ts +2 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
- package/src/__tests__/memory-reducer-job.test.ts +538 -0
- package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
- package/src/__tests__/memory-reducer-types.test.ts +12 -4
- package/src/__tests__/memory-reducer.test.ts +7 -1
- package/src/__tests__/memory-regressions.test.ts +24 -4
- package/src/__tests__/memory-simplified-config.test.ts +4 -4
- package/src/__tests__/openai-whisper.test.ts +93 -0
- package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
- package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
- package/src/__tests__/slack-messaging-token-resolution.test.ts +319 -0
- package/src/__tests__/volume-security-guard.test.ts +155 -0
- package/src/cli/commands/conversations.ts +18 -0
- package/src/config/bundled-skills/messaging/tools/shared.ts +1 -0
- package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
- package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +16 -37
- package/src/config/env-registry.ts +9 -0
- package/src/config/feature-flag-registry.json +8 -0
- package/src/config/loader.ts +0 -1
- package/src/config/schemas/memory-simplified.ts +1 -1
- package/src/credential-execution/managed-catalog.ts +5 -15
- package/src/daemon/config-watcher.ts +4 -1
- package/src/daemon/conversation-memory.ts +117 -0
- package/src/daemon/conversation-runtime-assembly.ts +1 -0
- package/src/daemon/daemon-control.ts +7 -0
- package/src/daemon/handlers/conversations.ts +11 -0
- package/src/daemon/lifecycle.ts +51 -2
- package/src/daemon/providers-setup.ts +2 -1
- package/src/hooks/manager.ts +7 -0
- package/src/instrument.ts +33 -1
- package/src/memory/archive-recall.ts +516 -0
- package/src/memory/brief-time.ts +5 -4
- package/src/memory/conversation-crud.ts +210 -0
- package/src/memory/conversation-key-store.ts +33 -4
- package/src/memory/db-init.ts +4 -0
- package/src/memory/embedding-local.ts +11 -5
- package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
- package/src/memory/job-handlers/conversation-starters.ts +24 -30
- package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
- package/src/memory/jobs-store.ts +2 -0
- package/src/memory/jobs-worker.ts +8 -0
- package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
- package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
- package/src/memory/migrations/141-rename-verification-table.ts +8 -0
- package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
- package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
- package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/reducer-scheduler.ts +242 -0
- package/src/memory/reducer-types.ts +9 -2
- package/src/memory/reducer.ts +25 -11
- package/src/memory/schema/infrastructure.ts +1 -0
- package/src/messaging/provider.ts +9 -0
- package/src/messaging/providers/slack/adapter.ts +29 -2
- package/src/oauth/connection-resolver.test.ts +22 -18
- package/src/oauth/connection-resolver.ts +92 -7
- package/src/oauth/platform-connection.test.ts +78 -69
- package/src/oauth/platform-connection.ts +12 -19
- package/src/permissions/trust-client.ts +343 -0
- package/src/permissions/trust-store-interface.ts +105 -0
- package/src/permissions/trust-store.ts +523 -36
- package/src/platform/client.test.ts +148 -0
- package/src/platform/client.ts +71 -0
- package/src/providers/speech-to-text/openai-whisper.test.ts +190 -0
- package/src/providers/speech-to-text/openai-whisper.ts +68 -0
- package/src/providers/speech-to-text/resolve.ts +9 -0
- package/src/providers/speech-to-text/types.ts +17 -0
- package/src/runtime/auth/route-policy.ts +10 -1
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/routes/conversation-management-routes.ts +88 -2
- package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
- package/src/runtime/routes/inbound-message-handler.ts +27 -3
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +16 -1
- package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +287 -0
- package/src/runtime/routes/inbound-stages/transcribe-audio.ts +122 -0
- package/src/runtime/routes/log-export-routes.ts +1 -0
- package/src/runtime/routes/secret-routes.ts +5 -1
- package/src/schedule/schedule-store.ts +7 -0
- package/src/schedule/scheduler.ts +6 -2
- package/src/security/ces-credential-client.ts +173 -0
- package/src/security/secure-keys.ts +65 -22
- package/src/signals/bash.ts +3 -0
- package/src/signals/cancel.ts +3 -0
- package/src/signals/confirm.ts +3 -0
- package/src/signals/conversation-undo.ts +3 -0
- package/src/signals/event-stream.ts +7 -0
- package/src/signals/shotgun.ts +3 -0
- package/src/signals/trust-rule.ts +3 -0
- package/src/telemetry/usage-telemetry-reporter.test.ts +23 -36
- package/src/telemetry/usage-telemetry-reporter.ts +22 -20
- package/src/tools/filesystem/edit.ts +6 -1
- package/src/tools/filesystem/read.ts +6 -1
- package/src/tools/filesystem/write.ts +6 -1
- package/src/tools/memory/handlers.ts +129 -1
- package/src/tools/schedule/create.ts +3 -0
- package/src/tools/schedule/list.ts +5 -1
- package/src/tools/schedule/update.ts +6 -0
- package/src/util/device-id.ts +70 -7
- package/src/util/logger.ts +35 -9
- package/src/util/platform.ts +29 -5
- package/src/workspace/migrations/migrate-to-workspace-volume.ts +113 -0
- package/src/workspace/migrations/registry.ts +2 -0
|
@@ -28,6 +28,7 @@ type ServerWithRequestIP = {
|
|
|
28
28
|
): { address: string; family: string; port: number } | null;
|
|
29
29
|
};
|
|
30
30
|
import { isHttpAuthDisabled } from "../../config/env.js";
|
|
31
|
+
import { getIsContainerized } from "../../config/env-registry.js";
|
|
31
32
|
|
|
32
33
|
const log = getLogger("guardian-bootstrap");
|
|
33
34
|
|
|
@@ -86,19 +87,30 @@ export async function handleGuardianBootstrap(
|
|
|
86
87
|
req: Request,
|
|
87
88
|
server: ServerWithRequestIP,
|
|
88
89
|
): Promise<Response> {
|
|
90
|
+
// Reject non-private-network peers (allows loopback, Docker bridge, etc.)
|
|
91
|
+
const peerIp = server.requestIP(req)?.address;
|
|
92
|
+
if ((!peerIp || !isPrivateAddress(peerIp)) && !isHttpAuthDisabled()) {
|
|
93
|
+
return httpError("FORBIDDEN", "Bootstrap endpoint is local-only", 403);
|
|
94
|
+
}
|
|
95
|
+
|
|
89
96
|
// Reject requests forwarded from public networks. The gateway sets
|
|
90
97
|
// x-forwarded-for to the real client IP; if that IP is on a private
|
|
91
98
|
// network (loopback, Docker bridge, RFC 1918) the request is still
|
|
92
99
|
// considered local. Only reject when the forwarded IP is public.
|
|
100
|
+
//
|
|
101
|
+
// Skip this check when running in a container: the peer IP was already
|
|
102
|
+
// validated above (Docker bridge network = private), so the request
|
|
103
|
+
// reached us through a co-located gateway. The x-forwarded-for header
|
|
104
|
+
// reflects the original external client (e.g. platform proxy) and is
|
|
105
|
+
// not meaningful for local-only enforcement in this topology.
|
|
93
106
|
const forwarded = req.headers.get("x-forwarded-for");
|
|
94
107
|
const forwardedIp = forwarded ? forwarded.split(",")[0].trim() : null;
|
|
95
|
-
if (
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
if ((!peerIp || !isPrivateAddress(peerIp)) && !isHttpAuthDisabled()) {
|
|
108
|
+
if (
|
|
109
|
+
forwardedIp &&
|
|
110
|
+
!isPrivateAddress(forwardedIp) &&
|
|
111
|
+
!isHttpAuthDisabled() &&
|
|
112
|
+
!getIsContainerized()
|
|
113
|
+
) {
|
|
102
114
|
return httpError("FORBIDDEN", "Bootstrap endpoint is local-only", 403);
|
|
103
115
|
}
|
|
104
116
|
|
|
@@ -44,6 +44,7 @@ import { handleEditIntercept } from "./inbound-stages/edit-intercept.js";
|
|
|
44
44
|
import { handleEscalationIntercept } from "./inbound-stages/escalation-intercept.js";
|
|
45
45
|
import { handleGuardianReplyIntercept } from "./inbound-stages/guardian-reply-intercept.js";
|
|
46
46
|
import { runSecretIngressCheck } from "./inbound-stages/secret-ingress-check.js";
|
|
47
|
+
import { tryTranscribeAudioAttachments } from "./inbound-stages/transcribe-audio.js";
|
|
47
48
|
import { handleVerificationIntercept } from "./inbound-stages/verification-intercept.js";
|
|
48
49
|
|
|
49
50
|
const log = getLogger("runtime-http");
|
|
@@ -144,7 +145,7 @@ export async function handleChannelInbound(
|
|
|
144
145
|
return httpError("BAD_REQUEST", "content must be a string", 400);
|
|
145
146
|
}
|
|
146
147
|
|
|
147
|
-
|
|
148
|
+
let trimmedContent = typeof content === "string" ? content.trim() : "";
|
|
148
149
|
const hasAttachments =
|
|
149
150
|
Array.isArray(attachmentIds) && attachmentIds.length > 0;
|
|
150
151
|
|
|
@@ -227,6 +228,29 @@ export async function handleChannelInbound(
|
|
|
227
228
|
}
|
|
228
229
|
}
|
|
229
230
|
|
|
231
|
+
// Auto-transcribe audio attachments from channel messages
|
|
232
|
+
if (hasAttachments && sourceChannel) {
|
|
233
|
+
const transcribeResult = await tryTranscribeAudioAttachments(attachmentIds);
|
|
234
|
+
switch (transcribeResult.status) {
|
|
235
|
+
case "transcribed":
|
|
236
|
+
// For voice-only messages (empty content), this becomes the message text.
|
|
237
|
+
// For audio+caption, both are preserved.
|
|
238
|
+
trimmedContent =
|
|
239
|
+
transcribeResult.text +
|
|
240
|
+
(trimmedContent ? `\n\n${trimmedContent}` : "");
|
|
241
|
+
break;
|
|
242
|
+
case "no_provider":
|
|
243
|
+
case "error":
|
|
244
|
+
// Inject a hint so the assistant knows the user sent audio and why
|
|
245
|
+
// transcription failed — it can then guide the user (e.g. set up API key).
|
|
246
|
+
trimmedContent =
|
|
247
|
+
`[Voice message received — ${transcribeResult.reason}]` +
|
|
248
|
+
(trimmedContent ? `\n\n${trimmedContent}` : "");
|
|
249
|
+
break;
|
|
250
|
+
// "no_audio", "disabled" — no action needed
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
230
254
|
const sourceMessageId =
|
|
231
255
|
typeof sourceMetadata?.messageId === "string"
|
|
232
256
|
? sourceMetadata.messageId
|
|
@@ -333,7 +357,7 @@ export async function handleChannelInbound(
|
|
|
333
357
|
externalMessageId,
|
|
334
358
|
conversationId: result.conversationId,
|
|
335
359
|
eventId: result.eventId,
|
|
336
|
-
content,
|
|
360
|
+
content: trimmedContent,
|
|
337
361
|
attachmentIds,
|
|
338
362
|
sourceMetadata: body.sourceMetadata,
|
|
339
363
|
actorDisplayName: body.actorDisplayName,
|
|
@@ -612,7 +636,7 @@ export async function handleChannelInbound(
|
|
|
612
636
|
processMessage,
|
|
613
637
|
conversationId: result.conversationId,
|
|
614
638
|
eventId: result.eventId,
|
|
615
|
-
content:
|
|
639
|
+
content: trimmedContent,
|
|
616
640
|
attachmentIds: hasAttachments ? attachmentIds : undefined,
|
|
617
641
|
sourceChannel,
|
|
618
642
|
sourceInterface,
|
|
@@ -79,14 +79,29 @@ export interface AclResult {
|
|
|
79
79
|
guardianVerifyCode: string | undefined;
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
/**
 * Drop Slack/Telegram mrkdwn wrapper characters from the edges of a raw
 * message.
 *
 * A verification code pasted with rich-text formatting (e.g. bold) arrives
 * as `*code*`, which would otherwise fail the strict bare-code regex.
 * Only the leading and trailing runs of `*`, `_`, `~` and backtick are
 * removed; the same characters inside the text are left untouched, and no
 * whitespace trimming is performed.
 *
 * @param text - Raw message text.
 * @returns The text without its leading/trailing formatting characters.
 */
function stripMrkdwnFormatting(text: string): string {
  // A run anchored at the start of the string, or a run anchored at the end.
  const edgeMarkers = /^[*_~`]+|[*_~`]+$/g;
  return text.replace(edgeMarkers, "");
}
|
|
92
|
+
|
|
82
93
|
/**
|
|
83
94
|
* Parse a guardian verification code from message content.
|
|
84
95
|
* Accepts a bare code as the entire message: 6-digit numeric OR 64-char hex
|
|
85
96
|
* (hex is retained for compatibility with unbound inbound/bootstrap sessions
|
|
86
97
|
* that intentionally use high-entropy secrets).
|
|
98
|
+
*
|
|
99
|
+
* Strips surrounding mrkdwn formatting characters first so that codes
|
|
100
|
+
* pasted with bold/italic/code formatting are still recognized.
|
|
87
101
|
*/
|
|
88
102
|
function parseGuardianVerifyCode(content: string): string | undefined {
|
|
89
|
-
const
|
|
103
|
+
const stripped = stripMrkdwnFormatting(content);
|
|
104
|
+
const bareMatch = stripped.match(/^([0-9a-fA-F]{64}|\d{6})$/);
|
|
90
105
|
if (bareMatch) return bareMatch[1];
|
|
91
106
|
|
|
92
107
|
return undefined;
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import type { SpeechToTextProvider } from "../../../providers/speech-to-text/types.js";
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Mocks — must be set up before importing the module under test
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
let mockFeatureFlagEnabled = true;
|
|
10
|
+
let mockAttachments: Array<{
|
|
11
|
+
id: string;
|
|
12
|
+
mimeType: string;
|
|
13
|
+
dataBase64: string;
|
|
14
|
+
originalFilename: string;
|
|
15
|
+
sizeBytes: number;
|
|
16
|
+
kind: string;
|
|
17
|
+
thumbnailBase64: string | null;
|
|
18
|
+
createdAt: number;
|
|
19
|
+
}> = [];
|
|
20
|
+
let mockProvider: SpeechToTextProvider | null = null;
|
|
21
|
+
|
|
22
|
+
mock.module("../../../config/assistant-feature-flags.js", () => ({
|
|
23
|
+
isAssistantFeatureFlagEnabled: () => mockFeatureFlagEnabled,
|
|
24
|
+
}));
|
|
25
|
+
|
|
26
|
+
mock.module("../../../config/loader.js", () => ({
|
|
27
|
+
getConfig: () => ({ assistantFeatureFlagValues: {} }),
|
|
28
|
+
}));
|
|
29
|
+
|
|
30
|
+
mock.module("../../../memory/attachments-store.js", () => ({
|
|
31
|
+
getAttachmentsByIds: (ids: string[]) =>
|
|
32
|
+
mockAttachments.filter((a) => ids.includes(a.id)),
|
|
33
|
+
getAttachmentById: (id: string, _opts?: { hydrateFileData?: boolean }) =>
|
|
34
|
+
mockAttachments.find((a) => a.id === id) ?? null,
|
|
35
|
+
}));
|
|
36
|
+
|
|
37
|
+
mock.module("../../../providers/speech-to-text/resolve.js", () => ({
|
|
38
|
+
resolveSpeechToTextProvider: async () => mockProvider,
|
|
39
|
+
}));
|
|
40
|
+
|
|
41
|
+
mock.module("../../../util/logger.js", () => ({
|
|
42
|
+
getLogger: () => ({
|
|
43
|
+
debug: () => {},
|
|
44
|
+
info: () => {},
|
|
45
|
+
warn: () => {},
|
|
46
|
+
error: () => {},
|
|
47
|
+
}),
|
|
48
|
+
}));
|
|
49
|
+
|
|
50
|
+
// Import after mocks are installed
|
|
51
|
+
const { tryTranscribeAudioAttachments } = await import("./transcribe-audio.js");
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Helpers
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
/**
 * Build an audio attachment fixture for the mocked attachments store.
 *
 * @param id - Attachment id; also embedded in the generated filename.
 * @param mimeType - MIME type of the fixture (defaults to `audio/ogg`).
 * @param dataBase64 - Base64 payload (defaults to a short fake byte string).
 * @returns A fixture whose `sizeBytes` is the decoded length of `dataBase64`.
 */
function makeAudioAttachment(
  id: string,
  mimeType = "audio/ogg",
  dataBase64 = Buffer.from("fake-audio-data").toString("base64"),
) {
  // Size is derived from the payload so custom data stays self-consistent.
  const decodedLength = Buffer.from(dataBase64, "base64").length;
  const fixture = {
    id,
    mimeType,
    dataBase64,
    // The filename always carries the .ogg extension, whatever mimeType is.
    originalFilename: `voice-${id}.ogg`,
    sizeBytes: decodedLength,
    kind: "document" as const,
    thumbnailBase64: null,
    createdAt: Date.now(),
  };
  return fixture;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Build a non-audio (PDF) attachment fixture for the mocked attachments store.
 *
 * @param id - Attachment id; also embedded in the generated filename.
 * @returns A fixture with MIME type `application/pdf`.
 */
function makeDocumentAttachment(id: string) {
  const payload = Buffer.from("fake-pdf").toString("base64");
  const fixture = {
    id,
    mimeType: "application/pdf",
    dataBase64: payload,
    originalFilename: `doc-${id}.pdf`,
    sizeBytes: 8,
    kind: "document" as const,
    thumbnailBase64: null,
    createdAt: Date.now(),
  };
  return fixture;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Build a non-audio (PNG image) attachment fixture for the mocked
 * attachments store.
 *
 * @param id - Attachment id; also embedded in the generated filename.
 * @returns A fixture with MIME type `image/png` and kind `image`.
 */
function makeImageAttachment(id: string) {
  const payload = Buffer.from("fake-image").toString("base64");
  const fixture = {
    id,
    mimeType: "image/png",
    dataBase64: payload,
    originalFilename: `photo-${id}.png`,
    sizeBytes: 10,
    kind: "image" as const,
    thumbnailBase64: null,
    createdAt: Date.now(),
  };
  return fixture;
}
|
|
99
|
+
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
// Tests
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
// Behavioural tests for tryTranscribeAudioAttachments. Each test drives the
// module through the mutable mock state (feature flag, attachments, provider)
// that the module mocks read from.
describe("tryTranscribeAudioAttachments", () => {
  beforeEach(() => {
    // Start every test from the same baseline: flag on, nothing stored,
    // no speech-to-text provider available.
    mockFeatureFlagEnabled = true;
    mockAttachments = [];
    mockProvider = null;
  });

  afterEach(() => {
    mockAttachments = [];
  });

  test("audio attachment is transcribed and returns transcribed result", async () => {
    const audio = makeAudioAttachment("a1");
    mockAttachments = [audio];
    mockProvider = {
      transcribe: async () => ({ text: "Hello, how are you?" }),
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({
      status: "transcribed",
      text: "Hello, how are you?",
    });
  });

  test("non-audio attachments return no_audio", async () => {
    const doc = makeDocumentAttachment("d1");
    const img = makeImageAttachment("i1");
    mockAttachments = [doc, img];
    mockProvider = {
      transcribe: async () => ({ text: "should not be called" }),
    };

    const result = await tryTranscribeAudioAttachments(["d1", "i1"]);

    expect(result).toEqual({ status: "no_audio" });
  });

  test("no API key returns no_provider with helpful reason string", async () => {
    const audio = makeAudioAttachment("a1");
    mockAttachments = [audio];
    mockProvider = null; // No provider resolved

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result.status).toBe("no_provider");
    expect((result as { reason: string }).reason).toContain(
      "No OpenAI API key configured",
    );
  });

  test("API failure returns error with reason", async () => {
    const audio = makeAudioAttachment("a1");
    mockAttachments = [audio];
    mockProvider = {
      transcribe: async () => {
        throw new Error("API rate limit exceeded");
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result.status).toBe("error");
    expect((result as { reason: string }).reason).toBe(
      "API rate limit exceeded",
    );
  });

  test("feature flag disabled returns disabled", async () => {
    mockFeatureFlagEnabled = false;
    const audio = makeAudioAttachment("a1");
    mockAttachments = [audio];

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({ status: "disabled" });
  });

  test("provider AbortError is reported as a transcription timeout", async () => {
    const audio = makeAudioAttachment("a1");
    mockAttachments = [audio];
    // The 30s deadline itself is not exercised here (that would require
    // overriding the timeout constant). The provider rejects with the
    // AbortError an aborted request produces, which pins the mapping of
    // that error onto the "Transcription timed out" reason.
    mockProvider = {
      transcribe: async () => {
        throw new DOMException("The operation was aborted", "AbortError");
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result.status).toBe("error");
    expect((result as { reason: string }).reason).toBe(
      "Transcription timed out",
    );
  });

  test("multiple audio attachments are transcribed and concatenated", async () => {
    const a1 = makeAudioAttachment("a1");
    const a2 = makeAudioAttachment("a2", "audio/mpeg");
    mockAttachments = [a1, a2];

    let callCount = 0;
    mockProvider = {
      transcribe: async () => {
        callCount++;
        return { text: callCount === 1 ? "First message" : "Second message" };
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1", "a2"]);

    expect(result).toEqual({
      status: "transcribed",
      text: "First message\n\nSecond message",
    });
    expect(callCount).toBe(2);
  });

  test("mixed audio and non-audio attachments: only audio is transcribed", async () => {
    const audio = makeAudioAttachment("a1");
    const doc = makeDocumentAttachment("d1");
    mockAttachments = [audio, doc];

    let transcribeCallCount = 0;
    mockProvider = {
      transcribe: async () => {
        transcribeCallCount++;
        return { text: "Voice transcription" };
      },
    };

    const result = await tryTranscribeAudioAttachments(["a1", "d1"]);

    expect(result).toEqual({
      status: "transcribed",
      text: "Voice transcription",
    });
    expect(transcribeCallCount).toBe(1);
  });

  test("empty attachment IDs returns no_audio", async () => {
    mockProvider = {
      transcribe: async () => ({ text: "should not be called" }),
    };

    const result = await tryTranscribeAudioAttachments([]);

    expect(result).toEqual({ status: "no_audio" });
  });

  test("attachment with empty transcription returns no_audio", async () => {
    const audio = makeAudioAttachment("a1");
    mockAttachments = [audio];
    mockProvider = {
      transcribe: async () => ({ text: "   " }), // whitespace-only
    };

    const result = await tryTranscribeAudioAttachments(["a1"]);

    expect(result).toEqual({ status: "no_audio" });
  });
});
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-transcribe audio attachments from channel inbound messages.
|
|
3
|
+
*
|
|
4
|
+
* Returns a discriminated result type so callers can handle each outcome
|
|
5
|
+
* (transcribed, no audio, disabled, no provider, error) without exceptions.
|
|
6
|
+
* Never throws — failures are represented as result variants so that message
|
|
7
|
+
* delivery is never blocked by transcription issues.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { isAssistantFeatureFlagEnabled } from "../../../config/assistant-feature-flags.js";
|
|
11
|
+
import { getConfig } from "../../../config/loader.js";
|
|
12
|
+
import * as attachmentsStore from "../../../memory/attachments-store.js";
|
|
13
|
+
import { resolveSpeechToTextProvider } from "../../../providers/speech-to-text/resolve.js";
|
|
14
|
+
import { getLogger } from "../../../util/logger.js";
|
|
15
|
+
|
|
16
|
+
const log = getLogger("transcribe-audio");
|
|
17
|
+
|
|
18
|
+
const VOICE_TRANSCRIPTION_FLAG_KEY =
|
|
19
|
+
"feature_flags.channel-voice-transcription.enabled" as const;
|
|
20
|
+
|
|
21
|
+
/** Timeout for the entire transcription pipeline (all attachments). */
|
|
22
|
+
const TRANSCRIPTION_TIMEOUT_MS = 30_000;
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Result type
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
export type TranscribeResult =
|
|
29
|
+
| { status: "transcribed"; text: string }
|
|
30
|
+
| { status: "no_audio" }
|
|
31
|
+
| { status: "disabled" }
|
|
32
|
+
| { status: "no_provider"; reason: string }
|
|
33
|
+
| { status: "error"; reason: string };
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Public API
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
/**
 * Transcribe the audio attachments among `attachmentIds`.
 *
 * Outcomes are reported through the returned variant:
 * - `disabled`: the voice-transcription feature flag is off.
 * - `no_audio`: no attachment has an `audio/*` MIME type, or nothing
 *   produced non-blank text (including attachments skipped because their
 *   data could not be hydrated).
 * - `no_provider`: no speech-to-text provider could be resolved.
 * - `transcribed`: the trimmed transcripts joined with a blank line.
 * - `error`: something threw, or the shared deadline elapsed.
 *
 * Never throws; every failure is converted into an `error` result.
 *
 * @param attachmentIds - Ids of the attachments on the inbound message.
 * @returns The transcription outcome.
 */
export async function tryTranscribeAudioAttachments(
  attachmentIds: string[],
): Promise<TranscribeResult> {
  try {
    // Check feature flag
    const config = getConfig();
    if (!isAssistantFeatureFlagEnabled(VOICE_TRANSCRIPTION_FLAG_KEY, config)) {
      return { status: "disabled" };
    }

    // Look up attachments and filter to audio MIME types
    const resolved = attachmentsStore.getAttachmentsByIds(attachmentIds);
    const audioAttachments = resolved.filter((a) =>
      a.mimeType.startsWith("audio/"),
    );

    if (audioAttachments.length === 0) {
      return { status: "no_audio" };
    }

    // Resolve STT provider
    const provider = await resolveSpeechToTextProvider();
    if (!provider) {
      return {
        status: "no_provider",
        reason:
          "No OpenAI API key configured. Set one up to enable voice message transcription.",
      };
    }

    // One deadline is shared by every attachment in this message.
    const abortController = new AbortController();
    const timeoutId = setTimeout(
      () => abortController.abort(),
      TRANSCRIPTION_TIMEOUT_MS,
    );

    try {
      const transcriptions: string[] = [];

      for (const attachment of audioAttachments) {
        // A provider is not guaranteed to reject when the signal fires, so
        // enforce the deadline here as well: once it has passed, do not
        // start another provider call.
        if (abortController.signal.aborted) {
          log.warn(
            { attachmentId: attachment.id },
            "Transcription deadline elapsed before attachment was processed",
          );
          return { status: "error", reason: "Transcription timed out" };
        }

        // Hydrate the base64 data for the attachment
        const hydrated = attachmentsStore.getAttachmentById(attachment.id, {
          hydrateFileData: true,
        });
        if (!hydrated || !hydrated.dataBase64) {
          log.warn(
            { attachmentId: attachment.id },
            "Could not hydrate audio attachment data; skipping",
          );
          continue;
        }

        const buffer = Buffer.from(hydrated.dataBase64, "base64");
        const result = await provider.transcribe(
          buffer,
          attachment.mimeType,
          abortController.signal,
        );

        // Blank transcripts carry no message content; drop them.
        const text = result.text.trim();
        if (text) {
          transcriptions.push(text);
        }
      }

      if (transcriptions.length === 0) {
        return { status: "no_audio" };
      }

      return { status: "transcribed", text: transcriptions.join("\n\n") };
    } finally {
      // Always release the timer, whichever way the loop exits.
      clearTimeout(timeoutId);
    }
  } catch (err: unknown) {
    // An AbortError is what a provider raises when the deadline signal fires.
    const reason =
      err instanceof Error
        ? err.name === "AbortError"
          ? "Transcription timed out"
          : err.message
        : String(err);
    log.warn({ err }, "Audio transcription failed");
    return { status: "error", reason };
  }
}
|
|
@@ -10,7 +10,7 @@ import {
|
|
|
10
10
|
invalidateConfigCache,
|
|
11
11
|
} from "../../config/loader.js";
|
|
12
12
|
import type { CesClient } from "../../credential-execution/client.js";
|
|
13
|
-
import { setSentryOrganizationId } from "../../instrument.js";
|
|
13
|
+
import { setSentryOrganizationId, setSentryUserId } from "../../instrument.js";
|
|
14
14
|
import { clearEmbeddingBackendCache } from "../../memory/embedding-backend.js";
|
|
15
15
|
import { syncManualTokenConnection } from "../../oauth/manual-token-connection.js";
|
|
16
16
|
import { validateAnthropicApiKey } from "../../providers/anthropic/client.js";
|
|
@@ -182,6 +182,7 @@ export async function handleAddSecret(
|
|
|
182
182
|
500,
|
|
183
183
|
);
|
|
184
184
|
}
|
|
185
|
+
clearEmbeddingBackendCache();
|
|
185
186
|
invalidateConfigCache();
|
|
186
187
|
await initializeProviders(getConfig());
|
|
187
188
|
log.info({ provider: name }, "API key updated via HTTP");
|
|
@@ -234,6 +235,7 @@ export async function handleAddSecret(
|
|
|
234
235
|
setSentryOrganizationId(undefined);
|
|
235
236
|
} else if (field === "platform_user_id") {
|
|
236
237
|
setPlatformUserId(undefined);
|
|
238
|
+
setSentryUserId(undefined);
|
|
237
239
|
}
|
|
238
240
|
deleteCredentialMetadata(service, field);
|
|
239
241
|
} else {
|
|
@@ -259,6 +261,7 @@ export async function handleAddSecret(
|
|
|
259
261
|
}
|
|
260
262
|
if (service === "vellum" && field === "platform_user_id") {
|
|
261
263
|
setPlatformUserId(effectiveValue || undefined);
|
|
264
|
+
setSentryUserId(effectiveValue || undefined);
|
|
262
265
|
}
|
|
263
266
|
}
|
|
264
267
|
if (isManagedProxyCredential(service, field)) {
|
|
@@ -394,6 +397,7 @@ export async function handleDeleteSecret(req: Request): Promise<Response> {
|
|
|
394
397
|
}
|
|
395
398
|
if (service === "vellum" && field === "platform_user_id") {
|
|
396
399
|
setPlatformUserId(undefined);
|
|
400
|
+
setSentryUserId(undefined);
|
|
397
401
|
}
|
|
398
402
|
if (isManagedProxyCredential(service, field)) {
|
|
399
403
|
await initializeProviders(getConfig());
|
|
@@ -35,6 +35,7 @@ export interface ScheduleJob {
|
|
|
35
35
|
mode: ScheduleMode;
|
|
36
36
|
routingIntent: RoutingIntent;
|
|
37
37
|
routingHints: Record<string, unknown>;
|
|
38
|
+
quiet: boolean;
|
|
38
39
|
status: ScheduleStatus;
|
|
39
40
|
createdAt: number;
|
|
40
41
|
updatedAt: number;
|
|
@@ -91,6 +92,7 @@ export function createSchedule(params: {
|
|
|
91
92
|
mode?: ScheduleMode;
|
|
92
93
|
routingIntent?: RoutingIntent;
|
|
93
94
|
routingHints?: Record<string, unknown>;
|
|
95
|
+
quiet?: boolean;
|
|
94
96
|
}): ScheduleJob {
|
|
95
97
|
const expression = params.expression ?? params.cronExpression ?? null;
|
|
96
98
|
const isOneShot = expression == null;
|
|
@@ -118,6 +120,7 @@ export function createSchedule(params: {
|
|
|
118
120
|
const mode = params.mode ?? "execute";
|
|
119
121
|
const routingIntent = params.routingIntent ?? "all_channels";
|
|
120
122
|
const routingHints = params.routingHints ?? {};
|
|
123
|
+
const quiet = params.quiet ?? false;
|
|
121
124
|
|
|
122
125
|
let nextRunAt: number;
|
|
123
126
|
if (isOneShot) {
|
|
@@ -144,6 +147,7 @@ export function createSchedule(params: {
|
|
|
144
147
|
mode,
|
|
145
148
|
routingIntent,
|
|
146
149
|
routingHintsJson: JSON.stringify(routingHints),
|
|
150
|
+
quiet,
|
|
147
151
|
status: "active" as ScheduleStatus,
|
|
148
152
|
createdAt: now,
|
|
149
153
|
updatedAt: now,
|
|
@@ -236,6 +240,7 @@ export function updateSchedule(
|
|
|
236
240
|
mode?: ScheduleMode;
|
|
237
241
|
routingIntent?: RoutingIntent;
|
|
238
242
|
routingHints?: Record<string, unknown>;
|
|
243
|
+
quiet?: boolean;
|
|
239
244
|
},
|
|
240
245
|
): ScheduleJob | null {
|
|
241
246
|
const db = getDb();
|
|
@@ -290,6 +295,7 @@ export function updateSchedule(
|
|
|
290
295
|
set.routingIntent = updates.routingIntent;
|
|
291
296
|
if (updates.routingHints !== undefined)
|
|
292
297
|
set.routingHintsJson = JSON.stringify(updates.routingHints);
|
|
298
|
+
if (updates.quiet !== undefined) set.quiet = updates.quiet;
|
|
293
299
|
|
|
294
300
|
// Recompute nextRunAt if schedule timing may have changed (only for recurring)
|
|
295
301
|
if (
|
|
@@ -771,6 +777,7 @@ function parseJobRow(row: typeof scheduleJobs.$inferSelect): ScheduleJob {
|
|
|
771
777
|
mode: (row.mode ?? "execute") as ScheduleMode,
|
|
772
778
|
routingIntent: (row.routingIntent ?? "all_channels") as RoutingIntent,
|
|
773
779
|
routingHints: safeParseJson(row.routingHintsJson),
|
|
780
|
+
quiet: row.quiet ?? false,
|
|
774
781
|
status: (row.status ?? "active") as ScheduleStatus,
|
|
775
782
|
createdAt: row.createdAt,
|
|
776
783
|
updatedAt: row.updatedAt,
|
|
@@ -206,7 +206,9 @@ async function runScheduleOnce(
|
|
|
206
206
|
if (isOneShot) failOneShot(job.id);
|
|
207
207
|
} else {
|
|
208
208
|
completeScheduleRun(runId, { status: "ok" });
|
|
209
|
-
|
|
209
|
+
if (!job.quiet) {
|
|
210
|
+
notifySchedule({ id: job.id, name: job.name });
|
|
211
|
+
}
|
|
210
212
|
if (isOneShot) completeOneShot(job.id);
|
|
211
213
|
}
|
|
212
214
|
processed += 1;
|
|
@@ -278,7 +280,9 @@ async function runScheduleOnce(
|
|
|
278
280
|
trustClass: "guardian",
|
|
279
281
|
});
|
|
280
282
|
completeScheduleRun(runId, { status: "ok" });
|
|
281
|
-
|
|
283
|
+
if (!job.quiet) {
|
|
284
|
+
notifySchedule({ id: job.id, name: job.name });
|
|
285
|
+
}
|
|
282
286
|
if (isOneShot) completeOneShot(job.id);
|
|
283
287
|
processed += 1;
|
|
284
288
|
} catch (err) {
|