@vellumai/assistant 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +18 -27
- package/node_modules/@vellumai/ces-contracts/src/index.ts +1 -0
- package/node_modules/@vellumai/ces-contracts/src/trust-rules.ts +42 -0
- package/package.json +1 -1
- package/src/__tests__/credential-security-invariants.test.ts +2 -0
- package/src/__tests__/openai-whisper.test.ts +93 -0
- package/src/__tests__/slack-messaging-token-resolution.test.ts +319 -0
- package/src/__tests__/volume-security-guard.test.ts +155 -0
- package/src/config/bundled-skills/messaging/tools/shared.ts +1 -0
- package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +16 -37
- package/src/config/env-registry.ts +9 -0
- package/src/config/feature-flag-registry.json +8 -0
- package/src/credential-execution/managed-catalog.ts +5 -15
- package/src/daemon/config-watcher.ts +4 -1
- package/src/daemon/daemon-control.ts +7 -0
- package/src/daemon/lifecycle.ts +7 -1
- package/src/daemon/providers-setup.ts +2 -1
- package/src/hooks/manager.ts +7 -0
- package/src/instrument.ts +33 -1
- package/src/memory/embedding-local.ts +11 -5
- package/src/memory/job-handlers/conversation-starters.ts +24 -36
- package/src/messaging/provider.ts +9 -0
- package/src/messaging/providers/slack/adapter.ts +29 -2
- package/src/oauth/connection-resolver.test.ts +22 -18
- package/src/oauth/connection-resolver.ts +92 -7
- package/src/oauth/platform-connection.test.ts +78 -69
- package/src/oauth/platform-connection.ts +12 -19
- package/src/permissions/trust-client.ts +343 -0
- package/src/permissions/trust-store-interface.ts +105 -0
- package/src/permissions/trust-store.ts +523 -36
- package/src/platform/client.test.ts +148 -0
- package/src/platform/client.ts +71 -0
- package/src/providers/speech-to-text/openai-whisper.test.ts +190 -0
- package/src/providers/speech-to-text/openai-whisper.ts +68 -0
- package/src/providers/speech-to-text/resolve.ts +9 -0
- package/src/providers/speech-to-text/types.ts +17 -0
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/routes/inbound-message-handler.ts +27 -3
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +16 -1
- package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +287 -0
- package/src/runtime/routes/inbound-stages/transcribe-audio.ts +122 -0
- package/src/runtime/routes/log-export-routes.ts +1 -0
- package/src/runtime/routes/secret-routes.ts +4 -1
- package/src/security/ces-credential-client.ts +173 -0
- package/src/security/secure-keys.ts +65 -22
- package/src/signals/bash.ts +3 -0
- package/src/signals/cancel.ts +3 -0
- package/src/signals/confirm.ts +3 -0
- package/src/signals/conversation-undo.ts +3 -0
- package/src/signals/event-stream.ts +7 -0
- package/src/signals/shotgun.ts +3 -0
- package/src/signals/trust-rule.ts +3 -0
- package/src/telemetry/usage-telemetry-reporter.test.ts +23 -36
- package/src/telemetry/usage-telemetry-reporter.ts +21 -19
- package/src/util/device-id.ts +70 -7
- package/src/util/logger.ts +35 -9
- package/src/util/platform.ts +29 -5
- package/src/workspace/migrations/migrate-to-workspace-volume.ts +113 -0
- package/src/workspace/migrations/registry.ts +2 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// Unit tests for VellumPlatformClient (./client.ts), written for bun:test.
// NOTE(review): mock.module() calls must run before the static import of
// "./client.js" below — bun applies module mocks at import time.
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";

// ---------------------------------------------------------------------------
// Mutable mock state
// ---------------------------------------------------------------------------
// Reassigned by individual tests; the module mocks below close over these and
// read them at call time, so per-test mutation is picked up automatically.

let mockManagedProxyCtx = {
  enabled: false,
  platformBaseUrl: "",
  assistantApiKey: "",
};
let mockAssistantId = "";

// ---------------------------------------------------------------------------
// Module mocks
// ---------------------------------------------------------------------------

mock.module("../providers/managed-proxy/context.js", () => ({
  resolveManagedProxyContext: async () => mockManagedProxyCtx,
}));

mock.module("../config/env.js", () => ({
  getPlatformAssistantId: () => mockAssistantId,
}));

// ---------------------------------------------------------------------------
// Import under test (after mocks)
// ---------------------------------------------------------------------------

import { VellumPlatformClient } from "./client.js";

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

describe("VellumPlatformClient", () => {
  // Saved so fetch() tests that stub globalThis.fetch can restore it.
  let originalFetch: typeof globalThis.fetch;

  beforeEach(() => {
    originalFetch = globalThis.fetch;
    // Default happy-path state: managed proxy enabled with full credentials.
    mockManagedProxyCtx = {
      enabled: true,
      platformBaseUrl: "https://platform.example.com",
      assistantApiKey: "sk-test-key",
    };
    mockAssistantId = "asst-123";
  });

  afterEach(() => {
    globalThis.fetch = originalFetch;
  });

  describe("create()", () => {
    test("returns a client when all prerequisites are met", async () => {
      const client = await VellumPlatformClient.create();
      expect(client).not.toBeNull();
      expect(client!.baseUrl).toBe("https://platform.example.com");
      expect(client!.assistantApiKey).toBe("sk-test-key");
      expect(client!.platformAssistantId).toBe("asst-123");
    });

    test("returns null when managed proxy is not enabled", async () => {
      mockManagedProxyCtx = {
        enabled: false,
        platformBaseUrl: "",
        assistantApiKey: "",
      };

      const client = await VellumPlatformClient.create();
      expect(client).toBeNull();
    });

    test("returns client with empty assistantId when assistant ID is missing", async () => {
      // The assistant ID is optional at construction time; callers that need
      // it are expected to check platformAssistantId themselves.
      mockAssistantId = "";

      const client = await VellumPlatformClient.create();
      expect(client).not.toBeNull();
      expect(client!.platformAssistantId).toBe("");
    });

    test("strips trailing slash from platform base URL", async () => {
      // Multiple trailing slashes should all be removed, not just one.
      mockManagedProxyCtx.platformBaseUrl = "https://platform.example.com///";

      const client = await VellumPlatformClient.create();
      expect(client!.baseUrl).toBe("https://platform.example.com");
    });
  });

  describe("fetch()", () => {
    test("prepends base URL to path", async () => {
      globalThis.fetch = mock(async (url: string | URL | Request) => {
        expect(String(url)).toBe(
          "https://platform.example.com/v1/some/endpoint/",
        );
        return new Response("ok", { status: 200 });
      }) as unknown as typeof globalThis.fetch;

      const client = await VellumPlatformClient.create();
      await client!.fetch("/v1/some/endpoint/");
    });

    test("injects Api-Key auth header", async () => {
      globalThis.fetch = mock(
        async (_url: string | URL | Request, init?: RequestInit) => {
          const headers = new Headers(init?.headers);
          expect(headers.get("Authorization")).toBe("Api-Key sk-test-key");
          return new Response("ok", { status: 200 });
        },
      ) as unknown as typeof globalThis.fetch;

      const client = await VellumPlatformClient.create();
      await client!.fetch("/v1/test/");
    });

    test("preserves caller-provided headers", async () => {
      // The auth header must be merged with — not replace — caller headers.
      globalThis.fetch = mock(
        async (_url: string | URL | Request, init?: RequestInit) => {
          const headers = new Headers(init?.headers);
          expect(headers.get("Content-Type")).toBe("application/json");
          expect(headers.get("Authorization")).toBe("Api-Key sk-test-key");
          return new Response("ok", { status: 200 });
        },
      ) as unknown as typeof globalThis.fetch;

      const client = await VellumPlatformClient.create();
      await client!.fetch("/v1/test/", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
      });
    });

    test("passes through request init options", async () => {
      globalThis.fetch = mock(
        async (_url: string | URL | Request, init?: RequestInit) => {
          expect(init?.method).toBe("POST");
          expect(init?.body).toBe('{"key":"value"}');
          return new Response("ok", { status: 200 });
        },
      ) as unknown as typeof globalThis.fetch;

      const client = await VellumPlatformClient.create();
      await client!.fetch("/v1/test/", {
        method: "POST",
        body: '{"key":"value"}',
      });
    });
  });
});
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centralized platform API client.
|
|
3
|
+
*
|
|
4
|
+
* Owns managed proxy context resolution, prerequisite validation, and
|
|
5
|
+
* authenticated fetch for all platform API calls that use Api-Key auth.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getPlatformAssistantId } from "../config/env.js";
|
|
9
|
+
import { resolveManagedProxyContext } from "../providers/managed-proxy/context.js";
|
|
10
|
+
|
|
11
|
+
export class VellumPlatformClient {
|
|
12
|
+
private readonly platformBaseUrl: string;
|
|
13
|
+
private readonly apiKey: string;
|
|
14
|
+
private readonly assistantId: string;
|
|
15
|
+
|
|
16
|
+
private constructor(
|
|
17
|
+
platformBaseUrl: string,
|
|
18
|
+
apiKey: string,
|
|
19
|
+
assistantId: string,
|
|
20
|
+
) {
|
|
21
|
+
this.platformBaseUrl = platformBaseUrl.replace(/\/+$/, "");
|
|
22
|
+
this.apiKey = apiKey;
|
|
23
|
+
this.assistantId = assistantId;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Create a platform client by resolving managed proxy context.
|
|
28
|
+
*
|
|
29
|
+
* Returns `null` when auth prerequisites are missing (not logged in, no API
|
|
30
|
+
* key). The assistant ID is resolved but not required — callers that need it
|
|
31
|
+
* should check `platformAssistantId` themselves.
|
|
32
|
+
*/
|
|
33
|
+
static async create(): Promise<VellumPlatformClient | null> {
|
|
34
|
+
const ctx = await resolveManagedProxyContext();
|
|
35
|
+
if (!ctx.enabled) return null;
|
|
36
|
+
|
|
37
|
+
const assistantId = getPlatformAssistantId();
|
|
38
|
+
|
|
39
|
+
return new VellumPlatformClient(
|
|
40
|
+
ctx.platformBaseUrl,
|
|
41
|
+
ctx.assistantApiKey,
|
|
42
|
+
assistantId,
|
|
43
|
+
);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Authenticated fetch against the platform API.
|
|
48
|
+
*
|
|
49
|
+
* Prepends `platformBaseUrl` to `path` and injects the `Api-Key` auth header.
|
|
50
|
+
* Callers handle response parsing and domain-specific error mapping.
|
|
51
|
+
*/
|
|
52
|
+
async fetch(path: string, init?: RequestInit): Promise<Response> {
|
|
53
|
+
const url = `${this.platformBaseUrl}${path}`;
|
|
54
|
+
const headers = new Headers(init?.headers);
|
|
55
|
+
headers.set("Authorization", `Api-Key ${this.apiKey}`);
|
|
56
|
+
|
|
57
|
+
return fetch(url, { ...init, headers });
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
get baseUrl(): string {
|
|
61
|
+
return this.platformBaseUrl;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
get assistantApiKey(): string {
|
|
65
|
+
return this.apiKey;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
get platformAssistantId(): string {
|
|
69
|
+
return this.assistantId;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
// Unit tests for the OpenAI Whisper speech-to-text provider and its resolver.
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";

// ---------------------------------------------------------------------------
// Module mocks (must precede dynamic imports)
// ---------------------------------------------------------------------------

// Set per-test; the secure-keys mock below reads it at call time.
let mockOpenAIKey: string | undefined;

mock.module("../../security/secure-keys.js", () => ({
  getProviderKeyAsync: async (provider: string) =>
    provider === "openai" ? mockOpenAIKey : undefined,
}));

// Dynamic import so the mock is in effect when resolve.ts loads.
const { resolveSpeechToTextProvider } = await import("./resolve.js");

import { OpenAIWhisperProvider } from "./openai-whisper.js";

// Dummy key — never sent anywhere real; all fetches are stubbed below.
const TEST_API_KEY = "sk-test-key-for-unit-tests";

describe("OpenAIWhisperProvider", () => {
  // Saved so each test's globalThis.fetch stub can be restored.
  let originalFetch: typeof globalThis.fetch;

  beforeEach(() => {
    originalFetch = globalThis.fetch;
  });

  afterEach(() => {
    globalThis.fetch = originalFetch;
  });

  test("successful transcription returns text", async () => {
    globalThis.fetch = (async (
      _url: string | URL | Request,
      _init?: RequestInit,
    ) => {
      // Whitespace padding exercises the provider's trim behavior.
      return new Response(JSON.stringify({ text: " Hello, world! " }), {
        status: 200,
        headers: { "Content-Type": "application/json" },
      });
    }) as typeof fetch;

    const provider = new OpenAIWhisperProvider(TEST_API_KEY);
    const result = await provider.transcribe(
      Buffer.from("fake-audio"),
      "audio/ogg",
    );

    expect(result).toEqual({ text: "Hello, world!" });
  });

  test("API error throws with status and partial body", async () => {
    const errorBody = JSON.stringify({
      error: {
        message: "Invalid API key provided",
        type: "invalid_request_error",
      },
    });

    globalThis.fetch = (async (
      _url: string | URL | Request,
      _init?: RequestInit,
    ) => {
      return new Response(errorBody, {
        status: 401,
        headers: { "Content-Type": "application/json" },
      });
    }) as typeof fetch;

    const provider = new OpenAIWhisperProvider(TEST_API_KEY);

    await expect(
      provider.transcribe(Buffer.from("fake-audio"), "audio/wav"),
    ).rejects.toThrow("Whisper API error (401)");
  });

  test("sends correct FormData structure (model=whisper-1, file blob with correct MIME)", async () => {
    let capturedBody: FormData | undefined;
    let capturedHeaders: HeadersInit | undefined;

    globalThis.fetch = (async (
      url: string | URL | Request,
      init?: RequestInit,
    ) => {
      capturedBody = init?.body as FormData;
      capturedHeaders = init?.headers;
      expect(url).toBe("https://api.openai.com/v1/audio/transcriptions");
      expect(init?.method).toBe("POST");

      return new Response(JSON.stringify({ text: "transcribed" }), {
        status: 200,
        headers: { "Content-Type": "application/json" },
      });
    }) as typeof fetch;

    const provider = new OpenAIWhisperProvider(TEST_API_KEY);
    await provider.transcribe(Buffer.from("fake-audio"), "audio/mpeg");

    // Verify authorization header
    expect(capturedHeaders).toEqual({
      Authorization: `Bearer ${TEST_API_KEY}`,
    });

    // Verify FormData contents
    expect(capturedBody).toBeInstanceOf(FormData);
    const model = capturedBody!.get("model");
    expect(model).toBe("whisper-1");

    const file = capturedBody!.get("file");
    expect(file).toBeInstanceOf(Blob);
    expect((file as Blob).type).toBe("audio/mpeg");
  });

  test("returns empty text when API returns empty text field", async () => {
    globalThis.fetch = (async () => {
      return new Response(JSON.stringify({ text: "" }), {
        status: 200,
        headers: { "Content-Type": "application/json" },
      });
    }) as unknown as typeof fetch;

    const provider = new OpenAIWhisperProvider(TEST_API_KEY);
    const result = await provider.transcribe(
      Buffer.from("silence"),
      "audio/wav",
    );

    expect(result).toEqual({ text: "" });
  });

  test("returns empty text when API response has no text property", async () => {
    // A missing `text` key must degrade to "" rather than undefined/throw.
    globalThis.fetch = (async () => {
      return new Response(JSON.stringify({}), {
        status: 200,
        headers: { "Content-Type": "application/json" },
      });
    }) as unknown as typeof fetch;

    const provider = new OpenAIWhisperProvider(TEST_API_KEY);
    const result = await provider.transcribe(
      Buffer.from("silence"),
      "audio/wav",
    );

    expect(result).toEqual({ text: "" });
  });

  test("error body is truncated to 300 characters", async () => {
    const longBody = "x".repeat(500);

    globalThis.fetch = (async () => {
      return new Response(longBody, { status: 500 });
    }) as unknown as typeof fetch;

    const provider = new OpenAIWhisperProvider(TEST_API_KEY);

    try {
      await provider.transcribe(Buffer.from("audio"), "audio/wav");
      expect.unreachable("should have thrown");
    } catch (err) {
      const msg = (err as Error).message;
      expect(msg).toContain("Whisper API error (500)");
      // The body portion should be at most 300 chars
      const bodyPart = msg.replace("Whisper API error (500): ", "");
      expect(bodyPart.length).toBeLessThanOrEqual(300);
    }
  });
});

// ---------------------------------------------------------------------------
// resolveSpeechToTextProvider
// ---------------------------------------------------------------------------

describe("resolveSpeechToTextProvider", () => {
  beforeEach(() => {
    mockOpenAIKey = undefined;
  });

  test("returns null when no OpenAI key is configured", async () => {
    mockOpenAIKey = undefined;
    const provider = await resolveSpeechToTextProvider();
    expect(provider).toBeNull();
  });

  test("returns an OpenAIWhisperProvider when key exists", async () => {
    mockOpenAIKey = "sk-real-key";
    const provider = await resolveSpeechToTextProvider();
    expect(provider).toBeInstanceOf(OpenAIWhisperProvider);
  });
});
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { SpeechToTextProvider, SpeechToTextResult } from "./types.js";
|
|
2
|
+
|
|
3
|
+
const WHISPER_API_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
4
|
+
const DEFAULT_TIMEOUT_MS = 60_000;
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Derive a filename extension from a MIME type so the Whisper API can detect
|
|
8
|
+
* the audio format. Falls back to "audio" when the MIME type is unrecognised.
|
|
9
|
+
*/
|
|
10
|
+
function extensionFromMime(mimeType: string): string {
|
|
11
|
+
const map: Record<string, string> = {
|
|
12
|
+
"audio/wav": "wav",
|
|
13
|
+
"audio/x-wav": "wav",
|
|
14
|
+
"audio/mpeg": "mp3",
|
|
15
|
+
"audio/mp3": "mp3",
|
|
16
|
+
"audio/ogg": "ogg",
|
|
17
|
+
"audio/opus": "opus",
|
|
18
|
+
"audio/webm": "webm",
|
|
19
|
+
"audio/mp4": "m4a",
|
|
20
|
+
"audio/x-m4a": "m4a",
|
|
21
|
+
"audio/flac": "flac",
|
|
22
|
+
};
|
|
23
|
+
const base = mimeType.split(";")[0].trim().toLowerCase();
|
|
24
|
+
return map[base] ?? "audio";
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export class OpenAIWhisperProvider implements SpeechToTextProvider {
|
|
28
|
+
private readonly apiKey: string;
|
|
29
|
+
|
|
30
|
+
constructor(apiKey: string) {
|
|
31
|
+
this.apiKey = apiKey;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
async transcribe(
|
|
35
|
+
audio: Buffer,
|
|
36
|
+
mimeType: string,
|
|
37
|
+
signal?: AbortSignal,
|
|
38
|
+
): Promise<SpeechToTextResult> {
|
|
39
|
+
const ext = extensionFromMime(mimeType);
|
|
40
|
+
|
|
41
|
+
const formData = new FormData();
|
|
42
|
+
formData.append(
|
|
43
|
+
"file",
|
|
44
|
+
new Blob([new Uint8Array(audio)], { type: mimeType }),
|
|
45
|
+
`audio.${ext}`,
|
|
46
|
+
);
|
|
47
|
+
formData.append("model", "whisper-1");
|
|
48
|
+
|
|
49
|
+
const effectiveSignal = signal ?? AbortSignal.timeout(DEFAULT_TIMEOUT_MS);
|
|
50
|
+
|
|
51
|
+
const response = await fetch(WHISPER_API_URL, {
|
|
52
|
+
method: "POST",
|
|
53
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
54
|
+
body: formData,
|
|
55
|
+
signal: effectiveSignal,
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
if (!response.ok) {
|
|
59
|
+
const body = await response.text().catch(() => "");
|
|
60
|
+
throw new Error(
|
|
61
|
+
`Whisper API error (${response.status}): ${body.slice(0, 300)}`,
|
|
62
|
+
);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
const result = (await response.json()) as { text?: string };
|
|
66
|
+
return { text: result.text?.trim() ?? "" };
|
|
67
|
+
}
|
|
68
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { getProviderKeyAsync } from "../../security/secure-keys.js";
|
|
2
|
+
import { OpenAIWhisperProvider } from "./openai-whisper.js";
|
|
3
|
+
import type { SpeechToTextProvider } from "./types.js";
|
|
4
|
+
|
|
5
|
+
export async function resolveSpeechToTextProvider(): Promise<SpeechToTextProvider | null> {
|
|
6
|
+
const apiKey = await getProviderKeyAsync("openai");
|
|
7
|
+
if (!apiKey) return null;
|
|
8
|
+
return new OpenAIWhisperProvider(apiKey);
|
|
9
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Result of a single transcription request. */
export interface SpeechToTextResult {
  // Transcribed text; implementations return "" when nothing was recognised.
  text: string;
}

/** Contract implemented by speech-to-text backends (e.g. OpenAI Whisper). */
export interface SpeechToTextProvider {
  /**
   * Transcribe audio from a Buffer.
   * @param audio - Raw audio data (WAV, OGG, MP3, etc.)
   * @param mimeType - MIME type of the audio data
   * @param signal - Optional abort signal for cancellation
   */
  transcribe(
    audio: Buffer,
    mimeType: string,
    signal?: AbortSignal,
  ): Promise<SpeechToTextResult>;
}
|
|
@@ -208,8 +208,8 @@ const log = getLogger("runtime-http");
|
|
|
208
208
|
const DEFAULT_PORT = 7821;
|
|
209
209
|
const DEFAULT_HOSTNAME = "127.0.0.1";
|
|
210
210
|
|
|
211
|
-
/** Global hard cap on request body size (
|
|
212
|
-
const MAX_REQUEST_BODY_BYTES =
|
|
211
|
+
/** Global hard cap on request body size (512 MB — accommodates large .vbundle backup imports). */
|
|
212
|
+
const MAX_REQUEST_BODY_BYTES = 512 * 1024 * 1024;
|
|
213
213
|
|
|
214
214
|
export class RuntimeHttpServer {
|
|
215
215
|
private server: ReturnType<typeof Bun.serve> | null = null;
|
|
@@ -44,6 +44,7 @@ import { handleEditIntercept } from "./inbound-stages/edit-intercept.js";
|
|
|
44
44
|
import { handleEscalationIntercept } from "./inbound-stages/escalation-intercept.js";
|
|
45
45
|
import { handleGuardianReplyIntercept } from "./inbound-stages/guardian-reply-intercept.js";
|
|
46
46
|
import { runSecretIngressCheck } from "./inbound-stages/secret-ingress-check.js";
|
|
47
|
+
import { tryTranscribeAudioAttachments } from "./inbound-stages/transcribe-audio.js";
|
|
47
48
|
import { handleVerificationIntercept } from "./inbound-stages/verification-intercept.js";
|
|
48
49
|
|
|
49
50
|
const log = getLogger("runtime-http");
|
|
@@ -144,7 +145,7 @@ export async function handleChannelInbound(
|
|
|
144
145
|
return httpError("BAD_REQUEST", "content must be a string", 400);
|
|
145
146
|
}
|
|
146
147
|
|
|
147
|
-
|
|
148
|
+
let trimmedContent = typeof content === "string" ? content.trim() : "";
|
|
148
149
|
const hasAttachments =
|
|
149
150
|
Array.isArray(attachmentIds) && attachmentIds.length > 0;
|
|
150
151
|
|
|
@@ -227,6 +228,29 @@ export async function handleChannelInbound(
|
|
|
227
228
|
}
|
|
228
229
|
}
|
|
229
230
|
|
|
231
|
+
// Auto-transcribe audio attachments from channel messages
|
|
232
|
+
if (hasAttachments && sourceChannel) {
|
|
233
|
+
const transcribeResult = await tryTranscribeAudioAttachments(attachmentIds);
|
|
234
|
+
switch (transcribeResult.status) {
|
|
235
|
+
case "transcribed":
|
|
236
|
+
// For voice-only messages (empty content), this becomes the message text.
|
|
237
|
+
// For audio+caption, both are preserved.
|
|
238
|
+
trimmedContent =
|
|
239
|
+
transcribeResult.text +
|
|
240
|
+
(trimmedContent ? `\n\n${trimmedContent}` : "");
|
|
241
|
+
break;
|
|
242
|
+
case "no_provider":
|
|
243
|
+
case "error":
|
|
244
|
+
// Inject a hint so the assistant knows the user sent audio and why
|
|
245
|
+
// transcription failed — it can then guide the user (e.g. set up API key).
|
|
246
|
+
trimmedContent =
|
|
247
|
+
`[Voice message received — ${transcribeResult.reason}]` +
|
|
248
|
+
(trimmedContent ? `\n\n${trimmedContent}` : "");
|
|
249
|
+
break;
|
|
250
|
+
// "no_audio", "disabled" — no action needed
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
230
254
|
const sourceMessageId =
|
|
231
255
|
typeof sourceMetadata?.messageId === "string"
|
|
232
256
|
? sourceMetadata.messageId
|
|
@@ -333,7 +357,7 @@ export async function handleChannelInbound(
|
|
|
333
357
|
externalMessageId,
|
|
334
358
|
conversationId: result.conversationId,
|
|
335
359
|
eventId: result.eventId,
|
|
336
|
-
content,
|
|
360
|
+
content: trimmedContent,
|
|
337
361
|
attachmentIds,
|
|
338
362
|
sourceMetadata: body.sourceMetadata,
|
|
339
363
|
actorDisplayName: body.actorDisplayName,
|
|
@@ -612,7 +636,7 @@ export async function handleChannelInbound(
|
|
|
612
636
|
processMessage,
|
|
613
637
|
conversationId: result.conversationId,
|
|
614
638
|
eventId: result.eventId,
|
|
615
|
-
content:
|
|
639
|
+
content: trimmedContent,
|
|
616
640
|
attachmentIds: hasAttachments ? attachmentIds : undefined,
|
|
617
641
|
sourceChannel,
|
|
618
642
|
sourceInterface,
|
|
@@ -79,14 +79,29 @@ export interface AclResult {
|
|
|
79
79
|
guardianVerifyCode: string | undefined;
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
/**
|
|
83
|
+
* Strip Slack/Telegram mrkdwn formatting wrappers from a raw message.
|
|
84
|
+
* When users copy-paste a verification code from the desktop app with
|
|
85
|
+
* rich-text formatting (e.g. bold), Slack preserves it as `*code*` in
|
|
86
|
+
* the message text, which would otherwise fail the strict bare-code regex.
|
|
87
|
+
*/
|
|
88
|
+
function stripMrkdwnFormatting(text: string): string {
|
|
89
|
+
// Bold (*…*), italic (_…_), strikethrough (~…~), inline code (`…`)
|
|
90
|
+
return text.replace(/^[*_~`]+/, "").replace(/[*_~`]+$/, "");
|
|
91
|
+
}
|
|
92
|
+
|
|
82
93
|
/**
|
|
83
94
|
* Parse a guardian verification code from message content.
|
|
84
95
|
* Accepts a bare code as the entire message: 6-digit numeric OR 64-char hex
|
|
85
96
|
* (hex is retained for compatibility with unbound inbound/bootstrap sessions
|
|
86
97
|
* that intentionally use high-entropy secrets).
|
|
98
|
+
*
|
|
99
|
+
* Strips surrounding mrkdwn formatting characters first so that codes
|
|
100
|
+
* pasted with bold/italic/code formatting are still recognized.
|
|
87
101
|
*/
|
|
88
102
|
function parseGuardianVerifyCode(content: string): string | undefined {
|
|
89
|
-
const
|
|
103
|
+
const stripped = stripMrkdwnFormatting(content);
|
|
104
|
+
const bareMatch = stripped.match(/^([0-9a-fA-F]{64}|\d{6})$/);
|
|
90
105
|
if (bareMatch) return bareMatch[1];
|
|
91
106
|
|
|
92
107
|
return undefined;
|