macroclaw 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/app.test.ts +14 -22
- package/src/app.ts +5 -4
- package/src/index.ts +2 -0
- package/src/speech-to-text.ts +28 -0
- package/src/stt.ts +0 -31
package/package.json
CHANGED
package/src/app.test.ts
CHANGED
|
@@ -3,18 +3,13 @@ import { existsSync, rmSync } from "node:fs";
|
|
|
3
3
|
import { App, type AppConfig } from "./app";
|
|
4
4
|
import { type Claude, QueryProcessError, type QueryResult, type RunningQuery } from "./claude";
|
|
5
5
|
import { saveSessions } from "./sessions";
|
|
6
|
+
import type { SpeechToText } from "./speech-to-text";
|
|
6
7
|
|
|
7
|
-
const
|
|
8
|
+
const mockTranscribe = mock(async (_filePath: string) => "transcribed text");
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
transcriptions: {
|
|
13
|
-
create: mockOpenAICreate,
|
|
14
|
-
},
|
|
15
|
-
};
|
|
16
|
-
},
|
|
17
|
-
}));
|
|
10
|
+
function mockStt(): SpeechToText {
|
|
11
|
+
return { transcribe: mockTranscribe } as unknown as SpeechToText;
|
|
12
|
+
}
|
|
18
13
|
|
|
19
14
|
// Mock Grammy Bot
|
|
20
15
|
mock.module("grammy", () => ({
|
|
@@ -59,17 +54,14 @@ mock.module("grammy", () => ({
|
|
|
59
54
|
|
|
60
55
|
const tmpSettingsDir = "/tmp/macroclaw-test-settings";
|
|
61
56
|
|
|
62
|
-
const savedOpenAIKey = process.env.OPENAI_API_KEY;
|
|
63
|
-
|
|
64
57
|
beforeEach(() => {
|
|
65
|
-
|
|
58
|
+
mockTranscribe.mockReset();
|
|
59
|
+
mockTranscribe.mockImplementation(async () => "transcribed text");
|
|
66
60
|
if (existsSync(tmpSettingsDir)) rmSync(tmpSettingsDir, { recursive: true });
|
|
67
61
|
saveSessions({ mainSessionId: "test-session" }, tmpSettingsDir);
|
|
68
62
|
});
|
|
69
63
|
|
|
70
64
|
afterEach(() => {
|
|
71
|
-
if (savedOpenAIKey) process.env.OPENAI_API_KEY = savedOpenAIKey;
|
|
72
|
-
else delete process.env.OPENAI_API_KEY;
|
|
73
65
|
if (existsSync(tmpSettingsDir)) rmSync(tmpSettingsDir, { recursive: true });
|
|
74
66
|
});
|
|
75
67
|
|
|
@@ -128,6 +120,7 @@ function makeConfig(overrides?: Partial<AppConfig>): AppConfig {
|
|
|
128
120
|
workspace: "/tmp/macroclaw-test-workspace",
|
|
129
121
|
settingsDir: tmpSettingsDir,
|
|
130
122
|
claude: defaultMockClaude(),
|
|
123
|
+
stt: mockStt(),
|
|
131
124
|
...overrides,
|
|
132
125
|
};
|
|
133
126
|
}
|
|
@@ -350,7 +343,7 @@ describe("App", () => {
|
|
|
350
343
|
globalThis.fetch = mock(() =>
|
|
351
344
|
Promise.resolve(new Response("fake-audio", { status: 200 })),
|
|
352
345
|
) as any;
|
|
353
|
-
|
|
346
|
+
mockTranscribe.mockImplementationOnce(async () => "hello from voice");
|
|
354
347
|
|
|
355
348
|
const config = makeConfig();
|
|
356
349
|
const app = new App(config);
|
|
@@ -380,7 +373,7 @@ describe("App", () => {
|
|
|
380
373
|
globalThis.fetch = mock(() =>
|
|
381
374
|
Promise.resolve(new Response("fake-audio", { status: 200 })),
|
|
382
375
|
) as any;
|
|
383
|
-
|
|
376
|
+
mockTranscribe.mockImplementationOnce(async () => { throw new Error("API error"); });
|
|
384
377
|
|
|
385
378
|
const config = makeConfig();
|
|
386
379
|
const app = new App(config);
|
|
@@ -406,7 +399,7 @@ describe("App", () => {
|
|
|
406
399
|
globalThis.fetch = mock(() =>
|
|
407
400
|
Promise.resolve(new Response("fake-audio", { status: 200 })),
|
|
408
401
|
) as any;
|
|
409
|
-
|
|
402
|
+
mockTranscribe.mockImplementationOnce(async () => " ");
|
|
410
403
|
|
|
411
404
|
const config = makeConfig();
|
|
412
405
|
const app = new App(config);
|
|
@@ -442,9 +435,8 @@ describe("App", () => {
|
|
|
442
435
|
expect((config.claude as Claude & { calls: CallInfo[] }).calls).toHaveLength(0);
|
|
443
436
|
});
|
|
444
437
|
|
|
445
|
-
it("responds with unavailable message when OPENAI_API_KEY is not set", async () => {
|
|
446
|
-
|
|
447
|
-
const config = makeConfig();
|
|
438
|
+
it("responds with unavailable message when stt is not configured", async () => {
|
|
439
|
+
const config = makeConfig({ stt: undefined });
|
|
448
440
|
const app = new App(config);
|
|
449
441
|
const bot = app.bot as any;
|
|
450
442
|
const handler = bot.filterHandlers.get("message:voice")![0];
|
|
@@ -455,7 +447,7 @@ describe("App", () => {
|
|
|
455
447
|
});
|
|
456
448
|
|
|
457
449
|
const sendCalls = (bot.api.sendMessage as any).mock.calls;
|
|
458
|
-
const call = sendCalls.find((c: any) => c[1].includes("OPENAI_API_KEY"));
|
|
450
|
+
const call = sendCalls.find((c: any) => c[1].includes("openaiApiKey"));
|
|
459
451
|
expect(call).toBeDefined();
|
|
460
452
|
expect((config.claude as Claude & { calls: CallInfo[] }).calls).toHaveLength(0);
|
|
461
453
|
});
|
package/src/app.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { Bot } from "grammy";
|
|
|
2
2
|
import { CronScheduler } from "./cron";
|
|
3
3
|
import { createLogger } from "./logger";
|
|
4
4
|
import { type Claude, Orchestrator, type OrchestratorResponse } from "./orchestrator";
|
|
5
|
-
import { isAvailable, transcribe } from "./stt";
|
|
5
|
+
import type { SpeechToText } from "./speech-to-text";
|
|
6
6
|
import { createBot, downloadFile, sendFile, sendResponse } from "./telegram";
|
|
7
7
|
|
|
8
8
|
const log = createLogger("app");
|
|
@@ -14,6 +14,7 @@ export interface AppConfig {
|
|
|
14
14
|
model?: string;
|
|
15
15
|
settingsDir?: string;
|
|
16
16
|
claude?: Claude;
|
|
17
|
+
stt?: SpeechToText;
|
|
17
18
|
}
|
|
18
19
|
|
|
19
20
|
export class App {
|
|
@@ -118,13 +119,13 @@ export class App {
|
|
|
118
119
|
|
|
119
120
|
this.#bot.on("message:voice", async (ctx) => {
|
|
120
121
|
if (ctx.chat.id.toString() !== this.#config.authorizedChatId) return;
|
|
121
|
-
if (!isAvailable()) {
|
|
122
|
-
await sendResponse(this.#bot, this.#config.authorizedChatId, "[Voice messages not available — set OPENAI_API_KEY to enable]");
|
|
122
|
+
if (!this.#config.stt) {
|
|
123
|
+
await sendResponse(this.#bot, this.#config.authorizedChatId, "[Voice messages not available — set openaiApiKey in settings to enable]");
|
|
123
124
|
return;
|
|
124
125
|
}
|
|
125
126
|
try {
|
|
126
127
|
const path = await downloadFile(this.#bot, ctx.message.voice.file_id, this.#config.botToken, "voice.ogg");
|
|
127
|
-
const text = await transcribe(path);
|
|
128
|
+
const text = await this.#config.stt.transcribe(path);
|
|
128
129
|
if (!text.trim()) {
|
|
129
130
|
await sendResponse(this.#bot, this.#config.authorizedChatId, "[Could not understand audio]");
|
|
130
131
|
return;
|
package/src/index.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { dirname, join, resolve } from "node:path";
|
|
|
3
3
|
import { App, type AppConfig } from "./app";
|
|
4
4
|
import { createLogger, initLogger } from "./logger";
|
|
5
5
|
import { applyEnvOverrides, loadSettings, printSettings } from "./settings";
|
|
6
|
+
import { SpeechToText } from "./speech-to-text";
|
|
6
7
|
|
|
7
8
|
export async function start(): Promise<void> {
|
|
8
9
|
const log = createLogger("index");
|
|
@@ -42,6 +43,7 @@ export async function start(): Promise<void> {
|
|
|
42
43
|
authorizedChatId: resolved.chatId,
|
|
43
44
|
workspace,
|
|
44
45
|
model: resolved.model,
|
|
46
|
+
stt: resolved.openaiApiKey ? new SpeechToText(resolved.openaiApiKey) : undefined,
|
|
45
47
|
};
|
|
46
48
|
|
|
47
49
|
new App(config).start();
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { basename } from "node:path";
|
|
3
|
+
import OpenAI from "openai";
|
|
4
|
+
import { createLogger } from "./logger";
|
|
5
|
+
|
|
6
|
+
const log = createLogger("speech-to-text");
|
|
7
|
+
|
|
8
|
+
export class SpeechToText {
|
|
9
|
+
#client: OpenAI;
|
|
10
|
+
|
|
11
|
+
constructor(apiKey: string) {
|
|
12
|
+
this.#client = new OpenAI({ apiKey });
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
async transcribe(filePath: string): Promise<string> {
|
|
16
|
+
const buffer = await readFile(filePath);
|
|
17
|
+
const file = new File([buffer], basename(filePath), { type: "audio/ogg" });
|
|
18
|
+
|
|
19
|
+
log.debug({ filePath }, "Transcribing audio");
|
|
20
|
+
const result = await this.#client.audio.transcriptions.create({
|
|
21
|
+
model: "whisper-1",
|
|
22
|
+
file,
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
log.debug({ text: result.text }, "Transcription complete");
|
|
26
|
+
return result.text;
|
|
27
|
+
}
|
|
28
|
+
}
|
package/src/stt.ts
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
import { readFile } from "node:fs/promises";
|
|
2
|
-
import { basename } from "node:path";
|
|
3
|
-
import OpenAI from "openai";
|
|
4
|
-
import { createLogger } from "./logger";
|
|
5
|
-
|
|
6
|
-
const log = createLogger("stt");
|
|
7
|
-
|
|
8
|
-
let client: OpenAI | undefined;
|
|
9
|
-
|
|
10
|
-
function getClient(): OpenAI {
|
|
11
|
-
if (!client) client = new OpenAI();
|
|
12
|
-
return client;
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
export function isAvailable(): boolean {
|
|
16
|
-
return !!process.env.OPENAI_API_KEY;
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
export async function transcribe(filePath: string): Promise<string> {
|
|
20
|
-
const buffer = await readFile(filePath);
|
|
21
|
-
const file = new File([buffer], basename(filePath), { type: "audio/ogg" });
|
|
22
|
-
|
|
23
|
-
log.debug({ filePath }, "Transcribing audio");
|
|
24
|
-
const result = await getClient().audio.transcriptions.create({
|
|
25
|
-
model: "whisper-1",
|
|
26
|
-
file,
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
log.debug({ text: result.text }, "Transcription complete");
|
|
30
|
-
return result.text;
|
|
31
|
-
}
|