@markusylisiurunen/tau 0.2.123 → 0.2.124
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -4
- package/dist/core/async/cli.js +5 -1
- package/dist/core/async/cli.js.map +1 -1
- package/dist/core/async/daemon_runtime.js +2 -0
- package/dist/core/async/daemon_runtime.js.map +1 -1
- package/dist/core/async/telegram.js +445 -60
- package/dist/core/async/telegram.js.map +1 -1
- package/dist/core/config/index.js +1 -1
- package/dist/core/config/index.js.map +1 -1
- package/dist/core/config/schema.js +38 -0
- package/dist/core/config/schema.js.map +1 -1
- package/dist/core/utils/gemini_transcription.js +136 -0
- package/dist/core/utils/gemini_transcription.js.map +1 -0
- package/dist/core/utils/speech_to_text.js +23 -0
- package/dist/core/utils/speech_to_text.js.map +1 -0
- package/dist/core/version.js +1 -1
- package/dist/tui/chat_controller.js +24 -7
- package/dist/tui/chat_controller.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
// Prefix of the Gemini REST endpoint; the model id and `:generateContent` are appended per request.
const GEMINI_GENERATE_CONTENT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
// Model used when the caller does not pass `options.model`.
const DEFAULT_GEMINI_TRANSCRIPTION_MODEL = "gemini-3.5-flash";
// Thinking level sent with every transcription request.
const DEFAULT_GEMINI_TRANSCRIPTION_THINKING_LEVEL = "minimal";
// MIME type used when the caller does not pass `options.mimeType`.
const DEFAULT_GEMINI_AUDIO_MIME_TYPE = "audio/wav";

// Optional, trimmed, non-empty string; shared by the textual fields of an error body.
const optionalNonEmptyStringSchema = z.string().trim().min(1).optional();
// Details object carried under `error` in a failed response; every field is optional.
const errorDetailsSchema = z.object({
    message: optionalNonEmptyStringSchema,
    status: optionalNonEmptyStringSchema,
    code: z.number().int().optional(),
});
// Body of a non-2xx response. `error` itself may be absent.
const errorPayloadSchema = z.object({
    error: errorDetailsSchema.optional(),
});

// Structured-output schema sent to the API so the model answers with `{ "transcription": "..." }`.
const GEMINI_TRANSCRIPTION_RESPONSE_SCHEMA = {
    type: "OBJECT",
    properties: {
        transcription: { type: "STRING" },
    },
    required: ["transcription"],
};

// A response part that carries text.
const textPartSchema = z.object({ text: z.string() });
// One candidate of a successful response; parts are validated individually later, hence `unknown`.
const candidateSchema = z.object({
    content: z.object({
        parts: z.array(z.unknown()).optional(),
    }),
});
// Envelope of a successful response.
const apiResponseSchema = z.object({
    candidates: z.array(candidateSchema).optional(),
});
// JSON document the model is asked to produce inside the text parts.
const transcriptionResultSchema = z.object({
    transcription: z.string(),
});
|
|
35
|
+
/**
 * Transcribes an audio clip by sending it inline (base64) to the Gemini
 * `generateContent` endpoint and reading back the structured transcription.
 *
 * @param {object} options
 * @param {string} options.apiKey - Gemini API key; surrounding whitespace is ignored.
 * @param {Buffer|Uint8Array} options.audio - Raw audio bytes.
 * @param {string} [options.mimeType] - MIME type of `audio`; defaults to `DEFAULT_GEMINI_AUDIO_MIME_TYPE`.
 * @param {string} [options.model] - Model id; defaults to `DEFAULT_GEMINI_TRANSCRIPTION_MODEL`.
 * @param {typeof fetch} [options.fetchImpl] - Replacement for the global `fetch`.
 * @returns {Promise<string>} The trimmed transcript, or an empty string when the
 *   response carries no parseable transcription.
 * @throws {Error} When the API key is missing or blank, or when the API answers
 *   with a non-2xx status (the message comes from the error body when available).
 */
export async function transcribeGeminiAudio(options) {
    // A missing key must surface as the intended error, not as a TypeError from `.trim()`.
    const apiKey = typeof options.apiKey === "string" ? options.apiKey.trim() : "";
    if (!apiKey) {
        throw new Error("missing Gemini API key");
    }
    // A plain Uint8Array would stringify as "1,2,3" because its toString ignores the
    // encoding argument; view it as a Buffer (no copy) so base64 encoding is correct.
    const audio = options.audio instanceof Uint8Array && !Buffer.isBuffer(options.audio)
        ? Buffer.from(options.audio.buffer, options.audio.byteOffset, options.audio.byteLength)
        : options.audio;
    const fetchFn = options.fetchImpl ?? fetch;
    const model = options.model ?? DEFAULT_GEMINI_TRANSCRIPTION_MODEL;
    const response = await fetchFn(`${GEMINI_GENERATE_CONTENT_BASE_URL}/${encodeURIComponent(model)}:generateContent`, {
        method: "POST",
        headers: {
            "Content-Type": "application/json",
            "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
            systemInstruction: {
                parts: [
                    {
                        text: buildTranscriptionSystemInstruction(),
                    },
                ],
            },
            contents: [
                {
                    parts: [
                        {
                            text: buildTranscriptionPrompt(),
                        },
                        {
                            inlineData: {
                                mimeType: options.mimeType ?? DEFAULT_GEMINI_AUDIO_MIME_TYPE,
                                data: audio.toString("base64"),
                            },
                        },
                    ],
                },
            ],
            generationConfig: {
                // Ask for JSON constrained to `{ transcription }` so the result can be parsed reliably.
                responseMimeType: "application/json",
                responseSchema: GEMINI_TRANSCRIPTION_RESPONSE_SCHEMA,
                thinkingConfig: {
                    thinkingLevel: DEFAULT_GEMINI_TRANSCRIPTION_THINKING_LEVEL,
                },
            },
        }),
    });
    // Read the body as text first: error responses are not guaranteed to be JSON,
    // and the raw text doubles as the fallback error message.
    const responseText = await response.text();
    let payload;
    try {
        payload = responseText ? JSON.parse(responseText) : undefined;
    }
    catch {
        // Deliberate best-effort: a non-JSON body is handled below via `responseText`.
        payload = undefined;
    }
    if (!response.ok) {
        const parsed = errorPayloadSchema.safeParse(payload);
        const fallbackMessage = responseText.trim() || `HTTP ${response.status}`;
        throw new Error(parsed.success ? (parsed.data.error?.message ?? fallbackMessage) : fallbackMessage);
    }
    return extractGeminiText(payload).trim();
}
|
|
95
|
+
/**
 * Builds the system instruction for the transcription request.
 *
 * @returns {string} Seven rules separated by single newlines, with no trailing newline.
 */
function buildTranscriptionSystemInstruction() {
    // What the model is and what its output is used for.
    const roleRules = [
        "You are a speech-to-text engine.",
        "Transcribe the speaker's intended message for insertion into a chat input.",
    ];
    // Keep the language, wording and register of the speaker.
    const fidelityRules = [
        "Detect the speaker's language and transcribe in that same language; never translate unless the speaker explicitly asks for translation.",
        "Preserve the speaker's wording and register as spoken, including colloquial forms, dialect, and informal language; do not normalize informal speech into formal standard language.",
        "Use natural punctuation and capitalization where helpful, without changing the speaker's wording or register.",
    ];
    // What may be cleaned up, and what must never be added.
    const cleanupRules = [
        "Lightly clean only speech artifacts that do not affect meaning, such as filler words, repeated stutters, obvious false starts, and unintelligible mumbling.",
        "Do not rewrite, paraphrase, summarize, answer the speaker, add labels, add timestamps, or describe background sounds.",
    ];
    return [...roleRules, ...fidelityRules, ...cleanupRules].join("\n");
}
|
|
106
|
+
/**
 * Builds the user-turn prompt that accompanies the inline audio.
 *
 * @returns {string} Two sentences separated by a single newline.
 */
function buildTranscriptionPrompt() {
    const task = "Transcribe the attached audio into the transcription field.";
    const outputRule = "Return only the lightly cleaned transcript text, with no timestamps or commentary.";
    return `${task}\n${outputRule}`;
}
|
|
112
|
+
/**
 * Pulls the transcription string out of a `generateContent` response payload.
 *
 * The text parts of the first candidate are concatenated and parsed as the JSON
 * document `{ transcription }`. Any structural mismatch along the way yields an
 * empty string instead of an error.
 *
 * @param {unknown} payload - Decoded response body (may be `undefined`).
 * @returns {string} The untrimmed transcription, or `""` when it cannot be extracted.
 */
function extractGeminiText(payload) {
    const envelope = apiResponseSchema.safeParse(payload);
    if (!envelope.success) {
        return "";
    }
    const parts = envelope.data.candidates?.[0]?.content.parts ?? [];
    // Concatenate only the parts that carry text; other parts contribute nothing.
    let jsonText = "";
    for (const part of parts) {
        const textPart = textPartSchema.safeParse(part);
        if (textPart.success) {
            jsonText += textPart.data.text;
        }
    }
    // No text at all means there is nothing to decode.
    if (!jsonText) {
        return "";
    }
    let decoded;
    try {
        decoded = JSON.parse(jsonText);
    }
    catch {
        // The text was not valid JSON, so treat it as "no transcription".
        return "";
    }
    const result = transcriptionResultSchema.safeParse(decoded);
    return result.success ? result.data.transcription : "";
}
|
|
136
|
+
//# sourceMappingURL=gemini_transcription.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gemini_transcription.js","sourceRoot":"","sources":["../../../src/core/utils/gemini_transcription.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,gCAAgC,GAAG,yDAAyD,CAAC;AACnG,MAAM,kCAAkC,GAAG,kBAAkB,CAAC;AAC9D,MAAM,2CAA2C,GAAG,SAAS,CAAC;AAC9D,MAAM,8BAA8B,GAAG,WAAW,CAAC;AAEnD,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC;SACL,MAAM,CAAC;QACN,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;QAC5C,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;QAC3C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE;KAClC,CAAC;SACD,QAAQ,EAAE;CACd,CAAC,CAAC;AAEH,MAAM,oCAAoC,GAAG;IAC3C,IAAI,EAAE,QAAQ;IACd,UAAU,EAAE;QACV,aAAa,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;KAClC;IACD,QAAQ,EAAE,CAAC,eAAe,CAAC;CAC5B,CAAC;AAEF,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AACtD,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,UAAU,EAAE,CAAC;SACV,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC;YAChB,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE;SACvC,CAAC;KACH,CAAC,CACH;SACA,QAAQ,EAAE;CACd,CAAC,CAAC;AACH,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE;CAC1B,CAAC,CAAC;AAUH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,OAAmC;IAC7E,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;IACrC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC;IAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,kCAAkC,CAAC;IAClE,MAAM,QAAQ,GAAG,MAAM,OAAO,CAC5B,GAAG,gCAAgC,IAAI,kBAAkB,CAAC,KAAK,CAAC,kBAAkB,EAClF;QACE,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,gBAAgB,EAAE,MAAM;SACzB;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,iBAAiB,EAAE;gBACjB,KAAK,EAAE;oBACL;wBACE,IAAI,EAAE,mCAAmC,EAAE;qBAC5C;iBACF;aACF;YACD,QAAQ,EAAE;gBACR;oBACE,KAAK,EAAE;wBACL;4BACE,IAAI,EAAE,wBAAwB,EAAE;yBACjC;wBACD;4BACE,UAAU,EAAE;gCACV,QAAQ,EAAE,OAAO,CAAC,QAAQ,I
AAI,8BAA8B;gCAC5D,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;6BACvC;yBACF;qBACF;iBACF;aACF;YACD,gBAAgB,EAAE;gBAChB,gBAAgB,EAAE,kBAAkB;gBACpC,cAAc,EAAE,oCAAoC;gBACpD,cAAc,EAAE;oBACd,aAAa,EAAE,2CAA2C;iBAC3D;aACF;SACF,CAAC;KACH,CACF,CAAC;IAEF,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,OAAgB,CAAC;IACrB,IAAI,CAAC;QACH,OAAO,GAAG,YAAY,CAAC,CAAC,CAAE,IAAI,CAAC,KAAK,CAAC,YAAY,CAAa,CAAC,CAAC,CAAC,SAAS,CAAC;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,SAAS,CAAC;IACtB,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,MAAM,GAAG,kBAAkB,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,EAAE,IAAI,QAAQ,QAAQ,CAAC,MAAM,EAAE,CAAC;QACzE,MAAM,IAAI,KAAK,CACb,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,IAAI,eAAe,CAAC,CAAC,CAAC,CAAC,eAAe,CACnF,CAAC;IACJ,CAAC;IAED,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED,SAAS,mCAAmC;IAC1C,OAAO;QACL,kCAAkC;QAClC,4EAA4E;QAC5E,yIAAyI;QACzI,oLAAoL;QACpL,+GAA+G;QAC/G,6JAA6J;QAC7J,uHAAuH;KACxH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,wBAAwB;IAC/B,OAAO;QACL,6DAA6D;QAC7D,oFAAoF;KACrF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,OAAgB;IACzC,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACpD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;SACpE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,UAAU,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAClD,OAAO,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IACxD,CAAC,CAAC;SACD,IAAI,CAAC,EAAE,CAAC,CAAC;IAEZ,IAAI,oBAA6B,CAAC;IAClC,IAAI,CAAC;QACH,oBAAoB,GAAG,YAAY,CAAC,CAAC,CAAE,IAAI,CAAC,KAAK,CAAC,YAAY,CAAa,CAAC,CAAC,CAAC,SAAS,CAAC;IAC1F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,aAAa,GAAG,yBAAyB,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;IAChF,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC;QAC3B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,aAAa,CAAC,IAAI,CAAC,aAAa,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { transcribeGeminiAudio } from "./gemini_transcription.js";
|
|
2
|
+
import { transcribeMistralAudio } from "./mistral_transcription.js";
|
|
3
|
+
/**
 * Transcribes audio with the configured speech-to-text provider.
 *
 * @param {object} options
 * @param {"gemini"|"mistral"} options.provider - Which backend to call.
 * @param {string} options.apiKey - API key for the selected provider.
 * @param {Buffer} options.audio - Raw audio bytes.
 * @param {string} [options.mimeType] - MIME type of `audio`.
 * @param {string} [options.fileName] - Forwarded to the Mistral backend only.
 * @param {string} [options.language] - Forwarded to the Mistral backend only.
 * @param {typeof fetch} [options.fetchImpl] - Replacement for the global `fetch`.
 * @returns {Promise<string>} The transcript produced by the provider.
 * @throws {Error} When `options.provider` is not a supported provider, or when
 *   the selected backend fails.
 */
export async function transcribeAudio(options) {
    switch (options.provider) {
        case "gemini":
            return await transcribeGeminiAudio({
                apiKey: options.apiKey,
                audio: options.audio,
                mimeType: options.mimeType,
                fetchImpl: options.fetchImpl,
            });
        case "mistral":
            return await transcribeMistralAudio({
                apiKey: options.apiKey,
                audio: options.audio,
                mimeType: options.mimeType,
                fileName: options.fileName,
                language: options.language,
                fetchImpl: options.fetchImpl,
            });
        default:
            // Without this branch an unknown provider resolved to `undefined`, which
            // callers expecting a string would only trip over later.
            throw new Error(`unsupported speech-to-text provider: ${String(options.provider)}`);
    }
}
|
|
23
|
+
//# sourceMappingURL=speech_to_text.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speech_to_text.js","sourceRoot":"","sources":["../../../src/core/utils/speech_to_text.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAClE,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAYpE,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,OAA4B;IAChE,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;QACzB,KAAK,QAAQ;YACX,OAAO,MAAM,qBAAqB,CAAC;gBACjC,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B,CAAC,CAAC;QACL,KAAK,SAAS;YACZ,OAAO,MAAM,sBAAsB,CAAC;gBAClC,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B,CAAC,CAAC;IACP,CAAC;AACH,CAAC"}
|
package/dist/core/version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const APP_VERSION = "0.2.
|
|
1
|
+
export const APP_VERSION = "0.2.124";
|
|
2
2
|
//# sourceMappingURL=version.js.map
|
|
@@ -9,7 +9,7 @@ import { getAuthPath } from "../core/auth/auth_paths.js";
|
|
|
9
9
|
import { AuthStorage } from "../core/auth/auth_storage.js";
|
|
10
10
|
import { createCredentialResolver, } from "../core/auth/credential_resolver.js";
|
|
11
11
|
import { createCommandRegistry, getRiskLevelDescription, } from "../core/commands/index.js";
|
|
12
|
-
import { createDefaultConfigDeps, getMistralApiKey, loadRuntimeConfig, } from "../core/config/index.js";
|
|
12
|
+
import { createDefaultConfigDeps, getGoogleApiKey, getMistralApiKey, loadRuntimeConfig, } from "../core/config/index.js";
|
|
13
13
|
import { startDiffReviewSession as startCoreDiffReviewSession } from "../core/diff_review/index.js";
|
|
14
14
|
import { ChatRuntime } from "../core/runtime/chat_runtime.js";
|
|
15
15
|
import { createDefaultCoreDeps } from "../core/runtime/deps.js";
|
|
@@ -24,9 +24,9 @@ import { formatCwdChangeNotice, formatProjectContextChangeNotice, formatRiskLeve
|
|
|
24
24
|
import { formatAdaptiveNumber, formatCwd, formatPathForDisplay, formatTokenWindow, } from "../core/utils/format.js";
|
|
25
25
|
import { streamGeminiSpeechAudio } from "../core/utils/gemini_speech.js";
|
|
26
26
|
import { extractAllFencedCodeBlocks, extractAssistantText } from "../core/utils/messages.js";
|
|
27
|
-
import { transcribeMistralAudio } from "../core/utils/mistral_transcription.js";
|
|
28
27
|
import { streamModel } from "../core/utils/model_stream.js";
|
|
29
28
|
import { listProjectFilesAsync } from "../core/utils/project_files.js";
|
|
29
|
+
import { transcribeAudio } from "../core/utils/speech_to_text.js";
|
|
30
30
|
import { getAutoCompactionMetadataFromMessage, hasAutoCompactionContinuationMetadata, stripTauUserMetadata, } from "../core/utils/user_metadata.js";
|
|
31
31
|
import { APP_VERSION } from "../core/version.js";
|
|
32
32
|
import { DiffReviewService, } from "./chat_controller/diff_review_service.js";
|
|
@@ -1266,9 +1266,9 @@ export class ChatController {
|
|
|
1266
1266
|
this.view.addSystemMessage("/listen is currently supported only on macOS.", "warn");
|
|
1267
1267
|
return;
|
|
1268
1268
|
}
|
|
1269
|
-
const apiKey =
|
|
1269
|
+
const apiKey = this.getSpeechToTextApiKey();
|
|
1270
1270
|
if (!apiKey) {
|
|
1271
|
-
this.view.addSystemMessage("
|
|
1271
|
+
this.view.addSystemMessage(this.getSpeechToTextApiKeyErrorMessage("use /listen"), "error");
|
|
1272
1272
|
return;
|
|
1273
1273
|
}
|
|
1274
1274
|
let audioPath;
|
|
@@ -1433,12 +1433,29 @@ export class ChatController {
|
|
|
1433
1433
|
}
|
|
1434
1434
|
return path;
|
|
1435
1435
|
}
|
|
1436
|
+
getSpeechToTextProvider() {
|
|
1437
|
+
return this.config.speechToText?.provider ?? "mistral";
|
|
1438
|
+
}
|
|
1439
|
+
getSpeechToTextApiKey() {
|
|
1440
|
+
const provider = this.getSpeechToTextProvider();
|
|
1441
|
+
return provider === "gemini"
|
|
1442
|
+
? getGoogleApiKey(this.config, this.deps.env.env())
|
|
1443
|
+
: getMistralApiKey(this.config, this.deps.env.env());
|
|
1444
|
+
}
|
|
1445
|
+
getSpeechToTextApiKeyErrorMessage(action) {
|
|
1446
|
+
const provider = this.getSpeechToTextProvider();
|
|
1447
|
+
return provider === "gemini"
|
|
1448
|
+
? `set GEMINI_API_KEY or apiKeys.google to ${action}`
|
|
1449
|
+
: `set MISTRAL_API_KEY or apiKeys.mistral to ${action}`;
|
|
1450
|
+
}
|
|
1436
1451
|
async transcribeListenAudio(audio) {
|
|
1437
|
-
const
|
|
1452
|
+
const provider = this.getSpeechToTextProvider();
|
|
1453
|
+
const apiKey = this.getSpeechToTextApiKey();
|
|
1438
1454
|
if (!apiKey) {
|
|
1439
|
-
throw new Error("
|
|
1455
|
+
throw new Error(this.getSpeechToTextApiKeyErrorMessage("transcribe speech"));
|
|
1440
1456
|
}
|
|
1441
|
-
return await
|
|
1457
|
+
return await transcribeAudio({
|
|
1458
|
+
provider,
|
|
1442
1459
|
apiKey,
|
|
1443
1460
|
audio,
|
|
1444
1461
|
mimeType: "audio/wav",
|