@markusylisiurunen/tau 0.2.123 → 0.2.124

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ import { z } from "zod";
2
// Endpoint prefix for Gemini `generateContent` calls; the model id and the
// `:generateContent` suffix are appended per request.
const GEMINI_GENERATE_CONTENT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";

// Fallbacks used when the caller does not supply a model or audio MIME type,
// plus the thinking level sent with every transcription request.
const DEFAULT_GEMINI_TRANSCRIPTION_MODEL = "gemini-3.5-flash";
const DEFAULT_GEMINI_TRANSCRIPTION_THINKING_LEVEL = "minimal";
const DEFAULT_GEMINI_AUDIO_MIME_TYPE = "audio/wav";

// Details carried by an error response body. Every field is optional so a
// partially populated error object still validates.
const errorDetailsSchema = z.object({
    message: z.string().trim().min(1).optional(),
    status: z.string().trim().min(1).optional(),
    code: z.number().int().optional(),
});

// Envelope of a non-OK response: `{ error?: { message?, status?, code? } }`.
const errorPayloadSchema = z.object({
    error: errorDetailsSchema.optional(),
});

// Structured-output schema sent to the API as `generationConfig.responseSchema`:
// a JSON object with a single required string field.
const GEMINI_TRANSCRIPTION_RESPONSE_SCHEMA = {
    type: "OBJECT",
    properties: {
        transcription: { type: "STRING" },
    },
    required: ["transcription"],
};

// A response part that carries text; parts of any other shape fail this schema.
const textPartSchema = z.object({ text: z.string() });

// One candidate of a successful response. Parts are kept as `unknown` here and
// validated individually against `textPartSchema` by the consumer.
const candidateSchema = z.object({
    content: z.object({
        parts: z.array(z.unknown()).optional(),
    }),
});

// Envelope of a successful response: `{ candidates?: [{ content: { parts? } }] }`.
const apiResponseSchema = z.object({
    candidates: z.array(candidateSchema).optional(),
});

// Shape of the JSON document the model is asked to emit inside the text parts.
const transcriptionResultSchema = z.object({
    transcription: z.string(),
});
35
/**
 * Transcribes an audio clip by sending it inline to the Gemini
 * `generateContent` endpoint and reading back a JSON `transcription` field.
 *
 * @param {object} options
 * @param {string} options.apiKey - API key sent in the `x-goog-api-key` header.
 * @param {{ toString(encoding: string): string }} options.audio - Audio bytes;
 *   serialized with `toString("base64")` (presumably a Node `Buffer` — confirm
 *   against callers).
 * @param {string} [options.mimeType] - MIME type of the audio; defaults to
 *   `DEFAULT_GEMINI_AUDIO_MIME_TYPE`.
 * @param {string} [options.model] - Model id; defaults to
 *   `DEFAULT_GEMINI_TRANSCRIPTION_MODEL`.
 * @param {typeof fetch} [options.fetchImpl] - Fetch implementation to use
 *   instead of the global `fetch`.
 * @returns {Promise<string>} The trimmed transcription, or an empty string
 *   when the response body contains no parsable transcription.
 * @throws {Error} When the API key is missing or blank, or when the HTTP
 *   response is not OK (the message is taken from the API error payload when
 *   available, otherwise the raw body or `HTTP <status>`).
 */
export async function transcribeGeminiAudio(options) {
    // Treat a non-string key (e.g. `undefined` from an unset config value) as
    // missing, so callers get the intended error rather than a TypeError
    // raised by calling `.trim()` on a non-string.
    const apiKey = typeof options.apiKey === "string" ? options.apiKey.trim() : "";
    if (!apiKey) {
        throw new Error("missing Gemini API key");
    }
    const fetchFn = options.fetchImpl ?? fetch;
    const model = options.model ?? DEFAULT_GEMINI_TRANSCRIPTION_MODEL;
    const response = await fetchFn(`${GEMINI_GENERATE_CONTENT_BASE_URL}/${encodeURIComponent(model)}:generateContent`, {
        method: "POST",
        headers: {
            "Content-Type": "application/json",
            "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
            systemInstruction: {
                parts: [
                    {
                        text: buildTranscriptionSystemInstruction(),
                    },
                ],
            },
            contents: [
                {
                    parts: [
                        {
                            text: buildTranscriptionPrompt(),
                        },
                        {
                            inlineData: {
                                mimeType: options.mimeType ?? DEFAULT_GEMINI_AUDIO_MIME_TYPE,
                                data: options.audio.toString("base64"),
                            },
                        },
                    ],
                },
            ],
            generationConfig: {
                // Ask for structured JSON so the transcript can be extracted
                // from a known field instead of free-form text.
                responseMimeType: "application/json",
                responseSchema: GEMINI_TRANSCRIPTION_RESPONSE_SCHEMA,
                thinkingConfig: {
                    thinkingLevel: DEFAULT_GEMINI_TRANSCRIPTION_THINKING_LEVEL,
                },
            },
        }),
    });
    // Read the body as text first: error responses are not guaranteed to be
    // JSON, and the raw text doubles as the fallback error message.
    const responseText = await response.text();
    let payload;
    try {
        payload = responseText ? JSON.parse(responseText) : undefined;
    }
    catch {
        payload = undefined;
    }
    if (!response.ok) {
        const parsed = errorPayloadSchema.safeParse(payload);
        const fallbackMessage = responseText.trim() || `HTTP ${response.status}`;
        throw new Error(parsed.success ? (parsed.data.error?.message ?? fallbackMessage) : fallbackMessage);
    }
    return extractGeminiText(payload).trim();
}
95
/**
 * Builds the system instruction sent with every transcription request.
 *
 * @returns {string} Seven instruction sentences separated by newlines.
 */
function buildTranscriptionSystemInstruction() {
    const role = "You are a speech-to-text engine.";
    const goal = "Transcribe the speaker's intended message for insertion into a chat input.";
    const language = "Detect the speaker's language and transcribe in that same language; never translate unless the speaker explicitly asks for translation.";
    const register = "Preserve the speaker's wording and register as spoken, including colloquial forms, dialect, and informal language; do not normalize informal speech into formal standard language.";
    const punctuation = "Use natural punctuation and capitalization where helpful, without changing the speaker's wording or register.";
    const cleanup = "Lightly clean only speech artifacts that do not affect meaning, such as filler words, repeated stutters, obvious false starts, and unintelligible mumbling.";
    const prohibitions = "Do not rewrite, paraphrase, summarize, answer the speaker, add labels, add timestamps, or describe background sounds.";
    return `${role}\n${goal}\n${language}\n${register}\n${punctuation}\n${cleanup}\n${prohibitions}`;
}
106
/**
 * Builds the user-turn prompt that accompanies the inline audio part.
 *
 * @returns {string} Two instruction sentences separated by a newline.
 */
function buildTranscriptionPrompt() {
    const task = "Transcribe the attached audio into the transcription field.";
    const outputRule = "Return only the lightly cleaned transcript text, with no timestamps or commentary.";
    return `${task}\n${outputRule}`;
}
112
/**
 * Pulls the transcription string out of a parsed `generateContent` response.
 *
 * Only the first candidate is read. The text of all of its text parts is
 * concatenated and parsed as JSON, and the `transcription` field of that
 * document is returned.
 *
 * @param {unknown} payload - Parsed response body (may be `undefined`).
 * @returns {string} The transcription, or `""` when the payload does not match
 *   the expected envelope, carries no text, holds invalid JSON, or lacks a
 *   string `transcription` field.
 */
function extractGeminiText(payload) {
    const envelope = apiResponseSchema.safeParse(payload);
    if (!envelope.success) {
        return "";
    }
    const parts = envelope.data.candidates?.[0]?.content.parts ?? [];
    // Accumulate text from text-bearing parts; parts of any other shape
    // contribute nothing.
    let jsonText = "";
    for (const part of parts) {
        const textPart = textPartSchema.safeParse(part);
        if (textPart.success) {
            jsonText += textPart.data.text;
        }
    }
    // With no text at all, `decoded` stays undefined and is rejected by the
    // result schema below.
    let decoded;
    if (jsonText) {
        try {
            decoded = JSON.parse(jsonText);
        }
        catch {
            return "";
        }
    }
    const result = transcriptionResultSchema.safeParse(decoded);
    return result.success ? result.data.transcription : "";
}
136
+ //# sourceMappingURL=gemini_transcription.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gemini_transcription.js","sourceRoot":"","sources":["../../../src/core/utils/gemini_transcription.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,gCAAgC,GAAG,yDAAyD,CAAC;AACnG,MAAM,kCAAkC,GAAG,kBAAkB,CAAC;AAC9D,MAAM,2CAA2C,GAAG,SAAS,CAAC;AAC9D,MAAM,8BAA8B,GAAG,WAAW,CAAC;AAEnD,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC;SACL,MAAM,CAAC;QACN,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;QAC5C,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;QAC3C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE;KAClC,CAAC;SACD,QAAQ,EAAE;CACd,CAAC,CAAC;AAEH,MAAM,oCAAoC,GAAG;IAC3C,IAAI,EAAE,QAAQ;IACd,UAAU,EAAE;QACV,aAAa,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;KAClC;IACD,QAAQ,EAAE,CAAC,eAAe,CAAC;CAC5B,CAAC;AAEF,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AACtD,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,UAAU,EAAE,CAAC;SACV,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC;YAChB,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE;SACvC,CAAC;KACH,CAAC,CACH;SACA,QAAQ,EAAE;CACd,CAAC,CAAC;AACH,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;IACzC,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE;CAC1B,CAAC,CAAC;AAUH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,OAAmC;IAC7E,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;IACrC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC5C,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC;IAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,kCAAkC,CAAC;IAClE,MAAM,QAAQ,GAAG,MAAM,OAAO,CAC5B,GAAG,gCAAgC,IAAI,kBAAkB,CAAC,KAAK,CAAC,kBAAkB,EAClF;QACE,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,gBAAgB,EAAE,MAAM;SACzB;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,iBAAiB,EAAE;gBACjB,KAAK,EAAE;oBACL;wBACE,IAAI,EAAE,mCAAmC,EAAE;qBAC5C;iBACF;aACF;YACD,QAAQ,EAAE;gBACR;oBACE,KAAK,EAAE;wBACL;4BACE,IAAI,EAAE,wBAAwB,EAAE;yBACjC;wBACD;4BACE,UAAU,EAAE;gCACV,QAAQ,EAAE,OAAO,CAAC,QAAQ
,IAAI,8BAA8B;gCAC5D,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;6BACvC;yBACF;qBACF;iBACF;aACF;YACD,gBAAgB,EAAE;gBAChB,gBAAgB,EAAE,kBAAkB;gBACpC,cAAc,EAAE,oCAAoC;gBACpD,cAAc,EAAE;oBACd,aAAa,EAAE,2CAA2C;iBAC3D;aACF;SACF,CAAC;KACH,CACF,CAAC;IAEF,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,OAAgB,CAAC;IACrB,IAAI,CAAC;QACH,OAAO,GAAG,YAAY,CAAC,CAAC,CAAE,IAAI,CAAC,KAAK,CAAC,YAAY,CAAa,CAAC,CAAC,CAAC,SAAS,CAAC;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,SAAS,CAAC;IACtB,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,MAAM,GAAG,kBAAkB,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,EAAE,IAAI,QAAQ,QAAQ,CAAC,MAAM,EAAE,CAAC;QACzE,MAAM,IAAI,KAAK,CACb,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,IAAI,eAAe,CAAC,CAAC,CAAC,CAAC,eAAe,CACnF,CAAC;IACJ,CAAC;IAED,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED,SAAS,mCAAmC;IAC1C,OAAO;QACL,kCAAkC;QAClC,4EAA4E;QAC5E,yIAAyI;QACzI,oLAAoL;QACpL,+GAA+G;QAC/G,6JAA6J;QAC7J,uHAAuH;KACxH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,wBAAwB;IAC/B,OAAO;QACL,6DAA6D;QAC7D,oFAAoF;KACrF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,OAAgB;IACzC,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACpD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;SACpE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,UAAU,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAClD,OAAO,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IACxD,CAAC,CAAC;SACD,IAAI,CAAC,EAAE,CAAC,CAAC;IAEZ,IAAI,oBAA6B,CAAC;IAClC,IAAI,CAAC;QACH,oBAAoB,GAAG,YAAY,CAAC,CAAC,CAAE,IAAI,CAAC,KAAK,CAAC,YAAY,CAAa,CAAC,CAAC,CAAC,SAAS,CAAC;IAC1F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,aAAa,GAAG,yBAAyB,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;IAChF,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC;QAC3B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,aAAa,CAAC,IAAI,CAAC,aAAa,CAAC;AAC1C,CAAC"}
@@ -0,0 +1,23 @@
1
+ import { transcribeGeminiAudio } from "./gemini_transcription.js";
2
+ import { transcribeMistralAudio } from "./mistral_transcription.js";
3
/**
 * Dispatches a speech-to-text request to the configured provider.
 *
 * @param {object} options
 * @param {"gemini" | "mistral"} options.provider - Which backend to call.
 * @param {string} options.apiKey - API key for the selected provider.
 * @param {unknown} options.audio - Audio payload, forwarded unchanged.
 * @param {string} [options.mimeType] - Audio MIME type, forwarded unchanged.
 * @param {string} [options.fileName] - Forwarded to the Mistral backend only.
 * @param {string} [options.language] - Forwarded to the Mistral backend only.
 * @param {typeof fetch} [options.fetchImpl] - Optional fetch override.
 * @returns {Promise<string>} Whatever the selected backend resolves to.
 * @throws {Error} When `options.provider` is not a supported provider;
 *   errors raised by the selected backend propagate unchanged.
 */
export async function transcribeAudio(options) {
    switch (options.provider) {
        case "gemini":
            return await transcribeGeminiAudio({
                apiKey: options.apiKey,
                audio: options.audio,
                mimeType: options.mimeType,
                fetchImpl: options.fetchImpl,
            });
        case "mistral":
            return await transcribeMistralAudio({
                apiKey: options.apiKey,
                audio: options.audio,
                mimeType: options.mimeType,
                fileName: options.fileName,
                language: options.language,
                fetchImpl: options.fetchImpl,
            });
        default:
            // Without this branch an unrecognized provider would silently
            // resolve to `undefined` instead of a transcript.
            throw new Error(`unsupported speech-to-text provider: ${String(options.provider)}`);
    }
}
23
+ //# sourceMappingURL=speech_to_text.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"speech_to_text.js","sourceRoot":"","sources":["../../../src/core/utils/speech_to_text.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAClE,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAYpE,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,OAA4B;IAChE,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;QACzB,KAAK,QAAQ;YACX,OAAO,MAAM,qBAAqB,CAAC;gBACjC,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B,CAAC,CAAC;QACL,KAAK,SAAS;YACZ,OAAO,MAAM,sBAAsB,CAAC;gBAClC,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B,CAAC,CAAC;IACP,CAAC;AACH,CAAC"}
@@ -1,2 +1,2 @@
1
- export const APP_VERSION = "0.2.123";
1
+ export const APP_VERSION = "0.2.124";
2
2
  //# sourceMappingURL=version.js.map
@@ -9,7 +9,7 @@ import { getAuthPath } from "../core/auth/auth_paths.js";
9
9
  import { AuthStorage } from "../core/auth/auth_storage.js";
10
10
  import { createCredentialResolver, } from "../core/auth/credential_resolver.js";
11
11
  import { createCommandRegistry, getRiskLevelDescription, } from "../core/commands/index.js";
12
- import { createDefaultConfigDeps, getMistralApiKey, loadRuntimeConfig, } from "../core/config/index.js";
12
+ import { createDefaultConfigDeps, getGoogleApiKey, getMistralApiKey, loadRuntimeConfig, } from "../core/config/index.js";
13
13
  import { startDiffReviewSession as startCoreDiffReviewSession } from "../core/diff_review/index.js";
14
14
  import { ChatRuntime } from "../core/runtime/chat_runtime.js";
15
15
  import { createDefaultCoreDeps } from "../core/runtime/deps.js";
@@ -24,9 +24,9 @@ import { formatCwdChangeNotice, formatProjectContextChangeNotice, formatRiskLeve
24
24
  import { formatAdaptiveNumber, formatCwd, formatPathForDisplay, formatTokenWindow, } from "../core/utils/format.js";
25
25
  import { streamGeminiSpeechAudio } from "../core/utils/gemini_speech.js";
26
26
  import { extractAllFencedCodeBlocks, extractAssistantText } from "../core/utils/messages.js";
27
- import { transcribeMistralAudio } from "../core/utils/mistral_transcription.js";
28
27
  import { streamModel } from "../core/utils/model_stream.js";
29
28
  import { listProjectFilesAsync } from "../core/utils/project_files.js";
29
+ import { transcribeAudio } from "../core/utils/speech_to_text.js";
30
30
  import { getAutoCompactionMetadataFromMessage, hasAutoCompactionContinuationMetadata, stripTauUserMetadata, } from "../core/utils/user_metadata.js";
31
31
  import { APP_VERSION } from "../core/version.js";
32
32
  import { DiffReviewService, } from "./chat_controller/diff_review_service.js";
@@ -1266,9 +1266,9 @@ export class ChatController {
1266
1266
  this.view.addSystemMessage("/listen is currently supported only on macOS.", "warn");
1267
1267
  return;
1268
1268
  }
1269
- const apiKey = getMistralApiKey(this.config, this.deps.env.env());
1269
+ const apiKey = this.getSpeechToTextApiKey();
1270
1270
  if (!apiKey) {
1271
- this.view.addSystemMessage("set MISTRAL_API_KEY or apiKeys.mistral to use /listen", "error");
1271
+ this.view.addSystemMessage(this.getSpeechToTextApiKeyErrorMessage("use /listen"), "error");
1272
1272
  return;
1273
1273
  }
1274
1274
  let audioPath;
@@ -1433,12 +1433,29 @@ export class ChatController {
1433
1433
  }
1434
1434
  return path;
1435
1435
  }
1436
+ getSpeechToTextProvider() {
1437
+ return this.config.speechToText?.provider ?? "mistral";
1438
+ }
1439
+ getSpeechToTextApiKey() {
1440
+ const provider = this.getSpeechToTextProvider();
1441
+ return provider === "gemini"
1442
+ ? getGoogleApiKey(this.config, this.deps.env.env())
1443
+ : getMistralApiKey(this.config, this.deps.env.env());
1444
+ }
1445
+ getSpeechToTextApiKeyErrorMessage(action) {
1446
+ const provider = this.getSpeechToTextProvider();
1447
+ return provider === "gemini"
1448
+ ? `set GEMINI_API_KEY or apiKeys.google to ${action}`
1449
+ : `set MISTRAL_API_KEY or apiKeys.mistral to ${action}`;
1450
+ }
1436
1451
  async transcribeListenAudio(audio) {
1437
- const apiKey = getMistralApiKey(this.config, this.deps.env.env());
1452
+ const provider = this.getSpeechToTextProvider();
1453
+ const apiKey = this.getSpeechToTextApiKey();
1438
1454
  if (!apiKey) {
1439
- throw new Error("missing MISTRAL_API_KEY or apiKeys.mistral");
1455
+ throw new Error(this.getSpeechToTextApiKeyErrorMessage("transcribe speech"));
1440
1456
  }
1441
- return await transcribeMistralAudio({
1457
+ return await transcribeAudio({
1458
+ provider,
1442
1459
  apiKey,
1443
1460
  audio,
1444
1461
  mimeType: "audio/wav",