careervivid 2.1.18 → 2.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/agent/engineResolver.js +1 -1
- package/dist/commands/agent/index.d.ts.map +1 -1
- package/dist/commands/agent/index.js +8 -3
- package/dist/commands/agent/repl/engineLoop.d.ts +35 -0
- package/dist/commands/agent/repl/engineLoop.d.ts.map +1 -0
- package/dist/commands/agent/repl/engineLoop.js +168 -0
- package/dist/commands/agent/repl/input.d.ts +21 -0
- package/dist/commands/agent/repl/input.d.ts.map +1 -0
- package/dist/commands/agent/repl/input.js +78 -0
- package/dist/commands/agent/repl/slashCommands.d.ts +33 -0
- package/dist/commands/agent/repl/slashCommands.d.ts.map +1 -0
- package/dist/commands/agent/repl/slashCommands.js +193 -0
- package/dist/commands/agent/repl/toolHandlers.d.ts +33 -0
- package/dist/commands/agent/repl/toolHandlers.d.ts.map +1 -0
- package/dist/commands/agent/repl/toolHandlers.js +185 -0
- package/dist/commands/agent/repl.d.ts +10 -0
- package/dist/commands/agent/repl.d.ts.map +1 -1
- package/dist/commands/agent/repl.js +133 -609
- package/dist/lib/tts.d.ts +19 -9
- package/dist/lib/tts.d.ts.map +1 -1
- package/dist/lib/tts.js +129 -50
- package/package.json +1 -1
package/dist/lib/tts.d.ts
CHANGED
|
@@ -1,15 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* tts.ts — Text-to-Speech engine for the CareerVivid REPL
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* No separate GEMINI_API_KEY required.
|
|
4
|
+
* Auth: Uses the CareerVivid API key (cv_live_...) → cliGetInterviewToken
|
|
5
|
+
* → short-lived Gemini key. No GEMINI_API_KEY env var required.
|
|
7
6
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* Chunking: Long text is split at sentence boundaries and synthesized
|
|
8
|
+
* sequentially, then played back-to-back for seamless audio.
|
|
9
|
+
*
|
|
10
|
+
* Retry: Gemini 3.1 TTS models occasionally return 500 errors;
|
|
11
|
+
* each chunk is retried up to 3 times with exponential back-off.
|
|
12
|
+
*
|
|
13
|
+
* Toggle: /voice on | off
|
|
14
|
+
* Replay: /speak
|
|
10
15
|
*/
|
|
11
|
-
export declare const AVAILABLE_VOICES: readonly ["Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Aoede", "Orbit", "Stellar", "Leda", "Orus"];
|
|
12
|
-
export declare const AVAILABLE_TTS_MODELS: readonly ["gemini-3.1-flash-
|
|
16
|
+
export declare const AVAILABLE_VOICES: readonly ["Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Aoede", "Orbit", "Stellar", "Leda", "Orus", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba", "Despina", "Erinome", "Sulafat", "Schedar", "Vindemiatrix"];
|
|
17
|
+
export declare const AVAILABLE_TTS_MODELS: readonly ["gemini-3.1-flash-tts-preview", "gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"];
|
|
13
18
|
export declare function isVoiceEnabled(): boolean;
|
|
14
19
|
export declare function setVoiceEnabled(on: boolean): void;
|
|
15
20
|
export declare function setLastResponse(text: string): void;
|
|
@@ -20,8 +25,13 @@ export declare function getCurrentTtsModel(): string;
|
|
|
20
25
|
export declare function setCurrentTtsModel(m: string): void;
|
|
21
26
|
export declare function stopPlayback(): void;
|
|
22
27
|
/**
|
|
23
|
-
* Synthesizes `text` via Gemini TTS
|
|
24
|
-
*
|
|
28
|
+
* Synthesizes `text` via Gemini TTS.
|
|
29
|
+
* - Cleans markdown
|
|
30
|
+
* - Splits into sentence-boundary chunks
|
|
31
|
+
* - Synthesizes each chunk sequentially with retry
|
|
32
|
+
* - Concatenates all PCM data into one WAV and plays it
|
|
33
|
+
*
|
|
34
|
+
* Non-blocking: errors are silently swallowed so the REPL is never disrupted.
|
|
25
35
|
*/
|
|
26
36
|
export declare function speakText(text: string, _unusedKey?: string): Promise<void>;
|
|
27
37
|
//# sourceMappingURL=tts.d.ts.map
|
package/dist/lib/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/lib/tts.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/lib/tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAiBH,eAAO,MAAM,gBAAgB,2NAqBnB,CAAC;AAIX,eAAO,MAAM,oBAAoB,yGAIvB,CAAC;AAYX,wBAAgB,cAAc,YAA2B;AACzD,wBAAgB,eAAe,CAAC,EAAE,EAAE,OAAO,QAAwB;AACnE,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,QAA0B;AACtE,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,QAAuB;AAChE,wBAAgB,kBAAkB,WAA8B;AAChE,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,QAA0B;AA8BtE,wBAAgB,YAAY,SAK3B;AA6JD;;;;;;;;GAQG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAgChF"}
|
package/dist/lib/tts.js
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* tts.ts — Text-to-Speech engine for the CareerVivid REPL
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* No separate GEMINI_API_KEY required.
|
|
4
|
+
* Auth: Uses the CareerVivid API key (cv_live_...) → cliGetInterviewToken
|
|
5
|
+
* → short-lived Gemini key. No GEMINI_API_KEY env var required.
|
|
7
6
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* Chunking: Long text is split at sentence boundaries and synthesized
|
|
8
|
+
* sequentially, then played back-to-back for seamless audio.
|
|
9
|
+
*
|
|
10
|
+
* Retry: Gemini 3.1 TTS models occasionally return 500 errors;
|
|
11
|
+
* each chunk is retried up to 3 times with exponential back-off.
|
|
12
|
+
*
|
|
13
|
+
* Toggle: /voice on | off
|
|
14
|
+
* Replay: /speak
|
|
10
15
|
*/
|
|
11
16
|
import { writeFileSync, unlinkSync } from "fs";
|
|
12
17
|
import { spawn } from "child_process";
|
|
@@ -14,11 +19,12 @@ import { tmpdir } from "os";
|
|
|
14
19
|
import { join } from "path";
|
|
15
20
|
import { GoogleGenAI, Modality } from "@google/genai";
|
|
16
21
|
import { getApiKey } from "../config.js";
|
|
17
|
-
// ── Backend endpoint
|
|
22
|
+
// ── Backend endpoint ───────────────────────────────────────────────────────────
|
|
18
23
|
const TTS_TOKEN_URL = process.env.CV_FUNCTIONS_URL
|
|
19
24
|
? `${process.env.CV_FUNCTIONS_URL}/cliGetInterviewToken`
|
|
20
25
|
: "https://us-west1-jastalk-firebase.cloudfunctions.net/cliGetInterviewToken";
|
|
21
|
-
// ── Available options
|
|
26
|
+
// ── Available options ──────────────────────────────────────────────────────────
|
|
27
|
+
// Gemini TTS voices offered by the CLI (20 entries; the same names are used with the 2.5 and 3.1 model families)
|
|
22
28
|
export const AVAILABLE_VOICES = [
|
|
23
29
|
"Zephyr", // Bright, energetic
|
|
24
30
|
"Puck", // Upbeat, playful
|
|
@@ -30,20 +36,31 @@ export const AVAILABLE_VOICES = [
|
|
|
30
36
|
"Stellar", // Smooth, polished
|
|
31
37
|
"Leda", // Warm, natural
|
|
32
38
|
"Orus", // Confident, authoritative
|
|
39
|
+
"Autonoe", // Gentle, clear
|
|
40
|
+
"Enceladus", // Breathable, expressive
|
|
41
|
+
"Iapetus", // Deep, resonant
|
|
42
|
+
"Umbriel", // Calm, deliberate
|
|
43
|
+
"Algieba", // Rich, warm
|
|
44
|
+
"Despina", // Light, airy
|
|
45
|
+
"Erinome", // Crisp, articulate
|
|
46
|
+
"Sulafat", // Smooth, soothing
|
|
47
|
+
"Schedar", // Authoritative, clear
|
|
48
|
+
"Vindemiatrix", // Expressive, fluid
|
|
33
49
|
];
|
|
50
|
+
// Correct model IDs verified against official Gemini API docs (Apr 2026)
|
|
51
|
+
// Pattern for 3.1: gemini-3.1-{variant}-tts-preview (NOT -preview-tts); the 2.5 IDs below keep the -preview-tts suffix
|
|
34
52
|
export const AVAILABLE_TTS_MODELS = [
|
|
35
|
-
"gemini-3.1-flash-preview
|
|
36
|
-
"gemini-3.1-pro-preview-tts", // Latest, highest quality
|
|
53
|
+
"gemini-3.1-flash-tts-preview", // Latest, fast (default) ✓ CORRECT ID
|
|
37
54
|
"gemini-2.5-flash-preview-tts", // Previous gen, fast
|
|
38
55
|
"gemini-2.5-pro-preview-tts", // Previous gen, high quality
|
|
39
56
|
];
|
|
40
|
-
// ── State
|
|
57
|
+
// ── State ──────────────────────────────────────────────────────────────────────
|
|
41
58
|
let voiceEnabled = false;
|
|
42
59
|
let lastResponse = "";
|
|
43
60
|
let playbackProcess = null;
|
|
44
61
|
let currentVoice = "Zephyr";
|
|
45
|
-
let currentTtsModel = "gemini-3.1-flash-preview
|
|
46
|
-
//
|
|
62
|
+
let currentTtsModel = "gemini-3.1-flash-tts-preview";
|
|
63
|
+
// Session-cached Gemini key — only fetched once per session
|
|
47
64
|
let cachedGeminiKey = null;
|
|
48
65
|
/** Returns the current value of the module-level voice on/off flag. */
export function isVoiceEnabled() {
    return voiceEnabled;
}
|
|
49
66
|
/**
 * Stores the voice on/off flag.
 * @param {boolean} on - new value for the flag.
 */
export function setVoiceEnabled(on) {
    voiceEnabled = on;
}
|
|
@@ -53,7 +70,7 @@ export function getCurrentVoice() { return currentVoice; }
|
|
|
53
70
|
/**
 * Stores the voice name used for later synthesis requests.
 * The value is stored as given; it is not checked against AVAILABLE_VOICES here.
 * @param {string} v - voice name.
 */
export function setCurrentVoice(v) {
    currentVoice = v;
}
|
|
54
71
|
/** Returns the TTS model ID currently selected for synthesis. */
export function getCurrentTtsModel() {
    return currentTtsModel;
}
|
|
55
72
|
/**
 * Stores the TTS model ID used for later synthesis requests.
 * The value is stored as given; it is not checked against AVAILABLE_TTS_MODELS here.
 * @param {string} m - model ID.
 */
export function setCurrentTtsModel(m) {
    currentTtsModel = m;
}
|
|
56
|
-
// ── Gemini key via CV API key
|
|
73
|
+
// ── Gemini key via CV API key ──────────────────────────────────────────────────
|
|
57
74
|
async function fetchGeminiKey() {
|
|
58
75
|
if (cachedGeminiKey)
|
|
59
76
|
return cachedGeminiKey;
|
|
@@ -79,7 +96,7 @@ async function fetchGeminiKey() {
|
|
|
79
96
|
}
|
|
80
97
|
return null;
|
|
81
98
|
}
|
|
82
|
-
// ── Audio Playback
|
|
99
|
+
// ── Audio Playback ─────────────────────────────────────────────────────────────
|
|
83
100
|
export function stopPlayback() {
|
|
84
101
|
if (playbackProcess && !playbackProcess.killed) {
|
|
85
102
|
playbackProcess.kill("SIGKILL");
|
|
@@ -122,10 +139,10 @@ function playWav(wavBuffer) {
|
|
|
122
139
|
catch { /* ignore */ }
|
|
123
140
|
});
|
|
124
141
|
}
|
|
125
|
-
// ── WAV Builder
|
|
142
|
+
// ── WAV Builder ────────────────────────────────────────────────────────────────
|
|
126
143
|
function buildWavHeader(dataLength, sampleRate = 24000, channels = 1, bitsPerSample = 16) {
|
|
127
|
-
const byteRate = sampleRate * channels * bitsPerSample / 8;
|
|
128
|
-
const blockAlign = channels * bitsPerSample / 8;
|
|
144
|
+
const byteRate = (sampleRate * channels * bitsPerSample) / 8;
|
|
145
|
+
const blockAlign = (channels * bitsPerSample) / 8;
|
|
129
146
|
const header = Buffer.alloc(44);
|
|
130
147
|
header.write("RIFF", 0);
|
|
131
148
|
header.writeUInt32LE(36 + dataLength, 4);
|
|
@@ -142,59 +159,121 @@ function buildWavHeader(dataLength, sampleRate = 24000, channels = 1, bitsPerSam
|
|
|
142
159
|
header.writeUInt32LE(dataLength, 40);
|
|
143
160
|
return header;
|
|
144
161
|
}
|
|
145
|
-
// ──
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
return; // No key available — silently skip
|
|
156
|
-
// Strip markdown for natural-sounding speech
|
|
157
|
-
const cleaned = text
|
|
158
|
-
.replace(/```[\s\S]*?```/g, "")
|
|
159
|
-
.replace(/`[^`]+`/g, "")
|
|
160
|
-
.replace(/\*\*(.*?)\*\*/g, "$1")
|
|
161
|
-
.replace(/\*(.*?)\*/g, "$1")
|
|
162
|
-
.replace(/^[#>•\-*]\s*/gm, "")
|
|
162
|
+
// ── Text Cleaning ──────────────────────────────────────────────────────────────
|
|
163
|
+
/**
 * Reduces Markdown-flavoured text to plain prose suitable for speech synthesis.
 *
 * Code (fenced and inline) is dropped entirely; headings, list markers,
 * blockquote markers, emphasis and link/image syntax are unwrapped so only the
 * readable words remain. All runs of whitespace collapse to a single space.
 *
 * Line-leading markers are removed BEFORE inline emphasis: otherwise the `*`
 * of a bullet ("* item with *emphasis*") pairs up with the first emphasis
 * asterisk and a stray `*` is left in the spoken text.
 *
 * @param {string} text - raw reply text, possibly containing Markdown.
 * @returns {string} cleaned, single-line text (may be empty).
 */
function cleanForSpeech(text) {
    return text
        .replace(/```[\s\S]*?```/g, "") // strip fenced code blocks
        .replace(/`[^`]+`/g, "") // strip inline code
        .replace(/^[ \t]*(?:-{3,}|\*{3,}|_{3,})[ \t]*$/gm, "") // horizontal rules
        // Blockquote / bullet markers. "-" and "*" must be followed by whitespace
        // (or end of line) so "-5 degrees" and "*emphasis*" are left alone.
        .replace(/^[ \t]*(?:(?:>[ \t]*)+|•[ \t]*|[-*](?:[ \t]+|$))/gm, "")
        .replace(/^#{1,6}\s+/gm, "") // headings → plain
        .replace(/\*\*(.*?)\*\*/g, "$1") // bold → plain
        .replace(/\*(.*?)\*/g, "$1") // italic → plain
        .replace(/!?\[([^\]]+)\]\([^)]+\)/g, "$1") // links and images → label only
        .replace(/\s+/g, " ")
        .trim();
}
|
|
175
|
+
// ── Sentence Chunker ───────────────────────────────────────────────────────────
|
|
176
|
+
// Splits text into chunks of at most CHUNK_SIZE characters, preferring
// sentence boundaries (. ! ? followed by whitespace or end of input).
const CHUNK_SIZE = 800; // characters

/**
 * Breaks a single over-long sentence into pieces no longer than CHUNK_SIZE,
 * cutting at the last space inside the limit or, failing that, mid-word.
 *
 * @param {string} sentence
 * @returns {string[]} pieces whose concatenation equals `sentence`.
 */
function splitOversizedSentence(sentence) {
    const pieces = [];
    let rest = sentence;
    while (rest.length > CHUNK_SIZE) {
        const breakAt = rest.lastIndexOf(" ", CHUNK_SIZE);
        // Keep the space with the left piece; hard-cut when no usable space exists.
        const cut = breakAt > 0 ? breakAt + 1 : CHUNK_SIZE;
        pieces.push(rest.slice(0, cut));
        rest = rest.slice(cut);
    }
    if (rest)
        pieces.push(rest);
    return pieces;
}

/**
 * Splits `text` into trimmed, non-empty chunks for sequential synthesis.
 *
 * Text at or under CHUNK_SIZE is returned unchanged as a single chunk.
 * Longer text is cut into sentences and the sentences are packed greedily
 * into chunks. No input characters other than inter-chunk whitespace are
 * discarded.
 *
 * @param {string} text - cleaned text to split.
 * @returns {string[]} chunks in original order.
 */
function splitIntoChunks(text) {
    if (text.length <= CHUNK_SIZE)
        return [text];
    // A sentence ends at terminal punctuation that is followed by whitespace or
    // end of input. The lazy prefix lets embedded dots ("2.1", "example.com")
    // stay inside their sentence, and the second alternative captures any
    // unterminated tail, so every character lands in exactly one match.
    const sentences = text.match(/[\s\S]+?[.!?]+(?:\s+|$)|[\s\S]+$/g) ?? [text];
    const chunks = [];
    let current = "";
    for (const sentence of sentences) {
        for (const piece of splitOversizedSentence(sentence)) {
            if (current.length > 0 && (current + piece).length > CHUNK_SIZE) {
                chunks.push(current.trim());
                current = piece;
            }
            else {
                current += piece;
            }
        }
    }
    if (current.trim())
        chunks.push(current.trim());
    return chunks.filter(c => c.length > 0);
}
|
|
199
|
+
// ── Single-chunk Synthesis (with retry) ───────────────────────────────────────
|
|
200
|
+
const MAX_RETRIES = 3; // retries after the first attempt
const RETRY_DELAY_MS = 800; // base delay; doubled on every retry

/**
 * Requests audio for one chunk of text and returns the decoded audio bytes.
 *
 * Failures that look like a server-side 500/INTERNAL error are retried up to
 * MAX_RETRIES times with exponential back-off (800 ms, 1600 ms, 3200 ms with
 * the defaults), matching the retry policy described in the file header. Any
 * other error, or running out of retries, yields `null`; this function never
 * throws for a failed request.
 *
 * @param ai       client exposing `models.generateContent` (a GoogleGenAI instance in this file).
 * @param {string} text   chunk to synthesize.
 * @param {string} voice  prebuilt voice name.
 * @param {string} model  TTS model ID.
 * @param {number} [attempt=0] zero-based attempt number to start from.
 * @returns {Promise<Buffer|null>} concatenated base64-decoded inline audio
 *          data, or `null` when the response carried none or the request failed.
 */
async function synthesizeChunk(ai, text, voice, model, attempt = 0) {
    for (let tryNo = attempt;; tryNo++) {
        try {
            const response = await ai.models.generateContent({
                model,
                contents: [{ parts: [{ text }] }],
                config: {
                    responseModalities: [Modality.AUDIO],
                    speechConfig: {
                        voiceConfig: {
                            prebuiltVoiceConfig: { voiceName: voice },
                        },
                    },
                },
            });
            const parts = response?.candidates?.[0]?.content?.parts ?? [];
            const pcmParts = [];
            for (const part of parts) {
                if (part.inlineData?.data) {
                    pcmParts.push(Buffer.from(part.inlineData.data, "base64"));
                }
            }
            return pcmParts.length > 0 ? Buffer.concat(pcmParts) : null;
        }
        catch (err) {
            // The TTS endpoint can fail with a 500 on an otherwise valid request.
            const message = String(err?.message ?? "");
            const isRetryable = err?.status === 500 ||
                message.includes("500") ||
                message.includes("INTERNAL");
            if (!isRetryable || tryNo >= MAX_RETRIES)
                return null;
            await new Promise(r => setTimeout(r, RETRY_DELAY_MS * 2 ** tryNo));
        }
    }
}
|
|
239
|
+
// ── Public TTS Entry Point ─────────────────────────────────────────────────────
|
|
240
|
+
/**
 * Synthesizes `text` via Gemini TTS and hands the result to the audio player.
 *  - Cleans markdown
 *  - Splits into sentence-boundary chunks
 *  - Synthesizes each chunk sequentially with retry
 *  - Concatenates all audio data into one WAV and plays it
 *
 * Chunks that fail to synthesize are skipped; the remaining audio is still
 * played. Errors raised during synthesis or playback are swallowed so the
 * REPL is never disrupted.
 *
 * The cached Gemini key is dropped both when an exception escapes and when
 * no chunk at all could be synthesized. The second case matters because
 * `synthesizeChunk` reports every API failure as `null` instead of throwing,
 * so an expired short-lived key would otherwise stay cached and silence
 * every later call in the session.
 *
 * @param {string} text         reply text to speak (may contain Markdown).
 * @param {string} [_unusedKey] ignored; kept for backward compatibility.
 * @returns {Promise<void>}
 */
export async function speakText(text, _unusedKey) {
    if (!text.trim())
        return;
    const geminiKey = await fetchGeminiKey();
    if (!geminiKey)
        return; // No key available — silently skip
    const cleaned = cleanForSpeech(text);
    if (!cleaned)
        return;
    const chunks = splitIntoChunks(cleaned);
    // Snapshot the settings so every chunk of this utterance uses the same
    // voice and model even if they are changed while synthesis is running.
    const voice = currentVoice;
    const model = currentTtsModel;
    try {
        const ai = new GoogleGenAI({ apiKey: geminiKey });
        const pcmBuffers = [];
        for (const chunk of chunks) {
            const pcm = await synthesizeChunk(ai, chunk, voice, model);
            if (pcm)
                pcmBuffers.push(pcm);
        }
        if (pcmBuffers.length === 0) {
            // Total failure: the key may have expired — force a re-fetch next time.
            cachedGeminiKey = null;
            return;
        }
        const allPcm = Buffer.concat(pcmBuffers);
        const wavBuffer = Buffer.concat([buildWavHeader(allPcm.length), allPcm]);
        playWav(wavBuffer);
    }
    catch {
        // Never crash the REPL — invalidate key so next call re-fetches
        cachedGeminiKey = null;
    }
}
|
package/package.json
CHANGED