careervivid 2.1.18 → 2.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lib/tts.d.ts CHANGED
@@ -1,15 +1,20 @@
1
1
  /**
2
2
  * tts.ts — Text-to-Speech engine for the CareerVivid REPL
3
3
  *
4
- * Authenticates using the user's CareerVivid API key (cv_live_...) to fetch
5
- * a short-lived Gemini key from the backend exactly like `cv interview`.
6
- * No separate GEMINI_API_KEY required.
4
+ * Auth: Uses the CareerVivid API key (cv_live_...) with cliGetInterviewToken to fetch a
5
+ * short-lived Gemini key. No GEMINI_API_KEY env var required.
7
6
  *
8
- * Toggle: /voice on | /voice off
9
- * Replay: /speak
7
+ * Chunking: Long text is split at sentence boundaries and synthesized
8
+ * sequentially, then played back-to-back for seamless audio.
9
+ *
10
+ * Retry: Gemini 3.1 TTS models occasionally return 500 errors;
11
+ * each chunk is retried up to 3 times with a linearly increasing back-off delay.
12
+ *
13
+ * Toggle: /voice on | off
14
+ * Replay: /speak
10
15
  */
11
- export declare const AVAILABLE_VOICES: readonly ["Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Aoede", "Orbit", "Stellar", "Leda", "Orus"];
12
- export declare const AVAILABLE_TTS_MODELS: readonly ["gemini-3.1-flash-preview-tts", "gemini-3.1-pro-preview-tts", "gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"];
16
+ export declare const AVAILABLE_VOICES: readonly ["Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Aoede", "Orbit", "Stellar", "Leda", "Orus", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba", "Despina", "Erinome", "Sulafat", "Schedar", "Vindemiatrix"];
17
+ export declare const AVAILABLE_TTS_MODELS: readonly ["gemini-3.1-flash-tts-preview", "gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"];
13
18
  export declare function isVoiceEnabled(): boolean;
14
19
  export declare function setVoiceEnabled(on: boolean): void;
15
20
  export declare function setLastResponse(text: string): void;
@@ -20,8 +25,13 @@ export declare function getCurrentTtsModel(): string;
20
25
  export declare function setCurrentTtsModel(m: string): void;
21
26
  export declare function stopPlayback(): void;
22
27
  /**
23
- * Synthesizes `text` via Gemini TTS using the CareerVivid API key for auth.
24
- * Non-blocking errors are silently swallowed so the REPL is never disrupted.
28
+ * Synthesizes `text` via Gemini TTS.
29
+ * - Cleans markdown
30
+ * - Splits into sentence-boundary chunks
31
+ * - Synthesizes each chunk sequentially with retry
32
+ * - Concatenates all PCM data into one WAV and plays it
33
+ *
34
+ * Non-blocking: errors are silently swallowed so the REPL is never disrupted.
25
35
  */
26
36
  export declare function speakText(text: string, _unusedKey?: string): Promise<void>;
27
37
  //# sourceMappingURL=tts.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/lib/tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAgBH,eAAO,MAAM,gBAAgB,sGAWnB,CAAC;AAEX,eAAO,MAAM,oBAAoB,uIAKvB,CAAC;AAYX,wBAAgB,cAAc,YAA2B;AACzD,wBAAgB,eAAe,CAAC,EAAE,EAAE,OAAO,QAAwB;AACnE,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,QAA0B;AACtE,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,QAAuB;AAChE,wBAAgB,kBAAkB,WAA8B;AAChE,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,QAA0B;AA8BtE,wBAAgB,YAAY,SAK3B;AA2DD;;;GAGG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAuDhF"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/lib/tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAiBH,eAAO,MAAM,gBAAgB,2NAqBnB,CAAC;AAIX,eAAO,MAAM,oBAAoB,yGAIvB,CAAC;AAYX,wBAAgB,cAAc,YAA2B;AACzD,wBAAgB,eAAe,CAAC,EAAE,EAAE,OAAO,QAAwB;AACnE,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,QAA0B;AACtE,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,QAAuB;AAChE,wBAAgB,kBAAkB,WAA8B;AAChE,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,QAA0B;AA8BtE,wBAAgB,YAAY,SAK3B;AA6JD;;;;;;;;GAQG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAgChF"}
package/dist/lib/tts.js CHANGED
@@ -1,12 +1,17 @@
1
1
  /**
2
2
  * tts.ts — Text-to-Speech engine for the CareerVivid REPL
3
3
  *
4
- * Authenticates using the user's CareerVivid API key (cv_live_...) to fetch
5
- * a short-lived Gemini key from the backend exactly like `cv interview`.
6
- * No separate GEMINI_API_KEY required.
4
+ * Auth: Uses the CareerVivid API key (cv_live_...) with cliGetInterviewToken to fetch a
5
+ * short-lived Gemini key. No GEMINI_API_KEY env var required.
7
6
  *
8
- * Toggle: /voice on | /voice off
9
- * Replay: /speak
7
+ * Chunking: Long text is split at sentence boundaries and synthesized
8
+ * sequentially, then played back-to-back for seamless audio.
9
+ *
10
+ * Retry: Gemini 3.1 TTS models occasionally return 500 errors;
11
+ * each chunk is retried up to 3 times with a linearly increasing back-off delay.
12
+ *
13
+ * Toggle: /voice on | off
14
+ * Replay: /speak
10
15
  */
11
16
  import { writeFileSync, unlinkSync } from "fs";
12
17
  import { spawn } from "child_process";
@@ -14,11 +19,12 @@ import { tmpdir } from "os";
14
19
  import { join } from "path";
15
20
  import { GoogleGenAI, Modality } from "@google/genai";
16
21
  import { getApiKey } from "../config.js";
17
- // ── Backend endpoint (same as interview token vend) ───────────────────────────
22
+ // ── Backend endpoint ───────────────────────────────────────────────────────────
18
23
  const TTS_TOKEN_URL = process.env.CV_FUNCTIONS_URL
19
24
  ? `${process.env.CV_FUNCTIONS_URL}/cliGetInterviewToken`
20
25
  : "https://us-west1-jastalk-firebase.cloudfunctions.net/cliGetInterviewToken";
21
- // ── Available options ────────────────────────────────────────────────────────
26
+ // ── Available options ──────────────────────────────────────────────────────────
27
+ // Gemini TTS voices offered by the CLI — 20 entries (identical across 2.5 and 3.1 model families)
22
28
  export const AVAILABLE_VOICES = [
23
29
  "Zephyr", // Bright, energetic
24
30
  "Puck", // Upbeat, playful
@@ -30,20 +36,31 @@ export const AVAILABLE_VOICES = [
30
36
  "Stellar", // Smooth, polished
31
37
  "Leda", // Warm, natural
32
38
  "Orus", // Confident, authoritative
39
+ "Autonoe", // Gentle, clear
40
+ "Enceladus", // Breathable, expressive
41
+ "Iapetus", // Deep, resonant
42
+ "Umbriel", // Calm, deliberate
43
+ "Algieba", // Rich, warm
44
+ "Despina", // Light, airy
45
+ "Erinome", // Crisp, articulate
46
+ "Sulafat", // Smooth, soothing
47
+ "Schedar", // Authoritative, clear
48
+ "Vindemiatrix", // Expressive, fluid
33
49
  ];
50
+ // Correct model IDs verified against official Gemini API docs (Apr 2026)
51
+ // 3.1 pattern: gemini-{version}-{variant}-tts-preview (NOT -preview-tts); the 2.5 IDs below still end in -preview-tts
34
52
  export const AVAILABLE_TTS_MODELS = [
35
- "gemini-3.1-flash-preview-tts", // Latest, fast (default)
36
- "gemini-3.1-pro-preview-tts", // Latest, highest quality
53
+ "gemini-3.1-flash-tts-preview", // Latest, fast (default) ✓ CORRECT ID
37
54
  "gemini-2.5-flash-preview-tts", // Previous gen, fast
38
55
  "gemini-2.5-pro-preview-tts", // Previous gen, high quality
39
56
  ];
40
- // ── State ────────────────────────────────────────────────────────────────────
57
+ // ── State ──────────────────────────────────────────────────────────────────────
41
58
  let voiceEnabled = false;
42
59
  let lastResponse = "";
43
60
  let playbackProcess = null;
44
61
  let currentVoice = "Zephyr";
45
- let currentTtsModel = "gemini-3.1-flash-preview-tts";
46
- // Cache the Gemini key for the session so we don't hit the endpoint every turn
62
+ let currentTtsModel = "gemini-3.1-flash-tts-preview";
63
+ // Session-cached Gemini key — fetched once and reused until it is invalidated
47
64
  let cachedGeminiKey = null;
48
65
  export function isVoiceEnabled() { return voiceEnabled; }
49
66
  export function setVoiceEnabled(on) { voiceEnabled = on; }
@@ -53,7 +70,7 @@ export function getCurrentVoice() { return currentVoice; }
53
70
  export function setCurrentVoice(v) { currentVoice = v; }
54
71
  export function getCurrentTtsModel() { return currentTtsModel; }
55
72
  export function setCurrentTtsModel(m) { currentTtsModel = m; }
56
- // ── Gemini key via CV API key ─────────────────────────────────────────────────
73
+ // ── Gemini key via CV API key ──────────────────────────────────────────────────
57
74
  async function fetchGeminiKey() {
58
75
  if (cachedGeminiKey)
59
76
  return cachedGeminiKey;
@@ -79,7 +96,7 @@ async function fetchGeminiKey() {
79
96
  }
80
97
  return null;
81
98
  }
82
- // ── Audio Playback ────────────────────────────────────────────────────────────
99
+ // ── Audio Playback ─────────────────────────────────────────────────────────────
83
100
  export function stopPlayback() {
84
101
  if (playbackProcess && !playbackProcess.killed) {
85
102
  playbackProcess.kill("SIGKILL");
@@ -122,10 +139,10 @@ function playWav(wavBuffer) {
122
139
  catch { /* ignore */ }
123
140
  });
124
141
  }
125
- // ── WAV Builder ───────────────────────────────────────────────────────────────
142
+ // ── WAV Builder ────────────────────────────────────────────────────────────────
126
143
  function buildWavHeader(dataLength, sampleRate = 24000, channels = 1, bitsPerSample = 16) {
127
- const byteRate = sampleRate * channels * bitsPerSample / 8;
128
- const blockAlign = channels * bitsPerSample / 8;
144
+ const byteRate = (sampleRate * channels * bitsPerSample) / 8;
145
+ const blockAlign = (channels * bitsPerSample) / 8;
129
146
  const header = Buffer.alloc(44);
130
147
  header.write("RIFF", 0);
131
148
  header.writeUInt32LE(36 + dataLength, 4);
@@ -142,59 +159,121 @@ function buildWavHeader(dataLength, sampleRate = 24000, channels = 1, bitsPerSam
142
159
  header.writeUInt32LE(dataLength, 40);
143
160
  return header;
144
161
  }
145
- // ── TTS Synthesis ─────────────────────────────────────────────────────────────
146
- /**
147
- * Synthesizes `text` via Gemini TTS using the CareerVivid API key for auth.
148
- * Non-blocking errors are silently swallowed so the REPL is never disrupted.
149
- */
150
- export async function speakText(text, _unusedKey) {
151
- if (!text.trim())
152
- return;
153
- const geminiKey = await fetchGeminiKey();
154
- if (!geminiKey)
155
- return; // No key available — silently skip
156
- // Strip markdown for natural-sounding speech
157
- const cleaned = text
158
- .replace(/```[\s\S]*?```/g, "")
159
- .replace(/`[^`]+`/g, "")
160
- .replace(/\*\*(.*?)\*\*/g, "$1")
161
- .replace(/\*(.*?)\*/g, "$1")
162
- .replace(/^[#>•\-*]\s*/gm, "")
162
+ // ── Text Cleaning ──────────────────────────────────────────────────────────────
163
+ function cleanForSpeech(text) {
164
+ return text
165
+ .replace(/```[\s\S]*?```/g, "") // strip code blocks
166
+ .replace(/`[^`]+`/g, "") // strip inline code
167
+ .replace(/\*\*(.*?)\*\*/g, "$1") // bold → plain
168
+ .replace(/\*(.*?)\*/g, "$1") // italic → plain
169
+ .replace(/^#{1,6}\s+/gm, "") // headings → plain
170
+ .replace(/^[>•\-*]\s*/gm, "") // bullets/blockquotes
171
+ .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links → text only
163
172
  .replace(/\s+/g, " ")
164
- .trim()
165
- .slice(0, 1000);
166
- if (!cleaned)
167
- return;
173
+ .trim();
174
+ }
175
+ // ── Sentence Chunker ───────────────────────────────────────────────────────────
176
+ // Splits at sentence boundaries (. ! ?) respecting ~800 char soft limit
177
+ // to stay well within the 32k token context window and avoid quality drift.
178
+ const CHUNK_SIZE = 800; // characters
179
+ function splitIntoChunks(text) {
180
+ if (text.length <= CHUNK_SIZE)
181
+ return [text];
182
+ const chunks = [];
183
+ // Split on sentence-ending punctuation, keeping the delimiter
184
+ const sentences = text.match(/[^.!?]+[.!?]+(?:\s|$)|[^.!?]+$/g) ?? [text];
185
+ let current = "";
186
+ for (const sentence of sentences) {
187
+ if ((current + sentence).length > CHUNK_SIZE && current.length > 0) {
188
+ chunks.push(current.trim());
189
+ current = sentence;
190
+ }
191
+ else {
192
+ current += sentence;
193
+ }
194
+ }
195
+ if (current.trim())
196
+ chunks.push(current.trim());
197
+ return chunks.filter(c => c.length > 0);
198
+ }
199
+ // ── Single-chunk Synthesis (with retry) ───────────────────────────────────────
200
+ const MAX_RETRIES = 3;
201
+ const RETRY_DELAY_MS = 800;
202
+ async function synthesizeChunk(ai, text, voice, model, attempt = 0) {
168
203
  try {
169
- const ai = new GoogleGenAI({ apiKey: geminiKey });
170
204
  const response = await ai.models.generateContent({
171
- model: currentTtsModel,
172
- contents: [{ parts: [{ text: cleaned }] }],
205
+ model,
206
+ contents: [{ parts: [{ text }] }],
173
207
  config: {
174
208
  responseModalities: [Modality.AUDIO],
175
209
  speechConfig: {
176
210
  voiceConfig: {
177
- prebuiltVoiceConfig: { voiceName: currentVoice },
211
+ prebuiltVoiceConfig: { voiceName: voice },
178
212
  },
179
213
  },
180
214
  },
181
215
  });
182
216
  const parts = response?.candidates?.[0]?.content?.parts ?? [];
183
- const audioParts = [];
217
+ const pcmParts = [];
184
218
  for (const part of parts) {
185
219
  if (part.inlineData?.data) {
186
- audioParts.push(Buffer.from(part.inlineData.data, "base64"));
220
+ pcmParts.push(Buffer.from(part.inlineData.data, "base64"));
187
221
  }
188
222
  }
189
- if (audioParts.length === 0)
223
+ if (pcmParts.length === 0)
224
+ return null;
225
+ return Buffer.concat(pcmParts);
226
+ }
227
+ catch (err) {
228
+ // Gemini 3.1 TTS can 500 on random requests — retry with back-off
229
+ const isRetryable = err?.status === 500 ||
230
+ String(err?.message ?? "").includes("500") ||
231
+ String(err?.message ?? "").includes("INTERNAL");
232
+ if (isRetryable && attempt < MAX_RETRIES) {
233
+ await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (attempt + 1)));
234
+ return synthesizeChunk(ai, text, voice, model, attempt + 1);
235
+ }
236
+ return null;
237
+ }
238
+ }
239
+ // ── Public TTS Entry Point ─────────────────────────────────────────────────────
240
+ /**
241
+ * Synthesizes `text` via Gemini TTS.
242
+ * - Cleans markdown
243
+ * - Splits into sentence-boundary chunks
244
+ * - Synthesizes each chunk sequentially with retry
245
+ * - Concatenates all PCM data into one WAV and plays it
246
+ *
247
+ * Non-blocking: errors are silently swallowed so the REPL is never disrupted.
248
+ */
249
+ export async function speakText(text, _unusedKey) {
250
+ if (!text.trim())
251
+ return;
252
+ const geminiKey = await fetchGeminiKey();
253
+ if (!geminiKey)
254
+ return;
255
+ const cleaned = cleanForSpeech(text);
256
+ if (!cleaned)
257
+ return;
258
+ const chunks = splitIntoChunks(cleaned);
259
+ const voice = currentVoice;
260
+ const model = currentTtsModel;
261
+ try {
262
+ const ai = new GoogleGenAI({ apiKey: geminiKey });
263
+ const pcmBuffers = [];
264
+ for (const chunk of chunks) {
265
+ const pcm = await synthesizeChunk(ai, chunk, voice, model);
266
+ if (pcm)
267
+ pcmBuffers.push(pcm);
268
+ }
269
+ if (pcmBuffers.length === 0)
190
270
  return;
191
- const pcmData = Buffer.concat(audioParts);
192
- const wavBuffer = Buffer.concat([buildWavHeader(pcmData.length), pcmData]);
271
+ const allPcm = Buffer.concat(pcmBuffers);
272
+ const wavBuffer = Buffer.concat([buildWavHeader(allPcm.length), allPcm]);
193
273
  playWav(wavBuffer);
194
274
  }
195
275
  catch {
196
- // Silently ignore TTS errors must never crash the agent REPL
197
- // Invalidate cached key so we retry fetching on the next call
276
+ // Never crash the REPL — invalidate the cached key so the next call re-fetches it
198
277
  cachedGeminiKey = null;
199
278
  }
200
279
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "careervivid",
3
- "version": "2.1.18",
3
+ "version": "2.1.22",
4
4
  "description": "Official CLI for CareerVivid — AI voice interviews, autonomous job applications, resume editing, and portfolio publishing from your terminal",
5
5
  "type": "module",
6
6
  "bin": {