npm - careervivid - Versions diffs - 2.1.18 → 2.1.22 - Mend

careervivid 2.1.18 → 2.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/dist/commands/agent/engineResolver.js +1 -1
package/dist/commands/agent/index.d.ts.map +1 -1
package/dist/commands/agent/index.js +8 -3
package/dist/commands/agent/repl/engineLoop.d.ts +35 -0
package/dist/commands/agent/repl/engineLoop.d.ts.map +1 -0
package/dist/commands/agent/repl/engineLoop.js +168 -0
package/dist/commands/agent/repl/input.d.ts +21 -0
package/dist/commands/agent/repl/input.d.ts.map +1 -0
package/dist/commands/agent/repl/input.js +78 -0
package/dist/commands/agent/repl/slashCommands.d.ts +33 -0
package/dist/commands/agent/repl/slashCommands.d.ts.map +1 -0
package/dist/commands/agent/repl/slashCommands.js +193 -0
package/dist/commands/agent/repl/toolHandlers.d.ts +33 -0
package/dist/commands/agent/repl/toolHandlers.d.ts.map +1 -0
package/dist/commands/agent/repl/toolHandlers.js +185 -0
package/dist/commands/agent/repl.d.ts +10 -0
package/dist/commands/agent/repl.d.ts.map +1 -1
package/dist/commands/agent/repl.js +133 -609
package/dist/lib/tts.d.ts +19 -9
package/dist/lib/tts.d.ts.map +1 -1
package/dist/lib/tts.js +129 -50
package/package.json +1 -1

package/dist/lib/tts.d.ts CHANGED Viewed

@@ -1,15 +1,20 @@
 /**
  * tts.ts — Text-to-Speech engine for the CareerVivid REPL
  *
- * Authenticates using the user's CareerVivid API key (cv_live_...) to fetch
- * a short-lived Gemini key from the backend — exactly like `cv interview`.
- * No separate GEMINI_API_KEY required.
+ * Auth:     Uses the CareerVivid API key (cv_live_...) → cliGetInterviewToken
+ *           → short-lived Gemini key. No GEMINI_API_KEY env var required.
  *
- * Toggle:  /voice on  |  /voice off
- * Replay:  /speak
+ * Chunking: Long text is split at sentence boundaries and synthesized
+ *           sequentially, then played back-to-back for seamless audio.
+ *
+ * Retry:    Gemini 3.1 TTS models occasionally return 500 errors;
+ *           each chunk is retried up to 3 times with exponential back-off.
+ *
+ * Toggle:   /voice on | off
+ * Replay:   /speak
  */
-export declare const AVAILABLE_VOICES: readonly ["Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Aoede", "Orbit", "Stellar", "Leda", "Orus"];
-export declare const AVAILABLE_TTS_MODELS: readonly ["gemini-3.1-flash-preview-tts", "gemini-3.1-pro-preview-tts", "gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"];
+export declare const AVAILABLE_VOICES: readonly ["Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Aoede", "Orbit", "Stellar", "Leda", "Orus", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba", "Despina", "Erinome", "Sulafat", "Schedar", "Vindemiatrix"];
+export declare const AVAILABLE_TTS_MODELS: readonly ["gemini-3.1-flash-tts-preview", "gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"];
 export declare function isVoiceEnabled(): boolean;
 export declare function setVoiceEnabled(on: boolean): void;
 export declare function setLastResponse(text: string): void;
@@ -20,8 +25,13 @@ export declare function getCurrentTtsModel(): string;
 export declare function setCurrentTtsModel(m: string): void;
 export declare function stopPlayback(): void;
 /**
- * Synthesizes `text` via Gemini TTS using the CareerVivid API key for auth.
- * Non-blocking — errors are silently swallowed so the REPL is never disrupted.
+ * Synthesizes `text` via Gemini TTS.
+ * - Cleans markdown
+ * - Splits into sentence-boundary chunks
+ * - Synthesizes each chunk sequentially with retry
+ * - Concatenates all PCM data into one WAV and plays it
+ *
+ * Non-blocking: errors are silently swallowed so the REPL is never disrupted.
  */
 export declare function speakText(text: string, _unusedKey?: string): Promise<void>;
 //# sourceMappingURL=tts.d.ts.map

package/dist/lib/tts.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/lib/tts.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;GASG~~;~~AAgBH~~,eAAO,MAAM,gBAAgB,~~sGAWnB~~,CAAC;~~AAEX~~,eAAO,MAAM,oBAAoB,~~uIAKvB~~,CAAC;AAYX,wBAAgB,cAAc,YAA2B;AACzD,wBAAgB,eAAe,CAAC,EAAE,EAAE,OAAO,QAAwB;AACnE,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,QAA0B;AACtE,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,QAAuB;AAChE,wBAAgB,kBAAkB,WAA8B;AAChE,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,QAA0B;AA8BtE,wBAAgB,YAAY,SAK3B;~~AA2DD;;;GAGG~~;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,~~CAuDhF~~"}
1	+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/lib/tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAiBH,eAAO,MAAM,gBAAgB,2NAqBnB,CAAC;AAIX,eAAO,MAAM,oBAAoB,yGAIvB,CAAC;AAYX,wBAAgB,cAAc,YAA2B;AACzD,wBAAgB,eAAe,CAAC,EAAE,EAAE,OAAO,QAAwB;AACnE,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,QAA0B;AACtE,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,WAA2B;AAC1D,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,QAAuB;AAChE,wBAAgB,kBAAkB,WAA8B;AAChE,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,MAAM,QAA0B;AA8BtE,wBAAgB,YAAY,SAK3B;AA6JD;;;;;;;;GAQG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAgChF"}

package/dist/lib/tts.js CHANGED Viewed

@@ -1,12 +1,17 @@
 /**
  * tts.ts — Text-to-Speech engine for the CareerVivid REPL
  *
- * Authenticates using the user's CareerVivid API key (cv_live_...) to fetch
- * a short-lived Gemini key from the backend — exactly like `cv interview`.
- * No separate GEMINI_API_KEY required.
+ * Auth:     Uses the CareerVivid API key (cv_live_...) → cliGetInterviewToken
+ *           → short-lived Gemini key. No GEMINI_API_KEY env var required.
  *
- * Toggle:  /voice on  |  /voice off
- * Replay:  /speak
+ * Chunking: Long text is split at sentence boundaries and synthesized
+ *           sequentially, then played back-to-back for seamless audio.
+ *
+ * Retry:    Gemini 3.1 TTS models occasionally return 500 errors;
+ *           each chunk is retried up to 3 times with exponential back-off.
+ *
+ * Toggle:   /voice on | off
+ * Replay:   /speak
  */
 import { writeFileSync, unlinkSync } from "fs";
 import { spawn } from "child_process";
@@ -14,11 +19,12 @@ import { tmpdir } from "os";
 import { join } from "path";
 import { GoogleGenAI, Modality } from "@google/genai";
 import { getApiKey } from "../config.js";
-// ── Backend endpoint (same as interview token vend) ───────────────────────────
+// ── Backend endpoint ───────────────────────────────────────────────────────────
 const TTS_TOKEN_URL = process.env.CV_FUNCTIONS_URL
     ? `${process.env.CV_FUNCTIONS_URL}/cliGetInterviewToken`
     : "https://us-west1-jastalk-firebase.cloudfunctions.net/cliGetInterviewToken";
-// ── Available options ────────────────────────────────────────────────────────
+// ── Available options ──────────────────────────────────────────────────────────
+// All 30 Gemini TTS voices (identical across 2.5 and 3.1 model families)
 export const AVAILABLE_VOICES = [
     "Zephyr", // Bright, energetic
     "Puck", // Upbeat, playful
@@ -30,20 +36,31 @@ export const AVAILABLE_VOICES = [
     "Stellar", // Smooth, polished
     "Leda", // Warm, natural
     "Orus", // Confident, authoritative
+    "Autonoe", // Gentle, clear
+    "Enceladus", // Breathable, expressive
+    "Iapetus", // Deep, resonant
+    "Umbriel", // Calm, deliberate
+    "Algieba", // Rich, warm
+    "Despina", // Light, airy
+    "Erinome", // Crisp, articulate
+    "Sulafat", // Smooth, soothing
+    "Schedar", // Authoritative, clear
+    "Vindemiatrix", // Expressive, fluid
 ];
+// Correct model IDs verified against official Gemini API docs (Apr 2026)
+// Pattern: gemini-{version}-{variant}-tts-preview  (NOT -preview-tts)
 export const AVAILABLE_TTS_MODELS = [
-    "gemini-3.1-flash-preview-tts", // Latest, fast (default)
-    "gemini-3.1-pro-preview-tts", // Latest, highest quality
+    "gemini-3.1-flash-tts-preview", // Latest, fast (default) ✓ CORRECT ID
     "gemini-2.5-flash-preview-tts", // Previous gen, fast
     "gemini-2.5-pro-preview-tts", // Previous gen, high quality
 ];
-// ── State ────────────────────────────────────────────────────────────────────
+// ── State ──────────────────────────────────────────────────────────────────────
 let voiceEnabled = false;
 let lastResponse = "";
 let playbackProcess = null;
 let currentVoice = "Zephyr";
-let currentTtsModel = "gemini-3.1-flash-preview-tts";
-// Cache the Gemini key for the session so we don't hit the endpoint every turn
+let currentTtsModel = "gemini-3.1-flash-tts-preview";
+// Session-cached Gemini key — only fetched once per session
 let cachedGeminiKey = null;
 export function isVoiceEnabled() { return voiceEnabled; }
 export function setVoiceEnabled(on) { voiceEnabled = on; }
@@ -53,7 +70,7 @@ export function getCurrentVoice() { return currentVoice; }
 export function setCurrentVoice(v) { currentVoice = v; }
 export function getCurrentTtsModel() { return currentTtsModel; }
 export function setCurrentTtsModel(m) { currentTtsModel = m; }
-// ── Gemini key via CV API key ─────────────────────────────────────────────────
+// ── Gemini key via CV API key ──────────────────────────────────────────────────
 async function fetchGeminiKey() {
     if (cachedGeminiKey)
         return cachedGeminiKey;
@@ -79,7 +96,7 @@ async function fetchGeminiKey() {
     }
     return null;
 }
-// ── Audio Playback ────────────────────────────────────────────────────────────
+// ── Audio Playback ─────────────────────────────────────────────────────────────
 export function stopPlayback() {
     if (playbackProcess && !playbackProcess.killed) {
         playbackProcess.kill("SIGKILL");
@@ -122,10 +139,10 @@ function playWav(wavBuffer) {
         catch { /* ignore */ }
     });
 }
-// ── WAV Builder ───────────────────────────────────────────────────────────────
+// ── WAV Builder ────────────────────────────────────────────────────────────────
 function buildWavHeader(dataLength, sampleRate = 24000, channels = 1, bitsPerSample = 16) {
-    const byteRate = sampleRate * channels * bitsPerSample / 8;
-    const blockAlign = channels * bitsPerSample / 8;
+    const byteRate = (sampleRate * channels * bitsPerSample) / 8;
+    const blockAlign = (channels * bitsPerSample) / 8;
     const header = Buffer.alloc(44);
     header.write("RIFF", 0);
     header.writeUInt32LE(36 + dataLength, 4);
@@ -142,59 +159,121 @@ function buildWavHeader(dataLength, sampleRate = 24000, channels = 1, bitsPerSam
     header.writeUInt32LE(dataLength, 40);
     return header;
 }
-// ── TTS Synthesis ─────────────────────────────────────────────────────────────
-/**
- * Synthesizes `text` via Gemini TTS using the CareerVivid API key for auth.
- * Non-blocking — errors are silently swallowed so the REPL is never disrupted.
- */
-export async function speakText(text, _unusedKey) {
-    if (!text.trim())
-        return;
-    const geminiKey = await fetchGeminiKey();
-    if (!geminiKey)
-        return; // No key available — silently skip
-    // Strip markdown for natural-sounding speech
-    const cleaned = text
-        .replace(/```[\s\S]*?```/g, "")
-        .replace(/`[^`]+`/g, "")
-        .replace(/\*\*(.*?)\*\*/g, "$1")
-        .replace(/\*(.*?)\*/g, "$1")
-        .replace(/^[#>•\-*]\s*/gm, "")
+// ── Text Cleaning ──────────────────────────────────────────────────────────────
+function cleanForSpeech(text) {
+    return text
+        .replace(/```[\s\S]*?```/g, "") // strip code blocks
+        .replace(/`[^`]+`/g, "") // strip inline code
+        .replace(/\*\*(.*?)\*\*/g, "$1") // bold → plain
+        .replace(/\*(.*?)\*/g, "$1") // italic → plain
+        .replace(/^#{1,6}\s+/gm, "") // headings → plain
+        .replace(/^[>•\-*]\s*/gm, "") // bullets/blockquotes
+        .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links → text only
         .replace(/\s+/g, " ")
-        .trim()
-        .slice(0, 1000);
-    if (!cleaned)
-        return;
+        .trim();
+}
+// ── Sentence Chunker ───────────────────────────────────────────────────────────
+// Splits at sentence boundaries (. ! ?) respecting ~800 char soft limit
+// to stay well within the 32k token context window and avoid quality drift.
+const CHUNK_SIZE = 800; // characters
+function splitIntoChunks(text) {
+    if (text.length <= CHUNK_SIZE)
+        return [text];
+    const chunks = [];
+    // Split on sentence-ending punctuation, keeping the delimiter
+    const sentences = text.match(/[^.!?]+[.!?]+(?:\s|$)|[^.!?]+$/g) ?? [text];
+    let current = "";
+    for (const sentence of sentences) {
+        if ((current + sentence).length > CHUNK_SIZE && current.length > 0) {
+            chunks.push(current.trim());
+            current = sentence;
+        }
+        else {
+            current += sentence;
+        }
+    }
+    if (current.trim())
+        chunks.push(current.trim());
+    return chunks.filter(c => c.length > 0);
+}
+// ── Single-chunk Synthesis (with retry) ───────────────────────────────────────
+const MAX_RETRIES = 3;
+const RETRY_DELAY_MS = 800;
+async function synthesizeChunk(ai, text, voice, model, attempt = 0) {
     try {
-        const ai = new GoogleGenAI({ apiKey: geminiKey });
         const response = await ai.models.generateContent({
-            model: currentTtsModel,
-            contents: [{ parts: [{ text: cleaned }] }],
+            model,
+            contents: [{ parts: [{ text }] }],
             config: {
                 responseModalities: [Modality.AUDIO],
                 speechConfig: {
                     voiceConfig: {
-                        prebuiltVoiceConfig: { voiceName: currentVoice },
+                        prebuiltVoiceConfig: { voiceName: voice },
                     },
                 },
             },
         });
         const parts = response?.candidates?.[0]?.content?.parts ?? [];
-        const audioParts = [];
+        const pcmParts = [];
         for (const part of parts) {
             if (part.inlineData?.data) {
-                audioParts.push(Buffer.from(part.inlineData.data, "base64"));
+                pcmParts.push(Buffer.from(part.inlineData.data, "base64"));
             }
         }
-        if (audioParts.length === 0)
+        if (pcmParts.length === 0)
+            return null;
+        return Buffer.concat(pcmParts);
+    }
+    catch (err) {
+        // Gemini 3.1 TTS can 500 on random requests — retry with back-off
+        const isRetryable = err?.status === 500 ||
+            String(err?.message ?? "").includes("500") ||
+            String(err?.message ?? "").includes("INTERNAL");
+        if (isRetryable && attempt < MAX_RETRIES) {
+            await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (attempt + 1)));
+            return synthesizeChunk(ai, text, voice, model, attempt + 1);
+        }
+        return null;
+    }
+}
+// ── Public TTS Entry Point ─────────────────────────────────────────────────────
+/**
+ * Synthesizes `text` via Gemini TTS.
+ * - Cleans markdown
+ * - Splits into sentence-boundary chunks
+ * - Synthesizes each chunk sequentially with retry
+ * - Concatenates all PCM data into one WAV and plays it
+ *
+ * Non-blocking: errors are silently swallowed so the REPL is never disrupted.
+ */
+export async function speakText(text, _unusedKey) {
+    if (!text.trim())
+        return;
+    const geminiKey = await fetchGeminiKey();
+    if (!geminiKey)
+        return;
+    const cleaned = cleanForSpeech(text);
+    if (!cleaned)
+        return;
+    const chunks = splitIntoChunks(cleaned);
+    const voice = currentVoice;
+    const model = currentTtsModel;
+    try {
+        const ai = new GoogleGenAI({ apiKey: geminiKey });
+        const pcmBuffers = [];
+        for (const chunk of chunks) {
+            const pcm = await synthesizeChunk(ai, chunk, voice, model);
+            if (pcm)
+                pcmBuffers.push(pcm);
+        }
+        if (pcmBuffers.length === 0)
             return;
-        const pcmData = Buffer.concat(audioParts);
-        const wavBuffer = Buffer.concat([buildWavHeader(pcmData.length), pcmData]);
+        const allPcm = Buffer.concat(pcmBuffers);
+        const wavBuffer = Buffer.concat([buildWavHeader(allPcm.length), allPcm]);
         playWav(wavBuffer);
     }
     catch {
-        // Silently ignore — TTS errors must never crash the agent REPL
-        // Invalidate cached key so we retry fetching on the next call
+        // Never crash the REPL — invalidate key so next call re-fetches
         cachedGeminiKey = null;
     }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "careervivid",
-  "version": "2.1.18",
+  "version": "2.1.22",
   "description": "Official CLI for CareerVivid — AI voice interviews, autonomous job applications, resume editing, and portfolio publishing from your terminal",
   "type": "module",
   "bin": {