rampup 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +440 -109
- package/package.json +4 -2
package/index.js
CHANGED
@@ -704,6 +704,7 @@ program
   .command('voice')
   .description('Voice-based codebase learning (talk to your code)')
   .option('-p, --path <path>', 'Project path', '.')
+  .option('-t, --text', 'Use text input instead of microphone')
   .action(async (options) => {
     console.log(chalk.bold.blue('\n🎙️ Voice Mode\n'));
     console.log(chalk.gray('Talk to your codebase. Say "exit" or press Ctrl+C to quit.\n'));
@@ -738,7 +739,7 @@ program
     // Get fresh token after potential login
     const authToken = await getIdToken();

-    const
+    const RAMP_API_URL = process.env.RAMP_API_URL || 'https://ramp-api-946191982468.us-central1.run.app';

     // Track usage
     const usageFile = path.join(process.env.HOME, '.ramp', 'voice-usage.json');
@@ -749,7 +750,6 @@ program
     } catch {}

     const sessionStart = Date.now();
-    let sessionMinutes = 0;

     // Gather codebase context once
     const spinner = ora('Reading codebase...').start();
@@ -781,85 +781,415 @@ program
       await getStructure(projectPath);
       context += `\nStructure:\n${structure.slice(0, 2000)}\n`;

-      spinner.succeed('
+      spinner.succeed('Codebase loaded');
     } catch (error) {
       spinner.fail(`Error: ${error.message}`);
       process.exit(1);
     }

-
+    // Check for text-only mode
+    if (options.text) {
+      await runTextVoiceMode(authToken, context, projectPath, usage, usageFile, sessionStart, RAMP_API_URL);
+      return;
+    }

-    //
-
-
-
-
-
+    // Try to use realtime voice with microphone
+    try {
+      await runRealtimeVoiceMode(authToken, context, projectPath, usage, usageFile, sessionStart, RAMP_API_URL);
+    } catch (micError) {
+      console.log(chalk.yellow(`\n⚠️ Microphone not available: ${micError.message}`));
+      console.log(chalk.dim('Falling back to text input mode...\n'));
+      await runTextVoiceMode(authToken, context, projectPath, usage, usageFile, sessionStart, RAMP_API_URL);
+    }
+  });
+
+// Realtime voice mode using OpenAI Realtime API
+async function runRealtimeVoiceMode(authToken, context, projectPath, usage, usageFile, sessionStart, RAMP_API_URL) {
+  const WebSocket = (await import('ws')).default;
+  let mic;
+  try {
+    mic = (await import('mic')).default;
+  } catch (e) {
+    throw new Error('mic package not available - run: npm install -g rampup');
+  }
+
+  console.log(chalk.cyan('Connecting to voice service...\n'));
+
+  // Get ephemeral token from our API
+  const sessionResponse = await fetch(`${RAMP_API_URL}/api/ramp/realtime/session`, {
+    method: 'POST',
+    headers: {
+      'Authorization': `Bearer ${authToken}`,
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      model: 'gpt-4o-realtime-preview-2024-12-17',
+      voice: 'verse',
+    }),
+  });
+
+  if (!sessionResponse.ok) {
+    const error = await sessionResponse.json().catch(() => ({}));
+    throw new Error(error.message || error.error || 'Failed to create voice session');
+  }
+
+  const session = await sessionResponse.json();
+  const { clientSecret, sessionId } = session;
+
+  // Connect to OpenAI Realtime API
+  const ws = new WebSocket('wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-12-17', {
+    headers: {
+      'Authorization': `Bearer ${clientSecret}`,
+      'OpenAI-Beta': 'realtime=v1',
+    },
+  });
+
+  let isConnected = false;
+  let micInstance = null;
+  let micInputStream = null;
+  let audioChunks = [];
+  let isListening = false;
+  let sessionDurationSeconds = 0;
+  const sessionTimer = setInterval(() => sessionDurationSeconds++, 1000);
+
+  // Handle WebSocket events
+  ws.on('open', () => {
+    isConnected = true;
+    console.log(chalk.green('✓ Connected to voice service'));
+
+    // Configure the session with codebase context
+    ws.send(JSON.stringify({
+      type: 'session.update',
+      session: {
+        modalities: ['text', 'audio'],
+        instructions: `You are Ramp, a helpful voice assistant for developers exploring codebases.
+Keep responses concise (1-3 sentences) since they'll be spoken aloud.
+
+Project context:
+${context}
+
+Be friendly, practical, and reference specific files when relevant. If asked about code structure, explain it clearly.`,
+        voice: 'verse',
+        input_audio_format: 'pcm16',
+        output_audio_format: 'pcm16',
+        input_audio_transcription: { model: 'whisper-1' },
+        turn_detection: {
+          type: 'server_vad',
+          threshold: 0.5,
+          prefix_padding_ms: 300,
+          silence_duration_ms: 500,
         },
-
-
-
-
-
-
+      },
+    }));
+
+    // Start microphone
+    startMicrophone();
+  });
+
+  ws.on('message', async (data) => {
+    try {
+      const event = JSON.parse(data.toString());
+
+      switch (event.type) {
+        case 'session.created':
+        case 'session.updated':
+          console.log(chalk.green('✓ Session ready - speak now!\n'));
+          break;
+
+        case 'input_audio_buffer.speech_started':
+          process.stdout.write(chalk.dim('🎤 Listening... '));
+          break;
+
+        case 'input_audio_buffer.speech_stopped':
+          console.log(chalk.dim('processing...'));
+          break;
+
+        case 'conversation.item.input_audio_transcription.completed':
+          if (event.transcript) {
+            console.log(chalk.green(`\nYou: ${event.transcript}`));
+            if (event.transcript.toLowerCase().includes('exit') ||
+                event.transcript.toLowerCase().includes('quit') ||
+                event.transcript.toLowerCase().includes('goodbye')) {
+              cleanup();
+            }
+          }
+          break;
+
+        case 'response.audio.delta':
+          // Collect audio chunks
+          if (event.delta) {
+            audioChunks.push(Buffer.from(event.delta, 'base64'));
+          }
+          break;
+
+        case 'response.audio_transcript.delta':
+          // Stream transcript to console
+          if (event.delta) {
+            process.stdout.write(chalk.cyan(event.delta));
+          }
+          break;
+
+        case 'response.audio_transcript.done':
+          console.log('\n');
+          break;
+
+        case 'response.audio.done':
+          // Play collected audio
+          if (audioChunks.length > 0) {
+            await playAudioChunks(audioChunks);
+            audioChunks = [];
+          }
+          break;
+
+        case 'response.done':
+          // Response complete, ready for next input
+          break;
+
+        case 'error':
+          console.error(chalk.red(`\nError: ${event.error?.message || 'Unknown error'}`));
+          break;
+      }
+    } catch (e) {
+      // Ignore parse errors
+    }
+  });
+
+  ws.on('error', (error) => {
+    console.error(chalk.red(`\nConnection error: ${error.message}`));
+    cleanup();
+  });
+
+  ws.on('close', () => {
+    if (isConnected) {
+      console.log(chalk.dim('\nConnection closed'));
+      cleanup();
+    }
+  });
+
+  function startMicrophone() {
+    try {
+      micInstance = mic({
+        rate: '24000',
+        channels: '1',
+        bitwidth: '16',
+        encoding: 'signed-integer',
+        endian: 'little',
+        device: 'default',
+        debug: false,
+      });
+
+      micInputStream = micInstance.getAudioStream();
+
+      micInputStream.on('data', (chunk) => {
+        if (isConnected && ws.readyState === WebSocket.OPEN) {
+          // Send audio to OpenAI
+          ws.send(JSON.stringify({
+            type: 'input_audio_buffer.append',
+            audio: chunk.toString('base64'),
+          }));
+        }
+      });
+
+      micInputStream.on('error', (err) => {
+        console.error(chalk.red(`Microphone error: ${err.message}`));
       });

-
-
-
+      micInstance.start();
+      isListening = true;
+    } catch (err) {
+      throw new Error(`Failed to start microphone: ${err.message}`);
+    }
+  }
+
+  async function playAudioChunks(chunks) {
+    try {
+      // Combine all chunks into one buffer
+      const pcmData = Buffer.concat(chunks);
+
+      // Create WAV file with proper headers (no external tools needed)
+      const wavBuffer = createWavBuffer(pcmData, 24000, 1, 16);
+      const wavPath = `/tmp/ramp-voice-${Date.now()}.wav`;
+
+      await fs.writeFile(wavPath, wavBuffer);
+
+      // Play audio
+      if (process.platform === 'darwin') {
+        await execAsync(`afplay "${wavPath}"`);
+      } else if (process.platform === 'linux') {
+        await execAsync(`aplay "${wavPath}" 2>/dev/null || paplay "${wavPath}" 2>/dev/null`).catch(() => {});
       }

-
+      // Clean up
+      await fs.unlink(wavPath).catch(() => {});
+    } catch (err) {
+      // Log error for debugging but don't crash
+      console.error(chalk.dim(`Audio playback error: ${err.message}`));
+    }
+  }
+
+  // Create WAV buffer from raw PCM data
+  function createWavBuffer(pcmData, sampleRate, numChannels, bitsPerSample) {
+    const byteRate = sampleRate * numChannels * (bitsPerSample / 8);
+    const blockAlign = numChannels * (bitsPerSample / 8);
+    const dataSize = pcmData.length;
+    const headerSize = 44;
+    const fileSize = headerSize + dataSize;
+
+    const buffer = Buffer.alloc(fileSize);
+    let offset = 0;
+
+    // RIFF header
+    buffer.write('RIFF', offset); offset += 4;
+    buffer.writeUInt32LE(fileSize - 8, offset); offset += 4;
+    buffer.write('WAVE', offset); offset += 4;
+
+    // fmt subchunk
+    buffer.write('fmt ', offset); offset += 4;
+    buffer.writeUInt32LE(16, offset); offset += 4; // Subchunk1Size (16 for PCM)
+    buffer.writeUInt16LE(1, offset); offset += 2; // AudioFormat (1 = PCM)
+    buffer.writeUInt16LE(numChannels, offset); offset += 2;
+    buffer.writeUInt32LE(sampleRate, offset); offset += 4;
+    buffer.writeUInt32LE(byteRate, offset); offset += 4;
+    buffer.writeUInt16LE(blockAlign, offset); offset += 2;
+    buffer.writeUInt16LE(bitsPerSample, offset); offset += 2;
+
+    // data subchunk
+    buffer.write('data', offset); offset += 4;
+    buffer.writeUInt32LE(dataSize, offset); offset += 4;
+
+    // Copy PCM data
+    pcmData.copy(buffer, offset);
+
+    return buffer;
+  }
+
+  async function cleanup() {
+    clearInterval(sessionTimer);
+
+    if (micInstance) {
+      try {
+        micInstance.stop();
+      } catch {}
     }

-
-
-
+    if (ws.readyState === WebSocket.OPEN) {
+      ws.close();
+    }
+
+    // Report session end to our API
+    try {
+      await fetch(`${RAMP_API_URL}/api/ramp/realtime/session/${sessionId}/end`, {
         method: 'POST',
         headers: {
           'Authorization': `Bearer ${authToken}`,
           'Content-Type': 'application/json',
         },
-        body: JSON.stringify({
-          product: 'ramp',
-          text,
-          voice: 'nova',
-        }),
+        body: JSON.stringify({ durationSeconds: sessionDurationSeconds }),
       });
+    } catch {}

-
-
-
+    // Save usage
+    const totalSessionMinutes = sessionDurationSeconds / 60;
+    usage.totalMinutes += totalSessionMinutes;
+    usage.sessions.push({
+      date: new Date().toISOString(),
+      project: path.basename(projectPath),
+      minutes: totalSessionMinutes,
+      type: 'realtime',
+    });
+    await fs.writeFile(usageFile, JSON.stringify(usage, null, 2));
+
+    console.log(chalk.cyan('\n👋 Ending voice session...'));
+    console.log(chalk.dim(`Session: ${totalSessionMinutes.toFixed(2)} min`));
+    console.log(chalk.dim(`Total usage: ${usage.totalMinutes.toFixed(2)} min\n`));
+
+    process.exit(0);
+  }

-
+  // Handle Ctrl+C
+  process.on('SIGINT', cleanup);
+
+  // Keep process alive
+  await new Promise(() => {});
+}
+
+// Text input with voice output (fallback mode)
+async function runTextVoiceMode(authToken, context, projectPath, usage, usageFile, sessionStart, RAMP_API_URL) {
+  const API_URL = process.env.ENTITLEMENT_API_URL || 'https://entitlement-service.rian-19c.workers.dev';
+  const conversationHistory = [];
+  let sessionMinutes = 0;
+
+  console.log(chalk.dim('Using text input with voice output.\n'));
+
+  // Helper function to call backend chat API
+  async function chatWithBackend(messages, systemPrompt) {
+    const response = await fetch(`${API_URL}/ai/chat`, {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${authToken}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        product: 'ramp',
+        messages,
+        system: systemPrompt,
+        max_tokens: 500,
+      }),
+    });
+
+    if (!response.ok) {
+      const error = await response.json().catch(() => ({}));
+      throw new Error(error.message || `API error: ${response.status}`);
     }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    return await response.json();
+  }
+
+  // Helper function to call backend TTS API
+  async function textToSpeech(text) {
+    const response = await fetch(`${API_URL}/ai/tts`, {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${authToken}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        product: 'ramp',
+        text,
+        voice: 'nova',
+      }),
+    });
+
+    if (!response.ok) {
+      throw new Error(`TTS error: ${response.status}`);
+    }
+
+    return Buffer.from(await response.arrayBuffer());
+  }
+
+  // Voice interaction loop
+  async function voiceLoop() {
+    while (true) {
+      try {
+        const { input } = await inquirer.prompt([{
+          type: 'input',
+          name: 'input',
+          message: chalk.green('🎤 You:'),
+          prefix: ''
+        }]);

-
-
+        if (!input.trim()) continue;
+        if (input.toLowerCase() === 'exit' || input.toLowerCase() === 'quit') {
+          break;
+        }

-
-
+        const startTime = Date.now();
+        conversationHistory.push({ role: 'user', content: input });

-
+        // Get AI response
+        const thinkingSpinner = ora('Thinking...').start();
+
+        const systemPrompt = `You are Ramp, a voice assistant helping a developer understand a codebase.
 Keep responses concise (2-3 sentences) since they'll be spoken aloud.

 Project context:
@@ -867,77 +1197,78 @@ ${context}

 Be helpful, friendly, and practical. Reference specific files when relevant.`;

-
-
-
-
-        thinkingSpinner.stop();
+        const chatResponse = await chatWithBackend(conversationHistory, systemPrompt);
+        const answer = chatResponse.content || chatResponse.text || '';
+        conversationHistory.push({ role: 'assistant', content: answer });

-
-        const speechSpinner = ora('Speaking...').start();
-
-        try {
-          const audioBuffer = await textToSpeech(answer);
+        thinkingSpinner.stop();

-
-
-          await fs.writeFile(audioPath, audioBuffer);
+        // Generate speech
+        const speechSpinner = ora('Speaking...').start();

-
-
+        try {
+          const audioBuffer = await textToSpeech(answer);

-
-
-
-          } else if (process.platform === 'linux') {
-            await execAsync(`mpg123 "${audioPath}" 2>/dev/null || play "${audioPath}" 2>/dev/null`).catch(() => {});
-          }
+          // Save and play audio
+          const audioPath = `/tmp/ramp-voice-${Date.now()}.mp3`;
+          await fs.writeFile(audioPath, audioBuffer);

-
-
+          speechSpinner.stop();
+          console.log(chalk.cyan(`\n🔊 Ramp: ${answer}\n`));

-
-
-
-
+          // Play audio (macOS)
+          if (process.platform === 'darwin') {
+            await execAsync(`afplay "${audioPath}"`).catch(() => {});
+          } else if (process.platform === 'linux') {
+            await execAsync(`mpg123 "${audioPath}" 2>/dev/null || play "${audioPath}" 2>/dev/null`).catch(() => {});
           }

-          //
-
-          sessionMinutes += elapsed;
+          // Clean up
+          await fs.unlink(audioPath).catch(() => {});

-        } catch (
-
-
+        } catch (ttsError) {
+          speechSpinner.stop();
+          // Fallback to text if TTS fails
+          console.log(chalk.cyan(`\n💬 Ramp: ${answer}\n`));
         }
-      }
-    }

-
-
-
-    await saveUsage();
-    process.exit(0);
-  });
-
-  async function saveUsage() {
-    const totalSessionMinutes = (Date.now() - sessionStart) / 1000 / 60;
-    usage.totalMinutes += totalSessionMinutes;
-    usage.sessions.push({
-      date: new Date().toISOString(),
-      project: path.basename(projectPath),
-      minutes: totalSessionMinutes
-    });
-    await fs.writeFile(usageFile, JSON.stringify(usage, null, 2));
+        // Track usage
+        const elapsed = (Date.now() - startTime) / 1000 / 60;
+        sessionMinutes += elapsed;

-
-
+      } catch (error) {
+        if (error.name === 'ExitPromptError') break;
+        console.error(chalk.red(`Error: ${error.message}`));
+      }
     }
+  }

-
+  // Handle exit
+  process.on('SIGINT', async () => {
+    console.log(chalk.cyan('\n\n👋 Ending voice session...\n'));
     await saveUsage();
+    process.exit(0);
   });

+  async function saveUsage() {
+    const totalSessionMinutes = (Date.now() - sessionStart) / 1000 / 60;
+    usage.totalMinutes += totalSessionMinutes;
+    usage.sessions.push({
+      date: new Date().toISOString(),
+      project: path.basename(projectPath),
+      minutes: totalSessionMinutes,
+      type: 'text',
+    });
+    await fs.writeFile(usageFile, JSON.stringify(usage, null, 2));
+
+    console.log(chalk.dim(`Session: ${totalSessionMinutes.toFixed(2)} min`));
+    console.log(chalk.dim(`Total usage: ${usage.totalMinutes.toFixed(2)} min\n`));
+  }
+
+  await voiceLoop();
+  await saveUsage();
+}
+
 // Voice usage stats
 program
   .command('voice:usage')
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "rampup",
-  "version": "0.1.4",
+  "version": "0.1.6",
   "description": "Ramp - Understand any codebase in hours. AI-powered developer onboarding CLI.",
   "type": "module",
   "bin": {
@@ -42,8 +42,10 @@
     "commander": "^11.1.0",
     "firebase": "^10.14.1",
     "inquirer": "^8.2.6",
+    "mic": "^2.1.2",
     "open": "^9.1.0",
     "openai": "^4.0.0",
-    "ora": "^5.4.1"
+    "ora": "^5.4.1",
+    "ws": "^8.18.0"
   }
 }
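
Note: the two added dependencies back the new realtime path: ws supplies the WebSocket client and mic the raw PCM capture. Below is a minimal sketch of the capture-to-stream loop that runRealtimeVoiceMode builds on; CLIENT_SECRET is a placeholder for the ephemeral token the CLI fetches from /api/ramp/realtime/session, not a variable in the package.

// Sketch only: stream mic PCM16 to the OpenAI Realtime API as base64 frames,
// mirroring the input_audio_buffer.append messages used in index.js.
import WebSocket from 'ws';
import mic from 'mic';

const ws = new WebSocket(
  'wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-12-17',
  {
    headers: {
      // CLIENT_SECRET stands in for the ephemeral session token.
      Authorization: `Bearer ${process.env.CLIENT_SECRET}`,
      'OpenAI-Beta': 'realtime=v1',
    },
  }
);

ws.on('open', () => {
  // 24 kHz mono signed 16-bit matches the session's input_audio_format.
  const capture = mic({ rate: '24000', channels: '1', bitwidth: '16', encoding: 'signed-integer' });
  capture.getAudioStream().on('data', (chunk) => {
    ws.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: chunk.toString('base64') }));
  });
  capture.start();
});

// Log incoming event types to watch the server-side VAD turn cycle.
ws.on('message', (data) => console.log(JSON.parse(data.toString()).type));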