vibeteam 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ import { createServer } from 'http';
12
12
  import { WebSocketServer, WebSocket } from 'ws';
13
13
  import { watch } from 'chokidar';
14
14
  import { readFileSync, writeFileSync, existsSync, appendFileSync, mkdirSync, unlinkSync, statSync, readdirSync } from 'fs';
15
- import { exec, execFile, spawn } from 'child_process';
15
+ import { exec, execFile, execFileSync, spawn } from 'child_process';
16
16
  import { dirname, resolve, join, extname } from 'path';
17
17
  import { hostname, tmpdir } from 'os';
18
18
  import { StringDecoder } from 'string_decoder';
@@ -79,6 +79,7 @@ const TMUX_SESSION = process.env.VIBETEAM_TMUX_SESSION ?? DEFAULTS.TMUX_SESSION;
79
79
  const SESSIONS_FILE = resolve(expandHome(process.env.VIBETEAM_SESSIONS_FILE ?? DEFAULTS.SESSIONS_FILE));
80
80
  const TILES_FILE = resolve(expandHome(process.env.VIBETEAM_TILES_FILE ?? '~/.vibeteam/data/tiles.json'));
81
81
  const VOICE_CONFIG_FILE = resolve(expandHome(process.env.VIBETEAM_VOICE_CONFIG_FILE ?? '~/.vibeteam/data/voice-config.json'));
82
+ const RELAY_STATE_FILE = resolve(expandHome(process.env.VIBETEAM_RELAY_STATE_FILE ?? '~/.vibeteam/data/relay-state.json'));
82
83
  /** Time before a "working" session auto-transitions to idle (failsafe for missed events) */
83
84
  const WORKING_TIMEOUT_MS = 300_000; // 5 minutes (increased to accommodate long Claude thinking periods)
84
85
  /** Maximum request body size (1MB) - prevents DoS via memory exhaustion */
@@ -110,9 +111,10 @@ const DEEPGRAM_API_KEY_ENV = 'DEEPGRAM_API_KEY';
110
111
  const OPENROUTER_API_KEY_ENV = 'OPENROUTER_API_KEY';
111
112
  const VOICE_CLEANUP_MODEL_ENV = 'VOICE_CLEANUP_MODEL';
112
113
  const VOICE_CUSTOM_VOCAB_ENV = 'VOICE_CUSTOM_VOCAB';
114
+ const DEEPGRAM_LANGUAGE_ENV = 'DEEPGRAM_LANGUAGE';
113
115
  /** Deepgram transcription settings */
114
116
  const DEEPGRAM_MODEL = process.env.DEEPGRAM_MODEL?.trim() || 'nova-3';
115
- const DEEPGRAM_LANGUAGE = process.env.DEEPGRAM_LANGUAGE?.trim() || 'multi';
117
+ const DEEPGRAM_LANGUAGE_DEFAULT = 'multi';
116
118
  const VOICE_CLEANUP_MODEL_DEFAULT = 'google/gemini-2.5-flash';
117
119
  /**
118
120
  * Validate WebSocket origin header to prevent CSRF attacks.
@@ -3204,6 +3206,11 @@ let voiceCleanupModel = VOICE_CLEANUP_MODEL_DEFAULT;
3204
3206
  /** Custom vocabulary array (effective; from file > env). Populated lazily by
3205
3207
  * getCustomVocabulary() when the cleanup pass runs. */
3206
3208
  let voiceCustomVocab = [];
3209
+ /** Deepgram transcription language (effective; from file > env > default).
3210
+ * 'multi' = auto-detect (Nova-3 multilingual). Codes like 'pt-BR', 'en-US'
3211
+ * force a specific language — important for users whose accent or short
3212
+ * utterances confuse the auto-detector and end up transcribed as English. */
3213
+ let voiceLanguage = DEEPGRAM_LANGUAGE_DEFAULT;
3207
3214
  /** Track whether each effective value came from the persisted config UI vs
3208
3215
  * the env var fallback. Used by `/voice/config` GET so the UI can show
3209
3216
  * "set via UI" / "set via env" / "unset" without leaking the actual key. */
@@ -3212,6 +3219,7 @@ let voiceConfigSource = {
3212
3219
  openrouter: 'unset', // 'file' | 'env' | 'unset'
3213
3220
  model: 'default', // 'file' | 'env' | 'default'
3214
3221
  customVocab: 'unset', // 'file' | 'env' | 'unset'
3222
+ language: 'default', // 'file' | 'env' | 'default'
3215
3223
  };
3216
3224
  /** Lazy-instantiated OpenAI client pointed at OpenRouter (one shared instance,
3217
3225
  * rebuilt whenever the API key changes). */
@@ -3250,6 +3258,7 @@ function writeVoiceConfigFile(config) {
3250
3258
  if (config.deepgramApiKey) clean.deepgramApiKey = String(config.deepgramApiKey);
3251
3259
  if (config.openrouterApiKey) clean.openrouterApiKey = String(config.openrouterApiKey);
3252
3260
  if (config.voiceCleanupModel) clean.voiceCleanupModel = String(config.voiceCleanupModel);
3261
+ if (config.voiceLanguage) clean.voiceLanguage = String(config.voiceLanguage);
3253
3262
  if (Array.isArray(config.customVocabulary)) {
3254
3263
  clean.customVocabulary = config.customVocabulary
3255
3264
  .map(s => String(s).trim())
@@ -3278,6 +3287,7 @@ function reloadVoiceConfig() {
3278
3287
  const envOpenRouter = process.env[OPENROUTER_API_KEY_ENV]?.trim() || null;
3279
3288
  const envModel = process.env[VOICE_CLEANUP_MODEL_ENV]?.trim() || null;
3280
3289
  const envVocabRaw = process.env[VOICE_CUSTOM_VOCAB_ENV]?.trim() || null;
3290
+ const envLanguage = process.env[DEEPGRAM_LANGUAGE_ENV]?.trim() || null;
3281
3291
 
3282
3292
  deepgramApiKey = file.deepgramApiKey || envDeepgram;
3283
3293
  voiceConfigSource.deepgram = file.deepgramApiKey ? 'file' : (envDeepgram ? 'env' : 'unset');
@@ -3288,6 +3298,9 @@ function reloadVoiceConfig() {
3288
3298
  voiceCleanupModel = file.voiceCleanupModel || envModel || VOICE_CLEANUP_MODEL_DEFAULT;
3289
3299
  voiceConfigSource.model = file.voiceCleanupModel ? 'file' : (envModel ? 'env' : 'default');
3290
3300
 
3301
+ voiceLanguage = file.voiceLanguage || envLanguage || DEEPGRAM_LANGUAGE_DEFAULT;
3302
+ voiceConfigSource.language = file.voiceLanguage ? 'file' : (envLanguage ? 'env' : 'default');
3303
+
3291
3304
  if (Array.isArray(file.customVocabulary) && file.customVocabulary.length) {
3292
3305
  voiceCustomVocab = file.customVocabulary;
3293
3306
  voiceConfigSource.customVocab = 'file';
@@ -3322,7 +3335,7 @@ function reloadVoiceConfig() {
3322
3335
  }
3323
3336
  }
3324
3337
 
3325
- log(`[voice-config] effective: deepgram=${voiceConfigSource.deepgram}, openrouter=${voiceConfigSource.openrouter}, model=${voiceCleanupModel} (${voiceConfigSource.model}), vocab=${voiceCustomVocab.length} (${voiceConfigSource.customVocab})`);
3338
+ log(`[voice-config] effective: deepgram=${voiceConfigSource.deepgram}, openrouter=${voiceConfigSource.openrouter}, model=${voiceCleanupModel} (${voiceConfigSource.model}), language=${voiceLanguage} (${voiceConfigSource.language}), vocab=${voiceCustomVocab.length} (${voiceConfigSource.customVocab})`);
3326
3339
  }
3327
3340
 
3328
3341
  /** Backwards-compat shim — still called from server startup. */
@@ -5409,6 +5422,22 @@ function startVoiceSession(ws, payload = {}) {
5409
5422
  // output) — falls back to no-context cleanup if not provided.
5410
5423
  const sessionId = typeof payload?.sessionId === 'string' ? payload.sessionId : null;
5411
5424
  const tmuxSession = typeof payload?.tmuxSession === 'string' ? payload.tmuxSession : null;
5425
+ // Diagnostic info from the client — what mimeType / sample rate /
5426
+ // platform produced this audio. Critical for debugging the "Deepgram
5427
+ // multi auto-detect picks English on mobile audio" failure mode,
5428
+ // which we suspect correlates with iOS Safari's mp4/aac path vs
5429
+ // desktop Chrome's webm/opus. Logged but never required.
5430
+ const audioInfo = payload?.audioInfo && typeof payload.audioInfo === 'object'
5431
+ ? payload.audioInfo
5432
+ : null;
5433
+ if (audioInfo) {
5434
+ const sr = typeof audioInfo.sampleRate === 'number' ? audioInfo.sampleRate : '?';
5435
+ const ch = typeof audioInfo.channelCount === 'number' ? audioInfo.channelCount : '?';
5436
+ const ua = typeof audioInfo.ua === 'string' ? audioInfo.ua.slice(0, 100) : '';
5437
+ log(`[voice] start lang=${voiceLanguage} mime=${audioInfo.mimeType || '?'} sr=${sr} ch=${ch} ua="${ua}"`);
5438
+ } else {
5439
+ log(`[voice] start lang=${voiceLanguage} (no client audioInfo)`);
5440
+ }
5412
5441
  // Accumulate FINAL transcripts here; the post-processor runs once on
5413
5442
  // voice_stop with the full utterance so cleanup has the full sentence.
5414
5443
  const finalParts = [];
@@ -5416,12 +5445,16 @@ function startVoiceSession(ws, payload = {}) {
5416
5445
  const deepgram = createClient(deepgramApiKey);
5417
5446
  // model: nova-3 + language: 'multi' lets Deepgram auto-detect the
5418
5447
  // language per utterance — important for users who code-switch
5419
- // (PT-BR + EN). Encoding/sample_rate are intentionally omitted
5420
- // browsers default to webm/opus from MediaRecorder and Deepgram
5421
- // auto-detects from the WebM container header.
5448
+ // (PT-BR + EN). Auto-detect can fail on short utterances or unusual
5449
+ // accents and silently fall back to English transcription, so the
5450
+ // user can override `language` via Settings (persisted in
5451
+ // voice-config.json) to force a specific BCP-47 code like 'pt-BR'.
5452
+ // Encoding/sample_rate are intentionally omitted — browsers default
5453
+ // to webm/opus from MediaRecorder (or mp4/aac on iOS Safari) and
5454
+ // Deepgram auto-detects the codec from the container header.
5422
5455
  const connection = deepgram.listen.live({
5423
5456
  model: DEEPGRAM_MODEL,
5424
- language: DEEPGRAM_LANGUAGE,
5457
+ language: voiceLanguage,
5425
5458
  smart_format: true,
5426
5459
  interim_results: true,
5427
5460
  // utterance_end_ms: silence threshold for the UtteranceEnd event.
@@ -5440,12 +5473,42 @@ function startVoiceSession(ws, payload = {}) {
5440
5473
  connection.on(LiveTranscriptionEvents.Transcript, (data) => {
5441
5474
  const transcript = data.channel?.alternatives?.[0]?.transcript;
5442
5475
  if (!transcript) return;
5476
+ // Nova-3 multilingual returns the per-utterance detected language.
5477
+ // The exact field varies across SDK / API versions, so probe the
5478
+ // documented spots: `channel.languages[0]` (current), top-level
5479
+ // `detected_language`, and `alternatives[0].languages[0]` (older).
5480
+ // Surfacing this to the client lets the user SEE when auto-detect
5481
+ // is misclassifying their audio (the suspected mobile bug).
5482
+ const detectedLanguage = (Array.isArray(data.channel?.languages) && data.channel.languages[0])
5483
+ || data.detected_language
5484
+ || (Array.isArray(data.channel?.alternatives?.[0]?.languages) && data.channel.alternatives[0].languages[0])
5485
+ || null;
5486
+ if (data.is_final) {
5487
+ log(`[voice-final] lang=${detectedLanguage || 'unknown'} text="${transcript.slice(0, 80)}"`);
5488
+ finalParts.push(transcript);
5489
+ }
5443
5490
  ws.send(JSON.stringify({
5444
5491
  type: 'voice_transcript',
5445
- payload: { transcript, isFinal: !!data.is_final }
5492
+ payload: { transcript, isFinal: !!data.is_final, detectedLanguage }
5446
5493
  }));
5447
- if (data.is_final) {
5448
- finalParts.push(transcript);
5494
+ });
5495
+ connection.on(LiveTranscriptionEvents.Metadata, (data) => {
5496
+ // Deepgram sends a Metadata event after Open with model + audio
5497
+ // format info (and again on close with usage stats). Log it once
5498
+ // — it tells us EXACTLY what codec/sample-rate Deepgram thinks
5499
+ // it's processing, which is the smoking gun for codec-related
5500
+ // language-detection failures.
5501
+ try {
5502
+ const summary = {
5503
+ request_id: data.request_id,
5504
+ model_info: data.model_info,
5505
+ sha256: data.sha256,
5506
+ duration: data.duration,
5507
+ channels: data.channels,
5508
+ };
5509
+ log(`[voice-deepgram-metadata] ${JSON.stringify(summary)}`);
5510
+ } catch {
5511
+ log(`[voice-deepgram-metadata] (unloggable)`);
5449
5512
  }
5450
5513
  });
5451
5514
  connection.on(LiveTranscriptionEvents.UtteranceEnd, () => {
@@ -5578,20 +5641,22 @@ function captureRecentPaneOutput(tmuxSession, lineCount = 30) {
5578
5641
  }
5579
5642
 
5580
5643
/**
 * System prompt for the voice cleanup pass: the model is asked to add
 * capitalization/punctuation and fix name spellings found in CONTEXT, and to
 * leave every other word of the dictated transcript untouched.
 *
 * Every bullet under "DO NOT:" must name a FORBIDDEN action. The
 * self-correction bullet is therefore phrased as "Remove …" (the previous
 * "Preserve …" wording, read under that heading, told the model to strip
 * repetitions — the opposite of its own examples).
 */
const VOICE_CLEANUP_SYSTEM_PROMPT =
`You are a near-verbatim transcription formatter for VibeTeam (a CLI for managing AI coding agents). The user dictated text that will be inserted into a chat input. Your only job is to add formatting — NEVER edit content.

DO:
1. Add capitalization (sentence starts, proper nouns).
2. Add punctuation (.,?!) only where natural pauses make it obvious. When unsure, leave it out.
3. Fix obvious phonetic misspellings of NAMES (people, files, commands, products) that appear in CONTEXT below — and ONLY names. Example: "v8 team" → "VibeTeam" if VibeTeam is in CONTEXT. Never replace common words.

DO NOT:
- Drop ANY words. Keep "uh", "um", "tipo", "sabe", "like", "então" — every word the user said stays. The transcript is precious; let the user delete what they don't want.
- Paraphrase, summarize, or shorten. The output's word count should be within ±2 words of the input (only changes are name corrections from CONTEXT).
- Translate. PT-BR stays PT-BR, EN stays EN. Mixed-language input is preserved as-is.
- Remove self-corrections or repetitions — keep them verbatim. If the user said "vou testar agora agora", keep both "agora"s. If they said "abre o, abre o arquivo", keep both fragments.
- Insert words/names from CONTEXT — context is reference only, never source.
- Add quotation marks unless the user clearly delimited a quoted phrase.

OUTPUT: Only the formatted transcript. No prefix, no explanation, no markdown. If the input is empty or pure silence, output exactly: EMPTY`;
5595
5660
 
5596
5661
  /**
5597
5662
  * Run the contextual cleanup pass on a raw Deepgram transcript via OpenRouter.
@@ -5642,7 +5707,11 @@ async function voicePostProcess(ws, rawTranscript, sessionId, tmuxSession) {
5642
5707
  { role: 'user', content: userMessage },
5643
5708
  ],
5644
5709
  temperature: 0,
5645
- max_tokens: 600,
5710
+ // 2000 covers ~1500 words of cleaned PT-BR / EN text — enough for
5711
+ // a 3–5 minute monologue. The previous 600-token cap silently
5712
+ // truncated long dictations, so the user's last sentences would
5713
+ // simply disappear from the output.
5714
+ max_tokens: 2000,
5646
5715
  });
5647
5716
  const cleaned = (completion.choices?.[0]?.message?.content ?? '').trim();
5648
5717
  const text = cleaned === 'EMPTY' ? '' : cleaned;
@@ -5664,6 +5733,203 @@ async function voicePostProcess(ws, rawTranscript, sessionId, tmuxSession) {
5664
5733
  }
5665
5734
  }
5666
5735
  }
5736
+
5737
+ // ============================================================================
5738
+ // API usage discovery — Claude (OAuth) / Deepgram / OpenRouter balance.
5739
+ // Each function caches its successful result so we don't re-do expensive
5740
+ // keychain enumerations on every poll.
5741
+ // ============================================================================
5742
+
5743
/** Remembers WHERE valid credentials were last found — `{ kind: 'file', path }`
 * or `{ kind: 'keychain', service }` — so later calls re-read that single
 * source instead of repeating the dump-keychain enumeration. */
let _claudeCredsCache = null;

/** True when `data` carries a truthy `claudeAiOauth.accessToken`. */
function _hasClaudeAccessToken(data) {
  return Boolean(data?.claudeAiOauth?.accessToken);
}

/** Read + parse a credentials JSON file; resolves to the object, or null when
 * the file is missing, unreadable, malformed, or has no access token. */
async function _readClaudeCredsFile(path) {
  const { readFile } = await import('fs/promises');
  try {
    const data = JSON.parse(await readFile(path, 'utf8'));
    return _hasClaudeAccessToken(data) ? data : null;
  } catch {
    return null;
  }
}

/** Run the `security` CLI asynchronously and resolve to its stdout. Rejects
 * on non-zero exit, timeout, or when the binary is unavailable. */
async function _runSecurityCli(args, options = {}) {
  const { execFile } = await import('child_process');
  const { promisify } = await import('util');
  const { stdout } = await promisify(execFile)('security', args, { encoding: 'utf8', ...options });
  return stdout;
}

/** Read + parse one keychain generic-password entry; resolves to the
 * credentials object, or null on any failure / missing access token. */
async function _readClaudeCredsKeychain(service) {
  try {
    const out = await _runSecurityCli(['find-generic-password', '-s', service, '-w'], { timeout: 5000 });
    const data = JSON.parse(out.trim());
    return _hasClaudeAccessToken(data) ? data : null;
  } catch {
    return null;
  }
}

/**
 * Find the user's Claude Code OAuth credentials object across the various
 * places Claude Code has stored them over time:
 *   1. ~/.claude/.credentials.json (older installs)
 *   2. Keychain entry "Claude Code-credentials" (older format)
 *   3. Keychain entry "Claude Code-credentials-<suffix>" (current format —
 *      the suffix is per-install)
 *
 * Keychain lookups (2, 3) only run when `process.platform` is 'darwin'.
 * All subprocess calls are asynchronous so a slow `security` invocation
 * (up to the 10 s dump timeout) does not stall the server's event loop.
 *
 * @returns {Promise<object|null>} the parsed credentials object (with
 *   `claudeAiOauth.accessToken`) or null if nothing is found.
 */
async function findClaudeOAuthCredentials() {
  if (_claudeCredsCache) {
    // Tokens expire — re-validate by re-reading the cached source.
    let cached = null;
    if (_claudeCredsCache.kind === 'file') {
      cached = await _readClaudeCredsFile(_claudeCredsCache.path);
    } else if (_claudeCredsCache.kind === 'keychain') {
      cached = await _readClaudeCredsKeychain(_claudeCredsCache.service);
    }
    if (cached) return cached;
    _claudeCredsCache = null; // cached source no longer valid — do a fresh lookup
  }

  // Method 1: file at ~/.claude/.credentials.json
  const { join } = await import('path');
  const { homedir } = await import('os');
  let credPath = null;
  try {
    credPath = join(homedir(), '.claude', '.credentials.json');
  } catch { /* home directory could not be resolved — try keychain */ }
  if (credPath) {
    const fromFile = await _readClaudeCredsFile(credPath);
    if (fromFile) {
      _claudeCredsCache = { kind: 'file', path: credPath };
      return fromFile;
    }
  }

  if (process.platform !== 'darwin') return null;

  // Method 2: the unsuffixed keychain entry (older format) is always tried first.
  const candidates = ['Claude Code-credentials'];

  // Method 3: enumerate suffixed entries via dump-keychain (slow, only on a
  // cache miss). Only the service names are collected here; the secrets are
  // read per-candidate below.
  try {
    const dump = await _runSecurityCli(['dump-keychain'],
      { timeout: 10000, maxBuffer: 16 * 1024 * 1024 });
    const seen = new Set(candidates);
    for (const m of dump.matchAll(/"svce"<blob>="(Claude Code-credentials-[^"]+)"/g)) {
      if (!seen.has(m[1])) {
        seen.add(m[1]);
        candidates.push(m[1]);
      }
    }
  } catch { /* dump failed, only try the unsuffixed candidate */ }

  for (const service of candidates) {
    const fromKeychain = await _readClaudeCredsKeychain(service);
    if (fromKeychain) {
      _claudeCredsCache = { kind: 'keychain', service };
      return fromKeychain;
    }
  }
  return null;
}
5827
+
5828
/** Cached Deepgram balance fetch. Successful results are cached for `ttl`
 * (5 min). On HTTP 429 the backoff doubles (120 s first, capped at 10 min)
 * and NO request is sent until `blockedUntil` — this holds even when there
 * is no earlier successful result to serve. While blocked, the stale
 * success (if any) or the recorded 429 error is returned with `cached: true`.
 * Resolves to { ok, balance, currency, projectName, error }. */
let _deepgramUsageCache = {
  data: null, timestamp: 0, ttl: 300_000,
  backoff: 0, blockedUntil: 0, lastError: null,
};
async function fetchDeepgramUsage() {
  const cache = _deepgramUsageCache;
  const now = Date.now();
  if (cache.data && (now - cache.timestamp) < cache.ttl) {
    return { ...cache.data, cached: true };
  }
  if (now < cache.blockedUntil) {
    // Still inside a rate-limit backoff window: answer from memory only.
    return { ...(cache.data ?? cache.lastError), cached: true };
  }
  if (!deepgramApiKey) {
    return { ok: false, error: 'DEEPGRAM_API_KEY not configured' };
  }

  // Records a 429 so the next polls are short-circuited above; other
  // statuses pass through untouched.
  const failed = (status, result) => {
    if (status === 429) {
      cache.backoff = Math.min((cache.backoff || 60_000) * 2, 600_000);
      cache.blockedUntil = Date.now() + cache.backoff;
      cache.lastError = result;
    }
    return result;
  };
  // Bounded wait: a hung connection surfaces as an error in the catch below
  // instead of leaving the caller pending forever.
  const get = (url) => fetch(url, {
    headers: { Authorization: `Token ${deepgramApiKey}` },
    signal: AbortSignal.timeout(10_000),
  });

  try {
    // 1. List projects to discover the project_id.
    const projectsRes = await get('https://api.deepgram.com/v1/projects');
    if (!projectsRes.ok) {
      const status = projectsRes.status;
      return failed(status, {
        ok: false,
        error: `Deepgram projects HTTP ${status}`,
        hint: status === 401 ? 'API key invalid — re-paste it in Settings → Voice'
          : status === 403 ? 'API key missing scopes — see hint below'
          : null,
      });
    }
    const projectsData = await projectsRes.json();
    const project = projectsData.projects?.[0];
    if (!project?.project_id) return { ok: false, error: 'No Deepgram project found' };

    // 2. Get balances for that project.
    const balancesRes = await get(
      `https://api.deepgram.com/v1/projects/${encodeURIComponent(project.project_id)}/balances`
    );
    if (!balancesRes.ok) {
      const status = balancesRes.status;
      // 403 = key works for transcription but lacks billing:read scope.
      // We can't fix this without the user creating a new key — point
      // them at the console.
      return failed(status, {
        ok: false,
        error: `Deepgram balances HTTP ${status}`,
        hint: status === 403
          ? 'Default API keys can\'t read billing — check console.deepgram.com directly, or create a new key with billing:read scope'
          : null,
        consoleUrl: 'https://console.deepgram.com',
      });
    }
    const balData = await balancesRes.json();
    // Sum across all balance buckets; non-numeric amounts count as 0.
    const total = (balData.balances || []).reduce(
      (acc, b) => acc + (typeof b.amount === 'number' ? b.amount : 0), 0);
    const currency = balData.balances?.[0]?.units || 'usd';
    const result = { ok: true, balance: total, currency, projectName: project.name };
    cache.data = result;
    cache.timestamp = now;
    cache.backoff = 0;
    cache.blockedUntil = 0;
    cache.lastError = null;
    return result;
  } catch (err) {
    return { ok: false, error: err.message || String(err) };
  }
}
5894
+
5895
/** Cached OpenRouter credits fetch. Successful results are cached for `ttl`
 * (5 min). On HTTP 429 the backoff doubles (120 s first, capped at 10 min)
 * and no request is sent until `blockedUntil`, whether or not an earlier
 * success exists; while blocked the stale success or the recorded 429 error
 * is returned with `cached: true`.
 * Resolves to { ok, totalCredits, totalUsage, remaining, currency, error }. */
let _openrouterUsageCache = {
  data: null, timestamp: 0, ttl: 300_000,
  backoff: 0, blockedUntil: 0, lastError: null,
};
async function fetchOpenRouterUsage() {
  const cache = _openrouterUsageCache;
  const now = Date.now();
  if (cache.data && (now - cache.timestamp) < cache.ttl) {
    return { ...cache.data, cached: true };
  }
  if (now < cache.blockedUntil) {
    // Still inside a rate-limit backoff window: answer from memory only.
    return { ...(cache.data ?? cache.lastError), cached: true };
  }
  if (!openrouterApiKey) {
    return { ok: false, error: 'OPENROUTER_API_KEY not configured' };
  }
  try {
    const res = await fetch('https://openrouter.ai/api/v1/credits', {
      headers: { Authorization: `Bearer ${openrouterApiKey}` },
      // Bounded wait: a hung connection becomes an error in the catch below.
      signal: AbortSignal.timeout(10_000),
    });
    if (!res.ok) {
      const failure = { ok: false, error: `OpenRouter HTTP ${res.status}` };
      if (res.status === 429) {
        cache.backoff = Math.min((cache.backoff || 60_000) * 2, 600_000);
        cache.blockedUntil = Date.now() + cache.backoff;
        cache.lastError = failure;
      }
      return failure;
    }
    const data = await res.json();
    const totalCredits = data?.data?.total_credits ?? 0;
    const totalUsage = data?.data?.total_usage ?? 0;
    const result = {
      ok: true,
      totalCredits,
      totalUsage,
      // Clamped so over-usage never shows as a negative balance.
      remaining: Math.max(0, totalCredits - totalUsage),
      currency: 'usd',
    };
    cache.data = result;
    cache.timestamp = now;
    cache.backoff = 0;
    cache.blockedUntil = 0;
    cache.lastError = null;
    return result;
  } catch (err) {
    return { ok: false, error: err.message || String(err) };
  }
}
5932
+
5667
5933
  /**
5668
5934
  * Link a Claude Code session ID to a managed session
5669
5935
  */
@@ -5983,6 +6249,289 @@ function hashAuthToken(token) {
5983
6249
  return createHash('sha256').update(token).digest('hex');
5984
6250
  }
5985
6251
 
6252
+ // ---- Local pairing flow (relay-independent fallback) ----
6253
+ //
6254
+ // The Fly relay's /pair endpoint enforces 10 codes/IP/hour. When a user
6255
+ // burns through that (e.g. backend restart loses the relay session and
6256
+ // they retry pairing), they're locked out for up to an hour.
6257
+ //
6258
+ // This local flow gives the same security model as the relay — 6-digit
6259
+ // code, 5-minute TTL, single-use, brute-force-rate-limited — but the
6260
+ // code-issuance side is loopback-only, so an external attacker can't
6261
+ // trigger code creation. Mobile claims via Funnel-accessible /pair-claim.
6262
+ //
6263
+ // State is in-memory only (codes are intentionally short-lived).
6264
// Outstanding local pairing codes: code -> { createdAt } (epoch ms).
const localPairCodes = new Map();
// Lifetime of a local pairing code: 5 minutes.
const LOCAL_PAIR_TTL_MS = 300_000;
// Claim-attempt counters: ip -> { count, windowStart } (epoch ms).
const pairClaimAttempts = new Map();
// Length of one rate-limit window for claim attempts: 5 minutes.
const PAIR_CLAIM_WINDOW_MS = 300_000;
// Claim attempts allowed per IP within one window.
const PAIR_CLAIM_MAX_PER_WINDOW = 60;
6269
+
6270
/**
 * Generate a 6-digit, zero-padded pairing code ("000000"–"999999").
 *
 * The code is a short-lived secret, so it is drawn from the Web Crypto
 * CSPRNG (`globalThis.crypto.getRandomValues`) when the runtime exposes it.
 * Rejection sampling keeps the distribution uniform: 32-bit draws at or
 * above the largest multiple of 1,000,000 are discarded before the modulo.
 * Runtimes without a global Web Crypto object fall back to the previous
 * `Math.random()` behavior rather than failing.
 *
 * @returns {string} six decimal digits.
 */
function generateLocalPairCode() {
  const CODE_SPACE = 1_000_000;
  const webCrypto = globalThis.crypto;
  if (typeof webCrypto?.getRandomValues !== 'function') {
    // NOTE(review): not cryptographically secure — only reached on runtimes
    // that lack global Web Crypto.
    return String(Math.floor(Math.random() * CODE_SPACE)).padStart(6, '0');
  }
  const unbiasedLimit = Math.floor(0x1_0000_0000 / CODE_SPACE) * CODE_SPACE;
  const draw = new Uint32Array(1);
  do {
    webCrypto.getRandomValues(draw);
  } while (draw[0] >= unbiasedLimit);
  return String(draw[0] % CODE_SPACE).padStart(6, '0');
}
6273
/**
 * Drop every entry of `localPairCodes` whose `createdAt` is more than
 * `LOCAL_PAIR_TTL_MS` in the past.
 */
function pruneLocalPairCodes() {
  const oldestAllowed = Date.now() - LOCAL_PAIR_TTL_MS;
  // Deleting the entry currently being visited is safe during Map iteration.
  localPairCodes.forEach((entry, code) => {
    if (entry.createdAt < oldestAllowed) {
      localPairCodes.delete(code);
    }
  });
}
6279
/**
 * Best-effort client address for a request, used as a rate-limit key.
 *
 * Order of preference: first entry of the `X-Forwarded-For` header, then
 * the socket's remote address, then the literal 'unknown'. A header whose
 * first entry is blank (e.g. ", 1.2.3.4") is ignored so callers never get
 * an empty-string key.
 *
 * NOTE(review): X-Forwarded-For can be set by the client unless a trusted
 * proxy overwrites it — confirm the fronting proxy does so before relying
 * on this value for security decisions.
 *
 * @param {{ headers: object, socket?: { remoteAddress?: string } }} req
 * @returns {string}
 */
function getRequestIP(req) {
  const forwarded = req.headers['x-forwarded-for'];
  if (typeof forwarded === 'string') {
    const [firstHop = ''] = forwarded.split(',');
    const client = firstHop.trim();
    if (client) return client;
  }
  return req.socket?.remoteAddress || 'unknown';
}
6284
/** Time (epoch ms) of the last sweep of expired `pairClaimAttempts` entries. */
let _pairClaimLastSweepAt = 0;

/**
 * Fixed-window rate limiter for pairing-claim attempts, keyed by `ip`.
 *
 * Each key gets `PAIR_CLAIM_MAX_PER_WINDOW` attempts per
 * `PAIR_CLAIM_WINDOW_MS`; the window starts at the key's first attempt and
 * a new window opens once the old one has elapsed.
 *
 * At most once per window, entries whose window has elapsed are removed
 * from `pairClaimAttempts`. Such entries would be reset on their next use
 * anyway, so removal never changes a verdict — it only stops the map from
 * growing forever as new keys keep arriving.
 *
 * @param {string} ip rate-limit key.
 * @returns {boolean} true if the attempt is allowed, false if over the limit.
 */
function checkPairClaimRateLimit(ip) {
  const now = Date.now();
  if (now - _pairClaimLastSweepAt > PAIR_CLAIM_WINDOW_MS) {
    _pairClaimLastSweepAt = now;
    for (const [key, record] of pairClaimAttempts) {
      if (now - record.windowStart > PAIR_CLAIM_WINDOW_MS) pairClaimAttempts.delete(key);
    }
  }

  const entry = pairClaimAttempts.get(ip);
  if (!entry || now - entry.windowStart > PAIR_CLAIM_WINDOW_MS) {
    pairClaimAttempts.set(ip, { count: 1, windowStart: now });
    return true;
  }
  if (entry.count >= PAIR_CLAIM_MAX_PER_WINDOW) return false;
  entry.count++;
  return true;
}
6295
/**
 * Report whether the request's socket peer is a loopback address:
 * IPv4 127.0.0.1, IPv6 ::1, or the IPv4-mapped form ::ffff:127.0.0.1.
 * Only the socket address is inspected; request headers are not consulted.
 *
 * NOTE(review): traffic relayed by a reverse proxy running on this machine
 * would presumably also arrive from loopback — verify against the deployment.
 *
 * @param {{ socket?: { remoteAddress?: string } }} req
 * @returns {boolean}
 */
function isLoopbackRequest(req) {
  const LOOPBACK_PEERS = ['127.0.0.1', '::1', '::ffff:127.0.0.1'];
  const peer = req.socket?.remoteAddress || '';
  return LOOPBACK_PEERS.includes(peer);
}
6299
+
6300
+ // ---- Relay state persistence + auto-reconnect ----
6301
+ //
6302
+ // Without persistence, every backend restart loses the relay sessionToken
6303
+ // and orphans the mobile in its relay room. The mobile then sees its
6304
+ // relay-status pill go red and the user has to re-pair from desktop —
6305
+ // which is impossible if the user is mobile-only.
6306
+ //
6307
+ // With persistence + auto-reconnect:
6308
+ // - Backend writes its relay sessionToken to disk on every successful
6309
+ // 'paired' event.
6310
+ // - On boot, if a token exists on disk, backend opens a WS to the relay
6311
+ // with `?token=<saved>&role=host`. Relay re-creates the room (or
6312
+ // finds the existing one with the mobile still connected) and re-
6313
+ // emits 'paired'.
6314
+ // - On unexpected close, exponential backoff retry up to 30s.
6315
+ //
6316
+ // Net effect: backend can restart freely, mobile stays connected.
6317
+
6318
/**
 * Load the persisted relay state from `RELAY_STATE_FILE`.
 *
 * @returns {object|null} the parsed JSON value when it is a non-null object;
 *   null when the file is absent, unreadable, not valid JSON, or parses to a
 *   primitive / null.
 */
function readRelayState() {
  try {
    if (existsSync(RELAY_STATE_FILE)) {
      const state = JSON.parse(readFileSync(RELAY_STATE_FILE, 'utf-8'));
      if (state && typeof state === 'object') return state;
    }
  } catch {
    // Unreadable or corrupt state is treated the same as no state.
  }
  return null;
}
6326
/**
 * Persist `state` to `RELAY_STATE_FILE` as 2-space-indented JSON, creating
 * the parent directory first when it does not exist. The file is written
 * with mode 0o600. Failures are logged and never thrown.
 *
 * @param {object} state value to serialize.
 */
function writeRelayState(state) {
  try {
    const parentDir = dirname(RELAY_STATE_FILE);
    const parentMissing = !existsSync(parentDir);
    if (parentMissing) {
      mkdirSync(parentDir, { recursive: true });
    }
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(RELAY_STATE_FILE, serialized, { mode: 0o600 });
  } catch (e) {
    log(`[relay] failed to persist state: ${e.message || e}`);
  }
}
6335
/**
 * Delete `RELAY_STATE_FILE` if it exists. Best-effort: any filesystem error
 * is deliberately ignored.
 */
function clearRelayState() {
  try {
    if (!existsSync(RELAY_STATE_FILE)) return;
    unlinkSync(RELAY_STATE_FILE);
  } catch {
    // Nothing useful to do if the file cannot be removed.
  }
}
6340
+
6341
/** Number of reconnect attempts scheduled so far; drives the backoff delay. */
let _relayReconnectAttempts = 0;
/** Pending reconnect timer handle, or null when none is scheduled. */
let _relayReconnectTimer = null;

/**
 * Schedule one relay reconnect attempt with exponential backoff
 * (1 s, 2 s, 4 s, … capped at 30 s).
 *
 * Does nothing when a reconnect is already pending, or when the persisted
 * relay state holds no `sessionToken`. When the timer fires it clears its
 * own handle and calls `connectRelayWithSavedToken()`. The timer is
 * unref'd where the runtime supports it.
 */
function scheduleRelayAutoReconnect() {
  if (_relayReconnectTimer) {
    return; // already scheduled
  }

  const persisted = readRelayState();
  if (!persisted?.sessionToken) {
    log(`[relay] no saved sessionToken on disk; auto-reconnect skipped`);
    return;
  }

  const backoffMs = Math.min(30_000, 1000 * 2 ** _relayReconnectAttempts);
  _relayReconnectAttempts += 1;
  log(`[relay] auto-reconnect in ${backoffMs}ms (attempt ${_relayReconnectAttempts})`);

  const onTimer = () => {
    _relayReconnectTimer = null;
    connectRelayWithSavedToken();
  };
  _relayReconnectTimer = setTimeout(onTimer, backoffMs);
  _relayReconnectTimer.unref?.();
}
6359
/**
 * Open the host-side WebSocket to the relay using the sessionToken saved on
 * disk.
 *
 * Skips (with a log line) when `relayWs` is already connecting or open
 * (readyState 0 or 1), or when no token is saved. The relay URL comes from
 * the saved state, defaulting to the Fly relay's /connect endpoint. The new
 * socket is stored in `relayWs` before its handlers are attached, so a
 * second call made in the meantime takes the "already exists" branch. If
 * constructing the socket or attaching handlers throws, `relayWs` is reset
 * to null and another attempt is scheduled.
 */
function connectRelayWithSavedToken() {
  const socketInUse = Boolean(relayWs) && relayWs.readyState <= 1;
  if (socketInUse) {
    log(`[relay] auto-reconnect: WS already exists; skipping`);
    return;
  }

  const persisted = readRelayState();
  const token = persisted?.sessionToken;
  if (!token) {
    log(`[relay] no saved sessionToken; cannot auto-reconnect`);
    return;
  }

  const baseUrl = persisted.relayUrl || 'wss://vibeteam-relay.fly.dev/connect';
  const target = `${baseUrl}?token=${encodeURIComponent(token)}&role=host`;
  log(`[relay] auto-reconnecting with saved sessionToken`);
  try {
    const socket = new WebSocket(target);
    relayWs = socket; // claim slot immediately so concurrent calls bail
    setupRelayHostWs(socket, { reconnect: true });
  } catch (e) {
    log(`[relay] auto-reconnect threw: ${e.message || e}`);
    relayWs = null;
    scheduleRelayAutoReconnect();
  }
}
6382
+
6383
+ // Shared handler for the host's outbound WS to the relay. Used by both
6384
+ // the user-initiated /relay/start-pairing and /relay/connect endpoints AND
6385
+ // by the auto-reconnect path. Encapsulates: open, paired, waiting, ping,
6386
+ // mobile_disconnected, relay_message, host_info emission, sessionToken
6387
+ // persistence, and close→reconnect.
6388
/**
 * Attach the shared host-side event handlers to a relay WebSocket.
 *
 * Covers the relay control frames (`paired`, `waiting`, `ping`,
 * `mobile_disconnected`, `relay_message`, `error`), persists the relay
 * sessionToken via writeRelayState() whenever the relay supplies one, and
 * calls scheduleRelayAutoReconnect() when the active socket closes.
 *
 * @param {object} ws - Relay WebSocket opened with role=host.
 * @param {object} [options]
 * @param {boolean} [options.reconnect=false] - True when the socket was opened
 *   by the auto-reconnect path. Changes log wording and enables clearing the
 *   persisted state when the relay reports a token error.
 * @returns {void}
 */
function setupRelayHostWs(ws, { reconnect = false } = {}) {
  // Keep a relay-issued session token in memory and on disk. A frame that
  // carries no token is ignored so it cannot wipe a token we already hold.
  const rememberSessionToken = (token) => {
    if (!token) return;
    relaySessionToken = token;
    // Persist immediately so a crash before next save still survives.
    writeRelayState({ sessionToken: token, relayUrl: 'wss://vibeteam-relay.fly.dev/connect', updatedAt: Date.now() });
  };

  const handlePaired = (payload) => {
    rememberSessionToken(payload?.sessionToken);
    log(`[relay] paired with mobile${reconnect ? ' (after auto-reconnect)' : ''}`);

    // Idempotent: only register a NEW device on the FIRST 'paired' event.
    // Mobile reconnects re-emit 'paired' without implying a new device, so
    // the same currentRelayDeviceId carries through and no orphan rows
    // are created.
    if (!currentRelayDeviceId) {
      const revokedIds = enforceMaxDevicesBeforePairing();
      const { id: deviceId, token: authToken } = registerDevice({ name: 'Mobile Device' });
      currentRelayDeviceId = deviceId;
      sendRelayEnvelope('auth_grant', {
        deviceId,
        authToken,
        issuedAt: Date.now(),
        funnelHost: getFunnelHostname(),
      });
      broadcast({ type: 'devices_changed', payload: { devices: listDevicesSafe() } });
      broadcast({ type: 'device_paired', payload: { deviceId, revokedIds, name: 'Mobile Device' } });
    }

    // Always send host_info, even on reconnect: it backfills the Funnel
    // address for an already-paired mobile whose stored auth predates the
    // funnelHost field.
    sendRelayEnvelope('host_info', {
      funnelHost: getFunnelHostname(),
      serverVersion: VERSION,
    });

    broadcast({ type: 'relay_status', payload: { connected: true, paired: true, deviceId: currentRelayDeviceId } });
    // Push current state (mobile may have just reconnected with a stale store).
    sendToRelay({ type: 'sessions', payload: getSessions() });
    sendToRelay({ type: 'projects', payload: projectsManager.getProjects() });
  };

  const handleRelayError = (payload) => {
    const message = payload?.message || '';
    log(`[relay] error from relay: ${message || 'unknown'}`);
    // Token rejected by relay (expired, invalid, etc): drop the persisted
    // state so we don't keep retrying with a bad token. User must re-pair.
    if (reconnect && /token/i.test(message)) {
      clearRelayState();
      _relayReconnectAttempts = 0; // reset the attempt counter
    }
  };

  const dispatch = (msg) => {
    switch (msg.type) {
      case 'paired':
        handlePaired(msg.payload);
        return;
      case 'waiting':
        rememberSessionToken(msg.payload?.sessionToken);
        broadcast({ type: 'relay_status', payload: { connected: true, paired: false, waiting: true } });
        return;
      case 'ping':
        // readyState 1 === OPEN; the pong is best-effort.
        if (ws.readyState === 1) {
          try { ws.send(JSON.stringify({ type: 'pong', from: 'host', timestamp: Date.now(), seq: 0 })); } catch {}
        }
        return;
      case 'mobile_disconnected':
        broadcast({ type: 'relay_status', payload: { connected: true, paired: false } });
        return;
      case 'relay_message':
        if (msg.payload) handleRelayRequest(msg.payload, ws);
        return;
      case 'error':
        handleRelayError(msg.payload);
        return;
      default:
        return;
    }
  };

  ws.on('open', () => {
    log(`[relay] ${reconnect ? 'auto-reconnect' : 'connect'}: WS opened`);
    relayConnected = true;
    _relayReconnectAttempts = 0;
    broadcast({ type: 'relay_status', payload: { connected: true } });
  });

  ws.on('message', (data) => {
    let msg;
    try { msg = JSON.parse(data.toString()); }
    catch { return; }
    // JSON.parse accepts `null` and primitives; those are not relay frames.
    if (msg === null || typeof msg !== 'object') return;

    // An exception thrown inside an event listener is otherwise uncaught,
    // so one bad frame must not be able to take the process down.
    try {
      dispatch(msg);
    } catch (err) {
      log(`[relay] error processing message: ${err?.message || err}`);
    }
  });

  ws.on('close', (code) => {
    log(`[relay] WS closed (code=${code})`);
    // A re-pair installs a new socket in relayWs before the old socket's
    // close event fires. Only the socket that is still current (or an
    // already-cleared slot) may reset connection state and trigger a
    // reconnect; a superseded socket must leave the live one alone.
    if (relayWs && relayWs !== ws) return;
    relayWs = null;
    relayConnected = false;
    // DELIBERATELY keep relaySessionToken + currentRelayDeviceId in memory
    // and on disk; this is what lets the auto-reconnect pick up where we
    // left off.
    broadcast({ type: 'relay_status', payload: { connected: false } });
    scheduleRelayAutoReconnect();
  });

  ws.on('error', (err) => {
    log(`[relay] WS error: ${err.message || err}`);
  });
}
6492
+
6493
+ // ---- Funnel hostname auto-discovery ----
6494
+ //
6495
+ // At pairing time we tell the mobile what `host:port` reaches this backend
6496
+ // over the public internet via Tailscale Funnel. The mobile stores it
6497
+ // alongside its deviceAuth, and from then on it can hit the backend
6498
+ // directly from ANY origin (vibing.team, localhost dev tab on another box,
6499
+ // etc) — no more relay dependency for read paths or WS.
6500
+ //
6501
+ // Discovery: parse `tailscale serve status` and find a `(Funnel on)`
6502
+ // entry whose proxy points at our PORT. Returns "host:port" or null
6503
+ // when Funnel isn't set up. Cached on first call (you'd have to reload
6504
+ // the backend to pick up a serve config change).
6505
let _funnelHostnameCache; // undefined = not probed, null = no funnel, string = host:port

/**
 * Find the public Funnel address that proxies to a given local port in the
 * text printed by `tailscale serve status`.
 *
 * A match is a `https://host[:port] (Funnel on)` header line followed, within
 * the next three lines, by a root (`/`) proxy entry pointing at
 * `http://127.0.0.1:<port>` or `http://localhost:<port>`.
 *
 * @param {string} statusText - Raw command output.
 * @param {number|string} port - Local backend port to look for.
 * @returns {string|null} "host:port" (port defaults to 443 when the header
 *   omits it), or null when no matching Funnel entry exists.
 */
function parseFunnelServeStatus(statusText, port) {
  // The port is optional: a Funnel on the default HTTPS port is listed
  // without one.
  const headerRe = /^https:\/\/([^\s:]+)(?::(\d+))?\s+\(Funnel on\)/;
  const proxyRe = /\|--\s*\/\s*proxy\s+http:\/\/(?:127\.0\.0\.1|localhost):(\d+)/;
  const wantedPort = Number(port);
  const lines = String(statusText).split(/\r?\n/);

  for (let i = 0; i < lines.length; i++) {
    const header = lines[i].match(headerRe);
    if (!header) continue;
    // Look at the next few lines for the proxy target.
    const end = Math.min(i + 4, lines.length);
    for (let j = i + 1; j < end; j++) {
      const proxy = lines[j].match(proxyRe);
      if (proxy && Number.parseInt(proxy[1], 10) === wantedPort) {
        return `${header[1]}:${header[2] ?? '443'}`;
      }
    }
  }
  return null;
}

/**
 * Return the Tailscale Funnel "host:port" that reaches this backend, or null
 * when none is configured.
 *
 * Runs `tailscale serve status` synchronously (2 s timeout) on the first call
 * and caches the outcome, including a null result, for the life of the
 * process.
 *
 * @returns {string|null}
 */
function getFunnelHostname() {
  if (_funnelHostnameCache !== undefined) return _funnelHostnameCache;
  try {
    const out = execFileSync('tailscale', ['serve', 'status'], { encoding: 'utf8', timeout: 2000 });
    _funnelHostnameCache = parseFunnelServeStatus(out, PORT);
  } catch {
    // tailscale not installed / not running / serve not configured: no funnel
    _funnelHostnameCache = null;
  }
  if (_funnelHostnameCache) {
    log(`[funnel] discovered hostname=${_funnelHostnameCache} → backend:${PORT}`);
  }
  return _funnelHostnameCache;
}
6534
+
5986
6535
  function loadDevices() {
5987
6536
  try {
5988
6537
  if (!existsSync(DEVICES_FILE)) return;
@@ -6125,6 +6674,38 @@ function getRequireConfirmation() {
6125
6674
  }
6126
6675
  }
6127
6676
 
6677
/**
 * Read the cap on paired remote devices from the global settings.
 *
 * Uses `remoteControl.maxDevices` when it is a number in [1, 10], rounded
 * down to an integer. Any other value, or a failure while reading the
 * settings, yields the default of 2.
 *
 * @returns {number} Integer device limit.
 */
function getMaxDevices() {
  const fallback = 2; // default: phone + backup
  let configured;
  try {
    configured = readGlobalSettings().remoteControl?.maxDevices;
  } catch {
    // Settings missing or unreadable: use the default.
    return fallback;
  }
  const usable = typeof configured === 'number' && configured >= 1 && configured <= 10;
  return usable ? Math.floor(configured) : fallback;
}
6685
+
6686
+ // Enforce max-devices policy: if registering a new device would exceed the
6687
+ // configured max, auto-revoke the oldest unrevoked device first. Returns the
6688
+ // list of revoked device IDs so the caller can include them in the
6689
+ // notification broadcast.
6690
/**
 * Make room for one more device under the max-devices policy.
 *
 * When registering a new device would push the number of unrevoked devices
 * past getMaxDevices(), the least-recently-seen unrevoked devices are marked
 * revoked (reason 'max_devices_exceeded') and the device store is saved.
 *
 * @returns {string[]} IDs of the devices revoked by this call, least recently
 *   seen first; empty when nothing had to be revoked.
 */
function enforceMaxDevicesBeforePairing() {
  const max = getMaxDevices();
  // A device without a numeric lastSeen sorts as the oldest. Subtracting
  // undefined would give the comparator NaN and an unspecified order.
  // NOTE(review): assumes lastSeen is an epoch-ms number; confirm against registerDevice.
  const seenAt = (d) => (Number.isFinite(d.lastSeen) ? d.lastSeen : 0);
  const unrevoked = Array.from(devices.values())
    .filter((d) => !d.revoked)
    .sort((a, b) => seenAt(a) - seenAt(b)); // oldest first

  // Leave room for the about-to-be-registered device.
  const surplus = unrevoked.length - (max - 1);
  if (surplus <= 0) return [];

  const revokedIds = [];
  for (const d of unrevoked.slice(0, surplus)) {
    d.revoked = true;
    d.revokedAt = Date.now();
    d.revokedReason = 'max_devices_exceeded';
    revokedIds.push(d.id);
  }
  saveDevices();
  return revokedIds;
}
6708
+
6128
6709
  function describeDestructiveAction(method, path) {
6129
6710
  if (method === 'DELETE' && /^\/projects\//.test(path)) return 'Delete project';
6130
6711
  if (method === 'DELETE' && /^\/sessions\//.test(path)) return 'Delete agent session';
@@ -6195,6 +6776,7 @@ const RELAY_PATH_ALLOWLIST = [
6195
6776
  /^\/themes(\/|\?|$)/,
6196
6777
  /^\/settings(\/|\?|$)/,
6197
6778
  /^\/usage(\/|\?|$)/,
6779
+ /^\/ideation(\/|\?|$)/,
6198
6780
  /^\/health$/,
6199
6781
  ];
6200
6782
 
@@ -6395,11 +6977,13 @@ function handleClientMessage(ws, message) {
6395
6977
  // ============================================================================
6396
6978
  async function handleHttpRequest(req, res) {
6397
6979
  const origin = req.headers.origin;
6398
- // CORS headers - only allow specific origins
6980
+ // CORS headers - only allow specific origins. Allow Authorization header
6981
+ // so mobile clients can send their device token over Tailscale Funnel
6982
+ // (direct backend path, bypassing the Fly relay).
6399
6983
  if (origin && isOriginAllowed(origin)) {
6400
6984
  res.setHeader('Access-Control-Allow-Origin', origin);
6401
6985
  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, PATCH, DELETE, OPTIONS');
6402
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
6986
+ res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
6403
6987
  }
6404
6988
  if (req.method === 'OPTIONS') {
6405
6989
  // Preflight: reject if origin not allowed
@@ -6412,6 +6996,65 @@ async function handleHttpRequest(req, res) {
6412
6996
  res.end();
6413
6997
  return;
6414
6998
  }
6999
+
7000
+ // Bearer-token auth for direct mobile access via Tailscale Funnel.
7001
+ // The backend is exposed publicly on Funnel:8443, so any path that
7002
+ // mobile reaches MUST require a valid device token. Localhost requests
7003
+ // (desktop on the same machine) bypass this gate — they connect via
7004
+ // 127.0.0.1 and don't have an Origin header set to a remote hostname.
7005
+ //
7006
+ // Strategy:
7007
+ // - If request has Authorization: Bearer <token> AND token is valid
7008
+ // → mark req as remote-authenticated, allow through.
7009
+ // - If request comes from localhost (no remote origin) → allow through
7010
+ // unauthenticated (legacy desktop behavior).
7011
+ // - Otherwise → 401.
7012
+ const authHeader = req.headers['authorization'];
7013
+ let isRemoteAuthed = false;
7014
+ if (typeof authHeader === 'string' && authHeader.startsWith('Bearer ')) {
7015
+ const token = authHeader.slice(7).trim();
7016
+ const device = validateAuthToken(token);
7017
+ if (device) {
7018
+ req.__authedDevice = device;
7019
+ isRemoteAuthed = true;
7020
+ }
7021
+ }
7022
+
7023
+ // Reject remote requests without valid bearer token. We use Host header
7024
+ // (harder to spoof for actual remote requests) plus the Tailscale Funnel
7025
+ // marker as the trust boundary:
7026
+ // - Tailscale-Funnel-Request: ?1 → public-internet via Funnel, REQUIRE auth
7027
+ // - Host other than localhost/127.0.0.1 → reached us via something
7028
+ // remote-ish (Tailscale Serve, LAN), REQUIRE auth
7029
+ // - Otherwise localhost desktop → allow (legacy behavior)
7030
+ const hostHeader = (req.headers['host'] || '').split(':')[0].toLowerCase();
7031
+ const isFunnelRequest = req.headers['tailscale-funnel-request'] === '?1';
7032
+ const isLocalHost = hostHeader === 'localhost' || hostHeader === '127.0.0.1' || hostHeader === '::1' || hostHeader === '0.0.0.0' || hostHeader === '';
7033
+ const isRemoteRequest = isFunnelRequest || !isLocalHost;
7034
+ if (isRemoteRequest && !isRemoteAuthed) {
7035
+ // Allow these public paths to be reached without auth:
7036
+ // - /health (status check)
7037
+ // - /relay/start-pairing, /relay/connect (pairing UI on desktop fetches)
7038
+ // - /relay/disconnect (admin)
7039
+ // - /relay/status
7040
+ // - / and SPA static assets (handled later in the file)
7041
+ const path = (req.url || '').split('?')[0];
7042
+ const PUBLIC_PATHS = [
7043
+ /^\/health$/,
7044
+ /^\/relay\/(start-pairing|connect|disconnect|status)$/,
7045
+ /^\/pair-claim$/, // mobile claims a host-issued local code; the code IS the auth
7046
+ /^\/api\/docs/, // API docs UI
7047
+ /^\/$/,
7048
+ /\.(js|css|html|svg|png|jpg|ico|woff2?|ttf|map|json|webp|mp3)$/i,
7049
+ /^\/(?:assets|models|sounds)\//,
7050
+ /^\/favicon/,
7051
+ ];
7052
+ if (!PUBLIC_PATHS.some((rx) => rx.test(path))) {
7053
+ res.writeHead(401, { 'Content-Type': 'application/json' });
7054
+ res.end(JSON.stringify({ error: 'Authentication required' }));
7055
+ return;
7056
+ }
7057
+ }
6415
7058
  if (req.method === 'POST' && req.url === '/event') {
6416
7059
  collectRequestBody(req).then(body => {
6417
7060
  try {
@@ -6580,6 +7223,7 @@ async function handleHttpRequest(req, res) {
6580
7223
  deepgram: { set: !!deepgramApiKey, source: voiceConfigSource.deepgram },
6581
7224
  openrouter: { set: !!openrouterApiKey, source: voiceConfigSource.openrouter },
6582
7225
  cleanupModel: { value: voiceCleanupModel, source: voiceConfigSource.model },
7226
+ language: { value: voiceLanguage, source: voiceConfigSource.language },
6583
7227
  customVocabulary: { value: voiceCustomVocab, source: voiceConfigSource.customVocab },
6584
7228
  }));
6585
7229
  return;
@@ -6607,6 +7251,7 @@ async function handleHttpRequest(req, res) {
6607
7251
  apply('deepgramApiKey', String);
6608
7252
  apply('openrouterApiKey', String);
6609
7253
  apply('voiceCleanupModel', String);
7254
+ apply('voiceLanguage', String);
6610
7255
  apply('customVocabulary', (v) => Array.isArray(v) ? v : String(v).split(/[\n,]+/));
6611
7256
 
6612
7257
  if (!writeVoiceConfigFile(next)) {
@@ -7934,6 +8579,24 @@ async function handleHttpRequest(req, res) {
7934
8579
  try { mkdirSync(_usageCacheDir, { recursive: true }); } catch {}
7935
8580
  global._usageCache = { data: _diskData, timestamp: _diskTimestamp, ttl: 300000, backoff: 0, path: _usageCachePath };
7936
8581
  }
8582
+ // Deepgram balance: returns { ok, balance, currency } or
8583
+ // { ok: false, error, hint }. 200 with ok:false for "expected" failures
8584
+ // (no key, missing scope) so the UI can render a friendly state without
8585
+ // lighting up a 5xx error.
8586
+ if (req.method === 'GET' && req.url === '/usage/deepgram') {
8587
+ const result = await fetchDeepgramUsage();
8588
+ res.writeHead(200, { 'Content-Type': 'application/json' });
8589
+ res.end(JSON.stringify(result));
8590
+ return;
8591
+ }
8592
+ // OpenRouter credits: returns { ok, totalCredits, totalUsage, remaining }
8593
+ // or { ok: false, error }. Same caching strategy as Deepgram.
8594
+ if (req.method === 'GET' && req.url === '/usage/openrouter') {
8595
+ const result = await fetchOpenRouterUsage();
8596
+ res.writeHead(200, { 'Content-Type': 'application/json' });
8597
+ res.end(JSON.stringify(result));
8598
+ return;
8599
+ }
7937
8600
  if (req.method === 'GET' && req.url === '/usage') {
7938
8601
  const cache = global._usageCache;
7939
8602
  const now = Date.now();
@@ -7954,31 +8617,13 @@ async function handleHttpRequest(req, res) {
7954
8617
  }
7955
8618
 
7956
8619
  try {
7957
- // Get OAuth token - try credentials file first (Keychain's security -w truncates large JSON),
7958
- // then fall back to Keychain
7959
- const { readFileSync } = await import('fs');
7960
- const { join } = await import('path');
7961
- const { homedir } = await import('os');
7962
- let tokenData;
7963
-
7964
- // Method 1: Read from ~/.claude/.credentials.json (most reliable)
7965
- try {
7966
- const credPath = join(homedir(), '.claude', '.credentials.json');
7967
- tokenData = JSON.parse(readFileSync(credPath, 'utf8'));
7968
- } catch (_fileErr) {
7969
- // Method 2: Fall back to macOS Keychain
7970
- try {
7971
- const { execFileSync } = await import('child_process');
7972
- const keychainResult = execFileSync(
7973
- 'security',
7974
- ['find-generic-password', '-s', 'Claude Code-credentials', '-w'],
7975
- { encoding: 'utf8', timeout: 5000 }
7976
- ).trim();
7977
- tokenData = JSON.parse(keychainResult);
7978
- } catch (_keychainErr) {
7979
- // ignore, handled below
7980
- }
7981
- }
8620
+ // Locate the user's Claude Code OAuth token. Recent Claude Code
8621
+ // versions store it in macOS Keychain under a per-install service
8622
+ // name like "Claude Code-credentials-ba6ac817" (suffix is unique
8623
+ // per install); the unsuffixed entry is now used for plugin
8624
+ // OAuth (mcpOAuth schema) and DOESN'T contain claudeAiOauth.
8625
+ // We cache the resolved service name in memory once found.
8626
+ const tokenData = await findClaudeOAuthCredentials();
7982
8627
 
7983
8628
  if (!tokenData) {
7984
8629
  res.writeHead(401, { 'Content-Type': 'application/json' });
@@ -9421,98 +10066,14 @@ Explore the project structure and key files to understand what it does, then out
9421
10066
  }
9422
10067
 
9423
10068
  try {
10069
+ // User-initiated re-pair — clear any prior persistent
10070
+ // state so a stale token doesn't fight with the new one.
10071
+ clearRelayState();
10072
+ currentRelayDeviceId = null;
9424
10073
  const url = `${relayUrl}?code=${encodeURIComponent(code)}&role=host`;
9425
10074
  const ws = new WebSocket(url);
9426
-
9427
- ws.on('open', () => {
9428
- console.log('[relay] Connected to relay server');
9429
- relayConnected = true;
9430
- broadcast({ type: 'relay_status', payload: { connected: true } });
9431
- });
9432
-
9433
- ws.on('message', (data) => {
9434
- try {
9435
- const msg = JSON.parse(data.toString());
9436
-
9437
- // Handle relay control messages
9438
- if (msg.type === 'paired') {
9439
- relaySessionToken = msg.payload?.sessionToken;
9440
- console.log('[relay] Paired with mobile device');
9441
-
9442
- // Issue per-device auth token. The mobile keeps this token
9443
- // and must include it in every relay_message payload.
9444
- // The token is plaintext on this single delivery only;
9445
- // we store SHA-256 hash + revocation flag locally.
9446
- const { id: deviceId, token: authToken } = registerDevice({
9447
- name: 'Mobile Device',
9448
- });
9449
- currentRelayDeviceId = deviceId;
9450
-
9451
- // Send the grant to mobile through the relay
9452
- sendRelayEnvelope('auth_grant', {
9453
- deviceId,
9454
- authToken,
9455
- issuedAt: Date.now(),
9456
- });
9457
-
9458
- broadcast({ type: 'relay_status', payload: { connected: true, paired: true, deviceId } });
9459
- broadcast({ type: 'devices_changed', payload: { devices: listDevicesSafe() } });
9460
-
9461
- // Send current state to mobile via relay
9462
- const sessionsData = getSessions();
9463
- sendToRelay({ type: 'sessions', payload: sessionsData });
9464
-
9465
- // Send projects
9466
- sendToRelay({ type: 'projects', payload: projectsManager.getProjects() });
9467
- return;
9468
- }
9469
-
9470
- if (msg.type === 'waiting') {
9471
- console.log('[relay] Waiting for mobile device to connect...');
9472
- if (msg.payload?.sessionToken) {
9473
- relaySessionToken = msg.payload.sessionToken;
9474
- }
9475
- broadcast({ type: 'relay_status', payload: { connected: true, paired: false, waiting: true } });
9476
- return;
9477
- }
9478
-
9479
- if (msg.type === 'ping') {
9480
- if (ws.readyState === 1) {
9481
- ws.send(JSON.stringify({ type: 'pong', from: 'host', timestamp: Date.now(), seq: 0 }));
9482
- }
9483
- return;
9484
- }
9485
-
9486
- if (msg.type === 'mobile_disconnected') {
9487
- console.log('[relay] Mobile device disconnected');
9488
- broadcast({ type: 'relay_status', payload: { connected: true, paired: false } });
9489
- return;
9490
- }
9491
-
9492
- // Handle relay_message from mobile (REST-over-relay)
9493
- if (msg.type === 'relay_message' && msg.payload) {
9494
- handleRelayRequest(msg.payload, ws);
9495
- return;
9496
- }
9497
- } catch (e) {
9498
- console.error('[relay] Error processing message:', e);
9499
- }
9500
- });
9501
-
9502
- ws.on('close', () => {
9503
- console.log('[relay] Disconnected from relay server');
9504
- relayWs = null;
9505
- relayConnected = false;
9506
- relaySessionToken = null;
9507
- currentRelayDeviceId = null;
9508
- broadcast({ type: 'relay_status', payload: { connected: false } });
9509
- });
9510
-
9511
- ws.on('error', (err) => {
9512
- console.error('[relay] WebSocket error:', err.message);
9513
- });
9514
-
9515
10075
  relayWs = ws;
10076
+ setupRelayHostWs(ws, { reconnect: false });
9516
10077
  res.writeHead(200, { 'Content-Type': 'application/json' });
9517
10078
  res.end(JSON.stringify({ success: true, message: 'Connecting to relay...' }));
9518
10079
  } catch (err) {
@@ -9556,66 +10117,17 @@ Explore the project structure and key files to understand what it does, then out
9556
10117
  relayWs = null;
9557
10118
  }
9558
10119
 
9559
- // 3. Open WS as host with this code (mirrors /relay/connect logic)
10120
+ // 3. Open WS as host with this code (uses shared handler).
9560
10121
  try {
10122
+ // User-initiated re-pair — wipe any prior persistent
10123
+ // state so the next 'paired' event installs the new
10124
+ // sessionToken cleanly.
10125
+ clearRelayState();
10126
+ currentRelayDeviceId = null;
9561
10127
  const url = `${relayUrl}?code=${encodeURIComponent(code)}&role=host`;
9562
10128
  const ws = new WebSocket(url);
9563
-
9564
- ws.on('open', () => {
9565
- console.log('[relay] start-pairing: host WS opened');
9566
- relayConnected = true;
9567
- broadcast({ type: 'relay_status', payload: { connected: true } });
9568
- });
9569
-
9570
- ws.on('message', (data) => {
9571
- try {
9572
- const msg = JSON.parse(data.toString());
9573
-
9574
- if (msg.type === 'paired') {
9575
- relaySessionToken = msg.payload?.sessionToken;
9576
- console.log('[relay] start-pairing: paired with mobile');
9577
- const { id: deviceId, token: authToken } = registerDevice({ name: 'Mobile Device' });
9578
- currentRelayDeviceId = deviceId;
9579
- sendRelayEnvelope('auth_grant', { deviceId, authToken, issuedAt: Date.now() });
9580
- broadcast({ type: 'relay_status', payload: { connected: true, paired: true, deviceId } });
9581
- broadcast({ type: 'devices_changed', payload: { devices: listDevicesSafe() } });
9582
- sendToRelay({ type: 'sessions', payload: getSessions() });
9583
- sendToRelay({ type: 'projects', payload: projectsManager.getProjects() });
9584
- return;
9585
- }
9586
- if (msg.type === 'waiting') {
9587
- if (msg.payload?.sessionToken) relaySessionToken = msg.payload.sessionToken;
9588
- broadcast({ type: 'relay_status', payload: { connected: true, paired: false, waiting: true } });
9589
- return;
9590
- }
9591
- if (msg.type === 'ping') {
9592
- if (ws.readyState === 1) ws.send(JSON.stringify({ type: 'pong', from: 'host', timestamp: Date.now(), seq: 0 }));
9593
- return;
9594
- }
9595
- if (msg.type === 'mobile_disconnected') {
9596
- broadcast({ type: 'relay_status', payload: { connected: true, paired: false } });
9597
- return;
9598
- }
9599
- if (msg.type === 'relay_message' && msg.payload) {
9600
- handleRelayRequest(msg.payload, ws);
9601
- return;
9602
- }
9603
- } catch (e) {
9604
- console.error('[relay] start-pairing: error processing message:', e);
9605
- }
9606
- });
9607
-
9608
- ws.on('close', () => {
9609
- relayWs = null;
9610
- relayConnected = false;
9611
- relaySessionToken = null;
9612
- currentRelayDeviceId = null;
9613
- broadcast({ type: 'relay_status', payload: { connected: false } });
9614
- });
9615
-
9616
- ws.on('error', (err) => console.error('[relay] start-pairing WS error:', err.message));
9617
10129
  relayWs = ws;
9618
-
10130
+ setupRelayHostWs(ws, { reconnect: false });
9619
10131
  res.writeHead(200, { 'Content-Type': 'application/json' });
9620
10132
  res.end(JSON.stringify({ code, expiresIn, relayUrl, relayApiUrl }));
9621
10133
  } catch (err) {
@@ -9643,6 +10155,16 @@ Explore the project structure and key files to understand what it does, then out
9643
10155
  }
9644
10156
 
9645
10157
  if (req.method === 'POST' && req.url === '/relay/disconnect') {
10158
+ // User-initiated disconnect — clear persisted state too so the
10159
+ // auto-reconnect loop doesn't immediately rebuild the connection
10160
+ // we just tore down. (The close handler in setupRelayHostWs would
10161
+ // otherwise see the saved token and reconnect.)
10162
+ clearRelayState();
10163
+ _relayReconnectAttempts = 0;
10164
+ if (_relayReconnectTimer) {
10165
+ clearTimeout(_relayReconnectTimer);
10166
+ _relayReconnectTimer = null;
10167
+ }
9646
10168
  if (relayWs) {
9647
10169
  try { relayWs.close(); } catch {}
9648
10170
  relayWs = null;
@@ -9695,6 +10217,86 @@ Explore the project structure and key files to understand what it does, then out
9695
10217
  return;
9696
10218
  }
9697
10219
 
10220
+ // POST /local/pair-init — host-only (loopback). Generates a 6-digit
10221
+ // single-use code that the mobile can claim via /pair-claim within 5
10222
+ // min. Use this when the Fly relay is unreachable or rate-limited.
10223
+ if (req.method === 'POST' && req.url === '/local/pair-init') {
10224
+ if (!isLoopbackRequest(req)) {
10225
+ res.writeHead(403, { 'Content-Type': 'application/json' });
10226
+ res.end(JSON.stringify({ error: 'Local pairing must be initiated from the host machine (loopback only)' }));
10227
+ return;
10228
+ }
10229
+ pruneLocalPairCodes();
10230
+ const code = generateLocalPairCode();
10231
+ localPairCodes.set(code, { createdAt: Date.now() });
10232
+ log(`[local-pair] issued code (5 min ttl)`);
10233
+ res.writeHead(200, { 'Content-Type': 'application/json' });
10234
+ res.end(JSON.stringify({ code, expiresInSec: Math.floor(LOCAL_PAIR_TTL_MS / 1000) }));
10235
+ return;
10236
+ }
10237
+
10238
+ // POST /pair-claim — Funnel-accessible. Body: { code }. The code IS
10239
+ // the authorization (mirrors the relay /pair model). Brute-force-
10240
+ // resistant via per-IP rate limit on attempts (60 / 5 min) and the
10241
+ // codes themselves being short-lived + single-use.
10242
+ if (req.method === 'POST' && req.url === '/pair-claim') {
10243
+ const ip = getRequestIP(req);
10244
+ if (!checkPairClaimRateLimit(ip)) {
10245
+ log(`[local-pair] rate-limit ip=${ip}`);
10246
+ res.writeHead(429, { 'Content-Type': 'application/json' });
10247
+ res.end(JSON.stringify({ error: 'Too many attempts. Try again in a few minutes.' }));
10248
+ return;
10249
+ }
10250
+ collectRequestBody(req).then(body => {
10251
+ try {
10252
+ pruneLocalPairCodes();
10253
+ const parsed = JSON.parse(body || '{}');
10254
+ const code = typeof parsed.code === 'string' ? parsed.code.trim() : '';
10255
+ if (!/^\d{6}$/.test(code)) {
10256
+ res.writeHead(400, { 'Content-Type': 'application/json' });
10257
+ res.end(JSON.stringify({ error: 'Invalid code format — must be 6 digits' }));
10258
+ return;
10259
+ }
10260
+ const entry = localPairCodes.get(code);
10261
+ if (!entry) {
10262
+ res.writeHead(404, { 'Content-Type': 'application/json' });
10263
+ res.end(JSON.stringify({ error: 'Code not found or expired' }));
10264
+ return;
10265
+ }
10266
+ // Burn first so a concurrent second claim loses the race and the code
10267
+ // can't be replayed from an HTTP log.
10268
+ localPairCodes.delete(code);
10269
+ const revokedIds = enforceMaxDevicesBeforePairing();
10270
+ const ua = req.headers['user-agent'] || null;
10271
+ const { id: deviceId, token: authToken } = registerDevice({
10272
+ name: 'Mobile Device (local)',
10273
+ userAgent: ua,
10274
+ });
10275
+ broadcast({ type: 'devices_changed', payload: { devices: listDevicesSafe() } });
10276
+ broadcast({ type: 'device_paired', payload: { deviceId, revokedIds, name: 'Mobile Device (local)' } });
10277
+ log(`[local-pair] claimed ip=${ip} device=${deviceId}`);
10278
+ res.writeHead(200, { 'Content-Type': 'application/json' });
10279
+ res.end(JSON.stringify({
10280
+ deviceId,
10281
+ authToken,
10282
+ issuedAt: Date.now(),
10283
+ // Tell the mobile how to reach this backend directly via
10284
+ // the public internet (Tailscale Funnel). Lets the mobile
10285
+ // skip the Fly relay for read paths + WS, even when
10286
+ // loaded from vibing.team. null when Funnel isn't set up.
10287
+ funnelHost: getFunnelHostname(),
10288
+ }));
10289
+ } catch (e) {
10290
+ res.writeHead(400, { 'Content-Type': 'application/json' });
10291
+ res.end(JSON.stringify({ error: e.message || 'Invalid request' }));
10292
+ }
10293
+ }).catch(() => {
10294
+ res.writeHead(413, { 'Content-Type': 'application/json' });
10295
+ res.end(JSON.stringify({ error: 'Request body too large' }));
10296
+ });
10297
+ return;
10298
+ }
10299
+
9698
10300
  // DELETE /devices/:id — revoke a device's auth token
9699
10301
  {
9700
10302
  const m = req.url && req.method === 'DELETE' && req.url.match(/^\/devices\/([a-zA-Z0-9_-]+)$/);
@@ -9723,6 +10325,38 @@ Explore the project structure and key files to understand what it does, then out
9723
10325
  }
9724
10326
  }
9725
10327
 
10328
+ // POST /devices/cleanup — hard-delete revoked + stale devices
10329
+ // Body: { keep?: number } — keep this many most-recently-used unrevoked
10330
+ // devices (default 5). Everything else is removed from disk.
10331
+ if (req.method === 'POST' && req.url === '/devices/cleanup') {
10332
+ collectRequestBody(req).then(body => {
10333
+ try {
10334
+ const { keep = 5 } = (body ? JSON.parse(body) : {}) || {};
10335
+ const all = Array.from(devices.values());
10336
+ const unrevoked = all.filter(d => !d.revoked).sort((a, b) => b.lastSeen - a.lastSeen);
10337
+ const keepIds = new Set(unrevoked.slice(0, Math.max(1, keep)).map(d => d.id));
10338
+ let removed = 0;
10339
+ for (const d of all) {
10340
+ if (!keepIds.has(d.id)) {
10341
+ devices.delete(d.id);
10342
+ removed++;
10343
+ }
10344
+ }
10345
+ saveDevices();
10346
+ broadcast({ type: 'devices_changed', payload: { devices: listDevicesSafe() } });
10347
+ res.writeHead(200, { 'Content-Type': 'application/json' });
10348
+ res.end(JSON.stringify({ success: true, removed, kept: keepIds.size }));
10349
+ } catch (e) {
10350
+ res.writeHead(400, { 'Content-Type': 'application/json' });
10351
+ res.end(JSON.stringify({ error: e.message }));
10352
+ }
10353
+ }).catch(() => {
10354
+ res.writeHead(413, { 'Content-Type': 'application/json' });
10355
+ res.end(JSON.stringify({ error: 'Body too large' }));
10356
+ });
10357
+ return;
10358
+ }
10359
+
9726
10360
  // POST /devices/:id/rename — rename a device
9727
10361
  {
9728
10362
  const m = req.url && req.method === 'POST' && req.url.match(/^\/devices\/([a-zA-Z0-9_-]+)\/rename$/);
@@ -9924,8 +10558,19 @@ function main() {
9924
10558
  watchEventsFile();
9925
10559
  // Create HTTP server
9926
10560
  const httpServer = createServer(handleHttpRequest);
9927
- // Create WebSocket server
9928
- const wss = new WebSocketServer({ server: httpServer });
10561
+ // Create WebSocket server. handleProtocols echoes the first offered
10562
+ // subprotocol back to the client; this is required because mobile clients
10563
+ // pass `vibeteam-auth.<token>` as a subprotocol to authenticate the
10564
+ // upgrade. Without echoing, browsers treat the missing
10565
+ // Sec-WebSocket-Protocol response header as a handshake failure and
10566
+ // close the connection.
10567
+ const wss = new WebSocketServer({
10568
+ server: httpServer,
10569
+ handleProtocols: (protocols /* Set<string> */) => {
10570
+ for (const p of protocols) return p;
10571
+ return false;
10572
+ },
10573
+ });
9929
10574
 
9930
10575
  // WebSocket ping/pong heartbeat: detects "zombie" connections where TCP
9931
10576
  // is dead but Node's `ws` library still reports readyState === OPEN. Without
@@ -9954,6 +10599,36 @@ function main() {
9954
10599
  ws.close(1008, 'Origin not allowed');
9955
10600
  return;
9956
10601
  }
10602
+ // Auth gate for remote-ish connections. We can't trust the TCP
10603
+ // socket's remoteAddress here because Tailscale Funnel proxies
10604
+ // through tailscaled on 127.0.0.1 — it would falsely look like
10605
+ // loopback. Instead mirror the HTTP gate (line ~6740): if the
10606
+ // request came in via Funnel OR the Origin's hostname isn't
10607
+ // localhost, treat it as remote and require deviceAuth via the
10608
+ // Sec-WebSocket-Protocol subprotocol "vibeteam-auth.<token>"
10609
+ // (only header browsers allow on a WS connection).
10610
+ const isFunnelRequest = req.headers['tailscale-funnel-request'] === '?1';
10611
+ let originHost = '';
10612
+ try { originHost = origin ? new URL(origin).hostname.toLowerCase() : ''; } catch {}
10613
+ const isLocalOrigin = originHost === 'localhost' || originHost === '127.0.0.1' || originHost === '::1' || originHost === '0.0.0.0' || originHost === '';
10614
+ const requireAuth = isFunnelRequest || !isLocalOrigin;
10615
+ if (requireAuth) {
10616
+ const proto = ws.protocol || '';
10617
+ if (!proto.startsWith('vibeteam-auth.')) {
10618
+ log(`[ws-auth] Rejected remote WS without auth subprotocol from ${origin}`);
10619
+ ws.close(1008, 'Authentication required (vibeteam-auth.<token> subprotocol)');
10620
+ return;
10621
+ }
10622
+ const token = proto.slice('vibeteam-auth.'.length);
10623
+ const device = validateAuthToken(token);
10624
+ if (!device) {
10625
+ log(`[ws-auth] Invalid token from ${origin}`);
10626
+ ws.close(1008, 'Invalid auth token');
10627
+ return;
10628
+ }
10629
+ ws.deviceId = device.id; // for downstream use / debugging
10630
+ log(`[ws-auth] Accepted device=${device.id} origin=${origin}${isFunnelRequest ? ' via=funnel' : ''}`);
10631
+ }
9957
10632
  ws.isAlive = true;
9958
10633
  ws.on('pong', markAlive);
9959
10634
  clients.add(ws);
@@ -10054,6 +10729,19 @@ function main() {
10054
10729
  log(` Health: http://localhost:${PORT}/health`);
10055
10730
  log(` Stats: http://localhost:${PORT}/stats`);
10056
10731
  log(` Sessions: http://localhost:${PORT}/sessions`);
10732
+ // Restore the relay session persisted by a previous run. The relay accepts
10733
+ // host reconnect with a 24h sessionToken — if the saved token is
10734
+ // still valid, the mobile transparently re-pairs with no user
10735
+ // interaction. If the token is gone or rejected, we silently fall
10736
+ // back to "needs pairing" (relay-status pill goes red on mobile).
10737
+ const savedRelay = readRelayState();
10738
+ if (savedRelay?.sessionToken) {
10739
+ log(`[relay] found saved sessionToken on disk; attempting auto-reconnect`);
10740
+ // Slight delay so the listen log lines stay ordered + log() is
10741
+ // ready before reconnect noise.
10742
+ setTimeout(connectRelayWithSavedToken, 500).unref();
10743
+ }
10744
+
10057
10745
  // Start token polling after server is ready
10058
10746
  startTokenPolling();
10059
10747
  // Start permission prompt polling