npm - loukai-app - Versions diffs - 0.3.0 - Mend

loukai-app 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

package/src/main/creator/lrclibService.js ADDED Viewed

@@ -0,0 +1,340 @@
+/**
+ * LRCLIB Service - Lyrics lookup from lrclib.net
+ *
+ * Provides:
+ * - Lyrics search by title/artist
+ * - Vocabulary extraction for Whisper hints
+ * - Synced lyrics (LRC format) when available
+ */
+const LRCLIB_API_BASE = 'https://lrclib.net/api';
+// Common words to filter out of vocabulary hints
+const COMMON_WORDS = new Set([
+  'this',
+  'that',
+  'with',
+  'will',
+  'were',
+  'when',
+  'where',
+  'what',
+  'they',
+  'them',
+  'then',
+  'than',
+  'like',
+  'just',
+  'have',
+  'from',
+  'been',
+  'your',
+  'come',
+  'said',
+  'would',
+  'could',
+  'should',
+  'there',
+  'their',
+  'these',
+  'those',
+  'through',
+  'before',
+  'after',
+  'about',
+  'dont',
+  'cant',
+  'wont',
+  'isnt',
+  'arent',
+  'wasnt',
+  'werent',
+  'doesnt',
+]);
+/**
+ * Search LRCLIB for lyrics
+ * @param {string} title - Song title
+ * @param {string} artist - Artist name
+ * @returns {Promise<Object|null>} Lyrics result or null
+ */
+export async function searchLyrics(title, artist) {
+  if (!title) {
+    return null;
+  }
+  try {
+    const params = new URLSearchParams({
+      track_name: title,
+    });
+    if (artist) {
+      params.set('artist_name', artist);
+    }
+    const url = `${LRCLIB_API_BASE}/search?${params}`;
+    console.log(`Searching LRCLIB for: ${title} by ${artist || 'unknown'}`);
+    const response = await fetch(url, {
+      headers: {
+        'User-Agent': 'Loukai/1.0',
+      },
+      signal: AbortSignal.timeout(10000),
+    });
+    if (!response.ok) {
+      console.warn(`LRCLIB search failed: ${response.status}`);
+      return null;
+    }
+    const results = await response.json();
+    if (!results || results.length === 0) {
+      console.warn('No lyrics found on LRCLIB');
+      return null;
+    }
+    // Find first non-instrumental result with plain lyrics
+    for (const result of results) {
+      if (!result.instrumental && result.plainLyrics) {
+        console.log(
+          `Found lyrics: ${result.name || 'Unknown'} from ${result.albumName || 'Unknown'}`
+        );
+        return {
+          id: result.id,
+          name: result.name,
+          artist: result.artistName,
+          album: result.albumName,
+          duration: result.duration,
+          plainLyrics: result.plainLyrics,
+          syncedLyrics: result.syncedLyrics || null,
+        };
+      }
+    }
+    console.warn('No suitable lyrics found (all instrumental or missing plainLyrics)');
+    return null;
+  } catch (error) {
+    console.error('Failed to fetch lyrics from LRCLIB:', error.message);
+    return null;
+  }
+}
+/**
+ * Get lyrics by LRCLIB ID
+ * @param {number} id - LRCLIB track ID
+ * @returns {Promise<Object|null>} Lyrics result or null
+ */
+export async function getLyricsById(id) {
+  try {
+    const url = `${LRCLIB_API_BASE}/get/${id}`;
+    console.log(`Fetching LRCLIB track: ${id}`);
+    const response = await fetch(url, {
+      headers: {
+        'User-Agent': 'Loukai/1.0',
+      },
+      signal: AbortSignal.timeout(10000),
+    });
+    if (!response.ok) {
+      console.warn(`LRCLIB get failed: ${response.status}`);
+      return null;
+    }
+    const result = await response.json();
+    if (result.instrumental) {
+      console.warn('Track is marked as instrumental');
+      return null;
+    }
+    if (!result.plainLyrics) {
+      console.warn('No plain lyrics in response');
+      return null;
+    }
+    return {
+      id: result.id,
+      name: result.name,
+      artist: result.artistName,
+      album: result.albumName,
+      duration: result.duration,
+      plainLyrics: result.plainLyrics,
+      syncedLyrics: result.syncedLyrics || null,
+    };
+  } catch (error) {
+    console.error('Failed to fetch lyrics by ID:', error.message);
+    return null;
+  }
+}
+/**
+ * Extract vocabulary hints from lyrics for Whisper context
+ *
+ * @param {string} lyrics - Full lyrics text
+ * @param {number} maxTokens - Maximum tokens for vocabulary hints (default 150)
+ * @returns {string} Comma-separated list of vocabulary words
+ */
+export function extractVocabularyHints(lyrics, maxTokens = 150) {
+  if (!lyrics) {
+    return '';
+  }
+  // Keep only letters (English + common accented characters)
+  const wordsOnly = lyrics.replace(/[^a-zA-ZáéíóúñüÁÉÍÓÚÑÜ\s]/g, ' ');
+  // Split into words, filter meaningful ones (> 3 chars)
+  const words = wordsOnly
+    .split(/\s+/)
+    .map((w) => w.toLowerCase())
+    .filter((w) => w.length > 3);
+  // Count word frequency with boost for opening words
+  const wordCounts = new Map();
+  words.forEach((word, i) => {
+    if (!COMMON_WORDS.has(word)) {
+      let count = (wordCounts.get(word) || 0) + 1;
+      // Boost first 3 meaningful words
+      if (i < 3) {
+        count += 1;
+      }
+      wordCounts.set(word, count);
+    }
+  });
+  // Get words with at least 2 occurrences (frequent)
+  const frequentWords = [...wordCounts.entries()]
+    .filter(([_word, count]) => count >= 2)
+    .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
+    .map(([word]) => word);
+  // Build candidate list
+  const candidates = [...frequentWords];
+  // Add single-occurrence words if we have room
+  if (frequentWords.length < 15) {
+    const singleWords = [...wordCounts.entries()]
+      .filter(([_word, count]) => count === 1)
+      .map(([word]) => word)
+      .sort();
+    const remaining = 15 - candidates.length;
+    candidates.push(...singleWords.slice(0, remaining));
+  }
+  // Build vocabulary list respecting token budget
+  const selectedWords = [];
+  let estimatedTokens = 0;
+  for (const word of candidates) {
+    // Rough estimate: 1 token per 4 characters + 1 for separator
+    const wordTokens = Math.ceil(word.length / 4) + 1;
+    if (estimatedTokens + wordTokens <= maxTokens) {
+      selectedWords.push(word);
+      estimatedTokens += wordTokens;
+    } else {
+      break;
+    }
+  }
+  return selectedWords.join(', ');
+}
+/**
+ * Prepare Whisper context with LRCLIB vocabulary enhancement
+ *
+ * @param {string} title - Song title
+ * @param {string} artist - Artist name
+ * @param {string} existingLyrics - Optional pre-fetched lyrics
+ * @returns {Promise<Object>} Object with initialPrompt and lyrics
+ */
+export async function prepareWhisperContext(title, artist, existingLyrics = null) {
+  let lyrics = existingLyrics;
+  // Fetch lyrics if not provided
+  if (!lyrics) {
+    const result = await searchLyrics(title, artist);
+    lyrics = result?.plainLyrics || null;
+  }
+  // Build initial prompt
+  let initialPrompt = null;
+  if (lyrics) {
+    // Calculate available tokens for vocabulary hints
+    // Whisper limit: 224 tokens total
+    // Reserve 30 tokens for safety buffer
+    const basePrompt = title ? `${title}. ` : '';
+    const baseTokens = Math.ceil(basePrompt.length / 4) + 2; // +2 for safety
+    const safetyBuffer = 30;
+    const maxVocabTokens = 224 - baseTokens - safetyBuffer;
+    // Extract vocabulary hints
+    const vocabularyHints = extractVocabularyHints(lyrics, maxVocabTokens);
+    if (vocabularyHints) {
+      initialPrompt = `${title}. ${vocabularyHints}`;
+      console.log(`Whisper initial prompt: ${initialPrompt.substring(0, 100)}...`);
+    } else {
+      initialPrompt = title;
+    }
+  } else if (title) {
+    initialPrompt = title;
+  } else if (artist) {
+    initialPrompt = artist;
+  }
+  return {
+    initialPrompt,
+    lyrics,
+    hasLyrics: Boolean(lyrics),
+  };
+}
+/**
+ * Parse synced lyrics (LRC format) into timed segments
+ *
+ * @param {string} syncedLyrics - LRC format lyrics
+ * @returns {Array<{time: number, text: string}>} Array of timed lyrics
+ */
+export function parseSyncedLyrics(syncedLyrics) {
+  if (!syncedLyrics) {
+    return [];
+  }
+  const lines = syncedLyrics.split('\n');
+  const result = [];
+  // LRC format: [mm:ss.xx]lyrics
+  const timeRegex = /\[(\d{2}):(\d{2})\.(\d{2,3})\]/;
+  for (const line of lines) {
+    const match = line.match(timeRegex);
+    if (match) {
+      const minutes = parseInt(match[1], 10);
+      const seconds = parseInt(match[2], 10);
+      const hundredths = parseInt(match[3].padEnd(3, '0').slice(0, 3), 10);
+      const time = minutes * 60 + seconds + hundredths / 1000;
+      const text = line.replace(timeRegex, '').trim();
+      if (text) {
+        result.push({ time, text });
+      }
+    }
+  }
+  return result.sort((a, b) => a.time - b.time);
+}
+// Default export: the same five functions that are also exported by name,
+// grouped on one object.
+export default {
+  searchLyrics,
+  getLyricsById,
+  extractVocabularyHints,
+  prepareWhisperContext,
+  parseSyncedLyrics,
+};

package/src/main/creator/python/crepe_runner.py ADDED Viewed

@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+"""
+CREPE Runner - Pitch detection for Loukai Creator
+Usage: python crepe_runner.py '{"input": "path/to/vocals.wav", "output": "path/to/pitch.json"}'
+Detects pitch (F0) from vocal audio for karaoke scoring.
+Outputs pitch data as JSON to stdout.
+Progress updates are sent to stderr in format: PROGRESS:percent:message
+"""
+import json
+import sys
+import os
+def progress(percent, message):
+    """Send progress update to stderr"""
+    print(f"PROGRESS:{percent}:{message}", file=sys.stderr, flush=True)
+def main():
+    if len(sys.argv) < 2:
+        print(json.dumps({"error": "Missing arguments"}))
+        sys.exit(1)
+    try:
+        args = json.loads(sys.argv[1])
+    except json.JSONDecodeError as e:
+        print(json.dumps({"error": f"Invalid JSON arguments: {e}"}))
+        sys.exit(1)
+    input_path = args.get("input")
+    output_path = args.get("output")
+    hop_length = args.get("hop_length", 512)  # ~11.6ms at 44100 Hz
+    model_capacity = args.get("model", "tiny")  # 'tiny', 'small', 'medium', 'large', 'full' - tiny is fast and accurate enough
+    if not input_path:
+        print(json.dumps({"error": "Missing input path"}))
+        sys.exit(1)
+    try:
+        import torch
+        import torchaudio
+        import torchcrepe
+        import numpy as np
+        # Detect device (CREPE has issues with MPS viterbi decoder, use CPU)
+        if torch.cuda.is_available():
+            device = "cuda"
+            device_name = torch.cuda.get_device_name(0)
+        else:
+            # Force CPU even on Apple Silicon (CREPE's viterbi decoder hangs on MPS)
+            device = "cpu"
+            device_name = "CPU"
+        progress(0, f"Loading vocal audio on {device_name}")
+        # Load audio using soundfile (avoids torchcodec requirement)
+        import soundfile as sf
+        audio_np, sample_rate = sf.read(input_path, always_2d=True)
+        # Convert to torch tensor and transpose to [channels, samples]
+        audio = torch.from_numpy(audio_np.T).float()
+        duration = audio.shape[1] / sample_rate
+        progress(5, f"Loaded {duration:.1f}s of audio")
+        # Convert to mono if stereo
+        if audio.shape[0] > 1:
+            audio = audio.mean(dim=0, keepdim=True)
+            progress(8, "Converted to mono")
+        # Resample to 16kHz (CREPE's expected sample rate)
+        if sample_rate != 16000:
+            progress(10, f"Resampling from {sample_rate}Hz to 16kHz")
+            import torchaudio.functional
+            # Resample on CPU to avoid MPS float64 issues
+            audio = torchaudio.functional.resample(audio, sample_rate, 16000)
+            sample_rate = 16000
+        audio = audio.to(device)
+        progress(15, f"🎵 Detecting pitch ({model_capacity} model)...")
+        # Run CREPE
+        # Returns: (pitch, periodicity) - periodicity is confidence-like (0-1)
+        import time
+        start_time = time.time()
+        frequency, periodicity = torchcrepe.predict(
+            audio,
+            sample_rate,
+            hop_length=hop_length,
+            model=model_capacity,
+            device=device,
+            return_periodicity=True,
+            batch_size=2048,
+            decoder=torchcrepe.decode.argmax  # Use argmax instead of viterbi (viterbi hangs on MPS)
+        )
+        elapsed_time = time.time() - start_time
+        progress(75, f"Processing pitch data (CREPE took {elapsed_time:.1f}s for {duration:.1f}s audio)")
+        print(f"⏱️ CREPE timing: {elapsed_time:.1f}s for {duration:.1f}s of audio ({elapsed_time/duration:.2f}x realtime)", file=sys.stderr, flush=True)
+        # Convert to numpy
+        frequency = frequency.cpu().numpy().flatten()
+        confidence = periodicity.cpu().numpy().flatten()  # periodicity is the confidence
+        # Compute time array from hop_length
+        num_frames = len(frequency)
+        time = np.arange(num_frames) * hop_length / sample_rate
+        # Calculate stats
+        valid_frames = (frequency > 0) & (confidence > 0.5)
+        voiced_percent = (valid_frames.sum() / len(frequency)) * 100
+        avg_confidence = confidence[valid_frames].mean() if valid_frames.any() else 0
+        progress(80, f"Found pitch in {voiced_percent:.0f}% of frames")
+        # Filter out low confidence predictions
+        # Set frequency to 0 where confidence is low
+        frequency[confidence < 0.5] = 0
+        # Convert frequency to MIDI note numbers for easier use
+        # MIDI = 69 + 12 * log2(f/440)
+        midi = np.zeros_like(frequency)
+        valid = frequency > 0
+        midi[valid] = 69 + 12 * np.log2(frequency[valid] / 440.0)
+        # Calculate vocal range
+        if valid.any():
+            min_midi = midi[valid].min()
+            max_midi = midi[valid].max()
+            range_semitones = max_midi - min_midi
+            progress(85, f"Vocal range: {range_semitones:.0f} semitones")
+        else:
+            progress(85, "No pitched vocals detected")
+        # Downsample for storage efficiency (keep every Nth point)
+        # Original is ~86 fps, downsample to ~20 fps
+        downsample_factor = 4
+        time_ds = time[::downsample_factor].tolist()
+        frequency_ds = frequency[::downsample_factor].tolist()
+        midi_ds = midi[::downsample_factor].tolist()
+        confidence_ds = confidence[::downsample_factor].tolist()
+        progress(90, f"Downsampled to {len(time_ds)} points")
+        # Build output
+        pitch_data = {
+            "time": [round(t, 4) for t in time_ds],
+            "frequency": [round(f, 2) if f > 0 else 0 for f in frequency_ds],
+            "midi": [round(m, 2) if m > 0 else 0 for m in midi_ds],
+            "confidence": [round(c, 3) for c in confidence_ds],
+            "sample_rate": sample_rate,
+            "hop_length": hop_length * downsample_factor,
+            "model": model_capacity
+        }
+        # Save to file if output path specified
+        if output_path:
+            progress(95, "Saving pitch data")
+            with open(output_path, 'w') as f:
+                json.dump(pitch_data, f)
+        progress(100, f"✓ Pitch detection complete ({len(time_ds)} points)")
+        # Output result
+        result = {
+            "success": True,
+            "num_frames": len(time_ds),
+            "duration": float(time[-1]) if len(time) > 0 else 0,
+            "device": device,
+            "voiced_percent": round(voiced_percent, 1),
+            "avg_confidence": round(float(avg_confidence), 3),
+            "pitch_data": pitch_data if not output_path else None,
+            "output_file": output_path
+        }
+        print(json.dumps(result))
+    except Exception as e:
+        import traceback
+        print(json.dumps({
+            "error": str(e),
+            "traceback": traceback.format_exc()
+        }))
+        sys.exit(1)
+# Run the job only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

package/src/main/creator/python/demucs_runner.py ADDED Viewed

@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""
+Demucs Runner - Stem separation for Loukai Creator
+Usage: python demucs_runner.py '{"input": "path/to/audio.wav", "output_dir": "path/to/output", "model": "htdemucs_ft"}'
+Outputs stems as WAV files and prints JSON result to stdout.
+Progress updates are sent to stderr in format: PROGRESS:percent:message
+tqdm progress bars are also output to stderr and parsed by Node.js
+"""
+import json
+import sys
+from pathlib import Path
+def progress(percent, message):
+    """Send progress update to stderr"""
+    print(f"PROGRESS:{percent}:{message}", file=sys.stderr, flush=True)
+def main():
+    """Separate an audio file into stems with Demucs, driven by JSON in ``sys.argv[1]``.
+
+    Keys read from the JSON object:
+        input: path of the audio file to separate (required).
+        output_dir: directory that receives one ``<source>.wav`` per stem
+            (required; created if missing).
+        model: Demucs model name (default "htdemucs_ft").
+        num_stems: read with default 4, but not used anywhere in this function.
+
+    Exactly one JSON object is printed to stdout: a success summary listing the
+    stem files, or ``{"error": ...}`` followed by exit status 1. Progress lines
+    and the tqdm bar go to stderr.
+    """
+    if len(sys.argv) < 2:
+        print(json.dumps({"error": "Missing arguments"}))
+        sys.exit(1)
+    try:
+        args = json.loads(sys.argv[1])
+    except json.JSONDecodeError as e:
+        print(json.dumps({"error": f"Invalid JSON arguments: {e}"}))
+        sys.exit(1)
+    input_path = args.get("input")
+    output_dir = args.get("output_dir")
+    model_name = args.get("model", "htdemucs_ft")
+    # NOTE(review): num_stems is parsed but never referenced below - confirm whether a 2-stem mode was intended
+    num_stems = args.get("num_stems", 4)
+    if not input_path or not output_dir:
+        print(json.dumps({"error": "Missing input or output_dir"}))
+        sys.exit(1)
+    try:
+        # Heavy imports live inside the try so a missing package is reported as a JSON error
+        import torch
+        import torchaudio
+        from demucs.pretrained import get_model
+        from demucs.apply import apply_model
+        from demucs.audio import convert_audio
+        # Detect device: CUDA first, then Apple MPS, otherwise CPU
+        if torch.cuda.is_available():
+            device = "cuda"
+            device_name = torch.cuda.get_device_name(0)
+        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            device = "mps"
+            device_name = "Apple Silicon GPU"
+        else:
+            device = "cpu"
+            device_name = "CPU"
+        progress(0, f"Loading model on {device_name}")
+        # Load model
+        model = get_model(model_name)
+        model.to(device)
+        model.eval()
+        # Stem names come from the model itself; they drive both the labels and the output file names
+        source_names = model.sources
+        # Display labels used only in progress messages
+        stem_labels = {
+            'drums': '🥁 Drums',
+            'bass': '🎸 Bass',
+            'other': '🎹 Other',
+            'vocals': '🎤 Vocals',
+            'no_vocals': '🎵 Instrumental',
+        }
+        progress(5, "Loading audio file")
+        # Load audio using soundfile (avoids torchcodec requirement)
+        import soundfile as sf
+        audio_np, sample_rate = sf.read(input_path, always_2d=True)
+        # Convert to torch tensor and transpose to [channels, samples]
+        audio = torch.from_numpy(audio_np.T).float()
+        duration = audio.shape[1] / sample_rate
+        progress(8, f"Loaded {duration:.1f}s audio")
+        # Convert to model format (sample rate and channel count) and move to device;
+        # unsqueeze(0) adds the leading batch dimension
+        # NOTE(review): demucs' own CLI appears to normalise the mix before apply_model and rescale after - confirm feeding raw audio is intended
+        audio = convert_audio(
+            audio.unsqueeze(0),
+            sample_rate,
+            model.samplerate,
+            model.audio_channels
+        ).to(device)
+        stems_str = " + ".join(stem_labels.get(s, s) for s in source_names)
+        progress(10, f"Separating {stems_str}")
+        # Run separation with tqdm progress (parsed by Node.js)
+        with torch.no_grad():
+            sources = apply_model(
+                model,
+                audio,
+                device=device,
+                shifts=1,
+                split=True,
+                overlap=0.25,
+                progress=True  # tqdm output goes to stderr
+            )
+        progress(82, "Separation complete!")
+        # Resample back to the input file's rate if the model ran at a different one
+        if model.samplerate != sample_rate:
+            progress(83, f"Resampling to {sample_rate}Hz")
+            import torchaudio.functional
+            sources = torchaudio.functional.resample(
+                sources.squeeze(0),
+                model.samplerate,
+                sample_rate
+            ).unsqueeze(0)
+        # Save stems
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+        stem_files = {}
+        num_sources = len(source_names)
+        for i, name in enumerate(source_names):
+            # Spread the saving phase over progress values 85..98
+            stem_progress = int(85 + (i / num_sources) * 14)
+            label = stem_labels.get(name, name.capitalize())
+            progress(stem_progress, f"Saving {label}")
+            # Batch index 0, source index i
+            stem_audio = sources[0, i].cpu()
+            stem_path = output_path / f"{name}.wav"
+            # Save using soundfile (avoids torchcodec requirement); transposed back to [samples, channels]
+            sf.write(str(stem_path), stem_audio.numpy().T, sample_rate)
+            stem_files[name] = str(stem_path)
+        progress(100, f"✓ Saved {num_sources} stems")
+        # Success summary for the caller; duration is in seconds
+        print(json.dumps({
+            "success": True,
+            "stems": stem_files,
+            "model": model_name,
+            "device": device,
+            "sample_rate": sample_rate,
+            "duration": duration
+        }))
+    except Exception as e:
+        # Any failure is reported as JSON on stdout and signalled with exit status 1
+        import traceback
+        print(json.dumps({
+            "error": str(e),
+            "traceback": traceback.format_exc()
+        }))
+        sys.exit(1)
+# Run the job only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()