PyPI - nexaai - Versions diffs - 1.0.29__cp310-cp310-macosx_14_0_universal2.whl - Mend

nexaai 1.0.29__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (580) hide show

nexaai/binds/metal/py-lib/mlx_audio/stt/utils.py ADDED Viewed

@@ -0,0 +1,195 @@
+# modified
+import importlib
+import json
+import logging
+from pathlib import Path
+from typing import List, Optional
+import mlx.core as mx
+import numpy as np
+import soundfile as sf
+from scipy import signal
+# Default target sample rate (Hz); used as the default `sr` of load_audio
+SAMPLE_RATE = 16000
+# Aliases consulted by get_model_and_args: model_type -> module name to import
+# (empty here, so every model_type maps to itself)
+MODEL_REMAPPING = {}
+# Not referenced in the visible part of this file; presumably a size limit in GB — TODO confirm
+MAX_FILE_SIZE_GB = 5
+# Not referenced in the visible part of this file; presumably the dtypes accepted for conversion — TODO confirm
+MODEL_CONVERSION_DTYPES = ["float16", "bfloat16", "float32"]
+def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
+    gcd = np.gcd(orig_sr, target_sr)
+    up = target_sr // gcd
+    down = orig_sr // gcd
+    resampled = signal.resample_poly(audio, up, down, padtype="edge")
+    return resampled
+def load_audio(
+    file: str = Optional[str],
+    sr: int = SAMPLE_RATE,
+    from_stdin=False,
+    dtype: mx.Dtype = mx.float32,
+):
+    """
+    Open an audio file and read as mono waveform, resampling as necessary
+    Parameters
+    ----------
+    file: str
+        The audio file to open
+    sr: int
+        The sample rate to resample the audio if necessary
+    Returns
+    -------
+    A NumPy array containing the audio waveform, in float32 dtype.
+    """
+    audio, sample_rate = sf.read(file, always_2d=True)
+    if sample_rate != sr:
+        audio = resample_audio(audio, sample_rate, sr)
+    return mx.array(audio, dtype=dtype).mean(axis=1)
+def get_model_path(path: str) -> Path:
+    """
+    Ensures the model is available locally. Only works with local paths.
+    Args:
+        path (str): The local path to the model.
+    Returns:
+        Path: The path to the model.
+    Raises:
+        FileNotFoundError: If the local path does not exist.
+    """
+    model_path = Path(path)
+    if not model_path.exists():
+        raise FileNotFoundError(f"Model path '{path}' does not exist locally. Please ensure the model is available at the specified path.")
+    return model_path
+# Get a list of all available model types from the models directory
+def get_available_models():
+    """
+    Get a list of all available TTS model types by scanning the models directory.
+    Returns:
+        List[str]: A list of available model type names
+    """
+    models_dir = Path(__file__).parent / "models"
+    available_models = []
+    if models_dir.exists() and models_dir.is_dir():
+        for item in models_dir.iterdir():
+            if item.is_dir() and not item.name.startswith("__"):
+                available_models.append(item.name)
+    return available_models
+def load_config(model_path: Path) -> dict:
+    """
+    Load the model configuration from config.json.
+    Args:
+        model_path (Path): Path to the model directory.
+    Returns:
+        dict: The model configuration.
+    Raises:
+        FileNotFoundError: If config.json is not found.
+    """
+    try:
+        with open(model_path / "config.json", "r") as f:
+            config = json.load(f)
+    except FileNotFoundError:
+        logging.error(f"Config file not found in {model_path}")
+        raise
+    return config
+def get_model_and_args(model_type: str):
+    """
+    Retrieve the model architecture module based on the model type.
+    Args:
+        model_type (str): The type of model to load (e.g., "whisper", "parakeet").
+    Returns:
+        Tuple[module, str]: A tuple containing:
+            - The imported architecture module
+            - The resolved model_type string after remapping
+    Raises:
+        ValueError: If the model type is not supported (module import fails).
+    """
+    # Check if the model type is in the remapping
+    model_type = MODEL_REMAPPING.get(model_type, model_type)
+    try:
+        arch = importlib.import_module(f"mlx_audio.stt.models.{model_type}")
+    except ImportError:
+        msg = f"Model type {model_type} not supported."
+        logging.error(msg)
+        raise ValueError(msg)
+    return arch, model_type
+def load_model(model_path: str, lazy: bool = False, strict: bool = True, **kwargs):
+    """
+    Load and initialize the model from a given path.
+    Args:
+        model_path (str): The path to load the model from.
+        lazy (bool): If False eval the model parameters to make sure they are
+            loaded in memory before returning, otherwise they will be loaded
+            when needed. Default: ``False``
+    Returns:
+        nn.Module: The loaded and initialized model.
+    Raises:
+        FileNotFoundError: If the weight files (.safetensors) are not found.
+        ValueError: If the model class or args class are not found or cannot be instantiated.
+    """
+    # Convert to Path object for easier handling
+    if isinstance(model_path, str):
+        model_path = Path(model_path)
+    elif not isinstance(model_path, Path):
+        raise ValueError(f"Invalid model path type: {type(model_path)}")
+    # Load configuration to get model_type
+    config = load_config(model_path)
+    model_type = config.get("model_type")
+    if model_type is None:
+        # Fallback: try to infer model_type from the path name
+        directory_name = model_path.name
+        parts = directory_name.split("-")
+        model_class = None
+        for part in parts:
+            try:
+                model_class, model_type = get_model_and_args(part)
+                break
+            except ValueError:
+                continue
+        if model_class is None:
+            raise ValueError(f"Model type not found in config.json at {model_path} and could not be inferred from path name '{directory_name}'")
+    else:
+        model_class, model_type = get_model_and_args(model_type)
+    model = model_class.Model.from_pretrained(model_path)
+    if not lazy:
+        model.eval()
+    return model

nexaai/binds/metal/py-lib/mlx_audio/tts/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

nexaai/binds/metal/py-lib/mlx_audio/tts/audio_player.py ADDED Viewed

@@ -0,0 +1,120 @@
+import time
+from collections import deque
+from threading import Event, Lock
+import numpy as np
+import sounddevice as sd
+class AudioPlayer:
+    min_buffer_seconds = 1.5  # with respect to real-time, not the sample rate
+    measure_window = 0.25
+    ema_alpha = 0.25
+    def __init__(self, sample_rate=24_000, buffer_size=2048):
+        self.sample_rate = sample_rate
+        self.buffer_size = buffer_size
+        self.audio_buffer = deque()
+        self.buffer_lock = Lock()
+        self.stream: sd.OutputStream | None = None
+        self.playing = False
+        self.drain_event = Event()
+        self.window_sample_count = 0
+        self.window_start = time.perf_counter()
+        self.arrival_rate = sample_rate  # assume real-time to start
+    def callback(self, outdata, frames, time, status):
+        outdata.fill(0)  # initialize the frame with silence
+        filled = 0
+        with self.buffer_lock:
+            while filled < frames and self.audio_buffer:
+                buf = self.audio_buffer[0]
+                to_copy = min(frames - filled, len(buf))
+                outdata[filled : filled + to_copy, 0] = buf[:to_copy]
+                filled += to_copy
+                if to_copy == len(buf):
+                    self.audio_buffer.popleft()
+                else:
+                    self.audio_buffer[0] = buf[to_copy:]
+            if not self.audio_buffer and filled < frames:
+                self.drain_event.set()
+                self.playing = False
+                raise sd.CallbackStop()
+    def start_stream(self):
+        print("Starting audio stream...")
+        self.stream = sd.OutputStream(
+            samplerate=self.sample_rate,
+            channels=1,
+            callback=self.callback,
+            blocksize=self.buffer_size,
+        )
+        self.stream.start()
+        self.playing = True
+        self.drain_event.clear()
+    def stop_stream(self):
+        try:
+            if self.stream:
+                self.stream.stop()
+                self.stream.close()
+        finally:
+            self.stream = None
+            self.playing = False
+    def buffered_samples(self) -> int:
+        return sum(map(len, self.audio_buffer))
+    def queue_audio(self, samples):
+        if not len(samples):
+            return
+        now = time.perf_counter()
+        # arrival-rate statistics
+        self.window_sample_count += len(samples)
+        if now - self.window_start >= self.measure_window:
+            inst_rate = self.window_sample_count / (now - self.window_start)
+            self.arrival_rate = (
+                inst_rate
+                if self.arrival_rate is None
+                else self.ema_alpha * inst_rate
+                + (1 - self.ema_alpha) * self.arrival_rate
+            )
+            self.window_sample_count = 0
+            self.window_start = now
+        with self.buffer_lock:
+            self.audio_buffer.append(np.asarray(samples))
+        # start playback only when we have enough buffered audio
+        needed = int(self.arrival_rate * self.min_buffer_seconds)
+        if not self.playing and self.buffered_samples() >= needed:
+            self.start_stream()
+    def wait_for_drain(self):
+        return self.drain_event.wait()
+    def stop(self):
+        if self.playing:
+            self.wait_for_drain()
+            sd.sleep(100)
+            self.stop_stream()
+            self.playing = False
+    def flush(self):
+        """Discard everything and stop playback immediately."""
+        if not self.playing:
+            return
+        with self.buffer_lock:
+            self.audio_buffer.clear()
+        self.stop_stream()
+        self.playing = False
+        self.drain_event.set()

nexaai/binds/metal/py-lib/mlx_audio/tts/convert.py ADDED Viewed

@@ -0,0 +1,71 @@
+# Copyright © 2023-2024 Prince Canuma
+import argparse
+from .utils import convert
+QUANT_RECIPES = ["mixed_2_6", "mixed_3_4", "mixed_3_6", "mixed_4_6"]
+def configure_parser() -> argparse.ArgumentParser:
+    """
+    Configures and returns the argument parser for the script.
+    Returns:
+        argparse.ArgumentParser: Configured argument parser.
+    """
+    parser = argparse.ArgumentParser(
+        description="Convert Hugging Face model to MLX format"
+    )
+    parser.add_argument("--hf-path", type=str, help="Path to the Hugging Face model.")
+    parser.add_argument(
+        "--mlx-path", type=str, default="mlx_model", help="Path to save the MLX model."
+    )
+    parser.add_argument(
+        "-q", "--quantize", help="Generate a quantized model.", action="store_true"
+    )
+    parser.add_argument(
+        "--q-group-size", help="Group size for quantization.", type=int, default=64
+    )
+    parser.add_argument(
+        "--q-bits", help="Bits per weight for quantization.", type=int, default=4
+    )
+    parser.add_argument(
+        "--quant-predicate",
+        help=f"Mixed-bit quantization recipe.",
+        choices=QUANT_RECIPES,
+        type=str,
+        required=False,
+    )
+    parser.add_argument(
+        "--dtype",
+        help="Type to save the parameters, ignored if -q is given.",
+        type=str,
+        choices=["float16", "bfloat16", "float32"],
+        default="float16",
+    )
+    parser.add_argument(
+        "--upload-repo",
+        help="The Hugging Face repo to upload the model to.",
+        type=str,
+        default=None,
+    )
+    parser.add_argument(
+        "-d",
+        "--dequantize",
+        help="Dequantize a quantized model.",
+        action="store_true",
+        default=False,
+    )
+    return parser
+def main():
+    parser = configure_parser()
+    args = parser.parse_args()
+    convert(**vars(args))
+if __name__ == "__main__":
+    main()