nexaai 1.0.21rc5__cp313-cp313-win_arm64.whl → 1.0.21rc14__cp313-cp313-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic. Click here for more details.
- nexaai/__init__.py +95 -95
- nexaai/_stub.cp313-win_arm64.pyd +0 -0
- nexaai/_version.py +4 -1
- nexaai/asr.py +68 -65
- nexaai/asr_impl/mlx_asr_impl.py +92 -92
- nexaai/asr_impl/pybind_asr_impl.py +127 -44
- nexaai/base.py +39 -39
- nexaai/binds/__init__.py +6 -5
- nexaai/binds/asr_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/common_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/cpu_gpu/ggml-base.dll +0 -0
- nexaai/binds/cpu_gpu/ggml-cpu.dll +0 -0
- nexaai/binds/cpu_gpu/ggml-opencl.dll +0 -0
- nexaai/binds/cpu_gpu/ggml.dll +0 -0
- nexaai/binds/cpu_gpu/mtmd.dll +0 -0
- nexaai/binds/cpu_gpu/nexa_cpu_gpu.dll +0 -0
- nexaai/binds/cpu_gpu/nexa_plugin.dll +0 -0
- nexaai/binds/embedder_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/libcrypto-3-arm64.dll +0 -0
- nexaai/binds/libssl-3-arm64.dll +0 -0
- nexaai/binds/llm_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/nexa_bridge.dll +0 -0
- nexaai/binds/npu/convnext-sdk.dll +0 -0
- nexaai/binds/npu/embed-gemma-sdk.dll +0 -0
- nexaai/binds/npu/ggml-base.dll +0 -0
- nexaai/binds/npu/ggml-cpu.dll +0 -0
- nexaai/binds/npu/ggml-opencl.dll +0 -0
- nexaai/binds/npu/ggml.dll +0 -0
- nexaai/binds/npu/granite-nano-sdk.dll +0 -0
- nexaai/binds/npu/granite4-sdk.dll +0 -0
- nexaai/binds/npu/jina-rerank-sdk.dll +0 -0
- nexaai/binds/npu/liquid-sdk.dll +0 -0
- nexaai/binds/npu/llama3-3b-sdk.dll +0 -0
- nexaai/binds/npu/nexa-mm-process.dll +0 -0
- nexaai/binds/npu/nexa-sampling.dll +0 -0
- nexaai/binds/npu/nexa_plugin.dll +0 -0
- nexaai/binds/npu/omni-neural-sdk.dll +0 -0
- nexaai/binds/npu/openblas.dll +0 -0
- nexaai/binds/npu/paddleocr-sdk.dll +0 -0
- nexaai/binds/npu/parakeet-sdk.dll +0 -0
- nexaai/binds/npu/phi3-5-sdk.dll +0 -0
- nexaai/binds/npu/phi4-sdk.dll +0 -0
- nexaai/binds/npu/pyannote-sdk.dll +0 -0
- nexaai/binds/npu/qwen3-4b-sdk.dll +0 -0
- nexaai/binds/npu/qwen3vl-sdk.dll +0 -0
- nexaai/binds/npu/qwen3vl-vision.dll +0 -0
- nexaai/binds/npu/yolov12-sdk.dll +0 -0
- nexaai/binds/npu/zlib1.dll +0 -0
- nexaai/binds/rerank_bind.cp313-win_arm64.pyd +0 -0
- nexaai/binds/vlm_bind.cp313-win_arm64.pyd +0 -0
- nexaai/common.py +105 -105
- nexaai/cv.py +93 -93
- nexaai/cv_impl/mlx_cv_impl.py +89 -89
- nexaai/cv_impl/pybind_cv_impl.py +32 -32
- nexaai/embedder.py +73 -73
- nexaai/embedder_impl/mlx_embedder_impl.py +118 -118
- nexaai/embedder_impl/pybind_embedder_impl.py +96 -96
- nexaai/image_gen.py +141 -141
- nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -292
- nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -85
- nexaai/llm.py +98 -98
- nexaai/llm_impl/mlx_llm_impl.py +271 -271
- nexaai/llm_impl/pybind_llm_impl.py +220 -220
- nexaai/log.py +92 -92
- nexaai/rerank.py +57 -57
- nexaai/rerank_impl/mlx_rerank_impl.py +94 -94
- nexaai/rerank_impl/pybind_rerank_impl.py +136 -136
- nexaai/runtime.py +68 -68
- nexaai/runtime_error.py +24 -24
- nexaai/tts.py +75 -75
- nexaai/tts_impl/mlx_tts_impl.py +94 -94
- nexaai/tts_impl/pybind_tts_impl.py +43 -43
- nexaai/utils/decode.py +17 -17
- nexaai/utils/manifest_utils.py +531 -531
- nexaai/utils/model_manager.py +1562 -1562
- nexaai/utils/model_types.py +49 -49
- nexaai/utils/progress_tracker.py +384 -384
- nexaai/utils/quantization_utils.py +245 -245
- nexaai/vlm.py +129 -129
- nexaai/vlm_impl/mlx_vlm_impl.py +258 -258
- nexaai/vlm_impl/pybind_vlm_impl.py +256 -256
- {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/METADATA +1 -1
- nexaai-1.0.21rc14.dist-info/RECORD +154 -0
- nexaai/binds/nexaml/FLAC.dll +0 -0
- nexaai/binds/nexaml/fftw3.dll +0 -0
- nexaai/binds/nexaml/fftw3f.dll +0 -0
- nexaai/binds/nexaml/ggml-base.dll +0 -0
- nexaai/binds/nexaml/ggml-cpu.dll +0 -0
- nexaai/binds/nexaml/ggml-opencl.dll +0 -0
- nexaai/binds/nexaml/ggml.dll +0 -0
- nexaai/binds/nexaml/libmp3lame.DLL +0 -0
- nexaai/binds/nexaml/mpg123.dll +0 -0
- nexaai/binds/nexaml/nexa-mm-process.dll +0 -0
- nexaai/binds/nexaml/nexa-sampling.dll +0 -0
- nexaai/binds/nexaml/nexa_plugin.dll +0 -0
- nexaai/binds/nexaml/nexaproc.dll +0 -0
- nexaai/binds/nexaml/ogg.dll +0 -0
- nexaai/binds/nexaml/opus.dll +0 -0
- nexaai/binds/nexaml/qwen3-vl.dll +0 -0
- nexaai/binds/nexaml/qwen3vl-vision.dll +0 -0
- nexaai/binds/nexaml/vorbis.dll +0 -0
- nexaai/binds/nexaml/vorbisenc.dll +0 -0
- nexaai-1.0.21rc5.dist-info/RECORD +0 -162
- {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/WHEEL +0 -0
- {nexaai-1.0.21rc5.dist-info → nexaai-1.0.21rc14.dist-info}/top_level.txt +0 -0
nexaai/runtime.py
CHANGED
|
@@ -1,68 +1,68 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
import atexit
|
|
3
|
-
import threading
|
|
4
|
-
from typing import Optional, Any
|
|
5
|
-
|
|
6
|
-
from nexaai.binds import common_bind
|
|
7
|
-
|
|
8
|
-
_init_lock = threading.Lock()
|
|
9
|
-
_runtime_alive = False # global flag
|
|
10
|
-
|
|
11
|
-
def _ensure_runtime() -> None:
|
|
12
|
-
"""Initialise the runtime exactly once (thread‑safe, lazy)."""
|
|
13
|
-
global _runtime_alive
|
|
14
|
-
if not _runtime_alive:
|
|
15
|
-
with _init_lock:
|
|
16
|
-
if not _runtime_alive: # double‑checked locking
|
|
17
|
-
common_bind.ml_init()
|
|
18
|
-
_runtime_alive = True
|
|
19
|
-
atexit.register(_shutdown_runtime)
|
|
20
|
-
|
|
21
|
-
def _shutdown_runtime() -> None:
|
|
22
|
-
"""Tear the runtime down; idempotent and registered with atexit."""
|
|
23
|
-
global _runtime_alive
|
|
24
|
-
if _runtime_alive:
|
|
25
|
-
common_bind.ml_deinit()
|
|
26
|
-
_runtime_alive = False
|
|
27
|
-
|
|
28
|
-
# Public helper so advanced users can reclaim memory on demand
|
|
29
|
-
shutdown = _shutdown_runtime
|
|
30
|
-
|
|
31
|
-
def is_initialized() -> bool:
|
|
32
|
-
"""Check if the runtime has been initialized."""
|
|
33
|
-
return _runtime_alive
|
|
34
|
-
|
|
35
|
-
# ----------------------------------------------------------------------
|
|
36
|
-
# Single public class
|
|
37
|
-
# ----------------------------------------------------------------------
|
|
38
|
-
class Session:
|
|
39
|
-
"""
|
|
40
|
-
Model session **and** runtime guard in one object.
|
|
41
|
-
|
|
42
|
-
sess = myrt.Session("foo.mdl")
|
|
43
|
-
out = sess.run(inputs)
|
|
44
|
-
sess.close() # optional (model only)
|
|
45
|
-
|
|
46
|
-
The global runtime is initialised lazily when the first Session
|
|
47
|
-
is created and stays alive until:
|
|
48
|
-
• the interpreter exits, or
|
|
49
|
-
• `myrt.shutdown()` is called.
|
|
50
|
-
"""
|
|
51
|
-
|
|
52
|
-
# ---- construction -------------------------------------------------
|
|
53
|
-
def __init__(self, model_path: str) -> None:
|
|
54
|
-
_ensure_runtime()
|
|
55
|
-
|
|
56
|
-
# safety net – make GC close the model
|
|
57
|
-
def __del__(self) -> None:
|
|
58
|
-
try:
|
|
59
|
-
self.close()
|
|
60
|
-
except Exception:
|
|
61
|
-
pass
|
|
62
|
-
|
|
63
|
-
# allow `with Session(...) as s:` syntax
|
|
64
|
-
def __enter__(self) -> "Session":
|
|
65
|
-
return self
|
|
66
|
-
|
|
67
|
-
def __exit__(self, exc_type, exc, tb) -> None:
|
|
68
|
-
self.close()
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import atexit
|
|
3
|
+
import threading
|
|
4
|
+
from typing import Optional, Any
|
|
5
|
+
|
|
6
|
+
from nexaai.binds import common_bind
|
|
7
|
+
|
|
8
|
+
_init_lock = threading.Lock()
|
|
9
|
+
_runtime_alive = False # global flag
|
|
10
|
+
|
|
11
|
+
def _ensure_runtime() -> None:
|
|
12
|
+
"""Initialise the runtime exactly once (thread‑safe, lazy)."""
|
|
13
|
+
global _runtime_alive
|
|
14
|
+
if not _runtime_alive:
|
|
15
|
+
with _init_lock:
|
|
16
|
+
if not _runtime_alive: # double‑checked locking
|
|
17
|
+
common_bind.ml_init()
|
|
18
|
+
_runtime_alive = True
|
|
19
|
+
atexit.register(_shutdown_runtime)
|
|
20
|
+
|
|
21
|
+
def _shutdown_runtime() -> None:
|
|
22
|
+
"""Tear the runtime down; idempotent and registered with atexit."""
|
|
23
|
+
global _runtime_alive
|
|
24
|
+
if _runtime_alive:
|
|
25
|
+
common_bind.ml_deinit()
|
|
26
|
+
_runtime_alive = False
|
|
27
|
+
|
|
28
|
+
# Public helper so advanced users can reclaim memory on demand
|
|
29
|
+
shutdown = _shutdown_runtime
|
|
30
|
+
|
|
31
|
+
def is_initialized() -> bool:
|
|
32
|
+
"""Check if the runtime has been initialized."""
|
|
33
|
+
return _runtime_alive
|
|
34
|
+
|
|
35
|
+
# ----------------------------------------------------------------------
|
|
36
|
+
# Single public class
|
|
37
|
+
# ----------------------------------------------------------------------
|
|
38
|
+
class Session:
|
|
39
|
+
"""
|
|
40
|
+
Model session **and** runtime guard in one object.
|
|
41
|
+
|
|
42
|
+
sess = myrt.Session("foo.mdl")
|
|
43
|
+
out = sess.run(inputs)
|
|
44
|
+
sess.close() # optional (model only)
|
|
45
|
+
|
|
46
|
+
The global runtime is initialised lazily when the first Session
|
|
47
|
+
is created and stays alive until:
|
|
48
|
+
• the interpreter exits, or
|
|
49
|
+
• `myrt.shutdown()` is called.
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
# ---- construction -------------------------------------------------
|
|
53
|
+
def __init__(self, model_path: str) -> None:
|
|
54
|
+
_ensure_runtime()
|
|
55
|
+
|
|
56
|
+
# safety net – make GC close the model
|
|
57
|
+
def __del__(self) -> None:
|
|
58
|
+
try:
|
|
59
|
+
self.close()
|
|
60
|
+
except Exception:
|
|
61
|
+
pass
|
|
62
|
+
|
|
63
|
+
# allow `with Session(...) as s:` syntax
|
|
64
|
+
def __enter__(self) -> "Session":
|
|
65
|
+
return self
|
|
66
|
+
|
|
67
|
+
def __exit__(self, exc_type, exc, tb) -> None:
|
|
68
|
+
self.close()
|
nexaai/runtime_error.py
CHANGED
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
"""Runtime errors for Nexa SDK operations."""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class NexaRuntimeError(Exception):
|
|
5
|
-
"""Base class for Nexa runtime errors."""
|
|
6
|
-
|
|
7
|
-
def __init__(self, message: str, error_code: int = None):
|
|
8
|
-
self.error_code = error_code
|
|
9
|
-
super().__init__(message)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class ContextLengthExceededError(NexaRuntimeError):
|
|
13
|
-
"""Raised when the input context length exceeds the model's maximum."""
|
|
14
|
-
|
|
15
|
-
def __init__(self, message: str = "Input context length exceeded model's maximum", error_code: int = None):
|
|
16
|
-
super().__init__(message, error_code)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class GenerationError(NexaRuntimeError):
|
|
20
|
-
"""Raised when generation fails."""
|
|
21
|
-
|
|
22
|
-
def __init__(self, message: str = "Generation failed", error_code: int = None):
|
|
23
|
-
super().__init__(message, error_code)
|
|
24
|
-
|
|
1
|
+
"""Runtime errors for Nexa SDK operations."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class NexaRuntimeError(Exception):
|
|
5
|
+
"""Base class for Nexa runtime errors."""
|
|
6
|
+
|
|
7
|
+
def __init__(self, message: str, error_code: int = None):
|
|
8
|
+
self.error_code = error_code
|
|
9
|
+
super().__init__(message)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ContextLengthExceededError(NexaRuntimeError):
|
|
13
|
+
"""Raised when the input context length exceeds the model's maximum."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, message: str = "Input context length exceeded model's maximum", error_code: int = None):
|
|
16
|
+
super().__init__(message, error_code)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class GenerationError(NexaRuntimeError):
|
|
20
|
+
"""Raised when generation fails."""
|
|
21
|
+
|
|
22
|
+
def __init__(self, message: str = "Generation failed", error_code: int = None):
|
|
23
|
+
super().__init__(message, error_code)
|
|
24
|
+
|
nexaai/tts.py
CHANGED
|
@@ -1,75 +1,75 @@
|
|
|
1
|
-
from typing import List, Optional, Union
|
|
2
|
-
from abc import abstractmethod
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from nexaai.base import BaseModel
|
|
6
|
-
from nexaai.common import PluginID
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
@dataclass
|
|
10
|
-
class TTSConfig:
|
|
11
|
-
"""Configuration for TTS."""
|
|
12
|
-
voice: str = "default"
|
|
13
|
-
speed: float = 1.0
|
|
14
|
-
seed: int = -1 # –1 for random
|
|
15
|
-
sample_rate: int = 22050
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@dataclass
|
|
19
|
-
class TTSSamplerConfig:
|
|
20
|
-
"""Configuration for TTS sampling."""
|
|
21
|
-
temperature: float = 1.0
|
|
22
|
-
noise_scale: float = 0.667
|
|
23
|
-
length_scale: float = 1.0
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@dataclass
|
|
27
|
-
class TTSResult:
|
|
28
|
-
"""Result from TTS processing."""
|
|
29
|
-
audio_path: str # Path where the synthesized audio is saved
|
|
30
|
-
duration_seconds: float
|
|
31
|
-
sample_rate: int
|
|
32
|
-
channels: int
|
|
33
|
-
num_samples: int
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class TTS(BaseModel):
|
|
37
|
-
"""Abstract base class for Text-to-Speech models."""
|
|
38
|
-
|
|
39
|
-
def __init__(self):
|
|
40
|
-
"""Initialize base TTS class."""
|
|
41
|
-
pass
|
|
42
|
-
|
|
43
|
-
@classmethod
|
|
44
|
-
def _load_from(cls,
|
|
45
|
-
model_path: str,
|
|
46
|
-
vocoder_path: str,
|
|
47
|
-
plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
|
|
48
|
-
device_id: Optional[str] = None,
|
|
49
|
-
**kwargs
|
|
50
|
-
) -> 'TTS':
|
|
51
|
-
"""Load TTS model from local path, routing to appropriate implementation."""
|
|
52
|
-
# Check plugin_id value for routing - handle both enum and string
|
|
53
|
-
plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
|
|
54
|
-
|
|
55
|
-
if plugin_value == "mlx":
|
|
56
|
-
from nexaai.tts_impl.mlx_tts_impl import MLXTTSImpl
|
|
57
|
-
return MLXTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)
|
|
58
|
-
else:
|
|
59
|
-
from nexaai.tts_impl.pybind_tts_impl import PyBindTTSImpl
|
|
60
|
-
return PyBindTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)
|
|
61
|
-
|
|
62
|
-
@abstractmethod
|
|
63
|
-
def synthesize(
|
|
64
|
-
self,
|
|
65
|
-
text: str,
|
|
66
|
-
config: Optional[TTSConfig] = None,
|
|
67
|
-
output_path: Optional[str] = None,
|
|
68
|
-
) -> TTSResult:
|
|
69
|
-
"""Synthesize speech from text and save to filesystem."""
|
|
70
|
-
pass
|
|
71
|
-
|
|
72
|
-
@abstractmethod
|
|
73
|
-
def list_available_voices(self) -> List[str]:
|
|
74
|
-
"""List available voices."""
|
|
75
|
-
pass
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from nexaai.base import BaseModel
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class TTSConfig:
|
|
11
|
+
"""Configuration for TTS."""
|
|
12
|
+
voice: str = "default"
|
|
13
|
+
speed: float = 1.0
|
|
14
|
+
seed: int = -1 # –1 for random
|
|
15
|
+
sample_rate: int = 22050
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
|
|
19
|
+
class TTSSamplerConfig:
|
|
20
|
+
"""Configuration for TTS sampling."""
|
|
21
|
+
temperature: float = 1.0
|
|
22
|
+
noise_scale: float = 0.667
|
|
23
|
+
length_scale: float = 1.0
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
|
|
27
|
+
class TTSResult:
|
|
28
|
+
"""Result from TTS processing."""
|
|
29
|
+
audio_path: str # Path where the synthesized audio is saved
|
|
30
|
+
duration_seconds: float
|
|
31
|
+
sample_rate: int
|
|
32
|
+
channels: int
|
|
33
|
+
num_samples: int
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TTS(BaseModel):
|
|
37
|
+
"""Abstract base class for Text-to-Speech models."""
|
|
38
|
+
|
|
39
|
+
def __init__(self):
|
|
40
|
+
"""Initialize base TTS class."""
|
|
41
|
+
pass
|
|
42
|
+
|
|
43
|
+
@classmethod
|
|
44
|
+
def _load_from(cls,
|
|
45
|
+
model_path: str,
|
|
46
|
+
vocoder_path: str,
|
|
47
|
+
plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
|
|
48
|
+
device_id: Optional[str] = None,
|
|
49
|
+
**kwargs
|
|
50
|
+
) -> 'TTS':
|
|
51
|
+
"""Load TTS model from local path, routing to appropriate implementation."""
|
|
52
|
+
# Check plugin_id value for routing - handle both enum and string
|
|
53
|
+
plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
|
|
54
|
+
|
|
55
|
+
if plugin_value == "mlx":
|
|
56
|
+
from nexaai.tts_impl.mlx_tts_impl import MLXTTSImpl
|
|
57
|
+
return MLXTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)
|
|
58
|
+
else:
|
|
59
|
+
from nexaai.tts_impl.pybind_tts_impl import PyBindTTSImpl
|
|
60
|
+
return PyBindTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)
|
|
61
|
+
|
|
62
|
+
@abstractmethod
|
|
63
|
+
def synthesize(
|
|
64
|
+
self,
|
|
65
|
+
text: str,
|
|
66
|
+
config: Optional[TTSConfig] = None,
|
|
67
|
+
output_path: Optional[str] = None,
|
|
68
|
+
) -> TTSResult:
|
|
69
|
+
"""Synthesize speech from text and save to filesystem."""
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
@abstractmethod
|
|
73
|
+
def list_available_voices(self) -> List[str]:
|
|
74
|
+
"""List available voices."""
|
|
75
|
+
pass
|
nexaai/tts_impl/mlx_tts_impl.py
CHANGED
|
@@ -1,94 +1,94 @@
|
|
|
1
|
-
# Note: This code is generated by Cursor, not tested yet.
|
|
2
|
-
|
|
3
|
-
from typing import List, Optional, Union
|
|
4
|
-
import os
|
|
5
|
-
|
|
6
|
-
from nexaai.common import PluginID
|
|
7
|
-
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
8
|
-
from nexaai.mlx_backend.tts.interface import MlxTts as MLXTTSInterface
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class MLXTTSImpl(TTS):
|
|
12
|
-
def __init__(self):
|
|
13
|
-
"""Initialize MLX TTS implementation."""
|
|
14
|
-
super().__init__()
|
|
15
|
-
self._mlx_tts = None
|
|
16
|
-
|
|
17
|
-
@classmethod
|
|
18
|
-
def _load_from(cls,
|
|
19
|
-
model_path: str,
|
|
20
|
-
vocoder_path: str,
|
|
21
|
-
plugin_id: Union[PluginID, str] = PluginID.MLX,
|
|
22
|
-
device_id: Optional[str] = None
|
|
23
|
-
) -> 'MLXTTSImpl':
|
|
24
|
-
"""Load TTS model from local path using MLX backend."""
|
|
25
|
-
try:
|
|
26
|
-
# MLX TTS interface is already imported
|
|
27
|
-
|
|
28
|
-
# Create instance and load MLX TTS
|
|
29
|
-
instance = cls()
|
|
30
|
-
instance._mlx_tts = MLXTTSInterface(
|
|
31
|
-
model_path=model_path,
|
|
32
|
-
vocoder_path=vocoder_path,
|
|
33
|
-
device=device_id
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
return instance
|
|
37
|
-
except Exception as e:
|
|
38
|
-
raise RuntimeError(f"Failed to load MLX TTS: {str(e)}")
|
|
39
|
-
|
|
40
|
-
def eject(self):
|
|
41
|
-
"""Destroy the model and free resources."""
|
|
42
|
-
if self._mlx_tts:
|
|
43
|
-
self._mlx_tts.destroy()
|
|
44
|
-
self._mlx_tts = None
|
|
45
|
-
|
|
46
|
-
def synthesize(
|
|
47
|
-
self,
|
|
48
|
-
text: str,
|
|
49
|
-
config: Optional[TTSConfig] = None,
|
|
50
|
-
output_path: Optional[str] = None,
|
|
51
|
-
) -> TTSResult:
|
|
52
|
-
"""Synthesize speech from text and save to filesystem."""
|
|
53
|
-
if not self._mlx_tts:
|
|
54
|
-
raise RuntimeError("MLX TTS not loaded")
|
|
55
|
-
|
|
56
|
-
try:
|
|
57
|
-
# Convert our config to MLX format if provided
|
|
58
|
-
mlx_config = None
|
|
59
|
-
if config:
|
|
60
|
-
from nexaai.mlx_backend.ml import TTSConfig as MLXTTSConfig
|
|
61
|
-
|
|
62
|
-
mlx_config = MLXTTSConfig(
|
|
63
|
-
voice=config.voice,
|
|
64
|
-
speed=config.speed,
|
|
65
|
-
seed=config.seed,
|
|
66
|
-
sample_rate=config.sample_rate
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
# Use MLX TTS synthesis
|
|
70
|
-
result = self._mlx_tts.synthesize(text, mlx_config, output_path)
|
|
71
|
-
|
|
72
|
-
# Convert MLX result to our format
|
|
73
|
-
return TTSResult(
|
|
74
|
-
audio_path=result.audio_path,
|
|
75
|
-
duration_seconds=result.duration_seconds,
|
|
76
|
-
sample_rate=result.sample_rate,
|
|
77
|
-
channels=result.channels,
|
|
78
|
-
num_samples=result.num_samples
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
except Exception as e:
|
|
82
|
-
raise RuntimeError(f"Failed to synthesize speech: {str(e)}")
|
|
83
|
-
|
|
84
|
-
def list_available_voices(self) -> List[str]:
|
|
85
|
-
"""List available voices."""
|
|
86
|
-
if not self._mlx_tts:
|
|
87
|
-
raise RuntimeError("MLX TTS not loaded")
|
|
88
|
-
|
|
89
|
-
try:
|
|
90
|
-
return self._mlx_tts.list_available_voices()
|
|
91
|
-
except Exception as e:
|
|
92
|
-
raise RuntimeError(f"Failed to list available voices: {str(e)}")
|
|
93
|
-
|
|
94
|
-
|
|
1
|
+
# Note: This code is generated by Cursor, not tested yet.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
8
|
+
from nexaai.mlx_backend.tts.interface import MlxTts as MLXTTSInterface
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MLXTTSImpl(TTS):
|
|
12
|
+
def __init__(self):
|
|
13
|
+
"""Initialize MLX TTS implementation."""
|
|
14
|
+
super().__init__()
|
|
15
|
+
self._mlx_tts = None
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def _load_from(cls,
|
|
19
|
+
model_path: str,
|
|
20
|
+
vocoder_path: str,
|
|
21
|
+
plugin_id: Union[PluginID, str] = PluginID.MLX,
|
|
22
|
+
device_id: Optional[str] = None
|
|
23
|
+
) -> 'MLXTTSImpl':
|
|
24
|
+
"""Load TTS model from local path using MLX backend."""
|
|
25
|
+
try:
|
|
26
|
+
# MLX TTS interface is already imported
|
|
27
|
+
|
|
28
|
+
# Create instance and load MLX TTS
|
|
29
|
+
instance = cls()
|
|
30
|
+
instance._mlx_tts = MLXTTSInterface(
|
|
31
|
+
model_path=model_path,
|
|
32
|
+
vocoder_path=vocoder_path,
|
|
33
|
+
device=device_id
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
return instance
|
|
37
|
+
except Exception as e:
|
|
38
|
+
raise RuntimeError(f"Failed to load MLX TTS: {str(e)}")
|
|
39
|
+
|
|
40
|
+
def eject(self):
|
|
41
|
+
"""Destroy the model and free resources."""
|
|
42
|
+
if self._mlx_tts:
|
|
43
|
+
self._mlx_tts.destroy()
|
|
44
|
+
self._mlx_tts = None
|
|
45
|
+
|
|
46
|
+
def synthesize(
|
|
47
|
+
self,
|
|
48
|
+
text: str,
|
|
49
|
+
config: Optional[TTSConfig] = None,
|
|
50
|
+
output_path: Optional[str] = None,
|
|
51
|
+
) -> TTSResult:
|
|
52
|
+
"""Synthesize speech from text and save to filesystem."""
|
|
53
|
+
if not self._mlx_tts:
|
|
54
|
+
raise RuntimeError("MLX TTS not loaded")
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
# Convert our config to MLX format if provided
|
|
58
|
+
mlx_config = None
|
|
59
|
+
if config:
|
|
60
|
+
from nexaai.mlx_backend.ml import TTSConfig as MLXTTSConfig
|
|
61
|
+
|
|
62
|
+
mlx_config = MLXTTSConfig(
|
|
63
|
+
voice=config.voice,
|
|
64
|
+
speed=config.speed,
|
|
65
|
+
seed=config.seed,
|
|
66
|
+
sample_rate=config.sample_rate
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# Use MLX TTS synthesis
|
|
70
|
+
result = self._mlx_tts.synthesize(text, mlx_config, output_path)
|
|
71
|
+
|
|
72
|
+
# Convert MLX result to our format
|
|
73
|
+
return TTSResult(
|
|
74
|
+
audio_path=result.audio_path,
|
|
75
|
+
duration_seconds=result.duration_seconds,
|
|
76
|
+
sample_rate=result.sample_rate,
|
|
77
|
+
channels=result.channels,
|
|
78
|
+
num_samples=result.num_samples
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
except Exception as e:
|
|
82
|
+
raise RuntimeError(f"Failed to synthesize speech: {str(e)}")
|
|
83
|
+
|
|
84
|
+
def list_available_voices(self) -> List[str]:
|
|
85
|
+
"""List available voices."""
|
|
86
|
+
if not self._mlx_tts:
|
|
87
|
+
raise RuntimeError("MLX TTS not loaded")
|
|
88
|
+
|
|
89
|
+
try:
|
|
90
|
+
return self._mlx_tts.list_available_voices()
|
|
91
|
+
except Exception as e:
|
|
92
|
+
raise RuntimeError(f"Failed to list available voices: {str(e)}")
|
|
93
|
+
|
|
94
|
+
|
nexaai/tts_impl/pybind_tts_impl.py
CHANGED
|
@@ -1,43 +1,43 @@
|
|
|
1
|
-
from typing import List, Optional, Union
|
|
2
|
-
|
|
3
|
-
from nexaai.common import PluginID
|
|
4
|
-
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class PyBindTTSImpl(TTS):
|
|
8
|
-
def __init__(self):
|
|
9
|
-
"""Initialize PyBind TTS implementation."""
|
|
10
|
-
super().__init__()
|
|
11
|
-
# TODO: Add PyBind-specific initialization
|
|
12
|
-
|
|
13
|
-
@classmethod
|
|
14
|
-
def _load_from(cls,
|
|
15
|
-
model_path: str,
|
|
16
|
-
vocoder_path: str,
|
|
17
|
-
plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
|
|
18
|
-
device_id: Optional[str] = None
|
|
19
|
-
) -> 'PyBindTTSImpl':
|
|
20
|
-
"""Load TTS model from local path using PyBind backend."""
|
|
21
|
-
# TODO: Implement PyBind TTS loading
|
|
22
|
-
instance = cls()
|
|
23
|
-
return instance
|
|
24
|
-
|
|
25
|
-
def eject(self):
|
|
26
|
-
"""Destroy the model and free resources."""
|
|
27
|
-
# TODO: Implement PyBind TTS cleanup
|
|
28
|
-
pass
|
|
29
|
-
|
|
30
|
-
def synthesize(
|
|
31
|
-
self,
|
|
32
|
-
text: str,
|
|
33
|
-
config: Optional[TTSConfig] = None,
|
|
34
|
-
output_path: Optional[str] = None,
|
|
35
|
-
) -> TTSResult:
|
|
36
|
-
"""Synthesize speech from text and save to filesystem."""
|
|
37
|
-
# TODO: Implement PyBind TTS synthesis
|
|
38
|
-
raise NotImplementedError("PyBind TTS synthesis not yet implemented")
|
|
39
|
-
|
|
40
|
-
def list_available_voices(self) -> List[str]:
|
|
41
|
-
"""List available voices."""
|
|
42
|
-
# TODO: Implement PyBind TTS voice listing
|
|
43
|
-
raise NotImplementedError("PyBind TTS voice listing not yet implemented")
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from nexaai.common import PluginID
|
|
4
|
+
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PyBindTTSImpl(TTS):
|
|
8
|
+
def __init__(self):
|
|
9
|
+
"""Initialize PyBind TTS implementation."""
|
|
10
|
+
super().__init__()
|
|
11
|
+
# TODO: Add PyBind-specific initialization
|
|
12
|
+
|
|
13
|
+
@classmethod
|
|
14
|
+
def _load_from(cls,
|
|
15
|
+
model_path: str,
|
|
16
|
+
vocoder_path: str,
|
|
17
|
+
plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
|
|
18
|
+
device_id: Optional[str] = None
|
|
19
|
+
) -> 'PyBindTTSImpl':
|
|
20
|
+
"""Load TTS model from local path using PyBind backend."""
|
|
21
|
+
# TODO: Implement PyBind TTS loading
|
|
22
|
+
instance = cls()
|
|
23
|
+
return instance
|
|
24
|
+
|
|
25
|
+
def eject(self):
|
|
26
|
+
"""Destroy the model and free resources."""
|
|
27
|
+
# TODO: Implement PyBind TTS cleanup
|
|
28
|
+
pass
|
|
29
|
+
|
|
30
|
+
def synthesize(
|
|
31
|
+
self,
|
|
32
|
+
text: str,
|
|
33
|
+
config: Optional[TTSConfig] = None,
|
|
34
|
+
output_path: Optional[str] = None,
|
|
35
|
+
) -> TTSResult:
|
|
36
|
+
"""Synthesize speech from text and save to filesystem."""
|
|
37
|
+
# TODO: Implement PyBind TTS synthesis
|
|
38
|
+
raise NotImplementedError("PyBind TTS synthesis not yet implemented")
|
|
39
|
+
|
|
40
|
+
def list_available_voices(self) -> List[str]:
|
|
41
|
+
"""List available voices."""
|
|
42
|
+
# TODO: Implement PyBind TTS voice listing
|
|
43
|
+
raise NotImplementedError("PyBind TTS voice listing not yet implemented")
|
nexaai/utils/decode.py
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Utility functions for text decoding operations.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def safe_decode(data):
|
|
7
|
-
"""
|
|
8
|
-
Safely decode bytes or text, handling UTF-8 errors.
|
|
9
|
-
|
|
10
|
-
Args:
|
|
11
|
-
data: Input data that can be bytes or text
|
|
12
|
-
|
|
13
|
-
Returns:
|
|
14
|
-
str: Decoded string with errors replaced if any
|
|
15
|
-
"""
|
|
16
|
-
if isinstance(data, bytes):
|
|
17
|
-
return data.decode('utf-8', errors='replace')
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for text decoding operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def safe_decode(data):
|
|
7
|
+
"""
|
|
8
|
+
Safely decode bytes or text, handling UTF-8 errors.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
data: Input data that can be bytes or text
|
|
12
|
+
|
|
13
|
+
Returns:
|
|
14
|
+
str: Decoded string with errors replaced if any
|
|
15
|
+
"""
|
|
16
|
+
if isinstance(data, bytes):
|
|
17
|
+
return data.decode('utf-8', errors='replace')
|
|
18
18
|
return str(data)
|