nexaai 1.0.4rc14__cp310-cp310-macosx_14_0_universal2.whl → 1.0.4rc16__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this release of nexaai has been marked as potentially problematic.

Files changed (39)
  1. nexaai/__init__.py +6 -1
  2. nexaai/_stub.cpython-310-darwin.so +0 -0
  3. nexaai/_version.py +1 -1
  4. nexaai/asr.py +7 -3
  5. nexaai/asr_impl/mlx_asr_impl.py +3 -2
  6. nexaai/asr_impl/pybind_asr_impl.py +3 -2
  7. nexaai/binds/libcrypto.dylib +0 -0
  8. nexaai/binds/libnexa_bridge.dylib +0 -0
  9. nexaai/binds/libssl.dylib +0 -0
  10. nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
  11. nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
  12. nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
  13. nexaai/common.py +7 -0
  14. nexaai/cv.py +8 -3
  15. nexaai/cv_impl/mlx_cv_impl.py +3 -2
  16. nexaai/cv_impl/pybind_cv_impl.py +3 -2
  17. nexaai/embedder.py +7 -3
  18. nexaai/embedder_impl/mlx_embedder_impl.py +3 -2
  19. nexaai/embedder_impl/pybind_embedder_impl.py +6 -3
  20. nexaai/image_gen.py +6 -2
  21. nexaai/image_gen_impl/mlx_image_gen_impl.py +3 -2
  22. nexaai/image_gen_impl/pybind_image_gen_impl.py +3 -2
  23. nexaai/llm.py +8 -5
  24. nexaai/llm_impl/mlx_llm_impl.py +19 -6
  25. nexaai/llm_impl/pybind_llm_impl.py +7 -5
  26. nexaai/mlx_backend/llm/interface.py +2 -2
  27. nexaai/rerank.py +7 -3
  28. nexaai/rerank_impl/mlx_rerank_impl.py +3 -2
  29. nexaai/rerank_impl/pybind_rerank_impl.py +3 -2
  30. nexaai/tts.py +7 -3
  31. nexaai/tts_impl/mlx_tts_impl.py +3 -2
  32. nexaai/tts_impl/pybind_tts_impl.py +3 -2
  33. nexaai/vlm.py +6 -3
  34. nexaai/vlm_impl/mlx_vlm_impl.py +85 -41
  35. nexaai/vlm_impl/pybind_vlm_impl.py +5 -3
  36. {nexaai-1.0.4rc14.dist-info → nexaai-1.0.4rc16.dist-info}/METADATA +9 -8
  37. {nexaai-1.0.4rc14.dist-info → nexaai-1.0.4rc16.dist-info}/RECORD +39 -37
  38. {nexaai-1.0.4rc14.dist-info → nexaai-1.0.4rc16.dist-info}/WHEEL +0 -0
  39. {nexaai-1.0.4rc14.dist-info → nexaai-1.0.4rc16.dist-info}/top_level.txt +0 -0
nexaai/__init__.py CHANGED
@@ -19,7 +19,10 @@ except ImportError:
  __version__ = "0.0.1"

  # Import common configuration classes first (no external dependencies)
- from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig
+ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, PluginID
+
+ # Create alias for PluginID to be accessible as plugin_id
+ plugin_id = PluginID

  # Import new feature classes (no external dependencies in base classes)
  from .llm import LLM
@@ -40,6 +43,8 @@ __all__ = [
  "ChatMessage",
  "SamplerConfig",
  "EmbeddingConfig",
+ "PluginID",
+ "plugin_id",

  "LLM",
  "Embedder",
nexaai/_stub.cpython-310-darwin.so CHANGED (binary file, no text diff)
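
With the __init__.py change above, both the enum and its lowercase alias are importable from the package root. A minimal usage sketch (the model path is a placeholder, and `_load_from` is the private loader shown in this diff):

    from nexaai import LLM, PluginID, plugin_id

    # plugin_id is an alias bound to the PluginID enum itself
    assert plugin_id is PluginID

    # Hypothetical load via the private entry point from this diff
    llm = LLM._load_from("/path/to/model.gguf", plugin_id=PluginID.LLAMA_CPP)
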
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
  # This file is generated by CMake from _version.py.in
  # Do not modify this file manually - it will be overwritten

- __version__ = "1.0.4-rc14"
+ __version__ = "1.0.4-rc16"
nexaai/asr.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional, Sequence, Tuple
+ from typing import List, Optional, Sequence, Tuple, Union
  from abc import abstractmethod
  from dataclasses import dataclass

  from nexaai.base import BaseModel
+ from nexaai.common import PluginID


  @dataclass
@@ -33,11 +34,14 @@ class ASR(BaseModel):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'ASR':
  """Load ASR model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
  return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
  else:
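
Every `_load_from` router in this release normalizes the argument the same way before branching. A standalone sketch of that pattern (the function name is invented for illustration):

    from typing import Union
    from nexaai.common import PluginID

    def resolve_plugin(plugin_id: Union[PluginID, str]) -> str:
        # PluginID subclasses str, so .value recovers the plain string;
        # raw strings pass through untouched
        return plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

    assert resolve_plugin(PluginID.MLX) == "mlx"
    assert resolve_plugin("llama_cpp") == "llama_cpp"
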
nexaai/asr_impl/mlx_asr_impl.py CHANGED
@@ -1,7 +1,8 @@
  # Note: This code is generated by Cursor, not tested yet.

- from typing import List, Optional
+ from typing import List, Optional, Union

+ from nexaai.common import PluginID
  from nexaai.asr import ASR, ASRConfig, ASRResult
  from nexaai.mlx_backend.asr.interface import MlxAsr as MLXASRInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -18,7 +19,7 @@ class MLXASRImpl(ASR):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXASRImpl':
  """Load ASR model from local path using MLX backend."""
nexaai/asr_impl/pybind_asr_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union

+ from nexaai.common import PluginID
  from nexaai.asr import ASR, ASRConfig, ASRResult


@@ -14,7 +15,7 @@ class PyBindASRImpl(ASR):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindASRImpl':
  """Load ASR model from local path using PyBind backend."""
Binary files CHANGED (the nexaai/binds dynamic libraries listed above; no text diff available)
nexaai/common.py CHANGED
@@ -1,5 +1,12 @@
  from dataclasses import dataclass
  from typing import TypedDict, Literal, Optional, List
+ from enum import Enum
+
+
+ class PluginID(str, Enum):
+ """Enum for plugin identifiers."""
+ MLX = "mlx"
+ LLAMA_CPP = "llama_cpp"


  class ChatMessage(TypedDict):
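
Because PluginID mixes in str, its members compare equal to plain string literals, which is what lets the routers in this release accept either form. A quick sketch of the standard-library behavior (no package imports needed):

    from enum import Enum

    class PluginID(str, Enum):
        """Local mirror of the enum added in nexaai/common.py."""
        MLX = "mlx"
        LLAMA_CPP = "llama_cpp"

    assert PluginID.MLX == "mlx"           # str mixin: equality with literals
    assert isinstance(PluginID.MLX, str)   # members really are strings
    assert PluginID.LLAMA_CPP.value == "llama_cpp"
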
nexaai/cv.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
  from abc import abstractmethod
  from dataclasses import dataclass

  from nexaai.base import BaseModel
+ from nexaai.common import PluginID


  @dataclass
@@ -69,12 +70,16 @@ class CVModel(BaseModel):

  @classmethod
  def _load_from(cls,
+ _: str, # TODO: remove this argument, this is a hack to make api design happy
  config: CVModelConfig,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'CVModel':
  """Load CV model from configuration, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.cv_impl.mlx_cv_impl import MLXCVImpl
  return MLXCVImpl._load_from(config, plugin_id, device_id)
  else:
nexaai/cv_impl/mlx_cv_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.

- from typing import Optional
+ from typing import Optional, Union
  import os

+ from nexaai.common import PluginID
  from nexaai.cv import CVModel, CVModelConfig, CVResults
  from nexaai.mlx_backend.cv.interface import CVModel as MLXCVInterface, create_cv_model

@@ -16,7 +17,7 @@ class MLXCVImpl(CVModel):
  @classmethod
  def _load_from(cls,
  config: CVModelConfig,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXCVImpl':
  """Load CV model from configuration using MLX backend."""
nexaai/cv_impl/pybind_cv_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import Optional
+ from typing import Optional, Union

+ from nexaai.common import PluginID
  from nexaai.cv import CVModel, CVModelConfig, CVResults


@@ -12,7 +13,7 @@ class PyBindCVImpl(CVModel):
  @classmethod
  def _load_from(cls,
  config: CVModelConfig,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindCVImpl':
  """Load CV model from configuration using PyBind backend."""
nexaai/embedder.py CHANGED
@@ -4,6 +4,7 @@ from abc import abstractmethod
  import numpy as np

  from nexaai.base import BaseModel
+ from nexaai.common import PluginID


  @dataclass
@@ -21,19 +22,22 @@ class Embedder(BaseModel):
  pass

  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "llama_cpp"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
  """
  Load an embedder from model files, routing to appropriate implementation.

  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "llama_cpp")
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)

  Returns:
  Embedder instance
  """
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
  return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
  else:
nexaai/embedder_impl/mlx_embedder_impl.py CHANGED
@@ -1,6 +1,7 @@
  from typing import List, Union
  import numpy as np

+ from nexaai.common import PluginID
  from nexaai.embedder import Embedder, EmbeddingConfig
  from nexaai.mlx_backend.embedding.interface import Embedder as MLXEmbedderInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -13,14 +14,14 @@ class MLXEmbedderImpl(Embedder):
  self._mlx_embedder = None

  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "mlx"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
  """
  Load an embedder from model files using MLX backend.

  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "mlx")
+ plugin_id: Plugin ID to use for the model (default: PluginID.MLX)

  Returns:
  MLXEmbedderImpl instance
nexaai/embedder_impl/pybind_embedder_impl.py CHANGED
@@ -1,6 +1,7 @@
  from typing import List, Union
  import numpy as np

+ from nexaai.common import PluginID
  from nexaai.embedder import Embedder, EmbeddingConfig
  from nexaai.binds import embedder_bind
  from nexaai.runtime import _ensure_runtime
@@ -15,20 +16,22 @@ class PyBindEmbedderImpl(Embedder):
  self._handle = _handle_ptr

  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "llama_cpp"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
  """
  Load an embedder from model files

  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "llama_cpp")
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)

  Returns:
  PyBindEmbedderImpl instance
  """
  _ensure_runtime()
- handle = embedder_bind.ml_embedder_create(model_path, tokenizer_file, plugin_id)
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+ handle = embedder_bind.ml_embedder_create(model_path, tokenizer_file, plugin_id_str)
  return cls(handle)

  def eject(self):
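
The pybind implementations unwrap the enum right before crossing into C++. Since PluginID already subclasses str, this is likely defensive rather than strictly required, but it guarantees the native layer sees a plain str. A sketch of the call-site pattern (the create call is commented out because it needs the native runtime):

    from nexaai.common import PluginID

    def plugin_str(plugin_id) -> str:
        # Unwrap PluginID.LLAMA_CPP -> "llama_cpp"; pass raw strings through
        return plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

    # handle = embedder_bind.ml_embedder_create(model_path, "tokenizer.json",
    #                                           plugin_str(PluginID.LLAMA_CPP))
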
nexaai/image_gen.py CHANGED
@@ -3,6 +3,7 @@ from abc import abstractmethod
  from dataclasses import dataclass

  from nexaai.base import BaseModel
+ from nexaai.common import PluginID


  @dataclass
@@ -67,13 +68,16 @@ class ImageGen(BaseModel):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
  ) -> 'ImageGen':
  """Load image generation model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.image_gen_impl.mlx_image_gen_impl import MLXImageGenImpl
  return MLXImageGenImpl._load_from(model_path, scheduler_config_path, plugin_id, device_id, float16, quantize)
  else:
nexaai/image_gen_impl/mlx_image_gen_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.

- from typing import List, Optional
+ from typing import List, Optional, Union
  import os

+ from nexaai.common import PluginID
  from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
  from nexaai.mlx_backend.sd.interface import ImageGen as MLXImageGenInterface

@@ -17,7 +18,7 @@ class MLXImageGenImpl(ImageGen):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
nexaai/image_gen_impl/pybind_image_gen_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union

+ from nexaai.common import PluginID
  from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image


@@ -13,7 +14,7 @@ class PyBindImageGenImpl(ImageGen):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
nexaai/llm.py CHANGED
@@ -1,9 +1,9 @@
- from typing import Generator, Optional
+ from typing import Generator, Optional, Union
  from abc import abstractmethod
  import queue
  import threading

- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
  from nexaai.base import BaseModel

  class LLM(BaseModel):
@@ -17,11 +17,14 @@ class LLM(BaseModel):
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'LLM':
  """Load model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
  return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
  else:
@@ -37,7 +40,7 @@ class LLM(BaseModel):
  self._cancel_event.clear()

  @abstractmethod
- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """Apply the chat template to messages."""
  pass
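
The abstract signature now matches the MLX backend's keyword surface. A hedged usage sketch (`llm` stands for any loaded implementation; per the interface.py hunk below, tools is an Optional[str] that the backend parses, so a JSON string is the likely format, an assumption here):

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is 2 + 2?"},
    ]

    prompt = llm.apply_chat_template(
        messages,
        tools=None,                  # Optional[str] tool spec
        enable_thinking=False,
        add_generation_prompt=True,  # previously hardcoded to True in the backend
    )
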
nexaai/llm_impl/mlx_llm_impl.py CHANGED
@@ -1,6 +1,6 @@
- from typing import Generator, Optional, Any
+ from typing import Generator, Optional, Any, Sequence, Union

- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
  from nexaai.llm import LLM
  from nexaai.mlx_backend.llm.interface import LLM as MLXLLMInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -17,7 +17,7 @@ class MLXLLMImpl(LLM):
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXLLMImpl':
  """Load model from local path using MLX backend."""
@@ -54,7 +54,13 @@ class MLXLLMImpl(LLM):
  self._mlx_llm.destroy()
  self._mlx_llm = None

- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(
+ self,
+ messages: Sequence[ChatMessage],
+ tools: Optional[str] = None,
+ enable_thinking: bool = True,
+ add_generation_prompt: bool = True
+ ) -> str:
  """Apply the chat template to messages."""
  if not self._mlx_llm:
  raise RuntimeError("MLX LLM not loaded")
@@ -68,9 +74,16 @@ class MLXLLMImpl(LLM):
  def __init__(self, role, content):
  self.role = role
  self.content = content
- mlx_messages.append(MLXChatMessage(msg["role"], msg["content"]))
+
+ # Handle both dict-style and attribute-style access
+ if hasattr(msg, 'role') and hasattr(msg, 'content'):
+ # Message is already an object with attributes
+ mlx_messages.append(MLXChatMessage(msg.role, msg.content))
+ else:
+ # Message is a dict
+ mlx_messages.append(MLXChatMessage(msg["role"], msg["content"]))

- return self._mlx_llm.apply_chat_template(mlx_messages)
+ return self._mlx_llm.apply_chat_template(mlx_messages, tools=tools, enable_thinking=enable_thinking, add_generation_prompt=add_generation_prompt)
  except Exception as e:
  raise RuntimeError(f"Failed to apply chat template: {str(e)}")
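
The hasattr branch lets callers pass either ChatMessage TypedDicts or objects exposing .role/.content. The same normalization in isolation (helper name invented for illustration):

    from typing import Any, Tuple

    def role_and_content(msg: Any) -> Tuple[str, str]:
        # Attribute-style message objects (e.g. dataclasses) win first
        if hasattr(msg, "role") and hasattr(msg, "content"):
            return msg.role, msg.content
        # Otherwise assume dict-style access, as with ChatMessage
        return msg["role"], msg["content"]

    assert role_and_content({"role": "user", "content": "hi"}) == ("user", "hi")
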
nexaai/llm_impl/pybind_llm_impl.py CHANGED
@@ -1,8 +1,8 @@
- from typing import Generator, Optional
+ from typing import Generator, Optional, Union
  import queue
  import threading

- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
  from nexaai.binds import llm_bind, common_bind
  from nexaai.runtime import _ensure_runtime
  from nexaai.llm import LLM
@@ -19,7 +19,7 @@ class PyBindLLMImpl(LLM):
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindLLMImpl':
  """Load model from local path."""
@@ -49,11 +49,13 @@ class PyBindLLMImpl(LLM):
  config.chat_template_content = m_cfg.chat_template_content

  # Create handle : returns py::capsule with automatic cleanup
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
  handle = llm_bind.ml_llm_create(
  model_path=local_path,
  tokenizer_path=tokenizer_path,
  model_config=config,
- plugin_id=plugin_id,
+ plugin_id=plugin_id_str,
  device_id=device_id
  )
  return cls(handle, m_cfg)
@@ -64,7 +66,7 @@ class PyBindLLMImpl(LLM):
  del self._handle
  self._handle = None

- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """Apply the chat template to messages."""
  # Convert TypedDict to list of dicts for binding
  message_dicts = [
nexaai/mlx_backend/llm/interface.py CHANGED
@@ -467,7 +467,7 @@ class LLM(BaseLLM, ProfilingMixin):
  # We'll ignore the argument for now.
  return self.tokenizer.chat_template

- def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
+ def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """
  Apply chat template to messages with incremental prompt support and optional tools.

@@ -526,7 +526,7 @@ class LLM(BaseLLM, ProfilingMixin):
  incremental_messages,
  tokenize=False,
  enable_thinking=enable_thinking,
- add_generation_prompt=True,
+ add_generation_prompt=add_generation_prompt,
  tools=parsed_tools
  )
  except Exception as e:
nexaai/rerank.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
  from abc import abstractmethod
  from dataclasses import dataclass

  from nexaai.base import BaseModel
+ from nexaai.common import PluginID


  @dataclass
@@ -24,11 +25,14 @@ class Reranker(BaseModel):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'Reranker':
  """Load reranker model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
  return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
  else:
nexaai/rerank_impl/mlx_rerank_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.

- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
  import os

+ from nexaai.common import PluginID
  from nexaai.rerank import Reranker, RerankConfig
  from nexaai.mlx_backend.rerank.interface import Reranker as MLXRerankInterface, create_reranker

@@ -17,7 +18,7 @@ class MLXRerankImpl(Reranker):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXRerankImpl':
  """Load reranker model from local path using MLX backend."""
nexaai/rerank_impl/pybind_rerank_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union

+ from nexaai.common import PluginID
  from nexaai.rerank import Reranker, RerankConfig


@@ -13,7 +14,7 @@ class PyBindRerankImpl(Reranker):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindRerankImpl':
  """Load reranker model from local path using PyBind backend."""
nexaai/tts.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
  from abc import abstractmethod
  from dataclasses import dataclass

  from nexaai.base import BaseModel
+ from nexaai.common import PluginID


  @dataclass
@@ -43,11 +44,14 @@ class TTS(BaseModel):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'TTS':
  """Load TTS model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.tts_impl.mlx_tts_impl import MLXTTSImpl
  return MLXTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)
  else:
nexaai/tts_impl/mlx_tts_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.

- from typing import List, Optional
+ from typing import List, Optional, Union
  import os

+ from nexaai.common import PluginID
  from nexaai.tts import TTS, TTSConfig, TTSResult
  from nexaai.mlx_backend.tts.interface import MlxTts as MLXTTSInterface

@@ -17,7 +18,7 @@ class MLXTTSImpl(TTS):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXTTSImpl':
  """Load TTS model from local path using MLX backend."""
nexaai/tts_impl/pybind_tts_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union

+ from nexaai.common import PluginID
  from nexaai.tts import TTS, TTSConfig, TTSResult


@@ -13,7 +14,7 @@ class PyBindTTSImpl(TTS):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindTTSImpl':
  """Load TTS model from local path using PyBind backend."""
nexaai/vlm.py CHANGED
@@ -5,7 +5,7 @@ import threading
  import base64
  from pathlib import Path

- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
  from nexaai.base import BaseModel

@@ -20,7 +20,7 @@ class VLM(BaseModel):
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'VLM':
  """Load VLM model from local path, routing to appropriate implementation.
@@ -35,7 +35,10 @@ class VLM(BaseModel):
  Returns:
  VLM instance
  """
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.vlm_impl.mlx_vlm_impl import MlxVlmImpl
  return MlxVlmImpl._load_from(local_path, mmproj_path, m_cfg, plugin_id, device_id)
  else:
nexaai/vlm_impl/mlx_vlm_impl.py CHANGED
@@ -1,6 +1,6 @@
- from typing import Generator, Optional, List, Dict, Any
+ from typing import Generator, Optional, List, Dict, Any, Union

- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
  from nexaai.vlm import VLM
  from nexaai.mlx_backend.vlm.interface import VLM as MLXVLMInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -17,7 +17,7 @@ class MlxVlmImpl(VLM):
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MlxVlmImpl':
  """Load VLM model from local path using MLX backend.
@@ -76,8 +76,10 @@ class MlxVlmImpl(VLM):
  raise RuntimeError("MLX VLM not loaded")

  try:
- # Convert MultiModalMessage to MLX format
  mlx_messages = []
+ total_images = 0
+ total_audios = 0
+
  for msg in messages:
  # Create a simple object with role and content attributes
  class MLXChatMessage:
@@ -85,19 +87,38 @@ class MlxVlmImpl(VLM):
  self.role = role
  self.content = content

- # For MLX VLM, we need to extract text content from multimodal messages
- # This is a simplified approach - the actual implementation may need
- # more sophisticated handling of different content types
+ # Extract text content and count media files
  text_content = ""
+ first_content = True
+
  for content_item in msg["content"]:
- if content_item["type"] == "text":
+ content_type = content_item.get("type", "")
+
+ if content_type == "text":
+ if not first_content:
+ text_content += " "
  text_content += content_item.get("text", "")
- # Note: image/audio/video content is typically handled separately
- # in the generation phase, not in the chat template
+ first_content = False
+ elif content_type == "image":
+ total_images += 1
+ elif content_type == "audio":
+ total_audios += 1

  mlx_messages.append(MLXChatMessage(msg["role"], text_content))

- return self._mlx_vlm.apply_chat_template(mlx_messages)
+ if total_images > 0 or total_audios > 0:
+ # Use apply_chat_template_with_media when media is present
+ return self._mlx_vlm.apply_chat_template_with_media(
+ mlx_messages,
+ num_images=total_images,
+ num_audios=total_audios,
+ tools=tools,
+ enable_thinking=False # Default to False, could be made configurable
+ )
+ else:
+ # Use regular apply_chat_template for text-only messages
+ return self._mlx_vlm.apply_chat_template(mlx_messages)
+
  except Exception as e:
  raise RuntimeError(f"Failed to apply chat template: {str(e)}")
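
Media items are now tallied per type so the template step knows how many image and audio placeholders to reserve. The counting logic in isolation (the message shape follows the content lists in this diff; the "path" key is illustrative):

    def count_media(messages):
        """Count image and audio items across multimodal messages."""
        images = audios = 0
        for msg in messages:
            for item in msg["content"]:
                kind = item.get("type", "")
                if kind == "image":
                    images += 1
                elif kind == "audio":
                    audios += 1
        return images, audios

    msgs = [{"role": "user", "content": [
        {"type": "text", "text": "Describe this"},
        {"type": "image", "path": "photo.png"},
    ]}]
    assert count_media(msgs) == (1, 0)
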
@@ -107,9 +128,6 @@ class MlxVlmImpl(VLM):
  raise RuntimeError("MLX VLM not loaded")

  try:
- # Get MLX config classes
- _, MLXSamplerConfig, MLXGenerationConfig, _ = get_mlx_configs()
-
  # Convert GenerationConfig to MLX format
  mlx_gen_config = MLXGenerationConfig()
  mlx_gen_config.max_tokens = g_cfg.max_tokens
@@ -130,25 +148,57 @@ class MlxVlmImpl(VLM):
  mlx_sampler_config.grammar_string = g_cfg.sampler_config.grammar_string
  mlx_gen_config.sampler_config = mlx_sampler_config

- # Create a token callback for streaming
- def token_callback(token: str) -> bool:
- # Check if generation should be cancelled
- return not self._cancel_event.is_set()
+ import queue
+ import threading
+
+ # Create a queue for streaming tokens
+ token_queue = queue.Queue()
+ exception_container = [None]
+ self.reset_cancel() # Reset cancel flag before generation

- # Use MLX VLM streaming generation
- result = self._mlx_vlm.generate_stream(prompt, mlx_gen_config, token_callback)
+ def token_callback(token: str, user_data: Any = None) -> bool:
+ if self._cancel_event.is_set():
+ token_queue.put(('end', None))
+ return False
+ try:
+ token_queue.put(('token', token))
+ return True
+ except Exception as e:
+ exception_container[0] = e
+ return False

- # MLX VLM interface returns a GenerationResult, extract the text
- if hasattr(result, 'text') and result.text:
- # Split the result into words and yield them
- words = result.text.split()
- for i, word in enumerate(words):
- if self._cancel_event.is_set():
+ # Run generation in a separate thread
+ def generate():
+ try:
+ self._mlx_vlm.generate_stream(prompt, mlx_gen_config, token_callback)
+ except Exception as e:
+ exception_container[0] = e
+ finally:
+ token_queue.put(('end', None))
+
+ thread = threading.Thread(target=generate)
+ thread.start()
+
+ # Yield tokens as they come from the queue
+ while True:
+ if exception_container[0]:
+ raise exception_container[0]
+
+ try:
+ msg_type, token = token_queue.get(timeout=0.1)
+ if msg_type == 'end':
  break
- if i == 0:
- yield word
- else:
- yield " " + word
+ elif msg_type == 'token':
+ yield token
+ except queue.Empty:
+ if not thread.is_alive():
+ break
+ continue
+
+ thread.join()
+
+ if exception_container[0]:
+ raise exception_container[0]

  except Exception as e:
  raise RuntimeError(f"Failed to generate streaming text: {str(e)}")
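
The old word-splitting fallback is replaced by a real producer-consumer bridge: generation runs on a worker thread, the token callback feeds a queue, and the generator drains it. The same pattern reduced to a self-contained sketch (the fake producer stands in for generate_stream; everything here is illustrative, not the package's API):

    import queue
    import threading

    def stream_tokens(run):
        """Bridge a callback-based producer into a Python generator."""
        q = queue.Queue()

        def on_token(tok):
            q.put(("token", tok))
            return True  # returning False asks the producer to stop

        def worker():
            try:
                run(on_token)
            finally:
                q.put(("end", None))  # always unblock the consumer

        threading.Thread(target=worker, daemon=True).start()
        while True:
            kind, tok = q.get()
            if kind == "end":
                break
            yield tok

    fake_producer = lambda cb: [cb(t) for t in ("Hello", " ", "world")]
    print("".join(stream_tokens(fake_producer)))  # -> Hello world
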
@@ -168,9 +218,6 @@ class MlxVlmImpl(VLM):
  raise RuntimeError("MLX VLM not loaded")

  try:
- # Get MLX config classes
- _, MLXSamplerConfig, MLXGenerationConfig, _ = get_mlx_configs()
-
  # Convert GenerationConfig to MLX format
  mlx_gen_config = MLXGenerationConfig()
  mlx_gen_config.max_tokens = g_cfg.max_tokens
@@ -191,15 +238,12 @@ class MlxVlmImpl(VLM):
  mlx_sampler_config.grammar_string = g_cfg.sampler_config.grammar_string
  mlx_gen_config.sampler_config = mlx_sampler_config

- # Use MLX VLM generation
- result = self._mlx_vlm.generate(prompt, mlx_gen_config)
+ # Simple token callback that just continues
+ def token_callback(token: str, user_data: Any = None) -> bool:
+ return not self._cancel_event.is_set()

- # MLX VLM interface returns a GenerationResult, extract the text
- if hasattr(result, 'text'):
- return result.text
- else:
- # Fallback if result is just a string
- return str(result)
+ # Use MLX streaming generation and return the full result
+ return self._mlx_vlm.generate_stream(prompt, mlx_gen_config, token_callback)

  except Exception as e:
  raise RuntimeError(f"Failed to generate text: {str(e)}")
nexaai/vlm_impl/pybind_vlm_impl.py CHANGED
@@ -4,7 +4,7 @@ import threading
  import base64
  from pathlib import Path

- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
  from nexaai.binds import vlm_bind, common_bind
  from nexaai.runtime import _ensure_runtime
  from nexaai.vlm import VLM
@@ -21,7 +21,7 @@ class PyBindVLMImpl(VLM):
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindVLMImpl':
  """Load VLM model from local path.
61
  config.chat_template_content = m_cfg.chat_template_content

  # Create handle : returns py::capsule with automatic cleanup
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
  handle = vlm_bind.create_vlm(
  model_path=local_path,
  mmproj_path=mmproj_path,
  model_config=config,
- plugin_id=plugin_id,
+ plugin_id=plugin_id_str,
  device_id=device_id
  )
  return cls(handle, m_cfg)
{nexaai-1.0.4rc14.dist-info → nexaai-1.0.4rc16.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nexaai
- Version: 1.0.4rc14
+ Version: 1.0.4rc16
  Summary: Python bindings for NexaSDK C-lib backend
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -17,10 +17,11 @@ Requires-Dist: tqdm
  Requires-Dist: hf_xet
  Requires-Dist: numpy
  Requires-Dist: httpx
- Provides-Extra: mlx
- Requires-Dist: mlx; extra == "mlx"
- Requires-Dist: mlx-lm; extra == "mlx"
- Requires-Dist: mlx-vlm; extra == "mlx"
- Requires-Dist: tokenizers; extra == "mlx"
- Requires-Dist: safetensors; extra == "mlx"
- Requires-Dist: Pillow; extra == "mlx"
+ Requires-Dist: mlx
+ Requires-Dist: mlx-lm
+ Requires-Dist: scipy
+ Requires-Dist: soundfile
+ Requires-Dist: Pillow
+ Requires-Dist: opencv-python
+ Requires-Dist: shapely
+ Requires-Dist: pyclipper
{nexaai-1.0.4rc14.dist-info → nexaai-1.0.4rc16.dist-info}/RECORD CHANGED
@@ -1,33 +1,35 @@
- nexaai/__init__.py,sha256=JTjJWdiBXHZyc_91Oe-GNOcODFp9gbUQM43bzNY7S8Q,1906
- nexaai/_stub.cpython-310-darwin.so,sha256=jLqSTNxJs9FVD19-Vwob9M8ciDqSHgKE3KgoF4FDlbI,66768
- nexaai/_version.py,sha256=81zx9t__xAdjV8GwoDFsZWlADrXI5VeJD0AkYlQ8P9g,143
- nexaai/asr.py,sha256=Yg8Yml_nklzJYl3C_lwvEApTdNjY2czAurDaoEjkiIU,1813
+ nexaai/__init__.py,sha256=jXdC4vv6DBK1fVewYTYSUhOOYfvf_Mk81UIeMGGIKUg,2029
+ nexaai/_stub.cpython-310-darwin.so,sha256=J8U8gAiGKjmFexJuT4XWOm1k49lmsyJ-WYQJaTi6fyA,66768
+ nexaai/_version.py,sha256=NGCgH5JHTkWsbmkVT9FhcM7m4cxgmEZiw51TUG210EA,143
+ nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
- nexaai/common.py,sha256=VPM7NaUNaLTT7quW-u4D2uOeNrQqPjvfcgJlYGS3Qy8,1525
- nexaai/cv.py,sha256=CYfLSDU0_QJkcaIDIJ-a-JjA9FMvIUrSfG71_7-79hI,2934
- nexaai/embedder.py,sha256=VheiZEYBuuBjhQcvLawCz26jX0I169Xk4b9VP-ERjqU,2211
- nexaai/image_gen.py,sha256=IhLQLpmPkK9KcHteUdaQdxrnTIjk6xdyekRqeJtHfWw,4122
- nexaai/llm.py,sha256=egHa6YafNWyZy5qrmZRNZlFHO8LRUejc_gkOpK0nbnw,3105
- nexaai/rerank.py,sha256=7EEm96gpvd6kXO_Q8xSrQDlLZdAYTk0MODeNWDq70WA,1631
+ nexaai/common.py,sha256=5ElYo4uDP2CT3Kqxoo7XzqcJtDBuwwbIi_Wr14aT9Z4,1659
+ nexaai/cv.py,sha256=RHCDo8gvBH8BkGZx7qVyp-OKxqi7E1GG9XzyaXehCNA,3273
+ nexaai/embedder.py,sha256=Cw0tSHkPgd-RI62afCqQAcTHMnQhaI2CvfTMO-1JKOg,2452
+ nexaai/image_gen.py,sha256=0C_5Tjj4BYmxLbmMmvwajp-yy2mmEEOKwBFnDQNPzx4,4356
+ nexaai/llm.py,sha256=QQDRg8zlu-xHmWjtSOsK1vhQBHaqRIdL3T9I4cVX7W4,3416
+ nexaai/rerank.py,sha256=vWaBucoQ1wz-2iYnZqyFIcEjm-4Xcs1KDbFN5X8zzDQ,1872
  nexaai/runtime.py,sha256=mxxHYsb5iBUAm2K_u-XJWr_U-spJ9S4eApc8kf9myjw,1957
- nexaai/tts.py,sha256=4EbC0BfFh5TLrm_3Q5vx1sXdug5gvOi-owNeX7ekbdA,1926
- nexaai/vlm.py,sha256=g65S8ChMnp_wsz_O4szjR3Z8sD_46NHaxDlfdoZoQ0c,4291
+ nexaai/tts.py,sha256=ZnBpWUxIfHhh7KfEjddtH7hHOTa91zg7ogGLakMIALo,2167
+ nexaai/vlm.py,sha256=pZcMWkF2Ml9liVNbHxLqBJxwm2bxVNM1dkoelwWMyIE,4500
  nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/asr_impl/mlx_asr_impl.py,sha256=JuyxFzFbbgclK5_2Rq5pT278h0q8LztJX7Tggz0zkbM,3191
- nexaai/asr_impl/pybind_asr_impl.py,sha256=ybvthYgtVbH_JgpSsl0nxjZYvXyk8KGRSKdsJ-hLfZE,1450
+ nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
+ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
  nexaai/binds/__init__.py,sha256=T9Ua7SzHNglSeEqXlfH5ymYXRyXhNKkC9z_y_bWCNMo,80
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=hVxY76tn7hN6uHDIgM7LWNvgoudHgNZVoaygM9X1RWE,217232
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=FT8581RNciilskK89PhtnNSjw4Oh0-xk8QdbJVFmOd8,202064
- nexaai/binds/libnexa_bridge.dylib,sha256=JfXpiJg-_MpB77NYNUZnvQdOZWYppGHgpUJ83n9cKVg,251256
+ nexaai/binds/libcrypto.dylib,sha256=aWif9WhTKVQhmZL3DmtIpMkZY5JSb_Ny6CClmUBKYM4,4710416
+ nexaai/binds/libnexa_bridge.dylib,sha256=W2PFrGAVDmalbBvyECD5GTEylLut3gg99p93Fi-QaAM,251480
+ nexaai/binds/libssl.dylib,sha256=Q2frAdhR729oKYuCjJOEr1Ott3idFWoFp98fwNqtIaU,881616
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=Bv08rn9OBAHy01eAQeANiJSrCxskn1xSx4Gl1Vcrhm0,166064
- nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=D_mlY_PNMIFlm6mHERSLMoA7QfYHwEPVlb0UKMbl2N0,632048
+ nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=CzsTec_QHlvbBGzmx4MBQ4LUjG7aIqW1rP5p_A90Vds,632048
  nexaai/binds/nexa_llama_cpp/libggml-cpu.so,sha256=RiMhOv6IAWY1zkFTp0JCB7CYoPfOv54vBVQHvj1koBM,661120
  nexaai/binds/nexa_llama_cpp/libggml-metal.so,sha256=L4RQvaD0w4qBjexi4O05RMCH8842fof5QgBEvyx0RcA,673104
  nexaai/binds/nexa_llama_cpp/libggml.dylib,sha256=aOTj_6RrAMkfDO0ZI28_3nfcC-l4Y3dRCiS3C0d0_eI,58592
  nexaai/binds/nexa_llama_cpp/libllama.dylib,sha256=fDPnTG6EQ1JN6aRmnIFQzag_kmtyImRxKjMOOtaTY5Q,1746928
  nexaai/binds/nexa_llama_cpp/libmtmd.dylib,sha256=ccnBRsJNFGTCsjgW03N9PvX26wUirqpxljnxdVPINVc,587008
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib,sha256=1kAoSq1w8pPiNmceOdmAZ7ehfENauFoGq8mpIwGl-kk,1806696
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib,sha256=aw8if8RwXjb02CehbqGPHOeEKRUcTpzeJZLOkjTBm8A,596328
+ nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib,sha256=d7s5TwlMFTGsrUifN1e6dOWxStsLFpuu6Ko6ImXZ5Sg,2368184
+ nexaai/binds/nexa_mlx/libnexa_plugin.dylib,sha256=yjbdy0FpBE_RwgqvwGxd3czIfs3OYVoh--vWpn2H7RQ,1422888
  nexaai/binds/nexa_mlx/py-lib/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
  nexaai/binds/nexa_mlx/py-lib/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,17 +183,17 @@ nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py,sha256=79ddUhtT
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py,sha256=9dNmH03C46HtxwesH2DpT2oTNEG1KCZWYEKq6UQ3vfk,3536
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py,sha256=12RiOfPtSZQj5g5JM-yCJk3uGQfM3OdmRiPt5uUDE4E,35096
  nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/cv_impl/mlx_cv_impl.py,sha256=mdK4DEffPe96AgDGDXtQeHlG958hf8FO1fBZ1qjZMEE,3162
- nexaai/cv_impl/pybind_cv_impl.py,sha256=yS4JKfRSaIjjVP7hJ-CizG76pIX85bpmGLk9B9cnL24,998
+ nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
+ nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
  nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/embedder_impl/mlx_embedder_impl.py,sha256=dTOz34WGDnhsI9L7Ctv6fGPngvMAUc4FwEwRgBp_M9I,4317
- nexaai/embedder_impl/pybind_embedder_impl.py,sha256=AGGrOq4z0mDpQZInOvJsOIlQWflByhDjsihMu_Wjtbk,3286
+ nexaai/embedder_impl/mlx_embedder_impl.py,sha256=OsDzsc_2wZkSoWu6yCOZadMkaYdBW3uyjF11hDKTaX8,4383
+ nexaai/embedder_impl/pybind_embedder_impl.py,sha256=Ga1JYauVkRq6jwAGL7Xx5HDaIx483_v9gZVoTyd3xNU,3495
  nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=OxSbk9zIDj7tTvsdM8bMJQDBhpn-mygBNktewd_wgtE,11153
- nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=XoSdWG5ID_g93WT9QB0qCP64a4rX-Rva0u4fQ8xpoqg,3626
+ nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
+ nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCtfphaZ1bHzKzey1xF8,3692
  nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/llm_impl/mlx_llm_impl.py,sha256=HCi1uQBjccDDi64LbAgyH85lWx7qDZIW8i43dojGfF0,10210
- nexaai/llm_impl/pybind_llm_impl.py,sha256=8Us4N5KF6oi-0-K_5Dpf2rYe9smd89ZfWFrP_fWBsM4,7374
+ nexaai/llm_impl/mlx_llm_impl.py,sha256=2Ifc_mfTHDX64BWVHLjOhFCIMqM_Z-Cn4RfExlMtq0s,10865
+ nexaai/llm_impl/pybind_llm_impl.py,sha256=DpO38rlGcvf0Zpe4bPKsbPD3EguBf0dDS9Ve64bgdvo,7653
  nexaai/mlx_backend/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
  nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
  nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
@@ -211,7 +213,7 @@ nexaai/mlx_backend/embedding/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
  nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py,sha256=F9Z_9r-Dh0wNThiMp5W5hqE2dt5bf4ps5_c6h4BuWGw,15218
  nexaai/mlx_backend/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/llm/generate.py,sha256=Phes0tzxbbEWA2hDylQvD0LjorMaPwvcfZq9RKCAOt0,4399
- nexaai/mlx_backend/llm/interface.py,sha256=Fx28O2jCDPaEfr0xLffWnqGIU5Gspggxr-o54-fBWj4,29257
+ nexaai/mlx_backend/llm/interface.py,sha256=YBLAdz_5gQ1VF9o98Tuj6xB_M2nUB9kX9VkM-Mp6ryc,29310
  nexaai/mlx_backend/llm/main.py,sha256=gFDE4VZv_CLKMCTn0N521OfCKH_Ys26bHDh6g9VEFNc,1982
  nexaai/mlx_backend/mlx_audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/mlx_audio/server.py,sha256=Pqy13Fafq4WX_cTuvRFz1jq89beQm2QQGpXmhK4b9jc,17547
@@ -502,19 +504,19 @@ nexaai/mlx_backend/vlm/modeling/trainer/lora.py,sha256=tGjvenjEQ8_1Az8Nz3smz5Mgv
  nexaai/mlx_backend/vlm/modeling/trainer/trainer.py,sha256=h16SaHt76JzFruXuidgXDx7_2evx4L0SecvzqLmhyZw,9081
  nexaai/mlx_backend/vlm/modeling/trainer/utils.py,sha256=29oHf_7946YeJKP_-Dt-NPeN4xJq8Fj7Yv4jZKO9RWA,4909
  nexaai/rerank_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/rerank_impl/mlx_rerank_impl.py,sha256=I-jumShLm1jAmKunRcDNUU4yjfWLoWClFMFONd88-Es,3177
- nexaai/rerank_impl/pybind_rerank_impl.py,sha256=FIIN96zCxXopqpqZdBd7OjuqqviFBY8HMZek1bCeoJw,1447
+ nexaai/rerank_impl/mlx_rerank_impl.py,sha256=h37PKSIRBY8mwzVeLeP4ix9ui3waIsg4gorzelYLJbM,3243
+ nexaai/rerank_impl/pybind_rerank_impl.py,sha256=CtwkG7YrW58GPMDERJSnISGTVCXWNju5__R2W837t7c,1513
  nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/tts_impl/mlx_tts_impl.py,sha256=D71IFtIYWzrVdBS2y5vDBWjZ4ZAzRRjFHC0KO0pA5BU,3035
- nexaai/tts_impl/pybind_tts_impl.py,sha256=Be5QiXzDz6h1LTIQzUBd0ZyBs7rUpNA-pULCXFtt2Is,1378
+ nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
+ nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
  nexaai/utils/avatar_fetcher.py,sha256=bWy8ujgbOiTHFCjFxTwkn3uXbZ84PgEGUkXkR3MH4bI,3821
  nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
  nexaai/utils/model_manager.py,sha256=c07ocxxw1IHCQw6esbmYK0dX2R2OajfEIGsC_2teHXo,48572
  nexaai/utils/progress_tracker.py,sha256=76HlPkyN41IMHSsH56-qdlN_aY_oBfJz50J16Cx67R0,15102
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/vlm_impl/mlx_vlm_impl.py,sha256=4lFZ8ZQnYZ-Uoh9j2Fh2UzpdpMAy_v1Jz-lrqX33XcI,8947
- nexaai/vlm_impl/pybind_vlm_impl.py,sha256=C-3fa0AIypI33OAGuGfVxo1V7zN0wjQMgruKlDIlW4Q,8333
- nexaai-1.0.4rc14.dist-info/METADATA,sha256=dsyrftSmP4lR0EolabKfzo8ZRG5CBMHYJKGDRoP0eIM,952
- nexaai-1.0.4rc14.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
- nexaai-1.0.4rc14.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
- nexaai-1.0.4rc14.dist-info/RECORD,,
+ nexaai/vlm_impl/mlx_vlm_impl.py,sha256=Dm-N38wqK3Cjdk3n7wfVGKC7hwxHvaM8pz37VzvJC-Y,10443
+ nexaai/vlm_impl/pybind_vlm_impl.py,sha256=mvydHMHNWtkmyqouLIj1XSYZgsro3tcp3s_aqkjljE0,8510
+ nexaai-1.0.4rc16.dist-info/METADATA,sha256=NuLsDWtJssKVjTNP4oo-tFItIBxIbiq-0hTq1rv706s,883
+ nexaai-1.0.4rc16.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
+ nexaai-1.0.4rc16.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+ nexaai-1.0.4rc16.dist-info/RECORD,,