nexaai 1.0.20-cp310-cp310-macosx_13_0_x86_64.whl → 1.0.21-cp310-cp310-macosx_13_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic.
- nexaai/__init__.py +12 -0
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +1 -1
- nexaai/asr.py +10 -6
- nexaai/asr_impl/pybind_asr_impl.py +98 -15
- nexaai/binds/__init__.py +2 -0
- nexaai/binds/asr_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/cpu_gpu/libnexa_plugin.dylib +0 -0
- nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/rerank_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
- nexaai/common.py +1 -0
- nexaai/cv.py +2 -1
- nexaai/embedder.py +4 -3
- nexaai/embedder_impl/mlx_embedder_impl.py +3 -1
- nexaai/embedder_impl/pybind_embedder_impl.py +3 -2
- nexaai/image_gen.py +2 -1
- nexaai/llm.py +5 -3
- nexaai/llm_impl/mlx_llm_impl.py +2 -0
- nexaai/llm_impl/pybind_llm_impl.py +2 -0
- nexaai/mlx_backend/vlm/interface.py +5 -2
- nexaai/rerank.py +5 -3
- nexaai/rerank_impl/mlx_rerank_impl.py +2 -0
- nexaai/rerank_impl/pybind_rerank_impl.py +109 -16
- nexaai/runtime_error.py +24 -0
- nexaai/tts.py +2 -1
- nexaai/utils/manifest_utils.py +10 -6
- nexaai/utils/model_manager.py +139 -8
- nexaai/vlm.py +4 -2
- nexaai/vlm_impl/mlx_vlm_impl.py +3 -2
- nexaai/vlm_impl/pybind_vlm_impl.py +33 -7
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/METADATA +1 -2
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/RECORD +37 -34
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/WHEEL +0 -0
- {nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/top_level.txt +0 -0
nexaai/__init__.py
CHANGED
@@ -24,6 +24,13 @@ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, P
 # Import logging functionality
 from .log import set_logger, get_error_message
 
+# Import runtime errors
+from .runtime_error import (
+    NexaRuntimeError,
+    ContextLengthExceededError,
+    GenerationError
+)
+
 # Create alias for PluginID to be accessible as plugin_id
 plugin_id = PluginID
 
@@ -52,6 +59,11 @@ __all__ = [
     # Logging functionality
     "set_logger",
    "get_error_message",
+
+    # Runtime errors
+    "NexaRuntimeError",
+    "ContextLengthExceededError",
+    "GenerationError",
 
     "LLM",
     "Embedder",
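As a quick sanity check of the new import surface, the sketch below assumes the 1.0.21 wheel is installed and relies only on the re-exports added above; nothing else in it comes from this diff.

import nexaai

# The three error types added in 1.0.21 are re-exported at package top level.
for name in ("NexaRuntimeError", "ContextLengthExceededError", "GenerationError"):
    cls = getattr(nexaai, name)
    print(name, "->", cls.__mro__[1].__name__)
# Expected: the two subclasses report NexaRuntimeError as their direct base,
# and NexaRuntimeError itself reports Exception.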
nexaai/_stub.cpython-310-darwin.so
Binary file
nexaai/_version.py
CHANGED
nexaai/asr.py
CHANGED
@@ -3,7 +3,7 @@ from abc import abstractmethod
 from dataclasses import dataclass
 
 from nexaai.base import BaseModel
-from nexaai.common import PluginID
+from nexaai.common import PluginID, ModelConfig
 
 
 @dataclass
@@ -25,17 +25,20 @@ class ASRResult:
 class ASR(BaseModel):
     """Abstract base class for Automatic Speech Recognition models."""
 
-    def __init__(self):
+    def __init__(self, m_cfg: ModelConfig = ModelConfig()):
         """Initialize base ASR class."""
-
+        self._m_cfg = m_cfg
 
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    language: Optional[str] = None,
+                   m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'ASR':
         """Load ASR model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -43,10 +46,11 @@ class ASR(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
-            return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
+            return MLXASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
         else:
             from nexaai.asr_impl.pybind_asr_impl import PyBindASRImpl
-            return PyBindASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
+            return PyBindASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
+
 
     @abstractmethod
     def transcribe(
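The _load_from change above threads model_name and m_cfg through to both backends; the dispatch itself is unchanged. Below is a self-contained sketch of that routing pattern — the enum here is a stand-in with assumed values, not the packaged nexaai.common.PluginID.

from enum import Enum
from typing import Union

class PluginID(Enum):          # stand-in; values assumed from the routing code
    LLAMA_CPP = "llama_cpp"
    MLX = "mlx"

def route(plugin_id: Union[PluginID, str]) -> str:
    # Normalize enum-or-string input exactly as the SDK's loaders do,
    # then pick the implementation by the normalized value.
    plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
    return "MLXASRImpl" if plugin_value == "mlx" else "PyBindASRImpl"

print(route(PluginID.MLX))   # MLXASRImpl
print(route("llama_cpp"))    # PyBindASRImpl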
nexaai/asr_impl/pybind_asr_impl.py
CHANGED
@@ -1,32 +1,78 @@
 from typing import List, Optional, Union
 
-from nexaai.common import PluginID
+from nexaai.common import PluginID, ModelConfig
 from nexaai.asr import ASR, ASRConfig, ASRResult
+from nexaai.binds import asr_bind, common_bind
+from nexaai.runtime import _ensure_runtime
 
 
 class PyBindASRImpl(ASR):
-    def __init__(self):
-        """
-        super().__init__()
-        #
+    def __init__(self, handle: any, m_cfg: ModelConfig = ModelConfig()):
+        """Private constructor, should not be called directly."""
+        super().__init__(m_cfg)
+        self._handle = handle  # This is a py::capsule
+        self._model_config = None
 
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    language: Optional[str] = None,
+                   m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None
                    ) -> 'PyBindASRImpl':
         """Load ASR model from local path using PyBind backend."""
-
-
-
+        _ensure_runtime()
+
+        # Create model config
+        config = common_bind.ModelConfig()
+
+        config.n_ctx = m_cfg.n_ctx
+        if m_cfg.n_threads is not None:
+            config.n_threads = m_cfg.n_threads
+        if m_cfg.n_threads_batch is not None:
+            config.n_threads_batch = m_cfg.n_threads_batch
+        if m_cfg.n_batch is not None:
+            config.n_batch = m_cfg.n_batch
+        if m_cfg.n_ubatch is not None:
+            config.n_ubatch = m_cfg.n_ubatch
+        if m_cfg.n_seq_max is not None:
+            config.n_seq_max = m_cfg.n_seq_max
+        config.n_gpu_layers = m_cfg.n_gpu_layers
+
+        # handle chat template strings
+        if m_cfg.chat_template_path:
+            config.chat_template_path = m_cfg.chat_template_path
+
+        if m_cfg.chat_template_content:
+            config.chat_template_content = m_cfg.chat_template_content
+
+        # Convert plugin_id to string
+        plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
+
+        # Create ASR handle using the binding
+        handle = asr_bind.ml_asr_create(
+            model_path=model_path,
+            model_name=model_name,
+            tokenizer_path=tokenizer_path,
+            model_config=config,
+            language=language,
+            plugin_id=plugin_id_str,
+            device_id=device_id,
+            license_id=None,  # Optional
+            license_key=None  # Optional
+        )
+
+        return cls(handle, m_cfg)
 
     def eject(self):
-        """
-        #
-
+        """Release the model from memory."""
+        # py::capsule handles cleanup automatically
+        if hasattr(self, '_handle') and self._handle is not None:
+            del self._handle
+            self._handle = None
 
     def transcribe(
         self,
@@ -35,10 +81,47 @@ class PyBindASRImpl(ASR):
         config: Optional[ASRConfig] = None,
     ) -> ASRResult:
         """Transcribe audio file to text."""
-
-
+        if self._handle is None:
+            raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+        # Convert ASRConfig to binding format if provided
+        asr_config = None
+        if config:
+            asr_config = asr_bind.ASRConfig()
+            asr_config.timestamps = config.timestamps
+            asr_config.beam_size = config.beam_size
+            asr_config.stream = config.stream
+
+        # Perform transcription using the binding
+        result_dict = asr_bind.ml_asr_transcribe(
+            handle=self._handle,
+            audio_path=audio_path,
+            language=language,
+            config=asr_config
+        )
+
+        # Convert result to ASRResult
+        transcript = result_dict.get("transcript", "")
+        confidence_scores = result_dict.get("confidence_scores")
+        timestamps = result_dict.get("timestamps")
+
+        # Convert timestamps to the expected format
+        timestamp_pairs = []
+        if timestamps:
+            for start, end in timestamps:
+                timestamp_pairs.append((float(start), float(end)))
+
+        return ASRResult(
+            transcript=transcript,
+            confidence_scores=confidence_scores or [],
+            timestamps=timestamp_pairs
+        )
 
     def list_supported_languages(self) -> List[str]:
         """List supported languages."""
-
-
+        if self._handle is None:
+            raise RuntimeError("ASR model not loaded. Call _load_from first.")
+
+        # Get supported languages using the binding
+        languages = asr_bind.ml_asr_list_supported_languages(handle=self._handle)
+        return languages
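The config plumbing in _load_from copies optional fields onto the pybind struct only when they are set, so C++-side defaults survive. A minimal, self-contained sketch of that pattern follows — both classes are stand-ins for nexaai's ModelConfig and the common_bind.ModelConfig pybind struct, and the default values are invented for the demo.

from dataclasses import dataclass
from typing import Optional

@dataclass
class ModelConfig:                      # Python-side config (assumed shape)
    n_ctx: int = 4096
    n_threads: Optional[int] = None
    n_batch: Optional[int] = None
    n_gpu_layers: int = 0

class BindModelConfig:                  # stand-in for common_bind.ModelConfig
    n_ctx = 0
    n_threads = 8                       # pretend C++ default
    n_batch = 512
    n_gpu_layers = 0

def to_bind_config(m_cfg: ModelConfig) -> BindModelConfig:
    config = BindModelConfig()
    config.n_ctx = m_cfg.n_ctx                      # always copied
    if m_cfg.n_threads is not None:                 # copied only if set,
        config.n_threads = m_cfg.n_threads          # else keep the C++ default
    if m_cfg.n_batch is not None:
        config.n_batch = m_cfg.n_batch
    config.n_gpu_layers = m_cfg.n_gpu_layers        # always copied
    return config

cfg = to_bind_config(ModelConfig(n_ctx=8192, n_threads=4))
print(cfg.n_ctx, cfg.n_threads, cfg.n_batch)        # 8192 4 512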
nexaai/binds/__init__.py
CHANGED
nexaai/binds/asr_bind.cpython-310-darwin.so
Binary file
nexaai/binds/cpu_gpu/libnexa_plugin.dylib
Binary file
nexaai/binds/embedder_bind.cpython-310-darwin.so
Binary file
nexaai/binds/libnexa_bridge.dylib
Binary file
nexaai/binds/llm_bind.cpython-310-darwin.so
Binary file
nexaai/binds/rerank_bind.cpython-310-darwin.so
Binary file
nexaai/binds/vlm_bind.cpython-310-darwin.so
Binary file
nexaai/common.py
CHANGED
nexaai/cv.py
CHANGED
@@ -73,7 +73,8 @@ class CVModel(BaseModel):
                    _: str,  # TODO: remove this argument, this is a hack to make api design happy
                    config: CVModelConfig,
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'CVModel':
         """Load CV model from configuration, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/embedder.py
CHANGED
@@ -22,12 +22,13 @@ class Embedder(BaseModel):
         pass
 
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP, **kwargs):
         """
         Load an embedder from model files, routing to appropriate implementation.
 
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
@@ -39,10 +40,10 @@ class Embedder(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
-            return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+            return MLXEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
         else:
             from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl
-            return PyBindEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+            return PyBindEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
 
     @abstractmethod
     def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
nexaai/embedder_impl/mlx_embedder_impl.py
CHANGED
@@ -14,12 +14,13 @@ class MLXEmbedderImpl(Embedder):
         self._mlx_embedder = None
 
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
         """
         Load an embedder from model files using MLX backend.
 
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.MLX)
 
@@ -34,6 +35,7 @@ class MLXEmbedderImpl(Embedder):
         # This will automatically detect if it's JinaV2 or generic model and route correctly
         instance._mlx_embedder = create_embedder(
             model_path=model_path,
+            # model_name=model_name,  # FIXME: For MLX Embedder, model_name is not used
             tokenizer_path=tokenizer_file
         )
 
nexaai/embedder_impl/pybind_embedder_impl.py
CHANGED
@@ -16,12 +16,13 @@ class PyBindEmbedderImpl(Embedder):
         self._handle = _handle_ptr
 
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
         """
         Load an embedder from model files
 
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
@@ -32,7 +33,7 @@ class PyBindEmbedderImpl(Embedder):
         # Convert enum to string for C++ binding
         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
         # New parameter order: model_path, plugin_id, tokenizer_path (optional)
-        handle = embedder_bind.ml_embedder_create(model_path, plugin_id_str, tokenizer_file)
+        handle = embedder_bind.ml_embedder_create(model_path, model_name, plugin_id_str, tokenizer_file)
         return cls(handle)
 
     def eject(self):
nexaai/image_gen.py
CHANGED
@@ -71,7 +71,8 @@ class ImageGen(BaseModel):
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None,
                    float16: bool = True,
-                   quantize: bool = False
+                   quantize: bool = False,
+                   **kwargs
                    ) -> 'ImageGen':
         """Load image generation model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/llm.py
CHANGED
@@ -15,10 +15,12 @@ class LLM(BaseModel):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'LLM':
         """Load model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -26,10 +28,10 @@ class LLM(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
-            return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return MLXLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
         else:
             from nexaai.llm_impl.pybind_llm_impl import PyBindLLMImpl
-            return PyBindLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return PyBindLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
 
     def cancel_generation(self):
         """Signal to cancel any ongoing stream generation."""
nexaai/llm_impl/mlx_llm_impl.py
CHANGED
@@ -16,6 +16,7 @@ class MLXLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.MLX,
@@ -40,6 +41,7 @@ class MLXLLMImpl(LLM):
         instance = cls(m_cfg)
         instance._mlx_llm = MLXLLMInterface(
             model_path=local_path,
+            # model_name=model_name,  # FIXME: For MLX LLM, model_name is not used
             tokenizer_path=tokenizer_path or local_path,
             config=mlx_config,
             device=device_id
nexaai/llm_impl/pybind_llm_impl.py
CHANGED
@@ -19,6 +19,7 @@ class PyBindLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
@@ -55,6 +56,7 @@ class PyBindLLMImpl(LLM):
         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
         handle = llm_bind.ml_llm_create(
             model_path=local_path,
+            model_name=model_name,
             tokenizer_path=tokenizer_path,
             model_config=config,
             plugin_id=plugin_id_str,
nexaai/mlx_backend/vlm/interface.py
CHANGED
@@ -482,8 +482,12 @@ class VLM(ProfilingMixin):
 
     def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
         """Apply chat template to messages with optional tools support."""
+        if self.model_name in ["qwen3vl", "qwen3vl-4b", "qwen3vl-4b-thinking", "qwen3vl-8b", "qwen3vl-8b-thinking"]:
+            return apply_chat_template_qwen3_vl(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
+        if self.model_name == "qwen3vl-moe":
+            return apply_chat_template_qwen3_vl_moe(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
+
         if hasattr(self.processor, "apply_chat_template"):
-            # Convert ChatMessage objects to dictionaries for the processor
             messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
 
             parsed_tools = None
@@ -492,7 +496,6 @@ class VLM(ProfilingMixin):
 
             result = apply_chat_template(self.processor, self.model.config, messages_dict, add_generation_prompt=True, enable_thinking=enable_thinking, tools=parsed_tools)
             return result
-        # Fallback: join messages
         return "\n".join([f"{m.role}: {m.content}" for m in messages])
 
     def apply_chat_template_with_media(self, messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = True) -> str:
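The new qwen3vl branch above dispatches on the model name before falling back to the processor's own template. Here is a sketch of just that selection logic — the model-name strings mirror the diff, while the returned labels are placeholders for the real template functions.

QWEN3VL_DENSE = ["qwen3vl", "qwen3vl-4b", "qwen3vl-4b-thinking",
                 "qwen3vl-8b", "qwen3vl-8b-thinking"]

def pick_template(model_name: str) -> str:
    # Dense qwen3vl variants and the MoE variant each get a dedicated
    # template function; everything else uses the processor's template.
    if model_name in QWEN3VL_DENSE:
        return "apply_chat_template_qwen3_vl"
    if model_name == "qwen3vl-moe":
        return "apply_chat_template_qwen3_vl_moe"
    return "processor.apply_chat_template"

print(pick_template("qwen3vl-8b"))   # apply_chat_template_qwen3_vl
print(pick_template("qwen3vl-moe"))  # apply_chat_template_qwen3_vl_moe
print(pick_template("llava"))        # processor.apply_chat_template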
nexaai/rerank.py
CHANGED
@@ -24,9 +24,11 @@ class Reranker(BaseModel):
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'Reranker':
         """Load reranker model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -34,10 +36,10 @@ class Reranker(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
-            return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
+            return MLXRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
         else:
             from nexaai.rerank_impl.pybind_rerank_impl import PyBindRerankImpl
-            return PyBindRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
+            return PyBindRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
 
     @abstractmethod
     def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
nexaai/rerank_impl/mlx_rerank_impl.py
CHANGED
@@ -17,6 +17,7 @@ class MLXRerankImpl(Reranker):
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.MLX,
                    device_id: Optional[str] = None
@@ -29,6 +30,7 @@ class MLXRerankImpl(Reranker):
         instance = cls()
         instance._mlx_reranker = create_reranker(
             model_path=model_path,
+            # model_name=model_name,  # FIXME: For MLX Reranker, model_name is not used
             tokenizer_path=tokenizer_file,
             device=device_id
         )
nexaai/rerank_impl/pybind_rerank_impl.py
CHANGED
@@ -1,36 +1,89 @@
 from typing import List, Optional, Sequence, Union
+import numpy as np
 
 from nexaai.common import PluginID
 from nexaai.rerank import Reranker, RerankConfig
+from nexaai.binds import rerank_bind, common_bind
+from nexaai.runtime import _ensure_runtime
 
 
 class PyBindRerankImpl(Reranker):
-    def __init__(self):
-        """
+    def __init__(self, _handle_ptr):
+        """
+        Internal initializer
+
+        Args:
+            _handle_ptr: Capsule handle to the C++ reranker object
+        """
         super().__init__()
-
+        self._handle = _handle_ptr
 
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None
                    ) -> 'PyBindRerankImpl':
-        """
-
-
-
+        """
+        Load reranker model from local path using PyBind backend.
+
+        Args:
+            model_path: Path to the model file
+            model_name: Name of the model (optional)
+            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
+            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
+            device_id: Device ID to use for the model (optional)
+
+        Returns:
+            PyBindRerankImpl instance
+        """
+        _ensure_runtime()
+
+        # Convert enum to string for C++ binding
+        plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+        # Create model config
+        model_config = common_bind.ModelConfig()
+
+        # Create reranker handle with new API signature
+        handle = rerank_bind.ml_reranker_create(
+            model_path,
+            model_name,
+            tokenizer_file,
+            model_config,
+            plugin_id_str,
+            device_id
+        )
+
+        return cls(handle)
 
     def eject(self):
-        """
-
-
+        """
+        Clean up resources and destroy the reranker
+        """
+        # Destructor of the handle will unload the model correctly
+        if hasattr(self, '_handle') and self._handle is not None:
+            del self._handle
+            self._handle = None
 
     def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
-        """
-
-
+        """
+        Load model from path.
+
+        Note: This method is not typically used directly. Use _load_from instead.
+
+        Args:
+            model_path: Path to the model file
+            extra_data: Additional data (unused)
+
+        Returns:
+            True if successful
+        """
+        # This method is part of the BaseModel interface but typically not used
+        # directly for PyBind implementations since _load_from handles creation
+        raise NotImplementedError("Use _load_from class method to load models")
 
     def rerank(
         self,
@@ -38,6 +91,46 @@ class PyBindRerankImpl(Reranker):
         documents: Sequence[str],
         config: Optional[RerankConfig] = None,
     ) -> List[float]:
-        """
-
-
+        """
+        Rerank documents given a query.
+
+        Args:
+            query: Query text as UTF-8 string
+            documents: List of document texts to rerank
+            config: Optional reranking configuration
+
+        Returns:
+            List of ranking scores (one per document)
+        """
+        if self._handle is None:
+            raise RuntimeError("Reranker handle is None. Model may have been ejected.")
+
+        # Use default config if not provided
+        if config is None:
+            config = RerankConfig()
+
+        # Create bind config
+        bind_config = rerank_bind.RerankConfig()
+        bind_config.batch_size = config.batch_size
+        bind_config.normalize = config.normalize
+        bind_config.normalize_method = config.normalize_method
+
+        # Convert documents to list if needed
+        documents_list = list(documents)
+
+        # Call the binding which returns a dict with scores and profile_data
+        result = rerank_bind.ml_reranker_rerank(
+            self._handle,
+            query,
+            documents_list,
+            bind_config
+        )
+
+        # Extract scores from result dict
+        scores_array = result.get("scores", np.array([]))
+
+        # Convert numpy array to list of floats
+        if isinstance(scores_array, np.ndarray):
+            return scores_array.tolist()
+        else:
+            return []
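The tail of rerank() normalizes the binding's result dict into a plain list of floats. That step in isolation looks like the sketch below; the result dicts are fabricated for the demo.

import numpy as np

def extract_scores(result: dict) -> list:
    # The binding returns {"scores": np.ndarray, "profile_data": ...};
    # anything unexpected degrades to an empty list.
    scores_array = result.get("scores", np.array([]))
    if isinstance(scores_array, np.ndarray):
        return scores_array.tolist()
    return []

print(extract_scores({"scores": np.array([0.91, 0.12, 0.55])}))  # [0.91, 0.12, 0.55]
print(extract_scores({"profile_data": {}}))                      # []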
nexaai/runtime_error.py
ADDED
@@ -0,0 +1,24 @@
+"""Runtime errors for Nexa SDK operations."""
+
+
+class NexaRuntimeError(Exception):
+    """Base class for Nexa runtime errors."""
+
+    def __init__(self, message: str, error_code: int = None):
+        self.error_code = error_code
+        super().__init__(message)
+
+
+class ContextLengthExceededError(NexaRuntimeError):
+    """Raised when the input context length exceeds the model's maximum."""
+
+    def __init__(self, message: str = "Input context length exceeded model's maximum", error_code: int = None):
+        super().__init__(message, error_code)
+
+
+class GenerationError(NexaRuntimeError):
+    """Raised when generation fails."""
+
+    def __init__(self, message: str = "Generation failed", error_code: int = None):
+        super().__init__(message, error_code)
+
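A minimal sketch of consuming this hierarchy, assuming the package is installed; the error code used here matches the ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH constant (-200004) that the VLM binding maps further below.

from nexaai.runtime_error import (
    NexaRuntimeError, ContextLengthExceededError, GenerationError
)

def describe(err: NexaRuntimeError) -> str:
    # Both subclasses carry the numeric error_code from the C layer, so a
    # single NexaRuntimeError handler can still distinguish specific cases.
    try:
        raise err
    except ContextLengthExceededError as e:
        return f"prompt too long (code {e.error_code}); shorten the input"
    except NexaRuntimeError as e:
        return f"generation failed (code {e.error_code})"

print(describe(ContextLengthExceededError(error_code=-200004)))
print(describe(GenerationError("backend aborted", error_code=-1)))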
nexaai/tts.py
CHANGED
@@ -45,7 +45,8 @@ class TTS(BaseModel):
                    model_path: str,
                    vocoder_path: str,
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'TTS':
         """Load TTS model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/utils/manifest_utils.py
CHANGED
@@ -157,12 +157,16 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, ol
     # Use the new enum-based quantization extraction
     quantization_type = extract_quantization_from_filename(current_file_name)
     quant_level = quantization_type.value if quantization_type else "UNKNOWN"
-
-
-
-
-
-
+
+    # FIXME: hardcode to handle the multiple mmproj files problem
+    if quant_level == "UNKNOWN" and "mmproj" in current_file_name.lower():
+        pass
+    else:
+        model_files[quant_level] = {
+            "Name": current_file_name,
+            "Downloaded": True,
+            "Size": file_size
+        }
 
     # Determine PluginId with priority: kwargs > downloaded_manifest > model_file_type > default
     plugin_id = kwargs.get('plugin_id')
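A self-contained sketch of the mmproj guard added above: a file whose quantization cannot be parsed is only indexed when it is not an mmproj sidecar, so multiple mmproj files no longer collide on the "UNKNOWN" key. File names and sizes below are made up for the demo.

def index_file(model_files: dict, current_file_name: str, quant_level: str, file_size: int) -> None:
    # Skip mmproj sidecars that have no recognizable quantization level.
    if quant_level == "UNKNOWN" and "mmproj" in current_file_name.lower():
        return
    model_files[quant_level] = {
        "Name": current_file_name,
        "Downloaded": True,
        "Size": file_size,
    }

files = {}
index_file(files, "model.Q4_K_M.gguf", "Q4_K_M", 4_200_000_000)
index_file(files, "mmproj-model-f16.gguf", "UNKNOWN", 600_000_000)  # skipped
print(sorted(files))  # ['Q4_K_M']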
nexaai/utils/model_manager.py
CHANGED
@@ -410,6 +410,20 @@ def _remove_specific_file(target_model: DownloadedModel, file_name: str, local_d
     except OSError:
         file_size = 0
 
+    # Check if we should remove entire folder instead (for .gguf files)
+    # If removing a .gguf file and no other non-mmproj .gguf files remain, remove entire folder
+    if file_name.endswith('.gguf'):
+        updated_files = [f for f in target_model.files if f != file_name]
+        # Find remaining .gguf files that don't contain "mmproj" in filename
+        remaining_non_mmproj_gguf = [
+            f for f in updated_files
+            if f.endswith('.gguf') and 'mmproj' not in f.lower()
+        ]
+
+        # If no non-mmproj .gguf files remain, remove entire repository
+        if len(remaining_non_mmproj_gguf) == 0:
+            return _remove_entire_repository(target_model, local_dir)
+
     # Remove the file
     try:
         os.remove(file_path)
@@ -846,6 +860,41 @@ class HuggingFaceDownloader:
             pass
         return {}
 
+    def _download_manifest_if_needed(self, repo_id: str, local_dir: str) -> bool:
+        """
+        Download nexa.manifest from the repository if it doesn't exist locally.
+
+        Args:
+            repo_id: Repository ID
+            local_dir: Local directory where the manifest should be saved
+
+        Returns:
+            bool: True if manifest was downloaded or already exists, False if not found in repo
+        """
+        manifest_path = os.path.join(local_dir, 'nexa.manifest')
+
+        # Check if manifest already exists locally
+        if os.path.exists(manifest_path):
+            return True
+
+        # Try to download nexa.manifest from the repository
+        try:
+            print(f"[INFO] Attempting to download nexa.manifest from {repo_id}...")
+            self.api.hf_hub_download(
+                repo_id=repo_id,
+                filename='nexa.manifest',
+                local_dir=local_dir,
+                local_dir_use_symlinks=False,
+                token=self.token,
+                force_download=False
+            )
+            print(f"[OK] Successfully downloaded nexa.manifest from {repo_id}")
+            return True
+        except Exception as e:
+            # Manifest doesn't exist in repo or other error - this is fine, we'll create it
+            print(f"[INFO] nexa.manifest not found in {repo_id}, will create locally")
+            return False
+
     def _fetch_and_save_metadata(self, repo_id: str, local_dir: str, is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> None:
         """Fetch model info and save metadata after successful download."""
         # Initialize metadata with defaults to ensure manifest is always created
@@ -946,6 +995,9 @@ class HuggingFaceDownloader:
         if progress_tracker:
             progress_tracker.stop_tracking()
 
+        # Download nexa.manifest from repo if it doesn't exist locally
+        self._download_manifest_if_needed(repo_id, file_local_dir)
+
         # Save metadata after successful download
         self._fetch_and_save_metadata(repo_id, file_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)
 
@@ -1055,6 +1107,9 @@ class HuggingFaceDownloader:
         if progress_tracker:
             progress_tracker.stop_tracking()
 
+        # Download nexa.manifest from repo if it doesn't exist locally
+        self._download_manifest_if_needed(repo_id, repo_local_dir)
+
         # Save metadata after successful download
         self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)
 
@@ -1289,7 +1344,7 @@ def _download_model_if_needed(
     token: Union[bool, str, None] = None,
     is_mmproj: bool = False,
     **kwargs
-) -> str:
+) -> tuple[str, Optional[str], Optional[str]]:
     """
     Helper function to download a model from HuggingFace if it doesn't exist locally.
 
@@ -1300,15 +1355,78 @@ def _download_model_if_needed(
         token: HuggingFace authentication token for private repositories
 
     Returns:
-        str
+        tuple[str, Optional[str], Optional[str]]: Tuple of (local_path, model_name, plugin_id)
+            - local_path: Local path to the model (either existing or downloaded)
+            - model_name: ModelName from nexa.manifest if available, None otherwise
+            - plugin_id: PluginId from nexa.manifest if available, None otherwise
 
     Raises:
        RuntimeError: If download fails
    """
+    # Helper function to extract model info from manifest
+    def _extract_info_from_manifest(path: str) -> tuple[Optional[str], Optional[str], Optional[dict]]:
+        """Extract ModelName, PluginId, and full manifest from nexa.manifest if it exists."""
+        # If path is a file, check its parent directory for manifest
+        if os.path.isfile(path):
+            manifest_dir = os.path.dirname(path)
+        else:
+            manifest_dir = path
+
+        manifest_path = os.path.join(manifest_dir, 'nexa.manifest')
+        if not os.path.exists(manifest_path):
+            return None, None, None
+
+        try:
+            with open(manifest_path, 'r', encoding='utf-8') as f:
+                manifest = json.load(f)
+            return manifest.get('ModelName'), manifest.get('PluginId'), manifest
+        except (json.JSONDecodeError, IOError):
+            return None, None, None
+
+    # Helper function to get a model file path from manifest
+    # Note: This is for NPU only, because when downloading, it is a directory; when passing local path to inference, it needs to be a file.
+    def _get_model_file_from_manifest(manifest: dict, base_dir: str) -> Optional[str]:
+        """Extract a model file path from manifest's ModelFile section."""
+        if not manifest or 'ModelFile' not in manifest:
+            return None
+
+        model_files = manifest['ModelFile']
+        # Find the first valid model file (skip N/A entries and metadata files)
+        for key, file_info in model_files.items():
+            if key == 'N/A':
+                continue
+            if isinstance(file_info, dict) and 'Name' in file_info:
+                file_name = file_info['Name']
+                # Skip common non-model files
+                if file_name and not file_name.startswith('.') and file_name.endswith('.nexa'):
+                    file_path = os.path.join(base_dir, file_name)
+                    if os.path.exists(file_path):
+                        return file_path
+
+        # If no .nexa files found, try ExtraFiles for .nexa files
+        if 'ExtraFiles' in manifest:
+            for file_info in manifest['ExtraFiles']:
+                if isinstance(file_info, dict) and 'Name' in file_info:
+                    file_name = file_info['Name']
+                    if file_name and file_name.endswith('.nexa') and not file_name.startswith('.cache'):
+                        file_path = os.path.join(base_dir, file_name)
+                        if os.path.exists(file_path):
+                            return file_path
+
+        return None
+
     # Check if model_path exists locally (file or directory)
     if os.path.exists(model_path):
-        # Local path exists,
-
+        # Local path exists, try to extract model info
+        model_name, plugin_id, manifest = _extract_info_from_manifest(model_path)
+
+        # If PluginId is "npu" and path is a directory, convert to file path
+        if plugin_id == "npu" and os.path.isdir(model_path):
+            model_file_path = _get_model_file_from_manifest(manifest, model_path)
+            if model_file_path:
+                model_path = model_file_path
+
+        return model_path, model_name, plugin_id
 
     # Model path doesn't exist locally, try to download from HuggingFace
     try:
@@ -1328,7 +1446,16 @@ def _download_model_if_needed(
             **kwargs
         )
 
-
+        # Extract model info from the downloaded manifest
+        model_name, plugin_id, manifest = _extract_info_from_manifest(downloaded_path)
+
+        # If PluginId is "npu" and path is a directory, convert to file path
+        if plugin_id == "npu" and os.path.isdir(downloaded_path):
+            model_file_path = _get_model_file_from_manifest(manifest, downloaded_path)
+            if model_file_path:
+                downloaded_path = model_file_path
+
+        return downloaded_path, model_name, plugin_id
 
     except Exception as e:
         # Only handle download-related errors
@@ -1397,7 +1524,7 @@ def auto_download_model(func: Callable) -> Callable:
         # Download name_or_path if needed
         if name_or_path is not None:
             try:
-                downloaded_name_path = _download_model_if_needed(
+                downloaded_name_path, model_name, plugin_id = _download_model_if_needed(
                     name_or_path, 'name_or_path', progress_callback, token, **kwargs
                 )
 
@@ -1408,6 +1535,10 @@ def auto_download_model(func: Callable) -> Callable:
                     args = tuple(args_list)
                 else:
                     kwargs['name_or_path'] = downloaded_name_path
+
+                # Add model_name to kwargs if it exists and not already set
+                if model_name is not None and 'model_name' not in kwargs:
+                    kwargs['model_name'] = model_name
 
             except Exception as e:
                 raise e  # Re-raise the error from _download_model_if_needed
@@ -1415,7 +1546,7 @@ def auto_download_model(func: Callable) -> Callable:
         # Download mmproj_path if needed
         if mmproj_path is not None:
            try:
-                downloaded_mmproj_path = _download_model_if_needed(
+                downloaded_mmproj_path, _, _ = _download_model_if_needed(
                     mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True, **kwargs
                 )
 
@@ -1427,5 +1558,5 @@ def auto_download_model(func: Callable) -> Callable:
 
         # Call original function with updated paths (outside try-catch to let model creation errors bubble up)
         return func(*args, **kwargs)
-
+
     return wrapper
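The manifest lookup added to _download_model_if_needed can be exercised on its own. The sketch below mirrors the _extract_info_from_manifest helper from the diff and builds a throwaway nexa.manifest to run against; the ModelName/PluginId values are invented for the demo.

import json
import os
import tempfile

def extract_info(path: str):
    # Given a model path (file or directory), read ModelName/PluginId out
    # of a sibling nexa.manifest when present; otherwise return Nones.
    manifest_dir = os.path.dirname(path) if os.path.isfile(path) else path
    manifest_path = os.path.join(manifest_dir, "nexa.manifest")
    if not os.path.exists(manifest_path):
        return None, None, None
    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            manifest = json.load(f)
        return manifest.get("ModelName"), manifest.get("PluginId"), manifest
    except (json.JSONDecodeError, IOError):
        return None, None, None

with tempfile.TemporaryDirectory() as d:
    with open(os.path.join(d, "nexa.manifest"), "w", encoding="utf-8") as f:
        json.dump({"ModelName": "qwen3vl-4b", "PluginId": "mlx"}, f)
    print(extract_info(d))  # ('qwen3vl-4b', 'mlx', {...})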
nexaai/vlm.py
CHANGED
@@ -22,7 +22,8 @@ class VLM(BaseModel):
                    model_name: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'VLM':
         """Load VLM model from local path, routing to appropriate implementation.
 
@@ -99,7 +100,8 @@ class VLM(BaseModel):
     def apply_chat_template(
         self,
         messages: List[MultiModalMessage],
-        tools: Optional[List[Dict[str, Any]]] = None
+        tools: Optional[List[Dict[str, Any]]] = None,
+        enable_thinking: bool = True
     ) -> str:
         """Apply the chat template to multimodal messages."""
         pass
nexaai/vlm_impl/mlx_vlm_impl.py
CHANGED
@@ -72,7 +72,8 @@ class MlxVlmImpl(VLM):
     def apply_chat_template(
         self,
         messages: List[MultiModalMessage],
-        tools: Optional[List[Dict[str, Any]]] = None
+        tools: Optional[List[Dict[str, Any]]] = None,
+        enable_thinking: bool = True
     ) -> str:
         """Apply the chat template to multimodal messages."""
         if not self._mlx_vlm:
@@ -116,7 +117,7 @@ class MlxVlmImpl(VLM):
                 num_images=total_images,
                 num_audios=total_audios,
                 tools=tools,
-                enable_thinking=
+                enable_thinking=enable_thinking
             )
         else:
             # Use regular apply_chat_template for text-only messages
nexaai/vlm_impl/pybind_vlm_impl.py
CHANGED
@@ -8,6 +8,11 @@ from nexaai.binds import vlm_bind, common_bind
 from nexaai.runtime import _ensure_runtime
 from nexaai.vlm import VLM
 from nexaai.base import ProfilingData
+from nexaai.runtime_error import ContextLengthExceededError, GenerationError
+
+# Error codes from ml.h
+ML_SUCCESS = 0
+ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH = -200004
 
 
 class PyBindVLMImpl(VLM):
@@ -68,7 +73,7 @@ class PyBindVLMImpl(VLM):
         handle = vlm_bind.create_vlm(
             model_path=local_path,
             mmproj_path=mmproj_path,
-
+            model_name=model_name,
             model_config=config,
             plugin_id=plugin_id_str,
             device_id=device_id
@@ -91,7 +96,8 @@ class PyBindVLMImpl(VLM):
     def apply_chat_template(
         self,
         messages: List[MultiModalMessage],
-        tools: Optional[List[Dict[str, Any]]] = None
+        tools: Optional[List[Dict[str, Any]]] = None,
+        enable_thinking: bool = True
     ) -> str:
         """Apply the chat template to multimodal messages."""
         payload = []
@@ -103,15 +109,14 @@ class PyBindVLMImpl(VLM):
                 t = c["type"]
                 if t == "text":
                     blocks.append({"type": "text", "text": c.get("text","") or ""})
-                elif t == "image":
-                    # Pass through the original structure - let vlm-bind.cpp handle field extraction
-                    blocks.append(c)
                 else:
-
+                    # Pass through the original structure for image, audio, and any other types
+                    # Let vlm-bind.cpp handle field extraction (text/url/path)
+                    blocks.append(c)
 
             payload.append({"role": role, "content": blocks})
 
-        result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools)
+        result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools, enable_thinking)
         return result
 
     def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
@@ -143,6 +148,18 @@ class PyBindVLMImpl(VLM):
                     on_token=on_token,
                     user_data=None
                 )
+
+                # Check for errors in result
+                error_code = result.get("error_code", ML_SUCCESS)
+                if error_code != ML_SUCCESS:
+                    error_message = result.get("error_message", "Unknown error")
+                    if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
+                        exception_container[0] = ContextLengthExceededError(error_message, error_code)
+                    else:
+                        exception_container[0] = GenerationError(error_message, error_code)
+                    token_queue.put(('end', None))
+                    return
+
                 self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
             except Exception as e:
                 exception_container[0] = e
@@ -186,6 +203,15 @@ class PyBindVLMImpl(VLM):
             user_data=None
         )
 
+        # Check for errors in result
+        error_code = result.get("error_code", ML_SUCCESS)
+        if error_code != ML_SUCCESS:
+            error_message = result.get("error_message", "Unknown error")
+            if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
+                raise ContextLengthExceededError(error_message, error_code)
+            else:
+                raise GenerationError(error_message, error_code)
+
         self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
         return result.get("text", "")
 
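The error handling added to PyBindVLMImpl turns the binding's error_code/error_message fields into the typed exceptions from nexaai.runtime_error. A self-contained sketch of that mapping follows; the constants and class shapes follow the diff, while the stand-in classes and result dicts are fabricated so it runs without the package.

ML_SUCCESS = 0
ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH = -200004

class NexaRuntimeError(Exception):          # stand-in for nexaai.runtime_error
    def __init__(self, message, error_code=None):
        self.error_code = error_code
        super().__init__(message)

class ContextLengthExceededError(NexaRuntimeError): pass
class GenerationError(NexaRuntimeError): pass

def raise_for_result(result: dict) -> None:
    # Mirror the wrapper's check: a non-zero error_code becomes a typed
    # exception, with the context-length code getting its own class.
    error_code = result.get("error_code", ML_SUCCESS)
    if error_code != ML_SUCCESS:
        error_message = result.get("error_message", "Unknown error")
        if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
            raise ContextLengthExceededError(error_message, error_code)
        raise GenerationError(error_message, error_code)

raise_for_result({"text": "ok"})  # no error raised
try:
    raise_for_result({"error_code": -200004, "error_message": "too many tokens"})
except ContextLengthExceededError as e:
    print("context overflow:", e.error_code)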
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nexaai
-Version: 1.0.20
+Version: 1.0.21
 Summary: Python bindings for NexaSDK C-lib backend
 Author-email: "Nexa AI, Inc." <dev@nexa.ai>
 Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -14,7 +14,6 @@ Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Requires-Dist: huggingface_hub
 Requires-Dist: tqdm
-Requires-Dist: hf_xet
 Requires-Dist: numpy
 Requires-Dist: httpx
 Provides-Extra: mlx
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/RECORD
CHANGED
@@ -1,46 +1,49 @@
-nexaai/__init__.py,sha256=
-nexaai/_stub.cpython-310-darwin.so,sha256=
-nexaai/_version.py,sha256=
-nexaai/asr.py,sha256=
+nexaai/__init__.py,sha256=gOd7sNsqEESopw_24xgnOSkIRENrk4Fa-RMtmVv62eA,2421
+nexaai/_stub.cpython-310-darwin.so,sha256=HbW9PeHihO3_JhDrG31qtXJ4Ru733LdQmr5EwkZKvM0,49832
+nexaai/_version.py,sha256=nWa8LYSocqThPKZF7GPMpRrb1TPnqOI4BR2IoL05toU,139
+nexaai/asr.py,sha256=wqtq71cxIMGE4KvOIYZebHdWik8dy4LyKrDI98PDvzQ,2294
 nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
-nexaai/common.py,sha256=
-nexaai/cv.py,sha256=
-nexaai/embedder.py,sha256=
-nexaai/image_gen.py,sha256=
-nexaai/llm.py,sha256
+nexaai/common.py,sha256=MRWZ6a7pnci_OUHxZRm3YqgKLAtZFD7b88STYDfeIF8,3460
+nexaai/cv.py,sha256=gpE3F__6bjh8OQKNJZs-QrBuCxqMj2eH-u6HR90vGZE,3302
+nexaai/embedder.py,sha256=lXOT16PEvd_hT23d77dZH38VHNOAk-3JvoOUdQTEaGI,2552
+nexaai/image_gen.py,sha256=MkGw1HXqqv8cJzbiGERNPKFXfq9vMOlvuq0pgekXw68,4385
+nexaai/llm.py,sha256=-agVJuj0FOaDvDiT-fFSOpoyVt-MpNudBucsod3Vp1M,3673
 nexaai/log.py,sha256=Kwo2CIfWN6iP4M4F5EUIV8KIO5hAsvz6HZAaOwJ27Og,2628
-nexaai/rerank.py,sha256=
+nexaai/rerank.py,sha256=rFKm1Y_ou__0lU82OTy4j_AYIGVBGfID0gzuZ6zXYsM,1968
 nexaai/runtime.py,sha256=JvllhlNPgYGLbgGyX2yNvmGzT0lZ5XbvTvEo8sZG_Ho,2067
-nexaai/
-nexaai/
+nexaai/runtime_error.py,sha256=sO87LyCA0qzm0hVqBrmG2FDzGQH865EMbTMop2OfZto,779
+nexaai/tts.py,sha256=jvgDZIyo47NBDny6z74IQT2SDDVo7Mpp-QZwl6YxARU,2196
+nexaai/vlm.py,sha256=LUrd1_SGHOsYpWyUymX93oEIsNJv7XzHIHo4hBZOhQA,4800
 nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
-nexaai/asr_impl/pybind_asr_impl.py,sha256=
-nexaai/binds/__init__.py,sha256=
+nexaai/asr_impl/pybind_asr_impl.py,sha256=FLOWIph37q_nIiNx8xYi-VnhQ6CrPuc4HFAJZQKc42w,4680
+nexaai/binds/__init__.py,sha256=2-Rr0NwyWygqwS8Xlxq0BJ2ltyID-WbGuzEYNlSanCI,155
+nexaai/binds/asr_bind.cpython-310-darwin.so,sha256=QmxLTY6qmHtbkdZlSyvdh7pVh0KP9j1ARtIWJDi_QMs,217096
 nexaai/binds/common_bind.cpython-310-darwin.so,sha256=BoXByRlNGDaNS1YyZyCF-s7h0vXP9NLPlJMQQ5pqusU,235488
-nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=
-nexaai/binds/libnexa_bridge.dylib,sha256=
-nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=
-nexaai/binds/
+nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=ZOJLzVvTUkbDdBBak1ylOmKx_bwHVzaPvha6RkoLpGo,202032
+nexaai/binds/libnexa_bridge.dylib,sha256=fQSsvrM4-9tWIedEkTpdHvFlFzeatI7q2Llswkrhl-4,290352
+nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=O-HyjCya-GBZnaIb_GJSxk5kBJRCaQL6nKu_qBGEZ1w,183096
+nexaai/binds/rerank_bind.cpython-310-darwin.so,sha256=seJQ1ZpYVR_RCMmBvPSHnLj5LCHX33k5VUFadUkQsvI,200384
+nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=IXM3RTJx-rii3DNZCAVY6eKxn9C8TtAMP9i5bi8qA6s,199392
 nexaai/binds/cpu_gpu/libggml-base.dylib,sha256=YDclLDlP7XlDpXiKfTOTt6mW7jgXlmwSoT_VuRrGrmM,629528
 nexaai/binds/cpu_gpu/libggml-cpu.so,sha256=cnLUQ7WdX-5iiDaH8v45u1kX1NUmK8DanpzSMGCpXPE,1039800
 nexaai/binds/cpu_gpu/libggml-metal.so,sha256=Xhhl_tLg1xmCIQVrKjqPFaLHAlx_2wUFiwDyUk0wJ-E,713680
 nexaai/binds/cpu_gpu/libggml.dylib,sha256=12Q1Z98oM81hxzT_GMQsW5rlhC8DOMsX6luWVCFQHcI,58336
 nexaai/binds/cpu_gpu/libmtmd.dylib,sha256=4-KGS82gxwwIJBNHuZ88mzzTbNZ12tqsDD46-ey6sQ4,701504
 nexaai/binds/cpu_gpu/libnexa_cpu_gpu.dylib,sha256=9qrrMOlGWM9cWUORg64GfkE_p9aQ1rjIp_z-QVfIFH8,1982280
-nexaai/binds/cpu_gpu/libnexa_plugin.dylib,sha256=
+nexaai/binds/cpu_gpu/libnexa_plugin.dylib,sha256=GiXEXNYePuJRaCtnJw1jrS2dtPcp90qr-IvnrL95dmU,2064152
 nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
 nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
 nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/embedder_impl/mlx_embedder_impl.py,sha256=
-nexaai/embedder_impl/pybind_embedder_impl.py,sha256=
+nexaai/embedder_impl/mlx_embedder_impl.py,sha256=pFPraUAjm9EVvVbwIp1cjbtXUysF5pqxEcK2CAFvcDw,4639
+nexaai/embedder_impl/pybind_embedder_impl.py,sha256=lFpf0wI2d7kfO2GUyUuUS1U2L_PyZMJVGmAvF8EuQ0g,3653
 nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
 nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCtfphaZ1bHzKzey1xF8,3692
 nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/llm_impl/mlx_llm_impl.py,sha256=
-nexaai/llm_impl/pybind_llm_impl.py,sha256=
+nexaai/llm_impl/mlx_llm_impl.py,sha256=dPtaEribluHZZY_f9M114glcQhtDEckukw4Sfd5zJos,11296
+nexaai/llm_impl/pybind_llm_impl.py,sha256=XXnUuRZMr9rrEL1vM6VTwsgs0KQnKn4C3TyrHE46uw8,8139
 nexaai/mlx_backend/ml.py,sha256=DKXVOAfh8cg7KTKljh7jpcPwfQFNigc6uv_ZXF6lse8,23977
 nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
 nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
@@ -248,7 +251,7 @@ nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXq
 nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
 nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=srN8-RFv8eOeH2rdyygCJ7Yt7kW7MQzS3i50UHBVfIM,13151
 nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py,sha256=ZSbM8JjTlkxUaVO9UNZM6YSbd60am3Z4ztJJEBsnJHg,9015
-nexaai/mlx_backend/vlm/interface.py,sha256=
+nexaai/mlx_backend/vlm/interface.py,sha256=D6TCUWbiGLkgmAk_b9yMb36Y4TLGT9gFPxnTaDSaCSM,23070
 nexaai/mlx_backend/vlm/main.py,sha256=8bmSTtyebp8eyL2jL36DZbNHapOpFXNmjM2NyzCFqGs,12919
 nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -384,21 +387,21 @@ nexaai/mlx_backend/vlm/modeling/trainer/lora.py,sha256=tGjvenjEQ8_1Az8Nz3smz5Mgv
 nexaai/mlx_backend/vlm/modeling/trainer/trainer.py,sha256=h16SaHt76JzFruXuidgXDx7_2evx4L0SecvzqLmhyZw,9081
 nexaai/mlx_backend/vlm/modeling/trainer/utils.py,sha256=29oHf_7946YeJKP_-Dt-NPeN4xJq8Fj7Yv4jZKO9RWA,4909
 nexaai/rerank_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/rerank_impl/mlx_rerank_impl.py,sha256=
-nexaai/rerank_impl/pybind_rerank_impl.py,sha256=
+nexaai/rerank_impl/mlx_rerank_impl.py,sha256=3nbqCdzyAugc4P_6K9mowEgy4LFdfzhy7GUvn9GMpSE,3377
+nexaai/rerank_impl/pybind_rerank_impl.py,sha256=tmzrpRYCCV3ATxbE9G1Io6SUtgYPO8BFe48nTae6_xw,4490
 nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
 nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
 nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
-nexaai/utils/manifest_utils.py,sha256=
-nexaai/utils/model_manager.py,sha256=
+nexaai/utils/manifest_utils.py,sha256=OOp_BmFWH1ZHMYkS2VGAby5Rpm4f4GLCRBJEBYm-kys,21489
+nexaai/utils/model_manager.py,sha256=OnL87zCPn3cBcScCKo-bHnBUpr24-Po293QC6Bwgx1Q,66112
 nexaai/utils/model_types.py,sha256=ONWjjo8CFPdhxki6qo7MXnSZaEzjBcxa_Kkf_y5NXus,1483
 nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
 nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
 nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/vlm_impl/mlx_vlm_impl.py,sha256=
-nexaai/vlm_impl/pybind_vlm_impl.py,sha256=
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
+nexaai/vlm_impl/mlx_vlm_impl.py,sha256=sgHqnX5OCSGLccCnTuRiktIbqThNn3AAIvYE2_Dy4TI,10833
+nexaai/vlm_impl/pybind_vlm_impl.py,sha256=stJKHdhYhBuWUQkky-nHgCv625qDB_1geI3v5BLNGpM,9765
+nexaai-1.0.21.dist-info/METADATA,sha256=WTZ4KM_6xJlrJ-NOaDoQEEPwEidaxuot5bocvRHKB0k,1184
+nexaai-1.0.21.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
+nexaai-1.0.21.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+nexaai-1.0.21.dist-info/RECORD,,
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/WHEEL
File without changes
{nexaai-1.0.20.dist-info → nexaai-1.0.21.dist-info}/top_level.txt
File without changes