nexaai 1.0.4rc15__cp310-cp310-macosx_14_0_universal2.whl → 1.0.5__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nexaai might be problematic.
Files changed (40):
  1. nexaai/__init__.py +6 -1
  2. nexaai/_stub.cpython-310-darwin.so +0 -0
  3. nexaai/_version.py +1 -1
  4. nexaai/asr.py +7 -3
  5. nexaai/asr_impl/mlx_asr_impl.py +3 -2
  6. nexaai/asr_impl/pybind_asr_impl.py +3 -2
  7. nexaai/binds/libcrypto.dylib +0 -0
  8. nexaai/binds/libnexa_bridge.dylib +0 -0
  9. nexaai/binds/libssl.dylib +0 -0
  10. nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
  11. nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
  12. nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
  13. nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
  14. nexaai/common.py +49 -7
  15. nexaai/cv.py +7 -3
  16. nexaai/cv_impl/mlx_cv_impl.py +3 -2
  17. nexaai/cv_impl/pybind_cv_impl.py +3 -2
  18. nexaai/embedder.py +7 -3
  19. nexaai/embedder_impl/mlx_embedder_impl.py +3 -2
  20. nexaai/embedder_impl/pybind_embedder_impl.py +6 -3
  21. nexaai/image_gen.py +6 -2
  22. nexaai/image_gen_impl/mlx_image_gen_impl.py +3 -2
  23. nexaai/image_gen_impl/pybind_image_gen_impl.py +3 -2
  24. nexaai/llm.py +13 -6
  25. nexaai/llm_impl/mlx_llm_impl.py +26 -6
  26. nexaai/llm_impl/pybind_llm_impl.py +17 -6
  27. nexaai/mlx_backend/llm/interface.py +12 -12
  28. nexaai/rerank.py +7 -3
  29. nexaai/rerank_impl/mlx_rerank_impl.py +3 -2
  30. nexaai/rerank_impl/pybind_rerank_impl.py +3 -2
  31. nexaai/tts.py +7 -3
  32. nexaai/tts_impl/mlx_tts_impl.py +3 -2
  33. nexaai/tts_impl/pybind_tts_impl.py +3 -2
  34. nexaai/vlm.py +11 -4
  35. nexaai/vlm_impl/mlx_vlm_impl.py +10 -3
  36. nexaai/vlm_impl/pybind_vlm_impl.py +15 -4
  37. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/METADATA +13 -9
  38. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/RECORD +40 -38
  39. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/WHEEL +0 -0
  40. {nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/top_level.txt +0 -0
nexaai/__init__.py CHANGED
@@ -19,7 +19,10 @@ except ImportError:
  __version__ = "0.0.1"
 
  # Import common configuration classes first (no external dependencies)
- from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig
+ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, PluginID
+
+ # Create alias for PluginID to be accessible as plugin_id
+ plugin_id = PluginID
 
  # Import new feature classes (no external dependencies in base classes)
  from .llm import LLM
@@ -40,6 +43,8 @@ __all__ = [
  "ChatMessage",
  "SamplerConfig",
  "EmbeddingConfig",
+ "PluginID",
+ "plugin_id",
 
  "LLM",
  "Embedder",
nexaai/_stub.cpython-310-darwin.so CHANGED (binary file)
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
  # This file is generated by CMake from _version.py.in
  # Do not modify this file manually - it will be overwritten
 
- __version__ = "1.0.4-rc15"
+ __version__ = "1.0.5"
nexaai/asr.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional, Sequence, Tuple
+ from typing import List, Optional, Sequence, Tuple, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -33,11 +34,14 @@ class ASR(BaseModel):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'ASR':
  """Load ASR model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
  return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
  else:
nexaai/asr_impl/mlx_asr_impl.py CHANGED
@@ -1,7 +1,8 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.asr import ASR, ASRConfig, ASRResult
  from nexaai.mlx_backend.asr.interface import MlxAsr as MLXASRInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -18,7 +19,7 @@ class MLXASRImpl(ASR):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXASRImpl':
  """Load ASR model from local path using MLX backend."""
nexaai/asr_impl/pybind_asr_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.asr import ASR, ASRConfig, ASRResult
 
 
@@ -14,7 +15,7 @@ class PyBindASRImpl(ASR):
  model_path: str,
  tokenizer_path: Optional[str] = None,
  language: Optional[str] = None,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindASRImpl':
  """Load ASR model from local path using PyBind backend."""
Binary file
Binary file
Binary file
Binary file
Binary file
nexaai/common.py CHANGED
@@ -1,5 +1,12 @@
  from dataclasses import dataclass
  from typing import TypedDict, Literal, Optional, List
+ from enum import Enum
+
+
+ class PluginID(str, Enum):
+ """Enum for plugin identifiers."""
+ MLX = "mlx"
+ LLAMA_CPP = "llama_cpp"
 
 
  class ChatMessage(TypedDict):
@@ -52,10 +59,45 @@ class ModelConfig:
 
  @dataclass(frozen=True) # Read-only
  class ProfilingData:
- start_time: int
- end_time: int
- prompt_start_time: int = None
- prompt_end_time: int = None
- decode_start_time: int = None
- decode_ent_time: int = None
- first_token_time: int = None
+ """Profiling data structure for LLM/VLM performance metrics."""
+ ttft: int = 0 # Time to first token (us)
+ prompt_time: int = 0 # Prompt processing time (us)
+ decode_time: int = 0 # Token generation time (us)
+ prompt_tokens: int = 0 # Number of prompt tokens
+ generated_tokens: int = 0 # Number of generated tokens
+ audio_duration: int = 0 # Audio duration (us)
+ prefill_speed: float = 0.0 # Prefill speed (tokens/sec)
+ decoding_speed: float = 0.0 # Decoding speed (tokens/sec)
+ real_time_factor: float = 0.0 # Real-Time Factor (RTF)
+ stop_reason: str = "" # Stop reason: "eos", "length", "user", "stop_sequence"
+
+ @classmethod
+ def from_dict(cls, data: dict) -> "ProfilingData":
+ """Create ProfilingData from dictionary."""
+ return cls(
+ ttft=data.get("ttft", 0),
+ prompt_time=data.get("prompt_time", 0),
+ decode_time=data.get("decode_time", 0),
+ prompt_tokens=data.get("prompt_tokens", 0),
+ generated_tokens=data.get("generated_tokens", 0),
+ audio_duration=data.get("audio_duration", 0),
+ prefill_speed=data.get("prefill_speed", 0.0),
+ decoding_speed=data.get("decoding_speed", 0.0),
+ real_time_factor=data.get("real_time_factor", 0.0),
+ stop_reason=data.get("stop_reason", "")
+ )
+
+ def to_dict(self) -> dict:
+ """Convert to dictionary."""
+ return {
+ "ttft": self.ttft,
+ "prompt_time": self.prompt_time,
+ "decode_time": self.decode_time,
+ "prompt_tokens": self.prompt_tokens,
+ "generated_tokens": self.generated_tokens,
+ "audio_duration": self.audio_duration,
+ "prefill_speed": self.prefill_speed,
+ "decoding_speed": self.decoding_speed,
+ "real_time_factor": self.real_time_factor,
+ "stop_reason": self.stop_reason
+ }
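
The two additions above drive most of this release: a str-based PluginID enum and a rebuilt ProfilingData with dictionary conversion helpers. A minimal sketch of why the routing code can accept either the enum or a plain string (the enum is re-declared locally for illustration rather than imported from the package):

    from enum import Enum

    class PluginID(str, Enum):
        """Enum for plugin identifiers."""
        MLX = "mlx"
        LLAMA_CPP = "llama_cpp"

    # Because PluginID subclasses str, members compare equal to their values:
    assert PluginID.MLX == "mlx"
    assert PluginID.MLX.value == "mlx"

    # The conversion pattern repeated throughout this diff unwraps enum
    # members and passes plain strings through unchanged:
    def to_plugin_value(plugin_id):
        return plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

    assert to_plugin_value(PluginID.MLX) == to_plugin_value("mlx") == "mlx"

Strictly, a str-subclass enum already satisfies equality checks like plugin_id == "mlx"; the explicit unwrap makes the intent plain and guarantees the C++ bindings receive a plain str. Note also that ProfilingData.from_dict defaults every missing key (zeros and an empty stop_reason), so a backend may report any subset of "profile_data".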
nexaai/cv.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -71,11 +72,14 @@ class CVModel(BaseModel):
  def _load_from(cls,
  _: str, # TODO: remove this argument, this is a hack to make api design happy
  config: CVModelConfig,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'CVModel':
  """Load CV model from configuration, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.cv_impl.mlx_cv_impl import MLXCVImpl
  return MLXCVImpl._load_from(config, plugin_id, device_id)
  else:
nexaai/cv_impl/mlx_cv_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import Optional
+ from typing import Optional, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.cv import CVModel, CVModelConfig, CVResults
  from nexaai.mlx_backend.cv.interface import CVModel as MLXCVInterface, create_cv_model
 
@@ -16,7 +17,7 @@ class MLXCVImpl(CVModel):
  @classmethod
  def _load_from(cls,
  config: CVModelConfig,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXCVImpl':
  """Load CV model from configuration using MLX backend."""
nexaai/cv_impl/pybind_cv_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import Optional
+ from typing import Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.cv import CVModel, CVModelConfig, CVResults
 
 
@@ -12,7 +13,7 @@ class PyBindCVImpl(CVModel):
  @classmethod
  def _load_from(cls,
  config: CVModelConfig,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindCVImpl':
  """Load CV model from configuration using PyBind backend."""
nexaai/embedder.py CHANGED
@@ -4,6 +4,7 @@ from abc import abstractmethod
  import numpy as np
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -21,19 +22,22 @@ class Embedder(BaseModel):
  pass
 
  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "llama_cpp"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
  """
  Load an embedder from model files, routing to appropriate implementation.
 
  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "llama_cpp")
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
  Returns:
  Embedder instance
  """
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
  return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
  else:
nexaai/embedder_impl/mlx_embedder_impl.py CHANGED
@@ -1,6 +1,7 @@
  from typing import List, Union
  import numpy as np
 
+ from nexaai.common import PluginID
  from nexaai.embedder import Embedder, EmbeddingConfig
  from nexaai.mlx_backend.embedding.interface import Embedder as MLXEmbedderInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -13,14 +14,14 @@ class MLXEmbedderImpl(Embedder):
  self._mlx_embedder = None
 
  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "mlx"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
  """
  Load an embedder from model files using MLX backend.
 
  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "mlx")
+ plugin_id: Plugin ID to use for the model (default: PluginID.MLX)
 
  Returns:
  MLXEmbedderImpl instance
nexaai/embedder_impl/pybind_embedder_impl.py CHANGED
@@ -1,6 +1,7 @@
  from typing import List, Union
  import numpy as np
 
+ from nexaai.common import PluginID
  from nexaai.embedder import Embedder, EmbeddingConfig
  from nexaai.binds import embedder_bind
  from nexaai.runtime import _ensure_runtime
@@ -15,20 +16,22 @@ class PyBindEmbedderImpl(Embedder):
  self._handle = _handle_ptr
 
  @classmethod
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: str = "llama_cpp"):
+ def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
  """
  Load an embedder from model files
 
  Args:
  model_path: Path to the model file
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
- plugin_id: Plugin ID to use for the model (default: "llama_cpp")
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
 
  Returns:
  PyBindEmbedderImpl instance
  """
  _ensure_runtime()
- handle = embedder_bind.ml_embedder_create(model_path, tokenizer_file, plugin_id)
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+ handle = embedder_bind.ml_embedder_create(model_path, tokenizer_file, plugin_id_str)
  return cls(handle)
 
  def eject(self):
nexaai/image_gen.py CHANGED
@@ -3,6 +3,7 @@ from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -67,13 +68,16 @@ class ImageGen(BaseModel):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
  ) -> 'ImageGen':
  """Load image generation model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.image_gen_impl.mlx_image_gen_impl import MLXImageGenImpl
  return MLXImageGenImpl._load_from(model_path, scheduler_config_path, plugin_id, device_id, float16, quantize)
  else:
nexaai/image_gen_impl/mlx_image_gen_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional
+ from typing import List, Optional, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
  from nexaai.mlx_backend.sd.interface import ImageGen as MLXImageGenInterface
 
@@ -17,7 +18,7 @@ class MLXImageGenImpl(ImageGen):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
nexaai/image_gen_impl/pybind_image_gen_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
 
 
@@ -13,7 +14,7 @@ class PyBindImageGenImpl(ImageGen):
  def _load_from(cls,
  model_path: str,
  scheduler_config_path: str = "",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None,
  float16: bool = True,
  quantize: bool = False
nexaai/llm.py CHANGED
@@ -1,10 +1,10 @@
- from typing import Generator, Optional
+ from typing import Generator, Optional, Union
  from abc import abstractmethod
  import queue
  import threading
 
- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
- from nexaai.base import BaseModel
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
+ from nexaai.base import BaseModel, ProfilingData
 
  class LLM(BaseModel):
  def __init__(self, m_cfg: ModelConfig = ModelConfig()):
@@ -17,11 +17,14 @@ class LLM(BaseModel):
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'LLM':
  """Load model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
  return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
  else:
@@ -37,7 +40,7 @@ class LLM(BaseModel):
  self._cancel_event.clear()
 
  @abstractmethod
- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """Apply the chat template to messages."""
  pass
 
@@ -60,6 +63,10 @@ class LLM(BaseModel):
  """
  pass
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
+ pass
+
  @abstractmethod
  def save_kv_cache(self, path: str):
  """
nexaai/llm_impl/mlx_llm_impl.py CHANGED
@@ -1,6 +1,7 @@
- from typing import Generator, Optional, Any
+ from typing import Generator, Optional, Any, Sequence, Union
 
- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
  from nexaai.llm import LLM
  from nexaai.mlx_backend.llm.interface import LLM as MLXLLMInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -17,7 +18,7 @@ class MLXLLMImpl(LLM):
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXLLMImpl':
  """Load model from local path using MLX backend."""
@@ -54,7 +55,13 @@ class MLXLLMImpl(LLM):
  self._mlx_llm.destroy()
  self._mlx_llm = None
 
- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(
+ self,
+ messages: Sequence[ChatMessage],
+ tools: Optional[str] = None,
+ enable_thinking: bool = True,
+ add_generation_prompt: bool = True
+ ) -> str:
  """Apply the chat template to messages."""
  if not self._mlx_llm:
  raise RuntimeError("MLX LLM not loaded")
@@ -68,9 +75,16 @@ class MLXLLMImpl(LLM):
  def __init__(self, role, content):
  self.role = role
  self.content = content
- mlx_messages.append(MLXChatMessage(msg["role"], msg["content"]))
+
+ # Handle both dict-style and attribute-style access
+ if hasattr(msg, 'role') and hasattr(msg, 'content'):
+ # Message is already an object with attributes
+ mlx_messages.append(MLXChatMessage(msg.role, msg.content))
+ else:
+ # Message is a dict
+ mlx_messages.append(MLXChatMessage(msg["role"], msg["content"]))
 
- return self._mlx_llm.apply_chat_template(mlx_messages)
+ return self._mlx_llm.apply_chat_template(mlx_messages, tools=tools, enable_thinking=enable_thinking, add_generation_prompt=add_generation_prompt)
  except Exception as e:
  raise RuntimeError(f"Failed to apply chat template: {str(e)}")
 
@@ -202,6 +216,12 @@ class MLXLLMImpl(LLM):
  except Exception as e:
  raise RuntimeError(f"Failed to generate text: {str(e)}")
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
+ if not self._mlx_llm:
+ raise RuntimeError("MLX LLM not loaded")
+ return self._mlx_llm.get_profiling_data()
+
  def save_kv_cache(self, path: str):
  """
  Save the key-value cache to the file.
nexaai/llm_impl/pybind_llm_impl.py CHANGED
@@ -1,8 +1,9 @@
- from typing import Generator, Optional
+ from typing import Generator, Optional, Union
  import queue
  import threading
 
- from nexaai.common import ModelConfig, GenerationConfig, ChatMessage
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
  from nexaai.binds import llm_bind, common_bind
  from nexaai.runtime import _ensure_runtime
  from nexaai.llm import LLM
@@ -13,13 +14,14 @@ class PyBindLLMImpl(LLM):
  """Private constructor, should not be called directly."""
  super().__init__(m_cfg)
  self._handle = handle # This is a py::capsule
+ self._profiling_data = None
 
  @classmethod
  def _load_from(cls,
  local_path: str,
  tokenizer_path: Optional[str] = None,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindLLMImpl':
  """Load model from local path."""
@@ -49,11 +51,13 @@ class PyBindLLMImpl(LLM):
  config.chat_template_content = m_cfg.chat_template_content
 
  # Create handle : returns py::capsule with automatic cleanup
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
  handle = llm_bind.ml_llm_create(
  model_path=local_path,
  tokenizer_path=tokenizer_path,
  model_config=config,
- plugin_id=plugin_id,
+ plugin_id=plugin_id_str,
  device_id=device_id
  )
  return cls(handle, m_cfg)
@@ -64,7 +68,7 @@ class PyBindLLMImpl(LLM):
  del self._handle
  self._handle = None
 
- def apply_chat_template(self, messages: list[ChatMessage]) -> str:
+ def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """Apply the chat template to messages."""
  # Convert TypedDict to list of dicts for binding
  message_dicts = [
@@ -95,13 +99,14 @@ class PyBindLLMImpl(LLM):
  # Run generation in thread
  def generate():
  try:
- llm_bind.ml_llm_generate(
+ result = llm_bind.ml_llm_generate(
  handle=self._handle,
  prompt=prompt,
  config=config,
  on_token=on_token,
  user_data=None
  )
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  except Exception as e:
  exception_container[0] = e
  finally:
@@ -143,8 +148,14 @@ class PyBindLLMImpl(LLM):
  on_token=None, # No callback for non-streaming
  user_data=None
  )
+
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  return result.get("text", "")
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data."""
+ return self._profiling_data
+
  def save_kv_cache(self, path: str):
  """
  Save the key-value cache to the file.
nexaai/mlx_backend/llm/interface.py CHANGED
@@ -371,19 +371,19 @@ class LLM(BaseLLM, ProfilingMixin):
  cached_tokens = 0
 
  # Only offset prefix kv-cache at first round
- if is_first_round:
+ # if is_first_round:
 
- # Handle KV cache prefix offset if available
- if self.kv_cache is not None and len(self.kv_cache) > 0:
- # Get the offset from the first cache layer
- if hasattr(self.kv_cache[0], 'offset'):
- cached_tokens = self.kv_cache[0].offset - 1
+ # # Handle KV cache prefix offset if available
+ # if self.kv_cache is not None and len(self.kv_cache) > 0:
+ # # Get the offset from the first cache layer
+ # if hasattr(self.kv_cache[0], 'offset'):
+ # cached_tokens = self.kv_cache[0].offset - 1
 
- # Process only the non-cached tokens
- incremental_tokens = incremental_tokens[cached_tokens:] if cached_tokens > 0 else incremental_tokens
+ # # Process only the non-cached tokens
+ # incremental_tokens = incremental_tokens[cached_tokens:] if cached_tokens > 0 else incremental_tokens
 
- if len(incremental_tokens) == 0:
- raise ValueError("No tokens to process, KV cache is too long.")
+ # if len(incremental_tokens) == 0:
+ # raise ValueError("No tokens to process, KV cache is too long.")
 
  # Since apply_chat_template now returns incremental prompts, we can use the prompt directly
  # The prompt is already the incremental part based on global_n_past
@@ -467,7 +467,7 @@ class LLM(BaseLLM, ProfilingMixin):
  # We'll ignore the argument for now.
  return self.tokenizer.chat_template
 
- def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
+ def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
  """
  Apply chat template to messages with incremental prompt support and optional tools.
 
@@ -526,7 +526,7 @@ class LLM(BaseLLM, ProfilingMixin):
  incremental_messages,
  tokenize=False,
  enable_thinking=enable_thinking,
- add_generation_prompt=True,
+ add_generation_prompt=add_generation_prompt,
  tools=parsed_tools
  )
  except Exception as e:
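
The add_generation_prompt pass-through is the behavioral change in this file: previously the MLX interface always appended the assistant header. A hedged illustration, reusing the llm handle from the earlier sketch (the marker strings depend entirely on the model's chat template):

    messages = [{"role": "user", "content": "Hi"}]

    # Default, unchanged behaviour: the rendered prompt ends with the
    # assistant header so the model starts generating a reply.
    prompt = llm.apply_chat_template(messages, add_generation_prompt=True)

    # New: render the transcript without priming a reply, useful for
    # scoring or prefill flows rather than generation.
    transcript = llm.apply_chat_template(messages, add_generation_prompt=False)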
nexaai/rerank.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -24,11 +25,14 @@ class Reranker(BaseModel):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'Reranker':
  """Load reranker model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
  return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
  else:
nexaai/rerank_impl/mlx_rerank_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.rerank import Reranker, RerankConfig
  from nexaai.mlx_backend.rerank.interface import Reranker as MLXRerankInterface, create_reranker
 
@@ -17,7 +18,7 @@ class MLXRerankImpl(Reranker):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXRerankImpl':
  """Load reranker model from local path using MLX backend."""
nexaai/rerank_impl/pybind_rerank_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional, Sequence
+ from typing import List, Optional, Sequence, Union
 
+ from nexaai.common import PluginID
  from nexaai.rerank import Reranker, RerankConfig
 
 
@@ -13,7 +14,7 @@ class PyBindRerankImpl(Reranker):
  def _load_from(cls,
  model_path: str,
  tokenizer_file: str = "tokenizer.json",
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindRerankImpl':
  """Load reranker model from local path using PyBind backend."""
nexaai/tts.py CHANGED
@@ -1,8 +1,9 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
  from abc import abstractmethod
  from dataclasses import dataclass
 
  from nexaai.base import BaseModel
+ from nexaai.common import PluginID
 
 
  @dataclass
@@ -43,11 +44,14 @@ class TTS(BaseModel):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'TTS':
  """Load TTS model from local path, routing to appropriate implementation."""
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.tts_impl.mlx_tts_impl import MLXTTSImpl
  return MLXTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)
  else:
nexaai/tts_impl/mlx_tts_impl.py CHANGED
@@ -1,8 +1,9 @@
  # Note: This code is generated by Cursor, not tested yet.
 
- from typing import List, Optional
+ from typing import List, Optional, Union
  import os
 
+ from nexaai.common import PluginID
  from nexaai.tts import TTS, TTSConfig, TTSResult
  from nexaai.mlx_backend.tts.interface import MlxTts as MLXTTSInterface
 
@@ -17,7 +18,7 @@ class MLXTTSImpl(TTS):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MLXTTSImpl':
  """Load TTS model from local path using MLX backend."""
nexaai/tts_impl/pybind_tts_impl.py CHANGED
@@ -1,5 +1,6 @@
- from typing import List, Optional
+ from typing import List, Optional, Union
 
+ from nexaai.common import PluginID
  from nexaai.tts import TTS, TTSConfig, TTSResult
 
 
@@ -13,7 +14,7 @@ class PyBindTTSImpl(TTS):
  def _load_from(cls,
  model_path: str,
  vocoder_path: str,
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindTTSImpl':
  """Load TTS model from local path using PyBind backend."""
nexaai/vlm.py CHANGED
@@ -5,8 +5,8 @@ import threading
  import base64
  from pathlib import Path
 
- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
- from nexaai.base import BaseModel
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
+ from nexaai.base import BaseModel, ProfilingData
 
 
  class VLM(BaseModel):
@@ -20,7 +20,7 @@ class VLM(BaseModel):
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'VLM':
  """Load VLM model from local path, routing to appropriate implementation.
@@ -35,7 +35,10 @@ class VLM(BaseModel):
  Returns:
  VLM instance
  """
- if plugin_id == "mlx":
+ # Check plugin_id value for routing - handle both enum and string
+ plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+ if plugin_value == "mlx":
  from nexaai.vlm_impl.mlx_vlm_impl import MlxVlmImpl
  return MlxVlmImpl._load_from(local_path, mmproj_path, m_cfg, plugin_id, device_id)
  else:
@@ -117,4 +120,8 @@ class VLM(BaseModel):
  Returns:
  str: The generated text.
  """
+ pass
+
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
  pass
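
VLM mirrors the LLM changes: the same plugin_id widening and the same profiling accessor. A hypothetical sketch under the same caveats as the LLM example (paths illustrative, _load_from is the private loader this diff shows):

    from nexaai.vlm import VLM

    vlm = VLM._load_from(
        "/path/to/model.gguf",   # illustrative
        "/path/to/mmproj.gguf",  # illustrative
        plugin_id="llama_cpp",   # plain strings remain valid alongside PluginID members
    )
    # ...after a generation:
    profile = vlm.get_profiling_data()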
nexaai/vlm_impl/mlx_vlm_impl.py CHANGED
@@ -1,6 +1,7 @@
- from typing import Generator, Optional, List, Dict, Any
+ from typing import Generator, Optional, List, Dict, Any, Union
 
- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
  from nexaai.vlm import VLM
  from nexaai.mlx_backend.vlm.interface import VLM as MLXVLMInterface
  from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
@@ -17,7 +18,7 @@ class MlxVlmImpl(VLM):
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "mlx",
+ plugin_id: Union[PluginID, str] = PluginID.MLX,
  device_id: Optional[str] = None
  ) -> 'MlxVlmImpl':
  """Load VLM model from local path using MLX backend.
@@ -247,3 +248,9 @@ class MlxVlmImpl(VLM):
 
  except Exception as e:
  raise RuntimeError(f"Failed to generate text: {str(e)}")
+
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data from the last generation."""
+ if not self._mlx_vlm:
+ raise RuntimeError("MLX VLM not loaded")
+ return self._mlx_vlm.get_profiling_data()
nexaai/vlm_impl/pybind_vlm_impl.py CHANGED
@@ -4,10 +4,11 @@ import threading
  import base64
  from pathlib import Path
 
- from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage
+ from nexaai.common import ModelConfig, GenerationConfig, MultiModalMessage, PluginID
  from nexaai.binds import vlm_bind, common_bind
  from nexaai.runtime import _ensure_runtime
  from nexaai.vlm import VLM
+ from nexaai.base import ProfilingData
 
 
  class PyBindVLMImpl(VLM):
@@ -15,13 +16,14 @@ class PyBindVLMImpl(VLM):
  """Private constructor, should not be called directly."""
  super().__init__(m_cfg)
  self._handle = handle # This is a py::capsule
+ self._profiling_data = None
 
  @classmethod
  def _load_from(cls,
  local_path: str,
  mmproj_path: str,
  m_cfg: ModelConfig = ModelConfig(),
- plugin_id: str = "llama_cpp",
+ plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
  device_id: Optional[str] = None
  ) -> 'PyBindVLMImpl':
  """Load VLM model from local path.
@@ -61,11 +63,13 @@ class PyBindVLMImpl(VLM):
  config.chat_template_content = m_cfg.chat_template_content
 
  # Create handle : returns py::capsule with automatic cleanup
+ # Convert enum to string for C++ binding
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
  handle = vlm_bind.create_vlm(
  model_path=local_path,
  mmproj_path=mmproj_path,
  model_config=config,
- plugin_id=plugin_id,
+ plugin_id=plugin_id_str,
  device_id=device_id
  )
  return cls(handle, m_cfg)
@@ -141,13 +145,14 @@ class PyBindVLMImpl(VLM):
  # Run generation in thread
  def generate():
  try:
- vlm_bind.ml_vlm_generate(
+ result = vlm_bind.ml_vlm_generate(
  handle=self._handle,
  prompt=prompt,
  config=config,
  on_token=on_token,
  user_data=None
  )
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  except Exception as e:
  exception_container[0] = e
  finally:
@@ -189,8 +194,14 @@ class PyBindVLMImpl(VLM):
  on_token=None, # No callback for non-streaming
  user_data=None
  )
+
+ self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
  return result.get("text", "")
 
+ def get_profiling_data(self) -> Optional[ProfilingData]:
+ """Get profiling data."""
+ return self._profiling_data
+
  def _convert_generation_config(self, g_cfg: GenerationConfig):
  """Convert GenerationConfig to binding format."""
  config = common_bind.GenerationConfig()
{nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nexaai
- Version: 1.0.4rc15
+ Version: 1.0.5
  Summary: Python bindings for NexaSDK C-lib backend
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -17,11 +17,15 @@ Requires-Dist: tqdm
  Requires-Dist: hf_xet
  Requires-Dist: numpy
  Requires-Dist: httpx
- Requires-Dist: mlx
- Requires-Dist: mlx-lm
- Requires-Dist: scipy
- Requires-Dist: soundfile
- Requires-Dist: Pillow
- Requires-Dist: opencv-python
- Requires-Dist: shapely
- Requires-Dist: pyclipper
+ Provides-Extra: mlx
+ Requires-Dist: mlx; extra == "mlx"
+ Requires-Dist: mlx-lm; extra == "mlx"
+ Requires-Dist: mlx-vlm; extra == "mlx"
+ Requires-Dist: tokenizers; extra == "mlx"
+ Requires-Dist: safetensors; extra == "mlx"
+ Requires-Dist: Pillow; extra == "mlx"
+ Requires-Dist: scipy; extra == "mlx"
+ Requires-Dist: soundfile; extra == "mlx"
+ Requires-Dist: opencv-python; extra == "mlx"
+ Requires-Dist: shapely; extra == "mlx"
+ Requires-Dist: pyclipper; extra == "mlx"
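
This dependency reshuffle is the most user-visible packaging change: the MLX stack (and the image/audio helpers it drags in) moved behind an "mlx" extra, so a default install no longer pulls mlx, mlx-lm, opencv-python, and friends. Standard pip extras syntax applies:

    pip install nexaai            # core install, no MLX dependencies
    pip install "nexaai[mlx]"     # opt in to the MLX backend and its dependencies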
{nexaai-1.0.4rc15.dist-info → nexaai-1.0.5.dist-info}/RECORD CHANGED
@@ -1,33 +1,35 @@
- nexaai/__init__.py,sha256=JTjJWdiBXHZyc_91Oe-GNOcODFp9gbUQM43bzNY7S8Q,1906
- nexaai/_stub.cpython-310-darwin.so,sha256=6cDmmUXwfQIBTowXBsoMjB7kqSQskigaWJAHEB8aaTw,66768
- nexaai/_version.py,sha256=UxLv07_TC8sCUMr5KTEXolBn9DNXJx2RUjkBcGewdXw,143
- nexaai/asr.py,sha256=Yg8Yml_nklzJYl3C_lwvEApTdNjY2czAurDaoEjkiIU,1813
+ nexaai/__init__.py,sha256=jXdC4vv6DBK1fVewYTYSUhOOYfvf_Mk81UIeMGGIKUg,2029
+ nexaai/_stub.cpython-310-darwin.so,sha256=z7e7YOjkyjyalKr6u9iLGZ5YHgdvh5d5pLTwGvCCmtM,66768
+ nexaai/_version.py,sha256=j-3XhaOQERPf3uculltVeo9djhrsUyWzi1EY_j4wPEc,138
+ nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
- nexaai/common.py,sha256=VPM7NaUNaLTT7quW-u4D2uOeNrQqPjvfcgJlYGS3Qy8,1525
- nexaai/cv.py,sha256=KOaiRouiQ-YFP8FL20QuiieJfHN7DzASEi5_0m6H-E0,3032
- nexaai/embedder.py,sha256=VheiZEYBuuBjhQcvLawCz26jX0I169Xk4b9VP-ERjqU,2211
- nexaai/image_gen.py,sha256=IhLQLpmPkK9KcHteUdaQdxrnTIjk6xdyekRqeJtHfWw,4122
- nexaai/llm.py,sha256=egHa6YafNWyZy5qrmZRNZlFHO8LRUejc_gkOpK0nbnw,3105
- nexaai/rerank.py,sha256=7EEm96gpvd6kXO_Q8xSrQDlLZdAYTk0MODeNWDq70WA,1631
+ nexaai/common.py,sha256=yBnIbqYaQYnfrl7IczOBh6MDibYZVxwaRJEglYcKgGs,3422
+ nexaai/cv.py,sha256=RHCDo8gvBH8BkGZx7qVyp-OKxqi7E1GG9XzyaXehCNA,3273
+ nexaai/embedder.py,sha256=Cw0tSHkPgd-RI62afCqQAcTHMnQhaI2CvfTMO-1JKOg,2452
+ nexaai/image_gen.py,sha256=0C_5Tjj4BYmxLbmMmvwajp-yy2mmEEOKwBFnDQNPzx4,4356
+ nexaai/llm.py,sha256=S1o_k2VQoF5w2wO25f142OO1R75TP89Ii69VZv8pIGo,3567
+ nexaai/rerank.py,sha256=vWaBucoQ1wz-2iYnZqyFIcEjm-4Xcs1KDbFN5X8zzDQ,1872
  nexaai/runtime.py,sha256=mxxHYsb5iBUAm2K_u-XJWr_U-spJ9S4eApc8kf9myjw,1957
- nexaai/tts.py,sha256=4EbC0BfFh5TLrm_3Q5vx1sXdug5gvOi-owNeX7ekbdA,1926
- nexaai/vlm.py,sha256=g65S8ChMnp_wsz_O4szjR3Z8sD_46NHaxDlfdoZoQ0c,4291
+ nexaai/tts.py,sha256=ZnBpWUxIfHhh7KfEjddtH7hHOTa91zg7ogGLakMIALo,2167
+ nexaai/vlm.py,sha256=3voXmAVnGlXnOiwA3wcX4p0Lvmp0X1VKkQVPObJdwBY,4649
  nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/asr_impl/mlx_asr_impl.py,sha256=JuyxFzFbbgclK5_2Rq5pT278h0q8LztJX7Tggz0zkbM,3191
- nexaai/asr_impl/pybind_asr_impl.py,sha256=ybvthYgtVbH_JgpSsl0nxjZYvXyk8KGRSKdsJ-hLfZE,1450
+ nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
+ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
  nexaai/binds/__init__.py,sha256=T9Ua7SzHNglSeEqXlfH5ymYXRyXhNKkC9z_y_bWCNMo,80
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=hVxY76tn7hN6uHDIgM7LWNvgoudHgNZVoaygM9X1RWE,217232
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=FT8581RNciilskK89PhtnNSjw4Oh0-xk8QdbJVFmOd8,202064
- nexaai/binds/libnexa_bridge.dylib,sha256=cdriv4BpSoRUlNmFCVuSAHiWeVjnC8KKihCTiTNrJno,251256
- nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=Bv08rn9OBAHy01eAQeANiJSrCxskn1xSx4Gl1Vcrhm0,166064
- nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=D_mlY_PNMIFlm6mHERSLMoA7QfYHwEPVlb0UKMbl2N0,632048
+ nexaai/binds/libcrypto.dylib,sha256=aWif9WhTKVQhmZL3DmtIpMkZY5JSb_Ny6CClmUBKYM4,4710416
+ nexaai/binds/libnexa_bridge.dylib,sha256=g4nlxyGyVJ-LJV1cHMDg2m2pYF8fFTBBXGTPQV-lotg,251480
+ nexaai/binds/libssl.dylib,sha256=Q2frAdhR729oKYuCjJOEr1Ott3idFWoFp98fwNqtIaU,881616
+ nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=3Bsq0_tGkM027-bORVeJUDl6CYZxAF9sbDIn1l31XTQ,182704
+ nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=CzsTec_QHlvbBGzmx4MBQ4LUjG7aIqW1rP5p_A90Vds,632048
  nexaai/binds/nexa_llama_cpp/libggml-cpu.so,sha256=RiMhOv6IAWY1zkFTp0JCB7CYoPfOv54vBVQHvj1koBM,661120
  nexaai/binds/nexa_llama_cpp/libggml-metal.so,sha256=L4RQvaD0w4qBjexi4O05RMCH8842fof5QgBEvyx0RcA,673104
  nexaai/binds/nexa_llama_cpp/libggml.dylib,sha256=aOTj_6RrAMkfDO0ZI28_3nfcC-l4Y3dRCiS3C0d0_eI,58592
  nexaai/binds/nexa_llama_cpp/libllama.dylib,sha256=fDPnTG6EQ1JN6aRmnIFQzag_kmtyImRxKjMOOtaTY5Q,1746928
  nexaai/binds/nexa_llama_cpp/libmtmd.dylib,sha256=ccnBRsJNFGTCsjgW03N9PvX26wUirqpxljnxdVPINVc,587008
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib,sha256=1kAoSq1w8pPiNmceOdmAZ7ehfENauFoGq8mpIwGl-kk,1806696
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib,sha256=aw8if8RwXjb02CehbqGPHOeEKRUcTpzeJZLOkjTBm8A,596328
+ nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib,sha256=bZOa9K6tROVv7sfBU6JaA7WEPvuLfJljkXH37um3WSU,2368632
+ nexaai/binds/nexa_mlx/libnexa_plugin.dylib,sha256=yjbdy0FpBE_RwgqvwGxd3czIfs3OYVoh--vWpn2H7RQ,1422888
  nexaai/binds/nexa_mlx/py-lib/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
  nexaai/binds/nexa_mlx/py-lib/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,17 +183,17 @@ nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py,sha256=79ddUhtT
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py,sha256=9dNmH03C46HtxwesH2DpT2oTNEG1KCZWYEKq6UQ3vfk,3536
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py,sha256=12RiOfPtSZQj5g5JM-yCJk3uGQfM3OdmRiPt5uUDE4E,35096
  nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/cv_impl/mlx_cv_impl.py,sha256=mdK4DEffPe96AgDGDXtQeHlG958hf8FO1fBZ1qjZMEE,3162
- nexaai/cv_impl/pybind_cv_impl.py,sha256=yS4JKfRSaIjjVP7hJ-CizG76pIX85bpmGLk9B9cnL24,998
+ nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
+ nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
  nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/embedder_impl/mlx_embedder_impl.py,sha256=dTOz34WGDnhsI9L7Ctv6fGPngvMAUc4FwEwRgBp_M9I,4317
- nexaai/embedder_impl/pybind_embedder_impl.py,sha256=AGGrOq4z0mDpQZInOvJsOIlQWflByhDjsihMu_Wjtbk,3286
+ nexaai/embedder_impl/mlx_embedder_impl.py,sha256=OsDzsc_2wZkSoWu6yCOZadMkaYdBW3uyjF11hDKTaX8,4383
+ nexaai/embedder_impl/pybind_embedder_impl.py,sha256=Ga1JYauVkRq6jwAGL7Xx5HDaIx483_v9gZVoTyd3xNU,3495
  nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=OxSbk9zIDj7tTvsdM8bMJQDBhpn-mygBNktewd_wgtE,11153
- nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=XoSdWG5ID_g93WT9QB0qCP64a4rX-Rva0u4fQ8xpoqg,3626
+ nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
+ nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCtfphaZ1bHzKzey1xF8,3692
  nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/llm_impl/mlx_llm_impl.py,sha256=HCi1uQBjccDDi64LbAgyH85lWx7qDZIW8i43dojGfF0,10210
- nexaai/llm_impl/pybind_llm_impl.py,sha256=8Us4N5KF6oi-0-K_5Dpf2rYe9smd89ZfWFrP_fWBsM4,7374
+ nexaai/llm_impl/mlx_llm_impl.py,sha256=4v7jUFzHfE7zw2uViekGQDaTROz8A6oaW31Z3iVe6tg,11157
+ nexaai/llm_impl/pybind_llm_impl.py,sha256=aooqkcXZWhCo07wbSafGgBrA3WnijtnUADShjjgFsBQ,8051
  nexaai/mlx_backend/ml.py,sha256=LafDM_TeXmuQkld2tdQxUBGgooT0JPMXngLam2TADqU,23179
  nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
  nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
@@ -211,7 +213,7 @@ nexaai/mlx_backend/embedding/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
  nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py,sha256=F9Z_9r-Dh0wNThiMp5W5hqE2dt5bf4ps5_c6h4BuWGw,15218
  nexaai/mlx_backend/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/llm/generate.py,sha256=Phes0tzxbbEWA2hDylQvD0LjorMaPwvcfZq9RKCAOt0,4399
- nexaai/mlx_backend/llm/interface.py,sha256=Fx28O2jCDPaEfr0xLffWnqGIU5Gspggxr-o54-fBWj4,29257
+ nexaai/mlx_backend/llm/interface.py,sha256=SZFkuAUi2vxj_dSqj8RXf9vPTGMtpks_pZxxrF7iIe8,29330
  nexaai/mlx_backend/llm/main.py,sha256=gFDE4VZv_CLKMCTn0N521OfCKH_Ys26bHDh6g9VEFNc,1982
  nexaai/mlx_backend/mlx_audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/mlx_audio/server.py,sha256=Pqy13Fafq4WX_cTuvRFz1jq89beQm2QQGpXmhK4b9jc,17547
@@ -502,19 +504,19 @@ nexaai/mlx_backend/vlm/modeling/trainer/lora.py,sha256=tGjvenjEQ8_1Az8Nz3smz5Mgv
  nexaai/mlx_backend/vlm/modeling/trainer/trainer.py,sha256=h16SaHt76JzFruXuidgXDx7_2evx4L0SecvzqLmhyZw,9081
  nexaai/mlx_backend/vlm/modeling/trainer/utils.py,sha256=29oHf_7946YeJKP_-Dt-NPeN4xJq8Fj7Yv4jZKO9RWA,4909
  nexaai/rerank_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/rerank_impl/mlx_rerank_impl.py,sha256=I-jumShLm1jAmKunRcDNUU4yjfWLoWClFMFONd88-Es,3177
- nexaai/rerank_impl/pybind_rerank_impl.py,sha256=FIIN96zCxXopqpqZdBd7OjuqqviFBY8HMZek1bCeoJw,1447
+ nexaai/rerank_impl/mlx_rerank_impl.py,sha256=h37PKSIRBY8mwzVeLeP4ix9ui3waIsg4gorzelYLJbM,3243
+ nexaai/rerank_impl/pybind_rerank_impl.py,sha256=CtwkG7YrW58GPMDERJSnISGTVCXWNju5__R2W837t7c,1513
  nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/tts_impl/mlx_tts_impl.py,sha256=D71IFtIYWzrVdBS2y5vDBWjZ4ZAzRRjFHC0KO0pA5BU,3035
- nexaai/tts_impl/pybind_tts_impl.py,sha256=Be5QiXzDz6h1LTIQzUBd0ZyBs7rUpNA-pULCXFtt2Is,1378
+ nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
+ nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
  nexaai/utils/avatar_fetcher.py,sha256=bWy8ujgbOiTHFCjFxTwkn3uXbZ84PgEGUkXkR3MH4bI,3821
  nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
  nexaai/utils/model_manager.py,sha256=c07ocxxw1IHCQw6esbmYK0dX2R2OajfEIGsC_2teHXo,48572
  nexaai/utils/progress_tracker.py,sha256=76HlPkyN41IMHSsH56-qdlN_aY_oBfJz50J16Cx67R0,15102
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nexaai/vlm_impl/mlx_vlm_impl.py,sha256=7gm_tFNox3LC78DQEtlMQ-eBK55zDY0xWlJghUAOP5Y,10402
- nexaai/vlm_impl/pybind_vlm_impl.py,sha256=C-3fa0AIypI33OAGuGfVxo1V7zN0wjQMgruKlDIlW4Q,8333
- nexaai-1.0.4rc15.dist-info/METADATA,sha256=izOUOhvRNpO73EELnKolgU0Kn_PK79tsJkJr3RMWBzA,883
- nexaai-1.0.4rc15.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
- nexaai-1.0.4rc15.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
- nexaai-1.0.4rc15.dist-info/RECORD,,
+ nexaai/vlm_impl/mlx_vlm_impl.py,sha256=od1R1mRoIgPG3NHC7JiDlcB_YJY8aklX8Em3ZkeHNpE,10734
+ nexaai/vlm_impl/pybind_vlm_impl.py,sha256=5ZMFgDATthmMzjrd-vE5KX5ZAMoWPYbF_FTLz8DBKIk,8908
+ nexaai-1.0.5.dist-info/METADATA,sha256=fxZuww9PtpWooTJNDcf04tzwKi7AEThWyD0Z9O5a0rY,1151
+ nexaai-1.0.5.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
+ nexaai-1.0.5.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+ nexaai-1.0.5.dist-info/RECORD,,