PyPI - nexaai - Version diff - 1.0.19rc19__cp310-cp310-win_amd64.whl → 1.0.21__cp310-cp310-win_amd64.whl - Mend

nexaai 1.0.19rc19__cp310-cp310-win_amd64.whl → 1.0.21__cp310-cp310-win_amd64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nexaai might be problematic. Click here for more details.

Files changed (53)

nexaai/__init__.py +12 -0
nexaai/_stub.cp310-win_amd64.pyd +0 -0
nexaai/_version.py +1 -1
nexaai/asr.py +10 -6
nexaai/asr_impl/pybind_asr_impl.py +98 -15
nexaai/binds/__init__.py +2 -0
nexaai/binds/asr_bind.cp310-win_amd64.pyd +0 -0
nexaai/binds/common_bind.cp310-win_amd64.pyd +0 -0
nexaai/binds/cpu_gpu/ggml-base.dll +0 -0
nexaai/binds/cpu_gpu/ggml-cpu.dll +0 -0
nexaai/binds/cpu_gpu/ggml-cuda.dll +0 -0
nexaai/binds/cpu_gpu/ggml-vulkan.dll +0 -0
nexaai/binds/cpu_gpu/ggml.dll +0 -0
nexaai/binds/cpu_gpu/mtmd.dll +0 -0
nexaai/binds/cpu_gpu/nexa_cpu_gpu.dll +0 -0
nexaai/binds/cpu_gpu/nexa_plugin.dll +0 -0
nexaai/binds/embedder_bind.cp310-win_amd64.pyd +0 -0
nexaai/binds/llm_bind.cp310-win_amd64.pyd +0 -0
nexaai/binds/nexa_bridge.dll +0 -0
nexaai/binds/nexaml/ggml-base.dll +0 -0
nexaai/binds/nexaml/ggml-cpu.dll +0 -0
nexaai/binds/nexaml/ggml-cuda.dll +0 -0
nexaai/binds/nexaml/ggml-vulkan.dll +0 -0
nexaai/binds/nexaml/ggml.dll +0 -0
nexaai/binds/nexaml/nexa_plugin.dll +0 -0
nexaai/binds/nexaml/nexaproc.dll +0 -0
nexaai/binds/nexaml/qwen3-vl.dll +0 -0
nexaai/binds/rerank_bind.cp310-win_amd64.pyd +0 -0
nexaai/binds/vlm_bind.cp310-win_amd64.pyd +0 -0
nexaai/common.py +1 -0
nexaai/cv.py +2 -1
nexaai/embedder.py +4 -3
nexaai/embedder_impl/mlx_embedder_impl.py +3 -1
nexaai/embedder_impl/pybind_embedder_impl.py +3 -2
nexaai/image_gen.py +2 -1
nexaai/llm.py +5 -3
nexaai/llm_impl/mlx_llm_impl.py +2 -0
nexaai/llm_impl/pybind_llm_impl.py +2 -0
nexaai/rerank.py +5 -3
nexaai/rerank_impl/mlx_rerank_impl.py +2 -0
nexaai/rerank_impl/pybind_rerank_impl.py +109 -16
nexaai/runtime_error.py +24 -0
nexaai/tts.py +2 -1
nexaai/utils/manifest_utils.py +10 -6
nexaai/utils/model_manager.py +139 -8
nexaai/vlm.py +4 -2
nexaai/vlm_impl/mlx_vlm_impl.py +3 -2
nexaai/vlm_impl/pybind_vlm_impl.py +33 -7
{nexaai-1.0.19rc19.dist-info → nexaai-1.0.21.dist-info}/METADATA +2 -3
nexaai-1.0.21.dist-info/RECORD +79 -0
nexaai-1.0.19rc19.dist-info/RECORD +0 -76
{nexaai-1.0.19rc19.dist-info → nexaai-1.0.21.dist-info}/WHEEL +0 -0
{nexaai-1.0.19rc19.dist-info → nexaai-1.0.21.dist-info}/top_level.txt +0 -0

nexaai/__init__.py CHANGED Viewed

@@ -24,6 +24,13 @@ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, P
 # Import logging functionality
 from .log import set_logger, get_error_message
+# Import runtime errors
+from .runtime_error import (
+    NexaRuntimeError,
+    ContextLengthExceededError,
+    GenerationError
+)
 # Create alias for PluginID to be accessible as plugin_id
 plugin_id = PluginID
@@ -52,6 +59,11 @@ __all__ = [
     # Logging functionality
     "set_logger",
     "get_error_message",
+    # Runtime errors
+    "NexaRuntimeError",
+    "ContextLengthExceededError",
+    "GenerationError",
     "LLM",
     "Embedder",

nexaai/_stub.cp310-win_amd64.pyd CHANGED Viewed

Binary file

nexaai/_version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # This file is generated by CMake from _version.py.in
 # Do not modify this file manually - it will be overwritten
-__version__ = "1.0.19-rc19"
+__version__ = "1.0.21"

nexaai/asr.py CHANGED Viewed

@@ -3,7 +3,7 @@ from abc import abstractmethod
 from dataclasses import dataclass
 from nexaai.base import BaseModel
-from nexaai.common import PluginID
+from nexaai.common import PluginID, ModelConfig
 @dataclass
@@ -25,17 +25,20 @@ class ASRResult:
 class ASR(BaseModel):
     """Abstract base class for Automatic Speech Recognition models."""
-    def __init__(self):
+    def __init__(self, m_cfg: ModelConfig = ModelConfig()):
         """Initialize base ASR class."""
-        pass
+        self._m_cfg = m_cfg
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    language: Optional[str] = None,
+                   m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
         ) -> 'ASR':
         """Load ASR model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -43,10 +46,11 @@ class ASR(BaseModel):
         if plugin_value == "mlx":
             from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
-            return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
+            return MLXASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
         else:
             from nexaai.asr_impl.pybind_asr_impl import PyBindASRImpl
-            return PyBindASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
+            return PyBindASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
     @abstractmethod
     def transcribe(

nexaai/asr_impl/pybind_asr_impl.py CHANGED Viewed

@@ -1,32 +1,78 @@
 from typing import List, Optional, Union
-from nexaai.common import PluginID
+from nexaai.common import PluginID, ModelConfig
 from nexaai.asr import ASR, ASRConfig, ASRResult
+from nexaai.binds import asr_bind, common_bind
+from nexaai.runtime import _ensure_runtime
 class PyBindASRImpl(ASR):
-    def __init__(self):
-        """Initialize PyBind ASR implementation."""
-        super().__init__()
-        # TODO: Add PyBind-specific initialization
+    def __init__(self, handle: any, m_cfg: ModelConfig = ModelConfig()):
+        """Private constructor, should not be called directly."""
+        super().__init__(m_cfg)
+        self._handle = handle  # This is a py::capsule
+        self._model_config = None
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    language: Optional[str] = None,
+                   m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None
         ) -> 'PyBindASRImpl':
         """Load ASR model from local path using PyBind backend."""
-        # TODO: Implement PyBind ASR loading
-        instance = cls()
-        return instance
+        _ensure_runtime()
+        # Create model config
+        config = common_bind.ModelConfig()
+        config.n_ctx = m_cfg.n_ctx
+        if m_cfg.n_threads is not None:
+            config.n_threads = m_cfg.n_threads
+        if m_cfg.n_threads_batch is not None:
+            config.n_threads_batch = m_cfg.n_threads_batch
+        if m_cfg.n_batch is not None:
+            config.n_batch = m_cfg.n_batch
+        if m_cfg.n_ubatch is not None:
+            config.n_ubatch = m_cfg.n_ubatch
+        if m_cfg.n_seq_max is not None:
+            config.n_seq_max = m_cfg.n_seq_max
+        config.n_gpu_layers = m_cfg.n_gpu_layers
+        # handle chat template strings
+        if m_cfg.chat_template_path:
+            config.chat_template_path = m_cfg.chat_template_path
+        if m_cfg.chat_template_content:
+            config.chat_template_content = m_cfg.chat_template_content
+        # Convert plugin_id to string
+        plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
+        # Create ASR handle using the binding
+        handle = asr_bind.ml_asr_create(
+            model_path=model_path,
+            model_name=model_name,
+            tokenizer_path=tokenizer_path,
+            model_config=config,
+            language=language,
+            plugin_id=plugin_id_str,
+            device_id=device_id,
+            license_id=None,  # Optional
+            license_key=None  # Optional
+        )
+        return cls(handle, m_cfg)
     def eject(self):
-        """Destroy the model and free resources."""
-        # TODO: Implement PyBind ASR cleanup
-        pass
+        """Release the model from memory."""
+        # py::capsule handles cleanup automatically
+        if hasattr(self, '_handle') and self._handle is not None:
+            del self._handle
+            self._handle = None
     def transcribe(
         self,
@@ -35,10 +81,47 @@ class PyBindASRImpl(ASR):
         config: Optional[ASRConfig] = None,
     ) -> ASRResult:
         """Transcribe audio file to text."""
-        # TODO: Implement PyBind ASR transcription
-        raise NotImplementedError("PyBind ASR transcription not yet implemented")
+        if self._handle is None:
+            raise RuntimeError("ASR model not loaded. Call _load_from first.")
+        # Convert ASRConfig to binding format if provided
+        asr_config = None
+        if config:
+            asr_config = asr_bind.ASRConfig()
+            asr_config.timestamps = config.timestamps
+            asr_config.beam_size = config.beam_size
+            asr_config.stream = config.stream
+        # Perform transcription using the binding
+        result_dict = asr_bind.ml_asr_transcribe(
+            handle=self._handle,
+            audio_path=audio_path,
+            language=language,
+            config=asr_config
+        )
+        # Convert result to ASRResult
+        transcript = result_dict.get("transcript", "")
+        confidence_scores = result_dict.get("confidence_scores")
+        timestamps = result_dict.get("timestamps")
+        # Convert timestamps to the expected format
+        timestamp_pairs = []
+        if timestamps:
+            for start, end in timestamps:
+                timestamp_pairs.append((float(start), float(end)))
+        return ASRResult(
+            transcript=transcript,
+            confidence_scores=confidence_scores or [],
+            timestamps=timestamp_pairs
+        )
     def list_supported_languages(self) -> List[str]:
         """List supported languages."""
-        # TODO: Implement PyBind ASR language listing
-        raise NotImplementedError("PyBind ASR language listing not yet implemented")
+        if self._handle is None:
+            raise RuntimeError("ASR model not loaded. Call _load_from first.")
+        # Get supported languages using the binding
+        languages = asr_bind.ml_asr_list_supported_languages(handle=self._handle)
+        return languages

nexaai/binds/__init__.py CHANGED Viewed

@@ -2,3 +2,5 @@ from .common_bind import *
 from .llm_bind import *
 from .embedder_bind import *
 from .vlm_bind import *
+from .rerank_bind import *
+from .asr_bind import *

nexaai/binds/asr_bind.cp310-win_amd64.pyd ADDED Viewed

Binary file

nexaai/binds/common_bind.cp310-win_amd64.pyd CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/ggml-base.dll CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/ggml-cpu.dll CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/ggml-cuda.dll CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/ggml-vulkan.dll CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/ggml.dll CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/mtmd.dll CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/nexa_cpu_gpu.dll CHANGED Viewed

Binary file

nexaai/binds/cpu_gpu/nexa_plugin.dll CHANGED Viewed

Binary file

nexaai/binds/embedder_bind.cp310-win_amd64.pyd CHANGED Viewed

Binary file

nexaai/binds/llm_bind.cp310-win_amd64.pyd CHANGED Viewed

Binary file

nexaai/binds/nexa_bridge.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/ggml-base.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/ggml-cpu.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/ggml-cuda.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/ggml-vulkan.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/ggml.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/nexa_plugin.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/nexaproc.dll CHANGED Viewed

Binary file

nexaai/binds/nexaml/qwen3-vl.dll CHANGED Viewed

Binary file

nexaai/binds/rerank_bind.cp310-win_amd64.pyd ADDED Viewed

Binary file

nexaai/binds/vlm_bind.cp310-win_amd64.pyd CHANGED Viewed

Binary file

nexaai/common.py CHANGED Viewed

@@ -8,6 +8,7 @@ class PluginID(str, Enum):
     MLX = "mlx"
     LLAMA_CPP = "llama_cpp"
     NEXAML = "nexaml"
+    NPU = "npu"
 class ChatMessage(TypedDict):

nexaai/cv.py CHANGED Viewed

@@ -73,7 +73,8 @@ class CVModel(BaseModel):
                    _: str,  # TODO: remove this argument, this is a hack to make api design happy
                    config: CVModelConfig,
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
         ) -> 'CVModel':
         """Load CV model from configuration, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string

nexaai/embedder.py CHANGED Viewed

@@ -22,12 +22,13 @@ class Embedder(BaseModel):
         pass
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP, **kwargs):
         """
         Load an embedder from model files, routing to appropriate implementation.
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
@@ -39,10 +40,10 @@ class Embedder(BaseModel):
         if plugin_value == "mlx":
             from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
-            return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+            return MLXEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
         else:
             from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl
-            return PyBindEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
+            return PyBindEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
     @abstractmethod
     def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:

nexaai/embedder_impl/mlx_embedder_impl.py CHANGED Viewed

@@ -14,12 +14,13 @@ class MLXEmbedderImpl(Embedder):
         self._mlx_embedder = None
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
         """
         Load an embedder from model files using MLX backend.
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.MLX)
@@ -34,6 +35,7 @@ class MLXEmbedderImpl(Embedder):
             # This will automatically detect if it's JinaV2 or generic model and route correctly
             instance._mlx_embedder = create_embedder(
                 model_path=model_path,
+                # model_name=model_name, # FIXME: For MLX Embedder, model_name is not used
                 tokenizer_path=tokenizer_file
             )

nexaai/embedder_impl/pybind_embedder_impl.py CHANGED Viewed

@@ -16,12 +16,13 @@ class PyBindEmbedderImpl(Embedder):
         self._handle = _handle_ptr
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
+    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
         """
         Load an embedder from model files
         Args:
             model_path: Path to the model file
+            model_name: Name of the model
             tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
             plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
@@ -32,7 +33,7 @@ class PyBindEmbedderImpl(Embedder):
         # Convert enum to string for C++ binding
         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
         # New parameter order: model_path, plugin_id, tokenizer_path (optional)
-        handle = embedder_bind.ml_embedder_create(model_path, plugin_id_str, tokenizer_file)
+        handle = embedder_bind.ml_embedder_create(model_path, model_name, plugin_id_str, tokenizer_file)
         return cls(handle)
     def eject(self):

nexaai/image_gen.py CHANGED Viewed

@@ -71,7 +71,8 @@ class ImageGen(BaseModel):
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None,
                    float16: bool = True,
-                   quantize: bool = False
+                   quantize: bool = False,
+                   **kwargs
         ) -> 'ImageGen':
         """Load image generation model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string

nexaai/llm.py CHANGED Viewed

@@ -15,10 +15,12 @@ class LLM(BaseModel):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
         ) -> 'LLM':
         """Load model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -26,10 +28,10 @@ class LLM(BaseModel):
         if plugin_value == "mlx":
             from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
-            return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return MLXLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
         else:
             from nexaai.llm_impl.pybind_llm_impl import PyBindLLMImpl
-            return PyBindLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return PyBindLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
     def cancel_generation(self):
         """Signal to cancel any ongoing stream generation."""

nexaai/llm_impl/mlx_llm_impl.py CHANGED Viewed

@@ -16,6 +16,7 @@ class MLXLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.MLX,
@@ -40,6 +41,7 @@ class MLXLLMImpl(LLM):
             instance = cls(m_cfg)
             instance._mlx_llm = MLXLLMInterface(
                 model_path=local_path,
+                # model_name=model_name, # FIXME: For MLX LLM, model_name is not used
                 tokenizer_path=tokenizer_path or local_path,
                 config=mlx_config,
                 device=device_id

nexaai/llm_impl/pybind_llm_impl.py CHANGED Viewed

@@ -19,6 +19,7 @@ class PyBindLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
@@ -55,6 +56,7 @@ class PyBindLLMImpl(LLM):
         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
         handle = llm_bind.ml_llm_create(
             model_path=local_path,
+            model_name=model_name,
             tokenizer_path=tokenizer_path,
             model_config=config,
             plugin_id=plugin_id_str,

nexaai/rerank.py CHANGED Viewed

@@ -24,9 +24,11 @@ class Reranker(BaseModel):
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
         ) -> 'Reranker':
         """Load reranker model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -34,10 +36,10 @@ class Reranker(BaseModel):
         if plugin_value == "mlx":
             from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
-            return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
+            return MLXRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
         else:
             from nexaai.rerank_impl.pybind_rerank_impl import PyBindRerankImpl
-            return PyBindRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
+            return PyBindRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
     @abstractmethod
     def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:

nexaai/rerank_impl/mlx_rerank_impl.py CHANGED Viewed

@@ -17,6 +17,7 @@ class MLXRerankImpl(Reranker):
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.MLX,
                    device_id: Optional[str] = None
@@ -29,6 +30,7 @@ class MLXRerankImpl(Reranker):
             instance = cls()
             instance._mlx_reranker = create_reranker(
                 model_path=model_path,
+                # model_name=model_name, # FIXME: For MLX Reranker, model_name is not used
                 tokenizer_path=tokenizer_file,
                 device=device_id
             )

nexaai/rerank_impl/pybind_rerank_impl.py CHANGED Viewed

@@ -1,36 +1,89 @@
 from typing import List, Optional, Sequence, Union
+import numpy as np
 from nexaai.common import PluginID
 from nexaai.rerank import Reranker, RerankConfig
+from nexaai.binds import rerank_bind, common_bind
+from nexaai.runtime import _ensure_runtime
 class PyBindRerankImpl(Reranker):
-    def __init__(self):
-        """Initialize PyBind Rerank implementation."""
+    def __init__(self, _handle_ptr):
+        """
+        Internal initializer
+        Args:
+            _handle_ptr: Capsule handle to the C++ reranker object
+        """
         super().__init__()
-        # TODO: Add PyBind-specific initialization
+        self._handle = _handle_ptr
     @classmethod
     def _load_from(cls,
                    model_path: str,
+                   model_name: str = None,
                    tokenizer_file: str = "tokenizer.json",
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None
         ) -> 'PyBindRerankImpl':
-        """Load reranker model from local path using PyBind backend."""
-        # TODO: Implement PyBind reranker loading
-        instance = cls()
-        return instance
+        """
+        Load reranker model from local path using PyBind backend.
+        Args:
+            model_path: Path to the model file
+            model_name: Name of the model (optional)
+            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
+            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
+            device_id: Device ID to use for the model (optional)
+        Returns:
+            PyBindRerankImpl instance
+        """
+        _ensure_runtime()
+        # Convert enum to string for C++ binding
+        plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+        # Create model config
+        model_config = common_bind.ModelConfig()
+        # Create reranker handle with new API signature
+        handle = rerank_bind.ml_reranker_create(
+            model_path,
+            model_name,
+            tokenizer_file,
+            model_config,
+            plugin_id_str,
+            device_id
+        )
+        return cls(handle)
     def eject(self):
-        """Destroy the model and free resources."""
-        # TODO: Implement PyBind reranker cleanup
-        pass
+        """
+        Clean up resources and destroy the reranker
+        """
+        # Destructor of the handle will unload the model correctly
+        if hasattr(self, '_handle') and self._handle is not None:
+            del self._handle
+            self._handle = None
     def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
-        """Load model from path."""
-        # TODO: Implement PyBind reranker model loading
-        raise NotImplementedError("PyBind reranker model loading not yet implemented")
+        """
+        Load model from path.
+        Note: This method is not typically used directly. Use _load_from instead.
+        Args:
+            model_path: Path to the model file
+            extra_data: Additional data (unused)
+        Returns:
+            True if successful
+        """
+        # This method is part of the BaseModel interface but typically not used
+        # directly for PyBind implementations since _load_from handles creation
+        raise NotImplementedError("Use _load_from class method to load models")
     def rerank(
         self,
@@ -38,6 +91,46 @@ class PyBindRerankImpl(Reranker):
         documents: Sequence[str],
         config: Optional[RerankConfig] = None,
     ) -> List[float]:
-        """Rerank documents given a query."""
-        # TODO: Implement PyBind reranking
-        raise NotImplementedError("PyBind reranking not yet implemented")
+        """
+        Rerank documents given a query.
+        Args:
+            query: Query text as UTF-8 string
+            documents: List of document texts to rerank
+            config: Optional reranking configuration
+        Returns:
+            List of ranking scores (one per document)
+        """
+        if self._handle is None:
+            raise RuntimeError("Reranker handle is None. Model may have been ejected.")
+        # Use default config if not provided
+        if config is None:
+            config = RerankConfig()
+        # Create bind config
+        bind_config = rerank_bind.RerankConfig()
+        bind_config.batch_size = config.batch_size
+        bind_config.normalize = config.normalize
+        bind_config.normalize_method = config.normalize_method
+        # Convert documents to list if needed
+        documents_list = list(documents)
+        # Call the binding which returns a dict with scores and profile_data
+        result = rerank_bind.ml_reranker_rerank(
+            self._handle,
+            query,
+            documents_list,
+            bind_config
+        )
+        # Extract scores from result dict
+        scores_array = result.get("scores", np.array([]))
+        # Convert numpy array to list of floats
+        if isinstance(scores_array, np.ndarray):
+            return scores_array.tolist()
+        else:
+            return []