nexaai 1.0.19rc19__cp310-cp310-macosx_14_0_universal2.whl → 1.0.21__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

Files changed (43) hide show
  1. nexaai/__init__.py +12 -0
  2. nexaai/_stub.cpython-310-darwin.so +0 -0
  3. nexaai/_version.py +1 -1
  4. nexaai/asr.py +10 -6
  5. nexaai/asr_impl/pybind_asr_impl.py +98 -15
  6. nexaai/binds/__init__.py +2 -0
  7. nexaai/binds/asr_bind.cpython-310-darwin.so +0 -0
  8. nexaai/binds/cpu_gpu/libnexa_plugin.dylib +0 -0
  9. nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
  10. nexaai/binds/libnexa_bridge.dylib +0 -0
  11. nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
  12. nexaai/binds/metal/libnexa_plugin.dylib +0 -0
  13. nexaai/binds/nexaml/libnexa-mm-process.dylib +0 -0
  14. nexaai/binds/nexaml/libnexa-sampling.dylib +0 -0
  15. nexaai/binds/nexaml/libnexa_plugin.dylib +0 -0
  16. nexaai/binds/nexaml/libnexaproc.dylib +0 -0
  17. nexaai/binds/nexaml/libqwen3-vl.dylib +0 -0
  18. nexaai/binds/rerank_bind.cpython-310-darwin.so +0 -0
  19. nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
  20. nexaai/common.py +1 -0
  21. nexaai/cv.py +2 -1
  22. nexaai/embedder.py +4 -3
  23. nexaai/embedder_impl/mlx_embedder_impl.py +3 -1
  24. nexaai/embedder_impl/pybind_embedder_impl.py +3 -2
  25. nexaai/image_gen.py +2 -1
  26. nexaai/llm.py +5 -3
  27. nexaai/llm_impl/mlx_llm_impl.py +2 -0
  28. nexaai/llm_impl/pybind_llm_impl.py +2 -0
  29. nexaai/mlx_backend/vlm/interface.py +5 -2
  30. nexaai/rerank.py +5 -3
  31. nexaai/rerank_impl/mlx_rerank_impl.py +2 -0
  32. nexaai/rerank_impl/pybind_rerank_impl.py +109 -16
  33. nexaai/runtime_error.py +24 -0
  34. nexaai/tts.py +2 -1
  35. nexaai/utils/manifest_utils.py +10 -6
  36. nexaai/utils/model_manager.py +139 -8
  37. nexaai/vlm.py +4 -2
  38. nexaai/vlm_impl/mlx_vlm_impl.py +3 -2
  39. nexaai/vlm_impl/pybind_vlm_impl.py +33 -7
  40. {nexaai-1.0.19rc19.dist-info → nexaai-1.0.21.dist-info}/METADATA +2 -3
  41. {nexaai-1.0.19rc19.dist-info → nexaai-1.0.21.dist-info}/RECORD +43 -40
  42. {nexaai-1.0.19rc19.dist-info → nexaai-1.0.21.dist-info}/WHEEL +0 -0
  43. {nexaai-1.0.19rc19.dist-info → nexaai-1.0.21.dist-info}/top_level.txt +0 -0
nexaai/__init__.py CHANGED
@@ -24,6 +24,13 @@ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, P
24
24
  # Import logging functionality
25
25
  from .log import set_logger, get_error_message
26
26
 
27
+ # Import runtime errors
28
+ from .runtime_error import (
29
+ NexaRuntimeError,
30
+ ContextLengthExceededError,
31
+ GenerationError
32
+ )
33
+
27
34
  # Create alias for PluginID to be accessible as plugin_id
28
35
  plugin_id = PluginID
29
36
 
@@ -52,6 +59,11 @@ __all__ = [
52
59
  # Logging functionality
53
60
  "set_logger",
54
61
  "get_error_message",
62
+
63
+ # Runtime errors
64
+ "NexaRuntimeError",
65
+ "ContextLengthExceededError",
66
+ "GenerationError",
55
67
 
56
68
  "LLM",
57
69
  "Embedder",
Binary file
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is generated by CMake from _version.py.in
2
2
  # Do not modify this file manually - it will be overwritten
3
3
 
4
- __version__ = "1.0.19-rc19"
4
+ __version__ = "1.0.21"
nexaai/asr.py CHANGED
@@ -3,7 +3,7 @@ from abc import abstractmethod
3
3
  from dataclasses import dataclass
4
4
 
5
5
  from nexaai.base import BaseModel
6
- from nexaai.common import PluginID
6
+ from nexaai.common import PluginID, ModelConfig
7
7
 
8
8
 
9
9
  @dataclass
@@ -25,17 +25,20 @@ class ASRResult:
25
25
  class ASR(BaseModel):
26
26
  """Abstract base class for Automatic Speech Recognition models."""
27
27
 
28
- def __init__(self):
28
+ def __init__(self, m_cfg: ModelConfig = ModelConfig()):
29
29
  """Initialize base ASR class."""
30
- pass
30
+ self._m_cfg = m_cfg
31
31
 
32
32
  @classmethod
33
33
  def _load_from(cls,
34
34
  model_path: str,
35
+ model_name: Optional[str] = None,
35
36
  tokenizer_path: Optional[str] = None,
36
37
  language: Optional[str] = None,
38
+ m_cfg: ModelConfig = ModelConfig(),
37
39
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
38
- device_id: Optional[str] = None
40
+ device_id: Optional[str] = None,
41
+ **kwargs
39
42
  ) -> 'ASR':
40
43
  """Load ASR model from local path, routing to appropriate implementation."""
41
44
  # Check plugin_id value for routing - handle both enum and string
@@ -43,10 +46,11 @@ class ASR(BaseModel):
43
46
 
44
47
  if plugin_value == "mlx":
45
48
  from nexaai.asr_impl.mlx_asr_impl import MLXASRImpl
46
- return MLXASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
49
+ return MLXASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
47
50
  else:
48
51
  from nexaai.asr_impl.pybind_asr_impl import PyBindASRImpl
49
- return PyBindASRImpl._load_from(model_path, tokenizer_path, language, plugin_id, device_id)
52
+ return PyBindASRImpl._load_from(model_path, model_name, tokenizer_path, language, m_cfg, plugin_id, device_id)
53
+
50
54
 
51
55
  @abstractmethod
52
56
  def transcribe(
@@ -1,32 +1,78 @@
1
1
  from typing import List, Optional, Union
2
2
 
3
- from nexaai.common import PluginID
3
+ from nexaai.common import PluginID, ModelConfig
4
4
  from nexaai.asr import ASR, ASRConfig, ASRResult
5
+ from nexaai.binds import asr_bind, common_bind
6
+ from nexaai.runtime import _ensure_runtime
5
7
 
6
8
 
7
9
  class PyBindASRImpl(ASR):
8
- def __init__(self):
9
- """Initialize PyBind ASR implementation."""
10
- super().__init__()
11
- # TODO: Add PyBind-specific initialization
10
+ def __init__(self, handle: any, m_cfg: ModelConfig = ModelConfig()):
11
+ """Private constructor, should not be called directly."""
12
+ super().__init__(m_cfg)
13
+ self._handle = handle # This is a py::capsule
14
+ self._model_config = None
12
15
 
13
16
  @classmethod
14
17
  def _load_from(cls,
15
18
  model_path: str,
19
+ model_name: Optional[str] = None,
16
20
  tokenizer_path: Optional[str] = None,
17
21
  language: Optional[str] = None,
22
+ m_cfg: ModelConfig = ModelConfig(),
18
23
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
19
24
  device_id: Optional[str] = None
20
25
  ) -> 'PyBindASRImpl':
21
26
  """Load ASR model from local path using PyBind backend."""
22
- # TODO: Implement PyBind ASR loading
23
- instance = cls()
24
- return instance
27
+ _ensure_runtime()
28
+
29
+ # Create model config
30
+ config = common_bind.ModelConfig()
31
+
32
+ config.n_ctx = m_cfg.n_ctx
33
+ if m_cfg.n_threads is not None:
34
+ config.n_threads = m_cfg.n_threads
35
+ if m_cfg.n_threads_batch is not None:
36
+ config.n_threads_batch = m_cfg.n_threads_batch
37
+ if m_cfg.n_batch is not None:
38
+ config.n_batch = m_cfg.n_batch
39
+ if m_cfg.n_ubatch is not None:
40
+ config.n_ubatch = m_cfg.n_ubatch
41
+ if m_cfg.n_seq_max is not None:
42
+ config.n_seq_max = m_cfg.n_seq_max
43
+ config.n_gpu_layers = m_cfg.n_gpu_layers
44
+
45
+ # handle chat template strings
46
+ if m_cfg.chat_template_path:
47
+ config.chat_template_path = m_cfg.chat_template_path
48
+
49
+ if m_cfg.chat_template_content:
50
+ config.chat_template_content = m_cfg.chat_template_content
51
+
52
+ # Convert plugin_id to string
53
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)
54
+
55
+ # Create ASR handle using the binding
56
+ handle = asr_bind.ml_asr_create(
57
+ model_path=model_path,
58
+ model_name=model_name,
59
+ tokenizer_path=tokenizer_path,
60
+ model_config=config,
61
+ language=language,
62
+ plugin_id=plugin_id_str,
63
+ device_id=device_id,
64
+ license_id=None, # Optional
65
+ license_key=None # Optional
66
+ )
67
+
68
+ return cls(handle, m_cfg)
25
69
 
26
70
  def eject(self):
27
- """Destroy the model and free resources."""
28
- # TODO: Implement PyBind ASR cleanup
29
- pass
71
+ """Release the model from memory."""
72
+ # py::capsule handles cleanup automatically
73
+ if hasattr(self, '_handle') and self._handle is not None:
74
+ del self._handle
75
+ self._handle = None
30
76
 
31
77
  def transcribe(
32
78
  self,
@@ -35,10 +81,47 @@ class PyBindASRImpl(ASR):
35
81
  config: Optional[ASRConfig] = None,
36
82
  ) -> ASRResult:
37
83
  """Transcribe audio file to text."""
38
- # TODO: Implement PyBind ASR transcription
39
- raise NotImplementedError("PyBind ASR transcription not yet implemented")
84
+ if self._handle is None:
85
+ raise RuntimeError("ASR model not loaded. Call _load_from first.")
86
+
87
+ # Convert ASRConfig to binding format if provided
88
+ asr_config = None
89
+ if config:
90
+ asr_config = asr_bind.ASRConfig()
91
+ asr_config.timestamps = config.timestamps
92
+ asr_config.beam_size = config.beam_size
93
+ asr_config.stream = config.stream
94
+
95
+ # Perform transcription using the binding
96
+ result_dict = asr_bind.ml_asr_transcribe(
97
+ handle=self._handle,
98
+ audio_path=audio_path,
99
+ language=language,
100
+ config=asr_config
101
+ )
102
+
103
+ # Convert result to ASRResult
104
+ transcript = result_dict.get("transcript", "")
105
+ confidence_scores = result_dict.get("confidence_scores")
106
+ timestamps = result_dict.get("timestamps")
107
+
108
+ # Convert timestamps to the expected format
109
+ timestamp_pairs = []
110
+ if timestamps:
111
+ for start, end in timestamps:
112
+ timestamp_pairs.append((float(start), float(end)))
113
+
114
+ return ASRResult(
115
+ transcript=transcript,
116
+ confidence_scores=confidence_scores or [],
117
+ timestamps=timestamp_pairs
118
+ )
40
119
 
41
120
  def list_supported_languages(self) -> List[str]:
42
121
  """List supported languages."""
43
- # TODO: Implement PyBind ASR language listing
44
- raise NotImplementedError("PyBind ASR language listing not yet implemented")
122
+ if self._handle is None:
123
+ raise RuntimeError("ASR model not loaded. Call _load_from first.")
124
+
125
+ # Get supported languages using the binding
126
+ languages = asr_bind.ml_asr_list_supported_languages(handle=self._handle)
127
+ return languages
nexaai/binds/__init__.py CHANGED
@@ -2,3 +2,5 @@ from .common_bind import *
2
2
  from .llm_bind import *
3
3
  from .embedder_bind import *
4
4
  from .vlm_bind import *
5
+ from .rerank_bind import *
6
+ from .asr_bind import *
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
nexaai/common.py CHANGED
@@ -8,6 +8,7 @@ class PluginID(str, Enum):
8
8
  MLX = "mlx"
9
9
  LLAMA_CPP = "llama_cpp"
10
10
  NEXAML = "nexaml"
11
+ NPU = "npu"
11
12
 
12
13
 
13
14
  class ChatMessage(TypedDict):
nexaai/cv.py CHANGED
@@ -73,7 +73,8 @@ class CVModel(BaseModel):
73
73
  _: str, # TODO: remove this argument, this is a hack to make api design happy
74
74
  config: CVModelConfig,
75
75
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
76
- device_id: Optional[str] = None
76
+ device_id: Optional[str] = None,
77
+ **kwargs
77
78
  ) -> 'CVModel':
78
79
  """Load CV model from configuration, routing to appropriate implementation."""
79
80
  # Check plugin_id value for routing - handle both enum and string
nexaai/embedder.py CHANGED
@@ -22,12 +22,13 @@ class Embedder(BaseModel):
22
22
  pass
23
23
 
24
24
  @classmethod
25
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
25
+ def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP, **kwargs):
26
26
  """
27
27
  Load an embedder from model files, routing to appropriate implementation.
28
28
 
29
29
  Args:
30
30
  model_path: Path to the model file
31
+ model_name: Name of the model
31
32
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
32
33
  plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
33
34
 
@@ -39,10 +40,10 @@ class Embedder(BaseModel):
39
40
 
40
41
  if plugin_value == "mlx":
41
42
  from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
42
- return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
43
+ return MLXEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
43
44
  else:
44
45
  from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl
45
- return PyBindEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
46
+ return PyBindEmbedderImpl._load_from(model_path, model_name, tokenizer_file, plugin_id)
46
47
 
47
48
  @abstractmethod
48
49
  def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
@@ -14,12 +14,13 @@ class MLXEmbedderImpl(Embedder):
14
14
  self._mlx_embedder = None
15
15
 
16
16
  @classmethod
17
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
17
+ def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
18
18
  """
19
19
  Load an embedder from model files using MLX backend.
20
20
 
21
21
  Args:
22
22
  model_path: Path to the model file
23
+ model_name: Name of the model
23
24
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
24
25
  plugin_id: Plugin ID to use for the model (default: PluginID.MLX)
25
26
 
@@ -34,6 +35,7 @@ class MLXEmbedderImpl(Embedder):
34
35
  # This will automatically detect if it's JinaV2 or generic model and route correctly
35
36
  instance._mlx_embedder = create_embedder(
36
37
  model_path=model_path,
38
+ # model_name=model_name, # FIXME: For MLX Embedder, model_name is not used
37
39
  tokenizer_path=tokenizer_file
38
40
  )
39
41
 
@@ -16,12 +16,13 @@ class PyBindEmbedderImpl(Embedder):
16
16
  self._handle = _handle_ptr
17
17
 
18
18
  @classmethod
19
- def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
19
+ def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
20
20
  """
21
21
  Load an embedder from model files
22
22
 
23
23
  Args:
24
24
  model_path: Path to the model file
25
+ model_name: Name of the model
25
26
  tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
26
27
  plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
27
28
 
@@ -32,7 +33,7 @@ class PyBindEmbedderImpl(Embedder):
32
33
  # Convert enum to string for C++ binding
33
34
  plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
34
35
  # New parameter order: model_path, plugin_id, tokenizer_path (optional)
35
- handle = embedder_bind.ml_embedder_create(model_path, plugin_id_str, tokenizer_file)
36
+ handle = embedder_bind.ml_embedder_create(model_path, model_name, plugin_id_str, tokenizer_file)
36
37
  return cls(handle)
37
38
 
38
39
  def eject(self):
nexaai/image_gen.py CHANGED
@@ -71,7 +71,8 @@ class ImageGen(BaseModel):
71
71
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
72
72
  device_id: Optional[str] = None,
73
73
  float16: bool = True,
74
- quantize: bool = False
74
+ quantize: bool = False,
75
+ **kwargs
75
76
  ) -> 'ImageGen':
76
77
  """Load image generation model from local path, routing to appropriate implementation."""
77
78
  # Check plugin_id value for routing - handle both enum and string
nexaai/llm.py CHANGED
@@ -15,10 +15,12 @@ class LLM(BaseModel):
15
15
  @classmethod
16
16
  def _load_from(cls,
17
17
  local_path: str,
18
+ model_name: Optional[str] = None,
18
19
  tokenizer_path: Optional[str] = None,
19
20
  m_cfg: ModelConfig = ModelConfig(),
20
21
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
21
- device_id: Optional[str] = None
22
+ device_id: Optional[str] = None,
23
+ **kwargs
22
24
  ) -> 'LLM':
23
25
  """Load model from local path, routing to appropriate implementation."""
24
26
  # Check plugin_id value for routing - handle both enum and string
@@ -26,10 +28,10 @@ class LLM(BaseModel):
26
28
 
27
29
  if plugin_value == "mlx":
28
30
  from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
29
- return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
31
+ return MLXLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
30
32
  else:
31
33
  from nexaai.llm_impl.pybind_llm_impl import PyBindLLMImpl
32
- return PyBindLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
34
+ return PyBindLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
33
35
 
34
36
  def cancel_generation(self):
35
37
  """Signal to cancel any ongoing stream generation."""
@@ -16,6 +16,7 @@ class MLXLLMImpl(LLM):
16
16
  @classmethod
17
17
  def _load_from(cls,
18
18
  local_path: str,
19
+ model_name: Optional[str] = None,
19
20
  tokenizer_path: Optional[str] = None,
20
21
  m_cfg: ModelConfig = ModelConfig(),
21
22
  plugin_id: Union[PluginID, str] = PluginID.MLX,
@@ -40,6 +41,7 @@ class MLXLLMImpl(LLM):
40
41
  instance = cls(m_cfg)
41
42
  instance._mlx_llm = MLXLLMInterface(
42
43
  model_path=local_path,
44
+ # model_name=model_name, # FIXME: For MLX LLM, model_name is not used
43
45
  tokenizer_path=tokenizer_path or local_path,
44
46
  config=mlx_config,
45
47
  device=device_id
@@ -19,6 +19,7 @@ class PyBindLLMImpl(LLM):
19
19
  @classmethod
20
20
  def _load_from(cls,
21
21
  local_path: str,
22
+ model_name: Optional[str] = None,
22
23
  tokenizer_path: Optional[str] = None,
23
24
  m_cfg: ModelConfig = ModelConfig(),
24
25
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
@@ -55,6 +56,7 @@ class PyBindLLMImpl(LLM):
55
56
  plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
56
57
  handle = llm_bind.ml_llm_create(
57
58
  model_path=local_path,
59
+ model_name=model_name,
58
60
  tokenizer_path=tokenizer_path,
59
61
  model_config=config,
60
62
  plugin_id=plugin_id_str,
@@ -482,8 +482,12 @@ class VLM(ProfilingMixin):
482
482
 
483
483
  def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
484
484
  """Apply chat template to messages with optional tools support."""
485
+ if self.model_name in ["qwen3vl", "qwen3vl-4b", "qwen3vl-4b-thinking", "qwen3vl-8b", "qwen3vl-8b-thinking"]:
486
+ return apply_chat_template_qwen3_vl(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
487
+ if self.model_name == "qwen3vl-moe":
488
+ return apply_chat_template_qwen3_vl_moe(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
489
+
485
490
  if hasattr(self.processor, "apply_chat_template"):
486
- # Convert ChatMessage objects to dictionaries for the processor
487
491
  messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
488
492
 
489
493
  parsed_tools = None
@@ -492,7 +496,6 @@ class VLM(ProfilingMixin):
492
496
 
493
497
  result = apply_chat_template(self.processor, self.model.config, messages_dict, add_generation_prompt=True, enable_thinking=enable_thinking, tools=parsed_tools)
494
498
  return result
495
- # Fallback: join messages
496
499
  return "\n".join([f"{m.role}: {m.content}" for m in messages])
497
500
 
498
501
  def apply_chat_template_with_media(self, messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = True) -> str:
nexaai/rerank.py CHANGED
@@ -24,9 +24,11 @@ class Reranker(BaseModel):
24
24
  @classmethod
25
25
  def _load_from(cls,
26
26
  model_path: str,
27
+ model_name: str = None,
27
28
  tokenizer_file: str = "tokenizer.json",
28
29
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
29
- device_id: Optional[str] = None
30
+ device_id: Optional[str] = None,
31
+ **kwargs
30
32
  ) -> 'Reranker':
31
33
  """Load reranker model from local path, routing to appropriate implementation."""
32
34
  # Check plugin_id value for routing - handle both enum and string
@@ -34,10 +36,10 @@ class Reranker(BaseModel):
34
36
 
35
37
  if plugin_value == "mlx":
36
38
  from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
37
- return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
39
+ return MLXRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
38
40
  else:
39
41
  from nexaai.rerank_impl.pybind_rerank_impl import PyBindRerankImpl
40
- return PyBindRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)
42
+ return PyBindRerankImpl._load_from(model_path, model_name, tokenizer_file, plugin_id, device_id)
41
43
 
42
44
  @abstractmethod
43
45
  def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
@@ -17,6 +17,7 @@ class MLXRerankImpl(Reranker):
17
17
  @classmethod
18
18
  def _load_from(cls,
19
19
  model_path: str,
20
+ model_name: str = None,
20
21
  tokenizer_file: str = "tokenizer.json",
21
22
  plugin_id: Union[PluginID, str] = PluginID.MLX,
22
23
  device_id: Optional[str] = None
@@ -29,6 +30,7 @@ class MLXRerankImpl(Reranker):
29
30
  instance = cls()
30
31
  instance._mlx_reranker = create_reranker(
31
32
  model_path=model_path,
33
+ # model_name=model_name, # FIXME: For MLX Reranker, model_name is not used
32
34
  tokenizer_path=tokenizer_file,
33
35
  device=device_id
34
36
  )
@@ -1,36 +1,89 @@
1
1
  from typing import List, Optional, Sequence, Union
2
+ import numpy as np
2
3
 
3
4
  from nexaai.common import PluginID
4
5
  from nexaai.rerank import Reranker, RerankConfig
6
+ from nexaai.binds import rerank_bind, common_bind
7
+ from nexaai.runtime import _ensure_runtime
5
8
 
6
9
 
7
10
  class PyBindRerankImpl(Reranker):
8
- def __init__(self):
9
- """Initialize PyBind Rerank implementation."""
11
+ def __init__(self, _handle_ptr):
12
+ """
13
+ Internal initializer
14
+
15
+ Args:
16
+ _handle_ptr: Capsule handle to the C++ reranker object
17
+ """
10
18
  super().__init__()
11
- # TODO: Add PyBind-specific initialization
19
+ self._handle = _handle_ptr
12
20
 
13
21
  @classmethod
14
22
  def _load_from(cls,
15
23
  model_path: str,
24
+ model_name: str = None,
16
25
  tokenizer_file: str = "tokenizer.json",
17
26
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
18
27
  device_id: Optional[str] = None
19
28
  ) -> 'PyBindRerankImpl':
20
- """Load reranker model from local path using PyBind backend."""
21
- # TODO: Implement PyBind reranker loading
22
- instance = cls()
23
- return instance
29
+ """
30
+ Load reranker model from local path using PyBind backend.
31
+
32
+ Args:
33
+ model_path: Path to the model file
34
+ model_name: Name of the model (optional)
35
+ tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
36
+ plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
37
+ device_id: Device ID to use for the model (optional)
38
+
39
+ Returns:
40
+ PyBindRerankImpl instance
41
+ """
42
+ _ensure_runtime()
43
+
44
+ # Convert enum to string for C++ binding
45
+ plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
46
+
47
+ # Create model config
48
+ model_config = common_bind.ModelConfig()
49
+
50
+ # Create reranker handle with new API signature
51
+ handle = rerank_bind.ml_reranker_create(
52
+ model_path,
53
+ model_name,
54
+ tokenizer_file,
55
+ model_config,
56
+ plugin_id_str,
57
+ device_id
58
+ )
59
+
60
+ return cls(handle)
24
61
 
25
62
  def eject(self):
26
- """Destroy the model and free resources."""
27
- # TODO: Implement PyBind reranker cleanup
28
- pass
63
+ """
64
+ Clean up resources and destroy the reranker
65
+ """
66
+ # Destructor of the handle will unload the model correctly
67
+ if hasattr(self, '_handle') and self._handle is not None:
68
+ del self._handle
69
+ self._handle = None
29
70
 
30
71
  def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
31
- """Load model from path."""
32
- # TODO: Implement PyBind reranker model loading
33
- raise NotImplementedError("PyBind reranker model loading not yet implemented")
72
+ """
73
+ Load model from path.
74
+
75
+ Note: This method is not typically used directly. Use _load_from instead.
76
+
77
+ Args:
78
+ model_path: Path to the model file
79
+ extra_data: Additional data (unused)
80
+
81
+ Returns:
82
+ True if successful
83
+ """
84
+ # This method is part of the BaseModel interface but typically not used
85
+ # directly for PyBind implementations since _load_from handles creation
86
+ raise NotImplementedError("Use _load_from class method to load models")
34
87
 
35
88
  def rerank(
36
89
  self,
@@ -38,6 +91,46 @@ class PyBindRerankImpl(Reranker):
38
91
  documents: Sequence[str],
39
92
  config: Optional[RerankConfig] = None,
40
93
  ) -> List[float]:
41
- """Rerank documents given a query."""
42
- # TODO: Implement PyBind reranking
43
- raise NotImplementedError("PyBind reranking not yet implemented")
94
+ """
95
+ Rerank documents given a query.
96
+
97
+ Args:
98
+ query: Query text as UTF-8 string
99
+ documents: List of document texts to rerank
100
+ config: Optional reranking configuration
101
+
102
+ Returns:
103
+ List of ranking scores (one per document)
104
+ """
105
+ if self._handle is None:
106
+ raise RuntimeError("Reranker handle is None. Model may have been ejected.")
107
+
108
+ # Use default config if not provided
109
+ if config is None:
110
+ config = RerankConfig()
111
+
112
+ # Create bind config
113
+ bind_config = rerank_bind.RerankConfig()
114
+ bind_config.batch_size = config.batch_size
115
+ bind_config.normalize = config.normalize
116
+ bind_config.normalize_method = config.normalize_method
117
+
118
+ # Convert documents to list if needed
119
+ documents_list = list(documents)
120
+
121
+ # Call the binding which returns a dict with scores and profile_data
122
+ result = rerank_bind.ml_reranker_rerank(
123
+ self._handle,
124
+ query,
125
+ documents_list,
126
+ bind_config
127
+ )
128
+
129
+ # Extract scores from result dict
130
+ scores_array = result.get("scores", np.array([]))
131
+
132
+ # Convert numpy array to list of floats
133
+ if isinstance(scores_array, np.ndarray):
134
+ return scores_array.tolist()
135
+ else:
136
+ return []
@@ -0,0 +1,24 @@
1
+ """Runtime errors for Nexa SDK operations."""
2
+
3
+
4
+ class NexaRuntimeError(Exception):
5
+ """Base class for Nexa runtime errors."""
6
+
7
+ def __init__(self, message: str, error_code: int = None):
8
+ self.error_code = error_code
9
+ super().__init__(message)
10
+
11
+
12
+ class ContextLengthExceededError(NexaRuntimeError):
13
+ """Raised when the input context length exceeds the model's maximum."""
14
+
15
+ def __init__(self, message: str = "Input context length exceeded model's maximum", error_code: int = None):
16
+ super().__init__(message, error_code)
17
+
18
+
19
+ class GenerationError(NexaRuntimeError):
20
+ """Raised when generation fails."""
21
+
22
+ def __init__(self, message: str = "Generation failed", error_code: int = None):
23
+ super().__init__(message, error_code)
24
+
nexaai/tts.py CHANGED
@@ -45,7 +45,8 @@ class TTS(BaseModel):
45
45
  model_path: str,
46
46
  vocoder_path: str,
47
47
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
48
- device_id: Optional[str] = None
48
+ device_id: Optional[str] = None,
49
+ **kwargs
49
50
  ) -> 'TTS':
50
51
  """Load TTS model from local path, routing to appropriate implementation."""
51
52
  # Check plugin_id value for routing - handle both enum and string
@@ -157,12 +157,16 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, ol
157
157
  # Use the new enum-based quantization extraction
158
158
  quantization_type = extract_quantization_from_filename(current_file_name)
159
159
  quant_level = quantization_type.value if quantization_type else "UNKNOWN"
160
-
161
- model_files[quant_level] = {
162
- "Name": current_file_name,
163
- "Downloaded": True,
164
- "Size": file_size
165
- }
160
+
161
+ # FIXME: hardcode to handle the multiple mmproj files problem
162
+ if quant_level == "UNKNOWN" and "mmproj" in current_file_name.lower():
163
+ pass
164
+ else:
165
+ model_files[quant_level] = {
166
+ "Name": current_file_name,
167
+ "Downloaded": True,
168
+ "Size": file_size
169
+ }
166
170
 
167
171
  # Determine PluginId with priority: kwargs > downloaded_manifest > model_file_type > default
168
172
  plugin_id = kwargs.get('plugin_id')
@@ -410,6 +410,20 @@ def _remove_specific_file(target_model: DownloadedModel, file_name: str, local_d
410
410
  except OSError:
411
411
  file_size = 0
412
412
 
413
+ # Check if we should remove entire folder instead (for .gguf files)
414
+ # If removing a .gguf file and no other non-mmproj .gguf files remain, remove entire folder
415
+ if file_name.endswith('.gguf'):
416
+ updated_files = [f for f in target_model.files if f != file_name]
417
+ # Find remaining .gguf files that don't contain "mmproj" in filename
418
+ remaining_non_mmproj_gguf = [
419
+ f for f in updated_files
420
+ if f.endswith('.gguf') and 'mmproj' not in f.lower()
421
+ ]
422
+
423
+ # If no non-mmproj .gguf files remain, remove entire repository
424
+ if len(remaining_non_mmproj_gguf) == 0:
425
+ return _remove_entire_repository(target_model, local_dir)
426
+
413
427
  # Remove the file
414
428
  try:
415
429
  os.remove(file_path)
@@ -846,6 +860,41 @@ class HuggingFaceDownloader:
846
860
  pass
847
861
  return {}
848
862
 
863
+ def _download_manifest_if_needed(self, repo_id: str, local_dir: str) -> bool:
864
+ """
865
+ Download nexa.manifest from the repository if it doesn't exist locally.
866
+
867
+ Args:
868
+ repo_id: Repository ID
869
+ local_dir: Local directory where the manifest should be saved
870
+
871
+ Returns:
872
+ bool: True if manifest was downloaded or already exists, False if not found in repo
873
+ """
874
+ manifest_path = os.path.join(local_dir, 'nexa.manifest')
875
+
876
+ # Check if manifest already exists locally
877
+ if os.path.exists(manifest_path):
878
+ return True
879
+
880
+ # Try to download nexa.manifest from the repository
881
+ try:
882
+ print(f"[INFO] Attempting to download nexa.manifest from {repo_id}...")
883
+ self.api.hf_hub_download(
884
+ repo_id=repo_id,
885
+ filename='nexa.manifest',
886
+ local_dir=local_dir,
887
+ local_dir_use_symlinks=False,
888
+ token=self.token,
889
+ force_download=False
890
+ )
891
+ print(f"[OK] Successfully downloaded nexa.manifest from {repo_id}")
892
+ return True
893
+ except Exception as e:
894
+ # Manifest doesn't exist in repo or other error - this is fine, we'll create it
895
+ print(f"[INFO] nexa.manifest not found in {repo_id}, will create locally")
896
+ return False
897
+
849
898
  def _fetch_and_save_metadata(self, repo_id: str, local_dir: str, is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> None:
850
899
  """Fetch model info and save metadata after successful download."""
851
900
  # Initialize metadata with defaults to ensure manifest is always created
@@ -946,6 +995,9 @@ class HuggingFaceDownloader:
946
995
  if progress_tracker:
947
996
  progress_tracker.stop_tracking()
948
997
 
998
+ # Download nexa.manifest from repo if it doesn't exist locally
999
+ self._download_manifest_if_needed(repo_id, file_local_dir)
1000
+
949
1001
  # Save metadata after successful download
950
1002
  self._fetch_and_save_metadata(repo_id, file_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)
951
1003
 
@@ -1055,6 +1107,9 @@ class HuggingFaceDownloader:
1055
1107
  if progress_tracker:
1056
1108
  progress_tracker.stop_tracking()
1057
1109
 
1110
+ # Download nexa.manifest from repo if it doesn't exist locally
1111
+ self._download_manifest_if_needed(repo_id, repo_local_dir)
1112
+
1058
1113
  # Save metadata after successful download
1059
1114
  self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)
1060
1115
 
@@ -1289,7 +1344,7 @@ def _download_model_if_needed(
1289
1344
  token: Union[bool, str, None] = None,
1290
1345
  is_mmproj: bool = False,
1291
1346
  **kwargs
1292
- ) -> str:
1347
+ ) -> tuple[str, Optional[str], Optional[str]]:
1293
1348
  """
1294
1349
  Helper function to download a model from HuggingFace if it doesn't exist locally.
1295
1350
 
@@ -1300,15 +1355,78 @@ def _download_model_if_needed(
1300
1355
  token: HuggingFace authentication token for private repositories
1301
1356
 
1302
1357
  Returns:
1303
- str: Local path to the model (either existing or downloaded)
1358
+ tuple[str, Optional[str], Optional[str]]: Tuple of (local_path, model_name, plugin_id)
1359
+ - local_path: Local path to the model (either existing or downloaded)
1360
+ - model_name: ModelName from nexa.manifest if available, None otherwise
1361
+ - plugin_id: PluginId from nexa.manifest if available, None otherwise
1304
1362
 
1305
1363
  Raises:
1306
1364
  RuntimeError: If download fails
1307
1365
  """
1366
+ # Helper function to extract model info from manifest
1367
def _extract_info_from_manifest(path: str) -> tuple[Optional[str], Optional[str], Optional[dict]]:
    """Read ``nexa.manifest`` located at (or next to) ``path``.

    Args:
        path: A model file or a model directory. For a file, the manifest is
            looked up in the file's parent directory; otherwise ``path`` itself
            is used as the directory.

    Returns:
        Tuple ``(ModelName, PluginId, manifest)`` taken from the manifest, or
        ``(None, None, None)`` when the manifest is missing, unreadable, not
        valid UTF-8 JSON, or not a JSON object.
    """
    manifest_dir = os.path.dirname(path) if os.path.isfile(path) else path
    manifest_path = os.path.join(manifest_dir, 'nexa.manifest')

    try:
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (bad encoding).
        return None, None, None

    # A manifest that parses to a list/string/number has no fields to read.
    if not isinstance(manifest, dict):
        return None, None, None

    return manifest.get('ModelName'), manifest.get('PluginId'), manifest
1385
+
1386
+ # Helper function to get a model file path from manifest
1387
+ # Note: This is for NPU only, because when downloading, it is a directory; when passing local path to inference, it needs to be a file.
1388
def _get_model_file_from_manifest(manifest: dict, base_dir: str) -> Optional[str]:
    """Return the path of the first existing ``.nexa`` file listed in the manifest.

    The ``ModelFile`` mapping is scanned first (entries keyed ``'N/A'`` and names
    starting with ``'.'`` are skipped), then the ``ExtraFiles`` list (names
    starting with ``'.cache'`` are skipped). Malformed sections or entries
    (wrong type, ``null``, non-string ``Name``) are ignored instead of raising.

    Args:
        manifest: Parsed ``nexa.manifest`` content.
        base_dir: Directory the file names in the manifest are relative to.

    Returns:
        Path of the first listed ``.nexa`` file that exists under ``base_dir``,
        or None if the manifest has no ``ModelFile`` section or no listed file
        exists.
    """
    if not isinstance(manifest, dict) or 'ModelFile' not in manifest:
        return None

    def _existing_nexa_file(file_info, skipped_prefix: str) -> Optional[str]:
        """Return the on-disk path for one manifest entry, or None if it does not qualify."""
        if not isinstance(file_info, dict):
            return None
        file_name = file_info.get('Name')
        if not isinstance(file_name, str) or not file_name.endswith('.nexa'):
            return None
        if file_name.startswith(skipped_prefix):
            return None
        file_path = os.path.join(base_dir, file_name)
        return file_path if os.path.exists(file_path) else None

    # Primary source: the ModelFile mapping.
    model_files = manifest['ModelFile']
    if isinstance(model_files, dict):
        for key, file_info in model_files.items():
            if key == 'N/A':
                continue
            found = _existing_nexa_file(file_info, '.')
            if found:
                return found

    # Fallback: ExtraFiles, which may be absent or null in the manifest.
    extra_files = manifest.get('ExtraFiles')
    if isinstance(extra_files, (list, tuple)):
        for file_info in extra_files:
            found = _existing_nexa_file(file_info, '.cache')
            if found:
                return found

    return None
1417
+
1308
1418
  # Check if model_path exists locally (file or directory)
1309
1419
  if os.path.exists(model_path):
1310
- # Local path exists, return as-is
1311
- return model_path
1420
+ # Local path exists, try to extract model info
1421
+ model_name, plugin_id, manifest = _extract_info_from_manifest(model_path)
1422
+
1423
+ # If PluginId is "npu" and path is a directory, convert to file path
1424
+ if plugin_id == "npu" and os.path.isdir(model_path):
1425
+ model_file_path = _get_model_file_from_manifest(manifest, model_path)
1426
+ if model_file_path:
1427
+ model_path = model_file_path
1428
+
1429
+ return model_path, model_name, plugin_id
1312
1430
 
1313
1431
  # Model path doesn't exist locally, try to download from HuggingFace
1314
1432
  try:
@@ -1328,7 +1446,16 @@ def _download_model_if_needed(
1328
1446
  **kwargs
1329
1447
  )
1330
1448
 
1331
- return downloaded_path
1449
+ # Extract model info from the downloaded manifest
1450
+ model_name, plugin_id, manifest = _extract_info_from_manifest(downloaded_path)
1451
+
1452
+ # If PluginId is "npu" and path is a directory, convert to file path
1453
+ if plugin_id == "npu" and os.path.isdir(downloaded_path):
1454
+ model_file_path = _get_model_file_from_manifest(manifest, downloaded_path)
1455
+ if model_file_path:
1456
+ downloaded_path = model_file_path
1457
+
1458
+ return downloaded_path, model_name, plugin_id
1332
1459
 
1333
1460
  except Exception as e:
1334
1461
  # Only handle download-related errors
@@ -1397,7 +1524,7 @@ def auto_download_model(func: Callable) -> Callable:
1397
1524
  # Download name_or_path if needed
1398
1525
  if name_or_path is not None:
1399
1526
  try:
1400
- downloaded_name_path = _download_model_if_needed(
1527
+ downloaded_name_path, model_name, plugin_id = _download_model_if_needed(
1401
1528
  name_or_path, 'name_or_path', progress_callback, token, **kwargs
1402
1529
  )
1403
1530
 
@@ -1408,6 +1535,10 @@ def auto_download_model(func: Callable) -> Callable:
1408
1535
  args = tuple(args_list)
1409
1536
  else:
1410
1537
  kwargs['name_or_path'] = downloaded_name_path
1538
+
1539
+ # Add model_name to kwargs if it exists and not already set
1540
+ if model_name is not None and 'model_name' not in kwargs:
1541
+ kwargs['model_name'] = model_name
1411
1542
 
1412
1543
  except Exception as e:
1413
1544
  raise e # Re-raise the error from _download_model_if_needed
@@ -1415,7 +1546,7 @@ def auto_download_model(func: Callable) -> Callable:
1415
1546
  # Download mmproj_path if needed
1416
1547
  if mmproj_path is not None:
1417
1548
  try:
1418
- downloaded_mmproj_path = _download_model_if_needed(
1549
+ downloaded_mmproj_path, _, _ = _download_model_if_needed(
1419
1550
  mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True, **kwargs
1420
1551
  )
1421
1552
 
@@ -1427,5 +1558,5 @@ def auto_download_model(func: Callable) -> Callable:
1427
1558
 
1428
1559
  # Call original function with updated paths (outside try-catch to let model creation errors bubble up)
1429
1560
  return func(*args, **kwargs)
1430
-
1561
+
1431
1562
  return wrapper
nexaai/vlm.py CHANGED
@@ -22,7 +22,8 @@ class VLM(BaseModel):
22
22
  model_name: Optional[str] = None,
23
23
  m_cfg: ModelConfig = ModelConfig(),
24
24
  plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
25
- device_id: Optional[str] = None
25
+ device_id: Optional[str] = None,
26
+ **kwargs
26
27
  ) -> 'VLM':
27
28
  """Load VLM model from local path, routing to appropriate implementation.
28
29
 
@@ -99,7 +100,8 @@ class VLM(BaseModel):
99
100
  def apply_chat_template(
100
101
  self,
101
102
  messages: List[MultiModalMessage],
102
- tools: Optional[List[Dict[str, Any]]] = None
103
+ tools: Optional[List[Dict[str, Any]]] = None,
104
+ enable_thinking: bool = True
103
105
  ) -> str:
104
106
  """Apply the chat template to multimodal messages."""
105
107
  pass
@@ -72,7 +72,8 @@ class MlxVlmImpl(VLM):
72
72
  def apply_chat_template(
73
73
  self,
74
74
  messages: List[MultiModalMessage],
75
- tools: Optional[List[Dict[str, Any]]] = None
75
+ tools: Optional[List[Dict[str, Any]]] = None,
76
+ enable_thinking: bool = True
76
77
  ) -> str:
77
78
  """Apply the chat template to multimodal messages."""
78
79
  if not self._mlx_vlm:
@@ -116,7 +117,7 @@ class MlxVlmImpl(VLM):
116
117
  num_images=total_images,
117
118
  num_audios=total_audios,
118
119
  tools=tools,
119
- enable_thinking=False # Default to False, could be made configurable
120
+ enable_thinking=enable_thinking
120
121
  )
121
122
  else:
122
123
  # Use regular apply_chat_template for text-only messages
@@ -8,6 +8,11 @@ from nexaai.binds import vlm_bind, common_bind
8
8
  from nexaai.runtime import _ensure_runtime
9
9
  from nexaai.vlm import VLM
10
10
  from nexaai.base import ProfilingData
11
+ from nexaai.runtime_error import ContextLengthExceededError, GenerationError
12
+
13
+ # Error codes from ml.h
14
+ ML_SUCCESS = 0
15
+ ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH = -200004
11
16
 
12
17
 
13
18
  class PyBindVLMImpl(VLM):
@@ -68,7 +73,7 @@ class PyBindVLMImpl(VLM):
68
73
  handle = vlm_bind.create_vlm(
69
74
  model_path=local_path,
70
75
  mmproj_path=mmproj_path,
71
- # model_name=model_name, # TODO: enable model_name in pybind later
76
+ model_name=model_name,
72
77
  model_config=config,
73
78
  plugin_id=plugin_id_str,
74
79
  device_id=device_id
@@ -91,7 +96,8 @@ class PyBindVLMImpl(VLM):
91
96
  def apply_chat_template(
92
97
  self,
93
98
  messages: List[MultiModalMessage],
94
- tools: Optional[List[Dict[str, Any]]] = None
99
+ tools: Optional[List[Dict[str, Any]]] = None,
100
+ enable_thinking: bool = True
95
101
  ) -> str:
96
102
  """Apply the chat template to multimodal messages."""
97
103
  payload = []
@@ -103,15 +109,14 @@ class PyBindVLMImpl(VLM):
103
109
  t = c["type"]
104
110
  if t == "text":
105
111
  blocks.append({"type": "text", "text": c.get("text","") or ""})
106
- elif t == "image":
107
- # Pass through the original structure - let vlm-bind.cpp handle field extraction
108
- blocks.append(c)
109
112
  else:
110
- raise ValueError(f"Unsupported content type: {t}. Use 'text' or 'image' to match the golden reference in vlm.cpp")
113
+ # Pass through the original structure for image, audio, and any other types
114
+ # Let vlm-bind.cpp handle field extraction (text/url/path)
115
+ blocks.append(c)
111
116
 
112
117
  payload.append({"role": role, "content": blocks})
113
118
 
114
- result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools)
119
+ result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools, enable_thinking)
115
120
  return result
116
121
 
117
122
  def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
@@ -143,6 +148,18 @@ class PyBindVLMImpl(VLM):
143
148
  on_token=on_token,
144
149
  user_data=None
145
150
  )
151
+
152
+ # Check for errors in result
153
+ error_code = result.get("error_code", ML_SUCCESS)
154
+ if error_code != ML_SUCCESS:
155
+ error_message = result.get("error_message", "Unknown error")
156
+ if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
157
+ exception_container[0] = ContextLengthExceededError(error_message, error_code)
158
+ else:
159
+ exception_container[0] = GenerationError(error_message, error_code)
160
+ token_queue.put(('end', None))
161
+ return
162
+
146
163
  self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
147
164
  except Exception as e:
148
165
  exception_container[0] = e
@@ -186,6 +203,15 @@ class PyBindVLMImpl(VLM):
186
203
  user_data=None
187
204
  )
188
205
 
206
+ # Check for errors in result
207
+ error_code = result.get("error_code", ML_SUCCESS)
208
+ if error_code != ML_SUCCESS:
209
+ error_message = result.get("error_message", "Unknown error")
210
+ if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
211
+ raise ContextLengthExceededError(error_message, error_code)
212
+ else:
213
+ raise GenerationError(error_message, error_code)
214
+
189
215
  self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
190
216
  return result.get("text", "")
191
217
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexaai
3
- Version: 1.0.19rc19
3
+ Version: 1.0.21
4
4
  Summary: Python bindings for NexaSDK C-lib backend
5
5
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
6
6
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -14,12 +14,11 @@ Requires-Python: >=3.7
14
14
  Description-Content-Type: text/markdown
15
15
  Requires-Dist: huggingface_hub
16
16
  Requires-Dist: tqdm
17
- Requires-Dist: hf_xet
18
17
  Requires-Dist: numpy
19
18
  Requires-Dist: httpx
20
19
  Provides-Extra: mlx
21
20
  Requires-Dist: mlx; extra == "mlx"
22
- Requires-Dist: mlx-lm; extra == "mlx"
21
+ Requires-Dist: mlx-lm==0.27.0; extra == "mlx"
23
22
  Requires-Dist: mlx-vlm; extra == "mlx"
24
23
  Requires-Dist: mlx-embeddings; extra == "mlx"
25
24
  Requires-Dist: tokenizers; extra == "mlx"
@@ -1,35 +1,38 @@
1
- nexaai/__init__.py,sha256=L8oB7GFZZMGnUpCg0PecDbI_ycKuQak-ZEJ4Y12_QIw,2184
2
- nexaai/_stub.cpython-310-darwin.so,sha256=fhGvLrWdd7Lb4h9i_s3x3cud95dAqxuHrSteZIR-UVc,66768
3
- nexaai/_version.py,sha256=HOqTOpd7Lb8gdF93CQYr-x-Z_1O0X0vCrAAHaWQlI4I,144
4
- nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
1
+ nexaai/__init__.py,sha256=gOd7sNsqEESopw_24xgnOSkIRENrk4Fa-RMtmVv62eA,2421
2
+ nexaai/_stub.cpython-310-darwin.so,sha256=M4mXeGDU9SgBUtKngvJN0AreCFvbRTAhVn0N5zQj63g,66768
3
+ nexaai/_version.py,sha256=nWa8LYSocqThPKZF7GPMpRrb1TPnqOI4BR2IoL05toU,139
4
+ nexaai/asr.py,sha256=wqtq71cxIMGE4KvOIYZebHdWik8dy4LyKrDI98PDvzQ,2294
5
5
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
6
- nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
7
- nexaai/cv.py,sha256=RHCDo8gvBH8BkGZx7qVyp-OKxqi7E1GG9XzyaXehCNA,3273
8
- nexaai/embedder.py,sha256=Cw0tSHkPgd-RI62afCqQAcTHMnQhaI2CvfTMO-1JKOg,2452
9
- nexaai/image_gen.py,sha256=0C_5Tjj4BYmxLbmMmvwajp-yy2mmEEOKwBFnDQNPzx4,4356
10
- nexaai/llm.py,sha256=S1o_k2VQoF5w2wO25f142OO1R75TP89Ii69VZv8pIGo,3567
6
+ nexaai/common.py,sha256=MRWZ6a7pnci_OUHxZRm3YqgKLAtZFD7b88STYDfeIF8,3460
7
+ nexaai/cv.py,sha256=gpE3F__6bjh8OQKNJZs-QrBuCxqMj2eH-u6HR90vGZE,3302
8
+ nexaai/embedder.py,sha256=lXOT16PEvd_hT23d77dZH38VHNOAk-3JvoOUdQTEaGI,2552
9
+ nexaai/image_gen.py,sha256=MkGw1HXqqv8cJzbiGERNPKFXfq9vMOlvuq0pgekXw68,4385
10
+ nexaai/llm.py,sha256=-agVJuj0FOaDvDiT-fFSOpoyVt-MpNudBucsod3Vp1M,3673
11
11
  nexaai/log.py,sha256=Kwo2CIfWN6iP4M4F5EUIV8KIO5hAsvz6HZAaOwJ27Og,2628
12
- nexaai/rerank.py,sha256=vWaBucoQ1wz-2iYnZqyFIcEjm-4Xcs1KDbFN5X8zzDQ,1872
12
+ nexaai/rerank.py,sha256=rFKm1Y_ou__0lU82OTy4j_AYIGVBGfID0gzuZ6zXYsM,1968
13
13
  nexaai/runtime.py,sha256=JvllhlNPgYGLbgGyX2yNvmGzT0lZ5XbvTvEo8sZG_Ho,2067
14
- nexaai/tts.py,sha256=ZnBpWUxIfHhh7KfEjddtH7hHOTa91zg7ogGLakMIALo,2167
15
- nexaai/vlm.py,sha256=OCxwML-Z5uVGp3fjzJVtbCxfTLpgxkhQ8Wo6MVysoiw,4733
14
+ nexaai/runtime_error.py,sha256=sO87LyCA0qzm0hVqBrmG2FDzGQH865EMbTMop2OfZto,779
15
+ nexaai/tts.py,sha256=jvgDZIyo47NBDny6z74IQT2SDDVo7Mpp-QZwl6YxARU,2196
16
+ nexaai/vlm.py,sha256=LUrd1_SGHOsYpWyUymX93oEIsNJv7XzHIHo4hBZOhQA,4800
16
17
  nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
18
  nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
18
- nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
19
- nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
19
+ nexaai/asr_impl/pybind_asr_impl.py,sha256=FLOWIph37q_nIiNx8xYi-VnhQ6CrPuc4HFAJZQKc42w,4680
20
+ nexaai/binds/__init__.py,sha256=2-Rr0NwyWygqwS8Xlxq0BJ2ltyID-WbGuzEYNlSanCI,155
21
+ nexaai/binds/asr_bind.cpython-310-darwin.so,sha256=nr1BqGYRCooj2k5khaIvGG06_GrzudMXa9xa15UQ-88,200768
20
22
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=zxJuD0nSV--VZKxBfWZUavU7_bHj_JTi0FhkjvG4VJw,235264
21
- nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=tPa0c0Dv_GiW66fgmAGWGCHXRGNApznqoQS0eQx9GFM,202064
22
- nexaai/binds/libnexa_bridge.dylib,sha256=8oYPIjtIf6VBG30Id9x6K9D9irfYZBFHxvW-8iZ5rbQ,272648
23
- nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=TAWfa1Hzq00TjtC1xVsiAeLp6hv2LrL5afDz4omUghc,182784
24
- nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=nd6eG_m2EiPthzkSZ97hlXWUOZQir4cQfFJZ4p6eR2U,182704
23
+ nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=nn3kINQUNyXIzmcU1olLg8RlkZYyIs_RtIlCk6OH1ds,202064
24
+ nexaai/binds/libnexa_bridge.dylib,sha256=zPuFEOGOJkSlv5Uw2JffzN5IpRb3ANW7GIKaZ9TGxso,291224
25
+ nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=YShsXbe_n2N05joMmTnZXaXh9gM9LGdcmuWUxUc5plI,182784
26
+ nexaai/binds/rerank_bind.cpython-310-darwin.so,sha256=GJmffOLo9A48S_pMG2CtHyhbamtt97QikSSCXk3LqiM,183920
27
+ nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=JEoQbyOkMXVK4WyBCqKBHbyXHtTiTWT18UkZMFvMh4k,182704
25
28
  nexaai/binds/cpu_gpu/libggml-base.dylib,sha256=9urbatWTB4QJ6uariOYlB_ZN1FFci5GaD8OfGZ9oPSQ,650736
26
29
  nexaai/binds/cpu_gpu/libggml-cpu.so,sha256=qiYxbTe4Nt7n36zJVvq3zovgSZEmrN2is6gzTern7UI,677728
27
30
  nexaai/binds/cpu_gpu/libggml-metal.so,sha256=zfaX7rIBYQazH2lf-vza007BMhPTK1ASd2T0HLLIA4E,673104
28
31
  nexaai/binds/cpu_gpu/libggml.dylib,sha256=aOTj_6RrAMkfDO0ZI28_3nfcC-l4Y3dRCiS3C0d0_eI,58592
29
32
  nexaai/binds/cpu_gpu/libmtmd.dylib,sha256=yfdSBV5McPAwGs9bRFHs9xSItFLEqc1NAnXBGBjumRU,605248
30
33
  nexaai/binds/cpu_gpu/libnexa_cpu_gpu.dylib,sha256=IF4xdOSZJG83mjgVBD42M4IswXzT-6K7VenPvaxb3vs,1786136
31
- nexaai/binds/cpu_gpu/libnexa_plugin.dylib,sha256=uSP4GgEZipQO-ll_5LDC0z1aCHvlVgPgW-C1MFw3IFU,1863304
32
- nexaai/binds/metal/libnexa_plugin.dylib,sha256=F2v8SOCpgEa8MlUGensvLAeC-SWhLFLRw5RqfIl-QWA,659256
34
+ nexaai/binds/cpu_gpu/libnexa_plugin.dylib,sha256=DVIggxoopUs_ipgCh3-wD5KR0zRles_QU7t18BE5lLA,1884184
35
+ nexaai/binds/metal/libnexa_plugin.dylib,sha256=1qIA7R9FBOwg4JrJe84PpQJdKhIng2QZQYbthkF84hM,659304
33
36
  nexaai/binds/metal/py-lib/ml.py,sha256=DKXVOAfh8cg7KTKljh7jpcPwfQFNigc6uv_ZXF6lse8,23977
34
37
  nexaai/binds/metal/py-lib/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
35
38
  nexaai/binds/metal/py-lib/mlx_audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -190,25 +193,25 @@ nexaai/binds/nexaml/libggml-metal.so,sha256=zfaX7rIBYQazH2lf-vza007BMhPTK1ASd2T0
190
193
  nexaai/binds/nexaml/libggml.dylib,sha256=aOTj_6RrAMkfDO0ZI28_3nfcC-l4Y3dRCiS3C0d0_eI,58592
191
194
  nexaai/binds/nexaml/libmp3lame.0.dylib,sha256=G-21u3MAZ2hiv1fbPEpOUGdToZfLtz2XI6BwW9xIqIA,305136
192
195
  nexaai/binds/nexaml/libmpg123.0.dylib,sha256=L4AgLcdcjGOQkEovUGDCNlH959500i0GkKBhqiHxBY4,306464
193
- nexaai/binds/nexaml/libnexa-mm-process.dylib,sha256=-tNsX7gmHbgGDVOo9WEMyppWb1u4ldRCyTl3VIRkeHc,10222440
194
- nexaai/binds/nexaml/libnexa-sampling.dylib,sha256=cnOFmvb5urwkB1S1aruZA7s6jaa6e_q-sv-2NFW785U,7957848
195
- nexaai/binds/nexaml/libnexa_plugin.dylib,sha256=7aQX5UQWVipWmq0KcyPtbqmpZMsAtyHMFJ4zusFGGDw,249512
196
- nexaai/binds/nexaml/libnexaproc.dylib,sha256=aWhBGz9c8Dd0JIigOeYey0CIraTZ_PYOyRjaS9lho0M,968320
196
+ nexaai/binds/nexaml/libnexa-mm-process.dylib,sha256=WCEgYK13YBfg-DiaGXXJ00-QYo0ucEP-QUS1jVEUR_k,10222392
197
+ nexaai/binds/nexaml/libnexa-sampling.dylib,sha256=mWJ3jsF4bp3RwWebgjd4PalZqtLiXGK4Y5J9IkYqgvk,7957800
198
+ nexaai/binds/nexaml/libnexa_plugin.dylib,sha256=7HglcsNk_2Stbpw22TpvItmPQY3vbQMvu-JOlDWOkHA,267224
199
+ nexaai/binds/nexaml/libnexaproc.dylib,sha256=KNAyTwv2Tk94VNbUNFldDbnhQmJwv2ykysfvW_267DY,897248
197
200
  nexaai/binds/nexaml/libomp.dylib,sha256=RcvfaCz0XuqTYtVPFYyUm2V6S9YgUQ0x5pHATr9FnDk,754912
198
- nexaai/binds/nexaml/libqwen3-vl.dylib,sha256=wRQUVHtJJZRi8QCoHHURmpzpd7LeirBbMvIsCmWNJsc,770352
201
+ nexaai/binds/nexaml/libqwen3-vl.dylib,sha256=zTKPJqAzSsUt4b2DOwkgcjysDSa9tAYY0m2HaxrIlLo,770784
199
202
  nexaai/binds/nexaml/libqwen3vl-vision.dylib,sha256=t0QfNZG5gpvHPBqX-Tdm1qjaXHCVROTtzL5bW78v4-I,490344
200
203
  nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
201
204
  nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
202
205
  nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
203
206
  nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
204
- nexaai/embedder_impl/mlx_embedder_impl.py,sha256=dTjOC1VJ9ypIgCvkK_jKNSWpswbg132rDcTzWcL5oFA,4482
205
- nexaai/embedder_impl/pybind_embedder_impl.py,sha256=9gsHuSbF64IZH9ugqv4-GTUuRpy-FJNUb0cww2QR3uA,3575
207
+ nexaai/embedder_impl/mlx_embedder_impl.py,sha256=pFPraUAjm9EVvVbwIp1cjbtXUysF5pqxEcK2CAFvcDw,4639
208
+ nexaai/embedder_impl/pybind_embedder_impl.py,sha256=lFpf0wI2d7kfO2GUyUuUS1U2L_PyZMJVGmAvF8EuQ0g,3653
206
209
  nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
207
210
  nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
208
211
  nexaai/image_gen_impl/pybind_image_gen_impl.py,sha256=ms34VYoD5AxZFG6cIG0QAJDjCtfphaZ1bHzKzey1xF8,3692
209
212
  nexaai/llm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
210
- nexaai/llm_impl/mlx_llm_impl.py,sha256=4v7jUFzHfE7zw2uViekGQDaTROz8A6oaW31Z3iVe6tg,11157
211
- nexaai/llm_impl/pybind_llm_impl.py,sha256=aooqkcXZWhCo07wbSafGgBrA3WnijtnUADShjjgFsBQ,8051
213
+ nexaai/llm_impl/mlx_llm_impl.py,sha256=dPtaEribluHZZY_f9M114glcQhtDEckukw4Sfd5zJos,11296
214
+ nexaai/llm_impl/pybind_llm_impl.py,sha256=XXnUuRZMr9rrEL1vM6VTwsgs0KQnKn4C3TyrHE46uw8,8139
212
215
  nexaai/mlx_backend/ml.py,sha256=DKXVOAfh8cg7KTKljh7jpcPwfQFNigc6uv_ZXF6lse8,23977
213
216
  nexaai/mlx_backend/profiling.py,sha256=Dc-mybFwBdCIKFWL7CbSHjkOJGAoYHG7r_e_XPhzwBU,9361
214
217
  nexaai/mlx_backend/asr/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nujxkSM,451
@@ -416,7 +419,7 @@ nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXq
416
419
  nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
417
420
  nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=srN8-RFv8eOeH2rdyygCJ7Yt7kW7MQzS3i50UHBVfIM,13151
418
421
  nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py,sha256=ZSbM8JjTlkxUaVO9UNZM6YSbd60am3Z4ztJJEBsnJHg,9015
419
- nexaai/mlx_backend/vlm/interface.py,sha256=_rnqaIkvy3OUsH2b08l623oKjoe_la0G2W9iusD5qwI,22741
422
+ nexaai/mlx_backend/vlm/interface.py,sha256=D6TCUWbiGLkgmAk_b9yMb36Y4TLGT9gFPxnTaDSaCSM,23070
420
423
  nexaai/mlx_backend/vlm/main.py,sha256=8bmSTtyebp8eyL2jL36DZbNHapOpFXNmjM2NyzCFqGs,12919
421
424
  nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
422
425
  nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -552,21 +555,21 @@ nexaai/mlx_backend/vlm/modeling/trainer/lora.py,sha256=tGjvenjEQ8_1Az8Nz3smz5Mgv
552
555
  nexaai/mlx_backend/vlm/modeling/trainer/trainer.py,sha256=h16SaHt76JzFruXuidgXDx7_2evx4L0SecvzqLmhyZw,9081
553
556
  nexaai/mlx_backend/vlm/modeling/trainer/utils.py,sha256=29oHf_7946YeJKP_-Dt-NPeN4xJq8Fj7Yv4jZKO9RWA,4909
554
557
  nexaai/rerank_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
555
- nexaai/rerank_impl/mlx_rerank_impl.py,sha256=h37PKSIRBY8mwzVeLeP4ix9ui3waIsg4gorzelYLJbM,3243
556
- nexaai/rerank_impl/pybind_rerank_impl.py,sha256=CtwkG7YrW58GPMDERJSnISGTVCXWNju5__R2W837t7c,1513
558
+ nexaai/rerank_impl/mlx_rerank_impl.py,sha256=3nbqCdzyAugc4P_6K9mowEgy4LFdfzhy7GUvn9GMpSE,3377
559
+ nexaai/rerank_impl/pybind_rerank_impl.py,sha256=tmzrpRYCCV3ATxbE9G1Io6SUtgYPO8BFe48nTae6_xw,4490
557
560
  nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
558
561
  nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
559
562
  nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
560
563
  nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
561
- nexaai/utils/manifest_utils.py,sha256=SCcFN09xNI0DiTA1U7DZwWiQsRH0CInWSny_9q0BwNM,21273
562
- nexaai/utils/model_manager.py,sha256=NnbPv1iuwo6T523gLsWjnff-gGvPGUjez-rFg8-ffpE,59568
564
+ nexaai/utils/manifest_utils.py,sha256=OOp_BmFWH1ZHMYkS2VGAby5Rpm4f4GLCRBJEBYm-kys,21489
565
+ nexaai/utils/model_manager.py,sha256=OnL87zCPn3cBcScCKo-bHnBUpr24-Po293QC6Bwgx1Q,66112
563
566
  nexaai/utils/model_types.py,sha256=ONWjjo8CFPdhxki6qo7MXnSZaEzjBcxa_Kkf_y5NXus,1483
564
567
  nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
565
568
  nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
566
569
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
567
- nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
568
- nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
569
- nexaai-1.0.19rc19.dist-info/METADATA,sha256=V_rgXCiklp4A99jP4b_rAOsNBpscrNaLGd4Pp7aLGDo,1202
570
- nexaai-1.0.19rc19.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
571
- nexaai-1.0.19rc19.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
572
- nexaai-1.0.19rc19.dist-info/RECORD,,
570
+ nexaai/vlm_impl/mlx_vlm_impl.py,sha256=sgHqnX5OCSGLccCnTuRiktIbqThNn3AAIvYE2_Dy4TI,10833
571
+ nexaai/vlm_impl/pybind_vlm_impl.py,sha256=stJKHdhYhBuWUQkky-nHgCv625qDB_1geI3v5BLNGpM,9765
572
+ nexaai-1.0.21.dist-info/METADATA,sha256=WTZ4KM_6xJlrJ-NOaDoQEEPwEidaxuot5bocvRHKB0k,1184
573
+ nexaai-1.0.21.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
574
+ nexaai-1.0.21.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
575
+ nexaai-1.0.21.dist-info/RECORD,,