nexaai 1.0.19rc19__cp310-cp310-macosx_13_0_x86_64.whl → 1.0.21rc1__cp310-cp310-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

nexaai/__init__.py CHANGED
@@ -24,6 +24,13 @@ from .common import ModelConfig, GenerationConfig, ChatMessage, SamplerConfig, P
24
24
  # Import logging functionality
25
25
  from .log import set_logger, get_error_message
26
26
 
27
+ # Import runtime errors
28
+ from .runtime_error import (
29
+ NexaRuntimeError,
30
+ ContextLengthExceededError,
31
+ GenerationError
32
+ )
33
+
27
34
  # Create alias for PluginID to be accessible as plugin_id
28
35
  plugin_id = PluginID
29
36
 
@@ -52,6 +59,11 @@ __all__ = [
52
59
  # Logging functionality
53
60
  "set_logger",
54
61
  "get_error_message",
62
+
63
+ # Runtime errors
64
+ "NexaRuntimeError",
65
+ "ContextLengthExceededError",
66
+ "GenerationError",
55
67
 
56
68
  "LLM",
57
69
  "Embedder",
Binary file
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is generated by CMake from _version.py.in
2
2
  # Do not modify this file manually - it will be overwritten
3
3
 
4
- __version__ = "1.0.19-rc19"
4
+ __version__ = "1.0.21-rc1"
Binary file
Binary file
@@ -482,8 +482,12 @@ class VLM(ProfilingMixin):
482
482
 
483
483
  def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
484
484
  """Apply chat template to messages with optional tools support."""
485
+ if self.model_name in ["qwen3vl", "qwen3vl-4b", "qwen3vl-4b-thinking", "qwen3vl-8b", "qwen3vl-8b-thinking"]:
486
+ return apply_chat_template_qwen3_vl(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
487
+ if self.model_name == "qwen3vl-moe":
488
+ return apply_chat_template_qwen3_vl_moe(messages, num_images=0, num_audios=0, tools=tools, enable_thinking=enable_thinking)
489
+
485
490
  if hasattr(self.processor, "apply_chat_template"):
486
- # Convert ChatMessage objects to dictionaries for the processor
487
491
  messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
488
492
 
489
493
  parsed_tools = None
@@ -492,7 +496,6 @@ class VLM(ProfilingMixin):
492
496
 
493
497
  result = apply_chat_template(self.processor, self.model.config, messages_dict, add_generation_prompt=True, enable_thinking=enable_thinking, tools=parsed_tools)
494
498
  return result
495
- # Fallback: join messages
496
499
  return "\n".join([f"{m.role}: {m.content}" for m in messages])
497
500
 
498
501
  def apply_chat_template_with_media(self, messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = True) -> str:
@@ -0,0 +1,24 @@
1
+ """Runtime errors for Nexa SDK operations."""
2
+
3
+
4
+ class NexaRuntimeError(Exception):
5
+ """Base class for Nexa runtime errors."""
6
+
7
+ def __init__(self, message: str, error_code: int = None):
8
+ self.error_code = error_code
9
+ super().__init__(message)
10
+
11
+
12
+ class ContextLengthExceededError(NexaRuntimeError):
13
+ """Raised when the input context length exceeds the model's maximum."""
14
+
15
+ def __init__(self, message: str = "Input context length exceeded model's maximum", error_code: int = None):
16
+ super().__init__(message, error_code)
17
+
18
+
19
+ class GenerationError(NexaRuntimeError):
20
+ """Raised when generation fails."""
21
+
22
+ def __init__(self, message: str = "Generation failed", error_code: int = None):
23
+ super().__init__(message, error_code)
24
+
nexaai/vlm.py CHANGED
@@ -99,7 +99,8 @@ class VLM(BaseModel):
99
99
  def apply_chat_template(
100
100
  self,
101
101
  messages: List[MultiModalMessage],
102
- tools: Optional[List[Dict[str, Any]]] = None
102
+ tools: Optional[List[Dict[str, Any]]] = None,
103
+ enable_thinking: bool = True
103
104
  ) -> str:
104
105
  """Apply the chat template to multimodal messages."""
105
106
  pass
@@ -72,7 +72,8 @@ class MlxVlmImpl(VLM):
72
72
  def apply_chat_template(
73
73
  self,
74
74
  messages: List[MultiModalMessage],
75
- tools: Optional[List[Dict[str, Any]]] = None
75
+ tools: Optional[List[Dict[str, Any]]] = None,
76
+ enable_thinking: bool = True
76
77
  ) -> str:
77
78
  """Apply the chat template to multimodal messages."""
78
79
  if not self._mlx_vlm:
@@ -116,7 +117,7 @@ class MlxVlmImpl(VLM):
116
117
  num_images=total_images,
117
118
  num_audios=total_audios,
118
119
  tools=tools,
119
- enable_thinking=False # Default to False, could be made configurable
120
+ enable_thinking=enable_thinking
120
121
  )
121
122
  else:
122
123
  # Use regular apply_chat_template for text-only messages
@@ -8,6 +8,11 @@ from nexaai.binds import vlm_bind, common_bind
8
8
  from nexaai.runtime import _ensure_runtime
9
9
  from nexaai.vlm import VLM
10
10
  from nexaai.base import ProfilingData
11
+ from nexaai.runtime_error import ContextLengthExceededError, GenerationError
12
+
13
+ # Error codes from ml.h
14
+ ML_SUCCESS = 0
15
+ ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH = -200004
11
16
 
12
17
 
13
18
  class PyBindVLMImpl(VLM):
@@ -91,7 +96,8 @@ class PyBindVLMImpl(VLM):
91
96
  def apply_chat_template(
92
97
  self,
93
98
  messages: List[MultiModalMessage],
94
- tools: Optional[List[Dict[str, Any]]] = None
99
+ tools: Optional[List[Dict[str, Any]]] = None,
100
+ enable_thinking: bool = True
95
101
  ) -> str:
96
102
  """Apply the chat template to multimodal messages."""
97
103
  payload = []
@@ -111,7 +117,7 @@ class PyBindVLMImpl(VLM):
111
117
 
112
118
  payload.append({"role": role, "content": blocks})
113
119
 
114
- result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools)
120
+ result = vlm_bind.ml_vlm_apply_chat_template(self._handle, payload, tools, enable_thinking)
115
121
  return result
116
122
 
117
123
  def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
@@ -143,6 +149,18 @@ class PyBindVLMImpl(VLM):
143
149
  on_token=on_token,
144
150
  user_data=None
145
151
  )
152
+
153
+ # Check for errors in result
154
+ error_code = result.get("error_code", ML_SUCCESS)
155
+ if error_code != ML_SUCCESS:
156
+ error_message = result.get("error_message", "Unknown error")
157
+ if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
158
+ exception_container[0] = ContextLengthExceededError(error_message, error_code)
159
+ else:
160
+ exception_container[0] = GenerationError(error_message, error_code)
161
+ token_queue.put(('end', None))
162
+ return
163
+
146
164
  self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
147
165
  except Exception as e:
148
166
  exception_container[0] = e
@@ -186,6 +204,15 @@ class PyBindVLMImpl(VLM):
186
204
  user_data=None
187
205
  )
188
206
 
207
+ # Check for errors in result
208
+ error_code = result.get("error_code", ML_SUCCESS)
209
+ if error_code != ML_SUCCESS:
210
+ error_message = result.get("error_message", "Unknown error")
211
+ if error_code == ML_ERROR_LLM_TOKENIZATION_CONTEXT_LENGTH:
212
+ raise ContextLengthExceededError(error_message, error_code)
213
+ else:
214
+ raise GenerationError(error_message, error_code)
215
+
189
216
  self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
190
217
  return result.get("text", "")
191
218
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexaai
3
- Version: 1.0.19rc19
3
+ Version: 1.0.21rc1
4
4
  Summary: Python bindings for NexaSDK C-lib backend
5
5
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
6
6
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -19,7 +19,7 @@ Requires-Dist: numpy
19
19
  Requires-Dist: httpx
20
20
  Provides-Extra: mlx
21
21
  Requires-Dist: mlx; extra == "mlx"
22
- Requires-Dist: mlx-lm; extra == "mlx"
22
+ Requires-Dist: mlx-lm==0.27.0; extra == "mlx"
23
23
  Requires-Dist: mlx-vlm; extra == "mlx"
24
24
  Requires-Dist: mlx-embeddings; extra == "mlx"
25
25
  Requires-Dist: tokenizers; extra == "mlx"
@@ -1,6 +1,6 @@
1
- nexaai/__init__.py,sha256=L8oB7GFZZMGnUpCg0PecDbI_ycKuQak-ZEJ4Y12_QIw,2184
2
- nexaai/_stub.cpython-310-darwin.so,sha256=0qNsPqEe1bDNB2oQ8RHFKv8EHG6GZnQihPLhawRoxYM,49832
3
- nexaai/_version.py,sha256=HOqTOpd7Lb8gdF93CQYr-x-Z_1O0X0vCrAAHaWQlI4I,144
1
+ nexaai/__init__.py,sha256=gOd7sNsqEESopw_24xgnOSkIRENrk4Fa-RMtmVv62eA,2421
2
+ nexaai/_stub.cpython-310-darwin.so,sha256=rI0M16HWShk2bDzCG8uQ9eIeff8R2JWwad7VLm68_JY,49832
3
+ nexaai/_version.py,sha256=sVSvlzCHN_LFLOFxO9UEdPfKi2BBoPi4MtOW-fdz1i8,143
4
4
  nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
5
5
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
6
6
  nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
@@ -11,17 +11,18 @@ nexaai/llm.py,sha256=S1o_k2VQoF5w2wO25f142OO1R75TP89Ii69VZv8pIGo,3567
11
11
  nexaai/log.py,sha256=Kwo2CIfWN6iP4M4F5EUIV8KIO5hAsvz6HZAaOwJ27Og,2628
12
12
  nexaai/rerank.py,sha256=vWaBucoQ1wz-2iYnZqyFIcEjm-4Xcs1KDbFN5X8zzDQ,1872
13
13
  nexaai/runtime.py,sha256=JvllhlNPgYGLbgGyX2yNvmGzT0lZ5XbvTvEo8sZG_Ho,2067
14
+ nexaai/runtime_error.py,sha256=sO87LyCA0qzm0hVqBrmG2FDzGQH865EMbTMop2OfZto,779
14
15
  nexaai/tts.py,sha256=ZnBpWUxIfHhh7KfEjddtH7hHOTa91zg7ogGLakMIALo,2167
15
- nexaai/vlm.py,sha256=OCxwML-Z5uVGp3fjzJVtbCxfTLpgxkhQ8Wo6MVysoiw,4733
16
+ nexaai/vlm.py,sha256=MreJ_S5-C0KH3haFuJwHqVtL099MrrmBQ23vK1PINCc,4771
16
17
  nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
18
  nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
18
19
  nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
19
20
  nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
20
21
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=BoXByRlNGDaNS1YyZyCF-s7h0vXP9NLPlJMQQ5pqusU,235488
21
22
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=b2NoXFAJvPLi_P1X7lXLKmAUU0v2HJI3Zwa10gfqHdw,202032
22
- nexaai/binds/libnexa_bridge.dylib,sha256=PoXV5im3SwHeD_V2HoQntGJ6iqquJAfOZCdLdNHE9Dg,271952
23
+ nexaai/binds/libnexa_bridge.dylib,sha256=jnHOOuy-Kvt-5G40HQk_NJxFncsTxPZDHETQoJHpmJQ,271952
23
24
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=p1ZTGMolEkWywkmwzOUjTr3RpSEH21BHZAggVzo89Ks,183088
24
- nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=LGd-tykePnQFfGca25HnPIBfXsfrMzbwyx6d5Ld3xps,183000
25
+ nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=CQdy70sGqhM9SvoDN3xmsOj79IWEYeOVhwou_kgkjHI,199392
25
26
  nexaai/binds/cpu_gpu/libggml-base.dylib,sha256=YDclLDlP7XlDpXiKfTOTt6mW7jgXlmwSoT_VuRrGrmM,629528
26
27
  nexaai/binds/cpu_gpu/libggml-cpu.so,sha256=cnLUQ7WdX-5iiDaH8v45u1kX1NUmK8DanpzSMGCpXPE,1039800
27
28
  nexaai/binds/cpu_gpu/libggml-metal.so,sha256=Xhhl_tLg1xmCIQVrKjqPFaLHAlx_2wUFiwDyUk0wJ-E,713680
@@ -248,7 +249,7 @@ nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXq
248
249
  nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
249
250
  nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=srN8-RFv8eOeH2rdyygCJ7Yt7kW7MQzS3i50UHBVfIM,13151
250
251
  nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py,sha256=ZSbM8JjTlkxUaVO9UNZM6YSbd60am3Z4ztJJEBsnJHg,9015
251
- nexaai/mlx_backend/vlm/interface.py,sha256=_rnqaIkvy3OUsH2b08l623oKjoe_la0G2W9iusD5qwI,22741
252
+ nexaai/mlx_backend/vlm/interface.py,sha256=D6TCUWbiGLkgmAk_b9yMb36Y4TLGT9gFPxnTaDSaCSM,23070
252
253
  nexaai/mlx_backend/vlm/main.py,sha256=8bmSTtyebp8eyL2jL36DZbNHapOpFXNmjM2NyzCFqGs,12919
253
254
  nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
254
255
  nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -396,9 +397,9 @@ nexaai/utils/model_types.py,sha256=ONWjjo8CFPdhxki6qo7MXnSZaEzjBcxa_Kkf_y5NXus,1
396
397
  nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
397
398
  nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
398
399
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
399
- nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
400
- nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
401
- nexaai-1.0.19rc19.dist-info/METADATA,sha256=V_rgXCiklp4A99jP4b_rAOsNBpscrNaLGd4Pp7aLGDo,1202
402
- nexaai-1.0.19rc19.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
403
- nexaai-1.0.19rc19.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
404
- nexaai-1.0.19rc19.dist-info/RECORD,,
400
+ nexaai/vlm_impl/mlx_vlm_impl.py,sha256=sgHqnX5OCSGLccCnTuRiktIbqThNn3AAIvYE2_Dy4TI,10833
401
+ nexaai/vlm_impl/pybind_vlm_impl.py,sha256=MDbreWSqugakXU_PqH6mPoCxjKEEbYfQIco_NDck8_s,9905
402
+ nexaai-1.0.21rc1.dist-info/METADATA,sha256=I2YizqGmn9LBQh1tfAPNxPZYE_limQe4ELxBlzYqtKM,1209
403
+ nexaai-1.0.21rc1.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
404
+ nexaai-1.0.21rc1.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
405
+ nexaai-1.0.21rc1.dist-info/RECORD,,