nexaai 1.0.16rc12__cp310-cp310-macosx_14_0_universal2.whl → 1.0.17rc1__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

Binary file
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is generated by CMake from _version.py.in
2
2
  # Do not modify this file manually - it will be overwritten
3
3
 
4
- __version__ = "1.0.16-rc12"
4
+ __version__ = "1.0.17-rc1"
Binary file
Binary file
Binary file
nexaai/common.py CHANGED
@@ -7,6 +7,7 @@ class PluginID(str, Enum):
7
7
  """Enum for plugin identifiers."""
8
8
  MLX = "mlx"
9
9
  LLAMA_CPP = "llama_cpp"
10
+ NEXAML = "nexaml"
10
11
 
11
12
 
12
13
  class ChatMessage(TypedDict):
@@ -80,6 +80,9 @@ class VLM(ProfilingMixin):
80
80
 
81
81
  # Init deafutl sampler config with defualt.
82
82
  self.sampler_config = SamplerConfig()
83
+
84
+ # Track global character position for incremental processing
85
+ self.global_n_past_chars = 0
83
86
 
84
87
  def destroy(self) -> None:
85
88
  """Destroy the model and free resources."""
@@ -89,6 +92,7 @@ class VLM(ProfilingMixin):
89
92
  def reset(self) -> None:
90
93
  """Reset the model state."""
91
94
  self._reset_cache()
95
+ self.global_n_past_chars = 0
92
96
 
93
97
  def _reset_cache(self) -> None:
94
98
  """Reset the KV cache."""
@@ -141,6 +145,16 @@ class VLM(ProfilingMixin):
141
145
  image_list = [str(path) for path in image_paths] if image_paths else None
142
146
  audio_list = [str(path) for path in audio_paths] if audio_paths else None
143
147
 
148
+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
149
+ full_prompt_len = len(prompt)
150
+ incremental_prompt = prompt
151
+
152
+ if self.global_n_past_chars < full_prompt_len:
153
+ incremental_prompt = prompt[self.global_n_past_chars:]
154
+ else:
155
+ # No new text to process
156
+ incremental_prompt = ""
157
+
144
158
  # End prompt processing, start decode
145
159
  self._prompt_end()
146
160
  self._decode_start()
@@ -152,7 +166,7 @@ class VLM(ProfilingMixin):
152
166
  text, stats = generate(
153
167
  self.model,
154
168
  self.processor,
155
- prompt,
169
+ incremental_prompt, # Use incremental prompt instead of full prompt
156
170
  image=image_list,
157
171
  audio=audio_list,
158
172
  **gen_kwargs,
@@ -181,6 +195,10 @@ class VLM(ProfilingMixin):
181
195
  self._update_prompt_tokens(prompt_tokens)
182
196
  self._update_generated_tokens(generated_tokens)
183
197
  self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
198
+
199
+ # Update global character position
200
+ self.global_n_past_chars = full_prompt_len + len(text)
201
+
184
202
  self._decode_end()
185
203
  self._end_profiling()
186
204
 
@@ -226,6 +244,16 @@ class VLM(ProfilingMixin):
226
244
  image_list = [str(path) for path in image_paths] if image_paths else None
227
245
  audio_list = [str(path) for path in audio_paths] if audio_paths else None
228
246
 
247
+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
248
+ full_prompt_len = len(prompt)
249
+ incremental_prompt = prompt
250
+
251
+ if self.global_n_past_chars < full_prompt_len:
252
+ incremental_prompt = prompt[self.global_n_past_chars:]
253
+ else:
254
+ # No new text to process
255
+ incremental_prompt = ""
256
+
229
257
  # End prompt processing, start decode
230
258
  self._prompt_end()
231
259
  self._decode_start()
@@ -239,7 +267,7 @@ class VLM(ProfilingMixin):
239
267
  for result in stream_generate_impl(
240
268
  self.model,
241
269
  self.processor,
242
- prompt,
270
+ incremental_prompt, # Use incremental prompt instead of full prompt
243
271
  image=image_list,
244
272
  audio=audio_list,
245
273
  **gen_kwargs,
@@ -266,6 +294,9 @@ class VLM(ProfilingMixin):
266
294
  self._update_prompt_tokens(last_result.prompt_tokens)
267
295
  self._update_generated_tokens(last_result.generation_tokens)
268
296
 
297
+ # Update global character position
298
+ self.global_n_past_chars = full_prompt_len + len(text)
299
+
269
300
  self._decode_end()
270
301
  self._end_profiling()
271
302
 
@@ -25,12 +25,15 @@ class QuantizationType(str, Enum):
25
25
  F16 = "F16"
26
26
  Q2_K = "Q2_K"
27
27
  Q2_K_L = "Q2_K_L"
28
+ Q3_K = "Q3_K"
28
29
  Q3_K_M = "Q3_K_M"
29
30
  Q3_K_S = "Q3_K_S"
30
31
  Q4_0 = "Q4_0"
31
32
  Q4_1 = "Q4_1"
33
+ Q4_K = "Q4_K"
32
34
  Q4_K_M = "Q4_K_M"
33
35
  Q4_K_S = "Q4_K_S"
36
+ Q5_K = "Q5_K"
34
37
  Q5_K_M = "Q5_K_M"
35
38
  Q5_K_S = "Q5_K_S"
36
39
  Q6_K = "Q6_K"
@@ -67,12 +70,15 @@ def extract_quantization_from_filename(filename: str) -> Optional[QuantizationTy
67
70
  'f16.': QuantizationType.F16, # Add F16 support
68
71
  'q2_k_l.': QuantizationType.Q2_K_L, # Check Q2_K_L before Q2_K to avoid partial match
69
72
  'q2_k.': QuantizationType.Q2_K,
73
+ 'q3_k.': QuantizationType.Q3_K,
70
74
  'q3_k_m.': QuantizationType.Q3_K_M,
71
- 'q3_ks.': QuantizationType.Q3_K_S,
75
+ 'q3_k_s.': QuantizationType.Q3_K_S,
72
76
  'q4_k_m.': QuantizationType.Q4_K_M,
73
77
  'q4_k_s.': QuantizationType.Q4_K_S,
74
78
  'q4_0.': QuantizationType.Q4_0,
75
79
  'q4_1.': QuantizationType.Q4_1,
80
+ 'q4_k.': QuantizationType.Q4_K,
81
+ 'q5_k.': QuantizationType.Q5_K,
76
82
  'q5_k_m.': QuantizationType.Q5_K_M,
77
83
  'q5_k_s.': QuantizationType.Q5_K_S,
78
84
  'q6_k.': QuantizationType.Q6_K,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexaai
3
- Version: 1.0.16rc12
3
+ Version: 1.0.17rc1
4
4
  Summary: Python bindings for NexaSDK C-lib backend
5
5
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
6
6
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -1,9 +1,9 @@
1
1
  nexaai/__init__.py,sha256=L8oB7GFZZMGnUpCg0PecDbI_ycKuQak-ZEJ4Y12_QIw,2184
2
- nexaai/_stub.cpython-310-darwin.so,sha256=bzuFvUH92pOTqDVeEN9Y9ULBe5iQhE5RUgzD_MRDrtE,66768
3
- nexaai/_version.py,sha256=VcEloSyZAF14s_ZLUSzu8vLdcbhwdSK1deBNqQktuqA,144
2
+ nexaai/_stub.cpython-310-darwin.so,sha256=-U41Rg1210R-O8tEXhmXA-PPJ5tIk48yx-v6HoJD660,66768
3
+ nexaai/_version.py,sha256=rKQTMRjwBe37A1MYCov01rf2P607gRZoI-XbYCSIcoA,143
4
4
  nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
5
5
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
6
- nexaai/common.py,sha256=yBnIbqYaQYnfrl7IczOBh6MDibYZVxwaRJEglYcKgGs,3422
6
+ nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
7
7
  nexaai/cv.py,sha256=RHCDo8gvBH8BkGZx7qVyp-OKxqi7E1GG9XzyaXehCNA,3273
8
8
  nexaai/embedder.py,sha256=Cw0tSHkPgd-RI62afCqQAcTHMnQhaI2CvfTMO-1JKOg,2452
9
9
  nexaai/image_gen.py,sha256=0C_5Tjj4BYmxLbmMmvwajp-yy2mmEEOKwBFnDQNPzx4,4356
@@ -19,7 +19,7 @@ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XE
19
19
  nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
20
20
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=ya8gz7NnUtHlniGnRyExdwWfdHimEHiJ7Dry4I7_y44,235264
21
21
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=tPa0c0Dv_GiW66fgmAGWGCHXRGNApznqoQS0eQx9GFM,202064
22
- nexaai/binds/libnexa_bridge.dylib,sha256=y0cv3WWCPmKVyZFSj81BM6-yhzUOfkZWvezu_kfztQc,251192
22
+ nexaai/binds/libnexa_bridge.dylib,sha256=4VzUccl3U2lTlNEK7-Q1szFpdE7HKRiPmJwQ5Y-VgqM,251192
23
23
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=TAWfa1Hzq00TjtC1xVsiAeLp6hv2LrL5afDz4omUghc,182784
24
24
  nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=nd6eG_m2EiPthzkSZ97hlXWUOZQir4cQfFJZ4p6eR2U,182704
25
25
  nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=JM4oOkie1su0ES5hMdtILeQHlRukRzH1vTleTupUXhg,650736
@@ -182,6 +182,16 @@ nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_base.py,sha256=dAeEA0IsBnU
182
182
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py,sha256=79ddUhtTUlElD9NvBupUxl-MV4HKFM9PUxQpTwKLnBA,5804
183
183
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py,sha256=9dNmH03C46HtxwesH2DpT2oTNEG1KCZWYEKq6UQ3vfk,3536
184
184
  nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py,sha256=12RiOfPtSZQj5g5JM-yCJk3uGQfM3OdmRiPt5uUDE4E,35096
185
+ nexaai/binds/nexa_nexaml/libggml-base.dylib,sha256=JM4oOkie1su0ES5hMdtILeQHlRukRzH1vTleTupUXhg,650736
186
+ nexaai/binds/nexa_nexaml/libggml-cpu.so,sha256=qiYxbTe4Nt7n36zJVvq3zovgSZEmrN2is6gzTern7UI,677728
187
+ nexaai/binds/nexa_nexaml/libggml-metal.so,sha256=zfaX7rIBYQazH2lf-vza007BMhPTK1ASd2T0HLLIA4E,673104
188
+ nexaai/binds/nexa_nexaml/libggml.dylib,sha256=aOTj_6RrAMkfDO0ZI28_3nfcC-l4Y3dRCiS3C0d0_eI,58592
189
+ nexaai/binds/nexa_nexaml/libnexa-mm-process.dylib,sha256=3mITty3oYhoi5yc690CxS0tf4LGanqrPCR0meE-h9HQ,8568120
190
+ nexaai/binds/nexa_nexaml/libnexa-sampling.dylib,sha256=OliTBr-r2y22Ebjc1Y33Ax56XpLMFQKtsHgfouj6TQM,7865400
191
+ nexaai/binds/nexa_nexaml/libnexa_plugin.dylib,sha256=rkTrXOKH71m0SmUP5IBLsJrLTbCElcWX3GKn33BPZpY,224056
192
+ nexaai/binds/nexa_nexaml/libnexaproc.dylib,sha256=Pnxgm2k29hfedLjNP4t44gvioh-NWbE_3BW-VsCdHp8,880560
193
+ nexaai/binds/nexa_nexaml/libqwen3-vl.dylib,sha256=hd8VPINYTH8mbU3BPxnjrPOKTw-NkWn7zkDB7p074-U,753168
194
+ nexaai/binds/nexa_nexaml/libqwen3vl-vision.dylib,sha256=AmUk-DBtBAp5DOR14ICZcsiAcKjxxAx1y6mHAaETQu8,570024
185
195
  nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
186
196
  nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
187
197
  nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
@@ -400,7 +410,7 @@ nexaai/mlx_backend/tts/interface.py,sha256=0FvZbIyOvg8jERZEQ6bygbv7v02O9xHO4-TPU
400
410
  nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXqYzKew,73
401
411
  nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
402
412
  nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=undjso1mfxqpd6FMTksSA5qagRttxAGbOBj1x7cqI1s,9211
403
- nexaai/mlx_backend/vlm/interface.py,sha256=vFTzJCbqq55ybv_tbDBC9NVn1_sXgCfqXdsV-3ia8vo,16177
413
+ nexaai/mlx_backend/vlm/interface.py,sha256=0BLfodbYOU71jFvAvv01FuLBE_KBtyB-8Cd7LqzzRHY,17450
404
414
  nexaai/mlx_backend/vlm/main.py,sha256=nPcg25jupeDD74uvRoxpWp3Dsulw7WddI7vll6zejak,10664
405
415
  nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
406
416
  nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -537,11 +547,11 @@ nexaai/utils/manifest_utils.py,sha256=sR9Nme4GbD3Cb3fMd55yLvGZpqxb71vd6b2XZTsrIG
537
547
  nexaai/utils/model_manager.py,sha256=p2kJKK63Zk-rEUucFsgY0T5PyXi_IvJY0gKewUVcAV4,56081
538
548
  nexaai/utils/model_types.py,sha256=-DER8L4lAUR_iLS99F0r57avwqWtuN21ug5pX2p24_E,1369
539
549
  nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
540
- nexaai/utils/quantization_utils.py,sha256=4gvp6UQfSO9G1FYBwnFtQspTzH9sDbi1PBXw2t1N69M,7650
550
+ nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
541
551
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
542
552
  nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
543
553
  nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
544
- nexaai-1.0.16rc12.dist-info/METADATA,sha256=a6Oh67bHm9t2m1j9yPHyou4gx0YOJjag7YLhJYyjRxc,1202
545
- nexaai-1.0.16rc12.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
546
- nexaai-1.0.16rc12.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
547
- nexaai-1.0.16rc12.dist-info/RECORD,,
554
+ nexaai-1.0.17rc1.dist-info/METADATA,sha256=-o225-FK1Adyvf4f8tzZM7FMyeq9g9CJthuqKuNKvG4,1201
555
+ nexaai-1.0.17rc1.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
556
+ nexaai-1.0.17rc1.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
557
+ nexaai-1.0.17rc1.dist-info/RECORD,,