nexaai 1.0.16rc13__cp310-cp310-macosx_13_0_x86_64.whl → 1.0.16rc14__cp310-cp310-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

Binary file
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is generated by CMake from _version.py.in
2
2
  # Do not modify this file manually - it will be overwritten
3
3
 
4
- __version__ = "1.0.16-rc13"
4
+ __version__ = "1.0.16-rc14"
Binary file
@@ -80,6 +80,9 @@ class VLM(ProfilingMixin):
80
80
 
81
81
  # Init default sampler config with defaults.
82
82
  self.sampler_config = SamplerConfig()
83
+
84
+ # Track global character position for incremental processing
85
+ self.global_n_past_chars = 0
83
86
 
84
87
  def destroy(self) -> None:
85
88
  """Destroy the model and free resources."""
@@ -89,6 +92,7 @@ class VLM(ProfilingMixin):
89
92
  def reset(self) -> None:
90
93
  """Reset the model state."""
91
94
  self._reset_cache()
95
+ self.global_n_past_chars = 0
92
96
 
93
97
  def _reset_cache(self) -> None:
94
98
  """Reset the KV cache."""
@@ -141,6 +145,16 @@ class VLM(ProfilingMixin):
141
145
  image_list = [str(path) for path in image_paths] if image_paths else None
142
146
  audio_list = [str(path) for path in audio_paths] if audio_paths else None
143
147
 
148
+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
149
+ full_prompt_len = len(prompt)
150
+ incremental_prompt = prompt
151
+
152
+ if self.global_n_past_chars < full_prompt_len:
153
+ incremental_prompt = prompt[self.global_n_past_chars:]
154
+ else:
155
+ # No new text to process
156
+ incremental_prompt = ""
157
+
144
158
  # End prompt processing, start decode
145
159
  self._prompt_end()
146
160
  self._decode_start()
@@ -152,7 +166,7 @@ class VLM(ProfilingMixin):
152
166
  text, stats = generate(
153
167
  self.model,
154
168
  self.processor,
155
- prompt,
169
+ incremental_prompt, # Use incremental prompt instead of full prompt
156
170
  image=image_list,
157
171
  audio=audio_list,
158
172
  **gen_kwargs,
@@ -181,6 +195,10 @@ class VLM(ProfilingMixin):
181
195
  self._update_prompt_tokens(prompt_tokens)
182
196
  self._update_generated_tokens(generated_tokens)
183
197
  self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
198
+
199
+ # Update global character position
200
+ self.global_n_past_chars = full_prompt_len + len(text)
201
+
184
202
  self._decode_end()
185
203
  self._end_profiling()
186
204
 
@@ -226,6 +244,16 @@ class VLM(ProfilingMixin):
226
244
  image_list = [str(path) for path in image_paths] if image_paths else None
227
245
  audio_list = [str(path) for path in audio_paths] if audio_paths else None
228
246
 
247
+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
248
+ full_prompt_len = len(prompt)
249
+ incremental_prompt = prompt
250
+
251
+ if self.global_n_past_chars < full_prompt_len:
252
+ incremental_prompt = prompt[self.global_n_past_chars:]
253
+ else:
254
+ # No new text to process
255
+ incremental_prompt = ""
256
+
229
257
  # End prompt processing, start decode
230
258
  self._prompt_end()
231
259
  self._decode_start()
@@ -239,7 +267,7 @@ class VLM(ProfilingMixin):
239
267
  for result in stream_generate_impl(
240
268
  self.model,
241
269
  self.processor,
242
- prompt,
270
+ incremental_prompt, # Use incremental prompt instead of full prompt
243
271
  image=image_list,
244
272
  audio=audio_list,
245
273
  **gen_kwargs,
@@ -266,6 +294,9 @@ class VLM(ProfilingMixin):
266
294
  self._update_prompt_tokens(last_result.prompt_tokens)
267
295
  self._update_generated_tokens(last_result.generation_tokens)
268
296
 
297
+ # Update global character position
298
+ self.global_n_past_chars = full_prompt_len + len(text)
299
+
269
300
  self._decode_end()
270
301
  self._end_profiling()
271
302
 
@@ -232,7 +232,7 @@ def generate_step(
232
232
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
233
233
  prompt_processed_tokens += prefill_step_size
234
234
  y = y[prefill_step_size:]
235
- mx.metal.clear_cache()
235
+ mx.clear_cache()
236
236
 
237
237
  y, logprobs = _step(y)
238
238
 
@@ -249,7 +249,7 @@ def generate_step(
249
249
  break
250
250
  yield y.item(), logprobs
251
251
  if n % 256 == 0:
252
- mx.metal.clear_cache()
252
+ mx.clear_cache()
253
253
  y, logprobs = next_y, next_logprobs
254
254
  n += 1
255
255
 
@@ -371,7 +371,7 @@ def nexa_generate_step(
371
371
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
372
372
  prompt_processed_tokens += prefill_step_size
373
373
  y = y[prefill_step_size:]
374
- mx.metal.clear_cache()
374
+ mx.clear_cache()
375
375
 
376
376
  y, logprobs = _step(y)
377
377
 
@@ -388,7 +388,7 @@ def nexa_generate_step(
388
388
  break
389
389
  yield y.item(), logprobs
390
390
  if n % 256 == 0:
391
- mx.metal.clear_cache()
391
+ mx.clear_cache()
392
392
  y, logprobs = next_y, next_logprobs
393
393
  n += 1
394
394
 
@@ -507,7 +507,7 @@ def nexa_multimodal_generate_step(
507
507
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
508
508
  prompt_processed_tokens += prefill_step_size
509
509
  y = y[prefill_step_size:]
510
- mx.metal.clear_cache()
510
+ mx.clear_cache()
511
511
 
512
512
  y, logprobs = _step(y)
513
513
 
@@ -524,7 +524,7 @@ def nexa_multimodal_generate_step(
524
524
  break
525
525
  yield y.item(), logprobs
526
526
  if n % 256 == 0:
527
- mx.metal.clear_cache()
527
+ mx.clear_cache()
528
528
  y, logprobs = next_y, next_logprobs
529
529
  n += 1
530
530
 
@@ -632,7 +632,7 @@ def speculative_generate_step(
632
632
  quantize_cache_fn(cache)
633
633
  mx.eval([c.state for c in cache])
634
634
  y = y[prefill_step_size:]
635
- mx.metal.clear_cache()
635
+ mx.clear_cache()
636
636
  return y
637
637
 
638
638
  def _rewind_cache(num_draft, num_accept):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexaai
3
- Version: 1.0.16rc13
3
+ Version: 1.0.16rc14
4
4
  Summary: Python bindings for NexaSDK C-lib backend
5
5
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
6
6
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -1,6 +1,6 @@
1
1
  nexaai/__init__.py,sha256=L8oB7GFZZMGnUpCg0PecDbI_ycKuQak-ZEJ4Y12_QIw,2184
2
- nexaai/_stub.cpython-310-darwin.so,sha256=Bd-r6O9pG8m0SL3rkS3PQF8Z1ie_WD7uqaFPRyitb9E,49832
3
- nexaai/_version.py,sha256=w1I23pLkLt0xrD0hMhWK5fW9rqbLqnW1ii4yYp9UCTo,144
2
+ nexaai/_stub.cpython-310-darwin.so,sha256=KljcA21kmHR-5BpCJJJMrHFMwI2Wgixalw80FJSNBe8,49832
3
+ nexaai/_version.py,sha256=bFprtDX2rUyZwPnP3h4-J3LGlRF9uZAd4KOCazdh12I,144
4
4
  nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
5
5
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
6
6
  nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
@@ -19,7 +19,7 @@ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XE
19
19
  nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
20
20
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=km1TU5WOJHVjvyM4l5mgAkS_omxuKt8pM92E9Wv0VqM,235488
21
21
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=b2NoXFAJvPLi_P1X7lXLKmAUU0v2HJI3Zwa10gfqHdw,202032
22
- nexaai/binds/libnexa_bridge.dylib,sha256=SLP_DHAJeSl5gJMSs2fZtPLv-VgNyojZTK0auqDXSpo,250408
22
+ nexaai/binds/libnexa_bridge.dylib,sha256=v770dZQxEZvtXZN8drsqLrrCPfblKSBJQOIbu96YUUY,250408
23
23
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=p1ZTGMolEkWywkmwzOUjTr3RpSEH21BHZAggVzo89Ks,183088
24
24
  nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=LGd-tykePnQFfGca25HnPIBfXsfrMzbwyx6d5Ld3xps,183000
25
25
  nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=GyOkHOM-5uHp7NUZ4Sr9BWak6BYpcc9aqI9A-zPnQp4,629528
@@ -247,7 +247,7 @@ nexaai/mlx_backend/tts/interface.py,sha256=0FvZbIyOvg8jERZEQ6bygbv7v02O9xHO4-TPU
247
247
  nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXqYzKew,73
248
248
  nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
249
249
  nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=undjso1mfxqpd6FMTksSA5qagRttxAGbOBj1x7cqI1s,9211
250
- nexaai/mlx_backend/vlm/interface.py,sha256=vFTzJCbqq55ybv_tbDBC9NVn1_sXgCfqXdsV-3ia8vo,16177
250
+ nexaai/mlx_backend/vlm/interface.py,sha256=0BLfodbYOU71jFvAvv01FuLBE_KBtyB-8Cd7LqzzRHY,17450
251
251
  nexaai/mlx_backend/vlm/main.py,sha256=nPcg25jupeDD74uvRoxpWp3Dsulw7WddI7vll6zejak,10664
252
252
  nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
253
253
  nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -362,7 +362,7 @@ nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py,sha256=LArnNtI98B_GJO
362
362
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
363
363
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py,sha256=4RlZwgz8YX2ngmJNaymxFFpw9hJu-0EMw9xwXpngW9o,3496
364
364
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py,sha256=NMOB6x-RT6svF4H-Ymo5WqnP7ptAal3aaKjWZXWGMsM,17671
365
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=Mw7Btz0_t7erQOrfWzCXT-ktEwZl61OODcmDMIo3VS0,26719
365
+ nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=bchCpnlewysWQss5TQKxdKPXYd5VA7ySUDfRt8Xj_H4,26677
366
366
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py,sha256=ty0dA3SsEUFtFbHo16tKdnKymrNKKsUO3KMYapMajbY,8704
367
367
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py,sha256=8SEeVwgjuvaYy-4ALAU0RHQMuRr2k7EkXba_csxk498,10673
368
368
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py,sha256=Gqanx4hBDcon_k5ClhUsS4YpMbZNiee8jvImGS9h43s,13229
@@ -388,7 +388,7 @@ nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oS
388
388
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
389
389
  nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
390
390
  nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
391
- nexaai-1.0.16rc13.dist-info/METADATA,sha256=eqPLK_7JBryWiB7qvdppmdEoHd42jZohyBHi0j1Lges,1202
392
- nexaai-1.0.16rc13.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
393
- nexaai-1.0.16rc13.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
394
- nexaai-1.0.16rc13.dist-info/RECORD,,
391
+ nexaai-1.0.16rc14.dist-info/METADATA,sha256=rD9zD2HduPUSrlSkUZUQ4Ut2g6nvwQ-PN0kgODO7TEU,1202
392
+ nexaai-1.0.16rc14.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
393
+ nexaai-1.0.16rc14.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
394
+ nexaai-1.0.16rc14.dist-info/RECORD,,