nexaai 1.0.16rc13__cp310-cp310-macosx_13_0_x86_64.whl → 1.0.16rc14__cp310-cp310-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

Binary file
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is generated by CMake from _version.py.in
2
2
  # Do not modify this file manually - it will be overwritten
3
3
 
4
- __version__ = "1.0.16-rc13"
4
+ __version__ = "1.0.16-rc14"
Binary file
@@ -80,6 +80,9 @@ class VLM(ProfilingMixin):
80
80
 
81
81
  # Init default sampler config with defaults.
82
82
  self.sampler_config = SamplerConfig()
83
+
84
+ # Track global character position for incremental processing
85
+ self.global_n_past_chars = 0
83
86
 
84
87
  def destroy(self) -> None:
85
88
  """Destroy the model and free resources."""
@@ -89,6 +92,7 @@ class VLM(ProfilingMixin):
89
92
  def reset(self) -> None:
90
93
  """Reset the model state."""
91
94
  self._reset_cache()
95
+ self.global_n_past_chars = 0
92
96
 
93
97
  def _reset_cache(self) -> None:
94
98
  """Reset the KV cache."""
@@ -141,6 +145,16 @@ class VLM(ProfilingMixin):
141
145
  image_list = [str(path) for path in image_paths] if image_paths else None
142
146
  audio_list = [str(path) for path in audio_paths] if audio_paths else None
143
147
 
148
+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
149
+ full_prompt_len = len(prompt)
150
+ incremental_prompt = prompt
151
+
152
+ if self.global_n_past_chars < full_prompt_len:
153
+ incremental_prompt = prompt[self.global_n_past_chars:]
154
+ else:
155
+ # No new text to process
156
+ incremental_prompt = ""
157
+
144
158
  # End prompt processing, start decode
145
159
  self._prompt_end()
146
160
  self._decode_start()
@@ -152,7 +166,7 @@ class VLM(ProfilingMixin):
152
166
  text, stats = generate(
153
167
  self.model,
154
168
  self.processor,
155
- prompt,
169
+ incremental_prompt, # Use incremental prompt instead of full prompt
156
170
  image=image_list,
157
171
  audio=audio_list,
158
172
  **gen_kwargs,
@@ -181,6 +195,10 @@ class VLM(ProfilingMixin):
181
195
  self._update_prompt_tokens(prompt_tokens)
182
196
  self._update_generated_tokens(generated_tokens)
183
197
  self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
198
+
199
+ # Update global character position
200
+ self.global_n_past_chars = full_prompt_len + len(text)
201
+
184
202
  self._decode_end()
185
203
  self._end_profiling()
186
204
 
@@ -226,6 +244,16 @@ class VLM(ProfilingMixin):
226
244
  image_list = [str(path) for path in image_paths] if image_paths else None
227
245
  audio_list = [str(path) for path in audio_paths] if audio_paths else None
228
246
 
247
+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
248
+ full_prompt_len = len(prompt)
249
+ incremental_prompt = prompt
250
+
251
+ if self.global_n_past_chars < full_prompt_len:
252
+ incremental_prompt = prompt[self.global_n_past_chars:]
253
+ else:
254
+ # No new text to process
255
+ incremental_prompt = ""
256
+
229
257
  # End prompt processing, start decode
230
258
  self._prompt_end()
231
259
  self._decode_start()
@@ -239,7 +267,7 @@ class VLM(ProfilingMixin):
239
267
  for result in stream_generate_impl(
240
268
  self.model,
241
269
  self.processor,
242
- prompt,
270
+ incremental_prompt, # Use incremental prompt instead of full prompt
243
271
  image=image_list,
244
272
  audio=audio_list,
245
273
  **gen_kwargs,
@@ -266,6 +294,9 @@ class VLM(ProfilingMixin):
266
294
  self._update_prompt_tokens(last_result.prompt_tokens)
267
295
  self._update_generated_tokens(last_result.generation_tokens)
268
296
 
297
+ # Update global character position
298
+ self.global_n_past_chars = full_prompt_len + len(text)
299
+
269
300
  self._decode_end()
270
301
  self._end_profiling()
271
302
 
@@ -232,7 +232,7 @@ def generate_step(
232
232
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
233
233
  prompt_processed_tokens += prefill_step_size
234
234
  y = y[prefill_step_size:]
235
- mx.metal.clear_cache()
235
+ mx.clear_cache()
236
236
 
237
237
  y, logprobs = _step(y)
238
238
 
@@ -249,7 +249,7 @@ def generate_step(
249
249
  break
250
250
  yield y.item(), logprobs
251
251
  if n % 256 == 0:
252
- mx.metal.clear_cache()
252
+ mx.clear_cache()
253
253
  y, logprobs = next_y, next_logprobs
254
254
  n += 1
255
255
 
@@ -371,7 +371,7 @@ def nexa_generate_step(
371
371
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
372
372
  prompt_processed_tokens += prefill_step_size
373
373
  y = y[prefill_step_size:]
374
- mx.metal.clear_cache()
374
+ mx.clear_cache()
375
375
 
376
376
  y, logprobs = _step(y)
377
377
 
@@ -388,7 +388,7 @@ def nexa_generate_step(
388
388
  break
389
389
  yield y.item(), logprobs
390
390
  if n % 256 == 0:
391
- mx.metal.clear_cache()
391
+ mx.clear_cache()
392
392
  y, logprobs = next_y, next_logprobs
393
393
  n += 1
394
394
 
@@ -507,7 +507,7 @@ def nexa_multimodal_generate_step(
507
507
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
508
508
  prompt_processed_tokens += prefill_step_size
509
509
  y = y[prefill_step_size:]
510
- mx.metal.clear_cache()
510
+ mx.clear_cache()
511
511
 
512
512
  y, logprobs = _step(y)
513
513
 
@@ -524,7 +524,7 @@ def nexa_multimodal_generate_step(
524
524
  break
525
525
  yield y.item(), logprobs
526
526
  if n % 256 == 0:
527
- mx.metal.clear_cache()
527
+ mx.clear_cache()
528
528
  y, logprobs = next_y, next_logprobs
529
529
  n += 1
530
530
 
@@ -632,7 +632,7 @@ def speculative_generate_step(
632
632
  quantize_cache_fn(cache)
633
633
  mx.eval([c.state for c in cache])
634
634
  y = y[prefill_step_size:]
635
- mx.metal.clear_cache()
635
+ mx.clear_cache()
636
636
  return y
637
637
 
638
638
  def _rewind_cache(num_draft, num_accept):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexaai
3
- Version: 1.0.16rc13
3
+ Version: 1.0.16rc14
4
4
  Summary: Python bindings for NexaSDK C-lib backend
5
5
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
6
6
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -1,6 +1,6 @@
1
1
  nexaai/__init__.py,sha256=L8oB7GFZZMGnUpCg0PecDbI_ycKuQak-ZEJ4Y12_QIw,2184
2
- nexaai/_stub.cpython-310-darwin.so,sha256=Bd-r6O9pG8m0SL3rkS3PQF8Z1ie_WD7uqaFPRyitb9E,49832
3
- nexaai/_version.py,sha256=w1I23pLkLt0xrD0hMhWK5fW9rqbLqnW1ii4yYp9UCTo,144
2
+ nexaai/_stub.cpython-310-darwin.so,sha256=KljcA21kmHR-5BpCJJJMrHFMwI2Wgixalw80FJSNBe8,49832
3
+ nexaai/_version.py,sha256=bFprtDX2rUyZwPnP3h4-J3LGlRF9uZAd4KOCazdh12I,144
4
4
  nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
5
5
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
6
6
  nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
@@ -19,7 +19,7 @@ nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XE
19
19
  nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
20
20
  nexaai/binds/common_bind.cpython-310-darwin.so,sha256=km1TU5WOJHVjvyM4l5mgAkS_omxuKt8pM92E9Wv0VqM,235488
21
21
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=b2NoXFAJvPLi_P1X7lXLKmAUU0v2HJI3Zwa10gfqHdw,202032
22
- nexaai/binds/libnexa_bridge.dylib,sha256=SLP_DHAJeSl5gJMSs2fZtPLv-VgNyojZTK0auqDXSpo,250408
22
+ nexaai/binds/libnexa_bridge.dylib,sha256=v770dZQxEZvtXZN8drsqLrrCPfblKSBJQOIbu96YUUY,250408
23
23
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=p1ZTGMolEkWywkmwzOUjTr3RpSEH21BHZAggVzo89Ks,183088
24
24
  nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=LGd-tykePnQFfGca25HnPIBfXsfrMzbwyx6d5Ld3xps,183000
25
25
  nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=GyOkHOM-5uHp7NUZ4Sr9BWak6BYpcc9aqI9A-zPnQp4,629528
@@ -247,7 +247,7 @@ nexaai/mlx_backend/tts/interface.py,sha256=0FvZbIyOvg8jERZEQ6bygbv7v02O9xHO4-TPU
247
247
  nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXqYzKew,73
248
248
  nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
249
249
  nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=undjso1mfxqpd6FMTksSA5qagRttxAGbOBj1x7cqI1s,9211
250
- nexaai/mlx_backend/vlm/interface.py,sha256=vFTzJCbqq55ybv_tbDBC9NVn1_sXgCfqXdsV-3ia8vo,16177
250
+ nexaai/mlx_backend/vlm/interface.py,sha256=0BLfodbYOU71jFvAvv01FuLBE_KBtyB-8Cd7LqzzRHY,17450
251
251
  nexaai/mlx_backend/vlm/main.py,sha256=nPcg25jupeDD74uvRoxpWp3Dsulw7WddI7vll6zejak,10664
252
252
  nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
253
253
  nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -362,7 +362,7 @@ nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py,sha256=LArnNtI98B_GJO
362
362
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
363
363
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py,sha256=4RlZwgz8YX2ngmJNaymxFFpw9hJu-0EMw9xwXpngW9o,3496
364
364
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py,sha256=NMOB6x-RT6svF4H-Ymo5WqnP7ptAal3aaKjWZXWGMsM,17671
365
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=Mw7Btz0_t7erQOrfWzCXT-ktEwZl61OODcmDMIo3VS0,26719
365
+ nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=bchCpnlewysWQss5TQKxdKPXYd5VA7ySUDfRt8Xj_H4,26677
366
366
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py,sha256=ty0dA3SsEUFtFbHo16tKdnKymrNKKsUO3KMYapMajbY,8704
367
367
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py,sha256=8SEeVwgjuvaYy-4ALAU0RHQMuRr2k7EkXba_csxk498,10673
368
368
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py,sha256=Gqanx4hBDcon_k5ClhUsS4YpMbZNiee8jvImGS9h43s,13229
@@ -388,7 +388,7 @@ nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oS
388
388
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
389
389
  nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
390
390
  nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
391
- nexaai-1.0.16rc13.dist-info/METADATA,sha256=eqPLK_7JBryWiB7qvdppmdEoHd42jZohyBHi0j1Lges,1202
392
- nexaai-1.0.16rc13.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
393
- nexaai-1.0.16rc13.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
394
- nexaai-1.0.16rc13.dist-info/RECORD,,
391
+ nexaai-1.0.16rc14.dist-info/METADATA,sha256=rD9zD2HduPUSrlSkUZUQ4Ut2g6nvwQ-PN0kgODO7TEU,1202
392
+ nexaai-1.0.16rc14.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
393
+ nexaai-1.0.16rc14.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
394
+ nexaai-1.0.16rc14.dist-info/RECORD,,